Package | Description |
---|---|
org.apache.tika |
Apache Tika.
|
org.apache.tika.config |
Tika configuration tools.
|
org.apache.tika.detect |
Media type detection.
|
org.apache.tika.embedder | |
org.apache.tika.exception |
Tika exception.
|
org.apache.tika.extractor |
Extraction of component documents.
|
org.apache.tika.fork |
Forked parser.
|
org.apache.tika.io |
IO utilities.
|
org.apache.tika.language |
Language detection.
|
org.apache.tika.language.translate | |
org.apache.tika.metadata.serialization | |
org.apache.tika.mime |
Media type information.
|
org.apache.tika.parser |
Tika parsers.
|
org.apache.tika.parser.audio | |
org.apache.tika.parser.envi | |
org.apache.tika.parser.epub | |
org.apache.tika.parser.external |
External parser process.
|
org.apache.tika.parser.feed | |
org.apache.tika.parser.gdal | |
org.apache.tika.parser.iptc | |
org.apache.tika.parser.iwork | |
org.apache.tika.parser.strings | |
org.apache.tika.parser.video | |
org.apache.tika.parser.xml | |
org.apache.tika.sax |
SAX utilities.
|
Modifier and Type | Method and Description |
---|---|
java.lang.String |
Tika.parseToString(java.io.File file)
Parses the given file and returns the extracted text content.
|
java.lang.String |
Tika.parseToString(java.io.InputStream stream)
Parses the given document and returns the extracted text content.
|
java.lang.String |
Tika.parseToString(java.io.InputStream stream,
Metadata metadata)
Parses the given document and returns the extracted text content.
|
java.lang.String |
Tika.parseToString(java.io.InputStream stream,
Metadata metadata,
int maxLength)
Parses the given document and returns the extracted text content.
|
java.lang.String |
Tika.parseToString(java.nio.file.Path path)
Parses the file at the given path and returns the extracted text content.
|
java.lang.String |
Tika.parseToString(java.net.URL url)
Parses the resource at the given URL and returns the extracted
text content.
|
Modifier and Type | Method and Description |
---|---|
(package private) ConfigurableThreadPoolExecutor |
TikaConfig.ExecutorServiceXmlLoader.decorate(ConfigurableThreadPoolExecutor created,
org.w3c.dom.Element element) |
(package private) Parser |
TikaConfig.ParserXmlLoader.decorate(Parser created,
org.w3c.dom.Element element) |
(package private) abstract T |
TikaConfig.XmlLoader.decorate(T created,
org.w3c.dom.Element element) |
private static javax.xml.parsers.DocumentBuilder |
TikaConfig.getBuilder() |
private static java.io.InputStream |
TikaConfig.getConfigInputStream(java.lang.String config,
ServiceLoader serviceLoader) |
private static java.util.List<org.w3c.dom.Element> |
TikaConfig.getTopLevelElementChildren(org.w3c.dom.Element element,
java.lang.String parentName,
java.lang.String childrenName) |
(package private) T |
TikaConfig.XmlLoader.loadOne(org.w3c.dom.Element element,
MimeTypes mimeTypes,
ServiceLoader loader) |
(package private) ConfigurableThreadPoolExecutor |
TikaConfig.ExecutorServiceXmlLoader.loadOne(org.w3c.dom.Element element,
MimeTypes mimeTypes,
ServiceLoader loader) |
(package private) CT |
TikaConfig.XmlLoader.loadOverall(org.w3c.dom.Element element,
MimeTypes mimeTypes,
ServiceLoader loader) |
private static java.util.Set<MediaType> |
TikaConfig.mediaTypesListFromDomElement(org.w3c.dom.Element node,
java.lang.String tag) |
(package private) ConfigurableThreadPoolExecutor |
TikaConfig.ExecutorServiceXmlLoader.preLoadOne(java.lang.Class<? extends ConfigurableThreadPoolExecutor> loadedClass,
java.lang.String classname,
MimeTypes mimeTypes) |
(package private) Detector |
TikaConfig.DetectorXmlLoader.preLoadOne(java.lang.Class<? extends Detector> loadedClass,
java.lang.String classname,
MimeTypes mimeTypes) |
(package private) Parser |
TikaConfig.ParserXmlLoader.preLoadOne(java.lang.Class<? extends Parser> loadedClass,
java.lang.String classname,
MimeTypes mimeTypes) |
(package private) abstract T |
TikaConfig.XmlLoader.preLoadOne(java.lang.Class<? extends T> loadedClass,
java.lang.String classname,
MimeTypes mimeTypes) |
(package private) Translator |
TikaConfig.TranslatorXmlLoader.preLoadOne(java.lang.Class<? extends Translator> loadedClass,
java.lang.String classname,
MimeTypes mimeTypes) |
private static MimeTypes |
TikaConfig.typesFromDomElement(org.w3c.dom.Element element) |
Constructor and Description |
---|
TikaConfig()
Creates a default Tika configuration.
|
TikaConfig(org.w3c.dom.Document document) |
TikaConfig(org.w3c.dom.Document document,
ServiceLoader loader) |
TikaConfig(org.w3c.dom.Element element) |
TikaConfig(org.w3c.dom.Element element,
java.lang.ClassLoader loader) |
TikaConfig(org.w3c.dom.Element element,
ServiceLoader loader) |
TikaConfig(java.io.File file) |
TikaConfig(java.io.File file,
ServiceLoader loader) |
TikaConfig(java.io.InputStream stream) |
TikaConfig(java.nio.file.Path path) |
TikaConfig(java.nio.file.Path path,
ServiceLoader loader) |
TikaConfig(java.lang.String file) |
TikaConfig(java.net.URL url) |
TikaConfig(java.net.URL url,
java.lang.ClassLoader loader) |
TikaConfig(java.net.URL url,
ServiceLoader loader) |
Modifier and Type | Method and Description |
---|---|
private static java.nio.charset.Charset |
AutoDetectReader.detect(java.io.InputStream input,
Metadata metadata,
java.util.List<EncodingDetector> detectors,
LoadErrorHandler handler) |
Constructor and Description |
---|
AutoDetectReader(java.io.BufferedInputStream stream,
Metadata metadata,
java.util.List<EncodingDetector> detectors,
LoadErrorHandler handler) |
AutoDetectReader(java.io.InputStream stream) |
AutoDetectReader(java.io.InputStream stream,
Metadata metadata) |
AutoDetectReader(java.io.InputStream stream,
Metadata metadata,
ServiceLoader loader) |
Modifier and Type | Method and Description |
---|---|
void |
Embedder.embed(Metadata metadata,
java.io.InputStream originalStream,
java.io.OutputStream outputStream,
ParseContext context)
Embeds related document metadata from the given metadata object into the
given output stream.
|
void |
ExternalEmbedder.embed(Metadata metadata,
java.io.InputStream inputStream,
java.io.OutputStream outputStream,
ParseContext context)
Executes the configured external command to embed the given metadata into
the document read from the input stream, writing the result to the given output stream.
|
Modifier and Type | Class and Description |
---|---|
class |
AccessPermissionException
Exception to be thrown when a document does not allow content extraction.
|
class |
EncryptedDocumentException |
Modifier and Type | Method and Description |
---|---|
void |
ContainerExtractor.extract(TikaInputStream stream,
ContainerExtractor recurseExtractor,
EmbeddedResourceHandler handler)
Processes a container file, and extracts all the embedded
resources from within it.
|
void |
ParserContainerExtractor.extract(TikaInputStream stream,
ContainerExtractor recurseExtractor,
EmbeddedResourceHandler handler) |
void |
ParserContainerExtractor.RecursiveParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler ignored,
Metadata metadata,
ParseContext context) |
Modifier and Type | Method and Description |
---|---|
private ForkClient |
ForkParser.acquireClient() |
java.lang.Throwable |
ForkClient.call(java.lang.String method,
java.lang.Object... args) |
void |
ForkParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
private void |
ForkClient.sendObject(java.lang.Object object,
java.util.List<ForkResource> resources)
Serializes the object first into an in-memory buffer and then
writes it to the output stream with a preceding size integer.
|
Constructor and Description |
---|
ForkClient(java.lang.ClassLoader loader,
java.lang.Object object,
java.util.List<java.lang.String> java) |
Modifier and Type | Class and Description |
---|---|
static class |
EndianUtils.BufferUnderrunException |
Modifier and Type | Method and Description |
---|---|
void |
TemporaryResources.dispose()
Calls the
TemporaryResources.close() method and wraps the potential
IOException into a TikaException for convenience
when used within Tika. |
Modifier and Type | Method and Description |
---|---|
static LanguageProfilerBuilder |
LanguageProfilerBuilder.create(java.lang.String name,
java.io.InputStream is,
java.lang.String encoding)
Creates a new language profile from a text file (preferably quite large:
5-10k lines).
|
float |
LanguageProfilerBuilder.getSimilarity(LanguageProfilerBuilder another)
Calculates a score for how well the NGramProfiles match each other.
|
Modifier and Type | Method and Description |
---|---|
java.lang.String |
DefaultTranslator.translate(java.lang.String text,
java.lang.String targetLanguage)
Translate, using the first available service-loaded translator
|
java.lang.String |
Translator.translate(java.lang.String text,
java.lang.String targetLanguage)
Translate text to the given language.
|
java.lang.String |
DefaultTranslator.translate(java.lang.String text,
java.lang.String sourceLanguage,
java.lang.String targetLanguage)
Translate, using the first available service-loaded translator
|
java.lang.String |
Translator.translate(java.lang.String text,
java.lang.String sourceLanguage,
java.lang.String targetLanguage)
Translate text between given languages.
|
Modifier and Type | Method and Description |
---|---|
static Metadata |
JsonMetadata.fromJson(java.io.Reader reader)
Read metadata from reader.
|
static java.util.List<Metadata> |
JsonMetadataList.fromJson(java.io.Reader reader)
Reads a list of metadata objects from the reader.
|
static void |
JsonMetadataList.toJson(java.util.List<Metadata> metadataList,
java.io.Writer writer)
Serializes a list of Metadata objects to JSON.
|
static void |
JsonMetadata.toJson(Metadata metadata,
java.io.Writer writer)
Serializes a Metadata object to JSON.
|
Modifier and Type | Class and Description |
---|---|
class |
MimeTypeException
A class to encapsulate MimeType related exceptions.
|
Modifier and Type | Method and Description |
---|---|
javax.xml.parsers.SAXParser |
ParseContext.getSAXParser()
Returns the SAX parser specified in this parsing context.
|
void |
AbstractParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata)
Deprecated.
Use the
Parser.parse(InputStream, ContentHandler, Metadata, ParseContext) method instead |
void |
AutoDetectParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata) |
void |
ParserDecorator.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context)
Delegates the method call to the decorated parser.
|
void |
DigestingParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
void |
NetworkParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
void |
NetworkParser.ParsingTask.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
void |
AutoDetectParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
void |
CompositeParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context)
Delegates the call to the matching component parser.
|
void |
RecursiveParserWrapper.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler ignore,
Metadata metadata,
ParseContext context)
Acts like a regular parser except it ignores the ContentHandler
and it automatically sets/overwrites the embedded Parser in the
ParseContext object.
|
void |
RecursiveParserWrapper.EmbeddedParserDecorator.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler ignore,
Metadata metadata,
ParseContext context) |
void |
ParserPostProcessor.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context)
Forwards the call to the delegated parser and post-processes the
results as described above.
|
void |
CryptoParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
void |
DelegatingParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context)
Looks up the delegate parser from the parsing context and
delegates the parse operation to it.
|
void |
Parser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context)
Parses a document stream into a sequence of XHTML SAX events.
|
void |
ErrorParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
private void |
NetworkParser.parse(TikaInputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
Modifier and Type | Method and Description |
---|---|
void |
AudioParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
void |
MidiParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
Modifier and Type | Method and Description |
---|---|
void |
EnviHeaderParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
Modifier and Type | Method and Description |
---|---|
void |
EpubParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
void |
EpubContentParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
Modifier and Type | Method and Description |
---|---|
static void |
ExternalParsersFactory.attachExternalParsers(TikaConfig config) |
static java.util.List<ExternalParser> |
ExternalParsersFactory.create() |
static java.util.List<ExternalParser> |
ExternalParsersFactory.create(ServiceLoader loader) |
static java.util.List<ExternalParser> |
ExternalParsersFactory.create(java.lang.String filename,
ServiceLoader loader) |
static java.util.List<ExternalParser> |
ExternalParsersFactory.create(java.net.URL... urls) |
void |
ExternalParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context)
Executes the configured external command and passes the given document
stream as a simple XHTML document to the given SAX content handler.
|
private void |
ExternalParser.parse(TikaInputStream stream,
XHTMLContentHandler xhtml,
Metadata metadata,
TemporaryResources tmp) |
static java.util.List<ExternalParser> |
ExternalParsersConfigReader.read(org.w3c.dom.Document document) |
static java.util.List<ExternalParser> |
ExternalParsersConfigReader.read(org.w3c.dom.Element element) |
static java.util.List<ExternalParser> |
ExternalParsersConfigReader.read(java.io.InputStream stream) |
private static ExternalParser |
ExternalParsersConfigReader.readParser(org.w3c.dom.Element parserDef)
Builds and returns an ExternalParser, or null if a check
command was given that didn't match.
|
Constructor and Description |
---|
CompositeExternalParser() |
CompositeExternalParser(MediaTypeRegistry registry) |
Modifier and Type | Method and Description |
---|---|
void |
FeedParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
Modifier and Type | Method and Description |
---|---|
void |
GDALParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
Modifier and Type | Method and Description |
---|---|
void |
IptcAnpaParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata)
Deprecated.
This method will be removed in Apache Tika 1.0.
|
void |
IptcAnpaParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
Modifier and Type | Method and Description |
---|---|
void |
IWorkPackageParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
Modifier and Type | Method and Description |
---|---|
private int |
StringsParser.doStrings(java.io.File input,
StringsConfig config,
XHTMLContentHandler xhtml)
Runs the "strings" command on the given file.
|
void |
StringsParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
Modifier and Type | Method and Description |
---|---|
void |
FLVParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
Modifier and Type | Method and Description |
---|---|
void |
XMLParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
Modifier and Type | Method and Description |
---|---|
void |
SecureContentHandler.throwIfCauseOf(org.xml.sax.SAXException e)
Converts the given
SAXException to a corresponding
TikaException if it's caused by this instance detecting
a zip bomb. |