Package | Description |
---|---|
org.apache.tika.config |
Tika configuration tools.
|
org.apache.tika.detect |
Media type detection.
|
org.apache.tika.embedder | |
org.apache.tika.extractor |
Extraction of component documents.
|
org.apache.tika.fork |
Forked parser.
|
org.apache.tika.mime |
Media type information.
|
org.apache.tika.parser |
Tika parsers.
|
org.apache.tika.parser.audio | |
org.apache.tika.parser.envi | |
org.apache.tika.parser.epub | |
org.apache.tika.parser.external |
External parser process.
|
org.apache.tika.parser.feed | |
org.apache.tika.parser.gdal | |
org.apache.tika.parser.iptc | |
org.apache.tika.parser.iwork | |
org.apache.tika.parser.strings | |
org.apache.tika.parser.video | |
org.apache.tika.parser.xml | |
Modifier and Type | Method and Description |
---|---|
private static java.util.Set<MediaType> |
TikaConfig.mediaTypesListFromDomElement(org.w3c.dom.Element node,
java.lang.String tag) |
Modifier and Type | Method and Description |
---|---|
Parser |
TikaConfig.getParser(MediaType mimeType)
Deprecated.
Use the
TikaConfig.getParser() method instead |
Modifier and Type | Field and Description |
---|---|
private MediaType |
NNTrainedModelBuilder.type |
private MediaType |
MagicDetector.type
The matching media type.
|
Modifier and Type | Field and Description |
---|---|
private java.util.Map<MediaType,TrainedModel> |
TrainedModelDetector.MODEL_MAP |
private java.util.Map<java.util.regex.Pattern,MediaType> |
NameDetector.patterns
The regular expression patterns used for type detection.
|
Modifier and Type | Method and Description |
---|---|
MediaType |
Detector.detect(java.io.InputStream input,
Metadata metadata)
Detects the content type of the given input document.
|
MediaType |
TrainedModelDetector.detect(java.io.InputStream input,
Metadata metadata) |
MediaType |
TypeDetector.detect(java.io.InputStream input,
Metadata metadata)
Detects the content type of an input document based on a type hint
given in the input metadata.
|
MediaType |
TextDetector.detect(java.io.InputStream input,
Metadata metadata)
Looks at the beginning of the document input stream to determine
whether the document is text or not.
|
MediaType |
MagicDetector.detect(java.io.InputStream input,
Metadata metadata) |
MediaType |
EmptyDetector.detect(java.io.InputStream input,
Metadata metadata) |
MediaType |
CompositeDetector.detect(java.io.InputStream input,
Metadata metadata) |
MediaType |
NameDetector.detect(java.io.InputStream input,
Metadata metadata)
Detects the content type of an input document based on the document
name given in the input metadata.
|
MediaType |
NNTrainedModelBuilder.getType() |
Modifier and Type | Method and Description |
---|---|
static MagicDetector |
MagicDetector.parse(MediaType mediaType,
java.lang.String type,
java.lang.String offset,
java.lang.String value,
java.lang.String mask) |
protected void |
TrainedModelDetector.registerModels(MediaType type,
TrainedModel model) |
void |
NNTrainedModelBuilder.setType(MediaType type) |
Constructor and Description |
---|
MagicDetector(MediaType type,
byte[] pattern)
Creates a detector for input documents that have the exact given byte
pattern at the beginning of the document stream.
|
MagicDetector(MediaType type,
byte[] pattern,
byte[] mask,
boolean isRegex,
boolean isStringIgnoreCase,
int offsetRangeBegin,
int offsetRangeEnd)
Creates a detector for input documents that meet the specified
magic match.
|
MagicDetector(MediaType type,
byte[] pattern,
byte[] mask,
boolean isRegex,
int offsetRangeBegin,
int offsetRangeEnd)
Creates a detector for input documents that meet the specified
magic match.
|
MagicDetector(MediaType type,
byte[] pattern,
byte[] mask,
int offsetRangeBegin,
int offsetRangeEnd)
Creates a detector for input documents that meet the specified magic
match.
|
MagicDetector(MediaType type,
byte[] pattern,
int offset)
Creates a detector for input documents that have the exact given byte
pattern at the given offset of the document stream.
|
Constructor and Description |
---|
NameDetector(java.util.Map<java.util.regex.Pattern,MediaType> patterns)
Creates a new content type detector based on the given name patterns.
|
Modifier and Type | Field and Description |
---|---|
private java.util.Set<MediaType> |
ExternalEmbedder.supportedEmbedTypes
Media types supported by the external program.
|
Modifier and Type | Method and Description |
---|---|
java.util.Set<MediaType> |
ExternalEmbedder.getSupportedEmbedTypes() |
java.util.Set<MediaType> |
Embedder.getSupportedEmbedTypes(ParseContext context)
Returns the set of media types supported by this embedder when used with
the given parse context.
|
java.util.Set<MediaType> |
ExternalEmbedder.getSupportedEmbedTypes(ParseContext context) |
Modifier and Type | Method and Description |
---|---|
void |
ExternalEmbedder.setSupportedEmbedTypes(java.util.Set<MediaType> supportedEmbedTypes) |
Modifier and Type | Method and Description |
---|---|
java.util.Set<MediaType> |
ParserContainerExtractor.RecursiveParser.getSupportedTypes(ParseContext context) |
Modifier and Type | Method and Description |
---|---|
void |
EmbeddedResourceHandler.handle(java.lang.String filename,
MediaType mediaType,
java.io.InputStream stream)
Called to process an embedded resource within the container.
|
Modifier and Type | Method and Description |
---|---|
java.util.Set<MediaType> |
ForkParser.getSupportedTypes(ParseContext context) |
Modifier and Type | Field and Description |
---|---|
static MediaType |
MediaType.APPLICATION_XML |
static MediaType |
MediaType.APPLICATION_ZIP |
private MediaType |
MagicMatch.mediaType |
static MediaType |
MediaType.OCTET_STREAM |
private MediaType |
ProbabilisticMimeDetectionSelector.rootMediaType |
static MediaType |
MediaType.TEXT_HTML |
static MediaType |
MediaType.TEXT_PLAIN |
private MediaType |
MimeType.type
The normalized media type name.
|
Modifier and Type | Field and Description |
---|---|
private java.util.Map<MediaType,MediaType> |
MediaTypeRegistry.inheritance
Known type inheritance relationships.
|
private java.util.Map<MediaType,MediaType> |
MediaTypeRegistry.inheritance
Known type inheritance relationships.
|
private java.util.Map<MediaType,MediaType> |
MediaTypeRegistry.registry
Registry of known media types, including type aliases.
|
private java.util.Map<MediaType,MediaType> |
MediaTypeRegistry.registry
Registry of known media types, including type aliases.
|
private static java.util.Map<java.lang.String,MediaType> |
MediaType.SIMPLE_TYPES
Set of basic types with normalized "type/subtype" names.
|
private java.util.Map<MediaType,MimeType> |
MimeTypes.types
All the registered MimeTypes indexed on their canonical names.
|
Modifier and Type | Method and Description |
---|---|
static MediaType |
MediaType.application(java.lang.String type) |
private MediaType |
ProbabilisticMimeDetectionSelector.applyProbilities(java.util.List<MimeType> possibleTypes,
MimeType extMimeType,
MimeType metadataMimeType) |
static MediaType |
MediaType.audio(java.lang.String type) |
MediaType |
ProbabilisticMimeDetectionSelector.detect(java.io.InputStream input,
Metadata metadata) |
MediaType |
MimeTypes.detect(java.io.InputStream input,
Metadata metadata)
Automatically detects the MIME type of a document based on magic
markers in the stream prefix and any given metadata hints.
|
MediaType |
MediaType.getBaseType()
Returns the base form of the MediaType, excluding
any parameters, such as "text/plain" for
"text/plain; charset=utf-8"
|
MediaType |
MediaTypeRegistry.getSupertype(MediaType type)
Returns the supertype of the given type.
|
MediaType |
MimeType.getType()
Returns the normalized media type name.
|
static MediaType |
MediaType.image(java.lang.String type) |
MediaType |
MediaTypeRegistry.normalize(MediaType type) |
static MediaType |
MediaType.parse(java.lang.String string)
Parses the given string to a media type.
|
static MediaType |
MediaType.text(java.lang.String type) |
static MediaType |
MediaType.video(java.lang.String type) |
Modifier and Type | Method and Description |
---|---|
java.util.SortedSet<MediaType> |
MediaTypeRegistry.getAliases(MediaType type)
Returns the set of known aliases of the given canonical media type.
|
java.util.SortedSet<MediaType> |
MediaTypeRegistry.getChildTypes(MediaType type)
Returns the set of known children of the given canonical media type.
|
java.util.SortedSet<MediaType> |
MediaTypeRegistry.getTypes()
Returns the set of all known canonical media types.
|
static java.util.Set<MediaType> |
MediaType.set(MediaType... types)
Convenience method that returns an unmodifiable set that contains
all the given media types.
|
static java.util.Set<MediaType> |
MediaType.set(java.lang.String... types)
Convenience method that parses the given media type strings and
returns an unmodifiable set that contains all the parsed types.
|
Modifier and Type | Method and Description |
---|---|
void |
MediaTypeRegistry.addAlias(MediaType type,
MediaType alias) |
(package private) void |
MimeTypes.addAlias(MimeType type,
MediaType alias)
Adds an alias for the given media type.
|
void |
MediaTypeRegistry.addSuperType(MediaType type,
MediaType supertype) |
void |
MediaTypeRegistry.addType(MediaType type) |
int |
MediaType.compareTo(MediaType that) |
java.util.SortedSet<MediaType> |
MediaTypeRegistry.getAliases(MediaType type)
Returns the set of known aliases of the given canonical media type.
|
java.util.SortedSet<MediaType> |
MediaTypeRegistry.getChildTypes(MediaType type)
Returns the set of known children of the given canonical media type.
|
MediaType |
MediaTypeRegistry.getSupertype(MediaType type)
Returns the supertype of the given type.
|
boolean |
MediaTypeRegistry.isInstanceOf(MediaType a,
MediaType b)
Checks whether the given media type equals the given base type or
is a specialization of it.
|
boolean |
MediaTypeRegistry.isInstanceOf(java.lang.String a,
MediaType b)
Parses and normalises the given media type string and checks whether
the result equals the given base type or is a specialization of it.
|
boolean |
MediaTypeRegistry.isSpecializationOf(MediaType a,
MediaType b)
Checks whether the given media type a is a specialization of a more
generic type b.
|
MediaType |
MediaTypeRegistry.normalize(MediaType type) |
static java.util.Set<MediaType> |
MediaType.set(MediaType... types)
Convenience method that returns an unmodifiable set that contains
all the given media types.
|
void |
MimeTypes.setSuperType(MimeType type,
MediaType parent) |
Constructor and Description |
---|
MagicMatch(MediaType mediaType,
java.lang.String type,
java.lang.String offset,
java.lang.String value,
java.lang.String mask) |
MediaType(MediaType type,
java.nio.charset.Charset charset)
Creates a media type by adding the "charset" parameter to a base type.
|
MediaType(MediaType type,
java.util.Map<java.lang.String,java.lang.String> parameters) |
MediaType(MediaType type,
java.lang.String name,
java.lang.String value)
Creates a media type by adding a parameter to a base type.
|
MimeType(MediaType type)
Creates a media type with the given name and containing media type
registry.
|
Modifier and Type | Field and Description |
---|---|
private java.util.Set<MediaType> |
NetworkParser.supportedTypes |
private java.util.Set<MediaType> |
CryptoParser.types |
Modifier and Type | Method and Description |
---|---|
java.util.Map<MediaType,java.util.List<Parser>> |
CompositeParser.findDuplicateParsers(ParseContext context)
Utility method that goes through all the component parsers and finds
all media types for which more than one parser declares support.
|
java.util.Map<MediaType,Parser> |
CompositeParser.getParsers()
Returns the component parsers.
|
java.util.Map<MediaType,Parser> |
DefaultParser.getParsers(ParseContext context) |
java.util.Map<MediaType,Parser> |
CompositeParser.getParsers(ParseContext context) |
java.util.Set<MediaType> |
ParserDecorator.getSupportedTypes(ParseContext context)
Delegates the method call to the decorated parser.
|
java.util.Set<MediaType> |
NetworkParser.getSupportedTypes(ParseContext context) |
java.util.Set<MediaType> |
EmptyParser.getSupportedTypes(ParseContext context) |
java.util.Set<MediaType> |
CompositeParser.getSupportedTypes(ParseContext context) |
java.util.Set<MediaType> |
RecursiveParserWrapper.getSupportedTypes(ParseContext context) |
java.util.Set<MediaType> |
CryptoParser.getSupportedTypes(ParseContext context) |
java.util.Set<MediaType> |
DelegatingParser.getSupportedTypes(ParseContext context) |
java.util.Set<MediaType> |
Parser.getSupportedTypes(ParseContext context)
Returns the set of media types supported by this parser when used
with the given parse context.
|
java.util.Set<MediaType> |
ErrorParser.getSupportedTypes(ParseContext context) |
Modifier and Type | Method and Description |
---|---|
void |
CompositeParser.setParsers(java.util.Map<MediaType,Parser> parsers)
Sets the component parsers.
|
static Parser |
ParserDecorator.withFallbacks(java.util.Collection<? extends Parser> parsers,
java.util.Set<MediaType> types)
Deprecated.
Do not use until the TODOs are resolved, see TIKA-1509
|
static Parser |
ParserDecorator.withoutTypes(Parser parser,
java.util.Set<MediaType> excludeTypes)
Decorates the given parser so that it never claims to support
parsing of the given media types, but will work for all others.
|
static Parser |
ParserDecorator.withTypes(Parser parser,
java.util.Set<MediaType> types)
Decorates the given parser so that it always claims to support
parsing of the given media types.
|
Constructor and Description |
---|
CryptoParser(java.lang.String transformation,
java.security.Provider provider,
java.util.Set<MediaType> types) |
CryptoParser(java.lang.String transformation,
java.util.Set<MediaType> types) |
NetworkParser(java.net.URI uri,
java.util.Set<MediaType> supportedTypes) |
Modifier and Type | Field and Description |
---|---|
private static java.util.Set<MediaType> |
AudioParser.SUPPORTED_TYPES |
private static java.util.Set<MediaType> |
MidiParser.SUPPORTED_TYPES |
Modifier and Type | Method and Description |
---|---|
java.util.Set<MediaType> |
AudioParser.getSupportedTypes(ParseContext context) |
java.util.Set<MediaType> |
MidiParser.getSupportedTypes(ParseContext context) |
Modifier and Type | Field and Description |
---|---|
private static java.util.Set<MediaType> |
EnviHeaderParser.SUPPORTED_TYPES |
Modifier and Type | Method and Description |
---|---|
java.util.Set<MediaType> |
EnviHeaderParser.getSupportedTypes(ParseContext context) |
Modifier and Type | Field and Description |
---|---|
private static java.util.Set<MediaType> |
EpubParser.SUPPORTED_TYPES |
Modifier and Type | Method and Description |
---|---|
java.util.Set<MediaType> |
EpubParser.getSupportedTypes(ParseContext context) |
java.util.Set<MediaType> |
EpubContentParser.getSupportedTypes(ParseContext context) |
Modifier and Type | Field and Description |
---|---|
private java.util.Set<MediaType> |
ExternalParser.supportedTypes
Media types supported by the external program.
|
Modifier and Type | Method and Description |
---|---|
java.util.Set<MediaType> |
ExternalParser.getSupportedTypes() |
java.util.Set<MediaType> |
ExternalParser.getSupportedTypes(ParseContext context) |
private static java.util.Set<MediaType> |
ExternalParsersConfigReader.readMimeTypes(org.w3c.dom.Element mimeTypes) |
Modifier and Type | Method and Description |
---|---|
void |
ExternalParser.setSupportedTypes(java.util.Set<MediaType> supportedTypes) |
Modifier and Type | Field and Description |
---|---|
private static java.util.Set<MediaType> |
FeedParser.SUPPORTED_TYPES |
Modifier and Type | Method and Description |
---|---|
java.util.Set<MediaType> |
FeedParser.getSupportedTypes(ParseContext context) |
Modifier and Type | Method and Description |
---|---|
java.util.Set<MediaType> |
GDALParser.getSupportedTypes(ParseContext context) |
Modifier and Type | Field and Description |
---|---|
private static MediaType |
IptcAnpaParser.TYPE |
Modifier and Type | Field and Description |
---|---|
private static java.util.Set<MediaType> |
IptcAnpaParser.SUPPORTED_TYPES |
Modifier and Type | Method and Description |
---|---|
java.util.Set<MediaType> |
IptcAnpaParser.getSupportedTypes(ParseContext context) |
Modifier and Type | Field and Description |
---|---|
private MediaType |
IWorkPackageParser.IWORKDocumentType.type |
Modifier and Type | Field and Description |
---|---|
private static java.util.Set<MediaType> |
IWorkPackageParser.supportedTypes
This parser handles all iWorks formats.
|
Modifier and Type | Method and Description |
---|---|
MediaType |
IWorkPackageParser.IWORKDocumentType.getType() |
Modifier and Type | Method and Description |
---|---|
java.util.Set<MediaType> |
IWorkPackageParser.getSupportedTypes(ParseContext context) |
Constructor and Description |
---|
IWORKDocumentType(java.lang.String namespace,
java.lang.String part,
MediaType type) |
Modifier and Type | Field and Description |
---|---|
private static java.util.Set<MediaType> |
Latin1StringsParser.SUPPORTED_TYPES
The set of supported types
|
private static java.util.Set<MediaType> |
StringsParser.SUPPORTED_TYPES |
Modifier and Type | Method and Description |
---|---|
java.util.Set<MediaType> |
Latin1StringsParser.getSupportedTypes(ParseContext arg0) |
java.util.Set<MediaType> |
StringsParser.getSupportedTypes(ParseContext context) |
private static java.util.Set<MediaType> |
Latin1StringsParser.getTypes()
Returns the set of supported types.
|
Modifier and Type | Field and Description |
---|---|
private static java.util.Set<MediaType> |
FLVParser.SUPPORTED_TYPES |
Modifier and Type | Method and Description |
---|---|
java.util.Set<MediaType> |
FLVParser.getSupportedTypes(ParseContext context) |
Modifier and Type | Field and Description |
---|---|
private static java.util.Set<MediaType> |
XMLParser.SUPPORTED_TYPES |
Modifier and Type | Method and Description |
---|---|
java.util.Set<MediaType> |
FictionBookParser.getSupportedTypes(ParseContext context) |
java.util.Set<MediaType> |
XMLParser.getSupportedTypes(ParseContext context) |