public class IptcAnpaParser extends java.lang.Object implements Parser
Modifier and Type | Field and Description |
---|---|
private static char |
BS |
private static char |
CR |
private static char |
CT |
private static char |
DL |
private static char |
DR |
private static char |
EOT |
private static char |
EQ |
private static char |
ETX |
private static char |
FF |
private int |
FMT_ANPA_1312 |
private int |
FMT_ANPA_UPI |
private int |
FMT_ANPA_UPI_DL |
private int |
FMT_IPTC_7901 |
private int |
FMT_IPTC_AP |
private int |
FMT_IPTC_BLM |
private int |
FMT_IPTC_CHAR |
private int |
FMT_IPTC_NYT |
private int |
FMT_IPTC_PHOTO |
private int |
FMT_IPTC_RTR |
private int |
FMT_NITF |
private int |
FMT_NITF_RB |
private int |
FMT_NITF_TT |
private int |
FORMAT |
private static char |
FS |
private static char |
HY |
private static char |
LF |
private static char |
LT |
private static long |
serialVersionUID
Serial version UID
|
private static char |
SL |
private static char |
SOH |
private static char |
SP |
private static char |
SR |
private static char |
STX |
private static java.util.Set<MediaType> |
SUPPORTED_TYPES |
private static char |
SYN |
private static char |
TB |
private static MediaType |
TYPE |
private static char |
XQ |
private static char |
XS |
Constructor and Description |
---|
IptcAnpaParser() |
Modifier and Type | Method and Description |
---|---|
private java.lang.String |
clean(java.lang.String value) |
private java.lang.String |
getFormatName() |
private byte[] |
getSection(java.io.InputStream is,
int maxsize,
byte bstart,
byte bfinish,
boolean ifincomplete) |
private byte[] |
getSection(java.io.InputStream is,
java.lang.String name) |
java.util.Set<MediaType> |
getSupportedTypes(ParseContext context)
Returns the set of media types supported by this parser when used
with the given parse context.
|
private java.util.HashMap<java.lang.String,java.lang.String> |
loadProperties(java.io.InputStream is)
Scans the news message and stores the metadata and data in a map.
|
void |
parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata)
Deprecated.
This method will be removed in Apache Tika 1.0.
|
void |
parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context)
Parses a document stream into a sequence of XHTML SAX events.
|
private boolean |
parseBody(byte[] value,
java.util.HashMap<java.lang.String,java.lang.String> properties) |
private boolean |
parseFooter(byte[] value,
java.util.HashMap<java.lang.String,java.lang.String> properties) |
private boolean |
parseHeader(byte[] value,
java.util.HashMap<java.lang.String,java.lang.String> properties) |
private int |
scanFormat(java.io.InputStream is) |
private void |
setFormat(int format) |
private void |
setMetadata(Metadata metadata,
java.util.HashMap<java.lang.String,java.lang.String> properties) |
private static final long serialVersionUID
private static final MediaType TYPE
private static final java.util.Set<MediaType> SUPPORTED_TYPES
private int FMT_ANPA_1312
private int FMT_ANPA_UPI
private int FMT_ANPA_UPI_DL
private int FMT_IPTC_7901
private int FMT_IPTC_PHOTO
private int FMT_IPTC_CHAR
private int FMT_NITF
private int FMT_NITF_TT
private int FMT_NITF_RB
private int FMT_IPTC_AP
private int FMT_IPTC_BLM
private int FMT_IPTC_NYT
private int FMT_IPTC_RTR
private int FORMAT
private static final char SOH
private static final char STX
private static final char ETX
private static final char EOT
private static final char SYN
private static final char BS
private static final char TB
private static final char LF
private static final char FF
private static final char CR
private static final char XQ
private static final char XS
private static final char FS
private static final char HY
private static final char SP
private static final char LT
private static final char EQ
private static final char CT
private static final char SL
private static final char SR
private static final char DL
private static final char DR
public java.util.Set<MediaType> getSupportedTypes(ParseContext context)
Parser
getSupportedTypes
in interface Parser
context
- parse context
public void parse(java.io.InputStream stream, org.xml.sax.ContentHandler handler, Metadata metadata, ParseContext context) throws java.io.IOException, org.xml.sax.SAXException, TikaException
Parser
The given document stream is consumed but not closed by this method. The responsibility to close the stream remains with the caller.
Information about the parsing context can be passed in the context parameter. See the parser implementations for the kinds of context information they expect.
parse
in interface Parser
stream
- the document stream (input)
handler
- handler for the XHTML SAX events (output)
metadata
- document metadata (input and output)
context
- parse context
java.io.IOException
- if the document stream could not be read
org.xml.sax.SAXException
- if the SAX events could not be processed
TikaException
- if the document could not be parsed
public void parse(java.io.InputStream stream, org.xml.sax.ContentHandler handler, Metadata metadata) throws java.io.IOException, org.xml.sax.SAXException, TikaException
java.io.IOException
org.xml.sax.SAXException
TikaException
private java.util.HashMap<java.lang.String,java.lang.String> loadProperties(java.io.InputStream is)
private int scanFormat(java.io.InputStream is)
private void setFormat(int format)
private java.lang.String getFormatName()
private byte[] getSection(java.io.InputStream is, java.lang.String name)
private byte[] getSection(java.io.InputStream is, int maxsize, byte bstart, byte bfinish, boolean ifincomplete)
private boolean parseHeader(byte[] value, java.util.HashMap<java.lang.String,java.lang.String> properties)
private boolean parseBody(byte[] value, java.util.HashMap<java.lang.String,java.lang.String> properties)
private boolean parseFooter(byte[] value, java.util.HashMap<java.lang.String,java.lang.String> properties)
private void setMetadata(Metadata metadata, java.util.HashMap<java.lang.String,java.lang.String> properties)
private java.lang.String clean(java.lang.String value)