public class PDFALTOSaxHandler
extends org.xml.sax.helpers.DefaultHandler
Modifier and Type | Field and Description |
---|---|
static Logger | LOGGER |
Constructor and Description |
---|
PDFALTOSaxHandler(Document d, java.util.List<GraphicObject> im) |
Modifier and Type | Method and Description |
---|---|
void | characters(char[] ch, int start, int length) |
void | endDocument() |
void | endElement(java.lang.String uri, java.lang.String localName, java.lang.String qName) |
Analyzer | getAnalyzer() |
java.util.List<LayoutToken> | getTokenization() |
void | setAnalyzer(Analyzer analyzer) |
void | startElement(java.lang.String namespaceURI, java.lang.String localName, java.lang.String qName, org.xml.sax.Attributes atts) |
java.lang.String | trimAndNormaliseText(java.lang.String content) |
public PDFALTOSaxHandler(Document d, java.util.List<GraphicObject> im)
public void setAnalyzer(Analyzer analyzer)
public Analyzer getAnalyzer()
public java.util.List<LayoutToken> getTokenization()
public void characters(char[] ch, int start, int length)
Specified by: characters in interface org.xml.sax.ContentHandler
Overrides: characters in class org.xml.sax.helpers.DefaultHandler
public java.lang.String trimAndNormaliseText(java.lang.String content)
public void endElement(java.lang.String uri, java.lang.String localName, java.lang.String qName) throws org.xml.sax.SAXException
Specified by: endElement in interface org.xml.sax.ContentHandler
Overrides: endElement in class org.xml.sax.helpers.DefaultHandler
Throws: org.xml.sax.SAXException
public void endDocument()
Specified by: endDocument in interface org.xml.sax.ContentHandler
Overrides: endDocument in class org.xml.sax.helpers.DefaultHandler
public void startElement(java.lang.String namespaceURI, java.lang.String localName, java.lang.String qName, org.xml.sax.Attributes atts) throws org.xml.sax.SAXException
Specified by: startElement in interface org.xml.sax.ContentHandler
Overrides: startElement in class org.xml.sax.helpers.DefaultHandler
Throws: org.xml.sax.SAXException