public class PDFALTOSaxHandler
extends org.xml.sax.helpers.DefaultHandler
| Modifier and Type | Field and Description |
|---|---|
| static Logger | LOGGER |
| Constructor and Description |
|---|
| PDFALTOSaxHandler(Document d, java.util.List<GraphicObject> im) |
| Modifier and Type | Method and Description |
|---|---|
| void | characters(char[] ch, int start, int length) |
| void | endDocument() |
| void | endElement(java.lang.String uri, java.lang.String localName, java.lang.String qName) |
| Analyzer | getAnalyzer() |
| java.util.List<LayoutToken> | getTokenization() |
| void | setAnalyzer(Analyzer analyzer) |
| void | startElement(java.lang.String namespaceURI, java.lang.String localName, java.lang.String qName, org.xml.sax.Attributes atts) |
| java.lang.String | trimAndNormaliseText(java.lang.String content) |
public PDFALTOSaxHandler(Document d, java.util.List<GraphicObject> im)
public void setAnalyzer(Analyzer analyzer)
public Analyzer getAnalyzer()
public java.util.List<LayoutToken> getTokenization()
public void characters(char[] ch,
int start,
int length)
Specified by: characters in interface org.xml.sax.ContentHandler
Overrides: characters in class org.xml.sax.helpers.DefaultHandler
public java.lang.String trimAndNormaliseText(java.lang.String content)
public void endElement(java.lang.String uri,
java.lang.String localName,
java.lang.String qName)
throws org.xml.sax.SAXException
Specified by: endElement in interface org.xml.sax.ContentHandler
Overrides: endElement in class org.xml.sax.helpers.DefaultHandler
Throws: org.xml.sax.SAXException
public void endDocument()
Specified by: endDocument in interface org.xml.sax.ContentHandler
Overrides: endDocument in class org.xml.sax.helpers.DefaultHandler
public void startElement(java.lang.String namespaceURI,
java.lang.String localName,
java.lang.String qName,
org.xml.sax.Attributes atts)
throws org.xml.sax.SAXException
Specified by: startElement in interface org.xml.sax.ContentHandler
Overrides: startElement in class org.xml.sax.helpers.DefaultHandler
Throws: org.xml.sax.SAXException