public class GrobidAnalyzer extends java.lang.Object implements Analyzer
Modifier and Type | Method and Description |
---|---|
static GrobidAnalyzer | getInstance() |
java.lang.String | getName() |
java.util.List<java.lang.String> | retokenize(java.util.List<java.lang.String> textTokenized) — Re-tokenization entry point, to be applied to text that has already been tokenized in the PDF representation. |
java.util.List<java.lang.String> | retokenize(java.util.List<java.lang.String> textTokenized, Language lang) |
java.util.List<java.lang.String> | tokenize(java.lang.String text) — Tokenizer entry point. |
java.util.List<java.lang.String> | tokenize(java.lang.String text, Language lang) |
java.util.List<LayoutToken> | tokenizeWithLayoutToken(java.lang.String text) |
java.util.List<LayoutToken> | tokenizeWithLayoutToken(java.lang.String text, Language lang) |
public static GrobidAnalyzer getInstance()
public java.util.List<java.lang.String> tokenize(java.lang.String text)
public java.util.List<java.lang.String> tokenize(java.lang.String text, Language lang)
public java.util.List<java.lang.String> retokenize(java.util.List<java.lang.String> textTokenized)
Specified by: retokenize in interface Analyzer
public java.util.List<java.lang.String> retokenize(java.util.List<java.lang.String> textTokenized, Language lang)
public java.util.List<LayoutToken> tokenizeWithLayoutToken(java.lang.String text)
Specified by: tokenizeWithLayoutToken in interface Analyzer
public java.util.List<LayoutToken> tokenizeWithLayoutToken(java.lang.String text, Language lang)