public final class FastMatcher
extends java.lang.Object
Constructor and Description |
---|
FastMatcher() |
FastMatcher(java.io.File file) |
FastMatcher(java.io.File file,
Analyzer analyzer) |
FastMatcher(java.io.File file,
Analyzer analyzer,
boolean caseSensitive) |
FastMatcher(java.io.InputStream is) |
FastMatcher(java.io.InputStream is,
Analyzer analyzer) |
FastMatcher(java.io.InputStream is,
Analyzer analyzer,
boolean caseSensitive) |
Modifier and Type | Method and Description |
---|---|
int |
loadTerm(java.lang.String term,
Analyzer analyzer)
Load a term to the fast matcher; by default, the standard delimiters will be ignored.
|
int |
loadTerm(java.lang.String term,
Analyzer analyzer,
boolean ignoreDelimiters)
Load a term to the fast matcher; by default, the loading will be case sensitive.
|
int |
loadTerm(java.lang.String term,
Analyzer analyzer,
boolean ignoreDelimiters,
boolean caseSensitive)
Load a term to the fast matcher
|
int |
loadTerms(java.io.File file)
Load a set of terms to the fast matcher from a file listing terms one per line
|
int |
loadTerms(java.io.File file,
Analyzer analyzer,
boolean caseSensitive)
Load a set of terms to the fast matcher from a file listing terms one per line
|
int |
loadTerms(java.io.File file,
boolean caseSensitive)
Load a set of terms to the fast matcher from a file listing terms one per line
|
int |
loadTerms(java.io.InputStream is,
Analyzer analyzer,
boolean caseSensitive)
Load a set of terms to the fast matcher from an input stream
|
java.util.List<OffsetPosition> |
matchCharacter(java.lang.String text)
Gives the character positions within a text where matches occur.
|
java.util.List<OffsetPosition> |
matchCharacter(java.lang.String text,
boolean caseSensitive)
Gives the character positions within a text where matches occur.
|
java.util.List<OffsetPosition> |
matchCharacterLayoutToken(java.util.List<LayoutToken> tokens)
Gives the character positions within a tokenized text where matches occur.
|
java.util.List<OffsetPosition> |
matchCharacterLayoutToken(java.util.List<LayoutToken> tokens,
boolean caseSensitive)
Gives the character positions within a tokenized text where matches occur.
|
java.util.List<OffsetPosition> |
matcherPairs(java.util.List<Pair<java.lang.String,java.lang.String>> tokens)
Identifies terms in a piece of text and gives the corresponding token positions.
|
java.util.List<OffsetPosition> |
matcherPairs(java.util.List<Pair<java.lang.String,java.lang.String>> tokens,
boolean caseSensitive)
Identifies terms in a piece of text and gives the corresponding token positions.
|
java.util.List<OffsetPosition> |
matchLayoutToken(java.util.List<LayoutToken> tokens)
Identifies terms in a piece of text and gives the corresponding token positions.
|
java.util.List<OffsetPosition> |
matchLayoutToken(java.util.List<LayoutToken> tokens,
boolean ignoreDelimiters,
boolean caseSensitive)
Identifies terms in a piece of text and gives the corresponding token positions.
|
java.util.List<OffsetPosition> |
matchToken(java.lang.String text)
Identifies terms in a piece of text and gives the corresponding token positions.
|
java.util.List<OffsetPosition> |
matchToken(java.lang.String text,
boolean caseSensitive)
Identifies terms in a piece of text and gives the corresponding token positions.
|
protected java.lang.String |
processToken(java.lang.String token)
Process token, if different than @newline
|
public FastMatcher()
public FastMatcher(java.io.File file)
public FastMatcher(java.io.File file, Analyzer analyzer)
public FastMatcher(java.io.File file, Analyzer analyzer, boolean caseSensitive)
public FastMatcher(java.io.InputStream is)
public FastMatcher(java.io.InputStream is, Analyzer analyzer)
public FastMatcher(java.io.InputStream is, Analyzer analyzer, boolean caseSensitive)
public int loadTerms(java.io.File file) throws java.io.IOException
java.io.IOException
public int loadTerms(java.io.File file, boolean caseSensitive) throws java.io.IOException
java.io.IOException
public int loadTerms(java.io.File file, Analyzer analyzer, boolean caseSensitive) throws java.io.IOException
java.io.IOException
public int loadTerms(java.io.InputStream is, Analyzer analyzer, boolean caseSensitive) throws java.io.IOException
java.io.IOException
public int loadTerm(java.lang.String term, Analyzer analyzer)
public int loadTerm(java.lang.String term, Analyzer analyzer, boolean ignoreDelimiters)
public int loadTerm(java.lang.String term, Analyzer analyzer, boolean ignoreDelimiters, boolean caseSensitive)
public java.util.List<OffsetPosition> matchToken(java.lang.String text)
text:
- the text to be processed
public java.util.List<OffsetPosition> matchToken(java.lang.String text, boolean caseSensitive)
text:
- the text to be processed
caseSensitive:
- ensure case sensitive matching or not
public java.util.List<OffsetPosition> matchLayoutToken(java.util.List<LayoutToken> tokens)
tokens
- the text to be processed as a list of LayoutToken objects
public java.util.List<OffsetPosition> matchLayoutToken(java.util.List<LayoutToken> tokens, boolean ignoreDelimiters, boolean caseSensitive)
tokens
- the text to be processed as a list of LayoutToken objects
ignoreDelimiters
- if true, ignore the delimiters in the matching process
caseSensitive:
- ensure case sensitive matching or not
public java.util.List<OffsetPosition> matchCharacter(java.lang.String text)
By iterating over the OffsetPosition and applying substring, we get all the matches.
All the matches are returned.
text:
- the text to be processed
caseSensitive:
- ensure case sensitive matching or not
public java.util.List<OffsetPosition> matchCharacter(java.lang.String text, boolean caseSensitive)
By iterating over the OffsetPosition and applying substring, we get all the matches.
All the matches are returned.
text:
- the text to be processed
caseSensitive:
- ensure case sensitive matching or not
public java.util.List<OffsetPosition> matchCharacterLayoutToken(java.util.List<LayoutToken> tokens)
All the matches are returned.
tokens
- the text to be processed as a list of LayoutToken objects
public java.util.List<OffsetPosition> matchCharacterLayoutToken(java.util.List<LayoutToken> tokens, boolean caseSensitive)
All the matches are returned.
tokens
- the text to be processed as a list of LayoutToken objects
caseSensitive
- ensure case sensitive matching or not
public java.util.List<OffsetPosition> matcherPairs(java.util.List<Pair<java.lang.String,java.lang.String>> tokens)
tokens:
- the text to be processed
public java.util.List<OffsetPosition> matcherPairs(java.util.List<Pair<java.lang.String,java.lang.String>> tokens, boolean caseSensitive)
tokens:
- the text to be processed
caseSensitive:
- ensure case sensitive matching or not
protected java.lang.String processToken(java.lang.String token)