public class Lexicon
extends java.lang.Object
Modifier and Type | Method and Description |
---|---|
void |
addDictionary(java.lang.String path,
java.lang.String lang) |
void |
addFirstNames(java.lang.String path) |
void |
addLastNames(java.lang.String path) |
java.util.List<OffsetPosition> |
charPositionsLocationNames(java.util.List<LayoutToken> s)
Soft look-up in location name gazetteer for a list of LayoutToken, return a list of
positions referring to the character positions in the input sequence.
|
java.util.List<OffsetPosition> |
charPositionsLocationNames(java.lang.String s)
Soft look-up in location name gazetteer for a string, return a list of positions referring
to the character positions within the string.
|
java.util.List<OffsetPosition> |
charPositionsOrganisationNames(java.util.List<LayoutToken> s)
Soft look-up in organisation names gazetteer for a tokenized sequence.
|
java.util.List<OffsetPosition> |
charPositionsOrganisationNames(java.lang.String s)
Soft look-up in organisation names gazetteer for a string.
|
java.util.List<OffsetPosition> |
charPositionsOrgForm(java.util.List<LayoutToken> s)
Soft look-up in org form names gazetteer for a tokenized string.
|
java.util.List<OffsetPosition> |
charPositionsOrgForm(java.lang.String s)
Soft look-up in org form names gazetteer for a string.
|
java.util.List<OffsetPosition> |
charPositionsPersonTitle(java.util.List<LayoutToken> s)
Soft look-up in person title name gazetteer for a list of LayoutToken.
|
java.util.List<OffsetPosition> |
charPositionsPersonTitle(java.lang.String s)
Soft look-up in person title name gazetteer for a string.
|
java.lang.String |
getCountryCode(java.lang.String country) |
static Lexicon |
getInstance() |
boolean |
inDictionary(java.lang.String s)
Lexical look-up, default is English
|
boolean |
inDictionary(java.lang.String s,
java.lang.String lang) |
boolean |
inFirstNames(java.lang.String s)
Look-up in first name gazetteer
|
void |
initCities() |
void |
initCollaborations() |
void |
initConferences() |
void |
initJournals() |
void |
initLocations() |
void |
initOrganisations() |
void |
initOrgForms() |
void |
initPersonSuffix() |
void |
initPersonTitles() |
void |
initPublishers() |
boolean |
inLastNames(java.lang.String s)
Look-up in last name gazetteer
|
boolean |
isCountry(java.lang.String tok) |
boolean |
isPunctuation(java.lang.String s)
Indicate whether the given string is punctuation
|
java.lang.String |
mapLanguageCode(java.lang.String code)
Map the language codes used by the language identifier component to the normal
language name.
|
java.util.List<OffsetPosition> |
tokenPositionsAbbrevJournalNames(java.util.List<LayoutToken> s)
Soft look-up in journal abbreviated name gazetteer for a given list of LayoutToken objects
with token positions
|
java.util.List<OffsetPosition> |
tokenPositionsAbbrevJournalNames(java.lang.String s)
Soft look-up in journal abbreviated name gazetteer with token positions
|
java.util.List<OffsetPosition> |
tokenPositionsArXivPattern(java.util.List<LayoutToken> tokens,
java.lang.String text)
Identify in tokenized input the positions of the arXiv identifier patterns
with token positions
|
java.util.List<OffsetPosition> |
tokenPositionsCityNames(java.util.List<LayoutToken> s)
Soft look-up in city name gazetteer for a given list of LayoutToken objects
with token positions
|
java.util.List<OffsetPosition> |
tokenPositionsCityNames(java.lang.String s)
Soft look-up in city name gazetteer for a given string with token positions
|
java.util.List<OffsetPosition> |
tokenPositionsCollaborationNames(java.util.List<LayoutToken> s)
Soft look-up in collaboration name gazetteer for a given list of LayoutToken objects
with token positions
|
java.util.List<OffsetPosition> |
tokenPositionsConferenceNames(java.util.List<LayoutToken> s)
Soft look-up in conference/proceedings name gazetteer for a given list of LayoutToken objects
with token positions
|
java.util.List<OffsetPosition> |
tokenPositionsConferenceNames(java.lang.String s)
Soft look-up in conference/proceedings name gazetteer with token positions
|
java.util.List<OffsetPosition> |
tokenPositionsDOIPattern(java.util.List<LayoutToken> tokens,
java.lang.String text)
Identify in tokenized input the positions of the DOI patterns with token positions
|
java.util.List<OffsetPosition> |
tokenPositionsIdentifierPattern(java.util.List<LayoutToken> tokens)
Identify in tokenized input the positions of identifier patterns with token positions
|
java.util.List<OffsetPosition> |
tokenPositionsISBNPattern(java.util.List<LayoutToken> tokens)
Identify in tokenized input the positions of ISBN patterns with token positions
|
java.util.List<OffsetPosition> |
tokenPositionsISSNPattern(java.util.List<LayoutToken> tokens)
Identify in tokenized input the positions of ISSN patterns with token positions
|
java.util.List<OffsetPosition> |
tokenPositionsJournalNames(java.util.List<LayoutToken> s)
Soft look-up in journal name gazetteer for a given list of LayoutToken objects
with token positions
|
java.util.List<OffsetPosition> |
tokenPositionsJournalNames(java.lang.String s)
Soft look-up in journal name gazetteer with token positions
|
java.util.List<OffsetPosition> |
tokenPositionsLocationNames(java.util.List<LayoutToken> s)
Soft look-up in location name gazetteer for a given list of LayoutToken objects
with token positions
|
java.util.List<OffsetPosition> |
tokenPositionsLocationNames(java.lang.String s)
Soft look-up in location name gazetteer for a given string with token positions
|
java.util.List<OffsetPosition> |
tokenPositionsOrganisationNames(java.util.List<LayoutToken> s)
Soft look-up in organisation name gazetteer for a given list of LayoutToken objects
with token positions
|
java.util.List<OffsetPosition> |
tokenPositionsOrganisationNames(java.lang.String s)
Soft look-up in organisation name gazetteer for a given string with token positions
|
java.util.List<OffsetPosition> |
tokenPositionsOrgForm(java.util.List<LayoutToken> s)
Soft look-up in organisation form name gazetteer for a given list of LayoutToken objects
with token positions
|
java.util.List<OffsetPosition> |
tokenPositionsOrgForm(java.lang.String s)
Soft look-up in organisation form name gazetteer for a given string with token positions
|
java.util.List<OffsetPosition> |
tokenPositionsPersonSuffix(java.util.List<LayoutToken> s)
Soft look-up in person name suffix gazetteer for a given list of LayoutToken objects
with token positions
|
java.util.List<OffsetPosition> |
tokenPositionsPersonTitle(java.util.List<LayoutToken> s)
Soft look-up in person title gazetteer for a given list of LayoutToken objects
with token positions
|
java.util.List<OffsetPosition> |
tokenPositionsPersonTitle(java.lang.String s)
Soft look-up in person title gazetteer for a given string with token positions
|
java.util.List<OffsetPosition> |
tokenPositionsPublisherNames(java.util.List<LayoutToken> s)
Soft look-up in publisher name gazetteer for a given list of LayoutToken objects
with token positions
|
java.util.List<OffsetPosition> |
tokenPositionsPublisherNames(java.lang.String s)
Soft look-up in publisher name gazetteer with token positions
|
java.util.List<OffsetPosition> |
tokenPositionsUrlPattern(java.util.List<LayoutToken> tokens)
Identify in tokenized input the positions of a URL pattern with token positions
|
public static Lexicon getInstance()
public final void addDictionary(java.lang.String path, java.lang.String lang)
public boolean isCountry(java.lang.String tok)
public java.lang.String getCountryCode(java.lang.String country)
public final void addFirstNames(java.lang.String path)
public final void addLastNames(java.lang.String path)
public boolean inDictionary(java.lang.String s)
s
- a string to test
public boolean inDictionary(java.lang.String s, java.lang.String lang)
public void initJournals()
public void initConferences()
public void initPublishers()
public void initCities()
public void initCollaborations()
public void initOrganisations()
public void initOrgForms()
public void initLocations()
public void initPersonTitles()
public void initPersonSuffix()
public boolean inFirstNames(java.lang.String s)
public boolean inLastNames(java.lang.String s)
public boolean isPunctuation(java.lang.String s)
public java.lang.String mapLanguageCode(java.lang.String code)
code
- the language to be mapped
public java.util.List<OffsetPosition> tokenPositionsJournalNames(java.lang.String s)
public java.util.List<OffsetPosition> tokenPositionsJournalNames(java.util.List<LayoutToken> s)
public java.util.List<OffsetPosition> tokenPositionsAbbrevJournalNames(java.lang.String s)
public java.util.List<OffsetPosition> tokenPositionsAbbrevJournalNames(java.util.List<LayoutToken> s)
public java.util.List<OffsetPosition> tokenPositionsConferenceNames(java.lang.String s)
public java.util.List<OffsetPosition> tokenPositionsConferenceNames(java.util.List<LayoutToken> s)
public java.util.List<OffsetPosition> tokenPositionsPublisherNames(java.lang.String s)
public java.util.List<OffsetPosition> tokenPositionsPublisherNames(java.util.List<LayoutToken> s)
public java.util.List<OffsetPosition> tokenPositionsCollaborationNames(java.util.List<LayoutToken> s)
public java.util.List<OffsetPosition> tokenPositionsCityNames(java.lang.String s)
public java.util.List<OffsetPosition> tokenPositionsCityNames(java.util.List<LayoutToken> s)
public java.util.List<OffsetPosition> tokenPositionsOrganisationNames(java.lang.String s)
public java.util.List<OffsetPosition> tokenPositionsOrganisationNames(java.util.List<LayoutToken> s)
public java.util.List<OffsetPosition> charPositionsOrganisationNames(java.lang.String s)
s
- the input string
public java.util.List<OffsetPosition> charPositionsOrganisationNames(java.util.List<LayoutToken> s)
s
- the input list of LayoutToken
public java.util.List<OffsetPosition> tokenPositionsOrgForm(java.lang.String s)
public java.util.List<OffsetPosition> tokenPositionsOrgForm(java.util.List<LayoutToken> s)
public java.util.List<OffsetPosition> charPositionsOrgForm(java.lang.String s)
s
- the input string
public java.util.List<OffsetPosition> charPositionsOrgForm(java.util.List<LayoutToken> s)
s
- the input list of LayoutToken
public java.util.List<OffsetPosition> tokenPositionsLocationNames(java.lang.String s)
public java.util.List<OffsetPosition> tokenPositionsLocationNames(java.util.List<LayoutToken> s)
public java.util.List<OffsetPosition> charPositionsLocationNames(java.lang.String s)
s
- the input string
public java.util.List<OffsetPosition> charPositionsLocationNames(java.util.List<LayoutToken> s)
s
- the input list of LayoutToken
public java.util.List<OffsetPosition> tokenPositionsPersonTitle(java.lang.String s)
public java.util.List<OffsetPosition> tokenPositionsPersonTitle(java.util.List<LayoutToken> s)
public java.util.List<OffsetPosition> tokenPositionsPersonSuffix(java.util.List<LayoutToken> s)
public java.util.List<OffsetPosition> charPositionsPersonTitle(java.lang.String s)
s
- the input string
public java.util.List<OffsetPosition> charPositionsPersonTitle(java.util.List<LayoutToken> s)
s
- the input list of LayoutToken
public java.util.List<OffsetPosition> tokenPositionsIdentifierPattern(java.util.List<LayoutToken> tokens)
public java.util.List<OffsetPosition> tokenPositionsDOIPattern(java.util.List<LayoutToken> tokens, java.lang.String text)
public java.util.List<OffsetPosition> tokenPositionsArXivPattern(java.util.List<LayoutToken> tokens, java.lang.String text)
public java.util.List<OffsetPosition> tokenPositionsISSNPattern(java.util.List<LayoutToken> tokens)
public java.util.List<OffsetPosition> tokenPositionsISBNPattern(java.util.List<LayoutToken> tokens)
public java.util.List<OffsetPosition> tokenPositionsUrlPattern(java.util.List<LayoutToken> tokens)