public class FeatureFactory
extends java.lang.Object
Modifier and Type | Field and Description |
---|---|
java.util.regex.Pattern |
acronym |
static java.util.List<java.lang.String> |
COUNTRY_CODES |
java.util.regex.Pattern |
email |
java.util.regex.Pattern |
email2 |
java.util.regex.Pattern |
http |
java.util.regex.Pattern |
isDigit |
java.util.regex.Pattern |
isPunct |
static java.util.List<java.lang.String> |
KEYWORDSPUB |
static java.util.List<java.lang.String> |
KIND_CODES |
Lexicon |
lexicon |
static java.util.List<java.lang.String> |
MONTHS |
boolean |
newline |
java.util.regex.Pattern |
year |
Modifier and Type | Method and Description |
---|---|
static FeatureFactory |
getInstance() |
java.lang.String |
getPattern(java.lang.String text)
Transform a text into a text pattern in which punctuation is ignored, numbers are shadowed and
the remaining text is converted to lowercase
|
int |
linearScaling(double pos,
double total,
int nbBins)
Given a double value between 0.0 and total, discretize it into nbBins following a linear scale
|
int |
linearScaling(int pos,
int total,
int nbBins)
Given an integer value between 0 and total, discretize it into nbBins following a linear scale
|
int |
logScaling(double pos,
double total,
int nbBins)
Given a double value between 0.0 and total, discretize it into nbBins following a log scale
|
boolean |
test_all_capital(java.lang.String tok)
Test if all the letters of the string are capital letters
(characters can also be digits, which are then ignored)
|
boolean |
test_char(java.lang.String tok,
char c)
Test for a given character occurrence in the string
|
boolean |
test_city(java.lang.String tok)
Test if the current string refers to a known city
|
boolean |
test_common(java.lang.String tok)
Test if the current string is a common name
|
boolean |
test_complex_number(java.lang.String tok)
Test if the current string is a number or a decimal number
|
boolean |
test_country_codes(java.lang.String tok)
Test if the current string refers to a country code
|
boolean |
test_country(java.lang.String tok)
Test if the current string refers to a country
|
static boolean |
test_digit(java.lang.String tok)
Test if the current string contains at least one digit
|
boolean |
test_first_capital(java.lang.String tok)
Test if the first letter of the string is a capital letter
|
boolean |
test_first_names(java.lang.String tok)
Test if the current string is a first name
|
boolean |
test_kind_codes(java.lang.String tok)
Test if the current string refers to a kind code
|
boolean |
test_last_names(java.lang.String tok)
Test if the current string is a family name
|
boolean |
test_month(java.lang.String tok)
Test if the current string refers to a month
|
boolean |
test_names(java.lang.String tok)
Test if the current string is a first name or family name
|
boolean |
test_number(java.lang.String tok)
Test if the current string contains only digits
|
public boolean newline
public Lexicon lexicon
public java.util.regex.Pattern year
public java.util.regex.Pattern http
public java.util.regex.Pattern isDigit
public java.util.regex.Pattern email2
public java.util.regex.Pattern email
public java.util.regex.Pattern acronym
public java.util.regex.Pattern isPunct
public static java.util.List<java.lang.String> KEYWORDSPUB
public static java.util.List<java.lang.String> MONTHS
public static java.util.List<java.lang.String> COUNTRY_CODES
public static java.util.List<java.lang.String> KIND_CODES
public static FeatureFactory getInstance()
public boolean test_first_capital(java.lang.String tok)
public boolean test_all_capital(java.lang.String tok)
public boolean test_char(java.lang.String tok, char c)
public static boolean test_digit(java.lang.String tok)
public boolean test_number(java.lang.String tok)
public boolean test_complex_number(java.lang.String tok)
public boolean test_common(java.lang.String tok)
public boolean test_names(java.lang.String tok)
public boolean test_first_names(java.lang.String tok)
public boolean test_last_names(java.lang.String tok)
public boolean test_month(java.lang.String tok)
public boolean test_country_codes(java.lang.String tok)
public boolean test_kind_codes(java.lang.String tok)
public boolean test_country(java.lang.String tok)
public boolean test_city(java.lang.String tok)
public int linearScaling(int pos, int total, int nbBins)
public int linearScaling(double pos, double total, int nbBins)
public int logScaling(double pos, double total, int nbBins)
public java.lang.String getPattern(java.lang.String text)