public class LanguageProfile
extends java.lang.Object
Modifier and Type | Class and Description |
---|---|
private static class | LanguageProfile.Counter |
private class | LanguageProfile.Interleaved |
Modifier and Type | Field and Description |
---|---|
private long | count<br>The sum of all ngram counts in this profile. |
static int | DEFAULT_NGRAM_LENGTH |
private LanguageProfile.Interleaved | interleaved<br>Sorted ngram cache for faster distance calculation. |
private int | length |
private java.util.Map<java.lang.String,LanguageProfile.Counter> | ngrams<br>The ngrams that make up this profile. |
static boolean | useInterleaved |
Constructor and Description |
---|
LanguageProfile() |
LanguageProfile(int length) |
LanguageProfile(java.lang.String content) |
LanguageProfile(java.lang.String content, int length) |
Modifier and Type | Method and Description |
---|---|
void | add(java.lang.String ngram)<br>Adds a single occurrence of the given ngram to this profile. |
void | add(java.lang.String ngram, long count)<br>Adds multiple occurrences of the given ngram to this profile. |
double | distance(LanguageProfile that)<br>Calculates the geometric distance between this and the given other language profile. |
private double | distanceInterleaved(LanguageProfile that) |
private double | distanceStandard(LanguageProfile that) |
long | getCount() |
long | getCount(java.lang.String ngram) |
private double | square(double count) |
java.lang.String | toString() |
private LanguageProfile.Interleaved | updateInterleaved() |
public static final int DEFAULT_NGRAM_LENGTH
private final int length
private final java.util.Map<java.lang.String,LanguageProfile.Counter> ngrams
private LanguageProfile.Interleaved interleaved
public static boolean useInterleaved
private long count
public LanguageProfile(int length)
public LanguageProfile()
public LanguageProfile(java.lang.String content, int length)
public LanguageProfile(java.lang.String content)
public long getCount()
public long getCount(java.lang.String ngram)
public void add(java.lang.String ngram)
ngram - the ngram
public void add(java.lang.String ngram, long count)
ngram - the ngram
count - number of occurrences to add
public double distance(LanguageProfile that)
that - the other language profile
private double distanceStandard(LanguageProfile that)
public java.lang.String toString()
Overrides: toString in class java.lang.Object
private double distanceInterleaved(LanguageProfile that)
private double square(double count)
private LanguageProfile.Interleaved updateInterleaved()