Optimize imports
Reformatted code (Java convention; tab is 4 spaces)
This commit is contained in:
parent
5edf6d8126
commit
a45df04a26
@ -7,4 +7,5 @@ public interface Intervalable extends Comparable {
|
||||
public int getEnd();
|
||||
|
||||
public int size();
|
||||
|
||||
}
|
||||
|
||||
@ -74,11 +74,11 @@ public class State {
|
||||
return nextState(character, false);
|
||||
}
|
||||
|
||||
public State nextStateIgnoreRootState(final Character character) {
|
||||
public State nextStateIgnoreRootState(Character character) {
|
||||
return nextState(character, true);
|
||||
}
|
||||
|
||||
public State addState(final String keyword) {
|
||||
public State addState(String keyword) {
|
||||
State state = this;
|
||||
|
||||
for (final Character character : keyword.toCharArray()) {
|
||||
@ -88,7 +88,7 @@ public class State {
|
||||
return state;
|
||||
}
|
||||
|
||||
public State addState(final Character character) {
|
||||
public State addState(Character character) {
|
||||
State nextState = nextStateIgnoreRootState(character);
|
||||
if (nextState == null) {
|
||||
nextState = new State(this.depth + 1);
|
||||
@ -101,14 +101,14 @@ public class State {
|
||||
return this.depth;
|
||||
}
|
||||
|
||||
public void addEmit(final String keyword) {
|
||||
public void addEmit(String keyword) {
|
||||
if (this.emits == null) {
|
||||
this.emits = new TreeSet<>();
|
||||
}
|
||||
this.emits.add(keyword);
|
||||
}
|
||||
|
||||
public void addEmit(final Collection<String> emits) {
|
||||
public void addEmit(Collection<String> emits) {
|
||||
for (String emit : emits) {
|
||||
addEmit(emit);
|
||||
}
|
||||
|
||||
@ -1,20 +1,22 @@
|
||||
package org.ahocorasick.trie;
|
||||
|
||||
import static java.lang.Character.isWhitespace;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Queue;
|
||||
import java.util.concurrent.LinkedBlockingDeque;
|
||||
import org.ahocorasick.interval.IntervalTree;
|
||||
import org.ahocorasick.interval.Intervalable;
|
||||
import org.ahocorasick.trie.handler.DefaultEmitHandler;
|
||||
import org.ahocorasick.trie.handler.EmitHandler;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Queue;
|
||||
import java.util.concurrent.LinkedBlockingDeque;
|
||||
|
||||
import static java.lang.Character.isWhitespace;
|
||||
|
||||
/**
|
||||
* Based on the Aho-Corasick white paper, Bell technologies:
|
||||
* http://cr.yp.to/bib/1975/aho.pdf
|
||||
*
|
||||
*
|
||||
* @author Robert Bor
|
||||
*/
|
||||
public class Trie {
|
||||
@ -27,21 +29,20 @@ public class Trie {
|
||||
this.trieConfig = trieConfig;
|
||||
this.rootState = new State();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Used by the builder to add a text search keyword.
|
||||
*
|
||||
*
|
||||
* @param keyword The search term to add to the list of search terms.
|
||||
*
|
||||
* @throws NullPointerException if the keyword is null.
|
||||
*/
|
||||
private void addKeyword(String keyword) {
|
||||
if( keyword.isEmpty() ) {
|
||||
return;
|
||||
if (keyword.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
if( isCaseInsensitive() ) {
|
||||
keyword = keyword.toLowerCase();
|
||||
|
||||
if (isCaseInsensitive()) {
|
||||
keyword = keyword.toLowerCase();
|
||||
}
|
||||
|
||||
addState(keyword).addEmit(keyword);
|
||||
@ -49,44 +50,44 @@ public class Trie {
|
||||
|
||||
/**
|
||||
* Delegates to addKeyword.
|
||||
*
|
||||
*
|
||||
* @param keywords List of search term to add to the list of search terms.
|
||||
*/
|
||||
private void addKeywords( final String[] keywords ) {
|
||||
for( final String keyword : keywords ) {
|
||||
addKeyword( keyword );
|
||||
}
|
||||
private void addKeywords(final String[] keywords) {
|
||||
for (final String keyword : keywords) {
|
||||
addKeyword(keyword);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Delegates to addKeyword.
|
||||
*
|
||||
*
|
||||
* @param keywords List of search term to add to the list of search terms.
|
||||
*/
|
||||
private void addKeywords( final Collection<String> keywords ) {
|
||||
for( final String keyword : keywords ) {
|
||||
addKeyword( keyword );
|
||||
}
|
||||
private void addKeywords(final Collection<String> keywords) {
|
||||
for (final String keyword : keywords) {
|
||||
addKeyword(keyword);
|
||||
}
|
||||
}
|
||||
|
||||
private State addState(final String keyword) {
|
||||
return getRootState().addState(keyword);
|
||||
}
|
||||
|
||||
|
||||
public Collection<Token> tokenize(final String text) {
|
||||
final Collection<Token> tokens = new ArrayList<>();
|
||||
final Collection<Emit> collectedEmits = parseText(text);
|
||||
int lastCollectedPosition = -1;
|
||||
|
||||
|
||||
for (final Emit emit : collectedEmits) {
|
||||
if (emit.getStart() - lastCollectedPosition > 1) {
|
||||
tokens.add(createFragment(emit, text, lastCollectedPosition));
|
||||
}
|
||||
|
||||
|
||||
tokens.add(createMatch(emit, text));
|
||||
lastCollectedPosition = emit.getEnd();
|
||||
}
|
||||
|
||||
|
||||
if (text.length() - lastCollectedPosition > 1) {
|
||||
tokens.add(createFragment(null, text, lastCollectedPosition));
|
||||
}
|
||||
@ -95,11 +96,11 @@ public class Trie {
|
||||
}
|
||||
|
||||
private Token createFragment(final Emit emit, final String text, final int lastCollectedPosition) {
|
||||
return new FragmentToken(text.substring(lastCollectedPosition+1, emit == null ? text.length() : emit.getStart()));
|
||||
return new FragmentToken(text.substring(lastCollectedPosition + 1, emit == null ? text.length() : emit.getStart()));
|
||||
}
|
||||
|
||||
private Token createMatch(Emit emit, String text) {
|
||||
return new MatchToken(text.substring(emit.getStart(), emit.getEnd()+1), emit);
|
||||
return new MatchToken(text.substring(emit.getStart(), emit.getEnd() + 1), emit);
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
@ -118,7 +119,7 @@ public class Trie {
|
||||
}
|
||||
|
||||
if (!trieConfig.isAllowOverlaps()) {
|
||||
IntervalTree intervalTree = new IntervalTree((List<Intervalable>)(List<?>)collectedEmits);
|
||||
IntervalTree intervalTree = new IntervalTree((List<Intervalable>) (List<?>) collectedEmits);
|
||||
intervalTree.removeOverlaps((List<Intervalable>) (List<?>) collectedEmits);
|
||||
}
|
||||
|
||||
@ -131,15 +132,15 @@ public class Trie {
|
||||
|
||||
public void parseText(final CharSequence text, final EmitHandler emitHandler) {
|
||||
State currentState = getRootState();
|
||||
|
||||
|
||||
for (int position = 0; position < text.length(); position++) {
|
||||
Character character = text.charAt(position);
|
||||
|
||||
|
||||
// TODO: Maybe lowercase the entire string at once?
|
||||
if (trieConfig.isCaseInsensitive()) {
|
||||
character = Character.toLowerCase(character);
|
||||
}
|
||||
|
||||
|
||||
currentState = getState(currentState, character);
|
||||
if (storeEmits(position, currentState, emitHandler) && trieConfig.isStopOnHit()) {
|
||||
return;
|
||||
@ -149,7 +150,7 @@ public class Trie {
|
||||
|
||||
/**
|
||||
* The first matching text sequence.
|
||||
*
|
||||
*
|
||||
* @param text The text to search for keywords.
|
||||
* @return null if no matches found.
|
||||
*/
|
||||
@ -164,18 +165,18 @@ public class Trie {
|
||||
} else {
|
||||
// Fast path. Returns first match found.
|
||||
State currentState = getRootState();
|
||||
|
||||
|
||||
for (int position = 0; position < text.length(); position++) {
|
||||
Character character = text.charAt(position);
|
||||
|
||||
|
||||
// TODO: Lowercase the entire string at once?
|
||||
if (trieConfig.isCaseInsensitive()) {
|
||||
character = Character.toLowerCase(character);
|
||||
}
|
||||
|
||||
|
||||
currentState = getState(currentState, character);
|
||||
Collection<String> emitStrs = currentState.emit();
|
||||
|
||||
|
||||
if (emitStrs != null && !emitStrs.isEmpty()) {
|
||||
for (final String emitStr : emitStrs) {
|
||||
final Emit emit = new Emit(position - emitStr.length() + 1, position, emitStr);
|
||||
@ -190,26 +191,26 @@ public class Trie {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
private boolean isPartialMatch(final CharSequence searchText, final Emit emit) {
|
||||
return (emit.getStart() != 0 &&
|
||||
Character.isAlphabetic(searchText.charAt(emit.getStart() - 1))) ||
|
||||
(emit.getEnd() + 1 != searchText.length() &&
|
||||
Character.isAlphabetic(searchText.charAt(emit.getEnd() + 1)));
|
||||
Character.isAlphabetic(searchText.charAt(emit.getStart() - 1))) ||
|
||||
(emit.getEnd() + 1 != searchText.length() &&
|
||||
Character.isAlphabetic(searchText.charAt(emit.getEnd() + 1)));
|
||||
}
|
||||
|
||||
private void removePartialMatches(final CharSequence searchText, final List<Emit> collectedEmits) {
|
||||
final List<Emit> removeEmits = new ArrayList<>();
|
||||
|
||||
|
||||
for (final Emit emit : collectedEmits) {
|
||||
if (isPartialMatch(searchText, emit)) {
|
||||
removeEmits.add(emit);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
for (final Emit removeEmit : removeEmits) {
|
||||
collectedEmits.remove(removeEmit);
|
||||
}
|
||||
@ -218,15 +219,15 @@ public class Trie {
|
||||
private void removePartialMatchesWhiteSpaceSeparated(final CharSequence searchText, final List<Emit> collectedEmits) {
|
||||
final long size = searchText.length();
|
||||
final List<Emit> removeEmits = new ArrayList<>();
|
||||
|
||||
|
||||
for (final Emit emit : collectedEmits) {
|
||||
if ((emit.getStart() == 0 || isWhitespace(searchText.charAt(emit.getStart() - 1))) &&
|
||||
(emit.getEnd() + 1 == size || isWhitespace(searchText.charAt(emit.getEnd() + 1)))) {
|
||||
(emit.getEnd() + 1 == size || isWhitespace(searchText.charAt(emit.getEnd() + 1)))) {
|
||||
continue;
|
||||
}
|
||||
removeEmits.add(emit);
|
||||
}
|
||||
|
||||
|
||||
for (final Emit removeEmit : removeEmits) {
|
||||
collectedEmits.remove(removeEmit);
|
||||
}
|
||||
@ -234,12 +235,12 @@ public class Trie {
|
||||
|
||||
private State getState(State currentState, final Character character) {
|
||||
State newCurrentState = currentState.nextState(character);
|
||||
|
||||
|
||||
while (newCurrentState == null) {
|
||||
currentState = currentState.failure();
|
||||
newCurrentState = currentState.nextState(character);
|
||||
}
|
||||
|
||||
|
||||
return newCurrentState;
|
||||
}
|
||||
|
||||
@ -276,7 +277,7 @@ public class Trie {
|
||||
private boolean storeEmits(final int position, final State currentState, final EmitHandler emitHandler) {
|
||||
boolean emitted = false;
|
||||
final Collection<String> emits = currentState.emit();
|
||||
|
||||
|
||||
// TODO: The check for empty might be superfluous.
|
||||
if (emits != null && !emits.isEmpty()) {
|
||||
for (final String emit : emits) {
|
||||
@ -284,21 +285,21 @@ public class Trie {
|
||||
emitted = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return emitted;
|
||||
}
|
||||
|
||||
private boolean isCaseInsensitive() {
|
||||
return trieConfig.isCaseInsensitive();
|
||||
return trieConfig.isCaseInsensitive();
|
||||
}
|
||||
|
||||
|
||||
private State getRootState() {
|
||||
return this.rootState;
|
||||
return this.rootState;
|
||||
}
|
||||
|
||||
/**
|
||||
* Provides a fluent interface for constructing Trie instances.
|
||||
*
|
||||
*
|
||||
* @return The builder used to configure its Trie.
|
||||
*/
|
||||
public static TrieBuilder builder() {
|
||||
@ -314,14 +315,15 @@ public class Trie {
|
||||
/**
|
||||
* Default (empty) constructor.
|
||||
*/
|
||||
private TrieBuilder() {}
|
||||
private TrieBuilder() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Configure the Trie to ignore case when searching for keywords in
|
||||
* the text. This must be called before calling addKeyword because
|
||||
* the algorithm converts keywords to lowercase as they are added,
|
||||
* depending on this case sensitivity setting.
|
||||
*
|
||||
*
|
||||
* @return This builder.
|
||||
*/
|
||||
public TrieBuilder ignoreCase() {
|
||||
@ -331,7 +333,7 @@ public class Trie {
|
||||
|
||||
/**
|
||||
* Configure the Trie to ignore overlapping keywords.
|
||||
*
|
||||
*
|
||||
* @return This builder.
|
||||
*/
|
||||
public TrieBuilder ignoreOverlaps() {
|
||||
@ -341,9 +343,8 @@ public class Trie {
|
||||
|
||||
/**
|
||||
* Adds a keyword to the Trie's list of text search keywords.
|
||||
*
|
||||
*
|
||||
* @param keyword The keyword to add to the list.
|
||||
*
|
||||
* @return This builder.
|
||||
* @throws NullPointerException if the keyword is null.
|
||||
*/
|
||||
@ -351,34 +352,32 @@ public class Trie {
|
||||
this.trie.addKeyword(keyword);
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Adds a list of keywords to the Trie's list of text search keywords.
|
||||
*
|
||||
*
|
||||
* @param keywords The keywords to add to the list.
|
||||
*
|
||||
* @return This builder.
|
||||
*/
|
||||
public TrieBuilder addKeywords(final String... keywords) {
|
||||
this.trie.addKeywords(keywords);
|
||||
return this;
|
||||
this.trie.addKeywords(keywords);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a list of keywords to the Trie's list of text search keywords.
|
||||
*
|
||||
*
|
||||
* @param keywords The keywords to add to the list.
|
||||
*
|
||||
* @return This builder.
|
||||
*/
|
||||
public TrieBuilder addKeywords(final Collection<String> keywords) {
|
||||
this.trie.addKeywords(keywords);
|
||||
return this;
|
||||
this.trie.addKeywords(keywords);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Configure the Trie to match whole keywords in the text.
|
||||
*
|
||||
*
|
||||
* @return This builder.
|
||||
*/
|
||||
public TrieBuilder onlyWholeWords() {
|
||||
@ -390,7 +389,7 @@ public class Trie {
|
||||
* Configure the Trie to match whole keywords that are separated by
|
||||
* whitespace in the text. For example, "this keyword thatkeyword"
|
||||
* would only match the first occurrence of "keyword".
|
||||
*
|
||||
*
|
||||
* @return This builder.
|
||||
*/
|
||||
public TrieBuilder onlyWholeWordsWhiteSpaceSeparated() {
|
||||
@ -401,7 +400,7 @@ public class Trie {
|
||||
/**
|
||||
* Configure the Trie to stop after the first keyword is found in the
|
||||
* text.
|
||||
*
|
||||
*
|
||||
* @return This builder.
|
||||
*/
|
||||
public TrieBuilder stopOnHit() {
|
||||
@ -411,27 +410,25 @@ public class Trie {
|
||||
|
||||
/**
|
||||
* Configure the Trie based on the builder settings.
|
||||
*
|
||||
*
|
||||
* @return The configured Trie.
|
||||
*/
|
||||
public Trie build() {
|
||||
this.trie.constructFailureStates();
|
||||
return this.trie;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @deprecated Use ignoreCase()
|
||||
*
|
||||
* @return This builder.
|
||||
* @deprecated Use ignoreCase()
|
||||
*/
|
||||
public TrieBuilder caseInsensitive() {
|
||||
return ignoreCase();
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated Use ignoreOverlaps()
|
||||
*
|
||||
* @return This builder.
|
||||
* @deprecated Use ignoreOverlaps()
|
||||
*/
|
||||
public TrieBuilder removeOverlaps() {
|
||||
return ignoreOverlaps();
|
||||
|
||||
@ -3,20 +3,20 @@ package org.ahocorasick.interval;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import static java.util.Collections.sort;
|
||||
import static junit.framework.Assert.assertEquals;
|
||||
|
||||
public class IntervalableComparatorByPositionTest {
|
||||
|
||||
@Test
|
||||
public void sortOnPosition() {
|
||||
List<Intervalable> intervals = new ArrayList<>();
|
||||
List<Intervalable> intervals = new ArrayList<Intervalable>();
|
||||
intervals.add(new Interval(4, 5));
|
||||
intervals.add(new Interval(1, 4));
|
||||
intervals.add(new Interval(3, 8));
|
||||
sort(intervals, new IntervalableComparatorByPosition());
|
||||
Collections.sort(intervals, new IntervalableComparatorByPosition());
|
||||
assertEquals(4, intervals.get(0).size());
|
||||
assertEquals(6, intervals.get(1).size());
|
||||
assertEquals(2, intervals.get(2).size());
|
||||
|
||||
@ -3,20 +3,20 @@ package org.ahocorasick.interval;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import static java.util.Collections.sort;
|
||||
import static junit.framework.Assert.assertEquals;
|
||||
|
||||
public class IntervalableComparatorBySizeTest {
|
||||
|
||||
@Test
|
||||
public void sortOnSize() {
|
||||
List<Intervalable> intervals = new ArrayList<>();
|
||||
List<Intervalable> intervals = new ArrayList<Intervalable>();
|
||||
intervals.add(new Interval(4, 5));
|
||||
intervals.add(new Interval(1, 4));
|
||||
intervals.add(new Interval(3, 8));
|
||||
sort(intervals, new IntervalableComparatorBySize());
|
||||
Collections.sort(intervals, new IntervalableComparatorBySize());
|
||||
assertEquals(6, intervals.get(0).size());
|
||||
assertEquals(4, intervals.get(1).size());
|
||||
assertEquals(2, intervals.get(2).size());
|
||||
@ -24,10 +24,10 @@ public class IntervalableComparatorBySizeTest {
|
||||
|
||||
@Test
|
||||
public void sortOnSizeThenPosition() {
|
||||
List<Intervalable> intervals = new ArrayList<>();
|
||||
List<Intervalable> intervals = new ArrayList<Intervalable>();
|
||||
intervals.add(new Interval(4, 7));
|
||||
intervals.add(new Interval(2, 5));
|
||||
sort(intervals, new IntervalableComparatorBySize());
|
||||
Collections.sort(intervals, new IntervalableComparatorBySize());
|
||||
assertEquals(2, intervals.get(0).getStart());
|
||||
assertEquals(4, intervals.get(1).getStart());
|
||||
}
|
||||
|
||||
@ -7,10 +7,9 @@ import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.ThreadLocalRandom;
|
||||
|
||||
import static java.util.concurrent.ThreadLocalRandom.current;
|
||||
import static junit.framework.Assert.assertEquals;
|
||||
import static org.ahocorasick.trie.Trie.builder;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
public class TrieTest {
|
||||
@ -36,7 +35,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void keywordAndTextAreTheSame() {
|
||||
Trie trie = builder()
|
||||
Trie trie = Trie.builder()
|
||||
.addKeyword(ALPHABET[0])
|
||||
.build();
|
||||
Collection<Emit> emits = trie.parseText(ALPHABET[0]);
|
||||
@ -46,7 +45,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void keywordAndTextAreTheSameFirstMatch() {
|
||||
Trie trie = builder()
|
||||
Trie trie = Trie.builder()
|
||||
.addKeyword(ALPHABET[0])
|
||||
.build();
|
||||
Emit firstMatch = trie.firstMatch(ALPHABET[0]);
|
||||
@ -55,7 +54,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void textIsLongerThanKeyword() {
|
||||
Trie trie = builder()
|
||||
Trie trie = Trie.builder()
|
||||
.addKeyword(ALPHABET[0])
|
||||
.build();
|
||||
Collection<Emit> emits = trie.parseText(" " + ALPHABET[0]);
|
||||
@ -65,7 +64,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void textIsLongerThanKeywordFirstMatch() {
|
||||
Trie trie = builder()
|
||||
Trie trie = Trie.builder()
|
||||
.addKeyword(ALPHABET[0])
|
||||
.build();
|
||||
Emit firstMatch = trie.firstMatch(" " + ALPHABET[0]);
|
||||
@ -74,7 +73,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void variousKeywordsOneMatch() {
|
||||
Trie trie = builder()
|
||||
Trie trie = Trie.builder()
|
||||
.addKeywords(ALPHABET)
|
||||
.build();
|
||||
Collection<Emit> emits = trie.parseText("bcd");
|
||||
@ -84,7 +83,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void variousKeywordsFirstMatch() {
|
||||
Trie trie = builder()
|
||||
Trie trie = Trie.builder()
|
||||
.addKeywords(ALPHABET)
|
||||
.build();
|
||||
Emit firstMatch = trie.firstMatch("bcd");
|
||||
@ -93,7 +92,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void ushersTestAndStopOnHit() {
|
||||
Trie trie = builder()
|
||||
Trie trie = Trie.builder()
|
||||
.addKeywords(PRONOUNS)
|
||||
.stopOnHit()
|
||||
.build();
|
||||
@ -106,7 +105,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void ushersTest() {
|
||||
Trie trie = builder()
|
||||
Trie trie = Trie.builder()
|
||||
.addKeywords(PRONOUNS)
|
||||
.build();
|
||||
Collection<Emit> emits = trie.parseText("ushers");
|
||||
@ -119,7 +118,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void ushersTestWithCapitalKeywords() {
|
||||
Trie trie = builder()
|
||||
Trie trie = Trie.builder()
|
||||
.ignoreCase()
|
||||
.addKeyword("HERS")
|
||||
.addKeyword("HIS")
|
||||
@ -136,7 +135,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void ushersTestFirstMatch() {
|
||||
Trie trie = builder()
|
||||
Trie trie = Trie.builder()
|
||||
.addKeywords(PRONOUNS)
|
||||
.build();
|
||||
Emit firstMatch = trie.firstMatch("ushers");
|
||||
@ -145,7 +144,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void ushersTestByCallback() {
|
||||
Trie trie = builder()
|
||||
Trie trie = Trie.builder()
|
||||
.addKeywords(PRONOUNS)
|
||||
.build();
|
||||
|
||||
@ -167,7 +166,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void misleadingTest() {
|
||||
Trie trie = builder()
|
||||
Trie trie = Trie.builder()
|
||||
.addKeyword("hers")
|
||||
.build();
|
||||
Collection<Emit> emits = trie.parseText("h he her hers");
|
||||
@ -177,7 +176,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void misleadingTestFirstMatch() {
|
||||
Trie trie = builder()
|
||||
Trie trie = Trie.builder()
|
||||
.addKeyword("hers")
|
||||
.build();
|
||||
Emit firstMatch = trie.firstMatch("h he her hers");
|
||||
@ -186,7 +185,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void recipes() {
|
||||
Trie trie = builder()
|
||||
Trie trie = Trie.builder()
|
||||
.addKeywords(FOOD)
|
||||
.build();
|
||||
Collection<Emit> emits = trie.parseText("2 cauliflowers, 3 tomatoes, 4 slices of veal, 100g broccoli");
|
||||
@ -199,7 +198,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void recipesFirstMatch() {
|
||||
Trie trie = builder()
|
||||
Trie trie = Trie.builder()
|
||||
.addKeywords(FOOD)
|
||||
.build();
|
||||
Emit firstMatch = trie.firstMatch("2 cauliflowers, 3 tomatoes, 4 slices of veal, 100g broccoli");
|
||||
@ -209,7 +208,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void longAndShortOverlappingMatch() {
|
||||
Trie trie = builder()
|
||||
Trie trie = Trie.builder()
|
||||
.addKeyword("he")
|
||||
.addKeyword("hehehehe")
|
||||
.build();
|
||||
@ -226,7 +225,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void nonOverlapping() {
|
||||
Trie trie = builder().removeOverlaps()
|
||||
Trie trie = Trie.builder().removeOverlaps()
|
||||
.addKeyword("ab")
|
||||
.addKeyword("cba")
|
||||
.addKeyword("ababc")
|
||||
@ -241,7 +240,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void nonOverlappingFirstMatch() {
|
||||
Trie trie = builder().removeOverlaps()
|
||||
Trie trie = Trie.builder().removeOverlaps()
|
||||
.addKeyword("ab")
|
||||
.addKeyword("cba")
|
||||
.addKeyword("ababc")
|
||||
@ -253,7 +252,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void containsMatch() {
|
||||
Trie trie = builder().removeOverlaps()
|
||||
Trie trie = Trie.builder().removeOverlaps()
|
||||
.addKeyword("ab")
|
||||
.addKeyword("cba")
|
||||
.addKeyword("ababc")
|
||||
@ -263,7 +262,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void startOfChurchillSpeech() {
|
||||
Trie trie = builder().removeOverlaps()
|
||||
Trie trie = Trie.builder().removeOverlaps()
|
||||
.addKeyword("T")
|
||||
.addKeyword("u")
|
||||
.addKeyword("ur")
|
||||
@ -281,7 +280,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void partialMatch() {
|
||||
Trie trie = builder()
|
||||
Trie trie = Trie.builder()
|
||||
.onlyWholeWords()
|
||||
.addKeyword("sugar")
|
||||
.build();
|
||||
@ -292,7 +291,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void partialMatchFirstMatch() {
|
||||
Trie trie = builder()
|
||||
Trie trie = Trie.builder()
|
||||
.onlyWholeWords()
|
||||
.addKeyword("sugar")
|
||||
.build();
|
||||
@ -303,7 +302,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void tokenizeFullSentence() {
|
||||
Trie trie = builder()
|
||||
Trie trie = Trie.builder()
|
||||
.addKeywords(GREEK_LETTERS)
|
||||
.build();
|
||||
Collection<Token> tokens = trie.tokenize("Hear: Alpha team first, Beta from the rear, Gamma in reserve");
|
||||
@ -321,7 +320,7 @@ public class TrieTest {
|
||||
// @see https://github.com/robert-bor/aho-corasick/issues/5
|
||||
@Test
|
||||
public void testStringIndexOutOfBoundsException() {
|
||||
Trie trie = builder().ignoreCase().onlyWholeWords()
|
||||
Trie trie = Trie.builder().ignoreCase().onlyWholeWords()
|
||||
.addKeywords(UNICODE)
|
||||
.build();
|
||||
Collection<Emit> emits = trie.parseText("TurninG OnCe AgAiN BÖRKÜ");
|
||||
@ -335,7 +334,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void testIgnoreCase() {
|
||||
Trie trie = builder().ignoreCase()
|
||||
Trie trie = Trie.builder().ignoreCase()
|
||||
.addKeywords(UNICODE)
|
||||
.build();
|
||||
Collection<Emit> emits = trie.parseText("TurninG OnCe AgAiN BÖRKÜ");
|
||||
@ -349,7 +348,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void testIgnoreCaseFirstMatch() {
|
||||
Trie trie = builder().ignoreCase()
|
||||
Trie trie = Trie.builder().ignoreCase()
|
||||
.addKeywords(UNICODE)
|
||||
.build();
|
||||
Emit firstMatch = trie.firstMatch("TurninG OnCe AgAiN BÖRKÜ");
|
||||
@ -359,7 +358,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void tokenizeTokensInSequence() {
|
||||
Trie trie = builder()
|
||||
Trie trie = Trie.builder()
|
||||
.addKeywords(GREEK_LETTERS)
|
||||
.build();
|
||||
Collection<Token> tokens = trie.tokenize("Alpha Beta Gamma");
|
||||
@ -369,7 +368,7 @@ public class TrieTest {
|
||||
// @see https://github.com/robert-bor/aho-corasick/issues/7
|
||||
@Test
|
||||
public void testZeroLength() {
|
||||
Trie trie = builder().ignoreOverlaps().onlyWholeWords().ignoreCase()
|
||||
Trie trie = Trie.builder().ignoreOverlaps().onlyWholeWords().ignoreCase()
|
||||
.addKeyword("")
|
||||
.build();
|
||||
trie.tokenize("Try a natural lip and subtle bronzer to keep all the focus on those big bright eyes with NARS Eyeshadow Duo in Rated R And the winner is... Boots No7 Advanced Renewal Anti-ageing Glycolic Peel Kit ($25 amazon.com) won most-appealing peel.");
|
||||
@ -380,7 +379,7 @@ public class TrieTest {
|
||||
public void testUnicode1() {
|
||||
String target = "LİKE THIS"; // The second character ('İ') is Unicode, which was read by AC as a 2-byte char
|
||||
assertEquals("THIS", target.substring(5, 9)); // Java does it the right way
|
||||
Trie trie = builder().ignoreCase().onlyWholeWords()
|
||||
Trie trie = Trie.builder().ignoreCase().onlyWholeWords()
|
||||
.addKeyword("this")
|
||||
.build();
|
||||
Collection<Emit> emits = trie.parseText(target);
|
||||
@ -393,7 +392,7 @@ public class TrieTest {
|
||||
@Test
|
||||
public void testUnicode2() {
|
||||
String target = "LİKE THIS"; // The second character ('İ') is Unicode, which was read by AC as a 2-byte char
|
||||
Trie trie = builder()
|
||||
Trie trie = Trie.builder()
|
||||
.ignoreCase()
|
||||
.onlyWholeWords()
|
||||
.addKeyword("this")
|
||||
@ -405,7 +404,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void testPartialMatchWhiteSpaces() {
|
||||
Trie trie = builder()
|
||||
Trie trie = Trie.builder()
|
||||
.onlyWholeWordsWhiteSpaceSeparated()
|
||||
.addKeyword("#sugar-123")
|
||||
.build();
|
||||
@ -423,7 +422,7 @@ public class TrieTest {
|
||||
|
||||
injectKeyword(text, keyword, interval);
|
||||
|
||||
Trie trie = builder()
|
||||
Trie trie = Trie.builder()
|
||||
.onlyWholeWords()
|
||||
.addKeyword(keyword)
|
||||
.build();
|
||||
@ -439,10 +438,10 @@ public class TrieTest {
|
||||
* @param count The number of numbers to generate.
|
||||
* @return A character sequence filled with random digits.
|
||||
*/
|
||||
private StringBuilder randomNumbers(final int count) {
|
||||
private StringBuilder randomNumbers(int count) {
|
||||
final StringBuilder sb = new StringBuilder(count);
|
||||
|
||||
for (int i = count - 1; i >= 0; i--) {
|
||||
while (--count > 0) {
|
||||
sb.append(randomInt(0, 10));
|
||||
}
|
||||
|
||||
@ -468,7 +467,7 @@ public class TrieTest {
|
||||
}
|
||||
|
||||
private int randomInt(final int min, final int max) {
|
||||
return current().nextInt(min, max);
|
||||
return ThreadLocalRandom.current().nextInt(min, max);
|
||||
}
|
||||
|
||||
private void checkEmit(Emit next, int expectedStart, int expectedEnd, String expectedKeyword) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user