From 267e8950592e685b0fc21c2cf6dad7db2775356a Mon Sep 17 00:00:00 2001 From: djarvis Date: Tue, 29 Nov 2016 22:34:55 -0800 Subject: [PATCH 1/4] Added missing override annotations. Added final modifier to Interval member variables. Updated documentation for ignoreCase (issue #33) and moved the ignore methods to the top of the builder to reflect their preferred calling order. --- .../org/ahocorasick/interval/Interval.java | 8 ++- src/main/java/org/ahocorasick/trie/Emit.java | 1 - src/main/java/org/ahocorasick/trie/Trie.java | 55 +++++++++++-------- 3 files changed, 38 insertions(+), 26 deletions(-) diff --git a/src/main/java/org/ahocorasick/interval/Interval.java b/src/main/java/org/ahocorasick/interval/Interval.java index 50254cf..b6b756a 100644 --- a/src/main/java/org/ahocorasick/interval/Interval.java +++ b/src/main/java/org/ahocorasick/interval/Interval.java @@ -2,8 +2,8 @@ package org.ahocorasick.interval; public class Interval implements Intervalable { - private int start; - private int end; + private final int start; + private final int end; /** * Constructs an interval with a start and end position. @@ -21,6 +21,7 @@ public class Interval implements Intervalable { * * @return A number between 0 (start of text) and the text length. */ + @Override public int getStart() { return this.start; } @@ -30,6 +31,7 @@ public class Interval implements Intervalable { * * @return A number between getStart() + 1 and the text length. */ + @Override public int getEnd() { return this.end; } @@ -39,6 +41,7 @@ public class Interval implements Intervalable { * * @return The end position less the start position, plus one. */ + @Override public int size() { return end - start + 1; } @@ -47,6 +50,7 @@ public class Interval implements Intervalable { * Answers whether the given interval overlaps this interval * instance. * + * @param other * @return true The intervals overlap. 
*/ public boolean overlapsWith(final Interval other) { diff --git a/src/main/java/org/ahocorasick/trie/Emit.java b/src/main/java/org/ahocorasick/trie/Emit.java index 60c1f9e..8c17253 100644 --- a/src/main/java/org/ahocorasick/trie/Emit.java +++ b/src/main/java/org/ahocorasick/trie/Emit.java @@ -20,5 +20,4 @@ public class Emit extends Interval implements Intervalable { public String toString() { return super.toString() + "=" + this.keyword; } - } diff --git a/src/main/java/org/ahocorasick/trie/Trie.java b/src/main/java/org/ahocorasick/trie/Trie.java index 88097a5..6df993d 100644 --- a/src/main/java/org/ahocorasick/trie/Trie.java +++ b/src/main/java/org/ahocorasick/trie/Trie.java @@ -147,10 +147,17 @@ public class Trie { } } + /** + * The first matching text sequence. + * + * @param text The text to search for keywords. + * @return null if no matches found. + */ public Emit firstMatch(final CharSequence text) { if (!trieConfig.isAllowOverlaps()) { // Slow path. Needs to find all the matches to detect overlaps. - Collection parseText = parseText(text); + final Collection parseText = parseText(text); + if (parseText != null && !parseText.isEmpty()) { return parseText.iterator().next(); } @@ -170,7 +177,7 @@ public class Trie { Collection emitStrs = currentState.emit(); if (emitStrs != null && !emitStrs.isEmpty()) { - for (String emitStr : emitStrs) { + for (final String emitStr : emitStrs) { final Emit emit = new Emit(position - emitStr.length() + 1, position, emitStr); if (trieConfig.isOnlyWholeWords()) { if (!isPartialMatch(text, emit)) { @@ -309,6 +316,29 @@ public class Trie { */ private TrieBuilder() {} + /** + * Configure the Trie to ignore case when searching for keywords in + * the text. This must be called before calling addKeyword because + * the algorithm converts keywords to lowercase as they are added, + * depending on this case sensitivity setting. + * + * @return This builder. 
+ */ + public TrieBuilder ignoreCase() { + this.trieConfig.setCaseInsensitive(true); + return this; + } + + /** + * Configure the Trie to ignore overlapping keywords. + * + * @return This builder. + */ + public TrieBuilder ignoreOverlaps() { + this.trieConfig.setAllowOverlaps(false); + return this; + } + /** * Adds a keyword to the Trie's list of text search keywords. * @@ -346,27 +376,6 @@ public class Trie { return this; } - /** - * Configure the Trie to ignore case when searching for keywords in - * the text. - * - * @return This builder. - */ - public TrieBuilder ignoreCase() { - this.trieConfig.setCaseInsensitive(true); - return this; - } - - /** - * Configure the Trie to ignore overlapping keywords. - * - * @return This builder. - */ - public TrieBuilder ignoreOverlaps() { - this.trieConfig.setAllowOverlaps(false); - return this; - } - /** * Configure the Trie to match whole keywords in the text. * From 255069624b60526844a61c2d7f158762febe9249 Mon Sep 17 00:00:00 2001 From: robert-bor Date: Wed, 30 Nov 2016 12:07:03 +0100 Subject: [PATCH 2/4] Optimize imports Reformatted code (Java convention; tab is 4 spaces) --- pom.xml | 3 +- .../org/ahocorasick/interval/Interval.java | 10 +- .../ahocorasick/interval/IntervalNode.java | 6 +- .../ahocorasick/interval/Intervalable.java | 2 + src/main/java/org/ahocorasick/trie/State.java | 70 ++++---- src/main/java/org/ahocorasick/trie/Trie.java | 159 +++++++++--------- .../java/org/ahocorasick/trie/TrieConfig.java | 12 +- .../ahocorasick/interval/IntervalTest.java | 16 +- .../interval/IntervalTreeTest.java | 6 +- .../IntervalableComparatorByPositionTest.java | 6 +- .../IntervalableComparatorBySizeTest.java | 10 +- .../java/org/ahocorasick/trie/StateTest.java | 7 +- .../java/org/ahocorasick/trie/TrieTest.java | 78 ++++----- 13 files changed, 200 insertions(+), 185 deletions(-) diff --git a/pom.xml b/pom.xml index 919b66d..1d2c17b 100644 --- a/pom.xml +++ b/pom.xml @@ -1,4 +1,5 @@ - + 4.0.0 org.ahocorasick diff --git 
a/src/main/java/org/ahocorasick/interval/Interval.java b/src/main/java/org/ahocorasick/interval/Interval.java index b6b756a..e3b0012 100644 --- a/src/main/java/org/ahocorasick/interval/Interval.java +++ b/src/main/java/org/ahocorasick/interval/Interval.java @@ -9,7 +9,7 @@ public class Interval implements Intervalable { * Constructs an interval with a start and end position. * * @param start The interval's starting text position. - * @param end The interval's ending text position. + * @param end The interval's ending text position. */ public Interval(final int start, final int end) { this.start = start; @@ -55,7 +55,7 @@ public class Interval implements Intervalable { */ public boolean overlapsWith(final Interval other) { return this.start <= other.getEnd() && - this.end >= other.getStart(); + this.end >= other.getStart(); } public boolean overlapsWith(int point) { @@ -67,9 +67,9 @@ public class Interval implements Intervalable { if (!(o instanceof Intervalable)) { return false; } - Intervalable other = (Intervalable)o; + Intervalable other = (Intervalable) o; return this.start == other.getStart() && - this.end == other.getEnd(); + this.end == other.getEnd(); } @Override @@ -82,7 +82,7 @@ public class Interval implements Intervalable { if (!(o instanceof Intervalable)) { return -1; } - Intervalable other = (Intervalable)o; + Intervalable other = (Intervalable) o; int comparison = this.start - other.getStart(); return comparison != 0 ? 
comparison : this.end - other.getEnd(); } diff --git a/src/main/java/org/ahocorasick/interval/IntervalNode.java b/src/main/java/org/ahocorasick/interval/IntervalNode.java index 11db0ae..22242fa 100644 --- a/src/main/java/org/ahocorasick/interval/IntervalNode.java +++ b/src/main/java/org/ahocorasick/interval/IntervalNode.java @@ -6,7 +6,7 @@ import java.util.List; public class IntervalNode { - private enum Direction { LEFT, RIGHT } + private enum Direction {LEFT, RIGHT} private IntervalNode left = null; private IntervalNode right = null; @@ -93,12 +93,12 @@ public class IntervalNode { List overlaps = new ArrayList(); for (Intervalable currentInterval : this.intervals) { switch (direction) { - case LEFT : + case LEFT: if (currentInterval.getStart() <= interval.getEnd()) { overlaps.add(currentInterval); } break; - case RIGHT : + case RIGHT: if (currentInterval.getEnd() >= interval.getStart()) { overlaps.add(currentInterval); } diff --git a/src/main/java/org/ahocorasick/interval/Intervalable.java b/src/main/java/org/ahocorasick/interval/Intervalable.java index 286a232..0dd5f69 100644 --- a/src/main/java/org/ahocorasick/interval/Intervalable.java +++ b/src/main/java/org/ahocorasick/interval/Intervalable.java @@ -3,7 +3,9 @@ package org.ahocorasick.interval; public interface Intervalable extends Comparable { public int getStart(); + public int getEnd(); + public int size(); } diff --git a/src/main/java/org/ahocorasick/trie/State.java b/src/main/java/org/ahocorasick/trie/State.java index 0055d91..e192207 100644 --- a/src/main/java/org/ahocorasick/trie/State.java +++ b/src/main/java/org/ahocorasick/trie/State.java @@ -4,43 +4,51 @@ import java.util.*; /** *

- * A state has various important tasks it must attend to: + * A state has various important tasks it must attend to: *

- * - *
    - *
  • success; when a character points to another state, it must return that state
  • - *
  • failure; when a character has no matching state, the algorithm must be able to fall back on a - * state with less depth
  • - *
  • emits; when this state is passed and keywords have been matched, the matches must be - * 'emitted' so that they can be used later on.
  • - *
- * *

- * The root state is special in the sense that it has no failure state; it cannot fail. If it 'fails' - * it will still parse the next character and start from the root node. This ensures that the algorithm - * always runs. All other states always have a fail state. + *

    + *
  • success; when a character points to another state, it must return that state
  • + *
  • failure; when a character has no matching state, the algorithm must be able to fall back on a + * state with less depth
  • + *
  • emits; when this state is passed and keywords have been matched, the matches must be + * 'emitted' so that they can be used later on.
  • + *
+ *

+ *

+ * The root state is special in the sense that it has no failure state; it cannot fail. If it 'fails' + * it will still parse the next character and start from the root node. This ensures that the algorithm + * always runs. All other states always have a fail state. *

* * @author Robert Bor */ public class State { - /** effective the size of the keyword */ + /** + * effective the size of the keyword + */ private final int depth; - /** only used for the root state to refer to itself in case no matches have been found */ + /** + * only used for the root state to refer to itself in case no matches have been found + */ private final State rootState; /** * referred to in the white paper as the 'goto' structure. From a state it is possible to go * to other states, depending on the character passed. */ - private final Map success = new HashMap<>(); + private final Map success = new HashMap<>(); - /** if no matching states are found, the failure state will be returned */ + /** + * if no matching states are found, the failure state will be returned + */ private State failure; - /** whenever this state is reached, it will emit the matches keywords for future reference */ + /** + * whenever this state is reached, it will emit the matches keywords for future reference + */ private Set emits; public State() { @@ -54,11 +62,11 @@ public class State { private State nextState(final Character character, final boolean ignoreRootState) { State nextState = this.success.get(character); - + if (!ignoreRootState && nextState == null && this.rootState != null) { nextState = this.rootState; } - + return nextState; } @@ -69,21 +77,21 @@ public class State { public State nextStateIgnoreRootState(Character character) { return nextState(character, true); } - - public State addState( String keyword ) { - State state = this; - - for (final Character character : keyword.toCharArray()) { - state = state.addState(character); - } - - return state; + + public State addState(String keyword) { + State state = this; + + for (final Character character : keyword.toCharArray()) { + state = state.addState(character); + } + + return state; } public State addState(Character character) { State nextState = nextStateIgnoreRootState(character); if (nextState == null) { - 
nextState = new State(this.depth+1); + nextState = new State(this.depth + 1); this.success.put(character, nextState); } return nextState; @@ -107,7 +115,7 @@ public class State { } public Collection emit() { - return this.emits == null ? Collections. emptyList() : this.emits; + return this.emits == null ? Collections.emptyList() : this.emits; } public State failure() { diff --git a/src/main/java/org/ahocorasick/trie/Trie.java b/src/main/java/org/ahocorasick/trie/Trie.java index 6df993d..0c6ece0 100644 --- a/src/main/java/org/ahocorasick/trie/Trie.java +++ b/src/main/java/org/ahocorasick/trie/Trie.java @@ -1,20 +1,22 @@ package org.ahocorasick.trie; -import static java.lang.Character.isWhitespace; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.Queue; -import java.util.concurrent.LinkedBlockingDeque; import org.ahocorasick.interval.IntervalTree; import org.ahocorasick.interval.Intervalable; import org.ahocorasick.trie.handler.DefaultEmitHandler; import org.ahocorasick.trie.handler.EmitHandler; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Queue; +import java.util.concurrent.LinkedBlockingDeque; + +import static java.lang.Character.isWhitespace; + /** * Based on the Aho-Corasick white paper, Bell technologies: * http://cr.yp.to/bib/1975/aho.pdf - * + * * @author Robert Bor */ public class Trie { @@ -27,21 +29,20 @@ public class Trie { this.trieConfig = trieConfig; this.rootState = new State(); } - + /** * Used by the builder to add a text search keyword. - * + * * @param keyword The search term to add to the list of search terms. - * * @throws NullPointerException if the keyword is null. 
*/ private void addKeyword(String keyword) { - if( keyword.isEmpty() ) { - return; + if (keyword.isEmpty()) { + return; } - - if( isCaseInsensitive() ) { - keyword = keyword.toLowerCase(); + + if (isCaseInsensitive()) { + keyword = keyword.toLowerCase(); } addState(keyword).addEmit(keyword); @@ -49,44 +50,44 @@ public class Trie { /** * Delegates to addKeyword. - * + * * @param keywords List of search term to add to the list of search terms. */ - private void addKeywords( final String[] keywords ) { - for( final String keyword : keywords ) { - addKeyword( keyword ); - } + private void addKeywords(final String[] keywords) { + for (final String keyword : keywords) { + addKeyword(keyword); + } } - + /** * Delegates to addKeyword. - * + * * @param keywords List of search term to add to the list of search terms. */ - private void addKeywords( final Collection keywords ) { - for( final String keyword : keywords ) { - addKeyword( keyword ); - } + private void addKeywords(final Collection keywords) { + for (final String keyword : keywords) { + addKeyword(keyword); + } } private State addState(final String keyword) { return getRootState().addState(keyword); } - + public Collection tokenize(final String text) { final Collection tokens = new ArrayList<>(); final Collection collectedEmits = parseText(text); int lastCollectedPosition = -1; - + for (final Emit emit : collectedEmits) { if (emit.getStart() - lastCollectedPosition > 1) { tokens.add(createFragment(emit, text, lastCollectedPosition)); } - + tokens.add(createMatch(emit, text)); lastCollectedPosition = emit.getEnd(); } - + if (text.length() - lastCollectedPosition > 1) { tokens.add(createFragment(null, text, lastCollectedPosition)); } @@ -95,11 +96,11 @@ public class Trie { } private Token createFragment(final Emit emit, final String text, final int lastCollectedPosition) { - return new FragmentToken(text.substring(lastCollectedPosition+1, emit == null ? 
text.length() : emit.getStart())); + return new FragmentToken(text.substring(lastCollectedPosition + 1, emit == null ? text.length() : emit.getStart())); } private Token createMatch(Emit emit, String text) { - return new MatchToken(text.substring(emit.getStart(), emit.getEnd()+1), emit); + return new MatchToken(text.substring(emit.getStart(), emit.getEnd() + 1), emit); } @SuppressWarnings("unchecked") @@ -118,7 +119,7 @@ public class Trie { } if (!trieConfig.isAllowOverlaps()) { - IntervalTree intervalTree = new IntervalTree((List)(List)collectedEmits); + IntervalTree intervalTree = new IntervalTree((List) (List) collectedEmits); intervalTree.removeOverlaps((List) (List) collectedEmits); } @@ -131,15 +132,15 @@ public class Trie { public void parseText(final CharSequence text, final EmitHandler emitHandler) { State currentState = getRootState(); - + for (int position = 0; position < text.length(); position++) { Character character = text.charAt(position); - + // TODO: Maybe lowercase the entire string at once? if (trieConfig.isCaseInsensitive()) { character = Character.toLowerCase(character); } - + currentState = getState(currentState, character); if (storeEmits(position, currentState, emitHandler) && trieConfig.isStopOnHit()) { return; @@ -149,7 +150,7 @@ public class Trie { /** * The first matching text sequence. - * + * * @param text The text to search for keywords. * @return null if no matches found. */ @@ -164,18 +165,18 @@ public class Trie { } else { // Fast path. Returns first match found. State currentState = getRootState(); - + for (int position = 0; position < text.length(); position++) { Character character = text.charAt(position); - + // TODO: Lowercase the entire string at once? 
if (trieConfig.isCaseInsensitive()) { character = Character.toLowerCase(character); } - + currentState = getState(currentState, character); Collection emitStrs = currentState.emit(); - + if (emitStrs != null && !emitStrs.isEmpty()) { for (final String emitStr : emitStrs) { final Emit emit = new Emit(position - emitStr.length() + 1, position, emitStr); @@ -190,26 +191,26 @@ public class Trie { } } } - + return null; } private boolean isPartialMatch(final CharSequence searchText, final Emit emit) { return (emit.getStart() != 0 && - Character.isAlphabetic(searchText.charAt(emit.getStart() - 1))) || - (emit.getEnd() + 1 != searchText.length() && - Character.isAlphabetic(searchText.charAt(emit.getEnd() + 1))); + Character.isAlphabetic(searchText.charAt(emit.getStart() - 1))) || + (emit.getEnd() + 1 != searchText.length() && + Character.isAlphabetic(searchText.charAt(emit.getEnd() + 1))); } private void removePartialMatches(final CharSequence searchText, final List collectedEmits) { final List removeEmits = new ArrayList<>(); - + for (final Emit emit : collectedEmits) { if (isPartialMatch(searchText, emit)) { removeEmits.add(emit); } } - + for (final Emit removeEmit : removeEmits) { collectedEmits.remove(removeEmit); } @@ -218,15 +219,15 @@ public class Trie { private void removePartialMatchesWhiteSpaceSeparated(final CharSequence searchText, final List collectedEmits) { final long size = searchText.length(); final List removeEmits = new ArrayList<>(); - + for (final Emit emit : collectedEmits) { if ((emit.getStart() == 0 || isWhitespace(searchText.charAt(emit.getStart() - 1))) && - (emit.getEnd() + 1 == size || isWhitespace(searchText.charAt(emit.getEnd() + 1)))) { + (emit.getEnd() + 1 == size || isWhitespace(searchText.charAt(emit.getEnd() + 1)))) { continue; } removeEmits.add(emit); } - + for (final Emit removeEmit : removeEmits) { collectedEmits.remove(removeEmit); } @@ -234,12 +235,12 @@ public class Trie { private State getState(State currentState, final Character 
character) { State newCurrentState = currentState.nextState(character); - + while (newCurrentState == null) { currentState = currentState.failure(); newCurrentState = currentState.nextState(character); } - + return newCurrentState; } @@ -276,7 +277,7 @@ public class Trie { private boolean storeEmits(final int position, final State currentState, final EmitHandler emitHandler) { boolean emitted = false; final Collection emits = currentState.emit(); - + // TODO: The check for empty might be superfluous. if (emits != null && !emits.isEmpty()) { for (final String emit : emits) { @@ -284,21 +285,21 @@ public class Trie { emitted = true; } } - + return emitted; } private boolean isCaseInsensitive() { - return trieConfig.isCaseInsensitive(); + return trieConfig.isCaseInsensitive(); } - + private State getRootState() { - return this.rootState; + return this.rootState; } /** * Provides a fluent interface for constructing Trie instances. - * + * * @return The builder used to configure its Trie. */ public static TrieBuilder builder() { @@ -314,14 +315,15 @@ public class Trie { /** * Default (empty) constructor. */ - private TrieBuilder() {} + private TrieBuilder() { + } /** * Configure the Trie to ignore case when searching for keywords in * the text. This must be called before calling addKeyword because * the algorithm converts keywords to lowercase as they are added, * depending on this case sensitivity setting. - * + * * @return This builder. */ public TrieBuilder ignoreCase() { @@ -331,7 +333,7 @@ public class Trie { /** * Configure the Trie to ignore overlapping keywords. - * + * * @return This builder. */ public TrieBuilder ignoreOverlaps() { @@ -341,9 +343,8 @@ public class Trie { /** * Adds a keyword to the Trie's list of text search keywords. - * + * * @param keyword The keyword to add to the list. - * * @return This builder. * @throws NullPointerException if the keyword is null. 
*/ @@ -351,34 +352,32 @@ public class Trie { this.trie.addKeyword(keyword); return this; } - + /** * Adds a list of keywords to the Trie's list of text search keywords. - * + * * @param keywords The keywords to add to the list. - * * @return This builder. */ public TrieBuilder addKeywords(final String... keywords) { - this.trie.addKeywords(keywords); - return this; + this.trie.addKeywords(keywords); + return this; } /** * Adds a list of keywords to the Trie's list of text search keywords. - * + * * @param keywords The keywords to add to the list. - * * @return This builder. */ public TrieBuilder addKeywords(final Collection keywords) { - this.trie.addKeywords(keywords); - return this; + this.trie.addKeywords(keywords); + return this; } /** * Configure the Trie to match whole keywords in the text. - * + * * @return This builder. */ public TrieBuilder onlyWholeWords() { @@ -390,7 +389,7 @@ public class Trie { * Configure the Trie to match whole keywords that are separated by * whitespace in the text. For example, "this keyword thatkeyword" * would only match the first occurrence of "keyword". - * + * * @return This builder. */ public TrieBuilder onlyWholeWordsWhiteSpaceSeparated() { @@ -401,7 +400,7 @@ public class Trie { /** * Configure the Trie to stop after the first keyword is found in the * text. - * + * * @return This builder. */ public TrieBuilder stopOnHit() { @@ -411,27 +410,25 @@ public class Trie { /** * Configure the Trie based on the builder settings. - * + * * @return The configured Trie. */ public Trie build() { this.trie.constructFailureStates(); return this.trie; } - + /** - * @deprecated Use ignoreCase() - * * @return This builder. + * @deprecated Use ignoreCase() */ public TrieBuilder caseInsensitive() { return ignoreCase(); } /** - * @deprecated Use ignoreOverlaps() - * * @return This builder. 
+ * @deprecated Use ignoreOverlaps() */ public TrieBuilder removeOverlaps() { return ignoreOverlaps(); diff --git a/src/main/java/org/ahocorasick/trie/TrieConfig.java b/src/main/java/org/ahocorasick/trie/TrieConfig.java index f9f0125..f7487dd 100644 --- a/src/main/java/org/ahocorasick/trie/TrieConfig.java +++ b/src/main/java/org/ahocorasick/trie/TrieConfig.java @@ -12,9 +12,13 @@ public class TrieConfig { private boolean stopOnHit = false; - public boolean isStopOnHit() { return stopOnHit; } + public boolean isStopOnHit() { + return stopOnHit; + } - public void setStopOnHit(boolean stopOnHit) { this.stopOnHit = stopOnHit; } + public void setStopOnHit(boolean stopOnHit) { + this.stopOnHit = stopOnHit; + } public boolean isAllowOverlaps() { return allowOverlaps; @@ -32,7 +36,9 @@ public class TrieConfig { this.onlyWholeWords = onlyWholeWords; } - public boolean isOnlyWholeWordsWhiteSpaceSeparated() { return onlyWholeWordsWhiteSpaceSeparated; } + public boolean isOnlyWholeWordsWhiteSpaceSeparated() { + return onlyWholeWordsWhiteSpaceSeparated; + } public void setOnlyWholeWordsWhiteSpaceSeparated(boolean onlyWholeWordsWhiteSpaceSeparated) { this.onlyWholeWordsWhiteSpaceSeparated = onlyWholeWordsWhiteSpaceSeparated; diff --git a/src/test/java/org/ahocorasick/interval/IntervalTest.java b/src/test/java/org/ahocorasick/interval/IntervalTest.java index e61bad7..20d41ca 100644 --- a/src/test/java/org/ahocorasick/interval/IntervalTest.java +++ b/src/test/java/org/ahocorasick/interval/IntervalTest.java @@ -2,29 +2,29 @@ package org.ahocorasick.interval; import org.junit.Test; -import java.util.*; +import java.util.Iterator; +import java.util.Set; +import java.util.TreeSet; -import static junit.framework.Assert.assertEquals; -import static junit.framework.Assert.assertFalse; -import static junit.framework.Assert.assertTrue; +import static junit.framework.Assert.*; public class IntervalTest { @Test public void construct() { - Interval i = new Interval(1,3); + Interval i = new 
Interval(1, 3); assertEquals(1, i.getStart()); assertEquals(3, i.getEnd()); } @Test public void size() { - assertEquals(3, new Interval(0,2).size()); + assertEquals(3, new Interval(0, 2).size()); } @Test public void intervaloverlaps() { - assertTrue(new Interval(1,3).overlapsWith(new Interval(2,4))); + assertTrue(new Interval(1, 3).overlapsWith(new Interval(2, 4))); } @Test @@ -34,7 +34,7 @@ public class IntervalTest { @Test public void pointOverlaps() { - assertTrue(new Interval(1,3).overlapsWith(2)); + assertTrue(new Interval(1, 3).overlapsWith(2)); } @Test diff --git a/src/test/java/org/ahocorasick/interval/IntervalTreeTest.java b/src/test/java/org/ahocorasick/interval/IntervalTreeTest.java index f4a7f57..96c3670 100644 --- a/src/test/java/org/ahocorasick/interval/IntervalTreeTest.java +++ b/src/test/java/org/ahocorasick/interval/IntervalTreeTest.java @@ -9,7 +9,7 @@ import java.util.List; import static junit.framework.Assert.assertEquals; public class IntervalTreeTest { - + @Test public void findOverlaps() { List intervals = new ArrayList(); @@ -20,7 +20,7 @@ public class IntervalTreeTest { intervals.add(new Interval(4, 6)); intervals.add(new Interval(5, 7)); IntervalTree intervalTree = new IntervalTree(intervals); - List overlaps = intervalTree.findOverlaps(new Interval(1,3)); + List overlaps = intervalTree.findOverlaps(new Interval(1, 3)); assertEquals(3, overlaps.size()); Iterator overlapsIt = overlaps.iterator(); assertOverlap(overlapsIt.next(), 2, 4); @@ -47,5 +47,5 @@ public class IntervalTreeTest { assertEquals(expectedStart, interval.getStart()); assertEquals(expectedEnd, interval.getEnd()); } - + } diff --git a/src/test/java/org/ahocorasick/interval/IntervalableComparatorByPositionTest.java b/src/test/java/org/ahocorasick/interval/IntervalableComparatorByPositionTest.java index a6f1017..a36c831 100644 --- a/src/test/java/org/ahocorasick/interval/IntervalableComparatorByPositionTest.java +++ 
b/src/test/java/org/ahocorasick/interval/IntervalableComparatorByPositionTest.java @@ -13,9 +13,9 @@ public class IntervalableComparatorByPositionTest { @Test public void sortOnPosition() { List intervals = new ArrayList(); - intervals.add(new Interval(4,5)); - intervals.add(new Interval(1,4)); - intervals.add(new Interval(3,8)); + intervals.add(new Interval(4, 5)); + intervals.add(new Interval(1, 4)); + intervals.add(new Interval(3, 8)); Collections.sort(intervals, new IntervalableComparatorByPosition()); assertEquals(4, intervals.get(0).size()); assertEquals(6, intervals.get(1).size()); diff --git a/src/test/java/org/ahocorasick/interval/IntervalableComparatorBySizeTest.java b/src/test/java/org/ahocorasick/interval/IntervalableComparatorBySizeTest.java index 208cf3d..8fc7db1 100644 --- a/src/test/java/org/ahocorasick/interval/IntervalableComparatorBySizeTest.java +++ b/src/test/java/org/ahocorasick/interval/IntervalableComparatorBySizeTest.java @@ -13,9 +13,9 @@ public class IntervalableComparatorBySizeTest { @Test public void sortOnSize() { List intervals = new ArrayList(); - intervals.add(new Interval(4,5)); - intervals.add(new Interval(1,4)); - intervals.add(new Interval(3,8)); + intervals.add(new Interval(4, 5)); + intervals.add(new Interval(1, 4)); + intervals.add(new Interval(3, 8)); Collections.sort(intervals, new IntervalableComparatorBySize()); assertEquals(6, intervals.get(0).size()); assertEquals(4, intervals.get(1).size()); @@ -25,8 +25,8 @@ public class IntervalableComparatorBySizeTest { @Test public void sortOnSizeThenPosition() { List intervals = new ArrayList(); - intervals.add(new Interval(4,7)); - intervals.add(new Interval(2,5)); + intervals.add(new Interval(4, 7)); + intervals.add(new Interval(2, 5)); Collections.sort(intervals, new IntervalableComparatorBySize()); assertEquals(2, intervals.get(0).getStart()); assertEquals(4, intervals.get(1).getStart()); diff --git a/src/test/java/org/ahocorasick/trie/StateTest.java 
b/src/test/java/org/ahocorasick/trie/StateTest.java index 2a64370..2694305 100644 --- a/src/test/java/org/ahocorasick/trie/StateTest.java +++ b/src/test/java/org/ahocorasick/trie/StateTest.java @@ -1,6 +1,5 @@ package org.ahocorasick.trie; -import org.ahocorasick.trie.State; import org.junit.Test; import static junit.framework.Assert.assertEquals; @@ -11,9 +10,9 @@ public class StateTest { public void constructSequenceOfCharacters() { State rootState = new State(); rootState - .addState('a') - .addState('b') - .addState('c'); + .addState('a') + .addState('b') + .addState('c'); State currentState = rootState.nextState('a'); assertEquals(1, currentState.getDepth()); currentState = currentState.nextState('b'); diff --git a/src/test/java/org/ahocorasick/trie/TrieTest.java b/src/test/java/org/ahocorasick/trie/TrieTest.java index c4c780b..b529de4 100644 --- a/src/test/java/org/ahocorasick/trie/TrieTest.java +++ b/src/test/java/org/ahocorasick/trie/TrieTest.java @@ -1,34 +1,36 @@ package org.ahocorasick.trie; +import org.ahocorasick.trie.handler.EmitHandler; +import org.junit.Test; + import java.util.ArrayList; import java.util.Collection; import java.util.Iterator; import java.util.List; import java.util.concurrent.ThreadLocalRandom; + import static junit.framework.Assert.assertEquals; -import org.ahocorasick.trie.handler.EmitHandler; import static org.junit.Assert.assertTrue; -import org.junit.Test; public class TrieTest { private final static String[] ALPHABET = new String[]{ - "abc", "bcd", "cde" + "abc", "bcd", "cde" }; - + private final static String[] PRONOUNS = new String[]{ - "hers", "his", "she", "he" + "hers", "his", "she", "he" }; private final static String[] FOOD = new String[]{ - "veal", "cauliflower", "broccoli", "tomatoes" + "veal", "cauliflower", "broccoli", "tomatoes" }; private final static String[] GREEK_LETTERS = new String[]{ - "Alpha", "Beta", "Gamma" + "Alpha", "Beta", "Gamma" }; - + private final static String[] UNICODE = new String[]{ - 
"turning", "once", "again", "börkü" + "turning", "once", "again", "börkü" }; @Test @@ -406,7 +408,7 @@ public class TrieTest { .onlyWholeWordsWhiteSpaceSeparated() .addKeyword("#sugar-123") .build(); - Collection < Emit > emits = trie.parseText("#sugar-123 #sugar-1234"); // left, middle, right test + Collection emits = trie.parseText("#sugar-123 #sugar-1234"); // left, middle, right test assertEquals(1, emits.size()); // Match must not be made checkEmit(emits.iterator().next(), 0, 9, "#sugar-123"); } @@ -415,57 +417,57 @@ public class TrieTest { public void testLargeString() { final int interval = 100; final int textSize = 1000000; - final String keyword = FOOD[ 1 ]; - final StringBuilder text = randomNumbers( textSize ); + final String keyword = FOOD[1]; + final StringBuilder text = randomNumbers(textSize); - injectKeyword( text, keyword, interval ); + injectKeyword(text, keyword, interval); Trie trie = Trie.builder() - .onlyWholeWords() - .addKeyword( keyword ) - .build(); + .onlyWholeWords() + .addKeyword(keyword) + .build(); - final Collection emits = trie.parseText( text ); + final Collection emits = trie.parseText(text); - assertEquals( textSize / interval, emits.size() ); + assertEquals(textSize / interval, emits.size()); } - + /** * Generates a random sequence of ASCII numbers. - * + * * @param count The number of numbers to generate. * @return A character sequence filled with random digits. */ - private StringBuilder randomNumbers( int count ) { - final StringBuilder sb = new StringBuilder( count ); + private StringBuilder randomNumbers(int count) { + final StringBuilder sb = new StringBuilder(count); - while( --count > 0 ) { - sb.append( randomInt( 0, 10 ) ); + while (--count > 0) { + sb.append(randomInt(0, 10)); } return sb; } - + /** * Injects keywords into a string builder. - * - * @param source Should contain a bunch of random data that cannot match - * any keyword. - * @param keyword A keyword to inject repeatedly in the text. 
+ * + * @param source Should contain a bunch of random data that cannot match + * any keyword. + * @param keyword A keyword to inject repeatedly in the text. * @param interval How often to inject the keyword. */ - private void injectKeyword( - final StringBuilder source, - final String keyword, - final int interval ) { + private void injectKeyword( + final StringBuilder source, + final String keyword, + final int interval) { final int length = source.length(); - for( int i = 0; i < length; i += interval ) { - source.replace( i, i + keyword.length(), keyword ); + for (int i = 0; i < length; i += interval) { + source.replace(i, i + keyword.length(), keyword); } } - - private int randomInt( final int min, final int max ) { - return ThreadLocalRandom.current().nextInt( min, max ); + + private int randomInt(final int min, final int max) { + return ThreadLocalRandom.current().nextInt(min, max); } private void checkEmit(Emit next, int expectedStart, int expectedEnd, String expectedKeyword) { From 5edf6d8126f0f27c127886a51c80078004d882e3 Mon Sep 17 00:00:00 2001 From: djarvis Date: Tue, 29 Nov 2016 22:34:55 -0800 Subject: [PATCH 3/4] Added missing override annotations. Added final modifier to Interval member variables. Updated documentation for ignoreCase (issue #33) and moved the ignore methods to the top of the builder to reflect their preferred calling order. 
--- .../org/ahocorasick/interval/Interval.java | 8 +- src/main/java/org/ahocorasick/trie/Trie.java | 296 +++++++++--------- 2 files changed, 158 insertions(+), 146 deletions(-) diff --git a/src/main/java/org/ahocorasick/interval/Interval.java b/src/main/java/org/ahocorasick/interval/Interval.java index 96d4c60..e3b0012 100644 --- a/src/main/java/org/ahocorasick/interval/Interval.java +++ b/src/main/java/org/ahocorasick/interval/Interval.java @@ -2,8 +2,8 @@ package org.ahocorasick.interval; public class Interval implements Intervalable { - private int start; - private int end; + private final int start; + private final int end; /** * Constructs an interval with a start and end position. @@ -21,6 +21,7 @@ public class Interval implements Intervalable { * * @return A number between 0 (start of text) and the text length. */ + @Override public int getStart() { return this.start; } @@ -30,6 +31,7 @@ public class Interval implements Intervalable { * * @return A number between getStart() + 1 and the text length. */ + @Override public int getEnd() { return this.end; } @@ -39,6 +41,7 @@ public class Interval implements Intervalable { * * @return The end position less the start position, plus one. */ + @Override public int size() { return end - start + 1; } @@ -47,6 +50,7 @@ public class Interval implements Intervalable { * Answers whether the given interval overlaps this interval * instance. * + * @param other * @return true The intervals overlap. 
*/ public boolean overlapsWith(final Interval other) { diff --git a/src/main/java/org/ahocorasick/trie/Trie.java b/src/main/java/org/ahocorasick/trie/Trie.java index eee9216..6df993d 100644 --- a/src/main/java/org/ahocorasick/trie/Trie.java +++ b/src/main/java/org/ahocorasick/trie/Trie.java @@ -1,24 +1,20 @@ package org.ahocorasick.trie; -import org.ahocorasick.interval.IntervalTree; -import org.ahocorasick.interval.Intervalable; -import org.ahocorasick.trie.handler.DefaultEmitHandler; -import org.ahocorasick.trie.handler.EmitHandler; - +import static java.lang.Character.isWhitespace; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Queue; import java.util.concurrent.LinkedBlockingDeque; - -import static java.lang.Character.*; - -import java.lang.Character; +import org.ahocorasick.interval.IntervalTree; +import org.ahocorasick.interval.Intervalable; +import org.ahocorasick.trie.handler.DefaultEmitHandler; +import org.ahocorasick.trie.handler.EmitHandler; /** * Based on the Aho-Corasick white paper, Bell technologies: * http://cr.yp.to/bib/1975/aho.pdf - * + * * @author Robert Bor */ public class Trie { @@ -31,41 +27,66 @@ public class Trie { this.trieConfig = trieConfig; this.rootState = new State(); } - + /** * Used by the builder to add a text search keyword. - * + * * @param keyword The search term to add to the list of search terms. + * * @throws NullPointerException if the keyword is null. */ private void addKeyword(String keyword) { - if (keyword.length() > 0) { - if (isCaseInsensitive()) { - keyword = keyword.toLowerCase(); - } - - addState(keyword).addEmit(keyword); + if( keyword.isEmpty() ) { + return; } + + if( isCaseInsensitive() ) { + keyword = keyword.toLowerCase(); + } + + addState(keyword).addEmit(keyword); + } + + /** + * Delegates to addKeyword. + * + * @param keywords List of search term to add to the list of search terms. 
+ */ + private void addKeywords( final String[] keywords ) { + for( final String keyword : keywords ) { + addKeyword( keyword ); + } + } + + /** + * Delegates to addKeyword. + * + * @param keywords List of search term to add to the list of search terms. + */ + private void addKeywords( final Collection keywords ) { + for( final String keyword : keywords ) { + addKeyword( keyword ); + } } private State addState(final String keyword) { return getRootState().addState(keyword); } - + public Collection tokenize(final String text) { final Collection tokens = new ArrayList<>(); final Collection collectedEmits = parseText(text); int lastCollectedPosition = -1; - + for (final Emit emit : collectedEmits) { if (emit.getStart() - lastCollectedPosition > 1) { tokens.add(createFragment(emit, text, lastCollectedPosition)); } - + tokens.add(createMatch(emit, text)); lastCollectedPosition = emit.getEnd(); } - + if (text.length() - lastCollectedPosition > 1) { tokens.add(createFragment(null, text, lastCollectedPosition)); } @@ -73,15 +94,12 @@ public class Trie { return tokens; } - private Token createFragment( - final Emit emit, - final String text, - final int lastCollectedPosition) { - return new FragmentToken(text.substring(lastCollectedPosition + 1, emit == null ? text.length() : emit.getStart())); + private Token createFragment(final Emit emit, final String text, final int lastCollectedPosition) { + return new FragmentToken(text.substring(lastCollectedPosition+1, emit == null ? 
text.length() : emit.getStart())); } - private Token createMatch(final Emit emit, final String text) { - return new MatchToken(text.substring(emit.getStart(), emit.getEnd() + 1), emit); + private Token createMatch(Emit emit, String text) { + return new MatchToken(text.substring(emit.getStart(), emit.getEnd()+1), emit); } @SuppressWarnings("unchecked") @@ -100,7 +118,7 @@ public class Trie { } if (!trieConfig.isAllowOverlaps()) { - IntervalTree intervalTree = new IntervalTree((List) (List) collectedEmits); + IntervalTree intervalTree = new IntervalTree((List)(List)collectedEmits); intervalTree.removeOverlaps((List) (List) collectedEmits); } @@ -113,15 +131,15 @@ public class Trie { public void parseText(final CharSequence text, final EmitHandler emitHandler) { State currentState = getRootState(); - + for (int position = 0; position < text.length(); position++) { Character character = text.charAt(position); - + // TODO: Maybe lowercase the entire string at once? if (trieConfig.isCaseInsensitive()) { - character = toLowerCase(character); + character = Character.toLowerCase(character); } - + currentState = getState(currentState, character); if (storeEmits(position, currentState, emitHandler) && trieConfig.isStopOnHit()) { return; @@ -129,28 +147,35 @@ public class Trie { } } + /** + * The first matching text sequence. + * + * @param text The text to search for keywords. + * @return null if no matches found. + */ public Emit firstMatch(final CharSequence text) { if (!trieConfig.isAllowOverlaps()) { // Slow path. Needs to find all the matches to detect overlaps. - Collection parseText = parseText(text); + final Collection parseText = parseText(text); + if (parseText != null && !parseText.isEmpty()) { return parseText.iterator().next(); } } else { // Fast path. Returns first match found. 
State currentState = getRootState(); - + for (int position = 0; position < text.length(); position++) { Character character = text.charAt(position); - + // TODO: Lowercase the entire string at once? if (trieConfig.isCaseInsensitive()) { - character = toLowerCase(character); + character = Character.toLowerCase(character); } - + currentState = getState(currentState, character); Collection emitStrs = currentState.emit(); - + if (emitStrs != null && !emitStrs.isEmpty()) { for (final String emitStr : emitStrs) { final Emit emit = new Emit(position - emitStr.length() + 1, position, emitStr); @@ -165,26 +190,26 @@ public class Trie { } } } - + return null; } private boolean isPartialMatch(final CharSequence searchText, final Emit emit) { return (emit.getStart() != 0 && - isAlphabetic(searchText.charAt(emit.getStart() - 1))) || - (emit.getEnd() + 1 != searchText.length() && - isAlphabetic(searchText.charAt(emit.getEnd() + 1))); + Character.isAlphabetic(searchText.charAt(emit.getStart() - 1))) || + (emit.getEnd() + 1 != searchText.length() && + Character.isAlphabetic(searchText.charAt(emit.getEnd() + 1))); } private void removePartialMatches(final CharSequence searchText, final List collectedEmits) { final List removeEmits = new ArrayList<>(); - + for (final Emit emit : collectedEmits) { if (isPartialMatch(searchText, emit)) { removeEmits.add(emit); } } - + for (final Emit removeEmit : removeEmits) { collectedEmits.remove(removeEmit); } @@ -193,30 +218,29 @@ public class Trie { private void removePartialMatchesWhiteSpaceSeparated(final CharSequence searchText, final List collectedEmits) { final long size = searchText.length(); final List removeEmits = new ArrayList<>(); - + for (final Emit emit : collectedEmits) { if ((emit.getStart() == 0 || isWhitespace(searchText.charAt(emit.getStart() - 1))) && - (emit.getEnd() + 1 == size || isWhitespace(searchText.charAt(emit.getEnd() + 1)))) { + (emit.getEnd() + 1 == size || isWhitespace(searchText.charAt(emit.getEnd() + 1)))) { 
continue; } removeEmits.add(emit); } - + for (final Emit removeEmit : removeEmits) { collectedEmits.remove(removeEmit); } } - private State getState(final State initialState, final Character character) { - State currentState = initialState; - State updatedState = currentState.nextState(character); - - while (updatedState == null) { + private State getState(State currentState, final Character character) { + State newCurrentState = currentState.nextState(character); + + while (newCurrentState == null) { currentState = currentState.failure(); - updatedState = currentState.nextState(character); + newCurrentState = currentState.nextState(character); } - - return updatedState; + + return newCurrentState; } private void constructFailureStates() { @@ -249,13 +273,10 @@ public class Trie { } } - private boolean storeEmits( - final int position, - final State currentState, - final EmitHandler emitHandler) { + private boolean storeEmits(final int position, final State currentState, final EmitHandler emitHandler) { boolean emitted = false; final Collection emits = currentState.emit(); - + // TODO: The check for empty might be superfluous. if (emits != null && !emits.isEmpty()) { for (final String emit : emits) { @@ -263,31 +284,27 @@ public class Trie { emitted = true; } } - + return emitted; } private boolean isCaseInsensitive() { - return trieConfig.isCaseInsensitive(); + return trieConfig.isCaseInsensitive(); } - + private State getRootState() { - return this.rootState; + return this.rootState; } /** - * Constructs a TrieBuilder instance for configuring the Trie using a fluent - * interface. - * + * Provides a fluent interface for constructing Trie instances. + * * @return The builder used to configure its Trie. */ public static TrieBuilder builder() { return new TrieBuilder(); } - /** - * Provides a fluent interface for constructing Trie instances. 
- */ public static class TrieBuilder { private final TrieConfig trieConfig = new TrieConfig(); @@ -297,73 +314,75 @@ public class Trie { /** * Default (empty) constructor. */ - private TrieBuilder() { - } + private TrieBuilder() {} /** - * Adds a keyword to the Trie's list of text search keywords. - * - * @param keyword The keyword to add to the list. - * @return This builder. - * @throws NullPointerException if the keyword is null. - */ - public TrieBuilder addKeyword(final CharSequence keyword) { - getTrie().addKeyword(keyword.toString()); - return this; - } - - /** - * Adds a list of keywords to the Trie's list of text search keywords. - * - * @param keywords The keywords to add to the list. - * @return This builder. - */ - public TrieBuilder addKeywords(final CharSequence... keywords) { - for (final CharSequence keyword : keywords) { - addKeyword(keyword); - } - - return this; - } - - /** - * Adds a list of keywords to the Trie's list of text search keywords. - * - * @param keywords The keywords to add to the list. - * @return This builder. - */ - public TrieBuilder addKeywords(final Collection keywords) { - return addKeywords(keywords.toArray(new CharSequence[keywords.size()])); - } - - /** - * Configure the Trie to ignore case when searching for keywords in the - * text. - * + * Configure the Trie to ignore case when searching for keywords in + * the text. This must be called before calling addKeyword because + * the algorithm converts keywords to lowercase as they are added, + * depending on this case sensitivity setting. + * * @return This builder. */ public TrieBuilder ignoreCase() { - getTrieConfig().setCaseInsensitive(true); + this.trieConfig.setCaseInsensitive(true); return this; } /** * Configure the Trie to ignore overlapping keywords. - * + * * @return This builder. 
*/ public TrieBuilder ignoreOverlaps() { - getTrieConfig().setAllowOverlaps(false); + this.trieConfig.setAllowOverlaps(false); return this; } + /** + * Adds a keyword to the Trie's list of text search keywords. + * + * @param keyword The keyword to add to the list. + * + * @return This builder. + * @throws NullPointerException if the keyword is null. + */ + public TrieBuilder addKeyword(final String keyword) { + this.trie.addKeyword(keyword); + return this; + } + + /** + * Adds a list of keywords to the Trie's list of text search keywords. + * + * @param keywords The keywords to add to the list. + * + * @return This builder. + */ + public TrieBuilder addKeywords(final String... keywords) { + this.trie.addKeywords(keywords); + return this; + } + + /** + * Adds a list of keywords to the Trie's list of text search keywords. + * + * @param keywords The keywords to add to the list. + * + * @return This builder. + */ + public TrieBuilder addKeywords(final Collection keywords) { + this.trie.addKeywords(keywords); + return this; + } + /** * Configure the Trie to match whole keywords in the text. - * + * * @return This builder. */ public TrieBuilder onlyWholeWords() { - getTrieConfig().setOnlyWholeWords(true); + this.trieConfig.setOnlyWholeWords(true); return this; } @@ -371,52 +390,39 @@ public class Trie { * Configure the Trie to match whole keywords that are separated by * whitespace in the text. For example, "this keyword thatkeyword" * would only match the first occurrence of "keyword". - * + * * @return This builder. */ public TrieBuilder onlyWholeWordsWhiteSpaceSeparated() { - getTrieConfig().setOnlyWholeWordsWhiteSpaceSeparated(true); + this.trieConfig.setOnlyWholeWordsWhiteSpaceSeparated(true); return this; } /** - * Configure the Trie to stop searching for matches after the first - * keyword is found in the text. - * + * Configure the Trie to stop after the first keyword is found in the + * text. + * * @return This builder. 
*/ - public TrieBuilder onlyFirstMatch() { - getTrieConfig().setStopOnHit(true); + public TrieBuilder stopOnHit() { + trie.trieConfig.setStopOnHit(true); return this; } /** - * Construct the Trie using the builder settings. - * + * Configure the Trie based on the builder settings. + * * @return The configured Trie. */ public Trie build() { - getTrie().constructFailureStates(); - return getTrie(); - } - - private Trie getTrie() { + this.trie.constructFailureStates(); return this.trie; } - - private TrieConfig getTrieConfig() { - return this.trieConfig; - } - - /** - * @deprecated Use onlyFirstMatch() - */ - public TrieBuilder stopOnHit() { - return onlyFirstMatch(); - } - + /** * @deprecated Use ignoreCase() + * + * @return This builder. */ public TrieBuilder caseInsensitive() { return ignoreCase(); @@ -424,6 +430,8 @@ public class Trie { /** * @deprecated Use ignoreOverlaps() + * + * @return This builder. */ public TrieBuilder removeOverlaps() { return ignoreOverlaps(); From a45df04a2637dc737188a14a63d56646a93619b6 Mon Sep 17 00:00:00 2001 From: robert-bor Date: Wed, 30 Nov 2016 12:07:03 +0100 Subject: [PATCH 4/4] Optimize imports Reformatted code (Java convention; tab is 4 spaces) --- .../ahocorasick/interval/Intervalable.java | 1 + src/main/java/org/ahocorasick/trie/State.java | 10 +- src/main/java/org/ahocorasick/trie/Trie.java | 159 +++++++++--------- .../IntervalableComparatorByPositionTest.java | 6 +- .../IntervalableComparatorBySizeTest.java | 10 +- .../java/org/ahocorasick/trie/TrieTest.java | 73 ++++---- 6 files changed, 128 insertions(+), 131 deletions(-) diff --git a/src/main/java/org/ahocorasick/interval/Intervalable.java b/src/main/java/org/ahocorasick/interval/Intervalable.java index fed2982..0dd5f69 100644 --- a/src/main/java/org/ahocorasick/interval/Intervalable.java +++ b/src/main/java/org/ahocorasick/interval/Intervalable.java @@ -7,4 +7,5 @@ public interface Intervalable extends Comparable { public int getEnd(); public int size(); + } diff --git 
a/src/main/java/org/ahocorasick/trie/State.java b/src/main/java/org/ahocorasick/trie/State.java index 8cf3e88..e192207 100644 --- a/src/main/java/org/ahocorasick/trie/State.java +++ b/src/main/java/org/ahocorasick/trie/State.java @@ -74,11 +74,11 @@ public class State { return nextState(character, false); } - public State nextStateIgnoreRootState(final Character character) { + public State nextStateIgnoreRootState(Character character) { return nextState(character, true); } - public State addState(final String keyword) { + public State addState(String keyword) { State state = this; for (final Character character : keyword.toCharArray()) { @@ -88,7 +88,7 @@ public class State { return state; } - public State addState(final Character character) { + public State addState(Character character) { State nextState = nextStateIgnoreRootState(character); if (nextState == null) { nextState = new State(this.depth + 1); @@ -101,14 +101,14 @@ public class State { return this.depth; } - public void addEmit(final String keyword) { + public void addEmit(String keyword) { if (this.emits == null) { this.emits = new TreeSet<>(); } this.emits.add(keyword); } - public void addEmit(final Collection emits) { + public void addEmit(Collection emits) { for (String emit : emits) { addEmit(emit); } diff --git a/src/main/java/org/ahocorasick/trie/Trie.java b/src/main/java/org/ahocorasick/trie/Trie.java index 6df993d..0c6ece0 100644 --- a/src/main/java/org/ahocorasick/trie/Trie.java +++ b/src/main/java/org/ahocorasick/trie/Trie.java @@ -1,20 +1,22 @@ package org.ahocorasick.trie; -import static java.lang.Character.isWhitespace; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.Queue; -import java.util.concurrent.LinkedBlockingDeque; import org.ahocorasick.interval.IntervalTree; import org.ahocorasick.interval.Intervalable; import org.ahocorasick.trie.handler.DefaultEmitHandler; import org.ahocorasick.trie.handler.EmitHandler; +import 
java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Queue; +import java.util.concurrent.LinkedBlockingDeque; + +import static java.lang.Character.isWhitespace; + /** * Based on the Aho-Corasick white paper, Bell technologies: * http://cr.yp.to/bib/1975/aho.pdf - * + * * @author Robert Bor */ public class Trie { @@ -27,21 +29,20 @@ public class Trie { this.trieConfig = trieConfig; this.rootState = new State(); } - + /** * Used by the builder to add a text search keyword. - * + * * @param keyword The search term to add to the list of search terms. - * * @throws NullPointerException if the keyword is null. */ private void addKeyword(String keyword) { - if( keyword.isEmpty() ) { - return; + if (keyword.isEmpty()) { + return; } - - if( isCaseInsensitive() ) { - keyword = keyword.toLowerCase(); + + if (isCaseInsensitive()) { + keyword = keyword.toLowerCase(); } addState(keyword).addEmit(keyword); @@ -49,44 +50,44 @@ public class Trie { /** * Delegates to addKeyword. - * + * * @param keywords List of search term to add to the list of search terms. */ - private void addKeywords( final String[] keywords ) { - for( final String keyword : keywords ) { - addKeyword( keyword ); - } + private void addKeywords(final String[] keywords) { + for (final String keyword : keywords) { + addKeyword(keyword); + } } - + /** * Delegates to addKeyword. - * + * * @param keywords List of search term to add to the list of search terms. 
*/ - private void addKeywords( final Collection keywords ) { - for( final String keyword : keywords ) { - addKeyword( keyword ); - } + private void addKeywords(final Collection keywords) { + for (final String keyword : keywords) { + addKeyword(keyword); + } } private State addState(final String keyword) { return getRootState().addState(keyword); } - + public Collection tokenize(final String text) { final Collection tokens = new ArrayList<>(); final Collection collectedEmits = parseText(text); int lastCollectedPosition = -1; - + for (final Emit emit : collectedEmits) { if (emit.getStart() - lastCollectedPosition > 1) { tokens.add(createFragment(emit, text, lastCollectedPosition)); } - + tokens.add(createMatch(emit, text)); lastCollectedPosition = emit.getEnd(); } - + if (text.length() - lastCollectedPosition > 1) { tokens.add(createFragment(null, text, lastCollectedPosition)); } @@ -95,11 +96,11 @@ public class Trie { } private Token createFragment(final Emit emit, final String text, final int lastCollectedPosition) { - return new FragmentToken(text.substring(lastCollectedPosition+1, emit == null ? text.length() : emit.getStart())); + return new FragmentToken(text.substring(lastCollectedPosition + 1, emit == null ? 
text.length() : emit.getStart())); } private Token createMatch(Emit emit, String text) { - return new MatchToken(text.substring(emit.getStart(), emit.getEnd()+1), emit); + return new MatchToken(text.substring(emit.getStart(), emit.getEnd() + 1), emit); } @SuppressWarnings("unchecked") @@ -118,7 +119,7 @@ public class Trie { } if (!trieConfig.isAllowOverlaps()) { - IntervalTree intervalTree = new IntervalTree((List)(List)collectedEmits); + IntervalTree intervalTree = new IntervalTree((List) (List) collectedEmits); intervalTree.removeOverlaps((List) (List) collectedEmits); } @@ -131,15 +132,15 @@ public class Trie { public void parseText(final CharSequence text, final EmitHandler emitHandler) { State currentState = getRootState(); - + for (int position = 0; position < text.length(); position++) { Character character = text.charAt(position); - + // TODO: Maybe lowercase the entire string at once? if (trieConfig.isCaseInsensitive()) { character = Character.toLowerCase(character); } - + currentState = getState(currentState, character); if (storeEmits(position, currentState, emitHandler) && trieConfig.isStopOnHit()) { return; @@ -149,7 +150,7 @@ public class Trie { /** * The first matching text sequence. - * + * * @param text The text to search for keywords. * @return null if no matches found. */ @@ -164,18 +165,18 @@ public class Trie { } else { // Fast path. Returns first match found. State currentState = getRootState(); - + for (int position = 0; position < text.length(); position++) { Character character = text.charAt(position); - + // TODO: Lowercase the entire string at once? 
if (trieConfig.isCaseInsensitive()) { character = Character.toLowerCase(character); } - + currentState = getState(currentState, character); Collection emitStrs = currentState.emit(); - + if (emitStrs != null && !emitStrs.isEmpty()) { for (final String emitStr : emitStrs) { final Emit emit = new Emit(position - emitStr.length() + 1, position, emitStr); @@ -190,26 +191,26 @@ public class Trie { } } } - + return null; } private boolean isPartialMatch(final CharSequence searchText, final Emit emit) { return (emit.getStart() != 0 && - Character.isAlphabetic(searchText.charAt(emit.getStart() - 1))) || - (emit.getEnd() + 1 != searchText.length() && - Character.isAlphabetic(searchText.charAt(emit.getEnd() + 1))); + Character.isAlphabetic(searchText.charAt(emit.getStart() - 1))) || + (emit.getEnd() + 1 != searchText.length() && + Character.isAlphabetic(searchText.charAt(emit.getEnd() + 1))); } private void removePartialMatches(final CharSequence searchText, final List collectedEmits) { final List removeEmits = new ArrayList<>(); - + for (final Emit emit : collectedEmits) { if (isPartialMatch(searchText, emit)) { removeEmits.add(emit); } } - + for (final Emit removeEmit : removeEmits) { collectedEmits.remove(removeEmit); } @@ -218,15 +219,15 @@ public class Trie { private void removePartialMatchesWhiteSpaceSeparated(final CharSequence searchText, final List collectedEmits) { final long size = searchText.length(); final List removeEmits = new ArrayList<>(); - + for (final Emit emit : collectedEmits) { if ((emit.getStart() == 0 || isWhitespace(searchText.charAt(emit.getStart() - 1))) && - (emit.getEnd() + 1 == size || isWhitespace(searchText.charAt(emit.getEnd() + 1)))) { + (emit.getEnd() + 1 == size || isWhitespace(searchText.charAt(emit.getEnd() + 1)))) { continue; } removeEmits.add(emit); } - + for (final Emit removeEmit : removeEmits) { collectedEmits.remove(removeEmit); } @@ -234,12 +235,12 @@ public class Trie { private State getState(State currentState, final Character 
character) { State newCurrentState = currentState.nextState(character); - + while (newCurrentState == null) { currentState = currentState.failure(); newCurrentState = currentState.nextState(character); } - + return newCurrentState; } @@ -276,7 +277,7 @@ public class Trie { private boolean storeEmits(final int position, final State currentState, final EmitHandler emitHandler) { boolean emitted = false; final Collection emits = currentState.emit(); - + // TODO: The check for empty might be superfluous. if (emits != null && !emits.isEmpty()) { for (final String emit : emits) { @@ -284,21 +285,21 @@ public class Trie { emitted = true; } } - + return emitted; } private boolean isCaseInsensitive() { - return trieConfig.isCaseInsensitive(); + return trieConfig.isCaseInsensitive(); } - + private State getRootState() { - return this.rootState; + return this.rootState; } /** * Provides a fluent interface for constructing Trie instances. - * + * * @return The builder used to configure its Trie. */ public static TrieBuilder builder() { @@ -314,14 +315,15 @@ public class Trie { /** * Default (empty) constructor. */ - private TrieBuilder() {} + private TrieBuilder() { + } /** * Configure the Trie to ignore case when searching for keywords in * the text. This must be called before calling addKeyword because * the algorithm converts keywords to lowercase as they are added, * depending on this case sensitivity setting. - * + * * @return This builder. */ public TrieBuilder ignoreCase() { @@ -331,7 +333,7 @@ public class Trie { /** * Configure the Trie to ignore overlapping keywords. - * + * * @return This builder. */ public TrieBuilder ignoreOverlaps() { @@ -341,9 +343,8 @@ public class Trie { /** * Adds a keyword to the Trie's list of text search keywords. - * + * * @param keyword The keyword to add to the list. - * * @return This builder. * @throws NullPointerException if the keyword is null. 
*/ @@ -351,34 +352,32 @@ public class Trie { this.trie.addKeyword(keyword); return this; } - + /** * Adds a list of keywords to the Trie's list of text search keywords. - * + * * @param keywords The keywords to add to the list. - * * @return This builder. */ public TrieBuilder addKeywords(final String... keywords) { - this.trie.addKeywords(keywords); - return this; + this.trie.addKeywords(keywords); + return this; } /** * Adds a list of keywords to the Trie's list of text search keywords. - * + * * @param keywords The keywords to add to the list. - * * @return This builder. */ public TrieBuilder addKeywords(final Collection keywords) { - this.trie.addKeywords(keywords); - return this; + this.trie.addKeywords(keywords); + return this; } /** * Configure the Trie to match whole keywords in the text. - * + * * @return This builder. */ public TrieBuilder onlyWholeWords() { @@ -390,7 +389,7 @@ public class Trie { * Configure the Trie to match whole keywords that are separated by * whitespace in the text. For example, "this keyword thatkeyword" * would only match the first occurrence of "keyword". - * + * * @return This builder. */ public TrieBuilder onlyWholeWordsWhiteSpaceSeparated() { @@ -401,7 +400,7 @@ public class Trie { /** * Configure the Trie to stop after the first keyword is found in the * text. - * + * * @return This builder. */ public TrieBuilder stopOnHit() { @@ -411,27 +410,25 @@ public class Trie { /** * Configure the Trie based on the builder settings. - * + * * @return The configured Trie. */ public Trie build() { this.trie.constructFailureStates(); return this.trie; } - + /** - * @deprecated Use ignoreCase() - * * @return This builder. + * @deprecated Use ignoreCase() */ public TrieBuilder caseInsensitive() { return ignoreCase(); } /** - * @deprecated Use ignoreOverlaps() - * * @return This builder. 
+ * @deprecated Use ignoreOverlaps() */ public TrieBuilder removeOverlaps() { return ignoreOverlaps(); diff --git a/src/test/java/org/ahocorasick/interval/IntervalableComparatorByPositionTest.java b/src/test/java/org/ahocorasick/interval/IntervalableComparatorByPositionTest.java index 40ad64e..a36c831 100644 --- a/src/test/java/org/ahocorasick/interval/IntervalableComparatorByPositionTest.java +++ b/src/test/java/org/ahocorasick/interval/IntervalableComparatorByPositionTest.java @@ -3,20 +3,20 @@ package org.ahocorasick.interval; import org.junit.Test; import java.util.ArrayList; +import java.util.Collections; import java.util.List; -import static java.util.Collections.sort; import static junit.framework.Assert.assertEquals; public class IntervalableComparatorByPositionTest { @Test public void sortOnPosition() { - List intervals = new ArrayList<>(); + List intervals = new ArrayList(); intervals.add(new Interval(4, 5)); intervals.add(new Interval(1, 4)); intervals.add(new Interval(3, 8)); - sort(intervals, new IntervalableComparatorByPosition()); + Collections.sort(intervals, new IntervalableComparatorByPosition()); assertEquals(4, intervals.get(0).size()); assertEquals(6, intervals.get(1).size()); assertEquals(2, intervals.get(2).size()); diff --git a/src/test/java/org/ahocorasick/interval/IntervalableComparatorBySizeTest.java b/src/test/java/org/ahocorasick/interval/IntervalableComparatorBySizeTest.java index 31fc84d..8fc7db1 100644 --- a/src/test/java/org/ahocorasick/interval/IntervalableComparatorBySizeTest.java +++ b/src/test/java/org/ahocorasick/interval/IntervalableComparatorBySizeTest.java @@ -3,20 +3,20 @@ package org.ahocorasick.interval; import org.junit.Test; import java.util.ArrayList; +import java.util.Collections; import java.util.List; -import static java.util.Collections.sort; import static junit.framework.Assert.assertEquals; public class IntervalableComparatorBySizeTest { @Test public void sortOnSize() { - List intervals = new ArrayList<>(); + 
List intervals = new ArrayList(); intervals.add(new Interval(4, 5)); intervals.add(new Interval(1, 4)); intervals.add(new Interval(3, 8)); - sort(intervals, new IntervalableComparatorBySize()); + Collections.sort(intervals, new IntervalableComparatorBySize()); assertEquals(6, intervals.get(0).size()); assertEquals(4, intervals.get(1).size()); assertEquals(2, intervals.get(2).size()); @@ -24,10 +24,10 @@ public class IntervalableComparatorBySizeTest { @Test public void sortOnSizeThenPosition() { - List intervals = new ArrayList<>(); + List intervals = new ArrayList(); intervals.add(new Interval(4, 7)); intervals.add(new Interval(2, 5)); - sort(intervals, new IntervalableComparatorBySize()); + Collections.sort(intervals, new IntervalableComparatorBySize()); assertEquals(2, intervals.get(0).getStart()); assertEquals(4, intervals.get(1).getStart()); } diff --git a/src/test/java/org/ahocorasick/trie/TrieTest.java b/src/test/java/org/ahocorasick/trie/TrieTest.java index 00070d3..b529de4 100644 --- a/src/test/java/org/ahocorasick/trie/TrieTest.java +++ b/src/test/java/org/ahocorasick/trie/TrieTest.java @@ -7,10 +7,9 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Iterator; import java.util.List; +import java.util.concurrent.ThreadLocalRandom; -import static java.util.concurrent.ThreadLocalRandom.current; import static junit.framework.Assert.assertEquals; -import static org.ahocorasick.trie.Trie.builder; import static org.junit.Assert.assertTrue; public class TrieTest { @@ -36,7 +35,7 @@ public class TrieTest { @Test public void keywordAndTextAreTheSame() { - Trie trie = builder() + Trie trie = Trie.builder() .addKeyword(ALPHABET[0]) .build(); Collection emits = trie.parseText(ALPHABET[0]); @@ -46,7 +45,7 @@ public class TrieTest { @Test public void keywordAndTextAreTheSameFirstMatch() { - Trie trie = builder() + Trie trie = Trie.builder() .addKeyword(ALPHABET[0]) .build(); Emit firstMatch = trie.firstMatch(ALPHABET[0]); @@ -55,7 +54,7 @@ 
public class TrieTest { @Test public void textIsLongerThanKeyword() { - Trie trie = builder() + Trie trie = Trie.builder() .addKeyword(ALPHABET[0]) .build(); Collection emits = trie.parseText(" " + ALPHABET[0]); @@ -65,7 +64,7 @@ public class TrieTest { @Test public void textIsLongerThanKeywordFirstMatch() { - Trie trie = builder() + Trie trie = Trie.builder() .addKeyword(ALPHABET[0]) .build(); Emit firstMatch = trie.firstMatch(" " + ALPHABET[0]); @@ -74,7 +73,7 @@ public class TrieTest { @Test public void variousKeywordsOneMatch() { - Trie trie = builder() + Trie trie = Trie.builder() .addKeywords(ALPHABET) .build(); Collection emits = trie.parseText("bcd"); @@ -84,7 +83,7 @@ public class TrieTest { @Test public void variousKeywordsFirstMatch() { - Trie trie = builder() + Trie trie = Trie.builder() .addKeywords(ALPHABET) .build(); Emit firstMatch = trie.firstMatch("bcd"); @@ -93,7 +92,7 @@ public class TrieTest { @Test public void ushersTestAndStopOnHit() { - Trie trie = builder() + Trie trie = Trie.builder() .addKeywords(PRONOUNS) .stopOnHit() .build(); @@ -106,7 +105,7 @@ public class TrieTest { @Test public void ushersTest() { - Trie trie = builder() + Trie trie = Trie.builder() .addKeywords(PRONOUNS) .build(); Collection emits = trie.parseText("ushers"); @@ -119,7 +118,7 @@ public class TrieTest { @Test public void ushersTestWithCapitalKeywords() { - Trie trie = builder() + Trie trie = Trie.builder() .ignoreCase() .addKeyword("HERS") .addKeyword("HIS") @@ -136,7 +135,7 @@ public class TrieTest { @Test public void ushersTestFirstMatch() { - Trie trie = builder() + Trie trie = Trie.builder() .addKeywords(PRONOUNS) .build(); Emit firstMatch = trie.firstMatch("ushers"); @@ -145,7 +144,7 @@ public class TrieTest { @Test public void ushersTestByCallback() { - Trie trie = builder() + Trie trie = Trie.builder() .addKeywords(PRONOUNS) .build(); @@ -167,7 +166,7 @@ public class TrieTest { @Test public void misleadingTest() { - Trie trie = builder() + Trie trie = 
Trie.builder() .addKeyword("hers") .build(); Collection emits = trie.parseText("h he her hers"); @@ -177,7 +176,7 @@ public class TrieTest { @Test public void misleadingTestFirstMatch() { - Trie trie = builder() + Trie trie = Trie.builder() .addKeyword("hers") .build(); Emit firstMatch = trie.firstMatch("h he her hers"); @@ -186,7 +185,7 @@ public class TrieTest { @Test public void recipes() { - Trie trie = builder() + Trie trie = Trie.builder() .addKeywords(FOOD) .build(); Collection emits = trie.parseText("2 cauliflowers, 3 tomatoes, 4 slices of veal, 100g broccoli"); @@ -199,7 +198,7 @@ public class TrieTest { @Test public void recipesFirstMatch() { - Trie trie = builder() + Trie trie = Trie.builder() .addKeywords(FOOD) .build(); Emit firstMatch = trie.firstMatch("2 cauliflowers, 3 tomatoes, 4 slices of veal, 100g broccoli"); @@ -209,7 +208,7 @@ public class TrieTest { @Test public void longAndShortOverlappingMatch() { - Trie trie = builder() + Trie trie = Trie.builder() .addKeyword("he") .addKeyword("hehehehe") .build(); @@ -226,7 +225,7 @@ public class TrieTest { @Test public void nonOverlapping() { - Trie trie = builder().removeOverlaps() + Trie trie = Trie.builder().removeOverlaps() .addKeyword("ab") .addKeyword("cba") .addKeyword("ababc") @@ -241,7 +240,7 @@ public class TrieTest { @Test public void nonOverlappingFirstMatch() { - Trie trie = builder().removeOverlaps() + Trie trie = Trie.builder().removeOverlaps() .addKeyword("ab") .addKeyword("cba") .addKeyword("ababc") @@ -253,7 +252,7 @@ public class TrieTest { @Test public void containsMatch() { - Trie trie = builder().removeOverlaps() + Trie trie = Trie.builder().removeOverlaps() .addKeyword("ab") .addKeyword("cba") .addKeyword("ababc") @@ -263,7 +262,7 @@ public class TrieTest { @Test public void startOfChurchillSpeech() { - Trie trie = builder().removeOverlaps() + Trie trie = Trie.builder().removeOverlaps() .addKeyword("T") .addKeyword("u") .addKeyword("ur") @@ -281,7 +280,7 @@ public class TrieTest { 
@Test public void partialMatch() { - Trie trie = builder() + Trie trie = Trie.builder() .onlyWholeWords() .addKeyword("sugar") .build(); @@ -292,7 +291,7 @@ public class TrieTest { @Test public void partialMatchFirstMatch() { - Trie trie = builder() + Trie trie = Trie.builder() .onlyWholeWords() .addKeyword("sugar") .build(); @@ -303,7 +302,7 @@ public class TrieTest { @Test public void tokenizeFullSentence() { - Trie trie = builder() + Trie trie = Trie.builder() .addKeywords(GREEK_LETTERS) .build(); Collection tokens = trie.tokenize("Hear: Alpha team first, Beta from the rear, Gamma in reserve"); @@ -321,7 +320,7 @@ public class TrieTest { // @see https://github.com/robert-bor/aho-corasick/issues/5 @Test public void testStringIndexOutOfBoundsException() { - Trie trie = builder().ignoreCase().onlyWholeWords() + Trie trie = Trie.builder().ignoreCase().onlyWholeWords() .addKeywords(UNICODE) .build(); Collection emits = trie.parseText("TurninG OnCe AgAiN BÖRKÜ"); @@ -335,7 +334,7 @@ public class TrieTest { @Test public void testIgnoreCase() { - Trie trie = builder().ignoreCase() + Trie trie = Trie.builder().ignoreCase() .addKeywords(UNICODE) .build(); Collection emits = trie.parseText("TurninG OnCe AgAiN BÖRKÜ"); @@ -349,7 +348,7 @@ public class TrieTest { @Test public void testIgnoreCaseFirstMatch() { - Trie trie = builder().ignoreCase() + Trie trie = Trie.builder().ignoreCase() .addKeywords(UNICODE) .build(); Emit firstMatch = trie.firstMatch("TurninG OnCe AgAiN BÖRKÜ"); @@ -359,7 +358,7 @@ public class TrieTest { @Test public void tokenizeTokensInSequence() { - Trie trie = builder() + Trie trie = Trie.builder() .addKeywords(GREEK_LETTERS) .build(); Collection tokens = trie.tokenize("Alpha Beta Gamma"); @@ -369,7 +368,7 @@ public class TrieTest { // @see https://github.com/robert-bor/aho-corasick/issues/7 @Test public void testZeroLength() { - Trie trie = builder().ignoreOverlaps().onlyWholeWords().ignoreCase() + Trie trie = 
Trie.builder().ignoreOverlaps().onlyWholeWords().ignoreCase() .addKeyword("") .build(); trie.tokenize("Try a natural lip and subtle bronzer to keep all the focus on those big bright eyes with NARS Eyeshadow Duo in Rated R And the winner is... Boots No7 Advanced Renewal Anti-ageing Glycolic Peel Kit ($25 amazon.com) won most-appealing peel."); @@ -380,7 +379,7 @@ public class TrieTest { public void testUnicode1() { String target = "LİKE THIS"; // The second character ('İ') is Unicode, which was read by AC as a 2-byte char assertEquals("THIS", target.substring(5, 9)); // Java does it the right way - Trie trie = builder().ignoreCase().onlyWholeWords() + Trie trie = Trie.builder().ignoreCase().onlyWholeWords() .addKeyword("this") .build(); Collection emits = trie.parseText(target); @@ -393,7 +392,7 @@ public class TrieTest { @Test public void testUnicode2() { String target = "LİKE THIS"; // The second character ('İ') is Unicode, which was read by AC as a 2-byte char - Trie trie = builder() + Trie trie = Trie.builder() .ignoreCase() .onlyWholeWords() .addKeyword("this") @@ -405,7 +404,7 @@ public class TrieTest { @Test public void testPartialMatchWhiteSpaces() { - Trie trie = builder() + Trie trie = Trie.builder() .onlyWholeWordsWhiteSpaceSeparated() .addKeyword("#sugar-123") .build(); @@ -423,7 +422,7 @@ public class TrieTest { injectKeyword(text, keyword, interval); - Trie trie = builder() + Trie trie = Trie.builder() .onlyWholeWords() .addKeyword(keyword) .build(); @@ -439,10 +438,10 @@ public class TrieTest { * @param count The number of numbers to generate. * @return A character sequence filled with random digits. 
*/ - private StringBuilder randomNumbers(final int count) { + private StringBuilder randomNumbers(int count) { final StringBuilder sb = new StringBuilder(count); - for (int i = count - 1; i >= 0; i--) { + while (count-- > 0) { sb.append(randomInt(0, 10)); } @@ -468,7 +467,7 @@ public class TrieTest { } private int randomInt(final int min, final int max) { - return current().nextInt(min, max); + return ThreadLocalRandom.current().nextInt(min, max); } private void checkEmit(Emit next, int expectedStart, int expectedEnd, String expectedKeyword) {