diff --git a/pom.xml b/pom.xml index a997e98..806ef42 100644 --- a/pom.xml +++ b/pom.xml @@ -1,4 +1,5 @@ - + 4.0.0 org.ahocorasick diff --git a/src/main/java/org/ahocorasick/interval/IntervalNode.java b/src/main/java/org/ahocorasick/interval/IntervalNode.java index 22242fa..92727c9 100644 --- a/src/main/java/org/ahocorasick/interval/IntervalNode.java +++ b/src/main/java/org/ahocorasick/interval/IntervalNode.java @@ -8,16 +8,16 @@ public class IntervalNode { private enum Direction {LEFT, RIGHT} - private IntervalNode left = null; - private IntervalNode right = null; + private IntervalNode left; + private IntervalNode right; private int point; - private List intervals = new ArrayList(); + private List intervals = new ArrayList<>(); - public IntervalNode(List intervals) { + public IntervalNode(final List intervals) { this.point = determineMedian(intervals); - List toLeft = new ArrayList(); - List toRight = new ArrayList(); + final List toLeft = new ArrayList<>(); + final List toRight = new ArrayList<>(); for (Intervalable interval : intervals) { if (interval.getEnd() < this.point) { @@ -37,7 +37,7 @@ public class IntervalNode { } } - public int determineMedian(List intervals) { + public int determineMedian(final List intervals) { int start = -1; int end = -1; for (Intervalable interval : intervals) { @@ -53,17 +53,19 @@ public class IntervalNode { return (start + end) / 2; } - public List findOverlaps(Intervalable interval) { + public List findOverlaps(final Intervalable interval) { + final List overlaps = new ArrayList<>(); - List overlaps = new ArrayList(); - - if (this.point < interval.getStart()) { // Tends to the right + if (this.point < interval.getStart()) { + // Tends to the right addToOverlaps(interval, overlaps, findOverlappingRanges(this.right, interval)); addToOverlaps(interval, overlaps, checkForOverlapsToTheRight(interval)); - } else if (this.point > interval.getEnd()) { // Tends to the left + } else if (this.point > interval.getEnd()) { + // Tends to the left addToOverlaps(interval, overlaps, findOverlappingRanges(this.left, interval)); addToOverlaps(interval, overlaps, checkForOverlapsToTheLeft(interval)); - } else { // Somewhere in the middle + } else { + // Somewhere in the middle addToOverlaps(interval, overlaps, this.intervals); addToOverlaps(interval, overlaps, findOverlappingRanges(this.left, interval)); addToOverlaps(interval, overlaps, findOverlappingRanges(this.right, interval)); @@ -72,26 +74,30 @@ public class IntervalNode { return overlaps; } - protected void addToOverlaps(Intervalable interval, List overlaps, List newOverlaps) { - for (Intervalable currentInterval : newOverlaps) { + protected void addToOverlaps( + final Intervalable interval, + final List overlaps, + final List newOverlaps) { + for (final Intervalable currentInterval : newOverlaps) { if (!currentInterval.equals(interval)) { overlaps.add(currentInterval); } } } - protected List checkForOverlapsToTheLeft(Intervalable interval) { + protected List checkForOverlapsToTheLeft(final Intervalable interval) { return checkForOverlaps(interval, Direction.LEFT); } - protected List checkForOverlapsToTheRight(Intervalable interval) { + protected List checkForOverlapsToTheRight(final Intervalable interval) { return checkForOverlaps(interval, Direction.RIGHT); } - protected List checkForOverlaps(Intervalable interval, Direction direction) { + protected List checkForOverlaps( + final Intervalable interval, final Direction direction) { + final List overlaps = new ArrayList<>(); - List overlaps = new ArrayList(); - for (Intervalable currentInterval : this.intervals) { + for (final Intervalable currentInterval : this.intervals) { switch (direction) { case LEFT: if (currentInterval.getStart() <= interval.getEnd()) { @@ -105,15 +111,13 @@ public class IntervalNode { break; } } + return overlaps; } - protected List findOverlappingRanges(IntervalNode node, Intervalable interval) { - if (node != null) { - return node.findOverlaps(interval); - } - return Collections.emptyList(); + return node == null + ? Collections.emptyList() + : node.findOverlaps(interval); } - } diff --git a/src/main/java/org/ahocorasick/interval/IntervalTree.java b/src/main/java/org/ahocorasick/interval/IntervalTree.java index 40eeb0b..4dc43b9 100644 --- a/src/main/java/org/ahocorasick/interval/IntervalTree.java +++ b/src/main/java/org/ahocorasick/interval/IntervalTree.java @@ -1,26 +1,27 @@ package org.ahocorasick.interval; -import java.util.Collections; import java.util.List; import java.util.Set; import java.util.TreeSet; +import static java.util.Collections.sort; + public class IntervalTree { - private IntervalNode rootNode = null; + private final IntervalNode rootNode; public IntervalTree(List intervals) { this.rootNode = new IntervalNode(intervals); } - public List removeOverlaps(List intervals) { + public List removeOverlaps(final List intervals) { // Sort the intervals on size, then left-most position - Collections.sort(intervals, new IntervalableComparatorBySize()); + sort(intervals, new IntervalableComparatorBySize()); - Set removeIntervals = new TreeSet(); + final Set removeIntervals = new TreeSet<>(); - for (Intervalable interval : intervals) { + for (final Intervalable interval : intervals) { // If the interval was already removed, ignore it if (removeIntervals.contains(interval)) { continue; @@ -31,17 +32,17 @@ public class IntervalTree { } // Remove all intervals that were overlapping - for (Intervalable removeInterval : removeIntervals) { + for (final Intervalable removeInterval : removeIntervals) { intervals.remove(removeInterval); } // Sort the intervals, now on left-most position only - Collections.sort(intervals, new IntervalableComparatorByPosition()); + sort(intervals, new IntervalableComparatorByPosition()); return intervals; } - public List findOverlaps(Intervalable interval) { + public List findOverlaps(final Intervalable interval) { return rootNode.findOverlaps(interval); } diff --git a/src/main/java/org/ahocorasick/interval/Intervalable.java b/src/main/java/org/ahocorasick/interval/Intervalable.java index 0dd5f69..fed2982 100644 --- a/src/main/java/org/ahocorasick/interval/Intervalable.java +++ b/src/main/java/org/ahocorasick/interval/Intervalable.java @@ -7,5 +7,4 @@ public interface Intervalable extends Comparable { public int getEnd(); public int size(); - } diff --git a/src/main/java/org/ahocorasick/interval/IntervalableComparatorByPosition.java b/src/main/java/org/ahocorasick/interval/IntervalableComparatorByPosition.java index d144995..2dc0491 100644 --- a/src/main/java/org/ahocorasick/interval/IntervalableComparatorByPosition.java +++ b/src/main/java/org/ahocorasick/interval/IntervalableComparatorByPosition.java @@ -5,7 +5,7 @@ import java.util.Comparator; public class IntervalableComparatorByPosition implements Comparator { @Override - public int compare(Intervalable intervalable, Intervalable intervalable2) { + public int compare(final Intervalable intervalable, final Intervalable intervalable2) { return intervalable.getStart() - intervalable2.getStart(); } diff --git a/src/main/java/org/ahocorasick/interval/IntervalableComparatorBySize.java b/src/main/java/org/ahocorasick/interval/IntervalableComparatorBySize.java index 3814759..8b51ed1 100644 --- a/src/main/java/org/ahocorasick/interval/IntervalableComparatorBySize.java +++ b/src/main/java/org/ahocorasick/interval/IntervalableComparatorBySize.java @@ -5,11 +5,13 @@ import java.util.Comparator; public class IntervalableComparatorBySize implements Comparator { @Override - public int compare(Intervalable intervalable, Intervalable intervalable2) { + public int compare(final Intervalable intervalable, final Intervalable intervalable2) { int comparison = intervalable2.size() - intervalable.size(); + if (comparison == 0) { comparison = intervalable.getStart() - intervalable2.getStart(); } + return comparison; } diff --git a/src/main/java/org/ahocorasick/trie/Emit.java b/src/main/java/org/ahocorasick/trie/Emit.java index 60c1f9e..8c17253 100644 --- a/src/main/java/org/ahocorasick/trie/Emit.java +++ b/src/main/java/org/ahocorasick/trie/Emit.java @@ -20,5 +20,4 @@ public class Emit extends Interval implements Intervalable { public String toString() { return super.toString() + "=" + this.keyword; } - } diff --git a/src/main/java/org/ahocorasick/trie/MatchToken.java b/src/main/java/org/ahocorasick/trie/MatchToken.java index c2615dc..851472c 100644 --- a/src/main/java/org/ahocorasick/trie/MatchToken.java +++ b/src/main/java/org/ahocorasick/trie/MatchToken.java @@ -2,9 +2,9 @@ package org.ahocorasick.trie; public class MatchToken extends Token { - private Emit emit; + private final Emit emit; - public MatchToken(String fragment, Emit emit) { + public MatchToken(final String fragment, final Emit emit) { super(fragment); this.emit = emit; } @@ -18,5 +18,4 @@ public class MatchToken extends Token { public Emit getEmit() { return this.emit; } - } diff --git a/src/main/java/org/ahocorasick/trie/State.java b/src/main/java/org/ahocorasick/trie/State.java index e192207..8cf3e88 100644 --- a/src/main/java/org/ahocorasick/trie/State.java +++ b/src/main/java/org/ahocorasick/trie/State.java @@ -74,11 +74,11 @@ public class State { return nextState(character, false); } - public State nextStateIgnoreRootState(Character character) { + public State nextStateIgnoreRootState(final Character character) { return nextState(character, true); } - public State addState(String keyword) { + public State addState(final String keyword) { State state = this; for (final Character character : keyword.toCharArray()) { @@ -88,7 +88,7 @@ public class State { return state; } - public State addState(Character character) { + public State addState(final Character character) { State nextState = nextStateIgnoreRootState(character); if (nextState == null) { nextState = new State(this.depth + 1); @@ -101,14 +101,14 @@ public class State { return this.depth; } - public void addEmit(String keyword) { + public void addEmit(final String keyword) { if (this.emits == null) { this.emits = new TreeSet<>(); } this.emits.add(keyword); } - public void addEmit(Collection emits) { + public void addEmit(final Collection emits) { for (String emit : emits) { addEmit(emit); } diff --git a/src/main/java/org/ahocorasick/trie/Trie.java b/src/main/java/org/ahocorasick/trie/Trie.java index 941fa4e..eee9216 100644 --- a/src/main/java/org/ahocorasick/trie/Trie.java +++ b/src/main/java/org/ahocorasick/trie/Trie.java @@ -1,6 +1,9 @@ package org.ahocorasick.trie; -import static java.lang.Character.isWhitespace; +import org.ahocorasick.interval.IntervalTree; +import org.ahocorasick.interval.Intervalable; +import org.ahocorasick.trie.handler.DefaultEmitHandler; +import org.ahocorasick.trie.handler.EmitHandler; import java.util.ArrayList; import java.util.Collection; @@ -8,10 +11,9 @@ import java.util.List; import java.util.Queue; import java.util.concurrent.LinkedBlockingDeque; -import org.ahocorasick.interval.IntervalTree; -import org.ahocorasick.interval.Intervalable; -import org.ahocorasick.trie.handler.DefaultEmitHandler; -import org.ahocorasick.trie.handler.EmitHandler; +import static java.lang.Character.*; + +import java.lang.Character; /** * Based on the Aho-Corasick white paper, Bell technologies: @@ -37,36 +39,12 @@ public class Trie { * @throws NullPointerException if the keyword is null. */ private void addKeyword(String keyword) { - if (keyword.isEmpty()) { - return; - } + if (keyword.length() > 0) { + if (isCaseInsensitive()) { + keyword = keyword.toLowerCase(); + } - if (isCaseInsensitive()) { - keyword = keyword.toLowerCase(); - } - - addState(keyword).addEmit(keyword); - } - - /** - * Delegates to addKeyword. - * - * @param keywords List of search term to add to the list of search terms. - */ - private void addKeywords(final String[] keywords) { - for (final String keyword : keywords) { - addKeyword(keyword); - } - } - - /** - * Delegates to addKeyword. - * - * @param keywords List of search term to add to the list of search terms. - */ - private void addKeywords(final Collection keywords) { - for (final String keyword : keywords) { - addKeyword(keyword); + addState(keyword).addEmit(keyword); } } @@ -95,11 +73,14 @@ public class Trie { return tokens; } - private Token createFragment(final Emit emit, final String text, final int lastCollectedPosition) { + private Token createFragment( + final Emit emit, + final String text, + final int lastCollectedPosition) { return new FragmentToken(text.substring(lastCollectedPosition + 1, emit == null ? text.length() : emit.getStart())); } - private Token createMatch(Emit emit, String text) { + private Token createMatch(final Emit emit, final String text) { return new MatchToken(text.substring(emit.getStart(), emit.getEnd() + 1), emit); } @@ -138,7 +119,7 @@ public class Trie { // TODO: Maybe lowercase the entire string at once? if (trieConfig.isCaseInsensitive()) { - character = Character.toLowerCase(character); + character = toLowerCase(character); } currentState = getState(currentState, character); @@ -164,14 +145,14 @@ public class Trie { // TODO: Lowercase the entire string at once? if (trieConfig.isCaseInsensitive()) { - character = Character.toLowerCase(character); + character = toLowerCase(character); } currentState = getState(currentState, character); Collection emitStrs = currentState.emit(); if (emitStrs != null && !emitStrs.isEmpty()) { - for (String emitStr : emitStrs) { + for (final String emitStr : emitStrs) { final Emit emit = new Emit(position - emitStr.length() + 1, position, emitStr); if (trieConfig.isOnlyWholeWords()) { if (!isPartialMatch(text, emit)) { @@ -190,9 +171,9 @@ public class Trie { private boolean isPartialMatch(final CharSequence searchText, final Emit emit) { return (emit.getStart() != 0 && - Character.isAlphabetic(searchText.charAt(emit.getStart() - 1))) || + isAlphabetic(searchText.charAt(emit.getStart() - 1))) || (emit.getEnd() + 1 != searchText.length() && - Character.isAlphabetic(searchText.charAt(emit.getEnd() + 1))); + isAlphabetic(searchText.charAt(emit.getEnd() + 1))); } private void removePartialMatches(final CharSequence searchText, final List collectedEmits) { @@ -226,15 +207,16 @@ public class Trie { } } - private State getState(State currentState, final Character character) { - State newCurrentState = currentState.nextState(character); + private State getState(final State initialState, final Character character) { + State currentState = initialState; + State updatedState = currentState.nextState(character); - while (newCurrentState == null) { + while (updatedState == null) { currentState = currentState.failure(); - newCurrentState = currentState.nextState(character); + updatedState = currentState.nextState(character); } - return newCurrentState; + return updatedState; } private void constructFailureStates() { @@ -267,7 +249,10 @@ public class Trie { } } - private boolean storeEmits(final int position, final State currentState, final EmitHandler emitHandler) { + private boolean storeEmits( + final int position, + final State currentState, + final EmitHandler emitHandler) { boolean emitted = false; final Collection emits = currentState.emit(); @@ -291,7 +276,8 @@ public class Trie { } /** - * Provides a fluent interface for constructing Trie instances. + * Constructs a TrieBuilder instance for configuring the Trie using a fluent + * interface. * * @return The builder used to configure its Trie. */ @@ -299,6 +285,9 @@ public class Trie { return new TrieBuilder(); } + /** + * Provides a fluent interface for constructing Trie instances. + */ public static class TrieBuilder { private final TrieConfig trieConfig = new TrieConfig(); @@ -318,8 +307,8 @@ public class Trie { * @return This builder. * @throws NullPointerException if the keyword is null. */ - public TrieBuilder addKeyword(final String keyword) { - this.trie.addKeyword(keyword); + public TrieBuilder addKeyword(final CharSequence keyword) { + getTrie().addKeyword(keyword.toString()); return this; } @@ -329,8 +318,11 @@ public class Trie { * @param keywords The keywords to add to the list. * @return This builder. */ - public TrieBuilder addKeywords(final String... keywords) { - this.trie.addKeywords(keywords); + public TrieBuilder addKeywords(final CharSequence... keywords) { + for (final CharSequence keyword : keywords) { + addKeyword(keyword); + } + return this; } @@ -340,19 +332,18 @@ public class Trie { * @param keywords The keywords to add to the list. * @return This builder. */ - public TrieBuilder addKeywords(final Collection keywords) { - this.trie.addKeywords(keywords); - return this; + public TrieBuilder addKeywords(final Collection keywords) { + return addKeywords(keywords.toArray(new CharSequence[keywords.size()])); } /** - * Configure the Trie to ignore case when searching for keywords in - * the text. + * Configure the Trie to ignore case when searching for keywords in the + * text. * * @return This builder. */ public TrieBuilder ignoreCase() { - this.trieConfig.setCaseInsensitive(true); + getTrieConfig().setCaseInsensitive(true); return this; } @@ -362,7 +353,7 @@ public class Trie { * @return This builder. */ public TrieBuilder ignoreOverlaps() { - this.trieConfig.setAllowOverlaps(false); + getTrieConfig().setAllowOverlaps(false); return this; } @@ -372,7 +363,7 @@ public class Trie { * @return This builder. */ public TrieBuilder onlyWholeWords() { - this.trieConfig.setOnlyWholeWords(true); + getTrieConfig().setOnlyWholeWords(true); return this; } @@ -384,33 +375,47 @@ public class Trie { * @return This builder. */ public TrieBuilder onlyWholeWordsWhiteSpaceSeparated() { - this.trieConfig.setOnlyWholeWordsWhiteSpaceSeparated(true); + getTrieConfig().setOnlyWholeWordsWhiteSpaceSeparated(true); return this; } /** - * Configure the Trie to stop after the first keyword is found in the - * text. + * Configure the Trie to stop searching for matches after the first + * keyword is found in the text. * * @return This builder. */ - public TrieBuilder stopOnHit() { - trie.trieConfig.setStopOnHit(true); + public TrieBuilder onlyFirstMatch() { + getTrieConfig().setStopOnHit(true); return this; } /** - * Configure the Trie based on the builder settings. + * Construct the Trie using the builder settings. * * @return The configured Trie. */ public Trie build() { - this.trie.constructFailureStates(); + getTrie().constructFailureStates(); + return getTrie(); + } + + private Trie getTrie() { return this.trie; } + private TrieConfig getTrieConfig() { + return this.trieConfig; + } + + /** + * @deprecated Use onlyFirstMatch() + */ + public TrieBuilder stopOnHit() { + return onlyFirstMatch(); + } + /** - * @return This builder. * @deprecated Use ignoreCase() */ public TrieBuilder caseInsensitive() { @@ -418,7 +423,6 @@ public class Trie { } /** - * @return This builder. * @deprecated Use ignoreOverlaps() */ public TrieBuilder removeOverlaps() { diff --git a/src/main/java/org/ahocorasick/trie/handler/DefaultEmitHandler.java b/src/main/java/org/ahocorasick/trie/handler/DefaultEmitHandler.java index 656d1e2..4531f3d 100644 --- a/src/main/java/org/ahocorasick/trie/handler/DefaultEmitHandler.java +++ b/src/main/java/org/ahocorasick/trie/handler/DefaultEmitHandler.java @@ -7,15 +7,14 @@ import java.util.List; public class DefaultEmitHandler implements EmitHandler { - private List emits = new ArrayList<>(); + private final List emits = new ArrayList<>(); @Override - public void emit(Emit emit) { + public void emit(final Emit emit) { this.emits.add(emit); } public List getEmits() { return this.emits; } - } diff --git a/src/test/java/org/ahocorasick/interval/IntervalTest.java b/src/test/java/org/ahocorasick/interval/IntervalTest.java index ff9b0c3..4a3598e 100644 --- a/src/test/java/org/ahocorasick/interval/IntervalTest.java +++ b/src/test/java/org/ahocorasick/interval/IntervalTest.java @@ -2,11 +2,11 @@ package org.ahocorasick.interval; import org.junit.Test; -import java.util.*; +import java.util.Iterator; +import java.util.Set; +import java.util.TreeSet; -import static junit.framework.Assert.assertEquals; -import static junit.framework.Assert.assertFalse; -import static junit.framework.Assert.assertTrue; +import static junit.framework.Assert.*; public class IntervalTest { @@ -44,7 +44,7 @@ public class IntervalTest { @Test public void comparable() { - Set intervals = new TreeSet(); + Set intervals = new TreeSet<>(); intervals.add(new Interval(4, 6)); intervals.add(new Interval(2, 7)); intervals.add(new Interval(3, 4)); diff --git a/src/test/java/org/ahocorasick/interval/IntervalTreeTest.java b/src/test/java/org/ahocorasick/interval/IntervalTreeTest.java index 96c3670..fc41a3e 100644 --- a/src/test/java/org/ahocorasick/interval/IntervalTreeTest.java +++ b/src/test/java/org/ahocorasick/interval/IntervalTreeTest.java @@ -12,7 +12,7 @@ public class IntervalTreeTest { @Test public void findOverlaps() { - List intervals = new ArrayList(); + List intervals = new ArrayList<>(); intervals.add(new Interval(0, 2)); intervals.add(new Interval(1, 3)); intervals.add(new Interval(2, 4)); @@ -30,7 +30,7 @@ public class IntervalTreeTest { @Test public void removeOverlaps() { - List intervals = new ArrayList(); + List intervals = new ArrayList<>(); intervals.add(new Interval(0, 2)); intervals.add(new Interval(4, 5)); intervals.add(new Interval(2, 10)); diff --git a/src/test/java/org/ahocorasick/interval/IntervalableComparatorByPositionTest.java b/src/test/java/org/ahocorasick/interval/IntervalableComparatorByPositionTest.java index a36c831..40ad64e 100644 --- a/src/test/java/org/ahocorasick/interval/IntervalableComparatorByPositionTest.java +++ b/src/test/java/org/ahocorasick/interval/IntervalableComparatorByPositionTest.java @@ -3,20 +3,20 @@ package org.ahocorasick.interval; import org.junit.Test; import java.util.ArrayList; -import java.util.Collections; import java.util.List; +import static java.util.Collections.sort; import static junit.framework.Assert.assertEquals; public class IntervalableComparatorByPositionTest { @Test public void sortOnPosition() { - List intervals = new ArrayList(); + List intervals = new ArrayList<>(); intervals.add(new Interval(4, 5)); intervals.add(new Interval(1, 4)); intervals.add(new Interval(3, 8)); - Collections.sort(intervals, new IntervalableComparatorByPosition()); + sort(intervals, new IntervalableComparatorByPosition()); assertEquals(4, intervals.get(0).size()); assertEquals(6, intervals.get(1).size()); assertEquals(2, intervals.get(2).size()); diff --git a/src/test/java/org/ahocorasick/interval/IntervalableComparatorBySizeTest.java b/src/test/java/org/ahocorasick/interval/IntervalableComparatorBySizeTest.java index 8fc7db1..31fc84d 100644 --- a/src/test/java/org/ahocorasick/interval/IntervalableComparatorBySizeTest.java +++ b/src/test/java/org/ahocorasick/interval/IntervalableComparatorBySizeTest.java @@ -3,20 +3,20 @@ package org.ahocorasick.interval; import org.junit.Test; import java.util.ArrayList; -import java.util.Collections; import java.util.List; +import static java.util.Collections.sort; import static junit.framework.Assert.assertEquals; public class IntervalableComparatorBySizeTest { @Test public void sortOnSize() { - List intervals = new ArrayList(); + List intervals = new ArrayList<>(); intervals.add(new Interval(4, 5)); intervals.add(new Interval(1, 4)); intervals.add(new Interval(3, 8)); - Collections.sort(intervals, new IntervalableComparatorBySize()); + sort(intervals, new IntervalableComparatorBySize()); assertEquals(6, intervals.get(0).size()); assertEquals(4, intervals.get(1).size()); assertEquals(2, intervals.get(2).size()); @@ -24,10 +24,10 @@ public class IntervalableComparatorBySizeTest { @Test public void sortOnSizeThenPosition() { - List intervals = new ArrayList(); + List intervals = new ArrayList<>(); intervals.add(new Interval(4, 7)); intervals.add(new Interval(2, 5)); - Collections.sort(intervals, new IntervalableComparatorBySize()); + sort(intervals, new IntervalableComparatorBySize()); assertEquals(2, intervals.get(0).getStart()); assertEquals(4, intervals.get(1).getStart()); } diff --git a/src/test/java/org/ahocorasick/trie/StateTest.java b/src/test/java/org/ahocorasick/trie/StateTest.java index a9cc745..2694305 100644 --- a/src/test/java/org/ahocorasick/trie/StateTest.java +++ b/src/test/java/org/ahocorasick/trie/StateTest.java @@ -1,6 +1,5 @@ package org.ahocorasick.trie; -import org.ahocorasick.trie.State; import org.junit.Test; import static junit.framework.Assert.assertEquals; diff --git a/src/test/java/org/ahocorasick/trie/TrieTest.java b/src/test/java/org/ahocorasick/trie/TrieTest.java index e6129ce..00070d3 100644 --- a/src/test/java/org/ahocorasick/trie/TrieTest.java +++ b/src/test/java/org/ahocorasick/trie/TrieTest.java @@ -1,19 +1,18 @@ package org.ahocorasick.trie; +import org.ahocorasick.trie.handler.EmitHandler; +import org.junit.Test; + import java.util.ArrayList; import java.util.Collection; import java.util.Iterator; import java.util.List; -import java.util.concurrent.ThreadLocalRandom; +import static java.util.concurrent.ThreadLocalRandom.current; import static junit.framework.Assert.assertEquals; - -import org.ahocorasick.trie.handler.EmitHandler; - +import static org.ahocorasick.trie.Trie.builder; import static org.junit.Assert.assertTrue; -import org.junit.Test; - public class TrieTest { private final static String[] ALPHABET = new String[]{ "abc", "bcd", "cde" @@ -37,7 +36,7 @@ public class TrieTest { @Test public void keywordAndTextAreTheSame() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeyword(ALPHABET[0]) .build(); Collection emits = trie.parseText(ALPHABET[0]); @@ -47,7 +46,7 @@ public class TrieTest { @Test public void keywordAndTextAreTheSameFirstMatch() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeyword(ALPHABET[0]) .build(); Emit firstMatch = trie.firstMatch(ALPHABET[0]); @@ -56,7 +55,7 @@ public class TrieTest { @Test public void textIsLongerThanKeyword() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeyword(ALPHABET[0]) .build(); Collection emits = trie.parseText(" " + ALPHABET[0]); @@ -66,7 +65,7 @@ public class TrieTest { @Test public void textIsLongerThanKeywordFirstMatch() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeyword(ALPHABET[0]) .build(); Emit firstMatch = trie.firstMatch(" " + ALPHABET[0]); @@ -75,7 +74,7 @@ public class TrieTest { @Test public void variousKeywordsOneMatch() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeywords(ALPHABET) .build(); Collection emits = trie.parseText("bcd"); @@ -85,7 +84,7 @@ public class TrieTest { @Test public void variousKeywordsFirstMatch() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeywords(ALPHABET) .build(); Emit firstMatch = trie.firstMatch("bcd"); @@ -94,7 +93,7 @@ public class TrieTest { @Test public void ushersTestAndStopOnHit() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeywords(PRONOUNS) .stopOnHit() .build(); @@ -107,7 +106,7 @@ public class TrieTest { @Test public void ushersTest() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeywords(PRONOUNS) .build(); Collection emits = trie.parseText("ushers"); @@ -120,7 +119,7 @@ public class TrieTest { @Test public void ushersTestWithCapitalKeywords() { - Trie trie = Trie.builder() + Trie trie = builder() .ignoreCase() .addKeyword("HERS") .addKeyword("HIS") @@ -137,7 +136,7 @@ public class TrieTest { @Test public void ushersTestFirstMatch() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeywords(PRONOUNS) .build(); Emit firstMatch = trie.firstMatch("ushers"); @@ -146,7 +145,7 @@ public class TrieTest { @Test public void ushersTestByCallback() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeywords(PRONOUNS) .build(); @@ -168,7 +167,7 @@ public class TrieTest { @Test public void misleadingTest() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeyword("hers") .build(); Collection emits = trie.parseText("h he her hers"); @@ -178,7 +177,7 @@ public class TrieTest { @Test public void misleadingTestFirstMatch() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeyword("hers") .build(); Emit firstMatch = trie.firstMatch("h he her hers"); @@ -187,7 +186,7 @@ public class TrieTest { @Test public void recipes() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeywords(FOOD) .build(); Collection emits = trie.parseText("2 cauliflowers, 3 tomatoes, 4 slices of veal, 100g broccoli"); @@ -200,7 +199,7 @@ public class TrieTest { @Test public void recipesFirstMatch() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeywords(FOOD) .build(); Emit firstMatch = trie.firstMatch("2 cauliflowers, 3 tomatoes, 4 slices of veal, 100g broccoli"); @@ -210,7 +209,7 @@ public class TrieTest { @Test public void longAndShortOverlappingMatch() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeyword("he") .addKeyword("hehehehe") .build(); @@ -227,7 +226,7 @@ public class TrieTest { @Test public void nonOverlapping() { - Trie trie = Trie.builder().removeOverlaps() + Trie trie = builder().removeOverlaps() .addKeyword("ab") .addKeyword("cba") .addKeyword("ababc") @@ -242,7 +241,7 @@ public class TrieTest { @Test public void nonOverlappingFirstMatch() { - Trie trie = Trie.builder().removeOverlaps() + Trie trie = builder().removeOverlaps() .addKeyword("ab") .addKeyword("cba") .addKeyword("ababc") @@ -254,7 +253,7 @@ public class TrieTest { @Test public void containsMatch() { - Trie trie = Trie.builder().removeOverlaps() + Trie trie = builder().removeOverlaps() .addKeyword("ab") .addKeyword("cba") .addKeyword("ababc") @@ -264,7 +263,7 @@ public class TrieTest { @Test public void startOfChurchillSpeech() { - Trie trie = Trie.builder().removeOverlaps() + Trie trie = builder().removeOverlaps() .addKeyword("T") .addKeyword("u") .addKeyword("ur") @@ -282,7 +281,7 @@ public class TrieTest { @Test public void partialMatch() { - Trie trie = Trie.builder() + Trie trie = builder() .onlyWholeWords() .addKeyword("sugar") .build(); @@ -293,7 +292,7 @@ public class TrieTest { @Test public void partialMatchFirstMatch() { - Trie trie = Trie.builder() + Trie trie = builder() .onlyWholeWords() .addKeyword("sugar") .build(); @@ -304,7 +303,7 @@ public class TrieTest { @Test public void tokenizeFullSentence() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeywords(GREEK_LETTERS) .build(); Collection tokens = trie.tokenize("Hear: Alpha team first, Beta from the rear, Gamma in reserve"); @@ -322,7 +321,7 @@ public class TrieTest { // @see https://github.com/robert-bor/aho-corasick/issues/5 @Test public void testStringIndexOutOfBoundsException() { - Trie trie = Trie.builder().ignoreCase().onlyWholeWords() + Trie trie = builder().ignoreCase().onlyWholeWords() .addKeywords(UNICODE) .build(); Collection emits = trie.parseText("TurninG OnCe AgAiN BÖRKÜ"); @@ -336,7 +335,7 @@ public class TrieTest { @Test public void testIgnoreCase() { - Trie trie = Trie.builder().ignoreCase() + Trie trie = builder().ignoreCase() .addKeywords(UNICODE) .build(); Collection emits = trie.parseText("TurninG OnCe AgAiN BÖRKÜ"); @@ -350,7 +349,7 @@ public class TrieTest { @Test public void testIgnoreCaseFirstMatch() { - Trie trie = Trie.builder().ignoreCase() + Trie trie = builder().ignoreCase() .addKeywords(UNICODE) .build(); Emit firstMatch = trie.firstMatch("TurninG OnCe AgAiN BÖRKÜ"); @@ -360,7 +359,7 @@ public class TrieTest { @Test public void tokenizeTokensInSequence() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeywords(GREEK_LETTERS) .build(); Collection tokens = trie.tokenize("Alpha Beta Gamma"); @@ -370,7 +369,7 @@ public class TrieTest { // @see https://github.com/robert-bor/aho-corasick/issues/7 @Test public void testZeroLength() { - Trie trie = Trie.builder().ignoreOverlaps().onlyWholeWords().ignoreCase() + Trie trie = builder().ignoreOverlaps().onlyWholeWords().ignoreCase() .addKeyword("") .build(); trie.tokenize("Try a natural lip and subtle bronzer to keep all the focus on those big bright eyes with NARS Eyeshadow Duo in Rated R And the winner is... Boots No7 Advanced Renewal Anti-ageing Glycolic Peel Kit ($25 amazon.com) won most-appealing peel."); @@ -381,7 +380,7 @@ public class TrieTest { public void testUnicode1() { String target = "LİKE THIS"; // The second character ('İ') is Unicode, which was read by AC as a 2-byte char assertEquals("THIS", target.substring(5, 9)); // Java does it the right way - Trie trie = Trie.builder().ignoreCase().onlyWholeWords() + Trie trie = builder().ignoreCase().onlyWholeWords() .addKeyword("this") .build(); Collection emits = trie.parseText(target); @@ -394,7 +393,7 @@ public class TrieTest { @Test public void testUnicode2() { String target = "LİKE THIS"; // The second character ('İ') is Unicode, which was read by AC as a 2-byte char - Trie trie = Trie.builder() + Trie trie = builder() .ignoreCase() .onlyWholeWords() .addKeyword("this") @@ -406,7 +405,7 @@ public class TrieTest { @Test public void testPartialMatchWhiteSpaces() { - Trie trie = Trie.builder() + Trie trie = builder() .onlyWholeWordsWhiteSpaceSeparated() .addKeyword("#sugar-123") .build(); @@ -424,7 +423,7 @@ public class TrieTest { injectKeyword(text, keyword, interval); - Trie trie = Trie.builder() + Trie trie = builder() .onlyWholeWords() .addKeyword(keyword) .build(); @@ -440,10 +439,10 @@ public class TrieTest { * @param count The number of numbers to generate. * @return A character sequence filled with random digits. */ - private StringBuilder randomNumbers(int count) { + private StringBuilder randomNumbers(final int count) { final StringBuilder sb = new StringBuilder(count); - while (--count > 0) { + for (int i = count - 1; i >= 0; i--) { sb.append(randomInt(0, 10)); } @@ -469,7 +468,7 @@ public class TrieTest { } private int randomInt(final int min, final int max) { - return ThreadLocalRandom.current().nextInt(min, max); + return current().nextInt(min, max); } private void checkEmit(Emit next, int expectedStart, int expectedEnd, String expectedKeyword) {