From 2b5c2d654db42a4aa0b35e7755c0142c7085af1a Mon Sep 17 00:00:00 2001 From: djarvis Date: Tue, 29 Nov 2016 18:38:00 -0800 Subject: [PATCH 1/3] Updated source base to leverage JDK 1.7 syntax. Added more final modifiers. Eliminated parameter modification inside method. Some formatting. Changed TrieBuilder to offer CharSequence instead of String; revised Trie accordingly. Removed some duplication. NetBeans automatically translated the code to use static imports (as per JDK 1.7 syntax). --- .../ahocorasick/interval/IntervalNode.java | 60 +++---- .../ahocorasick/interval/IntervalTree.java | 18 +-- .../ahocorasick/interval/Intervalable.java | 1 - .../IntervalableComparatorByPosition.java | 2 +- .../IntervalableComparatorBySize.java | 4 +- src/main/java/org/ahocorasick/trie/Emit.java | 1 - .../java/org/ahocorasick/trie/MatchToken.java | 5 +- src/main/java/org/ahocorasick/trie/State.java | 10 +- src/main/java/org/ahocorasick/trie/Trie.java | 148 +++++++++--------- .../trie/handler/DefaultEmitHandler.java | 8 +- .../ahocorasick/interval/IntervalTest.java | 2 +- .../interval/IntervalTreeTest.java | 4 +- .../IntervalableComparatorByPositionTest.java | 5 +- .../IntervalableComparatorBySizeTest.java | 10 +- .../java/org/ahocorasick/trie/TrieTest.java | 74 ++++----- 15 files changed, 180 insertions(+), 172 deletions(-) diff --git a/src/main/java/org/ahocorasick/interval/IntervalNode.java b/src/main/java/org/ahocorasick/interval/IntervalNode.java index 11db0ae..f6775f5 100644 --- a/src/main/java/org/ahocorasick/interval/IntervalNode.java +++ b/src/main/java/org/ahocorasick/interval/IntervalNode.java @@ -8,16 +8,16 @@ public class IntervalNode { private enum Direction { LEFT, RIGHT } - private IntervalNode left = null; - private IntervalNode right = null; + private IntervalNode left; + private IntervalNode right; private int point; - private List intervals = new ArrayList(); + private List intervals = new ArrayList<>(); - public IntervalNode(List intervals) { + public IntervalNode(final List intervals) { this.point = determineMedian(intervals); - List toLeft = new ArrayList(); - List toRight = new ArrayList(); + final List toLeft = new ArrayList<>(); + final List toRight = new ArrayList<>(); for (Intervalable interval : intervals) { if (interval.getEnd() < this.point) { @@ -37,7 +37,7 @@ public class IntervalNode { } } - public int determineMedian(List intervals) { + public int determineMedian(final List intervals) { int start = -1; int end = -1; for (Intervalable interval : intervals) { @@ -53,17 +53,19 @@ public class IntervalNode { return (start + end) / 2; } - public List findOverlaps(Intervalable interval) { + public List findOverlaps(final Intervalable interval) { + final List overlaps = new ArrayList<>(); - List overlaps = new ArrayList(); - - if (this.point < interval.getStart()) { // Tends to the right + if (this.point < interval.getStart()) { + // Tends to the right addToOverlaps(interval, overlaps, findOverlappingRanges(this.right, interval)); addToOverlaps(interval, overlaps, checkForOverlapsToTheRight(interval)); - } else if (this.point > interval.getEnd()) { // Tends to the left + } else if (this.point > interval.getEnd()) { + // Tends to the left addToOverlaps(interval, overlaps, findOverlappingRanges(this.left, interval)); addToOverlaps(interval, overlaps, checkForOverlapsToTheLeft(interval)); - } else { // Somewhere in the middle + } else { + // Somewhere in the middle addToOverlaps(interval, overlaps, this.intervals); addToOverlaps(interval, overlaps, findOverlappingRanges(this.left, interval)); addToOverlaps(interval, overlaps, findOverlappingRanges(this.right, interval)); @@ -72,26 +74,30 @@ public class IntervalNode { return overlaps; } - protected void addToOverlaps(Intervalable interval, List overlaps, List newOverlaps) { - for (Intervalable currentInterval : newOverlaps) { + protected void addToOverlaps( + final Intervalable interval, + final List overlaps, + final List newOverlaps) { + for (final Intervalable currentInterval : newOverlaps) { if (!currentInterval.equals(interval)) { overlaps.add(currentInterval); } } } - protected List checkForOverlapsToTheLeft(Intervalable interval) { + protected List checkForOverlapsToTheLeft(final Intervalable interval) { return checkForOverlaps(interval, Direction.LEFT); } - protected List checkForOverlapsToTheRight(Intervalable interval) { + protected List checkForOverlapsToTheRight(final Intervalable interval) { return checkForOverlaps(interval, Direction.RIGHT); } - protected List checkForOverlaps(Intervalable interval, Direction direction) { - - List overlaps = new ArrayList(); - for (Intervalable currentInterval : this.intervals) { + protected List checkForOverlaps( + final Intervalable interval, final Direction direction) { + final List overlaps = new ArrayList<>(); + + for (final Intervalable currentInterval : this.intervals) { switch (direction) { case LEFT : if (currentInterval.getStart() <= interval.getEnd()) { @@ -105,15 +111,13 @@ public class IntervalNode { break; } } + return overlaps; } - - + protected List findOverlappingRanges(IntervalNode node, Intervalable interval) { - if (node != null) { - return node.findOverlaps(interval); - } - return Collections.emptyList(); + return node == null + ? Collections.emptyList() + : node.findOverlaps( interval ); } - } diff --git a/src/main/java/org/ahocorasick/interval/IntervalTree.java b/src/main/java/org/ahocorasick/interval/IntervalTree.java index 40eeb0b..3be617e 100644 --- a/src/main/java/org/ahocorasick/interval/IntervalTree.java +++ b/src/main/java/org/ahocorasick/interval/IntervalTree.java @@ -1,26 +1,26 @@ package org.ahocorasick.interval; -import java.util.Collections; +import static java.util.Collections.sort; import java.util.List; import java.util.Set; import java.util.TreeSet; public class IntervalTree { - private IntervalNode rootNode = null; + private final IntervalNode rootNode; public IntervalTree(List intervals) { this.rootNode = new IntervalNode(intervals); } - public List removeOverlaps(List intervals) { + public List removeOverlaps(final List intervals) { // Sort the intervals on size, then left-most position - Collections.sort(intervals, new IntervalableComparatorBySize()); + sort(intervals, new IntervalableComparatorBySize()); - Set removeIntervals = new TreeSet(); + final Set removeIntervals = new TreeSet<>(); - for (Intervalable interval : intervals) { + for (final Intervalable interval : intervals) { // If the interval was already removed, ignore it if (removeIntervals.contains(interval)) { continue; @@ -31,17 +31,17 @@ public class IntervalTree { } // Remove all intervals that were overlapping - for (Intervalable removeInterval : removeIntervals) { + for (final Intervalable removeInterval : removeIntervals) { intervals.remove(removeInterval); } // Sort the intervals, now on left-most position only - Collections.sort(intervals, new IntervalableComparatorByPosition()); + sort(intervals, new IntervalableComparatorByPosition()); return intervals; } - public List findOverlaps(Intervalable interval) { + public List findOverlaps(final Intervalable interval) { return rootNode.findOverlaps(interval); } diff --git a/src/main/java/org/ahocorasick/interval/Intervalable.java b/src/main/java/org/ahocorasick/interval/Intervalable.java index 286a232..d17fae6 100644 --- a/src/main/java/org/ahocorasick/interval/Intervalable.java +++ b/src/main/java/org/ahocorasick/interval/Intervalable.java @@ -5,5 +5,4 @@ public interface Intervalable extends Comparable { public int getStart(); public int getEnd(); public int size(); - } diff --git a/src/main/java/org/ahocorasick/interval/IntervalableComparatorByPosition.java b/src/main/java/org/ahocorasick/interval/IntervalableComparatorByPosition.java index d144995..2dc0491 100644 --- a/src/main/java/org/ahocorasick/interval/IntervalableComparatorByPosition.java +++ b/src/main/java/org/ahocorasick/interval/IntervalableComparatorByPosition.java @@ -5,7 +5,7 @@ import java.util.Comparator; public class IntervalableComparatorByPosition implements Comparator { @Override - public int compare(Intervalable intervalable, Intervalable intervalable2) { + public int compare(final Intervalable intervalable, final Intervalable intervalable2) { return intervalable.getStart() - intervalable2.getStart(); } diff --git a/src/main/java/org/ahocorasick/interval/IntervalableComparatorBySize.java b/src/main/java/org/ahocorasick/interval/IntervalableComparatorBySize.java index 3814759..6a33fa8 100644 --- a/src/main/java/org/ahocorasick/interval/IntervalableComparatorBySize.java +++ b/src/main/java/org/ahocorasick/interval/IntervalableComparatorBySize.java @@ -5,11 +5,13 @@ import java.util.Comparator; public class IntervalableComparatorBySize implements Comparator { @Override - public int compare(Intervalable intervalable, Intervalable intervalable2) { + public int compare(final Intervalable intervalable, final Intervalable intervalable2) { int comparison = intervalable2.size() - intervalable.size(); + if (comparison == 0) { comparison = intervalable.getStart() - intervalable2.getStart(); } + return comparison; } diff --git a/src/main/java/org/ahocorasick/trie/Emit.java b/src/main/java/org/ahocorasick/trie/Emit.java index 60c1f9e..8c17253 100644 --- a/src/main/java/org/ahocorasick/trie/Emit.java +++ b/src/main/java/org/ahocorasick/trie/Emit.java @@ -20,5 +20,4 @@ public class Emit extends Interval implements Intervalable { public String toString() { return super.toString() + "=" + this.keyword; } - } diff --git a/src/main/java/org/ahocorasick/trie/MatchToken.java b/src/main/java/org/ahocorasick/trie/MatchToken.java index c2615dc..851472c 100644 --- a/src/main/java/org/ahocorasick/trie/MatchToken.java +++ b/src/main/java/org/ahocorasick/trie/MatchToken.java @@ -2,9 +2,9 @@ package org.ahocorasick.trie; public class MatchToken extends Token { - private Emit emit; + private final Emit emit; - public MatchToken(String fragment, Emit emit) { + public MatchToken(final String fragment, final Emit emit) { super(fragment); this.emit = emit; } @@ -18,5 +18,4 @@ public class MatchToken extends Token { public Emit getEmit() { return this.emit; } - } diff --git a/src/main/java/org/ahocorasick/trie/State.java b/src/main/java/org/ahocorasick/trie/State.java index 0055d91..fce324e 100644 --- a/src/main/java/org/ahocorasick/trie/State.java +++ b/src/main/java/org/ahocorasick/trie/State.java @@ -66,11 +66,11 @@ public class State { return nextState(character, false); } - public State nextStateIgnoreRootState(Character character) { + public State nextStateIgnoreRootState(final Character character) { return nextState(character, true); } - public State addState( String keyword ) { + public State addState(final String keyword ) { State state = this; for (final Character character : keyword.toCharArray()) { @@ -80,7 +80,7 @@ public class State { return state; } - public State addState(Character character) { + public State addState(final Character character) { State nextState = nextStateIgnoreRootState(character); if (nextState == null) { nextState = new State(this.depth+1); @@ -93,14 +93,14 @@ public class State { return this.depth; } - public void addEmit(String keyword) { + public void addEmit(final String keyword) { if (this.emits == null) { this.emits = new TreeSet<>(); } this.emits.add(keyword); } - public void addEmit(Collection emits) { + public void addEmit(final Collection emits) { for (String emit : emits) { addEmit(emit); } diff --git a/src/main/java/org/ahocorasick/trie/Trie.java b/src/main/java/org/ahocorasick/trie/Trie.java index 88097a5..9127160 100644 --- a/src/main/java/org/ahocorasick/trie/Trie.java +++ b/src/main/java/org/ahocorasick/trie/Trie.java @@ -1,6 +1,8 @@ package org.ahocorasick.trie; +import static java.lang.Character.isAlphabetic; import static java.lang.Character.isWhitespace; +import static java.lang.Character.toLowerCase; import java.util.ArrayList; import java.util.Collection; import java.util.List; @@ -35,42 +37,18 @@ public class Trie { * * @throws NullPointerException if the keyword is null. */ - private void addKeyword(String keyword) { - if( keyword.isEmpty() ) { - return; + private void addKeyword( String keyword ) { + if( keyword.length() > 0 ) { + if( isCaseInsensitive() ) { + keyword = keyword.toLowerCase(); + } + + addState( keyword ).addEmit( keyword ); } - - if( isCaseInsensitive() ) { - keyword = keyword.toLowerCase(); - } - - addState(keyword).addEmit(keyword); } - /** - * Delegates to addKeyword. - * - * @param keywords List of search term to add to the list of search terms. - */ - private void addKeywords( final String[] keywords ) { - for( final String keyword : keywords ) { - addKeyword( keyword ); - } - } - - /** - * Delegates to addKeyword. - * - * @param keywords List of search term to add to the list of search terms. - */ - private void addKeywords( final Collection keywords ) { - for( final String keyword : keywords ) { - addKeyword( keyword ); - } - } - - private State addState(final String keyword) { - return getRootState().addState(keyword); + private State addState( final String keyword ) { + return getRootState().addState( keyword ); } public Collection tokenize(final String text) { @@ -94,11 +72,14 @@ public class Trie { return tokens; } - private Token createFragment(final Emit emit, final String text, final int lastCollectedPosition) { + private Token createFragment( + final Emit emit, + final String text, + final int lastCollectedPosition) { return new FragmentToken(text.substring(lastCollectedPosition+1, emit == null ? text.length() : emit.getStart())); } - private Token createMatch(Emit emit, String text) { + private Token createMatch(final Emit emit, final String text) { return new MatchToken(text.substring(emit.getStart(), emit.getEnd()+1), emit); } @@ -137,7 +118,7 @@ public class Trie { // TODO: Maybe lowercase the entire string at once? if (trieConfig.isCaseInsensitive()) { - character = Character.toLowerCase(character); + character = toLowerCase(character); } currentState = getState(currentState, character); @@ -163,14 +144,14 @@ public class Trie { // TODO: Lowercase the entire string at once? if (trieConfig.isCaseInsensitive()) { - character = Character.toLowerCase(character); + character = toLowerCase(character); } currentState = getState(currentState, character); Collection emitStrs = currentState.emit(); if (emitStrs != null && !emitStrs.isEmpty()) { - for (String emitStr : emitStrs) { + for (final String emitStr : emitStrs) { final Emit emit = new Emit(position - emitStr.length() + 1, position, emitStr); if (trieConfig.isOnlyWholeWords()) { if (!isPartialMatch(text, emit)) { @@ -189,9 +170,9 @@ public class Trie { private boolean isPartialMatch(final CharSequence searchText, final Emit emit) { return (emit.getStart() != 0 && - Character.isAlphabetic(searchText.charAt(emit.getStart() - 1))) || + isAlphabetic(searchText.charAt(emit.getStart() - 1))) || (emit.getEnd() + 1 != searchText.length() && - Character.isAlphabetic(searchText.charAt(emit.getEnd() + 1))); + isAlphabetic(searchText.charAt(emit.getEnd() + 1))); } private void removePartialMatches(final CharSequence searchText, final List collectedEmits) { @@ -225,15 +206,16 @@ public class Trie { } } - private State getState(State currentState, final Character character) { - State newCurrentState = currentState.nextState(character); + private State getState(final State initialState, final Character character) { + State currentState = initialState; + State updatedState = currentState.nextState(character); - while (newCurrentState == null) { + while (updatedState == null) { currentState = currentState.failure(); - newCurrentState = currentState.nextState(character); + updatedState = currentState.nextState(character); } - return newCurrentState; + return updatedState; } private void constructFailureStates() { @@ -266,7 +248,10 @@ public class Trie { } } - private boolean storeEmits(final int position, final State currentState, final EmitHandler emitHandler) { + private boolean storeEmits( + final int position, + final State currentState, + final EmitHandler emitHandler) { boolean emitted = false; final Collection emits = currentState.emit(); @@ -290,7 +275,8 @@ public class Trie { } /** - * Provides a fluent interface for constructing Trie instances. + * Constructs a TrieBuilder instance for configuring the Trie using a fluent + * interface. * * @return The builder used to configure its Trie. */ @@ -298,6 +284,9 @@ public class Trie { return new TrieBuilder(); } + /** + * Provides a fluent interface for constructing Trie instances. + */ public static class TrieBuilder { private final TrieConfig trieConfig = new TrieConfig(); @@ -317,11 +306,11 @@ public class Trie { * @return This builder. * @throws NullPointerException if the keyword is null. */ - public TrieBuilder addKeyword(final String keyword) { - this.trie.addKeyword(keyword); + public TrieBuilder addKeyword(final CharSequence keyword) { + getTrie().addKeyword( keyword.toString() ); return this; } - + /** * Adds a list of keywords to the Trie's list of text search keywords. * @@ -329,9 +318,12 @@ public class Trie { * * @return This builder. */ - public TrieBuilder addKeywords(final String... keywords) { - this.trie.addKeywords(keywords); - return this; + public TrieBuilder addKeywords(final CharSequence... keywords) { + for( final CharSequence keyword : keywords ) { + addKeyword( keyword ); + } + + return this; } /** @@ -341,19 +333,18 @@ public class Trie { * * @return This builder. */ - public TrieBuilder addKeywords(final Collection keywords) { - this.trie.addKeywords(keywords); - return this; + public TrieBuilder addKeywords(final Collection keywords) { + return addKeywords( keywords.toArray( new CharSequence[ keywords.size() ] ) ); } /** - * Configure the Trie to ignore case when searching for keywords in - * the text. + * Configure the Trie to ignore case when searching for keywords in the + * text. * * @return This builder. */ public TrieBuilder ignoreCase() { - this.trieConfig.setCaseInsensitive(true); + getTrieConfig().setCaseInsensitive(true); return this; } @@ -363,7 +354,7 @@ public class Trie { * @return This builder. */ public TrieBuilder ignoreOverlaps() { - this.trieConfig.setAllowOverlaps(false); + getTrieConfig().setAllowOverlaps(false); return this; } @@ -373,7 +364,7 @@ public class Trie { * @return This builder. */ public TrieBuilder onlyWholeWords() { - this.trieConfig.setOnlyWholeWords(true); + getTrieConfig().setOnlyWholeWords(true); return this; } @@ -385,35 +376,48 @@ public class Trie { * @return This builder. */ public TrieBuilder onlyWholeWordsWhiteSpaceSeparated() { - this.trieConfig.setOnlyWholeWordsWhiteSpaceSeparated(true); + getTrieConfig().setOnlyWholeWordsWhiteSpaceSeparated(true); return this; } - + /** - * Configure the Trie to stop after the first keyword is found in the - * text. + * Configure the Trie to stop searching for matches after the first + * keyword is found in the text. * * @return This builder. */ - public TrieBuilder stopOnHit() { - trie.trieConfig.setStopOnHit(true); + public TrieBuilder onlyFirstMatch() { + getTrieConfig().setStopOnHit(true); return this; } /** - * Configure the Trie based on the builder settings. + * Construct the Trie using the builder settings. * * @return The configured Trie. */ public Trie build() { - this.trie.constructFailureStates(); + getTrie().constructFailureStates(); + return getTrie(); + } + + private Trie getTrie() { return this.trie; } + private TrieConfig getTrieConfig() { + return this.trieConfig; + } + + /** + * @deprecated Use onlyFirstMatch() + */ + public TrieBuilder stopOnHit() { + return onlyFirstMatch(); + } + /** * @deprecated Use ignoreCase() - * - * @return This builder. */ public TrieBuilder caseInsensitive() { return ignoreCase(); @@ -421,8 +425,6 @@ public class Trie { /** * @deprecated Use ignoreOverlaps() - * - * @return This builder. */ public TrieBuilder removeOverlaps() { return ignoreOverlaps(); diff --git a/src/main/java/org/ahocorasick/trie/handler/DefaultEmitHandler.java b/src/main/java/org/ahocorasick/trie/handler/DefaultEmitHandler.java index 656d1e2..dd3e7a5 100644 --- a/src/main/java/org/ahocorasick/trie/handler/DefaultEmitHandler.java +++ b/src/main/java/org/ahocorasick/trie/handler/DefaultEmitHandler.java @@ -1,21 +1,19 @@ package org.ahocorasick.trie.handler; -import org.ahocorasick.trie.Emit; - import java.util.ArrayList; import java.util.List; +import org.ahocorasick.trie.Emit; public class DefaultEmitHandler implements EmitHandler { - private List emits = new ArrayList<>(); + private final List emits = new ArrayList<>(); @Override - public void emit(Emit emit) { + public void emit(final Emit emit) { this.emits.add(emit); } public List getEmits() { return this.emits; } - } diff --git a/src/test/java/org/ahocorasick/interval/IntervalTest.java b/src/test/java/org/ahocorasick/interval/IntervalTest.java index e61bad7..aba9511 100644 --- a/src/test/java/org/ahocorasick/interval/IntervalTest.java +++ b/src/test/java/org/ahocorasick/interval/IntervalTest.java @@ -44,7 +44,7 @@ public class IntervalTest { @Test public void comparable() { - Set intervals = new TreeSet(); + Set intervals = new TreeSet<>(); intervals.add(new Interval(4, 6)); intervals.add(new Interval(2, 7)); intervals.add(new Interval(3, 4)); diff --git a/src/test/java/org/ahocorasick/interval/IntervalTreeTest.java b/src/test/java/org/ahocorasick/interval/IntervalTreeTest.java index f4a7f57..47aea18 100644 --- a/src/test/java/org/ahocorasick/interval/IntervalTreeTest.java +++ b/src/test/java/org/ahocorasick/interval/IntervalTreeTest.java @@ -12,7 +12,7 @@ public class IntervalTreeTest { @Test public void findOverlaps() { - List intervals = new ArrayList(); + List intervals = new ArrayList<>(); intervals.add(new Interval(0, 2)); intervals.add(new Interval(1, 3)); intervals.add(new Interval(2, 4)); @@ -30,7 +30,7 @@ public class IntervalTreeTest { @Test public void removeOverlaps() { - List intervals = new ArrayList(); + List intervals = new ArrayList<>(); intervals.add(new Interval(0, 2)); intervals.add(new Interval(4, 5)); intervals.add(new Interval(2, 10)); diff --git a/src/test/java/org/ahocorasick/interval/IntervalableComparatorByPositionTest.java b/src/test/java/org/ahocorasick/interval/IntervalableComparatorByPositionTest.java index a6f1017..1dd6dc1 100644 --- a/src/test/java/org/ahocorasick/interval/IntervalableComparatorByPositionTest.java +++ b/src/test/java/org/ahocorasick/interval/IntervalableComparatorByPositionTest.java @@ -4,6 +4,7 @@ import org.junit.Test; import java.util.ArrayList; import java.util.Collections; +import static java.util.Collections.sort; import java.util.List; import static junit.framework.Assert.assertEquals; @@ -12,11 +13,11 @@ public class IntervalableComparatorByPositionTest { @Test public void sortOnPosition() { - List intervals = new ArrayList(); + List intervals = new ArrayList<>(); intervals.add(new Interval(4,5)); intervals.add(new Interval(1,4)); intervals.add(new Interval(3,8)); - Collections.sort(intervals, new IntervalableComparatorByPosition()); + sort(intervals, new IntervalableComparatorByPosition()); assertEquals(4, intervals.get(0).size()); assertEquals(6, intervals.get(1).size()); assertEquals(2, intervals.get(2).size()); diff --git a/src/test/java/org/ahocorasick/interval/IntervalableComparatorBySizeTest.java b/src/test/java/org/ahocorasick/interval/IntervalableComparatorBySizeTest.java index 208cf3d..a0db17e 100644 --- a/src/test/java/org/ahocorasick/interval/IntervalableComparatorBySizeTest.java +++ b/src/test/java/org/ahocorasick/interval/IntervalableComparatorBySizeTest.java @@ -4,6 +4,8 @@ import org.junit.Test; import java.util.ArrayList; import java.util.Collections; +import static java.util.Collections.sort; +import static java.util.Collections.sort; import java.util.List; import static junit.framework.Assert.assertEquals; @@ -12,11 +14,11 @@ public class IntervalableComparatorBySizeTest { @Test public void sortOnSize() { - List intervals = new ArrayList(); + List intervals = new ArrayList<>(); intervals.add(new Interval(4,5)); intervals.add(new Interval(1,4)); intervals.add(new Interval(3,8)); - Collections.sort(intervals, new IntervalableComparatorBySize()); + sort(intervals, new IntervalableComparatorBySize()); assertEquals(6, intervals.get(0).size()); assertEquals(4, intervals.get(1).size()); assertEquals(2, intervals.get(2).size()); @@ -24,10 +26,10 @@ public class IntervalableComparatorBySizeTest { @Test public void sortOnSizeThenPosition() { - List intervals = new ArrayList(); + List intervals = new ArrayList<>(); intervals.add(new Interval(4,7)); intervals.add(new Interval(2,5)); - Collections.sort(intervals, new IntervalableComparatorBySize()); + sort(intervals, new IntervalableComparatorBySize()); assertEquals(2, intervals.get(0).getStart()); assertEquals(4, intervals.get(1).getStart()); } diff --git a/src/test/java/org/ahocorasick/trie/TrieTest.java b/src/test/java/org/ahocorasick/trie/TrieTest.java index c4c780b..e9d75d9 100644 --- a/src/test/java/org/ahocorasick/trie/TrieTest.java +++ b/src/test/java/org/ahocorasick/trie/TrieTest.java @@ -5,7 +5,9 @@ import java.util.Collection; import java.util.Iterator; import java.util.List; import java.util.concurrent.ThreadLocalRandom; +import static java.util.concurrent.ThreadLocalRandom.current; import static junit.framework.Assert.assertEquals; +import static org.ahocorasick.trie.Trie.builder; import org.ahocorasick.trie.handler.EmitHandler; import static org.junit.Assert.assertTrue; import org.junit.Test; @@ -33,7 +35,7 @@ public class TrieTest { @Test public void keywordAndTextAreTheSame() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeyword(ALPHABET[0]) .build(); Collection emits = trie.parseText(ALPHABET[0]); @@ -43,7 +45,7 @@ public class TrieTest { @Test public void keywordAndTextAreTheSameFirstMatch() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeyword(ALPHABET[0]) .build(); Emit firstMatch = trie.firstMatch(ALPHABET[0]); @@ -52,7 +54,7 @@ public class TrieTest { @Test public void textIsLongerThanKeyword() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeyword(ALPHABET[0]) .build(); Collection emits = trie.parseText(" " + ALPHABET[0]); @@ -62,7 +64,7 @@ public class TrieTest { @Test public void textIsLongerThanKeywordFirstMatch() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeyword(ALPHABET[0]) .build(); Emit firstMatch = trie.firstMatch(" " + ALPHABET[0]); @@ -71,7 +73,7 @@ public class TrieTest { @Test public void variousKeywordsOneMatch() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeywords(ALPHABET) .build(); Collection emits = trie.parseText("bcd"); @@ -81,7 +83,7 @@ public class TrieTest { @Test public void variousKeywordsFirstMatch() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeywords(ALPHABET) .build(); Emit firstMatch = trie.firstMatch("bcd"); @@ -90,7 +92,7 @@ public class TrieTest { @Test public void ushersTestAndStopOnHit() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeywords(PRONOUNS) .stopOnHit() .build(); @@ -103,7 +105,7 @@ public class TrieTest { @Test public void ushersTest() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeywords(PRONOUNS) .build(); Collection emits = trie.parseText("ushers"); @@ -116,7 +118,7 @@ public class TrieTest { @Test public void ushersTestWithCapitalKeywords() { - Trie trie = Trie.builder() + Trie trie = builder() .ignoreCase() .addKeyword("HERS") .addKeyword("HIS") @@ -133,7 +135,7 @@ public class TrieTest { @Test public void ushersTestFirstMatch() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeywords(PRONOUNS) .build(); Emit firstMatch = trie.firstMatch("ushers"); @@ -142,7 +144,7 @@ public class TrieTest { @Test public void ushersTestByCallback() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeywords(PRONOUNS) .build(); @@ -164,7 +166,7 @@ public class TrieTest { @Test public void misleadingTest() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeyword("hers") .build(); Collection emits = trie.parseText("h he her hers"); @@ -174,7 +176,7 @@ public class TrieTest { @Test public void misleadingTestFirstMatch() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeyword("hers") .build(); Emit firstMatch = trie.firstMatch("h he her hers"); @@ -183,7 +185,7 @@ public class TrieTest { @Test public void recipes() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeywords(FOOD) .build(); Collection emits = trie.parseText("2 cauliflowers, 3 tomatoes, 4 slices of veal, 100g broccoli"); @@ -196,7 +198,7 @@ public class TrieTest { @Test public void recipesFirstMatch() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeywords(FOOD) .build(); Emit firstMatch = trie.firstMatch("2 cauliflowers, 3 tomatoes, 4 slices of veal, 100g broccoli"); @@ -206,7 +208,7 @@ public class TrieTest { @Test public void longAndShortOverlappingMatch() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeyword("he") .addKeyword("hehehehe") .build(); @@ -223,7 +225,7 @@ public class TrieTest { @Test public void nonOverlapping() { - Trie trie = Trie.builder().removeOverlaps() + Trie trie = builder().removeOverlaps() .addKeyword("ab") .addKeyword("cba") .addKeyword("ababc") @@ -238,7 +240,7 @@ public class TrieTest { @Test public void nonOverlappingFirstMatch() { - Trie trie = Trie.builder().removeOverlaps() + Trie trie = builder().removeOverlaps() .addKeyword("ab") .addKeyword("cba") .addKeyword("ababc") @@ -250,7 +252,7 @@ public class TrieTest { @Test public void containsMatch() { - Trie trie = Trie.builder().removeOverlaps() + Trie trie = builder().removeOverlaps() .addKeyword("ab") .addKeyword("cba") .addKeyword("ababc") @@ -260,7 +262,7 @@ public class TrieTest { @Test public void startOfChurchillSpeech() { - Trie trie = Trie.builder().removeOverlaps() + Trie trie = builder().removeOverlaps() .addKeyword("T") .addKeyword("u") .addKeyword("ur") @@ -278,7 +280,7 @@ public class TrieTest { @Test public void partialMatch() { - Trie trie = Trie.builder() + Trie trie = builder() .onlyWholeWords() .addKeyword("sugar") .build(); @@ -289,7 +291,7 @@ public class TrieTest { @Test public void partialMatchFirstMatch() { - Trie trie = Trie.builder() + Trie trie = builder() .onlyWholeWords() .addKeyword("sugar") .build(); @@ -300,7 +302,7 @@ public class TrieTest { @Test public void tokenizeFullSentence() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeywords(GREEK_LETTERS) .build(); Collection tokens = trie.tokenize("Hear: Alpha team first, Beta from the rear, Gamma in reserve"); @@ -318,7 +320,7 @@ public class TrieTest { // @see https://github.com/robert-bor/aho-corasick/issues/5 @Test public void testStringIndexOutOfBoundsException() { - Trie trie = Trie.builder().ignoreCase().onlyWholeWords() + Trie trie = builder().ignoreCase().onlyWholeWords() .addKeywords(UNICODE) .build(); Collection emits = trie.parseText("TurninG OnCe AgAiN BÖRKÜ"); @@ -332,7 +334,7 @@ public class TrieTest { @Test public void testIgnoreCase() { - Trie trie = Trie.builder().ignoreCase() + Trie trie = builder().ignoreCase() .addKeywords(UNICODE) .build(); Collection emits = trie.parseText("TurninG OnCe AgAiN BÖRKÜ"); @@ -346,7 +348,7 @@ public class TrieTest { @Test public void testIgnoreCaseFirstMatch() { - Trie trie = Trie.builder().ignoreCase() + Trie trie = builder().ignoreCase() .addKeywords(UNICODE) .build(); Emit firstMatch = trie.firstMatch("TurninG OnCe AgAiN BÖRKÜ"); @@ -356,7 +358,7 @@ public class TrieTest { @Test public void tokenizeTokensInSequence() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeywords(GREEK_LETTERS) .build(); Collection tokens = trie.tokenize("Alpha Beta Gamma"); @@ -366,7 +368,7 @@ public class TrieTest { // @see https://github.com/robert-bor/aho-corasick/issues/7 @Test public void testZeroLength() { - Trie trie = Trie.builder().ignoreOverlaps().onlyWholeWords().ignoreCase() + Trie trie = builder().ignoreOverlaps().onlyWholeWords().ignoreCase() .addKeyword("") .build(); trie.tokenize("Try a natural lip and subtle bronzer to keep all the focus on those big bright eyes with NARS Eyeshadow Duo in Rated R And the winner is... Boots No7 Advanced Renewal Anti-ageing Glycolic Peel Kit ($25 amazon.com) won most-appealing peel."); @@ -377,7 +379,7 @@ public class TrieTest { public void testUnicode1() { String target = "LİKE THIS"; // The second character ('İ') is Unicode, which was read by AC as a 2-byte char assertEquals("THIS", target.substring(5, 9)); // Java does it the right way - Trie trie = Trie.builder().ignoreCase().onlyWholeWords() + Trie trie = builder().ignoreCase().onlyWholeWords() .addKeyword("this") .build(); Collection emits = trie.parseText(target); @@ -390,7 +392,7 @@ public class TrieTest { @Test public void testUnicode2() { String target = "LİKE THIS"; // The second character ('İ') is Unicode, which was read by AC as a 2-byte char - Trie trie = Trie.builder() + Trie trie = builder() .ignoreCase() .onlyWholeWords() .addKeyword("this") @@ -402,7 +404,7 @@ public class TrieTest { @Test public void testPartialMatchWhiteSpaces() { - Trie trie = Trie.builder() + Trie trie = builder() .onlyWholeWordsWhiteSpaceSeparated() .addKeyword("#sugar-123") .build(); @@ -420,7 +422,7 @@ public class TrieTest { injectKeyword( text, keyword, interval ); - Trie trie = Trie.builder() + Trie trie = builder() .onlyWholeWords() .addKeyword( keyword ) .build(); @@ -436,10 +438,10 @@ public class TrieTest { * @param count The number of numbers to generate. * @return A character sequence filled with random digits. */ - private StringBuilder randomNumbers( int count ) { + private StringBuilder randomNumbers( final int count ) { final StringBuilder sb = new StringBuilder( count ); - - while( --count > 0 ) { + + for( int i = count - 1; i >= 0; i-- ) { sb.append( randomInt( 0, 10 ) ); } @@ -465,7 +467,7 @@ public class TrieTest { } private int randomInt( final int min, final int max ) { - return ThreadLocalRandom.current().nextInt( min, max ); + return current().nextInt( min, max ); } private void checkEmit(Emit next, int expectedStart, int expectedEnd, String expectedKeyword) { From 503a0f1c769b5d36c5dfa91f0958e6dd6c96e107 Mon Sep 17 00:00:00 2001 From: djarvis Date: Tue, 29 Nov 2016 18:38:00 -0800 Subject: [PATCH 2/3] Updated source base to leverage JDK 1.7 syntax. Added more final modifiers. Eliminated parameter modification inside method. Some formatting. Changed TrieBuilder to offer CharSequence instead of String; revised Trie accordingly. Removed some duplication. NetBeans automatically translated the code to use static imports (as per JDK 1.7 syntax). --- .../ahocorasick/interval/IntervalNode.java | 60 ++--- .../ahocorasick/interval/IntervalTree.java | 18 +- .../ahocorasick/interval/Intervalable.java | 1 - .../IntervalableComparatorByPosition.java | 2 +- .../IntervalableComparatorBySize.java | 4 +- src/main/java/org/ahocorasick/trie/Emit.java | 1 - .../java/org/ahocorasick/trie/MatchToken.java | 5 +- src/main/java/org/ahocorasick/trie/State.java | 74 +++--- src/main/java/org/ahocorasick/trie/Trie.java | 231 +++++++++--------- .../trie/handler/DefaultEmitHandler.java | 8 +- .../ahocorasick/interval/IntervalTest.java | 2 +- .../interval/IntervalTreeTest.java | 4 +- .../IntervalableComparatorByPositionTest.java | 11 +- .../IntervalableComparatorBySizeTest.java | 20 +- .../java/org/ahocorasick/trie/TrieTest.java | 144 ++++++----- 15 files changed, 292 insertions(+), 293 deletions(-) diff --git a/src/main/java/org/ahocorasick/interval/IntervalNode.java b/src/main/java/org/ahocorasick/interval/IntervalNode.java index 22242fa..d875ea3 100644 --- a/src/main/java/org/ahocorasick/interval/IntervalNode.java +++ b/src/main/java/org/ahocorasick/interval/IntervalNode.java @@ -8,16 +8,16 @@ public class IntervalNode { private enum Direction {LEFT, RIGHT} - private IntervalNode left = null; - private IntervalNode right = null; + private IntervalNode left; + private IntervalNode right; private int point; - private List intervals = new ArrayList(); + private List intervals = new ArrayList<>(); - public IntervalNode(List intervals) { + public IntervalNode(final List intervals) { this.point = determineMedian(intervals); - List toLeft = new ArrayList(); - List toRight = new ArrayList(); + final List toLeft = new ArrayList<>(); + final List toRight = new ArrayList<>(); for (Intervalable interval : intervals) { if (interval.getEnd() < this.point) { @@ -37,7 +37,7 @@ public class IntervalNode { } } - public int determineMedian(List intervals) { + public int determineMedian(final List intervals) { int start = -1; int end = -1; for (Intervalable interval : intervals) { @@ -53,17 +53,19 @@ public class IntervalNode { return (start + end) / 2; } - public List findOverlaps(Intervalable interval) { + public List findOverlaps(final Intervalable interval) { + final List overlaps = new ArrayList<>(); - List overlaps = new ArrayList(); - - if (this.point < interval.getStart()) { // Tends to the right + if (this.point < interval.getStart()) { + // Tends to the right addToOverlaps(interval, overlaps, findOverlappingRanges(this.right, interval)); addToOverlaps(interval, overlaps, checkForOverlapsToTheRight(interval)); - } else if (this.point > interval.getEnd()) { // Tends to the left + } else if (this.point > interval.getEnd()) { + // Tends to the left addToOverlaps(interval, overlaps, findOverlappingRanges(this.left, interval)); addToOverlaps(interval, overlaps, checkForOverlapsToTheLeft(interval)); - } else { // Somewhere in the middle + } else { + // Somewhere in the middle addToOverlaps(interval, overlaps, this.intervals); addToOverlaps(interval, overlaps, findOverlappingRanges(this.left, interval)); addToOverlaps(interval, overlaps, findOverlappingRanges(this.right, interval)); @@ -72,26 +74,30 @@ public class IntervalNode { return overlaps; } - protected void addToOverlaps(Intervalable interval, List overlaps, List newOverlaps) { - for (Intervalable currentInterval : newOverlaps) { + protected void addToOverlaps( + final Intervalable interval, + final List overlaps, + final List newOverlaps) { + for (final Intervalable currentInterval : newOverlaps) { if (!currentInterval.equals(interval)) { overlaps.add(currentInterval); } } } - protected List checkForOverlapsToTheLeft(Intervalable interval) { + protected List checkForOverlapsToTheLeft(final Intervalable interval) { return checkForOverlaps(interval, Direction.LEFT); } - protected List checkForOverlapsToTheRight(Intervalable interval) { + protected List checkForOverlapsToTheRight(final Intervalable interval) { return checkForOverlaps(interval, Direction.RIGHT); } - protected List checkForOverlaps(Intervalable interval, Direction direction) { - - List overlaps = new ArrayList(); - for (Intervalable currentInterval : this.intervals) { + protected List checkForOverlaps( + final Intervalable interval, final Direction direction) { + final List overlaps = new ArrayList<>(); + + for (final Intervalable currentInterval : this.intervals) { switch (direction) { case LEFT: if (currentInterval.getStart() <= interval.getEnd()) { @@ -105,15 +111,13 @@ public class IntervalNode { break; } } + return overlaps; } - - + protected List findOverlappingRanges(IntervalNode node, Intervalable interval) { - if (node != null) { - return node.findOverlaps(interval); - } - return Collections.emptyList(); + return node == null + ? Collections.emptyList() + : node.findOverlaps( interval ); } - } diff --git a/src/main/java/org/ahocorasick/interval/IntervalTree.java b/src/main/java/org/ahocorasick/interval/IntervalTree.java index 40eeb0b..3be617e 100644 --- a/src/main/java/org/ahocorasick/interval/IntervalTree.java +++ b/src/main/java/org/ahocorasick/interval/IntervalTree.java @@ -1,26 +1,26 @@ package org.ahocorasick.interval; -import java.util.Collections; +import static java.util.Collections.sort; import java.util.List; import java.util.Set; import java.util.TreeSet; public class IntervalTree { - private IntervalNode rootNode = null; + private final IntervalNode rootNode; public IntervalTree(List intervals) { this.rootNode = new IntervalNode(intervals); } - public List removeOverlaps(List intervals) { + public List removeOverlaps(final List intervals) { // Sort the intervals on size, then left-most position - Collections.sort(intervals, new IntervalableComparatorBySize()); + sort(intervals, new IntervalableComparatorBySize()); - Set removeIntervals = new TreeSet(); + final Set removeIntervals = new TreeSet<>(); - for (Intervalable interval : intervals) { + for (final Intervalable interval : intervals) { // If the interval was already removed, ignore it if (removeIntervals.contains(interval)) { continue; @@ -31,17 +31,17 @@ public class IntervalTree { } // Remove all intervals that were overlapping - for (Intervalable removeInterval : removeIntervals) { + for (final Intervalable removeInterval : removeIntervals) { intervals.remove(removeInterval); } // Sort the intervals, now on left-most position only - Collections.sort(intervals, new IntervalableComparatorByPosition()); + sort(intervals, new IntervalableComparatorByPosition()); return intervals; } - public List findOverlaps(Intervalable interval) { + public List findOverlaps(final Intervalable interval) { return rootNode.findOverlaps(interval); } diff --git a/src/main/java/org/ahocorasick/interval/Intervalable.java b/src/main/java/org/ahocorasick/interval/Intervalable.java index 0dd5f69..fed2982 100644 --- a/src/main/java/org/ahocorasick/interval/Intervalable.java +++ b/src/main/java/org/ahocorasick/interval/Intervalable.java @@ -7,5 +7,4 @@ public interface Intervalable extends Comparable { public int getEnd(); public int size(); - } diff --git a/src/main/java/org/ahocorasick/interval/IntervalableComparatorByPosition.java b/src/main/java/org/ahocorasick/interval/IntervalableComparatorByPosition.java index d144995..2dc0491 100644 --- a/src/main/java/org/ahocorasick/interval/IntervalableComparatorByPosition.java +++ b/src/main/java/org/ahocorasick/interval/IntervalableComparatorByPosition.java @@ -5,7 +5,7 @@ import java.util.Comparator; public class IntervalableComparatorByPosition implements Comparator { @Override - public int compare(Intervalable intervalable, Intervalable intervalable2) { + public int compare(final Intervalable intervalable, final Intervalable intervalable2) { return intervalable.getStart() - intervalable2.getStart(); } diff --git a/src/main/java/org/ahocorasick/interval/IntervalableComparatorBySize.java b/src/main/java/org/ahocorasick/interval/IntervalableComparatorBySize.java index 3814759..6a33fa8 100644 --- a/src/main/java/org/ahocorasick/interval/IntervalableComparatorBySize.java +++ b/src/main/java/org/ahocorasick/interval/IntervalableComparatorBySize.java @@ -5,11 +5,13 @@ import java.util.Comparator; public class IntervalableComparatorBySize implements Comparator { @Override - public int compare(Intervalable intervalable, Intervalable intervalable2) { + public int compare(final Intervalable intervalable, final Intervalable intervalable2) { int comparison = intervalable2.size() - intervalable.size(); + if (comparison == 0) { comparison = intervalable.getStart() - intervalable2.getStart(); } + return comparison; } diff --git a/src/main/java/org/ahocorasick/trie/Emit.java b/src/main/java/org/ahocorasick/trie/Emit.java index 60c1f9e..8c17253 100644 --- a/src/main/java/org/ahocorasick/trie/Emit.java +++ b/src/main/java/org/ahocorasick/trie/Emit.java @@ -20,5 +20,4 @@ public class Emit extends Interval implements Intervalable { public String toString() { return super.toString() + "=" + this.keyword; } - } diff --git a/src/main/java/org/ahocorasick/trie/MatchToken.java b/src/main/java/org/ahocorasick/trie/MatchToken.java index c2615dc..851472c 100644 --- a/src/main/java/org/ahocorasick/trie/MatchToken.java +++ b/src/main/java/org/ahocorasick/trie/MatchToken.java @@ -2,9 +2,9 @@ package org.ahocorasick.trie; public class MatchToken extends Token { - private Emit emit; + private final Emit emit; - public MatchToken(String fragment, Emit emit) { + public MatchToken(final String fragment, final Emit emit) { super(fragment); this.emit = emit; } @@ -18,5 +18,4 @@ public class MatchToken extends Token { public Emit getEmit() { return this.emit; } - } diff --git a/src/main/java/org/ahocorasick/trie/State.java b/src/main/java/org/ahocorasick/trie/State.java index e192207..fce324e 100644 --- a/src/main/java/org/ahocorasick/trie/State.java +++ b/src/main/java/org/ahocorasick/trie/State.java @@ -4,51 +4,43 @@ import java.util.*; /** *

- * A state has various important tasks it must attend to: + * A state has various important tasks it must attend to: *

- *

+ * *

    - *
  • success; when a character points to another state, it must return that state
  • - *
  • failure; when a character has no matching state, the algorithm must be able to fall back on a - * state with less depth
  • - *
  • emits; when this state is passed and keywords have been matched, the matches must be - * 'emitted' so that they can be used later on.
  • + *
  • success; when a character points to another state, it must return that state
  • + *
  • failure; when a character has no matching state, the algorithm must be able to fall back on a + * state with less depth
  • + *
  • emits; when this state is passed and keywords have been matched, the matches must be + * 'emitted' so that they can be used later on.
  • *
+ * *

- *

- * The root state is special in the sense that it has no failure state; it cannot fail. If it 'fails' - * it will still parse the next character and start from the root node. This ensures that the algorithm - * always runs. All other states always have a fail state. + * The root state is special in the sense that it has no failure state; it cannot fail. If it 'fails' + * it will still parse the next character and start from the root node. This ensures that the algorithm + * always runs. All other states always have a fail state. *

* * @author Robert Bor */ public class State { - /** - * effective the size of the keyword - */ + /** effective the size of the keyword */ private final int depth; - /** - * only used for the root state to refer to itself in case no matches have been found - */ + /** only used for the root state to refer to itself in case no matches have been found */ private final State rootState; /** * referred to in the white paper as the 'goto' structure. From a state it is possible to go * to other states, depending on the character passed. */ - private final Map success = new HashMap<>(); + private final Map success = new HashMap<>(); - /** - * if no matching states are found, the failure state will be returned - */ + /** if no matching states are found, the failure state will be returned */ private State failure; - /** - * whenever this state is reached, it will emit the matches keywords for future reference - */ + /** whenever this state is reached, it will emit the matches keywords for future reference */ private Set emits; public State() { @@ -62,11 +54,11 @@ public class State { private State nextState(final Character character, final boolean ignoreRootState) { State nextState = this.success.get(character); - + if (!ignoreRootState && nextState == null && this.rootState != null) { nextState = this.rootState; } - + return nextState; } @@ -74,24 +66,24 @@ public class State { return nextState(character, false); } - public State nextStateIgnoreRootState(Character character) { + public State nextStateIgnoreRootState(final Character character) { return nextState(character, true); } - - public State addState(String keyword) { - State state = this; - - for (final Character character : keyword.toCharArray()) { - state = state.addState(character); - } - - return state; + + public State addState(final String keyword ) { + State state = this; + + for (final Character character : keyword.toCharArray()) { + state = state.addState(character); + } + + return state; } - public State addState(Character character) { + public State addState(final Character character) { State nextState = nextStateIgnoreRootState(character); if (nextState == null) { - nextState = new State(this.depth + 1); + nextState = new State(this.depth+1); this.success.put(character, nextState); } return nextState; @@ -101,21 +93,21 @@ public class State { return this.depth; } - public void addEmit(String keyword) { + public void addEmit(final String keyword) { if (this.emits == null) { this.emits = new TreeSet<>(); } this.emits.add(keyword); } - public void addEmit(Collection emits) { + public void addEmit(final Collection emits) { for (String emit : emits) { addEmit(emit); } } public Collection emit() { - return this.emits == null ? Collections.emptyList() : this.emits; + return this.emits == null ? Collections. emptyList() : this.emits; } public State failure() { diff --git a/src/main/java/org/ahocorasick/trie/Trie.java b/src/main/java/org/ahocorasick/trie/Trie.java index 941fa4e..9127160 100644 --- a/src/main/java/org/ahocorasick/trie/Trie.java +++ b/src/main/java/org/ahocorasick/trie/Trie.java @@ -1,13 +1,13 @@ package org.ahocorasick.trie; +import static java.lang.Character.isAlphabetic; import static java.lang.Character.isWhitespace; - +import static java.lang.Character.toLowerCase; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Queue; import java.util.concurrent.LinkedBlockingDeque; - import org.ahocorasick.interval.IntervalTree; import org.ahocorasick.interval.Intervalable; import org.ahocorasick.trie.handler.DefaultEmitHandler; @@ -16,7 +16,7 @@ import org.ahocorasick.trie.handler.EmitHandler; /** * Based on the Aho-Corasick white paper, Bell technologies: * http://cr.yp.to/bib/1975/aho.pdf - * + * * @author Robert Bor */ public class Trie { @@ -29,65 +29,42 @@ public class Trie { this.trieConfig = trieConfig; this.rootState = new State(); } - + /** * Used by the builder to add a text search keyword. - * + * * @param keyword The search term to add to the list of search terms. + * * @throws NullPointerException if the keyword is null. */ - private void addKeyword(String keyword) { - if (keyword.isEmpty()) { - return; - } + private void addKeyword( String keyword ) { + if( keyword.length() > 0 ) { + if( isCaseInsensitive() ) { + keyword = keyword.toLowerCase(); + } - if (isCaseInsensitive()) { - keyword = keyword.toLowerCase(); - } - - addState(keyword).addEmit(keyword); - } - - /** - * Delegates to addKeyword. - * - * @param keywords List of search term to add to the list of search terms. - */ - private void addKeywords(final String[] keywords) { - for (final String keyword : keywords) { - addKeyword(keyword); + addState( keyword ).addEmit( keyword ); } } - /** - * Delegates to addKeyword. - * - * @param keywords List of search term to add to the list of search terms. - */ - private void addKeywords(final Collection keywords) { - for (final String keyword : keywords) { - addKeyword(keyword); - } + private State addState( final String keyword ) { + return getRootState().addState( keyword ); } - - private State addState(final String keyword) { - return getRootState().addState(keyword); - } - + public Collection tokenize(final String text) { final Collection tokens = new ArrayList<>(); final Collection collectedEmits = parseText(text); int lastCollectedPosition = -1; - + for (final Emit emit : collectedEmits) { if (emit.getStart() - lastCollectedPosition > 1) { tokens.add(createFragment(emit, text, lastCollectedPosition)); } - + tokens.add(createMatch(emit, text)); lastCollectedPosition = emit.getEnd(); } - + if (text.length() - lastCollectedPosition > 1) { tokens.add(createFragment(null, text, lastCollectedPosition)); } @@ -95,12 +72,15 @@ public class Trie { return tokens; } - private Token createFragment(final Emit emit, final String text, final int lastCollectedPosition) { - return new FragmentToken(text.substring(lastCollectedPosition + 1, emit == null ? text.length() : emit.getStart())); + private Token createFragment( + final Emit emit, + final String text, + final int lastCollectedPosition) { + return new FragmentToken(text.substring(lastCollectedPosition+1, emit == null ? text.length() : emit.getStart())); } - private Token createMatch(Emit emit, String text) { - return new MatchToken(text.substring(emit.getStart(), emit.getEnd() + 1), emit); + private Token createMatch(final Emit emit, final String text) { + return new MatchToken(text.substring(emit.getStart(), emit.getEnd()+1), emit); } @SuppressWarnings("unchecked") @@ -119,7 +99,7 @@ public class Trie { } if (!trieConfig.isAllowOverlaps()) { - IntervalTree intervalTree = new IntervalTree((List) (List) collectedEmits); + IntervalTree intervalTree = new IntervalTree((List)(List)collectedEmits); intervalTree.removeOverlaps((List) (List) collectedEmits); } @@ -132,15 +112,15 @@ public class Trie { public void parseText(final CharSequence text, final EmitHandler emitHandler) { State currentState = getRootState(); - + for (int position = 0; position < text.length(); position++) { Character character = text.charAt(position); - + // TODO: Maybe lowercase the entire string at once? if (trieConfig.isCaseInsensitive()) { - character = Character.toLowerCase(character); + character = toLowerCase(character); } - + currentState = getState(currentState, character); if (storeEmits(position, currentState, emitHandler) && trieConfig.isStopOnHit()) { return; @@ -158,20 +138,20 @@ public class Trie { } else { // Fast path. Returns first match found. State currentState = getRootState(); - + for (int position = 0; position < text.length(); position++) { Character character = text.charAt(position); - + // TODO: Lowercase the entire string at once? if (trieConfig.isCaseInsensitive()) { - character = Character.toLowerCase(character); + character = toLowerCase(character); } - + currentState = getState(currentState, character); Collection emitStrs = currentState.emit(); - + if (emitStrs != null && !emitStrs.isEmpty()) { - for (String emitStr : emitStrs) { + for (final String emitStr : emitStrs) { final Emit emit = new Emit(position - emitStr.length() + 1, position, emitStr); if (trieConfig.isOnlyWholeWords()) { if (!isPartialMatch(text, emit)) { @@ -184,26 +164,26 @@ public class Trie { } } } - + return null; } private boolean isPartialMatch(final CharSequence searchText, final Emit emit) { return (emit.getStart() != 0 && - Character.isAlphabetic(searchText.charAt(emit.getStart() - 1))) || - (emit.getEnd() + 1 != searchText.length() && - Character.isAlphabetic(searchText.charAt(emit.getEnd() + 1))); + isAlphabetic(searchText.charAt(emit.getStart() - 1))) || + (emit.getEnd() + 1 != searchText.length() && + isAlphabetic(searchText.charAt(emit.getEnd() + 1))); } private void removePartialMatches(final CharSequence searchText, final List collectedEmits) { final List removeEmits = new ArrayList<>(); - + for (final Emit emit : collectedEmits) { if (isPartialMatch(searchText, emit)) { removeEmits.add(emit); } } - + for (final Emit removeEmit : removeEmits) { collectedEmits.remove(removeEmit); } @@ -212,29 +192,30 @@ public class Trie { private void removePartialMatchesWhiteSpaceSeparated(final CharSequence searchText, final List collectedEmits) { final long size = searchText.length(); final List removeEmits = new ArrayList<>(); - + for (final Emit emit : collectedEmits) { if ((emit.getStart() == 0 || isWhitespace(searchText.charAt(emit.getStart() - 1))) && - (emit.getEnd() + 1 == size || isWhitespace(searchText.charAt(emit.getEnd() + 1)))) { + (emit.getEnd() + 1 == size || isWhitespace(searchText.charAt(emit.getEnd() + 1)))) { continue; } removeEmits.add(emit); } - + for (final Emit removeEmit : removeEmits) { collectedEmits.remove(removeEmit); } } - private State getState(State currentState, final Character character) { - State newCurrentState = currentState.nextState(character); - - while (newCurrentState == null) { + private State getState(final State initialState, final Character character) { + State currentState = initialState; + State updatedState = currentState.nextState(character); + + while (updatedState == null) { currentState = currentState.failure(); - newCurrentState = currentState.nextState(character); + updatedState = currentState.nextState(character); } - - return newCurrentState; + + return updatedState; } private void constructFailureStates() { @@ -267,10 +248,13 @@ public class Trie { } } - private boolean storeEmits(final int position, final State currentState, final EmitHandler emitHandler) { + private boolean storeEmits( + final int position, + final State currentState, + final EmitHandler emitHandler) { boolean emitted = false; final Collection emits = currentState.emit(); - + // TODO: The check for empty might be superfluous. if (emits != null && !emits.isEmpty()) { for (final String emit : emits) { @@ -278,27 +262,31 @@ public class Trie { emitted = true; } } - + return emitted; } private boolean isCaseInsensitive() { - return trieConfig.isCaseInsensitive(); + return trieConfig.isCaseInsensitive(); } - + private State getRootState() { - return this.rootState; + return this.rootState; } /** - * Provides a fluent interface for constructing Trie instances. - * + * Constructs a TrieBuilder instance for configuring the Trie using a fluent + * interface. + * * @return The builder used to configure its Trie. */ public static TrieBuilder builder() { return new TrieBuilder(); } + /** + * Provides a fluent interface for constructing Trie instances. + */ public static class TrieBuilder { private final TrieConfig trieConfig = new TrieConfig(); @@ -308,71 +296,75 @@ public class Trie { /** * Default (empty) constructor. */ - private TrieBuilder() { - } + private TrieBuilder() {} /** * Adds a keyword to the Trie's list of text search keywords. - * + * * @param keyword The keyword to add to the list. + * * @return This builder. * @throws NullPointerException if the keyword is null. */ - public TrieBuilder addKeyword(final String keyword) { - this.trie.addKeyword(keyword); + public TrieBuilder addKeyword(final CharSequence keyword) { + getTrie().addKeyword( keyword.toString() ); return this; } /** * Adds a list of keywords to the Trie's list of text search keywords. - * + * * @param keywords The keywords to add to the list. + * * @return This builder. */ - public TrieBuilder addKeywords(final String... keywords) { - this.trie.addKeywords(keywords); + public TrieBuilder addKeywords(final CharSequence... keywords) { + for( final CharSequence keyword : keywords ) { + addKeyword( keyword ); + } + return this; } /** * Adds a list of keywords to the Trie's list of text search keywords. - * + * * @param keywords The keywords to add to the list. + * * @return This builder. */ - public TrieBuilder addKeywords(final Collection keywords) { - this.trie.addKeywords(keywords); - return this; + public TrieBuilder addKeywords(final Collection keywords) { + return addKeywords( keywords.toArray( new CharSequence[ keywords.size() ] ) ); } /** - * Configure the Trie to ignore case when searching for keywords in - * the text. - * + * Configure the Trie to ignore case when searching for keywords in the + * text. + * * @return This builder. */ public TrieBuilder ignoreCase() { - this.trieConfig.setCaseInsensitive(true); + getTrieConfig().setCaseInsensitive(true); return this; } /** * Configure the Trie to ignore overlapping keywords. - * + * * @return This builder. */ public TrieBuilder ignoreOverlaps() { - this.trieConfig.setAllowOverlaps(false); + getTrieConfig().setAllowOverlaps(false); return this; } /** * Configure the Trie to match whole keywords in the text. - * + * * @return This builder. */ public TrieBuilder onlyWholeWords() { - this.trieConfig.setOnlyWholeWords(true); + getTrieConfig().setOnlyWholeWords(true); return this; } @@ -380,37 +372,51 @@ public class Trie { * Configure the Trie to match whole keywords that are separated by * whitespace in the text. For example, "this keyword thatkeyword" * would only match the first occurrence of "keyword". - * + * * @return This builder. */ public TrieBuilder onlyWholeWordsWhiteSpaceSeparated() { - this.trieConfig.setOnlyWholeWordsWhiteSpaceSeparated(true); + getTrieConfig().setOnlyWholeWordsWhiteSpaceSeparated(true); return this; } - + /** - * Configure the Trie to stop after the first keyword is found in the - * text. - * + * Configure the Trie to stop searching for matches after the first + * keyword is found in the text. + * * @return This builder. */ - public TrieBuilder stopOnHit() { - trie.trieConfig.setStopOnHit(true); + public TrieBuilder onlyFirstMatch() { + getTrieConfig().setStopOnHit(true); return this; } /** - * Configure the Trie based on the builder settings. - * + * Construct the Trie using the builder settings. + * * @return The configured Trie. */ public Trie build() { - this.trie.constructFailureStates(); + getTrie().constructFailureStates(); + return getTrie(); + } + + private Trie getTrie() { return this.trie; } + + private TrieConfig getTrieConfig() { + return this.trieConfig; + } + + /** + * @deprecated Use onlyFirstMatch() + */ + public TrieBuilder stopOnHit() { + return onlyFirstMatch(); + } /** - * @return This builder. * @deprecated Use ignoreCase() */ public TrieBuilder caseInsensitive() { @@ -418,7 +424,6 @@ public class Trie { } /** - * @return This builder. * @deprecated Use ignoreOverlaps() */ public TrieBuilder removeOverlaps() { diff --git a/src/main/java/org/ahocorasick/trie/handler/DefaultEmitHandler.java b/src/main/java/org/ahocorasick/trie/handler/DefaultEmitHandler.java index 656d1e2..dd3e7a5 100644 --- a/src/main/java/org/ahocorasick/trie/handler/DefaultEmitHandler.java +++ b/src/main/java/org/ahocorasick/trie/handler/DefaultEmitHandler.java @@ -1,21 +1,19 @@ package org.ahocorasick.trie.handler; -import org.ahocorasick.trie.Emit; - import java.util.ArrayList; import java.util.List; +import org.ahocorasick.trie.Emit; public class DefaultEmitHandler implements EmitHandler { - private List emits = new ArrayList<>(); + private final List emits = new ArrayList<>(); @Override - public void emit(Emit emit) { + public void emit(final Emit emit) { this.emits.add(emit); } public List getEmits() { return this.emits; } - } diff --git a/src/test/java/org/ahocorasick/interval/IntervalTest.java b/src/test/java/org/ahocorasick/interval/IntervalTest.java index ff9b0c3..67d42d5 100644 --- a/src/test/java/org/ahocorasick/interval/IntervalTest.java +++ b/src/test/java/org/ahocorasick/interval/IntervalTest.java @@ -44,7 +44,7 @@ public class IntervalTest { @Test public void comparable() { - Set intervals = new TreeSet(); + Set intervals = new TreeSet<>(); intervals.add(new Interval(4, 6)); intervals.add(new Interval(2, 7)); intervals.add(new Interval(3, 4)); diff --git a/src/test/java/org/ahocorasick/interval/IntervalTreeTest.java b/src/test/java/org/ahocorasick/interval/IntervalTreeTest.java index 96c3670..fc41a3e 100644 --- a/src/test/java/org/ahocorasick/interval/IntervalTreeTest.java +++ b/src/test/java/org/ahocorasick/interval/IntervalTreeTest.java @@ -12,7 +12,7 @@ public class IntervalTreeTest { @Test public void findOverlaps() { - List intervals = new ArrayList(); + List intervals = new ArrayList<>(); intervals.add(new Interval(0, 2)); intervals.add(new Interval(1, 3)); intervals.add(new Interval(2, 4)); @@ -30,7 +30,7 @@ public class IntervalTreeTest { @Test public void removeOverlaps() { - List intervals = new ArrayList(); + List intervals = new ArrayList<>(); intervals.add(new Interval(0, 2)); intervals.add(new Interval(4, 5)); intervals.add(new Interval(2, 10)); diff --git a/src/test/java/org/ahocorasick/interval/IntervalableComparatorByPositionTest.java b/src/test/java/org/ahocorasick/interval/IntervalableComparatorByPositionTest.java index a36c831..1dd6dc1 100644 --- a/src/test/java/org/ahocorasick/interval/IntervalableComparatorByPositionTest.java +++ b/src/test/java/org/ahocorasick/interval/IntervalableComparatorByPositionTest.java @@ -4,6 +4,7 @@ import org.junit.Test; import java.util.ArrayList; import java.util.Collections; +import static java.util.Collections.sort; import java.util.List; import static junit.framework.Assert.assertEquals; @@ -12,11 +13,11 @@ public class IntervalableComparatorByPositionTest { @Test public void sortOnPosition() { - List intervals = new ArrayList(); - intervals.add(new Interval(4, 5)); - intervals.add(new Interval(1, 4)); - intervals.add(new Interval(3, 8)); - Collections.sort(intervals, new IntervalableComparatorByPosition()); + List intervals = new ArrayList<>(); + intervals.add(new Interval(4,5)); + intervals.add(new Interval(1,4)); + intervals.add(new Interval(3,8)); + sort(intervals, new IntervalableComparatorByPosition()); assertEquals(4, intervals.get(0).size()); assertEquals(6, intervals.get(1).size()); assertEquals(2, intervals.get(2).size()); diff --git a/src/test/java/org/ahocorasick/interval/IntervalableComparatorBySizeTest.java b/src/test/java/org/ahocorasick/interval/IntervalableComparatorBySizeTest.java index 8fc7db1..a0db17e 100644 --- a/src/test/java/org/ahocorasick/interval/IntervalableComparatorBySizeTest.java +++ b/src/test/java/org/ahocorasick/interval/IntervalableComparatorBySizeTest.java @@ -4,6 +4,8 @@ import org.junit.Test; import java.util.ArrayList; import java.util.Collections; +import static java.util.Collections.sort; +import static java.util.Collections.sort; import java.util.List; import static junit.framework.Assert.assertEquals; @@ -12,11 +14,11 @@ public class IntervalableComparatorBySizeTest { @Test public void sortOnSize() { - List intervals = new ArrayList(); - intervals.add(new Interval(4, 5)); - intervals.add(new Interval(1, 4)); - intervals.add(new Interval(3, 8)); - Collections.sort(intervals, new IntervalableComparatorBySize()); + List intervals = new ArrayList<>(); + intervals.add(new Interval(4,5)); + intervals.add(new Interval(1,4)); + intervals.add(new Interval(3,8)); + sort(intervals, new IntervalableComparatorBySize()); assertEquals(6, intervals.get(0).size()); assertEquals(4, intervals.get(1).size()); assertEquals(2, intervals.get(2).size()); @@ -24,10 +26,10 @@ public class IntervalableComparatorBySizeTest { @Test public void sortOnSizeThenPosition() { - List intervals = new ArrayList(); - intervals.add(new Interval(4, 7)); - intervals.add(new Interval(2, 5)); - Collections.sort(intervals, new IntervalableComparatorBySize()); + List intervals = new ArrayList<>(); + intervals.add(new Interval(4,7)); + intervals.add(new Interval(2,5)); + sort(intervals, new IntervalableComparatorBySize()); assertEquals(2, intervals.get(0).getStart()); assertEquals(4, intervals.get(1).getStart()); } diff --git a/src/test/java/org/ahocorasick/trie/TrieTest.java b/src/test/java/org/ahocorasick/trie/TrieTest.java index e6129ce..e9d75d9 100644 --- a/src/test/java/org/ahocorasick/trie/TrieTest.java +++ b/src/test/java/org/ahocorasick/trie/TrieTest.java @@ -5,39 +5,37 @@ import java.util.Collection; import java.util.Iterator; import java.util.List; import java.util.concurrent.ThreadLocalRandom; - +import static java.util.concurrent.ThreadLocalRandom.current; import static junit.framework.Assert.assertEquals; - +import static org.ahocorasick.trie.Trie.builder; import org.ahocorasick.trie.handler.EmitHandler; - import static org.junit.Assert.assertTrue; - import org.junit.Test; public class TrieTest { private final static String[] ALPHABET = new String[]{ - "abc", "bcd", "cde" + "abc", "bcd", "cde" }; - + private final static String[] PRONOUNS = new String[]{ - "hers", "his", "she", "he" + "hers", "his", "she", "he" }; private final static String[] FOOD = new String[]{ - "veal", "cauliflower", "broccoli", "tomatoes" + "veal", "cauliflower", "broccoli", "tomatoes" }; private final static String[] GREEK_LETTERS = new String[]{ - "Alpha", "Beta", "Gamma" + "Alpha", "Beta", "Gamma" }; - + private final static String[] UNICODE = new String[]{ - "turning", "once", "again", "börkü" + "turning", "once", "again", "börkü" }; @Test public void keywordAndTextAreTheSame() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeyword(ALPHABET[0]) .build(); Collection emits = trie.parseText(ALPHABET[0]); @@ -47,7 +45,7 @@ public class TrieTest { @Test public void keywordAndTextAreTheSameFirstMatch() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeyword(ALPHABET[0]) .build(); Emit firstMatch = trie.firstMatch(ALPHABET[0]); @@ -56,7 +54,7 @@ public class TrieTest { @Test public void textIsLongerThanKeyword() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeyword(ALPHABET[0]) .build(); Collection emits = trie.parseText(" " + ALPHABET[0]); @@ -66,7 +64,7 @@ public class TrieTest { @Test public void textIsLongerThanKeywordFirstMatch() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeyword(ALPHABET[0]) .build(); Emit firstMatch = trie.firstMatch(" " + ALPHABET[0]); @@ -75,7 +73,7 @@ public class TrieTest { @Test public void variousKeywordsOneMatch() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeywords(ALPHABET) .build(); Collection emits = trie.parseText("bcd"); @@ -85,7 +83,7 @@ public class TrieTest { @Test public void variousKeywordsFirstMatch() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeywords(ALPHABET) .build(); Emit firstMatch = trie.firstMatch("bcd"); @@ -94,7 +92,7 @@ public class TrieTest { @Test public void ushersTestAndStopOnHit() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeywords(PRONOUNS) .stopOnHit() .build(); @@ -107,7 +105,7 @@ public class TrieTest { @Test public void ushersTest() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeywords(PRONOUNS) .build(); Collection emits = trie.parseText("ushers"); @@ -120,7 +118,7 @@ public class TrieTest { @Test public void ushersTestWithCapitalKeywords() { - Trie trie = Trie.builder() + Trie trie = builder() .ignoreCase() .addKeyword("HERS") .addKeyword("HIS") @@ -137,7 +135,7 @@ public class TrieTest { @Test public void ushersTestFirstMatch() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeywords(PRONOUNS) .build(); Emit firstMatch = trie.firstMatch("ushers"); @@ -146,7 +144,7 @@ public class TrieTest { @Test public void ushersTestByCallback() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeywords(PRONOUNS) .build(); @@ -168,7 +166,7 @@ public class TrieTest { @Test public void misleadingTest() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeyword("hers") .build(); Collection emits = trie.parseText("h he her hers"); @@ -178,7 +176,7 @@ public class TrieTest { @Test public void misleadingTestFirstMatch() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeyword("hers") .build(); Emit firstMatch = trie.firstMatch("h he her hers"); @@ -187,7 +185,7 @@ public class TrieTest { @Test public void recipes() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeywords(FOOD) .build(); Collection emits = trie.parseText("2 cauliflowers, 3 tomatoes, 4 slices of veal, 100g broccoli"); @@ -200,7 +198,7 @@ public class TrieTest { @Test public void recipesFirstMatch() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeywords(FOOD) .build(); Emit firstMatch = trie.firstMatch("2 cauliflowers, 3 tomatoes, 4 slices of veal, 100g broccoli"); @@ -210,7 +208,7 @@ public class TrieTest { @Test public void longAndShortOverlappingMatch() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeyword("he") .addKeyword("hehehehe") .build(); @@ -227,7 +225,7 @@ public class TrieTest { @Test public void nonOverlapping() { - Trie trie = Trie.builder().removeOverlaps() + Trie trie = builder().removeOverlaps() .addKeyword("ab") .addKeyword("cba") .addKeyword("ababc") @@ -242,7 +240,7 @@ public class TrieTest { @Test public void nonOverlappingFirstMatch() { - Trie trie = Trie.builder().removeOverlaps() + Trie trie = builder().removeOverlaps() .addKeyword("ab") .addKeyword("cba") .addKeyword("ababc") @@ -254,7 +252,7 @@ public class TrieTest { @Test public void containsMatch() { - Trie trie = Trie.builder().removeOverlaps() + Trie trie = builder().removeOverlaps() .addKeyword("ab") .addKeyword("cba") .addKeyword("ababc") @@ -264,7 +262,7 @@ public class TrieTest { @Test public void startOfChurchillSpeech() { - Trie trie = Trie.builder().removeOverlaps() + Trie trie = builder().removeOverlaps() .addKeyword("T") .addKeyword("u") .addKeyword("ur") @@ -282,7 +280,7 @@ public class TrieTest { @Test public void partialMatch() { - Trie trie = Trie.builder() + Trie trie = builder() .onlyWholeWords() .addKeyword("sugar") .build(); @@ -293,7 +291,7 @@ public class TrieTest { @Test public void partialMatchFirstMatch() { - Trie trie = Trie.builder() + Trie trie = builder() .onlyWholeWords() .addKeyword("sugar") .build(); @@ -304,7 +302,7 @@ public class TrieTest { @Test public void tokenizeFullSentence() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeywords(GREEK_LETTERS) .build(); Collection tokens = trie.tokenize("Hear: Alpha team first, Beta from the rear, Gamma in reserve"); @@ -322,7 +320,7 @@ public class TrieTest { // @see https://github.com/robert-bor/aho-corasick/issues/5 @Test public void testStringIndexOutOfBoundsException() { - Trie trie = Trie.builder().ignoreCase().onlyWholeWords() + Trie trie = builder().ignoreCase().onlyWholeWords() .addKeywords(UNICODE) .build(); Collection emits = trie.parseText("TurninG OnCe AgAiN BÖRKÜ"); @@ -336,7 +334,7 @@ public class TrieTest { @Test public void testIgnoreCase() { - Trie trie = Trie.builder().ignoreCase() + Trie trie = builder().ignoreCase() .addKeywords(UNICODE) .build(); Collection emits = trie.parseText("TurninG OnCe AgAiN BÖRKÜ"); @@ -350,7 +348,7 @@ public class TrieTest { @Test public void testIgnoreCaseFirstMatch() { - Trie trie = Trie.builder().ignoreCase() + Trie trie = builder().ignoreCase() .addKeywords(UNICODE) .build(); Emit firstMatch = trie.firstMatch("TurninG OnCe AgAiN BÖRKÜ"); @@ -360,7 +358,7 @@ public class TrieTest { @Test public void tokenizeTokensInSequence() { - Trie trie = Trie.builder() + Trie trie = builder() .addKeywords(GREEK_LETTERS) .build(); Collection tokens = trie.tokenize("Alpha Beta Gamma"); @@ -370,7 +368,7 @@ public class TrieTest { // @see https://github.com/robert-bor/aho-corasick/issues/7 @Test public void testZeroLength() { - Trie trie = Trie.builder().ignoreOverlaps().onlyWholeWords().ignoreCase() + Trie trie = builder().ignoreOverlaps().onlyWholeWords().ignoreCase() .addKeyword("") .build(); trie.tokenize("Try a natural lip and subtle bronzer to keep all the focus on those big bright eyes with NARS Eyeshadow Duo in Rated R And the winner is... Boots No7 Advanced Renewal Anti-ageing Glycolic Peel Kit ($25 amazon.com) won most-appealing peel."); @@ -381,7 +379,7 @@ public class TrieTest { public void testUnicode1() { String target = "LİKE THIS"; // The second character ('İ') is Unicode, which was read by AC as a 2-byte char assertEquals("THIS", target.substring(5, 9)); // Java does it the right way - Trie trie = Trie.builder().ignoreCase().onlyWholeWords() + Trie trie = builder().ignoreCase().onlyWholeWords() .addKeyword("this") .build(); Collection emits = trie.parseText(target); @@ -394,7 +392,7 @@ public class TrieTest { @Test public void testUnicode2() { String target = "LİKE THIS"; // The second character ('İ') is Unicode, which was read by AC as a 2-byte char - Trie trie = Trie.builder() + Trie trie = builder() .ignoreCase() .onlyWholeWords() .addKeyword("this") @@ -406,11 +404,11 @@ public class TrieTest { @Test public void testPartialMatchWhiteSpaces() { - Trie trie = Trie.builder() + Trie trie = builder() .onlyWholeWordsWhiteSpaceSeparated() .addKeyword("#sugar-123") .build(); - Collection emits = trie.parseText("#sugar-123 #sugar-1234"); // left, middle, right test + Collection < Emit > emits = trie.parseText("#sugar-123 #sugar-1234"); // left, middle, right test assertEquals(1, emits.size()); // Match must not be made checkEmit(emits.iterator().next(), 0, 9, "#sugar-123"); } @@ -419,57 +417,57 @@ public class TrieTest { public void testLargeString() { final int interval = 100; final int textSize = 1000000; - final String keyword = FOOD[1]; - final StringBuilder text = randomNumbers(textSize); + final String keyword = FOOD[ 1 ]; + final StringBuilder text = randomNumbers( textSize ); - injectKeyword(text, keyword, interval); + injectKeyword( text, keyword, interval ); - Trie trie = Trie.builder() - .onlyWholeWords() - .addKeyword(keyword) - .build(); + Trie trie = builder() + .onlyWholeWords() + .addKeyword( keyword ) + .build(); - final Collection emits = trie.parseText(text); + final Collection emits = trie.parseText( text ); - assertEquals(textSize / interval, emits.size()); + assertEquals( textSize / interval, emits.size() ); } - + /** * Generates a random sequence of ASCII numbers. - * + * * @param count The number of numbers to generate. * @return A character sequence filled with random digits. */ - private StringBuilder randomNumbers(int count) { - final StringBuilder sb = new StringBuilder(count); - - while (--count > 0) { - sb.append(randomInt(0, 10)); + private StringBuilder randomNumbers( final int count ) { + final StringBuilder sb = new StringBuilder( count ); + + for( int i = count - 1; i >= 0; i-- ) { + sb.append( randomInt( 0, 10 ) ); } return sb; } - + /** * Injects keywords into a string builder. - * - * @param source Should contain a bunch of random data that cannot match - * any keyword. - * @param keyword A keyword to inject repeatedly in the text. + * + * @param source Should contain a bunch of random data that cannot match + * any keyword. + * @param keyword A keyword to inject repeatedly in the text. * @param interval How often to inject the keyword. */ - private void injectKeyword( - final StringBuilder source, - final String keyword, - final int interval) { + private void injectKeyword( + final StringBuilder source, + final String keyword, + final int interval ) { final int length = source.length(); - for (int i = 0; i < length; i += interval) { - source.replace(i, i + keyword.length(), keyword); + for( int i = 0; i < length; i += interval ) { + source.replace( i, i + keyword.length(), keyword ); } } - - private int randomInt(final int min, final int max) { - return ThreadLocalRandom.current().nextInt(min, max); + + private int randomInt( final int min, final int max ) { + return current().nextInt( min, max ); } private void checkEmit(Emit next, int expectedStart, int expectedEnd, String expectedKeyword) { From b5aaa51fdd587fc7d7bf93a154c02ed95a535eef Mon Sep 17 00:00:00 2001 From: robert-bor Date: Wed, 30 Nov 2016 09:10:21 +0100 Subject: [PATCH 3/3] Optimize imports Reformatted code (Java convention; tab is 4 spaces) --- pom.xml | 3 +- .../ahocorasick/interval/IntervalNode.java | 18 +-- .../ahocorasick/interval/IntervalTree.java | 3 +- .../IntervalableComparatorBySize.java | 4 +- src/main/java/org/ahocorasick/trie/State.java | 70 ++++---- src/main/java/org/ahocorasick/trie/Trie.java | 153 +++++++++--------- .../trie/handler/DefaultEmitHandler.java | 3 +- .../ahocorasick/interval/IntervalTest.java | 8 +- .../IntervalableComparatorByPositionTest.java | 9 +- .../IntervalableComparatorBySizeTest.java | 14 +- .../java/org/ahocorasick/trie/StateTest.java | 1 - .../java/org/ahocorasick/trie/TrieTest.java | 81 +++++----- 12 files changed, 187 insertions(+), 180 deletions(-) diff --git a/pom.xml b/pom.xml index a997e98..806ef42 100644 --- a/pom.xml +++ b/pom.xml @@ -1,4 +1,5 @@ - + 4.0.0 org.ahocorasick diff --git a/src/main/java/org/ahocorasick/interval/IntervalNode.java b/src/main/java/org/ahocorasick/interval/IntervalNode.java index d875ea3..92727c9 100644 --- a/src/main/java/org/ahocorasick/interval/IntervalNode.java +++ b/src/main/java/org/ahocorasick/interval/IntervalNode.java @@ -75,9 +75,9 @@ public class IntervalNode { } protected void addToOverlaps( - final Intervalable interval, - final List overlaps, - final List newOverlaps) { + final Intervalable interval, + final List overlaps, + final List newOverlaps) { for (final Intervalable currentInterval : newOverlaps) { if (!currentInterval.equals(interval)) { overlaps.add(currentInterval); @@ -94,9 +94,9 @@ public class IntervalNode { } protected List checkForOverlaps( - final Intervalable interval, final Direction direction) { + final Intervalable interval, final Direction direction) { final List overlaps = new ArrayList<>(); - + for (final Intervalable currentInterval : this.intervals) { switch (direction) { case LEFT: @@ -111,13 +111,13 @@ public class IntervalNode { break; } } - + return overlaps; } - + protected List findOverlappingRanges(IntervalNode node, Intervalable interval) { return node == null - ? Collections.emptyList() - : node.findOverlaps( interval ); + ? Collections.emptyList() + : node.findOverlaps(interval); } } diff --git a/src/main/java/org/ahocorasick/interval/IntervalTree.java b/src/main/java/org/ahocorasick/interval/IntervalTree.java index 3be617e..4dc43b9 100644 --- a/src/main/java/org/ahocorasick/interval/IntervalTree.java +++ b/src/main/java/org/ahocorasick/interval/IntervalTree.java @@ -1,10 +1,11 @@ package org.ahocorasick.interval; -import static java.util.Collections.sort; import java.util.List; import java.util.Set; import java.util.TreeSet; +import static java.util.Collections.sort; + public class IntervalTree { private final IntervalNode rootNode; diff --git a/src/main/java/org/ahocorasick/interval/IntervalableComparatorBySize.java b/src/main/java/org/ahocorasick/interval/IntervalableComparatorBySize.java index 6a33fa8..8b51ed1 100644 --- a/src/main/java/org/ahocorasick/interval/IntervalableComparatorBySize.java +++ b/src/main/java/org/ahocorasick/interval/IntervalableComparatorBySize.java @@ -7,11 +7,11 @@ public class IntervalableComparatorBySize implements Comparator { @Override public int compare(final Intervalable intervalable, final Intervalable intervalable2) { int comparison = intervalable2.size() - intervalable.size(); - + if (comparison == 0) { comparison = intervalable.getStart() - intervalable2.getStart(); } - + return comparison; } diff --git a/src/main/java/org/ahocorasick/trie/State.java b/src/main/java/org/ahocorasick/trie/State.java index fce324e..8cf3e88 100644 --- a/src/main/java/org/ahocorasick/trie/State.java +++ b/src/main/java/org/ahocorasick/trie/State.java @@ -4,43 +4,51 @@ import java.util.*; /** *

- * A state has various important tasks it must attend to: + * A state has various important tasks it must attend to: *

- * - *
    - *
  • success; when a character points to another state, it must return that state
  • - *
  • failure; when a character has no matching state, the algorithm must be able to fall back on a - * state with less depth
  • - *
  • emits; when this state is passed and keywords have been matched, the matches must be - * 'emitted' so that they can be used later on.
  • - *
- * *

- * The root state is special in the sense that it has no failure state; it cannot fail. If it 'fails' - * it will still parse the next character and start from the root node. This ensures that the algorithm - * always runs. All other states always have a fail state. + *

    + *
  • success; when a character points to another state, it must return that state
  • + *
  • failure; when a character has no matching state, the algorithm must be able to fall back on a + * state with less depth
  • + *
  • emits; when this state is passed and keywords have been matched, the matches must be + * 'emitted' so that they can be used later on.
  • + *
+ *

+ *

+ * The root state is special in the sense that it has no failure state; it cannot fail. If it 'fails' + * it will still parse the next character and start from the root node. This ensures that the algorithm + * always runs. All other states always have a fail state. *

* * @author Robert Bor */ public class State { - /** effective the size of the keyword */ + /** + * effective the size of the keyword + */ private final int depth; - /** only used for the root state to refer to itself in case no matches have been found */ + /** + * only used for the root state to refer to itself in case no matches have been found + */ private final State rootState; /** * referred to in the white paper as the 'goto' structure. From a state it is possible to go * to other states, depending on the character passed. */ - private final Map success = new HashMap<>(); + private final Map success = new HashMap<>(); - /** if no matching states are found, the failure state will be returned */ + /** + * if no matching states are found, the failure state will be returned + */ private State failure; - /** whenever this state is reached, it will emit the matches keywords for future reference */ + /** + * whenever this state is reached, it will emit the matches keywords for future reference + */ private Set emits; public State() { @@ -54,11 +62,11 @@ public class State { private State nextState(final Character character, final boolean ignoreRootState) { State nextState = this.success.get(character); - + if (!ignoreRootState && nextState == null && this.rootState != null) { nextState = this.rootState; } - + return nextState; } @@ -69,21 +77,21 @@ public class State { public State nextStateIgnoreRootState(final Character character) { return nextState(character, true); } - - public State addState(final String keyword ) { - State state = this; - - for (final Character character : keyword.toCharArray()) { - state = state.addState(character); - } - - return state; + + public State addState(final String keyword) { + State state = this; + + for (final Character character : keyword.toCharArray()) { + state = state.addState(character); + } + + return state; } public State addState(final Character character) { State nextState = nextStateIgnoreRootState(character); if (nextState == null) { - nextState = new State(this.depth+1); + nextState = new State(this.depth + 1); this.success.put(character, nextState); } return nextState; @@ -107,7 +115,7 @@ public class State { } public Collection emit() { - return this.emits == null ? Collections. emptyList() : this.emits; + return this.emits == null ? Collections.emptyList() : this.emits; } public State failure() { diff --git a/src/main/java/org/ahocorasick/trie/Trie.java b/src/main/java/org/ahocorasick/trie/Trie.java index 9127160..eee9216 100644 --- a/src/main/java/org/ahocorasick/trie/Trie.java +++ b/src/main/java/org/ahocorasick/trie/Trie.java @@ -1,22 +1,24 @@ package org.ahocorasick.trie; -import static java.lang.Character.isAlphabetic; -import static java.lang.Character.isWhitespace; -import static java.lang.Character.toLowerCase; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.Queue; -import java.util.concurrent.LinkedBlockingDeque; import org.ahocorasick.interval.IntervalTree; import org.ahocorasick.interval.Intervalable; import org.ahocorasick.trie.handler.DefaultEmitHandler; import org.ahocorasick.trie.handler.EmitHandler; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Queue; +import java.util.concurrent.LinkedBlockingDeque; + +import static java.lang.Character.*; + +import java.lang.Character; + /** * Based on the Aho-Corasick white paper, Bell technologies: * http://cr.yp.to/bib/1975/aho.pdf - * + * * @author Robert Bor */ public class Trie { @@ -29,42 +31,41 @@ public class Trie { this.trieConfig = trieConfig; this.rootState = new State(); } - + /** * Used by the builder to add a text search keyword. - * + * * @param keyword The search term to add to the list of search terms. - * * @throws NullPointerException if the keyword is null. */ - private void addKeyword( String keyword ) { - if( keyword.length() > 0 ) { - if( isCaseInsensitive() ) { + private void addKeyword(String keyword) { + if (keyword.length() > 0) { + if (isCaseInsensitive()) { keyword = keyword.toLowerCase(); } - addState( keyword ).addEmit( keyword ); + addState(keyword).addEmit(keyword); } } - private State addState( final String keyword ) { - return getRootState().addState( keyword ); + private State addState(final String keyword) { + return getRootState().addState(keyword); } - + public Collection tokenize(final String text) { final Collection tokens = new ArrayList<>(); final Collection collectedEmits = parseText(text); int lastCollectedPosition = -1; - + for (final Emit emit : collectedEmits) { if (emit.getStart() - lastCollectedPosition > 1) { tokens.add(createFragment(emit, text, lastCollectedPosition)); } - + tokens.add(createMatch(emit, text)); lastCollectedPosition = emit.getEnd(); } - + if (text.length() - lastCollectedPosition > 1) { tokens.add(createFragment(null, text, lastCollectedPosition)); } @@ -73,14 +74,14 @@ public class Trie { } private Token createFragment( - final Emit emit, - final String text, - final int lastCollectedPosition) { - return new FragmentToken(text.substring(lastCollectedPosition+1, emit == null ? text.length() : emit.getStart())); + final Emit emit, + final String text, + final int lastCollectedPosition) { + return new FragmentToken(text.substring(lastCollectedPosition + 1, emit == null ? text.length() : emit.getStart())); } private Token createMatch(final Emit emit, final String text) { - return new MatchToken(text.substring(emit.getStart(), emit.getEnd()+1), emit); + return new MatchToken(text.substring(emit.getStart(), emit.getEnd() + 1), emit); } @SuppressWarnings("unchecked") @@ -99,7 +100,7 @@ public class Trie { } if (!trieConfig.isAllowOverlaps()) { - IntervalTree intervalTree = new IntervalTree((List)(List)collectedEmits); + IntervalTree intervalTree = new IntervalTree((List) (List) collectedEmits); intervalTree.removeOverlaps((List) (List) collectedEmits); } @@ -112,15 +113,15 @@ public class Trie { public void parseText(final CharSequence text, final EmitHandler emitHandler) { State currentState = getRootState(); - + for (int position = 0; position < text.length(); position++) { Character character = text.charAt(position); - + // TODO: Maybe lowercase the entire string at once? if (trieConfig.isCaseInsensitive()) { character = toLowerCase(character); } - + currentState = getState(currentState, character); if (storeEmits(position, currentState, emitHandler) && trieConfig.isStopOnHit()) { return; @@ -138,18 +139,18 @@ public class Trie { } else { // Fast path. Returns first match found. State currentState = getRootState(); - + for (int position = 0; position < text.length(); position++) { Character character = text.charAt(position); - + // TODO: Lowercase the entire string at once? if (trieConfig.isCaseInsensitive()) { character = toLowerCase(character); } - + currentState = getState(currentState, character); Collection emitStrs = currentState.emit(); - + if (emitStrs != null && !emitStrs.isEmpty()) { for (final String emitStr : emitStrs) { final Emit emit = new Emit(position - emitStr.length() + 1, position, emitStr); @@ -164,26 +165,26 @@ public class Trie { } } } - + return null; } private boolean isPartialMatch(final CharSequence searchText, final Emit emit) { return (emit.getStart() != 0 && - isAlphabetic(searchText.charAt(emit.getStart() - 1))) || - (emit.getEnd() + 1 != searchText.length() && - isAlphabetic(searchText.charAt(emit.getEnd() + 1))); + isAlphabetic(searchText.charAt(emit.getStart() - 1))) || + (emit.getEnd() + 1 != searchText.length() && + isAlphabetic(searchText.charAt(emit.getEnd() + 1))); } private void removePartialMatches(final CharSequence searchText, final List collectedEmits) { final List removeEmits = new ArrayList<>(); - + for (final Emit emit : collectedEmits) { if (isPartialMatch(searchText, emit)) { removeEmits.add(emit); } } - + for (final Emit removeEmit : removeEmits) { collectedEmits.remove(removeEmit); } @@ -192,15 +193,15 @@ public class Trie { private void removePartialMatchesWhiteSpaceSeparated(final CharSequence searchText, final List collectedEmits) { final long size = searchText.length(); final List removeEmits = new ArrayList<>(); - + for (final Emit emit : collectedEmits) { if ((emit.getStart() == 0 || isWhitespace(searchText.charAt(emit.getStart() - 1))) && - (emit.getEnd() + 1 == size || isWhitespace(searchText.charAt(emit.getEnd() + 1)))) { + (emit.getEnd() + 1 == size || isWhitespace(searchText.charAt(emit.getEnd() + 1)))) { continue; } removeEmits.add(emit); } - + for (final Emit removeEmit : removeEmits) { collectedEmits.remove(removeEmit); } @@ -209,12 +210,12 @@ public class Trie { private State getState(final State initialState, final Character character) { State currentState = initialState; State updatedState = currentState.nextState(character); - + while (updatedState == null) { currentState = currentState.failure(); updatedState = currentState.nextState(character); } - + return updatedState; } @@ -249,12 +250,12 @@ public class Trie { } private boolean storeEmits( - final int position, - final State currentState, - final EmitHandler emitHandler) { + final int position, + final State currentState, + final EmitHandler emitHandler) { boolean emitted = false; final Collection emits = currentState.emit(); - + // TODO: The check for empty might be superfluous. if (emits != null && !emits.isEmpty()) { for (final String emit : emits) { @@ -262,22 +263,22 @@ public class Trie { emitted = true; } } - + return emitted; } private boolean isCaseInsensitive() { - return trieConfig.isCaseInsensitive(); + return trieConfig.isCaseInsensitive(); } - + private State getRootState() { - return this.rootState; + return this.rootState; } /** * Constructs a TrieBuilder instance for configuring the Trie using a fluent * interface. - * + * * @return The builder used to configure its Trie. */ public static TrieBuilder builder() { @@ -296,31 +297,30 @@ public class Trie { /** * Default (empty) constructor. */ - private TrieBuilder() {} + private TrieBuilder() { + } /** * Adds a keyword to the Trie's list of text search keywords. - * + * * @param keyword The keyword to add to the list. - * * @return This builder. * @throws NullPointerException if the keyword is null. */ public TrieBuilder addKeyword(final CharSequence keyword) { - getTrie().addKeyword( keyword.toString() ); + getTrie().addKeyword(keyword.toString()); return this; } /** * Adds a list of keywords to the Trie's list of text search keywords. - * + * * @param keywords The keywords to add to the list. - * * @return This builder. */ public TrieBuilder addKeywords(final CharSequence... keywords) { - for( final CharSequence keyword : keywords ) { - addKeyword( keyword ); + for (final CharSequence keyword : keywords) { + addKeyword(keyword); } return this; @@ -328,19 +328,18 @@ public class Trie { /** * Adds a list of keywords to the Trie's list of text search keywords. - * + * * @param keywords The keywords to add to the list. - * * @return This builder. */ public TrieBuilder addKeywords(final Collection keywords) { - return addKeywords( keywords.toArray( new CharSequence[ keywords.size() ] ) ); + return addKeywords(keywords.toArray(new CharSequence[keywords.size()])); } /** * Configure the Trie to ignore case when searching for keywords in the * text. - * + * * @return This builder. */ public TrieBuilder ignoreCase() { @@ -350,7 +349,7 @@ public class Trie { /** * Configure the Trie to ignore overlapping keywords. - * + * * @return This builder. */ public TrieBuilder ignoreOverlaps() { @@ -360,7 +359,7 @@ public class Trie { /** * Configure the Trie to match whole keywords in the text. - * + * * @return This builder. */ public TrieBuilder onlyWholeWords() { @@ -372,18 +371,18 @@ public class Trie { * Configure the Trie to match whole keywords that are separated by * whitespace in the text. For example, "this keyword thatkeyword" * would only match the first occurrence of "keyword". - * + * * @return This builder. */ public TrieBuilder onlyWholeWordsWhiteSpaceSeparated() { getTrieConfig().setOnlyWholeWordsWhiteSpaceSeparated(true); return this; } - + /** * Configure the Trie to stop searching for matches after the first * keyword is found in the text. - * + * * @return This builder. */ public TrieBuilder onlyFirstMatch() { @@ -393,27 +392,27 @@ public class Trie { /** * Construct the Trie using the builder settings. - * + * * @return The configured Trie. */ public Trie build() { getTrie().constructFailureStates(); return getTrie(); } - + private Trie getTrie() { return this.trie; } - + private TrieConfig getTrieConfig() { return this.trieConfig; } - + /** * @deprecated Use onlyFirstMatch() */ public TrieBuilder stopOnHit() { - return onlyFirstMatch(); + return onlyFirstMatch(); } /** diff --git a/src/main/java/org/ahocorasick/trie/handler/DefaultEmitHandler.java b/src/main/java/org/ahocorasick/trie/handler/DefaultEmitHandler.java index dd3e7a5..4531f3d 100644 --- a/src/main/java/org/ahocorasick/trie/handler/DefaultEmitHandler.java +++ b/src/main/java/org/ahocorasick/trie/handler/DefaultEmitHandler.java @@ -1,8 +1,9 @@ package org.ahocorasick.trie.handler; +import org.ahocorasick.trie.Emit; + import java.util.ArrayList; import java.util.List; -import org.ahocorasick.trie.Emit; public class DefaultEmitHandler implements EmitHandler { diff --git a/src/test/java/org/ahocorasick/interval/IntervalTest.java b/src/test/java/org/ahocorasick/interval/IntervalTest.java index 67d42d5..4a3598e 100644 --- a/src/test/java/org/ahocorasick/interval/IntervalTest.java +++ b/src/test/java/org/ahocorasick/interval/IntervalTest.java @@ -2,11 +2,11 @@ package org.ahocorasick.interval; import org.junit.Test; -import java.util.*; +import java.util.Iterator; +import java.util.Set; +import java.util.TreeSet; -import static junit.framework.Assert.assertEquals; -import static junit.framework.Assert.assertFalse; -import static junit.framework.Assert.assertTrue; +import static junit.framework.Assert.*; public class IntervalTest { diff --git a/src/test/java/org/ahocorasick/interval/IntervalableComparatorByPositionTest.java b/src/test/java/org/ahocorasick/interval/IntervalableComparatorByPositionTest.java index 1dd6dc1..40ad64e 100644 --- a/src/test/java/org/ahocorasick/interval/IntervalableComparatorByPositionTest.java +++ b/src/test/java/org/ahocorasick/interval/IntervalableComparatorByPositionTest.java @@ -3,10 +3,9 @@ package org.ahocorasick.interval; import org.junit.Test; import java.util.ArrayList; -import java.util.Collections; -import static java.util.Collections.sort; import java.util.List; +import static java.util.Collections.sort; import static junit.framework.Assert.assertEquals; public class IntervalableComparatorByPositionTest { @@ -14,9 +13,9 @@ public class IntervalableComparatorByPositionTest { @Test public void sortOnPosition() { List intervals = new ArrayList<>(); - intervals.add(new Interval(4,5)); - intervals.add(new Interval(1,4)); - intervals.add(new Interval(3,8)); + intervals.add(new Interval(4, 5)); + intervals.add(new Interval(1, 4)); + intervals.add(new Interval(3, 8)); sort(intervals, new IntervalableComparatorByPosition()); assertEquals(4, intervals.get(0).size()); assertEquals(6, intervals.get(1).size()); diff --git a/src/test/java/org/ahocorasick/interval/IntervalableComparatorBySizeTest.java b/src/test/java/org/ahocorasick/interval/IntervalableComparatorBySizeTest.java index a0db17e..31fc84d 100644 --- a/src/test/java/org/ahocorasick/interval/IntervalableComparatorBySizeTest.java +++ b/src/test/java/org/ahocorasick/interval/IntervalableComparatorBySizeTest.java @@ -3,11 +3,9 @@ package org.ahocorasick.interval; import org.junit.Test; import java.util.ArrayList; -import java.util.Collections; -import static java.util.Collections.sort; -import static java.util.Collections.sort; import java.util.List; +import static java.util.Collections.sort; import static junit.framework.Assert.assertEquals; public class IntervalableComparatorBySizeTest { @@ -15,9 +13,9 @@ public class IntervalableComparatorBySizeTest { @Test public void sortOnSize() { List intervals = new ArrayList<>(); - intervals.add(new Interval(4,5)); - intervals.add(new Interval(1,4)); - intervals.add(new Interval(3,8)); + intervals.add(new Interval(4, 5)); + intervals.add(new Interval(1, 4)); + intervals.add(new Interval(3, 8)); sort(intervals, new IntervalableComparatorBySize()); assertEquals(6, intervals.get(0).size()); assertEquals(4, intervals.get(1).size()); @@ -27,8 +25,8 @@ public class IntervalableComparatorBySizeTest { @Test public void sortOnSizeThenPosition() { List intervals = new ArrayList<>(); - intervals.add(new Interval(4,7)); - intervals.add(new Interval(2,5)); + intervals.add(new Interval(4, 7)); + intervals.add(new Interval(2, 5)); sort(intervals, new IntervalableComparatorBySize()); assertEquals(2, intervals.get(0).getStart()); assertEquals(4, intervals.get(1).getStart()); diff --git a/src/test/java/org/ahocorasick/trie/StateTest.java b/src/test/java/org/ahocorasick/trie/StateTest.java index a9cc745..2694305 100644 --- a/src/test/java/org/ahocorasick/trie/StateTest.java +++ b/src/test/java/org/ahocorasick/trie/StateTest.java @@ -1,6 +1,5 @@ package org.ahocorasick.trie; -import org.ahocorasick.trie.State; import org.junit.Test; import static junit.framework.Assert.assertEquals; diff --git a/src/test/java/org/ahocorasick/trie/TrieTest.java b/src/test/java/org/ahocorasick/trie/TrieTest.java index e9d75d9..00070d3 100644 --- a/src/test/java/org/ahocorasick/trie/TrieTest.java +++ b/src/test/java/org/ahocorasick/trie/TrieTest.java @@ -1,36 +1,37 @@ package org.ahocorasick.trie; +import org.ahocorasick.trie.handler.EmitHandler; +import org.junit.Test; + import java.util.ArrayList; import java.util.Collection; import java.util.Iterator; import java.util.List; -import java.util.concurrent.ThreadLocalRandom; + import static java.util.concurrent.ThreadLocalRandom.current; import static junit.framework.Assert.assertEquals; import static org.ahocorasick.trie.Trie.builder; -import org.ahocorasick.trie.handler.EmitHandler; import static org.junit.Assert.assertTrue; -import org.junit.Test; public class TrieTest { private final static String[] ALPHABET = new String[]{ - "abc", "bcd", "cde" + "abc", "bcd", "cde" }; - + private final static String[] PRONOUNS = new String[]{ - "hers", "his", "she", "he" + "hers", "his", "she", "he" }; private final static String[] FOOD = new String[]{ - "veal", "cauliflower", "broccoli", "tomatoes" + "veal", "cauliflower", "broccoli", "tomatoes" }; private final static String[] GREEK_LETTERS = new String[]{ - "Alpha", "Beta", "Gamma" + "Alpha", "Beta", "Gamma" }; - + private final static String[] UNICODE = new String[]{ - "turning", "once", "again", "börkü" + "turning", "once", "again", "börkü" }; @Test @@ -408,7 +409,7 @@ public class TrieTest { .onlyWholeWordsWhiteSpaceSeparated() .addKeyword("#sugar-123") .build(); - Collection < Emit > emits = trie.parseText("#sugar-123 #sugar-1234"); // left, middle, right test + Collection emits = trie.parseText("#sugar-123 #sugar-1234"); // left, middle, right test assertEquals(1, emits.size()); // Match must not be made checkEmit(emits.iterator().next(), 0, 9, "#sugar-123"); } @@ -417,57 +418,57 @@ public class TrieTest { public void testLargeString() { final int interval = 100; final int textSize = 1000000; - final String keyword = FOOD[ 1 ]; - final StringBuilder text = randomNumbers( textSize ); + final String keyword = FOOD[1]; + final StringBuilder text = randomNumbers(textSize); - injectKeyword( text, keyword, interval ); + injectKeyword(text, keyword, interval); Trie trie = builder() - .onlyWholeWords() - .addKeyword( keyword ) - .build(); + .onlyWholeWords() + .addKeyword(keyword) + .build(); - final Collection emits = trie.parseText( text ); + final Collection emits = trie.parseText(text); - assertEquals( textSize / interval, emits.size() ); + assertEquals(textSize / interval, emits.size()); } - + /** * Generates a random sequence of ASCII numbers. - * + * * @param count The number of numbers to generate. * @return A character sequence filled with random digits. */ - private StringBuilder randomNumbers( final int count ) { - final StringBuilder sb = new StringBuilder( count ); - - for( int i = count - 1; i >= 0; i-- ) { - sb.append( randomInt( 0, 10 ) ); + private StringBuilder randomNumbers(final int count) { + final StringBuilder sb = new StringBuilder(count); + + for (int i = count - 1; i >= 0; i--) { + sb.append(randomInt(0, 10)); } return sb; } - + /** * Injects keywords into a string builder. - * - * @param source Should contain a bunch of random data that cannot match - * any keyword. - * @param keyword A keyword to inject repeatedly in the text. + * + * @param source Should contain a bunch of random data that cannot match + * any keyword. + * @param keyword A keyword to inject repeatedly in the text. * @param interval How often to inject the keyword. */ - private void injectKeyword( - final StringBuilder source, - final String keyword, - final int interval ) { + private void injectKeyword( + final StringBuilder source, + final String keyword, + final int interval) { final int length = source.length(); - for( int i = 0; i < length; i += interval ) { - source.replace( i, i + keyword.length(), keyword ); + for (int i = 0; i < length; i += interval) { + source.replace(i, i + keyword.length(), keyword); } } - - private int randomInt( final int min, final int max ) { - return current().nextInt( min, max ); + + private int randomInt(final int min, final int max) { + return current().nextInt(min, max); } private void checkEmit(Emit next, int expectedStart, int expectedEnd, String expectedKeyword) {