From 267e8950592e685b0fc21c2cf6dad7db2775356a Mon Sep 17 00:00:00 2001 From: djarvis Date: Tue, 29 Nov 2016 22:34:55 -0800 Subject: [PATCH 1/2] Added missing override annotations. Added final modifier to Interval member variables. Updated documentation for ignoreCase (issue #33) and moved the ignore methods to the top of the builder to reflect their preferred calling order. --- .../org/ahocorasick/interval/Interval.java | 8 ++- src/main/java/org/ahocorasick/trie/Emit.java | 1 - src/main/java/org/ahocorasick/trie/Trie.java | 55 +++++++++++-------- 3 files changed, 38 insertions(+), 26 deletions(-) diff --git a/src/main/java/org/ahocorasick/interval/Interval.java b/src/main/java/org/ahocorasick/interval/Interval.java index 50254cf..b6b756a 100644 --- a/src/main/java/org/ahocorasick/interval/Interval.java +++ b/src/main/java/org/ahocorasick/interval/Interval.java @@ -2,8 +2,8 @@ package org.ahocorasick.interval; public class Interval implements Intervalable { - private int start; - private int end; + private final int start; + private final int end; /** * Constructs an interval with a start and end position. @@ -21,6 +21,7 @@ public class Interval implements Intervalable { * * @return A number between 0 (start of text) and the text length. */ + @Override public int getStart() { return this.start; } @@ -30,6 +31,7 @@ public class Interval implements Intervalable { * * @return A number between getStart() + 1 and the text length. */ + @Override public int getEnd() { return this.end; } @@ -39,6 +41,7 @@ public class Interval implements Intervalable { * * @return The end position less the start position, plus one. */ + @Override public int size() { return end - start + 1; } @@ -47,6 +50,7 @@ public class Interval implements Intervalable { * Answers whether the given interval overlaps this interval * instance. * + * @param other * @return true The intervals overlap. */ public boolean overlapsWith(final Interval other) { diff --git a/src/main/java/org/ahocorasick/trie/Emit.java b/src/main/java/org/ahocorasick/trie/Emit.java index 60c1f9e..8c17253 100644 --- a/src/main/java/org/ahocorasick/trie/Emit.java +++ b/src/main/java/org/ahocorasick/trie/Emit.java @@ -20,5 +20,4 @@ public class Emit extends Interval implements Intervalable { public String toString() { return super.toString() + "=" + this.keyword; } - } diff --git a/src/main/java/org/ahocorasick/trie/Trie.java b/src/main/java/org/ahocorasick/trie/Trie.java index 88097a5..6df993d 100644 --- a/src/main/java/org/ahocorasick/trie/Trie.java +++ b/src/main/java/org/ahocorasick/trie/Trie.java @@ -147,10 +147,17 @@ public class Trie { } } + /** + * The first matching text sequence. + * + * @param text The text to search for keywords. + * @return null if no matches found. + */ public Emit firstMatch(final CharSequence text) { if (!trieConfig.isAllowOverlaps()) { // Slow path. Needs to find all the matches to detect overlaps. - Collection parseText = parseText(text); + final Collection parseText = parseText(text); + if (parseText != null && !parseText.isEmpty()) { return parseText.iterator().next(); } @@ -170,7 +177,7 @@ public class Trie { Collection emitStrs = currentState.emit(); if (emitStrs != null && !emitStrs.isEmpty()) { - for (String emitStr : emitStrs) { + for (final String emitStr : emitStrs) { final Emit emit = new Emit(position - emitStr.length() + 1, position, emitStr); if (trieConfig.isOnlyWholeWords()) { if (!isPartialMatch(text, emit)) { @@ -309,6 +316,29 @@ public class Trie { */ private TrieBuilder() {} + /** + * Configure the Trie to ignore case when searching for keywords in + * the text. This must be called before calling addKeyword because + * the algorithm converts keywords to lowercase as they are added, + * depending on this case sensitivity setting. + * + * @return This builder. + */ + public TrieBuilder ignoreCase() { + this.trieConfig.setCaseInsensitive(true); + return this; + } + + /** + * Configure the Trie to ignore overlapping keywords. + * + * @return This builder. + */ + public TrieBuilder ignoreOverlaps() { + this.trieConfig.setAllowOverlaps(false); + return this; + } + /** * Adds a keyword to the Trie's list of text search keywords. * @@ -346,27 +376,6 @@ public class Trie { return this; } - /** - * Configure the Trie to ignore case when searching for keywords in - * the text. - * - * @return This builder. - */ - public TrieBuilder ignoreCase() { - this.trieConfig.setCaseInsensitive(true); - return this; - } - - /** - * Configure the Trie to ignore overlapping keywords. - * - * @return This builder. - */ - public TrieBuilder ignoreOverlaps() { - this.trieConfig.setAllowOverlaps(false); - return this; - } - /** * Configure the Trie to match whole keywords in the text. * From 255069624b60526844a61c2d7f158762febe9249 Mon Sep 17 00:00:00 2001 From: robert-bor Date: Wed, 30 Nov 2016 12:07:03 +0100 Subject: [PATCH 2/2] Optimize imports Reformatted code (Java convention; tab is 4 spaces) --- pom.xml | 3 +- .../org/ahocorasick/interval/Interval.java | 10 +- .../ahocorasick/interval/IntervalNode.java | 6 +- .../ahocorasick/interval/Intervalable.java | 2 + src/main/java/org/ahocorasick/trie/State.java | 70 ++++---- src/main/java/org/ahocorasick/trie/Trie.java | 159 +++++++++--------- .../java/org/ahocorasick/trie/TrieConfig.java | 12 +- .../ahocorasick/interval/IntervalTest.java | 16 +- .../interval/IntervalTreeTest.java | 6 +- .../IntervalableComparatorByPositionTest.java | 6 +- .../IntervalableComparatorBySizeTest.java | 10 +- .../java/org/ahocorasick/trie/StateTest.java | 7 +- .../java/org/ahocorasick/trie/TrieTest.java | 78 ++++----- 13 files changed, 200 insertions(+), 185 deletions(-) diff --git a/pom.xml b/pom.xml index 919b66d..1d2c17b 100644 --- a/pom.xml +++ b/pom.xml @@ -1,4 +1,5 @@ - + 4.0.0 org.ahocorasick diff --git a/src/main/java/org/ahocorasick/interval/Interval.java b/src/main/java/org/ahocorasick/interval/Interval.java index b6b756a..e3b0012 100644 --- a/src/main/java/org/ahocorasick/interval/Interval.java +++ b/src/main/java/org/ahocorasick/interval/Interval.java @@ -9,7 +9,7 @@ public class Interval implements Intervalable { * Constructs an interval with a start and end position. * * @param start The interval's starting text position. - * @param end The interval's ending text position. + * @param end The interval's ending text position. */ public Interval(final int start, final int end) { this.start = start; @@ -55,7 +55,7 @@ public class Interval implements Intervalable { */ public boolean overlapsWith(final Interval other) { return this.start <= other.getEnd() && - this.end >= other.getStart(); + this.end >= other.getStart(); } public boolean overlapsWith(int point) { @@ -67,9 +67,9 @@ public class Interval implements Intervalable { if (!(o instanceof Intervalable)) { return false; } - Intervalable other = (Intervalable)o; + Intervalable other = (Intervalable) o; return this.start == other.getStart() && - this.end == other.getEnd(); + this.end == other.getEnd(); } @Override @@ -82,7 +82,7 @@ public class Interval implements Intervalable { if (!(o instanceof Intervalable)) { return -1; } - Intervalable other = (Intervalable)o; + Intervalable other = (Intervalable) o; int comparison = this.start - other.getStart(); return comparison != 0 ? comparison : this.end - other.getEnd(); } diff --git a/src/main/java/org/ahocorasick/interval/IntervalNode.java b/src/main/java/org/ahocorasick/interval/IntervalNode.java index 11db0ae..22242fa 100644 --- a/src/main/java/org/ahocorasick/interval/IntervalNode.java +++ b/src/main/java/org/ahocorasick/interval/IntervalNode.java @@ -6,7 +6,7 @@ import java.util.List; public class IntervalNode { - private enum Direction { LEFT, RIGHT } + private enum Direction {LEFT, RIGHT} private IntervalNode left = null; private IntervalNode right = null; @@ -93,12 +93,12 @@ public class IntervalNode { List overlaps = new ArrayList(); for (Intervalable currentInterval : this.intervals) { switch (direction) { - case LEFT : + case LEFT: if (currentInterval.getStart() <= interval.getEnd()) { overlaps.add(currentInterval); } break; - case RIGHT : + case RIGHT: if (currentInterval.getEnd() >= interval.getStart()) { overlaps.add(currentInterval); } diff --git a/src/main/java/org/ahocorasick/interval/Intervalable.java b/src/main/java/org/ahocorasick/interval/Intervalable.java index 286a232..0dd5f69 100644 --- a/src/main/java/org/ahocorasick/interval/Intervalable.java +++ b/src/main/java/org/ahocorasick/interval/Intervalable.java @@ -3,7 +3,9 @@ package org.ahocorasick.interval; public interface Intervalable extends Comparable { public int getStart(); + public int getEnd(); + public int size(); } diff --git a/src/main/java/org/ahocorasick/trie/State.java b/src/main/java/org/ahocorasick/trie/State.java index 0055d91..e192207 100644 --- a/src/main/java/org/ahocorasick/trie/State.java +++ b/src/main/java/org/ahocorasick/trie/State.java @@ -4,43 +4,51 @@ import java.util.*; /** *

- * A state has various important tasks it must attend to: + * A state has various important tasks it must attend to: *

- * - *
    - *
  • success; when a character points to another state, it must return that state
  • - *
  • failure; when a character has no matching state, the algorithm must be able to fall back on a - * state with less depth
  • - *
  • emits; when this state is passed and keywords have been matched, the matches must be - * 'emitted' so that they can be used later on.
  • - *
- * *

- * The root state is special in the sense that it has no failure state; it cannot fail. If it 'fails' - * it will still parse the next character and start from the root node. This ensures that the algorithm - * always runs. All other states always have a fail state. + *

    + *
  • success; when a character points to another state, it must return that state
  • + *
  • failure; when a character has no matching state, the algorithm must be able to fall back on a + * state with less depth
  • + *
  • emits; when this state is passed and keywords have been matched, the matches must be + * 'emitted' so that they can be used later on.
  • + *
+ *

+ *

+ * The root state is special in the sense that it has no failure state; it cannot fail. If it 'fails' + * it will still parse the next character and start from the root node. This ensures that the algorithm + * always runs. All other states always have a fail state. *

* * @author Robert Bor */ public class State { - /** effective the size of the keyword */ + /** + * effective the size of the keyword + */ private final int depth; - /** only used for the root state to refer to itself in case no matches have been found */ + /** + * only used for the root state to refer to itself in case no matches have been found + */ private final State rootState; /** * referred to in the white paper as the 'goto' structure. From a state it is possible to go * to other states, depending on the character passed. */ - private final Map success = new HashMap<>(); + private final Map success = new HashMap<>(); - /** if no matching states are found, the failure state will be returned */ + /** + * if no matching states are found, the failure state will be returned + */ private State failure; - /** whenever this state is reached, it will emit the matches keywords for future reference */ + /** + * whenever this state is reached, it will emit the matches keywords for future reference + */ private Set emits; public State() { @@ -54,11 +62,11 @@ public class State { private State nextState(final Character character, final boolean ignoreRootState) { State nextState = this.success.get(character); - + if (!ignoreRootState && nextState == null && this.rootState != null) { nextState = this.rootState; } - + return nextState; } @@ -69,21 +77,21 @@ public class State { public State nextStateIgnoreRootState(Character character) { return nextState(character, true); } - - public State addState( String keyword ) { - State state = this; - - for (final Character character : keyword.toCharArray()) { - state = state.addState(character); - } - - return state; + + public State addState(String keyword) { + State state = this; + + for (final Character character : keyword.toCharArray()) { + state = state.addState(character); + } + + return state; } public State addState(Character character) { State nextState = nextStateIgnoreRootState(character); if (nextState == null) { - nextState = new State(this.depth+1); + nextState = new State(this.depth + 1); this.success.put(character, nextState); } return nextState; @@ -107,7 +115,7 @@ public class State { } public Collection emit() { - return this.emits == null ? Collections. emptyList() : this.emits; + return this.emits == null ? Collections.emptyList() : this.emits; } public State failure() { diff --git a/src/main/java/org/ahocorasick/trie/Trie.java b/src/main/java/org/ahocorasick/trie/Trie.java index 6df993d..0c6ece0 100644 --- a/src/main/java/org/ahocorasick/trie/Trie.java +++ b/src/main/java/org/ahocorasick/trie/Trie.java @@ -1,20 +1,22 @@ package org.ahocorasick.trie; -import static java.lang.Character.isWhitespace; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.Queue; -import java.util.concurrent.LinkedBlockingDeque; import org.ahocorasick.interval.IntervalTree; import org.ahocorasick.interval.Intervalable; import org.ahocorasick.trie.handler.DefaultEmitHandler; import org.ahocorasick.trie.handler.EmitHandler; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Queue; +import java.util.concurrent.LinkedBlockingDeque; + +import static java.lang.Character.isWhitespace; + /** * Based on the Aho-Corasick white paper, Bell technologies: * http://cr.yp.to/bib/1975/aho.pdf - * + * * @author Robert Bor */ public class Trie { @@ -27,21 +29,20 @@ public class Trie { this.trieConfig = trieConfig; this.rootState = new State(); } - + /** * Used by the builder to add a text search keyword. - * + * * @param keyword The search term to add to the list of search terms. - * * @throws NullPointerException if the keyword is null. */ private void addKeyword(String keyword) { - if( keyword.isEmpty() ) { - return; + if (keyword.isEmpty()) { + return; } - - if( isCaseInsensitive() ) { - keyword = keyword.toLowerCase(); + + if (isCaseInsensitive()) { + keyword = keyword.toLowerCase(); } addState(keyword).addEmit(keyword); @@ -49,44 +50,44 @@ public class Trie { /** * Delegates to addKeyword. - * + * * @param keywords List of search term to add to the list of search terms. */ - private void addKeywords( final String[] keywords ) { - for( final String keyword : keywords ) { - addKeyword( keyword ); - } + private void addKeywords(final String[] keywords) { + for (final String keyword : keywords) { + addKeyword(keyword); + } } - + /** * Delegates to addKeyword. - * + * * @param keywords List of search term to add to the list of search terms. */ - private void addKeywords( final Collection keywords ) { - for( final String keyword : keywords ) { - addKeyword( keyword ); - } + private void addKeywords(final Collection keywords) { + for (final String keyword : keywords) { + addKeyword(keyword); + } } private State addState(final String keyword) { return getRootState().addState(keyword); } - + public Collection tokenize(final String text) { final Collection tokens = new ArrayList<>(); final Collection collectedEmits = parseText(text); int lastCollectedPosition = -1; - + for (final Emit emit : collectedEmits) { if (emit.getStart() - lastCollectedPosition > 1) { tokens.add(createFragment(emit, text, lastCollectedPosition)); } - + tokens.add(createMatch(emit, text)); lastCollectedPosition = emit.getEnd(); } - + if (text.length() - lastCollectedPosition > 1) { tokens.add(createFragment(null, text, lastCollectedPosition)); } @@ -95,11 +96,11 @@ public class Trie { } private Token createFragment(final Emit emit, final String text, final int lastCollectedPosition) { - return new FragmentToken(text.substring(lastCollectedPosition+1, emit == null ? text.length() : emit.getStart())); + return new FragmentToken(text.substring(lastCollectedPosition + 1, emit == null ? text.length() : emit.getStart())); } private Token createMatch(Emit emit, String text) { - return new MatchToken(text.substring(emit.getStart(), emit.getEnd()+1), emit); + return new MatchToken(text.substring(emit.getStart(), emit.getEnd() + 1), emit); } @SuppressWarnings("unchecked") @@ -118,7 +119,7 @@ public class Trie { } if (!trieConfig.isAllowOverlaps()) { - IntervalTree intervalTree = new IntervalTree((List)(List)collectedEmits); + IntervalTree intervalTree = new IntervalTree((List) (List) collectedEmits); intervalTree.removeOverlaps((List) (List) collectedEmits); } @@ -131,15 +132,15 @@ public class Trie { public void parseText(final CharSequence text, final EmitHandler emitHandler) { State currentState = getRootState(); - + for (int position = 0; position < text.length(); position++) { Character character = text.charAt(position); - + // TODO: Maybe lowercase the entire string at once? if (trieConfig.isCaseInsensitive()) { character = Character.toLowerCase(character); } - + currentState = getState(currentState, character); if (storeEmits(position, currentState, emitHandler) && trieConfig.isStopOnHit()) { return; @@ -149,7 +150,7 @@ public class Trie { /** * The first matching text sequence. - * + * * @param text The text to search for keywords. * @return null if no matches found. */ @@ -164,18 +165,18 @@ public class Trie { } else { // Fast path. Returns first match found. State currentState = getRootState(); - + for (int position = 0; position < text.length(); position++) { Character character = text.charAt(position); - + // TODO: Lowercase the entire string at once? if (trieConfig.isCaseInsensitive()) { character = Character.toLowerCase(character); } - + currentState = getState(currentState, character); Collection emitStrs = currentState.emit(); - + if (emitStrs != null && !emitStrs.isEmpty()) { for (final String emitStr : emitStrs) { final Emit emit = new Emit(position - emitStr.length() + 1, position, emitStr); @@ -190,26 +191,26 @@ public class Trie { } } } - + return null; } private boolean isPartialMatch(final CharSequence searchText, final Emit emit) { return (emit.getStart() != 0 && - Character.isAlphabetic(searchText.charAt(emit.getStart() - 1))) || - (emit.getEnd() + 1 != searchText.length() && - Character.isAlphabetic(searchText.charAt(emit.getEnd() + 1))); + Character.isAlphabetic(searchText.charAt(emit.getStart() - 1))) || + (emit.getEnd() + 1 != searchText.length() && + Character.isAlphabetic(searchText.charAt(emit.getEnd() + 1))); } private void removePartialMatches(final CharSequence searchText, final List collectedEmits) { final List removeEmits = new ArrayList<>(); - + for (final Emit emit : collectedEmits) { if (isPartialMatch(searchText, emit)) { removeEmits.add(emit); } } - + for (final Emit removeEmit : removeEmits) { collectedEmits.remove(removeEmit); } @@ -218,15 +219,15 @@ public class Trie { private void removePartialMatchesWhiteSpaceSeparated(final CharSequence searchText, final List collectedEmits) { final long size = searchText.length(); final List removeEmits = new ArrayList<>(); - + for (final Emit emit : collectedEmits) { if ((emit.getStart() == 0 || isWhitespace(searchText.charAt(emit.getStart() - 1))) && - (emit.getEnd() + 1 == size || isWhitespace(searchText.charAt(emit.getEnd() + 1)))) { + (emit.getEnd() + 1 == size || isWhitespace(searchText.charAt(emit.getEnd() + 1)))) { continue; } removeEmits.add(emit); } - + for (final Emit removeEmit : removeEmits) { collectedEmits.remove(removeEmit); } @@ -234,12 +235,12 @@ public class Trie { private State getState(State currentState, final Character character) { State newCurrentState = currentState.nextState(character); - + while (newCurrentState == null) { currentState = currentState.failure(); newCurrentState = currentState.nextState(character); } - + return newCurrentState; } @@ -276,7 +277,7 @@ public class Trie { private boolean storeEmits(final int position, final State currentState, final EmitHandler emitHandler) { boolean emitted = false; final Collection emits = currentState.emit(); - + // TODO: The check for empty might be superfluous. if (emits != null && !emits.isEmpty()) { for (final String emit : emits) { @@ -284,21 +285,21 @@ public class Trie { emitted = true; } } - + return emitted; } private boolean isCaseInsensitive() { - return trieConfig.isCaseInsensitive(); + return trieConfig.isCaseInsensitive(); } - + private State getRootState() { - return this.rootState; + return this.rootState; } /** * Provides a fluent interface for constructing Trie instances. - * + * * @return The builder used to configure its Trie. */ public static TrieBuilder builder() { @@ -314,14 +315,15 @@ public class Trie { /** * Default (empty) constructor. */ - private TrieBuilder() {} + private TrieBuilder() { + } /** * Configure the Trie to ignore case when searching for keywords in * the text. This must be called before calling addKeyword because * the algorithm converts keywords to lowercase as they are added, * depending on this case sensitivity setting. - * + * * @return This builder. */ public TrieBuilder ignoreCase() { @@ -331,7 +333,7 @@ public class Trie { /** * Configure the Trie to ignore overlapping keywords. - * + * * @return This builder. */ public TrieBuilder ignoreOverlaps() { @@ -341,9 +343,8 @@ public class Trie { /** * Adds a keyword to the Trie's list of text search keywords. - * + * * @param keyword The keyword to add to the list. - * * @return This builder. * @throws NullPointerException if the keyword is null. */ @@ -351,34 +352,32 @@ public class Trie { this.trie.addKeyword(keyword); return this; } - + /** * Adds a list of keywords to the Trie's list of text search keywords. - * + * * @param keywords The keywords to add to the list. - * * @return This builder. */ public TrieBuilder addKeywords(final String... keywords) { - this.trie.addKeywords(keywords); - return this; + this.trie.addKeywords(keywords); + return this; } /** * Adds a list of keywords to the Trie's list of text search keywords. - * + * * @param keywords The keywords to add to the list. - * * @return This builder. */ public TrieBuilder addKeywords(final Collection keywords) { - this.trie.addKeywords(keywords); - return this; + this.trie.addKeywords(keywords); + return this; } /** * Configure the Trie to match whole keywords in the text. - * + * * @return This builder. */ public TrieBuilder onlyWholeWords() { @@ -390,7 +389,7 @@ public class Trie { * Configure the Trie to match whole keywords that are separated by * whitespace in the text. For example, "this keyword thatkeyword" * would only match the first occurrence of "keyword". - * + * * @return This builder. */ public TrieBuilder onlyWholeWordsWhiteSpaceSeparated() { @@ -401,7 +400,7 @@ public class Trie { /** * Configure the Trie to stop after the first keyword is found in the * text. - * + * * @return This builder. */ public TrieBuilder stopOnHit() { @@ -411,27 +410,25 @@ public class Trie { /** * Configure the Trie based on the builder settings. - * + * * @return The configured Trie. */ public Trie build() { this.trie.constructFailureStates(); return this.trie; } - + /** - * @deprecated Use ignoreCase() - * * @return This builder. + * @deprecated Use ignoreCase() */ public TrieBuilder caseInsensitive() { return ignoreCase(); } /** - * @deprecated Use ignoreOverlaps() - * * @return This builder. + * @deprecated Use ignoreOverlaps() */ public TrieBuilder removeOverlaps() { return ignoreOverlaps(); diff --git a/src/main/java/org/ahocorasick/trie/TrieConfig.java b/src/main/java/org/ahocorasick/trie/TrieConfig.java index f9f0125..f7487dd 100644 --- a/src/main/java/org/ahocorasick/trie/TrieConfig.java +++ b/src/main/java/org/ahocorasick/trie/TrieConfig.java @@ -12,9 +12,13 @@ public class TrieConfig { private boolean stopOnHit = false; - public boolean isStopOnHit() { return stopOnHit; } + public boolean isStopOnHit() { + return stopOnHit; + } - public void setStopOnHit(boolean stopOnHit) { this.stopOnHit = stopOnHit; } + public void setStopOnHit(boolean stopOnHit) { + this.stopOnHit = stopOnHit; + } public boolean isAllowOverlaps() { return allowOverlaps; @@ -32,7 +36,9 @@ public class TrieConfig { this.onlyWholeWords = onlyWholeWords; } - public boolean isOnlyWholeWordsWhiteSpaceSeparated() { return onlyWholeWordsWhiteSpaceSeparated; } + public boolean isOnlyWholeWordsWhiteSpaceSeparated() { + return onlyWholeWordsWhiteSpaceSeparated; + } public void setOnlyWholeWordsWhiteSpaceSeparated(boolean onlyWholeWordsWhiteSpaceSeparated) { this.onlyWholeWordsWhiteSpaceSeparated = onlyWholeWordsWhiteSpaceSeparated; diff --git a/src/test/java/org/ahocorasick/interval/IntervalTest.java b/src/test/java/org/ahocorasick/interval/IntervalTest.java index e61bad7..20d41ca 100644 --- a/src/test/java/org/ahocorasick/interval/IntervalTest.java +++ b/src/test/java/org/ahocorasick/interval/IntervalTest.java @@ -2,29 +2,29 @@ package org.ahocorasick.interval; import org.junit.Test; -import java.util.*; +import java.util.Iterator; +import java.util.Set; +import java.util.TreeSet; -import static junit.framework.Assert.assertEquals; -import static junit.framework.Assert.assertFalse; -import static junit.framework.Assert.assertTrue; +import static junit.framework.Assert.*; public class IntervalTest { @Test public void construct() { - Interval i = new Interval(1,3); + Interval i = new Interval(1, 3); assertEquals(1, i.getStart()); assertEquals(3, i.getEnd()); } @Test public void size() { - assertEquals(3, new Interval(0,2).size()); + assertEquals(3, new Interval(0, 2).size()); } @Test public void intervaloverlaps() { - assertTrue(new Interval(1,3).overlapsWith(new Interval(2,4))); + assertTrue(new Interval(1, 3).overlapsWith(new Interval(2, 4))); } @Test @@ -34,7 +34,7 @@ public class IntervalTest { @Test public void pointOverlaps() { - assertTrue(new Interval(1,3).overlapsWith(2)); + assertTrue(new Interval(1, 3).overlapsWith(2)); } @Test diff --git a/src/test/java/org/ahocorasick/interval/IntervalTreeTest.java b/src/test/java/org/ahocorasick/interval/IntervalTreeTest.java index f4a7f57..96c3670 100644 --- a/src/test/java/org/ahocorasick/interval/IntervalTreeTest.java +++ b/src/test/java/org/ahocorasick/interval/IntervalTreeTest.java @@ -9,7 +9,7 @@ import java.util.List; import static junit.framework.Assert.assertEquals; public class IntervalTreeTest { - + @Test public void findOverlaps() { List intervals = new ArrayList(); @@ -20,7 +20,7 @@ public class IntervalTreeTest { intervals.add(new Interval(4, 6)); intervals.add(new Interval(5, 7)); IntervalTree intervalTree = new IntervalTree(intervals); - List overlaps = intervalTree.findOverlaps(new Interval(1,3)); + List overlaps = intervalTree.findOverlaps(new Interval(1, 3)); assertEquals(3, overlaps.size()); Iterator overlapsIt = overlaps.iterator(); assertOverlap(overlapsIt.next(), 2, 4); @@ -47,5 +47,5 @@ public class IntervalTreeTest { assertEquals(expectedStart, interval.getStart()); assertEquals(expectedEnd, interval.getEnd()); } - + } diff --git a/src/test/java/org/ahocorasick/interval/IntervalableComparatorByPositionTest.java b/src/test/java/org/ahocorasick/interval/IntervalableComparatorByPositionTest.java index a6f1017..a36c831 100644 --- a/src/test/java/org/ahocorasick/interval/IntervalableComparatorByPositionTest.java +++ b/src/test/java/org/ahocorasick/interval/IntervalableComparatorByPositionTest.java @@ -13,9 +13,9 @@ public class IntervalableComparatorByPositionTest { @Test public void sortOnPosition() { List intervals = new ArrayList(); - intervals.add(new Interval(4,5)); - intervals.add(new Interval(1,4)); - intervals.add(new Interval(3,8)); + intervals.add(new Interval(4, 5)); + intervals.add(new Interval(1, 4)); + intervals.add(new Interval(3, 8)); Collections.sort(intervals, new IntervalableComparatorByPosition()); assertEquals(4, intervals.get(0).size()); assertEquals(6, intervals.get(1).size()); diff --git a/src/test/java/org/ahocorasick/interval/IntervalableComparatorBySizeTest.java b/src/test/java/org/ahocorasick/interval/IntervalableComparatorBySizeTest.java index 208cf3d..8fc7db1 100644 --- a/src/test/java/org/ahocorasick/interval/IntervalableComparatorBySizeTest.java +++ b/src/test/java/org/ahocorasick/interval/IntervalableComparatorBySizeTest.java @@ -13,9 +13,9 @@ public class IntervalableComparatorBySizeTest { @Test public void sortOnSize() { List intervals = new ArrayList(); - intervals.add(new Interval(4,5)); - intervals.add(new Interval(1,4)); - intervals.add(new Interval(3,8)); + intervals.add(new Interval(4, 5)); + intervals.add(new Interval(1, 4)); + intervals.add(new Interval(3, 8)); Collections.sort(intervals, new IntervalableComparatorBySize()); assertEquals(6, intervals.get(0).size()); assertEquals(4, intervals.get(1).size()); @@ -25,8 +25,8 @@ public class IntervalableComparatorBySizeTest { @Test public void sortOnSizeThenPosition() { List intervals = new ArrayList(); - intervals.add(new Interval(4,7)); - intervals.add(new Interval(2,5)); + intervals.add(new Interval(4, 7)); + intervals.add(new Interval(2, 5)); Collections.sort(intervals, new IntervalableComparatorBySize()); assertEquals(2, intervals.get(0).getStart()); assertEquals(4, intervals.get(1).getStart()); diff --git a/src/test/java/org/ahocorasick/trie/StateTest.java b/src/test/java/org/ahocorasick/trie/StateTest.java index 2a64370..2694305 100644 --- a/src/test/java/org/ahocorasick/trie/StateTest.java +++ b/src/test/java/org/ahocorasick/trie/StateTest.java @@ -1,6 +1,5 @@ package org.ahocorasick.trie; -import org.ahocorasick.trie.State; import org.junit.Test; import static junit.framework.Assert.assertEquals; @@ -11,9 +10,9 @@ public class StateTest { public void constructSequenceOfCharacters() { State rootState = new State(); rootState - .addState('a') - .addState('b') - .addState('c'); + .addState('a') + .addState('b') + .addState('c'); State currentState = rootState.nextState('a'); assertEquals(1, currentState.getDepth()); currentState = currentState.nextState('b'); diff --git a/src/test/java/org/ahocorasick/trie/TrieTest.java b/src/test/java/org/ahocorasick/trie/TrieTest.java index c4c780b..b529de4 100644 --- a/src/test/java/org/ahocorasick/trie/TrieTest.java +++ b/src/test/java/org/ahocorasick/trie/TrieTest.java @@ -1,34 +1,36 @@ package org.ahocorasick.trie; +import org.ahocorasick.trie.handler.EmitHandler; +import org.junit.Test; + import java.util.ArrayList; import java.util.Collection; import java.util.Iterator; import java.util.List; import java.util.concurrent.ThreadLocalRandom; + import static junit.framework.Assert.assertEquals; -import org.ahocorasick.trie.handler.EmitHandler; import static org.junit.Assert.assertTrue; -import org.junit.Test; public class TrieTest { private final static String[] ALPHABET = new String[]{ - "abc", "bcd", "cde" + "abc", "bcd", "cde" }; - + private final static String[] PRONOUNS = new String[]{ - "hers", "his", "she", "he" + "hers", "his", "she", "he" }; private final static String[] FOOD = new String[]{ - "veal", "cauliflower", "broccoli", "tomatoes" + "veal", "cauliflower", "broccoli", "tomatoes" }; private final static String[] GREEK_LETTERS = new String[]{ - "Alpha", "Beta", "Gamma" + "Alpha", "Beta", "Gamma" }; - + private final static String[] UNICODE = new String[]{ - "turning", "once", "again", "börkü" + "turning", "once", "again", "börkü" }; @Test @@ -406,7 +408,7 @@ public class TrieTest { .onlyWholeWordsWhiteSpaceSeparated() .addKeyword("#sugar-123") .build(); - Collection < Emit > emits = trie.parseText("#sugar-123 #sugar-1234"); // left, middle, right test + Collection emits = trie.parseText("#sugar-123 #sugar-1234"); // left, middle, right test assertEquals(1, emits.size()); // Match must not be made checkEmit(emits.iterator().next(), 0, 9, "#sugar-123"); } @@ -415,57 +417,57 @@ public class TrieTest { public void testLargeString() { final int interval = 100; final int textSize = 1000000; - final String keyword = FOOD[ 1 ]; - final StringBuilder text = randomNumbers( textSize ); + final String keyword = FOOD[1]; + final StringBuilder text = randomNumbers(textSize); - injectKeyword( text, keyword, interval ); + injectKeyword(text, keyword, interval); Trie trie = Trie.builder() - .onlyWholeWords() - .addKeyword( keyword ) - .build(); + .onlyWholeWords() + .addKeyword(keyword) + .build(); - final Collection emits = trie.parseText( text ); + final Collection emits = trie.parseText(text); - assertEquals( textSize / interval, emits.size() ); + assertEquals(textSize / interval, emits.size()); } - + /** * Generates a random sequence of ASCII numbers. - * + * * @param count The number of numbers to generate. * @return A character sequence filled with random digits. */ - private StringBuilder randomNumbers( int count ) { - final StringBuilder sb = new StringBuilder( count ); + private StringBuilder randomNumbers(int count) { + final StringBuilder sb = new StringBuilder(count); - while( --count > 0 ) { - sb.append( randomInt( 0, 10 ) ); + while (--count > 0) { + sb.append(randomInt(0, 10)); } return sb; } - + /** * Injects keywords into a string builder. - * - * @param source Should contain a bunch of random data that cannot match - * any keyword. - * @param keyword A keyword to inject repeatedly in the text. + * + * @param source Should contain a bunch of random data that cannot match + * any keyword. + * @param keyword A keyword to inject repeatedly in the text. * @param interval How often to inject the keyword. */ - private void injectKeyword( - final StringBuilder source, - final String keyword, - final int interval ) { + private void injectKeyword( + final StringBuilder source, + final String keyword, + final int interval) { final int length = source.length(); - for( int i = 0; i < length; i += interval ) { - source.replace( i, i + keyword.length(), keyword ); + for (int i = 0; i < length; i += interval) { + source.replace(i, i + keyword.length(), keyword); } } - - private int randomInt( final int min, final int max ) { - return ThreadLocalRandom.current().nextInt( min, max ); + + private int randomInt(final int min, final int max) { + return ThreadLocalRandom.current().nextInt(min, max); } private void checkEmit(Emit next, int expectedStart, int expectedEnd, String expectedKeyword) {