ignoreWhiteSpace() {
+
+ trieConfig.setIgnoreWhiteSpace(true);
+ return this;
+ }
+
}
+
}
diff --git a/src/main/java/org/ahocorasick/trie/State.java b/src/main/java/org/ahocorasick/trie/State.java
index c1e8d7c..bc8efad 100644
--- a/src/main/java/org/ahocorasick/trie/State.java
+++ b/src/main/java/org/ahocorasick/trie/State.java
@@ -2,6 +2,9 @@ package org.ahocorasick.trie;
import java.util.*;
+import lombok.Getter;
+import lombok.Setter;
+
/**
*
* A state has various important tasks it must attend to:
@@ -26,6 +29,7 @@ public class State {
/**
* effectively the size of the keyword
*/
+ @Getter
private final int depth;
/**
@@ -42,6 +46,8 @@ public class State {
/**
* if no matching states are found, the failure state will be returned
*/
+ @Setter
+ @Getter
private State failure;
/**
@@ -49,16 +55,22 @@ public class State {
*/
private Set emits;
+
public State() {
+
this(0);
}
+
public State(final int depth) {
+
this.depth = depth;
this.rootState = depth == 0 ? this : null;
}
+
private State nextState(final Character character, final boolean ignoreRootState) {
+
State nextState = this.success.get(character);
if (!ignoreRootState && nextState == null && this.rootState != null) {
@@ -68,15 +80,21 @@ public class State {
return nextState;
}
+
public State nextState(final Character character) {
+
return nextState(character, false);
}
+
public State nextStateIgnoreRootState(Character character) {
+
return nextState(character, true);
}
+
public State addState(String keyword) {
+
State state = this;
for (final Character character : keyword.toCharArray()) {
@@ -86,7 +104,9 @@ public class State {
return state;
}
+
public State addState(Character character) {
+
State nextState = nextStateIgnoreRootState(character);
if (nextState == null) {
nextState = new State(this.depth + 1);
@@ -95,40 +115,39 @@ public class State {
return nextState;
}
- public int getDepth() {
- return this.depth;
- }
public void addEmit(String keyword) {
+
if (this.emits == null) {
this.emits = new TreeSet<>();
}
this.emits.add(keyword);
}
+
public void addEmit(Collection emits) {
+
for (String emit : emits) {
addEmit(emit);
}
}
+
public Collection emit() {
+
return this.emits == null ? Collections.emptyList() : this.emits;
}
- public State failure() {
- return this.failure;
- }
-
- public void setFailure(State failState) {
- this.failure = failState;
- }
public Collection getStates() {
+
return this.success.values();
}
+
public Collection getTransitions() {
+
return this.success.keySet();
}
+
}
\ No newline at end of file
diff --git a/src/main/java/org/ahocorasick/trie/Token.java b/src/main/java/org/ahocorasick/trie/Token.java
index 4e79a35..dcd9814 100644
--- a/src/main/java/org/ahocorasick/trie/Token.java
+++ b/src/main/java/org/ahocorasick/trie/Token.java
@@ -1,17 +1,25 @@
package org.ahocorasick.trie;
public abstract class Token {
+
private String fragment;
+
public Token(String fragment) {
+
this.fragment = fragment;
}
+
public String getFragment() {
+
return this.fragment;
}
+
public abstract boolean isMatch();
+
public abstract Emit getEmit();
+
}
diff --git a/src/main/java/org/ahocorasick/trie/Trie.java b/src/main/java/org/ahocorasick/trie/Trie.java
index 5ffee10..a7838fc 100644
--- a/src/main/java/org/ahocorasick/trie/Trie.java
+++ b/src/main/java/org/ahocorasick/trie/Trie.java
@@ -15,20 +15,26 @@ import org.ahocorasick.trie.handler.StatefulEmitHandler;
*
* @author Robert Bor
*/
-public class Trie {
+public final class Trie {
private final PayloadTrie payloadTrie;
+
private Trie(final PayloadTrie payloadTrie) {
+
this.payloadTrie = payloadTrie;
}
+
public Collection tokenize(final String text) {
+
Collection> tokens = this.payloadTrie.tokenize(text);
return asTokens(tokens);
}
+
private static Collection asTokens(Collection> tokens) {
+
Collection result = new ArrayList<>();
for (PayloadToken payloadToken : tokens) {
result.add(new DefaultToken(payloadToken));
@@ -36,7 +42,9 @@ public class Trie {
return result;
}
+
private static Collection asEmits(Collection> emits) {
+
Collection result = new ArrayList<>();
for (PayloadEmit emit : emits) {
result.add(asEmit(emit));
@@ -44,30 +52,40 @@ public class Trie {
return result;
}
+
private static Emit asEmit(PayloadEmit payloadEmit) {
+
return new Emit(payloadEmit.getStart(), payloadEmit.getEnd(), payloadEmit.getKeyword());
}
+
public Collection parseText(final CharSequence text) {
+
Collection> parsedText = this.payloadTrie.parseText(text);
return asEmits(parsedText);
}
+
@SuppressWarnings("UnusedReturnValue")
- public Collection parseText( final CharSequence text, final StatefulEmitHandler emitHandler) {
- Collection> parsedText = this.payloadTrie.parseText(text,
- new StatefulPayloadEmitDelegateHandler(emitHandler));
+ public Collection parseText(final CharSequence text, final StatefulEmitHandler emitHandler) {
+
+ Collection> parsedText = this.payloadTrie.parseText(text, new StatefulPayloadEmitDelegateHandler(emitHandler));
return asEmits(parsedText);
}
+
public boolean containsMatch(final CharSequence text) {
+
return firstMatch(text) != null;
}
+
public void parseText(final CharSequence text, final EmitHandler emitHandler) {
+
this.payloadTrie.parseText(text, new PayloadEmitDelegateHandler(emitHandler));
}
+
/**
* The first matching text sequence.
*
@@ -75,35 +93,38 @@ public class Trie {
* @return {@code null} if no matches found.
*/
public Emit firstMatch(final CharSequence text) {
+
assert text != null;
- final PayloadEmit payload = this.payloadTrie.firstMatch( text );
- return payload == null
- ? null
- : new Emit( payload.getStart(),
- payload.getEnd(),
- payload.getKeyword() );
+ final PayloadEmit payload = this.payloadTrie.firstMatch(text);
+ return payload == null ? null : new Emit(payload.getStart(), payload.getEnd(), payload.getKeyword());
}
+
/**
* Provides a fluent interface for constructing Trie instances.
*
* @return The builder used to configure its Trie.
*/
public static TrieBuilder builder() {
+
return new TrieBuilder();
}
- public static class TrieBuilder {
+
+ public static final class TrieBuilder {
private final PayloadTrieBuilder delegate = PayloadTrie.builder();
+
/**
* Default (empty) constructor.
*/
private TrieBuilder() {
+
}
+
/**
* Configure the Trie to ignore case when searching for keywords in the text.
* This must be called before calling addKeyword because the algorithm converts
@@ -113,21 +134,37 @@ public class Trie {
* @return This builder.
*/
public TrieBuilder ignoreCase() {
+
delegate.ignoreCase();
// this.trieConfig.setCaseInsensitive(true);
return this;
}
+
/**
* Configure the Trie to ignore overlapping keywords.
*
* @return This builder.
*/
public TrieBuilder ignoreOverlaps() {
+
delegate.ignoreOverlaps();
return this;
}
+
+ /**
+ * Configure the Trie to ignore whitespace.
+ *
+ * @return This builder.
+ */
+ public TrieBuilder ignoreWhiteSpace() {
+
+ delegate.ignoreWhiteSpace();
+ return this;
+ }
+
+
/**
* Adds a keyword to the Trie's list of text search keywords.
*
@@ -136,10 +173,12 @@ public class Trie {
* @throws NullPointerException if the keyword is null.
*/
public TrieBuilder addKeyword(final String keyword) {
+
delegate.addKeyword(keyword, null);
return this;
}
+
/**
* Adds a list of keywords to the Trie's list of text search keywords.
*
@@ -147,12 +186,14 @@ public class Trie {
* @return This builder.
*/
public TrieBuilder addKeywords(final String... keywords) {
+
for (String keyword : keywords) {
delegate.addKeyword(keyword, null);
}
return this;
}
+
/**
* Adds a list of keywords to the Trie's list of text search keywords.
*
@@ -160,23 +201,27 @@ public class Trie {
* @return This builder.
*/
@SuppressWarnings("unused")
- public TrieBuilder addKeywords( final Collection keywords ) {
+ public TrieBuilder addKeywords(final Collection keywords) {
+
for (String keyword : keywords) {
this.delegate.addKeyword(keyword, null);
}
return this;
}
+
/**
* Configure the Trie to match whole keywords in the text.
*
* @return This builder.
*/
public TrieBuilder onlyWholeWords() {
+
this.delegate.onlyWholeWords();
return this;
}
+
/**
* Configure the Trie to match whole keywords that are separated by whitespace
* in the text. For example, "this keyword thatkeyword" would only match the
@@ -185,44 +230,35 @@ public class Trie {
* @return This builder.
*/
public TrieBuilder onlyWholeWordsWhiteSpaceSeparated() {
+
this.delegate.onlyWholeWordsWhiteSpaceSeparated();
return this;
}
+
/**
* Configure the Trie to stop after the first keyword is found in the text.
*
* @return This builder.
*/
public TrieBuilder stopOnHit() {
+
this.delegate.stopOnHit();
return this;
}
+
/**
* Configure the Trie based on the builder settings.
*
* @return The configured Trie.
*/
public Trie build() {
+
PayloadTrie payloadTrie = this.delegate.build();
return new Trie(payloadTrie);
}
- /**
- * @return This builder.
- * @deprecated Use ignoreCase()
- */
- public TrieBuilder caseInsensitive() {
- return ignoreCase();
- }
-
- /**
- * @return This builder.
- * @deprecated Use ignoreOverlaps()
- */
- public TrieBuilder removeOverlaps() {
- return ignoreOverlaps();
- }
}
+
}
diff --git a/src/main/java/org/ahocorasick/trie/TrieConfig.java b/src/main/java/org/ahocorasick/trie/TrieConfig.java
index f7487dd..daf38fb 100644
--- a/src/main/java/org/ahocorasick/trie/TrieConfig.java
+++ b/src/main/java/org/ahocorasick/trie/TrieConfig.java
@@ -4,51 +4,86 @@ public class TrieConfig {
private boolean allowOverlaps = true;
- private boolean onlyWholeWords = false;
+ private boolean onlyWholeWords;
- private boolean onlyWholeWordsWhiteSpaceSeparated = false;
+ private boolean onlyWholeWordsWhiteSpaceSeparated;
- private boolean caseInsensitive = false;
+ private boolean caseInsensitive;
+
+ private boolean ignoreWhiteSpace;
+
+ private boolean stopOnHit;
- private boolean stopOnHit = false;
public boolean isStopOnHit() {
+
return stopOnHit;
}
+
public void setStopOnHit(boolean stopOnHit) {
+
this.stopOnHit = stopOnHit;
}
+
public boolean isAllowOverlaps() {
+
return allowOverlaps;
}
+
public void setAllowOverlaps(boolean allowOverlaps) {
+
this.allowOverlaps = allowOverlaps;
}
+
public boolean isOnlyWholeWords() {
+
return onlyWholeWords;
}
+
public void setOnlyWholeWords(boolean onlyWholeWords) {
+
this.onlyWholeWords = onlyWholeWords;
}
+
public boolean isOnlyWholeWordsWhiteSpaceSeparated() {
+
return onlyWholeWordsWhiteSpaceSeparated;
}
+
public void setOnlyWholeWordsWhiteSpaceSeparated(boolean onlyWholeWordsWhiteSpaceSeparated) {
+
this.onlyWholeWordsWhiteSpaceSeparated = onlyWholeWordsWhiteSpaceSeparated;
}
+
public boolean isCaseInsensitive() {
+
return caseInsensitive;
}
+
+ public boolean isIgnoreWhiteSpace() {
+
+ return ignoreWhiteSpace;
+ }
+
+
public void setCaseInsensitive(boolean caseInsensitive) {
+
this.caseInsensitive = caseInsensitive;
}
+
+
+ public void setIgnoreWhiteSpace(boolean ignoreWhiteSpace) {
+
+ this.ignoreWhiteSpace = ignoreWhiteSpace;
+ }
+
}
diff --git a/src/main/java/org/ahocorasick/trie/handler/AbstractStatefulEmitHandler.java b/src/main/java/org/ahocorasick/trie/handler/AbstractStatefulEmitHandler.java
index eaa170c..4cb208a 100644
--- a/src/main/java/org/ahocorasick/trie/handler/AbstractStatefulEmitHandler.java
+++ b/src/main/java/org/ahocorasick/trie/handler/AbstractStatefulEmitHandler.java
@@ -9,12 +9,16 @@ public abstract class AbstractStatefulEmitHandler implements StatefulEmitHandler
private final List emits = new ArrayList<>();
+
public void addEmit(final Emit emit) {
+
this.emits.add(emit);
}
+
@Override
public List getEmits() {
+
return this.emits;
}
diff --git a/src/main/java/org/ahocorasick/trie/handler/AbstractStatefulPayloadEmitHandler.java b/src/main/java/org/ahocorasick/trie/handler/AbstractStatefulPayloadEmitHandler.java
index 6d5d088..4552f0f 100644
--- a/src/main/java/org/ahocorasick/trie/handler/AbstractStatefulPayloadEmitHandler.java
+++ b/src/main/java/org/ahocorasick/trie/handler/AbstractStatefulPayloadEmitHandler.java
@@ -9,12 +9,16 @@ public abstract class AbstractStatefulPayloadEmitHandler implements StatefulP
private final List> emits = new ArrayList<>();
+
public void addEmit(final PayloadEmit emit) {
+
this.emits.add(emit);
}
+
@Override
public List> getEmits() {
+
return this.emits;
}
diff --git a/src/main/java/org/ahocorasick/trie/handler/DefaultEmitHandler.java b/src/main/java/org/ahocorasick/trie/handler/DefaultEmitHandler.java
index 80a18c1..a1e4935 100644
--- a/src/main/java/org/ahocorasick/trie/handler/DefaultEmitHandler.java
+++ b/src/main/java/org/ahocorasick/trie/handler/DefaultEmitHandler.java
@@ -9,14 +9,19 @@ public class DefaultEmitHandler implements StatefulEmitHandler {
private final List emits = new ArrayList<>();
+
@Override
public boolean emit(final Emit emit) {
+
this.emits.add(emit);
return true;
}
+
@Override
public List getEmits() {
+
return this.emits;
}
+
}
diff --git a/src/main/java/org/ahocorasick/trie/handler/DefaultPayloadEmitHandler.java b/src/main/java/org/ahocorasick/trie/handler/DefaultPayloadEmitHandler.java
index 8e7b1c3..0414d4d 100644
--- a/src/main/java/org/ahocorasick/trie/handler/DefaultPayloadEmitHandler.java
+++ b/src/main/java/org/ahocorasick/trie/handler/DefaultPayloadEmitHandler.java
@@ -9,14 +9,19 @@ public class DefaultPayloadEmitHandler implements StatefulPayloadEmitHandler<
private final List> emits = new ArrayList<>();
+
@Override
public boolean emit(final PayloadEmit emit) {
+
this.emits.add(emit);
return true;
}
+
@Override
public List> getEmits() {
+
return this.emits;
}
+
}
diff --git a/src/main/java/org/ahocorasick/trie/handler/EmitHandler.java b/src/main/java/org/ahocorasick/trie/handler/EmitHandler.java
index 1332ec2..176bb0b 100644
--- a/src/main/java/org/ahocorasick/trie/handler/EmitHandler.java
+++ b/src/main/java/org/ahocorasick/trie/handler/EmitHandler.java
@@ -3,5 +3,7 @@ package org.ahocorasick.trie.handler;
import org.ahocorasick.trie.Emit;
public interface EmitHandler {
+
boolean emit(Emit emit);
+
}
diff --git a/src/main/java/org/ahocorasick/trie/handler/PayloadEmitDelegateHandler.java b/src/main/java/org/ahocorasick/trie/handler/PayloadEmitDelegateHandler.java
index 2d42552..b3c78f7 100644
--- a/src/main/java/org/ahocorasick/trie/handler/PayloadEmitDelegateHandler.java
+++ b/src/main/java/org/ahocorasick/trie/handler/PayloadEmitDelegateHandler.java
@@ -11,13 +11,17 @@ public class PayloadEmitDelegateHandler implements PayloadEmitHandler {
private EmitHandler handler;
+
public PayloadEmitDelegateHandler(EmitHandler handler) {
+
this.handler = handler;
}
+
@Override
public boolean emit(PayloadEmit emit) {
+
Emit newEmit = new Emit(emit.getStart(), emit.getEnd(), emit.getKeyword());
return handler.emit(newEmit);
}
diff --git a/src/main/java/org/ahocorasick/trie/handler/PayloadEmitHandler.java b/src/main/java/org/ahocorasick/trie/handler/PayloadEmitHandler.java
index 173c712..d0b9c66 100644
--- a/src/main/java/org/ahocorasick/trie/handler/PayloadEmitHandler.java
+++ b/src/main/java/org/ahocorasick/trie/handler/PayloadEmitHandler.java
@@ -3,5 +3,7 @@ package org.ahocorasick.trie.handler;
import org.ahocorasick.trie.PayloadEmit;
public interface PayloadEmitHandler {
+
boolean emit(PayloadEmit emit);
+
}
diff --git a/src/main/java/org/ahocorasick/trie/handler/StatefulEmitHandler.java b/src/main/java/org/ahocorasick/trie/handler/StatefulEmitHandler.java
index 13cb20e..b674271 100644
--- a/src/main/java/org/ahocorasick/trie/handler/StatefulEmitHandler.java
+++ b/src/main/java/org/ahocorasick/trie/handler/StatefulEmitHandler.java
@@ -5,5 +5,7 @@ import java.util.List;
import org.ahocorasick.trie.Emit;
public interface StatefulEmitHandler extends EmitHandler {
+
List getEmits();
+
}
diff --git a/src/main/java/org/ahocorasick/trie/handler/StatefulPayloadEmitDelegateHandler.java b/src/main/java/org/ahocorasick/trie/handler/StatefulPayloadEmitDelegateHandler.java
index 1a8e1da..e7ba5e8 100644
--- a/src/main/java/org/ahocorasick/trie/handler/StatefulPayloadEmitDelegateHandler.java
+++ b/src/main/java/org/ahocorasick/trie/handler/StatefulPayloadEmitDelegateHandler.java
@@ -15,12 +15,16 @@ public class StatefulPayloadEmitDelegateHandler implements StatefulPayloadEmitHa
private StatefulEmitHandler handler;
+
public StatefulPayloadEmitDelegateHandler(StatefulEmitHandler handler) {
+
this.handler = handler;
}
+
private static List> asEmits(Collection emits) {
+
List> result = new ArrayList<>();
for (Emit emit : emits) {
result.add(new PayloadEmit(emit.getStart(), emit.getEnd(), emit.getKeyword(), null));
@@ -28,15 +32,20 @@ public class StatefulPayloadEmitDelegateHandler implements StatefulPayloadEmitHa
return result;
}
+
@Override
public boolean emit(PayloadEmit emit) {
+
Emit newEmit = new Emit(emit.getStart(), emit.getEnd(), emit.getKeyword());
return handler.emit(newEmit);
}
+
@Override
public List> getEmits() {
+
List emits = this.handler.getEmits();
return asEmits(emits);
}
+
}
diff --git a/src/main/java/org/ahocorasick/trie/handler/StatefulPayloadEmitHandler.java b/src/main/java/org/ahocorasick/trie/handler/StatefulPayloadEmitHandler.java
index bb42049..c24b71f 100644
--- a/src/main/java/org/ahocorasick/trie/handler/StatefulPayloadEmitHandler.java
+++ b/src/main/java/org/ahocorasick/trie/handler/StatefulPayloadEmitHandler.java
@@ -4,6 +4,8 @@ import java.util.List;
import org.ahocorasick.trie.PayloadEmit;
-public interface StatefulPayloadEmitHandler extends PayloadEmitHandler{
+public interface StatefulPayloadEmitHandler extends PayloadEmitHandler {
+
List> getEmits();
+
}
diff --git a/src/test/java/org/ahocorasick/interval/IntervalTest.java b/src/test/java/org/ahocorasick/interval/IntervalTest.java
index 328b902..63698ab 100644
--- a/src/test/java/org/ahocorasick/interval/IntervalTest.java
+++ b/src/test/java/org/ahocorasick/interval/IntervalTest.java
@@ -12,38 +12,51 @@ public class IntervalTest {
@Test
public void test_construct() {
+
final Interval i = new Interval(1, 3);
assertEquals(1, i.getStart());
assertEquals(3, i.getEnd());
}
+
@Test
public void test_size() {
+
assertEquals(3, new Interval(0, 2).size());
}
+
@Test
public void test_intervaloverlaps() {
+
assertTrue(new Interval(1, 3).overlapsWith(new Interval(2, 4)));
}
+
@Test
public void test_intervalDoesNotOverlap() {
+
assertFalse(new Interval(1, 13).overlapsWith(new Interval(27, 42)));
}
+
@Test
public void test_pointOverlaps() {
+
assertTrue(new Interval(1, 3).overlapsWith(2));
}
+
@Test
public void test_pointDoesNotOverlap() {
+
assertFalse(new Interval(1, 13).overlapsWith(42));
}
+
@Test
public void test_comparable() {
+
final Set intervals = new TreeSet<>();
intervals.add(new Interval(4, 6));
intervals.add(new Interval(2, 7));
@@ -54,13 +67,17 @@ public class IntervalTest {
assertEquals(4, it.next().getStart());
}
+
@Test
public void test_checkToString() {
+
assertEquals("4:6", new Interval(4, 6).toString());
}
+
@Test
public void test_compareToNegativeTest() {
+
assertEquals(-1, new Interval(4, 6).compareTo(new Object()));
}
diff --git a/src/test/java/org/ahocorasick/interval/IntervalTreeTest.java b/src/test/java/org/ahocorasick/interval/IntervalTreeTest.java
index fc41a3e..7995126 100644
--- a/src/test/java/org/ahocorasick/interval/IntervalTreeTest.java
+++ b/src/test/java/org/ahocorasick/interval/IntervalTreeTest.java
@@ -12,6 +12,7 @@ public class IntervalTreeTest {
@Test
public void findOverlaps() {
+
List intervals = new ArrayList<>();
intervals.add(new Interval(0, 2));
intervals.add(new Interval(1, 3));
@@ -28,8 +29,10 @@ public class IntervalTreeTest {
assertOverlap(overlapsIt.next(), 0, 2);
}
+
@Test
public void removeOverlaps() {
+
List intervals = new ArrayList<>();
intervals.add(new Interval(0, 2));
intervals.add(new Interval(4, 5));
@@ -43,7 +46,9 @@ public class IntervalTreeTest {
}
+
protected void assertOverlap(Intervalable interval, int expectedStart, int expectedEnd) {
+
assertEquals(expectedStart, interval.getStart());
assertEquals(expectedEnd, interval.getEnd());
}
diff --git a/src/test/java/org/ahocorasick/interval/IntervalableComparatorByPositionTest.java b/src/test/java/org/ahocorasick/interval/IntervalableComparatorByPositionTest.java
index a36c831..e67fdd6 100644
--- a/src/test/java/org/ahocorasick/interval/IntervalableComparatorByPositionTest.java
+++ b/src/test/java/org/ahocorasick/interval/IntervalableComparatorByPositionTest.java
@@ -12,6 +12,7 @@ public class IntervalableComparatorByPositionTest {
@Test
public void sortOnPosition() {
+
List intervals = new ArrayList();
intervals.add(new Interval(4, 5));
intervals.add(new Interval(1, 4));
diff --git a/src/test/java/org/ahocorasick/interval/IntervalableComparatorBySizeTest.java b/src/test/java/org/ahocorasick/interval/IntervalableComparatorBySizeTest.java
index 8fc7db1..7c7cb6e 100644
--- a/src/test/java/org/ahocorasick/interval/IntervalableComparatorBySizeTest.java
+++ b/src/test/java/org/ahocorasick/interval/IntervalableComparatorBySizeTest.java
@@ -12,6 +12,7 @@ public class IntervalableComparatorBySizeTest {
@Test
public void sortOnSize() {
+
List intervals = new ArrayList();
intervals.add(new Interval(4, 5));
intervals.add(new Interval(1, 4));
@@ -22,8 +23,10 @@ public class IntervalableComparatorBySizeTest {
assertEquals(2, intervals.get(2).size());
}
+
@Test
public void sortOnSizeThenPosition() {
+
List intervals = new ArrayList();
intervals.add(new Interval(4, 7));
intervals.add(new Interval(2, 5));
diff --git a/src/test/java/org/ahocorasick/trie/EmitTest.java b/src/test/java/org/ahocorasick/trie/EmitTest.java
index 33f2d2c..1e8a335 100644
--- a/src/test/java/org/ahocorasick/trie/EmitTest.java
+++ b/src/test/java/org/ahocorasick/trie/EmitTest.java
@@ -15,18 +15,22 @@ public class EmitTest {
*/
@Test
public void test_Equality_SameValues_ObjectsAreEqual() {
+
final Emit one = new Emit(13, 42, null);
final Emit two = new Emit(13, 42, null);
- assertEquals( one, two );
+ assertEquals(one, two);
}
+
/**
* Test that two {@link Emit} instances having different values are not equal.
*/
@Test
public void test_Equality_DifferingValues_ObjectsAreNotEqual() {
+
final Emit one = new Emit(13, 42, null);
final Emit two = new Emit(13, 43, null);
assertNotEquals(one, two);
}
+
}
diff --git a/src/test/java/org/ahocorasick/trie/PayloadTrieTest.java b/src/test/java/org/ahocorasick/trie/PayloadTrieTest.java
index a0fbaf3..026a186 100644
--- a/src/test/java/org/ahocorasick/trie/PayloadTrieTest.java
+++ b/src/test/java/org/ahocorasick/trie/PayloadTrieTest.java
@@ -17,111 +17,117 @@ import static org.junit.Assert.*;
public class PayloadTrieTest {
- private final static String[] ALPHABET = new String[] { "abc", "bcd", "cde" };
- private final static String[] ALPHABET_PAYLOAD = new String[] { "alpha:abc", "alpha:bcd", "alpha:cde" };
+ private final static String[] ALPHABET = new String[]{"abc", "bcd", "cde"};
+ private final static String[] ALPHABET_PAYLOAD = new String[]{"alpha:abc", "alpha:bcd", "alpha:cde"};
- private final static List> ALPHABET_WITH_PAYLOADS = asList(
- new Payload<>( ALPHABET[ 0 ], ALPHABET_PAYLOAD[ 0 ] ),
- new Payload<>( ALPHABET[ 1 ], ALPHABET_PAYLOAD[ 1 ] ),
- new Payload<>( ALPHABET[ 2 ], ALPHABET_PAYLOAD[ 2 ] ));
+ private final static List> ALPHABET_WITH_PAYLOADS = asList(new Payload<>(ALPHABET[0], ALPHABET_PAYLOAD[0]),
+ new Payload<>(ALPHABET[1], ALPHABET_PAYLOAD[1]),
+ new Payload<>(ALPHABET[2], ALPHABET_PAYLOAD[2]));
- private final static String[] PRONOUNS = new String[] { "hers", "his", "she", "he" };
- private final static int[] PRONOUNS_PAYLOAD_ID = new int[] { 9, 12, 4, 20 };
+ private final static String[] PRONOUNS = new String[]{"hers", "his", "she", "he"};
+ private final static int[] PRONOUNS_PAYLOAD_ID = new int[]{9, 12, 4, 20};
- private final static List> PRONOUNS_WITH_PAYLOADS = asList(
- new Payload<>( PRONOUNS[ 0 ], PRONOUNS_PAYLOAD_ID[ 0 ] ),
- new Payload<>( PRONOUNS[ 1 ], PRONOUNS_PAYLOAD_ID[ 1 ] ),
- new Payload<>( PRONOUNS[ 2 ], PRONOUNS_PAYLOAD_ID[ 2 ] ),
- new Payload<>( PRONOUNS[ 3 ], PRONOUNS_PAYLOAD_ID[ 3 ] )
- );
+ private final static List> PRONOUNS_WITH_PAYLOADS = asList(new Payload<>(PRONOUNS[0], PRONOUNS_PAYLOAD_ID[0]),
+ new Payload<>(PRONOUNS[1], PRONOUNS_PAYLOAD_ID[1]),
+ new Payload<>(PRONOUNS[2], PRONOUNS_PAYLOAD_ID[2]),
+ new Payload<>(PRONOUNS[3], PRONOUNS_PAYLOAD_ID[3]));
- private final static String[] FOOD = new String[] { "veal", "cauliflower", "broccoli", "tomatoes" };
- private final static Food[] FOOD_PAYLOAD = new Food[] { new Food("veal"), new Food("cauliflower"), new Food("broccoli"),
- new Food("tomatoes") };
+ private final static String[] FOOD = new String[]{"veal", "cauliflower", "broccoli", "tomatoes"};
+ private final static Food[] FOOD_PAYLOAD = new Food[]{new Food("veal"), new Food("cauliflower"), new Food("broccoli"), new Food("tomatoes")};
- private final static List> FOOD_WITH_PAYLOADS = asList(
- new Payload<>( FOOD[ 0 ], FOOD_PAYLOAD[ 0 ] ),
- new Payload<>( FOOD[ 1 ], FOOD_PAYLOAD[ 1 ] ),
- new Payload<>( FOOD[ 2 ], FOOD_PAYLOAD[ 2 ] ),
- new Payload<>( FOOD[ 3 ], FOOD_PAYLOAD[ 3 ] )
- );
+ private final static List> FOOD_WITH_PAYLOADS = asList(new Payload<>(FOOD[0], FOOD_PAYLOAD[0]),
+ new Payload<>(FOOD[1], FOOD_PAYLOAD[1]),
+ new Payload<>(FOOD[2], FOOD_PAYLOAD[2]),
+ new Payload<>(FOOD[3], FOOD_PAYLOAD[3]));
- private final static String[] GREEK_LETTERS = new String[] { "Alpha", "Beta", "Gamma" };
- private final static String[] GREEK_LETTERS_PAYLOAD = new String[] { "greek:Alpha", "greek:Beta", "greek:Gamma" };
+ private final static String[] GREEK_LETTERS = new String[]{"Alpha", "Beta", "Gamma"};
+ private final static String[] GREEK_LETTERS_PAYLOAD = new String[]{"greek:Alpha", "greek:Beta", "greek:Gamma"};
- private final static List> GREEK_LETTERS_WITH_PAYLOADS = asList(
- new Payload<>( GREEK_LETTERS[ 0 ], GREEK_LETTERS_PAYLOAD[ 0 ] ),
- new Payload<>( GREEK_LETTERS[ 1 ], GREEK_LETTERS_PAYLOAD[ 1 ] ),
- new Payload<>( GREEK_LETTERS[ 2 ], GREEK_LETTERS_PAYLOAD[ 2 ] ));
+ private final static List> GREEK_LETTERS_WITH_PAYLOADS = asList(new Payload<>(GREEK_LETTERS[0], GREEK_LETTERS_PAYLOAD[0]),
+ new Payload<>(GREEK_LETTERS[1], GREEK_LETTERS_PAYLOAD[1]),
+ new Payload<>(GREEK_LETTERS[2], GREEK_LETTERS_PAYLOAD[2]));
- private final static String[] UNICODE = new String[] { "turning", "once", "again", "börkü" };
- private final static String[] UNICODE_PAYLOAD = new String[] { "uni:turning", "uni:once", "uni:again", "uni:börkü" };
+ private final static String[] UNICODE = new String[]{"turning", "once", "again", "börkü"};
+ private final static String[] UNICODE_PAYLOAD = new String[]{"uni:turning", "uni:once", "uni:again", "uni:börkü"};
- private final static List> UNICODE_WITH_PAYLOADS = asList(
- new Payload<>( UNICODE[ 0 ], UNICODE_PAYLOAD[ 0 ] ),
- new Payload<>( UNICODE[ 1 ], UNICODE_PAYLOAD[ 1 ] ),
- new Payload<>( UNICODE[ 2 ], UNICODE_PAYLOAD[ 2 ] ),
- new Payload<>( UNICODE[ 3 ], UNICODE_PAYLOAD[ 3 ] ));
+ private final static List> UNICODE_WITH_PAYLOADS = asList(new Payload<>(UNICODE[0], UNICODE_PAYLOAD[0]),
+ new Payload<>(UNICODE[1], UNICODE_PAYLOAD[1]),
+ new Payload<>(UNICODE[2], UNICODE_PAYLOAD[2]),
+ new Payload<>(UNICODE[3], UNICODE_PAYLOAD[3]));
public static class Food {
+
private final String name;
+
public Food(String name) {
+
this.name = name;
}
+
@Override
public int hashCode() {
+
final int prime = 31;
int result = 1;
result = prime * result + ((name == null) ? 0 : name.hashCode());
return result;
}
+
@Override
- public boolean equals( Object obj ) {
- if( this == obj ) {
+ public boolean equals(Object obj) {
+
+ if (this == obj) {
return true;
}
- if( obj == null ) {
+ if (obj == null) {
return false;
}
- if( getClass() != obj.getClass() ) {
+ if (getClass() != obj.getClass()) {
return false;
}
Food other = (Food) obj;
- if( name == null ) {
+ if (name == null) {
return other.name == null;
- }
- else {
- return name.equals( other.name );
+ } else {
+ return name.equals(other.name);
}
}
+
}
+
@Test
public void keywordAndTextAreTheSame() {
+
PayloadTrie trie = PayloadTrie.builder().addKeyword(ALPHABET[0], ALPHABET_PAYLOAD[0]).build();
Collection> emits = trie.parseText(ALPHABET[0]);
Iterator> iterator = emits.iterator();
checkEmit(iterator.next(), 0, 2, ALPHABET[0], ALPHABET_PAYLOAD[0]);
}
+
@Test
public void keywordAndTextAreTheSameFirstMatch() {
+
PayloadTrie trie = PayloadTrie.builder().addKeyword(ALPHABET[0], ALPHABET_PAYLOAD[0]).build();
PayloadEmit firstMatch = trie.firstMatch(ALPHABET[0]);
checkEmit(firstMatch, 0, 2, ALPHABET[0], ALPHABET_PAYLOAD[0]);
}
+
@Test
public void textIsLongerThanKeyword() {
+
PayloadTrie trie = PayloadTrie.builder().addKeyword(ALPHABET[0], ALPHABET_PAYLOAD[0]).build();
Collection> emits = trie.parseText(" " + ALPHABET[0]);
Iterator> iterator = emits.iterator();
checkEmit(iterator.next(), 1, 3, ALPHABET[0], ALPHABET_PAYLOAD[0]);
}
+
@Test
public void textIsLongerThanKeywordFirstMatch() {
@@ -130,23 +136,29 @@ public class PayloadTrieTest {
checkEmit(firstMatch, 1, 3, ALPHABET[0], ALPHABET_PAYLOAD[0]);
}
+
@Test
public void variousKeywordsOneMatch() {
+
PayloadTrie trie = PayloadTrie.builder().addKeywords(ALPHABET_WITH_PAYLOADS).build();
Collection> emits = trie.parseText("bcd");
Iterator> iterator = emits.iterator();
checkEmit(iterator.next(), 0, 2, "bcd", "alpha:bcd");
}
+
@Test
public void variousKeywordsFirstMatch() {
+
PayloadTrie trie = PayloadTrie.builder().addKeywords(ALPHABET_WITH_PAYLOADS).build();
PayloadEmit firstMatch = trie.firstMatch("bcd");
checkEmit(firstMatch, 0, 2, "bcd", "alpha:bcd");
}
+
@Test
public void ushersTestAndStopOnHit() {
+
PayloadTrie trie = PayloadTrie.builder().addKeywords(PRONOUNS_WITH_PAYLOADS).stopOnHit().build();
Collection> emits = trie.parseText("ushers");
assertEquals(1, emits.size()); // she @ 3, he @ 3, hers @ 5
@@ -154,15 +166,19 @@ public class PayloadTrieTest {
checkEmit(iterator.next(), 2, 3, "he", 20);
}
+
@Test
public void ushersTestStopOnHitSkipOne() {
+
PayloadTrie trie = PayloadTrie.builder().addKeywords(PRONOUNS_WITH_PAYLOADS).stopOnHit().build();
StatefulPayloadEmitHandler testEmitHandler = new AbstractStatefulPayloadEmitHandler() {
boolean first = true;
+
@Override
public boolean emit(final PayloadEmit emit) {
+
if (first) {
// return false for the first element
first = false;
@@ -181,8 +197,10 @@ public class PayloadTrieTest {
checkEmit(iterator.next(), 1, 3, "she", 4);
}
+
@Test
public void ushersTest() {
+
PayloadTrie trie = PayloadTrie.builder().addKeywords(PRONOUNS_WITH_PAYLOADS).build();
Collection> emits = trie.parseText("ushers");
assertEquals(3, emits.size()); // she @ 3, he @ 3, hers @ 5
@@ -193,10 +211,17 @@ public class PayloadTrieTest {
checkEmit(iterator.next(), 2, 5, "hers", 9);
}
+
@Test
public void ushersTestWithCapitalKeywords() {
- PayloadTrie trie = PayloadTrie.builder().ignoreCase().addKeyword("HERS", "hers").addKeyword("HIS", "his")
- .addKeyword("SHE", "she").addKeyword("HE", "he").build();
+
+ PayloadTrie trie = PayloadTrie.builder()
+ .ignoreCase()
+ .addKeyword("HERS", "hers")
+ .addKeyword("HIS", "his")
+ .addKeyword("SHE", "she")
+ .addKeyword("HE", "he")
+ .build();
Collection> emits = trie.parseText("ushers");
assertEquals(3, emits.size()); // she @ 3, he @ 3, hers @ 5
Iterator> iterator = emits.iterator();
@@ -205,15 +230,19 @@ public class PayloadTrieTest {
checkEmit(iterator.next(), 2, 5, "HERS", "hers");
}
+
@Test
public void ushersTestFirstMatch() {
+
PayloadTrie trie = PayloadTrie.builder().addKeywords(PRONOUNS_WITH_PAYLOADS).build();
PayloadEmit firstMatch = trie.firstMatch("ushers");
checkEmit(firstMatch, 2, 3, "he", 20);
}
+
@Test
public void ushersTestByCallback() {
+
PayloadTrie trie = PayloadTrie.builder().addKeywords(PRONOUNS_WITH_PAYLOADS).build();
final List> emits = new LinkedList<>();
@@ -230,23 +259,29 @@ public class PayloadTrieTest {
checkEmit(iterator.next(), 2, 5, "hers", 9);
}
+
@Test
public void misleadingTest() {
+
PayloadTrie trie = PayloadTrie.builder().addKeyword("hers", "pronon:hers").build();
Collection> emits = trie.parseText("h he her hers");
Iterator> iterator = emits.iterator();
checkEmit(iterator.next(), 9, 12, "hers", "pronon:hers");
}
+
@Test
public void misleadingTestFirstMatch() {
+
PayloadTrie trie = PayloadTrie.builder().addKeyword("hers", "pronon:hers").build();
PayloadEmit firstMatch = trie.firstMatch("h he her hers");
checkEmit(firstMatch, 9, 12, "hers", "pronon:hers");
}
+
@Test
public void recipes() {
+
PayloadTrie trie = PayloadTrie.builder().addKeywords(FOOD_WITH_PAYLOADS).build();
Collection> emits = trie.parseText("2 cauliflowers, 3 tomatoes, 4 slices of veal, 100g broccoli");
Iterator> iterator = emits.iterator();
@@ -256,17 +291,20 @@ public class PayloadTrieTest {
checkEmit(iterator.next(), 51, 58, "broccoli", new Food("broccoli"));
}
+
@Test
public void recipesFirstMatch() {
+
PayloadTrie trie = PayloadTrie.builder().addKeywords(FOOD_WITH_PAYLOADS).build();
PayloadEmit firstMatch = trie.firstMatch("2 cauliflowers, 3 tomatoes, 4 slices of veal, 100g broccoli");
checkEmit(firstMatch, 2, 12, "cauliflower", new Food("cauliflower"));
}
+
@Test
public void longAndShortOverlappingMatch() {
- PayloadTrie trie = PayloadTrie.builder().addKeyword("he", "pronon:he").addKeyword("hehehehe", "garbage")
- .build();
+
+ PayloadTrie trie = PayloadTrie.builder().addKeyword("he", "pronon:he").addKeyword("hehehehe", "garbage").build();
Collection> emits = trie.parseText("hehehehehe");
Iterator> iterator = emits.iterator();
checkEmit(iterator.next(), 0, 1, "he", "pronon:he");
@@ -278,10 +316,16 @@ public class PayloadTrieTest {
checkEmit(iterator.next(), 2, 9, "hehehehe", "garbage");
}
+
@Test
public void nonOverlapping() {
- PayloadTrie trie = PayloadTrie.builder().ignoreOverlaps().addKeyword("ab", "alpha:ab")
- .addKeyword("cba", "alpha:cba").addKeyword("ababc", "alpha:ababc").build();
+
+ PayloadTrie trie = PayloadTrie.builder()
+ .ignoreOverlaps()
+ .addKeyword("ab", "alpha:ab")
+ .addKeyword("cba", "alpha:cba")
+ .addKeyword("ababc", "alpha:ababc")
+ .build();
Collection> emits = trie.parseText("ababcbab");
assertEquals(2, emits.size());
Iterator> iterator = emits.iterator();
@@ -290,49 +334,79 @@ public class PayloadTrieTest {
checkEmit(iterator.next(), 6, 7, "ab", "alpha:ab");
}
+
@Test
public void nonOverlappingFirstMatch() {
- PayloadTrie trie = PayloadTrie.builder().ignoreOverlaps().addKeyword("ab", "alpha:ab")
- .addKeyword("cba", "alpha:cba").addKeyword("ababc", "alpha:ababc").build();
+
+ PayloadTrie trie = PayloadTrie.builder()
+ .ignoreOverlaps()
+ .addKeyword("ab", "alpha:ab")
+ .addKeyword("cba", "alpha:cba")
+ .addKeyword("ababc", "alpha:ababc")
+ .build();
PayloadEmit firstMatch = trie.firstMatch("ababcbab");
checkEmit(firstMatch, 0, 4, "ababc", "alpha:ababc");
}
+
@Test
public void containsMatch() {
- PayloadTrie trie = PayloadTrie.builder().ignoreOverlaps().addKeyword("ab", "alpha:ab")
- .addKeyword("cba", "alpha:cba").addKeyword("ababc", "alpha:ababc").build();
+
+ PayloadTrie