diff --git a/pom.xml b/pom.xml index b560501..989e9e9 100644 --- a/pom.xml +++ b/pom.xml @@ -57,7 +57,7 @@ 1.8 UTF-8 - 4.13.1 + 4.13.2 2.5.2 2.8 @@ -115,7 +115,7 @@ org.apache.maven.plugins maven-javadoc-plugin - 3.2.0 + 3.4.1 attach-javadocs @@ -132,7 +132,7 @@ org.apache.maven.plugins maven-source-plugin - 3.0.1 + 3.2.1 attach-sources diff --git a/src/main/java/org/ahocorasick/trie/PayloadTrie.java b/src/main/java/org/ahocorasick/trie/PayloadTrie.java index a5ddde0..1bc38cd 100644 --- a/src/main/java/org/ahocorasick/trie/PayloadTrie.java +++ b/src/main/java/org/ahocorasick/trie/PayloadTrie.java @@ -38,7 +38,7 @@ public class PayloadTrie { } /** - * Used by the builder to add a text search keyword with a emit payload. + * Used by the builder to add a text search keyword with an emit payload. * * @param keyword The search term to add to the list of search terms. * @param emit the payload to emit for this search term. @@ -129,7 +129,7 @@ public class PayloadTrie { * emitted outputs. * * @param text The character sequence to tokenize. - * @param emitHandler The emit handler that will be used to parse the text. + * @param emitHandler The handler that will be used to parse the text. * @return A collection of emits. */ @SuppressWarnings("unchecked") @@ -147,7 +147,7 @@ public class PayloadTrie { } /** - * Returns true if the text contains contains one of the search terms. Else, + * Returns true if the text contains one of the search terms; otherwise, * returns false. * * @param text Specified text. @@ -163,7 +163,7 @@ public class PayloadTrie { * emitted outputs. * * @param text The character sequence to tokenize. - * @param emitHandler The emit handler that will be used to parse the text. + * @param emitHandler The handler that will be used to parse the text. */ public void parseText(final CharSequence text, final PayloadEmitHandler emitHandler) { PayloadState currentState = getRootState(); @@ -186,10 +186,12 @@ public class PayloadTrie { /** * The first matching text sequence. * - * @param text The text to search for keywords. - * @return null if no matches found. + * @param text The text to search for keywords, must not be {@code null}. + * @return {@code null} if no matches found. */ public PayloadEmit firstMatch(final CharSequence text) { + assert text != null; + if (!trieConfig.isAllowOverlaps()) { // Slow path. Needs to find all the matches to detect overlaps. final Collection> parseText = parseText(text); @@ -358,8 +360,8 @@ public class PayloadTrie { } /** - * Adds a keyword to the Trie's list of text search keywords. No Payload is - * supplied. + * Adds a keyword to the {@link Trie}'s list of text search keywords. + * No {@link Payload} is supplied. * * @param keyword The keyword to add to the list. * @return This builder. @@ -371,7 +373,8 @@ public class PayloadTrie { } /** - * Adds a keyword and a payload to the Trie's list of text search keywords. + * Adds a keyword and a payload to the {@link Trie}'s list of text + * search keywords. * * @param keyword The keyword to add to the list. * @param payload the payload to add @@ -384,8 +387,8 @@ public class PayloadTrie { } /** - * Adds a list of keywords and payloads to the Trie's list of text search - * keywords. + * Adds a list of keywords and payloads to the {@link Trie}'s list of + * text search keywords. * * @param keywords The keywords to add to the list. * @return This builder. diff --git a/src/main/java/org/ahocorasick/trie/Trie.java b/src/main/java/org/ahocorasick/trie/Trie.java index 010788a..5ffee10 100644 --- a/src/main/java/org/ahocorasick/trie/Trie.java +++ b/src/main/java/org/ahocorasick/trie/Trie.java @@ -71,12 +71,18 @@ public class Trie { /** * The first matching text sequence. * - * @param text The text to search for keywords. - * @return null if no matches found. + * @param text The text to search for keywords, must not be {@code null}. + * @return {@code null} if no matches found. */ public Emit firstMatch(final CharSequence text) { - final PayloadEmit payload = this.payloadTrie.firstMatch(text); - return payload == null ? null : new Emit(payload.getStart(), payload.getEnd(), payload.getKeyword()); + assert text != null; + + final PayloadEmit payload = this.payloadTrie.firstMatch( text ); + return payload == null + ? null + : new Emit( payload.getStart(), + payload.getEnd(), + payload.getKeyword() ); } /** diff --git a/src/test/java/org/ahocorasick/trie/EmitTest.java b/src/test/java/org/ahocorasick/trie/EmitTest.java index 62e6a98..33f2d2c 100644 --- a/src/test/java/org/ahocorasick/trie/EmitTest.java +++ b/src/test/java/org/ahocorasick/trie/EmitTest.java @@ -2,8 +2,8 @@ package org.ahocorasick.trie; import org.junit.Test; -import static junit.framework.Assert.assertEquals; -import static junit.framework.Assert.assertNotSame; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; /** * Test the {@link Emit} class functionality. @@ -17,7 +17,7 @@ public class EmitTest { public void test_Equality_SameValues_ObjectsAreEqual() { final Emit one = new Emit(13, 42, null); final Emit two = new Emit(13, 42, null); - assertEquals(one, two); + assertEquals( one, two ); } /** @@ -27,6 +27,6 @@ public class EmitTest { public void test_Equality_DifferingValues_ObjectsAreNotEqual() { final Emit one = new Emit(13, 42, null); final Emit two = new Emit(13, 43, null); - assertNotSame(one, two); + assertNotEquals(one, two); } } diff --git a/src/test/java/org/ahocorasick/trie/PayloadTrieTest.java b/src/test/java/org/ahocorasick/trie/PayloadTrieTest.java index a69e462..a0fbaf3 100644 --- a/src/test/java/org/ahocorasick/trie/PayloadTrieTest.java +++ b/src/test/java/org/ahocorasick/trie/PayloadTrieTest.java @@ -5,13 +5,14 @@ import org.ahocorasick.trie.handler.PayloadEmitHandler; import org.ahocorasick.trie.handler.StatefulPayloadEmitHandler; import org.junit.Test; -import java.util.LinkedList; import java.util.Collection; import java.util.Iterator; +import java.util.LinkedList; import java.util.List; -import java.util.concurrent.ThreadLocalRandom; import static java.util.Arrays.asList; +import static org.ahocorasick.trie.TestHelper.injectKeyword; +import static org.ahocorasick.trie.TestHelper.randomNumbers; import static org.junit.Assert.*; public class PayloadTrieTest { @@ -279,7 +280,7 @@ public class PayloadTrieTest { @Test public void nonOverlapping() { - PayloadTrie trie = PayloadTrie.builder().removeOverlaps().addKeyword("ab", "alpha:ab") + PayloadTrie trie = PayloadTrie.builder().ignoreOverlaps().addKeyword("ab", "alpha:ab") .addKeyword("cba", "alpha:cba").addKeyword("ababc", "alpha:ababc").build(); Collection> emits = trie.parseText("ababcbab"); assertEquals(2, emits.size()); @@ -291,7 +292,7 @@ public class PayloadTrieTest { @Test public void nonOverlappingFirstMatch() { - PayloadTrie trie = PayloadTrie.builder().removeOverlaps().addKeyword("ab", "alpha:ab") + PayloadTrie trie = PayloadTrie.builder().ignoreOverlaps().addKeyword("ab", "alpha:ab") .addKeyword("cba", "alpha:cba").addKeyword("ababc", "alpha:ababc").build(); PayloadEmit firstMatch = trie.firstMatch("ababcbab"); @@ -300,14 +301,14 @@ public class PayloadTrieTest { @Test public void containsMatch() { - PayloadTrie trie = PayloadTrie.builder().removeOverlaps().addKeyword("ab", "alpha:ab") + PayloadTrie trie = PayloadTrie.builder().ignoreOverlaps().addKeyword("ab", "alpha:ab") .addKeyword("cba", "alpha:cba").addKeyword("ababc", "alpha:ababc").build(); assertTrue(trie.containsMatch("ababcbab")); } @Test public void startOfChurchillSpeech() { - PayloadTrie trie = PayloadTrie.builder().removeOverlaps().addKeyword("T").addKeyword("u").addKeyword("ur") + PayloadTrie trie = PayloadTrie.builder().ignoreOverlaps().addKeyword("T").addKeyword("u").addKeyword("ur") .addKeyword("r").addKeyword("urn").addKeyword("ni").addKeyword("i").addKeyword("in").addKeyword("n") .addKeyword("urning").build(); Collection> emits = trie.parseText("Turning"); @@ -449,7 +450,7 @@ public class PayloadTrieTest { @Test public void test_containsMatchWithCaseInsensitive() { - PayloadTrie trie = PayloadTrie.builder().caseInsensitive().addKeyword("foo", "bar").build(); + PayloadTrie trie = PayloadTrie.builder().ignoreCase().addKeyword("foo", "bar").build(); assertTrue(trie.containsMatch("FOOBAR")); assertFalse(trie.containsMatch("FO!?AR")); @@ -483,59 +484,36 @@ public class PayloadTrieTest { assertEquals(result1, result2); } - /** - * Generates a random sequence of ASCII numbers. - * - * @param count The number of numbers to generate. - * @return A character sequence filled with random digits. - */ - private StringBuilder randomNumbers(int count) { - final StringBuilder sb = new StringBuilder(count); - - while (--count > 0) { - sb.append(randomInt(0, 10)); - } - - return sb; - } - - /** - * Injects keywords into a string builder. - * - * @param source Should contain a bunch of random data that cannot match any - * keyword. - * @param keyword A keyword to inject repeatedly in the text. - * @param interval How often to inject the keyword. - */ - private void injectKeyword(final StringBuilder source, final String keyword, final int interval) { - final int length = source.length(); - for (int i = 0; i < length; i += interval) { - source.replace(i, i + keyword.length(), keyword); - } - } - - private int randomInt(final int min, final int max) { - return ThreadLocalRandom.current().nextInt(min, max); - } - - private void checkEmit(PayloadEmit next, int expectedStart, int expectedEnd, String expectedKeyword, - Food expectedPayload) { + private void checkEmit( + final PayloadEmit next, + final int expectedStart, + final int expectedEnd, + final String expectedKeyword, + final Food expectedPayload) { assertEquals("Start of emit should have been " + expectedStart, expectedStart, next.getStart()); assertEquals("End of emit should have been " + expectedEnd, expectedEnd, next.getEnd()); assertEquals("Keyword of emit shoud be " + expectedKeyword, expectedKeyword, next.getKeyword()); assertEquals("Payload of emit shoud be " + expectedPayload, expectedPayload, next.getPayload()); } - private void checkEmit(PayloadEmit next, int expectedStart, int expectedEnd, String expectedKeyword, - Integer expectedPayload) { + private void checkEmit( + final PayloadEmit next, + final int expectedStart, + final int expectedEnd, + final String expectedKeyword, + final Integer expectedPayload) { assertEquals("Start of emit should have been " + expectedStart, expectedStart, next.getStart()); assertEquals("End of emit should have been " + expectedEnd, expectedEnd, next.getEnd()); assertEquals("Keyword of emit shoud be " + expectedKeyword, expectedKeyword, next.getKeyword()); assertEquals("Payload of emit shoud be " + expectedPayload, expectedPayload, next.getPayload()); } - private void checkEmit(PayloadEmit next, int expectedStart, int expectedEnd, String expectedKeyword, - String expectedPayload) { + private void checkEmit( + final PayloadEmit next, + final int expectedStart, + final int expectedEnd, + final String expectedKeyword, + final String expectedPayload) { assertEquals("Start of emit should have been " + expectedStart, expectedStart, next.getStart()); assertEquals("End of emit should have been " + expectedEnd, expectedEnd, next.getEnd()); assertEquals("Keyword of emit shoud be " + expectedKeyword, expectedKeyword, next.getKeyword()); diff --git a/src/test/java/org/ahocorasick/trie/TestHelper.java b/src/test/java/org/ahocorasick/trie/TestHelper.java new file mode 100644 index 0000000..3893be9 --- /dev/null +++ b/src/test/java/org/ahocorasick/trie/TestHelper.java @@ -0,0 +1,44 @@ +package org.ahocorasick.trie; + +import static java.util.concurrent.ThreadLocalRandom.current; + +/** + * Contains functionality common to tests. + */ +public class TestHelper { + /** + * Injects keywords into a string builder. + * + * @param source Should contain a bunch of random data that cannot match + * any keyword. + * @param keyword A keyword to inject repeatedly in the text. + * @param interval How often to inject the keyword. + */ + @SuppressWarnings( "SameParameterValue" ) + static void injectKeyword( + final StringBuilder source, + final String keyword, + final int interval ) { + final int length = source.length(); + for( int i = 0; i < length; i += interval ) { + source.replace( i, i + keyword.length(), keyword ); + } + } + + /** + * Generates a random sequence of ASCII numbers. + * + * @param count The number of numbers to generate. + * @return A character sequence filled with random digits. + */ + @SuppressWarnings( "SameParameterValue" ) + public static StringBuilder randomNumbers( int count ) { + final StringBuilder sb = new StringBuilder( count ); + + while( --count > 0 ) { + sb.append( current().nextInt( 0, 10 ) ); + } + + return sb; + } +} diff --git a/src/test/java/org/ahocorasick/trie/TrieTest.java b/src/test/java/org/ahocorasick/trie/TrieTest.java index 228d12a..a72d987 100644 --- a/src/test/java/org/ahocorasick/trie/TrieTest.java +++ b/src/test/java/org/ahocorasick/trie/TrieTest.java @@ -9,13 +9,13 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Iterator; import java.util.List; -import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.atomic.AtomicInteger; import static java.lang.String.format; +import static org.ahocorasick.trie.TestHelper.injectKeyword; +import static org.ahocorasick.trie.TestHelper.randomNumbers; import static org.ahocorasick.trie.Trie.builder; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.Assert.*; /** * Test the {@link Trie} class functionality. @@ -41,11 +41,21 @@ public class TrieTest { "turning", "once", "again", "börkü" }; + private static Trie trie( final String keyword ) { + return Trie.builder() + .addKeyword( keyword ) + .build(); + } + + private static Trie trie( final String[] keywords ) { + return Trie.builder() + .addKeywords( keywords ) + .build(); + } + @Test public void test_KeywordAndTextAreTheSame() { - final Trie trie = Trie.builder() - .addKeyword( ALPHABET[ 0 ] ) - .build(); + final Trie trie = trie( ALPHABET[ 0 ] ); final Collection emits = trie.parseText( ALPHABET[ 0 ] ); final Iterator iterator = emits.iterator(); checkEmit( iterator.next(), 0, 2, ALPHABET[ 0 ] ); @@ -53,18 +63,14 @@ public class TrieTest { @Test public void test_KeywordAndTextAreTheSameFirstMatch() { - final Trie trie = Trie.builder() - .addKeyword( ALPHABET[ 0 ] ) - .build(); + final Trie trie = trie( ALPHABET[ 0 ] ); final Emit firstMatch = trie.firstMatch( ALPHABET[ 0 ] ); checkEmit( firstMatch, 0, 2, ALPHABET[ 0 ] ); } @Test public void test_TextIsLongerThanKeyword() { - final Trie trie = Trie.builder() - .addKeyword( ALPHABET[ 0 ] ) - .build(); + final Trie trie = trie( ALPHABET[ 0 ] ); final Collection emits = trie.parseText( " " + ALPHABET[ 0 ] ); final Iterator iterator = emits.iterator(); checkEmit( iterator.next(), 1, 3, ALPHABET[ 0 ] ); @@ -72,18 +78,14 @@ public class TrieTest { @Test public void test_TextIsLongerThanKeywordFirstMatch() { - final Trie trie = Trie.builder() - .addKeyword( ALPHABET[ 0 ] ) - .build(); + final Trie trie = trie( ALPHABET[ 0 ] ); final Emit firstMatch = trie.firstMatch( " " + ALPHABET[ 0 ] ); checkEmit( firstMatch, 1, 3, ALPHABET[ 0 ] ); } @Test public void test_VariousKeywordsOneMatch() { - final Trie trie = Trie.builder() - .addKeywords( ALPHABET ) - .build(); + final Trie trie = trie( ALPHABET ); final Collection emits = trie.parseText( "bcd" ); final Iterator iterator = emits.iterator(); checkEmit( iterator.next(), 0, 2, "bcd" ); @@ -91,13 +93,18 @@ public class TrieTest { @Test public void test_VariousKeywordsFirstMatch() { - final Trie trie = Trie.builder() - .addKeywords( ALPHABET ) - .build(); + final Trie trie = trie( ALPHABET ); final Emit firstMatch = trie.firstMatch( "bcd" ); checkEmit( firstMatch, 0, 2, "bcd" ); } + @Test(expected=AssertionError.class) + public void test_NullInputTextFirstMatch() { + final Trie trie = trie( ALPHABET ); + final Emit firstMatch = trie.firstMatch( null ); + assertNull( firstMatch ); + } + @Test public void test_UshersTestAndStopOnHit() { final Trie trie = Trie.builder() @@ -142,9 +149,7 @@ public class TrieTest { @Test public void test_UshersTest() { - final Trie trie = Trie.builder() - .addKeywords( PRONOUNS ) - .build(); + final Trie trie = trie( PRONOUNS ); final Collection emits = trie.parseText( "ushers" ); assertEquals( 3, emits.size() ); // she @ 3, he @ 3, hers @ 5 final Iterator iterator = emits.iterator(); @@ -172,19 +177,14 @@ public class TrieTest { @Test public void test_UshersTestFirstMatch() { - final Trie trie = Trie.builder() - .addKeywords( PRONOUNS ) - .build(); + final Trie trie = trie( PRONOUNS ); final Emit firstMatch = trie.firstMatch( "ushers" ); checkEmit( firstMatch, 2, 3, "he" ); } @Test public void test_UshersTestByCallback() { - final Trie trie = Trie.builder() - .addKeywords( PRONOUNS ) - .build(); - + final Trie trie = trie( PRONOUNS ); final List emits = new ArrayList<>(); final EmitHandler emitHandler = emit -> { emits.add( emit ); @@ -200,9 +200,7 @@ public class TrieTest { @Test public void test_MisleadingTest() { - final Trie trie = Trie.builder() - .addKeyword( "hers" ) - .build(); + final Trie trie = trie( "hers" ); final Collection emits = trie.parseText( "h he her hers" ); final Iterator iterator = emits.iterator(); checkEmit( iterator.next(), 9, 12, "hers" ); @@ -210,18 +208,14 @@ public class TrieTest { @Test public void test_MisleadingTestFirstMatch() { - final Trie trie = Trie.builder() - .addKeyword( "hers" ) - .build(); + final Trie trie = trie( "hers" ); final Emit firstMatch = trie.firstMatch( "h he her hers" ); checkEmit( firstMatch, 9, 12, "hers" ); } @Test public void test_Recipes() { - final Trie trie = Trie.builder() - .addKeywords( FOOD ) - .build(); + final Trie trie = trie( FOOD ); final Collection emits = trie.parseText( "2 cauliflowers, 3 tomatoes, 4 slices of veal, 100g broccoli" ); final Iterator iterator = emits.iterator(); @@ -233,9 +227,7 @@ public class TrieTest { @Test public void test_RecipesFirstMatch() { - final Trie trie = Trie.builder() - .addKeywords( FOOD ) - .build(); + final Trie trie = trie( FOOD ); final Emit firstMatch = trie.firstMatch( "2 cauliflowers, 3 tomatoes, 4 slices of veal, 100g broccoli" ); @@ -261,7 +253,8 @@ public class TrieTest { @Test public void test_NonOverlapping() { - final Trie trie = Trie.builder().ignoreOverlaps() + final Trie trie = Trie.builder() + .ignoreOverlaps() .addKeyword( "ab" ) .addKeyword( "cba" ) .addKeyword( "ababc" ) @@ -276,7 +269,8 @@ public class TrieTest { @Test public void test_NonOverlappingFirstMatch() { - final Trie trie = Trie.builder().ignoreOverlaps() + final Trie trie = Trie.builder() + .ignoreOverlaps() .addKeyword( "ab" ) .addKeyword( "cba" ) .addKeyword( "ababc" ) @@ -288,7 +282,8 @@ public class TrieTest { @Test public void test_ContainsMatch() { - final Trie trie = Trie.builder().ignoreOverlaps() + final Trie trie = Trie.builder() + .ignoreOverlaps() .addKeyword( "ab" ) .addKeyword( "cba" ) .addKeyword( "ababc" ) @@ -298,7 +293,8 @@ public class TrieTest { @Test public void test_StartOfChurchillSpeech() { - final Trie trie = Trie.builder().ignoreOverlaps() + final Trie trie = Trie.builder() + .ignoreOverlaps() .addKeyword( "T" ) .addKeyword( "u" ) .addKeyword( "ur" ) @@ -342,9 +338,7 @@ public class TrieTest { @Test public void test_TokenizeFullSentence() { - final Trie trie = Trie.builder() - .addKeywords( GREEK_LETTERS ) - .build(); + final Trie trie = trie( GREEK_LETTERS ); final Collection tokens = trie.tokenize( "Hear: Alpha team first, Beta from the rear, Gamma in reserve" ); assertEquals( 7, tokens.size() ); @@ -363,7 +357,9 @@ public class TrieTest { */ @Test public void test_StringIndexOutOfBoundsException() { - final Trie trie = Trie.builder().ignoreCase().onlyWholeWords() + final Trie trie = Trie.builder() + .ignoreCase() + .onlyWholeWords() .addKeywords( UNICODE ) .build(); final Collection emits = trie.parseText( "TurninG OnCe AgAiN BÖRKÜ" ); @@ -377,7 +373,8 @@ public class TrieTest { @Test public void test_IgnoreCase() { - final Trie trie = Trie.builder().ignoreCase() + final Trie trie = Trie.builder() + .ignoreCase() .addKeywords( UNICODE ) .build(); final Collection emits = trie.parseText( "TurninG OnCe AgAiN BÖRKÜ" ); @@ -391,7 +388,8 @@ public class TrieTest { @Test public void test_IgnoreCaseFirstMatch() { - final Trie trie = Trie.builder().ignoreCase() + final Trie trie = Trie.builder() + .ignoreCase() .addKeywords( UNICODE ) .build(); final Emit firstMatch = trie.firstMatch( "TurninG OnCe AgAiN BÖRKÜ" ); @@ -401,9 +399,7 @@ public class TrieTest { @Test public void test_TokenizeTokensInSequence() { - final Trie trie = Trie.builder() - .addKeywords( GREEK_LETTERS ) - .build(); + final Trie trie = trie( GREEK_LETTERS ); final Collection tokens = trie.tokenize( "Alpha Beta Gamma" ); assertEquals( 5, tokens.size() ); } @@ -585,44 +581,6 @@ public class TrieTest { assertEquals( 0, nonMatchCount.get() ); } - /** - * Generates a random sequence of ASCII numbers. - * - * @param count The number of numbers to generate. - * @return A character sequence filled with random digits. - */ - private StringBuilder randomNumbers( int count ) { - final StringBuilder sb = new StringBuilder( count ); - - while( --count > 0 ) { - sb.append( randomInt( 0, 10 ) ); - } - - return sb; - } - - /** - * Injects keywords into a string builder. - * - * @param source Should contain a bunch of random data that cannot match - * any keyword. - * @param keyword A keyword to inject repeatedly in the text. - * @param interval How often to inject the keyword. - */ - private void injectKeyword( - final StringBuilder source, - final String keyword, - final int interval ) { - final int length = source.length(); - for( int i = 0; i < length; i += interval ) { - source.replace( i, i + keyword.length(), keyword ); - } - } - - private int randomInt( final int min, final int max ) { - return ThreadLocalRandom.current().nextInt( min, max ); - } - private void checkEmit( Emit next, int expectedStart, int expectedEnd, String expectedKeyword ) { assertEquals( "Start of emit should have been " + expectedStart,