Address underspecified API wrt null text, simplify tests

This commit is contained in:
Dave Jarvis 2022-12-08 19:56:02 -08:00
parent c54b19ae4f
commit d7d0dcc98f
7 changed files with 153 additions and 164 deletions

View File

@ -57,7 +57,7 @@
<java.version>1.8</java.version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<junit.version>4.13.1</junit.version>
<junit.version>4.13.2</junit.version>
<!-- Reporting -->
<maven.cobertura.version>2.5.2</maven.cobertura.version>
<maven.javadoc.version>2.8</maven.javadoc.version>
@ -115,7 +115,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>3.2.0</version>
<version>3.4.1</version>
<executions>
<execution>
<id>attach-javadocs</id>
@ -132,7 +132,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<version>3.0.1</version>
<version>3.2.1</version>
<executions>
<execution>
<id>attach-sources</id>

View File

@ -38,7 +38,7 @@ public class PayloadTrie<T> {
}
/**
* Used by the builder to add a text search keyword with a emit payload.
* Used by the builder to add a text search keyword with an emit payload.
*
* @param keyword The search term to add to the list of search terms.
* @param emit the payload to emit for this search term.
@ -129,7 +129,7 @@ public class PayloadTrie<T> {
* emitted outputs.
*
* @param text The character sequence to tokenize.
* @param emitHandler The emit handler that will be used to parse the text.
* @param emitHandler The handler that will be used to parse the text.
* @return A collection of emits.
*/
@SuppressWarnings("unchecked")
@ -147,7 +147,7 @@ public class PayloadTrie<T> {
}
/**
* Returns true if the text contains contains one of the search terms. Else,
* Returns true if the text contains one of the search terms; otherwise,
* returns false.
*
* @param text Specified text.
@ -163,7 +163,7 @@ public class PayloadTrie<T> {
* emitted outputs.
*
* @param text The character sequence to tokenize.
* @param emitHandler The emit handler that will be used to parse the text.
* @param emitHandler The handler that will be used to parse the text.
*/
public void parseText(final CharSequence text, final PayloadEmitHandler<T> emitHandler) {
PayloadState<T> currentState = getRootState();
@ -186,10 +186,12 @@ public class PayloadTrie<T> {
/**
* The first matching text sequence.
*
* @param text The text to search for keywords.
* @return null if no matches found.
* @param text The text to search for keywords, must not be {@code null}.
* @return {@code null} if no matches found.
*/
public PayloadEmit<T> firstMatch(final CharSequence text) {
assert text != null;
if (!trieConfig.isAllowOverlaps()) {
// Slow path. Needs to find all the matches to detect overlaps.
final Collection<PayloadEmit<T>> parseText = parseText(text);
@ -358,8 +360,8 @@ public class PayloadTrie<T> {
}
/**
* Adds a keyword to the Trie's list of text search keywords. No Payload is
* supplied.
* Adds a keyword to the {@link Trie}'s list of text search keywords.
* No {@link Payload} is supplied.
*
* @param keyword The keyword to add to the list.
* @return This builder.
@ -371,7 +373,8 @@ public class PayloadTrie<T> {
}
/**
* Adds a keyword and a payload to the Trie's list of text search keywords.
* Adds a keyword and a payload to the {@link Trie}'s list of text
* search keywords.
*
* @param keyword The keyword to add to the list.
* @param payload the payload to add
@ -384,8 +387,8 @@ public class PayloadTrie<T> {
}
/**
* Adds a list of keywords and payloads to the Trie's list of text search
* keywords.
* Adds a list of keywords and payloads to the {@link Trie}'s list of
* text search keywords.
*
* @param keywords The keywords to add to the list.
* @return This builder.

View File

@ -71,12 +71,18 @@ public class Trie {
/**
* The first matching text sequence.
*
* @param text The text to search for keywords.
* @return null if no matches found.
* @param text The text to search for keywords, must not be {@code null}.
* @return {@code null} if no matches found.
*/
public Emit firstMatch(final CharSequence text) {
final PayloadEmit<String> payload = this.payloadTrie.firstMatch(text);
return payload == null ? null : new Emit(payload.getStart(), payload.getEnd(), payload.getKeyword());
assert text != null;
final PayloadEmit<String> payload = this.payloadTrie.firstMatch( text );
return payload == null
? null
: new Emit( payload.getStart(),
payload.getEnd(),
payload.getKeyword() );
}
/**

View File

@ -2,8 +2,8 @@ package org.ahocorasick.trie;
import org.junit.Test;
import static junit.framework.Assert.assertEquals;
import static junit.framework.Assert.assertNotSame;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;
/**
* Test the {@link Emit} class functionality.
@ -17,7 +17,7 @@ public class EmitTest {
public void test_Equality_SameValues_ObjectsAreEqual() {
final Emit one = new Emit(13, 42, null);
final Emit two = new Emit(13, 42, null);
assertEquals(one, two);
assertEquals( one, two );
}
/**
@ -27,6 +27,6 @@ public class EmitTest {
public void test_Equality_DifferingValues_ObjectsAreNotEqual() {
final Emit one = new Emit(13, 42, null);
final Emit two = new Emit(13, 43, null);
assertNotSame(one, two);
assertNotEquals(one, two);
}
}

View File

@ -5,13 +5,14 @@ import org.ahocorasick.trie.handler.PayloadEmitHandler;
import org.ahocorasick.trie.handler.StatefulPayloadEmitHandler;
import org.junit.Test;
import java.util.LinkedList;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.ThreadLocalRandom;
import static java.util.Arrays.asList;
import static org.ahocorasick.trie.TestHelper.injectKeyword;
import static org.ahocorasick.trie.TestHelper.randomNumbers;
import static org.junit.Assert.*;
public class PayloadTrieTest {
@ -279,7 +280,7 @@ public class PayloadTrieTest {
@Test
public void nonOverlapping() {
PayloadTrie<String> trie = PayloadTrie.<String>builder().removeOverlaps().addKeyword("ab", "alpha:ab")
PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreOverlaps().addKeyword("ab", "alpha:ab")
.addKeyword("cba", "alpha:cba").addKeyword("ababc", "alpha:ababc").build();
Collection<PayloadEmit<String>> emits = trie.parseText("ababcbab");
assertEquals(2, emits.size());
@ -291,7 +292,7 @@ public class PayloadTrieTest {
@Test
public void nonOverlappingFirstMatch() {
PayloadTrie<String> trie = PayloadTrie.<String>builder().removeOverlaps().addKeyword("ab", "alpha:ab")
PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreOverlaps().addKeyword("ab", "alpha:ab")
.addKeyword("cba", "alpha:cba").addKeyword("ababc", "alpha:ababc").build();
PayloadEmit<String> firstMatch = trie.firstMatch("ababcbab");
@ -300,14 +301,14 @@ public class PayloadTrieTest {
@Test
public void containsMatch() {
PayloadTrie<String> trie = PayloadTrie.<String>builder().removeOverlaps().addKeyword("ab", "alpha:ab")
PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreOverlaps().addKeyword("ab", "alpha:ab")
.addKeyword("cba", "alpha:cba").addKeyword("ababc", "alpha:ababc").build();
assertTrue(trie.containsMatch("ababcbab"));
}
@Test
public void startOfChurchillSpeech() {
PayloadTrie<String> trie = PayloadTrie.<String>builder().removeOverlaps().addKeyword("T").addKeyword("u").addKeyword("ur")
PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreOverlaps().addKeyword("T").addKeyword("u").addKeyword("ur")
.addKeyword("r").addKeyword("urn").addKeyword("ni").addKeyword("i").addKeyword("in").addKeyword("n")
.addKeyword("urning").build();
Collection<PayloadEmit<String>> emits = trie.parseText("Turning");
@ -449,7 +450,7 @@ public class PayloadTrieTest {
@Test
public void test_containsMatchWithCaseInsensitive() {
PayloadTrie<String> trie = PayloadTrie.<String>builder().caseInsensitive().addKeyword("foo", "bar").build();
PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreCase().addKeyword("foo", "bar").build();
assertTrue(trie.containsMatch("FOOBAR"));
assertFalse(trie.containsMatch("FO!?AR"));
@ -483,59 +484,36 @@ public class PayloadTrieTest {
assertEquals(result1, result2);
}
/**
* Generates a random sequence of ASCII numbers.
*
* @param count The number of numbers to generate.
* @return A character sequence filled with random digits.
*/
private StringBuilder randomNumbers(int count) {
final StringBuilder sb = new StringBuilder(count);
while (--count > 0) {
sb.append(randomInt(0, 10));
}
return sb;
}
/**
* Injects keywords into a string builder.
*
* @param source Should contain a bunch of random data that cannot match any
* keyword.
* @param keyword A keyword to inject repeatedly in the text.
* @param interval How often to inject the keyword.
*/
private void injectKeyword(final StringBuilder source, final String keyword, final int interval) {
final int length = source.length();
for (int i = 0; i < length; i += interval) {
source.replace(i, i + keyword.length(), keyword);
}
}
private int randomInt(final int min, final int max) {
return ThreadLocalRandom.current().nextInt(min, max);
}
private void checkEmit(PayloadEmit<Food> next, int expectedStart, int expectedEnd, String expectedKeyword,
Food expectedPayload) {
private void checkEmit(
final PayloadEmit<Food> next,
final int expectedStart,
final int expectedEnd,
final String expectedKeyword,
final Food expectedPayload) {
assertEquals("Start of emit should have been " + expectedStart, expectedStart, next.getStart());
assertEquals("End of emit should have been " + expectedEnd, expectedEnd, next.getEnd());
assertEquals("Keyword of emit shoud be " + expectedKeyword, expectedKeyword, next.getKeyword());
assertEquals("Payload of emit shoud be " + expectedPayload, expectedPayload, next.getPayload());
}
private void checkEmit(PayloadEmit<Integer> next, int expectedStart, int expectedEnd, String expectedKeyword,
Integer expectedPayload) {
private void checkEmit(
final PayloadEmit<Integer> next,
final int expectedStart,
final int expectedEnd,
final String expectedKeyword,
final Integer expectedPayload) {
assertEquals("Start of emit should have been " + expectedStart, expectedStart, next.getStart());
assertEquals("End of emit should have been " + expectedEnd, expectedEnd, next.getEnd());
assertEquals("Keyword of emit shoud be " + expectedKeyword, expectedKeyword, next.getKeyword());
assertEquals("Payload of emit shoud be " + expectedPayload, expectedPayload, next.getPayload());
}
private void checkEmit(PayloadEmit<String> next, int expectedStart, int expectedEnd, String expectedKeyword,
String expectedPayload) {
private void checkEmit(
final PayloadEmit<String> next,
final int expectedStart,
final int expectedEnd,
final String expectedKeyword,
final String expectedPayload) {
assertEquals("Start of emit should have been " + expectedStart, expectedStart, next.getStart());
assertEquals("End of emit should have been " + expectedEnd, expectedEnd, next.getEnd());
assertEquals("Keyword of emit shoud be " + expectedKeyword, expectedKeyword, next.getKeyword());

View File

@ -0,0 +1,44 @@
package org.ahocorasick.trie;
import static java.util.concurrent.ThreadLocalRandom.current;
/**
 * Contains functionality common to tests: generating random digit text and
 * seeding that text with keywords at regular intervals.
 */
public class TestHelper {
  /**
   * Static helpers only; not meant to be instantiated.
   */
  private TestHelper() {
  }

  /**
   * Injects keywords into a string builder, overwriting the characters at
   * offsets {@code 0, interval, 2 * interval, ...} for as long as the offset
   * is less than the length the builder had when this method was called.
   *
   * @param source   Should contain a bunch of random data that cannot match
   *                 any keyword.
   * @param keyword  A keyword to inject repeatedly in the text.
   * @param interval How often to inject the keyword, must be positive.
   * @throws IllegalArgumentException if {@code interval} is not positive.
   */
  @SuppressWarnings( "SameParameterValue" )
  static void injectKeyword(
    final StringBuilder source,
    final String keyword,
    final int interval ) {
    // A zero interval would never advance the offset (endless loop) and a
    // negative one would walk the offset below zero.
    if( interval <= 0 ) {
      throw new IllegalArgumentException(
        "interval must be positive, but was " + interval );
    }

    // Captured once: the number of injections is governed by the original
    // length, even if a replacement near the tail makes the builder grow.
    final int length = source.length();

    for( int i = 0; i < length; i += interval ) {
      // StringBuilder.replace clamps the end index to the current length, so
      // a keyword that overhangs the tail is appended in full, not truncated.
      source.replace( i, i + keyword.length(), keyword );
    }
  }

  /**
   * Generates a random sequence of ASCII numbers.
   *
   * @param count The number of numbers to generate, must not be negative.
   * @return A character sequence filled with exactly {@code count} random
   * digits in the range {@code 0} through {@code 9}.
   */
  @SuppressWarnings( "SameParameterValue" )
  public static StringBuilder randomNumbers( final int count ) {
    final StringBuilder sb = new StringBuilder( count );

    // Iterate exactly count times; the previous pre-decrement loop stopped
    // one digit short of the requested length.
    for( int i = 0; i < count; i++ ) {
      sb.append( current().nextInt( 0, 10 ) );
    }

    return sb;
  }
}

View File

@ -9,13 +9,13 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.atomic.AtomicInteger;
import static java.lang.String.format;
import static org.ahocorasick.trie.TestHelper.injectKeyword;
import static org.ahocorasick.trie.TestHelper.randomNumbers;
import static org.ahocorasick.trie.Trie.builder;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.*;
/**
* Test the {@link Trie} class functionality.
@ -41,11 +41,21 @@ public class TrieTest {
"turning", "once", "again", "börkü"
};
/**
 * Builds a {@link Trie} that searches for a single keyword, with no other
 * builder options applied.
 *
 * @param keyword The only search term to register.
 * @return The built trie.
 */
private static Trie trie( final String keyword ) {
  return builder().addKeyword( keyword ).build();
}
/**
 * Builds a {@link Trie} that searches for every keyword in the given array,
 * with no other builder options applied.
 *
 * @param keywords The search terms to register.
 * @return The built trie.
 */
private static Trie trie( final String[] keywords ) {
  return builder().addKeywords( keywords ).build();
}
@Test
public void test_KeywordAndTextAreTheSame() {
final Trie trie = Trie.builder()
.addKeyword( ALPHABET[ 0 ] )
.build();
final Trie trie = trie( ALPHABET[ 0 ] );
final Collection<Emit> emits = trie.parseText( ALPHABET[ 0 ] );
final Iterator<Emit> iterator = emits.iterator();
checkEmit( iterator.next(), 0, 2, ALPHABET[ 0 ] );
@ -53,18 +63,14 @@ public class TrieTest {
@Test
public void test_KeywordAndTextAreTheSameFirstMatch() {
final Trie trie = Trie.builder()
.addKeyword( ALPHABET[ 0 ] )
.build();
final Trie trie = trie( ALPHABET[ 0 ] );
final Emit firstMatch = trie.firstMatch( ALPHABET[ 0 ] );
checkEmit( firstMatch, 0, 2, ALPHABET[ 0 ] );
}
@Test
public void test_TextIsLongerThanKeyword() {
final Trie trie = Trie.builder()
.addKeyword( ALPHABET[ 0 ] )
.build();
final Trie trie = trie( ALPHABET[ 0 ] );
final Collection<Emit> emits = trie.parseText( " " + ALPHABET[ 0 ] );
final Iterator<Emit> iterator = emits.iterator();
checkEmit( iterator.next(), 1, 3, ALPHABET[ 0 ] );
@ -72,18 +78,14 @@ public class TrieTest {
@Test
public void test_TextIsLongerThanKeywordFirstMatch() {
final Trie trie = Trie.builder()
.addKeyword( ALPHABET[ 0 ] )
.build();
final Trie trie = trie( ALPHABET[ 0 ] );
final Emit firstMatch = trie.firstMatch( " " + ALPHABET[ 0 ] );
checkEmit( firstMatch, 1, 3, ALPHABET[ 0 ] );
}
@Test
public void test_VariousKeywordsOneMatch() {
final Trie trie = Trie.builder()
.addKeywords( ALPHABET )
.build();
final Trie trie = trie( ALPHABET );
final Collection<Emit> emits = trie.parseText( "bcd" );
final Iterator<Emit> iterator = emits.iterator();
checkEmit( iterator.next(), 0, 2, "bcd" );
@ -91,13 +93,18 @@ public class TrieTest {
@Test
public void test_VariousKeywordsFirstMatch() {
final Trie trie = Trie.builder()
.addKeywords( ALPHABET )
.build();
final Trie trie = trie( ALPHABET );
final Emit firstMatch = trie.firstMatch( "bcd" );
checkEmit( firstMatch, 0, 2, "bcd" );
}
/**
 * Passing {@code null} text to {@code firstMatch} must trip the method's
 * {@code assert text != null} guard. The expected {@link AssertionError} is
 * only raised when the JVM runs with assertions enabled.
 */
@Test(expected=AssertionError.class)
public void test_NullInputTextFirstMatch() {
  assertNull( trie( ALPHABET ).firstMatch( null ) );
}
@Test
public void test_UshersTestAndStopOnHit() {
final Trie trie = Trie.builder()
@ -142,9 +149,7 @@ public class TrieTest {
@Test
public void test_UshersTest() {
final Trie trie = Trie.builder()
.addKeywords( PRONOUNS )
.build();
final Trie trie = trie( PRONOUNS );
final Collection<Emit> emits = trie.parseText( "ushers" );
assertEquals( 3, emits.size() ); // she @ 3, he @ 3, hers @ 5
final Iterator<Emit> iterator = emits.iterator();
@ -172,19 +177,14 @@ public class TrieTest {
@Test
public void test_UshersTestFirstMatch() {
final Trie trie = Trie.builder()
.addKeywords( PRONOUNS )
.build();
final Trie trie = trie( PRONOUNS );
final Emit firstMatch = trie.firstMatch( "ushers" );
checkEmit( firstMatch, 2, 3, "he" );
}
@Test
public void test_UshersTestByCallback() {
final Trie trie = Trie.builder()
.addKeywords( PRONOUNS )
.build();
final Trie trie = trie( PRONOUNS );
final List<Emit> emits = new ArrayList<>();
final EmitHandler emitHandler = emit -> {
emits.add( emit );
@ -200,9 +200,7 @@ public class TrieTest {
@Test
public void test_MisleadingTest() {
final Trie trie = Trie.builder()
.addKeyword( "hers" )
.build();
final Trie trie = trie( "hers" );
final Collection<Emit> emits = trie.parseText( "h he her hers" );
final Iterator<Emit> iterator = emits.iterator();
checkEmit( iterator.next(), 9, 12, "hers" );
@ -210,18 +208,14 @@ public class TrieTest {
@Test
public void test_MisleadingTestFirstMatch() {
final Trie trie = Trie.builder()
.addKeyword( "hers" )
.build();
final Trie trie = trie( "hers" );
final Emit firstMatch = trie.firstMatch( "h he her hers" );
checkEmit( firstMatch, 9, 12, "hers" );
}
@Test
public void test_Recipes() {
final Trie trie = Trie.builder()
.addKeywords( FOOD )
.build();
final Trie trie = trie( FOOD );
final Collection<Emit> emits = trie.parseText(
"2 cauliflowers, 3 tomatoes, 4 slices of veal, 100g broccoli" );
final Iterator<Emit> iterator = emits.iterator();
@ -233,9 +227,7 @@ public class TrieTest {
@Test
public void test_RecipesFirstMatch() {
final Trie trie = Trie.builder()
.addKeywords( FOOD )
.build();
final Trie trie = trie( FOOD );
final Emit firstMatch = trie.firstMatch(
"2 cauliflowers, 3 tomatoes, 4 slices of veal, 100g broccoli" );
@ -261,7 +253,8 @@ public class TrieTest {
@Test
public void test_NonOverlapping() {
final Trie trie = Trie.builder().ignoreOverlaps()
final Trie trie = Trie.builder()
.ignoreOverlaps()
.addKeyword( "ab" )
.addKeyword( "cba" )
.addKeyword( "ababc" )
@ -276,7 +269,8 @@ public class TrieTest {
@Test
public void test_NonOverlappingFirstMatch() {
final Trie trie = Trie.builder().ignoreOverlaps()
final Trie trie = Trie.builder()
.ignoreOverlaps()
.addKeyword( "ab" )
.addKeyword( "cba" )
.addKeyword( "ababc" )
@ -288,7 +282,8 @@ public class TrieTest {
@Test
public void test_ContainsMatch() {
final Trie trie = Trie.builder().ignoreOverlaps()
final Trie trie = Trie.builder()
.ignoreOverlaps()
.addKeyword( "ab" )
.addKeyword( "cba" )
.addKeyword( "ababc" )
@ -298,7 +293,8 @@ public class TrieTest {
@Test
public void test_StartOfChurchillSpeech() {
final Trie trie = Trie.builder().ignoreOverlaps()
final Trie trie = Trie.builder()
.ignoreOverlaps()
.addKeyword( "T" )
.addKeyword( "u" )
.addKeyword( "ur" )
@ -342,9 +338,7 @@ public class TrieTest {
@Test
public void test_TokenizeFullSentence() {
final Trie trie = Trie.builder()
.addKeywords( GREEK_LETTERS )
.build();
final Trie trie = trie( GREEK_LETTERS );
final Collection<Token> tokens = trie.tokenize(
"Hear: Alpha team first, Beta from the rear, Gamma in reserve" );
assertEquals( 7, tokens.size() );
@ -363,7 +357,9 @@ public class TrieTest {
*/
@Test
public void test_StringIndexOutOfBoundsException() {
final Trie trie = Trie.builder().ignoreCase().onlyWholeWords()
final Trie trie = Trie.builder()
.ignoreCase()
.onlyWholeWords()
.addKeywords( UNICODE )
.build();
final Collection<Emit> emits = trie.parseText( "TurninG OnCe AgAiN BÖRKÜ" );
@ -377,7 +373,8 @@ public class TrieTest {
@Test
public void test_IgnoreCase() {
final Trie trie = Trie.builder().ignoreCase()
final Trie trie = Trie.builder()
.ignoreCase()
.addKeywords( UNICODE )
.build();
final Collection<Emit> emits = trie.parseText( "TurninG OnCe AgAiN BÖRKÜ" );
@ -391,7 +388,8 @@ public class TrieTest {
@Test
public void test_IgnoreCaseFirstMatch() {
final Trie trie = Trie.builder().ignoreCase()
final Trie trie = Trie.builder()
.ignoreCase()
.addKeywords( UNICODE )
.build();
final Emit firstMatch = trie.firstMatch( "TurninG OnCe AgAiN BÖRKÜ" );
@ -401,9 +399,7 @@ public class TrieTest {
@Test
public void test_TokenizeTokensInSequence() {
final Trie trie = Trie.builder()
.addKeywords( GREEK_LETTERS )
.build();
final Trie trie = trie( GREEK_LETTERS );
final Collection<Token> tokens = trie.tokenize( "Alpha Beta Gamma" );
assertEquals( 5, tokens.size() );
}
@ -585,44 +581,6 @@ public class TrieTest {
assertEquals( 0, nonMatchCount.get() );
}
/**
* Generates a random sequence of ASCII numbers.
*
* @param count The number of numbers to generate.
* @return A character sequence filled with random digits.
*/
private StringBuilder randomNumbers( int count ) {
final StringBuilder sb = new StringBuilder( count );
while( --count > 0 ) {
sb.append( randomInt( 0, 10 ) );
}
return sb;
}
/**
* Injects keywords into a string builder.
*
* @param source Should contain a bunch of random data that cannot match
* any keyword.
* @param keyword A keyword to inject repeatedly in the text.
* @param interval How often to inject the keyword.
*/
private void injectKeyword(
final StringBuilder source,
final String keyword,
final int interval ) {
final int length = source.length();
for( int i = 0; i < length; i += interval ) {
source.replace( i, i + keyword.length(), keyword );
}
}
private int randomInt( final int min, final int max ) {
return ThreadLocalRandom.current().nextInt( min, max );
}
private void checkEmit( Emit next, int expectedStart, int expectedEnd,
String expectedKeyword ) {
assertEquals( "Start of emit should have been " + expectedStart,