Address underspecified API wrt null text, simplify tests
This commit is contained in:
parent
c54b19ae4f
commit
d7d0dcc98f
6
pom.xml
6
pom.xml
@ -57,7 +57,7 @@
|
||||
<java.version>1.8</java.version>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
|
||||
<junit.version>4.13.1</junit.version>
|
||||
<junit.version>4.13.2</junit.version>
|
||||
<!-- Reporting -->
|
||||
<maven.cobertura.version>2.5.2</maven.cobertura.version>
|
||||
<maven.javadoc.version>2.8</maven.javadoc.version>
|
||||
@ -115,7 +115,7 @@
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-javadoc-plugin</artifactId>
|
||||
<version>3.2.0</version>
|
||||
<version>3.4.1</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>attach-javadocs</id>
|
||||
@ -132,7 +132,7 @@
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-source-plugin</artifactId>
|
||||
<version>3.0.1</version>
|
||||
<version>3.2.1</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>attach-sources</id>
|
||||
|
||||
@ -38,7 +38,7 @@ public class PayloadTrie<T> {
|
||||
}
|
||||
|
||||
/**
|
||||
* Used by the builder to add a text search keyword with a emit payload.
|
||||
* Used by the builder to add a text search keyword with an emit payload.
|
||||
*
|
||||
* @param keyword The search term to add to the list of search terms.
|
||||
* @param emit the payload to emit for this search term.
|
||||
@ -129,7 +129,7 @@ public class PayloadTrie<T> {
|
||||
* emitted outputs.
|
||||
*
|
||||
* @param text The character sequence to tokenize.
|
||||
* @param emitHandler The emit handler that will be used to parse the text.
|
||||
* @param emitHandler The handler that will be used to parse the text.
|
||||
* @return A collection of emits.
|
||||
*/
|
||||
@SuppressWarnings("unchecked")
|
||||
@ -147,7 +147,7 @@ public class PayloadTrie<T> {
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the text contains contains one of the search terms. Else,
|
||||
* Returns true if the text contains one of the search terms; otherwise,
|
||||
* returns false.
|
||||
*
|
||||
* @param text Specified text.
|
||||
@ -163,7 +163,7 @@ public class PayloadTrie<T> {
|
||||
* emitted outputs.
|
||||
*
|
||||
* @param text The character sequence to tokenize.
|
||||
* @param emitHandler The emit handler that will be used to parse the text.
|
||||
* @param emitHandler The handler that will be used to parse the text.
|
||||
*/
|
||||
public void parseText(final CharSequence text, final PayloadEmitHandler<T> emitHandler) {
|
||||
PayloadState<T> currentState = getRootState();
|
||||
@ -186,10 +186,12 @@ public class PayloadTrie<T> {
|
||||
/**
|
||||
* The first matching text sequence.
|
||||
*
|
||||
* @param text The text to search for keywords.
|
||||
* @return null if no matches found.
|
||||
* @param text The text to search for keywords, must not be {@code null}.
|
||||
* @return {@code null} if no matches found.
|
||||
*/
|
||||
public PayloadEmit<T> firstMatch(final CharSequence text) {
|
||||
assert text != null;
|
||||
|
||||
if (!trieConfig.isAllowOverlaps()) {
|
||||
// Slow path. Needs to find all the matches to detect overlaps.
|
||||
final Collection<PayloadEmit<T>> parseText = parseText(text);
|
||||
@ -358,8 +360,8 @@ public class PayloadTrie<T> {
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a keyword to the Trie's list of text search keywords. No Payload is
|
||||
* supplied.
|
||||
* Adds a keyword to the {@link Trie}'s list of text search keywords.
|
||||
* No {@link Payload} is supplied.
|
||||
*
|
||||
* @param keyword The keyword to add to the list.
|
||||
* @return This builder.
|
||||
@ -371,7 +373,8 @@ public class PayloadTrie<T> {
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a keyword and a payload to the Trie's list of text search keywords.
|
||||
* Adds a keyword and a payload to the {@link Trie}'s list of text
|
||||
* search keywords.
|
||||
*
|
||||
* @param keyword The keyword to add to the list.
|
||||
* @param payload the payload to add
|
||||
@ -384,8 +387,8 @@ public class PayloadTrie<T> {
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a list of keywords and payloads to the Trie's list of text search
|
||||
* keywords.
|
||||
* Adds a list of keywords and payloads to the {@link Trie}'s list of
|
||||
* text search keywords.
|
||||
*
|
||||
* @param keywords The keywords to add to the list.
|
||||
* @return This builder.
|
||||
|
||||
@ -71,12 +71,18 @@ public class Trie {
|
||||
/**
|
||||
* The first matching text sequence.
|
||||
*
|
||||
* @param text The text to search for keywords.
|
||||
* @return null if no matches found.
|
||||
* @param text The text to search for keywords, must not be {@code null}.
|
||||
* @return {@code null} if no matches found.
|
||||
*/
|
||||
public Emit firstMatch(final CharSequence text) {
|
||||
final PayloadEmit<String> payload = this.payloadTrie.firstMatch(text);
|
||||
return payload == null ? null : new Emit(payload.getStart(), payload.getEnd(), payload.getKeyword());
|
||||
assert text != null;
|
||||
|
||||
final PayloadEmit<String> payload = this.payloadTrie.firstMatch( text );
|
||||
return payload == null
|
||||
? null
|
||||
: new Emit( payload.getStart(),
|
||||
payload.getEnd(),
|
||||
payload.getKeyword() );
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@ -2,8 +2,8 @@ package org.ahocorasick.trie;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
import static junit.framework.Assert.assertEquals;
|
||||
import static junit.framework.Assert.assertNotSame;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertNotEquals;
|
||||
|
||||
/**
|
||||
* Test the {@link Emit} class functionality.
|
||||
@ -17,7 +17,7 @@ public class EmitTest {
|
||||
public void test_Equality_SameValues_ObjectsAreEqual() {
|
||||
final Emit one = new Emit(13, 42, null);
|
||||
final Emit two = new Emit(13, 42, null);
|
||||
assertEquals(one, two);
|
||||
assertEquals( one, two );
|
||||
}
|
||||
|
||||
/**
|
||||
@ -27,6 +27,6 @@ public class EmitTest {
|
||||
public void test_Equality_DifferingValues_ObjectsAreNotEqual() {
|
||||
final Emit one = new Emit(13, 42, null);
|
||||
final Emit two = new Emit(13, 43, null);
|
||||
assertNotSame(one, two);
|
||||
assertNotEquals(one, two);
|
||||
}
|
||||
}
|
||||
|
||||
@ -5,13 +5,14 @@ import org.ahocorasick.trie.handler.PayloadEmitHandler;
|
||||
import org.ahocorasick.trie.handler.StatefulPayloadEmitHandler;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.ThreadLocalRandom;
|
||||
|
||||
import static java.util.Arrays.asList;
|
||||
import static org.ahocorasick.trie.TestHelper.injectKeyword;
|
||||
import static org.ahocorasick.trie.TestHelper.randomNumbers;
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
public class PayloadTrieTest {
|
||||
@ -279,7 +280,7 @@ public class PayloadTrieTest {
|
||||
|
||||
@Test
|
||||
public void nonOverlapping() {
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().removeOverlaps().addKeyword("ab", "alpha:ab")
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreOverlaps().addKeyword("ab", "alpha:ab")
|
||||
.addKeyword("cba", "alpha:cba").addKeyword("ababc", "alpha:ababc").build();
|
||||
Collection<PayloadEmit<String>> emits = trie.parseText("ababcbab");
|
||||
assertEquals(2, emits.size());
|
||||
@ -291,7 +292,7 @@ public class PayloadTrieTest {
|
||||
|
||||
@Test
|
||||
public void nonOverlappingFirstMatch() {
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().removeOverlaps().addKeyword("ab", "alpha:ab")
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreOverlaps().addKeyword("ab", "alpha:ab")
|
||||
.addKeyword("cba", "alpha:cba").addKeyword("ababc", "alpha:ababc").build();
|
||||
PayloadEmit<String> firstMatch = trie.firstMatch("ababcbab");
|
||||
|
||||
@ -300,14 +301,14 @@ public class PayloadTrieTest {
|
||||
|
||||
@Test
|
||||
public void containsMatch() {
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().removeOverlaps().addKeyword("ab", "alpha:ab")
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreOverlaps().addKeyword("ab", "alpha:ab")
|
||||
.addKeyword("cba", "alpha:cba").addKeyword("ababc", "alpha:ababc").build();
|
||||
assertTrue(trie.containsMatch("ababcbab"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void startOfChurchillSpeech() {
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().removeOverlaps().addKeyword("T").addKeyword("u").addKeyword("ur")
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreOverlaps().addKeyword("T").addKeyword("u").addKeyword("ur")
|
||||
.addKeyword("r").addKeyword("urn").addKeyword("ni").addKeyword("i").addKeyword("in").addKeyword("n")
|
||||
.addKeyword("urning").build();
|
||||
Collection<PayloadEmit<String>> emits = trie.parseText("Turning");
|
||||
@ -449,7 +450,7 @@ public class PayloadTrieTest {
|
||||
|
||||
@Test
|
||||
public void test_containsMatchWithCaseInsensitive() {
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().caseInsensitive().addKeyword("foo", "bar").build();
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreCase().addKeyword("foo", "bar").build();
|
||||
|
||||
assertTrue(trie.containsMatch("FOOBAR"));
|
||||
assertFalse(trie.containsMatch("FO!?AR"));
|
||||
@ -483,59 +484,36 @@ public class PayloadTrieTest {
|
||||
assertEquals(result1, result2);
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a random sequence of ASCII numbers.
|
||||
*
|
||||
* @param count The number of numbers to generate.
|
||||
* @return A character sequence filled with random digits.
|
||||
*/
|
||||
private StringBuilder randomNumbers(int count) {
|
||||
final StringBuilder sb = new StringBuilder(count);
|
||||
|
||||
while (--count > 0) {
|
||||
sb.append(randomInt(0, 10));
|
||||
}
|
||||
|
||||
return sb;
|
||||
}
|
||||
|
||||
/**
|
||||
* Injects keywords into a string builder.
|
||||
*
|
||||
* @param source Should contain a bunch of random data that cannot match any
|
||||
* keyword.
|
||||
* @param keyword A keyword to inject repeatedly in the text.
|
||||
* @param interval How often to inject the keyword.
|
||||
*/
|
||||
private void injectKeyword(final StringBuilder source, final String keyword, final int interval) {
|
||||
final int length = source.length();
|
||||
for (int i = 0; i < length; i += interval) {
|
||||
source.replace(i, i + keyword.length(), keyword);
|
||||
}
|
||||
}
|
||||
|
||||
private int randomInt(final int min, final int max) {
|
||||
return ThreadLocalRandom.current().nextInt(min, max);
|
||||
}
|
||||
|
||||
private void checkEmit(PayloadEmit<Food> next, int expectedStart, int expectedEnd, String expectedKeyword,
|
||||
Food expectedPayload) {
|
||||
private void checkEmit(
|
||||
final PayloadEmit<Food> next,
|
||||
final int expectedStart,
|
||||
final int expectedEnd,
|
||||
final String expectedKeyword,
|
||||
final Food expectedPayload) {
|
||||
assertEquals("Start of emit should have been " + expectedStart, expectedStart, next.getStart());
|
||||
assertEquals("End of emit should have been " + expectedEnd, expectedEnd, next.getEnd());
|
||||
assertEquals("Keyword of emit shoud be " + expectedKeyword, expectedKeyword, next.getKeyword());
|
||||
assertEquals("Payload of emit shoud be " + expectedPayload, expectedPayload, next.getPayload());
|
||||
}
|
||||
|
||||
private void checkEmit(PayloadEmit<Integer> next, int expectedStart, int expectedEnd, String expectedKeyword,
|
||||
Integer expectedPayload) {
|
||||
private void checkEmit(
|
||||
final PayloadEmit<Integer> next,
|
||||
final int expectedStart,
|
||||
final int expectedEnd,
|
||||
final String expectedKeyword,
|
||||
final Integer expectedPayload) {
|
||||
assertEquals("Start of emit should have been " + expectedStart, expectedStart, next.getStart());
|
||||
assertEquals("End of emit should have been " + expectedEnd, expectedEnd, next.getEnd());
|
||||
assertEquals("Keyword of emit shoud be " + expectedKeyword, expectedKeyword, next.getKeyword());
|
||||
assertEquals("Payload of emit shoud be " + expectedPayload, expectedPayload, next.getPayload());
|
||||
}
|
||||
|
||||
private void checkEmit(PayloadEmit<String> next, int expectedStart, int expectedEnd, String expectedKeyword,
|
||||
String expectedPayload) {
|
||||
private void checkEmit(
|
||||
final PayloadEmit<String> next,
|
||||
final int expectedStart,
|
||||
final int expectedEnd,
|
||||
final String expectedKeyword,
|
||||
final String expectedPayload) {
|
||||
assertEquals("Start of emit should have been " + expectedStart, expectedStart, next.getStart());
|
||||
assertEquals("End of emit should have been " + expectedEnd, expectedEnd, next.getEnd());
|
||||
assertEquals("Keyword of emit shoud be " + expectedKeyword, expectedKeyword, next.getKeyword());
|
||||
|
||||
44
src/test/java/org/ahocorasick/trie/TestHelper.java
Normal file
44
src/test/java/org/ahocorasick/trie/TestHelper.java
Normal file
@ -0,0 +1,44 @@
|
||||
package org.ahocorasick.trie;
|
||||
|
||||
import static java.util.concurrent.ThreadLocalRandom.current;
|
||||
|
||||
/**
 * Contains functionality common to tests.
 * <p>
 * All members are static; the class is not meant to be instantiated.
 * </p>
 */
public final class TestHelper {
  /**
   * Prevents instantiation: this class only hosts static helpers.
   */
  private TestHelper() {
  }

  /**
   * Injects a keyword into a string builder at regular offsets.
   * <p>
   * Starting at index 0 and stepping by {@code interval}, the characters at
   * each offset are overwritten with {@code keyword}. Offsets are computed
   * against the length the builder had on entry. When fewer than
   * {@code keyword.length()} characters remain at an offset, the remaining
   * tail is replaced and the builder grows accordingly.
   * </p>
   *
   * @param source   Should contain a bunch of random data that cannot match
   *                 any keyword; modified in place.
   * @param keyword  A keyword to inject repeatedly in the text.
   * @param interval How often to inject the keyword, must be positive.
   * @throws IllegalArgumentException if {@code interval} is zero or
   *                                  negative (a zero stride would never
   *                                  terminate).
   */
  @SuppressWarnings( "SameParameterValue" )
  static void injectKeyword(
    final StringBuilder source,
    final String keyword,
    final int interval ) {
    if( interval <= 0 ) {
      throw new IllegalArgumentException(
        "interval must be positive: " + interval );
    }

    // Snapshot the length so growth caused by a replacement near the end
    // does not extend the iteration.
    final int length = source.length();

    for( int i = 0; i < length; i += interval ) {
      source.replace( i, i + keyword.length(), keyword );
    }
  }

  /**
   * Generates a random sequence of ASCII numbers.
   *
   * @param count The number of numbers to generate; exactly this many
   *              digits are produced.
   * @return A character sequence filled with random digits.
   */
  @SuppressWarnings( "SameParameterValue" )
  public static StringBuilder randomNumbers( final int count ) {
    final StringBuilder sb = new StringBuilder( count );

    // Iterate exactly count times; a pre-decrement loop would stop one
    // digit short of the requested length.
    for( int i = 0; i < count; i++ ) {
      sb.append( current().nextInt( 0, 10 ) );
    }

    return sb;
  }
}
|
||||
@ -9,13 +9,13 @@ import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.ThreadLocalRandom;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import static java.lang.String.format;
|
||||
import static org.ahocorasick.trie.TestHelper.injectKeyword;
|
||||
import static org.ahocorasick.trie.TestHelper.randomNumbers;
|
||||
import static org.ahocorasick.trie.Trie.builder;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
/**
|
||||
* Test the {@link Trie} class functionality.
|
||||
@ -41,11 +41,21 @@ public class TrieTest {
|
||||
"turning", "once", "again", "börkü"
|
||||
};
|
||||
|
||||
private static Trie trie( final String keyword ) {
|
||||
return Trie.builder()
|
||||
.addKeyword( keyword )
|
||||
.build();
|
||||
}
|
||||
|
||||
private static Trie trie( final String[] keywords ) {
|
||||
return Trie.builder()
|
||||
.addKeywords( keywords )
|
||||
.build();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_KeywordAndTextAreTheSame() {
|
||||
final Trie trie = Trie.builder()
|
||||
.addKeyword( ALPHABET[ 0 ] )
|
||||
.build();
|
||||
final Trie trie = trie( ALPHABET[ 0 ] );
|
||||
final Collection<Emit> emits = trie.parseText( ALPHABET[ 0 ] );
|
||||
final Iterator<Emit> iterator = emits.iterator();
|
||||
checkEmit( iterator.next(), 0, 2, ALPHABET[ 0 ] );
|
||||
@ -53,18 +63,14 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void test_KeywordAndTextAreTheSameFirstMatch() {
|
||||
final Trie trie = Trie.builder()
|
||||
.addKeyword( ALPHABET[ 0 ] )
|
||||
.build();
|
||||
final Trie trie = trie( ALPHABET[ 0 ] );
|
||||
final Emit firstMatch = trie.firstMatch( ALPHABET[ 0 ] );
|
||||
checkEmit( firstMatch, 0, 2, ALPHABET[ 0 ] );
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_TextIsLongerThanKeyword() {
|
||||
final Trie trie = Trie.builder()
|
||||
.addKeyword( ALPHABET[ 0 ] )
|
||||
.build();
|
||||
final Trie trie = trie( ALPHABET[ 0 ] );
|
||||
final Collection<Emit> emits = trie.parseText( " " + ALPHABET[ 0 ] );
|
||||
final Iterator<Emit> iterator = emits.iterator();
|
||||
checkEmit( iterator.next(), 1, 3, ALPHABET[ 0 ] );
|
||||
@ -72,18 +78,14 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void test_TextIsLongerThanKeywordFirstMatch() {
|
||||
final Trie trie = Trie.builder()
|
||||
.addKeyword( ALPHABET[ 0 ] )
|
||||
.build();
|
||||
final Trie trie = trie( ALPHABET[ 0 ] );
|
||||
final Emit firstMatch = trie.firstMatch( " " + ALPHABET[ 0 ] );
|
||||
checkEmit( firstMatch, 1, 3, ALPHABET[ 0 ] );
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_VariousKeywordsOneMatch() {
|
||||
final Trie trie = Trie.builder()
|
||||
.addKeywords( ALPHABET )
|
||||
.build();
|
||||
final Trie trie = trie( ALPHABET );
|
||||
final Collection<Emit> emits = trie.parseText( "bcd" );
|
||||
final Iterator<Emit> iterator = emits.iterator();
|
||||
checkEmit( iterator.next(), 0, 2, "bcd" );
|
||||
@ -91,13 +93,18 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void test_VariousKeywordsFirstMatch() {
|
||||
final Trie trie = Trie.builder()
|
||||
.addKeywords( ALPHABET )
|
||||
.build();
|
||||
final Trie trie = trie( ALPHABET );
|
||||
final Emit firstMatch = trie.firstMatch( "bcd" );
|
||||
checkEmit( firstMatch, 0, 2, "bcd" );
|
||||
}
|
||||
|
||||
@Test(expected=AssertionError.class)
|
||||
public void test_NullInputTextFirstMatch() {
|
||||
final Trie trie = trie( ALPHABET );
|
||||
final Emit firstMatch = trie.firstMatch( null );
|
||||
assertNull( firstMatch );
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_UshersTestAndStopOnHit() {
|
||||
final Trie trie = Trie.builder()
|
||||
@ -142,9 +149,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void test_UshersTest() {
|
||||
final Trie trie = Trie.builder()
|
||||
.addKeywords( PRONOUNS )
|
||||
.build();
|
||||
final Trie trie = trie( PRONOUNS );
|
||||
final Collection<Emit> emits = trie.parseText( "ushers" );
|
||||
assertEquals( 3, emits.size() ); // she @ 3, he @ 3, hers @ 5
|
||||
final Iterator<Emit> iterator = emits.iterator();
|
||||
@ -172,19 +177,14 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void test_UshersTestFirstMatch() {
|
||||
final Trie trie = Trie.builder()
|
||||
.addKeywords( PRONOUNS )
|
||||
.build();
|
||||
final Trie trie = trie( PRONOUNS );
|
||||
final Emit firstMatch = trie.firstMatch( "ushers" );
|
||||
checkEmit( firstMatch, 2, 3, "he" );
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_UshersTestByCallback() {
|
||||
final Trie trie = Trie.builder()
|
||||
.addKeywords( PRONOUNS )
|
||||
.build();
|
||||
|
||||
final Trie trie = trie( PRONOUNS );
|
||||
final List<Emit> emits = new ArrayList<>();
|
||||
final EmitHandler emitHandler = emit -> {
|
||||
emits.add( emit );
|
||||
@ -200,9 +200,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void test_MisleadingTest() {
|
||||
final Trie trie = Trie.builder()
|
||||
.addKeyword( "hers" )
|
||||
.build();
|
||||
final Trie trie = trie( "hers" );
|
||||
final Collection<Emit> emits = trie.parseText( "h he her hers" );
|
||||
final Iterator<Emit> iterator = emits.iterator();
|
||||
checkEmit( iterator.next(), 9, 12, "hers" );
|
||||
@ -210,18 +208,14 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void test_MisleadingTestFirstMatch() {
|
||||
final Trie trie = Trie.builder()
|
||||
.addKeyword( "hers" )
|
||||
.build();
|
||||
final Trie trie = trie( "hers" );
|
||||
final Emit firstMatch = trie.firstMatch( "h he her hers" );
|
||||
checkEmit( firstMatch, 9, 12, "hers" );
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_Recipes() {
|
||||
final Trie trie = Trie.builder()
|
||||
.addKeywords( FOOD )
|
||||
.build();
|
||||
final Trie trie = trie( FOOD );
|
||||
final Collection<Emit> emits = trie.parseText(
|
||||
"2 cauliflowers, 3 tomatoes, 4 slices of veal, 100g broccoli" );
|
||||
final Iterator<Emit> iterator = emits.iterator();
|
||||
@ -233,9 +227,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void test_RecipesFirstMatch() {
|
||||
final Trie trie = Trie.builder()
|
||||
.addKeywords( FOOD )
|
||||
.build();
|
||||
final Trie trie = trie( FOOD );
|
||||
final Emit firstMatch = trie.firstMatch(
|
||||
"2 cauliflowers, 3 tomatoes, 4 slices of veal, 100g broccoli" );
|
||||
|
||||
@ -261,7 +253,8 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void test_NonOverlapping() {
|
||||
final Trie trie = Trie.builder().ignoreOverlaps()
|
||||
final Trie trie = Trie.builder()
|
||||
.ignoreOverlaps()
|
||||
.addKeyword( "ab" )
|
||||
.addKeyword( "cba" )
|
||||
.addKeyword( "ababc" )
|
||||
@ -276,7 +269,8 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void test_NonOverlappingFirstMatch() {
|
||||
final Trie trie = Trie.builder().ignoreOverlaps()
|
||||
final Trie trie = Trie.builder()
|
||||
.ignoreOverlaps()
|
||||
.addKeyword( "ab" )
|
||||
.addKeyword( "cba" )
|
||||
.addKeyword( "ababc" )
|
||||
@ -288,7 +282,8 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void test_ContainsMatch() {
|
||||
final Trie trie = Trie.builder().ignoreOverlaps()
|
||||
final Trie trie = Trie.builder()
|
||||
.ignoreOverlaps()
|
||||
.addKeyword( "ab" )
|
||||
.addKeyword( "cba" )
|
||||
.addKeyword( "ababc" )
|
||||
@ -298,7 +293,8 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void test_StartOfChurchillSpeech() {
|
||||
final Trie trie = Trie.builder().ignoreOverlaps()
|
||||
final Trie trie = Trie.builder()
|
||||
.ignoreOverlaps()
|
||||
.addKeyword( "T" )
|
||||
.addKeyword( "u" )
|
||||
.addKeyword( "ur" )
|
||||
@ -342,9 +338,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void test_TokenizeFullSentence() {
|
||||
final Trie trie = Trie.builder()
|
||||
.addKeywords( GREEK_LETTERS )
|
||||
.build();
|
||||
final Trie trie = trie( GREEK_LETTERS );
|
||||
final Collection<Token> tokens = trie.tokenize(
|
||||
"Hear: Alpha team first, Beta from the rear, Gamma in reserve" );
|
||||
assertEquals( 7, tokens.size() );
|
||||
@ -363,7 +357,9 @@ public class TrieTest {
|
||||
*/
|
||||
@Test
|
||||
public void test_StringIndexOutOfBoundsException() {
|
||||
final Trie trie = Trie.builder().ignoreCase().onlyWholeWords()
|
||||
final Trie trie = Trie.builder()
|
||||
.ignoreCase()
|
||||
.onlyWholeWords()
|
||||
.addKeywords( UNICODE )
|
||||
.build();
|
||||
final Collection<Emit> emits = trie.parseText( "TurninG OnCe AgAiN BÖRKÜ" );
|
||||
@ -377,7 +373,8 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void test_IgnoreCase() {
|
||||
final Trie trie = Trie.builder().ignoreCase()
|
||||
final Trie trie = Trie.builder()
|
||||
.ignoreCase()
|
||||
.addKeywords( UNICODE )
|
||||
.build();
|
||||
final Collection<Emit> emits = trie.parseText( "TurninG OnCe AgAiN BÖRKÜ" );
|
||||
@ -391,7 +388,8 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void test_IgnoreCaseFirstMatch() {
|
||||
final Trie trie = Trie.builder().ignoreCase()
|
||||
final Trie trie = Trie.builder()
|
||||
.ignoreCase()
|
||||
.addKeywords( UNICODE )
|
||||
.build();
|
||||
final Emit firstMatch = trie.firstMatch( "TurninG OnCe AgAiN BÖRKÜ" );
|
||||
@ -401,9 +399,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void test_TokenizeTokensInSequence() {
|
||||
final Trie trie = Trie.builder()
|
||||
.addKeywords( GREEK_LETTERS )
|
||||
.build();
|
||||
final Trie trie = trie( GREEK_LETTERS );
|
||||
final Collection<Token> tokens = trie.tokenize( "Alpha Beta Gamma" );
|
||||
assertEquals( 5, tokens.size() );
|
||||
}
|
||||
@ -585,44 +581,6 @@ public class TrieTest {
|
||||
assertEquals( 0, nonMatchCount.get() );
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a random sequence of ASCII numbers.
|
||||
*
|
||||
* @param count The number of numbers to generate.
|
||||
* @return A character sequence filled with random digits.
|
||||
*/
|
||||
private StringBuilder randomNumbers( int count ) {
|
||||
final StringBuilder sb = new StringBuilder( count );
|
||||
|
||||
while( --count > 0 ) {
|
||||
sb.append( randomInt( 0, 10 ) );
|
||||
}
|
||||
|
||||
return sb;
|
||||
}
|
||||
|
||||
/**
|
||||
* Injects keywords into a string builder.
|
||||
*
|
||||
* @param source Should contain a bunch of random data that cannot match
|
||||
* any keyword.
|
||||
* @param keyword A keyword to inject repeatedly in the text.
|
||||
* @param interval How often to inject the keyword.
|
||||
*/
|
||||
private void injectKeyword(
|
||||
final StringBuilder source,
|
||||
final String keyword,
|
||||
final int interval ) {
|
||||
final int length = source.length();
|
||||
for( int i = 0; i < length; i += interval ) {
|
||||
source.replace( i, i + keyword.length(), keyword );
|
||||
}
|
||||
}
|
||||
|
||||
private int randomInt( final int min, final int max ) {
|
||||
return ThreadLocalRandom.current().nextInt( min, max );
|
||||
}
|
||||
|
||||
private void checkEmit( Emit next, int expectedStart, int expectedEnd,
|
||||
String expectedKeyword ) {
|
||||
assertEquals( "Start of emit should have been " + expectedStart,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user