Optimize imports
Reformatted code (Java convention; tab is 4 spaces)
This commit is contained in:
parent
90d4645d49
commit
b5aaa51fdd
3
pom.xml
3
pom.xml
@ -1,4 +1,5 @@
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<groupId>org.ahocorasick</groupId>
|
||||
|
||||
@ -75,9 +75,9 @@ public class IntervalNode {
|
||||
}
|
||||
|
||||
protected void addToOverlaps(
|
||||
final Intervalable interval,
|
||||
final List<Intervalable> overlaps,
|
||||
final List<Intervalable> newOverlaps) {
|
||||
final Intervalable interval,
|
||||
final List<Intervalable> overlaps,
|
||||
final List<Intervalable> newOverlaps) {
|
||||
for (final Intervalable currentInterval : newOverlaps) {
|
||||
if (!currentInterval.equals(interval)) {
|
||||
overlaps.add(currentInterval);
|
||||
@ -94,9 +94,9 @@ public class IntervalNode {
|
||||
}
|
||||
|
||||
protected List<Intervalable> checkForOverlaps(
|
||||
final Intervalable interval, final Direction direction) {
|
||||
final Intervalable interval, final Direction direction) {
|
||||
final List<Intervalable> overlaps = new ArrayList<>();
|
||||
|
||||
|
||||
for (final Intervalable currentInterval : this.intervals) {
|
||||
switch (direction) {
|
||||
case LEFT:
|
||||
@ -111,13 +111,13 @@ public class IntervalNode {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return overlaps;
|
||||
}
|
||||
|
||||
|
||||
protected List<Intervalable> findOverlappingRanges(IntervalNode node, Intervalable interval) {
|
||||
return node == null
|
||||
? Collections.<Intervalable>emptyList()
|
||||
: node.findOverlaps( interval );
|
||||
? Collections.<Intervalable>emptyList()
|
||||
: node.findOverlaps(interval);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,10 +1,11 @@
|
||||
package org.ahocorasick.interval;
|
||||
|
||||
import static java.util.Collections.sort;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import static java.util.Collections.sort;
|
||||
|
||||
public class IntervalTree {
|
||||
|
||||
private final IntervalNode rootNode;
|
||||
|
||||
@ -7,11 +7,11 @@ public class IntervalableComparatorBySize implements Comparator<Intervalable> {
|
||||
@Override
|
||||
public int compare(final Intervalable intervalable, final Intervalable intervalable2) {
|
||||
int comparison = intervalable2.size() - intervalable.size();
|
||||
|
||||
|
||||
if (comparison == 0) {
|
||||
comparison = intervalable.getStart() - intervalable2.getStart();
|
||||
}
|
||||
|
||||
|
||||
return comparison;
|
||||
}
|
||||
|
||||
|
||||
@ -4,43 +4,51 @@ import java.util.*;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* A state has various important tasks it must attend to:
|
||||
* A state has various important tasks it must attend to:
|
||||
* </p>
|
||||
*
|
||||
* <ul>
|
||||
* <li>success; when a character points to another state, it must return that state</li>
|
||||
* <li>failure; when a character has no matching state, the algorithm must be able to fall back on a
|
||||
* state with less depth</li>
|
||||
* <li>emits; when this state is passed and keywords have been matched, the matches must be
|
||||
* 'emitted' so that they can be used later on.</li>
|
||||
* </ul>
|
||||
*
|
||||
* <p>
|
||||
* The root state is special in the sense that it has no failure state; it cannot fail. If it 'fails'
|
||||
* it will still parse the next character and start from the root node. This ensures that the algorithm
|
||||
* always runs. All other states always have a fail state.
|
||||
* <ul>
|
||||
* <li>success; when a character points to another state, it must return that state</li>
|
||||
* <li>failure; when a character has no matching state, the algorithm must be able to fall back on a
|
||||
* state with less depth</li>
|
||||
* <li>emits; when this state is passed and keywords have been matched, the matches must be
|
||||
* 'emitted' so that they can be used later on.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <p>
|
||||
* The root state is special in the sense that it has no failure state; it cannot fail. If it 'fails'
|
||||
* it will still parse the next character and start from the root node. This ensures that the algorithm
|
||||
* always runs. All other states always have a fail state.
|
||||
* </p>
|
||||
*
|
||||
* @author Robert Bor
|
||||
*/
|
||||
public class State {
|
||||
|
||||
/** effective the size of the keyword */
|
||||
/**
|
||||
* effective the size of the keyword
|
||||
*/
|
||||
private final int depth;
|
||||
|
||||
/** only used for the root state to refer to itself in case no matches have been found */
|
||||
/**
|
||||
* only used for the root state to refer to itself in case no matches have been found
|
||||
*/
|
||||
private final State rootState;
|
||||
|
||||
/**
|
||||
* referred to in the white paper as the 'goto' structure. From a state it is possible to go
|
||||
* to other states, depending on the character passed.
|
||||
*/
|
||||
private final Map<Character,State> success = new HashMap<>();
|
||||
private final Map<Character, State> success = new HashMap<>();
|
||||
|
||||
/** if no matching states are found, the failure state will be returned */
|
||||
/**
|
||||
* if no matching states are found, the failure state will be returned
|
||||
*/
|
||||
private State failure;
|
||||
|
||||
/** whenever this state is reached, it will emit the matches keywords for future reference */
|
||||
/**
|
||||
* whenever this state is reached, it will emit the matches keywords for future reference
|
||||
*/
|
||||
private Set<String> emits;
|
||||
|
||||
public State() {
|
||||
@ -54,11 +62,11 @@ public class State {
|
||||
|
||||
private State nextState(final Character character, final boolean ignoreRootState) {
|
||||
State nextState = this.success.get(character);
|
||||
|
||||
|
||||
if (!ignoreRootState && nextState == null && this.rootState != null) {
|
||||
nextState = this.rootState;
|
||||
}
|
||||
|
||||
|
||||
return nextState;
|
||||
}
|
||||
|
||||
@ -69,21 +77,21 @@ public class State {
|
||||
public State nextStateIgnoreRootState(final Character character) {
|
||||
return nextState(character, true);
|
||||
}
|
||||
|
||||
public State addState(final String keyword ) {
|
||||
State state = this;
|
||||
|
||||
for (final Character character : keyword.toCharArray()) {
|
||||
state = state.addState(character);
|
||||
}
|
||||
|
||||
return state;
|
||||
|
||||
public State addState(final String keyword) {
|
||||
State state = this;
|
||||
|
||||
for (final Character character : keyword.toCharArray()) {
|
||||
state = state.addState(character);
|
||||
}
|
||||
|
||||
return state;
|
||||
}
|
||||
|
||||
public State addState(final Character character) {
|
||||
State nextState = nextStateIgnoreRootState(character);
|
||||
if (nextState == null) {
|
||||
nextState = new State(this.depth+1);
|
||||
nextState = new State(this.depth + 1);
|
||||
this.success.put(character, nextState);
|
||||
}
|
||||
return nextState;
|
||||
@ -107,7 +115,7 @@ public class State {
|
||||
}
|
||||
|
||||
public Collection<String> emit() {
|
||||
return this.emits == null ? Collections.<String> emptyList() : this.emits;
|
||||
return this.emits == null ? Collections.<String>emptyList() : this.emits;
|
||||
}
|
||||
|
||||
public State failure() {
|
||||
|
||||
@ -1,22 +1,24 @@
|
||||
package org.ahocorasick.trie;
|
||||
|
||||
import static java.lang.Character.isAlphabetic;
|
||||
import static java.lang.Character.isWhitespace;
|
||||
import static java.lang.Character.toLowerCase;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Queue;
|
||||
import java.util.concurrent.LinkedBlockingDeque;
|
||||
import org.ahocorasick.interval.IntervalTree;
|
||||
import org.ahocorasick.interval.Intervalable;
|
||||
import org.ahocorasick.trie.handler.DefaultEmitHandler;
|
||||
import org.ahocorasick.trie.handler.EmitHandler;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Queue;
|
||||
import java.util.concurrent.LinkedBlockingDeque;
|
||||
|
||||
import static java.lang.Character.*;
|
||||
|
||||
import java.lang.Character;
|
||||
|
||||
/**
|
||||
* Based on the Aho-Corasick white paper, Bell technologies:
|
||||
* http://cr.yp.to/bib/1975/aho.pdf
|
||||
*
|
||||
*
|
||||
* @author Robert Bor
|
||||
*/
|
||||
public class Trie {
|
||||
@ -29,42 +31,41 @@ public class Trie {
|
||||
this.trieConfig = trieConfig;
|
||||
this.rootState = new State();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Used by the builder to add a text search keyword.
|
||||
*
|
||||
*
|
||||
* @param keyword The search term to add to the list of search terms.
|
||||
*
|
||||
* @throws NullPointerException if the keyword is null.
|
||||
*/
|
||||
private void addKeyword( String keyword ) {
|
||||
if( keyword.length() > 0 ) {
|
||||
if( isCaseInsensitive() ) {
|
||||
private void addKeyword(String keyword) {
|
||||
if (keyword.length() > 0) {
|
||||
if (isCaseInsensitive()) {
|
||||
keyword = keyword.toLowerCase();
|
||||
}
|
||||
|
||||
addState( keyword ).addEmit( keyword );
|
||||
addState(keyword).addEmit(keyword);
|
||||
}
|
||||
}
|
||||
|
||||
private State addState( final String keyword ) {
|
||||
return getRootState().addState( keyword );
|
||||
private State addState(final String keyword) {
|
||||
return getRootState().addState(keyword);
|
||||
}
|
||||
|
||||
|
||||
public Collection<Token> tokenize(final String text) {
|
||||
final Collection<Token> tokens = new ArrayList<>();
|
||||
final Collection<Emit> collectedEmits = parseText(text);
|
||||
int lastCollectedPosition = -1;
|
||||
|
||||
|
||||
for (final Emit emit : collectedEmits) {
|
||||
if (emit.getStart() - lastCollectedPosition > 1) {
|
||||
tokens.add(createFragment(emit, text, lastCollectedPosition));
|
||||
}
|
||||
|
||||
|
||||
tokens.add(createMatch(emit, text));
|
||||
lastCollectedPosition = emit.getEnd();
|
||||
}
|
||||
|
||||
|
||||
if (text.length() - lastCollectedPosition > 1) {
|
||||
tokens.add(createFragment(null, text, lastCollectedPosition));
|
||||
}
|
||||
@ -73,14 +74,14 @@ public class Trie {
|
||||
}
|
||||
|
||||
private Token createFragment(
|
||||
final Emit emit,
|
||||
final String text,
|
||||
final int lastCollectedPosition) {
|
||||
return new FragmentToken(text.substring(lastCollectedPosition+1, emit == null ? text.length() : emit.getStart()));
|
||||
final Emit emit,
|
||||
final String text,
|
||||
final int lastCollectedPosition) {
|
||||
return new FragmentToken(text.substring(lastCollectedPosition + 1, emit == null ? text.length() : emit.getStart()));
|
||||
}
|
||||
|
||||
private Token createMatch(final Emit emit, final String text) {
|
||||
return new MatchToken(text.substring(emit.getStart(), emit.getEnd()+1), emit);
|
||||
return new MatchToken(text.substring(emit.getStart(), emit.getEnd() + 1), emit);
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
@ -99,7 +100,7 @@ public class Trie {
|
||||
}
|
||||
|
||||
if (!trieConfig.isAllowOverlaps()) {
|
||||
IntervalTree intervalTree = new IntervalTree((List<Intervalable>)(List<?>)collectedEmits);
|
||||
IntervalTree intervalTree = new IntervalTree((List<Intervalable>) (List<?>) collectedEmits);
|
||||
intervalTree.removeOverlaps((List<Intervalable>) (List<?>) collectedEmits);
|
||||
}
|
||||
|
||||
@ -112,15 +113,15 @@ public class Trie {
|
||||
|
||||
public void parseText(final CharSequence text, final EmitHandler emitHandler) {
|
||||
State currentState = getRootState();
|
||||
|
||||
|
||||
for (int position = 0; position < text.length(); position++) {
|
||||
Character character = text.charAt(position);
|
||||
|
||||
|
||||
// TODO: Maybe lowercase the entire string at once?
|
||||
if (trieConfig.isCaseInsensitive()) {
|
||||
character = toLowerCase(character);
|
||||
}
|
||||
|
||||
|
||||
currentState = getState(currentState, character);
|
||||
if (storeEmits(position, currentState, emitHandler) && trieConfig.isStopOnHit()) {
|
||||
return;
|
||||
@ -138,18 +139,18 @@ public class Trie {
|
||||
} else {
|
||||
// Fast path. Returns first match found.
|
||||
State currentState = getRootState();
|
||||
|
||||
|
||||
for (int position = 0; position < text.length(); position++) {
|
||||
Character character = text.charAt(position);
|
||||
|
||||
|
||||
// TODO: Lowercase the entire string at once?
|
||||
if (trieConfig.isCaseInsensitive()) {
|
||||
character = toLowerCase(character);
|
||||
}
|
||||
|
||||
|
||||
currentState = getState(currentState, character);
|
||||
Collection<String> emitStrs = currentState.emit();
|
||||
|
||||
|
||||
if (emitStrs != null && !emitStrs.isEmpty()) {
|
||||
for (final String emitStr : emitStrs) {
|
||||
final Emit emit = new Emit(position - emitStr.length() + 1, position, emitStr);
|
||||
@ -164,26 +165,26 @@ public class Trie {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
private boolean isPartialMatch(final CharSequence searchText, final Emit emit) {
|
||||
return (emit.getStart() != 0 &&
|
||||
isAlphabetic(searchText.charAt(emit.getStart() - 1))) ||
|
||||
(emit.getEnd() + 1 != searchText.length() &&
|
||||
isAlphabetic(searchText.charAt(emit.getEnd() + 1)));
|
||||
isAlphabetic(searchText.charAt(emit.getStart() - 1))) ||
|
||||
(emit.getEnd() + 1 != searchText.length() &&
|
||||
isAlphabetic(searchText.charAt(emit.getEnd() + 1)));
|
||||
}
|
||||
|
||||
private void removePartialMatches(final CharSequence searchText, final List<Emit> collectedEmits) {
|
||||
final List<Emit> removeEmits = new ArrayList<>();
|
||||
|
||||
|
||||
for (final Emit emit : collectedEmits) {
|
||||
if (isPartialMatch(searchText, emit)) {
|
||||
removeEmits.add(emit);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
for (final Emit removeEmit : removeEmits) {
|
||||
collectedEmits.remove(removeEmit);
|
||||
}
|
||||
@ -192,15 +193,15 @@ public class Trie {
|
||||
private void removePartialMatchesWhiteSpaceSeparated(final CharSequence searchText, final List<Emit> collectedEmits) {
|
||||
final long size = searchText.length();
|
||||
final List<Emit> removeEmits = new ArrayList<>();
|
||||
|
||||
|
||||
for (final Emit emit : collectedEmits) {
|
||||
if ((emit.getStart() == 0 || isWhitespace(searchText.charAt(emit.getStart() - 1))) &&
|
||||
(emit.getEnd() + 1 == size || isWhitespace(searchText.charAt(emit.getEnd() + 1)))) {
|
||||
(emit.getEnd() + 1 == size || isWhitespace(searchText.charAt(emit.getEnd() + 1)))) {
|
||||
continue;
|
||||
}
|
||||
removeEmits.add(emit);
|
||||
}
|
||||
|
||||
|
||||
for (final Emit removeEmit : removeEmits) {
|
||||
collectedEmits.remove(removeEmit);
|
||||
}
|
||||
@ -209,12 +210,12 @@ public class Trie {
|
||||
private State getState(final State initialState, final Character character) {
|
||||
State currentState = initialState;
|
||||
State updatedState = currentState.nextState(character);
|
||||
|
||||
|
||||
while (updatedState == null) {
|
||||
currentState = currentState.failure();
|
||||
updatedState = currentState.nextState(character);
|
||||
}
|
||||
|
||||
|
||||
return updatedState;
|
||||
}
|
||||
|
||||
@ -249,12 +250,12 @@ public class Trie {
|
||||
}
|
||||
|
||||
private boolean storeEmits(
|
||||
final int position,
|
||||
final State currentState,
|
||||
final EmitHandler emitHandler) {
|
||||
final int position,
|
||||
final State currentState,
|
||||
final EmitHandler emitHandler) {
|
||||
boolean emitted = false;
|
||||
final Collection<String> emits = currentState.emit();
|
||||
|
||||
|
||||
// TODO: The check for empty might be superfluous.
|
||||
if (emits != null && !emits.isEmpty()) {
|
||||
for (final String emit : emits) {
|
||||
@ -262,22 +263,22 @@ public class Trie {
|
||||
emitted = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return emitted;
|
||||
}
|
||||
|
||||
private boolean isCaseInsensitive() {
|
||||
return trieConfig.isCaseInsensitive();
|
||||
return trieConfig.isCaseInsensitive();
|
||||
}
|
||||
|
||||
|
||||
private State getRootState() {
|
||||
return this.rootState;
|
||||
return this.rootState;
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a TrieBuilder instance for configuring the Trie using a fluent
|
||||
* interface.
|
||||
*
|
||||
*
|
||||
* @return The builder used to configure its Trie.
|
||||
*/
|
||||
public static TrieBuilder builder() {
|
||||
@ -296,31 +297,30 @@ public class Trie {
|
||||
/**
|
||||
* Default (empty) constructor.
|
||||
*/
|
||||
private TrieBuilder() {}
|
||||
private TrieBuilder() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a keyword to the Trie's list of text search keywords.
|
||||
*
|
||||
*
|
||||
* @param keyword The keyword to add to the list.
|
||||
*
|
||||
* @return This builder.
|
||||
* @throws NullPointerException if the keyword is null.
|
||||
*/
|
||||
public TrieBuilder addKeyword(final CharSequence keyword) {
|
||||
getTrie().addKeyword( keyword.toString() );
|
||||
getTrie().addKeyword(keyword.toString());
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a list of keywords to the Trie's list of text search keywords.
|
||||
*
|
||||
*
|
||||
* @param keywords The keywords to add to the list.
|
||||
*
|
||||
* @return This builder.
|
||||
*/
|
||||
public TrieBuilder addKeywords(final CharSequence... keywords) {
|
||||
for( final CharSequence keyword : keywords ) {
|
||||
addKeyword( keyword );
|
||||
for (final CharSequence keyword : keywords) {
|
||||
addKeyword(keyword);
|
||||
}
|
||||
|
||||
return this;
|
||||
@ -328,19 +328,18 @@ public class Trie {
|
||||
|
||||
/**
|
||||
* Adds a list of keywords to the Trie's list of text search keywords.
|
||||
*
|
||||
*
|
||||
* @param keywords The keywords to add to the list.
|
||||
*
|
||||
* @return This builder.
|
||||
*/
|
||||
public TrieBuilder addKeywords(final Collection<CharSequence> keywords) {
|
||||
return addKeywords( keywords.toArray( new CharSequence[ keywords.size() ] ) );
|
||||
return addKeywords(keywords.toArray(new CharSequence[keywords.size()]));
|
||||
}
|
||||
|
||||
/**
|
||||
* Configure the Trie to ignore case when searching for keywords in the
|
||||
* text.
|
||||
*
|
||||
*
|
||||
* @return This builder.
|
||||
*/
|
||||
public TrieBuilder ignoreCase() {
|
||||
@ -350,7 +349,7 @@ public class Trie {
|
||||
|
||||
/**
|
||||
* Configure the Trie to ignore overlapping keywords.
|
||||
*
|
||||
*
|
||||
* @return This builder.
|
||||
*/
|
||||
public TrieBuilder ignoreOverlaps() {
|
||||
@ -360,7 +359,7 @@ public class Trie {
|
||||
|
||||
/**
|
||||
* Configure the Trie to match whole keywords in the text.
|
||||
*
|
||||
*
|
||||
* @return This builder.
|
||||
*/
|
||||
public TrieBuilder onlyWholeWords() {
|
||||
@ -372,18 +371,18 @@ public class Trie {
|
||||
* Configure the Trie to match whole keywords that are separated by
|
||||
* whitespace in the text. For example, "this keyword thatkeyword"
|
||||
* would only match the first occurrence of "keyword".
|
||||
*
|
||||
*
|
||||
* @return This builder.
|
||||
*/
|
||||
public TrieBuilder onlyWholeWordsWhiteSpaceSeparated() {
|
||||
getTrieConfig().setOnlyWholeWordsWhiteSpaceSeparated(true);
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Configure the Trie to stop searching for matches after the first
|
||||
* keyword is found in the text.
|
||||
*
|
||||
*
|
||||
* @return This builder.
|
||||
*/
|
||||
public TrieBuilder onlyFirstMatch() {
|
||||
@ -393,27 +392,27 @@ public class Trie {
|
||||
|
||||
/**
|
||||
* Construct the Trie using the builder settings.
|
||||
*
|
||||
*
|
||||
* @return The configured Trie.
|
||||
*/
|
||||
public Trie build() {
|
||||
getTrie().constructFailureStates();
|
||||
return getTrie();
|
||||
}
|
||||
|
||||
|
||||
private Trie getTrie() {
|
||||
return this.trie;
|
||||
}
|
||||
|
||||
|
||||
private TrieConfig getTrieConfig() {
|
||||
return this.trieConfig;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @deprecated Use onlyFirstMatch()
|
||||
*/
|
||||
public TrieBuilder stopOnHit() {
|
||||
return onlyFirstMatch();
|
||||
return onlyFirstMatch();
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@ -1,8 +1,9 @@
|
||||
package org.ahocorasick.trie.handler;
|
||||
|
||||
import org.ahocorasick.trie.Emit;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import org.ahocorasick.trie.Emit;
|
||||
|
||||
public class DefaultEmitHandler implements EmitHandler {
|
||||
|
||||
|
||||
@ -2,11 +2,11 @@ package org.ahocorasick.interval;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.Iterator;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import static junit.framework.Assert.assertEquals;
|
||||
import static junit.framework.Assert.assertFalse;
|
||||
import static junit.framework.Assert.assertTrue;
|
||||
import static junit.framework.Assert.*;
|
||||
|
||||
public class IntervalTest {
|
||||
|
||||
|
||||
@ -3,10 +3,9 @@ package org.ahocorasick.interval;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import static java.util.Collections.sort;
|
||||
import java.util.List;
|
||||
|
||||
import static java.util.Collections.sort;
|
||||
import static junit.framework.Assert.assertEquals;
|
||||
|
||||
public class IntervalableComparatorByPositionTest {
|
||||
@ -14,9 +13,9 @@ public class IntervalableComparatorByPositionTest {
|
||||
@Test
|
||||
public void sortOnPosition() {
|
||||
List<Intervalable> intervals = new ArrayList<>();
|
||||
intervals.add(new Interval(4,5));
|
||||
intervals.add(new Interval(1,4));
|
||||
intervals.add(new Interval(3,8));
|
||||
intervals.add(new Interval(4, 5));
|
||||
intervals.add(new Interval(1, 4));
|
||||
intervals.add(new Interval(3, 8));
|
||||
sort(intervals, new IntervalableComparatorByPosition());
|
||||
assertEquals(4, intervals.get(0).size());
|
||||
assertEquals(6, intervals.get(1).size());
|
||||
|
||||
@ -3,11 +3,9 @@ package org.ahocorasick.interval;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import static java.util.Collections.sort;
|
||||
import static java.util.Collections.sort;
|
||||
import java.util.List;
|
||||
|
||||
import static java.util.Collections.sort;
|
||||
import static junit.framework.Assert.assertEquals;
|
||||
|
||||
public class IntervalableComparatorBySizeTest {
|
||||
@ -15,9 +13,9 @@ public class IntervalableComparatorBySizeTest {
|
||||
@Test
|
||||
public void sortOnSize() {
|
||||
List<Intervalable> intervals = new ArrayList<>();
|
||||
intervals.add(new Interval(4,5));
|
||||
intervals.add(new Interval(1,4));
|
||||
intervals.add(new Interval(3,8));
|
||||
intervals.add(new Interval(4, 5));
|
||||
intervals.add(new Interval(1, 4));
|
||||
intervals.add(new Interval(3, 8));
|
||||
sort(intervals, new IntervalableComparatorBySize());
|
||||
assertEquals(6, intervals.get(0).size());
|
||||
assertEquals(4, intervals.get(1).size());
|
||||
@ -27,8 +25,8 @@ public class IntervalableComparatorBySizeTest {
|
||||
@Test
|
||||
public void sortOnSizeThenPosition() {
|
||||
List<Intervalable> intervals = new ArrayList<>();
|
||||
intervals.add(new Interval(4,7));
|
||||
intervals.add(new Interval(2,5));
|
||||
intervals.add(new Interval(4, 7));
|
||||
intervals.add(new Interval(2, 5));
|
||||
sort(intervals, new IntervalableComparatorBySize());
|
||||
assertEquals(2, intervals.get(0).getStart());
|
||||
assertEquals(4, intervals.get(1).getStart());
|
||||
|
||||
@ -1,6 +1,5 @@
|
||||
package org.ahocorasick.trie;
|
||||
|
||||
import org.ahocorasick.trie.State;
|
||||
import org.junit.Test;
|
||||
|
||||
import static junit.framework.Assert.assertEquals;
|
||||
|
||||
@ -1,36 +1,37 @@
|
||||
package org.ahocorasick.trie;
|
||||
|
||||
import org.ahocorasick.trie.handler.EmitHandler;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.ThreadLocalRandom;
|
||||
|
||||
import static java.util.concurrent.ThreadLocalRandom.current;
|
||||
import static junit.framework.Assert.assertEquals;
|
||||
import static org.ahocorasick.trie.Trie.builder;
|
||||
import org.ahocorasick.trie.handler.EmitHandler;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TrieTest {
|
||||
private final static String[] ALPHABET = new String[]{
|
||||
"abc", "bcd", "cde"
|
||||
"abc", "bcd", "cde"
|
||||
};
|
||||
|
||||
|
||||
private final static String[] PRONOUNS = new String[]{
|
||||
"hers", "his", "she", "he"
|
||||
"hers", "his", "she", "he"
|
||||
};
|
||||
|
||||
private final static String[] FOOD = new String[]{
|
||||
"veal", "cauliflower", "broccoli", "tomatoes"
|
||||
"veal", "cauliflower", "broccoli", "tomatoes"
|
||||
};
|
||||
|
||||
private final static String[] GREEK_LETTERS = new String[]{
|
||||
"Alpha", "Beta", "Gamma"
|
||||
"Alpha", "Beta", "Gamma"
|
||||
};
|
||||
|
||||
|
||||
private final static String[] UNICODE = new String[]{
|
||||
"turning", "once", "again", "börkü"
|
||||
"turning", "once", "again", "börkü"
|
||||
};
|
||||
|
||||
@Test
|
||||
@ -408,7 +409,7 @@ public class TrieTest {
|
||||
.onlyWholeWordsWhiteSpaceSeparated()
|
||||
.addKeyword("#sugar-123")
|
||||
.build();
|
||||
Collection < Emit > emits = trie.parseText("#sugar-123 #sugar-1234"); // left, middle, right test
|
||||
Collection<Emit> emits = trie.parseText("#sugar-123 #sugar-1234"); // left, middle, right test
|
||||
assertEquals(1, emits.size()); // Match must not be made
|
||||
checkEmit(emits.iterator().next(), 0, 9, "#sugar-123");
|
||||
}
|
||||
@ -417,57 +418,57 @@ public class TrieTest {
|
||||
public void testLargeString() {
|
||||
final int interval = 100;
|
||||
final int textSize = 1000000;
|
||||
final String keyword = FOOD[ 1 ];
|
||||
final StringBuilder text = randomNumbers( textSize );
|
||||
final String keyword = FOOD[1];
|
||||
final StringBuilder text = randomNumbers(textSize);
|
||||
|
||||
injectKeyword( text, keyword, interval );
|
||||
injectKeyword(text, keyword, interval);
|
||||
|
||||
Trie trie = builder()
|
||||
.onlyWholeWords()
|
||||
.addKeyword( keyword )
|
||||
.build();
|
||||
.onlyWholeWords()
|
||||
.addKeyword(keyword)
|
||||
.build();
|
||||
|
||||
final Collection<Emit> emits = trie.parseText( text );
|
||||
final Collection<Emit> emits = trie.parseText(text);
|
||||
|
||||
assertEquals( textSize / interval, emits.size() );
|
||||
assertEquals(textSize / interval, emits.size());
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Generates a random sequence of ASCII numbers.
|
||||
*
|
||||
*
|
||||
* @param count The number of numbers to generate.
|
||||
* @return A character sequence filled with random digits.
|
||||
*/
|
||||
private StringBuilder randomNumbers( final int count ) {
|
||||
final StringBuilder sb = new StringBuilder( count );
|
||||
|
||||
for( int i = count - 1; i >= 0; i-- ) {
|
||||
sb.append( randomInt( 0, 10 ) );
|
||||
private StringBuilder randomNumbers(final int count) {
|
||||
final StringBuilder sb = new StringBuilder(count);
|
||||
|
||||
for (int i = count - 1; i >= 0; i--) {
|
||||
sb.append(randomInt(0, 10));
|
||||
}
|
||||
|
||||
return sb;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Injects keywords into a string builder.
|
||||
*
|
||||
* @param source Should contain a bunch of random data that cannot match
|
||||
* any keyword.
|
||||
* @param keyword A keyword to inject repeatedly in the text.
|
||||
*
|
||||
* @param source Should contain a bunch of random data that cannot match
|
||||
* any keyword.
|
||||
* @param keyword A keyword to inject repeatedly in the text.
|
||||
* @param interval How often to inject the keyword.
|
||||
*/
|
||||
private void injectKeyword(
|
||||
final StringBuilder source,
|
||||
final String keyword,
|
||||
final int interval ) {
|
||||
private void injectKeyword(
|
||||
final StringBuilder source,
|
||||
final String keyword,
|
||||
final int interval) {
|
||||
final int length = source.length();
|
||||
for( int i = 0; i < length; i += interval ) {
|
||||
source.replace( i, i + keyword.length(), keyword );
|
||||
for (int i = 0; i < length; i += interval) {
|
||||
source.replace(i, i + keyword.length(), keyword);
|
||||
}
|
||||
}
|
||||
|
||||
private int randomInt( final int min, final int max ) {
|
||||
return current().nextInt( min, max );
|
||||
|
||||
private int randomInt(final int min, final int max) {
|
||||
return current().nextInt(min, max);
|
||||
}
|
||||
|
||||
private void checkEmit(Emit next, int expectedStart, int expectedEnd, String expectedKeyword) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user