Optimize imports

Reformatted code (Java convention; tab is 4 spaces)
This commit is contained in:
robert-bor 2016-11-30 09:10:21 +01:00
parent 90d4645d49
commit b5aaa51fdd
12 changed files with 187 additions and 180 deletions

View File

@ -1,4 +1,5 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.ahocorasick</groupId>

View File

@ -75,9 +75,9 @@ public class IntervalNode {
}
protected void addToOverlaps(
final Intervalable interval,
final List<Intervalable> overlaps,
final List<Intervalable> newOverlaps) {
final Intervalable interval,
final List<Intervalable> overlaps,
final List<Intervalable> newOverlaps) {
for (final Intervalable currentInterval : newOverlaps) {
if (!currentInterval.equals(interval)) {
overlaps.add(currentInterval);
@ -94,9 +94,9 @@ public class IntervalNode {
}
protected List<Intervalable> checkForOverlaps(
final Intervalable interval, final Direction direction) {
final Intervalable interval, final Direction direction) {
final List<Intervalable> overlaps = new ArrayList<>();
for (final Intervalable currentInterval : this.intervals) {
switch (direction) {
case LEFT:
@ -111,13 +111,13 @@ public class IntervalNode {
break;
}
}
return overlaps;
}
protected List<Intervalable> findOverlappingRanges(IntervalNode node, Intervalable interval) {
return node == null
? Collections.<Intervalable>emptyList()
: node.findOverlaps( interval );
? Collections.<Intervalable>emptyList()
: node.findOverlaps(interval);
}
}

View File

@ -1,10 +1,11 @@
package org.ahocorasick.interval;
import static java.util.Collections.sort;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import static java.util.Collections.sort;
public class IntervalTree {
private final IntervalNode rootNode;

View File

@ -7,11 +7,11 @@ public class IntervalableComparatorBySize implements Comparator<Intervalable> {
/**
 * Orders intervals from largest to smallest; intervals of equal size are
 * ordered by ascending start position.
 *
 * @param intervalable  the first interval to compare
 * @param intervalable2 the second interval to compare
 * @return a negative value when {@code intervalable} sorts first, a positive
 *         value when {@code intervalable2} sorts first, and zero when both
 *         size and start position are equal
 */
@Override
public int compare(final Intervalable intervalable, final Intervalable intervalable2) {
    // Integer.compare rather than subtraction: the difference of two ints can
    // overflow and flip the sign, which would break the comparator contract.
    final int bySize = Integer.compare(intervalable2.size(), intervalable.size());
    if (bySize != 0) {
        return bySize;
    }
    // Same size: the interval that starts earlier comes first.
    return Integer.compare(intervalable.getStart(), intervalable2.getStart());
}

View File

@ -4,43 +4,51 @@ import java.util.*;
/**
* <p>
* A state has various important tasks it must attend to:
* A state has various important tasks it must attend to:
* </p>
*
* <ul>
* <li>success; when a character points to another state, it must return that state</li>
* <li>failure; when a character has no matching state, the algorithm must be able to fall back on a
* state with less depth</li>
* <li>emits; when this state is passed and keywords have been matched, the matches must be
* 'emitted' so that they can be used later on.</li>
* </ul>
*
* <p>
* The root state is special in the sense that it has no failure state; it cannot fail. If it 'fails'
* it will still parse the next character and start from the root node. This ensures that the algorithm
* always runs. All other states always have a fail state.
* <ul>
* <li>success; when a character points to another state, it must return that state</li>
* <li>failure; when a character has no matching state, the algorithm must be able to fall back on a
* state with less depth</li>
* <li>emits; when this state is passed and keywords have been matched, the matches must be
* 'emitted' so that they can be used later on.</li>
* </ul>
* <p>
* <p>
* The root state is special in the sense that it has no failure state; it cannot fail. If it 'fails'
* it will still parse the next character and start from the root node. This ensures that the algorithm
* always runs. All other states always have a fail state.
* </p>
*
* @author Robert Bor
*/
public class State {
/** effectively the size of the keyword */
/**
* effectively the size of the keyword
*/
private final int depth;
/** only used for the root state to refer to itself in case no matches have been found */
/**
* only used for the root state to refer to itself in case no matches have been found
*/
private final State rootState;
/**
* referred to in the white paper as the 'goto' structure. From a state it is possible to go
* to other states, depending on the character passed.
*/
private final Map<Character,State> success = new HashMap<>();
private final Map<Character, State> success = new HashMap<>();
/** if no matching states are found, the failure state will be returned */
/**
* if no matching states are found, the failure state will be returned
*/
private State failure;
/** whenever this state is reached, it will emit the matched keywords for future reference */
/**
* whenever this state is reached, it will emit the matched keywords for future reference
*/
private Set<String> emits;
public State() {
@ -54,11 +62,11 @@ public class State {
/**
 * Resolves the state reached from this state for the given character.
 *
 * @param character       the character to follow
 * @param ignoreRootState when {@code true}, a missing transition yields
 *                        {@code null} instead of falling back to {@code rootState}
 * @return the mapped successor state if one exists; otherwise {@code null}
 *         when {@code ignoreRootState} is set, or {@code rootState} (which may
 *         itself be {@code null}) when it is not
 */
private State nextState(final Character character, final boolean ignoreRootState) {
    final State successor = this.success.get(character);
    if (successor != null || ignoreRootState) {
        // Either a real transition exists, or the caller asked for the raw lookup.
        return successor;
    }
    // No transition: hand back rootState, which is null when it was never set.
    return this.rootState;
}
@ -69,21 +77,21 @@ public class State {
/**
 * Looks up the successor state for the given character without the
 * fallback to the root state: delegates to {@code nextState(character, true)}.
 *
 * @param character the character to follow from this state
 * @return the mapped successor state, or {@code null} when this state has no
 *         transition for {@code character}
 */
public State nextStateIgnoreRootState(final Character character) {
return nextState(character, true);
}
public State addState(final String keyword ) {
State state = this;
for (final Character character : keyword.toCharArray()) {
state = state.addState(character);
}
return state;
public State addState(final String keyword) {
State state = this;
for (final Character character : keyword.toCharArray()) {
state = state.addState(character);
}
return state;
}
public State addState(final Character character) {
State nextState = nextStateIgnoreRootState(character);
if (nextState == null) {
nextState = new State(this.depth+1);
nextState = new State(this.depth + 1);
this.success.put(character, nextState);
}
return nextState;
@ -107,7 +115,7 @@ public class State {
}
public Collection<String> emit() {
return this.emits == null ? Collections.<String> emptyList() : this.emits;
return this.emits == null ? Collections.<String>emptyList() : this.emits;
}
public State failure() {

View File

@ -1,22 +1,24 @@
package org.ahocorasick.trie;
import static java.lang.Character.isAlphabetic;
import static java.lang.Character.isWhitespace;
import static java.lang.Character.toLowerCase;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Queue;
import java.util.concurrent.LinkedBlockingDeque;
import org.ahocorasick.interval.IntervalTree;
import org.ahocorasick.interval.Intervalable;
import org.ahocorasick.trie.handler.DefaultEmitHandler;
import org.ahocorasick.trie.handler.EmitHandler;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Queue;
import java.util.concurrent.LinkedBlockingDeque;
import static java.lang.Character.*;
import java.lang.Character;
/**
* Based on the Aho-Corasick white paper, Bell Laboratories:
* http://cr.yp.to/bib/1975/aho.pdf
*
*
* @author Robert Bor
*/
public class Trie {
@ -29,42 +31,41 @@ public class Trie {
this.trieConfig = trieConfig;
this.rootState = new State();
}
/**
* Used by the builder to add a text search keyword.
*
*
* @param keyword The search term to add to the list of search terms.
*
* @throws NullPointerException if the keyword is null.
*/
private void addKeyword( String keyword ) {
if( keyword.length() > 0 ) {
if( isCaseInsensitive() ) {
private void addKeyword(String keyword) {
if (keyword.length() > 0) {
if (isCaseInsensitive()) {
keyword = keyword.toLowerCase();
}
addState( keyword ).addEmit( keyword );
addState(keyword).addEmit(keyword);
}
}
private State addState( final String keyword ) {
return getRootState().addState( keyword );
private State addState(final String keyword) {
return getRootState().addState(keyword);
}
public Collection<Token> tokenize(final String text) {
final Collection<Token> tokens = new ArrayList<>();
final Collection<Emit> collectedEmits = parseText(text);
int lastCollectedPosition = -1;
for (final Emit emit : collectedEmits) {
if (emit.getStart() - lastCollectedPosition > 1) {
tokens.add(createFragment(emit, text, lastCollectedPosition));
}
tokens.add(createMatch(emit, text));
lastCollectedPosition = emit.getEnd();
}
if (text.length() - lastCollectedPosition > 1) {
tokens.add(createFragment(null, text, lastCollectedPosition));
}
@ -73,14 +74,14 @@ public class Trie {
}
private Token createFragment(
final Emit emit,
final String text,
final int lastCollectedPosition) {
return new FragmentToken(text.substring(lastCollectedPosition+1, emit == null ? text.length() : emit.getStart()));
final Emit emit,
final String text,
final int lastCollectedPosition) {
return new FragmentToken(text.substring(lastCollectedPosition + 1, emit == null ? text.length() : emit.getStart()));
}
private Token createMatch(final Emit emit, final String text) {
return new MatchToken(text.substring(emit.getStart(), emit.getEnd()+1), emit);
return new MatchToken(text.substring(emit.getStart(), emit.getEnd() + 1), emit);
}
@SuppressWarnings("unchecked")
@ -99,7 +100,7 @@ public class Trie {
}
if (!trieConfig.isAllowOverlaps()) {
IntervalTree intervalTree = new IntervalTree((List<Intervalable>)(List<?>)collectedEmits);
IntervalTree intervalTree = new IntervalTree((List<Intervalable>) (List<?>) collectedEmits);
intervalTree.removeOverlaps((List<Intervalable>) (List<?>) collectedEmits);
}
@ -112,15 +113,15 @@ public class Trie {
public void parseText(final CharSequence text, final EmitHandler emitHandler) {
State currentState = getRootState();
for (int position = 0; position < text.length(); position++) {
Character character = text.charAt(position);
// TODO: Maybe lowercase the entire string at once?
if (trieConfig.isCaseInsensitive()) {
character = toLowerCase(character);
}
currentState = getState(currentState, character);
if (storeEmits(position, currentState, emitHandler) && trieConfig.isStopOnHit()) {
return;
@ -138,18 +139,18 @@ public class Trie {
} else {
// Fast path. Returns first match found.
State currentState = getRootState();
for (int position = 0; position < text.length(); position++) {
Character character = text.charAt(position);
// TODO: Lowercase the entire string at once?
if (trieConfig.isCaseInsensitive()) {
character = toLowerCase(character);
}
currentState = getState(currentState, character);
Collection<String> emitStrs = currentState.emit();
if (emitStrs != null && !emitStrs.isEmpty()) {
for (final String emitStr : emitStrs) {
final Emit emit = new Emit(position - emitStr.length() + 1, position, emitStr);
@ -164,26 +165,26 @@ public class Trie {
}
}
}
return null;
}
private boolean isPartialMatch(final CharSequence searchText, final Emit emit) {
return (emit.getStart() != 0 &&
isAlphabetic(searchText.charAt(emit.getStart() - 1))) ||
(emit.getEnd() + 1 != searchText.length() &&
isAlphabetic(searchText.charAt(emit.getEnd() + 1)));
isAlphabetic(searchText.charAt(emit.getStart() - 1))) ||
(emit.getEnd() + 1 != searchText.length() &&
isAlphabetic(searchText.charAt(emit.getEnd() + 1)));
}
private void removePartialMatches(final CharSequence searchText, final List<Emit> collectedEmits) {
final List<Emit> removeEmits = new ArrayList<>();
for (final Emit emit : collectedEmits) {
if (isPartialMatch(searchText, emit)) {
removeEmits.add(emit);
}
}
for (final Emit removeEmit : removeEmits) {
collectedEmits.remove(removeEmit);
}
@ -192,15 +193,15 @@ public class Trie {
private void removePartialMatchesWhiteSpaceSeparated(final CharSequence searchText, final List<Emit> collectedEmits) {
final long size = searchText.length();
final List<Emit> removeEmits = new ArrayList<>();
for (final Emit emit : collectedEmits) {
if ((emit.getStart() == 0 || isWhitespace(searchText.charAt(emit.getStart() - 1))) &&
(emit.getEnd() + 1 == size || isWhitespace(searchText.charAt(emit.getEnd() + 1)))) {
(emit.getEnd() + 1 == size || isWhitespace(searchText.charAt(emit.getEnd() + 1)))) {
continue;
}
removeEmits.add(emit);
}
for (final Emit removeEmit : removeEmits) {
collectedEmits.remove(removeEmit);
}
@ -209,12 +210,12 @@ public class Trie {
/**
 * Finds the state to move to for the given character, starting at
 * {@code initialState} and following failure links for as long as the
 * current candidate has no transition for the character.
 *
 * @param initialState the state to start the lookup from
 * @param character    the character being consumed
 * @return the first non-null result of {@code nextState(character)} found
 *         along the chain of failure states
 */
private State getState(final State initialState, final Character character) {
    State candidate = initialState;
    while (true) {
        final State target = candidate.nextState(character);
        if (target != null) {
            return target;
        }
        // NOTE(review): termination presumably relies on the root state always
        // yielding a non-null next state — confirm against State.nextState.
        candidate = candidate.failure();
    }
}
@ -249,12 +250,12 @@ public class Trie {
}
private boolean storeEmits(
final int position,
final State currentState,
final EmitHandler emitHandler) {
final int position,
final State currentState,
final EmitHandler emitHandler) {
boolean emitted = false;
final Collection<String> emits = currentState.emit();
// TODO: The check for empty might be superfluous.
if (emits != null && !emits.isEmpty()) {
for (final String emit : emits) {
@ -262,22 +263,22 @@ public class Trie {
emitted = true;
}
}
return emitted;
}
private boolean isCaseInsensitive() {
return trieConfig.isCaseInsensitive();
return trieConfig.isCaseInsensitive();
}
private State getRootState() {
return this.rootState;
return this.rootState;
}
/**
* Constructs a TrieBuilder instance for configuring the Trie using a fluent
* interface.
*
*
* @return The builder used to configure its Trie.
*/
public static TrieBuilder builder() {
@ -296,31 +297,30 @@ public class Trie {
/**
* Default (empty) constructor.
*/
private TrieBuilder() {}
private TrieBuilder() {
}
/**
* Adds a keyword to the Trie's list of text search keywords.
*
*
* @param keyword The keyword to add to the list.
*
* @return This builder.
* @throws NullPointerException if the keyword is null.
*/
public TrieBuilder addKeyword(final CharSequence keyword) {
getTrie().addKeyword( keyword.toString() );
getTrie().addKeyword(keyword.toString());
return this;
}
/**
* Adds a list of keywords to the Trie's list of text search keywords.
*
*
* @param keywords The keywords to add to the list.
*
* @return This builder.
*/
public TrieBuilder addKeywords(final CharSequence... keywords) {
for( final CharSequence keyword : keywords ) {
addKeyword( keyword );
for (final CharSequence keyword : keywords) {
addKeyword(keyword);
}
return this;
@ -328,19 +328,18 @@ public class Trie {
/**
* Adds a list of keywords to the Trie's list of text search keywords.
*
*
* @param keywords The keywords to add to the list.
*
* @return This builder.
*/
public TrieBuilder addKeywords(final Collection<CharSequence> keywords) {
return addKeywords( keywords.toArray( new CharSequence[ keywords.size() ] ) );
return addKeywords(keywords.toArray(new CharSequence[keywords.size()]));
}
/**
* Configure the Trie to ignore case when searching for keywords in the
* text.
*
*
* @return This builder.
*/
public TrieBuilder ignoreCase() {
@ -350,7 +349,7 @@ public class Trie {
/**
* Configure the Trie to ignore overlapping keywords.
*
*
* @return This builder.
*/
public TrieBuilder ignoreOverlaps() {
@ -360,7 +359,7 @@ public class Trie {
/**
* Configure the Trie to match whole keywords in the text.
*
*
* @return This builder.
*/
public TrieBuilder onlyWholeWords() {
@ -372,18 +371,18 @@ public class Trie {
* Configure the Trie to match whole keywords that are separated by
* whitespace in the text. For example, "this keyword thatkeyword"
* would only match the first occurrence of "keyword".
*
*
* @return This builder.
*/
public TrieBuilder onlyWholeWordsWhiteSpaceSeparated() {
getTrieConfig().setOnlyWholeWordsWhiteSpaceSeparated(true);
return this;
}
/**
* Configure the Trie to stop searching for matches after the first
* keyword is found in the text.
*
*
* @return This builder.
*/
public TrieBuilder onlyFirstMatch() {
@ -393,27 +392,27 @@ public class Trie {
/**
* Construct the Trie using the builder settings.
*
*
* @return The configured Trie.
*/
public Trie build() {
// Failure states are constructed on the trie before it is handed to the caller.
getTrie().constructFailureStates();
return getTrie();
}
/**
 * @return the {@code trie} field held by this builder.
 */
private Trie getTrie() {
return this.trie;
}
/**
 * @return the {@code trieConfig} field held by this builder.
 */
private TrieConfig getTrieConfig() {
return this.trieConfig;
}
/**
* @deprecated Use onlyFirstMatch()
*/
public TrieBuilder stopOnHit() {
return onlyFirstMatch();
return onlyFirstMatch();
}
/**

View File

@ -1,8 +1,9 @@
package org.ahocorasick.trie.handler;
import org.ahocorasick.trie.Emit;
import java.util.ArrayList;
import java.util.List;
import org.ahocorasick.trie.Emit;
public class DefaultEmitHandler implements EmitHandler {

View File

@ -2,11 +2,11 @@ package org.ahocorasick.interval;
import org.junit.Test;
import java.util.*;
import java.util.Iterator;
import java.util.Set;
import java.util.TreeSet;
import static junit.framework.Assert.assertEquals;
import static junit.framework.Assert.assertFalse;
import static junit.framework.Assert.assertTrue;
import static junit.framework.Assert.*;
public class IntervalTest {

View File

@ -3,10 +3,9 @@ package org.ahocorasick.interval;
import org.junit.Test;
import java.util.ArrayList;
import java.util.Collections;
import static java.util.Collections.sort;
import java.util.List;
import static java.util.Collections.sort;
import static junit.framework.Assert.assertEquals;
public class IntervalableComparatorByPositionTest {
@ -14,9 +13,9 @@ public class IntervalableComparatorByPositionTest {
@Test
public void sortOnPosition() {
List<Intervalable> intervals = new ArrayList<>();
intervals.add(new Interval(4,5));
intervals.add(new Interval(1,4));
intervals.add(new Interval(3,8));
intervals.add(new Interval(4, 5));
intervals.add(new Interval(1, 4));
intervals.add(new Interval(3, 8));
sort(intervals, new IntervalableComparatorByPosition());
assertEquals(4, intervals.get(0).size());
assertEquals(6, intervals.get(1).size());

View File

@ -3,11 +3,9 @@ package org.ahocorasick.interval;
import org.junit.Test;
import java.util.ArrayList;
import java.util.Collections;
import static java.util.Collections.sort;
import static java.util.Collections.sort;
import java.util.List;
import static java.util.Collections.sort;
import static junit.framework.Assert.assertEquals;
public class IntervalableComparatorBySizeTest {
@ -15,9 +13,9 @@ public class IntervalableComparatorBySizeTest {
@Test
public void sortOnSize() {
List<Intervalable> intervals = new ArrayList<>();
intervals.add(new Interval(4,5));
intervals.add(new Interval(1,4));
intervals.add(new Interval(3,8));
intervals.add(new Interval(4, 5));
intervals.add(new Interval(1, 4));
intervals.add(new Interval(3, 8));
sort(intervals, new IntervalableComparatorBySize());
assertEquals(6, intervals.get(0).size());
assertEquals(4, intervals.get(1).size());
@ -27,8 +25,8 @@ public class IntervalableComparatorBySizeTest {
@Test
public void sortOnSizeThenPosition() {
List<Intervalable> intervals = new ArrayList<>();
intervals.add(new Interval(4,7));
intervals.add(new Interval(2,5));
intervals.add(new Interval(4, 7));
intervals.add(new Interval(2, 5));
sort(intervals, new IntervalableComparatorBySize());
assertEquals(2, intervals.get(0).getStart());
assertEquals(4, intervals.get(1).getStart());

View File

@ -1,6 +1,5 @@
package org.ahocorasick.trie;
import org.ahocorasick.trie.State;
import org.junit.Test;
import static junit.framework.Assert.assertEquals;

View File

@ -1,36 +1,37 @@
package org.ahocorasick.trie;
import org.ahocorasick.trie.handler.EmitHandler;
import org.junit.Test;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.ThreadLocalRandom;
import static java.util.concurrent.ThreadLocalRandom.current;
import static junit.framework.Assert.assertEquals;
import static org.ahocorasick.trie.Trie.builder;
import org.ahocorasick.trie.handler.EmitHandler;
import static org.junit.Assert.assertTrue;
import org.junit.Test;
public class TrieTest {
private final static String[] ALPHABET = new String[]{
"abc", "bcd", "cde"
"abc", "bcd", "cde"
};
private final static String[] PRONOUNS = new String[]{
"hers", "his", "she", "he"
"hers", "his", "she", "he"
};
private final static String[] FOOD = new String[]{
"veal", "cauliflower", "broccoli", "tomatoes"
"veal", "cauliflower", "broccoli", "tomatoes"
};
private final static String[] GREEK_LETTERS = new String[]{
"Alpha", "Beta", "Gamma"
"Alpha", "Beta", "Gamma"
};
private final static String[] UNICODE = new String[]{
"turning", "once", "again", "börkü"
"turning", "once", "again", "börkü"
};
@Test
@ -408,7 +409,7 @@ public class TrieTest {
.onlyWholeWordsWhiteSpaceSeparated()
.addKeyword("#sugar-123")
.build();
Collection < Emit > emits = trie.parseText("#sugar-123 #sugar-1234"); // left, middle, right test
Collection<Emit> emits = trie.parseText("#sugar-123 #sugar-1234"); // left, middle, right test
assertEquals(1, emits.size()); // Match must not be made
checkEmit(emits.iterator().next(), 0, 9, "#sugar-123");
}
@ -417,57 +418,57 @@ public class TrieTest {
public void testLargeString() {
final int interval = 100;
final int textSize = 1000000;
final String keyword = FOOD[ 1 ];
final StringBuilder text = randomNumbers( textSize );
final String keyword = FOOD[1];
final StringBuilder text = randomNumbers(textSize);
injectKeyword( text, keyword, interval );
injectKeyword(text, keyword, interval);
Trie trie = builder()
.onlyWholeWords()
.addKeyword( keyword )
.build();
.onlyWholeWords()
.addKeyword(keyword)
.build();
final Collection<Emit> emits = trie.parseText( text );
final Collection<Emit> emits = trie.parseText(text);
assertEquals( textSize / interval, emits.size() );
assertEquals(textSize / interval, emits.size());
}
/**
* Generates a random sequence of ASCII numbers.
*
*
* @param count The number of numbers to generate.
* @return A character sequence filled with random digits.
*/
private StringBuilder randomNumbers( final int count ) {
final StringBuilder sb = new StringBuilder( count );
for( int i = count - 1; i >= 0; i-- ) {
sb.append( randomInt( 0, 10 ) );
private StringBuilder randomNumbers(final int count) {
final StringBuilder sb = new StringBuilder(count);
for (int i = count - 1; i >= 0; i--) {
sb.append(randomInt(0, 10));
}
return sb;
}
/**
* Injects keywords into a string builder.
*
* @param source Should contain a bunch of random data that cannot match
* any keyword.
* @param keyword A keyword to inject repeatedly in the text.
*
* @param source Should contain a bunch of random data that cannot match
* any keyword.
* @param keyword A keyword to inject repeatedly in the text.
* @param interval How often to inject the keyword.
*/
private void injectKeyword(
final StringBuilder source,
final String keyword,
final int interval ) {
private void injectKeyword(
final StringBuilder source,
final String keyword,
final int interval) {
final int length = source.length();
for( int i = 0; i < length; i += interval ) {
source.replace( i, i + keyword.length(), keyword );
for (int i = 0; i < length; i += interval) {
source.replace(i, i + keyword.length(), keyword);
}
}
private int randomInt( final int min, final int max ) {
return current().nextInt( min, max );
private int randomInt(final int min, final int max) {
return current().nextInt(min, max);
}
private void checkEmit(Emit next, int expectedStart, int expectedEnd, String expectedKeyword) {