Merge pull request #47 from robert-bor/jdk1.7
Updated source base to leverage JDK 1.7 syntax. Added more final modifiers…
This commit is contained in:
commit
89e7efab72
3
pom.xml
3
pom.xml
@ -1,4 +1,5 @@
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<groupId>org.ahocorasick</groupId>
|
||||
|
||||
@ -8,16 +8,16 @@ public class IntervalNode {
|
||||
|
||||
private enum Direction {LEFT, RIGHT}
|
||||
|
||||
private IntervalNode left = null;
|
||||
private IntervalNode right = null;
|
||||
private IntervalNode left;
|
||||
private IntervalNode right;
|
||||
private int point;
|
||||
private List<Intervalable> intervals = new ArrayList<Intervalable>();
|
||||
private List<Intervalable> intervals = new ArrayList<>();
|
||||
|
||||
public IntervalNode(List<Intervalable> intervals) {
|
||||
public IntervalNode(final List<Intervalable> intervals) {
|
||||
this.point = determineMedian(intervals);
|
||||
|
||||
List<Intervalable> toLeft = new ArrayList<Intervalable>();
|
||||
List<Intervalable> toRight = new ArrayList<Intervalable>();
|
||||
final List<Intervalable> toLeft = new ArrayList<>();
|
||||
final List<Intervalable> toRight = new ArrayList<>();
|
||||
|
||||
for (Intervalable interval : intervals) {
|
||||
if (interval.getEnd() < this.point) {
|
||||
@ -37,7 +37,7 @@ public class IntervalNode {
|
||||
}
|
||||
}
|
||||
|
||||
public int determineMedian(List<Intervalable> intervals) {
|
||||
public int determineMedian(final List<Intervalable> intervals) {
|
||||
int start = -1;
|
||||
int end = -1;
|
||||
for (Intervalable interval : intervals) {
|
||||
@ -53,17 +53,19 @@ public class IntervalNode {
|
||||
return (start + end) / 2;
|
||||
}
|
||||
|
||||
public List<Intervalable> findOverlaps(Intervalable interval) {
|
||||
public List<Intervalable> findOverlaps(final Intervalable interval) {
|
||||
final List<Intervalable> overlaps = new ArrayList<>();
|
||||
|
||||
List<Intervalable> overlaps = new ArrayList<Intervalable>();
|
||||
|
||||
if (this.point < interval.getStart()) { // Tends to the right
|
||||
if (this.point < interval.getStart()) {
|
||||
// Tends to the right
|
||||
addToOverlaps(interval, overlaps, findOverlappingRanges(this.right, interval));
|
||||
addToOverlaps(interval, overlaps, checkForOverlapsToTheRight(interval));
|
||||
} else if (this.point > interval.getEnd()) { // Tends to the left
|
||||
} else if (this.point > interval.getEnd()) {
|
||||
// Tends to the left
|
||||
addToOverlaps(interval, overlaps, findOverlappingRanges(this.left, interval));
|
||||
addToOverlaps(interval, overlaps, checkForOverlapsToTheLeft(interval));
|
||||
} else { // Somewhere in the middle
|
||||
} else {
|
||||
// Somewhere in the middle
|
||||
addToOverlaps(interval, overlaps, this.intervals);
|
||||
addToOverlaps(interval, overlaps, findOverlappingRanges(this.left, interval));
|
||||
addToOverlaps(interval, overlaps, findOverlappingRanges(this.right, interval));
|
||||
@ -72,26 +74,30 @@ public class IntervalNode {
|
||||
return overlaps;
|
||||
}
|
||||
|
||||
protected void addToOverlaps(Intervalable interval, List<Intervalable> overlaps, List<Intervalable> newOverlaps) {
|
||||
for (Intervalable currentInterval : newOverlaps) {
|
||||
protected void addToOverlaps(
|
||||
final Intervalable interval,
|
||||
final List<Intervalable> overlaps,
|
||||
final List<Intervalable> newOverlaps) {
|
||||
for (final Intervalable currentInterval : newOverlaps) {
|
||||
if (!currentInterval.equals(interval)) {
|
||||
overlaps.add(currentInterval);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected List<Intervalable> checkForOverlapsToTheLeft(Intervalable interval) {
|
||||
protected List<Intervalable> checkForOverlapsToTheLeft(final Intervalable interval) {
|
||||
return checkForOverlaps(interval, Direction.LEFT);
|
||||
}
|
||||
|
||||
protected List<Intervalable> checkForOverlapsToTheRight(Intervalable interval) {
|
||||
protected List<Intervalable> checkForOverlapsToTheRight(final Intervalable interval) {
|
||||
return checkForOverlaps(interval, Direction.RIGHT);
|
||||
}
|
||||
|
||||
protected List<Intervalable> checkForOverlaps(Intervalable interval, Direction direction) {
|
||||
protected List<Intervalable> checkForOverlaps(
|
||||
final Intervalable interval, final Direction direction) {
|
||||
final List<Intervalable> overlaps = new ArrayList<>();
|
||||
|
||||
List<Intervalable> overlaps = new ArrayList<Intervalable>();
|
||||
for (Intervalable currentInterval : this.intervals) {
|
||||
for (final Intervalable currentInterval : this.intervals) {
|
||||
switch (direction) {
|
||||
case LEFT:
|
||||
if (currentInterval.getStart() <= interval.getEnd()) {
|
||||
@ -105,15 +111,13 @@ public class IntervalNode {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return overlaps;
|
||||
}
|
||||
|
||||
|
||||
protected List<Intervalable> findOverlappingRanges(IntervalNode node, Intervalable interval) {
|
||||
if (node != null) {
|
||||
return node.findOverlaps(interval);
|
||||
}
|
||||
return Collections.emptyList();
|
||||
return node == null
|
||||
? Collections.<Intervalable>emptyList()
|
||||
: node.findOverlaps(interval);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -1,26 +1,27 @@
|
||||
package org.ahocorasick.interval;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import static java.util.Collections.sort;
|
||||
|
||||
public class IntervalTree {
|
||||
|
||||
private IntervalNode rootNode = null;
|
||||
private final IntervalNode rootNode;
|
||||
|
||||
public IntervalTree(List<Intervalable> intervals) {
|
||||
this.rootNode = new IntervalNode(intervals);
|
||||
}
|
||||
|
||||
public List<Intervalable> removeOverlaps(List<Intervalable> intervals) {
|
||||
public List<Intervalable> removeOverlaps(final List<Intervalable> intervals) {
|
||||
|
||||
// Sort the intervals on size, then left-most position
|
||||
Collections.sort(intervals, new IntervalableComparatorBySize());
|
||||
sort(intervals, new IntervalableComparatorBySize());
|
||||
|
||||
Set<Intervalable> removeIntervals = new TreeSet<Intervalable>();
|
||||
final Set<Intervalable> removeIntervals = new TreeSet<>();
|
||||
|
||||
for (Intervalable interval : intervals) {
|
||||
for (final Intervalable interval : intervals) {
|
||||
// If the interval was already removed, ignore it
|
||||
if (removeIntervals.contains(interval)) {
|
||||
continue;
|
||||
@ -31,17 +32,17 @@ public class IntervalTree {
|
||||
}
|
||||
|
||||
// Remove all intervals that were overlapping
|
||||
for (Intervalable removeInterval : removeIntervals) {
|
||||
for (final Intervalable removeInterval : removeIntervals) {
|
||||
intervals.remove(removeInterval);
|
||||
}
|
||||
|
||||
// Sort the intervals, now on left-most position only
|
||||
Collections.sort(intervals, new IntervalableComparatorByPosition());
|
||||
sort(intervals, new IntervalableComparatorByPosition());
|
||||
|
||||
return intervals;
|
||||
}
|
||||
|
||||
public List<Intervalable> findOverlaps(Intervalable interval) {
|
||||
public List<Intervalable> findOverlaps(final Intervalable interval) {
|
||||
return rootNode.findOverlaps(interval);
|
||||
}
|
||||
|
||||
|
||||
@ -7,5 +7,4 @@ public interface Intervalable extends Comparable {
|
||||
public int getEnd();
|
||||
|
||||
public int size();
|
||||
|
||||
}
|
||||
|
||||
@ -5,7 +5,7 @@ import java.util.Comparator;
|
||||
public class IntervalableComparatorByPosition implements Comparator<Intervalable> {
|
||||
|
||||
@Override
|
||||
public int compare(Intervalable intervalable, Intervalable intervalable2) {
|
||||
public int compare(final Intervalable intervalable, final Intervalable intervalable2) {
|
||||
return intervalable.getStart() - intervalable2.getStart();
|
||||
}
|
||||
|
||||
|
||||
@ -5,11 +5,13 @@ import java.util.Comparator;
|
||||
public class IntervalableComparatorBySize implements Comparator<Intervalable> {
|
||||
|
||||
@Override
|
||||
public int compare(Intervalable intervalable, Intervalable intervalable2) {
|
||||
public int compare(final Intervalable intervalable, final Intervalable intervalable2) {
|
||||
int comparison = intervalable2.size() - intervalable.size();
|
||||
|
||||
if (comparison == 0) {
|
||||
comparison = intervalable.getStart() - intervalable2.getStart();
|
||||
}
|
||||
|
||||
return comparison;
|
||||
}
|
||||
|
||||
|
||||
@ -20,5 +20,4 @@ public class Emit extends Interval implements Intervalable {
|
||||
public String toString() {
|
||||
return super.toString() + "=" + this.keyword;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -2,9 +2,9 @@ package org.ahocorasick.trie;
|
||||
|
||||
public class MatchToken extends Token {
|
||||
|
||||
private Emit emit;
|
||||
private final Emit emit;
|
||||
|
||||
public MatchToken(String fragment, Emit emit) {
|
||||
public MatchToken(final String fragment, final Emit emit) {
|
||||
super(fragment);
|
||||
this.emit = emit;
|
||||
}
|
||||
@ -18,5 +18,4 @@ public class MatchToken extends Token {
|
||||
public Emit getEmit() {
|
||||
return this.emit;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -74,11 +74,11 @@ public class State {
|
||||
return nextState(character, false);
|
||||
}
|
||||
|
||||
public State nextStateIgnoreRootState(Character character) {
|
||||
public State nextStateIgnoreRootState(final Character character) {
|
||||
return nextState(character, true);
|
||||
}
|
||||
|
||||
public State addState(String keyword) {
|
||||
public State addState(final String keyword) {
|
||||
State state = this;
|
||||
|
||||
for (final Character character : keyword.toCharArray()) {
|
||||
@ -88,7 +88,7 @@ public class State {
|
||||
return state;
|
||||
}
|
||||
|
||||
public State addState(Character character) {
|
||||
public State addState(final Character character) {
|
||||
State nextState = nextStateIgnoreRootState(character);
|
||||
if (nextState == null) {
|
||||
nextState = new State(this.depth + 1);
|
||||
@ -101,14 +101,14 @@ public class State {
|
||||
return this.depth;
|
||||
}
|
||||
|
||||
public void addEmit(String keyword) {
|
||||
public void addEmit(final String keyword) {
|
||||
if (this.emits == null) {
|
||||
this.emits = new TreeSet<>();
|
||||
}
|
||||
this.emits.add(keyword);
|
||||
}
|
||||
|
||||
public void addEmit(Collection<String> emits) {
|
||||
public void addEmit(final Collection<String> emits) {
|
||||
for (String emit : emits) {
|
||||
addEmit(emit);
|
||||
}
|
||||
|
||||
@ -1,6 +1,9 @@
|
||||
package org.ahocorasick.trie;
|
||||
|
||||
import static java.lang.Character.isWhitespace;
|
||||
import org.ahocorasick.interval.IntervalTree;
|
||||
import org.ahocorasick.interval.Intervalable;
|
||||
import org.ahocorasick.trie.handler.DefaultEmitHandler;
|
||||
import org.ahocorasick.trie.handler.EmitHandler;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
@ -8,10 +11,9 @@ import java.util.List;
|
||||
import java.util.Queue;
|
||||
import java.util.concurrent.LinkedBlockingDeque;
|
||||
|
||||
import org.ahocorasick.interval.IntervalTree;
|
||||
import org.ahocorasick.interval.Intervalable;
|
||||
import org.ahocorasick.trie.handler.DefaultEmitHandler;
|
||||
import org.ahocorasick.trie.handler.EmitHandler;
|
||||
import static java.lang.Character.*;
|
||||
|
||||
import java.lang.Character;
|
||||
|
||||
/**
|
||||
* Based on the Aho-Corasick white paper, Bell technologies:
|
||||
@ -37,36 +39,12 @@ public class Trie {
|
||||
* @throws NullPointerException if the keyword is null.
|
||||
*/
|
||||
private void addKeyword(String keyword) {
|
||||
if (keyword.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
if (keyword.length() > 0) {
|
||||
if (isCaseInsensitive()) {
|
||||
keyword = keyword.toLowerCase();
|
||||
}
|
||||
|
||||
if (isCaseInsensitive()) {
|
||||
keyword = keyword.toLowerCase();
|
||||
}
|
||||
|
||||
addState(keyword).addEmit(keyword);
|
||||
}
|
||||
|
||||
/**
|
||||
* Delegates to addKeyword.
|
||||
*
|
||||
* @param keywords List of search term to add to the list of search terms.
|
||||
*/
|
||||
private void addKeywords(final String[] keywords) {
|
||||
for (final String keyword : keywords) {
|
||||
addKeyword(keyword);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Delegates to addKeyword.
|
||||
*
|
||||
* @param keywords List of search term to add to the list of search terms.
|
||||
*/
|
||||
private void addKeywords(final Collection<String> keywords) {
|
||||
for (final String keyword : keywords) {
|
||||
addKeyword(keyword);
|
||||
addState(keyword).addEmit(keyword);
|
||||
}
|
||||
}
|
||||
|
||||
@ -95,11 +73,14 @@ public class Trie {
|
||||
return tokens;
|
||||
}
|
||||
|
||||
private Token createFragment(final Emit emit, final String text, final int lastCollectedPosition) {
|
||||
private Token createFragment(
|
||||
final Emit emit,
|
||||
final String text,
|
||||
final int lastCollectedPosition) {
|
||||
return new FragmentToken(text.substring(lastCollectedPosition + 1, emit == null ? text.length() : emit.getStart()));
|
||||
}
|
||||
|
||||
private Token createMatch(Emit emit, String text) {
|
||||
private Token createMatch(final Emit emit, final String text) {
|
||||
return new MatchToken(text.substring(emit.getStart(), emit.getEnd() + 1), emit);
|
||||
}
|
||||
|
||||
@ -138,7 +119,7 @@ public class Trie {
|
||||
|
||||
// TODO: Maybe lowercase the entire string at once?
|
||||
if (trieConfig.isCaseInsensitive()) {
|
||||
character = Character.toLowerCase(character);
|
||||
character = toLowerCase(character);
|
||||
}
|
||||
|
||||
currentState = getState(currentState, character);
|
||||
@ -164,14 +145,14 @@ public class Trie {
|
||||
|
||||
// TODO: Lowercase the entire string at once?
|
||||
if (trieConfig.isCaseInsensitive()) {
|
||||
character = Character.toLowerCase(character);
|
||||
character = toLowerCase(character);
|
||||
}
|
||||
|
||||
currentState = getState(currentState, character);
|
||||
Collection<String> emitStrs = currentState.emit();
|
||||
|
||||
if (emitStrs != null && !emitStrs.isEmpty()) {
|
||||
for (String emitStr : emitStrs) {
|
||||
for (final String emitStr : emitStrs) {
|
||||
final Emit emit = new Emit(position - emitStr.length() + 1, position, emitStr);
|
||||
if (trieConfig.isOnlyWholeWords()) {
|
||||
if (!isPartialMatch(text, emit)) {
|
||||
@ -190,9 +171,9 @@ public class Trie {
|
||||
|
||||
private boolean isPartialMatch(final CharSequence searchText, final Emit emit) {
|
||||
return (emit.getStart() != 0 &&
|
||||
Character.isAlphabetic(searchText.charAt(emit.getStart() - 1))) ||
|
||||
isAlphabetic(searchText.charAt(emit.getStart() - 1))) ||
|
||||
(emit.getEnd() + 1 != searchText.length() &&
|
||||
Character.isAlphabetic(searchText.charAt(emit.getEnd() + 1)));
|
||||
isAlphabetic(searchText.charAt(emit.getEnd() + 1)));
|
||||
}
|
||||
|
||||
private void removePartialMatches(final CharSequence searchText, final List<Emit> collectedEmits) {
|
||||
@ -226,15 +207,16 @@ public class Trie {
|
||||
}
|
||||
}
|
||||
|
||||
private State getState(State currentState, final Character character) {
|
||||
State newCurrentState = currentState.nextState(character);
|
||||
private State getState(final State initialState, final Character character) {
|
||||
State currentState = initialState;
|
||||
State updatedState = currentState.nextState(character);
|
||||
|
||||
while (newCurrentState == null) {
|
||||
while (updatedState == null) {
|
||||
currentState = currentState.failure();
|
||||
newCurrentState = currentState.nextState(character);
|
||||
updatedState = currentState.nextState(character);
|
||||
}
|
||||
|
||||
return newCurrentState;
|
||||
return updatedState;
|
||||
}
|
||||
|
||||
private void constructFailureStates() {
|
||||
@ -267,7 +249,10 @@ public class Trie {
|
||||
}
|
||||
}
|
||||
|
||||
private boolean storeEmits(final int position, final State currentState, final EmitHandler emitHandler) {
|
||||
private boolean storeEmits(
|
||||
final int position,
|
||||
final State currentState,
|
||||
final EmitHandler emitHandler) {
|
||||
boolean emitted = false;
|
||||
final Collection<String> emits = currentState.emit();
|
||||
|
||||
@ -291,7 +276,8 @@ public class Trie {
|
||||
}
|
||||
|
||||
/**
|
||||
* Provides a fluent interface for constructing Trie instances.
|
||||
* Constructs a TrieBuilder instance for configuring the Trie using a fluent
|
||||
* interface.
|
||||
*
|
||||
* @return The builder used to configure its Trie.
|
||||
*/
|
||||
@ -299,6 +285,9 @@ public class Trie {
|
||||
return new TrieBuilder();
|
||||
}
|
||||
|
||||
/**
|
||||
* Provides a fluent interface for constructing Trie instances.
|
||||
*/
|
||||
public static class TrieBuilder {
|
||||
|
||||
private final TrieConfig trieConfig = new TrieConfig();
|
||||
@ -318,8 +307,8 @@ public class Trie {
|
||||
* @return This builder.
|
||||
* @throws NullPointerException if the keyword is null.
|
||||
*/
|
||||
public TrieBuilder addKeyword(final String keyword) {
|
||||
this.trie.addKeyword(keyword);
|
||||
public TrieBuilder addKeyword(final CharSequence keyword) {
|
||||
getTrie().addKeyword(keyword.toString());
|
||||
return this;
|
||||
}
|
||||
|
||||
@ -329,8 +318,11 @@ public class Trie {
|
||||
* @param keywords The keywords to add to the list.
|
||||
* @return This builder.
|
||||
*/
|
||||
public TrieBuilder addKeywords(final String... keywords) {
|
||||
this.trie.addKeywords(keywords);
|
||||
public TrieBuilder addKeywords(final CharSequence... keywords) {
|
||||
for (final CharSequence keyword : keywords) {
|
||||
addKeyword(keyword);
|
||||
}
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
@ -340,19 +332,18 @@ public class Trie {
|
||||
* @param keywords The keywords to add to the list.
|
||||
* @return This builder.
|
||||
*/
|
||||
public TrieBuilder addKeywords(final Collection<String> keywords) {
|
||||
this.trie.addKeywords(keywords);
|
||||
return this;
|
||||
public TrieBuilder addKeywords(final Collection<CharSequence> keywords) {
|
||||
return addKeywords(keywords.toArray(new CharSequence[keywords.size()]));
|
||||
}
|
||||
|
||||
/**
|
||||
* Configure the Trie to ignore case when searching for keywords in
|
||||
* the text.
|
||||
* Configure the Trie to ignore case when searching for keywords in the
|
||||
* text.
|
||||
*
|
||||
* @return This builder.
|
||||
*/
|
||||
public TrieBuilder ignoreCase() {
|
||||
this.trieConfig.setCaseInsensitive(true);
|
||||
getTrieConfig().setCaseInsensitive(true);
|
||||
return this;
|
||||
}
|
||||
|
||||
@ -362,7 +353,7 @@ public class Trie {
|
||||
* @return This builder.
|
||||
*/
|
||||
public TrieBuilder ignoreOverlaps() {
|
||||
this.trieConfig.setAllowOverlaps(false);
|
||||
getTrieConfig().setAllowOverlaps(false);
|
||||
return this;
|
||||
}
|
||||
|
||||
@ -372,7 +363,7 @@ public class Trie {
|
||||
* @return This builder.
|
||||
*/
|
||||
public TrieBuilder onlyWholeWords() {
|
||||
this.trieConfig.setOnlyWholeWords(true);
|
||||
getTrieConfig().setOnlyWholeWords(true);
|
||||
return this;
|
||||
}
|
||||
|
||||
@ -384,33 +375,47 @@ public class Trie {
|
||||
* @return This builder.
|
||||
*/
|
||||
public TrieBuilder onlyWholeWordsWhiteSpaceSeparated() {
|
||||
this.trieConfig.setOnlyWholeWordsWhiteSpaceSeparated(true);
|
||||
getTrieConfig().setOnlyWholeWordsWhiteSpaceSeparated(true);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Configure the Trie to stop after the first keyword is found in the
|
||||
* text.
|
||||
* Configure the Trie to stop searching for matches after the first
|
||||
* keyword is found in the text.
|
||||
*
|
||||
* @return This builder.
|
||||
*/
|
||||
public TrieBuilder stopOnHit() {
|
||||
trie.trieConfig.setStopOnHit(true);
|
||||
public TrieBuilder onlyFirstMatch() {
|
||||
getTrieConfig().setStopOnHit(true);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Configure the Trie based on the builder settings.
|
||||
* Construct the Trie using the builder settings.
|
||||
*
|
||||
* @return The configured Trie.
|
||||
*/
|
||||
public Trie build() {
|
||||
this.trie.constructFailureStates();
|
||||
getTrie().constructFailureStates();
|
||||
return getTrie();
|
||||
}
|
||||
|
||||
private Trie getTrie() {
|
||||
return this.trie;
|
||||
}
|
||||
|
||||
private TrieConfig getTrieConfig() {
|
||||
return this.trieConfig;
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated Use onlyFirstMatch()
|
||||
*/
|
||||
public TrieBuilder stopOnHit() {
|
||||
return onlyFirstMatch();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return This builder.
|
||||
* @deprecated Use ignoreCase()
|
||||
*/
|
||||
public TrieBuilder caseInsensitive() {
|
||||
@ -418,7 +423,6 @@ public class Trie {
|
||||
}
|
||||
|
||||
/**
|
||||
* @return This builder.
|
||||
* @deprecated Use ignoreOverlaps()
|
||||
*/
|
||||
public TrieBuilder removeOverlaps() {
|
||||
|
||||
@ -7,15 +7,14 @@ import java.util.List;
|
||||
|
||||
public class DefaultEmitHandler implements EmitHandler {
|
||||
|
||||
private List<Emit> emits = new ArrayList<>();
|
||||
private final List<Emit> emits = new ArrayList<>();
|
||||
|
||||
@Override
|
||||
public void emit(Emit emit) {
|
||||
public void emit(final Emit emit) {
|
||||
this.emits.add(emit);
|
||||
}
|
||||
|
||||
public List<Emit> getEmits() {
|
||||
return this.emits;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -2,11 +2,11 @@ package org.ahocorasick.interval;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.Iterator;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import static junit.framework.Assert.assertEquals;
|
||||
import static junit.framework.Assert.assertFalse;
|
||||
import static junit.framework.Assert.assertTrue;
|
||||
import static junit.framework.Assert.*;
|
||||
|
||||
public class IntervalTest {
|
||||
|
||||
@ -44,7 +44,7 @@ public class IntervalTest {
|
||||
|
||||
@Test
|
||||
public void comparable() {
|
||||
Set<Interval> intervals = new TreeSet<Interval>();
|
||||
Set<Interval> intervals = new TreeSet<>();
|
||||
intervals.add(new Interval(4, 6));
|
||||
intervals.add(new Interval(2, 7));
|
||||
intervals.add(new Interval(3, 4));
|
||||
|
||||
@ -12,7 +12,7 @@ public class IntervalTreeTest {
|
||||
|
||||
@Test
|
||||
public void findOverlaps() {
|
||||
List<Intervalable> intervals = new ArrayList<Intervalable>();
|
||||
List<Intervalable> intervals = new ArrayList<>();
|
||||
intervals.add(new Interval(0, 2));
|
||||
intervals.add(new Interval(1, 3));
|
||||
intervals.add(new Interval(2, 4));
|
||||
@ -30,7 +30,7 @@ public class IntervalTreeTest {
|
||||
|
||||
@Test
|
||||
public void removeOverlaps() {
|
||||
List<Intervalable> intervals = new ArrayList<Intervalable>();
|
||||
List<Intervalable> intervals = new ArrayList<>();
|
||||
intervals.add(new Interval(0, 2));
|
||||
intervals.add(new Interval(4, 5));
|
||||
intervals.add(new Interval(2, 10));
|
||||
|
||||
@ -3,20 +3,20 @@ package org.ahocorasick.interval;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import static java.util.Collections.sort;
|
||||
import static junit.framework.Assert.assertEquals;
|
||||
|
||||
public class IntervalableComparatorByPositionTest {
|
||||
|
||||
@Test
|
||||
public void sortOnPosition() {
|
||||
List<Intervalable> intervals = new ArrayList<Intervalable>();
|
||||
List<Intervalable> intervals = new ArrayList<>();
|
||||
intervals.add(new Interval(4, 5));
|
||||
intervals.add(new Interval(1, 4));
|
||||
intervals.add(new Interval(3, 8));
|
||||
Collections.sort(intervals, new IntervalableComparatorByPosition());
|
||||
sort(intervals, new IntervalableComparatorByPosition());
|
||||
assertEquals(4, intervals.get(0).size());
|
||||
assertEquals(6, intervals.get(1).size());
|
||||
assertEquals(2, intervals.get(2).size());
|
||||
|
||||
@ -3,20 +3,20 @@ package org.ahocorasick.interval;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import static java.util.Collections.sort;
|
||||
import static junit.framework.Assert.assertEquals;
|
||||
|
||||
public class IntervalableComparatorBySizeTest {
|
||||
|
||||
@Test
|
||||
public void sortOnSize() {
|
||||
List<Intervalable> intervals = new ArrayList<Intervalable>();
|
||||
List<Intervalable> intervals = new ArrayList<>();
|
||||
intervals.add(new Interval(4, 5));
|
||||
intervals.add(new Interval(1, 4));
|
||||
intervals.add(new Interval(3, 8));
|
||||
Collections.sort(intervals, new IntervalableComparatorBySize());
|
||||
sort(intervals, new IntervalableComparatorBySize());
|
||||
assertEquals(6, intervals.get(0).size());
|
||||
assertEquals(4, intervals.get(1).size());
|
||||
assertEquals(2, intervals.get(2).size());
|
||||
@ -24,10 +24,10 @@ public class IntervalableComparatorBySizeTest {
|
||||
|
||||
@Test
|
||||
public void sortOnSizeThenPosition() {
|
||||
List<Intervalable> intervals = new ArrayList<Intervalable>();
|
||||
List<Intervalable> intervals = new ArrayList<>();
|
||||
intervals.add(new Interval(4, 7));
|
||||
intervals.add(new Interval(2, 5));
|
||||
Collections.sort(intervals, new IntervalableComparatorBySize());
|
||||
sort(intervals, new IntervalableComparatorBySize());
|
||||
assertEquals(2, intervals.get(0).getStart());
|
||||
assertEquals(4, intervals.get(1).getStart());
|
||||
}
|
||||
|
||||
@ -1,6 +1,5 @@
|
||||
package org.ahocorasick.trie;
|
||||
|
||||
import org.ahocorasick.trie.State;
|
||||
import org.junit.Test;
|
||||
|
||||
import static junit.framework.Assert.assertEquals;
|
||||
|
||||
@ -1,19 +1,18 @@
|
||||
package org.ahocorasick.trie;
|
||||
|
||||
import org.ahocorasick.trie.handler.EmitHandler;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.ThreadLocalRandom;
|
||||
|
||||
import static java.util.concurrent.ThreadLocalRandom.current;
|
||||
import static junit.framework.Assert.assertEquals;
|
||||
|
||||
import org.ahocorasick.trie.handler.EmitHandler;
|
||||
|
||||
import static org.ahocorasick.trie.Trie.builder;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
public class TrieTest {
|
||||
private final static String[] ALPHABET = new String[]{
|
||||
"abc", "bcd", "cde"
|
||||
@ -37,7 +36,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void keywordAndTextAreTheSame() {
|
||||
Trie trie = Trie.builder()
|
||||
Trie trie = builder()
|
||||
.addKeyword(ALPHABET[0])
|
||||
.build();
|
||||
Collection<Emit> emits = trie.parseText(ALPHABET[0]);
|
||||
@ -47,7 +46,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void keywordAndTextAreTheSameFirstMatch() {
|
||||
Trie trie = Trie.builder()
|
||||
Trie trie = builder()
|
||||
.addKeyword(ALPHABET[0])
|
||||
.build();
|
||||
Emit firstMatch = trie.firstMatch(ALPHABET[0]);
|
||||
@ -56,7 +55,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void textIsLongerThanKeyword() {
|
||||
Trie trie = Trie.builder()
|
||||
Trie trie = builder()
|
||||
.addKeyword(ALPHABET[0])
|
||||
.build();
|
||||
Collection<Emit> emits = trie.parseText(" " + ALPHABET[0]);
|
||||
@ -66,7 +65,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void textIsLongerThanKeywordFirstMatch() {
|
||||
Trie trie = Trie.builder()
|
||||
Trie trie = builder()
|
||||
.addKeyword(ALPHABET[0])
|
||||
.build();
|
||||
Emit firstMatch = trie.firstMatch(" " + ALPHABET[0]);
|
||||
@ -75,7 +74,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void variousKeywordsOneMatch() {
|
||||
Trie trie = Trie.builder()
|
||||
Trie trie = builder()
|
||||
.addKeywords(ALPHABET)
|
||||
.build();
|
||||
Collection<Emit> emits = trie.parseText("bcd");
|
||||
@ -85,7 +84,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void variousKeywordsFirstMatch() {
|
||||
Trie trie = Trie.builder()
|
||||
Trie trie = builder()
|
||||
.addKeywords(ALPHABET)
|
||||
.build();
|
||||
Emit firstMatch = trie.firstMatch("bcd");
|
||||
@ -94,7 +93,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void ushersTestAndStopOnHit() {
|
||||
Trie trie = Trie.builder()
|
||||
Trie trie = builder()
|
||||
.addKeywords(PRONOUNS)
|
||||
.stopOnHit()
|
||||
.build();
|
||||
@ -107,7 +106,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void ushersTest() {
|
||||
Trie trie = Trie.builder()
|
||||
Trie trie = builder()
|
||||
.addKeywords(PRONOUNS)
|
||||
.build();
|
||||
Collection<Emit> emits = trie.parseText("ushers");
|
||||
@ -120,7 +119,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void ushersTestWithCapitalKeywords() {
|
||||
Trie trie = Trie.builder()
|
||||
Trie trie = builder()
|
||||
.ignoreCase()
|
||||
.addKeyword("HERS")
|
||||
.addKeyword("HIS")
|
||||
@ -137,7 +136,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void ushersTestFirstMatch() {
|
||||
Trie trie = Trie.builder()
|
||||
Trie trie = builder()
|
||||
.addKeywords(PRONOUNS)
|
||||
.build();
|
||||
Emit firstMatch = trie.firstMatch("ushers");
|
||||
@ -146,7 +145,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void ushersTestByCallback() {
|
||||
Trie trie = Trie.builder()
|
||||
Trie trie = builder()
|
||||
.addKeywords(PRONOUNS)
|
||||
.build();
|
||||
|
||||
@ -168,7 +167,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void misleadingTest() {
|
||||
Trie trie = Trie.builder()
|
||||
Trie trie = builder()
|
||||
.addKeyword("hers")
|
||||
.build();
|
||||
Collection<Emit> emits = trie.parseText("h he her hers");
|
||||
@ -178,7 +177,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void misleadingTestFirstMatch() {
|
||||
Trie trie = Trie.builder()
|
||||
Trie trie = builder()
|
||||
.addKeyword("hers")
|
||||
.build();
|
||||
Emit firstMatch = trie.firstMatch("h he her hers");
|
||||
@ -187,7 +186,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void recipes() {
|
||||
Trie trie = Trie.builder()
|
||||
Trie trie = builder()
|
||||
.addKeywords(FOOD)
|
||||
.build();
|
||||
Collection<Emit> emits = trie.parseText("2 cauliflowers, 3 tomatoes, 4 slices of veal, 100g broccoli");
|
||||
@ -200,7 +199,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void recipesFirstMatch() {
|
||||
Trie trie = Trie.builder()
|
||||
Trie trie = builder()
|
||||
.addKeywords(FOOD)
|
||||
.build();
|
||||
Emit firstMatch = trie.firstMatch("2 cauliflowers, 3 tomatoes, 4 slices of veal, 100g broccoli");
|
||||
@ -210,7 +209,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void longAndShortOverlappingMatch() {
|
||||
Trie trie = Trie.builder()
|
||||
Trie trie = builder()
|
||||
.addKeyword("he")
|
||||
.addKeyword("hehehehe")
|
||||
.build();
|
||||
@ -227,7 +226,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void nonOverlapping() {
|
||||
Trie trie = Trie.builder().removeOverlaps()
|
||||
Trie trie = builder().removeOverlaps()
|
||||
.addKeyword("ab")
|
||||
.addKeyword("cba")
|
||||
.addKeyword("ababc")
|
||||
@ -242,7 +241,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void nonOverlappingFirstMatch() {
|
||||
Trie trie = Trie.builder().removeOverlaps()
|
||||
Trie trie = builder().removeOverlaps()
|
||||
.addKeyword("ab")
|
||||
.addKeyword("cba")
|
||||
.addKeyword("ababc")
|
||||
@ -254,7 +253,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void containsMatch() {
|
||||
Trie trie = Trie.builder().removeOverlaps()
|
||||
Trie trie = builder().removeOverlaps()
|
||||
.addKeyword("ab")
|
||||
.addKeyword("cba")
|
||||
.addKeyword("ababc")
|
||||
@ -264,7 +263,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void startOfChurchillSpeech() {
|
||||
Trie trie = Trie.builder().removeOverlaps()
|
||||
Trie trie = builder().removeOverlaps()
|
||||
.addKeyword("T")
|
||||
.addKeyword("u")
|
||||
.addKeyword("ur")
|
||||
@ -282,7 +281,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void partialMatch() {
|
||||
Trie trie = Trie.builder()
|
||||
Trie trie = builder()
|
||||
.onlyWholeWords()
|
||||
.addKeyword("sugar")
|
||||
.build();
|
||||
@ -293,7 +292,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void partialMatchFirstMatch() {
|
||||
Trie trie = Trie.builder()
|
||||
Trie trie = builder()
|
||||
.onlyWholeWords()
|
||||
.addKeyword("sugar")
|
||||
.build();
|
||||
@ -304,7 +303,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void tokenizeFullSentence() {
|
||||
Trie trie = Trie.builder()
|
||||
Trie trie = builder()
|
||||
.addKeywords(GREEK_LETTERS)
|
||||
.build();
|
||||
Collection<Token> tokens = trie.tokenize("Hear: Alpha team first, Beta from the rear, Gamma in reserve");
|
||||
@ -322,7 +321,7 @@ public class TrieTest {
|
||||
// @see https://github.com/robert-bor/aho-corasick/issues/5
|
||||
@Test
|
||||
public void testStringIndexOutOfBoundsException() {
|
||||
Trie trie = Trie.builder().ignoreCase().onlyWholeWords()
|
||||
Trie trie = builder().ignoreCase().onlyWholeWords()
|
||||
.addKeywords(UNICODE)
|
||||
.build();
|
||||
Collection<Emit> emits = trie.parseText("TurninG OnCe AgAiN BÖRKÜ");
|
||||
@ -336,7 +335,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void testIgnoreCase() {
|
||||
Trie trie = Trie.builder().ignoreCase()
|
||||
Trie trie = builder().ignoreCase()
|
||||
.addKeywords(UNICODE)
|
||||
.build();
|
||||
Collection<Emit> emits = trie.parseText("TurninG OnCe AgAiN BÖRKÜ");
|
||||
@ -350,7 +349,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void testIgnoreCaseFirstMatch() {
|
||||
Trie trie = Trie.builder().ignoreCase()
|
||||
Trie trie = builder().ignoreCase()
|
||||
.addKeywords(UNICODE)
|
||||
.build();
|
||||
Emit firstMatch = trie.firstMatch("TurninG OnCe AgAiN BÖRKÜ");
|
||||
@ -360,7 +359,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void tokenizeTokensInSequence() {
|
||||
Trie trie = Trie.builder()
|
||||
Trie trie = builder()
|
||||
.addKeywords(GREEK_LETTERS)
|
||||
.build();
|
||||
Collection<Token> tokens = trie.tokenize("Alpha Beta Gamma");
|
||||
@ -370,7 +369,7 @@ public class TrieTest {
|
||||
// @see https://github.com/robert-bor/aho-corasick/issues/7
|
||||
@Test
|
||||
public void testZeroLength() {
|
||||
Trie trie = Trie.builder().ignoreOverlaps().onlyWholeWords().ignoreCase()
|
||||
Trie trie = builder().ignoreOverlaps().onlyWholeWords().ignoreCase()
|
||||
.addKeyword("")
|
||||
.build();
|
||||
trie.tokenize("Try a natural lip and subtle bronzer to keep all the focus on those big bright eyes with NARS Eyeshadow Duo in Rated R And the winner is... Boots No7 Advanced Renewal Anti-ageing Glycolic Peel Kit ($25 amazon.com) won most-appealing peel.");
|
||||
@ -381,7 +380,7 @@ public class TrieTest {
|
||||
public void testUnicode1() {
|
||||
String target = "LİKE THIS"; // The second character ('İ') is Unicode, which was read by AC as a 2-byte char
|
||||
assertEquals("THIS", target.substring(5, 9)); // Java does it the right way
|
||||
Trie trie = Trie.builder().ignoreCase().onlyWholeWords()
|
||||
Trie trie = builder().ignoreCase().onlyWholeWords()
|
||||
.addKeyword("this")
|
||||
.build();
|
||||
Collection<Emit> emits = trie.parseText(target);
|
||||
@ -394,7 +393,7 @@ public class TrieTest {
|
||||
@Test
|
||||
public void testUnicode2() {
|
||||
String target = "LİKE THIS"; // The second character ('İ') is Unicode, which was read by AC as a 2-byte char
|
||||
Trie trie = Trie.builder()
|
||||
Trie trie = builder()
|
||||
.ignoreCase()
|
||||
.onlyWholeWords()
|
||||
.addKeyword("this")
|
||||
@ -406,7 +405,7 @@ public class TrieTest {
|
||||
|
||||
@Test
|
||||
public void testPartialMatchWhiteSpaces() {
|
||||
Trie trie = Trie.builder()
|
||||
Trie trie = builder()
|
||||
.onlyWholeWordsWhiteSpaceSeparated()
|
||||
.addKeyword("#sugar-123")
|
||||
.build();
|
||||
@ -424,7 +423,7 @@ public class TrieTest {
|
||||
|
||||
injectKeyword(text, keyword, interval);
|
||||
|
||||
Trie trie = Trie.builder()
|
||||
Trie trie = builder()
|
||||
.onlyWholeWords()
|
||||
.addKeyword(keyword)
|
||||
.build();
|
||||
@ -440,10 +439,10 @@ public class TrieTest {
|
||||
* @param count The number of numbers to generate.
|
||||
* @return A character sequence filled with random digits.
|
||||
*/
|
||||
private StringBuilder randomNumbers(int count) {
|
||||
private StringBuilder randomNumbers(final int count) {
|
||||
final StringBuilder sb = new StringBuilder(count);
|
||||
|
||||
while (--count > 0) {
|
||||
for (int i = count - 1; i >= 0; i--) {
|
||||
sb.append(randomInt(0, 10));
|
||||
}
|
||||
|
||||
@ -469,7 +468,7 @@ public class TrieTest {
|
||||
}
|
||||
|
||||
private int randomInt(final int min, final int max) {
|
||||
return ThreadLocalRandom.current().nextInt(min, max);
|
||||
return current().nextInt(min, max);
|
||||
}
|
||||
|
||||
private void checkEmit(Emit next, int expectedStart, int expectedEnd, String expectedKeyword) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user