4 spaces for code

Badges for Travis, Codacy, Codecov, Maven and Javadoc
Added Travis CI build instructions
This commit is contained in:
robert-bor 2016-11-29 19:54:23 +01:00
parent 2f1ec8d041
commit 8ae9636201
15 changed files with 266 additions and 196 deletions

6
.travis.yml Normal file
View File

@ -0,0 +1,6 @@
language: java
install: mvn install -DskipTests=true -Dgpg.skip=true
jdk:
- oraclejdk8
after_success:
- bash <(curl -s https://codecov.io/bash)

View File

@ -1,6 +1,12 @@
Aho-Corasick
============
[![Build Status](https://travis-ci.org/robert-bor/aho-corasick.svg?branch=master)](https://travis-ci.org/robert-bor/aho-corasick)
[![Codacy Badge](https://api.codacy.com/project/badge/Grade/0f65bfb641f745a4b301b85d028a4a8d)](https://www.codacy.com/app/bor-robert/aho-corasick)
[![Maven Central](https://maven-badges.herokuapp.com/maven-central/org.ahocorasick/ahocorasick/badge.svg)](https://maven-badges.herokuapp.com/maven-central/org.ahocorasick/ahocorasick)
[![Javadoc](https://javadoc-emblem.rhcloud.com/doc/org.ahocorasick/ahocorasick/badge.svg)](http://www.javadoc.io/doc/org.ahocorasick/ahocorasick)
[![Apache 2](http://img.shields.io/badge/license-Apache%202-blue.svg)](http://www.apache.org/licenses/LICENSE-2.0)
Dependency
----------
Include this dependency in your POM. Be sure to check for the latest version in Maven Central.

93
pom.xml
View File

@ -10,11 +10,16 @@
<inceptionYear>2014</inceptionYear>
<url>http://ahocorasick.org</url>
<parent>
<groupId>org.sonatype.oss</groupId>
<artifactId>oss-parent</artifactId>
<version>7</version>
</parent>
<distributionManagement>
<snapshotRepository>
<id>ossrh</id>
<url>https://oss.sonatype.org/content/repositories/snapshots</url>
</snapshotRepository>
<repository>
<id>ossrh</id>
<url>https://oss.sonatype.org/service/local/staging/deploy/maven2/</url>
</repository>
</distributionManagement>
<organization>
<name>42 BV</name>
@ -39,9 +44,15 @@
<name>Robert Bor</name>
<organization>42</organization>
</developer>
<developer>
<name></name>
</developer>
</developers>
<properties>
<java.version>1.7</java.version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<junit.version>4.10</junit.version>
<!-- Reporting -->
<maven.cobertura.version>2.5.2</maven.cobertura.version>
@ -63,15 +74,19 @@
</dependencies>
<build>
<defaultGoal>install</defaultGoal>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<version>2.4</version>
<groupId>org.sonatype.plugins</groupId>
<artifactId>nexus-staging-maven-plugin</artifactId>
<version>1.6.7</version>
<extensions>true</extensions>
<configuration>
<serverId>ossrh</serverId>
<nexusUrl>https://oss.sonatype.org/</nexusUrl>
<autoReleaseAfterClose>false</autoReleaseAfterClose>
</configuration>
</plugin>
<plugin>
@ -79,30 +94,56 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>3.6.0</version>
<configuration>
<source>1.7</source>
<target>1.7</target>
<source>${java.version}</source>
<target>${java.version}</target>
<encoding>${project.build.sourceEncoding}</encoding>
</configuration>
</plugin>
</plugins>
</build>
<reporting>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>cobertura-maven-plugin</artifactId>
<version>${maven.cobertura.version}</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>${maven.javadoc.version}</version>
<version>2.9.1</version>
<executions>
<execution>
<id>attach-javadocs</id>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<version>2.2.1</version>
<executions>
<execution>
<id>attach-sources</id>
<goals>
<goal>jar-no-fork</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-gpg-plugin</artifactId>
<version>1.5</version>
<executions>
<execution>
<id>sign-artifacts</id>
<phase>verify</phase>
<goals>
<goal>sign</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</reporting>
</build>
</project>

View File

@ -9,7 +9,7 @@ public class Interval implements Intervalable {
* Constructs an interval with a start and end position.
*
* @param start The interval's starting text position.
* @param end The interval's ending text position.
* @param end The interval's ending text position.
*/
public Interval(final int start, final int end) {
this.start = start;
@ -51,7 +51,7 @@ public class Interval implements Intervalable {
*/
public boolean overlapsWith(final Interval other) {
return this.start <= other.getEnd() &&
this.end >= other.getStart();
this.end >= other.getStart();
}
public boolean overlapsWith(int point) {
@ -63,9 +63,9 @@ public class Interval implements Intervalable {
if (!(o instanceof Intervalable)) {
return false;
}
Intervalable other = (Intervalable)o;
Intervalable other = (Intervalable) o;
return this.start == other.getStart() &&
this.end == other.getEnd();
this.end == other.getEnd();
}
@Override
@ -78,7 +78,7 @@ public class Interval implements Intervalable {
if (!(o instanceof Intervalable)) {
return -1;
}
Intervalable other = (Intervalable)o;
Intervalable other = (Intervalable) o;
int comparison = this.start - other.getStart();
return comparison != 0 ? comparison : this.end - other.getEnd();
}

View File

@ -6,7 +6,7 @@ import java.util.List;
public class IntervalNode {
private enum Direction { LEFT, RIGHT }
private enum Direction {LEFT, RIGHT}
private IntervalNode left = null;
private IntervalNode right = null;
@ -93,12 +93,12 @@ public class IntervalNode {
List<Intervalable> overlaps = new ArrayList<Intervalable>();
for (Intervalable currentInterval : this.intervals) {
switch (direction) {
case LEFT :
case LEFT:
if (currentInterval.getStart() <= interval.getEnd()) {
overlaps.add(currentInterval);
}
break;
case RIGHT :
case RIGHT:
if (currentInterval.getEnd() >= interval.getStart()) {
overlaps.add(currentInterval);
}

View File

@ -3,7 +3,9 @@ package org.ahocorasick.interval;
/**
 * Contract for an object that spans a range between a start and an end
 * position and that can be ordered against other objects.
 *
 * <p>The supertype is the raw {@code Comparable}, so implementations receive
 * a plain {@code Object} in {@code compareTo}.</p>
 */
public interface Intervalable extends Comparable {

    /**
     * @return the start position of this interval
     */
    int getStart();

    /**
     * @return the end position of this interval
     */
    int getEnd();

    /**
     * @return the size of this interval
     */
    int size();

}

View File

@ -4,43 +4,51 @@ import java.util.*;
/**
* <p>
* A state has various important tasks it must attend to:
* A state has various important tasks it must attend to:
* </p>
*
* <ul>
* <li>success; when a character points to another state, it must return that state</li>
* <li>failure; when a character has no matching state, the algorithm must be able to fall back on a
* state with less depth</li>
* <li>emits; when this state is passed and keywords have been matched, the matches must be
* 'emitted' so that they can be used later on.</li>
* </ul>
*
* <p>
* The root state is special in the sense that it has no failure state; it cannot fail. If it 'fails'
* it will still parse the next character and start from the root node. This ensures that the algorithm
* always runs. All other states always have a fail state.
* <ul>
* <li>success; when a character points to another state, it must return that state</li>
* <li>failure; when a character has no matching state, the algorithm must be able to fall back on a
* state with less depth</li>
* <li>emits; when this state is passed and keywords have been matched, the matches must be
* 'emitted' so that they can be used later on.</li>
* </ul>
*
* <p>
* The root state is special in the sense that it has no failure state; it cannot fail. If it 'fails'
* it will still parse the next character and start from the root node. This ensures that the algorithm
* always runs. All other states always have a fail state.
* </p>
*
* @author Robert Bor
*/
public class State {
/** effectively the size of the keyword */
/**
* effectively the size of the keyword
*/
private final int depth;
/** only used for the root state to refer to itself in case no matches have been found */
/**
* only used for the root state to refer to itself in case no matches have been found
*/
private final State rootState;
/**
* referred to in the white paper as the 'goto' structure. From a state it is possible to go
* to other states, depending on the character passed.
*/
private final Map<Character,State> success = new HashMap<>();
private final Map<Character, State> success = new HashMap<>();
/** if no matching states are found, the failure state will be returned */
/**
* if no matching states are found, the failure state will be returned
*/
private State failure;
/** whenever this state is reached, it will emit the matched keywords for future reference */
/**
* whenever this state is reached, it will emit the matched keywords for future reference
*/
private Set<String> emits;
public State() {
@ -54,11 +62,11 @@ public class State {
/**
 * Looks up the state reached from this state via the given character.
 *
 * @param character       the character whose transition is looked up in the success map
 * @param ignoreRootState when {@code true}, a missing transition is reported as
 *                        {@code null} instead of falling back to {@code rootState}
 * @return the mapped state; otherwise {@code rootState} when the fallback is allowed
 *         and {@code rootState} is set; otherwise {@code null}
 */
private State nextState(final Character character, final boolean ignoreRootState) {
    final State mapped = this.success.get(character);
    // A direct hit always wins; the fallback only applies when it is both permitted and available.
    if (mapped != null || ignoreRootState || this.rootState == null) {
        return mapped;
    }
    return this.rootState;
}
@ -69,21 +77,21 @@ public class State {
/**
 * Looks up the transition for the given character without falling back to
 * the root state.
 *
 * @param character the character whose transition is looked up
 * @return the mapped state, or {@code null} when this state has no transition for the character
 */
public State nextStateIgnoreRootState(Character character) {
    final boolean ignoreRootState = true;
    return nextState(character, ignoreRootState);
}
public State addState( String keyword ) {
State state = this;
for (final Character character : keyword.toCharArray()) {
state = state.addState(character);
}
return state;
public State addState(String keyword) {
State state = this;
for (final Character character : keyword.toCharArray()) {
state = state.addState(character);
}
return state;
}
public State addState(Character character) {
State nextState = nextStateIgnoreRootState(character);
if (nextState == null) {
nextState = new State(this.depth+1);
nextState = new State(this.depth + 1);
this.success.put(character, nextState);
}
return nextState;
@ -107,7 +115,7 @@ public class State {
}
public Collection<String> emit() {
return this.emits == null ? Collections.<String> emptyList() : this.emits;
return this.emits == null ? Collections.<String>emptyList() : this.emits;
}
public State failure() {

View File

@ -1,11 +1,13 @@
package org.ahocorasick.trie;
import static java.lang.Character.isWhitespace;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Queue;
import java.util.concurrent.LinkedBlockingDeque;
import org.ahocorasick.interval.IntervalTree;
import org.ahocorasick.interval.Intervalable;
import org.ahocorasick.trie.handler.DefaultEmitHandler;
@ -14,7 +16,7 @@ import org.ahocorasick.trie.handler.EmitHandler;
/**
* Based on the Aho-Corasick white paper, Bell Laboratories:
* http://cr.yp.to/bib/1975/aho.pdf
*
*
* @author Robert Bor
*/
public class Trie {
@ -27,21 +29,20 @@ public class Trie {
this.trieConfig = trieConfig;
this.rootState = new State();
}
/**
* Used by the builder to add a text search keyword.
*
*
* @param keyword The search term to add to the list of search terms.
*
* @throws NullPointerException if the keyword is null.
*/
private void addKeyword(String keyword) {
if( keyword.isEmpty() ) {
return;
if (keyword.isEmpty()) {
return;
}
if( isCaseInsensitive() ) {
keyword = keyword.toLowerCase();
if (isCaseInsensitive()) {
keyword = keyword.toLowerCase();
}
addState(keyword).addEmit(keyword);
@ -49,44 +50,44 @@ public class Trie {
/**
* Delegates to addKeyword.
*
*
* @param keywords List of search terms to add to the list of search terms.
*/
private void addKeywords( final String[] keywords ) {
for( final String keyword : keywords ) {
addKeyword( keyword );
}
private void addKeywords(final String[] keywords) {
for (final String keyword : keywords) {
addKeyword(keyword);
}
}
/**
* Delegates to addKeyword.
*
*
* @param keywords List of search terms to add to the list of search terms.
*/
private void addKeywords( final Collection<String> keywords ) {
for( final String keyword : keywords ) {
addKeyword( keyword );
}
private void addKeywords(final Collection<String> keywords) {
for (final String keyword : keywords) {
addKeyword(keyword);
}
}
/**
 * Adds the states for the given keyword, starting at the root state.
 *
 * @param keyword the keyword handed to the root state's {@code addState}
 * @return the state returned by the root state's {@code addState} for the keyword
 */
private State addState(final String keyword) {
    final State root = getRootState();
    return root.addState(keyword);
}
public Collection<Token> tokenize(final String text) {
final Collection<Token> tokens = new ArrayList<>();
final Collection<Emit> collectedEmits = parseText(text);
int lastCollectedPosition = -1;
for (final Emit emit : collectedEmits) {
if (emit.getStart() - lastCollectedPosition > 1) {
tokens.add(createFragment(emit, text, lastCollectedPosition));
}
tokens.add(createMatch(emit, text));
lastCollectedPosition = emit.getEnd();
}
if (text.length() - lastCollectedPosition > 1) {
tokens.add(createFragment(null, text, lastCollectedPosition));
}
@ -95,11 +96,11 @@ public class Trie {
}
private Token createFragment(final Emit emit, final String text, final int lastCollectedPosition) {
return new FragmentToken(text.substring(lastCollectedPosition+1, emit == null ? text.length() : emit.getStart()));
return new FragmentToken(text.substring(lastCollectedPosition + 1, emit == null ? text.length() : emit.getStart()));
}
private Token createMatch(Emit emit, String text) {
return new MatchToken(text.substring(emit.getStart(), emit.getEnd()+1), emit);
return new MatchToken(text.substring(emit.getStart(), emit.getEnd() + 1), emit);
}
@SuppressWarnings("unchecked")
@ -118,7 +119,7 @@ public class Trie {
}
if (!trieConfig.isAllowOverlaps()) {
IntervalTree intervalTree = new IntervalTree((List<Intervalable>)(List<?>)collectedEmits);
IntervalTree intervalTree = new IntervalTree((List<Intervalable>) (List<?>) collectedEmits);
intervalTree.removeOverlaps((List<Intervalable>) (List<?>) collectedEmits);
}
@ -131,15 +132,15 @@ public class Trie {
public void parseText(final CharSequence text, final EmitHandler emitHandler) {
State currentState = getRootState();
for (int position = 0; position < text.length(); position++) {
Character character = text.charAt(position);
// TODO: Maybe lowercase the entire string at once?
if (trieConfig.isCaseInsensitive()) {
character = Character.toLowerCase(character);
}
currentState = getState(currentState, character);
if (storeEmits(position, currentState, emitHandler) && trieConfig.isStopOnHit()) {
return;
@ -157,18 +158,18 @@ public class Trie {
} else {
// Fast path. Returns first match found.
State currentState = getRootState();
for (int position = 0; position < text.length(); position++) {
Character character = text.charAt(position);
// TODO: Lowercase the entire string at once?
if (trieConfig.isCaseInsensitive()) {
character = Character.toLowerCase(character);
}
currentState = getState(currentState, character);
Collection<String> emitStrs = currentState.emit();
if (emitStrs != null && !emitStrs.isEmpty()) {
for (String emitStr : emitStrs) {
final Emit emit = new Emit(position - emitStr.length() + 1, position, emitStr);
@ -183,26 +184,26 @@ public class Trie {
}
}
}
return null;
}
private boolean isPartialMatch(final CharSequence searchText, final Emit emit) {
return (emit.getStart() != 0 &&
Character.isAlphabetic(searchText.charAt(emit.getStart() - 1))) ||
(emit.getEnd() + 1 != searchText.length() &&
Character.isAlphabetic(searchText.charAt(emit.getEnd() + 1)));
Character.isAlphabetic(searchText.charAt(emit.getStart() - 1))) ||
(emit.getEnd() + 1 != searchText.length() &&
Character.isAlphabetic(searchText.charAt(emit.getEnd() + 1)));
}
private void removePartialMatches(final CharSequence searchText, final List<Emit> collectedEmits) {
final List<Emit> removeEmits = new ArrayList<>();
for (final Emit emit : collectedEmits) {
if (isPartialMatch(searchText, emit)) {
removeEmits.add(emit);
}
}
for (final Emit removeEmit : removeEmits) {
collectedEmits.remove(removeEmit);
}
@ -211,15 +212,15 @@ public class Trie {
private void removePartialMatchesWhiteSpaceSeparated(final CharSequence searchText, final List<Emit> collectedEmits) {
final long size = searchText.length();
final List<Emit> removeEmits = new ArrayList<>();
for (final Emit emit : collectedEmits) {
if ((emit.getStart() == 0 || isWhitespace(searchText.charAt(emit.getStart() - 1))) &&
(emit.getEnd() + 1 == size || isWhitespace(searchText.charAt(emit.getEnd() + 1)))) {
(emit.getEnd() + 1 == size || isWhitespace(searchText.charAt(emit.getEnd() + 1)))) {
continue;
}
removeEmits.add(emit);
}
for (final Emit removeEmit : removeEmits) {
collectedEmits.remove(removeEmit);
}
@ -227,12 +228,12 @@ public class Trie {
/**
 * Resolves the state to move to for the given character, following failure
 * links for as long as the state under inspection yields no next state.
 *
 * @param currentState the state to start the lookup from
 * @param character    the character being consumed
 * @return the first non-null result of {@code nextState(character)} found while
 *         walking from {@code currentState} along the failure chain
 */
private State getState(State currentState, final Character character) {
    // NOTE(review): termination presumably relies on the root state always returning a next state - confirm.
    for (State probe = currentState; ; probe = probe.failure()) {
        final State next = probe.nextState(character);
        if (next != null) {
            return next;
        }
    }
}
@ -269,7 +270,7 @@ public class Trie {
private boolean storeEmits(final int position, final State currentState, final EmitHandler emitHandler) {
boolean emitted = false;
final Collection<String> emits = currentState.emit();
// TODO: The check for empty might be superfluous.
if (emits != null && !emits.isEmpty()) {
for (final String emit : emits) {
@ -277,21 +278,21 @@ public class Trie {
emitted = true;
}
}
return emitted;
}
private boolean isCaseInsensitive() {
return trieConfig.isCaseInsensitive();
return trieConfig.isCaseInsensitive();
}
private State getRootState() {
return this.rootState;
return this.rootState;
}
/**
* Provides a fluent interface for constructing Trie instances.
*
*
* @return The builder used to configure its Trie.
*/
public static TrieBuilder builder() {
@ -307,13 +308,13 @@ public class Trie {
/**
* Default (empty) constructor.
*/
private TrieBuilder() {}
private TrieBuilder() {
}
/**
* Adds a keyword to the Trie's list of text search keywords.
*
*
* @param keyword The keyword to add to the list.
*
* @return This builder.
* @throws NullPointerException if the keyword is null.
*/
@ -321,35 +322,33 @@ public class Trie {
this.trie.addKeyword(keyword);
return this;
}
/**
* Adds a list of keywords to the Trie's list of text search keywords.
*
*
* @param keywords The keywords to add to the list.
*
* @return This builder.
*/
public TrieBuilder addKeywords(final String... keywords) {
this.trie.addKeywords(keywords);
return this;
this.trie.addKeywords(keywords);
return this;
}
/**
* Adds a list of keywords to the Trie's list of text search keywords.
*
*
* @param keywords The keywords to add to the list.
*
* @return This builder.
*/
public TrieBuilder addKeywords(final Collection<String> keywords) {
this.trie.addKeywords(keywords);
return this;
this.trie.addKeywords(keywords);
return this;
}
/**
* Configure the Trie to ignore case when searching for keywords in
* the text.
*
*
* @return This builder.
*/
public TrieBuilder ignoreCase() {
@ -359,7 +358,7 @@ public class Trie {
/**
* Configure the Trie to ignore overlapping keywords.
*
*
* @return This builder.
*/
public TrieBuilder ignoreOverlaps() {
@ -369,7 +368,7 @@ public class Trie {
/**
* Configure the Trie to match whole keywords in the text.
*
*
* @return This builder.
*/
public TrieBuilder onlyWholeWords() {
@ -381,7 +380,7 @@ public class Trie {
* Configure the Trie to match whole keywords that are separated by
* whitespace in the text. For example, "this keyword thatkeyword"
* would only match the first occurrence of "keyword".
*
*
* @return This builder.
*/
public TrieBuilder onlyWholeWordsWhiteSpaceSeparated() {
@ -392,7 +391,7 @@ public class Trie {
/**
* Configure the Trie to stop after the first keyword is found in the
* text.
*
*
* @return This builder.
*/
public TrieBuilder stopOnHit() {
@ -402,27 +401,25 @@ public class Trie {
/**
* Configure the Trie based on the builder settings.
*
*
* @return The configured Trie.
*/
public Trie build() {
this.trie.constructFailureStates();
return this.trie;
}
/**
* @deprecated Use ignoreCase()
*
* @return This builder.
* @deprecated Use ignoreCase()
*/
public TrieBuilder caseInsensitive() {
return ignoreCase();
}
/**
* @deprecated Use ignoreOverlaps()
*
* @return This builder.
* @deprecated Use ignoreOverlaps()
*/
public TrieBuilder removeOverlaps() {
return ignoreOverlaps();

View File

@ -12,9 +12,13 @@ public class TrieConfig {
private boolean stopOnHit = false;
public boolean isStopOnHit() { return stopOnHit; }
public boolean isStopOnHit() {
return stopOnHit;
}
public void setStopOnHit(boolean stopOnHit) { this.stopOnHit = stopOnHit; }
public void setStopOnHit(boolean stopOnHit) {
this.stopOnHit = stopOnHit;
}
public boolean isAllowOverlaps() {
return allowOverlaps;
@ -32,7 +36,9 @@ public class TrieConfig {
this.onlyWholeWords = onlyWholeWords;
}
public boolean isOnlyWholeWordsWhiteSpaceSeparated() { return onlyWholeWordsWhiteSpaceSeparated; }
public boolean isOnlyWholeWordsWhiteSpaceSeparated() {
return onlyWholeWordsWhiteSpaceSeparated;
}
public void setOnlyWholeWordsWhiteSpaceSeparated(boolean onlyWholeWordsWhiteSpaceSeparated) {
this.onlyWholeWordsWhiteSpaceSeparated = onlyWholeWordsWhiteSpaceSeparated;

View File

@ -12,19 +12,19 @@ public class IntervalTest {
@Test
public void construct() {
Interval i = new Interval(1,3);
Interval i = new Interval(1, 3);
assertEquals(1, i.getStart());
assertEquals(3, i.getEnd());
}
@Test
public void size() {
assertEquals(3, new Interval(0,2).size());
assertEquals(3, new Interval(0, 2).size());
}
@Test
public void intervaloverlaps() {
assertTrue(new Interval(1,3).overlapsWith(new Interval(2,4)));
assertTrue(new Interval(1, 3).overlapsWith(new Interval(2, 4)));
}
@Test
@ -34,7 +34,7 @@ public class IntervalTest {
@Test
public void pointOverlaps() {
assertTrue(new Interval(1,3).overlapsWith(2));
assertTrue(new Interval(1, 3).overlapsWith(2));
}
@Test

View File

@ -9,7 +9,7 @@ import java.util.List;
import static junit.framework.Assert.assertEquals;
public class IntervalTreeTest {
@Test
public void findOverlaps() {
List<Intervalable> intervals = new ArrayList<Intervalable>();
@ -20,7 +20,7 @@ public class IntervalTreeTest {
intervals.add(new Interval(4, 6));
intervals.add(new Interval(5, 7));
IntervalTree intervalTree = new IntervalTree(intervals);
List<Intervalable> overlaps = intervalTree.findOverlaps(new Interval(1,3));
List<Intervalable> overlaps = intervalTree.findOverlaps(new Interval(1, 3));
assertEquals(3, overlaps.size());
Iterator<Intervalable> overlapsIt = overlaps.iterator();
assertOverlap(overlapsIt.next(), 2, 4);
@ -47,5 +47,5 @@ public class IntervalTreeTest {
assertEquals(expectedStart, interval.getStart());
assertEquals(expectedEnd, interval.getEnd());
}
}

View File

@ -13,9 +13,9 @@ public class IntervalableComparatorByPositionTest {
@Test
public void sortOnPosition() {
List<Intervalable> intervals = new ArrayList<Intervalable>();
intervals.add(new Interval(4,5));
intervals.add(new Interval(1,4));
intervals.add(new Interval(3,8));
intervals.add(new Interval(4, 5));
intervals.add(new Interval(1, 4));
intervals.add(new Interval(3, 8));
Collections.sort(intervals, new IntervalableComparatorByPosition());
assertEquals(4, intervals.get(0).size());
assertEquals(6, intervals.get(1).size());

View File

@ -13,9 +13,9 @@ public class IntervalableComparatorBySizeTest {
@Test
public void sortOnSize() {
List<Intervalable> intervals = new ArrayList<Intervalable>();
intervals.add(new Interval(4,5));
intervals.add(new Interval(1,4));
intervals.add(new Interval(3,8));
intervals.add(new Interval(4, 5));
intervals.add(new Interval(1, 4));
intervals.add(new Interval(3, 8));
Collections.sort(intervals, new IntervalableComparatorBySize());
assertEquals(6, intervals.get(0).size());
assertEquals(4, intervals.get(1).size());
@ -25,8 +25,8 @@ public class IntervalableComparatorBySizeTest {
@Test
public void sortOnSizeThenPosition() {
List<Intervalable> intervals = new ArrayList<Intervalable>();
intervals.add(new Interval(4,7));
intervals.add(new Interval(2,5));
intervals.add(new Interval(4, 7));
intervals.add(new Interval(2, 5));
Collections.sort(intervals, new IntervalableComparatorBySize());
assertEquals(2, intervals.get(0).getStart());
assertEquals(4, intervals.get(1).getStart());

View File

@ -11,9 +11,9 @@ public class StateTest {
public void constructSequenceOfCharacters() {
State rootState = new State();
rootState
.addState('a')
.addState('b')
.addState('c');
.addState('a')
.addState('b')
.addState('c');
State currentState = rootState.nextState('a');
assertEquals(1, currentState.getDepth());
currentState = currentState.nextState('b');

View File

@ -5,30 +5,34 @@ import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.ThreadLocalRandom;
import static junit.framework.Assert.assertEquals;
import org.ahocorasick.trie.handler.EmitHandler;
import static org.junit.Assert.assertTrue;
import org.junit.Test;
public class TrieTest {
private final static String[] ALPHABET = new String[]{
"abc", "bcd", "cde"
"abc", "bcd", "cde"
};
private final static String[] PRONOUNS = new String[]{
"hers", "his", "she", "he"
"hers", "his", "she", "he"
};
private final static String[] FOOD = new String[]{
"veal", "cauliflower", "broccoli", "tomatoes"
"veal", "cauliflower", "broccoli", "tomatoes"
};
private final static String[] GREEK_LETTERS = new String[]{
"Alpha", "Beta", "Gamma"
"Alpha", "Beta", "Gamma"
};
private final static String[] UNICODE = new String[]{
"turning", "once", "again", "börkü"
"turning", "once", "again", "börkü"
};
@Test
@ -406,7 +410,7 @@ public class TrieTest {
.onlyWholeWordsWhiteSpaceSeparated()
.addKeyword("#sugar-123")
.build();
Collection < Emit > emits = trie.parseText("#sugar-123 #sugar-1234"); // left, middle, right test
Collection<Emit> emits = trie.parseText("#sugar-123 #sugar-1234"); // left, middle, right test
assertEquals(1, emits.size()); // Match must not be made
checkEmit(emits.iterator().next(), 0, 9, "#sugar-123");
}
@ -415,57 +419,57 @@ public class TrieTest {
public void testLargeString() {
final int interval = 100;
final int textSize = 1000000;
final String keyword = FOOD[ 1 ];
final StringBuilder text = randomNumbers( textSize );
final String keyword = FOOD[1];
final StringBuilder text = randomNumbers(textSize);
injectKeyword( text, keyword, interval );
injectKeyword(text, keyword, interval);
Trie trie = Trie.builder()
.onlyWholeWords()
.addKeyword( keyword )
.build();
.onlyWholeWords()
.addKeyword(keyword)
.build();
final Collection<Emit> emits = trie.parseText( text );
final Collection<Emit> emits = trie.parseText(text);
assertEquals( textSize / interval, emits.size() );
assertEquals(textSize / interval, emits.size());
}
/**
* Generates a random sequence of ASCII numbers.
*
*
* @param count The number of numbers to generate.
* @return A character sequence filled with random digits.
*/
private StringBuilder randomNumbers( int count ) {
final StringBuilder sb = new StringBuilder( count );
private StringBuilder randomNumbers(int count) {
final StringBuilder sb = new StringBuilder(count);
while( --count > 0 ) {
sb.append( randomInt( 0, 10 ) );
while (--count > 0) {
sb.append(randomInt(0, 10));
}
return sb;
}
/**
* Injects keywords into a string builder.
*
* @param source Should contain a bunch of random data that cannot match
* any keyword.
* @param keyword A keyword to inject repeatedly in the text.
*
* @param source Should contain a bunch of random data that cannot match
* any keyword.
* @param keyword A keyword to inject repeatedly in the text.
* @param interval How often to inject the keyword.
*/
private void injectKeyword(
final StringBuilder source,
final String keyword,
final int interval ) {
private void injectKeyword(
final StringBuilder source,
final String keyword,
final int interval) {
final int length = source.length();
for( int i = 0; i < length; i += interval ) {
source.replace( i, i + keyword.length(), keyword );
for (int i = 0; i < length; i += interval) {
source.replace(i, i + keyword.length(), keyword);
}
}
private int randomInt( final int min, final int max ) {
return ThreadLocalRandom.current().nextInt( min, max );
private int randomInt(final int min, final int max) {
return ThreadLocalRandom.current().nextInt(min, max);
}
private void checkEmit(Emit next, int expectedStart, int expectedEnd, String expectedKeyword) {