migrate to gradle

This commit is contained in:
Kilian Schüttler 2024-10-29 11:42:02 +01:00
parent 0856732f88
commit 45494521d7
49 changed files with 1400 additions and 966 deletions

View File

@ -1,19 +1,21 @@
stages:
- test
- versioning
- deploy
variables:
GIT_SUBMODULE_STRATEGY: recursive
GIT_SUBMODULE_FORCE_HTTPS: 'true'
include: include:
- project: 'gitlab/gitlab' - project: 'gitlab/gitlab'
ref: 'main' ref: 'main'
file: 'ci-templates/maven_deps.yml' file: 'ci-templates/gradle_java.yml'
verify:
stage: test deploy:
stage: deploy
tags: tags:
- dind - dind
script: script:
- echo "Erfolgreich getestet" - echo "Building with gradle version ${BUILDVERSION}"
- gradle -Pversion=${BUILDVERSION} publish
- echo "BUILDVERSION=$BUILDVERSION" >> version.env
artifacts:
reports:
dotenv: version.env
rules:
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
- if: $CI_COMMIT_BRANCH =~ /^release/
- if: $CI_COMMIT_TAG

View File

@ -1,6 +0,0 @@
language: java
install: mvn install -DskipTests=true -Dgpg.skip=true
jdk:
- openjdk8
after_success:
- bash <(curl -s https://codecov.io/bash)

63
build.gradle.kts Normal file
View File

@ -0,0 +1,63 @@
// Gradle build for the ahocorasick library: compiles for Java 17, runs PMD and
// Checkstyle, and defines a Maven publication with sources and Javadoc jars.
plugins {
    `java-library`
    `maven-publish`
    pmd
    checkstyle
    id("io.freefair.lombok") version "8.4"
}

group = "org.ahocorasick"
description = "Aho-CoraSick algorithm for efficient string matching"

repositories {
    mavenLocal()
    maven {
        url = uri("https://nexus.knecon.com/repository/gindev/")
        credentials {
            // Read from the Gradle properties `mavenUser` / `mavenPassword`;
            // both are null when the property is not set.
            username = providers.gradleProperty("mavenUser").getOrNull()
            password = providers.gradleProperty("mavenPassword").getOrNull()
        }
    }
    maven {
        url = uri("https://repo.maven.apache.org/maven2/")
    }
}

dependencies {
    testImplementation("junit:junit:4.13.2")
}

java {
    sourceCompatibility = JavaVersion.VERSION_17
    targetCompatibility = JavaVersion.VERSION_17
    withSourcesJar()
    withJavadocJar()
}

publishing {
    // NOTE(review): this script declares no `publishing.repositories`, so the
    // target of `gradle publish` must come from elsewhere (presumably the CI
    // template) - confirm.
    publications.create<MavenPublication>("maven") {
        from(components["java"])
    }
}

tasks.withType<JavaCompile>().configureEach {
    options.encoding = "UTF-8"
}

tasks.withType<Javadoc>().configureEach {
    options.encoding = "UTF-8"
}

pmd {
    isConsoleOutput = true
    // Use only the custom rule files configured per task below. Clearing the
    // built-in rule sets keeps the <exclude> entries in those files effective.
    ruleSets = listOf()
}

// The rule files are set on each task rather than on the shared `pmd` extension:
// writing `pmd.ruleSetFiles` inside both task blocks would overwrite one
// project-wide value, so main and test could not use different rule files.
tasks.pmdMain {
    ruleSetFiles = files("${rootDir}/config/pmd/pmd.xml")
}

tasks.pmdTest {
    ruleSetFiles = files("${rootDir}/config/pmd/test_pmd.xml")
}

View File

@ -0,0 +1,38 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE module PUBLIC "-//Checkstyle//DTD Checkstyle Configuration 1.3//EN"
        "https://checkstyle.org/dtds/configuration_1_3.dtd">
<!-- Checkstyle configuration. Every violation is reported with severity "error". -->
<module name="Checker">
    <property
            name="severity"
            value="error"/>
    <module name="TreeWalker">
        <!-- Records @SuppressWarnings annotations for the SuppressWarningsFilter declared below. -->
        <module name="SuppressWarningsHolder"/>

        <!-- Annotations and Javadoc -->
        <module name="MissingDeprecated"/>
        <module name="MissingOverride"/>
        <module name="AnnotationLocation"/>
        <module name="NonEmptyAtclauseDescription"/>

        <!-- Imports and modifiers -->
        <module name="IllegalImport"/>
        <module name="RedundantImport"/>
        <module name="RedundantModifier"/>

        <!-- Coding problems -->
        <module name="EmptyBlock"/>
        <module name="DefaultComesLast"/>
        <module name="EmptyStatement"/>
        <module name="EqualsHashCode"/>
        <module name="ExplicitInitialization"/>
        <module name="IllegalInstantiation"/>
        <module name="ModifiedControlVariable"/>
        <module name="MultipleVariableDeclarations"/>
        <module name="PackageDeclaration"/>
        <module name="ParameterAssignment"/>
        <module name="SimplifyBooleanExpression"/>
        <module name="SimplifyBooleanReturn"/>
        <module name="StringLiteralEquality"/>
        <module name="OneStatementPerLine"/>

        <!-- Class design and miscellaneous -->
        <module name="FinalClass"/>
        <module name="ArrayTypeStyle"/>
        <module name="UpperEll"/>
        <module name="OuterTypeFilename"/>
    </module>
    <module name="FileTabCharacter"/>
    <module name="SuppressWarningsFilter"/>
</module>

21
config/pmd/pmd.xml Normal file
View File

@ -0,0 +1,21 @@
<?xml version="1.0"?>
<!-- PMD ruleset for the main sources; referenced from the tasks.pmdMain block in build.gradle.kts. -->
<ruleset name="Custom ruleset"
xmlns="http://pmd.sourceforge.net/ruleset/2.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://pmd.sourceforge.net/ruleset/2.0.0 http://pmd.sourceforge.net/ruleset_2_0_0.xsd">
<description>
Knecon ruleset checks the code for bad stuff
</description>
<!-- Pulls in the whole errorprone category, minus the rules excluded below. -->
<rule ref="category/java/errorprone.xml">
<exclude name="MissingSerialVersionUID"/>
<exclude name="AvoidLiteralsInIfCondition"/>
<exclude name="DataflowAnomalyAnalysis"/>
<exclude name="AvoidDuplicateLiterals"/>
<exclude name="NullAssignment"/>
<exclude name="AssignmentInOperand"/>
<exclude name="BeanMembersShouldSerialize"/>
</rule>
</ruleset>

11
config/pmd/test_pmd.xml Normal file
View File

@ -0,0 +1,11 @@
<?xml version="1.0"?>
<!-- PMD ruleset for the test sources; referenced from the tasks.pmdTest block in build.gradle.kts. -->
<ruleset name="Custom ruleset"
xmlns="http://pmd.sourceforge.net/ruleset/2.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://pmd.sourceforge.net/ruleset/2.0.0 http://pmd.sourceforge.net/ruleset_2_0_0.xsd">
<description>
Knecon test ruleset checks the code for bad stuff
</description>
<!-- No <rule> elements are declared here, so this ruleset itself enables no checks. -->
</ruleset>

1
gradle.properties.kts Normal file
View File

@ -0,0 +1 @@
version = 0.7-SNAPSHOT

183
pom.xml
View File

@ -1,183 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.ahocorasick</groupId>
<artifactId>ahocorasick</artifactId>
<version>0.7-SNAPSHOT</version>
<packaging>jar</packaging>
<name>Aho-CoraSick algorithm for efficient string matching</name>
<description>Java library for efficient string matching against a large set of keywords</description>
<inceptionYear>2014</inceptionYear>
<url>https://github.com/robert-bor/aho-corasick</url>
<distributionManagement>
<snapshotRepository>
<id>ossrh</id>
<url>https://oss.sonatype.org/content/repositories/snapshots</url>
</snapshotRepository>
</distributionManagement>
<organization>
<name>42 BV</name>
<url>http://blog.42.nl/</url>
</organization>
<licenses>
<license>
<name>The Apache Software License, Version 2.0</name>
<url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
<distribution>repo</distribution>
</license>
</licenses>
<scm>
<url>scm:git://github.com/robert-bor/aho-corasick</url>
<connection>scm:git://github.com/robert-bor/aho-corasick</connection>
</scm>
<developers>
<developer>
<name>Robert Bor</name>
<organization>42</organization>
</developer>
<developer>
<name>Daniel Beck</name>
<organization>neoSearch UG (haftungsbeschränkt)</organization>
</developer>
<developer>
<name>Dave Jarvis</name>
<organization>White Magic Software, Ltd.</organization>
</developer>
</developers>
<properties>
<java.version>1.8</java.version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<junit.version>4.13.2</junit.version>
<!-- Reporting -->
<maven.cobertura.version>2.5.2</maven.cobertura.version>
<maven.javadoc.version>2.8</maven.javadoc.version>
<maven.project.version>2.4</maven.project.version>
<maven.site.plugin.version>3.3</maven.site.plugin.version>
</properties>
<!-- <repositories>
<repository>
<id>central</id>
<name>Maven Repository Switchboard</name>
<url>https://repo1.maven.org/maven2/</url>
</repository>
</repositories> -->
<dependencies>
<!-- Used for unit testing -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>${junit.version}</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<defaultGoal>install</defaultGoal>
<plugins>
<!-- <plugin>
<groupId>org.sonatype.plugins</groupId>
<artifactId>nexus-staging-maven-plugin</artifactId>
<version>1.6.8</version>
<extensions>true</extensions>
<configuration>
<serverId>ossrh</serverId>
<nexusUrl>https://oss.sonatype.org/</nexusUrl>
<autoReleaseAfterClose>false</autoReleaseAfterClose>
</configuration>
</plugin> -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.1</version>
<configuration>
<source>${java.version}</source>
<target>${java.version}</target>
<encoding>${project.build.sourceEncoding}</encoding>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>3.4.1</version>
<executions>
<execution>
<id>attach-javadocs</id>
<goals>
<goal>jar</goal>
</goals>
<configuration>
<source>8</source>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<version>3.2.1</version>
<executions>
<execution>
<id>attach-sources</id>
<goals>
<goal>jar-no-fork</goal>
</goals>
</execution>
</executions>
</plugin>
<!-- <plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-gpg-plugin</artifactId>
<version>1.5</version>
<executions>
<execution>
<id>sign-artifacts</id>
<phase>verify</phase>
<goals>
<goal>sign</goal>
</goals>
</execution>
</executions>
</plugin> -->
<plugin>
<groupId>org.jacoco</groupId>
<artifactId>jacoco-maven-plugin</artifactId>
<version>0.8.6</version>
<executions>
<execution>
<goals>
<goal>prepare-agent</goal>
</goals>
</execution>
<execution>
<id>report</id>
<phase>test</phase>
<goals>
<goal>report</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

1
settings.gradle.kts Normal file
View File

@ -0,0 +1 @@
// Root project name; same value as the artifactId in the removed pom.xml.
rootProject.name = "ahocorasick"

View File

@ -12,6 +12,7 @@ public class Interval implements Intervalable {
private final int start; private final int start;
private final int end; private final int end;
/** /**
* Constructs an interval with a start and end position. * Constructs an interval with a start and end position.
* *
@ -19,10 +20,12 @@ public class Interval implements Intervalable {
* @param end The interval's ending text position. * @param end The interval's ending text position.
*/ */
public Interval(final int start, final int end) { public Interval(final int start, final int end) {
this.start = start; this.start = start;
this.end = end; this.end = end;
} }
/** /**
* Returns the starting offset into the text for this interval. * Returns the starting offset into the text for this interval.
* *
@ -30,9 +33,11 @@ public class Interval implements Intervalable {
*/ */
@Override @Override
public int getStart() { public int getStart() {
return this.start; return this.start;
} }
/** /**
* Returns the ending offset into the text for this interval. * Returns the ending offset into the text for this interval.
* *
@ -40,9 +45,11 @@ public class Interval implements Intervalable {
*/ */
@Override @Override
public int getEnd() { public int getEnd() {
return this.end; return this.end;
} }
/** /**
* Returns the length of the interval. * Returns the length of the interval.
* *
@ -50,9 +57,11 @@ public class Interval implements Intervalable {
*/ */
@Override @Override
public int size() { public int size() {
return end - start + 1; return end - start + 1;
} }
/** /**
* Answers whether the given interval overlaps this interval * Answers whether the given interval overlaps this interval
* instance. * instance.
@ -61,31 +70,38 @@ public class Interval implements Intervalable {
* @return true The intervals overlap. * @return true The intervals overlap.
*/ */
public boolean overlapsWith(final Interval other) { public boolean overlapsWith(final Interval other) {
return this.start <= other.getEnd() &&
this.end >= other.getStart(); return this.start <= other.getEnd() && this.end >= other.getStart();
} }
public boolean overlapsWith(int point) { public boolean overlapsWith(int point) {
return this.start <= point && point <= this.end; return this.start <= point && point <= this.end;
} }
@Override @Override
public boolean equals(Object o) { public boolean equals(Object o) {
if (!(o instanceof Intervalable)) { if (!(o instanceof Intervalable)) {
return false; return false;
} }
Intervalable other = (Intervalable) o; Intervalable other = (Intervalable) o;
return this.start == other.getStart() && return this.start == other.getStart() && this.end == other.getEnd();
this.end == other.getEnd();
} }
@Override @Override
public int hashCode() { public int hashCode() {
return this.start % 100 + this.end % 100; return this.start % 100 + this.end % 100;
} }
@Override @Override
public int compareTo(Object o) { public int compareTo(Object o) {
if (!(o instanceof Intervalable)) { if (!(o instanceof Intervalable)) {
return -1; return -1;
} }
@ -94,6 +110,7 @@ public class Interval implements Intervalable {
return comparison != 0 ? comparison : this.end - other.getEnd(); return comparison != 0 ? comparison : this.end - other.getEnd();
} }
/** /**
* Returns the starting offset and ending offset separated * Returns the starting offset and ending offset separated
* by a full colon (:). * by a full colon (:).
@ -102,6 +119,8 @@ public class Interval implements Intervalable {
*/ */
@Override @Override
public String toString() { public String toString() {
return this.start + ":" + this.end; return this.start + ":" + this.end;
} }
} }

View File

@ -6,14 +6,19 @@ import java.util.List;
public class IntervalNode { public class IntervalNode {
private enum Direction {LEFT, RIGHT} private enum Direction {
LEFT,
RIGHT
}
private IntervalNode left; private IntervalNode left;
private IntervalNode right; private IntervalNode right;
private int point; private int point;
private List<Intervalable> intervals = new ArrayList<>(); private List<Intervalable> intervals = new ArrayList<>();
public IntervalNode(final List<Intervalable> intervals) { public IntervalNode(final List<Intervalable> intervals) {
this.point = determineMedian(intervals); this.point = determineMedian(intervals);
final List<Intervalable> toLeft = new ArrayList<>(); final List<Intervalable> toLeft = new ArrayList<>();
@ -37,7 +42,9 @@ public class IntervalNode {
} }
} }
public int determineMedian(final List<Intervalable> intervals) {
private int determineMedian(final List<Intervalable> intervals) {
int start = -1; int start = -1;
int end = -1; int end = -1;
for (Intervalable interval : intervals) { for (Intervalable interval : intervals) {
@ -53,7 +60,9 @@ public class IntervalNode {
return (start + end) / 2; return (start + end) / 2;
} }
public List<Intervalable> findOverlaps(final Intervalable interval) { public List<Intervalable> findOverlaps(final Intervalable interval) {
final List<Intervalable> overlaps = new ArrayList<>(); final List<Intervalable> overlaps = new ArrayList<>();
if (this.point < interval.getStart()) { if (this.point < interval.getStart()) {
@ -74,10 +83,9 @@ public class IntervalNode {
return overlaps; return overlaps;
} }
protected void addToOverlaps(
final Intervalable interval, protected void addToOverlaps(final Intervalable interval, final List<Intervalable> overlaps, final List<Intervalable> newOverlaps) {
final List<Intervalable> overlaps,
final List<Intervalable> newOverlaps) {
for (final Intervalable currentInterval : newOverlaps) { for (final Intervalable currentInterval : newOverlaps) {
if (!currentInterval.equals(interval)) { if (!currentInterval.equals(interval)) {
overlaps.add(currentInterval); overlaps.add(currentInterval);
@ -85,16 +93,21 @@ public class IntervalNode {
} }
} }
protected List<Intervalable> checkForOverlapsToTheLeft(final Intervalable interval) { protected List<Intervalable> checkForOverlapsToTheLeft(final Intervalable interval) {
return checkForOverlaps(interval, Direction.LEFT); return checkForOverlaps(interval, Direction.LEFT);
} }
protected List<Intervalable> checkForOverlapsToTheRight(final Intervalable interval) { protected List<Intervalable> checkForOverlapsToTheRight(final Intervalable interval) {
return checkForOverlaps(interval, Direction.RIGHT); return checkForOverlaps(interval, Direction.RIGHT);
} }
protected List<Intervalable> checkForOverlaps(
final Intervalable interval, final Direction direction) { protected List<Intervalable> checkForOverlaps(final Intervalable interval, final Direction direction) {
final List<Intervalable> overlaps = new ArrayList<>(); final List<Intervalable> overlaps = new ArrayList<>();
for (final Intervalable currentInterval : this.intervals) { for (final Intervalable currentInterval : this.intervals) {
@ -115,9 +128,10 @@ public class IntervalNode {
return overlaps; return overlaps;
} }
protected List<Intervalable> findOverlappingRanges(IntervalNode node, Intervalable interval) { protected List<Intervalable> findOverlappingRanges(IntervalNode node, Intervalable interval) {
return node == null
? Collections.<Intervalable>emptyList() return node == null ? Collections.<Intervalable>emptyList() : node.findOverlaps(interval);
: node.findOverlaps(interval);
} }
} }

View File

@ -10,10 +10,13 @@ public class IntervalTree {
private final IntervalNode rootNode; private final IntervalNode rootNode;
public IntervalTree(List<Intervalable> intervals) { public IntervalTree(List<Intervalable> intervals) {
this.rootNode = new IntervalNode(intervals); this.rootNode = new IntervalNode(intervals);
} }
public List<Intervalable> removeOverlaps(final List<Intervalable> intervals) { public List<Intervalable> removeOverlaps(final List<Intervalable> intervals) {
// Sort the intervals on size, then left-most position // Sort the intervals on size, then left-most position
@ -42,7 +45,9 @@ public class IntervalTree {
return intervals; return intervals;
} }
public List<Intervalable> findOverlaps(final Intervalable interval) { public List<Intervalable> findOverlaps(final Intervalable interval) {
return rootNode.findOverlaps(interval); return rootNode.findOverlaps(interval);
} }

View File

@ -2,10 +2,12 @@ package org.ahocorasick.interval;
public interface Intervalable extends Comparable { public interface Intervalable extends Comparable {
int getStart(); int getStart();
int getEnd();
int size(); int getEnd();
int size();
} }

View File

@ -6,6 +6,7 @@ public class IntervalableComparatorByPosition implements Comparator<Intervalable
@Override @Override
public int compare(final Intervalable intervalable, final Intervalable intervalable2) { public int compare(final Intervalable intervalable, final Intervalable intervalable2) {
return intervalable.getStart() - intervalable2.getStart(); return intervalable.getStart() - intervalable2.getStart();
} }

View File

@ -6,6 +6,7 @@ public class IntervalableComparatorBySize implements Comparator<Intervalable> {
@Override @Override
public int compare(final Intervalable intervalable, final Intervalable intervalable2) { public int compare(final Intervalable intervalable, final Intervalable intervalable2) {
int comparison = intervalable2.size() - intervalable.size(); int comparison = intervalable2.size() - intervalable.size();
if (comparison == 0) { if (comparison == 0) {

View File

@ -4,16 +4,22 @@ public class DefaultToken extends Token {
private PayloadToken<String> payloadToken; private PayloadToken<String> payloadToken;
public DefaultToken(PayloadToken<String> payloadToken) { public DefaultToken(PayloadToken<String> payloadToken) {
super(payloadToken.getFragment()); super(payloadToken.getFragment());
this.payloadToken = payloadToken; this.payloadToken = payloadToken;
} }
public boolean isMatch() { public boolean isMatch() {
return payloadToken.isMatch(); return payloadToken.isMatch();
} }
public Emit getEmit() { public Emit getEmit() {
PayloadEmit<String> emit = payloadToken.getEmit(); PayloadEmit<String> emit = payloadToken.getEmit();
return new Emit(emit.getStart(), emit.getEnd(), emit.getKeyword()); return new Emit(emit.getStart(), emit.getEnd(), emit.getKeyword());
} }

View File

@ -7,19 +7,26 @@ import org.ahocorasick.interval.Intervalable;
* Responsible for tracking the bounds of matched terms. * Responsible for tracking the bounds of matched terms.
*/ */
public class Emit extends Interval implements Intervalable { public class Emit extends Interval implements Intervalable {
private final String keyword; private final String keyword;
public Emit(final int start, final int end, final String keyword) { public Emit(final int start, final int end, final String keyword) {
super(start, end); super(start, end);
this.keyword = keyword; this.keyword = keyword;
} }
public String getKeyword() { public String getKeyword() {
return this.keyword; return this.keyword;
} }
@Override @Override
public String toString() { public String toString() {
return super.toString() + "=" + this.keyword; return super.toString() + "=" + this.keyword;
} }

View File

@ -3,16 +3,21 @@ package org.ahocorasick.trie;
public class FragmentToken extends Token { public class FragmentToken extends Token {
public FragmentToken(String fragment) { public FragmentToken(String fragment) {
super(fragment); super(fragment);
} }
@Override @Override
public boolean isMatch() { public boolean isMatch() {
return false; return false;
} }
@Override @Override
public Emit getEmit() { public Emit getEmit() {
return null; return null;
} }

View File

@ -4,19 +4,26 @@ public class MatchToken extends Token {
private final Emit emit; private final Emit emit;
public MatchToken(final String fragment, final Emit emit) { public MatchToken(final String fragment, final Emit emit) {
super(fragment); super(fragment);
this.emit = emit; this.emit = emit;
} }
@Override @Override
public boolean isMatch() { public boolean isMatch() {
return true; return true;
} }
@Override @Override
public Emit getEmit() { public Emit getEmit() {
return this.emit; return this.emit;
} }
} }

View File

@ -1,32 +1,21 @@
package org.ahocorasick.trie; package org.ahocorasick.trie;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
/** /**
* Contains the matched keyword and some payload data. * Contains the matched keyword and some payload data.
* *
* @author Daniel Beck
* @param <T> The type of the wrapped payload data. * @param <T> The type of the wrapped payload data.
* @author Daniel Beck
*/ */
public class Payload<T> implements Comparable<Payload<T>> { @Getter
@EqualsAndHashCode
@RequiredArgsConstructor
public class Payload<T> {
private final String keyword; private final String keyword;
private final T data; private final T data;
public Payload(final String keyword, final T data) {
super();
this.keyword = keyword;
this.data = data;
}
public String getKeyword() {
return keyword;
}
public T getData() {
return data;
}
@Override
public int compareTo(Payload<T> other) {
return keyword.compareTo(other.getKeyword());
}
} }

View File

@ -5,7 +5,7 @@ import org.ahocorasick.interval.Intervalable;
/** /**
* Contains a matched term and its associated payload data. * Contains a matched term and its associated payload data.
* *
* @param <T> Type of the wrapped payload-data. * @param <T> Type of the wrapped payload-data.
* @author Daniel Beck * @author Daniel Beck
*/ */
@ -15,35 +15,44 @@ public class PayloadEmit<T> extends Interval implements Intervalable {
private final T payload; private final T payload;
/** /**
* Created a PayloadEmit * Created a PayloadEmit
* *
* @param start Start of the matched search term. * @param start Start of the matched search term.
* @param end End of the matched search term. * @param end End of the matched search term.
* @param keyword Keyword that matched. * @param keyword Keyword that matched.
* @param payload Emitted payload data. * @param payload Emitted payload data.
*/ */
public PayloadEmit(final int start, final int end, String keyword, T payload) { public PayloadEmit(final int start, final int end, String keyword, T payload) {
super(start, end); super(start, end);
this.keyword = keyword; this.keyword = keyword;
this.payload = payload; this.payload = payload;
} }
public String getKeyword() { public String getKeyword() {
return this.keyword; return this.keyword;
} }
/** /**
* Returns the payload associated to this emit. * Returns the payload associated to this emit.
* *
* @return the associated payload * @return the associated payload
*/ */
public T getPayload() { public T getPayload() {
return this.payload; return this.payload;
} }
@Override @Override
public String toString() { public String toString() {
return super.toString() + "=" + this.keyword + (this.payload != null ? "->" + this.payload : ""); return super.toString() + "=" + this.keyword + (this.payload != null ? "->" + this.payload : "");
} }
} }

View File

@ -6,7 +6,7 @@ package org.ahocorasick.trie;
* This token indicates a matching search term was not found, so * This token indicates a matching search term was not found, so
* {@link #isMatch()} always returns {@code false}. * {@link #isMatch()} always returns {@code false}.
* </p> * </p>
* *
* @author Daniel Beck * @author Daniel Beck
* *
* @param <T> The Type of the emitted payloads. * @param <T> The Type of the emitted payloads.
@ -14,19 +14,25 @@ package org.ahocorasick.trie;
public class PayloadFragmentToken<T> extends PayloadToken<T> { public class PayloadFragmentToken<T> extends PayloadToken<T> {
public PayloadFragmentToken(String fragment) { public PayloadFragmentToken(String fragment) {
super(fragment); super(fragment);
} }
@Override @Override
public boolean isMatch() { public boolean isMatch() {
return false; return false;
} }
/** /**
* Returns null. * Returns null.
*/ */
@Override @Override
public PayloadEmit<T> getEmit() { public PayloadEmit<T> getEmit() {
return null; return null;
} }
} }

View File

@ -6,27 +6,33 @@ package org.ahocorasick.trie;
* This token indicates a matching search term was found, so {@link #isMatch()} * This token indicates a matching search term was found, so {@link #isMatch()}
* always returns {@code true}. * always returns {@code true}.
* </p> * </p>
*
* @author Daniel Beck
* *
* @param <T> The Type of the emitted payloads. * @param <T> The Type of the emitted payloads.
* @author Daniel Beck
*/ */
public class PayloadMatchToken<T> extends PayloadToken<T> { public class PayloadMatchToken<T> extends PayloadToken<T> {
private final PayloadEmit<T> emit; private final PayloadEmit<T> emit;
public PayloadMatchToken(final String fragment, final PayloadEmit<T> emit) { public PayloadMatchToken(final String fragment, final PayloadEmit<T> emit) {
super(fragment); super(fragment);
this.emit = emit; this.emit = emit;
} }
@Override @Override
public boolean isMatch() { public boolean isMatch() {
return true; return true;
} }
@Override @Override
public PayloadEmit<T> getEmit() { public PayloadEmit<T> getEmit() {
return this.emit; return this.emit;
} }
} }

View File

@ -1,6 +1,10 @@
package org.ahocorasick.trie; package org.ahocorasick.trie;
import java.util.*; import java.util.*;
import java.util.stream.Collectors;
import lombok.Getter;
import lombok.Setter;
/** /**
* <p> * <p>
@ -27,13 +31,14 @@ import java.util.*;
public class PayloadState<T> { public class PayloadState<T> {
/** /**
* effective the size of the keyword * effective the size of the keyword.
*/ */
@Getter
private final int depth; private final int depth;
/** /**
* only used for the root state to refer to itself in case no matches have been * only used for the root state to refer to itself in case no matches have been
* found * found.
*/ */
private final PayloadState<T> rootState; private final PayloadState<T> rootState;
@ -44,26 +49,34 @@ public class PayloadState<T> {
private final Map<Character, PayloadState<T>> success = new HashMap<>(); private final Map<Character, PayloadState<T>> success = new HashMap<>();
/** /**
* if no matching states are found, the failure state will be returned * if no matching states are found, the failure state will be returned.
*/ */
@Getter
@Setter
private PayloadState<T> failure; private PayloadState<T> failure;
/** /**
* whenever this state is reached, it will emit the matches keywords for future * whenever this state is reached, it will emit the matches keywords for future
* reference * reference.
*/ */
private Set<Payload<T>> emits; private Set<Payload<T>> emits;
public PayloadState() { public PayloadState() {
this(0); this(0);
} }
public PayloadState(final int depth) { public PayloadState(final int depth) {
this.depth = depth; this.depth = depth;
this.rootState = depth == 0 ? this : null; this.rootState = depth == 0 ? this : null;
} }
private PayloadState<T> nextState(final Character character, final boolean ignoreRootState) { private PayloadState<T> nextState(final Character character, final boolean ignoreRootState) {
PayloadState<T> nextState = this.success.get(character); PayloadState<T> nextState = this.success.get(character);
if (!ignoreRootState && nextState == null && this.rootState != null) { if (!ignoreRootState && nextState == null && this.rootState != null) {
@ -73,15 +86,21 @@ public class PayloadState<T> {
return nextState; return nextState;
} }
public PayloadState<T> nextState(final Character character) { public PayloadState<T> nextState(final Character character) {
return nextState(character, false); return nextState(character, false);
} }
public PayloadState<T> nextStateIgnoreRootState(Character character) { public PayloadState<T> nextStateIgnoreRootState(Character character) {
return nextState(character, true); return nextState(character, true);
} }
public PayloadState<T> addState(Character character) { public PayloadState<T> addState(Character character) {
PayloadState<T> nextState = nextStateIgnoreRootState(character); PayloadState<T> nextState = nextStateIgnoreRootState(character);
if (nextState == null) { if (nextState == null) {
nextState = new PayloadState<>(this.depth + 1); nextState = new PayloadState<>(this.depth + 1);
@ -90,55 +109,56 @@ public class PayloadState<T> {
return nextState; return nextState;
} }
public int getDepth() {
return this.depth;
}
/** /**
* Adds a payload to be emitted for this state. * Adds a payload to be emitted for this state.
* *
* @param payload to be emitted. * @param payload to be emitted.
*/ */
public void addEmit(Payload<T> payload) { public void addEmit(Payload<T> payload) {
if (this.emits == null) { if (this.emits == null) {
this.emits = new TreeSet<>(); this.emits = new HashSet<>();
} }
this.emits.add(payload); this.emits.add(payload);
} }
/** /**
* Adds a collection of payloads to be emitted for this state. * Adds a collection of payloads to be emitted for this state.
* *
* @param emits Collection of payloads to be emitted. * @param emits Collection of payloads to be emitted.
*/ */
public void addEmit(Collection<Payload<T>> emits) { public void addEmit(Collection<Payload<T>> emits) {
for (Payload<T> emit : emits) { for (Payload<T> emit : emits) {
addEmit(emit); addEmit(emit);
} }
} }
/** /**
* Returns a collection of emitted payloads for this state. * Returns a collection of emitted payloads for this state.
* *
* @return Collection of emitted payloads. * @return Collection of emitted payloads.
*/ */
public Collection<Payload<T>> emit() { public Collection<Payload<T>> emit() {
return this.emits == null ? Collections.<Payload<T>>emptyList() : this.emits;
return this.emits == null ? Collections.<Payload<T>>emptyList() : this.emits.stream()
.sorted(Comparator.comparing(Payload::getKeyword))
.collect(Collectors.toList());
} }
public PayloadState<T> failure() {
return this.failure;
}
public void setFailure(PayloadState<T> failState) {
this.failure = failState;
}
public Collection<PayloadState<T>> getStates() { public Collection<PayloadState<T>> getStates() {
return this.success.values(); return this.success.values();
} }
public Collection<Character> getTransitions() { public Collection<Character> getTransitions() {
return this.success.keySet(); return this.success.keySet();
} }
} }

View File

@ -9,24 +9,33 @@ package org.ahocorasick.trie;
* @param <T> The Type of the emitted payloads. * @param <T> The Type of the emitted payloads.
*/ */
public abstract class PayloadToken<T> { public abstract class PayloadToken<T> {
private String fragment; private String fragment;
public PayloadToken(String fragment) { public PayloadToken(String fragment) {
this.fragment = fragment; this.fragment = fragment;
} }
public String getFragment() { public String getFragment() {
return this.fragment; return this.fragment;
} }
/** /**
* Return {@code true} if a search term matched. * Return {@code true} if a search term matched.
*
* @return {@code true} if this is a match * @return {@code true} if this is a match
*/ */
public abstract boolean isMatch(); public abstract boolean isMatch();
/** /**
* @return the payload * @return the payload
*/ */
public abstract PayloadEmit<T> getEmit(); public abstract PayloadEmit<T> getEmit();
} }

View File

@ -1,7 +1,9 @@
package org.ahocorasick.trie; package org.ahocorasick.trie;
import static java.lang.Character.isWhitespace; import static java.lang.Character.isWhitespace;
import static java.lang.Character.toLowerCase;
import java.util.Deque;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.Collection; import java.util.Collection;
import java.util.List; import java.util.List;
@ -23,8 +25,8 @@ import org.ahocorasick.trie.handler.StatefulPayloadEmitHandler;
* added keyword. * added keyword.
* </p> * </p>
* *
* @author Daniel Beck
 * @param <T> The type of the supplied payload.
* @author Daniel Beck
*/ */
public class PayloadTrie<T> { public class PayloadTrie<T> {
@ -32,11 +34,14 @@ public class PayloadTrie<T> {
private final PayloadState<T> rootState; private final PayloadState<T> rootState;
/**
 * Creates a trie that consists of a fresh root state only.
 *
 * @param trieConfig configuration object stored for later lookups
 */
protected PayloadTrie(final TrieConfig trieConfig) {
    this.rootState = new PayloadState<>();
    this.trieConfig = trieConfig;
}
/** /**
* Used by the builder to add a text search keyword with an emit payload. * Used by the builder to add a text search keyword with an emit payload.
* *
@ -45,6 +50,7 @@ public class PayloadTrie<T> {
* @throws NullPointerException if the keyword is null. * @throws NullPointerException if the keyword is null.
*/ */
private void addKeyword(String keyword, T emit) { private void addKeyword(String keyword, T emit) {
if (keyword.isEmpty()) { if (keyword.isEmpty()) {
return; return;
} }
@ -52,6 +58,7 @@ public class PayloadTrie<T> {
addState(keyword).addEmit(new Payload<>(keyword, emit)); addState(keyword).addEmit(new Payload<>(keyword, emit));
} }
/** /**
* Used by the builder to add a text search keyword. * Used by the builder to add a text search keyword.
* *
@ -59,6 +66,7 @@ public class PayloadTrie<T> {
* @throws NullPointerException if the keyword is null. * @throws NullPointerException if the keyword is null.
*/ */
private void addKeyword(String keyword) { private void addKeyword(String keyword) {
if (keyword.isEmpty()) { if (keyword.isEmpty()) {
return; return;
} }
@ -66,15 +74,21 @@ public class PayloadTrie<T> {
addState(keyword).addEmit(new Payload<>(keyword, null)); addState(keyword).addEmit(new Payload<>(keyword, null));
} }
private PayloadState<T> addState(final String keyword) { private PayloadState<T> addState(final String keyword) {
PayloadState<T> state = getRootState(); PayloadState<T> state = getRootState();
for (final Character character : keyword.toCharArray()) { for (final Character character : keyword.toCharArray()) {
if (isIgnoreWhiteSpace() && isWhitespace(character)) {
continue;
}
Character adjustedChar = isCaseInsensitive() ? Character.toLowerCase(character) : character; Character adjustedChar = isCaseInsensitive() ? Character.toLowerCase(character) : character;
state = state.addState(adjustedChar); state = state.addState(adjustedChar);
} }
return state; return state;
} }
/** /**
* Tokenizes the specified text and returns the emitted outputs. * Tokenizes the specified text and returns the emitted outputs.
* *
@ -82,13 +96,14 @@ public class PayloadTrie<T> {
* @return the emitted outputs * @return the emitted outputs
*/ */
public Collection<PayloadToken<T>> tokenize(final String text) { public Collection<PayloadToken<T>> tokenize(final String text) {
final Collection<PayloadToken<T>> tokens = new LinkedList<>(); final Collection<PayloadToken<T>> tokens = new LinkedList<>();
final Collection<PayloadEmit<T>> collectedEmits = parseText(text); final Collection<PayloadEmit<T>> collectedEmits = parseText(text);
int lastCollectedPosition = -1; int lastCollectedPosition = -1;
for (final PayloadEmit<T> emit : collectedEmits) { for (final PayloadEmit<T> emit : collectedEmits) {
if (emit.getStart() - lastCollectedPosition > 1) { if (emit.getStart() - lastCollectedPosition > 1) {
tokens.add( createFragment( emit, text, lastCollectedPosition) ); tokens.add(createFragment(emit, text, lastCollectedPosition));
} }
tokens.add(createMatch(emit, text)); tokens.add(createMatch(emit, text));
@ -96,24 +111,25 @@ public class PayloadTrie<T> {
} }
if (text.length() - lastCollectedPosition > 1) { if (text.length() - lastCollectedPosition > 1) {
tokens.add( createFragment( null, text, lastCollectedPosition) ); tokens.add(createFragment(null, text, lastCollectedPosition));
} }
return tokens; return tokens;
} }
private PayloadToken<T> createFragment(final PayloadEmit<T> emit, final String text, final int lastCollectedPosition) { private PayloadToken<T> createFragment(final PayloadEmit<T> emit, final String text, final int lastCollectedPosition) {
return new PayloadFragmentToken<>(
text.substring( lastCollectedPosition + 1, return new PayloadFragmentToken<>(text.substring(lastCollectedPosition + 1, emit == null ? text.length() : emit.getStart()));
emit == null ? text.length() : emit.getStart() ) );
} }
private PayloadToken<T> createMatch(PayloadEmit<T> emit, String text) { private PayloadToken<T> createMatch(PayloadEmit<T> emit, String text) {
return new PayloadMatchToken<>( text.substring( emit.getStart(),
emit.getEnd() + 1 ), return new PayloadMatchToken<>(text.substring(emit.getStart(), emit.getEnd() + 1), emit);
emit );
} }
/** /**
* Tokenizes a specified text and returns the emitted outputs. * Tokenizes a specified text and returns the emitted outputs.
* *
@ -121,9 +137,11 @@ public class PayloadTrie<T> {
* @return A collection of emits. * @return A collection of emits.
*/ */
public Collection<PayloadEmit<T>> parseText(final CharSequence text) { public Collection<PayloadEmit<T>> parseText(final CharSequence text) {
return parseText(text, new DefaultPayloadEmitHandler<>()); return parseText(text, new DefaultPayloadEmitHandler<>());
} }
/** /**
* Tokenizes the specified text by using a custom EmitHandler and returns the * Tokenizes the specified text by using a custom EmitHandler and returns the
* emitted outputs. * emitted outputs.
@ -134,6 +152,7 @@ public class PayloadTrie<T> {
*/ */
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public Collection<PayloadEmit<T>> parseText(final CharSequence text, final StatefulPayloadEmitHandler<T> emitHandler) { public Collection<PayloadEmit<T>> parseText(final CharSequence text, final StatefulPayloadEmitHandler<T> emitHandler) {
parseText(text, (PayloadEmitHandler<T>) emitHandler); parseText(text, (PayloadEmitHandler<T>) emitHandler);
final List<PayloadEmit<T>> collectedEmits = emitHandler.getEmits(); final List<PayloadEmit<T>> collectedEmits = emitHandler.getEmits();
@ -146,18 +165,21 @@ public class PayloadTrie<T> {
return collectedEmits; return collectedEmits;
} }
/** /**
* Returns true if the text contains one of the search terms; otherwise, * Returns true if the text contains one of the search terms; otherwise,
* returns false. * returns false.
* *
* @param text Specified text. * @param text Specified text.
* @return true if the text contains one of the search terms. Else, returns * @return true if the text contains one of the search terms. Else, returns
* false. * false.
*/ */
public boolean containsMatch(final CharSequence text) { public boolean containsMatch(final CharSequence text) {
return firstMatch(text) != null; return firstMatch(text) != null;
} }
/** /**
* Tokenizes the specified text by using a custom EmitHandler and returns the * Tokenizes the specified text by using a custom EmitHandler and returns the
* emitted outputs. * emitted outputs.
@ -166,10 +188,14 @@ public class PayloadTrie<T> {
* @param emitHandler The handler that will be used to parse the text. * @param emitHandler The handler that will be used to parse the text.
*/ */
public void parseText(final CharSequence text, final PayloadEmitHandler<T> emitHandler) { public void parseText(final CharSequence text, final PayloadEmitHandler<T> emitHandler) {
PayloadState<T> currentState = getRootState();
PayloadState<T> currentState = getRootState();
for (int position = 0; position < text.length(); position++) { for (int position = 0; position < text.length(); position++) {
char character = text.charAt( position); char character = text.charAt(position);
if (trieConfig.isIgnoreWhiteSpace() && isWhitespace(character)) {
continue;
}
if (trieConfig.isCaseInsensitive()) { if (trieConfig.isCaseInsensitive()) {
character = Character.toLowerCase(character); character = Character.toLowerCase(character);
@ -183,6 +209,7 @@ public class PayloadTrie<T> {
} }
} }
/** /**
* The first matching text sequence. * The first matching text sequence.
* *
@ -190,6 +217,7 @@ public class PayloadTrie<T> {
* @return {@code null} if no matches found. * @return {@code null} if no matches found.
*/ */
public PayloadEmit<T> firstMatch(final CharSequence text) { public PayloadEmit<T> firstMatch(final CharSequence text) {
assert text != null; assert text != null;
if (!trieConfig.isAllowOverlaps()) { if (!trieConfig.isAllowOverlaps()) {
@ -204,8 +232,11 @@ public class PayloadTrie<T> {
PayloadState<T> currentState = getRootState(); PayloadState<T> currentState = getRootState();
for (int position = 0; position < text.length(); position++) { for (int position = 0; position < text.length(); position++) {
char character = text.charAt( position); char character = text.charAt(position);
if (trieConfig.isIgnoreWhiteSpace() && isWhitespace(character)) {
continue;
}
if (trieConfig.isCaseInsensitive()) { if (trieConfig.isCaseInsensitive()) {
character = Character.toLowerCase(character); character = Character.toLowerCase(character);
} }
@ -215,8 +246,13 @@ public class PayloadTrie<T> {
if (payloads != null && !payloads.isEmpty()) { if (payloads != null && !payloads.isEmpty()) {
for (final Payload<T> payload : payloads) { for (final Payload<T> payload : payloads) {
final PayloadEmit<T> emit = new PayloadEmit<>(position - payload.getKeyword().length() + 1, position, int start;
payload.getKeyword(), payload.getData()); if (isIgnoreWhiteSpace()) {
start = findStart(text, position, payload);
} else {
start = position - payload.getKeyword().length() + 1;
}
final PayloadEmit<T> emit = new PayloadEmit<>(start, position, payload.getKeyword(), payload.getData());
if (trieConfig.isOnlyWholeWords()) { if (trieConfig.isOnlyWholeWords()) {
if (!isPartialMatch(text, emit)) { if (!isPartialMatch(text, emit)) {
return emit; return emit;
@ -232,29 +268,38 @@ public class PayloadTrie<T> {
return null; return null;
} }
private boolean isPartialMatch(final CharSequence searchText, final PayloadEmit<T> emit) { private boolean isPartialMatch(final CharSequence searchText, final PayloadEmit<T> emit) {
return (emit.getStart() != 0 && Character.isAlphabetic(searchText.charAt(emit.getStart() - 1)))
|| (emit.getEnd() + 1 != searchText.length() && Character.isAlphabetic(searchText.charAt(emit.getEnd() + 1))); return (emit.getStart() != 0 && Character.isAlphabetic(searchText.charAt(emit.getStart() - 1))) || (emit.getEnd() + 1 != searchText.length() && Character.isAlphabetic(
searchText.charAt(emit.getEnd() + 1)));
} }
private boolean isPartialMatchWhiteSpaceSeparated(final CharSequence searchText, final PayloadEmit<T> emit) { private boolean isPartialMatchWhiteSpaceSeparated(final CharSequence searchText, final PayloadEmit<T> emit) {
final long size = searchText.length(); final long size = searchText.length();
return (emit.getStart() != 0 && !isWhitespace(searchText.charAt(emit.getStart() - 1))) return (emit.getStart() != 0 && !isWhitespace(searchText.charAt(emit.getStart() - 1))) || (emit.getEnd() + 1 != size && !isWhitespace(searchText.charAt(emit.getEnd()
|| (emit.getEnd() + 1 != size && !isWhitespace(searchText.charAt(emit.getEnd() + 1))); + 1)));
} }
private PayloadState<T> getState(PayloadState<T> currentState, final Character character) { private PayloadState<T> getState(PayloadState<T> currentState, final Character character) {
PayloadState<T> newCurrentState = currentState.nextState(character); PayloadState<T> newCurrentState = currentState.nextState(character);
var tempState = currentState;
while (newCurrentState == null) { while (newCurrentState == null) {
currentState = currentState.failure(); tempState = tempState.getFailure();
newCurrentState = currentState.nextState(character); newCurrentState = tempState.nextState(character);
} }
return newCurrentState; return newCurrentState;
} }
private void constructFailureStates() { private void constructFailureStates() {
final Queue<PayloadState<T>> queue = new LinkedBlockingDeque<>(); final Queue<PayloadState<T>> queue = new LinkedBlockingDeque<>();
final PayloadState<T> startState = getRootState(); final PayloadState<T> startState = getRootState();
@ -272,9 +317,9 @@ public class PayloadTrie<T> {
PayloadState<T> targetState = currentState.nextState(transition); PayloadState<T> targetState = currentState.nextState(transition);
queue.add(targetState); queue.add(targetState);
PayloadState<T> traceFailureState = currentState.failure(); PayloadState<T> traceFailureState = currentState.getFailure();
while (traceFailureState.nextState(transition) == null) { while (traceFailureState.nextState(transition) == null) {
traceFailureState = traceFailureState.failure(); traceFailureState = traceFailureState.getFailure();
} }
final PayloadState<T> newFailureState = traceFailureState.nextState(transition); final PayloadState<T> newFailureState = traceFailureState.nextState(transition);
@ -284,13 +329,21 @@ public class PayloadTrie<T> {
} }
} }
private boolean processEmits(final CharSequence text, final int position, final Collection<Payload<T>> payloads, final PayloadEmitHandler<T> emitHandler) { private boolean processEmits(final CharSequence text, final int position, final Collection<Payload<T>> payloads, final PayloadEmitHandler<T> emitHandler) {
boolean emitted = false; boolean emitted = false;
for (final Payload<T> payload : payloads) { for (final Payload<T> payload : payloads) {
final PayloadEmit<T> payloadEmit = new PayloadEmit<>(position - payload.getKeyword().length() + 1, int start;
position, payload.getKeyword(), payload.getData()); if (isIgnoreWhiteSpace()) {
if (!(trieConfig.isOnlyWholeWords() && isPartialMatch(text, payloadEmit)) && start = findStart(text, position, payload);
!(trieConfig.isOnlyWholeWordsWhiteSpaceSeparated() && isPartialMatchWhiteSpaceSeparated(text, payloadEmit))) { } else {
start = position - payload.getKeyword().length() + 1;
}
final PayloadEmit<T> payloadEmit = new PayloadEmit<>(start, position, payload.getKeyword(), payload.getData());
if (!(trieConfig.isOnlyWholeWords() && isPartialMatch(text, payloadEmit)) && !(trieConfig.isOnlyWholeWordsWhiteSpaceSeparated() && isPartialMatchWhiteSpaceSeparated(
text,
payloadEmit))) {
emitted = emitHandler.emit(payloadEmit) || emitted; emitted = emitHandler.emit(payloadEmit) || emitted;
if (emitted && trieConfig.isStopOnHit()) { if (emitted && trieConfig.isStopOnHit()) {
break; break;
@ -301,41 +354,77 @@ public class PayloadTrie<T> {
return emitted; return emitted;
} }
/**
 * Locates the start index of a match that ends at {@code position} by walking
 * the text backwards and ticking off the keyword's non-whitespace characters
 * (lower-cased when matching is case-insensitive).
 *
 * @param text     the text that was searched
 * @param position index in {@code text} at which the match ends
 * @param payload  the payload whose keyword matched
 * @return the index following the position where the backward walk stopped
 */
private int findStart(CharSequence text, int position, Payload<T> payload) {
    final boolean foldCase = isCaseInsensitive();
    final var keyword = payload.getKeyword();

    // Push left-to-right so the keyword's last significant character ends up on top.
    final Deque<Character> pending = new LinkedList<>();
    for (int k = 0; k < keyword.length(); k++) {
        final char keywordChar = keyword.charAt(k);
        if (!isWhitespace(keywordChar)) {
            pending.push(foldCase ? toLowerCase(keywordChar) : keywordChar);
        }
    }

    // Walk backwards; characters that do not equal the top of the stack are skipped.
    int cursor = position;
    while (!pending.isEmpty() && cursor >= 0) {
        final char textChar = foldCase ? toLowerCase(text.charAt(cursor)) : text.charAt(cursor);
        if (textChar == pending.peek()) {
            pending.pop();
        }
        cursor--;
    }
    return cursor + 1;
}
private boolean isCaseInsensitive() { private boolean isCaseInsensitive() {
return trieConfig.isCaseInsensitive(); return trieConfig.isCaseInsensitive();
} }
/**
 * @return the ignore-whitespace flag of this trie's configuration
 */
private boolean isIgnoreWhiteSpace() {
    return this.trieConfig.isIgnoreWhiteSpace();
}
private PayloadState<T> getRootState() { private PayloadState<T> getRootState() {
return this.rootState; return this.rootState;
} }
/** /**
* Provides a fluent interface for constructing Trie instances with payloads. * Provides a fluent interface for constructing Trie instances with payloads.
* @param <T> The type of the emitted payload.
* *
* @param <T> The type of the emitted payload.
* @return The builder used to configure its Trie. * @return The builder used to configure its Trie.
*/ */
public static <T> PayloadTrieBuilder<T> builder() { public static <T> PayloadTrieBuilder<T> builder() {
return new PayloadTrieBuilder<>(); return new PayloadTrieBuilder<>();
} }
/** /**
* Builder class to create a PayloadTrie instance. * Builder class to create a PayloadTrie instance.
* *
* @param <T> The type of the emitted payload. * @param <T> The type of the emitted payload.
*/ */
public static class PayloadTrieBuilder<T> { public static final class PayloadTrieBuilder<T> {
private final TrieConfig trieConfig = new TrieConfig(); private final TrieConfig trieConfig = new TrieConfig();
private final PayloadTrie<T> trie = new PayloadTrie<>(trieConfig); private final PayloadTrie<T> trie = new PayloadTrie<>(trieConfig);
/**
 * Private no-argument constructor; takes no action of its own.
 */
private PayloadTrieBuilder() {
    // nothing to initialise here
}
/** /**
* Configure the Trie to ignore case when searching for keywords in the text. * Configure the Trie to ignore case when searching for keywords in the text.
* This must be called before calling addKeyword because the algorithm converts * This must be called before calling addKeyword because the algorithm converts
@ -345,20 +434,24 @@ public class PayloadTrie<T> {
* @return This builder. * @return This builder.
*/ */
public PayloadTrieBuilder<T> ignoreCase() { public PayloadTrieBuilder<T> ignoreCase() {
this.trieConfig.setCaseInsensitive(true); this.trieConfig.setCaseInsensitive(true);
return this; return this;
} }
/** /**
* Configure the Trie to ignore overlapping keywords. * Configure the Trie to ignore overlapping keywords.
* *
* @return This builder. * @return This builder.
*/ */
public PayloadTrieBuilder<T> ignoreOverlaps() { public PayloadTrieBuilder<T> ignoreOverlaps() {
this.trieConfig.setAllowOverlaps(false); this.trieConfig.setAllowOverlaps(false);
return this; return this;
} }
/** /**
* Adds a keyword to the {@link Trie}'s list of text search keywords. * Adds a keyword to the {@link Trie}'s list of text search keywords.
* No {@link Payload} is supplied. * No {@link Payload} is supplied.
@ -368,10 +461,12 @@ public class PayloadTrie<T> {
* @throws NullPointerException if the keyword is null. * @throws NullPointerException if the keyword is null.
*/ */
public PayloadTrieBuilder<T> addKeyword(final String keyword) { public PayloadTrieBuilder<T> addKeyword(final String keyword) {
this.trie.addKeyword(keyword); this.trie.addKeyword(keyword);
return this; return this;
} }
/** /**
* Adds a keyword and a payload to the {@link Trie}'s list of text * Adds a keyword and a payload to the {@link Trie}'s list of text
* search keywords. * search keywords.
@ -382,10 +477,12 @@ public class PayloadTrie<T> {
* @throws NullPointerException if the keyword is null. * @throws NullPointerException if the keyword is null.
*/ */
public PayloadTrieBuilder<T> addKeyword(final String keyword, final T payload) { public PayloadTrieBuilder<T> addKeyword(final String keyword, final T payload) {
this.trie.addKeyword(keyword, payload); this.trie.addKeyword(keyword, payload);
return this; return this;
} }
/** /**
* Adds a list of keywords and payloads to the {@link Trie}'s list of * Adds a list of keywords and payloads to the {@link Trie}'s list of
* text search keywords. * text search keywords.
@ -394,22 +491,26 @@ public class PayloadTrie<T> {
* @return This builder. * @return This builder.
*/ */
public PayloadTrieBuilder<T> addKeywords(final Collection<Payload<T>> keywords) { public PayloadTrieBuilder<T> addKeywords(final Collection<Payload<T>> keywords) {
for (Payload<T> payload : keywords) { for (Payload<T> payload : keywords) {
this.trie.addKeyword(payload.getKeyword(), payload.getData()); this.trie.addKeyword(payload.getKeyword(), payload.getData());
} }
return this; return this;
} }
/** /**
* Configure the Trie to match whole keywords in the text. * Configure the Trie to match whole keywords in the text.
* *
* @return This builder. * @return This builder.
*/ */
public PayloadTrieBuilder<T> onlyWholeWords() { public PayloadTrieBuilder<T> onlyWholeWords() {
this.trieConfig.setOnlyWholeWords(true); this.trieConfig.setOnlyWholeWords(true);
return this; return this;
} }
/** /**
* Configure the Trie to match whole keywords that are separated by whitespace * Configure the Trie to match whole keywords that are separated by whitespace
* in the text. For example, "this keyword thatkeyword" would only match the * in the text. For example, "this keyword thatkeyword" would only match the
@ -418,46 +519,69 @@ public class PayloadTrie<T> {
* @return This builder. * @return This builder.
*/ */
public PayloadTrieBuilder<T> onlyWholeWordsWhiteSpaceSeparated() { public PayloadTrieBuilder<T> onlyWholeWordsWhiteSpaceSeparated() {
this.trieConfig.setOnlyWholeWordsWhiteSpaceSeparated(true); this.trieConfig.setOnlyWholeWordsWhiteSpaceSeparated(true);
return this; return this;
} }
/** /**
* Configure the Trie to stop after the first keyword is found in the text. * Configure the Trie to stop after the first keyword is found in the text.
* *
* @return This builder. * @return This builder.
*/ */
public PayloadTrieBuilder<T> stopOnHit() { public PayloadTrieBuilder<T> stopOnHit() {
trie.trieConfig.setStopOnHit(true); trie.trieConfig.setStopOnHit(true);
return this; return this;
} }
/** /**
* Configure the PayloadTrie based on the builder settings. * Configure the PayloadTrie based on the builder settings.
* *
* @return The configured PayloadTrie. * @return The configured PayloadTrie.
*/ */
public PayloadTrie<T> build() { public PayloadTrie<T> build() {
this.trie.constructFailureStates(); this.trie.constructFailureStates();
return this.trie; return this.trie;
} }
/** /**
* @return This builder. * @return This builder.
* @deprecated Use ignoreCase() * @deprecated Use ignoreCase()
*/ */
@Deprecated @Deprecated
public PayloadTrieBuilder<T> caseInsensitive() { public PayloadTrieBuilder<T> caseInsensitive() {
return ignoreCase(); return ignoreCase();
} }
/** /**
* @return This builder. * @return This builder.
* @deprecated Use ignoreOverlaps() * @deprecated Use ignoreOverlaps()
*/ */
@Deprecated @Deprecated
public PayloadTrieBuilder<T> removeOverlaps() { public PayloadTrieBuilder<T> removeOverlaps() {
return ignoreOverlaps(); return ignoreOverlaps();
} }
/**
 * Configure the Trie to ignore whitespace by switching
 * {@code ignoreWhiteSpace} on.
 *
 * @return This builder.
 */
public PayloadTrieBuilder<T> ignoreWhiteSpace() {
    this.trieConfig.setIgnoreWhiteSpace(true);
    return this;
}
} }
} }

View File

@ -2,6 +2,9 @@ package org.ahocorasick.trie;
import java.util.*; import java.util.*;
import lombok.Getter;
import lombok.Setter;
/** /**
* <p> * <p>
* A state has various important tasks it must attend to: * A state has various important tasks it must attend to:
@ -26,6 +29,7 @@ public class State {
/** /**
 * effectively the size of the keyword
*/ */
@Getter
private final int depth; private final int depth;
/** /**
@ -42,6 +46,8 @@ public class State {
/** /**
* if no matching states are found, the failure state will be returned * if no matching states are found, the failure state will be returned
*/ */
@Setter
@Getter
private State failure; private State failure;
/** /**
@ -49,16 +55,22 @@ public class State {
*/ */
private Set<String> emits; private Set<String> emits;
/**
 * Creates a state of depth {@code 0}.
 */
public State() {
    this(0);
}
/**
 * Creates a state at the given depth. A state of depth {@code 0} records
 * itself as {@code rootState}; any other depth leaves it {@code null}.
 *
 * @param depth depth of this state
 */
public State(final int depth) {
    this.depth = depth;
    if (depth == 0) {
        this.rootState = this;
    } else {
        this.rootState = null;
    }
}
private State nextState(final Character character, final boolean ignoreRootState) { private State nextState(final Character character, final boolean ignoreRootState) {
State nextState = this.success.get(character); State nextState = this.success.get(character);
if (!ignoreRootState && nextState == null && this.rootState != null) { if (!ignoreRootState && nextState == null && this.rootState != null) {
@ -68,15 +80,21 @@ public class State {
return nextState; return nextState;
} }
public State nextState(final Character character) { public State nextState(final Character character) {
return nextState(character, false); return nextState(character, false);
} }
public State nextStateIgnoreRootState(Character character) { public State nextStateIgnoreRootState(Character character) {
return nextState(character, true); return nextState(character, true);
} }
public State addState(String keyword) { public State addState(String keyword) {
State state = this; State state = this;
for (final Character character : keyword.toCharArray()) { for (final Character character : keyword.toCharArray()) {
@ -86,7 +104,9 @@ public class State {
return state; return state;
} }
public State addState(Character character) { public State addState(Character character) {
State nextState = nextStateIgnoreRootState(character); State nextState = nextStateIgnoreRootState(character);
if (nextState == null) { if (nextState == null) {
nextState = new State(this.depth + 1); nextState = new State(this.depth + 1);
@ -95,40 +115,39 @@ public class State {
return nextState; return nextState;
} }
public int getDepth() {
return this.depth;
}
public void addEmit(String keyword) { public void addEmit(String keyword) {
if (this.emits == null) { if (this.emits == null) {
this.emits = new TreeSet<>(); this.emits = new TreeSet<>();
} }
this.emits.add(keyword); this.emits.add(keyword);
} }
public void addEmit(Collection<String> emits) { public void addEmit(Collection<String> emits) {
for (String emit : emits) { for (String emit : emits) {
addEmit(emit); addEmit(emit);
} }
} }
public Collection<String> emit() { public Collection<String> emit() {
return this.emits == null ? Collections.<String>emptyList() : this.emits; return this.emits == null ? Collections.<String>emptyList() : this.emits;
} }
public State failure() {
return this.failure;
}
public void setFailure(State failState) {
this.failure = failState;
}
public Collection<State> getStates() { public Collection<State> getStates() {
return this.success.values(); return this.success.values();
} }
public Collection<Character> getTransitions() { public Collection<Character> getTransitions() {
return this.success.keySet(); return this.success.keySet();
} }
} }

View File

@ -1,17 +1,25 @@
package org.ahocorasick.trie; package org.ahocorasick.trie;
public abstract class Token { public abstract class Token {
private String fragment; private String fragment;
public Token(String fragment) { public Token(String fragment) {
this.fragment = fragment; this.fragment = fragment;
} }
public String getFragment() { public String getFragment() {
return this.fragment; return this.fragment;
} }
public abstract boolean isMatch(); public abstract boolean isMatch();
public abstract Emit getEmit(); public abstract Emit getEmit();
} }

View File

@ -15,20 +15,26 @@ import org.ahocorasick.trie.handler.StatefulEmitHandler;
* *
* @author Robert Bor * @author Robert Bor
*/ */
public class Trie { public final class Trie {
private final PayloadTrie<String> payloadTrie; private final PayloadTrie<String> payloadTrie;
/**
 * Wraps the given payload trie; all operations of this class delegate to it.
 *
 * @param payloadTrie the delegate
 */
private Trie(final PayloadTrie<String> payloadTrie) {
    this.payloadTrie = payloadTrie;
}
public Collection<Token> tokenize(final String text) { public Collection<Token> tokenize(final String text) {
Collection<PayloadToken<String>> tokens = this.payloadTrie.tokenize(text); Collection<PayloadToken<String>> tokens = this.payloadTrie.tokenize(text);
return asTokens(tokens); return asTokens(tokens);
} }
private static Collection<Token> asTokens(Collection<PayloadToken<String>> tokens) { private static Collection<Token> asTokens(Collection<PayloadToken<String>> tokens) {
Collection<Token> result = new ArrayList<>(); Collection<Token> result = new ArrayList<>();
for (PayloadToken<String> payloadToken : tokens) { for (PayloadToken<String> payloadToken : tokens) {
result.add(new DefaultToken(payloadToken)); result.add(new DefaultToken(payloadToken));
@ -36,7 +42,9 @@ public class Trie {
return result; return result;
} }
private static Collection<Emit> asEmits(Collection<PayloadEmit<String>> emits) { private static Collection<Emit> asEmits(Collection<PayloadEmit<String>> emits) {
Collection<Emit> result = new ArrayList<>(); Collection<Emit> result = new ArrayList<>();
for (PayloadEmit<String> emit : emits) { for (PayloadEmit<String> emit : emits) {
result.add(asEmit(emit)); result.add(asEmit(emit));
@ -44,30 +52,40 @@ public class Trie {
return result; return result;
} }
private static Emit asEmit(PayloadEmit<String> payloadEmit) { private static Emit asEmit(PayloadEmit<String> payloadEmit) {
return new Emit(payloadEmit.getStart(), payloadEmit.getEnd(), payloadEmit.getKeyword()); return new Emit(payloadEmit.getStart(), payloadEmit.getEnd(), payloadEmit.getKeyword());
} }
public Collection<Emit> parseText(final CharSequence text) { public Collection<Emit> parseText(final CharSequence text) {
Collection<PayloadEmit<String>> parsedText = this.payloadTrie.parseText(text); Collection<PayloadEmit<String>> parsedText = this.payloadTrie.parseText(text);
return asEmits(parsedText); return asEmits(parsedText);
} }
@SuppressWarnings("UnusedReturnValue") @SuppressWarnings("UnusedReturnValue")
public Collection<Emit> parseText( final CharSequence text, final StatefulEmitHandler emitHandler) { public Collection<Emit> parseText(final CharSequence text, final StatefulEmitHandler emitHandler) {
Collection<PayloadEmit<String>> parsedText = this.payloadTrie.parseText(text,
new StatefulPayloadEmitDelegateHandler(emitHandler)); Collection<PayloadEmit<String>> parsedText = this.payloadTrie.parseText(text, new StatefulPayloadEmitDelegateHandler(emitHandler));
return asEmits(parsedText); return asEmits(parsedText);
} }
public boolean containsMatch(final CharSequence text) { public boolean containsMatch(final CharSequence text) {
return firstMatch(text) != null; return firstMatch(text) != null;
} }
public void parseText(final CharSequence text, final EmitHandler emitHandler) { public void parseText(final CharSequence text, final EmitHandler emitHandler) {
this.payloadTrie.parseText(text, new PayloadEmitDelegateHandler(emitHandler)); this.payloadTrie.parseText(text, new PayloadEmitDelegateHandler(emitHandler));
} }
/** /**
* The first matching text sequence. * The first matching text sequence.
* *
@ -75,35 +93,38 @@ public class Trie {
* @return {@code null} if no matches found. * @return {@code null} if no matches found.
*/ */
public Emit firstMatch(final CharSequence text) { public Emit firstMatch(final CharSequence text) {
assert text != null; assert text != null;
final PayloadEmit<String> payload = this.payloadTrie.firstMatch( text ); final PayloadEmit<String> payload = this.payloadTrie.firstMatch(text);
return payload == null return payload == null ? null : new Emit(payload.getStart(), payload.getEnd(), payload.getKeyword());
? null
: new Emit( payload.getStart(),
payload.getEnd(),
payload.getKeyword() );
} }
/** /**
* Provides a fluent interface for constructing Trie instances. * Provides a fluent interface for constructing Trie instances.
* *
* @return The builder used to configure its Trie. * @return The builder used to configure its Trie.
*/ */
public static TrieBuilder builder() { public static TrieBuilder builder() {
return new TrieBuilder(); return new TrieBuilder();
} }
public static class TrieBuilder {
public static final class TrieBuilder {
private final PayloadTrieBuilder<String> delegate = PayloadTrie.builder(); private final PayloadTrieBuilder<String> delegate = PayloadTrie.builder();
/**
 * Private so that builders are only obtained through {@link Trie#builder()}.
 */
private TrieBuilder() {
    // Nothing to do here: the delegate is created by its field initialiser.
}
/** /**
* Configure the Trie to ignore case when searching for keywords in the text. * Configure the Trie to ignore case when searching for keywords in the text.
* This must be called before calling addKeyword because the algorithm converts * This must be called before calling addKeyword because the algorithm converts
@ -113,21 +134,37 @@ public class Trie {
* @return This builder. * @return This builder.
*/ */
public TrieBuilder ignoreCase() { public TrieBuilder ignoreCase() {
delegate.ignoreCase(); delegate.ignoreCase();
// this.trieConfig.setCaseInsensitive(true); // this.trieConfig.setCaseInsensitive(true);
return this; return this;
} }
/** /**
* Configure the Trie to ignore overlapping keywords. * Configure the Trie to ignore overlapping keywords.
* *
* @return This builder. * @return This builder.
*/ */
public TrieBuilder ignoreOverlaps() { public TrieBuilder ignoreOverlaps() {
delegate.ignoreOverlaps(); delegate.ignoreOverlaps();
return this; return this;
} }
/**
 * Configures the Trie to ignore whitespace.
 * The setting is forwarded to the wrapped payload-trie builder.
 *
 * @return This builder, to allow call chaining.
 */
public TrieBuilder ignoreWhiteSpace() {
    this.delegate.ignoreWhiteSpace();
    return this;
}
/** /**
* Adds a keyword to the Trie's list of text search keywords. * Adds a keyword to the Trie's list of text search keywords.
* *
@ -136,10 +173,12 @@ public class Trie {
* @throws NullPointerException if the keyword is null. * @throws NullPointerException if the keyword is null.
*/ */
public TrieBuilder addKeyword(final String keyword) { public TrieBuilder addKeyword(final String keyword) {
delegate.addKeyword(keyword, null); delegate.addKeyword(keyword, null);
return this; return this;
} }
/** /**
* Adds a list of keywords to the Trie's list of text search keywords. * Adds a list of keywords to the Trie's list of text search keywords.
* *
@ -147,12 +186,14 @@ public class Trie {
* @return This builder. * @return This builder.
*/ */
public TrieBuilder addKeywords(final String... keywords) { public TrieBuilder addKeywords(final String... keywords) {
for (String keyword : keywords) { for (String keyword : keywords) {
delegate.addKeyword(keyword, null); delegate.addKeyword(keyword, null);
} }
return this; return this;
} }
/** /**
* Adds a list of keywords to the Trie's list of text search keywords. * Adds a list of keywords to the Trie's list of text search keywords.
* *
@ -160,23 +201,27 @@ public class Trie {
* @return This builder. * @return This builder.
*/ */
@SuppressWarnings("unused") @SuppressWarnings("unused")
public TrieBuilder addKeywords( final Collection<String> keywords ) { public TrieBuilder addKeywords(final Collection<String> keywords) {
for (String keyword : keywords) { for (String keyword : keywords) {
this.delegate.addKeyword(keyword, null); this.delegate.addKeyword(keyword, null);
} }
return this; return this;
} }
/** /**
* Configure the Trie to match whole keywords in the text. * Configure the Trie to match whole keywords in the text.
* *
* @return This builder. * @return This builder.
*/ */
public TrieBuilder onlyWholeWords() { public TrieBuilder onlyWholeWords() {
this.delegate.onlyWholeWords(); this.delegate.onlyWholeWords();
return this; return this;
} }
/** /**
* Configure the Trie to match whole keywords that are separated by whitespace * Configure the Trie to match whole keywords that are separated by whitespace
* in the text. For example, "this keyword thatkeyword" would only match the * in the text. For example, "this keyword thatkeyword" would only match the
@ -185,44 +230,35 @@ public class Trie {
* @return This builder. * @return This builder.
*/ */
public TrieBuilder onlyWholeWordsWhiteSpaceSeparated() { public TrieBuilder onlyWholeWordsWhiteSpaceSeparated() {
this.delegate.onlyWholeWordsWhiteSpaceSeparated(); this.delegate.onlyWholeWordsWhiteSpaceSeparated();
return this; return this;
} }
/** /**
* Configure the Trie to stop after the first keyword is found in the text. * Configure the Trie to stop after the first keyword is found in the text.
* *
* @return This builder. * @return This builder.
*/ */
public TrieBuilder stopOnHit() { public TrieBuilder stopOnHit() {
this.delegate.stopOnHit(); this.delegate.stopOnHit();
return this; return this;
} }
/** /**
* Configure the Trie based on the builder settings. * Configure the Trie based on the builder settings.
* *
* @return The configured Trie. * @return The configured Trie.
*/ */
public Trie build() { public Trie build() {
PayloadTrie<String> payloadTrie = this.delegate.build(); PayloadTrie<String> payloadTrie = this.delegate.build();
return new Trie(payloadTrie); return new Trie(payloadTrie);
} }
/**
 * Legacy alias of {@link #ignoreCase()}, kept for existing callers.
 *
 * @return This builder.
 * @deprecated Use {@link #ignoreCase()}
 */
@Deprecated
public TrieBuilder caseInsensitive() {
    // The annotation accompanies the Javadoc tag so the deprecation is also visible to tools.
    return ignoreCase();
}
/**
 * Legacy alias of {@link #ignoreOverlaps()}, kept for existing callers.
 *
 * @return This builder.
 * @deprecated Use {@link #ignoreOverlaps()}
 */
@Deprecated
public TrieBuilder removeOverlaps() {
    // The annotation accompanies the Javadoc tag so the deprecation is also visible to tools.
    return ignoreOverlaps();
}
} }
} }

View File

@ -4,51 +4,86 @@ public class TrieConfig {
private boolean allowOverlaps = true; private boolean allowOverlaps = true;
private boolean onlyWholeWords = false; private boolean onlyWholeWords;
private boolean onlyWholeWordsWhiteSpaceSeparated = false; private boolean onlyWholeWordsWhiteSpaceSeparated;
private boolean caseInsensitive = false; private boolean caseInsensitive;
private boolean ignoreWhiteSpace;
private boolean stopOnHit;
private boolean stopOnHit = false;
public boolean isStopOnHit() { public boolean isStopOnHit() {
return stopOnHit; return stopOnHit;
} }
public void setStopOnHit(boolean stopOnHit) { public void setStopOnHit(boolean stopOnHit) {
this.stopOnHit = stopOnHit; this.stopOnHit = stopOnHit;
} }
public boolean isAllowOverlaps() { public boolean isAllowOverlaps() {
return allowOverlaps; return allowOverlaps;
} }
public void setAllowOverlaps(boolean allowOverlaps) { public void setAllowOverlaps(boolean allowOverlaps) {
this.allowOverlaps = allowOverlaps; this.allowOverlaps = allowOverlaps;
} }
public boolean isOnlyWholeWords() { public boolean isOnlyWholeWords() {
return onlyWholeWords; return onlyWholeWords;
} }
public void setOnlyWholeWords(boolean onlyWholeWords) { public void setOnlyWholeWords(boolean onlyWholeWords) {
this.onlyWholeWords = onlyWholeWords; this.onlyWholeWords = onlyWholeWords;
} }
public boolean isOnlyWholeWordsWhiteSpaceSeparated() { public boolean isOnlyWholeWordsWhiteSpaceSeparated() {
return onlyWholeWordsWhiteSpaceSeparated; return onlyWholeWordsWhiteSpaceSeparated;
} }
public void setOnlyWholeWordsWhiteSpaceSeparated(boolean onlyWholeWordsWhiteSpaceSeparated) { public void setOnlyWholeWordsWhiteSpaceSeparated(boolean onlyWholeWordsWhiteSpaceSeparated) {
this.onlyWholeWordsWhiteSpaceSeparated = onlyWholeWordsWhiteSpaceSeparated; this.onlyWholeWordsWhiteSpaceSeparated = onlyWholeWordsWhiteSpaceSeparated;
} }
public boolean isCaseInsensitive() { public boolean isCaseInsensitive() {
return caseInsensitive; return caseInsensitive;
} }
public boolean isIgnoreWhiteSpace() {
return ignoreWhiteSpace;
}
public void setCaseInsensitive(boolean caseInsensitive) { public void setCaseInsensitive(boolean caseInsensitive) {
this.caseInsensitive = caseInsensitive; this.caseInsensitive = caseInsensitive;
} }
public void setIgnoreWhiteSpace(boolean ignoreWhiteSpace) {
this.ignoreWhiteSpace = ignoreWhiteSpace;
}
} }

View File

@ -9,12 +9,16 @@ public abstract class AbstractStatefulEmitHandler implements StatefulEmitHandler
private final List<Emit> emits = new ArrayList<>(); private final List<Emit> emits = new ArrayList<>();
public void addEmit(final Emit emit) { public void addEmit(final Emit emit) {
this.emits.add(emit); this.emits.add(emit);
} }
@Override @Override
public List<Emit> getEmits() { public List<Emit> getEmits() {
return this.emits; return this.emits;
} }

View File

@ -9,12 +9,16 @@ public abstract class AbstractStatefulPayloadEmitHandler<T> implements StatefulP
private final List<PayloadEmit<T>> emits = new ArrayList<>(); private final List<PayloadEmit<T>> emits = new ArrayList<>();
public void addEmit(final PayloadEmit<T> emit) { public void addEmit(final PayloadEmit<T> emit) {
this.emits.add(emit); this.emits.add(emit);
} }
@Override @Override
public List<PayloadEmit<T>> getEmits() { public List<PayloadEmit<T>> getEmits() {
return this.emits; return this.emits;
} }

View File

@ -9,14 +9,19 @@ public class DefaultEmitHandler implements StatefulEmitHandler {
private final List<Emit> emits = new ArrayList<>(); private final List<Emit> emits = new ArrayList<>();
@Override @Override
public boolean emit(final Emit emit) { public boolean emit(final Emit emit) {
this.emits.add(emit); this.emits.add(emit);
return true; return true;
} }
@Override @Override
public List<Emit> getEmits() { public List<Emit> getEmits() {
return this.emits; return this.emits;
} }
} }

View File

@ -9,14 +9,19 @@ public class DefaultPayloadEmitHandler<T> implements StatefulPayloadEmitHandler<
private final List<PayloadEmit<T>> emits = new ArrayList<>(); private final List<PayloadEmit<T>> emits = new ArrayList<>();
@Override @Override
public boolean emit(final PayloadEmit<T> emit) { public boolean emit(final PayloadEmit<T> emit) {
this.emits.add(emit); this.emits.add(emit);
return true; return true;
} }
@Override @Override
public List<PayloadEmit<T>> getEmits() { public List<PayloadEmit<T>> getEmits() {
return this.emits; return this.emits;
} }
} }

View File

@ -3,5 +3,7 @@ package org.ahocorasick.trie.handler;
import org.ahocorasick.trie.Emit; import org.ahocorasick.trie.Emit;
public interface EmitHandler { public interface EmitHandler {
boolean emit(Emit emit); boolean emit(Emit emit);
} }

View File

@ -11,13 +11,17 @@ public class PayloadEmitDelegateHandler implements PayloadEmitHandler<String> {
private EmitHandler handler; private EmitHandler handler;
/**
 * Wraps a plain emit handler so that it can receive {@code PayloadEmit<String>} instances.
 *
 * @param handler The handler the converted emits are forwarded to.
 */
public PayloadEmitDelegateHandler(EmitHandler handler) {
    this.handler = handler;
}
@Override @Override
public boolean emit(PayloadEmit<String> emit) { public boolean emit(PayloadEmit<String> emit) {
Emit newEmit = new Emit(emit.getStart(), emit.getEnd(), emit.getKeyword()); Emit newEmit = new Emit(emit.getStart(), emit.getEnd(), emit.getKeyword());
return handler.emit(newEmit); return handler.emit(newEmit);
} }

View File

@ -3,5 +3,7 @@ package org.ahocorasick.trie.handler;
import org.ahocorasick.trie.PayloadEmit; import org.ahocorasick.trie.PayloadEmit;
public interface PayloadEmitHandler<T> { public interface PayloadEmitHandler<T> {
boolean emit(PayloadEmit<T> emit); boolean emit(PayloadEmit<T> emit);
} }

View File

@ -5,5 +5,7 @@ import java.util.List;
import org.ahocorasick.trie.Emit; import org.ahocorasick.trie.Emit;
/**
 * An {@link EmitHandler} that can also report a list of emits.
 */
public interface StatefulEmitHandler extends EmitHandler {

    /**
     * @return The emits held by this handler.
     */
    List<Emit> getEmits();

}

View File

@ -15,12 +15,16 @@ public class StatefulPayloadEmitDelegateHandler implements StatefulPayloadEmitHa
private StatefulEmitHandler handler; private StatefulEmitHandler handler;
/**
 * Wraps a stateful plain emit handler so that it can receive {@code PayloadEmit<String>} instances.
 *
 * @param handler The handler the converted emits are forwarded to.
 */
public StatefulPayloadEmitDelegateHandler(StatefulEmitHandler handler) {
    this.handler = handler;
}
private static List<PayloadEmit<String>> asEmits(Collection<Emit> emits) { private static List<PayloadEmit<String>> asEmits(Collection<Emit> emits) {
List<PayloadEmit<String>> result = new ArrayList<>(); List<PayloadEmit<String>> result = new ArrayList<>();
for (Emit emit : emits) { for (Emit emit : emits) {
result.add(new PayloadEmit<String>(emit.getStart(), emit.getEnd(), emit.getKeyword(), null)); result.add(new PayloadEmit<String>(emit.getStart(), emit.getEnd(), emit.getKeyword(), null));
@ -28,15 +32,20 @@ public class StatefulPayloadEmitDelegateHandler implements StatefulPayloadEmitHa
return result; return result;
} }
@Override @Override
public boolean emit(PayloadEmit<String> emit) { public boolean emit(PayloadEmit<String> emit) {
Emit newEmit = new Emit(emit.getStart(), emit.getEnd(), emit.getKeyword()); Emit newEmit = new Emit(emit.getStart(), emit.getEnd(), emit.getKeyword());
return handler.emit(newEmit); return handler.emit(newEmit);
} }
@Override @Override
public List<PayloadEmit<String>> getEmits() { public List<PayloadEmit<String>> getEmits() {
List<Emit> emits = this.handler.getEmits(); List<Emit> emits = this.handler.getEmits();
return asEmits(emits); return asEmits(emits);
} }
} }

View File

@ -4,6 +4,8 @@ import java.util.List;
import org.ahocorasick.trie.PayloadEmit; import org.ahocorasick.trie.PayloadEmit;
/**
 * A {@link PayloadEmitHandler} that can also report a list of payload emits.
 *
 * @param <T> The payload type of the emits.
 */
public interface StatefulPayloadEmitHandler<T> extends PayloadEmitHandler<T> {

    /**
     * @return The payload emits held by this handler.
     */
    List<PayloadEmit<T>> getEmits();

}

View File

@ -12,38 +12,51 @@ public class IntervalTest {
@Test @Test
public void test_construct() { public void test_construct() {
final Interval i = new Interval(1, 3); final Interval i = new Interval(1, 3);
assertEquals(1, i.getStart()); assertEquals(1, i.getStart());
assertEquals(3, i.getEnd()); assertEquals(3, i.getEnd());
} }
@Test @Test
public void test_size() { public void test_size() {
assertEquals(3, new Interval(0, 2).size()); assertEquals(3, new Interval(0, 2).size());
} }
@Test @Test
public void test_intervaloverlaps() { public void test_intervaloverlaps() {
assertTrue(new Interval(1, 3).overlapsWith(new Interval(2, 4))); assertTrue(new Interval(1, 3).overlapsWith(new Interval(2, 4)));
} }
@Test @Test
public void test_intervalDoesNotOverlap() { public void test_intervalDoesNotOverlap() {
assertFalse(new Interval(1, 13).overlapsWith(new Interval(27, 42))); assertFalse(new Interval(1, 13).overlapsWith(new Interval(27, 42)));
} }
@Test @Test
public void test_pointOverlaps() { public void test_pointOverlaps() {
assertTrue(new Interval(1, 3).overlapsWith(2)); assertTrue(new Interval(1, 3).overlapsWith(2));
} }
@Test @Test
public void test_pointDoesNotOverlap() { public void test_pointDoesNotOverlap() {
assertFalse(new Interval(1, 13).overlapsWith(42)); assertFalse(new Interval(1, 13).overlapsWith(42));
} }
@Test @Test
public void test_comparable() { public void test_comparable() {
final Set<Interval> intervals = new TreeSet<>(); final Set<Interval> intervals = new TreeSet<>();
intervals.add(new Interval(4, 6)); intervals.add(new Interval(4, 6));
intervals.add(new Interval(2, 7)); intervals.add(new Interval(2, 7));
@ -54,13 +67,17 @@ public class IntervalTest {
assertEquals(4, it.next().getStart()); assertEquals(4, it.next().getStart());
} }
@Test @Test
public void test_checkToString() { public void test_checkToString() {
assertEquals("4:6", new Interval(4, 6).toString()); assertEquals("4:6", new Interval(4, 6).toString());
} }
@Test @Test
public void test_compareToNegativeTest() { public void test_compareToNegativeTest() {
assertEquals(-1, new Interval(4, 6).compareTo(new Object())); assertEquals(-1, new Interval(4, 6).compareTo(new Object()));
} }

View File

@ -12,6 +12,7 @@ public class IntervalTreeTest {
@Test @Test
public void findOverlaps() { public void findOverlaps() {
List<Intervalable> intervals = new ArrayList<>(); List<Intervalable> intervals = new ArrayList<>();
intervals.add(new Interval(0, 2)); intervals.add(new Interval(0, 2));
intervals.add(new Interval(1, 3)); intervals.add(new Interval(1, 3));
@ -28,8 +29,10 @@ public class IntervalTreeTest {
assertOverlap(overlapsIt.next(), 0, 2); assertOverlap(overlapsIt.next(), 0, 2);
} }
@Test @Test
public void removeOverlaps() { public void removeOverlaps() {
List<Intervalable> intervals = new ArrayList<>(); List<Intervalable> intervals = new ArrayList<>();
intervals.add(new Interval(0, 2)); intervals.add(new Interval(0, 2));
intervals.add(new Interval(4, 5)); intervals.add(new Interval(4, 5));
@ -43,7 +46,9 @@ public class IntervalTreeTest {
} }
protected void assertOverlap(Intervalable interval, int expectedStart, int expectedEnd) { protected void assertOverlap(Intervalable interval, int expectedStart, int expectedEnd) {
assertEquals(expectedStart, interval.getStart()); assertEquals(expectedStart, interval.getStart());
assertEquals(expectedEnd, interval.getEnd()); assertEquals(expectedEnd, interval.getEnd());
} }

View File

@ -12,6 +12,7 @@ public class IntervalableComparatorByPositionTest {
@Test @Test
public void sortOnPosition() { public void sortOnPosition() {
List<Intervalable> intervals = new ArrayList<Intervalable>(); List<Intervalable> intervals = new ArrayList<Intervalable>();
intervals.add(new Interval(4, 5)); intervals.add(new Interval(4, 5));
intervals.add(new Interval(1, 4)); intervals.add(new Interval(1, 4));

View File

@ -12,6 +12,7 @@ public class IntervalableComparatorBySizeTest {
@Test @Test
public void sortOnSize() { public void sortOnSize() {
List<Intervalable> intervals = new ArrayList<Intervalable>(); List<Intervalable> intervals = new ArrayList<Intervalable>();
intervals.add(new Interval(4, 5)); intervals.add(new Interval(4, 5));
intervals.add(new Interval(1, 4)); intervals.add(new Interval(1, 4));
@ -22,8 +23,10 @@ public class IntervalableComparatorBySizeTest {
assertEquals(2, intervals.get(2).size()); assertEquals(2, intervals.get(2).size());
} }
@Test @Test
public void sortOnSizeThenPosition() { public void sortOnSizeThenPosition() {
List<Intervalable> intervals = new ArrayList<Intervalable>(); List<Intervalable> intervals = new ArrayList<Intervalable>();
intervals.add(new Interval(4, 7)); intervals.add(new Interval(4, 7));
intervals.add(new Interval(2, 5)); intervals.add(new Interval(2, 5));

View File

@ -15,18 +15,22 @@ public class EmitTest {
*/ */
@Test @Test
public void test_Equality_SameValues_ObjectsAreEqual() { public void test_Equality_SameValues_ObjectsAreEqual() {
final Emit one = new Emit(13, 42, null); final Emit one = new Emit(13, 42, null);
final Emit two = new Emit(13, 42, null); final Emit two = new Emit(13, 42, null);
assertEquals( one, two ); assertEquals(one, two);
} }
/** /**
* Test that two {@link Emit} instances having different values are equal. * Test that two {@link Emit} instances having different values are equal.
*/ */
@Test @Test
public void test_Equality_DifferingValues_ObjectsAreNotEqual() { public void test_Equality_DifferingValues_ObjectsAreNotEqual() {
final Emit one = new Emit(13, 42, null); final Emit one = new Emit(13, 42, null);
final Emit two = new Emit(13, 43, null); final Emit two = new Emit(13, 43, null);
assertNotEquals(one, two); assertNotEquals(one, two);
} }
} }

View File

@ -17,111 +17,117 @@ import static org.junit.Assert.*;
public class PayloadTrieTest { public class PayloadTrieTest {
private final static String[] ALPHABET = new String[] { "abc", "bcd", "cde" }; private final static String[] ALPHABET = new String[]{"abc", "bcd", "cde"};
private final static String[] ALPHABET_PAYLOAD = new String[] { "alpha:abc", "alpha:bcd", "alpha:cde" }; private final static String[] ALPHABET_PAYLOAD = new String[]{"alpha:abc", "alpha:bcd", "alpha:cde"};
private final static List<Payload<String>> ALPHABET_WITH_PAYLOADS = asList( private final static List<Payload<String>> ALPHABET_WITH_PAYLOADS = asList(new Payload<>(ALPHABET[0], ALPHABET_PAYLOAD[0]),
new Payload<>( ALPHABET[ 0 ], ALPHABET_PAYLOAD[ 0 ] ), new Payload<>(ALPHABET[1], ALPHABET_PAYLOAD[1]),
new Payload<>( ALPHABET[ 1 ], ALPHABET_PAYLOAD[ 1 ] ), new Payload<>(ALPHABET[2], ALPHABET_PAYLOAD[2]));
new Payload<>( ALPHABET[ 2 ], ALPHABET_PAYLOAD[ 2 ] ));
private final static String[] PRONOUNS = new String[] { "hers", "his", "she", "he" }; private final static String[] PRONOUNS = new String[]{"hers", "his", "she", "he"};
private final static int[] PRONOUNS_PAYLOAD_ID = new int[] { 9, 12, 4, 20 }; private final static int[] PRONOUNS_PAYLOAD_ID = new int[]{9, 12, 4, 20};
private final static List<Payload<Integer>> PRONOUNS_WITH_PAYLOADS = asList( private final static List<Payload<Integer>> PRONOUNS_WITH_PAYLOADS = asList(new Payload<>(PRONOUNS[0], PRONOUNS_PAYLOAD_ID[0]),
new Payload<>( PRONOUNS[ 0 ], PRONOUNS_PAYLOAD_ID[ 0 ] ), new Payload<>(PRONOUNS[1], PRONOUNS_PAYLOAD_ID[1]),
new Payload<>( PRONOUNS[ 1 ], PRONOUNS_PAYLOAD_ID[ 1 ] ), new Payload<>(PRONOUNS[2], PRONOUNS_PAYLOAD_ID[2]),
new Payload<>( PRONOUNS[ 2 ], PRONOUNS_PAYLOAD_ID[ 2 ] ), new Payload<>(PRONOUNS[3], PRONOUNS_PAYLOAD_ID[3]));
new Payload<>( PRONOUNS[ 3 ], PRONOUNS_PAYLOAD_ID[ 3 ] )
);
private final static String[] FOOD = new String[] { "veal", "cauliflower", "broccoli", "tomatoes" }; private final static String[] FOOD = new String[]{"veal", "cauliflower", "broccoli", "tomatoes"};
private final static Food[] FOOD_PAYLOAD = new Food[] { new Food("veal"), new Food("cauliflower"), new Food("broccoli"), private final static Food[] FOOD_PAYLOAD = new Food[]{new Food("veal"), new Food("cauliflower"), new Food("broccoli"), new Food("tomatoes")};
new Food("tomatoes") };
private final static List<Payload<Food>> FOOD_WITH_PAYLOADS = asList( private final static List<Payload<Food>> FOOD_WITH_PAYLOADS = asList(new Payload<>(FOOD[0], FOOD_PAYLOAD[0]),
new Payload<>( FOOD[ 0 ], FOOD_PAYLOAD[ 0 ] ), new Payload<>(FOOD[1], FOOD_PAYLOAD[1]),
new Payload<>( FOOD[ 1 ], FOOD_PAYLOAD[ 1 ] ), new Payload<>(FOOD[2], FOOD_PAYLOAD[2]),
new Payload<>( FOOD[ 2 ], FOOD_PAYLOAD[ 2 ] ), new Payload<>(FOOD[3], FOOD_PAYLOAD[3]));
new Payload<>( FOOD[ 3 ], FOOD_PAYLOAD[ 3 ] )
);
private final static String[] GREEK_LETTERS = new String[] { "Alpha", "Beta", "Gamma" }; private final static String[] GREEK_LETTERS = new String[]{"Alpha", "Beta", "Gamma"};
private final static String[] GREEK_LETTERS_PAYLOAD = new String[] { "greek:Alpha", "greek:Beta", "greek:Gamma" }; private final static String[] GREEK_LETTERS_PAYLOAD = new String[]{"greek:Alpha", "greek:Beta", "greek:Gamma"};
private final static List<Payload<String>> GREEK_LETTERS_WITH_PAYLOADS = asList( private final static List<Payload<String>> GREEK_LETTERS_WITH_PAYLOADS = asList(new Payload<>(GREEK_LETTERS[0], GREEK_LETTERS_PAYLOAD[0]),
new Payload<>( GREEK_LETTERS[ 0 ], GREEK_LETTERS_PAYLOAD[ 0 ] ), new Payload<>(GREEK_LETTERS[1], GREEK_LETTERS_PAYLOAD[1]),
new Payload<>( GREEK_LETTERS[ 1 ], GREEK_LETTERS_PAYLOAD[ 1 ] ), new Payload<>(GREEK_LETTERS[2], GREEK_LETTERS_PAYLOAD[2]));
new Payload<>( GREEK_LETTERS[ 2 ], GREEK_LETTERS_PAYLOAD[ 2 ] ));
private final static String[] UNICODE = new String[] { "turning", "once", "again", "börkü" }; private final static String[] UNICODE = new String[]{"turning", "once", "again", "börkü"};
private final static String[] UNICODE_PAYLOAD = new String[] { "uni:turning", "uni:once", "uni:again", "uni:börkü" }; private final static String[] UNICODE_PAYLOAD = new String[]{"uni:turning", "uni:once", "uni:again", "uni:börkü"};
private final static List<Payload<String>> UNICODE_WITH_PAYLOADS = asList( private final static List<Payload<String>> UNICODE_WITH_PAYLOADS = asList(new Payload<>(UNICODE[0], UNICODE_PAYLOAD[0]),
new Payload<>( UNICODE[ 0 ], UNICODE_PAYLOAD[ 0 ] ), new Payload<>(UNICODE[1], UNICODE_PAYLOAD[1]),
new Payload<>( UNICODE[ 1 ], UNICODE_PAYLOAD[ 1 ] ), new Payload<>(UNICODE[2], UNICODE_PAYLOAD[2]),
new Payload<>( UNICODE[ 2 ], UNICODE_PAYLOAD[ 2 ] ), new Payload<>(UNICODE[3], UNICODE_PAYLOAD[3]));
new Payload<>( UNICODE[ 3 ], UNICODE_PAYLOAD[ 3 ] ));
public static class Food { public static class Food {
private final String name; private final String name;
public Food(String name) { public Food(String name) {
this.name = name; this.name = name;
} }
@Override @Override
public int hashCode() { public int hashCode() {
final int prime = 31; final int prime = 31;
int result = 1; int result = 1;
result = prime * result + ((name == null) ? 0 : name.hashCode()); result = prime * result + ((name == null) ? 0 : name.hashCode());
return result; return result;
} }
@Override @Override
public boolean equals( Object obj ) { public boolean equals(Object obj) {
if( this == obj ) {
if (this == obj) {
return true; return true;
} }
if( obj == null ) { if (obj == null) {
return false; return false;
} }
if( getClass() != obj.getClass() ) { if (getClass() != obj.getClass()) {
return false; return false;
} }
Food other = (Food) obj; Food other = (Food) obj;
if( name == null ) { if (name == null) {
return other.name == null; return other.name == null;
} } else {
else { return name.equals(other.name);
return name.equals( other.name );
} }
} }
} }
@Test @Test
public void keywordAndTextAreTheSame() { public void keywordAndTextAreTheSame() {
PayloadTrie<String> trie = PayloadTrie.<String>builder().addKeyword(ALPHABET[0], ALPHABET_PAYLOAD[0]).build(); PayloadTrie<String> trie = PayloadTrie.<String>builder().addKeyword(ALPHABET[0], ALPHABET_PAYLOAD[0]).build();
Collection<PayloadEmit<String>> emits = trie.parseText(ALPHABET[0]); Collection<PayloadEmit<String>> emits = trie.parseText(ALPHABET[0]);
Iterator<PayloadEmit<String>> iterator = emits.iterator(); Iterator<PayloadEmit<String>> iterator = emits.iterator();
checkEmit(iterator.next(), 0, 2, ALPHABET[0], ALPHABET_PAYLOAD[0]); checkEmit(iterator.next(), 0, 2, ALPHABET[0], ALPHABET_PAYLOAD[0]);
} }
@Test @Test
public void keywordAndTextAreTheSameFirstMatch() { public void keywordAndTextAreTheSameFirstMatch() {
PayloadTrie<String> trie = PayloadTrie.<String>builder().addKeyword(ALPHABET[0], ALPHABET_PAYLOAD[0]).build(); PayloadTrie<String> trie = PayloadTrie.<String>builder().addKeyword(ALPHABET[0], ALPHABET_PAYLOAD[0]).build();
PayloadEmit<String> firstMatch = trie.firstMatch(ALPHABET[0]); PayloadEmit<String> firstMatch = trie.firstMatch(ALPHABET[0]);
checkEmit(firstMatch, 0, 2, ALPHABET[0], ALPHABET_PAYLOAD[0]); checkEmit(firstMatch, 0, 2, ALPHABET[0], ALPHABET_PAYLOAD[0]);
} }
@Test @Test
public void textIsLongerThanKeyword() { public void textIsLongerThanKeyword() {
PayloadTrie<String> trie = PayloadTrie.<String>builder().addKeyword(ALPHABET[0], ALPHABET_PAYLOAD[0]).build(); PayloadTrie<String> trie = PayloadTrie.<String>builder().addKeyword(ALPHABET[0], ALPHABET_PAYLOAD[0]).build();
Collection<PayloadEmit<String>> emits = trie.parseText(" " + ALPHABET[0]); Collection<PayloadEmit<String>> emits = trie.parseText(" " + ALPHABET[0]);
Iterator<PayloadEmit<String>> iterator = emits.iterator(); Iterator<PayloadEmit<String>> iterator = emits.iterator();
checkEmit(iterator.next(), 1, 3, ALPHABET[0], ALPHABET_PAYLOAD[0]); checkEmit(iterator.next(), 1, 3, ALPHABET[0], ALPHABET_PAYLOAD[0]);
} }
@Test @Test
public void textIsLongerThanKeywordFirstMatch() { public void textIsLongerThanKeywordFirstMatch() {
@ -130,23 +136,29 @@ public class PayloadTrieTest {
checkEmit(firstMatch, 1, 3, ALPHABET[0], ALPHABET_PAYLOAD[0]); checkEmit(firstMatch, 1, 3, ALPHABET[0], ALPHABET_PAYLOAD[0]);
} }
@Test @Test
public void variousKeywordsOneMatch() { public void variousKeywordsOneMatch() {
PayloadTrie<String> trie = PayloadTrie.<String>builder().addKeywords(ALPHABET_WITH_PAYLOADS).build(); PayloadTrie<String> trie = PayloadTrie.<String>builder().addKeywords(ALPHABET_WITH_PAYLOADS).build();
Collection<PayloadEmit<String>> emits = trie.parseText("bcd"); Collection<PayloadEmit<String>> emits = trie.parseText("bcd");
Iterator<PayloadEmit<String>> iterator = emits.iterator(); Iterator<PayloadEmit<String>> iterator = emits.iterator();
checkEmit(iterator.next(), 0, 2, "bcd", "alpha:bcd"); checkEmit(iterator.next(), 0, 2, "bcd", "alpha:bcd");
} }
@Test @Test
public void variousKeywordsFirstMatch() { public void variousKeywordsFirstMatch() {
PayloadTrie<String> trie = PayloadTrie.<String>builder().addKeywords(ALPHABET_WITH_PAYLOADS).build(); PayloadTrie<String> trie = PayloadTrie.<String>builder().addKeywords(ALPHABET_WITH_PAYLOADS).build();
PayloadEmit<String> firstMatch = trie.firstMatch("bcd"); PayloadEmit<String> firstMatch = trie.firstMatch("bcd");
checkEmit(firstMatch, 0, 2, "bcd", "alpha:bcd"); checkEmit(firstMatch, 0, 2, "bcd", "alpha:bcd");
} }
@Test @Test
public void ushersTestAndStopOnHit() { public void ushersTestAndStopOnHit() {
PayloadTrie<Integer> trie = PayloadTrie.<Integer>builder().addKeywords(PRONOUNS_WITH_PAYLOADS).stopOnHit().build(); PayloadTrie<Integer> trie = PayloadTrie.<Integer>builder().addKeywords(PRONOUNS_WITH_PAYLOADS).stopOnHit().build();
Collection<PayloadEmit<Integer>> emits = trie.parseText("ushers"); Collection<PayloadEmit<Integer>> emits = trie.parseText("ushers");
assertEquals(1, emits.size()); // she @ 3, he @ 3, hers @ 5 assertEquals(1, emits.size()); // she @ 3, he @ 3, hers @ 5
@ -154,15 +166,19 @@ public class PayloadTrieTest {
checkEmit(iterator.next(), 2, 3, "he", 20); checkEmit(iterator.next(), 2, 3, "he", 20);
} }
@Test @Test
public void ushersTestStopOnHitSkipOne() { public void ushersTestStopOnHitSkipOne() {
PayloadTrie<Integer> trie = PayloadTrie.<Integer>builder().addKeywords(PRONOUNS_WITH_PAYLOADS).stopOnHit().build(); PayloadTrie<Integer> trie = PayloadTrie.<Integer>builder().addKeywords(PRONOUNS_WITH_PAYLOADS).stopOnHit().build();
StatefulPayloadEmitHandler<Integer> testEmitHandler = new AbstractStatefulPayloadEmitHandler<Integer>() { StatefulPayloadEmitHandler<Integer> testEmitHandler = new AbstractStatefulPayloadEmitHandler<Integer>() {
boolean first = true; boolean first = true;
@Override @Override
public boolean emit(final PayloadEmit<Integer> emit) { public boolean emit(final PayloadEmit<Integer> emit) {
if (first) { if (first) {
// return false for the first element // return false for the first element
first = false; first = false;
@ -181,8 +197,10 @@ public class PayloadTrieTest {
checkEmit(iterator.next(), 1, 3, "she", 4); checkEmit(iterator.next(), 1, 3, "she", 4);
} }
@Test @Test
public void ushersTest() { public void ushersTest() {
PayloadTrie<Integer> trie = PayloadTrie.<Integer>builder().addKeywords(PRONOUNS_WITH_PAYLOADS).build(); PayloadTrie<Integer> trie = PayloadTrie.<Integer>builder().addKeywords(PRONOUNS_WITH_PAYLOADS).build();
Collection<PayloadEmit<Integer>> emits = trie.parseText("ushers"); Collection<PayloadEmit<Integer>> emits = trie.parseText("ushers");
assertEquals(3, emits.size()); // she @ 3, he @ 3, hers @ 5 assertEquals(3, emits.size()); // she @ 3, he @ 3, hers @ 5
@ -193,10 +211,17 @@ public class PayloadTrieTest {
checkEmit(iterator.next(), 2, 5, "hers", 9); checkEmit(iterator.next(), 2, 5, "hers", 9);
} }
@Test @Test
public void ushersTestWithCapitalKeywords() { public void ushersTestWithCapitalKeywords() {
PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreCase().addKeyword("HERS", "hers").addKeyword("HIS", "his")
.addKeyword("SHE", "she").addKeyword("HE", "he").build(); PayloadTrie<String> trie = PayloadTrie.<String>builder()
.ignoreCase()
.addKeyword("HERS", "hers")
.addKeyword("HIS", "his")
.addKeyword("SHE", "she")
.addKeyword("HE", "he")
.build();
Collection<PayloadEmit<String>> emits = trie.parseText("ushers"); Collection<PayloadEmit<String>> emits = trie.parseText("ushers");
assertEquals(3, emits.size()); // she @ 3, he @ 3, hers @ 5 assertEquals(3, emits.size()); // she @ 3, he @ 3, hers @ 5
Iterator<PayloadEmit<String>> iterator = emits.iterator(); Iterator<PayloadEmit<String>> iterator = emits.iterator();
@ -205,15 +230,19 @@ public class PayloadTrieTest {
checkEmit(iterator.next(), 2, 5, "HERS", "hers"); checkEmit(iterator.next(), 2, 5, "HERS", "hers");
} }
@Test @Test
public void ushersTestFirstMatch() { public void ushersTestFirstMatch() {
PayloadTrie<Integer> trie = PayloadTrie.<Integer>builder().addKeywords(PRONOUNS_WITH_PAYLOADS).build(); PayloadTrie<Integer> trie = PayloadTrie.<Integer>builder().addKeywords(PRONOUNS_WITH_PAYLOADS).build();
PayloadEmit<Integer> firstMatch = trie.firstMatch("ushers"); PayloadEmit<Integer> firstMatch = trie.firstMatch("ushers");
checkEmit(firstMatch, 2, 3, "he", 20); checkEmit(firstMatch, 2, 3, "he", 20);
} }
@Test @Test
public void ushersTestByCallback() { public void ushersTestByCallback() {
PayloadTrie<Integer> trie = PayloadTrie.<Integer>builder().addKeywords(PRONOUNS_WITH_PAYLOADS).build(); PayloadTrie<Integer> trie = PayloadTrie.<Integer>builder().addKeywords(PRONOUNS_WITH_PAYLOADS).build();
final List<PayloadEmit<Integer>> emits = new LinkedList<>(); final List<PayloadEmit<Integer>> emits = new LinkedList<>();
@ -230,23 +259,29 @@ public class PayloadTrieTest {
checkEmit(iterator.next(), 2, 5, "hers", 9); checkEmit(iterator.next(), 2, 5, "hers", 9);
} }
@Test @Test
public void misleadingTest() { public void misleadingTest() {
PayloadTrie<String> trie = PayloadTrie.<String>builder().addKeyword("hers", "pronon:hers").build(); PayloadTrie<String> trie = PayloadTrie.<String>builder().addKeyword("hers", "pronon:hers").build();
Collection<PayloadEmit<String>> emits = trie.parseText("h he her hers"); Collection<PayloadEmit<String>> emits = trie.parseText("h he her hers");
Iterator<PayloadEmit<String>> iterator = emits.iterator(); Iterator<PayloadEmit<String>> iterator = emits.iterator();
checkEmit(iterator.next(), 9, 12, "hers", "pronon:hers"); checkEmit(iterator.next(), 9, 12, "hers", "pronon:hers");
} }
@Test @Test
public void misleadingTestFirstMatch() { public void misleadingTestFirstMatch() {
PayloadTrie<String> trie = PayloadTrie.<String>builder().addKeyword("hers", "pronon:hers").build(); PayloadTrie<String> trie = PayloadTrie.<String>builder().addKeyword("hers", "pronon:hers").build();
PayloadEmit<String> firstMatch = trie.firstMatch("h he her hers"); PayloadEmit<String> firstMatch = trie.firstMatch("h he her hers");
checkEmit(firstMatch, 9, 12, "hers", "pronon:hers"); checkEmit(firstMatch, 9, 12, "hers", "pronon:hers");
} }
@Test @Test
public void recipes() { public void recipes() {
PayloadTrie<Food> trie = PayloadTrie.<Food>builder().addKeywords(FOOD_WITH_PAYLOADS).build(); PayloadTrie<Food> trie = PayloadTrie.<Food>builder().addKeywords(FOOD_WITH_PAYLOADS).build();
Collection<PayloadEmit<Food>> emits = trie.parseText("2 cauliflowers, 3 tomatoes, 4 slices of veal, 100g broccoli"); Collection<PayloadEmit<Food>> emits = trie.parseText("2 cauliflowers, 3 tomatoes, 4 slices of veal, 100g broccoli");
Iterator<PayloadEmit<Food>> iterator = emits.iterator(); Iterator<PayloadEmit<Food>> iterator = emits.iterator();
@ -256,17 +291,20 @@ public class PayloadTrieTest {
checkEmit(iterator.next(), 51, 58, "broccoli", new Food("broccoli")); checkEmit(iterator.next(), 51, 58, "broccoli", new Food("broccoli"));
} }
@Test @Test
public void recipesFirstMatch() { public void recipesFirstMatch() {
PayloadTrie<Food> trie = PayloadTrie.<Food>builder().addKeywords(FOOD_WITH_PAYLOADS).build(); PayloadTrie<Food> trie = PayloadTrie.<Food>builder().addKeywords(FOOD_WITH_PAYLOADS).build();
PayloadEmit<Food> firstMatch = trie.firstMatch("2 cauliflowers, 3 tomatoes, 4 slices of veal, 100g broccoli"); PayloadEmit<Food> firstMatch = trie.firstMatch("2 cauliflowers, 3 tomatoes, 4 slices of veal, 100g broccoli");
checkEmit(firstMatch, 2, 12, "cauliflower", new Food("cauliflower")); checkEmit(firstMatch, 2, 12, "cauliflower", new Food("cauliflower"));
} }
@Test @Test
public void longAndShortOverlappingMatch() { public void longAndShortOverlappingMatch() {
PayloadTrie<String> trie = PayloadTrie.<String>builder().addKeyword("he", "pronon:he").addKeyword("hehehehe", "garbage")
.build(); PayloadTrie<String> trie = PayloadTrie.<String>builder().addKeyword("he", "pronon:he").addKeyword("hehehehe", "garbage").build();
Collection<PayloadEmit<String>> emits = trie.parseText("hehehehehe"); Collection<PayloadEmit<String>> emits = trie.parseText("hehehehehe");
Iterator<PayloadEmit<String>> iterator = emits.iterator(); Iterator<PayloadEmit<String>> iterator = emits.iterator();
checkEmit(iterator.next(), 0, 1, "he", "pronon:he"); checkEmit(iterator.next(), 0, 1, "he", "pronon:he");
@ -278,10 +316,16 @@ public class PayloadTrieTest {
checkEmit(iterator.next(), 2, 9, "hehehehe", "garbage"); checkEmit(iterator.next(), 2, 9, "hehehehe", "garbage");
} }
@Test @Test
public void nonOverlapping() { public void nonOverlapping() {
PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreOverlaps().addKeyword("ab", "alpha:ab")
.addKeyword("cba", "alpha:cba").addKeyword("ababc", "alpha:ababc").build(); PayloadTrie<String> trie = PayloadTrie.<String>builder()
.ignoreOverlaps()
.addKeyword("ab", "alpha:ab")
.addKeyword("cba", "alpha:cba")
.addKeyword("ababc", "alpha:ababc")
.build();
Collection<PayloadEmit<String>> emits = trie.parseText("ababcbab"); Collection<PayloadEmit<String>> emits = trie.parseText("ababcbab");
assertEquals(2, emits.size()); assertEquals(2, emits.size());
Iterator<PayloadEmit<String>> iterator = emits.iterator(); Iterator<PayloadEmit<String>> iterator = emits.iterator();
@ -290,49 +334,79 @@ public class PayloadTrieTest {
checkEmit(iterator.next(), 6, 7, "ab", "alpha:ab"); checkEmit(iterator.next(), 6, 7, "ab", "alpha:ab");
} }
@Test @Test
public void nonOverlappingFirstMatch() { public void nonOverlappingFirstMatch() {
PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreOverlaps().addKeyword("ab", "alpha:ab")
.addKeyword("cba", "alpha:cba").addKeyword("ababc", "alpha:ababc").build(); PayloadTrie<String> trie = PayloadTrie.<String>builder()
.ignoreOverlaps()
.addKeyword("ab", "alpha:ab")
.addKeyword("cba", "alpha:cba")
.addKeyword("ababc", "alpha:ababc")
.build();
PayloadEmit<String> firstMatch = trie.firstMatch("ababcbab"); PayloadEmit<String> firstMatch = trie.firstMatch("ababcbab");
checkEmit(firstMatch, 0, 4, "ababc", "alpha:ababc"); checkEmit(firstMatch, 0, 4, "ababc", "alpha:ababc");
} }
@Test @Test
public void containsMatch() { public void containsMatch() {
PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreOverlaps().addKeyword("ab", "alpha:ab")
.addKeyword("cba", "alpha:cba").addKeyword("ababc", "alpha:ababc").build(); PayloadTrie<String> trie = PayloadTrie.<String>builder()
.ignoreOverlaps()
.addKeyword("ab", "alpha:ab")
.addKeyword("cba", "alpha:cba")
.addKeyword("ababc", "alpha:ababc")
.build();
assertTrue(trie.containsMatch("ababcbab")); assertTrue(trie.containsMatch("ababcbab"));
} }
@Test @Test
public void startOfChurchillSpeech() { public void startOfChurchillSpeech() {
PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreOverlaps().addKeyword("T").addKeyword("u").addKeyword("ur")
.addKeyword("r").addKeyword("urn").addKeyword("ni").addKeyword("i").addKeyword("in").addKeyword("n") PayloadTrie<String> trie = PayloadTrie.<String>builder()
.addKeyword("urning").build(); .ignoreOverlaps()
.addKeyword("T")
.addKeyword("u")
.addKeyword("ur")
.addKeyword("r")
.addKeyword("urn")
.addKeyword("ni")
.addKeyword("i")
.addKeyword("in")
.addKeyword("n")
.addKeyword("urning")
.build();
Collection<PayloadEmit<String>> emits = trie.parseText("Turning"); Collection<PayloadEmit<String>> emits = trie.parseText("Turning");
assertEquals(2, emits.size()); assertEquals(2, emits.size());
} }
@Test @Test
public void partialMatch() { public void partialMatch() {
PayloadTrie<String> trie = PayloadTrie.<String>builder().onlyWholeWords().addKeyword("sugar", "food:sugar").build(); PayloadTrie<String> trie = PayloadTrie.<String>builder().onlyWholeWords().addKeyword("sugar", "food:sugar").build();
Collection<PayloadEmit<String>> emits = trie.parseText("sugarcane sugarcane sugar canesugar"); // left, middle, right test Collection<PayloadEmit<String>> emits = trie.parseText("sugarcane sugarcane sugar canesugar"); // left, middle, right test
assertEquals(1, emits.size()); // Match must not be made assertEquals(1, emits.size()); // Match must not be made
checkEmit(emits.iterator().next(), 20, 24, "sugar", "food:sugar"); checkEmit(emits.iterator().next(), 20, 24, "sugar", "food:sugar");
} }
@Test @Test
public void partialMatchFirstMatch() { public void partialMatchFirstMatch() {
PayloadTrie<String> trie = PayloadTrie.<String>builder().onlyWholeWords().addKeyword("sugar", "food:sugar").build(); PayloadTrie<String> trie = PayloadTrie.<String>builder().onlyWholeWords().addKeyword("sugar", "food:sugar").build();
PayloadEmit<String> firstMatch = trie.firstMatch("sugarcane sugarcane sugar canesugar"); // left, middle, right test PayloadEmit<String> firstMatch = trie.firstMatch("sugarcane sugarcane sugar canesugar"); // left, middle, right test
checkEmit(firstMatch, 20, 24, "sugar", "food:sugar"); checkEmit(firstMatch, 20, 24, "sugar", "food:sugar");
} }
@Test @Test
public void tokenizeFullSentence() { public void tokenizeFullSentence() {
PayloadTrie<String> trie = PayloadTrie.<String>builder().addKeywords(GREEK_LETTERS_WITH_PAYLOADS).build(); PayloadTrie<String> trie = PayloadTrie.<String>builder().addKeywords(GREEK_LETTERS_WITH_PAYLOADS).build();
Collection<PayloadToken<String>> tokens = trie.tokenize("Hear: Alpha team first, Beta from the rear, Gamma in reserve"); Collection<PayloadToken<String>> tokens = trie.tokenize("Hear: Alpha team first, Beta from the rear, Gamma in reserve");
assertEquals(7, tokens.size()); assertEquals(7, tokens.size());
@ -346,11 +420,12 @@ public class PayloadTrieTest {
assertEquals(" in reserve", tokensIt.next().getFragment()); assertEquals(" in reserve", tokensIt.next().getFragment());
} }
// @see https://github.com/robert-bor/aho-corasick/issues/5 // @see https://github.com/robert-bor/aho-corasick/issues/5
@Test @Test
public void testStringIndexOutOfBoundsException() { public void testStringIndexOutOfBoundsException() {
PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreCase().onlyWholeWords().addKeywords(UNICODE_WITH_PAYLOADS)
.build(); PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreCase().onlyWholeWords().addKeywords(UNICODE_WITH_PAYLOADS).build();
Collection<PayloadEmit<String>> emits = trie.parseText("TurninG OnCe AgAiN BÖRKÜ"); Collection<PayloadEmit<String>> emits = trie.parseText("TurninG OnCe AgAiN BÖRKÜ");
assertEquals(4, emits.size()); // Match must not be made assertEquals(4, emits.size()); // Match must not be made
Iterator<PayloadEmit<String>> it = emits.iterator(); Iterator<PayloadEmit<String>> it = emits.iterator();
@ -361,8 +436,10 @@ public class PayloadTrieTest {
checkEmit(it.next(), 19, 23, "börkü", "uni:börkü"); checkEmit(it.next(), 19, 23, "börkü", "uni:börkü");
} }
@Test @Test
public void testIgnoreCase() { public void testIgnoreCase() {
PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreCase().addKeywords(UNICODE_WITH_PAYLOADS).build(); PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreCase().addKeywords(UNICODE_WITH_PAYLOADS).build();
Collection<PayloadEmit<String>> emits = trie.parseText("TurninG OnCe AgAiN BÖRKÜ"); Collection<PayloadEmit<String>> emits = trie.parseText("TurninG OnCe AgAiN BÖRKÜ");
assertEquals(4, emits.size()); // Match must not be made assertEquals(4, emits.size()); // Match must not be made
@ -374,65 +451,75 @@ public class PayloadTrieTest {
checkEmit(it.next(), 19, 23, "börkü", "uni:börkü"); checkEmit(it.next(), 19, 23, "börkü", "uni:börkü");
} }
@Test @Test
public void testIgnoreCaseFirstMatch() { public void testIgnoreCaseFirstMatch() {
PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreCase().addKeywords(UNICODE_WITH_PAYLOADS).build(); PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreCase().addKeywords(UNICODE_WITH_PAYLOADS).build();
PayloadEmit<String> firstMatch = trie.firstMatch("TurninG OnCe AgAiN BÖRKÜ"); PayloadEmit<String> firstMatch = trie.firstMatch("TurninG OnCe AgAiN BÖRKÜ");
checkEmit(firstMatch, 0, 6, "turning", "uni:turning"); checkEmit(firstMatch, 0, 6, "turning", "uni:turning");
} }
@Test @Test
public void tokenizeTokensInSequence() { public void tokenizeTokensInSequence() {
PayloadTrie<String> trie = PayloadTrie.<String>builder().addKeywords(GREEK_LETTERS_WITH_PAYLOADS).build(); PayloadTrie<String> trie = PayloadTrie.<String>builder().addKeywords(GREEK_LETTERS_WITH_PAYLOADS).build();
Collection<PayloadToken<String>> tokens = trie.tokenize("Alpha Beta Gamma"); Collection<PayloadToken<String>> tokens = trie.tokenize("Alpha Beta Gamma");
assertEquals(5, tokens.size()); assertEquals(5, tokens.size());
} }
// @see https://github.com/robert-bor/aho-corasick/issues/7 // @see https://github.com/robert-bor/aho-corasick/issues/7
@Test @Test
public void testZeroLength() { public void testZeroLength() {
PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreOverlaps().onlyWholeWords().ignoreCase().addKeyword("")
.build(); PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreOverlaps().onlyWholeWords().ignoreCase().addKeyword("").build();
trie.tokenize( trie.tokenize(
"Try a natural lip and subtle bronzer to keep all the focus on those big bright eyes with NARS Eyeshadow Duo in Rated R And the winner is... Boots No7 Advanced Renewal Anti-ageing Glycolic Peel Kit ($25 amazon.com) won most-appealing peel."); "Try a natural lip and subtle bronzer to keep all the focus on those big bright eyes with NARS Eyeshadow Duo in Rated R And the winner is... Boots No7 Advanced Renewal Anti-ageing Glycolic Peel Kit ($25 amazon.com) won most-appealing peel.");
} }
// @see https://github.com/robert-bor/aho-corasick/issues/8 // @see https://github.com/robert-bor/aho-corasick/issues/8
@Test @Test
public void testUnicode1() { public void testUnicode1() {
String target = "LİKE THIS"; // The second character ('İ') is Unicode, which was read by AC as a 2-byte char String target = "LİKE THIS"; // The second character ('İ') is Unicode, which was read by AC as a 2-byte char
assertEquals("THIS", target.substring(5, 9)); // Java does it the right way assertEquals("THIS", target.substring(5, 9)); // Java does it the right way
PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreCase().onlyWholeWords().addKeyword("this", "pronon:this") PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreCase().onlyWholeWords().addKeyword("this", "pronon:this").build();
.build();
Collection<PayloadEmit<String>> emits = trie.parseText(target); Collection<PayloadEmit<String>> emits = trie.parseText(target);
assertEquals(1, emits.size()); assertEquals(1, emits.size());
Iterator<PayloadEmit<String>> it = emits.iterator(); Iterator<PayloadEmit<String>> it = emits.iterator();
checkEmit(it.next(), 5, 8, "this", "pronon:this"); checkEmit(it.next(), 5, 8, "this", "pronon:this");
} }
// @see https://github.com/robert-bor/aho-corasick/issues/8 // @see https://github.com/robert-bor/aho-corasick/issues/8
@Test @Test
public void testUnicode2() { public void testUnicode2() {
String target = "LİKE THIS"; // The second character ('İ') is Unicode, which was read by AC as a 2-byte char String target = "LİKE THIS"; // The second character ('İ') is Unicode, which was read by AC as a 2-byte char
PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreCase().onlyWholeWords().addKeyword("this", "pronon:this") PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreCase().onlyWholeWords().addKeyword("this", "pronon:this").build();
.build();
assertEquals("THIS", target.substring(5, 9)); // Java does it the right way assertEquals("THIS", target.substring(5, 9)); // Java does it the right way
PayloadEmit<String> firstMatch = trie.firstMatch(target); PayloadEmit<String> firstMatch = trie.firstMatch(target);
checkEmit(firstMatch, 5, 8, "this", "pronon:this"); checkEmit(firstMatch, 5, 8, "this", "pronon:this");
} }
@Test @Test
public void testPartialMatchWhiteSpaces() { public void testPartialMatchWhiteSpaces() {
PayloadTrie<String> trie = PayloadTrie.<String>builder().onlyWholeWordsWhiteSpaceSeparated()
.addKeyword("#sugar-123", "sugar").build(); PayloadTrie<String> trie = PayloadTrie.<String>builder().onlyWholeWordsWhiteSpaceSeparated().addKeyword("#sugar-123", "sugar").build();
Collection<PayloadEmit<String>> emits = trie.parseText("#sugar-123 #sugar-1234"); // left, middle, right test Collection<PayloadEmit<String>> emits = trie.parseText("#sugar-123 #sugar-1234"); // left, middle, right test
assertEquals(1, emits.size()); // Match must not be made assertEquals(1, emits.size()); // Match must not be made
checkEmit(emits.iterator().next(), 0, 9, "#sugar-123", "sugar"); checkEmit(emits.iterator().next(), 0, 9, "#sugar-123", "sugar");
} }
@Test @Test
public void testLargeString() { public void testLargeString() {
final int interval = 100; final int interval = 100;
final int textSize = 1000000; final int textSize = 1000000;
final String keyword = FOOD[1]; final String keyword = FOOD[1];
@ -448,17 +535,21 @@ public class PayloadTrieTest {
assertEquals(textSize / interval, emits.size()); assertEquals(textSize / interval, emits.size());
} }
@Test @Test
public void test_containsMatchWithCaseInsensitive() { public void test_containsMatchWithCaseInsensitive() {
PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreCase().addKeyword("foo", "bar").build(); PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreCase().addKeyword("foo", "bar").build();
assertTrue(trie.containsMatch("FOOBAR")); assertTrue(trie.containsMatch("FOOBAR"));
assertFalse(trie.containsMatch("FO!?AR")); assertFalse(trie.containsMatch("FO!?AR"));
} }
// @see https://github.com/robert-bor/aho-corasick/issues/85 // @see https://github.com/robert-bor/aho-corasick/issues/85
@Test @Test
public void test_wholeWords() { public void test_wholeWords() {
PayloadTrie<String> trie = PayloadTrie.<String>builder().addKeyword("foo", "bar").onlyWholeWords().build(); PayloadTrie<String> trie = PayloadTrie.<String>builder().addKeyword("foo", "bar").onlyWholeWords().build();
// access via PayloadTrie.parseText(CharSequence) // access via PayloadTrie.parseText(CharSequence)
Collection<PayloadEmit<String>> result1 = trie.parseText("foobar"); Collection<PayloadEmit<String>> result1 = trie.parseText("foobar");
@ -470,9 +561,11 @@ public class PayloadTrieTest {
assertEquals(result1, result2); assertEquals(result1, result2);
} }
// @see https://github.com/robert-bor/aho-corasick/issues/85 // @see https://github.com/robert-bor/aho-corasick/issues/85
@Test @Test
public void test_wholeWordsWhiteSpaceSeparated() { public void test_wholeWordsWhiteSpaceSeparated() {
PayloadTrie<String> trie = PayloadTrie.<String>builder().addKeyword("foo", "bar").onlyWholeWordsWhiteSpaceSeparated().build(); PayloadTrie<String> trie = PayloadTrie.<String>builder().addKeyword("foo", "bar").onlyWholeWordsWhiteSpaceSeparated().build();
// access via PayloadTrie.parseText(CharSequence) // access via PayloadTrie.parseText(CharSequence)
Collection<PayloadEmit<String>> result1 = trie.parseText("foo#bar"); Collection<PayloadEmit<String>> result1 = trie.parseText("foo#bar");
@ -484,39 +577,31 @@ public class PayloadTrieTest {
assertEquals(result1, result2); assertEquals(result1, result2);
} }
private void checkEmit(
final PayloadEmit<Food> next, private void checkEmit(final PayloadEmit<Food> next, final int expectedStart, final int expectedEnd, final String expectedKeyword, final Food expectedPayload) {
final int expectedStart,
final int expectedEnd,
final String expectedKeyword,
final Food expectedPayload) {
assertEquals("Start of emit should have been " + expectedStart, expectedStart, next.getStart()); assertEquals("Start of emit should have been " + expectedStart, expectedStart, next.getStart());
assertEquals("End of emit should have been " + expectedEnd, expectedEnd, next.getEnd()); assertEquals("End of emit should have been " + expectedEnd, expectedEnd, next.getEnd());
assertEquals("Keyword of emit shoud be " + expectedKeyword, expectedKeyword, next.getKeyword()); assertEquals("Keyword of emit shoud be " + expectedKeyword, expectedKeyword, next.getKeyword());
assertEquals("Payload of emit shoud be " + expectedPayload, expectedPayload, next.getPayload()); assertEquals("Payload of emit shoud be " + expectedPayload, expectedPayload, next.getPayload());
} }
private void checkEmit(
final PayloadEmit<Integer> next, private void checkEmit(final PayloadEmit<Integer> next, final int expectedStart, final int expectedEnd, final String expectedKeyword, final Integer expectedPayload) {
final int expectedStart,
final int expectedEnd,
final String expectedKeyword,
final Integer expectedPayload) {
assertEquals("Start of emit should have been " + expectedStart, expectedStart, next.getStart()); assertEquals("Start of emit should have been " + expectedStart, expectedStart, next.getStart());
assertEquals("End of emit should have been " + expectedEnd, expectedEnd, next.getEnd()); assertEquals("End of emit should have been " + expectedEnd, expectedEnd, next.getEnd());
assertEquals("Keyword of emit shoud be " + expectedKeyword, expectedKeyword, next.getKeyword()); assertEquals("Keyword of emit shoud be " + expectedKeyword, expectedKeyword, next.getKeyword());
assertEquals("Payload of emit shoud be " + expectedPayload, expectedPayload, next.getPayload()); assertEquals("Payload of emit shoud be " + expectedPayload, expectedPayload, next.getPayload());
} }
private void checkEmit(
final PayloadEmit<String> next, private void checkEmit(final PayloadEmit<String> next, final int expectedStart, final int expectedEnd, final String expectedKeyword, final String expectedPayload) {
final int expectedStart,
final int expectedEnd,
final String expectedKeyword,
final String expectedPayload) {
assertEquals("Start of emit should have been " + expectedStart, expectedStart, next.getStart()); assertEquals("Start of emit should have been " + expectedStart, expectedStart, next.getStart());
assertEquals("End of emit should have been " + expectedEnd, expectedEnd, next.getEnd()); assertEquals("End of emit should have been " + expectedEnd, expectedEnd, next.getEnd());
assertEquals("Keyword of emit shoud be " + expectedKeyword, expectedKeyword, next.getKeyword()); assertEquals("Keyword of emit shoud be " + expectedKeyword, expectedKeyword, next.getKeyword());
assertEquals("Payload of emit shoud be " + expectedPayload, expectedPayload, next.getPayload()); assertEquals("Payload of emit shoud be " + expectedPayload, expectedPayload, next.getPayload());
} }
} }

View File

@ -11,11 +11,9 @@ public class StateTest {
@Test @Test
public void test_constructSequenceOfCharacters() { public void test_constructSequenceOfCharacters() {
final State rootState = new State(); final State rootState = new State();
rootState rootState.addState('a').addState('b').addState('c');
.addState('a')
.addState('b')
.addState('c');
State currentState = rootState.nextState('a'); State currentState = rootState.nextState('a');
assertEquals(1, currentState.getDepth()); assertEquals(1, currentState.getDepth());
currentState = currentState.nextState('b'); currentState = currentState.nextState('b');
@ -26,8 +24,10 @@ public class StateTest {
assertNull(currentState); assertNull(currentState);
} }
@Test @Test
public void test_getStates() { public void test_getStates() {
final State rootState = new State(); final State rootState = new State();
rootState.addState("foo"); rootState.addState("foo");
final State currentState = rootState.nextState('f'); final State currentState = rootState.nextState('f');
@ -37,8 +37,10 @@ public class StateTest {
assertEquals(currentState, states.iterator().next()); assertEquals(currentState, states.iterator().next());
} }
@Test @Test
public void test_getTransitions() { public void test_getTransitions() {
final State rootState = new State(); final State rootState = new State();
rootState.addState("foo"); rootState.addState("foo");
final State currentState = rootState.nextState('f'); final State currentState = rootState.nextState('f');
@ -48,20 +50,23 @@ public class StateTest {
assertEquals(Character.valueOf('f'), transitions.iterator().next()); assertEquals(Character.valueOf('f'), transitions.iterator().next());
} }
@Test @Test
public void test_failure() { public void test_getFailure() {
final State failureState = new State(); final State failureState = new State();
final State rootState = new State(); final State rootState = new State();
rootState.setFailure(failureState); rootState.setFailure(failureState);
assertEquals(failureState, rootState.failure()); assertEquals(failureState, rootState.getFailure());
} }
@Test @Test
public void test_checkEmits() { public void test_checkEmits() {
final State rootState = new State(); final State rootState = new State();
rootState.addState('a') rootState.addState('a').addEmit(Collections.singleton("tag"));
.addEmit(Collections.singleton("tag"));
final Collection<String> actual = rootState.nextState('a').emit(); final Collection<String> actual = rootState.nextState('a').emit();
assertEquals(1, actual.size()); assertEquals(1, actual.size());

View File

@ -6,39 +6,42 @@ import static java.util.concurrent.ThreadLocalRandom.current;
* Contains functionality common to tests. * Contains functionality common to tests.
*/ */
public class TestHelper { public class TestHelper {
/**
* Injects keywords into a string builder.
*
* @param source Should contain a bunch of random data that cannot match
* any keyword.
* @param keyword A keyword to inject repeatedly in the text.
* @param interval How often to inject the keyword.
*/
@SuppressWarnings( "SameParameterValue" )
static void injectKeyword(
final StringBuilder source,
final String keyword,
final int interval ) {
final int length = source.length();
for( int i = 0; i < length; i += interval ) {
source.replace( i, i + keyword.length(), keyword );
}
}
/** /**
* Generates a random sequence of ASCII numbers. * Injects keywords into a string builder.
* *
* @param count The number of numbers to generate. * @param source Should contain a bunch of random data that cannot match
* @return A character sequence filled with random digits. * any keyword.
*/ * @param keyword A keyword to inject repeatedly in the text.
@SuppressWarnings( "SameParameterValue" ) * @param interval How often to inject the keyword.
public static StringBuilder randomNumbers( int count ) { */
final StringBuilder sb = new StringBuilder( count ); @SuppressWarnings("SameParameterValue")
static void injectKeyword(final StringBuilder source, final String keyword, final int interval) {
while( --count > 0 ) { final int length = source.length();
sb.append( current().nextInt( 0, 10 ) ); for (int i = 0; i < length; i += interval) {
source.replace(i, i + keyword.length(), keyword);
}
}
/**
* Generates a random sequence of ASCII numbers.
*
* @param count The number of numbers to generate.
* @return A character sequence filled with random digits.
*/
@SuppressWarnings("SameParameterValue")
public static StringBuilder randomNumbers(int count) {
int localCount = count;
final StringBuilder sb = new StringBuilder(localCount);
while (--localCount > 0) {
sb.append(current().nextInt(0, 10));
}
return sb;
} }
return sb;
}
} }

File diff suppressed because it is too large Load Diff