migrate to gradle
This commit is contained in:
parent
0856732f88
commit
45494521d7
@ -1,19 +1,21 @@
|
||||
stages:
|
||||
- test
|
||||
- versioning
|
||||
- deploy
|
||||
variables:
|
||||
GIT_SUBMODULE_STRATEGY: recursive
|
||||
GIT_SUBMODULE_FORCE_HTTPS: 'true'
|
||||
include:
|
||||
- project: 'gitlab/gitlab'
|
||||
ref: 'main'
|
||||
file: 'ci-templates/maven_deps.yml'
|
||||
file: 'ci-templates/gradle_java.yml'
|
||||
|
||||
verify:
|
||||
stage: test
|
||||
|
||||
deploy:
|
||||
stage: deploy
|
||||
tags:
|
||||
- dind
|
||||
script:
|
||||
- echo "Erfolgreich getestet"
|
||||
|
||||
- echo "Building with gradle version ${BUILDVERSION}"
|
||||
- gradle -Pversion=${BUILDVERSION} publish
|
||||
- echo "BUILDVERSION=$BUILDVERSION" >> version.env
|
||||
artifacts:
|
||||
reports:
|
||||
dotenv: version.env
|
||||
rules:
|
||||
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
|
||||
- if: $CI_COMMIT_BRANCH =~ /^release/
|
||||
- if: $CI_COMMIT_TAG
|
||||
@ -1,6 +0,0 @@
|
||||
language: java
|
||||
install: mvn install -DskipTests=true -Dgpg.skip=true
|
||||
jdk:
|
||||
- openjdk8
|
||||
after_success:
|
||||
- bash <(curl -s https://codecov.io/bash)
|
||||
63
build.gradle.kts
Normal file
63
build.gradle.kts
Normal file
@ -0,0 +1,63 @@
|
||||
plugins {
|
||||
`java-library`
|
||||
`maven-publish`
|
||||
pmd
|
||||
checkstyle
|
||||
id("io.freefair.lombok") version "8.4"
|
||||
}
|
||||
|
||||
repositories {
|
||||
mavenLocal()
|
||||
maven {
|
||||
url = uri("https://nexus.knecon.com/repository/gindev/")
|
||||
credentials {
|
||||
username = providers.gradleProperty("mavenUser").getOrNull();
|
||||
password = providers.gradleProperty("mavenPassword").getOrNull();
|
||||
}
|
||||
}
|
||||
|
||||
maven {
|
||||
url = uri("https://repo.maven.apache.org/maven2/")
|
||||
}
|
||||
}
|
||||
|
||||
dependencies {
|
||||
testImplementation("junit:junit:4.13.2")
|
||||
}
|
||||
|
||||
group = "org.ahocorasick"
|
||||
description = "Aho-CoraSick algorithm for efficient string matching"
|
||||
java.sourceCompatibility = JavaVersion.VERSION_17
|
||||
java.targetCompatibility = JavaVersion.VERSION_17
|
||||
|
||||
java {
|
||||
withSourcesJar()
|
||||
withJavadocJar()
|
||||
}
|
||||
|
||||
publishing {
|
||||
publications.create<MavenPublication>("maven") {
|
||||
from(components["java"])
|
||||
}
|
||||
}
|
||||
|
||||
tasks.withType<JavaCompile>() {
|
||||
options.encoding = "UTF-8"
|
||||
}
|
||||
|
||||
tasks.withType<Javadoc>() {
|
||||
options.encoding = "UTF-8"
|
||||
}
|
||||
|
||||
pmd {
|
||||
isConsoleOutput = true
|
||||
}
|
||||
|
||||
tasks.pmdMain {
|
||||
pmd.ruleSetFiles = files("${rootDir}/config/pmd/pmd.xml")
|
||||
}
|
||||
|
||||
tasks.pmdTest {
|
||||
pmd.ruleSetFiles = files("${rootDir}/config/pmd/test_pmd.xml")
|
||||
}
|
||||
|
||||
38
config/checkstyle/checkstyle.xml
Normal file
38
config/checkstyle/checkstyle.xml
Normal file
@ -0,0 +1,38 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE module PUBLIC "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
|
||||
"http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
|
||||
<module name="Checker">
|
||||
<property
|
||||
name="severity"
|
||||
value="error"/>
|
||||
<module name="TreeWalker">
|
||||
<module name="SuppressWarningsHolder"/>
|
||||
<module name="MissingDeprecated"/>
|
||||
<module name="MissingOverride"/>
|
||||
<module name="AnnotationLocation"/>
|
||||
<module name="NonEmptyAtclauseDescription"/>
|
||||
<module name="IllegalImport"/>
|
||||
<module name="RedundantImport"/>
|
||||
<module name="RedundantModifier"/>
|
||||
<module name="EmptyBlock"/>
|
||||
<module name="DefaultComesLast"/>
|
||||
<module name="EmptyStatement"/>
|
||||
<module name="EqualsHashCode"/>
|
||||
<module name="ExplicitInitialization"/>
|
||||
<module name="IllegalInstantiation"/>
|
||||
<module name="ModifiedControlVariable"/>
|
||||
<module name="MultipleVariableDeclarations"/>
|
||||
<module name="PackageDeclaration"/>
|
||||
<module name="ParameterAssignment"/>
|
||||
<module name="SimplifyBooleanExpression"/>
|
||||
<module name="SimplifyBooleanReturn"/>
|
||||
<module name="StringLiteralEquality"/>
|
||||
<module name="OneStatementPerLine"/>
|
||||
<module name="FinalClass"/>
|
||||
<module name="ArrayTypeStyle"/>
|
||||
<module name="UpperEll"/>
|
||||
<module name="OuterTypeFilename"/>
|
||||
</module>
|
||||
<module name="FileTabCharacter"/>
|
||||
<module name="SuppressWarningsFilter"/>
|
||||
</module>
|
||||
21
config/pmd/pmd.xml
Normal file
21
config/pmd/pmd.xml
Normal file
@ -0,0 +1,21 @@
|
||||
<?xml version="1.0"?>
|
||||
<ruleset name="Custom ruleset"
|
||||
xmlns="http://pmd.sourceforge.net/ruleset/2.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://pmd.sourceforge.net/ruleset/2.0.0 http://pmd.sourceforge.net/ruleset_2_0_0.xsd">
|
||||
|
||||
<description>
|
||||
Knecon ruleset checks the code for bad stuff
|
||||
</description>
|
||||
|
||||
<rule ref="category/java/errorprone.xml">
|
||||
<exclude name="MissingSerialVersionUID"/>
|
||||
<exclude name="AvoidLiteralsInIfCondition"/>
|
||||
<exclude name="DataflowAnomalyAnalysis"/>
|
||||
<exclude name="AvoidDuplicateLiterals"/>
|
||||
<exclude name="NullAssignment"/>
|
||||
<exclude name="AssignmentInOperand"/>
|
||||
<exclude name="BeanMembersShouldSerialize"/>
|
||||
</rule>
|
||||
|
||||
</ruleset>
|
||||
11
config/pmd/test_pmd.xml
Normal file
11
config/pmd/test_pmd.xml
Normal file
@ -0,0 +1,11 @@
|
||||
<?xml version="1.0"?>
|
||||
<ruleset name="Custom ruleset"
|
||||
xmlns="http://pmd.sourceforge.net/ruleset/2.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://pmd.sourceforge.net/ruleset/2.0.0 http://pmd.sourceforge.net/ruleset_2_0_0.xsd">
|
||||
|
||||
<description>
|
||||
Knecon test ruleset checks the code for bad stuff
|
||||
</description>
|
||||
|
||||
</ruleset>
|
||||
1
gradle.properties.kts
Normal file
1
gradle.properties.kts
Normal file
@ -0,0 +1 @@
|
||||
version = 0.7-SNAPSHOT
|
||||
183
pom.xml
183
pom.xml
@ -1,183 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<groupId>org.ahocorasick</groupId>
|
||||
<artifactId>ahocorasick</artifactId>
|
||||
<version>0.7-SNAPSHOT</version>
|
||||
<packaging>jar</packaging>
|
||||
<name>Aho-CoraSick algorithm for efficient string matching</name>
|
||||
<description>Java library for efficient string matching against a large set of keywords</description>
|
||||
<inceptionYear>2014</inceptionYear>
|
||||
<url>https://github.com/robert-bor/aho-corasick</url>
|
||||
|
||||
<distributionManagement>
|
||||
<snapshotRepository>
|
||||
<id>ossrh</id>
|
||||
<url>https://oss.sonatype.org/content/repositories/snapshots</url>
|
||||
</snapshotRepository>
|
||||
</distributionManagement>
|
||||
|
||||
<organization>
|
||||
<name>42 BV</name>
|
||||
<url>http://blog.42.nl/</url>
|
||||
</organization>
|
||||
|
||||
<licenses>
|
||||
<license>
|
||||
<name>The Apache Software License, Version 2.0</name>
|
||||
<url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
|
||||
<distribution>repo</distribution>
|
||||
</license>
|
||||
</licenses>
|
||||
|
||||
<scm>
|
||||
<url>scm:git://github.com/robert-bor/aho-corasick</url>
|
||||
<connection>scm:git://github.com/robert-bor/aho-corasick</connection>
|
||||
</scm>
|
||||
|
||||
<developers>
|
||||
<developer>
|
||||
<name>Robert Bor</name>
|
||||
<organization>42</organization>
|
||||
</developer>
|
||||
<developer>
|
||||
<name>Daniel Beck</name>
|
||||
<organization>neoSearch UG (haftungsbeschränkt)</organization>
|
||||
</developer>
|
||||
<developer>
|
||||
<name>Dave Jarvis</name>
|
||||
<organization>White Magic Software, Ltd.</organization>
|
||||
</developer>
|
||||
</developers>
|
||||
|
||||
<properties>
|
||||
<java.version>1.8</java.version>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
|
||||
<junit.version>4.13.2</junit.version>
|
||||
<!-- Reporting -->
|
||||
<maven.cobertura.version>2.5.2</maven.cobertura.version>
|
||||
<maven.javadoc.version>2.8</maven.javadoc.version>
|
||||
<maven.project.version>2.4</maven.project.version>
|
||||
<maven.site.plugin.version>3.3</maven.site.plugin.version>
|
||||
</properties>
|
||||
|
||||
<!-- <repositories>
|
||||
<repository>
|
||||
<id>central</id>
|
||||
<name>Maven Repository Switchboard</name>
|
||||
<url>https://repo1.maven.org/maven2/</url>
|
||||
</repository>
|
||||
</repositories> -->
|
||||
|
||||
<dependencies>
|
||||
|
||||
<!-- Used for unit testing -->
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<version>${junit.version}</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<defaultGoal>install</defaultGoal>
|
||||
<plugins>
|
||||
|
||||
<!-- <plugin>
|
||||
<groupId>org.sonatype.plugins</groupId>
|
||||
<artifactId>nexus-staging-maven-plugin</artifactId>
|
||||
<version>1.6.8</version>
|
||||
<extensions>true</extensions>
|
||||
<configuration>
|
||||
<serverId>ossrh</serverId>
|
||||
<nexusUrl>https://oss.sonatype.org/</nexusUrl>
|
||||
<autoReleaseAfterClose>false</autoReleaseAfterClose>
|
||||
</configuration>
|
||||
</plugin> -->
|
||||
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<version>3.8.1</version>
|
||||
<configuration>
|
||||
<source>${java.version}</source>
|
||||
<target>${java.version}</target>
|
||||
<encoding>${project.build.sourceEncoding}</encoding>
|
||||
</configuration>
|
||||
</plugin>
|
||||
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-javadoc-plugin</artifactId>
|
||||
<version>3.4.1</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>attach-javadocs</id>
|
||||
<goals>
|
||||
<goal>jar</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<source>8</source>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-source-plugin</artifactId>
|
||||
<version>3.2.1</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>attach-sources</id>
|
||||
<goals>
|
||||
<goal>jar-no-fork</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
|
||||
<!-- <plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-gpg-plugin</artifactId>
|
||||
<version>1.5</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>sign-artifacts</id>
|
||||
<phase>verify</phase>
|
||||
<goals>
|
||||
<goal>sign</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin> -->
|
||||
|
||||
<plugin>
|
||||
<groupId>org.jacoco</groupId>
|
||||
<artifactId>jacoco-maven-plugin</artifactId>
|
||||
<version>0.8.6</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<goals>
|
||||
<goal>prepare-agent</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
<execution>
|
||||
<id>report</id>
|
||||
<phase>test</phase>
|
||||
<goals>
|
||||
<goal>report</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
|
||||
1
settings.gradle.kts
Normal file
1
settings.gradle.kts
Normal file
@ -0,0 +1 @@
|
||||
rootProject.name = "ahocorasick"
|
||||
@ -12,6 +12,7 @@ public class Interval implements Intervalable {
|
||||
private final int start;
|
||||
private final int end;
|
||||
|
||||
|
||||
/**
|
||||
* Constructs an interval with a start and end position.
|
||||
*
|
||||
@ -19,10 +20,12 @@ public class Interval implements Intervalable {
|
||||
* @param end The interval's ending text position.
|
||||
*/
|
||||
public Interval(final int start, final int end) {
|
||||
|
||||
this.start = start;
|
||||
this.end = end;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the starting offset into the text for this interval.
|
||||
*
|
||||
@ -30,9 +33,11 @@ public class Interval implements Intervalable {
|
||||
*/
|
||||
@Override
|
||||
public int getStart() {
|
||||
|
||||
return this.start;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the ending offset into the text for this interval.
|
||||
*
|
||||
@ -40,9 +45,11 @@ public class Interval implements Intervalable {
|
||||
*/
|
||||
@Override
|
||||
public int getEnd() {
|
||||
|
||||
return this.end;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the length of the interval.
|
||||
*
|
||||
@ -50,9 +57,11 @@ public class Interval implements Intervalable {
|
||||
*/
|
||||
@Override
|
||||
public int size() {
|
||||
|
||||
return end - start + 1;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Answers whether the given interval overlaps this interval
|
||||
* instance.
|
||||
@ -61,31 +70,38 @@ public class Interval implements Intervalable {
|
||||
* @return {@code true} if the intervals overlap.
|
||||
*/
|
||||
public boolean overlapsWith(final Interval other) {
|
||||
return this.start <= other.getEnd() &&
|
||||
this.end >= other.getStart();
|
||||
|
||||
return this.start <= other.getEnd() && this.end >= other.getStart();
|
||||
}
|
||||
|
||||
|
||||
public boolean overlapsWith(int point) {
|
||||
|
||||
return this.start <= point && point <= this.end;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
|
||||
if (!(o instanceof Intervalable)) {
|
||||
return false;
|
||||
}
|
||||
Intervalable other = (Intervalable) o;
|
||||
return this.start == other.getStart() &&
|
||||
this.end == other.getEnd();
|
||||
return this.start == other.getStart() && this.end == other.getEnd();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
|
||||
return this.start % 100 + this.end % 100;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int compareTo(Object o) {
|
||||
|
||||
if (!(o instanceof Intervalable)) {
|
||||
return -1;
|
||||
}
|
||||
@ -94,6 +110,7 @@ public class Interval implements Intervalable {
|
||||
return comparison != 0 ? comparison : this.end - other.getEnd();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the starting offset and ending offset separated
|
||||
* by a full colon (:).
|
||||
@ -102,6 +119,8 @@ public class Interval implements Intervalable {
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
return this.start + ":" + this.end;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -6,14 +6,19 @@ import java.util.List;
|
||||
|
||||
public class IntervalNode {
|
||||
|
||||
private enum Direction {LEFT, RIGHT}
|
||||
private enum Direction {
|
||||
LEFT,
|
||||
RIGHT
|
||||
}
|
||||
|
||||
private IntervalNode left;
|
||||
private IntervalNode right;
|
||||
private int point;
|
||||
private List<Intervalable> intervals = new ArrayList<>();
|
||||
|
||||
|
||||
public IntervalNode(final List<Intervalable> intervals) {
|
||||
|
||||
this.point = determineMedian(intervals);
|
||||
|
||||
final List<Intervalable> toLeft = new ArrayList<>();
|
||||
@ -37,7 +42,9 @@ public class IntervalNode {
|
||||
}
|
||||
}
|
||||
|
||||
public int determineMedian(final List<Intervalable> intervals) {
|
||||
|
||||
private int determineMedian(final List<Intervalable> intervals) {
|
||||
|
||||
int start = -1;
|
||||
int end = -1;
|
||||
for (Intervalable interval : intervals) {
|
||||
@ -53,7 +60,9 @@ public class IntervalNode {
|
||||
return (start + end) / 2;
|
||||
}
|
||||
|
||||
|
||||
public List<Intervalable> findOverlaps(final Intervalable interval) {
|
||||
|
||||
final List<Intervalable> overlaps = new ArrayList<>();
|
||||
|
||||
if (this.point < interval.getStart()) {
|
||||
@ -74,10 +83,9 @@ public class IntervalNode {
|
||||
return overlaps;
|
||||
}
|
||||
|
||||
protected void addToOverlaps(
|
||||
final Intervalable interval,
|
||||
final List<Intervalable> overlaps,
|
||||
final List<Intervalable> newOverlaps) {
|
||||
|
||||
protected void addToOverlaps(final Intervalable interval, final List<Intervalable> overlaps, final List<Intervalable> newOverlaps) {
|
||||
|
||||
for (final Intervalable currentInterval : newOverlaps) {
|
||||
if (!currentInterval.equals(interval)) {
|
||||
overlaps.add(currentInterval);
|
||||
@ -85,16 +93,21 @@ public class IntervalNode {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
protected List<Intervalable> checkForOverlapsToTheLeft(final Intervalable interval) {
|
||||
|
||||
return checkForOverlaps(interval, Direction.LEFT);
|
||||
}
|
||||
|
||||
|
||||
protected List<Intervalable> checkForOverlapsToTheRight(final Intervalable interval) {
|
||||
|
||||
return checkForOverlaps(interval, Direction.RIGHT);
|
||||
}
|
||||
|
||||
protected List<Intervalable> checkForOverlaps(
|
||||
final Intervalable interval, final Direction direction) {
|
||||
|
||||
protected List<Intervalable> checkForOverlaps(final Intervalable interval, final Direction direction) {
|
||||
|
||||
final List<Intervalable> overlaps = new ArrayList<>();
|
||||
|
||||
for (final Intervalable currentInterval : this.intervals) {
|
||||
@ -115,9 +128,10 @@ public class IntervalNode {
|
||||
return overlaps;
|
||||
}
|
||||
|
||||
|
||||
protected List<Intervalable> findOverlappingRanges(IntervalNode node, Intervalable interval) {
|
||||
return node == null
|
||||
? Collections.<Intervalable>emptyList()
|
||||
: node.findOverlaps(interval);
|
||||
|
||||
return node == null ? Collections.<Intervalable>emptyList() : node.findOverlaps(interval);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -10,10 +10,13 @@ public class IntervalTree {
|
||||
|
||||
private final IntervalNode rootNode;
|
||||
|
||||
|
||||
public IntervalTree(List<Intervalable> intervals) {
|
||||
|
||||
this.rootNode = new IntervalNode(intervals);
|
||||
}
|
||||
|
||||
|
||||
public List<Intervalable> removeOverlaps(final List<Intervalable> intervals) {
|
||||
|
||||
// Sort the intervals on size, then left-most position
|
||||
@ -42,7 +45,9 @@ public class IntervalTree {
|
||||
return intervals;
|
||||
}
|
||||
|
||||
|
||||
public List<Intervalable> findOverlaps(final Intervalable interval) {
|
||||
|
||||
return rootNode.findOverlaps(interval);
|
||||
}
|
||||
|
||||
|
||||
@ -4,8 +4,10 @@ public interface Intervalable extends Comparable {
|
||||
|
||||
int getStart();
|
||||
|
||||
|
||||
int getEnd();
|
||||
|
||||
|
||||
int size();
|
||||
|
||||
}
|
||||
|
||||
@ -6,6 +6,7 @@ public class IntervalableComparatorByPosition implements Comparator<Intervalable
|
||||
|
||||
@Override
|
||||
public int compare(final Intervalable intervalable, final Intervalable intervalable2) {
|
||||
|
||||
return intervalable.getStart() - intervalable2.getStart();
|
||||
}
|
||||
|
||||
|
||||
@ -6,6 +6,7 @@ public class IntervalableComparatorBySize implements Comparator<Intervalable> {
|
||||
|
||||
@Override
|
||||
public int compare(final Intervalable intervalable, final Intervalable intervalable2) {
|
||||
|
||||
int comparison = intervalable2.size() - intervalable.size();
|
||||
|
||||
if (comparison == 0) {
|
||||
|
||||
@ -4,16 +4,22 @@ public class DefaultToken extends Token {
|
||||
|
||||
private PayloadToken<String> payloadToken;
|
||||
|
||||
|
||||
public DefaultToken(PayloadToken<String> payloadToken) {
|
||||
|
||||
super(payloadToken.getFragment());
|
||||
this.payloadToken = payloadToken;
|
||||
}
|
||||
|
||||
|
||||
public boolean isMatch() {
|
||||
|
||||
return payloadToken.isMatch();
|
||||
}
|
||||
|
||||
|
||||
public Emit getEmit() {
|
||||
|
||||
PayloadEmit<String> emit = payloadToken.getEmit();
|
||||
return new Emit(emit.getStart(), emit.getEnd(), emit.getKeyword());
|
||||
}
|
||||
|
||||
@ -7,19 +7,26 @@ import org.ahocorasick.interval.Intervalable;
|
||||
* Responsible for tracking the bounds of matched terms.
|
||||
*/
|
||||
public class Emit extends Interval implements Intervalable {
|
||||
|
||||
private final String keyword;
|
||||
|
||||
|
||||
public Emit(final int start, final int end, final String keyword) {
|
||||
|
||||
super(start, end);
|
||||
this.keyword = keyword;
|
||||
}
|
||||
|
||||
|
||||
public String getKeyword() {
|
||||
|
||||
return this.keyword;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
return super.toString() + "=" + this.keyword;
|
||||
}
|
||||
|
||||
|
||||
@ -3,16 +3,21 @@ package org.ahocorasick.trie;
|
||||
public class FragmentToken extends Token {
|
||||
|
||||
public FragmentToken(String fragment) {
|
||||
|
||||
super(fragment);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean isMatch() {
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Emit getEmit() {
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
@ -4,19 +4,26 @@ public class MatchToken extends Token {
|
||||
|
||||
private final Emit emit;
|
||||
|
||||
|
||||
public MatchToken(final String fragment, final Emit emit) {
|
||||
|
||||
super(fragment);
|
||||
this.emit = emit;
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean isMatch() {
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Emit getEmit() {
|
||||
|
||||
return this.emit;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -1,32 +1,21 @@
|
||||
package org.ahocorasick.trie;
|
||||
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.Getter;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
/**
|
||||
* Contains the matched keyword and some payload data.
|
||||
*
|
||||
* @author Daniel Beck
|
||||
* @param <T> The type of the wrapped payload data.
|
||||
* @author Daniel Beck
|
||||
*/
|
||||
public class Payload<T> implements Comparable<Payload<T>> {
|
||||
@Getter
|
||||
@EqualsAndHashCode
|
||||
@RequiredArgsConstructor
|
||||
public class Payload<T> {
|
||||
|
||||
private final String keyword;
|
||||
private final T data;
|
||||
|
||||
public Payload(final String keyword, final T data) {
|
||||
super();
|
||||
this.keyword = keyword;
|
||||
this.data = data;
|
||||
}
|
||||
|
||||
public String getKeyword() {
|
||||
return keyword;
|
||||
}
|
||||
|
||||
public T getData() {
|
||||
return data;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(Payload<T> other) {
|
||||
return keyword.compareTo(other.getKeyword());
|
||||
}
|
||||
}
|
||||
|
||||
@ -15,6 +15,7 @@ public class PayloadEmit<T> extends Interval implements Intervalable {
|
||||
|
||||
private final T payload;
|
||||
|
||||
|
||||
/**
|
||||
* Creates a PayloadEmit.
|
||||
*
|
||||
@ -24,26 +25,34 @@ public class PayloadEmit<T> extends Interval implements Intervalable {
|
||||
* @param payload Emitted payload data.
|
||||
*/
|
||||
public PayloadEmit(final int start, final int end, String keyword, T payload) {
|
||||
|
||||
super(start, end);
|
||||
this.keyword = keyword;
|
||||
this.payload = payload;
|
||||
}
|
||||
|
||||
|
||||
public String getKeyword() {
|
||||
|
||||
return this.keyword;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the payload associated to this emit.
|
||||
*
|
||||
* @return the associated payload
|
||||
*/
|
||||
public T getPayload() {
|
||||
|
||||
return this.payload;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
return super.toString() + "=" + this.keyword + (this.payload != null ? "->" + this.payload : "");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -14,19 +14,25 @@ package org.ahocorasick.trie;
|
||||
public class PayloadFragmentToken<T> extends PayloadToken<T> {
|
||||
|
||||
public PayloadFragmentToken(String fragment) {
|
||||
|
||||
super(fragment);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean isMatch() {
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns null.
|
||||
*/
|
||||
@Override
|
||||
public PayloadEmit<T> getEmit() {
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -7,26 +7,32 @@ package org.ahocorasick.trie;
|
||||
* always returns {@code true}.
|
||||
* </p>
|
||||
*
|
||||
* @author Daniel Beck
|
||||
*
|
||||
* @param <T> The Type of the emitted payloads.
|
||||
* @author Daniel Beck
|
||||
*/
|
||||
public class PayloadMatchToken<T> extends PayloadToken<T> {
|
||||
|
||||
private final PayloadEmit<T> emit;
|
||||
|
||||
|
||||
public PayloadMatchToken(final String fragment, final PayloadEmit<T> emit) {
|
||||
|
||||
super(fragment);
|
||||
this.emit = emit;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean isMatch() {
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public PayloadEmit<T> getEmit() {
|
||||
|
||||
return this.emit;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -1,6 +1,10 @@
|
||||
package org.ahocorasick.trie;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
@ -27,13 +31,14 @@ import java.util.*;
|
||||
public class PayloadState<T> {
|
||||
|
||||
/**
|
||||
* effective the size of the keyword
|
||||
* effectively the size of the keyword.
|
||||
*/
|
||||
@Getter
|
||||
private final int depth;
|
||||
|
||||
/**
|
||||
* only used for the root state to refer to itself in case no matches have been
|
||||
* found
|
||||
* found.
|
||||
*/
|
||||
private final PayloadState<T> rootState;
|
||||
|
||||
@ -44,26 +49,34 @@ public class PayloadState<T> {
|
||||
private final Map<Character, PayloadState<T>> success = new HashMap<>();
|
||||
|
||||
/**
|
||||
* if no matching states are found, the failure state will be returned
|
||||
* if no matching states are found, the failure state will be returned.
|
||||
*/
|
||||
@Getter
|
||||
@Setter
|
||||
private PayloadState<T> failure;
|
||||
|
||||
/**
|
||||
* whenever this state is reached, it will emit the matched keywords for future
|
||||
* reference
|
||||
* reference.
|
||||
*/
|
||||
private Set<Payload<T>> emits;
|
||||
|
||||
|
||||
public PayloadState() {
|
||||
|
||||
this(0);
|
||||
}
|
||||
|
||||
|
||||
public PayloadState(final int depth) {
|
||||
|
||||
this.depth = depth;
|
||||
this.rootState = depth == 0 ? this : null;
|
||||
}
|
||||
|
||||
|
||||
private PayloadState<T> nextState(final Character character, final boolean ignoreRootState) {
|
||||
|
||||
PayloadState<T> nextState = this.success.get(character);
|
||||
|
||||
if (!ignoreRootState && nextState == null && this.rootState != null) {
|
||||
@ -73,15 +86,21 @@ public class PayloadState<T> {
|
||||
return nextState;
|
||||
}
|
||||
|
||||
|
||||
public PayloadState<T> nextState(final Character character) {
|
||||
|
||||
return nextState(character, false);
|
||||
}
|
||||
|
||||
|
||||
public PayloadState<T> nextStateIgnoreRootState(Character character) {
|
||||
|
||||
return nextState(character, true);
|
||||
}
|
||||
|
||||
|
||||
public PayloadState<T> addState(Character character) {
|
||||
|
||||
PayloadState<T> nextState = nextStateIgnoreRootState(character);
|
||||
if (nextState == null) {
|
||||
nextState = new PayloadState<>(this.depth + 1);
|
||||
@ -90,9 +109,6 @@ public class PayloadState<T> {
|
||||
return nextState;
|
||||
}
|
||||
|
||||
public int getDepth() {
|
||||
return this.depth;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a payload to be emitted for this state.
|
||||
@ -100,45 +116,49 @@ public class PayloadState<T> {
|
||||
* @param payload to be emitted.
|
||||
*/
|
||||
public void addEmit(Payload<T> payload) {
|
||||
|
||||
if (this.emits == null) {
|
||||
this.emits = new TreeSet<>();
|
||||
this.emits = new HashSet<>();
|
||||
}
|
||||
this.emits.add(payload);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Adds a collection of payloads to be emitted for this state.
|
||||
*
|
||||
* @param emits Collection of payloads to be emitted.
|
||||
*/
|
||||
public void addEmit(Collection<Payload<T>> emits) {
|
||||
|
||||
for (Payload<T> emit : emits) {
|
||||
addEmit(emit);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns a collection of emitted payloads for this state.
|
||||
*
|
||||
* @return Collection of emitted payloads.
|
||||
*/
|
||||
public Collection<Payload<T>> emit() {
|
||||
return this.emits == null ? Collections.<Payload<T>>emptyList() : this.emits;
|
||||
|
||||
return this.emits == null ? Collections.<Payload<T>>emptyList() : this.emits.stream()
|
||||
.sorted(Comparator.comparing(Payload::getKeyword))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
public PayloadState<T> failure() {
|
||||
return this.failure;
|
||||
}
|
||||
|
||||
public void setFailure(PayloadState<T> failState) {
|
||||
this.failure = failState;
|
||||
}
|
||||
|
||||
public Collection<PayloadState<T>> getStates() {
|
||||
|
||||
return this.success.values();
|
||||
}
|
||||
|
||||
|
||||
public Collection<Character> getTransitions() {
|
||||
|
||||
return this.success.keySet();
|
||||
}
|
||||
|
||||
}
|
||||
@ -9,24 +9,33 @@ package org.ahocorasick.trie;
|
||||
* @param <T> The Type of the emitted payloads.
|
||||
*/
|
||||
public abstract class PayloadToken<T> {
|
||||
|
||||
private String fragment;
|
||||
|
||||
|
||||
public PayloadToken(String fragment) {
|
||||
|
||||
this.fragment = fragment;
|
||||
}
|
||||
|
||||
|
||||
public String getFragment() {
|
||||
|
||||
return this.fragment;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Return {@code true} if a search term matched.
|
||||
*
|
||||
* @return {@code true} if this is a match
|
||||
*/
|
||||
public abstract boolean isMatch();
|
||||
|
||||
|
||||
/**
|
||||
* @return the payload
|
||||
*/
|
||||
public abstract PayloadEmit<T> getEmit();
|
||||
|
||||
}
|
||||
|
||||
@ -1,7 +1,9 @@
|
||||
package org.ahocorasick.trie;
|
||||
|
||||
import static java.lang.Character.isWhitespace;
|
||||
import static java.lang.Character.toLowerCase;
|
||||
|
||||
import java.util.Deque;
|
||||
import java.util.LinkedList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
@ -23,8 +25,8 @@ import org.ahocorasick.trie.handler.StatefulPayloadEmitHandler;
|
||||
* added keyword.
|
||||
* </p>
|
||||
*
|
||||
* @author Daniel Beck
|
||||
* @param <T> The type of the supplied payload.
|
||||
* @author Daniel Beck
|
||||
*/
|
||||
public class PayloadTrie<T> {
|
||||
|
||||
@ -32,11 +34,14 @@ public class PayloadTrie<T> {
|
||||
|
||||
private final PayloadState<T> rootState;
|
||||
|
||||
|
||||
protected PayloadTrie(final TrieConfig trieConfig) {
|
||||
|
||||
this.trieConfig = trieConfig;
|
||||
this.rootState = new PayloadState<>();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Used by the builder to add a text search keyword with an emit payload.
|
||||
*
|
||||
@ -45,6 +50,7 @@ public class PayloadTrie<T> {
|
||||
* @throws NullPointerException if the keyword is null.
|
||||
*/
|
||||
private void addKeyword(String keyword, T emit) {
|
||||
|
||||
if (keyword.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
@ -52,6 +58,7 @@ public class PayloadTrie<T> {
|
||||
addState(keyword).addEmit(new Payload<>(keyword, emit));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Used by the builder to add a text search keyword.
|
||||
*
|
||||
@ -59,6 +66,7 @@ public class PayloadTrie<T> {
|
||||
* @throws NullPointerException if the keyword is null.
|
||||
*/
|
||||
private void addKeyword(String keyword) {
|
||||
|
||||
if (keyword.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
@ -66,15 +74,21 @@ public class PayloadTrie<T> {
|
||||
addState(keyword).addEmit(new Payload<>(keyword, null));
|
||||
}
|
||||
|
||||
|
||||
/**
 * Walks the trie from the root along the characters of {@code keyword}, calling
 * {@code addState} on the current state for every character that is kept.
 * <p>
 * Whitespace characters are skipped when the trie ignores whitespace, and characters are
 * lower-cased when the trie is case-insensitive. If every character is skipped, the root
 * state itself is returned.
 *
 * @param keyword the keyword whose characters are inserted
 * @return the state reached after the last kept character
 */
private PayloadState<T> addState(final String keyword) {
|
||||
|
||||
PayloadState<T> state = getRootState();
|
||||
for (final Character character : keyword.toCharArray()) {
|
||||
// Skipped characters never become part of the trie path.
if (isIgnoreWhiteSpace() && isWhitespace(character)) {
|
||||
continue;
|
||||
}
|
||||
Character adjustedChar = isCaseInsensitive() ? Character.toLowerCase(character) : character;
|
||||
state = state.addState(adjustedChar);
|
||||
}
|
||||
return state;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Tokenizes the specified text and returns the emitted outputs.
|
||||
*
|
||||
@ -82,6 +96,7 @@ public class PayloadTrie<T> {
|
||||
* @return the emitted outputs
|
||||
*/
|
||||
public Collection<PayloadToken<T>> tokenize(final String text) {
|
||||
|
||||
final Collection<PayloadToken<T>> tokens = new LinkedList<>();
|
||||
final Collection<PayloadEmit<T>> collectedEmits = parseText(text);
|
||||
int lastCollectedPosition = -1;
|
||||
@ -102,18 +117,19 @@ public class PayloadTrie<T> {
|
||||
return tokens;
|
||||
}
|
||||
|
||||
|
||||
private PayloadToken<T> createFragment(final PayloadEmit<T> emit, final String text, final int lastCollectedPosition) {
|
||||
return new PayloadFragmentToken<>(
|
||||
text.substring( lastCollectedPosition + 1,
|
||||
emit == null ? text.length() : emit.getStart() ) );
|
||||
|
||||
return new PayloadFragmentToken<>(text.substring(lastCollectedPosition + 1, emit == null ? text.length() : emit.getStart()));
|
||||
}
|
||||
|
||||
|
||||
private PayloadToken<T> createMatch(PayloadEmit<T> emit, String text) {
|
||||
return new PayloadMatchToken<>( text.substring( emit.getStart(),
|
||||
emit.getEnd() + 1 ),
|
||||
emit );
|
||||
|
||||
return new PayloadMatchToken<>(text.substring(emit.getStart(), emit.getEnd() + 1), emit);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Tokenizes a specified text and returns the emitted outputs.
|
||||
*
|
||||
@ -121,9 +137,11 @@ public class PayloadTrie<T> {
|
||||
* @return A collection of emits.
|
||||
*/
|
||||
public Collection<PayloadEmit<T>> parseText(final CharSequence text) {
|
||||
|
||||
// Delegates to the stateful-handler overload with a new DefaultPayloadEmitHandler per call.
return parseText(text, new DefaultPayloadEmitHandler<>());
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Tokenizes the specified text by using a custom EmitHandler and returns the
|
||||
* emitted outputs.
|
||||
@ -134,6 +152,7 @@ public class PayloadTrie<T> {
|
||||
*/
|
||||
@SuppressWarnings("unchecked")
|
||||
public Collection<PayloadEmit<T>> parseText(final CharSequence text, final StatefulPayloadEmitHandler<T> emitHandler) {
|
||||
|
||||
parseText(text, (PayloadEmitHandler<T>) emitHandler);
|
||||
|
||||
final List<PayloadEmit<T>> collectedEmits = emitHandler.getEmits();
|
||||
@ -146,6 +165,7 @@ public class PayloadTrie<T> {
|
||||
return collectedEmits;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns true if the text contains one of the search terms; otherwise,
|
||||
* returns false.
|
||||
@ -155,9 +175,11 @@ public class PayloadTrie<T> {
|
||||
* false.
|
||||
*/
|
||||
public boolean containsMatch(final CharSequence text) {
|
||||
|
||||
// firstMatch returns null when nothing matched, so a non-null result means a match exists.
return firstMatch(text) != null;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Tokenizes the specified text by using a custom EmitHandler and returns the
|
||||
* emitted outputs.
|
||||
@ -166,11 +188,15 @@ public class PayloadTrie<T> {
|
||||
* @param emitHandler The handler that will be used to parse the text.
|
||||
*/
|
||||
public void parseText(final CharSequence text, final PayloadEmitHandler<T> emitHandler) {
|
||||
PayloadState<T> currentState = getRootState();
|
||||
|
||||
PayloadState<T> currentState = getRootState();
|
||||
for (int position = 0; position < text.length(); position++) {
|
||||
char character = text.charAt(position);
|
||||
|
||||
if (trieConfig.isIgnoreWhiteSpace() && isWhitespace(character)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (trieConfig.isCaseInsensitive()) {
|
||||
character = Character.toLowerCase(character);
|
||||
}
|
||||
@ -183,6 +209,7 @@ public class PayloadTrie<T> {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* The first matching text sequence.
|
||||
*
|
||||
@ -190,6 +217,7 @@ public class PayloadTrie<T> {
|
||||
* @return {@code null} if no matches found.
|
||||
*/
|
||||
public PayloadEmit<T> firstMatch(final CharSequence text) {
|
||||
|
||||
assert text != null;
|
||||
|
||||
if (!trieConfig.isAllowOverlaps()) {
|
||||
@ -206,6 +234,9 @@ public class PayloadTrie<T> {
|
||||
for (int position = 0; position < text.length(); position++) {
|
||||
char character = text.charAt(position);
|
||||
|
||||
if (trieConfig.isIgnoreWhiteSpace() && isWhitespace(character)) {
|
||||
continue;
|
||||
}
|
||||
if (trieConfig.isCaseInsensitive()) {
|
||||
character = Character.toLowerCase(character);
|
||||
}
|
||||
@ -215,8 +246,13 @@ public class PayloadTrie<T> {
|
||||
|
||||
if (payloads != null && !payloads.isEmpty()) {
|
||||
for (final Payload<T> payload : payloads) {
|
||||
final PayloadEmit<T> emit = new PayloadEmit<>(position - payload.getKeyword().length() + 1, position,
|
||||
payload.getKeyword(), payload.getData());
|
||||
int start;
|
||||
if (isIgnoreWhiteSpace()) {
|
||||
start = findStart(text, position, payload);
|
||||
} else {
|
||||
start = position - payload.getKeyword().length() + 1;
|
||||
}
|
||||
final PayloadEmit<T> emit = new PayloadEmit<>(start, position, payload.getKeyword(), payload.getData());
|
||||
if (trieConfig.isOnlyWholeWords()) {
|
||||
if (!isPartialMatch(text, emit)) {
|
||||
return emit;
|
||||
@ -232,29 +268,38 @@ public class PayloadTrie<T> {
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
private boolean isPartialMatch(final CharSequence searchText, final PayloadEmit<T> emit) {
|
||||
return (emit.getStart() != 0 && Character.isAlphabetic(searchText.charAt(emit.getStart() - 1)))
|
||||
|| (emit.getEnd() + 1 != searchText.length() && Character.isAlphabetic(searchText.charAt(emit.getEnd() + 1)));
|
||||
|
||||
return (emit.getStart() != 0 && Character.isAlphabetic(searchText.charAt(emit.getStart() - 1))) || (emit.getEnd() + 1 != searchText.length() && Character.isAlphabetic(
|
||||
searchText.charAt(emit.getEnd() + 1)));
|
||||
}
|
||||
|
||||
|
||||
private boolean isPartialMatchWhiteSpaceSeparated(final CharSequence searchText, final PayloadEmit<T> emit) {
|
||||
|
||||
final long size = searchText.length();
|
||||
return (emit.getStart() != 0 && !isWhitespace(searchText.charAt(emit.getStart() - 1)))
|
||||
|| (emit.getEnd() + 1 != size && !isWhitespace(searchText.charAt(emit.getEnd() + 1)));
|
||||
return (emit.getStart() != 0 && !isWhitespace(searchText.charAt(emit.getStart() - 1))) || (emit.getEnd() + 1 != size && !isWhitespace(searchText.charAt(emit.getEnd()
|
||||
+ 1)));
|
||||
}
|
||||
|
||||
|
||||
private PayloadState<T> getState(PayloadState<T> currentState, final Character character) {
|
||||
|
||||
PayloadState<T> newCurrentState = currentState.nextState(character);
|
||||
|
||||
var tempState = currentState;
|
||||
while (newCurrentState == null) {
|
||||
currentState = currentState.failure();
|
||||
newCurrentState = currentState.nextState(character);
|
||||
tempState = tempState.getFailure();
|
||||
newCurrentState = tempState.nextState(character);
|
||||
}
|
||||
|
||||
return newCurrentState;
|
||||
}
|
||||
|
||||
|
||||
private void constructFailureStates() {
|
||||
|
||||
final Queue<PayloadState<T>> queue = new LinkedBlockingDeque<>();
|
||||
final PayloadState<T> startState = getRootState();
|
||||
|
||||
@ -272,9 +317,9 @@ public class PayloadTrie<T> {
|
||||
PayloadState<T> targetState = currentState.nextState(transition);
|
||||
queue.add(targetState);
|
||||
|
||||
PayloadState<T> traceFailureState = currentState.failure();
|
||||
PayloadState<T> traceFailureState = currentState.getFailure();
|
||||
while (traceFailureState.nextState(transition) == null) {
|
||||
traceFailureState = traceFailureState.failure();
|
||||
traceFailureState = traceFailureState.getFailure();
|
||||
}
|
||||
|
||||
final PayloadState<T> newFailureState = traceFailureState.nextState(transition);
|
||||
@ -284,13 +329,21 @@ public class PayloadTrie<T> {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private boolean processEmits(final CharSequence text, final int position, final Collection<Payload<T>> payloads, final PayloadEmitHandler<T> emitHandler) {
|
||||
|
||||
boolean emitted = false;
|
||||
for (final Payload<T> payload : payloads) {
|
||||
final PayloadEmit<T> payloadEmit = new PayloadEmit<>(position - payload.getKeyword().length() + 1,
|
||||
position, payload.getKeyword(), payload.getData());
|
||||
if (!(trieConfig.isOnlyWholeWords() && isPartialMatch(text, payloadEmit)) &&
|
||||
!(trieConfig.isOnlyWholeWordsWhiteSpaceSeparated() && isPartialMatchWhiteSpaceSeparated(text, payloadEmit))) {
|
||||
int start;
|
||||
if (isIgnoreWhiteSpace()) {
|
||||
start = findStart(text, position, payload);
|
||||
} else {
|
||||
start = position - payload.getKeyword().length() + 1;
|
||||
}
|
||||
final PayloadEmit<T> payloadEmit = new PayloadEmit<>(start, position, payload.getKeyword(), payload.getData());
|
||||
if (!(trieConfig.isOnlyWholeWords() && isPartialMatch(text, payloadEmit)) && !(trieConfig.isOnlyWholeWordsWhiteSpaceSeparated() && isPartialMatchWhiteSpaceSeparated(
|
||||
text,
|
||||
payloadEmit))) {
|
||||
emitted = emitHandler.emit(payloadEmit) || emitted;
|
||||
if (emitted && trieConfig.isStopOnHit()) {
|
||||
break;
|
||||
@ -301,41 +354,77 @@ public class PayloadTrie<T> {
|
||||
return emitted;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Finds the index in {@code text} at which a match of {@code payload}'s keyword begins,
 * given the index at which it ends. Callers in this class use it only when the trie
 * ignores whitespace, where the matched text span can be longer than the keyword.
 * <p>
 * The keyword's non-whitespace characters are pushed onto a stack (lower-cased when the
 * trie is case-insensitive), which leaves the keyword's last kept character on top. The
 * text is then walked backwards from {@code position}: a character equal to the stack top
 * pops it, any other character is simply stepped over.
 *
 * @param text     the text that was searched
 * @param position index in {@code text} from which the backwards walk starts; callers pass
 *                 the end index of the match
 * @param payload  the payload whose keyword is being located
 * @return the index of the text character that emptied the stack; {@code 0} if the start of
 *         the text is reached first; {@code position + 1} if the keyword contains no
 *         non-whitespace character
 */
private int findStart(CharSequence text, int position, Payload<T> payload) {
|
||||
|
||||
Deque<Character> stack = new LinkedList<>();
|
||||
int i;
|
||||
// Phase 1: collect the keyword characters that take part in matching.
for (i = 0; i < payload.getKeyword().length(); i++) {
|
||||
if (isWhitespace(payload.getKeyword().charAt(i))) {
|
||||
continue;
|
||||
}
|
||||
stack.push(isCaseInsensitive() ? toLowerCase(payload.getKeyword().charAt(i)) : payload.getKeyword().charAt(i));
|
||||
}
|
||||
// Phase 2: consume the text backwards until every collected character has been seen.
for (i = position; !stack.isEmpty() && i >= 0; --i) {
|
||||
char c = isCaseInsensitive() ? toLowerCase(text.charAt(i)) : text.charAt(i);
|
||||
// c is a primitive char, so the boxed stack top is unboxed and compared by value.
if (c == stack.peek()) {
|
||||
stack.pop();
|
||||
}
|
||||
}
|
||||
// The loop decrements once more after the final pop, hence the + 1.
return i + 1;
|
||||
}
|
||||
|
||||
|
||||
/**
 * @return the case-insensitivity flag of this trie's configuration
 */
private boolean isCaseInsensitive() {
|
||||
|
||||
return trieConfig.isCaseInsensitive();
|
||||
}
|
||||
|
||||
|
||||
/**
 * @return the ignore-whitespace flag of this trie's configuration
 */
private boolean isIgnoreWhiteSpace() {
|
||||
|
||||
return trieConfig.isIgnoreWhiteSpace();
|
||||
}
|
||||
|
||||
|
||||
/**
 * @return the root state created in the constructor
 */
private PayloadState<T> getRootState() {
|
||||
|
||||
return this.rootState;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Provides a fluent interface for constructing Trie instances with payloads.
|
||||
* @param <T> The type of the emitted payload.
|
||||
*
|
||||
* @param <T> The type of the emitted payload.
|
||||
* @return The builder used to configure its Trie.
|
||||
*/
|
||||
public static <T> PayloadTrieBuilder<T> builder() {
|
||||
|
||||
// The builder's constructor is private, so this factory is the way to obtain one from outside this class.
return new PayloadTrieBuilder<>();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Builder class to create a PayloadTrie instance.
|
||||
*
|
||||
* @param <T> The type of the emitted payload.
|
||||
*/
|
||||
public static class PayloadTrieBuilder<T> {
|
||||
public static final class PayloadTrieBuilder<T> {
|
||||
|
||||
private final TrieConfig trieConfig = new TrieConfig();
|
||||
|
||||
private final PayloadTrie<T> trie = new PayloadTrie<>(trieConfig);
|
||||
|
||||
|
||||
/**
|
||||
* Default (empty) constructor.
|
||||
*/
|
||||
private PayloadTrieBuilder() {
|
||||
|
||||
// Intentionally empty; private so that instances are created through PayloadTrie.builder().
}
|
||||
|
||||
|
||||
/**
|
||||
* Configure the Trie to ignore case when searching for keywords in the text.
|
||||
* This must be called before calling addKeyword because the algorithm converts
|
||||
@ -345,20 +434,24 @@ public class PayloadTrie<T> {
|
||||
* @return This builder.
|
||||
*/
|
||||
public PayloadTrieBuilder<T> ignoreCase() {
|
||||
|
||||
// The trie held by this builder was constructed with this same config object, so it sees the flag.
this.trieConfig.setCaseInsensitive(true);
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Configure the Trie to ignore overlapping keywords.
|
||||
*
|
||||
* @return This builder.
|
||||
*/
|
||||
public PayloadTrieBuilder<T> ignoreOverlaps() {
|
||||
|
||||
// The config stores the inverse notion ("allow overlaps"), hence false here.
this.trieConfig.setAllowOverlaps(false);
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Adds a keyword to the {@link Trie}'s list of text search keywords.
|
||||
* No {@link Payload} is supplied.
|
||||
@ -368,10 +461,12 @@ public class PayloadTrie<T> {
|
||||
* @throws NullPointerException if the keyword is null.
|
||||
*/
|
||||
public PayloadTrieBuilder<T> addKeyword(final String keyword) {
|
||||
|
||||
// Uses the trie's single-argument overload, which registers the keyword with a null payload.
this.trie.addKeyword(keyword);
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Adds a keyword and a payload to the {@link Trie}'s list of text
|
||||
* search keywords.
|
||||
@ -382,10 +477,12 @@ public class PayloadTrie<T> {
|
||||
* @throws NullPointerException if the keyword is null.
|
||||
*/
|
||||
public PayloadTrieBuilder<T> addKeyword(final String keyword, final T payload) {
|
||||
|
||||
// Inserted immediately into the trie held by this builder, not deferred until build().
this.trie.addKeyword(keyword, payload);
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Adds a list of keywords and payloads to the {@link Trie}'s list of
|
||||
* text search keywords.
|
||||
@ -394,22 +491,26 @@ public class PayloadTrie<T> {
|
||||
* @return This builder.
|
||||
*/
|
||||
public PayloadTrieBuilder<T> addKeywords(final Collection<Payload<T>> keywords) {
|
||||
|
||||
// Each Payload is unpacked into its keyword and data and added in iteration order.
for (Payload<T> payload : keywords) {
|
||||
this.trie.addKeyword(payload.getKeyword(), payload.getData());
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Configure the Trie to match whole keywords in the text.
|
||||
*
|
||||
* @return This builder.
|
||||
*/
|
||||
public PayloadTrieBuilder<T> onlyWholeWords() {
|
||||
|
||||
// With this flag set, the trie filters emits through isPartialMatch, which tests the neighbouring characters with Character.isAlphabetic.
this.trieConfig.setOnlyWholeWords(true);
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Configure the Trie to match whole keywords that are separated by whitespace
|
||||
* in the text. For example, "this keyword thatkeyword" would only match the
|
||||
@ -418,46 +519,69 @@ public class PayloadTrie<T> {
|
||||
* @return This builder.
|
||||
*/
|
||||
public PayloadTrieBuilder<T> onlyWholeWordsWhiteSpaceSeparated() {
|
||||
|
||||
// With this flag set, the trie filters emits through isPartialMatchWhiteSpaceSeparated, which requires whitespace (or a text boundary) on both sides.
this.trieConfig.setOnlyWholeWordsWhiteSpaceSeparated(true);
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Configure the Trie to stop after the first keyword is found in the text.
|
||||
*
|
||||
* @return This builder.
|
||||
*/
|
||||
public PayloadTrieBuilder<T> stopOnHit() {
|
||||
|
||||
// trie.trieConfig is the same object as this.trieConfig (the trie was constructed with it),
// so this is equivalent to the other setters that go through this.trieConfig.
trie.trieConfig.setStopOnHit(true);
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Configure the PayloadTrie based on the builder settings.
|
||||
*
|
||||
* @return The configured PayloadTrie.
|
||||
*/
|
||||
public PayloadTrie<T> build() {
|
||||
|
||||
// Failure links are computed here, after the keywords have been inserted.
this.trie.constructFailureStates();
|
||||
// Returns the builder's own trie instance, not a copy.
return this.trie;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @return This builder.
|
||||
* @deprecated Use ignoreCase()
|
||||
*/
|
||||
@Deprecated
|
||||
public PayloadTrieBuilder<T> caseInsensitive() {
|
||||
|
||||
// Deprecated alias; forwards to ignoreCase().
return ignoreCase();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @return This builder.
|
||||
* @deprecated Use ignoreOverlaps()
|
||||
*/
|
||||
@Deprecated
|
||||
public PayloadTrieBuilder<T> removeOverlaps() {
|
||||
|
||||
// Deprecated alias; forwards to ignoreOverlaps().
return ignoreOverlaps();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Configure the Trie to ignore whitespaces.
|
||||
*
|
||||
* @return This builder.
|
||||
*/
|
||||
public PayloadTrieBuilder<T> ignoreWhiteSpace() {
|
||||
|
||||
// addState consults this flag while keywords are inserted, so set it before adding keywords.
trieConfig.setIgnoreWhiteSpace(true);
|
||||
return this;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -2,6 +2,9 @@ package org.ahocorasick.trie;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* A state has various important tasks it must attend to:
|
||||
@ -26,6 +29,7 @@ public class State {
|
||||
/**
|
||||
* effectively the size of the keyword
|
||||
*/
|
||||
@Getter
|
||||
private final int depth;
|
||||
|
||||
/**
|
||||
@ -42,6 +46,8 @@ public class State {
|
||||
/**
|
||||
* if no matching states are found, the failure state will be returned
|
||||
*/
|
||||
@Setter
|
||||
@Getter
|
||||
private State failure;
|
||||
|
||||
/**
|
||||
@ -49,16 +55,22 @@ public class State {
|
||||
*/
|
||||
private Set<String> emits;
|
||||
|
||||
|
||||
/**
 * Creates a state with depth 0, which the depth-taking constructor treats as a root state.
 */
public State() {
|
||||
|
||||
this(0);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Creates a state at the given depth.
 *
 * @param depth the depth of this state; a depth of 0 marks the state as its own root
 */
public State(final int depth) {
|
||||
|
||||
this.depth = depth;
|
||||
// Only a depth-0 state records a root reference (itself); deeper states keep null.
this.rootState = depth == 0 ? this : null;
|
||||
}
|
||||
|
||||
|
||||
private State nextState(final Character character, final boolean ignoreRootState) {
|
||||
|
||||
State nextState = this.success.get(character);
|
||||
|
||||
if (!ignoreRootState && nextState == null && this.rootState != null) {
|
||||
@ -68,15 +80,21 @@ public class State {
|
||||
return nextState;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Looks up the transition for {@code character} with the root-state handling of the
 * private overload enabled ({@code ignoreRootState == false}).
 *
 * @param character the transition character
 * @return whatever the private overload returns for this character
 */
public State nextState(final Character character) {
|
||||
|
||||
return nextState(character, false);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Looks up the transition for {@code character} with the root-state handling of the
 * private overload disabled ({@code ignoreRootState == true}).
 *
 * @param character the transition character
 * @return whatever the private overload returns for this character
 */
public State nextStateIgnoreRootState(Character character) {
|
||||
|
||||
return nextState(character, true);
|
||||
}
|
||||
|
||||
|
||||
public State addState(String keyword) {
|
||||
|
||||
State state = this;
|
||||
|
||||
for (final Character character : keyword.toCharArray()) {
|
||||
@ -86,7 +104,9 @@ public class State {
|
||||
return state;
|
||||
}
|
||||
|
||||
|
||||
public State addState(Character character) {
|
||||
|
||||
State nextState = nextStateIgnoreRootState(character);
|
||||
if (nextState == null) {
|
||||
nextState = new State(this.depth + 1);
|
||||
@ -95,40 +115,39 @@ public class State {
|
||||
return nextState;
|
||||
}
|
||||
|
||||
public int getDepth() {
|
||||
return this.depth;
|
||||
}
|
||||
|
||||
/**
 * Records {@code keyword} as an emit of this state.
 *
 * @param keyword the keyword to record
 */
public void addEmit(String keyword) {
|
||||
|
||||
// The set is created lazily on first use; a TreeSet keeps the keywords sorted and unique.
if (this.emits == null) {
|
||||
this.emits = new TreeSet<>();
|
||||
}
|
||||
this.emits.add(keyword);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Records every keyword of the given collection as an emit of this state.
 *
 * @param emits the keywords to record
 */
public void addEmit(Collection<String> emits) {
|
||||
|
||||
// Goes through the single-keyword overload so the lazy set creation applies here too.
for (String emit : emits) {
|
||||
addEmit(emit);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * @return an empty list if no emit has been recorded; otherwise the internal emit set itself
 *         (not a copy)
 */
public Collection<String> emit() {
|
||||
|
||||
return this.emits == null ? Collections.<String>emptyList() : this.emits;
|
||||
}
|
||||
|
||||
public State failure() {
|
||||
return this.failure;
|
||||
}
|
||||
|
||||
public void setFailure(State failState) {
|
||||
this.failure = failState;
|
||||
}
|
||||
|
||||
/**
 * @return the values of this state's {@code success} transitions, i.e. its direct successor states
 */
public Collection<State> getStates() {
|
||||
|
||||
return this.success.values();
|
||||
}
|
||||
|
||||
|
||||
/**
 * @return the keys of this state's {@code success} transitions, i.e. the characters that have a
 *         successor state
 */
public Collection<Character> getTransitions() {
|
||||
|
||||
return this.success.keySet();
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,17 +1,25 @@
|
||||
package org.ahocorasick.trie;
|
||||
|
||||
public abstract class Token {
|
||||
|
||||
private String fragment;
|
||||
|
||||
|
||||
public Token(String fragment) {
|
||||
|
||||
this.fragment = fragment;
|
||||
}
|
||||
|
||||
|
||||
public String getFragment() {
|
||||
|
||||
return this.fragment;
|
||||
}
|
||||
|
||||
|
||||
public abstract boolean isMatch();
|
||||
|
||||
|
||||
public abstract Emit getEmit();
|
||||
|
||||
}
|
||||
|
||||
@ -15,20 +15,26 @@ import org.ahocorasick.trie.handler.StatefulEmitHandler;
|
||||
*
|
||||
* @author Robert Bor
|
||||
*/
|
||||
public class Trie {
|
||||
public final class Trie {
|
||||
|
||||
private final PayloadTrie<String> payloadTrie;
|
||||
|
||||
|
||||
private Trie(final PayloadTrie<String> payloadTrie) {
|
||||
|
||||
this.payloadTrie = payloadTrie;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Tokenizes {@code text} with the wrapped payload trie and wraps each resulting payload token
 * in a {@code DefaultToken}.
 *
 * @param text the text to tokenize
 * @return the tokens produced for the text
 */
public Collection<Token> tokenize(final String text) {
|
||||
|
||||
Collection<PayloadToken<String>> tokens = this.payloadTrie.tokenize(text);
|
||||
return asTokens(tokens);
|
||||
}
|
||||
|
||||
|
||||
private static Collection<Token> asTokens(Collection<PayloadToken<String>> tokens) {
|
||||
|
||||
Collection<Token> result = new ArrayList<>();
|
||||
for (PayloadToken<String> payloadToken : tokens) {
|
||||
result.add(new DefaultToken(payloadToken));
|
||||
@ -36,7 +42,9 @@ public class Trie {
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
private static Collection<Emit> asEmits(Collection<PayloadEmit<String>> emits) {
|
||||
|
||||
Collection<Emit> result = new ArrayList<>();
|
||||
for (PayloadEmit<String> emit : emits) {
|
||||
result.add(asEmit(emit));
|
||||
@ -44,30 +52,40 @@ public class Trie {
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Converts a payload emit into a plain {@code Emit}, copying start, end and keyword; the
 * payload itself is not carried over.
 *
 * @param payloadEmit the emit to convert
 * @return a new {@code Emit} with the same start, end and keyword
 */
private static Emit asEmit(PayloadEmit<String> payloadEmit) {
|
||||
|
||||
return new Emit(payloadEmit.getStart(), payloadEmit.getEnd(), payloadEmit.getKeyword());
|
||||
}
|
||||
|
||||
|
||||
/**
 * Parses {@code text} with the wrapped payload trie and converts the resulting payload emits
 * into plain {@code Emit} objects.
 *
 * @param text the text to search
 * @return the emits found in the text
 */
public Collection<Emit> parseText(final CharSequence text) {
|
||||
|
||||
Collection<PayloadEmit<String>> parsedText = this.payloadTrie.parseText(text);
|
||||
return asEmits(parsedText);
|
||||
}
|
||||
|
||||
|
||||
@SuppressWarnings("UnusedReturnValue")
|
||||
public Collection<Emit> parseText(final CharSequence text, final StatefulEmitHandler emitHandler) {
|
||||
Collection<PayloadEmit<String>> parsedText = this.payloadTrie.parseText(text,
|
||||
new StatefulPayloadEmitDelegateHandler(emitHandler));
|
||||
|
||||
Collection<PayloadEmit<String>> parsedText = this.payloadTrie.parseText(text, new StatefulPayloadEmitDelegateHandler(emitHandler));
|
||||
return asEmits(parsedText);
|
||||
}
|
||||
|
||||
|
||||
public boolean containsMatch(final CharSequence text) {
|
||||
|
||||
return firstMatch(text) != null;
|
||||
}
|
||||
|
||||
|
||||
public void parseText(final CharSequence text, final EmitHandler emitHandler) {
|
||||
|
||||
this.payloadTrie.parseText(text, new PayloadEmitDelegateHandler(emitHandler));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* The first matching text sequence.
|
||||
*
|
||||
@ -75,35 +93,38 @@ public class Trie {
|
||||
* @return {@code null} if no matches found.
|
||||
*/
|
||||
public Emit firstMatch(final CharSequence text) {
|
||||
|
||||
assert text != null;
|
||||
|
||||
final PayloadEmit<String> payload = this.payloadTrie.firstMatch(text);
|
||||
return payload == null
|
||||
? null
|
||||
: new Emit( payload.getStart(),
|
||||
payload.getEnd(),
|
||||
payload.getKeyword() );
|
||||
return payload == null ? null : new Emit(payload.getStart(), payload.getEnd(), payload.getKeyword());
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Provides a fluent interface for constructing Trie instances.
|
||||
*
|
||||
* @return The builder used to configure its Trie.
|
||||
*/
|
||||
public static TrieBuilder builder() {
|
||||
|
||||
return new TrieBuilder();
|
||||
}
|
||||
|
||||
public static class TrieBuilder {
|
||||
|
||||
public static final class TrieBuilder {
|
||||
|
||||
private final PayloadTrieBuilder<String> delegate = PayloadTrie.builder();
|
||||
|
||||
|
||||
/**
|
||||
* Default (empty) constructor.
|
||||
*/
|
||||
private TrieBuilder() {
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Configure the Trie to ignore case when searching for keywords in the text.
|
||||
* This must be called before calling addKeyword because the algorithm converts
|
||||
@ -113,21 +134,37 @@ public class Trie {
|
||||
* @return This builder.
|
||||
*/
|
||||
public TrieBuilder ignoreCase() {
|
||||
|
||||
delegate.ignoreCase();
|
||||
// This builder holds no configuration of its own; the flag is set on the PayloadTrie builder.
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Configure the Trie to ignore overlapping keywords.
|
||||
*
|
||||
* @return This builder.
|
||||
*/
|
||||
public TrieBuilder ignoreOverlaps() {
|
||||
|
||||
delegate.ignoreOverlaps();
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Configure the Trie to ignore whitespaces.
|
||||
*
|
||||
* @return This builder.
|
||||
*/
|
||||
public TrieBuilder ignoreWhiteSpace() {
|
||||
|
||||
delegate.ignoreWhiteSpace();
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Adds a keyword to the Trie's list of text search keywords.
|
||||
*
|
||||
@ -136,10 +173,12 @@ public class Trie {
|
||||
* @throws NullPointerException if the keyword is null.
|
||||
*/
|
||||
public TrieBuilder addKeyword(final String keyword) {
|
||||
|
||||
// A plain Trie carries no payloads, so the keyword is registered with a null payload.
delegate.addKeyword(keyword, null);
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Adds a list of keywords to the Trie's list of text search keywords.
|
||||
*
|
||||
@ -147,12 +186,14 @@ public class Trie {
|
||||
* @return This builder.
|
||||
*/
|
||||
public TrieBuilder addKeywords(final String... keywords) {
|
||||
|
||||
for (String keyword : keywords) {
|
||||
delegate.addKeyword(keyword, null);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Adds a list of keywords to the Trie's list of text search keywords.
|
||||
*
|
||||
@ -161,22 +202,26 @@ public class Trie {
|
||||
*/
|
||||
@SuppressWarnings("unused")
|
||||
public TrieBuilder addKeywords(final Collection<String> keywords) {
|
||||
|
||||
for (String keyword : keywords) {
|
||||
this.delegate.addKeyword(keyword, null);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Configure the Trie to match whole keywords in the text.
|
||||
*
|
||||
* @return This builder.
|
||||
*/
|
||||
public TrieBuilder onlyWholeWords() {
|
||||
|
||||
this.delegate.onlyWholeWords();
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Configure the Trie to match whole keywords that are separated by whitespace
|
||||
* in the text. For example, "this keyword thatkeyword" would only match the
|
||||
@ -185,44 +230,35 @@ public class Trie {
|
||||
* @return This builder.
|
||||
*/
|
||||
public TrieBuilder onlyWholeWordsWhiteSpaceSeparated() {
|
||||
|
||||
this.delegate.onlyWholeWordsWhiteSpaceSeparated();
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Configure the Trie to stop after the first keyword is found in the text.
|
||||
*
|
||||
* @return This builder.
|
||||
*/
|
||||
public TrieBuilder stopOnHit() {
|
||||
|
||||
this.delegate.stopOnHit();
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Configure the Trie based on the builder settings.
|
||||
*
|
||||
* @return The configured Trie.
|
||||
*/
|
||||
public Trie build() {
|
||||
|
||||
// Build the underlying PayloadTrie first, then wrap it in a Trie.
PayloadTrie<String> payloadTrie = this.delegate.build();
|
||||
return new Trie(payloadTrie);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return This builder.
|
||||
* @deprecated Use ignoreCase()
|
||||
*/
|
||||
public TrieBuilder caseInsensitive() {
|
||||
return ignoreCase();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return This builder.
|
||||
* @deprecated Use ignoreOverlaps()
|
||||
*/
|
||||
public TrieBuilder removeOverlaps() {
|
||||
return ignoreOverlaps();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -4,51 +4,86 @@ public class TrieConfig {
|
||||
|
||||
private boolean allowOverlaps = true;
|
||||
|
||||
private boolean onlyWholeWords = false;
|
||||
private boolean onlyWholeWords;
|
||||
|
||||
private boolean onlyWholeWordsWhiteSpaceSeparated = false;
|
||||
private boolean onlyWholeWordsWhiteSpaceSeparated;
|
||||
|
||||
private boolean caseInsensitive = false;
|
||||
private boolean caseInsensitive;
|
||||
|
||||
private boolean ignoreWhiteSpace;
|
||||
|
||||
private boolean stopOnHit;
|
||||
|
||||
private boolean stopOnHit = false;
|
||||
|
||||
public boolean isStopOnHit() {
|
||||
|
||||
return stopOnHit;
|
||||
}
|
||||
|
||||
|
||||
public void setStopOnHit(boolean stopOnHit) {
|
||||
|
||||
this.stopOnHit = stopOnHit;
|
||||
}
|
||||
|
||||
|
||||
public boolean isAllowOverlaps() {
|
||||
|
||||
return allowOverlaps;
|
||||
}
|
||||
|
||||
|
||||
public void setAllowOverlaps(boolean allowOverlaps) {
|
||||
|
||||
this.allowOverlaps = allowOverlaps;
|
||||
}
|
||||
|
||||
|
||||
public boolean isOnlyWholeWords() {
|
||||
|
||||
return onlyWholeWords;
|
||||
}
|
||||
|
||||
|
||||
public void setOnlyWholeWords(boolean onlyWholeWords) {
|
||||
|
||||
this.onlyWholeWords = onlyWholeWords;
|
||||
}
|
||||
|
||||
|
||||
public boolean isOnlyWholeWordsWhiteSpaceSeparated() {
|
||||
|
||||
return onlyWholeWordsWhiteSpaceSeparated;
|
||||
}
|
||||
|
||||
|
||||
public void setOnlyWholeWordsWhiteSpaceSeparated(boolean onlyWholeWordsWhiteSpaceSeparated) {
|
||||
|
||||
this.onlyWholeWordsWhiteSpaceSeparated = onlyWholeWordsWhiteSpaceSeparated;
|
||||
}
|
||||
|
||||
|
||||
public boolean isCaseInsensitive() {
|
||||
|
||||
return caseInsensitive;
|
||||
}
|
||||
|
||||
|
||||
public boolean isIgnoreWhiteSpace() {
|
||||
|
||||
return ignoreWhiteSpace;
|
||||
}
|
||||
|
||||
|
||||
public void setCaseInsensitive(boolean caseInsensitive) {
|
||||
|
||||
this.caseInsensitive = caseInsensitive;
|
||||
}
|
||||
|
||||
|
||||
public void setIgnoreWhiteSpace(boolean ignoreWhiteSpace) {
|
||||
|
||||
this.ignoreWhiteSpace = ignoreWhiteSpace;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -9,12 +9,16 @@ public abstract class AbstractStatefulEmitHandler implements StatefulEmitHandler
|
||||
|
||||
private final List<Emit> emits = new ArrayList<>();
|
||||
|
||||
|
||||
public void addEmit(final Emit emit) {
|
||||
|
||||
this.emits.add(emit);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public List<Emit> getEmits() {
|
||||
|
||||
return this.emits;
|
||||
}
|
||||
|
||||
|
||||
@ -9,12 +9,16 @@ public abstract class AbstractStatefulPayloadEmitHandler<T> implements StatefulP
|
||||
|
||||
private final List<PayloadEmit<T>> emits = new ArrayList<>();
|
||||
|
||||
|
||||
public void addEmit(final PayloadEmit<T> emit) {
|
||||
|
||||
this.emits.add(emit);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public List<PayloadEmit<T>> getEmits() {
|
||||
|
||||
return this.emits;
|
||||
}
|
||||
|
||||
|
||||
@ -9,14 +9,19 @@ public class DefaultEmitHandler implements StatefulEmitHandler {
|
||||
|
||||
private final List<Emit> emits = new ArrayList<>();
|
||||
|
||||
|
||||
@Override
|
||||
public boolean emit(final Emit emit) {
|
||||
|
||||
this.emits.add(emit);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public List<Emit> getEmits() {
|
||||
|
||||
return this.emits;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -9,14 +9,19 @@ public class DefaultPayloadEmitHandler<T> implements StatefulPayloadEmitHandler<
|
||||
|
||||
private final List<PayloadEmit<T>> emits = new ArrayList<>();
|
||||
|
||||
|
||||
@Override
|
||||
public boolean emit(final PayloadEmit<T> emit) {
|
||||
|
||||
// Every emit is accepted and stored, so the result is always true.
this.emits.add(emit);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public List<PayloadEmit<T>> getEmits() {
|
||||
|
||||
return this.emits;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -3,5 +3,7 @@ package org.ahocorasick.trie.handler;
|
||||
import org.ahocorasick.trie.Emit;
|
||||
|
||||
public interface EmitHandler {
|
||||
|
||||
boolean emit(Emit emit);
|
||||
|
||||
}
|
||||
|
||||
@ -11,13 +11,17 @@ public class PayloadEmitDelegateHandler implements PayloadEmitHandler<String> {
|
||||
|
||||
private EmitHandler handler;
|
||||
|
||||
|
||||
public PayloadEmitDelegateHandler(EmitHandler handler) {
|
||||
|
||||
this.handler = handler;
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean emit(PayloadEmit<String> emit) {
|
||||
|
||||
// Adapt to the payload-free handler API: only start, end and keyword are passed on.
Emit newEmit = new Emit(emit.getStart(), emit.getEnd(), emit.getKeyword());
|
||||
// The wrapped handler decides the return value.
return handler.emit(newEmit);
|
||||
}
|
||||
|
||||
@ -3,5 +3,7 @@ package org.ahocorasick.trie.handler;
|
||||
import org.ahocorasick.trie.PayloadEmit;
|
||||
|
||||
public interface PayloadEmitHandler<T> {
|
||||
|
||||
boolean emit(PayloadEmit<T> emit);
|
||||
|
||||
}
|
||||
|
||||
@ -5,5 +5,7 @@ import java.util.List;
|
||||
import org.ahocorasick.trie.Emit;
|
||||
|
||||
public interface StatefulEmitHandler extends EmitHandler {
|
||||
|
||||
List<Emit> getEmits();
|
||||
|
||||
}
|
||||
|
||||
@ -15,12 +15,16 @@ public class StatefulPayloadEmitDelegateHandler implements StatefulPayloadEmitHa
|
||||
|
||||
private StatefulEmitHandler handler;
|
||||
|
||||
|
||||
public StatefulPayloadEmitDelegateHandler(StatefulEmitHandler handler) {
|
||||
|
||||
this.handler = handler;
|
||||
|
||||
}
|
||||
|
||||
|
||||
private static List<PayloadEmit<String>> asEmits(Collection<Emit> emits) {
|
||||
|
||||
List<PayloadEmit<String>> result = new ArrayList<>();
|
||||
for (Emit emit : emits) {
|
||||
result.add(new PayloadEmit<String>(emit.getStart(), emit.getEnd(), emit.getKeyword(), null));
|
||||
@ -28,15 +32,20 @@ public class StatefulPayloadEmitDelegateHandler implements StatefulPayloadEmitHa
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean emit(PayloadEmit<String> emit) {
|
||||
|
||||
Emit newEmit = new Emit(emit.getStart(), emit.getEnd(), emit.getKeyword());
|
||||
return handler.emit(newEmit);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public List<PayloadEmit<String>> getEmits() {
|
||||
|
||||
List<Emit> emits = this.handler.getEmits();
|
||||
return asEmits(emits);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -5,5 +5,7 @@ import java.util.List;
|
||||
import org.ahocorasick.trie.PayloadEmit;
|
||||
|
||||
public interface StatefulPayloadEmitHandler<T> extends PayloadEmitHandler<T> {
|
||||
|
||||
List<PayloadEmit<T>> getEmits();
|
||||
|
||||
}
|
||||
|
||||
@ -12,38 +12,51 @@ public class IntervalTest {
|
||||
|
||||
@Test
|
||||
public void test_construct() {
|
||||
|
||||
final Interval i = new Interval(1, 3);
|
||||
assertEquals(1, i.getStart());
|
||||
assertEquals(3, i.getEnd());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_size() {
|
||||
|
||||
assertEquals(3, new Interval(0, 2).size());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_intervaloverlaps() {
|
||||
|
||||
assertTrue(new Interval(1, 3).overlapsWith(new Interval(2, 4)));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_intervalDoesNotOverlap() {
|
||||
|
||||
assertFalse(new Interval(1, 13).overlapsWith(new Interval(27, 42)));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_pointOverlaps() {
|
||||
|
||||
assertTrue(new Interval(1, 3).overlapsWith(2));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_pointDoesNotOverlap() {
|
||||
|
||||
assertFalse(new Interval(1, 13).overlapsWith(42));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_comparable() {
|
||||
|
||||
final Set<Interval> intervals = new TreeSet<>();
|
||||
intervals.add(new Interval(4, 6));
|
||||
intervals.add(new Interval(2, 7));
|
||||
@ -54,13 +67,17 @@ public class IntervalTest {
|
||||
assertEquals(4, it.next().getStart());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_checkToString() {
|
||||
|
||||
assertEquals("4:6", new Interval(4, 6).toString());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_compareToNegativeTest() {
|
||||
|
||||
assertEquals(-1, new Interval(4, 6).compareTo(new Object()));
|
||||
}
|
||||
|
||||
|
||||
@ -12,6 +12,7 @@ public class IntervalTreeTest {
|
||||
|
||||
@Test
|
||||
public void findOverlaps() {
|
||||
|
||||
List<Intervalable> intervals = new ArrayList<>();
|
||||
intervals.add(new Interval(0, 2));
|
||||
intervals.add(new Interval(1, 3));
|
||||
@ -28,8 +29,10 @@ public class IntervalTreeTest {
|
||||
assertOverlap(overlapsIt.next(), 0, 2);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void removeOverlaps() {
|
||||
|
||||
List<Intervalable> intervals = new ArrayList<>();
|
||||
intervals.add(new Interval(0, 2));
|
||||
intervals.add(new Interval(4, 5));
|
||||
@ -43,7 +46,9 @@ public class IntervalTreeTest {
|
||||
|
||||
}
|
||||
|
||||
|
||||
protected void assertOverlap(Intervalable interval, int expectedStart, int expectedEnd) {
|
||||
|
||||
assertEquals(expectedStart, interval.getStart());
|
||||
assertEquals(expectedEnd, interval.getEnd());
|
||||
}
|
||||
|
||||
@ -12,6 +12,7 @@ public class IntervalableComparatorByPositionTest {
|
||||
|
||||
@Test
|
||||
public void sortOnPosition() {
|
||||
|
||||
List<Intervalable> intervals = new ArrayList<Intervalable>();
|
||||
intervals.add(new Interval(4, 5));
|
||||
intervals.add(new Interval(1, 4));
|
||||
|
||||
@ -12,6 +12,7 @@ public class IntervalableComparatorBySizeTest {
|
||||
|
||||
@Test
|
||||
public void sortOnSize() {
|
||||
|
||||
List<Intervalable> intervals = new ArrayList<Intervalable>();
|
||||
intervals.add(new Interval(4, 5));
|
||||
intervals.add(new Interval(1, 4));
|
||||
@ -22,8 +23,10 @@ public class IntervalableComparatorBySizeTest {
|
||||
assertEquals(2, intervals.get(2).size());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void sortOnSizeThenPosition() {
|
||||
|
||||
List<Intervalable> intervals = new ArrayList<Intervalable>();
|
||||
intervals.add(new Interval(4, 7));
|
||||
intervals.add(new Interval(2, 5));
|
||||
|
||||
@ -15,18 +15,22 @@ public class EmitTest {
|
||||
*/
|
||||
@Test
|
||||
public void test_Equality_SameValues_ObjectsAreEqual() {
|
||||
|
||||
final Emit one = new Emit(13, 42, null);
|
||||
final Emit two = new Emit(13, 42, null);
|
||||
assertEquals(one, two);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Test that two {@link Emit} instances having different values are not equal.
|
||||
*/
|
||||
@Test
|
||||
public void test_Equality_DifferingValues_ObjectsAreNotEqual() {
|
||||
|
||||
final Emit one = new Emit(13, 42, null);
|
||||
final Emit two = new Emit(13, 43, null);
|
||||
assertNotEquals(one, two);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -20,66 +20,65 @@ public class PayloadTrieTest {
|
||||
private final static String[] ALPHABET = new String[]{"abc", "bcd", "cde"};
|
||||
private final static String[] ALPHABET_PAYLOAD = new String[]{"alpha:abc", "alpha:bcd", "alpha:cde"};
|
||||
|
||||
private final static List<Payload<String>> ALPHABET_WITH_PAYLOADS = asList(
|
||||
new Payload<>( ALPHABET[ 0 ], ALPHABET_PAYLOAD[ 0 ] ),
|
||||
private final static List<Payload<String>> ALPHABET_WITH_PAYLOADS = asList(new Payload<>(ALPHABET[0], ALPHABET_PAYLOAD[0]),
|
||||
new Payload<>(ALPHABET[1], ALPHABET_PAYLOAD[1]),
|
||||
new Payload<>(ALPHABET[2], ALPHABET_PAYLOAD[2]));
|
||||
|
||||
private final static String[] PRONOUNS = new String[]{"hers", "his", "she", "he"};
|
||||
private final static int[] PRONOUNS_PAYLOAD_ID = new int[]{9, 12, 4, 20};
|
||||
|
||||
private final static List<Payload<Integer>> PRONOUNS_WITH_PAYLOADS = asList(
|
||||
new Payload<>( PRONOUNS[ 0 ], PRONOUNS_PAYLOAD_ID[ 0 ] ),
|
||||
private final static List<Payload<Integer>> PRONOUNS_WITH_PAYLOADS = asList(new Payload<>(PRONOUNS[0], PRONOUNS_PAYLOAD_ID[0]),
|
||||
new Payload<>(PRONOUNS[1], PRONOUNS_PAYLOAD_ID[1]),
|
||||
new Payload<>(PRONOUNS[2], PRONOUNS_PAYLOAD_ID[2]),
|
||||
new Payload<>( PRONOUNS[ 3 ], PRONOUNS_PAYLOAD_ID[ 3 ] )
|
||||
);
|
||||
new Payload<>(PRONOUNS[3], PRONOUNS_PAYLOAD_ID[3]));
|
||||
|
||||
private final static String[] FOOD = new String[]{"veal", "cauliflower", "broccoli", "tomatoes"};
|
||||
private final static Food[] FOOD_PAYLOAD = new Food[] { new Food("veal"), new Food("cauliflower"), new Food("broccoli"),
|
||||
new Food("tomatoes") };
|
||||
private final static Food[] FOOD_PAYLOAD = new Food[]{new Food("veal"), new Food("cauliflower"), new Food("broccoli"), new Food("tomatoes")};
|
||||
|
||||
private final static List<Payload<Food>> FOOD_WITH_PAYLOADS = asList(
|
||||
new Payload<>( FOOD[ 0 ], FOOD_PAYLOAD[ 0 ] ),
|
||||
private final static List<Payload<Food>> FOOD_WITH_PAYLOADS = asList(new Payload<>(FOOD[0], FOOD_PAYLOAD[0]),
|
||||
new Payload<>(FOOD[1], FOOD_PAYLOAD[1]),
|
||||
new Payload<>(FOOD[2], FOOD_PAYLOAD[2]),
|
||||
new Payload<>( FOOD[ 3 ], FOOD_PAYLOAD[ 3 ] )
|
||||
);
|
||||
new Payload<>(FOOD[3], FOOD_PAYLOAD[3]));
|
||||
|
||||
private final static String[] GREEK_LETTERS = new String[]{"Alpha", "Beta", "Gamma"};
|
||||
private final static String[] GREEK_LETTERS_PAYLOAD = new String[]{"greek:Alpha", "greek:Beta", "greek:Gamma"};
|
||||
|
||||
private final static List<Payload<String>> GREEK_LETTERS_WITH_PAYLOADS = asList(
|
||||
new Payload<>( GREEK_LETTERS[ 0 ], GREEK_LETTERS_PAYLOAD[ 0 ] ),
|
||||
private final static List<Payload<String>> GREEK_LETTERS_WITH_PAYLOADS = asList(new Payload<>(GREEK_LETTERS[0], GREEK_LETTERS_PAYLOAD[0]),
|
||||
new Payload<>(GREEK_LETTERS[1], GREEK_LETTERS_PAYLOAD[1]),
|
||||
new Payload<>(GREEK_LETTERS[2], GREEK_LETTERS_PAYLOAD[2]));
|
||||
|
||||
private final static String[] UNICODE = new String[]{"turning", "once", "again", "börkü"};
|
||||
private final static String[] UNICODE_PAYLOAD = new String[]{"uni:turning", "uni:once", "uni:again", "uni:börkü"};
|
||||
|
||||
private final static List<Payload<String>> UNICODE_WITH_PAYLOADS = asList(
|
||||
new Payload<>( UNICODE[ 0 ], UNICODE_PAYLOAD[ 0 ] ),
|
||||
private final static List<Payload<String>> UNICODE_WITH_PAYLOADS = asList(new Payload<>(UNICODE[0], UNICODE_PAYLOAD[0]),
|
||||
new Payload<>(UNICODE[1], UNICODE_PAYLOAD[1]),
|
||||
new Payload<>(UNICODE[2], UNICODE_PAYLOAD[2]),
|
||||
new Payload<>(UNICODE[3], UNICODE_PAYLOAD[3]));
|
||||
|
||||
public static class Food {
|
||||
|
||||
private final String name;
|
||||
|
||||
|
||||
public Food(String name) {
|
||||
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
|
||||
final int prime = 31;
|
||||
int result = 1;
|
||||
result = prime * result + ((name == null) ? 0 : name.hashCode());
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
|
||||
if (this == obj) {
|
||||
return true;
|
||||
}
|
||||
@ -92,36 +91,43 @@ public class PayloadTrieTest {
|
||||
Food other = (Food) obj;
|
||||
if (name == null) {
|
||||
return other.name == null;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
return name.equals(other.name);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void keywordAndTextAreTheSame() {
|
||||
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().addKeyword(ALPHABET[0], ALPHABET_PAYLOAD[0]).build();
|
||||
Collection<PayloadEmit<String>> emits = trie.parseText(ALPHABET[0]);
|
||||
Iterator<PayloadEmit<String>> iterator = emits.iterator();
|
||||
checkEmit(iterator.next(), 0, 2, ALPHABET[0], ALPHABET_PAYLOAD[0]);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void keywordAndTextAreTheSameFirstMatch() {
|
||||
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().addKeyword(ALPHABET[0], ALPHABET_PAYLOAD[0]).build();
|
||||
PayloadEmit<String> firstMatch = trie.firstMatch(ALPHABET[0]);
|
||||
checkEmit(firstMatch, 0, 2, ALPHABET[0], ALPHABET_PAYLOAD[0]);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void textIsLongerThanKeyword() {
|
||||
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().addKeyword(ALPHABET[0], ALPHABET_PAYLOAD[0]).build();
|
||||
Collection<PayloadEmit<String>> emits = trie.parseText(" " + ALPHABET[0]);
|
||||
Iterator<PayloadEmit<String>> iterator = emits.iterator();
|
||||
checkEmit(iterator.next(), 1, 3, ALPHABET[0], ALPHABET_PAYLOAD[0]);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void textIsLongerThanKeywordFirstMatch() {
|
||||
|
||||
@ -130,23 +136,29 @@ public class PayloadTrieTest {
|
||||
checkEmit(firstMatch, 1, 3, ALPHABET[0], ALPHABET_PAYLOAD[0]);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void variousKeywordsOneMatch() {
|
||||
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().addKeywords(ALPHABET_WITH_PAYLOADS).build();
|
||||
Collection<PayloadEmit<String>> emits = trie.parseText("bcd");
|
||||
Iterator<PayloadEmit<String>> iterator = emits.iterator();
|
||||
checkEmit(iterator.next(), 0, 2, "bcd", "alpha:bcd");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void variousKeywordsFirstMatch() {
|
||||
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().addKeywords(ALPHABET_WITH_PAYLOADS).build();
|
||||
PayloadEmit<String> firstMatch = trie.firstMatch("bcd");
|
||||
checkEmit(firstMatch, 0, 2, "bcd", "alpha:bcd");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void ushersTestAndStopOnHit() {
|
||||
|
||||
PayloadTrie<Integer> trie = PayloadTrie.<Integer>builder().addKeywords(PRONOUNS_WITH_PAYLOADS).stopOnHit().build();
|
||||
Collection<PayloadEmit<Integer>> emits = trie.parseText("ushers");
|
||||
assertEquals(1, emits.size()); // she @ 3, he @ 3, hers @ 5
|
||||
@ -154,15 +166,19 @@ public class PayloadTrieTest {
|
||||
checkEmit(iterator.next(), 2, 3, "he", 20);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void ushersTestStopOnHitSkipOne() {
|
||||
|
||||
PayloadTrie<Integer> trie = PayloadTrie.<Integer>builder().addKeywords(PRONOUNS_WITH_PAYLOADS).stopOnHit().build();
|
||||
|
||||
StatefulPayloadEmitHandler<Integer> testEmitHandler = new AbstractStatefulPayloadEmitHandler<Integer>() {
|
||||
boolean first = true;
|
||||
|
||||
|
||||
@Override
|
||||
public boolean emit(final PayloadEmit<Integer> emit) {
|
||||
|
||||
if (first) {
|
||||
// return false for the first element
|
||||
first = false;
|
||||
@ -181,8 +197,10 @@ public class PayloadTrieTest {
|
||||
checkEmit(iterator.next(), 1, 3, "she", 4);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void ushersTest() {
|
||||
|
||||
PayloadTrie<Integer> trie = PayloadTrie.<Integer>builder().addKeywords(PRONOUNS_WITH_PAYLOADS).build();
|
||||
Collection<PayloadEmit<Integer>> emits = trie.parseText("ushers");
|
||||
assertEquals(3, emits.size()); // she @ 3, he @ 3, hers @ 5
|
||||
@ -193,10 +211,17 @@ public class PayloadTrieTest {
|
||||
checkEmit(iterator.next(), 2, 5, "hers", 9);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void ushersTestWithCapitalKeywords() {
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreCase().addKeyword("HERS", "hers").addKeyword("HIS", "his")
|
||||
.addKeyword("SHE", "she").addKeyword("HE", "he").build();
|
||||
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder()
|
||||
.ignoreCase()
|
||||
.addKeyword("HERS", "hers")
|
||||
.addKeyword("HIS", "his")
|
||||
.addKeyword("SHE", "she")
|
||||
.addKeyword("HE", "he")
|
||||
.build();
|
||||
Collection<PayloadEmit<String>> emits = trie.parseText("ushers");
|
||||
assertEquals(3, emits.size()); // she @ 3, he @ 3, hers @ 5
|
||||
Iterator<PayloadEmit<String>> iterator = emits.iterator();
|
||||
@ -205,15 +230,19 @@ public class PayloadTrieTest {
|
||||
checkEmit(iterator.next(), 2, 5, "HERS", "hers");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void ushersTestFirstMatch() {
|
||||
|
||||
PayloadTrie<Integer> trie = PayloadTrie.<Integer>builder().addKeywords(PRONOUNS_WITH_PAYLOADS).build();
|
||||
PayloadEmit<Integer> firstMatch = trie.firstMatch("ushers");
|
||||
checkEmit(firstMatch, 2, 3, "he", 20);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void ushersTestByCallback() {
|
||||
|
||||
PayloadTrie<Integer> trie = PayloadTrie.<Integer>builder().addKeywords(PRONOUNS_WITH_PAYLOADS).build();
|
||||
|
||||
final List<PayloadEmit<Integer>> emits = new LinkedList<>();
|
||||
@ -230,23 +259,29 @@ public class PayloadTrieTest {
|
||||
checkEmit(iterator.next(), 2, 5, "hers", 9);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void misleadingTest() {
|
||||
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().addKeyword("hers", "pronon:hers").build();
|
||||
Collection<PayloadEmit<String>> emits = trie.parseText("h he her hers");
|
||||
Iterator<PayloadEmit<String>> iterator = emits.iterator();
|
||||
checkEmit(iterator.next(), 9, 12, "hers", "pronon:hers");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void misleadingTestFirstMatch() {
|
||||
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().addKeyword("hers", "pronon:hers").build();
|
||||
PayloadEmit<String> firstMatch = trie.firstMatch("h he her hers");
|
||||
checkEmit(firstMatch, 9, 12, "hers", "pronon:hers");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void recipes() {
|
||||
|
||||
PayloadTrie<Food> trie = PayloadTrie.<Food>builder().addKeywords(FOOD_WITH_PAYLOADS).build();
|
||||
Collection<PayloadEmit<Food>> emits = trie.parseText("2 cauliflowers, 3 tomatoes, 4 slices of veal, 100g broccoli");
|
||||
Iterator<PayloadEmit<Food>> iterator = emits.iterator();
|
||||
@ -256,17 +291,20 @@ public class PayloadTrieTest {
|
||||
checkEmit(iterator.next(), 51, 58, "broccoli", new Food("broccoli"));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void recipesFirstMatch() {
|
||||
|
||||
PayloadTrie<Food> trie = PayloadTrie.<Food>builder().addKeywords(FOOD_WITH_PAYLOADS).build();
|
||||
PayloadEmit<Food> firstMatch = trie.firstMatch("2 cauliflowers, 3 tomatoes, 4 slices of veal, 100g broccoli");
|
||||
checkEmit(firstMatch, 2, 12, "cauliflower", new Food("cauliflower"));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void longAndShortOverlappingMatch() {
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().addKeyword("he", "pronon:he").addKeyword("hehehehe", "garbage")
|
||||
.build();
|
||||
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().addKeyword("he", "pronon:he").addKeyword("hehehehe", "garbage").build();
|
||||
Collection<PayloadEmit<String>> emits = trie.parseText("hehehehehe");
|
||||
Iterator<PayloadEmit<String>> iterator = emits.iterator();
|
||||
checkEmit(iterator.next(), 0, 1, "he", "pronon:he");
|
||||
@ -278,10 +316,16 @@ public class PayloadTrieTest {
|
||||
checkEmit(iterator.next(), 2, 9, "hehehehe", "garbage");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void nonOverlapping() {
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreOverlaps().addKeyword("ab", "alpha:ab")
|
||||
.addKeyword("cba", "alpha:cba").addKeyword("ababc", "alpha:ababc").build();
|
||||
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder()
|
||||
.ignoreOverlaps()
|
||||
.addKeyword("ab", "alpha:ab")
|
||||
.addKeyword("cba", "alpha:cba")
|
||||
.addKeyword("ababc", "alpha:ababc")
|
||||
.build();
|
||||
Collection<PayloadEmit<String>> emits = trie.parseText("ababcbab");
|
||||
assertEquals(2, emits.size());
|
||||
Iterator<PayloadEmit<String>> iterator = emits.iterator();
|
||||
@ -290,49 +334,79 @@ public class PayloadTrieTest {
|
||||
checkEmit(iterator.next(), 6, 7, "ab", "alpha:ab");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void nonOverlappingFirstMatch() {
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreOverlaps().addKeyword("ab", "alpha:ab")
|
||||
.addKeyword("cba", "alpha:cba").addKeyword("ababc", "alpha:ababc").build();
|
||||
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder()
|
||||
.ignoreOverlaps()
|
||||
.addKeyword("ab", "alpha:ab")
|
||||
.addKeyword("cba", "alpha:cba")
|
||||
.addKeyword("ababc", "alpha:ababc")
|
||||
.build();
|
||||
PayloadEmit<String> firstMatch = trie.firstMatch("ababcbab");
|
||||
|
||||
checkEmit(firstMatch, 0, 4, "ababc", "alpha:ababc");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void containsMatch() {
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreOverlaps().addKeyword("ab", "alpha:ab")
|
||||
.addKeyword("cba", "alpha:cba").addKeyword("ababc", "alpha:ababc").build();
|
||||
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder()
|
||||
.ignoreOverlaps()
|
||||
.addKeyword("ab", "alpha:ab")
|
||||
.addKeyword("cba", "alpha:cba")
|
||||
.addKeyword("ababc", "alpha:ababc")
|
||||
.build();
|
||||
assertTrue(trie.containsMatch("ababcbab"));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void startOfChurchillSpeech() {
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreOverlaps().addKeyword("T").addKeyword("u").addKeyword("ur")
|
||||
.addKeyword("r").addKeyword("urn").addKeyword("ni").addKeyword("i").addKeyword("in").addKeyword("n")
|
||||
.addKeyword("urning").build();
|
||||
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder()
|
||||
.ignoreOverlaps()
|
||||
.addKeyword("T")
|
||||
.addKeyword("u")
|
||||
.addKeyword("ur")
|
||||
.addKeyword("r")
|
||||
.addKeyword("urn")
|
||||
.addKeyword("ni")
|
||||
.addKeyword("i")
|
||||
.addKeyword("in")
|
||||
.addKeyword("n")
|
||||
.addKeyword("urning")
|
||||
.build();
|
||||
Collection<PayloadEmit<String>> emits = trie.parseText("Turning");
|
||||
assertEquals(2, emits.size());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void partialMatch() {
|
||||
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().onlyWholeWords().addKeyword("sugar", "food:sugar").build();
|
||||
Collection<PayloadEmit<String>> emits = trie.parseText("sugarcane sugarcane sugar canesugar"); // left, middle, right test
|
||||
assertEquals(1, emits.size()); // Match must not be made
|
||||
checkEmit(emits.iterator().next(), 20, 24, "sugar", "food:sugar");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void partialMatchFirstMatch() {
|
||||
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().onlyWholeWords().addKeyword("sugar", "food:sugar").build();
|
||||
PayloadEmit<String> firstMatch = trie.firstMatch("sugarcane sugarcane sugar canesugar"); // left, middle, right test
|
||||
|
||||
checkEmit(firstMatch, 20, 24, "sugar", "food:sugar");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void tokenizeFullSentence() {
|
||||
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().addKeywords(GREEK_LETTERS_WITH_PAYLOADS).build();
|
||||
Collection<PayloadToken<String>> tokens = trie.tokenize("Hear: Alpha team first, Beta from the rear, Gamma in reserve");
|
||||
assertEquals(7, tokens.size());
|
||||
@ -346,11 +420,12 @@ public class PayloadTrieTest {
|
||||
assertEquals(" in reserve", tokensIt.next().getFragment());
|
||||
}
|
||||
|
||||
|
||||
// @see https://github.com/robert-bor/aho-corasick/issues/5
|
||||
@Test
|
||||
public void testStringIndexOutOfBoundsException() {
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreCase().onlyWholeWords().addKeywords(UNICODE_WITH_PAYLOADS)
|
||||
.build();
|
||||
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreCase().onlyWholeWords().addKeywords(UNICODE_WITH_PAYLOADS).build();
|
||||
Collection<PayloadEmit<String>> emits = trie.parseText("TurninG OnCe AgAiN BÖRKÜ");
|
||||
assertEquals(4, emits.size()); // Match must not be made
|
||||
Iterator<PayloadEmit<String>> it = emits.iterator();
|
||||
@ -361,8 +436,10 @@ public class PayloadTrieTest {
|
||||
checkEmit(it.next(), 19, 23, "börkü", "uni:börkü");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testIgnoreCase() {
|
||||
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreCase().addKeywords(UNICODE_WITH_PAYLOADS).build();
|
||||
Collection<PayloadEmit<String>> emits = trie.parseText("TurninG OnCe AgAiN BÖRKÜ");
|
||||
assertEquals(4, emits.size()); // Match must not be made
|
||||
@ -374,65 +451,75 @@ public class PayloadTrieTest {
|
||||
checkEmit(it.next(), 19, 23, "börkü", "uni:börkü");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testIgnoreCaseFirstMatch() {
|
||||
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreCase().addKeywords(UNICODE_WITH_PAYLOADS).build();
|
||||
PayloadEmit<String> firstMatch = trie.firstMatch("TurninG OnCe AgAiN BÖRKÜ");
|
||||
|
||||
checkEmit(firstMatch, 0, 6, "turning", "uni:turning");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void tokenizeTokensInSequence() {
|
||||
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().addKeywords(GREEK_LETTERS_WITH_PAYLOADS).build();
|
||||
Collection<PayloadToken<String>> tokens = trie.tokenize("Alpha Beta Gamma");
|
||||
assertEquals(5, tokens.size());
|
||||
}
|
||||
|
||||
|
||||
// @see https://github.com/robert-bor/aho-corasick/issues/7
|
||||
@Test
|
||||
public void testZeroLength() {
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreOverlaps().onlyWholeWords().ignoreCase().addKeyword("")
|
||||
.build();
|
||||
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreOverlaps().onlyWholeWords().ignoreCase().addKeyword("").build();
|
||||
trie.tokenize(
|
||||
"Try a natural lip and subtle bronzer to keep all the focus on those big bright eyes with NARS Eyeshadow Duo in Rated R And the winner is... Boots No7 Advanced Renewal Anti-ageing Glycolic Peel Kit ($25 amazon.com) won most-appealing peel.");
|
||||
}
|
||||
|
||||
|
||||
// @see https://github.com/robert-bor/aho-corasick/issues/8
|
||||
@Test
|
||||
public void testUnicode1() {
|
||||
|
||||
String target = "LİKE THIS"; // The second character ('İ') is Unicode, which was read by AC as a 2-byte char
|
||||
assertEquals("THIS", target.substring(5, 9)); // Java does it the right way
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreCase().onlyWholeWords().addKeyword("this", "pronon:this")
|
||||
.build();
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreCase().onlyWholeWords().addKeyword("this", "pronon:this").build();
|
||||
Collection<PayloadEmit<String>> emits = trie.parseText(target);
|
||||
assertEquals(1, emits.size());
|
||||
Iterator<PayloadEmit<String>> it = emits.iterator();
|
||||
checkEmit(it.next(), 5, 8, "this", "pronon:this");
|
||||
}
|
||||
|
||||
|
||||
// @see https://github.com/robert-bor/aho-corasick/issues/8
|
||||
@Test
|
||||
public void testUnicode2() {
|
||||
|
||||
String target = "LİKE THIS"; // The second character ('İ') is Unicode, which was read by AC as a 2-byte char
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreCase().onlyWholeWords().addKeyword("this", "pronon:this")
|
||||
.build();
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreCase().onlyWholeWords().addKeyword("this", "pronon:this").build();
|
||||
assertEquals("THIS", target.substring(5, 9)); // Java does it the right way
|
||||
PayloadEmit<String> firstMatch = trie.firstMatch(target);
|
||||
checkEmit(firstMatch, 5, 8, "this", "pronon:this");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testPartialMatchWhiteSpaces() {
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().onlyWholeWordsWhiteSpaceSeparated()
|
||||
.addKeyword("#sugar-123", "sugar").build();
|
||||
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().onlyWholeWordsWhiteSpaceSeparated().addKeyword("#sugar-123", "sugar").build();
|
||||
Collection<PayloadEmit<String>> emits = trie.parseText("#sugar-123 #sugar-1234"); // left, middle, right test
|
||||
assertEquals(1, emits.size()); // Match must not be made
|
||||
checkEmit(emits.iterator().next(), 0, 9, "#sugar-123", "sugar");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testLargeString() {
|
||||
|
||||
final int interval = 100;
|
||||
final int textSize = 1000000;
|
||||
final String keyword = FOOD[1];
|
||||
@ -448,17 +535,21 @@ public class PayloadTrieTest {
|
||||
assertEquals(textSize / interval, emits.size());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_containsMatchWithCaseInsensitive() {
|
||||
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().ignoreCase().addKeyword("foo", "bar").build();
|
||||
|
||||
assertTrue(trie.containsMatch("FOOBAR"));
|
||||
assertFalse(trie.containsMatch("FO!?AR"));
|
||||
}
|
||||
|
||||
|
||||
// @see https://github.com/robert-bor/aho-corasick/issues/85
|
||||
@Test
|
||||
public void test_wholeWords() {
|
||||
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().addKeyword("foo", "bar").onlyWholeWords().build();
|
||||
// access via PayloadTrie.parseText(CharSequence)
|
||||
Collection<PayloadEmit<String>> result1 = trie.parseText("foobar");
|
||||
@ -470,9 +561,11 @@ public class PayloadTrieTest {
|
||||
assertEquals(result1, result2);
|
||||
}
|
||||
|
||||
|
||||
// @see https://github.com/robert-bor/aho-corasick/issues/85
|
||||
@Test
|
||||
public void test_wholeWordsWhiteSpaceSeparated() {
|
||||
|
||||
PayloadTrie<String> trie = PayloadTrie.<String>builder().addKeyword("foo", "bar").onlyWholeWordsWhiteSpaceSeparated().build();
|
||||
// access via PayloadTrie.parseText(CharSequence)
|
||||
Collection<PayloadEmit<String>> result1 = trie.parseText("foo#bar");
|
||||
@ -484,39 +577,31 @@ public class PayloadTrieTest {
|
||||
assertEquals(result1, result2);
|
||||
}
|
||||
|
||||
private void checkEmit(
|
||||
final PayloadEmit<Food> next,
|
||||
final int expectedStart,
|
||||
final int expectedEnd,
|
||||
final String expectedKeyword,
|
||||
final Food expectedPayload) {
|
||||
|
||||
private void checkEmit(final PayloadEmit<Food> next, final int expectedStart, final int expectedEnd, final String expectedKeyword, final Food expectedPayload) {
|
||||
|
||||
assertEquals("Start of emit should have been " + expectedStart, expectedStart, next.getStart());
|
||||
assertEquals("End of emit should have been " + expectedEnd, expectedEnd, next.getEnd());
|
||||
assertEquals("Keyword of emit shoud be " + expectedKeyword, expectedKeyword, next.getKeyword());
|
||||
assertEquals("Payload of emit shoud be " + expectedPayload, expectedPayload, next.getPayload());
|
||||
}
|
||||
|
||||
private void checkEmit(
|
||||
final PayloadEmit<Integer> next,
|
||||
final int expectedStart,
|
||||
final int expectedEnd,
|
||||
final String expectedKeyword,
|
||||
final Integer expectedPayload) {
|
||||
|
||||
private void checkEmit(final PayloadEmit<Integer> next, final int expectedStart, final int expectedEnd, final String expectedKeyword, final Integer expectedPayload) {
|
||||
|
||||
assertEquals("Start of emit should have been " + expectedStart, expectedStart, next.getStart());
|
||||
assertEquals("End of emit should have been " + expectedEnd, expectedEnd, next.getEnd());
|
||||
assertEquals("Keyword of emit shoud be " + expectedKeyword, expectedKeyword, next.getKeyword());
|
||||
assertEquals("Payload of emit shoud be " + expectedPayload, expectedPayload, next.getPayload());
|
||||
}
|
||||
|
||||
private void checkEmit(
|
||||
final PayloadEmit<String> next,
|
||||
final int expectedStart,
|
||||
final int expectedEnd,
|
||||
final String expectedKeyword,
|
||||
final String expectedPayload) {
|
||||
|
||||
private void checkEmit(final PayloadEmit<String> next, final int expectedStart, final int expectedEnd, final String expectedKeyword, final String expectedPayload) {
|
||||
|
||||
assertEquals("Start of emit should have been " + expectedStart, expectedStart, next.getStart());
|
||||
assertEquals("End of emit should have been " + expectedEnd, expectedEnd, next.getEnd());
|
||||
assertEquals("Keyword of emit shoud be " + expectedKeyword, expectedKeyword, next.getKeyword());
|
||||
assertEquals("Payload of emit shoud be " + expectedPayload, expectedPayload, next.getPayload());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -11,11 +11,9 @@ public class StateTest {
|
||||
|
||||
@Test
|
||||
public void test_constructSequenceOfCharacters() {
|
||||
|
||||
final State rootState = new State();
|
||||
rootState
|
||||
.addState('a')
|
||||
.addState('b')
|
||||
.addState('c');
|
||||
rootState.addState('a').addState('b').addState('c');
|
||||
State currentState = rootState.nextState('a');
|
||||
assertEquals(1, currentState.getDepth());
|
||||
currentState = currentState.nextState('b');
|
||||
@ -26,8 +24,10 @@ public class StateTest {
|
||||
assertNull(currentState);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_getStates() {
|
||||
|
||||
final State rootState = new State();
|
||||
rootState.addState("foo");
|
||||
final State currentState = rootState.nextState('f');
|
||||
@ -37,8 +37,10 @@ public class StateTest {
|
||||
assertEquals(currentState, states.iterator().next());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_getTransitions() {
|
||||
|
||||
final State rootState = new State();
|
||||
rootState.addState("foo");
|
||||
final State currentState = rootState.nextState('f');
|
||||
@ -48,20 +50,23 @@ public class StateTest {
|
||||
assertEquals(Character.valueOf('f'), transitions.iterator().next());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_failure() {
|
||||
public void test_getFailure() {
|
||||
|
||||
final State failureState = new State();
|
||||
final State rootState = new State();
|
||||
rootState.setFailure(failureState);
|
||||
|
||||
assertEquals(failureState, rootState.failure());
|
||||
assertEquals(failureState, rootState.getFailure());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_checkEmits() {
|
||||
|
||||
final State rootState = new State();
|
||||
rootState.addState('a')
|
||||
.addEmit(Collections.singleton("tag"));
|
||||
rootState.addState('a').addEmit(Collections.singleton("tag"));
|
||||
final Collection<String> actual = rootState.nextState('a').emit();
|
||||
|
||||
assertEquals(1, actual.size());
|
||||
|
||||
@ -6,6 +6,7 @@ import static java.util.concurrent.ThreadLocalRandom.current;
|
||||
* Contains functionality common to tests.
|
||||
*/
|
||||
public class TestHelper {
|
||||
|
||||
/**
|
||||
* Injects keywords into a string builder.
|
||||
*
|
||||
@ -15,16 +16,15 @@ public class TestHelper {
|
||||
* @param interval How often to inject the keyword.
|
||||
*/
|
||||
@SuppressWarnings("SameParameterValue")
|
||||
static void injectKeyword(
|
||||
final StringBuilder source,
|
||||
final String keyword,
|
||||
final int interval ) {
|
||||
static void injectKeyword(final StringBuilder source, final String keyword, final int interval) {
|
||||
|
||||
final int length = source.length();
|
||||
for (int i = 0; i < length; i += interval) {
|
||||
source.replace(i, i + keyword.length(), keyword);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Generates a random sequence of ASCII numbers.
|
||||
*
|
||||
@ -33,12 +33,15 @@ public class TestHelper {
|
||||
*/
|
||||
@SuppressWarnings("SameParameterValue")
|
||||
public static StringBuilder randomNumbers(int count) {
|
||||
final StringBuilder sb = new StringBuilder( count );
|
||||
|
||||
while( --count > 0 ) {
|
||||
int localCount = count;
|
||||
final StringBuilder sb = new StringBuilder(localCount);
|
||||
|
||||
while (--localCount > 0) {
|
||||
sb.append(current().nextInt(0, 10));
|
||||
}
|
||||
|
||||
return sb;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -21,115 +21,143 @@ import static org.junit.Assert.*;
|
||||
* Test the {@link Trie} class functionality.
|
||||
*/
|
||||
public class TrieTest {
|
||||
private final static String[] ALPHABET = new String[]{
|
||||
"abc", "bcd", "cde"
|
||||
};
|
||||
|
||||
private final static String[] PRONOUNS = new String[]{
|
||||
"hers", "his", "she", "he"
|
||||
};
|
||||
private final static String[] ALPHABET = new String[]{"abc", "bcd", "cde"};
|
||||
|
||||
private final static String[] FOOD = new String[]{
|
||||
"veal", "cauliflower", "broccoli", "tomatoes"
|
||||
};
|
||||
private final static String[] PRONOUNS = new String[]{"hers", "his", "she", "he"};
|
||||
|
||||
private final static String[] GREEK_LETTERS = new String[]{
|
||||
"Alpha", "Beta", "Gamma"
|
||||
};
|
||||
private final static String[] FOOD = new String[]{"veal", "cauliflower", "broccoli", "tomatoes"};
|
||||
|
||||
private final static String[] GREEK_LETTERS = new String[]{"Alpha", "Beta", "Gamma"};
|
||||
|
||||
private final static String[] UNICODE = new String[]{"turning", "once", "again", "börkü"};
|
||||
|
||||
private final static String[] UNICODE = new String[]{
|
||||
"turning", "once", "again", "börkü"
|
||||
};
|
||||
|
||||
private static Trie trie(final String keyword) {
|
||||
return Trie.builder()
|
||||
.addKeyword( keyword )
|
||||
.build();
|
||||
|
||||
return Trie.builder().addKeyword(keyword).build();
|
||||
}
|
||||
|
||||
private static Trie trie( final String[] keywords ) {
|
||||
return Trie.builder()
|
||||
.addKeywords( keywords )
|
||||
.build();
|
||||
|
||||
private static Trie trieIgnoreWhiteSpace(final String keyword) {
|
||||
|
||||
return Trie.builder().addKeyword(keyword).ignoreWhiteSpace().build();
|
||||
}
|
||||
|
||||
|
||||
private static Trie trie(final String[] keywords) {
|
||||
|
||||
return Trie.builder().addKeywords(keywords).ignoreWhiteSpace().build();
|
||||
}
|
||||
|
||||
|
||||
private static Trie trieIgnoreWhiteSpace(final String[] keywords) {
|
||||
|
||||
return Trie.builder().addKeywords(keywords).ignoreWhiteSpace().build();
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_KeywordAndTextAreTheSame() {
|
||||
|
||||
final Trie trie = trie(ALPHABET[0]);
|
||||
final Collection<Emit> emits = trie.parseText(ALPHABET[0]);
|
||||
final Iterator<Emit> iterator = emits.iterator();
|
||||
checkEmit(iterator.next(), 0, 2, ALPHABET[0]);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_ignoringWhitespace_KeywordAndTextAreTheSame() {
|
||||
|
||||
final Trie trie = trieIgnoreWhiteSpace(ALPHABET);
|
||||
final Collection<Emit> emits = trie.parseText("a b c d e");
|
||||
final Iterator<Emit> iterator = emits.iterator();
|
||||
checkEmit(iterator.next(), 0, 4, ALPHABET[0]);
|
||||
checkEmit(iterator.next(), 2, 6, ALPHABET[1]);
|
||||
checkEmit(iterator.next(), 4, 8, ALPHABET[2]);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_KeywordAndTextAreTheSameFirstMatch() {
|
||||
|
||||
final Trie trie = trie(ALPHABET[0]);
|
||||
final Emit firstMatch = trie.firstMatch(ALPHABET[0]);
|
||||
checkEmit(firstMatch, 0, 2, ALPHABET[0]);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_TextIsLongerThanKeyword() {
|
||||
|
||||
final Trie trie = trie(ALPHABET[0]);
|
||||
final Collection<Emit> emits = trie.parseText(" " + ALPHABET[0]);
|
||||
final Iterator<Emit> iterator = emits.iterator();
|
||||
checkEmit(iterator.next(), 1, 3, ALPHABET[0]);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_TextIsLongerThanKeywordFirstMatch() {
|
||||
|
||||
final Trie trie = trie(ALPHABET[0]);
|
||||
final Emit firstMatch = trie.firstMatch(" " + ALPHABET[0]);
|
||||
checkEmit(firstMatch, 1, 3, ALPHABET[0]);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_VariousKeywordsOneMatch() {
|
||||
|
||||
final Trie trie = trie(ALPHABET);
|
||||
final Collection<Emit> emits = trie.parseText("bcd");
|
||||
final Iterator<Emit> iterator = emits.iterator();
|
||||
checkEmit(iterator.next(), 0, 2, "bcd");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_VariousKeywordsFirstMatch() {
|
||||
|
||||
final Trie trie = trie(ALPHABET);
|
||||
final Emit firstMatch = trie.firstMatch("bc d");
|
||||
checkEmit( firstMatch, 0, 2, "bcd" );
|
||||
checkEmit(firstMatch, 0, 3, "bcd");
|
||||
}
|
||||
|
||||
|
||||
@Test(expected = AssertionError.class)
|
||||
public void test_NullInputTextFirstMatch() {
|
||||
|
||||
final Trie trie = trie(ALPHABET);
|
||||
final Emit firstMatch = trie.firstMatch(null);
|
||||
assertNull(firstMatch);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_UshersTestAndStopOnHit() {
|
||||
final Trie trie = Trie.builder()
|
||||
.addKeywords( PRONOUNS )
|
||||
.stopOnHit()
|
||||
.build();
|
||||
|
||||
final Trie trie = Trie.builder().addKeywords(PRONOUNS).stopOnHit().build();
|
||||
final Collection<Emit> emits = trie.parseText("ushers");
|
||||
assertEquals(1, emits.size()); // she @ 3, he @ 3, hers @ 5
|
||||
final Iterator<Emit> iterator = emits.iterator();
|
||||
checkEmit(iterator.next(), 2, 3, "he");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_UshersTestStopOnHitSkipOne() {
|
||||
final Trie trie = Trie.builder()
|
||||
.addKeywords( PRONOUNS )
|
||||
.stopOnHit()
|
||||
.build();
|
||||
|
||||
final StatefulEmitHandler testEmitHandler =
|
||||
new AbstractStatefulEmitHandler() {
|
||||
final Trie trie = Trie.builder().addKeywords(PRONOUNS).stopOnHit().build();
|
||||
|
||||
final StatefulEmitHandler testEmitHandler = new AbstractStatefulEmitHandler() {
|
||||
boolean first = true;
|
||||
|
||||
|
||||
@Override
|
||||
public boolean emit(final Emit emit) {
|
||||
|
||||
if (first) {
|
||||
// return false for the first element
|
||||
first = false;
|
||||
@ -147,8 +175,10 @@ public class TrieTest {
|
||||
checkEmit(iterator.next(), 1, 3, "she");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_UshersTest() {
|
||||
|
||||
final Trie trie = trie(PRONOUNS);
|
||||
final Collection<Emit> emits = trie.parseText("ushers");
|
||||
assertEquals(3, emits.size()); // she @ 3, he @ 3, hers @ 5
|
||||
@ -158,15 +188,11 @@ public class TrieTest {
|
||||
checkEmit(iterator.next(), 2, 5, "hers");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_UshersTestWithCapitalKeywords() {
|
||||
final Trie trie = Trie.builder()
|
||||
.ignoreCase()
|
||||
.addKeyword( "HERS" )
|
||||
.addKeyword( "HIS" )
|
||||
.addKeyword( "SHE" )
|
||||
.addKeyword( "HE" )
|
||||
.build();
|
||||
|
||||
final Trie trie = Trie.builder().ignoreCase().addKeyword("HERS").addKeyword("HIS").addKeyword("SHE").addKeyword("HE").build();
|
||||
final Collection<Emit> emits = trie.parseText("ushers");
|
||||
assertEquals(3, emits.size()); // she @ 3, he @ 3, hers @ 5
|
||||
final Iterator<Emit> iterator = emits.iterator();
|
||||
@ -175,15 +201,19 @@ public class TrieTest {
|
||||
checkEmit(iterator.next(), 2, 5, "HERS");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_UshersTestFirstMatch() {
|
||||
|
||||
final Trie trie = trie(PRONOUNS);
|
||||
final Emit firstMatch = trie.firstMatch("ushers");
|
||||
checkEmit(firstMatch, 2, 3, "he");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_UshersTestByCallback() {
|
||||
|
||||
final Trie trie = trie(PRONOUNS);
|
||||
final List<Emit> emits = new ArrayList<>();
|
||||
final EmitHandler emitHandler = emit -> {
|
||||
@ -198,26 +228,31 @@ public class TrieTest {
|
||||
checkEmit(iterator.next(), 2, 5, "hers");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_MisleadingTest() {
|
||||
|
||||
final Trie trie = trie("hers");
|
||||
final Collection<Emit> emits = trie.parseText("h he her hers");
|
||||
final Iterator<Emit> iterator = emits.iterator();
|
||||
checkEmit(iterator.next(), 9, 12, "hers");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_MisleadingTestFirstMatch() {
|
||||
|
||||
final Trie trie = trie("hers");
|
||||
final Emit firstMatch = trie.firstMatch("h he her hers");
|
||||
checkEmit(firstMatch, 9, 12, "hers");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_Recipes() {
|
||||
|
||||
final Trie trie = trie(FOOD);
|
||||
final Collection<Emit> emits = trie.parseText(
|
||||
"2 cauliflowers, 3 tomatoes, 4 slices of veal, 100g broccoli" );
|
||||
final Collection<Emit> emits = trie.parseText("2 cauliflowers, 3 tomatoes, 4 slices of veal, 100g broccoli");
|
||||
final Iterator<Emit> iterator = emits.iterator();
|
||||
checkEmit(iterator.next(), 2, 12, "cauliflower");
|
||||
checkEmit(iterator.next(), 18, 25, "tomatoes");
|
||||
@ -225,21 +260,21 @@ public class TrieTest {
|
||||
checkEmit(iterator.next(), 51, 58, "broccoli");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_RecipesFirstMatch() {
|
||||
|
||||
final Trie trie = trie(FOOD);
|
||||
final Emit firstMatch = trie.firstMatch(
|
||||
"2 cauliflowers, 3 tomatoes, 4 slices of veal, 100g broccoli" );
|
||||
final Emit firstMatch = trie.firstMatch("2 cauliflowers, 3 tomatoes, 4 slices of veal, 100g broccoli");
|
||||
|
||||
checkEmit(firstMatch, 2, 12, "cauliflower");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_LongAndShortOverlappingMatch() {
|
||||
final Trie trie = Trie.builder()
|
||||
.addKeyword( "he" )
|
||||
.addKeyword( "hehehehe" )
|
||||
.build();
|
||||
|
||||
final Trie trie = Trie.builder().addKeyword("he").addKeyword("hehehehe").build();
|
||||
final Collection<Emit> emits = trie.parseText("hehehehehe");
|
||||
final Iterator<Emit> iterator = emits.iterator();
|
||||
checkEmit(iterator.next(), 0, 1, "he");
|
||||
@ -251,14 +286,11 @@ public class TrieTest {
|
||||
checkEmit(iterator.next(), 2, 9, "hehehehe");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_NonOverlapping() {
|
||||
final Trie trie = Trie.builder()
|
||||
.ignoreOverlaps()
|
||||
.addKeyword( "ab" )
|
||||
.addKeyword( "cba" )
|
||||
.addKeyword( "ababc" )
|
||||
.build();
|
||||
|
||||
final Trie trie = Trie.builder().ignoreOverlaps().addKeyword("ab").addKeyword("cba").addKeyword("ababc").build();
|
||||
final Collection<Emit> emits = trie.parseText("ababcbab");
|
||||
assertEquals(2, emits.size());
|
||||
final Iterator<Emit> iterator = emits.iterator();
|
||||
@ -267,32 +299,28 @@ public class TrieTest {
|
||||
checkEmit(iterator.next(), 6, 7, "ab");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_NonOverlappingFirstMatch() {
|
||||
final Trie trie = Trie.builder()
|
||||
.ignoreOverlaps()
|
||||
.addKeyword( "ab" )
|
||||
.addKeyword( "cba" )
|
||||
.addKeyword( "ababc" )
|
||||
.build();
|
||||
|
||||
final Trie trie = Trie.builder().ignoreOverlaps().addKeyword("ab").addKeyword("cba").addKeyword("ababc").build();
|
||||
final Emit firstMatch = trie.firstMatch("ababcbab");
|
||||
|
||||
checkEmit(firstMatch, 0, 4, "ababc");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_ContainsMatch() {
|
||||
final Trie trie = Trie.builder()
|
||||
.ignoreOverlaps()
|
||||
.addKeyword( "ab" )
|
||||
.addKeyword( "cba" )
|
||||
.addKeyword( "ababc" )
|
||||
.build();
|
||||
|
||||
final Trie trie = Trie.builder().ignoreOverlaps().addKeyword("ab").addKeyword("cba").addKeyword("ababc").build();
|
||||
assertTrue(trie.containsMatch("ababcbab"));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_StartOfChurchillSpeech() {
|
||||
|
||||
final Trie trie = Trie.builder()
|
||||
.ignoreOverlaps()
|
||||
.addKeyword("T")
|
||||
@ -310,37 +338,34 @@ public class TrieTest {
|
||||
assertEquals(2, emits.size());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_PartialMatch() {
|
||||
final Trie trie = Trie.builder()
|
||||
.onlyWholeWords()
|
||||
.addKeyword( "sugar" )
|
||||
.build();
|
||||
final Collection<Emit> emits = trie.parseText(
|
||||
"sugarcane sugarcane sugar canesugar" ); // left, middle, right test
|
||||
|
||||
final Trie trie = Trie.builder().onlyWholeWords().addKeyword("sugar").build();
|
||||
final Collection<Emit> emits = trie.parseText("sugarcane sugarcane sugar canesugar"); // left, middle, right test
|
||||
assertEquals(1, emits.size()); // Match must not be made
|
||||
checkEmit(emits.iterator().next(), 20, 24, "sugar");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_PartialMatchFirstMatch() {
|
||||
final Trie trie = Trie.builder()
|
||||
.onlyWholeWords()
|
||||
.addKeyword( "sugar" )
|
||||
.build();
|
||||
|
||||
final Trie trie = Trie.builder().onlyWholeWords().addKeyword("sugar").build();
|
||||
|
||||
// left, middle, right test
|
||||
final Emit firstMatch =
|
||||
trie.firstMatch( "sugarcane sugarcane sugar canesugar" );
|
||||
final Emit firstMatch = trie.firstMatch("sugarcane sugarcane sugar canesugar");
|
||||
|
||||
checkEmit(firstMatch, 20, 24, "sugar");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_TokenizeFullSentence() {
|
||||
|
||||
final Trie trie = trie(GREEK_LETTERS);
|
||||
final Collection<Token> tokens = trie.tokenize(
|
||||
"Hear: Alpha team first, Beta from the rear, Gamma in reserve" );
|
||||
final Collection<Token> tokens = trie.tokenize("Hear: Alpha team first, Beta from the rear, Gamma in reserve");
|
||||
assertEquals(7, tokens.size());
|
||||
final Iterator<Token> tokensIt = tokens.iterator();
|
||||
assertEquals("Hear: ", tokensIt.next().getFragment());
|
||||
@ -352,16 +377,14 @@ public class TrieTest {
|
||||
assertEquals(" in reserve", tokensIt.next().getFragment());
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Test boundary check with case-insensitive matches with whole words.
|
||||
*/
|
||||
@Test
|
||||
public void test_StringIndexOutOfBoundsException() {
|
||||
final Trie trie = Trie.builder()
|
||||
.ignoreCase()
|
||||
.onlyWholeWords()
|
||||
.addKeywords( UNICODE )
|
||||
.build();
|
||||
|
||||
final Trie trie = Trie.builder().ignoreCase().onlyWholeWords().addKeywords(UNICODE).build();
|
||||
final Collection<Emit> emits = trie.parseText("TurninG OnCe AgAiN BÖRKÜ");
|
||||
assertEquals(4, emits.size()); // Match must not be made
|
||||
final Iterator<Emit> it = emits.iterator();
|
||||
@ -371,12 +394,11 @@ public class TrieTest {
|
||||
checkEmit(it.next(), 19, 23, "börkü");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_IgnoreCase() {
|
||||
final Trie trie = Trie.builder()
|
||||
.ignoreCase()
|
||||
.addKeywords( UNICODE )
|
||||
.build();
|
||||
|
||||
final Trie trie = Trie.builder().ignoreCase().addKeywords(UNICODE).build();
|
||||
final Collection<Emit> emits = trie.parseText("TurninG OnCe AgAiN BÖRKÜ");
|
||||
assertEquals(4, emits.size()); // Match must not be made
|
||||
final Iterator<Emit> it = emits.iterator();
|
||||
@ -386,24 +408,26 @@ public class TrieTest {
|
||||
checkEmit(it.next(), 19, 23, "börkü");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_IgnoreCaseFirstMatch() {
|
||||
final Trie trie = Trie.builder()
|
||||
.ignoreCase()
|
||||
.addKeywords( UNICODE )
|
||||
.build();
|
||||
|
||||
final Trie trie = Trie.builder().ignoreCase().addKeywords(UNICODE).build();
|
||||
final Emit firstMatch = trie.firstMatch("TurninG OnCe AgAiN BÖRKÜ");
|
||||
|
||||
checkEmit(firstMatch, 0, 6, "turning");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_TokenizeTokensInSequence() {
|
||||
|
||||
final Trie trie = trie(GREEK_LETTERS);
|
||||
final Collection<Token> tokens = trie.tokenize("Alpha Beta Gamma");
|
||||
assertEquals(5, tokens.size());
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Fix adding a word of size 0 ("") as a dictionary. A bug in the dictionary
|
||||
* parsing code (at end of line) caused it to generate words of 0 length,
|
||||
@ -412,32 +436,23 @@ public class TrieTest {
|
||||
*/
|
||||
@Test
|
||||
public void test_ZeroLength() {
|
||||
final Trie trie = Trie.builder()
|
||||
.ignoreOverlaps()
|
||||
.onlyWholeWords()
|
||||
.ignoreCase()
|
||||
.addKeyword( "" )
|
||||
.build();
|
||||
trie.tokenize(
|
||||
"Try a natural lip and subtle bronzer to keep all the focus on those " +
|
||||
"big bright eyes with NARS Eyeshadow Duo in Rated R And the " +
|
||||
"winner is... Boots No7 Advanced Renewal Anti-ageing Glycolic " +
|
||||
"Peel Kit ($25 amazon.com) won most-appealing peel." );
|
||||
|
||||
final Trie trie = Trie.builder().ignoreOverlaps().onlyWholeWords().ignoreCase().addKeyword("").build();
|
||||
trie.tokenize("Try a natural lip and subtle bronzer to keep all the focus on those "
|
||||
+ "big bright eyes with NARS Eyeshadow Duo in Rated R And the "
|
||||
+ "winner is... Boots No7 Advanced Renewal Anti-ageing Glycolic "
|
||||
+ "Peel Kit ($25 amazon.com) won most-appealing peel.");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_Emit_PunctuatedKeyword_AllOffsetsFound() {
|
||||
|
||||
final String keyword = "{{var}}";
|
||||
final int len = keyword.length() - 1;
|
||||
final Trie trie = builder()
|
||||
.ignoreOverlaps()
|
||||
.addKeyword( keyword )
|
||||
.build();
|
||||
final Trie trie = builder().ignoreOverlaps().addKeyword(keyword).build();
|
||||
|
||||
final Collection<Emit> emits = trie.parseText(
|
||||
format( "__%s__ **%s** {{%s}} %s%s",
|
||||
keyword, keyword, keyword, keyword, keyword )
|
||||
);
|
||||
final Collection<Emit> emits = trie.parseText(format("__%s__ **%s** {{%s}} %s%s", keyword, keyword, keyword, keyword, keyword));
|
||||
|
||||
assertEquals(5, emits.size());
|
||||
final Iterator<Emit> it = emits.iterator();
|
||||
@ -449,6 +464,7 @@ public class TrieTest {
|
||||
checkEmit(it.next(), 43, 43 + len, keyword);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Notice the capital I with a dot. The code used to compute the offsets
|
||||
* at (6, 9), which caused {@link Trie#tokenize(String)} to crash because
|
||||
@ -461,17 +477,15 @@ public class TrieTest {
|
||||
// Unicode, which was read by AC as a 2-byte char
|
||||
final String target = "LİKE THIS";
|
||||
// Java does it the right way
|
||||
assertEquals( "THIS",
|
||||
target.substring( 5, 9 ) );
|
||||
final Trie trie = Trie.builder().ignoreCase().onlyWholeWords()
|
||||
.addKeyword( "this" )
|
||||
.build();
|
||||
assertEquals("THIS", target.substring(5, 9));
|
||||
final Trie trie = Trie.builder().ignoreCase().onlyWholeWords().addKeyword("this").build();
|
||||
final Collection<Emit> emits = trie.parseText(target);
|
||||
assertEquals(1, emits.size());
|
||||
final Iterator<Emit> it = emits.iterator();
|
||||
checkEmit(it.next(), 5, 8, "this");
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Notice the capital I with a dot. The code used to compute the offsets
|
||||
* at (6, 9), which caused {@link Trie#tokenize(String)} to crash because
|
||||
@ -483,32 +497,27 @@ public class TrieTest {
|
||||
// The second character ('İ') is
|
||||
// Unicode, which was read by AC as a 2-byte char
|
||||
final String target = "LİKE THIS";
|
||||
final Trie trie = Trie.builder()
|
||||
.ignoreCase()
|
||||
.onlyWholeWords()
|
||||
.addKeyword( "this" )
|
||||
.build();
|
||||
final Trie trie = Trie.builder().ignoreCase().onlyWholeWords().addKeyword("this").build();
|
||||
// Java does it the right way
|
||||
assertEquals( "THIS",
|
||||
target.substring( 5, 9 ) );
|
||||
assertEquals("THIS", target.substring(5, 9));
|
||||
final Emit firstMatch = trie.firstMatch(target);
|
||||
checkEmit(firstMatch, 5, 8, "this");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_PartialMatchWhiteSpaces() {
|
||||
final Trie trie = Trie.builder()
|
||||
.onlyWholeWordsWhiteSpaceSeparated()
|
||||
.addKeyword( "#sugar-123" )
|
||||
.build();
|
||||
final Collection<Emit> emits =
|
||||
trie.parseText( "#sugar-123 #sugar-1234" ); // left, middle, right test
|
||||
|
||||
final Trie trie = Trie.builder().onlyWholeWordsWhiteSpaceSeparated().addKeyword("#sugar-123").build();
|
||||
final Collection<Emit> emits = trie.parseText("#sugar-123 #sugar-1234"); // left, middle, right test
|
||||
assertEquals(1, emits.size()); // Match must not be made
|
||||
checkEmit(emits.iterator().next(), 0, 9, "#sugar-123");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_LargeString() {
|
||||
|
||||
final int interval = 100;
|
||||
final int textSize = 1000000;
|
||||
final String keyword = FOOD[1];
|
||||
@ -516,16 +525,14 @@ public class TrieTest {
|
||||
|
||||
injectKeyword(text, keyword, interval);
|
||||
|
||||
final Trie trie = Trie.builder()
|
||||
.onlyWholeWords()
|
||||
.addKeyword( keyword )
|
||||
.build();
|
||||
final Trie trie = Trie.builder().onlyWholeWords().addKeyword(keyword).build();
|
||||
|
||||
final Collection<Emit> emits = trie.parseText(text);
|
||||
|
||||
assertEquals(textSize / interval, emits.size());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test_UnicodeIssueBug39ReportedByHumanzz() {
|
||||
// Problem: "İ".length => 1, "İ".toLowerCase().length => 2. This causes
|
||||
@ -536,40 +543,31 @@ public class TrieTest {
|
||||
// ('İ') => 'i' + make sure
|
||||
// that emit gets the properly cased keyword.
|
||||
final String upperLengthOne = "İnt";
|
||||
final Trie trie = Trie.builder()
|
||||
.ignoreCase()
|
||||
.onlyWholeWords()
|
||||
.addKeyword( upperLengthOne )
|
||||
.build();
|
||||
final Trie trie = Trie.builder().ignoreCase().onlyWholeWords().addKeyword(upperLengthOne).build();
|
||||
final Collection<Emit> emits = trie.parseText("İnt is good");
|
||||
assertEquals(1, emits.size());
|
||||
checkEmit(emits.iterator().next(), 0, 2, upperLengthOne);
|
||||
}
|
||||
|
||||
|
||||
@Test(timeout = 30_000)
|
||||
public void test_ParallelSearch() throws InterruptedException {
|
||||
|
||||
final int interval = 100;
|
||||
final int textSize = 1000000;
|
||||
final String keyword = FOOD[1];
|
||||
final StringBuilder matchingText = randomNumbers(textSize);
|
||||
injectKeyword(matchingText, keyword, interval);
|
||||
final StringBuilder nonMatchingText = randomNumbers(textSize);
|
||||
injectKeyword( nonMatchingText,
|
||||
keyword.substring( 0, keyword.length() - 1 ),
|
||||
interval );
|
||||
injectKeyword(nonMatchingText, keyword.substring(0, keyword.length() - 1), interval);
|
||||
|
||||
final Trie trie = Trie.builder()
|
||||
.onlyWholeWords()
|
||||
.addKeyword( keyword )
|
||||
.build();
|
||||
final Trie trie = Trie.builder().onlyWholeWords().addKeyword(keyword).build();
|
||||
|
||||
final AtomicInteger matchCount = new AtomicInteger(0);
|
||||
final Runnable matchingTask = () -> matchCount.set(
|
||||
trie.parseText( matchingText ).size() );
|
||||
final Runnable matchingTask = () -> matchCount.set(trie.parseText(matchingText).size());
|
||||
|
||||
final AtomicInteger nonMatchCount = new AtomicInteger(0);
|
||||
final Runnable nonMatchingTask = () -> nonMatchCount.set( trie.parseText(
|
||||
nonMatchingText ).size() );
|
||||
final Runnable nonMatchingTask = () -> nonMatchCount.set(trie.parseText(nonMatchingText).size());
|
||||
final Thread matchingThread = new Thread(matchingTask);
|
||||
final Thread nonMatchingThread = new Thread(nonMatchingTask);
|
||||
matchingThread.start();
|
||||
@ -581,14 +579,12 @@ public class TrieTest {
|
||||
assertEquals(0, nonMatchCount.get());
|
||||
}
|
||||
|
||||
private void checkEmit( Emit next, int expectedStart, int expectedEnd,
|
||||
String expectedKeyword ) {
|
||||
assertEquals( "Start of emit should have been " + expectedStart,
|
||||
expectedStart,
|
||||
next.getStart() );
|
||||
assertEquals( "End of emit should have been " + expectedEnd,
|
||||
expectedEnd,
|
||||
next.getEnd() );
|
||||
|
||||
private void checkEmit(Emit next, int expectedStart, int expectedEnd, String expectedKeyword) {
|
||||
|
||||
assertEquals("Start of emit should have been " + expectedStart, expectedStart, next.getStart());
|
||||
assertEquals("End of emit should have been " + expectedEnd, expectedEnd, next.getEnd());
|
||||
assertEquals(expectedKeyword, next.getKeyword());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user