#53 Allow emit handlers to acknowledge (ack) emits

Also allow isOnlyWholeWords, isOnlyWholeWordsWhiteSpaceSeparated and
isAllowOverlaps to be used together with a StatefulEmitHandler
This commit is contained in:
Crystark 2017-05-15 16:02:02 +02:00
parent f3baace342
commit ea88eb987a
5 changed files with 61 additions and 10 deletions

View File

@ -4,6 +4,7 @@ import org.ahocorasick.interval.IntervalTree;
import org.ahocorasick.interval.Intervalable;
import org.ahocorasick.trie.handler.DefaultEmitHandler;
import org.ahocorasick.trie.handler.EmitHandler;
import org.ahocorasick.trie.handler.StatefulEmitHandler;
import java.util.ArrayList;
import java.util.Collection;
@ -103,10 +104,13 @@ public class Trie {
return new MatchToken(text.substring(emit.getStart(), emit.getEnd() + 1), emit);
}
@SuppressWarnings("unchecked")
public Collection<Emit> parseText(final CharSequence text) {
final DefaultEmitHandler emitHandler = new DefaultEmitHandler();
parseText(text, emitHandler);
return parseText(text, new DefaultEmitHandler());
}
@SuppressWarnings("unchecked")
public Collection<Emit> parseText(final CharSequence text, final StatefulEmitHandler emitHandler) {
parseText(text, (EmitHandler) emitHandler);
final List<Emit> collectedEmits = emitHandler.getEmits();
@ -281,8 +285,8 @@ public class Trie {
// TODO: The check for empty might be superfluous.
if (emits != null && !emits.isEmpty()) {
for (final String emit : emits) {
emitHandler.emit(new Emit(position - emit.length() + 1, position, emit));
emitted = true;
emitted = emitHandler.emit(new Emit(position - emit.length() + 1, position, emit)) || emitted;
if(emitted && trieConfig.isStopOnHit()) break;
}
}

View File

@ -5,15 +5,17 @@ import org.ahocorasick.trie.Emit;
import java.util.ArrayList;
import java.util.List;
public class DefaultEmitHandler implements EmitHandler {
public class DefaultEmitHandler implements StatefulEmitHandler {
private final List<Emit> emits = new ArrayList<>();
@Override
public void emit(final Emit emit) {
public boolean emit(final Emit emit) {
this.emits.add(emit);
return true;
}
@Override
public List<Emit> getEmits() {
return this.emits;
}

View File

@ -3,5 +3,5 @@ package org.ahocorasick.trie.handler;
import org.ahocorasick.trie.Emit;
public interface EmitHandler {
void emit(Emit emit);
boolean emit(Emit emit);
}

View File

@ -0,0 +1,9 @@
package org.ahocorasick.trie.handler;
import java.util.List;
import org.ahocorasick.trie.Emit;
/**
 * An {@link EmitHandler} that also exposes a list of emits through
 * {@link #getEmits()}.
 *
 * <p>{@code Trie.parseText(CharSequence, StatefulEmitHandler)} feeds every
 * emit to the handler and then reads the list back via {@link #getEmits()}.
 */
public interface StatefulEmitHandler extends EmitHandler {
/**
 * Returns the emits held by this handler.
 *
 * @return the list of emits; which emits it contains is up to the
 *         implementation (e.g. only those it accepted in {@code emit})
 */
List<Emit> getEmits();
}

View File

@ -1,6 +1,7 @@
package org.ahocorasick.trie;
import org.ahocorasick.trie.handler.EmitHandler;
import org.ahocorasick.trie.handler.StatefulEmitHandler;
import org.junit.Test;
import java.util.ArrayList;
@ -97,9 +98,43 @@ public class TrieTest {
.stopOnHit()
.build();
Collection<Emit> emits = trie.parseText("ushers");
assertEquals(2, emits.size()); // she @ 3, he @ 3, hers @ 5
assertEquals(1, emits.size()); // she @ 3, he @ 3, hers @ 5
Iterator<Emit> iterator = emits.iterator();
checkEmit(iterator.next(), 2, 3, "he");
}
/**
 * Checks that an emit rejected by the handler (handler returns {@code false})
 * does not count as the hit that triggers stop-on-hit: the handler declines
 * the first emit it receives, and exactly one later emit, "she" at 1-3, is
 * expected in the collected result.
 */
@Test
public void ushersTestStopOnHitSkipOne() {
Trie trie = Trie.builder()
.addKeywords(PRONOUNS)
.stopOnHit()
.build();
// Handler that declines the very first emit and stores every emit after it.
StatefulEmitHandler testEmitHandler = new StatefulEmitHandler() {
private final List<Emit> emits = new ArrayList<>();
// True until the first emit has been offered to this handler.
boolean first = true;
@Override
public boolean emit(final Emit emit) {
if(first) {
// Reject the first emit: it is not stored and false is reported to the caller.
first = false;
return false;
}
this.emits.add(emit);
return true;
}
@Override
public List<Emit> getEmits() {
return this.emits;
}
};
trie.parseText("ushers", testEmitHandler);
Collection<Emit> emits = testEmitHandler.getEmits();
assertEquals(1, emits.size()); // candidates: she @ 3, he @ 3, hers @ 5 -- only one accepted emit is kept
Iterator<Emit> iterator = emits.iterator();
// The first emit was rejected by the handler, so the stored one is "she" at 1-3.
checkEmit(iterator.next(), 1, 3, "she");
}
@ -152,8 +187,9 @@ public class TrieTest {
EmitHandler emitHandler = new EmitHandler() {
@Override
public void emit(Emit emit) {
public boolean emit(Emit emit) {
emits.add(emit);
return true;
}
};
trie.parseText("ushers", emitHandler);