#53 Allow emit handlers to acknowledge (ack) emits
Also allow isOnlyWholeWords, isOnlyWholeWordsWhiteSpaceSeparated and isAllowOverlaps to be used together with a StatefulEmitHandler
This commit is contained in:
parent
f3baace342
commit
ea88eb987a
@ -4,6 +4,7 @@ import org.ahocorasick.interval.IntervalTree;
|
||||
import org.ahocorasick.interval.Intervalable;
|
||||
import org.ahocorasick.trie.handler.DefaultEmitHandler;
|
||||
import org.ahocorasick.trie.handler.EmitHandler;
|
||||
import org.ahocorasick.trie.handler.StatefulEmitHandler;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
@ -103,10 +104,13 @@ public class Trie {
|
||||
return new MatchToken(text.substring(emit.getStart(), emit.getEnd() + 1), emit);
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public Collection<Emit> parseText(final CharSequence text) {
|
||||
final DefaultEmitHandler emitHandler = new DefaultEmitHandler();
|
||||
parseText(text, emitHandler);
|
||||
return parseText(text, new DefaultEmitHandler());
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public Collection<Emit> parseText(final CharSequence text, final StatefulEmitHandler emitHandler) {
|
||||
parseText(text, (EmitHandler) emitHandler);
|
||||
|
||||
final List<Emit> collectedEmits = emitHandler.getEmits();
|
||||
|
||||
@ -281,8 +285,8 @@ public class Trie {
|
||||
// TODO: The check for empty might be superfluous.
|
||||
if (emits != null && !emits.isEmpty()) {
|
||||
for (final String emit : emits) {
|
||||
emitHandler.emit(new Emit(position - emit.length() + 1, position, emit));
|
||||
emitted = true;
|
||||
emitted = emitHandler.emit(new Emit(position - emit.length() + 1, position, emit)) || emitted;
|
||||
if(emitted && trieConfig.isStopOnHit()) break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -5,15 +5,17 @@ import org.ahocorasick.trie.Emit;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class DefaultEmitHandler implements EmitHandler {
|
||||
public class DefaultEmitHandler implements StatefulEmitHandler {
|
||||
|
||||
private final List<Emit> emits = new ArrayList<>();
|
||||
|
||||
@Override
|
||||
public void emit(final Emit emit) {
|
||||
public boolean emit(final Emit emit) {
|
||||
this.emits.add(emit);
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Emit> getEmits() {
|
||||
return this.emits;
|
||||
}
|
||||
|
||||
@ -3,5 +3,5 @@ package org.ahocorasick.trie.handler;
|
||||
import org.ahocorasick.trie.Emit;
|
||||
|
||||
public interface EmitHandler {
|
||||
void emit(Emit emit);
|
||||
boolean emit(Emit emit);
|
||||
}
|
||||
|
||||
@ -0,0 +1,9 @@
|
||||
package org.ahocorasick.trie.handler;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.ahocorasick.trie.Emit;
|
||||
|
||||
/**
 * An {@link EmitHandler} that additionally exposes emits as a list
 * through {@link #getEmits()}.
 */
public interface StatefulEmitHandler extends EmitHandler {
|
||||
/**
 * Returns the list of emits held by this handler. Which emits end up in
 * the list is decided by the implementation.
 *
 * @return the emits held by this handler
 */
List<Emit> getEmits();
|
||||
}
|
||||
@ -1,6 +1,7 @@
|
||||
package org.ahocorasick.trie;
|
||||
|
||||
import org.ahocorasick.trie.handler.EmitHandler;
|
||||
import org.ahocorasick.trie.handler.StatefulEmitHandler;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
@ -97,9 +98,43 @@ public class TrieTest {
|
||||
.stopOnHit()
|
||||
.build();
|
||||
Collection<Emit> emits = trie.parseText("ushers");
|
||||
assertEquals(2, emits.size()); // she @ 3, he @ 3, hers @ 5
|
||||
assertEquals(1, emits.size()); // she @ 3, he @ 3, hers @ 5
|
||||
Iterator<Emit> iterator = emits.iterator();
|
||||
checkEmit(iterator.next(), 2, 3, "he");
|
||||
}
|
||||
|
||||
// Verifies stopOnHit together with a handler that rejects (returns false for)
// the first emit: the rejected emit must not end parsing, and only the next
// emit, which the handler accepts, is collected.
@Test
|
||||
public void ushersTestStopOnHitSkipOne() {
|
||||
Trie trie = Trie.builder()
|
||||
.addKeywords(PRONOUNS)
|
||||
.stopOnHit()
|
||||
.build();
|
||||
|
||||
// Handler that stores only the emits it accepts.
StatefulEmitHandler testEmitHandler = new StatefulEmitHandler() {
|
||||
private final List<Emit> emits = new ArrayList<>();
|
||||
boolean first = true;
|
||||
|
||||
@Override
|
||||
public boolean emit(final Emit emit) {
|
||||
if(first) {
|
||||
// return false for the first element
|
||||
first = false;
|
||||
return false;
|
||||
}
|
||||
this.emits.add(emit);
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Emit> getEmits() {
|
||||
return this.emits;
|
||||
}
|
||||
};
|
||||
|
||||
trie.parseText("ushers", testEmitHandler);
|
||||
Collection<Emit> emits = testEmitHandler.getEmits();
|
||||
assertEquals(1, emits.size()); // only the accepted emit is collected; candidates: she @ 3, he @ 3, hers @ 5
|
||||
Iterator<Emit> iterator = emits.iterator();
|
||||
checkEmit(iterator.next(), 1, 3, "she");
|
||||
|
||||
}
|
||||
|
||||
@ -152,8 +187,9 @@ public class TrieTest {
|
||||
EmitHandler emitHandler = new EmitHandler() {
|
||||
|
||||
@Override
|
||||
public void emit(Emit emit) {
|
||||
public boolean emit(Emit emit) {
|
||||
emits.add(emit);
|
||||
return true;
|
||||
}
|
||||
};
|
||||
trie.parseText("ushers", emitHandler);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user