Add callback-based parsing to Trie.

The text to scan (a String or a Reader) is described by a ParseConfiguration,
and every match is handed to an EmitHandler. Trie.parseText(String) keeps its
behaviour by collecting matches through a DefaultEmitHandler.

Review notes for this revision of the patch:
  * Line breaks and indentation were rebuilt from the hunk headers, and generic
    type arguments that had been lost were restored by inference. Check the
    context lines against the target tree before applying.
  * ReaderIterator and StringIterator now throw NoSuchElementException from
    next() once exhausted, as the Iterator contract requires.
  * ParseConfiguration keeps only the most recently set source and fails fast
    with IllegalStateException when no source was set.
  * The blob ids on the "index" lines are those of the original commit; the
    five new main-source files revised here no longer hash to them.

diff --git a/src/main/java/org/ahocorasick/trie/Trie.java b/src/main/java/org/ahocorasick/trie/Trie.java
index 155698b..ab4e962 100644
--- a/src/main/java/org/ahocorasick/trie/Trie.java
+++ b/src/main/java/org/ahocorasick/trie/Trie.java
@@ -2,6 +2,9 @@ package org.ahocorasick.trie;
 
 import org.ahocorasick.interval.IntervalTree;
 import org.ahocorasick.interval.Intervalable;
+import org.ahocorasick.trie.configuration.ParseConfiguration;
+import org.ahocorasick.trie.handler.DefaultEmitHandler;
+import org.ahocorasick.trie.handler.EmitHandler;
 
 import java.util.ArrayList;
 import java.util.Collection;
@@ -87,19 +90,12 @@ public class Trie {
 
     @SuppressWarnings("unchecked")
     public Collection<Emit> parseText(String text) {
-        checkForConstructedFailureStates();
+        DefaultEmitHandler emitHandler = new DefaultEmitHandler();
+        parseText(new ParseConfiguration()
+                .setEmitHandler(emitHandler)
+                .setText(text));
 
-        int position = 0;
-        State currentState = this.rootState;
-        List<Emit> collectedEmits = new ArrayList<Emit>();
-        for (Character character : text.toCharArray()) {
-            if (trieConfig.isCaseInsensitive()) {
-                character = Character.toLowerCase(character);
-            }
-            currentState = getState(currentState, character);
-            storeEmits(position, currentState, collectedEmits);
-            position++;
-        }
+        List<Emit> collectedEmits = emitHandler.getEmits();
 
         if (trieConfig.isOnlyWholeWords()) {
             removePartialMatches(text, collectedEmits);
@@ -113,6 +109,22 @@ public class Trie {
         return collectedEmits;
     }
 
+    public void parseText(ParseConfiguration parseConfiguration) {
+        checkForConstructedFailureStates();
+
+        int position = 0;
+        State currentState = this.rootState;
+        for (Character character : parseConfiguration) {
+            if (trieConfig.isCaseInsensitive()) {
+                character = Character.toLowerCase(character);
+            }
+            currentState = getState(currentState, character);
+            storeEmits(position, currentState, parseConfiguration.getEmitHandler());
+            position++;
+        }
+
+    }
+
     private void removePartialMatches(String searchText, List<Emit> collectedEmits) {
         long size = searchText.length();
         List<Emit> removeEmits = new ArrayList<Emit>();
@@ -175,11 +187,11 @@ public class Trie {
         }
     }
 
-    private void storeEmits(int position, State currentState, List<Emit> collectedEmits) {
+    private void storeEmits(int position, State currentState, EmitHandler emitHandler) {
         Collection<String> emits = currentState.emit();
         if (emits != null && !emits.isEmpty()) {
             for (String emit : emits) {
-                collectedEmits.add(new Emit(position-emit.length()+1, position, emit));
+                emitHandler.emit(new Emit(position - emit.length() + 1, position, emit));
             }
         }
     }
diff --git a/src/main/java/org/ahocorasick/trie/configuration/ParseConfiguration.java b/src/main/java/org/ahocorasick/trie/configuration/ParseConfiguration.java
new file mode 100644
index 0000000..d0ffb86
--- /dev/null
+++ b/src/main/java/org/ahocorasick/trie/configuration/ParseConfiguration.java
@@ -0,0 +1,62 @@
+package org.ahocorasick.trie.configuration;
+
+import org.ahocorasick.trie.handler.EmitHandler;
+
+import java.io.Reader;
+import java.util.Iterator;
+
+/**
+ * Describes one parse run: the character source to scan and the handler that
+ * receives each emit. Only one source is active at a time; the most recent
+ * {@code setText} call wins.
+ */
+public class ParseConfiguration implements Iterable<Character> {
+
+    private String text;
+
+    private Reader reader;
+
+    private EmitHandler emitHandler;
+
+    /** Uses the given string as the character source, dropping any reader set earlier. */
+    public ParseConfiguration setText(String text) {
+        this.text = text;
+        this.reader = null;
+        return this;
+    }
+
+    /** Uses the given reader as the character source, dropping any string set earlier. */
+    public ParseConfiguration setText(Reader reader) {
+        this.reader = reader;
+        this.text = null;
+        return this;
+    }
+
+    /** Sets the handler that is handed each emit produced while parsing. */
+    public ParseConfiguration setEmitHandler(EmitHandler emitHandler) {
+        this.emitHandler = emitHandler;
+        return this;
+    }
+
+    /** Returns the configured handler, or {@code null} if none has been set. */
+    public EmitHandler getEmitHandler() {
+        return emitHandler;
+    }
+
+    /**
+     * Returns an iterator over the characters of the configured source.
+     *
+     * @throws IllegalStateException if neither a string nor a reader has been set
+     */
+    @Override
+    public Iterator<Character> iterator() {
+        if (reader != null) {
+            return new ReaderIterator(reader);
+        }
+        if (text == null) {
+            throw new IllegalStateException("No text set: call setText(String) or setText(Reader) first");
+        }
+        return new StringIterator(text);
+    }
+
+}
diff --git a/src/main/java/org/ahocorasick/trie/configuration/ReaderIterator.java b/src/main/java/org/ahocorasick/trie/configuration/ReaderIterator.java
new file mode 100644
index 0000000..e24ebd1
--- /dev/null
+++ b/src/main/java/org/ahocorasick/trie/configuration/ReaderIterator.java
@@ -0,0 +1,60 @@
+package org.ahocorasick.trie.configuration;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+/**
+ * Iterates over the characters of a {@link Reader}, keeping one character of
+ * look-ahead so that {@link #hasNext()} knows whether the end has been reached.
+ * This class never closes the reader.
+ */
+public class ReaderIterator implements Iterator<Character> {
+
+    private Reader reader;
+
+    /** Look-ahead character as returned by {@link Reader#read()}; -1 at end of input. */
+    private int readCharacter;
+
+    public ReaderIterator(Reader reader) {
+        this.reader = reader;
+        readIntoBuffer();
+    }
+
+    /** Refills the look-ahead; an I/O failure surfaces as an unchecked exception. */
+    private void readIntoBuffer() {
+        try {
+            readCharacter = reader.read();
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    @Override
+    public boolean hasNext() {
+        return readCharacter != -1;
+    }
+
+    /**
+     * Returns the look-ahead character and reads the one after it.
+     *
+     * @throws NoSuchElementException if the reader is exhausted
+     */
+    @Override
+    public Character next() {
+        if (readCharacter == -1) {
+            // Without this guard the -1 sentinel would be cast to '\uffff'.
+            throw new NoSuchElementException();
+        }
+        Character returnChar = (char) readCharacter;
+        readIntoBuffer();
+        return returnChar;
+    }
+
+    @Override
+    public void remove() {
+        throw new UnsupportedOperationException();
+    }
+
+}
diff --git a/src/main/java/org/ahocorasick/trie/configuration/StringIterator.java b/src/main/java/org/ahocorasick/trie/configuration/StringIterator.java
new file mode 100644
index 0000000..b64f995
--- /dev/null
+++ b/src/main/java/org/ahocorasick/trie/configuration/StringIterator.java
@@ -0,0 +1,42 @@
+package org.ahocorasick.trie.configuration;
+
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+/**
+ * Iterates over the characters of a string from first to last.
+ */
+public class StringIterator implements Iterator<Character> {
+
+    private int counter = 0;
+
+    private String text;
+
+    public StringIterator(String text) {
+        this.text = text;
+    }
+
+    @Override
+    public boolean hasNext() {
+        return counter < text.length();
+    }
+
+    /**
+     * Returns the character at the current position and advances past it.
+     *
+     * @throws NoSuchElementException if every character has already been returned
+     */
+    @Override
+    public Character next() {
+        if (!hasNext()) {
+            throw new NoSuchElementException();
+        }
+        return text.charAt(counter++);
+    }
+
+    @Override
+    public void remove() {
+        throw new UnsupportedOperationException();
+    }
+
+}
diff --git a/src/main/java/org/ahocorasick/trie/handler/DefaultEmitHandler.java b/src/main/java/org/ahocorasick/trie/handler/DefaultEmitHandler.java
new file mode 100644
index 0000000..656d1e2
--- /dev/null
+++ b/src/main/java/org/ahocorasick/trie/handler/DefaultEmitHandler.java
@@ -0,0 +1,25 @@
+package org.ahocorasick.trie.handler;
+
+import org.ahocorasick.trie.Emit;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Emit handler that collects every emit it receives into a list.
+ */
+public class DefaultEmitHandler implements EmitHandler {
+
+    private List<Emit> emits = new ArrayList<>();
+
+    @Override
+    public void emit(Emit emit) {
+        this.emits.add(emit);
+    }
+
+    /** Returns the backing list itself, not a copy, so callers may edit it in place. */
+    public List<Emit> getEmits() {
+        return this.emits;
+    }
+
+}
diff --git a/src/main/java/org/ahocorasick/trie/handler/EmitHandler.java b/src/main/java/org/ahocorasick/trie/handler/EmitHandler.java
new file mode 100644
index 0000000..74fd71e
--- /dev/null
+++ b/src/main/java/org/ahocorasick/trie/handler/EmitHandler.java
@@ -0,0 +1,10 @@
+package org.ahocorasick.trie.handler;
+
+import org.ahocorasick.trie.Emit;
+
+/**
+ * Callback invoked once for each emit produced while parsing.
+ */
+public interface EmitHandler {
+    void emit(Emit emit);
+}
diff --git a/src/test/java/org/ahocorasick/trie/TrieTest.java b/src/test/java/org/ahocorasick/trie/TrieTest.java
index 6cc7ff7..a593966 100644
--- a/src/test/java/org/ahocorasick/trie/TrieTest.java
+++ b/src/test/java/org/ahocorasick/trie/TrieTest.java
@@ -1,9 +1,13 @@
 package org.ahocorasick.trie;
 
+import org.ahocorasick.trie.configuration.ParseConfiguration;
+import org.ahocorasick.trie.handler.EmitHandler;
 import org.junit.Test;
 
+import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Iterator;
+import java.util.List;
 
 import static junit.framework.Assert.assertEquals;
 
@@ -53,6 +57,30 @@ public class TrieTest {
         checkEmit(iterator.next(), 2, 5, "hers");
     }
 
+    @Test
+    public void ushersTestByCallback() {
+        Trie trie = new Trie();
+        trie.addKeyword("hers");
+        trie.addKeyword("his");
+        trie.addKeyword("she");
+        trie.addKeyword("he");
+
+        final List<Emit> emits = new ArrayList<>();
+        EmitHandler emitHandler = new EmitHandler() {
+
+            @Override
+            public void emit(Emit emit) {
+                emits.add(emit);
+            }
+        };
+        trie.parseText(new ParseConfiguration().setText("ushers").setEmitHandler(emitHandler));
+        assertEquals(3, emits.size()); // she @ 3, he @ 3, hers @ 5
+        Iterator<Emit> iterator = emits.iterator();
+        checkEmit(iterator.next(), 2, 3, "he");
+        checkEmit(iterator.next(), 1, 3, "she");
+        checkEmit(iterator.next(), 2, 5, "hers");
+    }
+
     @Test
     public void misleadingTest() {
         Trie trie = new Trie();
diff --git a/src/test/java/org/ahocorasick/trie/configuration/ParseConfigurationTest.java b/src/test/java/org/ahocorasick/trie/configuration/ParseConfigurationTest.java
new file mode 100644
index 0000000..600e1c0
--- /dev/null
+++ b/src/test/java/org/ahocorasick/trie/configuration/ParseConfigurationTest.java
@@ -0,0 +1,33 @@
+package org.ahocorasick.trie.configuration;
+
+import org.junit.Test;
+
+import java.io.IOException;
+import java.io.StringReader;
+
+import static org.junit.Assert.assertEquals;
+
+public class ParseConfigurationTest {
+
+    @Test
+    public void reader() throws IOException {
+        StringReader reader = new StringReader("hällö");
+        ParseConfiguration parseConfiguration = new ParseConfiguration().setText(reader);
+        assertIterator(parseConfiguration);
+        reader.close();
+    }
+
+    @Test
+    public void string() throws IOException {
+        ParseConfiguration parseConfiguration = new ParseConfiguration().setText("hällö");
+        assertIterator(parseConfiguration);
+    }
+
+    private void assertIterator(ParseConfiguration parseConfiguration) {
+        StringBuffer text = new StringBuffer();
+        for (Character character : parseConfiguration) {
+            text.append(character);
+        }
+        assertEquals("hällö", text.toString());
+    }
+}