From 1f63ae71d4f1ee258e83be8eea86242d70d563a7 Mon Sep 17 00:00:00 2001 From: Douglas Lovell Date: Wed, 28 Oct 2015 16:56:27 -0600 Subject: [PATCH] StringBuffer, StringIterator are old school --- .../ahocorasick/trie/CharacterTransition.java | 2 +- .../java/org/ahocorasick/trie/Transition.java | 2 +- src/main/java/org/ahocorasick/trie/Trie.java | 30 +++++++++++++++---- .../org/ahocorasick/trie/WordTransition.java | 2 +- 4 files changed, 27 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/ahocorasick/trie/CharacterTransition.java b/src/main/java/org/ahocorasick/trie/CharacterTransition.java index acff751..31bf4e8 100644 --- a/src/main/java/org/ahocorasick/trie/CharacterTransition.java +++ b/src/main/java/org/ahocorasick/trie/CharacterTransition.java @@ -26,7 +26,7 @@ class CharacterTransition extends Transition { } @Override - public void updateMatch(StringBuffer match) { + public void updateMatch(StringBuilder match) { match.append(token); } diff --git a/src/main/java/org/ahocorasick/trie/Transition.java b/src/main/java/org/ahocorasick/trie/Transition.java index 9fa374c..382213b 100644 --- a/src/main/java/org/ahocorasick/trie/Transition.java +++ b/src/main/java/org/ahocorasick/trie/Transition.java @@ -29,6 +29,6 @@ public abstract class Transition { public T transitionToken() { return token; } - public abstract void updateMatch(StringBuffer match); + public abstract void updateMatch(StringBuilder match); public abstract boolean isWordSeparator(); } diff --git a/src/main/java/org/ahocorasick/trie/Trie.java b/src/main/java/org/ahocorasick/trie/Trie.java index c8956d4..1b5e49a 100644 --- a/src/main/java/org/ahocorasick/trie/Trie.java +++ b/src/main/java/org/ahocorasick/trie/Trie.java @@ -1,5 +1,7 @@ package org.ahocorasick.trie; +import java.text.CharacterIterator; +import java.util.Arrays; import org.ahocorasick.trie.candidate.EmitCandidateFlushHandler; import org.ahocorasick.trie.candidate.EmitCandidateHolder; import org.ahocorasick.trie.candidate.NonOverlappingEmitCandidateHolder; @@ -9,6 +11,7 @@ import org.ahocorasick.trie.handler.EmitHandler; import org.ahocorasick.trie.handler.FirstMatchHandler; import java.util.Collection; +import java.util.Iterator; import java.util.Queue; import java.util.concurrent.LinkedBlockingDeque; @@ -33,24 +36,37 @@ public class Trie { } private class WordTokenizer implements KeywordTokenizer { - private final java.util.StringTokenizer st; + private final Iterator st; public WordTokenizer(String keyword) { - st = new java.util.StringTokenizer(keyword); + String[] tokens = keyword.split("\\s"); + st = Arrays.asList(tokens).iterator(); } @Override public Transition nextTransition() { - return new WordTransition(st.nextToken()); + WordTransition t = null; + if (st.hasNext()) { + t = new WordTransition(st.next()); + } + return t; } } private class CharacterTokenizer implements KeywordTokenizer { private final java.text.StringCharacterIterator ct; + private char cur; public CharacterTokenizer(String keyword) { ct = new java.text.StringCharacterIterator(keyword); + cur = ct.first(); } @Override public Transition nextTransition() { - return new CharacterTransition(ct.next()); + CharacterTransition t = null; + if (cur != CharacterIterator.DONE) { + t = new CharacterTransition(cur); + cur = ct.next(); + } + + return t; } } @@ -68,7 +84,7 @@ public class Trie { private class TokenStream { private final KeywordTokenizer kwt; private Transition lookahead; - private final StringBuffer match = new StringBuffer(); + private final StringBuilder match = new StringBuilder(); public TokenStream(KeywordTokenizer kwt) { this.kwt = kwt; @@ -159,8 +175,10 @@ public class Trie { State currentState = this.rootState; Transition tn = tknz.nextTransition(); while (tn != null) { + if (flushHandler.stop()) { + return; + } currentState = getState(currentState, tn, flushHandler); - Collection emits = currentState.emit(); for (String emit : emits) { int position = tknz.position(); diff --git a/src/main/java/org/ahocorasick/trie/WordTransition.java b/src/main/java/org/ahocorasick/trie/WordTransition.java index 335988f..1d631b5 100644 --- a/src/main/java/org/ahocorasick/trie/WordTransition.java +++ b/src/main/java/org/ahocorasick/trie/WordTransition.java @@ -26,7 +26,7 @@ public class WordTransition extends Transition { } @Override - public void updateMatch(StringBuffer match) { + public void updateMatch(StringBuilder match) { if (0 < match.length()) { match.append(' '); }