From bfaa32b20e874b4d02e5c03e6cd84a567c8e0a4b Mon Sep 17 00:00:00 2001
From: robert-bor
Date: Sun, 27 Sep 2015 14:37:36 +0200
Subject: [PATCH] Issue #24 tokenize() method implementation extracted to separate class

---
Notes (text between the "---" line above and the diffstat is ignored by
git am and is not part of the commit):

  * Tokenizer is constructed with the emits found for a text and the text
    itself. tokenize() walks the emits in iteration order while tracking
    lastCollectedPosition, the end index of the most recently added match
    (initially -1, i.e. "nothing collected yet").
  * When an emit starts more than one position after lastCollectedPosition,
    the text in between is added as a FragmentToken before the match.
  * Every emit is added as a MatchToken holding the substring from
    getStart() to getEnd(); the "+1" is there because getEnd() is used as
    an inclusive index while String.substring takes an exclusive end.
  * After the loop, any text left behind the last match is added as a
    trailing FragmentToken; createFragment() receives a null emit for that
    case and then cuts up to text.length().
  * Trie.tokenize(String) keeps its signature and now delegates to
    new Tokenizer(parseText(text), text).tokenize(); the loop and the two
    private helpers are removed from Trie unchanged in behaviour.
  * The generic type arguments (Collection<Emit>, Collection<Token>,
    List<Token>) are required: iterating a raw Collection with an Emit
    loop variable does not compile.

 .../java/org/ahocorasick/trie/Tokenizer.java | 44 +++++++++++++++++++
 src/main/java/org/ahocorasick/trie/Trie.java | 26 +-----------
 2 files changed, 45 insertions(+), 25 deletions(-)
 create mode 100644 src/main/java/org/ahocorasick/trie/Tokenizer.java

diff --git a/src/main/java/org/ahocorasick/trie/Tokenizer.java b/src/main/java/org/ahocorasick/trie/Tokenizer.java
new file mode 100644
index 0000000..b1bac1a
--- /dev/null
+++ b/src/main/java/org/ahocorasick/trie/Tokenizer.java
@@ -0,0 +1,44 @@
+package org.ahocorasick.trie;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+public class Tokenizer {
+
+    private final Collection<Emit> emits;
+
+    private final String text;
+
+    public Tokenizer(Collection<Emit> emits, String text) {
+        this.emits = emits;
+        this.text = text;
+    }
+
+    public Collection<Token> tokenize() {
+
+        List<Token> tokens = new ArrayList<>();
+        int lastCollectedPosition = -1;
+        for (Emit emit : emits) {
+            if (emit.getStart() - lastCollectedPosition > 1) {
+                tokens.add(createFragment(emit, text, lastCollectedPosition));
+            }
+            tokens.add(createMatch(emit, text));
+            lastCollectedPosition = emit.getEnd();
+        }
+        if (text.length() - lastCollectedPosition > 1) {
+            tokens.add(createFragment(null, text, lastCollectedPosition));
+        }
+
+        return tokens;
+    }
+
+    private Token createFragment(Emit emit, String text, int lastCollectedPosition) {
+        return new FragmentToken(text.substring(lastCollectedPosition+1, emit == null ? text.length() : emit.getStart()));
+    }
+
+    private Token createMatch(Emit emit, String text) {
+        return new MatchToken(text.substring(emit.getStart(), emit.getEnd()+1), emit);
+    }
+
+}
diff --git a/src/main/java/org/ahocorasick/trie/Trie.java b/src/main/java/org/ahocorasick/trie/Trie.java
index 0b62c82..ac107b5 100644
--- a/src/main/java/org/ahocorasick/trie/Trie.java
+++ b/src/main/java/org/ahocorasick/trie/Trie.java
@@ -42,31 +42,7 @@ public class Trie {
     }
 
     public Collection<Token> tokenize(String text) {
-
-        Collection<Token> tokens = new ArrayList<>();
-
-        Collection<Emit> collectedEmits = parseText(text);
-        int lastCollectedPosition = -1;
-        for (Emit emit : collectedEmits) {
-            if (emit.getStart() - lastCollectedPosition > 1) {
-                tokens.add(createFragment(emit, text, lastCollectedPosition));
-            }
-            tokens.add(createMatch(emit, text));
-            lastCollectedPosition = emit.getEnd();
-        }
-        if (text.length() - lastCollectedPosition > 1) {
-            tokens.add(createFragment(null, text, lastCollectedPosition));
-        }
-
-        return tokens;
-    }
-
-    private Token createFragment(Emit emit, String text, int lastCollectedPosition) {
-        return new FragmentToken(text.substring(lastCollectedPosition+1, emit == null ? text.length() : emit.getStart()));
-    }
-
-    private Token createMatch(Emit emit, String text) {
-        return new MatchToken(text.substring(emit.getStart(), emit.getEnd()+1), emit);
+        return new Tokenizer(parseText(text), text).tokenize();
     }
 
     @SuppressWarnings("unchecked")