diff --git a/pom.xml b/pom.xml index 9d964e4..ed7c40b 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ org.ahocorasick ahocorasick - 0.3.1-SNAPSHOT + 0.3.1-heartbeat jar Aho-CoraSick algorithm for efficient string matching Java library for efficient string matching against a large set of keywords @@ -104,4 +104,4 @@ - \ No newline at end of file + diff --git a/src/main/java/org/ahocorasick/trie/CharacterTransition.java b/src/main/java/org/ahocorasick/trie/CharacterTransition.java new file mode 100644 index 0000000..5b19499 --- /dev/null +++ b/src/main/java/org/ahocorasick/trie/CharacterTransition.java @@ -0,0 +1,41 @@ +/* + * Copyright 2015 Rogue Wave Software. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.ahocorasick.trie; + +/** + * Model transitions on characters + * @author doug.lovell + */ +class CharacterTransition extends Transition { + + /** + * Create a character transition from a position in the source string + * @param c character to match + * @param start position of character in source string + */ + public CharacterTransition(Character c, int start) { + super(c, start, 1); + } + + /** + * Create a character transition without regard for position + * @param c character to match + */ + public CharacterTransition(Character c) { + this(c, 0); + } + +} diff --git a/src/main/java/org/ahocorasick/trie/Emit.java b/src/main/java/org/ahocorasick/trie/Emit.java index 60c1f9e..20d73ff 100644 --- a/src/main/java/org/ahocorasick/trie/Emit.java +++ b/src/main/java/org/ahocorasick/trie/Emit.java @@ -11,11 +11,11 @@ public class Emit extends Interval implements Intervalable { super(start, end); this.keyword = keyword; } - + public String getKeyword() { return this.keyword; } - + @Override public String toString() { return super.toString() + "=" + this.keyword; diff --git a/src/main/java/org/ahocorasick/trie/FragmentToken.java b/src/main/java/org/ahocorasick/trie/FragmentToken.java index f0c899f..be77a16 100644 --- a/src/main/java/org/ahocorasick/trie/FragmentToken.java +++ b/src/main/java/org/ahocorasick/trie/FragmentToken.java @@ -2,16 +2,8 @@ package org.ahocorasick.trie; public class FragmentToken extends Token { - private boolean whiteSpace; - - public FragmentToken(String fragment) { + public FragmentToken(final String fragment) { super(fragment); - this.whiteSpace = true; - for (int position = 0; position < fragment.length(); position++) { - if (!Character.isWhitespace(fragment.charAt(position))) { - whiteSpace = false; - } - } } @Override @@ -24,9 +16,4 @@ public class FragmentToken extends Token { return null; } - @Override - public boolean isWhiteSpace() { - return whiteSpace; - } - } diff --git a/src/main/java/org/ahocorasick/trie/Keyword.java 
b/src/main/java/org/ahocorasick/trie/Keyword.java new file mode 100644 index 0000000..dcfcaf9 --- /dev/null +++ b/src/main/java/org/ahocorasick/trie/Keyword.java @@ -0,0 +1,60 @@ +/* + * Copyright 2015 Rogue Wave Software. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.ahocorasick.trie; + +/** + * Keyword encapsulates part of a potential match along with the count + * of prior source tokens consumed to create the potential match. + * + * @author doug.lovell + */ +public class Keyword implements Comparable { + private final String text; + private final int depth; + + /** + * Create portion of potential match + * @param text content that matches + * @param depth count of prior source tokens that comprise the match + */ + public Keyword(final String text, final int depth) { + this.text = text; + this.depth = depth; + } + + public int getDepth() { + return depth; + } + + public String getText() { + return text; + } + + public String toString() { + final String t = getText(); + final int d = getDepth(); + + return "Keyword '" + t + "' at depth " + d; + } + + @Override + public int compareTo(final Object o) { + if (o instanceof Keyword) { + return text.compareTo(((Keyword) o).text); + } + throw new IllegalArgumentException("Only supports comparison with other keywords"); + } +} diff --git a/src/main/java/org/ahocorasick/trie/MatchToken.java b/src/main/java/org/ahocorasick/trie/MatchToken.java index 5becc64..8ec9b0d 100644 --- 
a/src/main/java/org/ahocorasick/trie/MatchToken.java +++ b/src/main/java/org/ahocorasick/trie/MatchToken.java @@ -2,19 +2,11 @@ package org.ahocorasick.trie; public class MatchToken extends Token { - private final boolean wholeWord; - private final Emit emit; - public MatchToken(String fragment, Emit emit, boolean wholeWord) { + public MatchToken(final String fragment, final Emit emit) { super(fragment); this.emit = emit; - this.wholeWord = wholeWord; - } - - @Override - public boolean isWholeWord() { - return wholeWord; } @Override diff --git a/src/main/java/org/ahocorasick/trie/State.java b/src/main/java/org/ahocorasick/trie/State.java index 82d167b..01349aa 100644 --- a/src/main/java/org/ahocorasick/trie/State.java +++ b/src/main/java/org/ahocorasick/trie/State.java @@ -10,8 +10,8 @@ import java.util.*; *

* *