From 773ff39e48e134a54af0bb700b0508de19513795 Mon Sep 17 00:00:00 2001 From: Luke Butters Date: Wed, 1 Nov 2017 16:10:53 +1100 Subject: [PATCH] Stop Trie#removePartialMatches() from being expensive #61 This changes the running time of `Trie#removePartialMatches()` from something that is subquadratic time or worse (I think n^3) to a running time that is linear. --- src/main/java/org/ahocorasick/trie/Trie.java | 22 ++++--- .../ahocorasick/util/ListElementRemoval.java | 51 +++++++++++++++ .../util/ListElementRemovalTest.java | 65 +++++++++++++++++++ 3 files changed, 128 insertions(+), 10 deletions(-) create mode 100644 src/main/java/org/ahocorasick/util/ListElementRemoval.java create mode 100644 src/test/java/org/ahocorasick/util/ListElementRemovalTest.java diff --git a/src/main/java/org/ahocorasick/trie/Trie.java b/src/main/java/org/ahocorasick/trie/Trie.java index 23ba9d1..25922db 100644 --- a/src/main/java/org/ahocorasick/trie/Trie.java +++ b/src/main/java/org/ahocorasick/trie/Trie.java @@ -5,6 +5,8 @@ import org.ahocorasick.interval.Intervalable; import org.ahocorasick.trie.handler.DefaultEmitHandler; import org.ahocorasick.trie.handler.EmitHandler; import org.ahocorasick.trie.handler.StatefulEmitHandler; +import org.ahocorasick.util.ListElementRemoval; +import org.ahocorasick.util.ListElementRemoval.RemoveElementPredicate; import java.util.ArrayList; import java.util.Collection; @@ -207,17 +209,17 @@ public class Trie { } private void removePartialMatches(final CharSequence searchText, final List collectedEmits) { - final List removeEmits = new ArrayList<>(); - - for (final Emit emit : collectedEmits) { - if (isPartialMatch(searchText, emit)) { - removeEmits.add(emit); + + final RemoveElementPredicate predicate = new RemoveElementPredicate() { + + @Override + public boolean remove(Emit emit) { + return isPartialMatch(searchText, emit); } - } - - for (final Emit removeEmit : removeEmits) { - collectedEmits.remove(removeEmit); - } + + }; + + 
ListElementRemoval.removeIf(collectedEmits, predicate); } private void removePartialMatchesWhiteSpaceSeparated(final CharSequence searchText, final List collectedEmits) { diff --git a/src/main/java/org/ahocorasick/util/ListElementRemoval.java b/src/main/java/org/ahocorasick/util/ListElementRemoval.java new file mode 100644 index 0000000..96ec96c --- /dev/null +++ b/src/main/java/org/ahocorasick/util/ListElementRemoval.java @@ -0,0 +1,51 @@ +package org.ahocorasick.util; + +import java.util.ArrayList; +import java.util.List; + +/** + * Helps removes elements from a list in a efficient way + * + *

Removing elements from an ArrayList in a naive way leads to O(n^2) running + * time. If the algorithm first creates a list of all the elements to remove, + * then for each element in that list (assume n elements) it searches the + * original list for it (against n elements) and, once found, removes it and + * shifts every element to its right one place to the left (at worst n moves). + * The search and the shift are each O(n) per removal, hence O(n^2) overall.

+ * + *

This avoids that by making a new list and copying over only the elements + * we want to keep; we then clear the given list and add all of the kept elements + * back, which gives us (for ArrayList) O(n) running time.

+ * + *

The performance of this has not been thoroughly tested for LinkedList, but + * it is probably not too bad.

+ * + *

This can be completely removed in java 8 as the List#removeIf() method can be used instead + * as this already is optimised for each list implementation. + * + */ +public class ListElementRemoval { + + public static interface RemoveElementPredicate { + public boolean remove(T t); + } + + /** + * Removes all elements from the list matching the given predicate. + * + * @param list + * @param predicate + */ + public static void removeIf(final List list, final RemoveElementPredicate predicate) { + final List newList = new ArrayList<>(list.size()); + + for(final T element : list) { + if (!predicate.remove(element)) { + newList.add(element); + } + } + + list.clear(); + list.addAll(newList); + } +} diff --git a/src/test/java/org/ahocorasick/util/ListElementRemovalTest.java b/src/test/java/org/ahocorasick/util/ListElementRemovalTest.java new file mode 100644 index 0000000..87dea7b --- /dev/null +++ b/src/test/java/org/ahocorasick/util/ListElementRemovalTest.java @@ -0,0 +1,65 @@ +package org.ahocorasick.util; + +import java.util.ArrayList; +import java.util.List; + +import org.ahocorasick.util.ListElementRemoval.RemoveElementPredicate; +import org.junit.Test; + +import junit.framework.Assert; + +import static java.util.Arrays.asList; +public class ListElementRemovalTest { + + @Test + public void removeNone() { + List list = new ArrayList<>(asList("a", "b", "c")); + RemoveElementPredicate matchNothing = new RemoveElementPredicate() { + + @Override + public boolean remove(String t) { + return false; + } + + }; + + ListElementRemoval.removeIf(list, matchNothing); + + Assert.assertEquals(3, list.size()); + } + + @Test + public void removeAll() { + List list = new ArrayList<>(asList("a", "b", "c")); + RemoveElementPredicate matchNothing = new RemoveElementPredicate() { + + @Override + public boolean remove(String t) { + return true; + } + + }; + + ListElementRemoval.removeIf(list, matchNothing); + + Assert.assertEquals(0, list.size()); + } + + @Test + public void removeSome() { 
+ List list = new ArrayList<>(asList("a", "b", "c")); + RemoveElementPredicate matchNothing = new RemoveElementPredicate() { + + @Override + public boolean remove(String t) { + return t.equals("a") || t.endsWith("c"); + } + + }; + + ListElementRemoval.removeIf(list, matchNothing); + + Assert.assertEquals(1, list.size()); + Assert.assertEquals("b", list.get(0)); + } +}