Stop Trie#removePartialMatches() from being expensive #61

This changes the running time of `Trie#removePartialMatches()` from quadratic or worse (I think n^3) to linear.
This commit is contained in:
Luke Butters 2017-11-01 16:10:53 +11:00
parent 5acb073d06
commit 773ff39e48
3 changed files with 128 additions and 10 deletions

View File

@ -5,6 +5,8 @@ import org.ahocorasick.interval.Intervalable;
import org.ahocorasick.trie.handler.DefaultEmitHandler;
import org.ahocorasick.trie.handler.EmitHandler;
import org.ahocorasick.trie.handler.StatefulEmitHandler;
import org.ahocorasick.util.ListElementRemoval;
import org.ahocorasick.util.ListElementRemoval.RemoveElementPredicate;
import java.util.ArrayList;
import java.util.Collection;
@ -207,17 +209,17 @@ public class Trie {
}
private void removePartialMatches(final CharSequence searchText, final List<Emit> collectedEmits) {
final List<Emit> removeEmits = new ArrayList<>();
for (final Emit emit : collectedEmits) {
if (isPartialMatch(searchText, emit)) {
removeEmits.add(emit);
final RemoveElementPredicate<Emit> predicate = new RemoveElementPredicate<Emit>() {
@Override
public boolean remove(Emit emit) {
return isPartialMatch(searchText, emit);
}
}
for (final Emit removeEmit : removeEmits) {
collectedEmits.remove(removeEmit);
}
};
ListElementRemoval.removeIf(collectedEmits, predicate);
}
private void removePartialMatchesWhiteSpaceSeparated(final CharSequence searchText, final List<Emit> collectedEmits) {

View File

@ -0,0 +1,51 @@
package org.ahocorasick.util;
import java.util.ArrayList;
import java.util.List;
/**
 * Helps remove elements from a list in an efficient way.
 *
 * <p>Removing elements from an {@code ArrayList} naively is expensive: if the
 * caller first collects the elements to remove and then calls
 * {@code List#remove(Object)} for each of them, every call has to scan the
 * list for the element and then shift the remaining elements one slot to the
 * left. Both steps are linear in the list size, so removing up to n elements
 * takes at least quadratic time.</p>
 *
 * <p>This class avoids that by copying only the elements to keep into a new
 * list, then clearing the given list and adding the kept elements back, which
 * gives O(n) running time for {@code ArrayList}.</p>
 *
 * <p>The performance of this has not been thoroughly tested for linked lists,
 * but it is probably not too bad.</p>
 *
 * <p>This class can be removed completely once Java 8 is available, as
 * {@code Collection#removeIf} can be used instead and is already optimised
 * for each list implementation.</p>
 */
public final class ListElementRemoval {

    /** Not instantiable: this class only hosts static helpers. */
    private ListElementRemoval() {
    }

    /**
     * Decides which elements have to be removed from a list.
     *
     * @param <T> type of element the predicate is able to inspect
     */
    public interface RemoveElementPredicate<T> {
        /**
         * Tests a single element.
         *
         * @param t the element to test
         * @return {@code true} if the element must be removed, {@code false} to keep it
         */
        boolean remove(T t);
    }

    /**
     * Removes all elements from the list matching the given predicate, keeping
     * the remaining elements in their original order.
     *
     * <p>The list is only modified when at least one element matches, so a call
     * that removes nothing also succeeds on fixed-size or unmodifiable lists.</p>
     *
     * @param <T> element type of the list
     * @param list the list to remove elements from; modified in place
     * @param predicate returns {@code true} for every element that must be removed;
     *        may be declared against any supertype of the element type
     */
    public static <T> void removeIf(final List<T> list, final RemoveElementPredicate<? super T> predicate) {
        final List<T> kept = new ArrayList<>(list.size());
        for (final T element : list) {
            if (!predicate.remove(element)) {
                kept.add(element);
            }
        }
        if (kept.size() == list.size()) {
            // Nothing matched: skip the pointless clear-and-refill of the list.
            return;
        }
        list.clear();
        list.addAll(kept);
    }
}

View File

@ -0,0 +1,65 @@
package org.ahocorasick.util;
import java.util.ArrayList;
import java.util.List;
import org.ahocorasick.util.ListElementRemoval.RemoveElementPredicate;
import org.junit.Test;
import junit.framework.Assert;
import static java.util.Arrays.asList;
/**
 * Unit tests for {@code ListElementRemoval.removeIf}, covering predicates that
 * match no element, every element, and a subset of the elements.
 */
public class ListElementRemovalTest {

    /** A predicate that never matches must leave the list contents and order untouched. */
    @Test
    public void removeNone() {
        List<String> list = new ArrayList<>(asList("a", "b", "c"));
        RemoveElementPredicate<String> matchNothing = new RemoveElementPredicate<String>() {
            @Override
            public boolean remove(String t) {
                return false;
            }
        };
        ListElementRemoval.removeIf(list, matchNothing);
        // Comparing whole lists checks size, content and order in one go.
        Assert.assertEquals(asList("a", "b", "c"), list);
    }

    /** A predicate that always matches must empty the list. */
    @Test
    public void removeAll() {
        List<String> list = new ArrayList<>(asList("a", "b", "c"));
        RemoveElementPredicate<String> matchEverything = new RemoveElementPredicate<String>() {
            @Override
            public boolean remove(String t) {
                return true;
            }
        };
        ListElementRemoval.removeIf(list, matchEverything);
        Assert.assertTrue(list.isEmpty());
    }

    /** Only the matching elements (first and last here) are removed; the rest stays. */
    @Test
    public void removeSome() {
        List<String> list = new ArrayList<>(asList("a", "b", "c"));
        RemoveElementPredicate<String> matchFirstAndLast = new RemoveElementPredicate<String>() {
            @Override
            public boolean remove(String t) {
                return t.equals("a") || t.endsWith("c");
            }
        };
        ListElementRemoval.removeIf(list, matchFirstAndLast);
        Assert.assertEquals(asList("b"), list);
    }
}