Stop Trie#removePartialMatches() from being expensive #61
This changes the running time of `Trie#removePartialMatches()` from something that is quadratic time or worse (I think n^3) to a running time that is linear.
This commit is contained in:
parent
5acb073d06
commit
773ff39e48
@ -5,6 +5,8 @@ import org.ahocorasick.interval.Intervalable;
|
||||
import org.ahocorasick.trie.handler.DefaultEmitHandler;
|
||||
import org.ahocorasick.trie.handler.EmitHandler;
|
||||
import org.ahocorasick.trie.handler.StatefulEmitHandler;
|
||||
import org.ahocorasick.util.ListElementRemoval;
|
||||
import org.ahocorasick.util.ListElementRemoval.RemoveElementPredicate;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
@ -207,17 +209,17 @@ public class Trie {
|
||||
}
|
||||
|
||||
private void removePartialMatches(final CharSequence searchText, final List<Emit> collectedEmits) {
|
||||
final List<Emit> removeEmits = new ArrayList<>();
|
||||
|
||||
for (final Emit emit : collectedEmits) {
|
||||
if (isPartialMatch(searchText, emit)) {
|
||||
removeEmits.add(emit);
|
||||
|
||||
final RemoveElementPredicate<Emit> predicate = new RemoveElementPredicate<Emit>() {
|
||||
|
||||
@Override
|
||||
public boolean remove(Emit emit) {
|
||||
return isPartialMatch(searchText, emit);
|
||||
}
|
||||
}
|
||||
|
||||
for (final Emit removeEmit : removeEmits) {
|
||||
collectedEmits.remove(removeEmit);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
ListElementRemoval.removeIf(collectedEmits, predicate);
|
||||
}
|
||||
|
||||
private void removePartialMatchesWhiteSpaceSeparated(final CharSequence searchText, final List<Emit> collectedEmits) {
|
||||
|
||||
51
src/main/java/org/ahocorasick/util/ListElementRemoval.java
Normal file
51
src/main/java/org/ahocorasick/util/ListElementRemoval.java
Normal file
@ -0,0 +1,51 @@
|
||||
package org.ahocorasick.util;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Helps remove elements from a list in an efficient way.
 *
 * <p>Removing elements from an {@code ArrayList} one at a time is expensive: each
 * {@code remove(Object)} call first scans the list for the element and then shifts
 * every following element one slot to the left. Each call is therefore linear in the
 * size of the list, and removing up to n elements this way takes quadratic time.</p>
 *
 * <p>This class avoids that by copying only the elements we want to keep into a new
 * list in a single pass. If anything was dropped, the given list is cleared and the
 * kept elements are added back in bulk, which (for {@code ArrayList}) gives linear
 * running time.</p>
 *
 * <p>The performance of this has not been thoroughly tested for linked lists, but
 * it probably is not too bad.</p>
 *
 * <p>This can be completely removed in Java 8, as the {@code List#removeIf()} method
 * can be used instead; it is already optimised for each list implementation.</p>
 */
public class ListElementRemoval {

    /**
     * Decides, element by element, what should be removed from a list.
     *
     * @param <T> the type of element the predicate can inspect
     */
    public interface RemoveElementPredicate<T> {

        /**
         * @param t the element to inspect
         * @return {@code true} if the element must be removed, {@code false} to keep it
         */
        boolean remove(T t);
    }

    /**
     * Removes all elements from the list matching the given predicate. The relative
     * order of the remaining elements is preserved.
     *
     * <p>The list is only modified when at least one element matched, so a list that
     * needs no change is never cleared and refilled.</p>
     *
     * @param list      the list to filter in place
     * @param predicate returns {@code true} for every element that must be removed; it
     *                  may be declared for any supertype of the list's element type
     * @param <T>       the element type of the list
     */
    public static <T> void removeIf(final List<T> list, final RemoveElementPredicate<? super T> predicate) {
        final List<T> kept = new ArrayList<>(list.size());

        for (final T element : list) {
            if (!predicate.remove(element)) {
                kept.add(element);
            }
        }

        // Nothing matched: the list already holds exactly the kept elements.
        if (kept.size() == list.size()) {
            return;
        }

        list.clear();
        list.addAll(kept);
    }
}
|
||||
@ -0,0 +1,65 @@
|
||||
package org.ahocorasick.util;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.ahocorasick.util.ListElementRemoval.RemoveElementPredicate;
|
||||
import org.junit.Test;
|
||||
|
||||
import junit.framework.Assert;
|
||||
|
||||
import static java.util.Arrays.asList;
|
||||
public class ListElementRemovalTest {
|
||||
|
||||
@Test
|
||||
public void removeNone() {
|
||||
List<String> list = new ArrayList<>(asList("a", "b", "c"));
|
||||
RemoveElementPredicate<String> matchNothing = new RemoveElementPredicate<String>() {
|
||||
|
||||
@Override
|
||||
public boolean remove(String t) {
|
||||
return false;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
ListElementRemoval.removeIf(list, matchNothing);
|
||||
|
||||
Assert.assertEquals(3, list.size());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void removeAll() {
|
||||
List<String> list = new ArrayList<>(asList("a", "b", "c"));
|
||||
RemoveElementPredicate<String> matchNothing = new RemoveElementPredicate<String>() {
|
||||
|
||||
@Override
|
||||
public boolean remove(String t) {
|
||||
return true;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
ListElementRemoval.removeIf(list, matchNothing);
|
||||
|
||||
Assert.assertEquals(0, list.size());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void removeSome() {
|
||||
List<String> list = new ArrayList<>(asList("a", "b", "c"));
|
||||
RemoveElementPredicate<String> matchNothing = new RemoveElementPredicate<String>() {
|
||||
|
||||
@Override
|
||||
public boolean remove(String t) {
|
||||
return t.equals("a") || t.endsWith("c");
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
ListElementRemoval.removeIf(list, matchNothing);
|
||||
|
||||
Assert.assertEquals(1, list.size());
|
||||
Assert.assertEquals("b", list.get(0));
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user