RED-3800 String matching performance test

This commit is contained in:
Timo Bejan 2022-05-24 12:01:10 +03:00
parent c85ce25ed4
commit 21d717f083

View File

@ -1,6 +1,7 @@
package com.iqser.red.service.redaction.v1.server.stringmatching;
import lombok.AllArgsConstructor;
import lombok.EqualsAndHashCode;
import lombok.SneakyThrows;
import org.ahocorasick.trie.Trie;
import org.apache.commons.io.IOUtils;
@ -14,6 +15,8 @@ import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import static org.assertj.core.api.AssertionsForClassTypes.assertThat;
@RunWith(SpringRunner.class)
public class StringMatchingPerformanceTest {
@ -47,7 +50,7 @@ public class StringMatchingPerformanceTest {
} while (startIndex != -1);
}
long t2 = System.currentTimeMillis();
System.out.println("Naive approach found " + naiveIndexes.size() + " entires in " + (t2 - t1) + "ms");
System.out.println("Naive approach found " + naiveIndexes.size() + " entries in " + (t2 - t1) + "ms");
// 2. Boyer Moore
@ -57,7 +60,7 @@ public class StringMatchingPerformanceTest {
boyerMooreIndexes.addAll(pattern.matcher(text).results().map(r -> new Index(r.start(), r.end())).collect(Collectors.toList()));
}
t2 = System.currentTimeMillis();
System.out.println("Boyer Moore found " + boyerMooreIndexes.size() + " entires in " + (t2 - t1) + "ms");
System.out.println("Boyer Moore found " + boyerMooreIndexes.size() + " entries in " + (t2 - t1) + "ms");
// 3. Aho Corasick
@ -65,11 +68,16 @@ public class StringMatchingPerformanceTest {
var result = trie.parseText(text);
var ahoCorasickIndexes = result.stream().map(r -> new Index(r.getStart(), r.getEnd() + 1)).collect(Collectors.toSet());
t2 = System.currentTimeMillis();
System.out.println("Aho Corasick found " + ahoCorasickIndexes.size() + " entires in " + (t2 - t1) + "ms");
System.out.println("Aho Corasick found " + ahoCorasickIndexes.size() + " entries in " + (t2 - t1) + "ms");
// Assert that all algorithms are equal
assertThat(naiveIndexes).isEqualTo(boyerMooreIndexes).isEqualTo(ahoCorasickIndexes);
}
@AllArgsConstructor
@EqualsAndHashCode(of = {"start", "end"})
public static class Index {
int start;
int end;