RED-9149 - Header and footer extraction by page-association

This commit is contained in:
Andrei Isvoran 2024-05-10 15:49:08 +03:00
parent fda25852d1
commit f1dbcc24a2

View File

@@ -58,7 +58,7 @@ public class HeaderFooterDetection {
private boolean detectHeadersOrFootersByPageAssociation(String testString, List<List<AbstractPageBlock>> candidates, int window, double[] weights) {
double highestScore = 0.0;
for (int i = 0; i < candidates.size(); i++) {
@@ -80,7 +80,7 @@ public class HeaderFooterDetection {
}
}
// Compare the testString against each candidates in the window
// Compare the testString against each candidate in the window
for (int j = 0; j < maxLen; j++) {
double score = 0.0;
try {
@@ -103,18 +103,18 @@ public class HeaderFooterDetection {
}
/**
 * Scores how similar two strings are, position by position.
 * <p>
 * Every digit (regex {@code \d}) in both inputs is first replaced with {@code '@'}, so two strings
 * that differ only in their digits score as identical. The score is the number of indices, up to the
 * shorter string's length, at which both strings hold the same character, divided by the length of
 * the longer string.
 * <p>
 * This hunk shows the method before and after the commit: the only change is renaming the
 * parameters from {@code a}/{@code b} to {@code candidate1}/{@code candidate2}.
 *
 * @param candidate1 first string to compare
 * @param candidate2 second string to compare
 * @return matching positions divided by the longer length
 */
private double compare(String a, String b) {
private double compare(String candidate1, String candidate2) {
int count = 0;
// Mask digits so that only the non-numeric text decides the match.
a = a.replaceAll("\\d", "@");
b = b.replaceAll("\\d", "@");
candidate1 = candidate1.replaceAll("\\d", "@");
candidate2 = candidate2.replaceAll("\\d", "@");
// Only the overlapping prefix is compared; extra characters of the longer string never match.
for (int i = 0; i < Math.min(a.length(), b.length()); i++) {
if (a.charAt(i) == b.charAt(i)) {
for (int i = 0; i < Math.min(candidate1.length(), candidate2.length()); i++) {
if (candidate1.charAt(i) == candidate2.charAt(i)) {
count++;
}
}
// NOTE(review): when both strings are empty the divisor is 0 and 0.0 / 0 evaluates to NaN in Java — confirm callers tolerate a NaN score.
return (double) count / Math.max(a.length(), b.length());
return (double) count / Math.max(candidate1.length(), candidate2.length());
}
@@ -139,7 +139,7 @@ public class HeaderFooterDetection {
for (int i = start; i <= end; i++) {
if (i != currentPageIndex) {
nearestPages.add(pagesCache.computeIfAbsent(i, idx -> allPages.get(idx)));
nearestPages.add(pagesCache.computeIfAbsent(i, allPages::get));
}
}