RED-9149 - Header and footer extraction by page-association
This commit is contained in:
parent
fda25852d1
commit
f1dbcc24a2
@ -58,7 +58,7 @@ public class HeaderFooterDetection {
|
||||
|
||||
|
||||
private boolean detectHeadersOrFootersByPageAssociation(String testString, List<List<AbstractPageBlock>> candidates, int window, double[] weights) {
|
||||
|
||||
|
||||
double highestScore = 0.0;
|
||||
|
||||
for (int i = 0; i < candidates.size(); i++) {
|
||||
@ -80,7 +80,7 @@ public class HeaderFooterDetection {
|
||||
}
|
||||
}
|
||||
|
||||
// Compare the testString against each candidates in the window
|
||||
// Compare the testString against each candidate in the window
|
||||
for (int j = 0; j < maxLen; j++) {
|
||||
double score = 0.0;
|
||||
try {
|
||||
@ -103,18 +103,18 @@ public class HeaderFooterDetection {
|
||||
}
|
||||
|
||||
|
||||
private double compare(String a, String b) {
|
||||
private double compare(String candidate1, String candidate2) {
|
||||
|
||||
int count = 0;
|
||||
a = a.replaceAll("\\d", "@");
|
||||
b = b.replaceAll("\\d", "@");
|
||||
candidate1 = candidate1.replaceAll("\\d", "@");
|
||||
candidate2 = candidate2.replaceAll("\\d", "@");
|
||||
|
||||
for (int i = 0; i < Math.min(a.length(), b.length()); i++) {
|
||||
if (a.charAt(i) == b.charAt(i)) {
|
||||
for (int i = 0; i < Math.min(candidate1.length(), candidate2.length()); i++) {
|
||||
if (candidate1.charAt(i) == candidate2.charAt(i)) {
|
||||
count++;
|
||||
}
|
||||
}
|
||||
return (double) count / Math.max(a.length(), b.length());
|
||||
return (double) count / Math.max(candidate1.length(), candidate2.length());
|
||||
}
|
||||
|
||||
|
||||
@ -139,7 +139,7 @@ public class HeaderFooterDetection {
|
||||
|
||||
for (int i = start; i <= end; i++) {
|
||||
if (i != currentPageIndex) {
|
||||
nearestPages.add(pagesCache.computeIfAbsent(i, idx -> allPages.get(idx)));
|
||||
nearestPages.add(pagesCache.computeIfAbsent(i, allPages::get));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user