Fixed table offset bug

This commit is contained in:
deiflaender 2020-12-10 19:33:47 +01:00
parent 03a09860f4
commit 50ec16601c
5 changed files with 34 additions and 19 deletions

View File

@ -1,5 +1,8 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
import java.util.Iterator;
import java.util.List;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
@ -9,9 +12,9 @@ import lombok.Value;
@Value
public class CellValue {
TextBlock textBlock;
private List<TextBlock> textBlocks;
int rowSpanStart;
private int rowSpanStart;
@Override
@ -19,17 +22,24 @@ public class CellValue {
StringBuilder sb = new StringBuilder();
TextPositionSequence previous = null;
for (TextPositionSequence word : textBlock.getSequences()) {
if (previous != null) {
if (Math.abs(previous.getRotationAdjustedY() - word.getRotationAdjustedY()) > word.getTextHeight()) {
sb.append('\n');
} else {
sb.append(' ');
Iterator<TextBlock> itty = textBlocks.iterator();
while (itty.hasNext()) {
TextBlock textBlock = itty.next();
TextPositionSequence previous = null;
for (TextPositionSequence word : textBlock.getSequences()) {
if (previous != null) {
if (Math.abs(previous.getRotationAdjustedY() - word.getRotationAdjustedY()) > word.getTextHeight()) {
sb.append('\n');
} else {
sb.append(' ');
}
}
sb.append(word.toString());
previous = word;
}
if (itty.hasNext()) {
sb.append(' ');
}
sb.append(word.toString());
previous = word;
}
return TextNormalizationUtilities.removeHyphenLineBreaks(sb.toString())

View File

@ -51,8 +51,7 @@ public class Section {
String cleanHeaderName = headerName.replaceAll("\n", "").replaceAll(" ", "").replaceAll("-", "");
return tabularData != null && tabularData.containsKey(cleanHeaderName) && tabularData.get(cleanHeaderName)
.getTextBlock()
.getText()
.toString()
.equals(value);
}
@ -295,7 +294,7 @@ public class Section {
entity.setRedaction(redact);
entity.setMatchedRule(ruleNumber);
entity.setRedactionReason(reason);
entity.setTargetSequences(value.getTextBlock()
entity.setTargetSequences(value.getTextBlocks().get(0)
.getSequences()); // Make sure no other cells with same content are highlighted
entity.setLegalBasis(legalBasis);

View File

@ -126,9 +126,9 @@ public class EntityRedactionService {
.replaceAll("\n", "")
.replaceAll(" ", "")
.replaceAll("-", "");
tabularData.put(headerName, new CellValue(cell.getTextBlocks().get(0), cellStart));
tabularData.put(headerName, new CellValue(cell.getTextBlocks(), cellStart));
});
start = start + cell.toString().length() + 1; // include automatically appended white space
start = start + cell.toString().length() + 1;
for (TextBlock textBlock : cell.getTextBlocks()) {
searchableRow.addAll(textBlock.getSequences());
}

View File

@ -2,6 +2,7 @@ package com.iqser.red.service.redaction.v1.server.tableextraction.model;
import java.awt.geom.Point2D;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
@ -41,7 +42,9 @@ public class Cell extends Rectangle {
StringBuilder sb = new StringBuilder();
for (TextBlock textBlock : textBlocks) {
Iterator<TextBlock> itty = textBlocks.iterator();
while (itty.hasNext()) {
TextBlock textBlock = itty.next();
TextPositionSequence previous = null;
for (TextPositionSequence word : textBlock.getSequences()) {
if (previous != null) {
@ -54,6 +57,9 @@ public class Cell extends Rectangle {
sb.append(word.toString());
previous = word;
}
if (itty.hasNext()) {
sb.append(' ');
}
}
return TextNormalizationUtilities.removeHyphenLineBreaks(sb.toString())

View File

@ -382,7 +382,7 @@ public class RedactionIntegrationTest {
System.out.println("redactionTest");
long start = System.currentTimeMillis();
ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/52 Fludioxonil_RAR_07_Volume_3CA_B-5_2018-02-21.pdf");
ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/51 Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf");
RedactionRequest request = RedactionRequest.builder()
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
@ -392,7 +392,7 @@ public class RedactionIntegrationTest {
RedactionResult result = redactionController.redact(request);
result.getRedactionLog().getRedactionLogEntry().forEach(entry -> {
if(entry.isRecommendation()){
if(entry.isDictionaryEntry()){
System.out.println(entry.getValue());
}
});