Fixed table offset bug
This commit is contained in:
parent
03a09860f4
commit
50ec16601c
@ -1,5 +1,8 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
||||
@ -9,9 +12,9 @@ import lombok.Value;
|
||||
@Value
|
||||
public class CellValue {
|
||||
|
||||
TextBlock textBlock;
|
||||
private List<TextBlock> textBlocks;
|
||||
|
||||
int rowSpanStart;
|
||||
private int rowSpanStart;
|
||||
|
||||
|
||||
@Override
|
||||
@ -19,17 +22,24 @@ public class CellValue {
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
TextPositionSequence previous = null;
|
||||
for (TextPositionSequence word : textBlock.getSequences()) {
|
||||
if (previous != null) {
|
||||
if (Math.abs(previous.getRotationAdjustedY() - word.getRotationAdjustedY()) > word.getTextHeight()) {
|
||||
sb.append('\n');
|
||||
} else {
|
||||
sb.append(' ');
|
||||
Iterator<TextBlock> itty = textBlocks.iterator();
|
||||
while (itty.hasNext()) {
|
||||
TextBlock textBlock = itty.next();
|
||||
TextPositionSequence previous = null;
|
||||
for (TextPositionSequence word : textBlock.getSequences()) {
|
||||
if (previous != null) {
|
||||
if (Math.abs(previous.getRotationAdjustedY() - word.getRotationAdjustedY()) > word.getTextHeight()) {
|
||||
sb.append('\n');
|
||||
} else {
|
||||
sb.append(' ');
|
||||
}
|
||||
}
|
||||
sb.append(word.toString());
|
||||
previous = word;
|
||||
}
|
||||
if (itty.hasNext()) {
|
||||
sb.append(' ');
|
||||
}
|
||||
sb.append(word.toString());
|
||||
previous = word;
|
||||
}
|
||||
|
||||
return TextNormalizationUtilities.removeHyphenLineBreaks(sb.toString())
|
||||
|
||||
@ -51,8 +51,7 @@ public class Section {
|
||||
String cleanHeaderName = headerName.replaceAll("\n", "").replaceAll(" ", "").replaceAll("-", "");
|
||||
|
||||
return tabularData != null && tabularData.containsKey(cleanHeaderName) && tabularData.get(cleanHeaderName)
|
||||
.getTextBlock()
|
||||
.getText()
|
||||
.toString()
|
||||
.equals(value);
|
||||
}
|
||||
|
||||
@ -295,7 +294,7 @@ public class Section {
|
||||
entity.setRedaction(redact);
|
||||
entity.setMatchedRule(ruleNumber);
|
||||
entity.setRedactionReason(reason);
|
||||
entity.setTargetSequences(value.getTextBlock()
|
||||
entity.setTargetSequences(value.getTextBlocks().get(0)
|
||||
.getSequences()); // Make sure no other cells with same content are highlighted
|
||||
entity.setLegalBasis(legalBasis);
|
||||
|
||||
|
||||
@ -126,9 +126,9 @@ public class EntityRedactionService {
|
||||
.replaceAll("\n", "")
|
||||
.replaceAll(" ", "")
|
||||
.replaceAll("-", "");
|
||||
tabularData.put(headerName, new CellValue(cell.getTextBlocks().get(0), cellStart));
|
||||
tabularData.put(headerName, new CellValue(cell.getTextBlocks(), cellStart));
|
||||
});
|
||||
start = start + cell.toString().length() + 1; // include automatically appended white space
|
||||
start = start + cell.toString().length() + 1;
|
||||
for (TextBlock textBlock : cell.getTextBlocks()) {
|
||||
searchableRow.addAll(textBlock.getSequences());
|
||||
}
|
||||
|
||||
@ -2,6 +2,7 @@ package com.iqser.red.service.redaction.v1.server.tableextraction.model;
|
||||
|
||||
import java.awt.geom.Point2D;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
@ -41,7 +42,9 @@ public class Cell extends Rectangle {
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
for (TextBlock textBlock : textBlocks) {
|
||||
Iterator<TextBlock> itty = textBlocks.iterator();
|
||||
while (itty.hasNext()) {
|
||||
TextBlock textBlock = itty.next();
|
||||
TextPositionSequence previous = null;
|
||||
for (TextPositionSequence word : textBlock.getSequences()) {
|
||||
if (previous != null) {
|
||||
@ -54,6 +57,9 @@ public class Cell extends Rectangle {
|
||||
sb.append(word.toString());
|
||||
previous = word;
|
||||
}
|
||||
if (itty.hasNext()) {
|
||||
sb.append(' ');
|
||||
}
|
||||
}
|
||||
|
||||
return TextNormalizationUtilities.removeHyphenLineBreaks(sb.toString())
|
||||
|
||||
@ -382,7 +382,7 @@ public class RedactionIntegrationTest {
|
||||
|
||||
System.out.println("redactionTest");
|
||||
long start = System.currentTimeMillis();
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/52 Fludioxonil_RAR_07_Volume_3CA_B-5_2018-02-21.pdf");
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/51 Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf");
|
||||
|
||||
RedactionRequest request = RedactionRequest.builder()
|
||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
||||
@ -392,7 +392,7 @@ public class RedactionIntegrationTest {
|
||||
RedactionResult result = redactionController.redact(request);
|
||||
|
||||
result.getRedactionLog().getRedactionLogEntry().forEach(entry -> {
|
||||
if(entry.isRecommendation()){
|
||||
if(entry.isDictionaryEntry()){
|
||||
System.out.println(entry.getValue());
|
||||
}
|
||||
});
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user