diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/CellValue.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/CellValue.java index 7855a750..c566bbb5 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/CellValue.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/CellValue.java @@ -1,5 +1,8 @@ package com.iqser.red.service.redaction.v1.server.redaction.model; +import java.util.Iterator; +import java.util.List; + import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock; import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence; import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities; @@ -9,9 +12,9 @@ import lombok.Value; @Value public class CellValue { - TextBlock textBlock; + private List<TextBlock> textBlocks; - int rowSpanStart; + private int rowSpanStart; @Override @@ -19,17 +22,24 @@ public class CellValue { StringBuilder sb = new StringBuilder(); - TextPositionSequence previous = null; - for (TextPositionSequence word : textBlock.getSequences()) { - if (previous != null) { - if (Math.abs(previous.getRotationAdjustedY() - word.getRotationAdjustedY()) > word.getTextHeight()) { - sb.append('\n'); - } else { - sb.append(' '); + Iterator<TextBlock> itty = textBlocks.iterator(); + while (itty.hasNext()) { + TextBlock textBlock = itty.next(); + TextPositionSequence previous = null; + for (TextPositionSequence word : textBlock.getSequences()) { + if (previous != null) { + if (Math.abs(previous.getRotationAdjustedY() - word.getRotationAdjustedY()) > word.getTextHeight()) { + sb.append('\n'); + } else { + sb.append(' '); + } } + sb.append(word.toString()); + previous = word; + } + if 
(itty.hasNext()) { + sb.append(' '); } - sb.append(word.toString()); - previous = word; } return TextNormalizationUtilities.removeHyphenLineBreaks(sb.toString()) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java index f70766b0..aa94bda4 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java @@ -51,8 +51,7 @@ public class Section { String cleanHeaderName = headerName.replaceAll("\n", "").replaceAll(" ", "").replaceAll("-", ""); return tabularData != null && tabularData.containsKey(cleanHeaderName) && tabularData.get(cleanHeaderName) - .getTextBlock() - .getText() + .toString() .equals(value); } @@ -295,7 +294,7 @@ public class Section { entity.setRedaction(redact); entity.setMatchedRule(ruleNumber); entity.setRedactionReason(reason); - entity.setTargetSequences(value.getTextBlock() + entity.setTargetSequences(value.getTextBlocks().get(0) .getSequences()); // Make sure no other cells with same content are highlighted entity.setLegalBasis(legalBasis); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java index aedaf96f..b9bc4e8e 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java +++ 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionService.java @@ -126,9 +126,9 @@ public class EntityRedactionService { .replaceAll("\n", "") .replaceAll(" ", "") .replaceAll("-", ""); - tabularData.put(headerName, new CellValue(cell.getTextBlocks().get(0), cellStart)); + tabularData.put(headerName, new CellValue(cell.getTextBlocks(), cellStart)); }); - start = start + cell.toString().length() + 1; // include automatically appended white space + start = start + cell.toString().length() + 1; for (TextBlock textBlock : cell.getTextBlocks()) { searchableRow.addAll(textBlock.getSequences()); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Cell.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Cell.java index d5efa982..aaecbc20 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Cell.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/tableextraction/model/Cell.java @@ -2,6 +2,7 @@ package com.iqser.red.service.redaction.v1.server.tableextraction.model; import java.awt.geom.Point2D; import java.util.ArrayList; +import java.util.Iterator; import java.util.List; import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock; @@ -41,7 +42,9 @@ public class Cell extends Rectangle { StringBuilder sb = new StringBuilder(); - for (TextBlock textBlock : textBlocks) { + Iterator<TextBlock> itty = textBlocks.iterator(); + while (itty.hasNext()) { + TextBlock textBlock = itty.next(); TextPositionSequence previous = null; for (TextPositionSequence word : textBlock.getSequences()) { if (previous != null) { @@ -54,6 +57,9 @@ public class Cell extends Rectangle { 
sb.append(word.toString()); previous = word; } + if (itty.hasNext()) { + sb.append(' '); + } } return TextNormalizationUtilities.removeHyphenLineBreaks(sb.toString()) diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index f9fd5135..a10e8ae1 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -382,7 +382,7 @@ public class RedactionIntegrationTest { System.out.println("redactionTest"); long start = System.currentTimeMillis(); - ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/52 Fludioxonil_RAR_07_Volume_3CA_B-5_2018-02-21.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/51 Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf"); RedactionRequest request = RedactionRequest.builder() .document(IOUtils.toByteArray(pdfFileResource.getInputStream())) @@ -392,7 +392,7 @@ public class RedactionIntegrationTest { RedactionResult result = redactionController.redact(request); result.getRedactionLog().getRedactionLogEntry().forEach(entry -> { - if(entry.isRecommendation()){ + if(entry.isDictionaryEntry()){ System.out.println(entry.getValue()); } });