diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java index a63343d7..dd6f9419 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java @@ -7,6 +7,7 @@ import java.util.Map; import java.util.Set; import java.util.regex.Pattern; +import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.lang3.StringUtils; import lombok.Builder; @@ -100,7 +101,7 @@ public class Section { if (values != null) { for (String value : values) { - if (value != null && StringUtils.isNotBlank(value)) { + if (StringUtils.isNotBlank(value)) { Set found = findEntities(value.trim(), asType); entities.addAll(found); } @@ -160,14 +161,22 @@ public class Section { } - public void highlightCell(String reason) { + public void highlightCell(String cellHeader, int ruleNumber) { - String value = tabularData.get(reason); + String value = tabularData.get(cellHeader); if (value == null) { - log.warn("Could not find any data for {}.", reason); + log.warn("Could not find any data for {}.", cellHeader); } else { - Entity entity = findEntities(value, "some type").iterator().next(); - entity.setRedaction(false); + Set found = findEntities(value, "must_redact"); + if (CollectionUtils.isEmpty(found)) { + log.warn("Could not identify value {} in row.", value); + } else { + Entity entity = found.iterator().next(); + entity.setRedaction(false); + entity.setMatchedRule(ruleNumber); + entity.setRedactionReason(cellHeader); + entities.add(entity); + } } } diff --git 
a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java index bc067897..2b933aff 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java @@ -117,16 +117,16 @@ public class EntityRedactionServiceTest { "import com.iqser.red.service.redaction.v1.server.redaction.model.Section\n" + "\n" + "global Section section\n" + - "rule \"8: Redact Authors and Addresses in Reference Table, if it is a Vertebrate study\"\n" + + "rule \"9: Redact Authors and Addresses in Reference Table, if it is a Vertebrate study\"\n" + " when\n" + " Section(tabularData != null && tabularData.size() > 0\n" + " && tabularData.containsKey(\"Vertebrate\\nstudy Y/N\")\n" + " && tabularData.get(\"Vertebrate\\nstudy Y/N\").equals(\"Y\")\n" + " )\n" + " then\n" + - " section.redact(\"name\", 8, \"Redacted because row is a vertebrate study\");\n" + - " section.redact(\"address\", 8, \"Redacted because rows is a vertebrate study\");\n" + - " section.highlightCell(\"Vertebrate\\nstudy Y/N\");\n" + + " section.redact(\"name\", 9, \"Redacted because row is a vertebrate study\");\n" + + " section.redact(\"address\", 9, \"Redacted because rows is a vertebrate study\");\n" + + " section.highlightCell(\"Vertebrate\\nstudy Y/N\", 9);\n" + " end"; when(rulesClient.getVersion()).thenReturn(1L); when(rulesClient.getRules()).thenReturn(new RulesResponse(tableRules)); @@ -149,7 +149,7 @@ public class EntityRedactionServiceTest { Document classifiedDoc = 
pdfSegmentationService.parseDocument(pdDocument); entityRedactionService.processDocument(classifiedDoc); assertThat(classifiedDoc.getEntities()).hasSize(1); // one page - assertThat(classifiedDoc.getEntities().get(1)).hasSize(4); // 4 out of 5 entities recognized on page 1 + assertThat(classifiedDoc.getEntities().get(1)).hasSize(5); // 5 entities expected on page 1 } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl index bb5f051c..0a887fbe 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl @@ -106,7 +106,7 @@ rule "9: Redact Authors and Addresses in Reference Table, if it is a Vertebrate && tabularData.get("Vertebrate\nstudy Y/N").equals("Y") ) then - section.redact("name", 8, "Redacted because row is a vertebrate study"); - section.redact("address", 8, "Redacted because rows is a vertebrate study"); - section.highlightCell("Vertebrate\nstudy Y/N"); + section.redact("name", 9, "Redacted because row is a vertebrate study"); + section.redact("address", 9, "Redacted because rows is a vertebrate study"); + section.highlightCell("Vertebrate\nstudy Y/N", 9); end \ No newline at end of file