diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java index 736c98bc..d2e8a8cb 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java @@ -275,6 +275,27 @@ public class EntityRedactionServiceTest { assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 9).count()).isEqualTo(8); assertThat(classifiedDoc.getEntities().get(2).stream().filter(entity -> entity.getMatchedRule() == 9).count()).isEqualTo(4); } + + pdfFileResource = new ClassPathResource("files/Minimal Examples/Header Propagation2.pdf"); + + dictionaryResponse = DictionaryResponse.builder() + .entries(Arrays.asList("Tribolet, R.", "Muir, G.", "Kühne-Thu, H.", "Close, C.")) + .build(); + + when(dictionaryClient.getVersion()).thenReturn(DICTIONARY_VERSION.incrementAndGet()); + when(dictionaryClient.getDictionaryForType(NAME_CODE)).thenReturn(dictionaryResponse); + addressResponse = DictionaryResponse.builder() + .entries(Collections.singletonList("Novartis Crop Protection AG, Basel, Switzerland")) + .build(); + when(dictionaryClient.getDictionaryForType(ADDRESS_CODE)).thenReturn(addressResponse); + + try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) { + Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument); + entityRedactionService.processDocument(classifiedDoc, null); + assertThat(classifiedDoc.getEntities()).hasSize(1); // one page + assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 9).count()).isEqualTo(3); + assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 8).count()).isEqualTo(8); + } } @@ -312,7 +333,7 @@ public class EntityRedactionServiceTest { "global Section section\n" + "rule \"8: Not redacted because Vertebrate Study = N\"\n" + " when\n" + - " Section(rowEquals(\"Vertebrate study Y/N\", \"N\"))\n" + + " Section(rowEquals(\"Vertebrate study Y/N\", \"N\") || rowEquals(\"Vertebrate study Y/N\", \"No\"))\n" + " then\n" + " section.redactNot(\"name\", 8, \"Not redacted because row is not a vertebrate study\");\n" + " section.redactNot(\"address\", 8, \"Not redacted because row is not a vertebrate study\");\n" + @@ -320,7 +341,8 @@ public class EntityRedactionServiceTest { " end\n" + "rule \"9: Redact Authors and Addresses in Reference Table, if it is a Vertebrate study\"\n" + " when\n" + - " Section(rowEquals(\"Vertebrate study Y/N\", \"Y\"))\n" + + " Section(rowEquals(\"Vertebrate study Y/N\", \"Y\") || rowEquals(\"Vertebrate study Y/N\", " + + "\"Yes\"))\n" + " then\n" + " section.redact(\"name\", 9, \"Redacted because row is a vertebrate study\");\n" + " section.redact(\"address\", 9, \"Redacted because row is a vertebrate study\");\n" + diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl index 7fd18d3d..2e9cab7b 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl @@ -99,7 +99,7 @@ rule "7: Redact contact information if Producer is found" rule "8: Not redacted because Vertebrate Study = N" when - Section(rowEquals("Vertebrate study Y/N", "N")) + Section(rowEquals("Vertebrate study Y/N", "N") || rowEquals("Vertebrate study Y/N", "No")) then section.redactNot("name", 8, "Not redacted because row is not a vertebrate study"); section.redactNot("address", 8, "Not redacted because row is not a vertebrate study"); @@ -118,7 +118,7 @@ rule "9: Redact if must redact entry is found" rule "10: Redact Authors and Addresses in Reference Table if it is a Vertebrate study" when - Section(rowEquals("Vertebrate study Y/N", "Y")) + Section(rowEquals("Vertebrate study Y/N", "Y") || rowEquals("Vertebrate study Y/N", "Yes")) then section.redact("name", 10, "Redacted because row is a vertebrate study"); section.redact("address", 10, "Redacted because row is a vertebrate study"); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Minimal Examples/Header Propagation2.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Minimal Examples/Header Propagation2.pdf new file mode 100644 index 00000000..f53bdce6 Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Minimal Examples/Header Propagation2.pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Minimal Examples/US Syngenta.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Minimal Examples/US Syngenta.pdf new file mode 100644 index 00000000..669efb48 Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Minimal Examples/US Syngenta.pdf differ