// Patch summary (three file entries; line structure of the original patch has been collapsed):
//
//  1. IdRemoval.java
//     The closing brace of the class is removed and re-added without a trailing newline
//     ("\ No newline at end of file"); no other change to this file is shown.
//
//  2. EntityRedactionServiceTest.java
//     - Adds the constant SPONSOR_CODE ("sponsor") and a new AtomicLong counter SPONSORS_VERSION.
//     - Adds the test testSponsorInCell(), which:
//         * builds a Drools rule text containing one rule ("11: Redact sponsor company") that calls
//           section.redactIfPrecededBy("batches produced at", "sponsor", 11, ...) when the section's
//           lower-cased search text contains "batches produced at";
//         * stubs rulesClient.getVersion()/getRules() and calls droolsExecutionService.updateRules();
//         * stubs dictionaryClient.getVersion() with SPONSORS_VERSION.incrementAndGet() and
//           dictionaryClient.getDictionaryForType(SPONSOR_CODE) with the entries loaded from
//           "dictionaries/sponsor_companies.txt";
//         * parses "files/Minimal Examples/batches_new_line.pdf", runs
//           entityRedactionService.processDocument(classifiedDoc, null), and asserts that
//           getEntities() has size 1 and that getEntities().get(1) holds exactly one entity whose
//           matched rule is 11.
//     - In a stubbing section whose enclosing method header is not visible in this patch:
//         * changes the ADDRESS_CODE color from "#00ffff" to "#ff00ff" and registers SPONSOR_CODE
//           with "#00ffff" in the TypeResponse;
//         * stubs getDictionaryForType(NAME_CODE) and getDictionaryForType(ADDRESS_CODE) with
//           DictionaryResponse objects whose entries are empty lists.
//
//  3. files/Minimal Examples/batches_new_line.pdf
//     New binary test resource (mode 100644); its content is not part of the textual patch.
//
// NOTE(review): the default empty dictionary stubs are added for NAME_CODE and ADDRESS_CODE
//   ("to prevent NPEs") but not for SPONSOR_CODE, although SPONSOR_CODE is now part of the types
//   returned by the getAllTypes() stub. Presumably tests other than testSponsorInCell would get the
//   mock's default return value for the "sponsor" dictionary - confirm whether a default is needed.
// NOTE(review): testSponsorInCell feeds dictionaryClient.getVersion() from SPONSORS_VERSION, while
//   the shared stubbing uses DICTIONARY_VERSION for the same mocked method. The two counters advance
//   independently, so the value returned in the test is not guaranteed to differ from one returned
//   earlier - verify how the consumer of getVersion() decides that a dictionary reload is required.
diff --git a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/IdRemoval.java b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/IdRemoval.java index bc11c440..2b7b506d 100644 --- a/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/IdRemoval.java +++ b/redaction-service-v1/redaction-service-api-v1/src/main/java/com/iqser/red/service/redaction/v1/model/IdRemoval.java @@ -16,4 +16,4 @@ public class IdRemoval { private Status status; private boolean removeFromDictionary; -} +} \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java index 6b74cf7d..baa7f11e 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/redaction/service/EntityRedactionServiceTest.java @@ -55,9 +55,11 @@ public class EntityRedactionServiceTest { private static final String DEFAULT_RULES = loadFromClassPath("drools/rules.drl"); private static final String NAME_CODE = "name"; private static final String ADDRESS_CODE = "address"; + private static final String SPONSOR_CODE = "sponsor"; private static final AtomicLong DICTIONARY_VERSION = new AtomicLong(); private static final AtomicLong RULES_VERSION = new AtomicLong(); + private static final AtomicLong SPONSORS_VERSION = new AtomicLong(); @MockBean private DictionaryClient dictionaryClient; @@ -291,6 +293,39 @@ public class EntityRedactionServiceTest { } + @Test + public void 
testSponsorInCell() throws IOException { + + String tableRules = "package drools\n" + + "\n" + + "import com.iqser.red.service.redaction.v1.server.redaction.model.Section\n" + + "\n" + + "global Section section\n" + "rule \"11: Redact sponsor company\"\n" + " when\n" + " " + + "Section(searchText.toLowerCase().contains(\"batches produced at\"))\n" + " then\n" + " section" + + ".redactIfPrecededBy(\"batches produced at\", \"sponsor\", 11, \"Redacted because it represents a " + + "sponsor company\", \"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" + " end"; + when(rulesClient.getVersion()).thenReturn(RULES_VERSION.incrementAndGet()); + when(rulesClient.getRules()).thenReturn(new RulesResponse(tableRules)); + droolsExecutionService.updateRules(); + + ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/batches_new_line.pdf"); + when(dictionaryClient.getVersion()).thenReturn(SPONSORS_VERSION.incrementAndGet()); + DictionaryResponse dictionaryResponse = DictionaryResponse.builder() + .entries(new ArrayList<>(ResourceLoader.load("dictionaries/sponsor_companies.txt"))) + .build(); + when(dictionaryClient.getDictionaryForType(SPONSOR_CODE)).thenReturn(dictionaryResponse); + try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) { + Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument); + entityRedactionService.processDocument(classifiedDoc, null); + assertThat(classifiedDoc.getEntities()).hasSize(1); // one page + assertThat(classifiedDoc.getEntities().get(1).stream() + .filter(entity -> entity.getMatchedRule() == 11) + .count()).isEqualTo(1); + } + + } + + + @Test public void headerPropagation() throws IOException { @@ -393,11 +428,22 @@ public class EntityRedactionServiceTest { TypeResponse typeResponse = TypeResponse.builder() .types(Arrays.asList( TypeResult.builder().type(NAME_CODE).hexColor("#ffff00").build(), - TypeResult.builder().type(ADDRESS_CODE).hexColor("#00ffff").build())) + 
TypeResult.builder().type(ADDRESS_CODE).hexColor("#ff00ff").build(), + TypeResult.builder().type(SPONSOR_CODE).hexColor("#00ffff").build())) .build(); when(dictionaryClient.getVersion()).thenReturn(DICTIONARY_VERSION.incrementAndGet()); when(dictionaryClient.getAllTypes()).thenReturn(typeResponse); + // Default empty return to prevent NPEs + DictionaryResponse dictionaryResponse = DictionaryResponse.builder() + .entries(Collections.emptyList()) + .build(); + when(dictionaryClient.getDictionaryForType(NAME_CODE)).thenReturn(dictionaryResponse); + DictionaryResponse addressResponse = DictionaryResponse.builder() + .entries(Collections.emptyList()) + .build(); + when(dictionaryClient.getDictionaryForType(ADDRESS_CODE)).thenReturn(addressResponse); + Colors colors = new Colors(); colors.setDefaultColor("#acfc00"); colors.setNotRedacted("#cccccc"); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Minimal Examples/batches_new_line.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Minimal Examples/batches_new_line.pdf new file mode 100644 index 00000000..d09e278f Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Minimal Examples/batches_new_line.pdf differ