diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java index e0ec1bb8..68e7140b 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java @@ -44,7 +44,7 @@ public class Section { } - public boolean contains(String type) { + public boolean matchesType(String type) { return entities.stream().anyMatch(entity -> entity.getType().equals(type)); } @@ -83,12 +83,10 @@ public class Section { public void redactIfPrecededBy(String prefix, String type, int ruleNumber, String reason) { entities.forEach(entity -> { - if (entity.getType().equals(type)) { - if (searchText.indexOf(prefix + entity.getWord()) != 1) { - entity.setRedaction(true); - entity.setMatchedRule(ruleNumber); - entity.setRedactionReason(reason); - } + if (entity.getType().equals(type) && searchText.indexOf(prefix + entity.getWord()) != -1) { + entity.setRedaction(true); + entity.setMatchedRule(ruleNumber); + entity.setRedactionReason(reason); } }); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index 73526fbc..5c942080 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -445,13 +445,35 @@ 
public class RedactionIntegrationTest { RedactionResult result = redactionController.redact(request); result.getRedactionLog().getRedactionLogEntry().forEach(entry -> { - if(!entry.isHint()){ + if (!entry.isHint()) { assertThat(entry.getReason()).isEqualTo("Not redacted because row is not a vertebrate study"); } }); } + @Test + public void sponsorCompanyTest() throws IOException { + + long start = System.currentTimeMillis(); + ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/sponsor_companies.pdf"); + + RedactionRequest request = RedactionRequest.builder() + .document(IOUtils.toByteArray(pdfFileResource.getInputStream())) + .build(); + request.setFlatRedaction(false); + + RedactionResult result = redactionController.redact(request); + + try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Redacted.pdf")) { + fileOutputStream.write(result.getDocument()); + } + long end = System.currentTimeMillis(); + + System.out.println("duration: " + (end - start)); + System.out.println("numberOfPages: " + result.getNumberOfPages()); + } + private static String loadFromClassPath(String path) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/must_redact.txt b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/must_redact.txt index 34387b57..0a4416d4 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/must_redact.txt +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/dictionaries/must_redact.txt @@ -1,3 +1,2 @@ -Batches Produced at CTL determination of residues \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl index 2c31a62b..fc0cc47f 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl +++ 
b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl @@ -7,7 +7,7 @@ global Section section rule "1: Redacted because Section contains Vertebrate" when - eval(section.contains("vertebrate")==true); + Section(matchesType("vertebrate")) then section.redact("name", 1, "Redacted because Section contains Vertebrate"); section.redact("address", 1, "Redacted because Section contains Vertebrate"); @@ -16,7 +16,7 @@ rule "1: Redacted because Section contains Vertebrate" rule "2: Not Redacted because Section contains no Vertebrate" when - eval(section.contains("vertebrate")==false); + Section(!matchesType("vertebrate")) then section.redactNot("name", 2, "Not Redacted because Section contains no Vertebrate"); section.redactNot("address", 2, "Not Redacted because Section contains no Vertebrate"); @@ -25,7 +25,7 @@ rule "2: Not Redacted because Section contains no Vertebrate" rule "3: Do not redact Names and Addresses if no redaction Indicator is contained" when - eval(section.contains("vertebrate")==true && section.contains("no_redaction_indicator")==true); + Section(matchesType("vertebrate"), matchesType("no_redaction_indicator")) then section.redactNot("name", 3, "Vertebrate was found, but also a no redaction indicator"); section.redactNot("address", 3, "Vertebrate was found, but also a no redaction indicator"); @@ -34,7 +34,7 @@ rule "3: Do not redact Names and Addresses if no redaction Indicator is containe rule "4: Redact Names and Addresses if no_redaction_indicator and redaction_indicator is contained" when - eval(section.contains("vertebrate")==true && section.contains("no_redaction_indicator")==true && section.contains("redaction_indicator")==true); + Section(matchesType("vertebrate"), matchesType("no_redaction_indicator"), matchesType("redaction_indicator")) then section.redact("name", 4, "Vertebrate was found and no_redaction_indicator and redaction_indicator"); section.redact("address", 4, "Vertebrate was found and 
no_redaction_indicator and redaction_indicator"); @@ -43,7 +43,7 @@ rule "4: Redact Names and Addresses if no_redaction_indicator and redaction_indi rule "5: Do not redact in guideline sections" when - eval(section.headlineContainsWord("guideline") || section.headlineContainsWord("Guidance")); + Section(headlineContainsWord("guideline") || headlineContainsWord("Guidance")) then section.redactNot("name", 5, "Section is a guideline section."); section.redactNot("address", 5, "Section is a guideline section."); @@ -52,7 +52,7 @@ rule "5: Do not redact in guideline sections" rule "6: Redact contact information if applicant is found" when - eval(section.headlineContainsWord("applicant") || section.getText().contains("Applicant") || section.headlineContainsWord("Primary contact") || section.headlineContainsWord("Alternative contact")); + Section(headlineContainsWord("applicant") || text.contains("Applicant") || headlineContainsWord("Primary contact") || headlineContainsWord("Alternative contact")) then section.redactLineAfter("Name:", "address", 6, "Applicant information was found"); section.redactBetween("Address:", "Contact", "address", 6, "Applicant information was found"); @@ -79,7 +79,7 @@ rule "6: Redact contact information if applicant is found" rule "7: Redact contact information if Producer is found" when - eval(section.getText().toLowerCase().contains("producer of the plant protection") || section.getText().toLowerCase().contains("producer of the active substance") || section.getText().contains("Manufacturer of the active substance") || section.getText().contains("Manufacturer:") || section.getText().contains("Producer or producers of the active substance")); + Section(text.toLowerCase().contains("producer of the plant protection") || text.toLowerCase().contains("producer of the active substance") || text.contains("Manufacturer of the active substance") || text.contains("Manufacturer:") || text.contains("Producer or producers of the active substance")) then 
section.redactLineAfter("Name:", "address", 7, "Producer was found"); section.redactBetween("Address:", "Contact", "address", 7, "Producer was found"); @@ -110,7 +110,7 @@ rule "8: Not redacted because Vertebrate Study = N" rule "9: Redact if must redact entry is found" when - eval(section.contains("must_redact")==true); + Section(matchesType("must_redact")) then section.redact("name", 9, "must_redact entry was found."); section.redact("address", 9, "must_redact entry was found."); @@ -128,7 +128,7 @@ rule "10: Redact Authors and Addresses in Reference Table if it is a Vertebrate rule "11: Redact sponsor company" when - Section(section.getText().toLowerCase().contains("batches produced at")) + Section(text.toLowerCase().contains("batches produced at")) then section.redactIfPrecededBy("batches produced at", "sponsor", 11, "Redacted because it represents a sponsor company"); end \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Minimal Examples/sponsor_companies.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Minimal Examples/sponsor_companies.pdf new file mode 100644 index 00000000..45b9f39a Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Minimal Examples/sponsor_companies.pdf differ