Remove entries from must_redact dict, add test and refactor rules
This commit is contained in:
parent
cec5fd3d5e
commit
6a8f366519
@ -44,7 +44,7 @@ public class Section {
|
||||
}
|
||||
|
||||
|
||||
public boolean contains(String type) {
|
||||
public boolean matchesType(String type) {
|
||||
|
||||
return entities.stream().anyMatch(entity -> entity.getType().equals(type));
|
||||
}
|
||||
@ -83,12 +83,10 @@ public class Section {
|
||||
public void redactIfPrecededBy(String prefix, String type, int ruleNumber, String reason) {
|
||||
|
||||
entities.forEach(entity -> {
|
||||
if (entity.getType().equals(type)) {
|
||||
if (searchText.indexOf(prefix + entity.getWord()) != 1) {
|
||||
entity.setRedaction(true);
|
||||
entity.setMatchedRule(ruleNumber);
|
||||
entity.setRedactionReason(reason);
|
||||
}
|
||||
if (entity.getType().equals(type) && searchText.indexOf(prefix + entity.getWord()) != 1) {
|
||||
entity.setRedaction(true);
|
||||
entity.setMatchedRule(ruleNumber);
|
||||
entity.setRedactionReason(reason);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@ -445,13 +445,35 @@ public class RedactionIntegrationTest {
|
||||
RedactionResult result = redactionController.redact(request);
|
||||
|
||||
result.getRedactionLog().getRedactionLogEntry().forEach(entry -> {
|
||||
if(!entry.isHint()){
|
||||
if (!entry.isHint()) {
|
||||
assertThat(entry.getReason()).isEqualTo("Not redacted because row is not a vertebrate study");
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void sponsorCompanyTest() throws IOException {
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/sponsor_companies.pdf");
|
||||
|
||||
RedactionRequest request = RedactionRequest.builder()
|
||||
.document(IOUtils.toByteArray(pdfFileResource.getInputStream()))
|
||||
.build();
|
||||
request.setFlatRedaction(false);
|
||||
|
||||
RedactionResult result = redactionController.redact(request);
|
||||
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Redacted.pdf")) {
|
||||
fileOutputStream.write(result.getDocument());
|
||||
}
|
||||
long end = System.currentTimeMillis();
|
||||
|
||||
System.out.println("duration: " + (end - start));
|
||||
System.out.println("numberOfPages: " + result.getNumberOfPages());
|
||||
}
|
||||
|
||||
|
||||
private static String loadFromClassPath(String path) {
|
||||
|
||||
|
||||
@ -1,3 +1,2 @@
|
||||
Batches Produced at
|
||||
CTL
|
||||
determination of residues
|
||||
@ -7,7 +7,7 @@ global Section section
|
||||
|
||||
rule "1: Redacted because Section contains Vertebrate"
|
||||
when
|
||||
eval(section.contains("vertebrate")==true);
|
||||
Section(matchesType("vertebrate"))
|
||||
then
|
||||
section.redact("name", 1, "Redacted because Section contains Vertebrate");
|
||||
section.redact("address", 1, "Redacted because Section contains Vertebrate");
|
||||
@ -16,7 +16,7 @@ rule "1: Redacted because Section contains Vertebrate"
|
||||
|
||||
rule "2: Not Redacted because Section contains no Vertebrate"
|
||||
when
|
||||
eval(section.contains("vertebrate")==false);
|
||||
// Rule 2 applies to sections WITHOUT a vertebrate entity, so the constraint must be negated.
Section(!matchesType("vertebrate"))
|
||||
then
|
||||
section.redactNot("name", 2, "Not Redacted because Section contains no Vertebrate");
|
||||
section.redactNot("address", 2, "Not Redacted because Section contains no Vertebrate");
|
||||
@ -25,7 +25,7 @@ rule "2: Not Redacted because Section contains no Vertebrate"
|
||||
|
||||
rule "3: Do not redact Names and Addresses if no redaction Indicator is contained"
|
||||
when
|
||||
eval(section.contains("vertebrate")==true && section.contains("no_redaction_indicator")==true);
|
||||
Section(matchesType("vertebrate"), matchesType("no_redaction_indicator"))
|
||||
then
|
||||
section.redactNot("name", 3, "Vertebrate was found, but also a no redaction indicator");
|
||||
section.redactNot("address", 3, "Vertebrate was found, but also a no redaction indicator");
|
||||
@ -34,7 +34,7 @@ rule "3: Do not redact Names and Addresses if no redaction Indicator is containe
|
||||
|
||||
rule "4: Redact Names and Addresses if no_redaction_indicator and redaction_indicator is contained"
|
||||
when
|
||||
eval(section.contains("vertebrate")==true && section.contains("no_redaction_indicator")==true && section.contains("redaction_indicator")==true);
|
||||
Section(matchesType("vertebrate"), matchesType("no_redaction_indicator"), matchesType("redaction_indicator"))
|
||||
then
|
||||
section.redact("name", 4, "Vertebrate was found and no_redaction_indicator and redaction_indicator");
|
||||
section.redact("address", 4, "Vertebrate was found and no_redaction_indicator and redaction_indicator");
|
||||
@ -43,7 +43,7 @@ rule "4: Redact Names and Addresses if no_redaction_indicator and redaction_indi
|
||||
|
||||
rule "5: Do not redact in guideline sections"
|
||||
when
|
||||
eval(section.headlineContainsWord("guideline") || section.headlineContainsWord("Guidance"));
|
||||
Section(headlineContainsWord("guideline") || headlineContainsWord("Guidance"))
|
||||
then
|
||||
section.redactNot("name", 5, "Section is a guideline section.");
|
||||
section.redactNot("address", 5, "Section is a guideline section.");
|
||||
@ -52,7 +52,7 @@ rule "5: Do not redact in guideline sections"
|
||||
|
||||
rule "6: Redact contact information if applicant is found"
|
||||
when
|
||||
eval(section.headlineContainsWord("applicant") || section.getText().contains("Applicant") || section.headlineContainsWord("Primary contact") || section.headlineContainsWord("Alternative contact"));
|
||||
Section(headlineContainsWord("applicant") || text.contains("Applicant") || headlineContainsWord("Primary contact") || headlineContainsWord("Alternative contact"))
|
||||
then
|
||||
section.redactLineAfter("Name:", "address", 6, "Applicant information was found");
|
||||
section.redactBetween("Address:", "Contact", "address", 6, "Applicant information was found");
|
||||
@ -79,7 +79,7 @@ rule "6: Redact contact information if applicant is found"
|
||||
|
||||
rule "7: Redact contact information if Producer is found"
|
||||
when
|
||||
eval(section.getText().toLowerCase().contains("producer of the plant protection") || section.getText().toLowerCase().contains("producer of the active substance") || section.getText().contains("Manufacturer of the active substance") || section.getText().contains("Manufacturer:") || section.getText().contains("Producer or producers of the active substance"));
|
||||
Section(text.toLowerCase().contains("producer of the plant protection") || text.toLowerCase().contains("producer of the active substance") || text.contains("Manufacturer of the active substance") || text.contains("Manufacturer:") || text.contains("Producer or producers of the active substance"))
|
||||
then
|
||||
section.redactLineAfter("Name:", "address", 7, "Producer was found");
|
||||
section.redactBetween("Address:", "Contact", "address", 7, "Producer was found");
|
||||
@ -110,7 +110,7 @@ rule "8: Not redacted because Vertebrate Study = N"
|
||||
|
||||
rule "9: Redact if must redact entry is found"
|
||||
when
|
||||
eval(section.contains("must_redact")==true);
|
||||
Section(matchesType("must_redact"))
|
||||
then
|
||||
section.redact("name", 9, "must_redact entry was found.");
|
||||
section.redact("address", 9, "must_redact entry was found.");
|
||||
@ -128,7 +128,7 @@ rule "10: Redact Authors and Addresses in Reference Table if it is a Vertebrate
|
||||
|
||||
rule "11: Redact sponsor company"
|
||||
when
|
||||
Section(section.getText().toLowerCase().contains("batches produced at"))
|
||||
Section(text.toLowerCase().contains("batches produced at"))
|
||||
then
|
||||
section.redactIfPrecededBy("batches produced at", "sponsor", 11, "Redacted because it represents a sponsor company");
|
||||
end
|
||||
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user