diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java index ff704162..e404af3b 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/redaction/model/Section.java @@ -998,7 +998,17 @@ public class Section { private void redactBetween(String start, String stop, String asType, int ruleNumber, boolean redactEverywhere, String reason, String legalBasis, boolean redaction) { - String[] values = StringUtils.substringsBetween(searchText, start, stop); + String[] values; + /* An empty start or stop anchors the match at the beginning or end of searchText; null arguments match no branch and fall through to substringsBetween. */ + if (searchText != null && "".equals(start) && "".equals(stop)) { + values = new String[] {searchText}; + } else if ("".equals(start) && StringUtils.contains(searchText, stop)) { + values = new String[] {StringUtils.substringBefore(searchText, stop)}; + } else if ("".equals(stop) && StringUtils.contains(searchText, start)) { + values = new String[] {StringUtils.substringAfter(searchText, start)}; + } else { + values = StringUtils.substringsBetween(searchText, start, stop); + } if (values != null) { for (String value : values) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java index fc3debce..fe31be8b 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/RedactionIntegrationTest.java @@ -330,6 +330,36 @@ public class 
RedactionIntegrationTest { } + + @Test + public void titleExtraction() throws IOException { + /* Runs structure analysis, analysis and annotation on a sample report and writes the annotated PDF to the temporary directory; nothing is asserted, so presumably meant for manual inspection. */ + ClassPathResource pdfFileResource = new ClassPathResource("files/RSS/32 - Emamectin Benzoate Technical - Acute Oral Toxicity - Mouse.pdf"); + AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream()); + + analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); + AnalyzeResult result = analyzeService.analyze(request); + + var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID); + + var text = redactionStorageService.getText(TEST_DOSSIER_ID, TEST_FILE_ID); + + AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder() + .dossierId(TEST_DOSSIER_ID) + .fileId(TEST_FILE_ID) + .build()); + + String outputFileName = OsUtils.getTemporaryDirectory() + "/Annotated.pdf"; + + try (FileOutputStream fileOutputStream = new FileOutputStream(outputFileName)) { + fileOutputStream.write(annotateResponse.getDocument()); + } + + } + + + + @Test @Ignore @SneakyThrows @@ -1064,7 +1094,7 @@ public class RedactionIntegrationTest { public void classificationTest() throws IOException { System.out.println("classificationTest"); - ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Single Table.pdf"); + ClassPathResource pdfFileResource = new ClassPathResource("files/RSS/01 - CGA100251 - Acute Oral Toxicity (Up and Down Procedure) - Rat (1).pdf"); AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream()); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl index d9d1622d..57508d08 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/rules.drl @@ -390,4 +390,20 @@ 
rule "101: Redact CAS numbers" Section(hasTableHeader("Sample #")) then section.redactCell("Sample #", 8, "PII", true, "Redacted because row is a vertebrate study", "Reg (EC) No 1107/2009 Art. 63 (2g)"); - end \ No newline at end of file + end +/* Title extraction heuristics for sections numbered 3 or lower; each call hands a start/stop marker pair to Section.redactBetween. */ +rule "102: Extract title" + when + Section(sectionNumber <= 3 && !text.contains("AUTHOR")) + then + section.redactBetween("", "Final Report", "PII", 5, false, "Title found", "n-a"); + section.redactBetween("", "FINAL REPORT", "PII", 5, false, "Title found", "n-a"); + section.redactBetween("TITLE", "DATA REQUIREMENT", "PII", 5, false, "Title found", "n-a"); + end + +rule "102-1: Extract title" + when + Section(sectionNumber <= 3 && text.contains("SPL")) + then + section.redactBetween("Laboratories", "SPL", "PII", 5, false, "Title found", "n-a"); + end \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/01 - CGA100251 - Acute Oral Toxicity (Up and Down Procedure) - Rat (1).pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/01 - CGA100251 - Acute Oral Toxicity (Up and Down Procedure) - Rat (1).pdf new file mode 100644 index 00000000..243b10c8 Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/01 - CGA100251 - Acute Oral Toxicity (Up and Down Procedure) - Rat (1).pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/02 - A22833B - Acute Oral (Up and Down) - Final Report.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/02 - A22833B - Acute Oral (Up and Down) - Final Report.pdf new file mode 100644 index 00000000..707a7d43 Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/02 - A22833B - Acute Oral (Up and Down) - Final Report.pdf differ diff --git 
a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/03 - Acute Oral Toxicity Up and Down Procedur.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/03 - Acute Oral Toxicity Up and Down Procedur.pdf new file mode 100644 index 00000000..c379a2c2 Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/03 - Acute Oral Toxicity Up and Down Procedur.pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/04 - CGA80154 - Acute Oral Toxicity (Up and Down Procedure) - Rat (1).pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/04 - CGA80154 - Acute Oral Toxicity (Up and Down Procedure) - Rat (1).pdf new file mode 100644 index 00000000..84fb9dd6 Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/04 - CGA80154 - Acute Oral Toxicity (Up and Down Procedure) - Rat (1).pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/05 - CGA80154 Acute Oral Toxicity (Up and Down Procedure) - Rat (2).pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/05 - CGA80154 Acute Oral Toxicity (Up and Down Procedure) - Rat (2).pdf new file mode 100644 index 00000000..84fb9dd6 Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/05 - CGA80154 Acute Oral Toxicity (Up and Down Procedure) - Rat (2).pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/06 - Isopyrazam - Acute Oral Toxicity Rat.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/06 - Isopyrazam - Acute Oral Toxicity Rat.pdf new file mode 100644 index 00000000..1426f5d3 Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/06 - 
Isopyrazam - Acute Oral Toxicity Rat.pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/07 - Acute Oral Toxicity in the Rat- Up and D.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/07 - Acute Oral Toxicity in the Rat- Up and D.pdf new file mode 100644 index 00000000..08304c5e Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/07 - Acute Oral Toxicity in the Rat- Up and D.pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/08 - Acute Oral Toxicity Up and Down Procedur.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/08 - Acute Oral Toxicity Up and Down Procedur.pdf new file mode 100644 index 00000000..c286235b Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/08 - Acute Oral Toxicity Up and Down Procedur.pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/09 - CGA100251 - Acute Oral Toxicity (Up and Down Procedure) - Rat.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/09 - CGA100251 - Acute Oral Toxicity (Up and Down Procedure) - Rat.pdf new file mode 100644 index 00000000..243b10c8 Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/09 - CGA100251 - Acute Oral Toxicity (Up and Down Procedure) - Rat.pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/10 - Cyper TC - Acute Oral Toxicity Up and Do.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/10 - Cyper TC - Acute Oral Toxicity Up and Do.pdf new file mode 100644 index 00000000..a722f310 Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/10 - Cyper TC - Acute 
Oral Toxicity Up and Do.pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/11 - Glyphosate Technical - Acute Oral Toxicity Study in the Rat (Up and Down Procedure) (1).pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/11 - Glyphosate Technical - Acute Oral Toxicity Study in the Rat (Up and Down Procedure) (1).pdf new file mode 100644 index 00000000..c5317f9e Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/11 - Glyphosate Technical - Acute Oral Toxicity Study in the Rat (Up and Down Procedure) (1).pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/12 - Glyphosate Technical - Acute Oral Toxicity Study in the Rat (Up and Down Procedure) (2).pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/12 - Glyphosate Technical - Acute Oral Toxicity Study in the Rat (Up and Down Procedure) (2).pdf new file mode 100644 index 00000000..c5317f9e Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/12 - Glyphosate Technical - Acute Oral Toxicity Study in the Rat (Up and Down Procedure) (2).pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/13 - Glyphosate Technical - Acute Oral Toxicity Study in the Rat (Up and Down Procedure).pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/13 - Glyphosate Technical - Acute Oral Toxicity Study in the Rat (Up and Down Procedure).pdf new file mode 100644 index 00000000..c5317f9e Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/13 - Glyphosate Technical - Acute Oral Toxicity Study in the Rat (Up and Down Procedure).pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/14 - Paclobutrazol 
- Acute Oral Up-and-Down Procedure Rats.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/14 - Paclobutrazol - Acute Oral Up-and-Down Procedure Rats.pdf new file mode 100644 index 00000000..9453789d Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/14 - Paclobutrazol - Acute Oral Up-and-Down Procedure Rats.pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/15 - Pretilachlor - Acute Oral Toxicity (Up and Down Procedure) - Rat.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/15 - Pretilachlor - Acute Oral Toxicity (Up and Down Procedure) - Rat.pdf new file mode 100644 index 00000000..6eba225e Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/15 - Pretilachlor - Acute Oral Toxicity (Up and Down Procedure) - Rat.pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/16 - Prevail FT - Acute Oral Toxicity Up and.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/16 - Prevail FT - Acute Oral Toxicity Up and.pdf new file mode 100644 index 00000000..87d8cec7 Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/16 - Prevail FT - Acute Oral Toxicity Up and.pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/17 - R61837 - OECD summary - Acute oral toxicity study in the rat (up and down procedure) (1).pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/17 - R61837 - OECD summary - Acute oral toxicity study in the rat (up and down procedure) (1).pdf new file mode 100644 index 00000000..8f1c2d68 Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/17 - R61837 - OECD summary - Acute 
oral toxicity study in the rat (up and down procedure) (1).pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/18 - SYN520453 - OECD summary - Acute Oral Toxicity Study in Rats - Up-and-Down-Procedure.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/18 - SYN520453 - OECD summary - Acute Oral Toxicity Study in Rats - Up-and-Down-Procedure.pdf new file mode 100644 index 00000000..6f40acf0 Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/18 - SYN520453 - OECD summary - Acute Oral Toxicity Study in Rats - Up-and-Down-Procedure.pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/19 - SYN545192 - Acute Oral Toxicity Study in the Rat (Up and Down Procedure).pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/19 - SYN545192 - Acute Oral Toxicity Study in the Rat (Up and Down Procedure).pdf new file mode 100644 index 00000000..7dc98d53 Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/19 - SYN545192 - Acute Oral Toxicity Study in the Rat (Up and Down Procedure).pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/20 - SYN550004 - Acute Oral Toxicity (Up & Down Procedure) - Rats.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/20 - SYN550004 - Acute Oral Toxicity (Up & Down Procedure) - Rats.pdf new file mode 100644 index 00000000..919d4966 Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/20 - SYN550004 - Acute Oral Toxicity (Up & Down Procedure) - Rats.pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/21 - CA6572 - Acute Oral Toxicity - Rats.pdf 
b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/21 - CA6572 - Acute Oral Toxicity - Rats.pdf new file mode 100644 index 00000000..86317ea9 Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/21 - CA6572 - Acute Oral Toxicity - Rats.pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/22 - SYN550023 - Acute Oral - Rat.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/22 - SYN550023 - Acute Oral - Rat.pdf new file mode 100644 index 00000000..7826b7ca Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/22 - SYN550023 - Acute Oral - Rat.pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/23 - SYN549888 - Acute Oral Toxicity - Rats.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/23 - SYN549888 - Acute Oral Toxicity - Rats.pdf new file mode 100644 index 00000000..d696e4b3 Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/23 - SYN549888 - Acute Oral Toxicity - Rats.pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/24 - SYN549522 - Acute Oral Toxicity - Rats.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/24 - SYN549522 - Acute Oral Toxicity - Rats.pdf new file mode 100644 index 00000000..e0567199 Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/24 - SYN549522 - Acute Oral Toxicity - Rats.pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/25 - SYN546412 - Acute Oral Toxicity - Rats.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/25 - SYN546412 - Acute Oral Toxicity - Rats.pdf new 
file mode 100644 index 00000000..8ed5237c Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/25 - SYN546412 - Acute Oral Toxicity - Rats.pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/26 - Sedaxane - Acute Oral Toxicity - Rat.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/26 - Sedaxane - Acute Oral Toxicity - Rat.pdf new file mode 100644 index 00000000..f3c89232 Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/26 - Sedaxane - Acute Oral Toxicity - Rat.pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/27 - Profenofos Technical - Acute Oral Toxici.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/27 - Profenofos Technical - Acute Oral Toxici.pdf new file mode 100644 index 00000000..7d580f1c Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/27 - Profenofos Technical - Acute Oral Toxici.pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/28 - Emamectin Technical - Acute Oral Toxicity - Rats.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/28 - Emamectin Technical - Acute Oral Toxicity - Rats.pdf new file mode 100644 index 00000000..ee1dba97 Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/28 - Emamectin Technical - Acute Oral Toxicity - Rats.pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/29 - Abamectin Technical (MK936C) - Acute Ora.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/29 - Abamectin Technical (MK936C) - Acute Ora.pdf new file mode 100644 index 00000000..3810f5ba Binary files /dev/null 
and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/29 - Abamectin Technical (MK936C) - Acute Ora.pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/30 - Dicamba - Acute Oral Toxicity - Rats.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/30 - Dicamba - Acute Oral Toxicity - Rats.pdf new file mode 100644 index 00000000..9f674b35 Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/30 - Dicamba - Acute Oral Toxicity - Rats.pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/31 - CA6375 - Acute Oral Toxicity - Rats.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/31 - CA6375 - Acute Oral Toxicity - Rats.pdf new file mode 100644 index 00000000..4e578a11 Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/31 - CA6375 - Acute Oral Toxicity - Rats.pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/32 - Emamectin Benzoate Technical - Acute Oral Toxicity - Mouse.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/32 - Emamectin Benzoate Technical - Acute Oral Toxicity - Mouse.pdf new file mode 100644 index 00000000..29ee151e Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/RSS/32 - Emamectin Benzoate Technical - Acute Oral Toxicity - Mouse.pdf differ