RSS-42: Made redactBetween rule more generic
This commit is contained in:
parent
9867cd6848
commit
c29c5eef0d
@ -998,7 +998,17 @@ public class Section {
|
||||
private void redactBetween(String start, String stop, String asType, int ruleNumber, boolean redactEverywhere,
|
||||
String reason, String legalBasis, boolean redaction) {
|
||||
|
||||
String[] values = StringUtils.substringsBetween(searchText, start, stop);
|
||||
String[] values = new String[1];
|
||||
|
||||
if(start.isEmpty() && stop.isEmpty()){
|
||||
values[0] = searchText;
|
||||
} else if(start.isEmpty() && searchText.contains(stop)){
|
||||
values[0] = StringUtils.substringBefore(searchText, stop);
|
||||
} else if (stop.isEmpty() && searchText.contains(start)){
|
||||
values[0] = StringUtils.substringAfter(searchText, start);
|
||||
} else {
|
||||
values = StringUtils.substringsBetween(searchText, start, stop);
|
||||
}
|
||||
|
||||
if (values != null) {
|
||||
for (String value : values) {
|
||||
|
||||
@ -330,6 +330,36 @@ public class RedactionIntegrationTest {
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Test
|
||||
public void titleExtraction() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/RSS/32 - Emamectin Benzoate Technical - Acute Oral Toxicity - Mouse.pdf");
|
||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||
|
||||
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
|
||||
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
|
||||
var text = redactionStorageService.getText(TEST_DOSSIER_ID, TEST_FILE_ID);
|
||||
|
||||
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder()
|
||||
.dossierId(TEST_DOSSIER_ID)
|
||||
.fileId(TEST_FILE_ID)
|
||||
.build());
|
||||
|
||||
String outputFileName = OsUtils.getTemporaryDirectory() + "/Annotated.pdf";
|
||||
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream(outputFileName)) {
|
||||
fileOutputStream.write(annotateResponse.getDocument());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
@Test
|
||||
@Ignore
|
||||
@SneakyThrows
|
||||
@ -1064,7 +1094,7 @@ public class RedactionIntegrationTest {
|
||||
public void classificationTest() throws IOException {
|
||||
|
||||
System.out.println("classificationTest");
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Single Table.pdf");
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/RSS/01 - CGA100251 - Acute Oral Toxicity (Up and Down Procedure) - Rat (1).pdf");
|
||||
|
||||
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
|
||||
|
||||
|
||||
@ -390,4 +390,21 @@ rule "101: Redact CAS numbers"
|
||||
Section(hasTableHeader("Sample #"))
|
||||
then
|
||||
section.redactCell("Sample #", 8, "PII", true, "Redacted because row is a vertebrate study", "Reg (EC) No 1107/2009 Art. 63 (2g)");
|
||||
end
|
||||
end
|
||||
|
||||
// Marks the report title in the leading part of a document.
// Fires for sections with sectionNumber <= 3 whose text does not contain "AUTHOR".
rule "102: Extract title"
    when
        Section(sectionNumber <= 3 && !text.contains("AUTHOR"))
    then
        // Empty start marker: the text in front of the stop marker is treated as the title.
        // Both spellings of the stop marker are tried because the marker strings differ in case.
        // NOTE(review): assumes this overload delegates to the start/stop handling of
        // Section.redactBetween (substringBefore when start is empty) - confirm.
        section.redactBetween("", "Final Report", "PII", 5, false, "Title found", "n-a");
        section.redactBetween("", "FINAL REPORT", "PII", 5, false, "Title found", "n-a");

        // Title enclosed by the "TITLE" and "DATA REQUIREMENT" markers.
        // This call was previously issued twice with identical arguments; the copy-paste
        // duplicate has been removed.
        // NOTE(review): if a differently-cased marker pair was intended for the second call
        // (as with "Final Report"/"FINAL REPORT" above), add it explicitly.
        section.redactBetween("TITLE", "DATA REQUIREMENT", "PII", 5, false, "Title found", "n-a");
end
|
||||
|
||||
// Title extraction variant for leading sections (sectionNumber <= 3) whose text contains "SPL":
// the text between the "Laboratories" and "SPL" markers is passed to redactBetween as the title.
rule "102-1: Extract title"
    when
        // Comma-separated constraints are combined with AND, same as the && form.
        Section(sectionNumber <= 3, text.contains("SPL"))
    then
        section.redactBetween("Laboratories", "SPL", "PII", 5, false, "Title found", "n-a");
end
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user