diff --git a/redaction-service-v1/redaction-service-api-v1/pom.xml b/redaction-service-v1/redaction-service-api-v1/pom.xml
index e91526ed..4917363b 100644
--- a/redaction-service-v1/redaction-service-api-v1/pom.xml
+++ b/redaction-service-v1/redaction-service-api-v1/pom.xml
@@ -12,7 +12,7 @@ redaction-service-api-v1
- 2.79.0
+ 2.84.0
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java
new file mode 100644
index 00000000..66a80836
--- /dev/null
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java
@@ -0,0 +1,130 @@
+package com.iqser.red.service.redaction.v1.server;
+
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.mockito.Mockito.when;
+
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.List;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
+import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
+import org.springframework.boot.test.context.SpringBootTest;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.ComponentScan;
+import org.springframework.context.annotation.Configuration;
+import org.springframework.context.annotation.FilterType;
+import org.springframework.context.annotation.Import;
+import org.springframework.context.annotation.Primary;
+import org.springframework.test.context.junit.jupiter.SpringExtension;
+
+import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type;
+import com.iqser.red.service.redaction.v1.model.StructureAnalyzeRequest;
+import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest;
+import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse;
+import com.iqser.red.service.redaction.v1.server.multitenancy.TenantContext;
+import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils;
+import com.iqser.red.storage.commons.StorageAutoConfiguration;
+import com.iqser.red.storage.commons.service.StorageService;
+
+/**
+ * Integration test that runs the rule set loaded from {@code drools/documine_flora.drl} through
+ * structure analysis, analysis and annotation of a sample report PDF.
+ */
+@ExtendWith(SpringExtension.class)
+@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
+@Import(DocumineFloraTest.RedactionIntegrationTestConfiguration.class)
+public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
+
+    private static final String RULES = loadFromClassPath("drools/documine_flora.drl");
+
+
+    /**
+     * Uploads the sample report, runs structure analysis and analysis, checks that a result and a redaction log
+     * exist, then annotates the file and writes the annotated PDF to the temporary directory for manual inspection.
+     */
+    @Test
+    public void titleExtraction() throws IOException {
+
+        AnalyzeRequest request = uploadFileToStorage("files/Documine/Flora/A13617AV/474_G.1.2 - 1768300_MMNA_A13617AV_report.pdf");
+        System.out.println("Start Full integration test");
+        analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
+        System.out.println("Finished structure analysis");
+        AnalyzeResult result = analyzeService.analyze(request);
+        System.out.println("Finished analysis");
+        assertNotNull(result, "analysis should return a result");
+
+        var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
+        assertNotNull(redactionLog, "analysis should have stored a redaction log");
+
+        AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build());
+
+        String outputFileName = OsUtils.getTemporaryDirectory() + "/Documine.pdf";
+
+        try (FileOutputStream fileOutputStream = new FileOutputStream(outputFileName)) {
+            fileOutputStream.write(annotateResponse.getDocument());
+        }
+
+    }
+
+
+    /**
+     * Test context: excludes {@link RabbitAutoConfiguration} from auto-configuration, filters
+     * {@link StorageAutoConfiguration} out of component scanning and registers the primary {@link StorageService}.
+     */
+    @Configuration
+    @EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class})
+    @ComponentScan(excludeFilters = {@ComponentScan.Filter(type = FilterType.ASSIGNABLE_TYPE, value = StorageAutoConfiguration.class)})
+    static class RedactionIntegrationTestConfiguration {
+
+        @Bean
+        @Primary
+        public StorageService inmemoryStorage() {
+
+            return new FileSystemBackedStorageService();
+        }
+
+    }
+
+
+    /**
+     * Sets the tenant id and stubs the rules and dictionary clients before each test.
+     */
+    @BeforeEach
+    public void stubClients() {
+
+        TenantContext.setTenantId("redaction");
+
+        when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
+        when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(JSONPrimitive.of(RULES));
+
+        loadTypeForTest();
+        loadNerForTest();
+        when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
+        when(dictionaryClient.getAllTypesForDossierTemplate(TEST_DOSSIER_TEMPLATE_ID, false)).thenReturn(getTypeResponse());
+
+        // NOTE(review): id is built from TEST_DOSSIER_TEMPLATE_ID while dossierTemplateId is set to TEST_DOSSIER_ID;
+        // this looks swapped - confirm before changing it.
+        when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, false)).thenReturn(List.of(Type.builder()
+                .id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID)
+                .type(DOSSIER_REDACTIONS_INDICATOR)
+                .dossierTemplateId(TEST_DOSSIER_ID)
+                .hexColor("#ffe187")
+                .isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
+                .isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR))
+                .isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
+                .rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
+                .build()));
+
+        mockDictionaryCalls(null);
+
+        when(dictionaryClient.getColors(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(colors);
+    }
+
+}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl
new file mode 100644
index 00000000..ef0d3980
--- /dev/null
+++ 
b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl @@ -0,0 +1,1364 @@ +package drools + +import static java.lang.String.format; +import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.anyMatch; +import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.exactMatch; + +import java.util.List; +import java.util.LinkedList; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.Collection; +import java.util.stream.Stream; +import java.util.Optional; + +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.*; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.*; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.*; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.*; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.EntityType; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.ImageType; +import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute; +import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.services.EntityCreationService; +import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.Dictionary; +import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.DictionaryModel; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction; +import 
com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.services.ManualRedactionApplicationService; +import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary; +import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntitiesAdapter; +import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities; +import com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility + +global Document document +global EntityCreationService entityCreationService +global ManualRedactionApplicationService manualRedactionApplicationService +global NerEntitiesAdapter nerEntitiesAdapter +global Dictionary dictionary + +//------------------------------------ queries ------------------------------------ + +query "getFileAttributes" + $fileAttribute: FileAttribute() + end + +//--------------------------------------------------------------------------- + + +// Rule unit: MAN.0 +rule "H.0.0: Show headlines" + when + $headline: Headline() + then +// entityCreationService.bySemanticNode($headline, "headline", EntityType.RECOMMENDATION); + end + + +rule "H.0.0: Study Type File Attribute" + when + not FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","438","439","471","487")) + $section: Section( + (containsString("DATA REQUIREMENT") || containsString("TEST GUIDELINE") || containsString("MÉTODO(S) DE REFERÊNCIA(S):")) + ,(containsString("OECD") || 
containsString("EPA") || containsString("OPPTS")) + ) + then + Stream.of(RedactionSearchUtility.findBoundariesByRegexIgnoreCase("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|(?:.{5,40}(?:Number |Procedure |Guideline )))(4[\\d]{2})", 1, $section.getTextBlock()), + RedactionSearchUtility.findBoundariesByRegexIgnoreCase("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", 1, $section.getTextBlock()), + RedactionSearchUtility.findBoundariesByRegexIgnoreCase("(?<=OECD) Guideline (4\\d{2})", 1, $section.getTextBlock())).flatMap(Collection::stream).findFirst() + .map(boundary -> $section.getTextBlock().subSequence(boundary).toString()) + .map(value -> FileAttribute.builder().label("OECD Number").value(value).build()) + .ifPresent(fileAttribute -> insert(fileAttribute)); + end + + + + +rule "1: Guidelines" + when + $section: Section( + ( + containsString("DATA REQUIREMENT") + || containsString("TEST GUIDELINE") + || containsString("MÉTODO(S) DE REFERÊNCIA(S):") + ) + && ( + containsString("OECD") + || containsString("EPA") + || containsString("OPPTS") + ) + ) + then + Stream.of(entityCreationService.byRegex("OECD (No\\.? )?\\d{3}( \\(\\d{4}\\))?", "oecd_guideline", EntityType.ENTITY, $section).peek(e -> e.setRedactionReason("OECD Guideline found")), + entityCreationService.byRegex("OECD[\\s,]{1}(?:.{1,40}.(?>Procedure|Method).{1,20}\\d{3,4}(?>.{1,100}\\d{4}\\))?|\\[.{1,20}.Skin.{1,20}\\]|[\\d\\s,\\(\\)]{7,10}|[\\w\\.\\s]{1,15}[\\d]{3}\\s\\(\\d{4}\\)|.{0,20}[N|n]umber\\s\\d{3}.{0,1}|Test Guideline \\d{3})", "oecd_guideline", EntityType.ENTITY, $section).peek(e -> e.setRedactionReason("OECD Guideline found")), + entityCreationService.byRegex("EPA (OPPTS )?\\d{3}[. ]\\d{4}( \\(\\d{4}\\))?", "epa_guideline", EntityType.ENTITY, $section).peek(e -> e.setRedactionReason("EPA Guideline found")), + entityCreationService.byRegex("EC (Directive )?(No\\.? )?\\d{3,4}\\/\\d{3,4}((,? B(\\.| )\\d{1,2}\\.?)? 
\\(\\d{4}\\))?", "ec_guideline", EntityType.ENTITY, $section).peek(e -> e.setRedactionReason("EC Guideline found")), + entityCreationService.byRegex("Commission Regulation \\(EC\\) No \\d{3}\\/\\d{4}", "ec_guideline", EntityType.ENTITY, $section).peek(e -> e.setRedactionReason("EC Guideline found")), + entityCreationService.byRegex("OECD Method 4\\d{2}.{5,40}\\(.{5,40}\\d{4}\\)", "oecd_guideline", EntityType.ENTITY, $section).peek(e -> e.setRedactionReason("OECD Guideline year found")), + // Examples found in PoC 1 + //entityCreationService.byRegex("((OECD Guidelines for Testing of Chemicals, Procedure)|(OECD Guidelines for the Testing of Chemicals No\\.)|(OECD Test Guideline)|(OECD \\[Test Guideline, Number)) \\d{3}( \\(\\d{4}\\))?", "oecd_guideline", EntityType.ENTITY, $section).peek(e -> e.setRedactionReason("OECD Guideline year found")), + entityCreationService.byRegex("OPPTS (Guideline Number )?\\d{3}\\.\\d{4}( \\(\\d{4}\\))?", "epa_guideline", EntityType.ENTITY, $section).peek(e -> e.setRedactionReason("EPA Guideline found")), + // new approach OECD Guideline + entityCreationService.byRegex("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2})", "oecd_guideline_number", EntityType.ENTITY,1, $section).peek(e -> e.setRedactionReason("OECD Guideline no. 
found")), + entityCreationService.byRegex("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2}),?\\s\\(?(\\d{4})\\)?", "oecd_guideline_year", EntityType.ENTITY,2, $section).peek(e -> e.setRedactionReason("OECD Guideline year found")), + entityCreationService.byRegex("(?<=OECD)[\\w\\s,\\[\\]]{1,10}\\((\\d{4})\\)\\s(4[\\d]{2})", "oecd_guideline_year", EntityType.ENTITY,1, $section).peek(e -> e.setRedactionReason("OECD Guideline year found")), + entityCreationService.byRegex("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", "oecd_guideline_number", EntityType.ENTITY,1, $section).peek(e -> e.setRedactionReason("OECD Guideline number found")), + entityCreationService.byRegex("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", "oecd_guideline_year", EntityType.ENTITY,2, $section).peek(e -> e.setRedactionReason("OECD Guideline year found")), + // missing OECD guideline rules for RFP demo file + entityCreationService.byRegex("(?<=OECD) Guideline (4\\d{2})", "oecd_guideline_number", EntityType.ENTITY,1, $section).peek(e -> e.setRedactionReason("OECD Guideline number found")), + entityCreationService.byRegex("OECD Guideline 4\\d{2}", "oecd_guideline", EntityType.ENTITY, $section).peek(e -> e.setRedactionReason("OECD Guideline found"))) + .flatMap(a -> a) + .forEach(e -> { + e.addMatchedRule("1"); + e.setRedaction(true); + e.setLegalBasis("n-a"); + }); + end + + +rule "1b: Guidelines" + when + $section: Section( + ( + containsString("DATA REQUIREMENT") + || containsString("TEST GUIDELINE") + || containsString("MÉTODO(S) DE REFERÊNCIA(S):") + ) + && ( + containsString("OECD") + || containsString("EPA") + || containsString("OPPTS") + ) + && ( + hasEntitiesOfType("oecd_guideline") + || hasEntitiesOfType("epa_guideline") + || hasEntitiesOfType("ec_guideline") + ) + ) + then + $section.getEntitiesOfType(List.of("oecd_guideline","ec_guideline", "epa_guideline")) + .forEach(entity -> { + entity.setRedactionReason("OECD guideline 
found."); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("1"); + }); + end + + +rule "1c: Guidelines" + when + $section: Section( + ( + hasEntitiesOfType("oecd_guideline") + || hasEntitiesOfType("epa_guideline") + || hasEntitiesOfType("ec_guideline") + ) + && !( + ( + containsString("DATA REQUIREMENT") + || containsString("TEST GUIDELINE") + || containsString("MÉTODO(S) DE REFERÊNCIA(S):") + ) + && ( + containsString("OECD") + || containsString("EPA") + || containsString("OPPTS") + ) + ) + ) + then + $section.getEntitiesOfType(List.of("oecd_guideline", "ec_guideline", "epa_guideline")).forEach(entity -> { + entity.removeFromGraph(); + retract(entity); + }); + end + + + + +rule "2: Report number" + when + $section: Section(containsString("LABORATORY PROJECT ID") , containsString("Report Number:")) + then + entityCreationService.lineAfterString("Report Number:", "report_number", EntityType.ENTITY, $section).findFirst().ifPresent(entity -> { + entity.addMatchedRule("2"); + entity.setRedaction(true); + entity.setRedactionReason("Report number found"); + entity.setLegalBasis("n-a"); + }); + end + + + + +rule "3: Experimental Starting Date" + when + $section: Section(containsString("Experimental I. Starting Date:") || containsString("Experimental II. Starting Date:") || containsStringIgnoreCase("experimental start date") || containsStringIgnoreCase("experimental starting date")) + then + entityCreationService.lineAfterStrings( + List.of("Experimental start date", + "Experimental start date:", + "Experimental Starting Date", + "Experimental Starting Date:", + "Experimental starting date", + "Experimental starting date:", + "Experimental Start Date", + "Experimental Start Date:", + "Experimental I. Starting Date:", + "Experimental II. 
Starting Date:"), + "experimental_start_date", EntityType.ENTITY, $section) + .forEach(entity -> { + entity.addMatchedRule("3"); + entity.setRedaction(true); + entity.setRedactionReason("Experimental start date found"); + entity.setLegalBasis("n-a"); + }); + end + + +rule "3: Experimental Completion Date" + when + $section: Section(containsStringIgnoreCase("experimental termination date") || containsStringIgnoreCase("experimental completion date")) + then + entityCreationService.lineAfterStrings( + List.of("Experimental termination date", + "Experimental termination date:", + "Experimental Completion Date", + "Experimental Completion Date:", + "Experimental completion date", + "Experimental completion date:", + "Experimental Termination Date", + "Experimental Termination Date:"), "experimental_end_date", EntityType.ENTITY, $section) + .forEach(entity -> { + entity.addMatchedRule("3"); + entity.setRedaction(true); + entity.setRedactionReason("Experimental end date found"); + entity.setLegalBasis("n-a"); + }); + end + + + // ignore species and strain in irrelevant study types + rule "4a: Species" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","438","439","471","487")) + $section: Section(hasEntitiesOfType("species") || hasEntitiesOfType("strain")) + then + $section.getEntitiesOfType(List.of("species", "strain")).forEach(entity -> { + entity.removeFromGraph(); + retract(entity); + }); + end + + + // hide all skipped species and strains except in the relevant sections + rule "4b: Species" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436")) + $section: Section( + (hasEntitiesOfType("species") || hasEntitiesOfType("strain")) + && !( + getHeadline().containsString("test system") + || getHeadline().containsString("animals") + || getHeadline().containsString("specification") + ) + ) + then + $section.getEntitiesOfType(List.of("species", 
"strain")).forEach(entity -> { + entity.removeFromGraph(); + retract(entity); + }); + end + + +// redact the relevant species +rule "4c: Species" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436")) + $section: Section( + hasEntitiesOfType("species") + ) + then + $section.getEntitiesOfType("species") + .forEach(entity -> { + entity.setRedactionReason("Species found."); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("25"); + }); + end + + +// redact the strain +rule "5: Strain" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436")) + $section: Section( + hasEntitiesOfType("species") + && hasEntitiesOfType("strain") + && ( + getHeadline().containsString("test system") + || getHeadline().containsString("animals") + || getHeadline().containsString("specification") + ) + ) + then + $section.getEntitiesOfType("strain") + .forEach(entity -> { + entity.setRedactionReason("Strain found."); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("26"); + }); + end + + +rule "7: study title by document structure" + when + $table: Table(isOnPage(1), + (containsString("Final Report") || containsString("SPL")), + numberOfRows == 1, + numberOfCols == 1) + then + + entityCreationService.bySemanticNode($table.getCell(0, 0).streamChildren().toList().get(1), "title", EntityType.ENTITY).ifPresent(entity -> { + entity.setRedactionReason("Study title found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("7"); + }); + end + + +rule "7: study title old" + when + $section: Section(isOnPage(1) && (containsString("Final Report") || containsString("SPL"))) + then +// TODO +// section.redactByRegExWithNewlines("(?<=\\n)[\\w\\W]{1,300}(?=\\nFinal Report)", true, 0, "title", 7, "Study title found", "n-a"); + + entityCreationService.betweenStrings("TITLE", "DATA REQUIREMENT", "title", 
EntityType.ENTITY, $section).findFirst().ifPresent(entity -> { + entity.setRedactionReason("Title found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("7"); + }); + entityCreationService.betweenStrings("Laboratories", "SPL", "title", EntityType.ENTITY, $section).findFirst().ifPresent(entity -> { + entity.setRedactionReason("Title found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("7"); + }); + end + + + +rule "8a: Performing Laboratory (Name)" + when + $section: Section(containsString("PERFORMING LABORATORY:")) + then + entityCreationService.lineAfterString("PERFORMING LABORATORY:", "laboratory_name", EntityType.ENTITY, $section).findFirst().ifPresent(entity -> { + entity.setRedactionReason("Performing Laboratory found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("8"); + }); + end + + + rule "8b: Performing Laboratory (Country)" + when + nerEntities: NerEntities(hasEntitiesOfType("COUNTRY")) + $section: Section(containsString("PERFORMING LABORATORY:")) + then + nerEntities.streamEntitiesOfType("COUNTRY") + .map(nerEntity -> entityCreationService.byNerEntity(nerEntity, "laboratory_country", EntityType.ENTITY, $section)) + .forEach(entity -> { + entity.setRedactionReason("Performing Laboratory found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("8"); + insert(entity); + }); + end + + +rule "8c: Performing Laboratory (Country & Name) from dict" + when + $section: Section( + ( + hasEntitiesOfType("laboratory_country") + || hasEntitiesOfType("laboratory_name") + ) + && ( + containsString("PERFORMING LABORATORY:") + || ( + containsString("PERFORMING") + && containsString("LABORATORY:") + ) + ) + ) + then + $section.getEntitiesOfType("laboratory_country") + .forEach(entity -> { + entity.setRedactionReason("Performing laboratory country dictionary entry found."); + entity.setLegalBasis("n-a"); + 
entity.setRedaction(true); + entity.addMatchedRule("8"); + }); + $section.getEntitiesOfType("laboratory_name") + .forEach(entity -> { + entity.setRedactionReason("Performing laboratory name dictionary entry found."); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("8"); + }); + end + + + rule "8d: Performing Laboratory (Country) from dict" + when + $section: Section( + ( + hasEntitiesOfType("laboratory_country") + || hasEntitiesOfType("laboratory_name") + ) + && !( + containsString("PERFORMING LABORATORY:") + || ( + containsString("PERFORMING") + && containsString("LABORATORY:") + ) + ) + ) + then + $section.getEntitiesOfType(List.of("laboratory_country", "laboratory_name")).forEach(entity -> { + entity.removeFromGraph(); + retract(entity); + }); + end + + + +// Headline not found because of ocr. +rule "9: GLP Study" + when + $headline: Headline(containsString("GOOD LABORATORY PRACTICE COMPLIANCE") + || containsString("GOOD LABORATORY PRACTICE COMPLIANCE STATEMENT") + || (containsString("DECLARACAO DE CONFORMIDADE") && containsString("PRATICAS DE LABORATORIO")) + || containsString("GLP Certificate") + || containsString("GLP Certificates") + || containsString("GOOD LABORATORY PRACTICE (GLP) CERTIFICATE") + || containsString("Good Laboratory Practice Certificate") + || containsString("STATEMENT OF GLP COMPLIANCE AND AUTHENTICATION")) + then + entityCreationService.bySemanticNode($headline, "glp_study", EntityType.ENTITY).ifPresent(entity -> { + entity.setRedactionReason("GLP Study found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("9"); + }); + end + + +rule "10: Batch number from CoA" + when + $section: Section( + ( + getHeadline().containsString("Analytical Report") + || getHeadline().containsString("Certificate of Analysis") + || containsStringIgnoreCase("certificate of analysis") + ) + && ( + containsStringIgnoreCase("batch") + || containsStringIgnoreCase("bath") + || 
containsStringIgnoreCase("barch") + || containsStringIgnoreCase("bateb") + ) + && ( + containsStringIgnoreCase("identification") + || containsStringIgnoreCase("ldentitfication") + || containsStringIgnoreCase("wentification") + || containsStringIgnoreCase("mentification") + || containsStringIgnoreCase("kientification") + || containsStringIgnoreCase("reference number") + || containsStringIgnoreCase("test substance") + ) + ) + then + entityCreationService.lineAfterStrings(List.of("Batch Identification", + "(Batch Identification):", + "Bateb Identification", + "Batch Wentification", + "Batch Mentification", + "Batch Kientification", + "Barch Identification", + "Bath ldentitfication", + "Batch of test substance :" + ), "batch_number", EntityType.ENTITY, $section).forEach(entity -> { + entity.setRedactionReason("Batch number found in CoA"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("10"); + }); + end + + +rule "10a: Batch number" + when + $section: Section( + ( + getHeadline().containsString("Test and Control Substances") + || getHeadline().containsString("Test Substances") + || getHeadline().containsString("Test Substance") + || getHeadline().containsString("Test Item") + ) + && !( + getHeadline().containsString("component") + || getHeadline().containsString("reference") + || getHeadline().containsString("blank") + ) + && containsStringIgnoreCase("batch") + ) + then + Stream.of(entityCreationService.byRegex("Batch ID ([A-Z\\d\\-]{7,14})", "batch_number", EntityType.ENTITY, $section) + , entityCreationService.lineAfterStrings(List.of("Batch Identification", + "Batch number:", + "Batch reference number:", + "Batch:", + "Batch/Lot number:", + "Batch (Lot) Number:", + "Batch Number:", + "Batch Nº:", + "Batch no:" + ), "batch_number", EntityType.ENTITY, $section)).flatMap(a -> a) + .forEach(entity -> { + entity.setRedactionReason("Batch number found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + 
entity.addMatchedRule("10"); + }); + end + + + +rule "11: Conclusions - LD50, LC50, Confidence" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","425","436")) + $section: Section( + (getHeadline().containsString("Conclusion") || getHeadline().containsString("Lethality")) + && (containsString("LD") || containsString("LC") || containsString("50") || containsString("LD50") || containsString("lethal concentration") || containsString("lethal dose")) + && ( + containsString("greater than") + || containsString("higher than") + || containsString("above") + || containsString("in excess") + || containsString("exceeds") + || containsString("was found to be") + || containsString("was calculated to be") + || containsString("estimated to be") + ) + ) + then + entityCreationService.byRegexIgnoreCase("(L[D|C]\\s?50|lethal concentration|lethal dose).{1,200}(greater than|considered to be above|in excess of|exceeds|higher than)", "ld50_greater", EntityType.ENTITY,2, $section) + .forEach(entity -> { + entity.setRedactionReason("LD50 greater than found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("11"); + }); + + entityCreationService.byRegexIgnoreCase("\\b(?:(?:greater|higher) than|considered to be above|(?:was|is) (?:found|estimated) to be|was calculated to be|in excess of|exceeds|equal to)\\s?([\\d\\.]{1,6})\\s?mg\\/(?:kg|L)", "ld50_value", EntityType.ENTITY,1, $section) + .forEach(entity -> { + entity.setRedactionReason("LD50 value found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("11"); + }); + + entityCreationService.byRegexIgnoreCase("confidence interval (?:is )?([\\d\\.]{2,6}).{0,20} to (?:greater than )?([\\d\\.]{2,6})", "confidence_minimal", EntityType.ENTITY,1, $section) + .forEach(entity -> { + entity.setRedactionReason("Minimal Confidence found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("11"); + }); + + 
entityCreationService.byRegexIgnoreCase("confidence interval (?:is )?([\\d\\.]{2,6}).{0,20} to (?:greater than )?([\\d\\.]{2,6})", "confidence_maximal", EntityType.ENTITY,2, $section) + .forEach(entity -> { + entity.setRedactionReason("Maximal Confidence found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("11"); + }); + end + + +rule "12: Guideline Deviation" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","471")) + $section: Section( + ( + getHeadline().containsString("General Information") + || containsString("GENERAL INFORMATION") + ) + && ( + containsStringIgnoreCase("from the") + || containsStringIgnoreCase("to the") + ) + ) + then + + entityCreationService.betweenRegexes("(?:Deviations? from the [G|g]uidelines?)(?: and| or)?( the)?(?: Study Plan)?", "(?:(?:Deviations? from the Study Plan)|(?:Performing laboratory test)|(?:Other)|(?:Retention of [S|s]amples)|(?:Amendments? to Final Protocol))", "guideline_deviation", EntityType.ENTITY, $section).forEach(entity -> { + entity.setRedactionReason("Deviation from Guidelines found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("12"); + }); + + entityCreationService.betweenRegexes("(?:Deviations? (?:from|to)(?: the)? 
[S|s]tudy [P|p]lan)", "(?:Regulatory Guidelines)|(?:Other)|(?:Distribution of the report)|(?:Performing laboratory test)|(?:Distribution of the report)|(?:Retention of [S|s]amples)", "guideline_deviation", EntityType.ENTITY, $section).forEach(entity -> { + entity.setRedactionReason("Deviation from Study Plan found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("12"); + }); + + entityCreationService.betweenStrings("Deviations from the study plan", "Regulatory Guidelines", "guideline_deviation", EntityType.ENTITY, $section).forEach(entity -> { + entity.setRedactionReason("Deviation from the study plan found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("12"); + }); + + entityCreationService.byRegex("(?>Study plan adherence)(.{1,20}deviations.{1,20} to the study plan.{0,50}\\.)\\s", "guideline_deviation", EntityType.ENTITY, 1, $section).forEach(entity -> { + entity.setRedactionReason("Guideline deviation found in text."); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("12"); + }); + + entityCreationService.betweenStrings("Deviations from the study plan", "validity of the study.", "guideline_deviation", EntityType.ENTITY, $section).forEach(entity -> { + entity.setRedactionReason("Deviation from the study plan found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("12"); + }); + end + + +rule "12a: Guideline Deviation in text" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","471")) + $section: Section( + getHeadline().containsString("Introduction") + && containsStringIgnoreCase("deviations from the protocol") + ) + then + entityCreationService.byRegex("There were no deviations from the protocol.{1,100}\\.\\s", "guideline_deviation", EntityType.ENTITY, $section).forEach(entity -> { + entity.setRedactionReason("Guideline deviation found in text."); + 
entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("12"); + }); + end + +/* Rule 13 (OECD 425): a section whose headline contains "Clinical Signs" or "Macroscopic Findings" and does not contain "TABLE" is turned into a clinical_signs entity via bySemanticNode. */ rule "13: Clinical Signs" + when + FileAttribute(label == "OECD Number", value == "425") + $section: Section( + ( + getHeadline().containsString("Clinical Signs") + || getHeadline().containsString("Macroscopic Findings") + ) + && !getHeadline().containsString("TABLE") + ) + then + entityCreationService.bySemanticNode($section, "clinical_signs", EntityType.ENTITY).ifPresent(entity -> { + entity.setRedactionReason("Clinical Signs found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("13"); + }); + end + + +/* Rule 14 (OECD 425): in sections whose headline contains "Dosages" or "Study Design" and not "TABLE", creates dosages entities with betweenStrings from "The animals were treated" or "Animals were treated" up to ".". The regex-based variant below is still an open TODO. */ rule "14: Dosages" + when + FileAttribute(label == "OECD Number", value == "425") + $section: Section( + ( + getHeadline().containsString("Dosages") + || getHeadline().containsString("Study Design") + ) + && !getHeadline().containsString("TABLE") + ) + then + entityCreationService.betweenStrings("The animals were treated", ".", "dosages", EntityType.ENTITY, $section).forEach(entity -> { + entity.setRedactionReason("Dosage found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("14"); + }); + + entityCreationService.betweenStrings("Animals were treated", ".", "dosages", EntityType.ENTITY, $section).forEach(entity -> { + entity.setRedactionReason("Dosage found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("14"); + }); + +//TODO section.redactByRegExWithNewlines("(?:\\.[\\s|\\n]|^.{5,20}\\n)([^\\.]{1,200}(?:animal|given|received)[^\\.]{1,200}dose\\s(?:levels?\\s)?(?:of|at)[^\\.]{1,200})(?:\\.[\\s|\\n|$])",true, 1, "dosages", 14, "Dosage found", "n-a"); + end + +/* Rule 15 (OECD 425): for a headline containing "Mortality" but not "TABLE", creates one mortality entity over the merged boundaries of all PARAGRAPH sub-nodes of the headline's parent node. */ rule "15: Mortality" + when + $headline: Headline(containsString("Mortality") && !containsString("TABLE")) + FileAttribute(label == "OECD Number", value == "425") + then + + //FIXME + var entity =
entityCreationService.byBoundary(Boundary.merge($headline.getParent().streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList()), "mortality", EntityType.ENTITY, $headline.getParent()); + entity.setRedactionReason("Mortality found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("15"); + + end + + +/* Rule 17 (listed OECD numbers): a section whose headline contains "Conclusion" is turned into a study_conclusion entity via bySemanticNode. */ rule "17: Study Conclusion" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","471")) + $section: Section( + getHeadline().containsString("Conclusion") + ) + then + entityCreationService.bySemanticNode($section, "study_conclusion", EntityType.ENTITY).ifPresent(entity -> { + entity.setRedactionReason("Study Conclusion found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("17"); + }); + end + + +/* Rule 18 (OECD 425): a section whose headline contains "Results" and whose text contains "body weight", "body weights", "bodyweight" or "bodyweights" is turned into a weight_behavior_changes entity. */ rule "18: Weight Behavior Changes" + when + FileAttribute(label == "OECD Number", value == "425") + $section: Section( + getHeadline().containsString("Results") + && ( + containsString("body weight") + || containsString("body weights") + || containsString("bodyweight") + || containsString("bodyweights") + ) + ) + then + + //FIXME + entityCreationService.bySemanticNode($section, "weight_behavior_changes", EntityType.ENTITY).ifPresent(entity -> { + entity.setRedactionReason("Weight behavior changes found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("18"); + }); + end + +/* Rule 19 (OECD 402, 403, 436): a section whose headline contains "Necropsy", "Macroscopic Findings" or "Macroscopic examination", and neither "Table" nor "Appendix", is turned into a necropsy_findings entity. */ rule "19: Necropsy findings" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","436")) + $section: Section( + ( + getHeadline().containsString("Necropsy") + || getHeadline().containsString("Macroscopic Findings") + || getHeadline().containsString("Macroscopic examination") + ) + && !getHeadline().containsString("Table") + && !getHeadline().containsString("Appendix") + ) + then + entityCreationService.bySemanticNode($section, "necropsy_findings", EntityType.ENTITY).ifPresent(entity -> { +
entity.setRedactionReason("Necropsy section found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("19"); + }); + end + + +/* Rule 22 (OECD 403): a section whose headline contains "Clinical Observations", "Clinical observations", "In-life Observations" or "Postmortem Observations", and neither "Appendix" nor "Table", is turned into a clinical_observations entity. */ rule "22: Clinical observations" + when + FileAttribute(label == "OECD Number", value == "403") + $section: Section( + ( + getHeadline().containsString("Clinical Observations") + || getHeadline().containsString("Clinical observations") + || getHeadline().containsString("In-life Observations") + || getHeadline().containsString("Postmortem Observations") + ) + && !getHeadline().containsString("Appendix") + && !getHeadline().containsString("Table") + ) + then + + entityCreationService.bySemanticNode($section, "clinical_observations", EntityType.ENTITY).ifPresent(entity -> { + entity.setRedactionReason("Clinical observations section found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("22"); + }); + end + + +/* These two rules were already commented out before +rule "23a: Bodyweight changes" + when + FileAttribute(label == "OECD Number", value == "403") + $section: Section( + containsString("Bodyweight") + && containsString("Group") + ) + then + entityCreationService.betweenRegexes("\\.\\s\\bBodyweight\\s", "Group.{0,40}[\\s\\d(.]{1,10}mg/L\\)", "bodyweight_changes", EntityType.ENTITY, $section).forEach(entity -> { + entity.setRedactionReason("Bodyweight changes found in results"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule(23); + }); + entityCreationService.betweenRegexes("\\.\\s\\bBodyweight.{100,500}Group.{0,30}mg/L\\)", "\\..{1,20}Group", "bodyweight_changes", EntityType.ENTITY, $section).forEach(entity -> { + entity.setRedactionReason("Bodyweight changes found in results"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule(23); + }); + end + + +rule "23b: Bodyweight changes" + when + FileAttribute(label == "OECD Number", value == "403") + $section: Section( +
getHeadline().containsString("Bodyweight") + && containsString("Sighting") + && containsString("Main") + ) + then + entityCreationService.betweenRegexes("\\bSighting[\\w\\s]{0,15}[\\s\\d(.]{1,10}mg/L\\)", "\\bMain[\\w\\s]{0,15}[\\s\\(\\d\\.]{1,10}mg/L\\)", "bodyweight_changes", EntityType.ENTITY, $section).forEach(entity -> { + entity.setRedactionReason("Bodyweight section found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule(23); + }); + + entityCreationService.betweenRegexes("\\bMain[\\w\\s]{0,15}[\\s\\d(.]{1,10}mg/L\\)", "the study\\.", "bodyweight_changes", EntityType.ENTITY, $section).forEach(entity -> { + entity.setRedactionReason("Bodyweight section found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule(23); + }); + end +*/ + + +/* Rule 23 (OECD 403): a section whose headline contains "Bodyweight", "Bodyweights", "Body Weights" or "Body Weight", and neither "Appendix" nor "TABLE", is turned into a bodyweight_changes entity. */ rule "23: Bodyweight changes" + when + FileAttribute(label == "OECD Number", value == "403") + $section: Section( + ( + getHeadline().containsString("Bodyweight") + || getHeadline().containsString("Bodyweights") + || getHeadline().containsString("Body Weights") + || getHeadline().containsString("Body Weight") + ) + && !getHeadline().containsString("Appendix") + && !getHeadline().containsString("TABLE") + ) + then + + entityCreationService.bySemanticNode($section, "bodyweight_changes", EntityType.ENTITY).ifPresent(entity -> { + entity.setRedactionReason("Bodyweight section found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("23"); + }); + end + + +/* Rule 24 (listed OECD numbers): a section whose headline contains "study design" is turned into a study_design entity. NOTE(review): other rules test both capitalisations of a phrase separately, so containsString is presumably case-sensitive; confirm that lowercase "study design" is the intended headline text. */ rule "24: Study Design" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","404","405","406","428","429","438","439","474","487")) + $section: Section( + getHeadline().containsString("study design") + ) + then + + entityCreationService.bySemanticNode($section, "study_design", EntityType.ENTITY).ifPresent(entity -> { + entity.setRedactionReason("Study design section found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); +
entity.addMatchedRule("24"); + }); + end + + + +/* Rule 25 (OECD 406, 428, 438, 439, 474, 487): a section whose headline contains "Results" or "Conclusion" and none of the excluded terms listed in the condition is turned into a results_and_conclusion entity. */ rule "25: Results and Conclusion (406, 428, 438, 439, 474 & 487)" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("406","428","438","439","474","487")) + $section: Section( + (getHeadline().containsString("Results") || getHeadline().containsString("Conclusion")) + && !getHeadline().containsString("POSITIVE CONTROL") && !getHeadline().containsString("Positive Control") + && !getHeadline().containsString("Evaluation") && !getHeadline().containsString("Micronucleus") && + !getHeadline().containsString("TABLE") && !getHeadline().containsString("DISCUSSION") && + !getHeadline().containsString("CONCLUSIONS") && !getHeadline().containsString("Interpretation") && !getHeadline().containsString("Viability")) + then + + entityCreationService.bySemanticNode($section, "results_and_conclusion", EntityType.ENTITY).ifPresent(entity -> { + entity.setRedactionReason("Results and Conclusion found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("25"); + }); + end + + + +/* Rule 26 (OECD 404, 405): a section whose headline contains "Results" but neither "Evaluation" nor "study" is turned into a detailing entity. */ rule "26: Detailing (404 & 405)" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("404","405")) + $section: Section( + getHeadline().containsString("Results") && !getHeadline().containsString("Evaluation") && !getHeadline().containsString("study") + ) + then + entityCreationService.bySemanticNode($section, "detailing", EntityType.ENTITY).ifPresent(entity -> { + entity.setRedactionReason("Detailing found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("26"); + }); + end + + +/* Rule 32 (OECD 429): a section is turned into a preliminary_test_results entity when its headline contains "Pre-Experiment", or contains "Preliminary Screening Test" while the section text contains "Clinical observations". */ rule "32: Preliminary Test Results (429)" + when + FileAttribute(label == "OECD Number", value == "429") + $section: Section( + ( + (getHeadline().containsString("Preliminary Screening Test") && containsString("Clinical observations")) + || getHeadline().containsString("Pre-Experiment")) + ) + then + entityCreationService.bySemanticNode($section, "preliminary_test_results",
EntityType.ENTITY).ifPresent(entity -> { + entity.setRedactionReason("Preliminary Test Results found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("32"); + }); + end + + +/* Rule 33 (OECD 429): a section whose headline contains "RESULTS AND DISCUSSION", "Estimation of the proliferative response of lymph node cells" or "Results in the Main Experiment" is turned into a test_results entity. */ rule "33: Test Results (429)" + when + FileAttribute(label == "OECD Number", value == "429") + $section: Section((getHeadline().containsString("RESULTS AND DISCUSSION") || getHeadline().containsString("Estimation of the proliferative response of lymph node cells") || getHeadline().containsString("Results in the Main Experiment"))) + then + entityCreationService.bySemanticNode($section, "test_results", EntityType.ENTITY).ifPresent(entity -> { + entity.setRedactionReason("Test Results found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("33"); + }); + end + + +/* Rule 34 (OECD 429): in sections that have entities of type species and contain "animals per" or "animals /" (ignoring case), capture group 1 of the regex (the word "group") becomes an approach_used entity. */ rule "34: Approach used (429)" + when + FileAttribute(label == "OECD Number", value == "429") + $section: Section( + hasEntitiesOfType("species") + && ( + containsStringIgnoreCase("animals per") + || containsStringIgnoreCase("animals /") + ) + ) + then + + entityCreationService.byRegexIgnoreCase("\\banimals (?:per|\\/) .{0,15}(group)\\b", "approach_used", EntityType.ENTITY,1, $section).forEach(entity -> { + entity.setRedactionReason("Study animal approach found."); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("34"); + }); + end + + +/* Rule 35 (OECD 405, 429): in sections whose headline contains "animal" or "test system" but not "selection", and whose text mentions "sex:", "male" or "female" (ignoring case), capture group 2 of the regex (male/female) becomes a sex entity. The optional prefix is written [Ss]ex: because a '|' inside [...] is a literal pipe, not alternation. */ rule "35: Sex" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("405","429")) + $section: Section( + ( + getHeadline().containsString("animal") + || getHeadline().containsString("test system") + ) + && !getHeadline().containsString("selection") + && ( + containsStringIgnoreCase("sex:") + || containsStringIgnoreCase("male") + || containsStringIgnoreCase("female") + ) + ) + then + + entityCreationService.byRegexIgnoreCase("([Ss]ex:)?[\\w\\s]{0,10}\\b(males?|females?)\\b", "sex", EntityType.ENTITY,2, $section).forEach(entity -> { + entity.setRedactionReason("Test animal sex found"); +
entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("35"); + }); + end + + + +/* Rule 35a (OECD 405): in sections whose headline contains "animal", "test system" or "reaction" but not "selection", and whose text contains "number of animals" or "no." (ignoring case): the digits captured after "Number of animals:" (group 2) become number_of_animals entities and the token captured after " No. " (group 1) becomes animal_numbers entities. Both are recorded under matched rule "35". */ rule "35a: Animal Number 405" + when + FileAttribute(label == "OECD Number", value == "405") + $section: Section( + ( + getHeadline().containsString("animal") + || getHeadline().containsString("test system") + || getHeadline().containsString("reaction") + ) + && !getHeadline().containsString("selection") + && ( + containsStringIgnoreCase("number of animals") + || containsStringIgnoreCase("no.") + ) + ) + then + + entityCreationService.byRegexIgnoreCase("(Number of animals:)[\\w\\s]{0,10}\\b([\\d]{1,3})\\b", "number_of_animals", EntityType.ENTITY,2, $section).forEach(entity -> { + entity.setRedactionReason("Number of animals found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("35"); + }); + + entityCreationService.byRegexIgnoreCase("(?:.{1,10} No\\. )([\\d\\w\\-]{3,8})", "animal_numbers", EntityType.ENTITY,1, $section).forEach(entity -> { + entity.setRedactionReason("Number of animals found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("35"); + }); + end + + + +/* Rule 35b (OECD 429): in sections whose headline contains "animal" or "test system" but not "selection", and whose text contains "number of animals", "group" and either "per" or "/", several regexes capture the per-group animal count as number_of_animals entities, recorded under matched rule "35". */ rule "35b: Animal Number 429" + when + FileAttribute(label == "OECD Number", value == "429") + $section: Section( + ( + getHeadline().containsString("animal") + || getHeadline().containsString("test system") + ) + && !getHeadline().containsString("selection") + && containsStringIgnoreCase("number of animals") + && (containsStringIgnoreCase("per") || containsString("/")) + && containsStringIgnoreCase("group") + ) + then + + entityCreationService.byRegexIgnoreCase("(Number of animals per group:)[\\w\\s]{0,10}\\b([\\d]{1,3})\\b", "number_of_animals", EntityType.ENTITY,2, $section).forEach(entity -> { + entity.setRedactionReason("Number of animals in group found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("35"); + }); + entityCreationService.byRegexIgnoreCase("(Number of animals per
group:).{0,60}\\b([\\d]{1,3})\\sper group\\b", "number_of_animals", EntityType.ENTITY,2, $section).forEach(entity -> { + entity.setRedactionReason("Number of animals in group found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("35"); + }); + /* This pattern has a single capturing group (the digits), so group 1 is requested; group 2 does not exist here and java.util.regex rejects it with an IndexOutOfBoundsException. */ entityCreationService.byRegexIgnoreCase("([\\d]{1,3})[\\w\\s\\/]{0,20}(?:treatment )?group\\b", "number_of_animals", EntityType.ENTITY,1, $section).forEach(entity -> { + entity.setRedactionReason("Number of animals in group found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("35"); + }); + end + + +/* Rule 35c (OECD 429): for each keyword in the list, fires on tables that contain the keyword and whose headline contains "TABLE" and "Individual"; every cell under that header becomes an animal_numbers entity that is configured and then inserted, recorded under matched rule "35". */ rule "35c: No. Of animals - Fallback to appendix tables listing all individual animals for 429" + when + $keyword: String() from List.of("Animal Number", "Animal No.", "Animal number") + $table: Table(containsString($keyword) && getHeadline().containsString("TABLE") && getHeadline().containsString("Individual")) + FileAttribute(label == "OECD Number", value == "429") + then + $table.streamTableCellsWithHeader($keyword) + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "animal_numbers", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> { + redactionEntity.setRedaction(true); + redactionEntity.addMatchedRule("35"); + redactionEntity.setRedactionReason("Animal number found.
("+$keyword+")"); + redactionEntity.setLegalBasis("n-a"); + insert(redactionEntity); + }); + end + + + +/* Rule 37 (OECD 403, 436): in sections containing "4 hours" or "four hours" (ignoring case), capture group 1 of the regex becomes a 4h_exposure entity. An earlier variant of the regex is kept commented out inside the rule body. */ rule "37: 4h Exposure" + when + FileAttribute(label == "OECD Number", valueEqualsAnyOf("403","436")) + $section: Section( + ( + containsStringIgnoreCase("4 hours") + || containsStringIgnoreCase("four hours") + ) + ) + then + /* entityCreationService.byRegexIgnoreCase("(?<=\\.\\s\\b).{1,100}(4|four) hours.{1,250}(?=\\b\\.|\\B\\.)", "4h_exposure", EntityType.ENTITY, $section).forEach(entity -> { + entity.setRedactionReason("4h exposure sentence found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule(37); + });*/ + entityCreationService.byRegexIgnoreCase("((?<=\\.\\s\\b).{1,100}(4|four) hours.*?\\.) ", "4h_exposure", EntityType.ENTITY,1, $section).forEach(entity -> { + entity.setRedactionReason("4h exposure sentence found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("37"); + }); + end + + + +/* Rule 39 (OECD 404): a section whose headline contains "Formulation" and whose text contains "dilution" is turned into a dilution entity. */ rule "39: Dilution of the test substance" + when + FileAttribute(label == "OECD Number", value == "404") + $section: Section( + getHeadline().containsString("Formulation") + && containsString("dilution") + ) + then + entityCreationService.bySemanticNode($section, "dilution", EntityType.ENTITY).ifPresent(entity -> { + entity.setRedactionReason("Dilution found."); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("39"); + }); + end + + +/* Rule 40 (OECD 429): a section whose headline contains "Positive Control" and neither "Appendix" nor "Table" is turned into a positive_control entity. */ rule "40: Positive Control" + when + FileAttribute(label == "OECD Number", value == "429") + $section: Section( + getHeadline().containsString("Positive Control") + && !( + getHeadline().containsString("Appendix") + || getHeadline().containsString("Table") + ) + ) + then + entityCreationService.bySemanticNode($section, "positive_control", EntityType.ENTITY).ifPresent(entity -> { + entity.setRedactionReason("Positive control found."); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("40"); + }); + end +
+ +/* Rule 42 (OECD 402): a section whose headline contains "Mortality" but not "TABLE" is turned into a mortality_statement entity. */ rule "42: Mortality Statement" + when + FileAttribute(label == "OECD Number", value == "402") + $section: Section( getHeadline().containsString("Mortality") && !getHeadline().containsString("TABLE")) + then + entityCreationService.bySemanticNode($section, "mortality_statement", EntityType.ENTITY).ifPresent(entity -> { + entity.setRedactionReason("Mortality Statement found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("42"); + }); + end + + +/* Rule 43 (OECD 425): fires on tables that have at least one of the mortality-style headers and at least one of the dose-style headers listed in the condition; the cells under each of those headers are streamed together, each becomes a dose_mortality entity, and each entity is inserted. The header spellings, including "Dose levei (mg/kg)", are matched literally. */ rule "43: Dose Mortality" + when + FileAttribute(label == "OECD Number", value == "425") + $table: Table( + (hasHeader("Mortality") || hasHeader("Long Term Results") || hasHeader("Long Term Outcome") || hasHeader("Comments") || hasHeader("Viability / Mortality")) + && + (hasHeader("Dose [mg/kg bodyweight]") || hasHeader("Dose (mg/kg)") || hasHeader("Dose levei (mg/kg)") || hasHeader("Dose Level (mg/kg)") || hasHeader("Dose level (mg/kg)") || hasHeader("Dosage [mg/kg body weight]")) + ) + then + Stream.of($table.streamTableCellsWithHeader("Mortality"), + $table.streamTableCellsWithHeader("Comments"), + $table.streamTableCellsWithHeader("Long Term Results"), + $table.streamTableCellsWithHeader("Long Term Outcome"), + $table.streamTableCellsWithHeader("Viability / Mortality"), + $table.streamTableCellsWithHeader("Dose [mg/kg bodyweight]"), + $table.streamTableCellsWithHeader("Dose levei (mg/kg)"), + $table.streamTableCellsWithHeader("Dose Level (mg/kg)"), + $table.streamTableCellsWithHeader("Dose level (mg/kg)"), + $table.streamTableCellsWithHeader("Dose (mg/kg)"), + $table.streamTableCellsWithHeader("Dosage [mg/kg body weight]") + ).flatMap(a -> a) + .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "dose_mortality", EntityType.ENTITY)) + .filter(Optional::isPresent) + .map(Optional::get) + .forEach(redactionEntity -> { + redactionEntity.setRedaction(true); + redactionEntity.addMatchedRule("43"); + redactionEntity.setRedactionReason("Dose Mortality Data found."); +
redactionEntity.setLegalBasis("n-a"); + insert(redactionEntity); + }); + end + + +/* Rule 44 (OECD 429): a section whose headline contains "Results", whose headline string is shorter than 20 characters, and whose headline contains neither "Appendix" nor "Table" is turned into a results_(main_study) entity. */ rule "44: Results (Main Study)" + when + FileAttribute(label == "OECD Number", value == "429") + $section: Section( + getHeadline().containsString("Results") + && getHeadline().toString().length() < 20 + && !( + getHeadline().containsString("Appendix") + || getHeadline().containsString("Table") + ) + ) + then + entityCreationService.bySemanticNode($section, "results_(main_study)", EntityType.ENTITY).ifPresent(entity -> { + entity.setRedactionReason("Results for main study found."); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("44"); + }); + end + + +/* Rule 45 (OECD 402): a section whose headline contains "study design" is turned into a doses_(mg_kg_bw) entity. */ rule "45: Doses (mg/kg bodyweight)" + when + FileAttribute(label == "OECD Number", value == "402") + $section: Section( + getHeadline().containsString("study design") + ) + then + entityCreationService.bySemanticNode($section, "doses_(mg_kg_bw)", EntityType.ENTITY).ifPresent(entity -> { + entity.setRedactionReason("Doses per bodyweight information found"); + entity.setLegalBasis("n-a"); + entity.setRedaction(true); + entity.addMatchedRule("45"); + }); + end + + +//------------------------------------ Manual redaction rules ------------------------------------ + +// Rule unit: MAN.0 +/* MAN.0.0 (salience 128): for a ManualResizeRedaction whose annotationId matches a RedactionEntity, resizes and reinserts the entity through manualRedactionApplicationService, retracts the resize fact and updates the entity. */ rule "MAN.0.0: Apply manual resize redaction" + salience 128 + when + $resizeRedaction: ManualResizeRedaction($id: annotationId) + $entityToBeResized: RedactionEntity(matchesAnnotationId($id)) + then + manualRedactionApplicationService.resizeEntityAndReinsert($entityToBeResized, $resizeRedaction); + retract($resizeRedaction); + update($entityToBeResized); + end + + +// Rule unit: MAN.1 +/* MAN.1.0 (salience 128): for an approved IdRemoval that is not a dictionary removal and has a request date, with no approved ManualForceRedaction for the same annotation id, removes the matching RedactionEntity from the graph and retracts it. */ rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to Entity" + salience 128 + when + IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId) + not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null) +
$entityToBeRemoved: RedactionEntity(matchesAnnotationId($id)) + then + $entityToBeRemoved.removeFromGraph(); + retract($entityToBeRemoved); + end + +/* MAN.1.1 (salience 128): same IdRemoval conditions as MAN.1.0, applied to an Image with the same id; the image is marked as ignored rather than retracted. */ rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to Image" + salience 128 + when + IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $id: annotationId) + not ManualForceRedaction($id == annotationId, status == AnnotationStatus.APPROVED, requestDate != null) + $imageEntityToBeRemoved: Image($id == id) + then + $imageEntityToBeRemoved.setIgnored(true); + end + + +// Rule unit: MAN.2 +/* MAN.2.0 (salience 128): for an approved ManualForceRedaction with a request date, sets the matching entity's legal basis from the force redaction, marks it as redaction and sets skipRemoveEntitiesContainedInLarger to true. */ rule "MAN.2.0: Apply force redaction" + salience 128 + when + ManualForceRedaction($id: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $legalBasis: legalBasis) + $entityToForce: RedactionEntity(matchesAnnotationId($id)) + then + $entityToForce.setLegalBasis($legalBasis); + $entityToForce.setRedaction(true); + $entityToForce.setSkipRemoveEntitiesContainedInLarger(true); + end + + +// Rule unit: MAN.3 +/* MAN.3.0 (salience 128): for an approved ManualImageRecategorization, sets the type of the Image with the same id via ImageType.fromString. */ rule "MAN.3.0: Apply image recategorization" + salience 128 + when + ManualImageRecategorization($id: annotationId, status == AnnotationStatus.APPROVED, $imageType: type) + $image: Image($id == id) + then + $image.setImageType(ImageType.fromString($imageType)); + end + + + +//------------------------------------ Entity merging rules ------------------------------------ + + + +//------------------------------------ File attributes rules ------------------------------------ + +// Rule unit: FA.1 +/* FA.1.0 (salience 64): retracts a FileAttribute that is a different fact from another FileAttribute but has the same label and value. */ rule "FA.1.0: remove duplicate FileAttributes" + + salience 64 + when + $fileAttribute: FileAttribute($label: label, $value: value) + $duplicate: FileAttribute(this != $fileAttribute, label == $label, value == $value) + then + retract($duplicate); + end + + +//------------------------------------ Local dictionary search rules ------------------------------------ + +// Rule unit: LDS.0 +/* LDS.0.0 (agenda-group LOCAL_DICTIONARY_ADDS, salience -999): for every dictionary model with non-empty local entries, runs that model's local search over the document, creating RECOMMENDATION entities of the model's type; each gets Engine.RULE added and is inserted. */ rule "LDS.0.0: run local dictionary search" + agenda-group
"LOCAL_DICTIONARY_ADDS" + salience -999 + when + DictionaryModel(!localEntries.isEmpty(), $type: type, $searchImplementation: localSearch) from dictionary.getDictionaryModels() + then + entityCreationService.bySearchImplementation($searchImplementation, $type, EntityType.RECOMMENDATION, document) + .forEach(entity -> { + entity.addEngine(Engine.RULE); + insert(entity); + }); + end diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/402_F.3.1 - A13617AV - Acute Dermal Toxicity.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/402_F.3.1 - A13617AV - Acute Dermal Toxicity.pdf new file mode 100644 index 00000000..2ba18d1f Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/402_F.3.1 - A13617AV - Acute Dermal Toxicity.pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/403_F.2 - A13617AV - Acute Inhalation Toxicity - Rats.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/403_F.2 - A13617AV - Acute Inhalation Toxicity - Rats.pdf new file mode 100644 index 00000000..eddef2c9 Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/403_F.2 - A13617AV - Acute Inhalation Toxicity - Rats.pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/404_F.3.4 - A13617AV - Primary Skin Irritation.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/404_F.3.4 - A13617AV - Primary Skin Irritation.pdf new file mode 100644 index 00000000..0ebff2a5 Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/404_F.3.4 - A13617AV - 
Primary Skin Irritation.pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/405_F.4 - A13617AV - Primary Eye Irritation Study.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/405_F.4 - A13617AV - Primary Eye Irritation Study.pdf new file mode 100644 index 00000000..08df378f Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/405_F.4 - A13617AV - Primary Eye Irritation Study.pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/406_SENSIBILIZAÇÃO - A13617AV - Skin Sensitisation.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/406_SENSIBILIZAÇÃO - A13617AV - Skin Sensitisation.pdf new file mode 100644 index 00000000..8712f74e Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/406_SENSIBILIZAÇÃO - A13617AV - Skin Sensitisation.pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/425_F.1.1.1 - A13617AV - Acute Oral Toxicity Study.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/425_F.1.1.1 - A13617AV - Acute Oral Toxicity Study.pdf new file mode 100644 index 00000000..2f4057af Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/425_F.1.1.1 - A13617AV - Acute Oral Toxicity Study.pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/471_G.1.1 - A13617AV - Reverse Mutation Assay - S. typhimurium & E. 
coli.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/471_G.1.1 - A13617AV - Reverse Mutation Assay - S. typhimurium & E. coli.pdf new file mode 100644 index 00000000..5876e193 Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/471_G.1.1 - A13617AV - Reverse Mutation Assay - S. typhimurium & E. coli.pdf differ diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/474_G.1.2 - 1768300_MMNA_A13617AV_report.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/474_G.1.2 - 1768300_MMNA_A13617AV_report.pdf new file mode 100644 index 00000000..933c50af Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/474_G.1.2 - 1768300_MMNA_A13617AV_report.pdf differ