diff --git a/redaction-service-v1/redaction-service-api-v1/pom.xml b/redaction-service-v1/redaction-service-api-v1/pom.xml
index e91526ed..4917363b 100644
--- a/redaction-service-v1/redaction-service-api-v1/pom.xml
+++ b/redaction-service-v1/redaction-service-api-v1/pom.xml
@@ -12,7 +12,7 @@
redaction-service-api-v1
- 2.79.0
+ 2.84.0
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java
new file mode 100644
index 00000000..66a80836
--- /dev/null
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java
@@ -0,0 +1,110 @@
+package com.iqser.red.service.redaction.v1.server;
+
+import static org.mockito.Mockito.when;
+
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.List;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
+import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
+import org.springframework.boot.test.context.SpringBootTest;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.ComponentScan;
+import org.springframework.context.annotation.Configuration;
+import org.springframework.context.annotation.FilterType;
+import org.springframework.context.annotation.Import;
+import org.springframework.context.annotation.Primary;
+import org.springframework.test.context.junit.jupiter.SpringExtension;
+
+import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type;
+import com.iqser.red.service.redaction.v1.model.StructureAnalyzeRequest;
+import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest;
+import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse;
+import com.iqser.red.service.redaction.v1.server.multitenancy.TenantContext;
+import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils;
+import com.iqser.red.storage.commons.StorageAutoConfiguration;
+import com.iqser.red.storage.commons.service.StorageService;
+
+/**
+ * Integration smoke test for the DocuMine "Flora" rule set ({@code drools/documine_flora.drl}).
+ * <p>
+ * The test uploads a sample study report, runs structure analysis, rule-based analysis and
+ * annotation, and writes the annotated PDF to the temporary directory for manual inspection.
+ * It contains no assertions on the extracted entities: it passes when the whole pipeline
+ * completes without throwing.
+ */
+@ExtendWith(SpringExtension.class)
+@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
+@Import(DocumineFloraTest.RedactionIntegrationTestConfiguration.class)
+public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
+
+    /** Rule file under test, loaded once from the test classpath. */
+    private static final String RULES = loadFromClassPath("drools/documine_flora.drl");
+
+
+    /**
+     * Runs the full pipeline on one Flora report and dumps the annotated document to
+     * {@code <tmp>/Documine.pdf}.
+     *
+     * @throws IOException if the annotated PDF cannot be written
+     */
+    @Test
+    public void titleExtraction() throws IOException {
+
+        AnalyzeRequest request = uploadFileToStorage("files/Documine/Flora/A13617AV/474_G.1.2 - 1768300_MMNA_A13617AV_report.pdf");
+        System.out.println("Start Full integration test");
+        analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
+        System.out.println("Finished structure analysis");
+        // The analysis result is not inspected; the call is made for its effect on the stored redaction log.
+        analyzeService.analyze(request);
+        System.out.println("Finished analysis");
+        // Kept from the original flow: the redaction log is looked up but its content is not checked.
+        redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
+
+        AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build());
+
+        String outputFileName = OsUtils.getTemporaryDirectory() + "/Documine.pdf";
+
+        try (FileOutputStream fileOutputStream = new FileOutputStream(outputFileName)) {
+            fileOutputStream.write(annotateResponse.getDocument());
+        }
+
+    }
+
+
+    /**
+     * Test-only Spring configuration: excludes the RabbitMQ auto-configuration and the storage
+     * auto-configuration, and registers a {@link FileSystemBackedStorageService} as the primary
+     * {@link StorageService}.
+     */
+    @Configuration
+    @EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class})
+    @ComponentScan(excludeFilters = {@ComponentScan.Filter(type = FilterType.ASSIGNABLE_TYPE, value = StorageAutoConfiguration.class)})
+    static class RedactionIntegrationTestConfiguration {
+
+        @Bean
+        @Primary
+        public StorageService inmemoryStorage() {
+
+            return new FileSystemBackedStorageService();
+        }
+
+    }
+
+
+    /**
+     * Sets the tenant and stubs the rules and dictionary clients so that the analysis uses
+     * {@link #RULES} together with the test dictionaries prepared by the base class.
+     */
+    @BeforeEach
+    public void stubClients() {
+
+        TenantContext.setTenantId("redaction");
+
+        when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
+        when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(JSONPrimitive.of(RULES));
+
+        loadTypeForTest();
+        loadNerForTest();
+        when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
+        when(dictionaryClient.getAllTypesForDossierTemplate(TEST_DOSSIER_TEMPLATE_ID, false)).thenReturn(getTypeResponse());
+
+        // NOTE(review): dossierTemplateId is set to TEST_DOSSIER_ID while the id uses
+        // TEST_DOSSIER_TEMPLATE_ID; left unchanged, confirm this is intended.
+        when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, false)).thenReturn(List.of(Type.builder()
+                .id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID)
+                .type(DOSSIER_REDACTIONS_INDICATOR)
+                .dossierTemplateId(TEST_DOSSIER_ID)
+                .hexColor("#ffe187")
+                .isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
+                .isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR))
+                .isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
+                .rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
+                .build()));
+
+        mockDictionaryCalls(null);
+
+        when(dictionaryClient.getColors(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(colors);
+    }
+
+}
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl
new file mode 100644
index 00000000..ef0d3980
--- /dev/null
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl
@@ -0,0 +1,1364 @@
+package drools
+
+import static java.lang.String.format;
+import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.anyMatch;
+import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.exactMatch;
+
+import java.util.List;
+import java.util.LinkedList;
+import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.Collection;
+import java.util.stream.Stream;
+import java.util.Optional;
+
+import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.*;
+import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.*;
+import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.*;
+import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.*;
+import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.EntityType;
+import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.ImageType;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine;
+import com.iqser.red.service.redaction.v1.server.layoutparsing.document.services.EntityCreationService;
+import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.Dictionary;
+import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.DictionaryModel;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualImageRecategorization;
+import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus;
+import com.iqser.red.service.redaction.v1.server.layoutparsing.document.services.ManualRedactionApplicationService;
+import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionEntity;
+import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary;
+import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
+import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary;
+import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntitiesAdapter;
+import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities;
+import com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility
+
+global Document document
+global EntityCreationService entityCreationService
+global ManualRedactionApplicationService manualRedactionApplicationService
+global NerEntitiesAdapter nerEntitiesAdapter
+global Dictionary dictionary
+
+//------------------------------------ queries ------------------------------------
+
+// Matches every FileAttribute fact; each match is bound to $fileAttribute for the caller.
+query "getFileAttributes"
+ $fileAttribute: FileAttribute()
+ end
+
+//---------------------------------------------------------------------------
+
+
+// Rule unit: MAN.0
+// NOTE(review): the unit label above says MAN.0 while the rule id below is H.0.0 - confirm which is intended.
+// Matches every Headline fact but currently has no effect: its only consequence is commented out.
+rule "H.0.0: Show headlines"
+ when
+ $headline: Headline()
+ then
+// entityCreationService.bySemanticNode($headline, "headline", EntityType.RECOMMENDATION);
+ end
+
+
+// Derives the "OECD Number" file attribute from the section text when no FileAttribute with one of
+// the listed OECD numbers exists yet. Three regexes are tried in order; the first boundary found
+// (capture group 1) becomes the attribute value.
+rule "H.0.0: Study Type File Attribute"
+    when
+        not FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","438","439","471","487"))
+        $section: Section(
+            (containsString("DATA REQUIREMENT") || containsString("TEST GUIDELINE") || containsString("MÉTODO(S) DE REFERÊNCIA(S):"))
+            && (containsString("OECD") || containsString("EPA") || containsString("OPPTS"))
+        )
+    then
+        Stream.of(
+                RedactionSearchUtility.findBoundariesByRegexIgnoreCase("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|(?:.{5,40}(?:Number |Procedure |Guideline )))(4[\\d]{2})", 1, $section.getTextBlock()),
+                RedactionSearchUtility.findBoundariesByRegexIgnoreCase("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", 1, $section.getTextBlock()),
+                RedactionSearchUtility.findBoundariesByRegexIgnoreCase("(?<=OECD) Guideline (4\\d{2})", 1, $section.getTextBlock()))
+            .flatMap(Collection::stream)
+            .findFirst()
+            .map(match -> $section.getTextBlock().subSequence(match).toString())
+            .map(oecdNumber -> FileAttribute.builder().label("OECD Number").value(oecdNumber).build())
+            .ifPresent(attribute -> insert(attribute));
+    end
+
+
+
+
+// Creates guideline entities (OECD / EPA / EC) by regex in sections that mention a guideline
+// heading ("DATA REQUIREMENT", "TEST GUIDELINE", ...) together with OECD, EPA or OPPTS.
+// Every entity created here is marked as a redaction with legal basis "n-a" and matched rule "1".
+// Fix: the character class "[N|n]" also matched a literal '|'; it is now "[Nn]".
+rule "1: Guidelines"
+    when
+        $section: Section(
+            (
+                containsString("DATA REQUIREMENT")
+                || containsString("TEST GUIDELINE")
+                || containsString("MÉTODO(S) DE REFERÊNCIA(S):")
+            )
+            && (
+                containsString("OECD")
+                || containsString("EPA")
+                || containsString("OPPTS")
+            )
+        )
+    then
+        Stream.of(entityCreationService.byRegex("OECD (No\\.? )?\\d{3}( \\(\\d{4}\\))?", "oecd_guideline", EntityType.ENTITY, $section).peek(e -> e.setRedactionReason("OECD Guideline found")),
+                entityCreationService.byRegex("OECD[\\s,]{1}(?:.{1,40}.(?>Procedure|Method).{1,20}\\d{3,4}(?>.{1,100}\\d{4}\\))?|\\[.{1,20}.Skin.{1,20}\\]|[\\d\\s,\\(\\)]{7,10}|[\\w\\.\\s]{1,15}[\\d]{3}\\s\\(\\d{4}\\)|.{0,20}[Nn]umber\\s\\d{3}.{0,1}|Test Guideline \\d{3})", "oecd_guideline", EntityType.ENTITY, $section).peek(e -> e.setRedactionReason("OECD Guideline found")),
+                entityCreationService.byRegex("EPA (OPPTS )?\\d{3}[. ]\\d{4}( \\(\\d{4}\\))?", "epa_guideline", EntityType.ENTITY, $section).peek(e -> e.setRedactionReason("EPA Guideline found")),
+                entityCreationService.byRegex("EC (Directive )?(No\\.? )?\\d{3,4}\\/\\d{3,4}((,? B(\\.| )\\d{1,2}\\.?)? \\(\\d{4}\\))?", "ec_guideline", EntityType.ENTITY, $section).peek(e -> e.setRedactionReason("EC Guideline found")),
+                entityCreationService.byRegex("Commission Regulation \\(EC\\) No \\d{3}\\/\\d{4}", "ec_guideline", EntityType.ENTITY, $section).peek(e -> e.setRedactionReason("EC Guideline found")),
+                entityCreationService.byRegex("OECD Method 4\\d{2}.{5,40}\\(.{5,40}\\d{4}\\)", "oecd_guideline", EntityType.ENTITY, $section).peek(e -> e.setRedactionReason("OECD Guideline year found")),
+                // Examples found in PoC 1
+                //entityCreationService.byRegex("((OECD Guidelines for Testing of Chemicals, Procedure)|(OECD Guidelines for the Testing of Chemicals No\\.)|(OECD Test Guideline)|(OECD \\[Test Guideline, Number)) \\d{3}( \\(\\d{4}\\))?", "oecd_guideline", EntityType.ENTITY, $section).peek(e -> e.setRedactionReason("OECD Guideline year found")),
+                entityCreationService.byRegex("OPPTS (Guideline Number )?\\d{3}\\.\\d{4}( \\(\\d{4}\\))?", "epa_guideline", EntityType.ENTITY, $section).peek(e -> e.setRedactionReason("EPA Guideline found")),
+                // new approach OECD Guideline: the integer argument selects the capture group that becomes the entity
+                entityCreationService.byRegex("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2})", "oecd_guideline_number", EntityType.ENTITY,1, $section).peek(e -> e.setRedactionReason("OECD Guideline no. found")),
+                entityCreationService.byRegex("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2}),?\\s\\(?(\\d{4})\\)?", "oecd_guideline_year", EntityType.ENTITY,2, $section).peek(e -> e.setRedactionReason("OECD Guideline year found")),
+                entityCreationService.byRegex("(?<=OECD)[\\w\\s,\\[\\]]{1,10}\\((\\d{4})\\)\\s(4[\\d]{2})", "oecd_guideline_year", EntityType.ENTITY,1, $section).peek(e -> e.setRedactionReason("OECD Guideline year found")),
+                entityCreationService.byRegex("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", "oecd_guideline_number", EntityType.ENTITY,1, $section).peek(e -> e.setRedactionReason("OECD Guideline number found")),
+                entityCreationService.byRegex("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", "oecd_guideline_year", EntityType.ENTITY,2, $section).peek(e -> e.setRedactionReason("OECD Guideline year found")),
+                // missing OECD guideline rules for RFP demo file
+                entityCreationService.byRegex("(?<=OECD) Guideline (4\\d{2})", "oecd_guideline_number", EntityType.ENTITY,1, $section).peek(e -> e.setRedactionReason("OECD Guideline number found")),
+                entityCreationService.byRegex("OECD Guideline 4\\d{2}", "oecd_guideline", EntityType.ENTITY, $section).peek(e -> e.setRedactionReason("OECD Guideline found")))
+            .flatMap(a -> a)
+            .forEach(e -> {
+                e.addMatchedRule("1");
+                e.setRedaction(true);
+                e.setLegalBasis("n-a");
+            });
+    end
+
+
+// Marks guideline entities that already exist in a guideline section (same section test as
+// rule 1) as redactions with matched rule "1".
+rule "1b: Guidelines"
+    when
+        $section: Section(
+            (containsString("DATA REQUIREMENT") || containsString("TEST GUIDELINE") || containsString("MÉTODO(S) DE REFERÊNCIA(S):")),
+            (containsString("OECD") || containsString("EPA") || containsString("OPPTS")),
+            (hasEntitiesOfType("oecd_guideline") || hasEntitiesOfType("epa_guideline") || hasEntitiesOfType("ec_guideline"))
+        )
+    then
+        $section.getEntitiesOfType(List.of("oecd_guideline","ec_guideline", "epa_guideline"))
+            .forEach(guideline -> {
+                guideline.setRedactionReason("OECD guideline found.");
+                guideline.setLegalBasis("n-a");
+                guideline.setRedaction(true);
+                guideline.addMatchedRule("1");
+            });
+    end
+
+
+// Counterpart of rule 1b: in sections that hold guideline entities but do NOT pass the
+// guideline-section test, those entities are removed from the graph and retracted.
+rule "1c: Guidelines"
+    when
+        $section: Section(
+            (hasEntitiesOfType("oecd_guideline") || hasEntitiesOfType("epa_guideline") || hasEntitiesOfType("ec_guideline"))
+            && !(
+                (containsString("DATA REQUIREMENT") || containsString("TEST GUIDELINE") || containsString("MÉTODO(S) DE REFERÊNCIA(S):"))
+                && (containsString("OECD") || containsString("EPA") || containsString("OPPTS"))
+            )
+        )
+    then
+        $section.getEntitiesOfType(List.of("oecd_guideline", "ec_guideline", "epa_guideline"))
+            .forEach(stray -> {
+                stray.removeFromGraph();
+                retract(stray);
+            });
+    end
+
+
+
+
+// Redacts the first line that follows "Report Number:" in a section that also contains
+// "LABORATORY PROJECT ID".
+rule "2: Report number"
+    when
+        $section: Section(containsString("LABORATORY PROJECT ID") && containsString("Report Number:"))
+    then
+        entityCreationService.lineAfterString("Report Number:", "report_number", EntityType.ENTITY, $section)
+            .findFirst()
+            .ifPresent(reportNumber -> {
+                reportNumber.addMatchedRule("2");
+                reportNumber.setRedaction(true);
+                reportNumber.setRedactionReason("Report number found");
+                reportNumber.setLegalBasis("n-a");
+            });
+    end
+
+
+
+
+// Redacts the line after any of the known "experimental start date" labels
+// (several capitalisations, with and without a trailing colon).
+rule "3: Experimental Starting Date"
+    when
+        $section: Section(
+            containsString("Experimental I. Starting Date:")
+            || containsString("Experimental II. Starting Date:")
+            || containsStringIgnoreCase("experimental start date")
+            || containsStringIgnoreCase("experimental starting date")
+        )
+    then
+        entityCreationService.lineAfterStrings(
+                List.of(
+                    "Experimental start date",
+                    "Experimental start date:",
+                    "Experimental Starting Date",
+                    "Experimental Starting Date:",
+                    "Experimental starting date",
+                    "Experimental starting date:",
+                    "Experimental Start Date",
+                    "Experimental Start Date:",
+                    "Experimental I. Starting Date:",
+                    "Experimental II. Starting Date:"),
+                "experimental_start_date", EntityType.ENTITY, $section)
+            .forEach(startDate -> {
+                startDate.addMatchedRule("3");
+                startDate.setRedaction(true);
+                startDate.setRedactionReason("Experimental start date found");
+                startDate.setLegalBasis("n-a");
+            });
+    end
+
+
+// Redacts the line after any of the known "experimental termination/completion date" labels.
+rule "3: Experimental Completion Date"
+    when
+        $section: Section(
+            containsStringIgnoreCase("experimental termination date")
+            || containsStringIgnoreCase("experimental completion date")
+        )
+    then
+        entityCreationService.lineAfterStrings(
+                List.of(
+                    "Experimental termination date",
+                    "Experimental termination date:",
+                    "Experimental Completion Date",
+                    "Experimental Completion Date:",
+                    "Experimental completion date",
+                    "Experimental completion date:",
+                    "Experimental Termination Date",
+                    "Experimental Termination Date:"),
+                "experimental_end_date", EntityType.ENTITY, $section)
+            .forEach(endDate -> {
+                endDate.addMatchedRule("3");
+                endDate.setRedaction(true);
+                endDate.setRedactionReason("Experimental end date found");
+                endDate.setLegalBasis("n-a");
+            });
+    end
+
+
+ // Ignore species and strain in irrelevant study types.
+ // Fix: the OECD list previously also contained 402, 403, 404, 405, 425, 429 and 436, which
+ // rules 4b, 4c and 5 treat as the relevant study types. With those numbers listed here, every
+ // species/strain entity was removed before it could be redacted. Only the numbers that no
+ // other species/strain rule handles remain.
+ // NOTE(review): the narrowed list is inferred from the comment above and from rules 4b/4c/5 - confirm.
+ rule "4a: Species"
+     when
+         FileAttribute(label == "OECD Number", valueEqualsAnyOf("438","439","471","487"))
+         $section: Section(hasEntitiesOfType("species") || hasEntitiesOfType("strain"))
+     then
+         $section.getEntitiesOfType(List.of("species", "strain")).forEach(entity -> {
+             entity.removeFromGraph();
+             retract(entity);
+         });
+     end
+
+
+ // For the listed OECD study types, drop species and strain entities from every section whose
+ // headline contains none of "test system", "animals" or "specification".
+ rule "4b: Species"
+     when
+         FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436"))
+         $section: Section(
+             (hasEntitiesOfType("species") || hasEntitiesOfType("strain"))
+             && !getHeadline().containsString("test system")
+             && !getHeadline().containsString("animals")
+             && !getHeadline().containsString("specification")
+         )
+     then
+         $section.getEntitiesOfType(List.of("species", "strain"))
+             .forEach(hidden -> {
+                 hidden.removeFromGraph();
+                 retract(hidden);
+             });
+     end
+
+
+// Redacts every species entity of a section for the listed OECD study types.
+// NOTE(review): the matched-rule id recorded here is "25", not "4" - confirm this is intended.
+rule "4c: Species"
+    when
+        FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436"))
+        $section: Section(hasEntitiesOfType("species"))
+    then
+        $section.getEntitiesOfType("species").forEach(species -> {
+            species.setRedactionReason("Species found.");
+            species.setLegalBasis("n-a");
+            species.setRedaction(true);
+            species.addMatchedRule("25");
+        });
+    end
+
+
+// Redacts strain entities in sections that also contain a species entity and whose headline
+// contains "test system", "animals" or "specification" (listed OECD study types only).
+// NOTE(review): the matched-rule id recorded here is "26", not "5" - confirm this is intended.
+rule "5: Strain"
+    when
+        FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436"))
+        $section: Section(
+            hasEntitiesOfType("species"),
+            hasEntitiesOfType("strain"),
+            (getHeadline().containsString("test system") || getHeadline().containsString("animals") || getHeadline().containsString("specification"))
+        )
+    then
+        $section.getEntitiesOfType("strain").forEach(strain -> {
+            strain.setRedactionReason("Strain found.");
+            strain.setLegalBasis("n-a");
+            strain.setRedaction(true);
+            strain.addMatchedRule("26");
+        });
+    end
+
+
+// Takes the study title from the page-1 title box: a 1x1 table containing "Final Report" or
+// "SPL". The second child node of the single cell is redacted as the title.
+// Fix: the original used toList().get(1), which throws IndexOutOfBoundsException when the cell
+// has fewer than two children; skip(1).findFirst() simply yields no title in that case.
+rule "7: study title by document structure"
+    when
+        $table: Table(isOnPage(1),
+            (containsString("Final Report") || containsString("SPL")),
+            numberOfRows == 1,
+            numberOfCols == 1)
+    then
+
+        $table.getCell(0, 0).streamChildren()
+            .skip(1)
+            .findFirst()
+            .flatMap(titleNode -> entityCreationService.bySemanticNode(titleNode, "title", EntityType.ENTITY))
+            .ifPresent(entity -> {
+                entity.setRedactionReason("Study title found");
+                entity.setLegalBasis("n-a");
+                entity.setRedaction(true);
+                entity.addMatchedRule("7");
+            });
+    end
+
+
+// Text-based title extraction for page-1 sections containing "Final Report" or "SPL":
+// the first span between "TITLE" and "DATA REQUIREMENT", and the first span between
+// "Laboratories" and "SPL", are each redacted as the title.
+rule "7: study title old"
+    when
+        $section: Section(isOnPage(1) && (containsString("Final Report") || containsString("SPL")))
+    then
+// TODO
+// section.redactByRegExWithNewlines("(?<=\\n)[\\w\\W]{1,300}(?=\\nFinal Report)", true, 0, "title", 7, "Study title found", "n-a");
+
+        entityCreationService.betweenStrings("TITLE", "DATA REQUIREMENT", "title", EntityType.ENTITY, $section)
+            .findFirst()
+            .ifPresent(title -> {
+                title.setRedactionReason("Title found");
+                title.setLegalBasis("n-a");
+                title.setRedaction(true);
+                title.addMatchedRule("7");
+            });
+        entityCreationService.betweenStrings("Laboratories", "SPL", "title", EntityType.ENTITY, $section)
+            .findFirst()
+            .ifPresent(title -> {
+                title.setRedactionReason("Title found");
+                title.setLegalBasis("n-a");
+                title.setRedaction(true);
+                title.addMatchedRule("7");
+            });
+    end
+
+
+
+// Redacts the first line after "PERFORMING LABORATORY:" as the laboratory name.
+rule "8a: Performing Laboratory (Name)"
+    when
+        $section: Section(containsString("PERFORMING LABORATORY:"))
+    then
+        entityCreationService.lineAfterString("PERFORMING LABORATORY:", "laboratory_name", EntityType.ENTITY, $section)
+            .findFirst()
+            .ifPresent(labName -> {
+                labName.setRedactionReason("Performing Laboratory found");
+                labName.setLegalBasis("n-a");
+                labName.setRedaction(true);
+                labName.addMatchedRule("8");
+            });
+    end
+
+
+ // Turns every NER entity of type COUNTRY into a "laboratory_country" entity for each section
+ // that contains "PERFORMING LABORATORY:", marks it as a redaction and inserts it as a fact.
+ rule "8b: Performing Laboratory (Country)"
+     when
+         $nerEntities: NerEntities(hasEntitiesOfType("COUNTRY"))
+         $section: Section(containsString("PERFORMING LABORATORY:"))
+     then
+         $nerEntities.streamEntitiesOfType("COUNTRY")
+             .map(country -> entityCreationService.byNerEntity(country, "laboratory_country", EntityType.ENTITY, $section))
+             .forEach(labCountry -> {
+                 labCountry.setRedactionReason("Performing Laboratory found");
+                 labCountry.setLegalBasis("n-a");
+                 labCountry.setRedaction(true);
+                 labCountry.addMatchedRule("8");
+                 insert(labCountry);
+             });
+     end
+
+
+// Marks existing laboratory_country and laboratory_name entities as redactions when their
+// section contains the "PERFORMING LABORATORY:" label, either contiguous or split into
+// "PERFORMING" and "LABORATORY:".
+rule "8c: Performing Laboratory (Country & Name) from dict"
+    when
+        $section: Section(
+            (hasEntitiesOfType("laboratory_country") || hasEntitiesOfType("laboratory_name")),
+            (
+                containsString("PERFORMING LABORATORY:")
+                || (containsString("PERFORMING") && containsString("LABORATORY:"))
+            )
+        )
+    then
+        $section.getEntitiesOfType("laboratory_country").forEach(country -> {
+            country.setRedactionReason("Performing laboratory country dictionary entry found.");
+            country.setLegalBasis("n-a");
+            country.setRedaction(true);
+            country.addMatchedRule("8");
+        });
+        $section.getEntitiesOfType("laboratory_name").forEach(labName -> {
+            labName.setRedactionReason("Performing laboratory name dictionary entry found.");
+            labName.setLegalBasis("n-a");
+            labName.setRedaction(true);
+            labName.addMatchedRule("8");
+        });
+    end
+
+
+ // Counterpart of rule 8c: in sections without the "PERFORMING LABORATORY:" label, both
+ // laboratory_country and laboratory_name entities are removed from the graph and retracted.
+ rule "8d: Performing Laboratory (Country) from dict"
+     when
+         $section: Section(
+             (hasEntitiesOfType("laboratory_country") || hasEntitiesOfType("laboratory_name"))
+             && !(
+                 containsString("PERFORMING LABORATORY:")
+                 || (containsString("PERFORMING") && containsString("LABORATORY:"))
+             )
+         )
+     then
+         $section.getEntitiesOfType(List.of("laboratory_country", "laboratory_name"))
+             .forEach(stray -> {
+                 stray.removeFromGraph();
+                 retract(stray);
+             });
+     end
+
+
+
+// Redacts a headline as "glp_study" when it contains one of the known GLP compliance or
+// certificate titles.
+// Known limitation (from the original note): the headline is not found because of OCR.
+rule "9: GLP Study"
+    when
+        $headline: Headline(
+            containsString("GOOD LABORATORY PRACTICE COMPLIANCE")
+            || containsString("GOOD LABORATORY PRACTICE COMPLIANCE STATEMENT")
+            || (containsString("DECLARACAO DE CONFORMIDADE") && containsString("PRATICAS DE LABORATORIO"))
+            || containsString("GLP Certificate")
+            || containsString("GLP Certificates")
+            || containsString("GOOD LABORATORY PRACTICE (GLP) CERTIFICATE")
+            || containsString("Good Laboratory Practice Certificate")
+            || containsString("STATEMENT OF GLP COMPLIANCE AND AUTHENTICATION")
+        )
+    then
+        entityCreationService.bySemanticNode($headline, "glp_study", EntityType.ENTITY)
+            .ifPresent(glpHeadline -> {
+                glpHeadline.setRedactionReason("GLP Study found");
+                glpHeadline.setLegalBasis("n-a");
+                glpHeadline.setRedaction(true);
+                glpHeadline.addMatchedRule("9");
+            });
+    end
+
+
+// Extracts the batch number from a certificate of analysis / analytical report section.
+// The "batch" and "identification" tests and the label list include misspelt variants
+// (e.g. "bateb", "wentification"), presumably OCR misreadings - verify against sample files.
+rule "10: Batch number from CoA"
+    when
+        $section: Section(
+            (
+                getHeadline().containsString("Analytical Report")
+                || getHeadline().containsString("Certificate of Analysis")
+                || containsStringIgnoreCase("certificate of analysis")
+            ),
+            (
+                containsStringIgnoreCase("batch")
+                || containsStringIgnoreCase("bath")
+                || containsStringIgnoreCase("barch")
+                || containsStringIgnoreCase("bateb")
+            ),
+            (
+                containsStringIgnoreCase("identification")
+                || containsStringIgnoreCase("ldentitfication")
+                || containsStringIgnoreCase("wentification")
+                || containsStringIgnoreCase("mentification")
+                || containsStringIgnoreCase("kientification")
+                || containsStringIgnoreCase("reference number")
+                || containsStringIgnoreCase("test substance")
+            )
+        )
+    then
+        entityCreationService.lineAfterStrings(
+                List.of(
+                    "Batch Identification",
+                    "(Batch Identification):",
+                    "Bateb Identification",
+                    "Batch Wentification",
+                    "Batch Mentification",
+                    "Batch Kientification",
+                    "Barch Identification",
+                    "Bath ldentitfication",
+                    "Batch of test substance :"),
+                "batch_number", EntityType.ENTITY, $section)
+            .forEach(batch -> {
+                batch.setRedactionReason("Batch number found in CoA");
+                batch.setLegalBasis("n-a");
+                batch.setRedaction(true);
+                batch.addMatchedRule("10");
+            });
+    end
+
+
+// Extracts the batch number from test-substance / test-item sections (excluding component,
+// reference and blank sections), either from a "Batch ID <id>" pattern or from the line after
+// one of the known batch labels.
+// Fix: the "Batch ID (...)" regex declares a capture group for the id but was called without a
+// group index, so the entity would have included the "Batch ID " label. Capture group 1 is now
+// selected, matching how the other grouped regexes in this file are called.
+rule "10a: Batch number"
+    when
+        $section: Section(
+            (
+                getHeadline().containsString("Test and Control Substances")
+                || getHeadline().containsString("Test Substances")
+                || getHeadline().containsString("Test Substance")
+                || getHeadline().containsString("Test Item")
+            )
+            && !(
+                getHeadline().containsString("component")
+                || getHeadline().containsString("reference")
+                || getHeadline().containsString("blank")
+            )
+            && containsStringIgnoreCase("batch")
+        )
+    then
+        Stream.of(entityCreationService.byRegex("Batch ID ([A-Z\\d\\-]{7,14})", "batch_number", EntityType.ENTITY, 1, $section)
+                , entityCreationService.lineAfterStrings(List.of("Batch Identification",
+                    "Batch number:",
+                    "Batch reference number:",
+                    "Batch:",
+                    "Batch/Lot number:",
+                    "Batch (Lot) Number:",
+                    "Batch Number:",
+                    "Batch Nº:",
+                    "Batch no:"
+                    ), "batch_number", EntityType.ENTITY, $section)).flatMap(a -> a)
+            .forEach(entity -> {
+                entity.setRedactionReason("Batch number found");
+                entity.setLegalBasis("n-a");
+                entity.setRedaction(true);
+                entity.addMatchedRule("10");
+            });
+    end
+
+
+
+// For OECD 402/403/425/436 studies: in "Conclusion" / "Lethality" sections that mention a lethal
+// dose or concentration, extracts
+//   - the comparison phrase (capture group 2)                 -> ld50_greater
+//   - the numeric mg/kg or mg/L value (capture group 1)       -> ld50_value
+//   - the lower / upper confidence bounds (groups 1 and 2)    -> confidence_minimal / confidence_maximal
+// Fix: "L[D|C]" also matched a literal '|' ("L|50"); the character class is now "L[DC]".
+rule "11: Conclusions - LD50, LC50, Confidence"
+    when
+        FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","425","436"))
+        $section: Section(
+            (getHeadline().containsString("Conclusion") || getHeadline().containsString("Lethality"))
+            && (containsString("LD") || containsString("LC") || containsString("50") || containsString("LD50") || containsString("lethal concentration") || containsString("lethal dose"))
+            && (
+                containsString("greater than")
+                || containsString("higher than")
+                || containsString("above")
+                || containsString("in excess")
+                || containsString("exceeds")
+                || containsString("was found to be")
+                || containsString("was calculated to be")
+                || containsString("estimated to be")
+            )
+        )
+    then
+        entityCreationService.byRegexIgnoreCase("(L[DC]\\s?50|lethal concentration|lethal dose).{1,200}(greater than|considered to be above|in excess of|exceeds|higher than)", "ld50_greater", EntityType.ENTITY,2, $section)
+            .forEach(entity -> {
+                entity.setRedactionReason("LD50 greater than found");
+                entity.setLegalBasis("n-a");
+                entity.setRedaction(true);
+                entity.addMatchedRule("11");
+            });
+
+        entityCreationService.byRegexIgnoreCase("\\b(?:(?:greater|higher) than|considered to be above|(?:was|is) (?:found|estimated) to be|was calculated to be|in excess of|exceeds|equal to)\\s?([\\d\\.]{1,6})\\s?mg\\/(?:kg|L)", "ld50_value", EntityType.ENTITY,1, $section)
+            .forEach(entity -> {
+                entity.setRedactionReason("LD50 value found");
+                entity.setLegalBasis("n-a");
+                entity.setRedaction(true);
+                entity.addMatchedRule("11");
+            });
+
+        entityCreationService.byRegexIgnoreCase("confidence interval (?:is )?([\\d\\.]{2,6}).{0,20} to (?:greater than )?([\\d\\.]{2,6})", "confidence_minimal", EntityType.ENTITY,1, $section)
+            .forEach(entity -> {
+                entity.setRedactionReason("Minimal Confidence found");
+                entity.setLegalBasis("n-a");
+                entity.setRedaction(true);
+                entity.addMatchedRule("11");
+            });
+
+        entityCreationService.byRegexIgnoreCase("confidence interval (?:is )?([\\d\\.]{2,6}).{0,20} to (?:greater than )?([\\d\\.]{2,6})", "confidence_maximal", EntityType.ENTITY,2, $section)
+            .forEach(entity -> {
+                entity.setRedactionReason("Maximal Confidence found");
+                entity.setLegalBasis("n-a");
+                entity.setRedaction(true);
+                entity.addMatchedRule("11");
+            });
+    end
+
+
+// For the listed OECD study types: in "General Information" sections, extracts the text that
+// describes deviations from the guidelines or the study plan. Several start/stop marker pairs
+// are tried; every hit becomes a "guideline_deviation" redaction with matched rule "12".
+// Fixes:
+//   - "[G|g]", "[S|s]" and "[P|p]" also matched a literal '|'; they are now "[Gg]", "[Ss]", "[Pp]".
+//   - the stop pattern of the second search listed "(?:Distribution of the report)" twice;
+//     the redundant alternative is removed (the set of matches is unchanged).
+rule "12: Guideline Deviation"
+    when
+        FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","471"))
+        $section: Section(
+            (
+                getHeadline().containsString("General Information")
+                || containsString("GENERAL INFORMATION")
+            )
+            && (
+                containsStringIgnoreCase("from the")
+                || containsStringIgnoreCase("to the")
+            )
+        )
+    then
+
+        entityCreationService.betweenRegexes("(?:Deviations? from the [Gg]uidelines?)(?: and| or)?( the)?(?: Study Plan)?", "(?:(?:Deviations? from the Study Plan)|(?:Performing laboratory test)|(?:Other)|(?:Retention of [Ss]amples)|(?:Amendments? to Final Protocol))", "guideline_deviation", EntityType.ENTITY, $section).forEach(entity -> {
+            entity.setRedactionReason("Deviation from Guidelines found");
+            entity.setLegalBasis("n-a");
+            entity.setRedaction(true);
+            entity.addMatchedRule("12");
+        });
+
+        entityCreationService.betweenRegexes("(?:Deviations? (?:from|to)(?: the)? [Ss]tudy [Pp]lan)", "(?:Regulatory Guidelines)|(?:Other)|(?:Distribution of the report)|(?:Performing laboratory test)|(?:Retention of [Ss]amples)", "guideline_deviation", EntityType.ENTITY, $section).forEach(entity -> {
+            entity.setRedactionReason("Deviation from Study Plan found");
+            entity.setLegalBasis("n-a");
+            entity.setRedaction(true);
+            entity.addMatchedRule("12");
+        });
+
+        entityCreationService.betweenStrings("Deviations from the study plan", "Regulatory Guidelines", "guideline_deviation", EntityType.ENTITY, $section).forEach(entity -> {
+            entity.setRedactionReason("Deviation from the study plan found");
+            entity.setLegalBasis("n-a");
+            entity.setRedaction(true);
+            entity.addMatchedRule("12");
+        });
+
+        entityCreationService.byRegex("(?>Study plan adherence)(.{1,20}deviations.{1,20} to the study plan.{0,50}\\.)\\s", "guideline_deviation", EntityType.ENTITY, 1, $section).forEach(entity -> {
+            entity.setRedactionReason("Guideline deviation found in text.");
+            entity.setLegalBasis("n-a");
+            entity.setRedaction(true);
+            entity.addMatchedRule("12");
+        });
+
+        entityCreationService.betweenStrings("Deviations from the study plan", "validity of the study.", "guideline_deviation", EntityType.ENTITY, $section).forEach(entity -> {
+            entity.setRedactionReason("Deviation from the study plan found");
+            entity.setLegalBasis("n-a");
+            entity.setRedaction(true);
+            entity.addMatchedRule("12");
+        });
+    end
+
+
+// For the listed OECD study types: in "Introduction" sections, redacts the sentence that starts
+// with "There were no deviations from the protocol".
+rule "12a: Guideline Deviation in text"
+    when
+        FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","471"))
+        $section: Section(
+            getHeadline().containsString("Introduction"),
+            containsStringIgnoreCase("deviations from the protocol")
+        )
+    then
+        entityCreationService.byRegex("There were no deviations from the protocol.{1,100}\\.\\s", "guideline_deviation", EntityType.ENTITY, $section)
+            .forEach(deviation -> {
+                deviation.setRedactionReason("Guideline deviation found in text.");
+                deviation.setLegalBasis("n-a");
+                deviation.setRedaction(true);
+                deviation.addMatchedRule("12");
+            });
+    end
+
+// 13: for OECD 425 documents, marks every section headed "Clinical Signs" or "Macroscopic
+// Findings" (headlines containing "TABLE" excluded) as a "clinical_signs" entity.
+rule "13: Clinical Signs"
+    when
+        FileAttribute(label == "OECD Number", value == "425")
+        $signsSection: Section(
+            (getHeadline().containsString("Clinical Signs") || getHeadline().containsString("Macroscopic Findings"))
+            && !getHeadline().containsString("TABLE")
+        )
+    then
+        // bySemanticNode yields an Optional; nothing happens when it is empty.
+        entityCreationService.bySemanticNode($signsSection, "clinical_signs", EntityType.ENTITY).ifPresent(redactionEntity -> {
+            redactionEntity.setRedactionReason("Clinical Signs found");
+            redactionEntity.setLegalBasis("n-a");
+            redactionEntity.setRedaction(true);
+            redactionEntity.addMatchedRule("13");
+        });
+end
+
+
+// 14: for OECD 425 documents, extracts dosage text from sections headed "Dosages" or
+// "Study Design" (headlines containing "TABLE" are skipped).
+rule "14: Dosages"
+    when
+        FileAttribute(label == "OECD Number", value == "425")
+        $dosageSection: Section(
+            (getHeadline().containsString("Dosages") || getHeadline().containsString("Study Design"))
+            && !getHeadline().containsString("TABLE")
+        )
+    then
+        // Variant 1: start marker "The animals were treated", stop marker ".".
+        entityCreationService.betweenStrings("The animals were treated", ".", "dosages", EntityType.ENTITY, $dosageSection).forEach(redactionEntity -> {
+            redactionEntity.setRedactionReason("Dosage found");
+            redactionEntity.setLegalBasis("n-a");
+            redactionEntity.setRedaction(true);
+            redactionEntity.addMatchedRule("14");
+        });
+        // Variant 2: same extraction for the start marker "Animals were treated".
+        entityCreationService.betweenStrings("Animals were treated", ".", "dosages", EntityType.ENTITY, $dosageSection).forEach(redactionEntity -> {
+            redactionEntity.setRedactionReason("Dosage found");
+            redactionEntity.setLegalBasis("n-a");
+            redactionEntity.setRedaction(true);
+            redactionEntity.addMatchedRule("14");
+        });
+        // Still open: the regex-based extraction noted in the TODO below.
+        //TODO section.redactByRegExWithNewlines("(?:\\.[\\s|\\n]|^.{5,20}\\n)([^\\.]{1,200}(?:animal|given|received)[^\\.]{1,200}dose\\s(?:levels?\\s)?(?:of|at)[^\\.]{1,200})(?:\\.[\\s|\\n|$])",true, 1, "dosages", 14, "Dosage found", "n-a");
+end
+
+// 15: OECD 425 only - one "mortality" entity over the merged boundaries of all PARAGRAPH sub-nodes of a "Mortality" headline's parent.
+rule "15: Mortality"
+    when
+        $mortalityHeadline: Headline(containsString("Mortality") && !containsString("TABLE"))
+        FileAttribute(label == "OECD Number", value == "425")
+    then
+        //FIXME
+        var paragraphBoundaries = $mortalityHeadline.getParent().streamAllSubNodesOfType(NodeType.PARAGRAPH).map(SemanticNode::getBoundary).toList();
+        var mortalityEntity = entityCreationService.byBoundary(Boundary.merge(paragraphBoundaries), "mortality", EntityType.ENTITY, $mortalityHeadline.getParent());
+        mortalityEntity.setRedactionReason("Mortality found");
+        mortalityEntity.setLegalBasis("n-a");
+        mortalityEntity.setRedaction(true);
+        mortalityEntity.addMatchedRule("15");
+end
+
+
+// 17: for the listed OECD study types, marks sections whose headline contains "Conclusion" as "study_conclusion".
+rule "17: Study Conclusion"
+    when
+        FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","471"))
+        $conclusionSection: Section(getHeadline().containsString("Conclusion"))
+    then
+        // Only acts when bySemanticNode actually returns an entity.
+        entityCreationService.bySemanticNode($conclusionSection, "study_conclusion", EntityType.ENTITY).ifPresent(redactionEntity -> {
+            redactionEntity.setRedactionReason("Study Conclusion found");
+            redactionEntity.setLegalBasis("n-a");
+            redactionEntity.setRedaction(true);
+            redactionEntity.addMatchedRule("17");
+        });
+end
+
+
+// 18: for OECD 425 documents, marks "Results" sections whose text mentions body weight
+// (in any of the four spellings listed below) as "weight_behavior_changes".
+rule "18: Weight Behavior Changes"
+    when
+        FileAttribute(label == "OECD Number", value == "425")
+        $resultsSection: Section(
+            getHeadline().containsString("Results")
+            && (
+                containsString("body weight") || containsString("body weights")
+                || containsString("bodyweight") || containsString("bodyweights")
+            )
+        )
+    then
+        //FIXME
+        // The section node itself is handed to bySemanticNode, not individual sentences.
+        entityCreationService.bySemanticNode($resultsSection, "weight_behavior_changes", EntityType.ENTITY).ifPresent(redactionEntity -> {
+            redactionEntity.setRedactionReason("Weight behavior changes found");
+            redactionEntity.setLegalBasis("n-a");
+            redactionEntity.setRedaction(true);
+            redactionEntity.addMatchedRule("18");
+        });
+end
+
+// 19: for OECD 402, 403 and 436 documents, marks necropsy / macroscopic sections as
+// "necropsy_findings"; headlines containing "Table" or "Appendix" are excluded.
+rule "19: Necropsy findings"
+    when
+        FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","436"))
+        $necropsySection: Section(
+            (getHeadline().containsString("Necropsy") || getHeadline().containsString("Macroscopic Findings")
+                || getHeadline().containsString("Macroscopic examination"))
+            && !getHeadline().containsString("Table")
+            && !getHeadline().containsString("Appendix")
+        )
+    then
+        // Nothing is changed when bySemanticNode returns an empty Optional.
+        entityCreationService.bySemanticNode($necropsySection, "necropsy_findings", EntityType.ENTITY).ifPresent(redactionEntity -> {
+            redactionEntity.setRedactionReason("Necropsy section found");
+            redactionEntity.setLegalBasis("n-a");
+            redactionEntity.setRedaction(true);
+            redactionEntity.addMatchedRule("19");
+        });
+end
+
+
+// 22: for OECD 403 documents, marks clinical / in-life / postmortem observation sections as
+// "clinical_observations"; headlines containing "Appendix" or "Table" are excluded.
+rule "22: Clinical observations"
+    when
+        FileAttribute(label == "OECD Number", value == "403")
+        $observationSection: Section(
+            (
+                getHeadline().containsString("Clinical Observations") || getHeadline().containsString("Clinical observations")
+                || getHeadline().containsString("In-life Observations") || getHeadline().containsString("Postmortem Observations")
+            )
+            && !getHeadline().containsString("Appendix")
+            && !getHeadline().containsString("Table")
+        )
+    then
+        // Nothing is changed when bySemanticNode returns an empty Optional.
+        entityCreationService.bySemanticNode($observationSection, "clinical_observations", EntityType.ENTITY).ifPresent(redactionEntity -> {
+            redactionEntity.setRedactionReason("Clinical observations section found");
+            redactionEntity.setLegalBasis("n-a");
+            redactionEntity.setRedaction(true);
+            redactionEntity.addMatchedRule("22");
+        });
+end
+
+
+/* These two rules were already commented out previously
+rule "23a: Bodyweight changes"
+ when
+ FileAttribute(label == "OECD Number", value == "403")
+ $section: Section(
+ containsString("Bodyweight")
+ && containsString("Group")
+ )
+ then
+ entityCreationService.betweenRegexes("\\.\\s\\bBodyweight\\s", "Group.{0,40}[\\s\\d(.]{1,10}mg/L\\)", "bodyweight_changes", EntityType.ENTITY, $section).forEach(entity -> {
+ entity.setRedactionReason("Bodyweight changes found in results");
+ entity.setLegalBasis("n-a");
+ entity.setRedaction(true);
+ entity.addMatchedRule(23);
+ });
+ entityCreationService.betweenRegexes("\\.\\s\\bBodyweight.{100,500}Group.{0,30}mg/L\\)", "\\..{1,20}Group", "bodyweight_changes", EntityType.ENTITY, $section).forEach(entity -> {
+ entity.setRedactionReason("Bodyweight changes found in results");
+ entity.setLegalBasis("n-a");
+ entity.setRedaction(true);
+ entity.addMatchedRule(23);
+ });
+ end
+
+
+rule "23b: Bodyweight changes"
+ when
+ FileAttribute(label == "OECD Number", value == "403")
+ $section: Section(
+ getHeadline().containsString("Bodyweight")
+ && containsString("Sighting")
+ && containsString("Main")
+ )
+ then
+ entityCreationService.betweenRegexes("\\bSighting[\\w\\s]{0,15}[\\s\\d(.]{1,10}mg/L\\)", "\\bMain[\\w\\s]{0,15}[\\s\\(\\d\\.]{1,10}mg/L\\)", "bodyweight_changes", EntityType.ENTITY, $section).forEach(entity -> {
+ entity.setRedactionReason("Bodyweight section found");
+ entity.setLegalBasis("n-a");
+ entity.setRedaction(true);
+ entity.addMatchedRule(23);
+ });
+
+ entityCreationService.betweenRegexes("\\bMain[\\w\\s]{0,15}[\\s\\d(.]{1,10}mg/L\\)", "the study\\.", "bodyweight_changes", EntityType.ENTITY, $section).forEach(entity -> {
+ entity.setRedactionReason("Bodyweight section found");
+ entity.setLegalBasis("n-a");
+ entity.setRedaction(true);
+ entity.addMatchedRule(23);
+ });
+ end
+*/
+
+
+// 23: for OECD 403 documents, marks sections with a body-weight headline (four spellings) as
+// "bodyweight_changes"; headlines containing "Appendix" or "TABLE" are excluded.
+rule "23: Bodyweight changes"
+    when
+        FileAttribute(label == "OECD Number", value == "403")
+        $bodyweightSection: Section(
+            (
+                getHeadline().containsString("Bodyweight") || getHeadline().containsString("Bodyweights")
+                || getHeadline().containsString("Body Weights") || getHeadline().containsString("Body Weight")
+            )
+            && !getHeadline().containsString("Appendix")
+            && !getHeadline().containsString("TABLE")
+        )
+    then
+        // Nothing is changed when bySemanticNode returns an empty Optional.
+        entityCreationService.bySemanticNode($bodyweightSection, "bodyweight_changes", EntityType.ENTITY).ifPresent(redactionEntity -> {
+            redactionEntity.setRedactionReason("Bodyweight section found");
+            redactionEntity.setLegalBasis("n-a");
+            redactionEntity.setRedaction(true);
+            redactionEntity.addMatchedRule("23");
+        });
+end
+
+
+// 24: for the listed OECD study types, marks sections whose headline contains
+// "study design" as "study_design".
+rule "24: Study Design"
+    when
+        FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","404","405","406","428","429","438","439","474","487"))
+        $designSection: Section(getHeadline().containsString("study design"))
+    then
+        // Nothing is changed when bySemanticNode returns an empty Optional.
+        entityCreationService.bySemanticNode($designSection, "study_design", EntityType.ENTITY).ifPresent(redactionEntity -> {
+            redactionEntity.setRedactionReason("Study design section found");
+            redactionEntity.setLegalBasis("n-a");
+            redactionEntity.setRedaction(true);
+            redactionEntity.addMatchedRule("24");
+        });
+end
+
+
+
+// 25: marks "Results" / "Conclusion" sections as "results_and_conclusion" unless the headline contains one of nine excluded terms.
+rule "25: Results and Conclusion (406, 428, 438, 439, 474 & 487)"
+    when
+        FileAttribute(label == "OECD Number", valueEqualsAnyOf("406","428","438","439","474","487"))
+        $resultSection: Section(
+            (getHeadline().containsString("Results") || getHeadline().containsString("Conclusion"))
+            && !getHeadline().containsString("POSITIVE CONTROL") && !getHeadline().containsString("Positive Control") && !getHeadline().containsString("Evaluation")
+            && !getHeadline().containsString("Micronucleus") && !getHeadline().containsString("TABLE") && !getHeadline().containsString("DISCUSSION")
+            && !getHeadline().containsString("CONCLUSIONS") && !getHeadline().containsString("Interpretation") && !getHeadline().containsString("Viability")
+        )
+    then
+        entityCreationService.bySemanticNode($resultSection, "results_and_conclusion", EntityType.ENTITY).ifPresent(redactionEntity -> {
+            redactionEntity.setRedactionReason("Results and Conclusion found");
+            redactionEntity.setLegalBasis("n-a");
+            redactionEntity.setRedaction(true);
+            redactionEntity.addMatchedRule("25");
+        });
+end
+
+
+
+// 26: for OECD 404 and 405, marks "Results" sections (headline without "Evaluation" or "study") as "detailing".
+rule "26: Detailing (404 & 405)"
+    when
+        FileAttribute(label == "OECD Number", valueEqualsAnyOf("404","405"))
+        $detailSection: Section(getHeadline().containsString("Results") && !getHeadline().containsString("Evaluation") && !getHeadline().containsString("study"))
+    then
+        // Nothing is changed when bySemanticNode returns an empty Optional.
+        entityCreationService.bySemanticNode($detailSection, "detailing", EntityType.ENTITY).ifPresent(redactionEntity -> {
+            redactionEntity.setRedactionReason("Detailing found");
+            redactionEntity.setLegalBasis("n-a");
+            redactionEntity.setRedaction(true);
+            redactionEntity.addMatchedRule("26");
+        });
+end
+
+
+// 32: for OECD 429 documents, marks as "preliminary_test_results" either a "Preliminary Screening
+// Test" section whose text contains "Clinical observations", or any "Pre-Experiment" section.
+rule "32: Preliminary Test Results (429)"
+    when
+        FileAttribute(label == "OECD Number", value == "429")
+        $preliminarySection: Section(
+            (getHeadline().containsString("Preliminary Screening Test") && containsString("Clinical observations")) || getHeadline().containsString("Pre-Experiment")
+        )
+    then
+        entityCreationService.bySemanticNode($preliminarySection, "preliminary_test_results", EntityType.ENTITY).ifPresent(redactionEntity -> {
+            redactionEntity.setRedactionReason("Preliminary Test Results found");
+            redactionEntity.setLegalBasis("n-a");
+            redactionEntity.setRedaction(true);
+            redactionEntity.addMatchedRule("32");
+        });
+end
+
+
+rule "33: Test Results (429)" // OECD 429: sections with one of three result headlines become "test_results"
+    when
+        FileAttribute(label == "OECD Number", value == "429")
+        $testResultSection: Section(getHeadline().containsString("RESULTS AND DISCUSSION") || getHeadline().containsString("Estimation of the proliferative response of lymph node cells") || getHeadline().containsString("Results in the Main Experiment"))
+    then
+        entityCreationService.bySemanticNode($testResultSection, "test_results", EntityType.ENTITY).ifPresent(redactionEntity -> {
+            redactionEntity.setRedactionReason("Test Results found");
+            redactionEntity.setLegalBasis("n-a");
+            redactionEntity.setRedaction(true);
+            redactionEntity.addMatchedRule("33");
+        });
+end
+
+
+// 34: for OECD 429 documents, looks for "animals per ... group" / "animals / ... group" wording
+// in sections that already hold "species" entities and tags the match as "approach_used".
+rule "34: Approach used (429)"
+    when
+        FileAttribute(label == "OECD Number", value == "429")
+        $speciesSection: Section(
+            hasEntitiesOfType("species")
+            && (containsStringIgnoreCase("animals per") || containsStringIgnoreCase("animals /"))
+        )
+    then
+        // NOTE(review): the integer argument (1) presumably selects capture group 1, which here is
+        // the literal word "group" - confirm against EntityCreationService.
+        entityCreationService.byRegexIgnoreCase("\\banimals (?:per|\\/) .{0,15}(group)\\b", "approach_used", EntityType.ENTITY,1, $speciesSection).forEach(redactionEntity -> {
+            redactionEntity.setRedactionReason("Study animal approach found.");
+            redactionEntity.setLegalBasis("n-a");
+            redactionEntity.setRedaction(true);
+            redactionEntity.addMatchedRule("34");
+        });
+end
+
+
+// 35: for OECD 405 and 429 documents, tags "male(s)" / "female(s)" as "sex" entities in
+// sections whose headline contains "animal" or "test system" (but not "selection") and whose
+// text mentions "sex:", "male" or "female" (case-insensitive).
+rule "35: Sex"
+    when
+        FileAttribute(label == "OECD Number", valueEqualsAnyOf("405","429"))
+        $animalSection: Section(
+            (
+                getHeadline().containsString("animal")
+                || getHeadline().containsString("test system")
+            )
+            && !getHeadline().containsString("selection")
+            && (containsStringIgnoreCase("sex:") || containsStringIgnoreCase("male") || containsStringIgnoreCase("female"))
+        )
+    then
+        // The optional prefix uses [Ss], not [S|s]: inside a character class '|' is a literal
+        // pipe, not alternation. Capture group 2 (the sex word) is the group passed on.
+        entityCreationService.byRegexIgnoreCase("([Ss]ex:)?[\\w\\s]{0,10}\\b(males?|females?)\\b", "sex", EntityType.ENTITY,2, $animalSection).forEach(redactionEntity -> {
+            redactionEntity.setRedactionReason("Test animal sex found");
+            redactionEntity.setLegalBasis("n-a");
+            redactionEntity.setRedaction(true);
+            redactionEntity.addMatchedRule("35");
+        });
+end
+
+
+
+// 35a: for OECD 405 documents, extracts animal counts and animal identifiers from sections
+// whose headline contains "animal", "test system" or "reaction" (but not "selection") and
+// whose text mentions "number of animals" or "no." (case-insensitive).
+rule "35a: Animal Number 405"
+    when
+        FileAttribute(label == "OECD Number", value == "405")
+        $animalSection: Section(
+            (
+                getHeadline().containsString("animal")
+                || getHeadline().containsString("test system")
+                || getHeadline().containsString("reaction")
+            )
+            && !getHeadline().containsString("selection")
+            && (containsStringIgnoreCase("number of animals") || containsStringIgnoreCase("no."))
+        )
+    then
+        // Count after "Number of animals:" (group 2), tagged "number_of_animals".
+        entityCreationService.byRegexIgnoreCase("(Number of animals:)[\\w\\s]{0,10}\\b([\\d]{1,3})\\b", "number_of_animals", EntityType.ENTITY,2, $animalSection).forEach(redactionEntity -> {
+            redactionEntity.setRedactionReason("Number of animals found");
+            redactionEntity.setLegalBasis("n-a");
+            redactionEntity.setRedaction(true);
+            redactionEntity.addMatchedRule("35");
+        });
+        // Identifier following " No. " (group 1), tagged "animal_numbers".
+        entityCreationService.byRegexIgnoreCase("(?:.{1,10} No\\. )([\\d\\w\\-]{3,8})", "animal_numbers", EntityType.ENTITY,1, $animalSection).forEach(redactionEntity -> {
+            redactionEntity.setRedactionReason("Number of animals found");
+            redactionEntity.setLegalBasis("n-a");
+            redactionEntity.setRedaction(true);
+            redactionEntity.addMatchedRule("35");
+        });
+end
+
+
+
+// 35b: for OECD 429 documents, extracts the number of animals per group from "animal" /
+// "test system" sections (not "selection") that mention "number of animals", "per" or "/", and "group".
+rule "35b: Animal Number 429"
+    when
+        FileAttribute(label == "OECD Number", value == "429")
+        $animalSection: Section(
+            (getHeadline().containsString("animal") || getHeadline().containsString("test system"))
+            && !getHeadline().containsString("selection")
+            && containsStringIgnoreCase("number of animals") && (containsStringIgnoreCase("per") || containsString("/")) && containsStringIgnoreCase("group")
+        )
+    then
+        // Count shortly after the "Number of animals per group:" label (group 2).
+        entityCreationService.byRegexIgnoreCase("(Number of animals per group:)[\\w\\s]{0,10}\\b([\\d]{1,3})\\b", "number_of_animals", EntityType.ENTITY,2, $animalSection).forEach(redactionEntity -> {
+            redactionEntity.setRedactionReason("Number of animals in group found");
+            redactionEntity.setLegalBasis("n-a");
+            redactionEntity.setRedaction(true);
+            redactionEntity.addMatchedRule("35");
+        });
+        // Count written as "<n> per group" within 60 characters of the label (group 2).
+        entityCreationService.byRegexIgnoreCase("(Number of animals per group:).{0,60}\\b([\\d]{1,3})\\sper group\\b", "number_of_animals", EntityType.ENTITY,2, $animalSection).forEach(redactionEntity -> {
+            redactionEntity.setRedactionReason("Number of animals in group found");
+            redactionEntity.setLegalBasis("n-a");
+            redactionEntity.setRedaction(true);
+            redactionEntity.addMatchedRule("35");
+        });
+        // Free-text "<n> ... group": this pattern has a single capturing group (the count), so
+        // group 1 is selected; group 2 does not exist here ("(?:treatment )?" is non-capturing).
+        entityCreationService.byRegexIgnoreCase("([\\d]{1,3})[\\w\\s\\/]{0,20}(?:treatment )?group\\b", "number_of_animals", EntityType.ENTITY,1, $animalSection).forEach(redactionEntity -> {
+            redactionEntity.setRedactionReason("Number of animals in group found");
+            redactionEntity.setLegalBasis("n-a");
+            redactionEntity.setRedaction(true);
+            redactionEntity.addMatchedRule("35");
+        });
+end
+
+
+// 35c: OECD 429 - in tables whose headline contains "TABLE" and "Individual", each cell under an animal-number header becomes an inserted "animal_numbers" entity.
+rule "35c: No. Of animals - Fallback to appendix tables listing all individual animals for 429"
+    when
+        $headerLabel: String() from List.of("Animal Number", "Animal No.", "Animal number")
+        $animalTable: Table(containsString($headerLabel) && getHeadline().containsString("TABLE") && getHeadline().containsString("Individual"))
+        FileAttribute(label == "OECD Number", value == "429")
+    then
+        $animalTable.streamTableCellsWithHeader($headerLabel)
+            .map(cell -> entityCreationService.bySemanticNode(cell, "animal_numbers", EntityType.ENTITY))
+            .flatMap(Optional::stream)
+            .forEach(entity -> {
+                entity.setRedaction(true);
+                entity.addMatchedRule("35");
+                entity.setRedactionReason("Animal number found. ("+$headerLabel+")");
+                entity.setLegalBasis("n-a");
+                insert(entity);
+            });
+end
+
+
+
+// 37: for OECD 403 and 436 documents, tags text matching the "4 hours" / "four hours" regex
+// below as "4h_exposure" in any section that mentions either phrase (case-insensitive).
+rule "37: 4h Exposure"
+    when
+        FileAttribute(label == "OECD Number", valueEqualsAnyOf("403","436"))
+        $exposureSection: Section(
+            containsStringIgnoreCase("4 hours") || containsStringIgnoreCase("four hours")
+        )
+    then
+        // Earlier pattern variant, left commented out:
+        /* entityCreationService.byRegexIgnoreCase("(?<=\\.\\s\\b).{1,100}(4|four) hours.{1,250}(?=\\b\\.|\\B\\.)", "4h_exposure", EntityType.ENTITY, $section).forEach(entity -> {
+        entity.setRedactionReason("4h exposure sentence found");
+        entity.setLegalBasis("n-a");
+        entity.setRedaction(true);
+        entity.addMatchedRule(37);
+        });*/
+        entityCreationService.byRegexIgnoreCase("((?<=\\.\\s\\b).{1,100}(4|four) hours.*?\\.) ", "4h_exposure", EntityType.ENTITY,1, $exposureSection).forEach(redactionEntity -> {
+            redactionEntity.setRedactionReason("4h exposure sentence found");
+            redactionEntity.setLegalBasis("n-a");
+            redactionEntity.setRedaction(true);
+            redactionEntity.addMatchedRule("37");
+        });
+end
+
+
+
+// 39: for OECD 404 documents, marks "Formulation" sections whose text contains "dilution"
+// as a "dilution" entity.
+rule "39: Dilution of the test substance"
+    when
+        FileAttribute(label == "OECD Number", value == "404")
+        $formulationSection: Section(getHeadline().containsString("Formulation") && containsString("dilution"))
+    then
+        // Nothing is changed when bySemanticNode returns an empty Optional.
+        entityCreationService.bySemanticNode($formulationSection, "dilution", EntityType.ENTITY).ifPresent(redactionEntity -> {
+            redactionEntity.setRedactionReason("Dilution found.");
+            redactionEntity.setLegalBasis("n-a");
+            redactionEntity.setRedaction(true);
+            redactionEntity.addMatchedRule("39");
+        });
+end
+
+
+// 40: for OECD 429 documents, marks "Positive Control" sections as "positive_control";
+// headlines containing "Appendix" or "Table" are excluded.
+rule "40: Positive Control"
+    when
+        FileAttribute(label == "OECD Number", value == "429")
+        $controlSection: Section(
+            getHeadline().containsString("Positive Control")
+            && !getHeadline().containsString("Appendix") && !getHeadline().containsString("Table")
+        )
+    then
+        // Nothing is changed when bySemanticNode returns an empty Optional.
+        entityCreationService.bySemanticNode($controlSection, "positive_control", EntityType.ENTITY).ifPresent(redactionEntity -> {
+            redactionEntity.setRedactionReason("Positive control found.");
+            redactionEntity.setLegalBasis("n-a");
+            redactionEntity.setRedaction(true);
+            redactionEntity.addMatchedRule("40");
+        });
+end
+
+
+rule "42: Mortality Statement" // OECD 402: "Mortality" sections (headline without "TABLE") become "mortality_statement"
+    when
+        FileAttribute(label == "OECD Number", value == "402")
+        $mortalitySection: Section(getHeadline().containsString("Mortality") && !getHeadline().containsString("TABLE"))
+    then
+        entityCreationService.bySemanticNode($mortalitySection, "mortality_statement", EntityType.ENTITY).ifPresent(redactionEntity -> {
+            redactionEntity.setRedactionReason("Mortality Statement found");
+            redactionEntity.setLegalBasis("n-a");
+            redactionEntity.setRedaction(true);
+            redactionEntity.addMatchedRule("42");
+        });
+end
+
+
+// 43: for OECD 425 documents, a table needs one mortality-type header and one dose-type header;
+// every cell under any of those headers then becomes an inserted "dose_mortality" entity.
+rule "43: Dose Mortality"
+    when
+        FileAttribute(label == "OECD Number", value == "425")
+        $doseTable: Table(
+            (hasHeader("Mortality") || hasHeader("Long Term Results") || hasHeader("Long Term Outcome") || hasHeader("Comments") || hasHeader("Viability / Mortality"))
+            && (hasHeader("Dose [mg/kg bodyweight]") || hasHeader("Dose (mg/kg)") || hasHeader("Dose levei (mg/kg)") || hasHeader("Dose Level (mg/kg)") || hasHeader("Dose level (mg/kg)") || hasHeader("Dosage [mg/kg body weight]"))
+        )
+    then
+        Stream.of($doseTable.streamTableCellsWithHeader("Mortality"),
+                $doseTable.streamTableCellsWithHeader("Comments"),
+                $doseTable.streamTableCellsWithHeader("Long Term Results"),
+                $doseTable.streamTableCellsWithHeader("Long Term Outcome"),
+                $doseTable.streamTableCellsWithHeader("Viability / Mortality"),
+                $doseTable.streamTableCellsWithHeader("Dose [mg/kg bodyweight]"),
+                $doseTable.streamTableCellsWithHeader("Dose levei (mg/kg)"),
+                $doseTable.streamTableCellsWithHeader("Dose Level (mg/kg)"),
+                $doseTable.streamTableCellsWithHeader("Dose level (mg/kg)"),
+                $doseTable.streamTableCellsWithHeader("Dose (mg/kg)"),
+                $doseTable.streamTableCellsWithHeader("Dosage [mg/kg body weight]")
+            ).flatMap(cellStream -> cellStream)
+            .map(cell -> entityCreationService.bySemanticNode(cell, "dose_mortality", EntityType.ENTITY))
+            .flatMap(Optional::stream)
+            .forEach(entity -> {
+                entity.setRedaction(true);
+                entity.addMatchedRule("43");
+                entity.setRedactionReason("Dose Mortality Data found.");
+                entity.setLegalBasis("n-a");
+                insert(entity);
+            });
+end
+
+
+// 44: for OECD 429 documents, marks sections whose headline contains "Results", is shorter than
+// 20 characters as a string and contains neither "Appendix" nor "Table" as "results_(main_study)".
+rule "44: Results (Main Study)"
+    when
+        FileAttribute(label == "OECD Number", value == "429")
+        $mainResultsSection: Section(
+            getHeadline().containsString("Results")
+            && getHeadline().toString().length() < 20
+            && !getHeadline().containsString("Appendix") && !getHeadline().containsString("Table")
+        )
+    then
+        // Nothing is changed when bySemanticNode returns an empty Optional.
+        entityCreationService.bySemanticNode($mainResultsSection, "results_(main_study)", EntityType.ENTITY).ifPresent(redactionEntity -> {
+            redactionEntity.setRedactionReason("Results for main study found.");
+            redactionEntity.setLegalBasis("n-a");
+            redactionEntity.setRedaction(true);
+            redactionEntity.addMatchedRule("44");
+        });
+end
+
+
+// 45: for OECD 402 documents, marks sections whose headline contains "study design" as "doses_(mg_kg_bw)".
+rule "45: Doses (mg/kg bodyweight)"
+    when
+        FileAttribute(label == "OECD Number", value == "402")
+        $designSection: Section(getHeadline().containsString("study design"))
+    then
+        // Nothing is changed when bySemanticNode returns an empty Optional.
+        entityCreationService.bySemanticNode($designSection, "doses_(mg_kg_bw)", EntityType.ENTITY).ifPresent(redactionEntity -> {
+            redactionEntity.setRedactionReason("Doses per bodyweight information found");
+            redactionEntity.setLegalBasis("n-a");
+            redactionEntity.setRedaction(true);
+            redactionEntity.addMatchedRule("45");
+        });
+end
+
+
+//------------------------------------ Manual redaction rules ------------------------------------
+
+// Rule unit: MAN.0
+// MAN.0.0: applies a manual resize to the entity with the matching annotation id, then retracts the resize request.
+rule "MAN.0.0: Apply manual resize redaction" salience 128
+    when
+        $resizeRequest: ManualResizeRedaction($annotationId: annotationId)
+        $resizeTarget: RedactionEntity(matchesAnnotationId($annotationId))
+    then
+        manualRedactionApplicationService.resizeEntityAndReinsert($resizeTarget, $resizeRequest);
+        retract($resizeRequest);
+        update($resizeTarget);
+end
+
+
+// Rule unit: MAN.1
+// MAN.1.0: removes entities targeted by an approved, dated, non-dictionary IdRemoval unless an approved, dated force redaction exists for the same id.
+rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to Entity" salience 128
+    when
+        IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $annotationId: annotationId)
+        not ManualForceRedaction($annotationId == annotationId, status == AnnotationStatus.APPROVED, requestDate != null)
+        $removedEntity: RedactionEntity(matchesAnnotationId($annotationId))
+    then
+        $removedEntity.removeFromGraph();
+        retract($removedEntity);
+end
+
+// MAN.1.1: image counterpart of MAN.1.0 - the matching Image is flagged as ignored instead of being retracted.
+rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to Image" salience 128
+    when
+        IdRemoval(status == AnnotationStatus.APPROVED, !removeFromDictionary, requestDate != null, $annotationId: annotationId)
+        not ManualForceRedaction($annotationId == annotationId, status == AnnotationStatus.APPROVED, requestDate != null)
+        $ignoredImage: Image($annotationId == id)
+    then
+        $ignoredImage.setIgnored(true);
+end
+
+
+// Rule unit: MAN.2
+// MAN.2.0: an approved, dated force redaction sets its legal basis on the matching entity and marks it for redaction.
+rule "MAN.2.0: Apply force redaction" salience 128
+    when
+        ManualForceRedaction($annotationId: annotationId, status == AnnotationStatus.APPROVED, requestDate != null, $forcedLegalBasis: legalBasis)
+        $forcedEntity: RedactionEntity(matchesAnnotationId($annotationId))
+    then
+        $forcedEntity.setLegalBasis($forcedLegalBasis);
+        $forcedEntity.setRedaction(true);
+        $forcedEntity.setSkipRemoveEntitiesContainedInLarger(true);
+end
+
+
+// Rule unit: MAN.3
+// MAN.3.0: an approved recategorization sets the image type of the Image with the same id.
+rule "MAN.3.0: Apply image recategorization" salience 128
+    when
+        ManualImageRecategorization($annotationId: annotationId, status == AnnotationStatus.APPROVED, $newImageType: type)
+        $recategorizedImage: Image($annotationId == id)
+    then
+        $recategorizedImage.setImageType(ImageType.fromString($newImageType));
+end
+
+
+
+//------------------------------------ Entity merging rules ------------------------------------
+
+
+
+//------------------------------------ File attributes rules ------------------------------------
+
+// Rule unit: FA.1
+// FA.1.0: retracts any FileAttribute fact that has the same label and value as another FileAttribute fact.
+rule "FA.1.0: remove duplicate FileAttributes"
+    salience 64
+    when
+        $keptAttribute: FileAttribute($label: label, $value: value)
+        $redundantAttribute: FileAttribute(this != $keptAttribute, label == $label, value == $value)
+    then
+        retract($redundantAttribute);
+end
+
+
+//------------------------------------ Local dictionary search rules ------------------------------------
+
+// Rule unit: LDS.0
+// LDS.0.0: for every dictionary model with non-empty local entries, runs its local search over
+// the document and inserts each hit as a RECOMMENDATION entity tagged with Engine.RULE.
+rule "LDS.0.0: run local dictionary search"
+    agenda-group "LOCAL_DICTIONARY_ADDS" salience -999
+    when
+        DictionaryModel(!localEntries.isEmpty(), $dictionaryType: type, $localSearch: localSearch) from dictionary.getDictionaryModels()
+    then
+        entityCreationService.bySearchImplementation($localSearch, $dictionaryType, EntityType.RECOMMENDATION, document).forEach(redactionEntity -> {
+            redactionEntity.addEngine(Engine.RULE);
+            insert(redactionEntity);
+        });
+end
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/402_F.3.1 - A13617AV - Acute Dermal Toxicity.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/402_F.3.1 - A13617AV - Acute Dermal Toxicity.pdf
new file mode 100644
index 00000000..2ba18d1f
Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/402_F.3.1 - A13617AV - Acute Dermal Toxicity.pdf differ
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/403_F.2 - A13617AV - Acute Inhalation Toxicity - Rats.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/403_F.2 - A13617AV - Acute Inhalation Toxicity - Rats.pdf
new file mode 100644
index 00000000..eddef2c9
Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/403_F.2 - A13617AV - Acute Inhalation Toxicity - Rats.pdf differ
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/404_F.3.4 - A13617AV - Primary Skin Irritation.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/404_F.3.4 - A13617AV - Primary Skin Irritation.pdf
new file mode 100644
index 00000000..0ebff2a5
Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/404_F.3.4 - A13617AV - Primary Skin Irritation.pdf differ
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/405_F.4 - A13617AV - Primary Eye Irritation Study.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/405_F.4 - A13617AV - Primary Eye Irritation Study.pdf
new file mode 100644
index 00000000..08df378f
Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/405_F.4 - A13617AV - Primary Eye Irritation Study.pdf differ
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/406_SENSIBILIZAÇÃO - A13617AV - Skin Sensitisation.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/406_SENSIBILIZAÇÃO - A13617AV - Skin Sensitisation.pdf
new file mode 100644
index 00000000..8712f74e
Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/406_SENSIBILIZAÇÃO - A13617AV - Skin Sensitisation.pdf differ
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/425_F.1.1.1 - A13617AV - Acute Oral Toxicity Study.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/425_F.1.1.1 - A13617AV - Acute Oral Toxicity Study.pdf
new file mode 100644
index 00000000..2f4057af
Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/425_F.1.1.1 - A13617AV - Acute Oral Toxicity Study.pdf differ
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/471_G.1.1 - A13617AV - Reverse Mutation Assay - S. typhimurium & E. coli.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/471_G.1.1 - A13617AV - Reverse Mutation Assay - S. typhimurium & E. coli.pdf
new file mode 100644
index 00000000..5876e193
Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/471_G.1.1 - A13617AV - Reverse Mutation Assay - S. typhimurium & E. coli.pdf differ
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/474_G.1.2 - 1768300_MMNA_A13617AV_report.pdf b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/474_G.1.2 - 1768300_MMNA_A13617AV_report.pdf
new file mode 100644
index 00000000..933c50af
Binary files /dev/null and b/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/Documine/Flora/A13617AV/474_G.1.2 - 1768300_MMNA_A13617AV_report.pdf differ