diff --git a/redaction-service-v1/redaction-service-server-v1/build.gradle.kts b/redaction-service-v1/redaction-service-server-v1/build.gradle.kts index 22e1f1b3..8df0cc47 100644 --- a/redaction-service-v1/redaction-service-server-v1/build.gradle.kts +++ b/redaction-service-v1/redaction-service-server-v1/build.gradle.kts @@ -12,7 +12,7 @@ plugins { description = "redaction-service-server-v1" -val layoutParserVersion = "0.74.0" +val layoutParserVersion = "0.75.0" val jacksonVersion = "2.15.2" val droolsVersion = "9.44.0.Final" val pdfBoxVersion = "3.0.0" diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/component/Entity.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/component/Entity.java index 68f25ade..09343eec 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/component/Entity.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/component/Entity.java @@ -80,7 +80,9 @@ public class Entity { .legalBasis(e.getLegalBasis()) .imported(e.isImported()) .section(e.getSection()) - .color(e.getColor()).positions(e.getPositions()).containingNode(document.getDocumentTree().getEntryById(e.getContainingNodeId()).getNode()) + .color(e.getColor()) + .positions(e.getPositions()) + .containingNode(document.getDocumentTree().getEntryById(e.getContainingNodeId()).getNode()) .textBefore(e.getTextBefore()) .textAfter(e.getTextAfter()) .startOffset(e.getStartOffset()) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/ImportedRedactionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/ImportedRedactionService.java index 6c69f8e8..a43acda9 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/ImportedRedactionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/ImportedRedactionService.java @@ -1,5 +1,7 @@ package com.iqser.red.service.redaction.v1.server.service; +import java.util.ArrayList; +import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.stream.Collectors; @@ -111,6 +113,8 @@ public class ImportedRedactionService { .state(EntryState.APPLIED) .positions(importedRedaction.getPositions()) .color(getColor(IMPORTED_REDACTION_TYPE, dossierTemplateId)) + .containingNodeId(Collections.emptyList()) + .value("") .build(); entityLogEntries.add(redactionLogEntry); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java index 2356a161..cdafda65 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java @@ -6,6 +6,8 @@ import java.io.FileOutputStream; import java.io.IOException; import java.util.List; +import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType; +import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; @@ -19,6 +21,7 @@ import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.FilterType; import org.springframework.context.annotation.Import; import org.springframework.context.annotation.Primary; +import org.springframework.core.io.ClassPathResource; import org.springframework.test.context.junit.jupiter.SpringExtension; import com.iqser.red.commons.jackson.ObjectMapperFactory; @@ -50,7 +53,11 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest { // @Disabled public void titleExtraction() throws IOException { - AnalyzeRequest request = uploadFileToStorage("files/Documine/Flora/402Study-ocred.pdf"); + AnalyzeRequest request = uploadFileToStorage("files/ImportedRedactions/18 Chlorothalonil RAR 08 Volume 3CA B 6a Oct 2017.pdf"); + + ClassPathResource importedRedactionClasspathResource = new ClassPathResource("files/ImportedRedactions/18 Chlorothalonil RAR 08 Volume 3CA B 6a Oct 2017.IMPORTED_REDACTIONS.json"); + storageService.storeObject(TenantContext.getTenantId(), RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMPORTED_REDACTIONS), importedRedactionClasspathResource.getInputStream()); + // AnalyzeRequest request = prepareStorage("files/Documine/Flora/ProblemDocs/SOLICITA_VICTRATO-GOLD-II_Item 21_Mutacao_Genica (1).pdf", // "files/Documine/Flora/ProblemDocs/SOLICITA_VICTRATO-GOLD-II_Item 21_Mutacao_Genica (1).TABLES.json"); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl index 1361cd25..902ba12f 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl @@ -116,6 +116,23 @@ rule "H.3.0: Study Type File Attribute" .ifPresent(fileAttribute -> insert(fileAttribute)); end +rule "H.3.1: Study Type File Attribute in Headlines" + when + not FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","438","439","471","487")) + $page: Page($pageNumber:number, + getMainBodyTextBlock().getSearchText().contains("DATA REQUIREMENT") + || getMainBodyTextBlock().getSearchText().contains("TEST GUIDELINE") + || getMainBodyTextBlock().getSearchText().contains("MÉTODO(S) DE REFERÊNCIA(S):")) + $headline: Headline(onPage($pageNumber), containsString("OECD") || containsString("EPA")|| containsString("OPPTS")) + then + Stream.of(RedactionSearchUtility.findTextRangesByRegexIgnoreCase("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|(?:.{5,40}(?:Number |Procedure |Guideline )))(4[\\d]{2})", 1, $headline.getTextBlock()), + RedactionSearchUtility.findTextRangesByRegexIgnoreCase("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", 1, $headline.getTextBlock()), + RedactionSearchUtility.findTextRangesByRegexIgnoreCase("(?<=OECD) Guideline (4\\d{2})", 1, $headline.getTextBlock())).flatMap(Collection::stream).findFirst() + .map(textRange -> $headline.getTextBlock().subSequence(textRange).toString()) + .map(value -> FileAttribute.builder().label("OECD Number").value(value).build()) + .ifPresent(fileAttribute -> insert(fileAttribute)); + end + //------------------------------------ General documine rules ------------------------------------ @@ -232,6 +249,58 @@ rule "DOC.1.3: Guidelines" }); end +rule "DOC.1.4: Guideline in Headlines" + when + $page: Page($pageNumber:number, + getMainBodyTextBlock().getSearchText().contains("DATA REQUIREMENT") + || getMainBodyTextBlock().getSearchText().contains("TEST GUIDELINE") + || getMainBodyTextBlock().getSearchText().contains("MÉTODO(S) DE REFERÊNCIA(S):")) + $headline: Headline(onPage($pageNumber), containsString("OECD") || containsString("EPA")|| containsString("OPPTS")) + then + entityCreationService.byRegex("OECD (No\\.? )?\\d{3}( \\(\\d{4}\\))?", "oecd_guideline", EntityType.ENTITY, $headline).forEach(entity -> + entity.apply("DOC.1.4", "OECD Guideline found", "n-a") + ); + entityCreationService.byRegex("OECD[\\s,]{1}(?:.{1,40}.(?>Procedure|Method).{1,20}\\d{3,4}(?>.{1,100}\\d{4}\\))?|\\[.{1,20}.Skin.{1,20}\\]|[\\d\\s,\\(\\)]{7,10}|[\\w\\.\\s]{1,15}[\\d]{3}\\s\\(\\d{4}\\)|.{0,20}[N|n]umber\\s\\d{3}.{0,1}|Test Guideline \\d{3})", "oecd_guideline", EntityType.ENTITY, $headline).forEach(entity -> + entity.apply("DOC.1.4", "OECD Guideline found", "n-a") + ); + entityCreationService.byRegex("EPA (OPPTS )?\\d{3}[. ]\\d{4}( \\(\\d{4}\\))?", "epa_guideline", EntityType.ENTITY, $headline).forEach(entity -> + entity.apply("DOC.1.4", "EPA Guideline found", "n-a") + ); + entityCreationService.byRegex("EC (Directive )?(No\\.? )?\\d{3,4}\\/\\d{3,4}((,? B(\\.| )\\d{1,2}\\.?)? \\(\\d{4}\\))?", "ec_guideline", EntityType.ENTITY, $headline).forEach(entity -> + entity.apply("DOC.1.4", "EC Guideline found", "n-a") + ); + entityCreationService.byRegex("Commission Regulation \\(EC\\) No \\d{3}\\/\\d{4}", "ec_guideline", EntityType.ENTITY, $headline).forEach(entity -> + entity.apply("DOC.1.4", "EC Guideline found", "n-a") + ); + entityCreationService.byRegex("OECD Method 4\\d{2}.{5,40}\\(.{5,40}\\d{4}\\)", "oecd_guideline", EntityType.ENTITY, $headline).forEach(entity -> + entity.apply("DOC.1.4", "OECD Guideline year found", "n-a") + ); + entityCreationService.byRegex("OPPTS (Guideline Number )?\\d{3}\\.\\d{4}( \\(\\d{4}\\))?", "epa_guideline", EntityType.ENTITY, $headline).forEach(entity -> + entity.apply("DOC.1.4", "EPA Guideline found", "n-a") + ); + entityCreationService.byRegex("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2})", "oecd_guideline_number", EntityType.ENTITY,1, $headline).forEach(entity -> + entity.apply("DOC.1.4", "OECD Guideline no. found", "n-a") + ); + entityCreationService.byRegex("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2}),?\\s\\(?(\\d{4})\\)?", "oecd_guideline_year", EntityType.ENTITY,2, $headline).forEach(entity -> + entity.apply("DOC.1.4", "OECD Guideline year found", "n-a") + ); + entityCreationService.byRegex("(?<=OECD)[\\w\\s,\\[\\]]{1,10}\\((\\d{4})\\)\\s(4[\\d]{2})", "oecd_guideline_year", EntityType.ENTITY,1, $headline).forEach(entity -> + entity.apply("DOC.1.4", "OECD Guideline year found", "n-a") + ); + entityCreationService.byRegex("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", "oecd_guideline_number", EntityType.ENTITY,1, $headline).forEach(entity -> + entity.apply("DOC.1.4", "OECD Guideline number found", "n-a") + ); + entityCreationService.byRegex("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", "oecd_guideline_year", EntityType.ENTITY,2, $headline).forEach(entity -> + entity.apply("DOC.1.4", "OECD Guideline year found", "n-a") + ); + entityCreationService.byRegex("(?<=OECD) Guideline (4\\d{2})", "oecd_guideline_number", EntityType.ENTITY,1, $headline).forEach(entity -> + entity.apply("DOC.1.4", "OECD Guideline number found", "n-a") + ); + entityCreationService.byRegex("OECD Guideline 4\\d{2}", "oecd_guideline", EntityType.ENTITY, $headline).forEach(entity -> + entity.apply("DOC.1.4", "OECD Guideline found", "n-a") + ); + end + // Rule unit: DOC.2 rule "DOC.2.0: Report number"