RED-7891: Fixed NullPointerException when imported redactions are available #193

Merged
dominique.eiflaender1 merged 1 commit from RED-7891 into master 2023-11-13 11:54:29 +01:00
5 changed files with 85 additions and 3 deletions

View File

@ -12,7 +12,7 @@ plugins {
description = "redaction-service-server-v1"
val layoutParserVersion = "0.74.0"
val layoutParserVersion = "0.75.0"
val jacksonVersion = "2.15.2"
val droolsVersion = "9.44.0.Final"
val pdfBoxVersion = "3.0.0"

View File

@ -80,7 +80,9 @@ public class Entity {
.legalBasis(e.getLegalBasis())
.imported(e.isImported())
.section(e.getSection())
.color(e.getColor()).positions(e.getPositions()).containingNode(document.getDocumentTree().getEntryById(e.getContainingNodeId()).getNode())
.color(e.getColor())
.positions(e.getPositions())
.containingNode(document.getDocumentTree().getEntryById(e.getContainingNodeId()).getNode())
.textBefore(e.getTextBefore())
.textAfter(e.getTextAfter())
.startOffset(e.getStartOffset())

View File

@ -1,5 +1,7 @@
package com.iqser.red.service.redaction.v1.server.service;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.stream.Collectors;
@ -111,6 +113,8 @@ public class ImportedRedactionService {
.state(EntryState.APPLIED)
.positions(importedRedaction.getPositions())
.color(getColor(IMPORTED_REDACTION_TYPE, dossierTemplateId))
.containingNodeId(Collections.emptyList())
.value("")
.build();
entityLogEntries.add(redactionLogEntry);

View File

@ -6,6 +6,8 @@ import java.io.FileOutputStream;
import java.io.IOException;
import java.util.List;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
@ -19,6 +21,7 @@ import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.FilterType;
import org.springframework.context.annotation.Import;
import org.springframework.context.annotation.Primary;
import org.springframework.core.io.ClassPathResource;
import org.springframework.test.context.junit.jupiter.SpringExtension;
import com.iqser.red.commons.jackson.ObjectMapperFactory;
@ -50,7 +53,11 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
// @Disabled
public void titleExtraction() throws IOException {
AnalyzeRequest request = uploadFileToStorage("files/Documine/Flora/402Study-ocred.pdf");
AnalyzeRequest request = uploadFileToStorage("files/ImportedRedactions/18 Chlorothalonil RAR 08 Volume 3CA B 6a Oct 2017.pdf");
ClassPathResource importedRedactionClasspathResource = new ClassPathResource("files/ImportedRedactions/18 Chlorothalonil RAR 08 Volume 3CA B 6a Oct 2017.IMPORTED_REDACTIONS.json");
storageService.storeObject(TenantContext.getTenantId(), RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.IMPORTED_REDACTIONS), importedRedactionClasspathResource.getInputStream());
// AnalyzeRequest request = prepareStorage("files/Documine/Flora/ProblemDocs/SOLICITA_VICTRATO-GOLD-II_Item 21_Mutacao_Genica (1).pdf",
// "files/Documine/Flora/ProblemDocs/SOLICITA_VICTRATO-GOLD-II_Item 21_Mutacao_Genica (1).TABLES.json");

View File

@ -116,6 +116,23 @@ rule "H.3.0: Study Type File Attribute"
.ifPresent(fileAttribute -> insert(fileAttribute));
end
// H.3.1: derives an "OECD Number" file attribute from a headline.
// The value inserted is the text covered by the first text range found by the regex searches below.
rule "H.3.1: Study Type File Attribute in Headlines"
when
// Guard: skip while a FileAttribute labelled "OECD Number" already satisfies valueEqualsAnyOf(...) for the listed numbers.
not FileAttribute(label == "OECD Number", valueEqualsAnyOf("402","403","404","405","425","429","436","438","439","471","487"))
// A page whose main body search text contains one of the three marker phrases.
$page: Page($pageNumber:number,
getMainBodyTextBlock().getSearchText().contains("DATA REQUIREMENT")
|| getMainBodyTextBlock().getSearchText().contains("TEST GUIDELINE")
|| getMainBodyTextBlock().getSearchText().contains("MÉTODO(S) DE REFERÊNCIA(S):"))
// A headline on that same page that mentions OECD, EPA or OPPTS.
$headline: Headline(onPage($pageNumber), containsString("OECD") || containsString("EPA")|| containsString("OPPTS"))
then
// Three regex searches run against the headline's text block; their results are concatenated in the
// order written, and only the first text range is used.
// NOTE(review): the literal 1 is presumably the capture-group index (the 4xx number) - confirm against RedactionSearchUtility.
// The text of that range becomes the value of a new "OECD Number" FileAttribute, which is inserted;
// nothing is inserted when no regex produces a range.
Stream.of(RedactionSearchUtility.findTextRangesByRegexIgnoreCase("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|(?:.{5,40}(?:Number |Procedure |Guideline )))(4[\\d]{2})", 1, $headline.getTextBlock()),
RedactionSearchUtility.findTextRangesByRegexIgnoreCase("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", 1, $headline.getTextBlock()),
RedactionSearchUtility.findTextRangesByRegexIgnoreCase("(?<=OECD) Guideline (4\\d{2})", 1, $headline.getTextBlock())).flatMap(Collection::stream).findFirst()
.map(textRange -> $headline.getTextBlock().subSequence(textRange).toString())
.map(value -> FileAttribute.builder().label("OECD Number").value(value).build())
.ifPresent(fileAttribute -> insert(fileAttribute));
end
//------------------------------------ General documine rules ------------------------------------
@ -232,6 +249,58 @@ rule "DOC.1.3: Guidelines"
});
end
// DOC.1.4: creates guideline entities (OECD / EPA / EC) from headlines.
// Every regex below is run against the matched headline, and each resulting entity is applied
// with rule id "DOC.1.4", a reason text, and the legal basis "n-a".
rule "DOC.1.4: Guideline in Headlines"
when
// A page whose main body search text contains one of the three marker phrases.
$page: Page($pageNumber:number,
getMainBodyTextBlock().getSearchText().contains("DATA REQUIREMENT")
|| getMainBodyTextBlock().getSearchText().contains("TEST GUIDELINE")
|| getMainBodyTextBlock().getSearchText().contains("MÉTODO(S) DE REFERÊNCIA(S):"))
// A headline on that same page that mentions OECD, EPA or OPPTS.
$headline: Headline(onPage($pageNumber), containsString("OECD") || containsString("EPA")|| containsString("OPPTS"))
then
// --- Whole-guideline mentions (entity covers the full regex match) ---
entityCreationService.byRegex("OECD (No\\.? )?\\d{3}( \\(\\d{4}\\))?", "oecd_guideline", EntityType.ENTITY, $headline).forEach(entity ->
entity.apply("DOC.1.4", "OECD Guideline found", "n-a")
);
// Fix: the character class was "[N|n]", which also matches a literal '|'; "[Nn]" matches only "Number"/"number".
entityCreationService.byRegex("OECD[\\s,]{1}(?:.{1,40}.(?>Procedure|Method).{1,20}\\d{3,4}(?>.{1,100}\\d{4}\\))?|\\[.{1,20}.Skin.{1,20}\\]|[\\d\\s,\\(\\)]{7,10}|[\\w\\.\\s]{1,15}[\\d]{3}\\s\\(\\d{4}\\)|.{0,20}[Nn]umber\\s\\d{3}.{0,1}|Test Guideline \\d{3})", "oecd_guideline", EntityType.ENTITY, $headline).forEach(entity ->
entity.apply("DOC.1.4", "OECD Guideline found", "n-a")
);
entityCreationService.byRegex("EPA (OPPTS )?\\d{3}[. ]\\d{4}( \\(\\d{4}\\))?", "epa_guideline", EntityType.ENTITY, $headline).forEach(entity ->
entity.apply("DOC.1.4", "EPA Guideline found", "n-a")
);
entityCreationService.byRegex("EC (Directive )?(No\\.? )?\\d{3,4}\\/\\d{3,4}((,? B(\\.| )\\d{1,2}\\.?)? \\(\\d{4}\\))?", "ec_guideline", EntityType.ENTITY, $headline).forEach(entity ->
entity.apply("DOC.1.4", "EC Guideline found", "n-a")
);
entityCreationService.byRegex("Commission Regulation \\(EC\\) No \\d{3}\\/\\d{4}", "ec_guideline", EntityType.ENTITY, $headline).forEach(entity ->
entity.apply("DOC.1.4", "EC Guideline found", "n-a")
);
// NOTE(review): type is "oecd_guideline" but the reason says "year found" - looks like a copy-paste; confirm the intended reason text.
entityCreationService.byRegex("OECD Method 4\\d{2}.{5,40}\\(.{5,40}\\d{4}\\)", "oecd_guideline", EntityType.ENTITY, $headline).forEach(entity ->
entity.apply("DOC.1.4", "OECD Guideline year found", "n-a")
);
entityCreationService.byRegex("OPPTS (Guideline Number )?\\d{3}\\.\\d{4}( \\(\\d{4}\\))?", "epa_guideline", EntityType.ENTITY, $headline).forEach(entity ->
entity.apply("DOC.1.4", "EPA Guideline found", "n-a")
);
// --- Guideline number / year extraction ---
// NOTE(review): the integer after EntityType.ENTITY is presumably the capture-group index that
// becomes the entity (1 or 2 below) - confirm against entityCreationService.byRegex.
entityCreationService.byRegex("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2})", "oecd_guideline_number", EntityType.ENTITY,1, $headline).forEach(entity ->
entity.apply("DOC.1.4", "OECD Guideline no. found", "n-a")
);
entityCreationService.byRegex("(?<=OECD)(?:[\\w\\s,\\[\\]\\(\\)\\.]{1,10}|.{5,40}(?:Number |Procedure |Guideline ))(4[\\d]{2}),?\\s\\(?(\\d{4})\\)?", "oecd_guideline_year", EntityType.ENTITY,2, $headline).forEach(entity ->
entity.apply("DOC.1.4", "OECD Guideline year found", "n-a")
);
entityCreationService.byRegex("(?<=OECD)[\\w\\s,\\[\\]]{1,10}\\((\\d{4})\\)\\s(4[\\d]{2})", "oecd_guideline_year", EntityType.ENTITY,1, $headline).forEach(entity ->
entity.apply("DOC.1.4", "OECD Guideline year found", "n-a")
);
// The same "Method" regex is evaluated twice: once for the number (group 1) and once for the year (group 2).
entityCreationService.byRegex("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", "oecd_guideline_number", EntityType.ENTITY,1, $headline).forEach(entity ->
entity.apply("DOC.1.4", "OECD Guideline number found", "n-a")
);
entityCreationService.byRegex("(?<=OECD).{5,40}Method (4[\\d]{2}).{1,65}(\\d{4})\\)", "oecd_guideline_year", EntityType.ENTITY,2, $headline).forEach(entity ->
entity.apply("DOC.1.4", "OECD Guideline year found", "n-a")
);
entityCreationService.byRegex("(?<=OECD) Guideline (4\\d{2})", "oecd_guideline_number", EntityType.ENTITY,1, $headline).forEach(entity ->
entity.apply("DOC.1.4", "OECD Guideline number found", "n-a")
);
entityCreationService.byRegex("OECD Guideline 4\\d{2}", "oecd_guideline", EntityType.ENTITY, $headline).forEach(entity ->
entity.apply("DOC.1.4", "OECD Guideline found", "n-a")
);
end
// Rule unit: DOC.2
rule "DOC.2.0: Report number"