From 5ca501da7445e8de4b412439025e97169600290f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dominique=20Eifl=C3=A4nder?=
Date: Tue, 11 Jul 2023 09:35:18 +0200
Subject: [PATCH] Resolve DM-307

---
Notes (ignored by git am):
 * PdfSegmentationService: reads `application.type` (default "RedactManager")
   and calls stripper.setSortByPosition(true) when the value is "DocuMine".
   The comparison is written constant-first so a non-injected (null) field
   cannot raise a NullPointerException.
 * documine_flora.drl: DOC.13.0 and DOC.23.0 now match Headline nodes and pass
   $headline.getParent() to the entity creation service; DOC.43.0 additionally
   accepts the "Viability/Mortality" header spelling.
 * NOTE(review): DOC.13.0 still excludes "TABLE" case-sensitively while
   DOC.23.0 excludes it ignoring case - confirm this difference is intended.
 * Context-line indentation below is reconstructed; apply with
   `git apply --ignore-whitespace` if the strict match fails.

 .../service/PdfSegmentationService.java       |  9 ++++++
 .../test/resources/drools/documine_flora.drl  | 28 +++++--------------
 2 files changed, 16 insertions(+), 21 deletions(-)

diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/classification/service/PdfSegmentationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/classification/service/PdfSegmentationService.java
index 000c1343..913db6c9 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/classification/service/PdfSegmentationService.java
+++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/classification/service/PdfSegmentationService.java
@@ -14,6 +14,7 @@ import org.apache.pdfbox.io.MemoryUsageSetting;
 import org.apache.pdfbox.pdmodel.PDDocument;
 import org.apache.pdfbox.pdmodel.PDPage;
 import org.apache.pdfbox.pdmodel.common.PDRectangle;
+import org.springframework.beans.factory.annotation.Value;
 import org.springframework.stereotype.Service;
 
 import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.ImageServiceResponseAdapter;
@@ -47,6 +48,9 @@ public class PdfSegmentationService {
     private final ImageServiceResponseAdapter imageServiceResponseAdapter;
     private final TableServiceResponseAdapter tableServiceResponseAdapter;
 
+    @Value("${application.type:RedactManager}")
+    private String applicationType;
+
 
     public ClassificationDocument parseDocument(String dossierId,
             String fileId,
@@ -108,6 +112,11 @@ public class PdfSegmentationService {
             stripper.setStartPage(pageNumber);
             stripper.setEndPage(pageNumber);
             stripper.setPdpage(pdPage);
+
+            if ("DocuMine".equals(applicationType)) {
+                stripper.setSortByPosition(true);
+            }
+
             stripper.getText(pdDocument);
 
             PDRectangle pdr = pdPage.getMediaBox();
diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl
index 2add0ef7..f35a9ea2 100644
--- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl
+++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl
@@ -568,15 +568,9 @@ rule "DOC.12.1: Guideline Deviation in text"
 rule "DOC.13.0: Clinical Signs"
     when
         FileAttribute(label == "OECD Number", value == "425")
-        $section: Section(
-            (
-                getHeadline().containsString("Clinical Signs")
-                || getHeadline().containsString("Macroscopic Findings")
-            )
-            && !getHeadline().containsString("TABLE")
-        )
+        $headline: Headline(containsAnyStringIgnoreCase("Clinical Signs", "Macroscopic Findings") && !containsString("TABLE"))
     then
-        entityCreationService.bySemanticNodeParagraphsOnly($section, "clinical_signs", EntityType.ENTITY)
+        entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "clinical_signs", EntityType.ENTITY)
             .forEach(entity -> entity.apply("DOC.13.0", "Clinical Signs found", "n-a"));
     end
 
@@ -718,18 +712,9 @@ rule "DOC.23.2: Bodyweight changes"
 rule "DOC.23.0: Bodyweight changes"
     when
         FileAttribute(label == "OECD Number", value == "403")
-        $section: Section(
-            (
-                getHeadline().containsString("Bodyweight")
-                || getHeadline().containsString("Bodyweights")
-                || getHeadline().containsString("Body Weights")
-                || getHeadline().containsString("Body Weight")
-            )
-            && !getHeadline().containsStringIgnoreCase("Appendix")
-            && !getHeadline().containsStringIgnoreCase("TABLE")
-        )
+        $headline: Headline(containsAnyStringIgnoreCase("Bodyweight", "Bodyweights", "Body Weights", "Body Weight"), !containsAnyStringIgnoreCase("Appendix", "TABLE"))
     then
-        entityCreationService.bySemanticNodeParagraphsOnly($section, "bodyweight_changes", EntityType.ENTITY)
+        entityCreationService.bySemanticNodeParagraphsOnly($headline.getParent(), "bodyweight_changes", EntityType.ENTITY)
             .forEach(entity -> entity.apply("DOC.23.0", "Bodyweight section found", "n-a"));
     end
 
@@ -968,7 +953,7 @@ rule "DOC.43.0: Dose Mortality"
     when
         FileAttribute(label == "OECD Number", value == "425")
         $table: Table(
-            (hasHeader("Mortality") || hasHeader("Long Term Results") || hasHeader("Long Term Outcome") || hasHeader("Comments") || hasHeader("Viability / Mortality"))
+            (hasHeader("Mortality") || hasHeader("Long Term Results") || hasHeader("Long Term Outcome") || hasHeader("Comments") || hasHeader("Viability / Mortality") || hasHeader("Viability/Mortality"))
             && (hasHeader("Dose [mg/kg bodyweight]") || hasHeader("Dose [mg/kg body weight]") ||hasHeader("Dose (mg/kg)") || hasHeader("Dose levei (mg/kg)") || hasHeader("Dose Level (mg/kg)") || hasHeader("Dose level (mg/kg)") || hasHeader("Dosage [mg/kg body weight]"))
         )
 
@@ -977,7 +962,8 @@ rule "DOC.43.0: Dose Mortality"
             $table.streamTableCellsWithHeader("Comments"),
             $table.streamTableCellsWithHeader("Long Term Results"),
             $table.streamTableCellsWithHeader("Long Term Outcome"),
-            $table.streamTableCellsWithHeader("Viability / Mortality")
+            $table.streamTableCellsWithHeader("Viability / Mortality"),
+            $table.streamTableCellsWithHeader("Viability/Mortality")
             ).flatMap(a -> a)
             .map(tableCell -> entityCreationService.bySemanticNode(tableCell, "dose_mortality", EntityType.ENTITY))
             .filter(Optional::isPresent)