From 56796ca00ef4d3025acbf44c2f7a1e322c707705 Mon Sep 17 00:00:00 2001 From: deiflaender Date: Fri, 14 Jul 2023 14:05:09 +0200 Subject: [PATCH] DM-307: Improved paragraph splitting --- .../service/DocuMineBlockificationService.java | 2 +- .../red/service/redaction/v1/server/DocumineFloraTest.java | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/classification/service/DocuMineBlockificationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/classification/service/DocuMineBlockificationService.java index cd9b7c00..718dcbec 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/classification/service/DocuMineBlockificationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/layoutparsing/classification/service/DocuMineBlockificationService.java @@ -49,7 +49,7 @@ public class DocuMineBlockificationService implements BlockificationService{ Float splitX1 = null; for (TextPositionSequence word : textPositions) { - boolean lineSeparation = word.getMinYDirAdj() - maxY > word.getHeight() * 1.25; + boolean lineSeparation = prev != null && word.getMinYDirAdj() - maxY > Math.min(word.getHeight(), prev.getHeight()) * 1.5; boolean startFromTop = prev != null && word.getMinYDirAdj() < prev.getMinYDirAdj() - prev.getTextHeight(); boolean splitByX = prev != null && maxX + 50 < word.getMinXDirAdj() && prev.getMinYDirAdj() == word.getMinYDirAdj(); boolean xIsBeforeFirstX = prev != null && word.getMinXDirAdj() < minX; diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java index ad0ac7a8..73e643f7 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/DocumineFloraTest.java @@ -45,9 +45,10 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest { @Disabled public void titleExtraction() throws IOException { + AnalyzeRequest request = uploadFileToStorage("files/Documine/Flora/ProblemDocs/402-16_Fantom_ToxicidadeCutaneaAguda.pdf"); - AnalyzeRequest request = prepareStorage("files/Documine/Flora/ProblemDocs/23_In Vitro Percutaneous Absorption - Human Split-Thickness Skin (1).pdf", - "files/Documine/Flora/ProblemDocs/23_In Vitro Percutaneous Absorption - Human Split-Thickness Skin (1).json"); +// AnalyzeRequest request = prepareStorage("files/Documine/Flora/ProblemDocs/23_In Vitro Percutaneous Absorption - Human Split-Thickness Skin (1).pdf", +// "files/Documine/Flora/ProblemDocs/23_In Vitro Percutaneous Absorption - Human Split-Thickness Skin (1).json"); System.out.println("Start Full integration test"); analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId())); -- 2.47.2