From 014eba9fc3fc4f31c0d3d5bab36136f9e3d030c8 Mon Sep 17 00:00:00 2001 From: Corina Olariu Date: Tue, 9 Apr 2024 12:14:57 +0300 Subject: [PATCH] RED-8747 - Entities not merged properly - fp - fix typo - add validate table test --- .../RedactManagerBlockificationService.java | 2 +- .../server/LayoutparserEnd2EndTest.java | 2 +- .../segmentation/PdfSegmentationServiceTest.java | 13 +++++++++++++ .../{localTests => SinglePages}/MergedEntities.pdf | Bin 4 files changed, 15 insertions(+), 2 deletions(-) rename layoutparser-service/layoutparser-service-server/src/test/resources/files/{localTests => SinglePages}/MergedEntities.pdf (100%) diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/RedactManagerBlockificationService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/RedactManagerBlockificationService.java index 1a6525f..6f50eb3 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/RedactManagerBlockificationService.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/RedactManagerBlockificationService.java @@ -59,7 +59,7 @@ public class RedactManagerBlockificationService { boolean splitByX = prev != null && maxX + 50 < word.getMinXDirAdj() && prev.getMinYDirAdj() == word.getMinYDirAdj(); boolean xIsBeforeFirstX = prev != null && word.getMinXDirAdj() < minX; boolean newLineAfterSplit = prev != null && word.getMinYDirAdj() != prev.getMinYDirAdj() && wasSplitted && splitX1 != word.getMinXDirAdj(); - boolean isSplitByRuling = isSplitByRuling(minX, minY, maxX, maxY, word, usedVerticalRulings, usedVerticalRulings); + boolean isSplitByRuling = isSplitByRuling(minX, minY, maxX, maxY, word, usedHorizonalRulings, usedVerticalRulings); boolean splitByDir = prev != null && !prev.getDir().equals(word.getDir()); if (prev != null && (lineSeparation || startFromTop || splitByX || splitByDir || isSplitByRuling)) { diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/LayoutparserEnd2EndTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/LayoutparserEnd2EndTest.java index a115ae9..9861a52 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/LayoutparserEnd2EndTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/LayoutparserEnd2EndTest.java @@ -37,7 +37,7 @@ public class LayoutparserEnd2EndTest extends AbstractTest { @SneakyThrows public void testLayoutParserEndToEnd_RED_8747() { - prepareStorage("files/localTests/MergedEntities.pdf"); + prepareStorage("files/SinglePages/MergedEntities.pdf"); LayoutParsingRequest layoutParsingRequest = buildDefaultLayoutParsingRequest(LayoutParsingType.REDACT_MANAGER_OLD); LayoutParsingFinishedEvent finishedEvent = layoutParsingPipeline.parseLayoutAndSaveFilesToStorage(layoutParsingRequest); Arrays.stream(finishedEvent.message().split("\n")) diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/segmentation/PdfSegmentationServiceTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/segmentation/PdfSegmentationServiceTest.java index 739d0fa..3b58f2d 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/segmentation/PdfSegmentationServiceTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/segmentation/PdfSegmentationServiceTest.java @@ -667,6 +667,19 @@ public class PdfSegmentationServiceTest extends AbstractTest { } + @Test + public void testMergedEntities_Page26() throws IOException { + + ClassPathResource pdfFileResource = new ClassPathResource("files/SinglePages/MergedEntities.pdf"); + + ClassificationDocument document = buildClassificationDocument(pdfFileResource.getFile()); + + validateTableSize(document, 1); + + validateTable(document, 0, 6, 6, 0, 0); + + } + @SneakyThrows private void toHtml(ClassificationDocument document, String filename) { diff --git a/layoutparser-service/layoutparser-service-server/src/test/resources/files/localTests/MergedEntities.pdf b/layoutparser-service/layoutparser-service-server/src/test/resources/files/SinglePages/MergedEntities.pdf similarity index 100% rename from layoutparser-service/layoutparser-service-server/src/test/resources/files/localTests/MergedEntities.pdf rename to layoutparser-service/layoutparser-service-server/src/test/resources/files/SinglePages/MergedEntities.pdf