diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/DocstrumBlockificationService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/DocstrumBlockificationService.java index 17dd247..e0a046b 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/DocstrumBlockificationService.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/DocstrumBlockificationService.java @@ -26,6 +26,7 @@ import lombok.RequiredArgsConstructor; @RequiredArgsConstructor public class DocstrumBlockificationService { + public static final float Y_THRESHOLD = 5f; private final DocstrumSegmentationService docstrumSegmentationService; static final float THRESHOLD = 1f; @@ -163,7 +164,7 @@ public class DocstrumBlockificationService { previous = current; } - mergeIntersectingBlocks(page, usedRulings, 0, 6.5f); + mergeIntersectingBlocks(page, usedRulings, 0, Y_THRESHOLD); } diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/visualization/LayoutGridService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/visualization/LayoutGridService.java index 3a25058..97adb60 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/visualization/LayoutGridService.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/visualization/LayoutGridService.java @@ -165,8 +165,10 @@ public class LayoutGridService { List ys = yStream.collect(Collectors.toList()); ys.remove(0); - Rectangle2D tableBBox = table.getBBox().get(page); - List coloredLines = layoutGrid.getVisualizationsPerPages().get(page.getNumber() - 1).getColoredLines(); + Rectangle2D tableBBox = table.getBBox() + .get(page); + List coloredLines = layoutGrid.getVisualizationsPerPages() + .get(page.getNumber() - 1).getColoredLines(); xs.forEach(x -> { Line2D line = new Line2D.Double(new Point2D.Double(x, tableBBox.getMaxY()), new Point2D.Double(x, tableBBox.getMinY())); coloredLines.add(new ColoredLine(line, INNER_LINES_COLOR, LINE_WIDTH)); @@ -192,6 +194,11 @@ public class LayoutGridService { Map bBoxMap = semanticNode.getBBox(); List subSections = semanticNode.streamAllSubNodesOfType(NodeType.SECTION) .toList(); + float lineWidthModifier = (subSections.stream() + .map(node -> node.getTreeId().size()) + .max(Integer::compareTo) + .orElse(semanticNode.getTreeId().size()) - semanticNode.getTreeId().size()); + Page firstPage = semanticNode.getFirstPage(); String treeIdString = buildTreeIdString(semanticNode); if (!subSections.isEmpty()) { @@ -200,8 +207,9 @@ public class LayoutGridService { bBoxMap.forEach(((page, textBBox) -> addPlacedText(page, textBBox, treeIdString, layoutGrid))); } if (bBoxMap.values().size() == 1) { - Rectangle2D r = RectangleTransformations.pad(bBoxMap.get(firstPage), LINE_WIDTH, LINE_WIDTH); - List coloredLines = layoutGrid.getVisualizationsPerPages().get(firstPage.getNumber() - 1).getColoredLines(); + Rectangle2D r = RectangleTransformations.pad(bBoxMap.get(firstPage), LINE_WIDTH * (1 + lineWidthModifier), LINE_WIDTH * (1 + lineWidthModifier)); + List coloredLines = layoutGrid.getVisualizationsPerPages() + .get(firstPage.getNumber() - 1).getColoredLines(); List lines = createLinesFromRectangle(r, firstPage.getRotation()); // add string to top line var firstLine = lines.remove(0); @@ -216,11 +224,11 @@ public class LayoutGridService { .sorted(Comparator.comparingInt(Page::getNumber)) .collect(Collectors.toList()); pagesInOrder.remove(0); - addLinesForFirstPageOfSection(semanticNode, color, firstPage, layoutGrid); + addLinesForFirstPageOfSection(semanticNode, color, firstPage, layoutGrid, lineWidthModifier); var lastPage = pagesInOrder.remove(pagesInOrder.size() - 1); - addLinesForLastPageOfSection(semanticNode, color, lastPage, layoutGrid); + addLinesForLastPageOfSection(semanticNode, color, lastPage, layoutGrid, lineWidthModifier); for (Page middlePage : pagesInOrder) { - addLinesForMiddlePageOfSection(semanticNode, color, middlePage, layoutGrid); + addLinesForMiddlePageOfSection(semanticNode, color, middlePage, layoutGrid, lineWidthModifier); } } @@ -254,15 +262,18 @@ public class LayoutGridService { upperLeftCorner = add(upperLeftCorner, translationVector); - var placedTexts = layoutGrid.getVisualizationsPerPages().get(page.getNumber() - 1).getPlacedTexts(); + var placedTexts = layoutGrid.getVisualizationsPerPages() + .get(page.getNumber() - 1).getPlacedTexts(); placedTexts.add(PlacedText.textFacingUp(s, upperLeftCorner, FONT_SIZE, Color.BLACK, FONT)); } - private void addLinesForMiddlePageOfSection(SemanticNode semanticNode, Color color, Page middlePage, LayoutGrid layoutGrid) { + private void addLinesForMiddlePageOfSection(SemanticNode semanticNode, Color color, Page middlePage, LayoutGrid layoutGrid, float lineWidthModifier) { - List coloredLines = layoutGrid.getVisualizationsPerPages().get(middlePage.getNumber() - 1).getColoredLines(); - Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox().get(middlePage), LINE_WIDTH, LINE_WIDTH); + List coloredLines = layoutGrid.getVisualizationsPerPages() + .get(middlePage.getNumber() - 1).getColoredLines(); + Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox() + .get(middlePage), LINE_WIDTH * (1 + lineWidthModifier), LINE_WIDTH * (1 + lineWidthModifier)); var midPageLines = createLinesFromRectangle(r, middlePage.getRotation()); // remove top line midPageLines.remove(0); @@ -277,10 +288,12 @@ public class LayoutGridService { } - private void addLinesForLastPageOfSection(SemanticNode semanticNode, Color color, Page lastPage, LayoutGrid layoutGrid) { + private void addLinesForLastPageOfSection(SemanticNode semanticNode, Color color, Page lastPage, LayoutGrid layoutGrid, float lineWidthModifier) { - List coloredLines = layoutGrid.getVisualizationsPerPages().get(lastPage.getNumber() - 1).getColoredLines(); - Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox().get(lastPage), LINE_WIDTH, LINE_WIDTH); + List coloredLines = layoutGrid.getVisualizationsPerPages() + .get(lastPage.getNumber() - 1).getColoredLines(); + Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox() + .get(lastPage), LINE_WIDTH * (1 + lineWidthModifier), LINE_WIDTH * (1 + lineWidthModifier)); var lastPageLines = createLinesFromRectangle(r, lastPage.getRotation()); // remove top line lastPageLines.remove(0); @@ -293,10 +306,12 @@ public class LayoutGridService { } - private void addLinesForFirstPageOfSection(SemanticNode semanticNode, Color color, Page firstPage, LayoutGrid layoutGrid) { + private void addLinesForFirstPageOfSection(SemanticNode semanticNode, Color color, Page firstPage, LayoutGrid layoutGrid, float lineWidthModifier) { - List coloredLines = layoutGrid.getVisualizationsPerPages().get(firstPage.getNumber() - 1).getColoredLines(); - Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox().get(firstPage), LINE_WIDTH, LINE_WIDTH); + List coloredLines = layoutGrid.getVisualizationsPerPages() + .get(firstPage.getNumber() - 1).getColoredLines(); + Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox() + .get(firstPage), LINE_WIDTH * (1 + lineWidthModifier), LINE_WIDTH * (1 + lineWidthModifier)); var firstPageLines = createLinesFromRectangle(r, firstPage.getRotation()); // remove bottom line firstPageLines.remove(2); @@ -366,7 +381,10 @@ public class LayoutGridService { private static boolean isNotSectionOrTableCellOrDocument(SemanticNode semanticNode) { - return !(semanticNode.getType().equals(NodeType.DOCUMENT) || semanticNode.getType().equals(NodeType.SECTION) || semanticNode.getType().equals(NodeType.TABLE_CELL)); + return !(semanticNode.getType().equals(NodeType.DOCUMENT) + || semanticNode.getType().equals(NodeType.SECTION) + || semanticNode.getType().equals(NodeType.SUPER_SECTION) + || semanticNode.getType().equals(NodeType.TABLE_CELL)); } @@ -374,9 +392,7 @@ public class LayoutGridService { semanticNode.getBBox() .forEach((page, textBBox) -> layoutGrid.getVisualizationsPerPages() - .get(page.getNumber() - 1) - .getColoredRectangles() - .add(new ColoredRectangle(textBBox, color, LINE_WIDTH))); + .get(page.getNumber() - 1).getColoredRectangles().add(new ColoredRectangle(textBBox, color, LINE_WIDTH))); } diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java index 79b8bad..3351eb0 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java @@ -31,7 +31,7 @@ public class ViewerDocumentTest extends BuildDocumentTest { @SneakyThrows public void testViewerDocument() { - String fileName = "files/new/crafted_outline_test_doc.pdf"; + String fileName = "files/syngenta/CustomerFiles/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf"; String tmpFileName = "/tmp/" + Path.of(fileName).getFileName() + "_VIEWER.pdf"; diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/segmentation/PdfSegmentationServiceTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/segmentation/PdfSegmentationServiceTest.java index 6fe75cc..abd9788 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/segmentation/PdfSegmentationServiceTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/segmentation/PdfSegmentationServiceTest.java @@ -114,7 +114,7 @@ public class PdfSegmentationServiceTest extends AbstractTest { ClassificationDocument classificationDocument = buildClassificationDocument(pdfFileResource.getFile()); assertThat(classificationDocument.getHeaders() - .get(0).getTextBlocks().size()).isEqualTo(2); + .get(0).getTextBlocks().size()).isEqualTo(3); assertThat(classificationDocument.getHeaders() .get(0).getTextBlocks() .get(0).getSequences().size()).isEqualTo(8);