From ea0af08c31e78d88980a8da1ef581c378c2ada13 Mon Sep 17 00:00:00 2001 From: Kilian Schuettler Date: Mon, 14 Aug 2023 16:02:10 +0200 Subject: [PATCH 1/2] RED-7851: add layoutgrid to new viewer document as optional content --- .../api/queue/LayoutParsingRequest.java | 1 + .../processor/LayoutParsingPipeline.java | 20 +- .../LayoutParsingStorageService.java | 13 + .../model/visualization/ColoredLine.java | 8 + .../model/visualization/ColoredRectangle.java | 8 + .../model/visualization/FilledRectangle.java | 8 + .../model/visualization/LayoutGrid.java | 27 ++ .../model/visualization/PlacedText.java | 7 + .../visualization/VisualizationsOnPage.java | 26 ++ .../visualization/LayoutGridService.java | 232 ++++++++++++++++++ .../visualization/ViewerDocumentService.java | 180 ++++++++++++++ .../build.gradle.kts | 2 +- .../layoutparser/server/BdrJsonBuildTest.java | 5 +- .../HeadlinesGoldStandardIntegrationTest.java | 4 +- .../server/graph/BuildDocumentGraphTest.java | 9 +- .../graph/DocumentGraphJsonWritingTest.java | 2 +- .../graph/DocumentGraphVisualizationTest.java | 13 +- .../server/graph/ViewerDocumentTest.java | 33 +++ .../server/utils/AbstractTest.java | 4 + .../server/utils/visualizations/PdfDraw.java | 3 +- ...custom-image.sh => publish-custom-image.sh | 0 21 files changed, 587 insertions(+), 18 deletions(-) create mode 100644 layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/visualization/ColoredLine.java create mode 100644 layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/visualization/ColoredRectangle.java create mode 100644 layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/visualization/FilledRectangle.java create mode 100644 
layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/visualization/LayoutGrid.java create mode 100644 layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/visualization/PlacedText.java create mode 100644 layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/visualization/VisualizationsOnPage.java create mode 100644 layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/visualization/LayoutGridService.java create mode 100644 layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/visualization/ViewerDocumentService.java create mode 100644 layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java rename push-custom-image.sh => publish-custom-image.sh (100%) diff --git a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/queue/LayoutParsingRequest.java b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/queue/LayoutParsingRequest.java index aff1eae..d3b45dd 100644 --- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/queue/LayoutParsingRequest.java +++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/queue/LayoutParsingRequest.java @@ -19,6 +19,7 @@ public record LayoutParsingRequest( @NonNull String positionBlockFileStorageId, @NonNull String pageFileStorageId, @NonNull String simplifiedTextStorageId, + @NonNull String 
viewerDocumentStorageId, @NonNull String sectionGridStorageId) { } diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java index 98cde15..e31c993 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java @@ -2,6 +2,7 @@ package com.knecon.fforesight.service.layoutparser.processor; import static java.lang.String.format; +import java.io.ByteArrayOutputStream; import java.io.IOException; import java.util.ArrayList; import java.util.List; @@ -44,6 +45,7 @@ import com.knecon.fforesight.service.layoutparser.processor.services.factory.Doc import com.knecon.fforesight.service.layoutparser.processor.services.mapper.DocumentDataMapper; import com.knecon.fforesight.service.layoutparser.processor.services.mapper.TaasDocumentDataMapper; import com.knecon.fforesight.service.layoutparser.processor.services.parsing.PDFLinesTextStripper; +import com.knecon.fforesight.service.layoutparser.processor.services.visualization.ViewerDocumentService; import lombok.RequiredArgsConstructor; import lombok.SneakyThrows; @@ -69,6 +71,8 @@ public class LayoutParsingPipeline { private final TaasBlockificationService taasBlockificationService; private final DocuMineBlockificationService docuMineBlockificationService; private final RedactManagerBlockificationService redactManagerBlockificationService; + private final ViewerDocumentService viewerDocumentService; + public LayoutParsingFinishedEvent parseLayoutAndSaveFilesToStorage(LayoutParsingRequest layoutParsingRequest) throws IOException { @@ -93,6 
+97,12 @@ public class LayoutParsingPipeline { layoutParsingStorageService.storeSectionGrid(layoutParsingRequest, sectionGridCreatorService.createSectionGrid(documentGraph)); layoutParsingStorageService.storeDocumentData(layoutParsingRequest, DocumentDataMapper.toDocumentData(documentGraph)); layoutParsingStorageService.storeSimplifiedText(layoutParsingRequest, simplifiedSectionTextService.toSimplifiedText(documentGraph)); + + try (var out = new ByteArrayOutputStream()) { + viewerDocumentService.createViewerDocument(originDocument, documentGraph, out); + layoutParsingStorageService.storeViewerDocument(layoutParsingRequest, out); + } + if (layoutParsingRequest.layoutParsingType().equals(LayoutParsingType.TAAS)) { var researchDocumentData = TaasDocumentDataMapper.fromDocument(documentGraph); layoutParsingStorageService.storeResearchDocumentData(layoutParsingRequest, researchDocumentData); @@ -114,9 +124,9 @@ public class LayoutParsingPipeline { @SneakyThrows public ClassificationDocument parseLayout(LayoutParsingType layoutParsingType, - PDDocument originDocument, - ImageServiceResponse imageServiceResponse, - TableServiceResponse tableServiceResponse) { + PDDocument originDocument, + ImageServiceResponse imageServiceResponse, + TableServiceResponse tableServiceResponse) { Map> pdfTableCells = cvTableParsingAdapter.buildCvParsedTablesPerPage(tableServiceResponse); Map> pdfImages = imageServiceResponseAdapter.buildClassifiedImagesPerPage(imageServiceResponse); @@ -176,7 +186,6 @@ public class LayoutParsingPipeline { classificationPages.add(classificationPage); } - bodyTextFrameService.setBodyTextFrames(classificationDocument, layoutParsingType); switch (layoutParsingType) { @@ -191,6 +200,7 @@ public class LayoutParsingPipeline { return classificationDocument; } + private void increaseDocumentStatistics(ClassificationPage classificationPage, ClassificationDocument document) { if (!classificationPage.isLandscape()) { @@ -221,6 +231,4 @@ public class 
LayoutParsingPipeline { } - - } diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingStorageService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingStorageService.java index 836a582..727fe1e 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingStorageService.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingStorageService.java @@ -1,5 +1,7 @@ package com.knecon.fforesight.service.layoutparser.processor; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; @@ -125,6 +127,7 @@ public class LayoutParsingStorageService { storageService.storeJSONObject(TenantContext.getTenantId(), layoutParsingRequest.simplifiedTextStorageId(), simplifiedText); } + @SneakyThrows private InputStream getObject(String storageId) { @@ -134,4 +137,14 @@ public class LayoutParsingStorageService { return Files.newInputStream(path, StandardOpenOption.DELETE_ON_CLOSE); } + + @SneakyThrows + public void storeViewerDocument(LayoutParsingRequest layoutParsingRequest, ByteArrayOutputStream out) { + + try (var in = new ByteArrayInputStream(out.toByteArray())) { + + storageService.storeObject(TenantContext.getTenantId(), layoutParsingRequest.viewerDocumentStorageId(), in); + } + } + } diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/visualization/ColoredLine.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/visualization/ColoredLine.java new file mode 100644 index 
0000000..41896fe --- /dev/null +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/visualization/ColoredLine.java @@ -0,0 +1,8 @@ +package com.knecon.fforesight.service.layoutparser.processor.model.visualization; + +import java.awt.Color; +import java.awt.geom.Line2D; + +public record ColoredLine(Line2D line, Color color) { + +} diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/visualization/ColoredRectangle.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/visualization/ColoredRectangle.java new file mode 100644 index 0000000..b251181 --- /dev/null +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/visualization/ColoredRectangle.java @@ -0,0 +1,8 @@ +package com.knecon.fforesight.service.layoutparser.processor.model.visualization; + +import java.awt.Color; +import java.awt.geom.Rectangle2D; + +public record ColoredRectangle(Rectangle2D rectangle2D, Color color) { + +} diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/visualization/FilledRectangle.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/visualization/FilledRectangle.java new file mode 100644 index 0000000..f043a31 --- /dev/null +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/visualization/FilledRectangle.java @@ -0,0 +1,8 @@ +package com.knecon.fforesight.service.layoutparser.processor.model.visualization; + +import java.awt.Color; +import java.awt.geom.Rectangle2D; + +public record FilledRectangle(Rectangle2D rectangle2D, Color 
color, float alpha) { + +} diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/visualization/LayoutGrid.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/visualization/LayoutGrid.java new file mode 100644 index 0000000..4d7cc3b --- /dev/null +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/visualization/LayoutGrid.java @@ -0,0 +1,27 @@ +package com.knecon.fforesight.service.layoutparser.processor.model.visualization; + +import java.util.ArrayList; +import java.util.List; + +import lombok.AccessLevel; +import lombok.Getter; +import lombok.experimental.FieldDefaults; + +@Getter +@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE) +public class LayoutGrid { + + int numberOfPages; + List visualizationsPerPages; + + + public LayoutGrid(int numberOfPages) { + + this.numberOfPages = numberOfPages; + this.visualizationsPerPages = new ArrayList<>(numberOfPages); + for (int i = 0; i < numberOfPages; i++) { + this.visualizationsPerPages.add(VisualizationsOnPage.builder().pageNumber(i).build()); + } + } + +} diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/visualization/PlacedText.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/visualization/PlacedText.java new file mode 100644 index 0000000..b959e3d --- /dev/null +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/visualization/PlacedText.java @@ -0,0 +1,7 @@ +package com.knecon.fforesight.service.layoutparser.processor.model.visualization; + +import java.awt.geom.Point2D; + +public record PlacedText(String text, Point2D 
lineStart) { + +} diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/visualization/VisualizationsOnPage.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/visualization/VisualizationsOnPage.java new file mode 100644 index 0000000..2b2b4ea --- /dev/null +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/visualization/VisualizationsOnPage.java @@ -0,0 +1,26 @@ +package com.knecon.fforesight.service.layoutparser.processor.model.visualization; + +import java.util.LinkedList; +import java.util.List; + +import lombok.AccessLevel; +import lombok.Builder; +import lombok.Getter; +import lombok.experimental.FieldDefaults; + +@Getter +@Builder +@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE) +public class VisualizationsOnPage { + + int pageNumber; + @Builder.Default + List placedTexts = new LinkedList<>(); + @Builder.Default + List coloredLines = new LinkedList<>(); + @Builder.Default + List coloredRectangles = new LinkedList<>(); + @Builder.Default + List filledRectangles = new LinkedList<>(); + +} diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/visualization/LayoutGridService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/visualization/LayoutGridService.java new file mode 100644 index 0000000..8b9bc3e --- /dev/null +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/visualization/LayoutGridService.java @@ -0,0 +1,232 @@ +package com.knecon.fforesight.service.layoutparser.processor.services.visualization; + +import static 
com.knecon.fforesight.service.layoutparser.processor.services.visualization.ViewerDocumentService.LINE_WIDTH; + +import java.awt.Color; +import java.awt.geom.Line2D; +import java.awt.geom.Point2D; +import java.awt.geom.Rectangle2D; +import java.awt.geom.RectangularShape; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import org.springframework.stereotype.Service; + +import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType; +import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document; +import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Page; +import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SemanticNode; +import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Table; +import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableCell; +import com.knecon.fforesight.service.layoutparser.processor.model.visualization.ColoredLine; +import com.knecon.fforesight.service.layoutparser.processor.model.visualization.ColoredRectangle; +import com.knecon.fforesight.service.layoutparser.processor.model.visualization.LayoutGrid; +import com.knecon.fforesight.service.layoutparser.processor.model.visualization.PlacedText; +import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations; + +@Service +public class LayoutGridService { + + private static final Color INNER_LINES_COLOR = new Color(255, 175, 175); + private static final Color PARAGRAPH_COLOR = new Color(70, 130, 180); + public static final Color TABLE_COLOR = new Color(102, 205, 170); + public static final Color SECTION_COLOR = new Color(23, 23, 23); + public static final Color HEADLINE_COLOR = new Color(162, 56, 56); + public static final Color HEADER_COLOR = new Color(171, 131, 6); + public static final Color IMAGE_COLOR = new Color(253, 63, 
146); + + + public LayoutGrid createLayoutGrid(Document document) { + + LayoutGrid layoutGrid = new LayoutGrid(document.getNumberOfPages()); + document.streamAllSubNodes().forEach(semanticNode -> { + Color color = switch (semanticNode.getType()) { + case PARAGRAPH -> PARAGRAPH_COLOR; + case TABLE -> TABLE_COLOR; + case SECTION -> SECTION_COLOR; + case HEADLINE -> HEADLINE_COLOR; + case HEADER, FOOTER -> HEADER_COLOR; + case IMAGE -> IMAGE_COLOR; + default -> null; + }; + if (isNotSectionOrTableCellOrDocument(semanticNode)) { + addAsRectangle(semanticNode, layoutGrid, color); + } + if (semanticNode.getType().equals(NodeType.SECTION)) { + addSection(semanticNode, layoutGrid, color); + } + if (semanticNode.getType().equals(NodeType.TABLE)) { + Table table = (Table) semanticNode; + addInnerTableLines(table, layoutGrid, INNER_LINES_COLOR); + } + }); + + return layoutGrid; + } + + + private void addInnerTableLines(Table table, LayoutGrid layoutGrid, Color color) { + + if (table.getNumberOfCols() < 1 || table.getNumberOfRows() < 1) { + return; + } + // only draw inner lines -> remove first values + List xs = table.streamRow(0).map(TableCell::getBBox).map(map -> map.values().stream().findAny().get()).map(RectangularShape::getMinX).collect(Collectors.toList()); + xs.remove(0); + List ys = table.streamCol(0).map(TableCell::getBBox).map(map -> map.values().stream().findAny().get()).map(RectangularShape::getMaxY).collect(Collectors.toList()); + ys.remove(0); + Rectangle2D tableBBox = table.getBBox().get(table.getFirstPage()); + List coloredLines = layoutGrid.getVisualizationsPerPages().get(table.getFirstPage().getNumber() - 1).getColoredLines(); + xs.forEach(x -> { + Line2D line = new Line2D.Double(new Point2D.Double(x, tableBBox.getMaxY()), new Point2D.Double(x, tableBBox.getMinY())); + coloredLines.add(new ColoredLine(line, color)); + }); + ys.forEach(y -> { + Line2D line = new Line2D.Double(new Point2D.Double(tableBBox.getMinX(), y), new Point2D.Double(tableBBox.getMaxX(), 
y)); + coloredLines.add(new ColoredLine(line, color)); + }); + } + + + private void addSection(SemanticNode semanticNode, LayoutGrid layoutGrid, Color color) { + + Map bBoxMap = semanticNode.getBBox(); + List subSections = semanticNode.streamAllSubNodesOfType(NodeType.SECTION).toList(); + if (!subSections.isEmpty()) { + Page firstPage = semanticNode.getFirstPage(); + addPlacedText(firstPage, bBoxMap.get(firstPage), buildTreeIdString(semanticNode), layoutGrid); + } else { + bBoxMap.forEach(((page, textBBox) -> addPlacedText(page, textBBox, buildTreeIdString(semanticNode), layoutGrid))); + } + if (bBoxMap.values().size() == 1) { + Rectangle2D r = RectangleTransformations.pad(bBoxMap.values().stream().findFirst().get(), LINE_WIDTH, LINE_WIDTH); + int pageNumber = bBoxMap.keySet().stream().findFirst().get().getNumber() - 1; + List coloredLines = layoutGrid.getVisualizationsPerPages().get(pageNumber).getColoredLines(); + List lines = createLinesFromRectangle(r); + // add string to top line + var firstLine = lines.remove(0); + coloredLines.add(new ColoredLine(firstLine, color)); + for (Line2D line : lines) { + coloredLines.add(new ColoredLine(line, color)); + } + return; + } + List pagesInOrder = bBoxMap.keySet().stream().sorted(Comparator.comparingInt(Page::getNumber)).collect(Collectors.toList()); + var firstPage = pagesInOrder.remove(0); + addLinesForFirstPageOfSection(semanticNode, color, firstPage, layoutGrid); + var lastPage = pagesInOrder.remove(pagesInOrder.size() - 1); + addLinesForLastPageOfSection(semanticNode, color, lastPage, layoutGrid); + for (Page middlePage : pagesInOrder) { + addLinesForMiddlePageOfSection(semanticNode, color, middlePage, layoutGrid); + } + } + + + private void addPlacedText(Page page, Rectangle2D textBBox, String s, LayoutGrid layoutGrid) { + + var placedTexts = layoutGrid.getVisualizationsPerPages().get(page.getNumber() - 1).getPlacedTexts(); + placedTexts.add(new PlacedText(s, new Point2D.Float((float) (textBBox.getMinX()), (float) 
textBBox.getMaxY()))); + } + + + private void addLinesForMiddlePageOfSection(SemanticNode semanticNode, Color color, Page middlePage, LayoutGrid layoutGrid) { + + List coloredLines = layoutGrid.getVisualizationsPerPages().get(middlePage.getNumber() - 1).getColoredLines(); + Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox().get(middlePage), LINE_WIDTH, LINE_WIDTH); + var midPageLines = createLinesFromRectangle(r); + // remove top line + midPageLines.remove(0); + // remove bottom line + midPageLines.remove(1); + // add string to left line + var leftLine = midPageLines.remove(1); + coloredLines.add(new ColoredLine(leftLine, color)); + for (Line2D line : midPageLines) { + coloredLines.add(new ColoredLine(line, color)); + } + } + + + private void addLinesForLastPageOfSection(SemanticNode semanticNode, Color color, Page lastPage, LayoutGrid layoutGrid) { + + List coloredLines = layoutGrid.getVisualizationsPerPages().get(lastPage.getNumber() - 1).getColoredLines(); + Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox().get(lastPage), LINE_WIDTH, LINE_WIDTH); + var lastPageLines = createLinesFromRectangle(r); + // remove top line + lastPageLines.remove(0); + // add string to left line + var leftLine = lastPageLines.remove(2); + coloredLines.add(new ColoredLine(leftLine, color)); + for (Line2D line : lastPageLines) { + coloredLines.add(new ColoredLine(line, color)); + } + } + + + private void addLinesForFirstPageOfSection(SemanticNode semanticNode, Color color, Page firstPage, LayoutGrid layoutGrid) { + + List coloredLines = layoutGrid.getVisualizationsPerPages().get(firstPage.getNumber() - 1).getColoredLines(); + Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox().get(firstPage), LINE_WIDTH, LINE_WIDTH); + var firstPageLines = createLinesFromRectangle(r); + // remove bottom line + firstPageLines.remove(2); + // add string to top line + var firstLine = firstPageLines.remove(0); + coloredLines.add(new ColoredLine(firstLine, 
color)); + for (Line2D line : firstPageLines) { + coloredLines.add(new ColoredLine(line, color)); + } + } + + + private String buildTreeIdString(SemanticNode semanticNode) { + + return semanticNode.getTreeId().stream().map(Object::toString).collect(Collectors.joining(".")); + } + + + /* + A __________________ B + | | + | | + | | + | | + D|__________________| C + The returned List are the lines [AB, BC, DC, AD] + */ + private List createLinesFromRectangle(Rectangle2D r) { + // +0.5 to join the lines + List lines = new ArrayList<>(4); + float lineWidthCorrection = LINE_WIDTH * 0.5f; + Point2D.Float a = new Point2D.Float((float) r.getMinX(), (float) r.getMaxY()); + Point2D.Float a1 = new Point2D.Float((float) r.getMinX() - lineWidthCorrection, (float) r.getMaxY()); + Point2D.Float b = new Point2D.Float((float) r.getMaxX(), (float) r.getMaxY()); + Point2D.Float b1 = new Point2D.Float((float) r.getMaxX() + lineWidthCorrection, (float) r.getMaxY()); + Point2D.Float c = new Point2D.Float((float) r.getMaxX(), (float) r.getMinY()); + Point2D.Float c1 = new Point2D.Float((float) r.getMaxX() + lineWidthCorrection, (float) r.getMinY()); + Point2D.Float d = new Point2D.Float((float) r.getMinX(), (float) r.getMinY()); + Point2D.Float d1 = new Point2D.Float((float) r.getMinX() - lineWidthCorrection, (float) r.getMinY()); + lines.add(new Line2D.Float(a1, b1)); + lines.add(new Line2D.Float(b, c)); + lines.add(new Line2D.Float(d1, c1)); + lines.add(new Line2D.Float(a, d)); + return lines; + } + + + private static boolean isNotSectionOrTableCellOrDocument(SemanticNode semanticNode) { + + return !(semanticNode.getType().equals(NodeType.DOCUMENT) || semanticNode.getType().equals(NodeType.SECTION) || semanticNode.getType().equals(NodeType.TABLE_CELL)); + } + + + private void addAsRectangle(SemanticNode semanticNode, LayoutGrid layoutGrid, Color color) { + + semanticNode.getBBox() + .forEach((page, textBBox) -> layoutGrid.getVisualizationsPerPages().get(page.getNumber() - 
1).getColoredRectangles().add(new ColoredRectangle(textBBox, color))); + } + +} diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/visualization/ViewerDocumentService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/visualization/ViewerDocumentService.java new file mode 100644 index 0000000..7690b19 --- /dev/null +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/visualization/ViewerDocumentService.java @@ -0,0 +1,180 @@ +package com.knecon.fforesight.service.layoutparser.processor.services.visualization; + +import java.awt.geom.AffineTransform; +import java.awt.geom.Rectangle2D; +import java.io.IOException; +import java.io.OutputStream; +import java.util.HashSet; +import java.util.Set; + +import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDDocumentCatalog; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDPageContentStream; +import org.apache.pdfbox.pdmodel.PDResources; +import org.apache.pdfbox.pdmodel.font.PDFont; +import org.apache.pdfbox.pdmodel.font.PDType1Font; +import org.apache.pdfbox.pdmodel.graphics.optionalcontent.PDOptionalContentGroup; +import org.apache.pdfbox.pdmodel.graphics.optionalcontent.PDOptionalContentProperties; +import org.apache.pdfbox.pdmodel.graphics.state.PDExtendedGraphicsState; +import org.apache.pdfbox.util.Matrix; +import org.springframework.stereotype.Service; + +import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document; +import com.knecon.fforesight.service.layoutparser.processor.model.visualization.ColoredLine; +import com.knecon.fforesight.service.layoutparser.processor.model.visualization.ColoredRectangle; 
+import com.knecon.fforesight.service.layoutparser.processor.model.visualization.FilledRectangle; +import com.knecon.fforesight.service.layoutparser.processor.model.visualization.LayoutGrid; +import com.knecon.fforesight.service.layoutparser.processor.model.visualization.PlacedText; +import com.knecon.fforesight.service.layoutparser.processor.model.visualization.VisualizationsOnPage; + +import lombok.RequiredArgsConstructor; +import lombok.SneakyThrows; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +@Service +@RequiredArgsConstructor +public class ViewerDocumentService { + + private static final String layerName = "Layout grid"; + + private static final int FONT_SIZE = 10; + public static final float LINE_WIDTH = 1.5f; + + private final LayoutGridService layoutGridService; + + + @SneakyThrows + public void createViewerDocument(PDDocument pdDocument, Document document, OutputStream outputStream) { + + log.info("Start Viewer Document Creation"); + LayoutGrid layoutGrid = layoutGridService.createLayoutGrid(document); + log.info("Created Layout Grid"); + // PDDocument.save() is very slow, since it actually traverses the entire pdf and writes a new one. + // If we collect all COSDictionaries we changed and tell it explicitly to only add the changed ones it's very fast. + Set dictionariesToUpdate = new HashSet<>(); + PDOptionalContentGroup layer = addLayerToDocument(pdDocument, dictionariesToUpdate); + PDFont font = PDType1Font.HELVETICA; + + for (int pageNumber = 0; pageNumber < pdDocument.getNumberOfPages(); pageNumber++) { + PDPage pdPage = pdDocument.getPage(pageNumber); + + AffineTransform textDeRotationMatrix = getTextDeRotationTransform(pdPage); + addLayerToPageRessources(pdPage); + + // We need to save the graphics state before, such that our appended content cannot be affected by previous content streams with side effects, + // e.g. not escaped matrix transformations. 
+ escapePreviousContents(pdDocument, pdPage); + + VisualizationsOnPage visualizationsOnPage = layoutGrid.getVisualizationsPerPages().get(pageNumber); + assert pageNumber == visualizationsOnPage.getPageNumber(); + // We need to append to the content stream, otherwise the content could be overlapped by following content. + try (var contentStream = new PDPageContentStream(pdDocument, pdPage, PDPageContentStream.AppendMode.APPEND, true)) { + + contentStream.beginMarkedContent(COSName.OC, layer); + contentStream.saveGraphicsState(); + + contentStream.setLineWidth(LINE_WIDTH); + for (ColoredLine coloredLine : visualizationsOnPage.getColoredLines()) { + contentStream.setStrokingColor(coloredLine.color()); + contentStream.moveTo((float) coloredLine.line().getX1(), (float) coloredLine.line().getY1()); + contentStream.lineTo((float) coloredLine.line().getX2(), (float) coloredLine.line().getY2()); + contentStream.stroke(); + } + for (ColoredRectangle coloredRectangle : visualizationsOnPage.getColoredRectangles()) { + contentStream.setStrokingColor(coloredRectangle.color()); + Rectangle2D r = coloredRectangle.rectangle2D(); + contentStream.addRect((float) r.getX(), (float) r.getY(), (float) r.getWidth(), (float) r.getHeight()); + contentStream.stroke(); + } + for (FilledRectangle filledRectangle : visualizationsOnPage.getFilledRectangles()) { + contentStream.setNonStrokingColor(filledRectangle.color()); + PDExtendedGraphicsState graphicsState = new PDExtendedGraphicsState(); + graphicsState.setNonStrokingAlphaConstant(filledRectangle.alpha()); + contentStream.setGraphicsStateParameters(graphicsState); + Rectangle2D r = filledRectangle.rectangle2D(); + contentStream.addRect((float) r.getX(), (float) r.getY(), (float) r.getWidth(), (float) r.getHeight()); + contentStream.fill(); + } + for (PlacedText placedText : visualizationsOnPage.getPlacedTexts()) { + contentStream.setFont(font, FONT_SIZE); + contentStream.beginText(); + Matrix textMatrix = new Matrix((float) 
textDeRotationMatrix.getScaleX(), + (float) textDeRotationMatrix.getShearX(), + (float) textDeRotationMatrix.getShearY(), + (float) textDeRotationMatrix.getScaleY(), + (float) placedText.lineStart().getX(), + (float) placedText.lineStart().getY()); + textMatrix.translate(-((font.getStringWidth(placedText.text()) / 1000) * FONT_SIZE + (2 * LINE_WIDTH) + 4), -FONT_SIZE); + contentStream.setTextMatrix(textMatrix); + contentStream.showText(placedText.text()); + contentStream.endText(); + } + contentStream.restoreGraphicsState(); + contentStream.endMarkedContent(); + } + dictionariesToUpdate.add(pdPage.getCOSObject()); + dictionariesToUpdate.add(pdPage.getResources().getCOSObject()); + } + log.info("Written Layoutgrid to pdf streams"); + pdDocument.saveIncremental(outputStream, dictionariesToUpdate); + log.info("Saved Viewer Document"); + } + + + private static void addLayerToPageRessources(PDPage pdPage) { + + PDResources resources = pdPage.getResources(); + if (resources == null) { + resources = new PDResources(); + pdPage.setResources(resources); + } + } + + + private static void escapePreviousContents(PDDocument pdDocument, PDPage pdPage) throws IOException { + + try (var contentStream = new PDPageContentStream(pdDocument, pdPage, PDPageContentStream.AppendMode.PREPEND, false)) { + contentStream.saveGraphicsState(); + } + try (var contentStream = new PDPageContentStream(pdDocument, pdPage, PDPageContentStream.AppendMode.APPEND, false)) { + contentStream.restoreGraphicsState(); + } + } + + + private static PDOptionalContentGroup addLayerToDocument(PDDocument pdDocument, Set dictionariesToUpdate) { + + PDDocumentCatalog catalog = pdDocument.getDocumentCatalog(); + PDOptionalContentProperties ocprops = catalog.getOCProperties(); + if (ocprops == null) { + ocprops = new PDOptionalContentProperties(); + catalog.setOCProperties(ocprops); + } + PDOptionalContentGroup layer = null; + if (ocprops.hasGroup(layerName)) { + layer = ocprops.getGroup(layerName); + } else { + 
layer = new PDOptionalContentGroup(layerName); + ocprops.addGroup(layer); + } + ocprops.setGroupEnabled(layer, true); + dictionariesToUpdate.add(catalog.getCOSObject()); + return layer; + } + + + private static AffineTransform getTextDeRotationTransform(PDPage page) { + + return AffineTransform.getQuadrantRotateInstance(switch (page.getRotation()) { + case 90 -> 3; + case 180 -> 2; + case 270 -> 1; + default -> 0; + }); + } + +} diff --git a/layoutparser-service/layoutparser-service-server/build.gradle.kts b/layoutparser-service/layoutparser-service-server/build.gradle.kts index c0a03e3..d2ab67a 100644 --- a/layoutparser-service/layoutparser-service-server/build.gradle.kts +++ b/layoutparser-service/layoutparser-service-server/build.gradle.kts @@ -13,7 +13,7 @@ plugins { description = "layoutparser-service-server" val jacksonVersion = "2.15.2" -val pdfBoxVersion = "3.0.0-alpha2" +val pdfBoxVersion = "3.0.0-RC1" dependencies { implementation(project(":layoutparser-service-processor")) diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/BdrJsonBuildTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/BdrJsonBuildTest.java index cfa17e8..10db93f 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/BdrJsonBuildTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/BdrJsonBuildTest.java @@ -47,7 +47,10 @@ public class BdrJsonBuildTest extends AbstractTest { try (InputStream inputStream = new FileInputStream(filename)) { try (PDDocument pdDocument = Loader.loadPDF(inputStream)) { - return DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.TAAS, pdDocument, new ImageServiceResponse(), new TableServiceResponse())); + return 
DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.TAAS, + pdDocument, + new ImageServiceResponse(), + new TableServiceResponse())); } } } diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/HeadlinesGoldStandardIntegrationTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/HeadlinesGoldStandardIntegrationTest.java index 868034d..8da377e 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/HeadlinesGoldStandardIntegrationTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/HeadlinesGoldStandardIntegrationTest.java @@ -30,10 +30,10 @@ import com.iqser.red.storage.commons.StorageAutoConfiguration; import com.iqser.red.storage.commons.service.StorageService; import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType; import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingPipeline; -import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse; -import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse; import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document; import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SemanticNode; +import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse; +import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse; import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory; import com.knecon.fforesight.tenantcommons.TenantsClient; diff --git 
a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/BuildDocumentGraphTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/BuildDocumentGraphTest.java index 06d3861..092a530 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/BuildDocumentGraphTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/BuildDocumentGraphTest.java @@ -40,6 +40,13 @@ public class BuildDocumentGraphTest extends AbstractTest { @SneakyThrows protected Document buildGraph(String filename) { + return buildGraph(filename, LayoutParsingType.REDACT_MANAGER); + } + + + @SneakyThrows + protected Document buildGraph(String filename, LayoutParsingType layoutParsingType) { + if (filename.equals("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf")) { prepareStorage(filename, "cv_table_parsing_response/empty.json", "image_service_response/S-Metolachlor_RAR_01_Volume_1_2018-09-06.IMAGE_INFO.json"); } else { @@ -48,7 +55,7 @@ public class BuildDocumentGraphTest extends AbstractTest { ClassPathResource fileResource = new ClassPathResource(filename); try (InputStream inputStream = fileResource.getInputStream(); PDDocument pdDocument = Loader.loadPDF(inputStream)) { - return DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.DOCUMINE, + return DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(layoutParsingType, pdDocument, layoutParsingStorageService.getImagesFile(IMAGE_FILE_ID), new TableServiceResponse())); diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphJsonWritingTest.java 
b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphJsonWritingTest.java index 96e6402..95e8c48 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphJsonWritingTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphJsonWritingTest.java @@ -15,9 +15,9 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.iqser.red.commons.jackson.ObjectMapperFactory; import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentData; import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType; +import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse; -import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document; import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory; import com.knecon.fforesight.service.layoutparser.processor.services.mapper.DocumentDataMapper; diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphVisualizationTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphVisualizationTest.java index 48e4002..aa2351d 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphVisualizationTest.java +++ 
b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphVisualizationTest.java @@ -16,12 +16,14 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.textbloc import com.knecon.fforesight.service.layoutparser.server.utils.visualizations.PdfDraw; import lombok.SneakyThrows; +import lombok.extern.slf4j.Slf4j; +@Slf4j public class DocumentGraphVisualizationTest extends BuildDocumentGraphTest { @Test @SneakyThrows - @Disabled +// @Disabled public void visualizeMetolachlor() { String filename = "files/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf"; @@ -67,9 +69,12 @@ public class DocumentGraphVisualizationTest extends BuildDocumentGraphTest { try (var fileStream = fileResource.getInputStream();// PDDocument pdDocument = Loader.loadPDF(fileStream)// ) { - PdfDraw.drawDocumentGraph(pdDocument, documentGraph); - PdfDraw.drawTextBlock(pdDocument, textBlock, PdfDraw.Options.builder().stroke(true).strokeWidth(0.1f).strokeColor(Color.YELLOW).build()); - pdDocument.save(tmpFile); + log.info("drawing document"); + PdfDraw.drawDocumentGraph(pdDocument, documentGraph); + PdfDraw.drawTextBlock(pdDocument, textBlock, PdfDraw.Options.builder().stroke(true).strokeWidth(0.1f).strokeColor(Color.YELLOW).build()); + log.info("saving document"); + pdDocument.save(tmpFile); + log.info("saved document"); } } diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java new file mode 100644 index 0000000..ad363d9 --- /dev/null +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/ViewerDocumentTest.java @@ -0,0 +1,33 @@ +package com.knecon.fforesight.service.layoutparser.server.graph; + 
+import java.io.FileOutputStream; +import java.nio.file.Path; + +import org.apache.pdfbox.Loader; +import org.junit.jupiter.api.Test; +import org.springframework.core.io.ClassPathResource; + +import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType; +import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document; +import com.knecon.fforesight.service.layoutparser.processor.services.visualization.LayoutGridService; +import com.knecon.fforesight.service.layoutparser.processor.services.visualization.ViewerDocumentService; + +import lombok.SneakyThrows; + +public class ViewerDocumentTest extends BuildDocumentGraphTest { + + @Test + @SneakyThrows + public void testViewerDocument() { + + LayoutGridService layoutGridService = new LayoutGridService(); + ViewerDocumentService viewerDocumentService = new ViewerDocumentService(layoutGridService); + String fileName = "files/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf"; + Document document = buildGraph(fileName, LayoutParsingType.REDACT_MANAGER); + String tmpFileName = "/tmp/" + Path.of(fileName).getFileName() + "_VIEWER.pdf"; + try (var pdDocument = Loader.loadPDF(new ClassPathResource(fileName).getInputStream()); var out = new FileOutputStream(tmpFileName)) { + viewerDocumentService.createViewerDocument(pdDocument, document, out); + } + } + +} diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/AbstractTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/AbstractTest.java index cba516e..b9b58b3 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/AbstractTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/AbstractTest.java @@ -54,6 +54,7 @@ public abstract 
class AbstractTest { protected final static String PAGES_FILE_ID = "pages"; protected final static String TENANT_ID = "tenant"; protected final static String SECTION_GRID_ID = "section"; + protected final static String VIEWER_DOCUMENT_ID = "viewer"; protected final static String SIMPLIFIED_ID = "simplified"; @@ -70,6 +71,7 @@ public abstract class AbstractTest { .pageFileStorageId(PAGES_FILE_ID) .simplifiedTextStorageId(SIMPLIFIED_ID) .sectionGridStorageId(SECTION_GRID_ID) + .viewerDocumentStorageId(VIEWER_DOCUMENT_ID) .build(); } @@ -110,6 +112,7 @@ public abstract class AbstractTest { .pageFileStorageId(PAGES_FILE_ID) .simplifiedTextStorageId(SIMPLIFIED_ID) .sectionGridStorageId(SECTION_GRID_ID) + .viewerDocumentStorageId(VIEWER_DOCUMENT_ID) .build(); } @@ -143,6 +146,7 @@ public abstract class AbstractTest { .pageFileStorageId(PAGES_FILE_ID) .simplifiedTextStorageId(SIMPLIFIED_ID) .sectionGridStorageId(SECTION_GRID_ID) + .viewerDocumentStorageId(VIEWER_DOCUMENT_ID) .build(); } diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/visualizations/PdfDraw.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/visualizations/PdfDraw.java index 876c98c..63d2eb8 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/visualizations/PdfDraw.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/visualizations/PdfDraw.java @@ -14,7 +14,6 @@ import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDPageContentStream; import org.apache.pdfbox.pdmodel.font.PDType1Font; -import org.apache.pdfbox.pdmodel.font.Standard14Fonts; import org.apache.pdfbox.util.Matrix; import org.springframework.core.io.ClassPathResource; @@ 
-144,7 +143,7 @@ public class PdfDraw { } else { contentStream.newLineAtOffset((float) location.getX(), (float) location.getY()); } - contentStream.setFont(new PDType1Font(Standard14Fonts.FontName.HELVETICA), 10); + contentStream.setFont(PDType1Font.HELVETICA, 10); contentStream.showText(string); contentStream.endText(); contentStream.close(); diff --git a/push-custom-image.sh b/publish-custom-image.sh similarity index 100% rename from push-custom-image.sh rename to publish-custom-image.sh From 63de8ef82d683750a4f26b14d1877a32927946f1 Mon Sep 17 00:00:00 2001 From: Kilian Schuettler Date: Mon, 14 Aug 2023 16:03:41 +0200 Subject: [PATCH 2/2] RED-7158: add layoutgrid into new ViewerDocument as optional content * downgraded storage-commons --- .../services/visualization/ViewerDocumentService.java | 4 +--- .../layoutparser-service-server/build.gradle.kts | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/visualization/ViewerDocumentService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/visualization/ViewerDocumentService.java index 7690b19..ade1342 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/visualization/ViewerDocumentService.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/visualization/ViewerDocumentService.java @@ -52,9 +52,8 @@ public class ViewerDocumentService { log.info("Start Viewer Document Creation"); LayoutGrid layoutGrid = layoutGridService.createLayoutGrid(document); - log.info("Created Layout Grid"); // PDDocument.save() is very slow, since it actually traverses the entire pdf and writes a new one. 
- // If we collect all COSDictionaries we changed and tell it explicitly to only add the changed ones it's very fast. + // If we collect all COSDictionaries we changed and tell it explicitly to only add the changed ones by using saveIncremental it's very fast. Set dictionariesToUpdate = new HashSet<>(); PDOptionalContentGroup layer = addLayerToDocument(pdDocument, dictionariesToUpdate); PDFont font = PDType1Font.HELVETICA; @@ -119,7 +118,6 @@ public class ViewerDocumentService { dictionariesToUpdate.add(pdPage.getCOSObject()); dictionariesToUpdate.add(pdPage.getResources().getCOSObject()); } - log.info("Written Layoutgrid to pdf streams"); pdDocument.saveIncremental(outputStream, dictionariesToUpdate); log.info("Saved Viewer Document"); } diff --git a/layoutparser-service/layoutparser-service-server/build.gradle.kts b/layoutparser-service/layoutparser-service-server/build.gradle.kts index d2ab67a..467b14e 100644 --- a/layoutparser-service/layoutparser-service-server/build.gradle.kts +++ b/layoutparser-service/layoutparser-service-server/build.gradle.kts @@ -19,7 +19,7 @@ dependencies { implementation(project(":layoutparser-service-processor")) implementation(project(":layoutparser-service-internal-api")) - implementation("com.iqser.red.commons:storage-commons:2.31.0") + implementation("com.iqser.red.commons:storage-commons:2.27.0") implementation("com.knecon.fforesight:tenant-commons:0.10.0") implementation("org.springframework.boot:spring-boot-starter-actuator:3.1.2") implementation("com.amazonaws:aws-java-sdk-s3:1.12.528")