From e62e4f9293c49a4404ea3635a5cf1d8d8f0325ea Mon Sep 17 00:00:00 2001 From: Kilian Schuettler Date: Mon, 15 Jul 2024 16:25:21 +0200 Subject: [PATCH] RED-8800: adjust PDFTronViewerDocumentService to cropbox --- .../server/LayoutparserEnd2EndTest.java | 4 +- .../service/VisualizationWriter.java | 59 ++++++++++++++----- 2 files changed, 45 insertions(+), 18 deletions(-) diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/LayoutparserEnd2EndTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/LayoutparserEnd2EndTest.java index d73fc14..331c775 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/LayoutparserEnd2EndTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/LayoutparserEnd2EndTest.java @@ -30,11 +30,11 @@ public class LayoutparserEnd2EndTest extends AbstractTest { @Autowired private LayoutParsingPipeline layoutParsingPipeline; - @Disabled @Test + @Disabled public void testLayoutParserEndToEnd() { - String filePath = "files/Minimal Examples/RotateTextWithRulingsTestFile.pdf"; + String filePath = "/tmp/OCR_TEST/10.SYN524464 FS (A16148C) - Absorção cutânea.pdf/document.pdf"; runForFile(filePath); } diff --git a/layoutparser-service/viewer-doc-processor/src/main/java/com/knecon/fforesight/service/viewerdoc/service/VisualizationWriter.java b/layoutparser-service/viewer-doc-processor/src/main/java/com/knecon/fforesight/service/viewerdoc/service/VisualizationWriter.java index d949b09..572b090 100644 --- a/layoutparser-service/viewer-doc-processor/src/main/java/com/knecon/fforesight/service/viewerdoc/service/VisualizationWriter.java +++ b/layoutparser-service/viewer-doc-processor/src/main/java/com/knecon/fforesight/service/viewerdoc/service/VisualizationWriter.java @@ -53,7 +53,7 @@ public class VisualizationWriter { begin(page); AffineTransform textDeRotationMatrix = getTextDeRotationTransform(page); - AffineTransform pageTransformation = getTextDeRotationTransform(page); + AffineTransform pageTransformation = getCropboxAdjustment(page); for (LayerGroup layerGroup : layerGroups) { @@ -68,7 +68,7 @@ public class VisualizationWriter { Element escape = builder.createGroupBegin(); writer.writeElement(escape); - writeVisualizations(pageNumber, layerGroup, textDeRotationMatrix); + writeVisualizations(pageNumber, layerGroup, textDeRotationMatrix, pageTransformation); Element escapeEnd = builder.createGroupEnd(); writer.writeElement(escapeEnd); @@ -87,7 +87,14 @@ public class VisualizationWriter { } - private void writeVisualizations(int pageNumber, LayerGroup layerGroup, AffineTransform textDeRotationMatrix) throws PDFNetException { + @SneakyThrows + private AffineTransform getCropboxAdjustment(Page page) { + + return new AffineTransform(1, 0, 0, 1, page.getCropBox().getX1(), page.getCropBox().getY1()); + } + + + private void writeVisualizations(int pageNumber, LayerGroup layerGroup, AffineTransform textDeRotationMatrix, AffineTransform pageTransformation) throws PDFNetException { for (Visualizations visualization : layerGroup.getVisualizations()) { @@ -108,8 +115,7 @@ public class VisualizationWriter { Element escape = builder.createGroupBegin(); writer.writeElement(escape); - - writeVisualization(visualizationsOnPage, textDeRotationMatrix); + writeVisualization(visualizationsOnPage, textDeRotationMatrix, pageTransformation); Element escapeEnd = builder.createGroupEnd(); writer.writeElement(escapeEnd); @@ -139,7 +145,7 @@ public class VisualizationWriter { @SneakyThrows - private void writeVisualization(VisualizationsOnPage visualizationsOnPage, AffineTransform textDeRotationMatrix) { + private void writeVisualization(VisualizationsOnPage visualizationsOnPage, AffineTransform textDeRotationMatrix, AffineTransform pageTransformation) { if (visualizationsOnPage.isMakePathsInvisible()) { Element rect = builder.createRect(0, 0, 0, 0); @@ -149,28 +155,28 @@ public class VisualizationWriter { for (ColoredLine coloredLine : visualizationsOnPage.getColoredLines()) { - drawColoredLine(coloredLine); + drawColoredLine(coloredLine, pageTransformation); } for (ColoredRectangle coloredRectangle : visualizationsOnPage.getColoredRectangles()) { - drawColoredRectangle(coloredRectangle); + drawColoredRectangle(coloredRectangle, pageTransformation); } for (FilledRectangle filledRectangle : visualizationsOnPage.getFilledRectangles()) { - drawFilledRectangle(filledRectangle); + drawFilledRectangle(filledRectangle, pageTransformation); } for (PlacedText placedText : visualizationsOnPage.getPlacedTexts()) { - writePlacedText(textDeRotationMatrix, placedText); + writePlacedText(textDeRotationMatrix, placedText, pageTransformation); } } - private void writePlacedText(AffineTransform textDeRotationMatrix, PlacedText placedText) throws PDFNetException { + private void writePlacedText(AffineTransform textDeRotationMatrix, PlacedText placedText, AffineTransform pageTransformation) throws PDFNetException { float[] rgbComponents = placedText.color().getRGBColorComponents(null); Font font = fontMap.get(placedText.font()); @@ -180,7 +186,7 @@ public class VisualizationWriter { text.getGState().setFillColor(color); } - try (Matrix2D textMatrix = getTextMatrix(placedText, textDeRotationMatrix)) { + try (Matrix2D textMatrix = getTextMatrix(placedText, textDeRotationMatrix, pageTransformation)) { text.setTextMatrix(textMatrix); } @@ -217,11 +223,13 @@ public class VisualizationWriter { } - private void drawFilledRectangle(FilledRectangle filledRectangle) throws PDFNetException { + private void drawFilledRectangle(FilledRectangle filledRectangle, AffineTransform pageTransformation) throws PDFNetException { float[] rgbComponents = filledRectangle.color().getRGBColorComponents(null); Rectangle2D r = filledRectangle.rectangle2D(); + r = transformRect(r, pageTransformation); + Element rect = builder.createRect(r.getX(), r.getY(), r.getWidth(), r.getHeight()); rect.setPathFill(true); @@ -237,11 +245,13 @@ public class VisualizationWriter { } - private void drawColoredRectangle(ColoredRectangle coloredRectangle) throws PDFNetException { + private void drawColoredRectangle(ColoredRectangle coloredRectangle, AffineTransform pageTransformation) throws PDFNetException { float[] rgbComponents = coloredRectangle.color().getRGBColorComponents(null); Rectangle2D r = coloredRectangle.rectangle2D(); + r = transformRect(r, pageTransformation); + Element rect = builder.createRect(r.getX(), r.getY(), r.getWidth(), r.getHeight()); rect.setPathStroke(true); @@ -257,11 +267,19 @@ public class VisualizationWriter { } - private void drawColoredLine(ColoredLine coloredLine) throws PDFNetException { + private Rectangle2D transformRect(Rectangle2D r, AffineTransform pageTransformation) { + + return pageTransformation.createTransformedShape(r).getBounds2D(); + } + + + private void drawColoredLine(ColoredLine coloredLine, AffineTransform pageTransformation) throws PDFNetException { float[] rgbComponents = coloredLine.color().getRGBColorComponents(null); Line2D l = coloredLine.line(); + l = transformLine(pageTransformation, l); + builder.pathBegin(); builder.moveTo(l.getX1(), l.getY1()); builder.lineTo(l.getX2(), l.getY2()); @@ -279,8 +297,14 @@ public class VisualizationWriter { } + private static Line2D transformLine(AffineTransform pageTransformation, Line2D line) { + + return new Line2D.Double(pageTransformation.transform(line.getP1(), null), pageTransformation.transform(line.getP2(), null)); + } + + @SneakyThrows - private static Matrix2D getTextMatrix(PlacedText placedText, AffineTransform textDeRotationMatrix) { + private static Matrix2D getTextMatrix(PlacedText placedText, AffineTransform textDeRotationMatrix, AffineTransform pageTransformation) { Matrix2D textMatrix; if (placedText.textMatrix().isEmpty()) { @@ -289,6 +313,9 @@ public class VisualizationWriter { var matrix = placedText.textMatrix().get(); textMatrix = toMatrix2D(matrix); } + try (var pageMatrix = toMatrix2D(pageTransformation)) { + textMatrix.multiply(pageMatrix); + } return textMatrix; }