From 2726fc3fe1387ae7792e614f5a6da98a5c6f1a34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kilian=20Sch=C3=BCttler?= Date: Mon, 15 Jul 2024 17:45:13 +0200 Subject: [PATCH] RED-8800: adjust coordinates in BE to ignore cropbox --- .../parsing/LegacyPDFStreamEngine.java | 22 ++++++- .../server/LayoutparserEnd2EndTest.java | 4 +- .../service/VisualizationWriter.java | 64 ++++++++++++++----- 3 files changed, 68 insertions(+), 22 deletions(-) diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/parsing/LegacyPDFStreamEngine.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/parsing/LegacyPDFStreamEngine.java index 326746d..560ea93 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/parsing/LegacyPDFStreamEngine.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/parsing/LegacyPDFStreamEngine.java @@ -82,6 +82,7 @@ public class LegacyPDFStreamEngine extends PDFStreamEngine { private int pageRotation; private PDRectangle pageSize; + private Matrix translateMatrix; private final GlyphList glyphList; private final Map fontHeightMap = new WeakHashMap(); @@ -133,6 +134,12 @@ public class LegacyPDFStreamEngine extends PDFStreamEngine { this.pageRotation = page.getRotation(); this.pageSize = page.getCropBox(); + if (pageSize.getLowerLeftX() == 0 && pageSize.getLowerLeftY() == 0) { + translateMatrix = null; + } else { + // translation matrix for cropbox + translateMatrix = Matrix.getTranslateInstance(-pageSize.getLowerLeftX(), -pageSize.getLowerLeftY()); + } super.processPage(page); } @@ -257,13 +264,22 @@ public class LegacyPDFStreamEngine extends PDFStreamEngine { return; } } + // adjust for cropbox if needed + Matrix translatedTextRenderingMatrix; + if (translateMatrix == null) { + translatedTextRenderingMatrix = textRenderingMatrix; + } else { + translatedTextRenderingMatrix = Matrix.concatenate(translateMatrix, textRenderingMatrix); + nextX -= pageSize.getLowerLeftX(); + nextY -= pageSize.getLowerLeftY(); + } // This is a hack for unicode letter with 2 chars e.g. RA see unicodeProblem.pdf if (unicodeMapping.length() == 2) { processTextPosition(new TextPosition(pageRotation, pageSize.getWidth(), pageSize.getHeight(), - textRenderingMatrix, + translatedTextRenderingMatrix, nextX, nextY, Math.abs(dyDisplay), @@ -277,7 +293,7 @@ public class LegacyPDFStreamEngine extends PDFStreamEngine { processTextPosition(new TextPosition(pageRotation, pageSize.getWidth(), pageSize.getHeight(), - textRenderingMatrix, + translatedTextRenderingMatrix, nextX, nextY, Math.abs(dyDisplay), @@ -293,7 +309,7 @@ public class LegacyPDFStreamEngine extends PDFStreamEngine { processTextPosition(new TextPosition(pageRotation, pageSize.getWidth(), pageSize.getHeight(), - textRenderingMatrix, + translatedTextRenderingMatrix, nextX, nextY, Math.abs(dyDisplay), diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/LayoutparserEnd2EndTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/LayoutparserEnd2EndTest.java index d73fc14..331c775 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/LayoutparserEnd2EndTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/LayoutparserEnd2EndTest.java @@ -30,11 +30,11 @@ public class LayoutparserEnd2EndTest extends AbstractTest { @Autowired private LayoutParsingPipeline layoutParsingPipeline; - @Disabled @Test + @Disabled public void testLayoutParserEndToEnd() { - String filePath = "files/Minimal Examples/RotateTextWithRulingsTestFile.pdf"; + String filePath = "/tmp/OCR_TEST/10.SYN524464 FS (A16148C) - Absorção cutânea.pdf/document.pdf"; runForFile(filePath); } diff --git a/layoutparser-service/viewer-doc-processor/src/main/java/com/knecon/fforesight/service/viewerdoc/service/VisualizationWriter.java b/layoutparser-service/viewer-doc-processor/src/main/java/com/knecon/fforesight/service/viewerdoc/service/VisualizationWriter.java index d949b09..6e625d5 100644 --- a/layoutparser-service/viewer-doc-processor/src/main/java/com/knecon/fforesight/service/viewerdoc/service/VisualizationWriter.java +++ b/layoutparser-service/viewer-doc-processor/src/main/java/com/knecon/fforesight/service/viewerdoc/service/VisualizationWriter.java @@ -53,7 +53,7 @@ public class VisualizationWriter { begin(page); AffineTransform textDeRotationMatrix = getTextDeRotationTransform(page); - AffineTransform pageTransformation = getTextDeRotationTransform(page); + AffineTransform pageTransformation = getCropboxAdjustment(page); for (LayerGroup layerGroup : layerGroups) { @@ -68,7 +68,7 @@ public class VisualizationWriter { Element escape = builder.createGroupBegin(); writer.writeElement(escape); - writeVisualizations(pageNumber, layerGroup, textDeRotationMatrix); + writeVisualizations(pageNumber, layerGroup, textDeRotationMatrix, pageTransformation); Element escapeEnd = builder.createGroupEnd(); writer.writeElement(escapeEnd); @@ -87,7 +87,14 @@ public class VisualizationWriter { } - private void writeVisualizations(int pageNumber, LayerGroup layerGroup, AffineTransform textDeRotationMatrix) throws PDFNetException { + @SneakyThrows + private AffineTransform getCropboxAdjustment(Page page) { + + return new AffineTransform(1, 0, 0, 1, page.getCropBox().getX1(), page.getCropBox().getY1()); + } + + + private void writeVisualizations(int pageNumber, LayerGroup layerGroup, AffineTransform textDeRotationMatrix, AffineTransform pageTransformation) throws PDFNetException { for (Visualizations visualization : layerGroup.getVisualizations()) { @@ -108,8 +115,7 @@ public class VisualizationWriter { Element escape = builder.createGroupBegin(); writer.writeElement(escape); - - writeVisualization(visualizationsOnPage, textDeRotationMatrix); + writeVisualization(visualizationsOnPage, textDeRotationMatrix, pageTransformation); Element escapeEnd = builder.createGroupEnd(); writer.writeElement(escapeEnd); @@ -139,7 +145,7 @@ public class VisualizationWriter { @SneakyThrows - private void writeVisualization(VisualizationsOnPage visualizationsOnPage, AffineTransform textDeRotationMatrix) { + private void writeVisualization(VisualizationsOnPage visualizationsOnPage, AffineTransform textDeRotationMatrix, AffineTransform pageTransformation) { if (visualizationsOnPage.isMakePathsInvisible()) { Element rect = builder.createRect(0, 0, 0, 0); @@ -149,28 +155,28 @@ public class VisualizationWriter { for (ColoredLine coloredLine : visualizationsOnPage.getColoredLines()) { - drawColoredLine(coloredLine); + drawColoredLine(coloredLine, pageTransformation); } for (ColoredRectangle coloredRectangle : visualizationsOnPage.getColoredRectangles()) { - drawColoredRectangle(coloredRectangle); + drawColoredRectangle(coloredRectangle, pageTransformation); } for (FilledRectangle filledRectangle : visualizationsOnPage.getFilledRectangles()) { - drawFilledRectangle(filledRectangle); + drawFilledRectangle(filledRectangle, pageTransformation); } for (PlacedText placedText : visualizationsOnPage.getPlacedTexts()) { - writePlacedText(textDeRotationMatrix, placedText); + writePlacedText(textDeRotationMatrix, placedText, pageTransformation); } } - private void writePlacedText(AffineTransform textDeRotationMatrix, PlacedText placedText) throws PDFNetException { + private void writePlacedText(AffineTransform textDeRotationMatrix, PlacedText placedText, AffineTransform pageTransformation) throws PDFNetException { float[] rgbComponents = placedText.color().getRGBColorComponents(null); Font font = fontMap.get(placedText.font()); @@ -180,7 +186,7 @@ public class VisualizationWriter { text.getGState().setFillColor(color); } - try (Matrix2D textMatrix = getTextMatrix(placedText, textDeRotationMatrix)) { + try (Matrix2D textMatrix = getTextMatrix(placedText, textDeRotationMatrix, pageTransformation)) { text.setTextMatrix(textMatrix); } @@ -217,11 +223,13 @@ public class VisualizationWriter { } - private void drawFilledRectangle(FilledRectangle filledRectangle) throws PDFNetException { + private void drawFilledRectangle(FilledRectangle filledRectangle, AffineTransform pageTransformation) throws PDFNetException { float[] rgbComponents = filledRectangle.color().getRGBColorComponents(null); Rectangle2D r = filledRectangle.rectangle2D(); + r = transformRect(r, pageTransformation); + Element rect = builder.createRect(r.getX(), r.getY(), r.getWidth(), r.getHeight()); rect.setPathFill(true); @@ -237,11 +245,13 @@ public class VisualizationWriter { } - private void drawColoredRectangle(ColoredRectangle coloredRectangle) throws PDFNetException { + private void drawColoredRectangle(ColoredRectangle coloredRectangle, AffineTransform pageTransformation) throws PDFNetException { float[] rgbComponents = coloredRectangle.color().getRGBColorComponents(null); Rectangle2D r = coloredRectangle.rectangle2D(); + r = transformRect(r, pageTransformation); + Element rect = builder.createRect(r.getX(), r.getY(), r.getWidth(), r.getHeight()); rect.setPathStroke(true); @@ -257,11 +267,19 @@ public class VisualizationWriter { } - private void drawColoredLine(ColoredLine coloredLine) throws PDFNetException { + private Rectangle2D transformRect(Rectangle2D r, AffineTransform pageTransformation) { + + return pageTransformation.createTransformedShape(r).getBounds2D(); + } + + + private void drawColoredLine(ColoredLine coloredLine, AffineTransform pageTransformation) throws PDFNetException { float[] rgbComponents = coloredLine.color().getRGBColorComponents(null); Line2D l = coloredLine.line(); + l = transformLine(pageTransformation, l); + builder.pathBegin(); builder.moveTo(l.getX1(), l.getY1()); builder.lineTo(l.getX2(), l.getY2()); @@ -279,8 +297,15 @@ public class VisualizationWriter { } + private static Line2D transformLine(AffineTransform pageTransformation, Line2D line) { + + return new Line2D.Double(pageTransformation.transform(line.getP1(), null), pageTransformation.transform(line.getP2(), null)); + } + + @SneakyThrows - private static Matrix2D getTextMatrix(PlacedText placedText, AffineTransform textDeRotationMatrix) { + @SuppressWarnings("PMD.CloseResource") + private static Matrix2D getTextMatrix(PlacedText placedText, AffineTransform textDeRotationMatrix, AffineTransform pageTransformation) { Matrix2D textMatrix; if (placedText.textMatrix().isEmpty()) { @@ -289,7 +314,12 @@ public class VisualizationWriter { var matrix = placedText.textMatrix().get(); textMatrix = toMatrix2D(matrix); } - return textMatrix; + try (var pageMatrix = toMatrix2D(pageTransformation)) { + return pageMatrix.multiply(textMatrix); + } finally { + textMatrix.close(); + } + }