diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/parsing/LegacyPDFStreamEngine.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/parsing/LegacyPDFStreamEngine.java index 326746d..560ea93 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/parsing/LegacyPDFStreamEngine.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/parsing/LegacyPDFStreamEngine.java @@ -82,6 +82,7 @@ public class LegacyPDFStreamEngine extends PDFStreamEngine { private int pageRotation; private PDRectangle pageSize; + private Matrix translateMatrix; private final GlyphList glyphList; private final Map fontHeightMap = new WeakHashMap(); @@ -133,6 +134,12 @@ public class LegacyPDFStreamEngine extends PDFStreamEngine { this.pageRotation = page.getRotation(); this.pageSize = page.getCropBox(); + if (pageSize.getLowerLeftX() == 0 && pageSize.getLowerLeftY() == 0) { + translateMatrix = null; + } else { + // translation matrix for cropbox + translateMatrix = Matrix.getTranslateInstance(-pageSize.getLowerLeftX(), -pageSize.getLowerLeftY()); + } super.processPage(page); } @@ -257,13 +264,22 @@ public class LegacyPDFStreamEngine extends PDFStreamEngine { return; } } + // adjust for cropbox if needed + Matrix translatedTextRenderingMatrix; + if (translateMatrix == null) { + translatedTextRenderingMatrix = textRenderingMatrix; + } else { + translatedTextRenderingMatrix = Matrix.concatenate(translateMatrix, textRenderingMatrix); + nextX -= pageSize.getLowerLeftX(); + nextY -= pageSize.getLowerLeftY(); + } // This is a hack for unicode letter with 2 chars e.g. RA see unicodeProblem.pdf if (unicodeMapping.length() == 2) { processTextPosition(new TextPosition(pageRotation, pageSize.getWidth(), pageSize.getHeight(), - textRenderingMatrix, + translatedTextRenderingMatrix, nextX, nextY, Math.abs(dyDisplay), @@ -277,7 +293,7 @@ public class LegacyPDFStreamEngine extends PDFStreamEngine { processTextPosition(new TextPosition(pageRotation, pageSize.getWidth(), pageSize.getHeight(), - textRenderingMatrix, + translatedTextRenderingMatrix, nextX, nextY, Math.abs(dyDisplay), @@ -293,7 +309,7 @@ public class LegacyPDFStreamEngine extends PDFStreamEngine { processTextPosition(new TextPosition(pageRotation, pageSize.getWidth(), pageSize.getHeight(), - textRenderingMatrix, + translatedTextRenderingMatrix, nextX, nextY, Math.abs(dyDisplay),