From 7f0fb149a90b6ca00602ff8f0a43887cf4e51f2c Mon Sep 17 00:00:00 2001 From: Kilian Schuettler Date: Thu, 4 Apr 2024 17:03:37 +0200 Subject: [PATCH] RED-8800: fix text location for weird mediaboxes --- .../ocr/processor/model/PageInformation.java | 36 +++++++++++++++++-- .../processor/model/RenderedPageOcrImage.java | 10 +----- .../threads/ImageProcessingThread.java | 2 +- 3 files changed, 35 insertions(+), 13 deletions(-) diff --git a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/model/PageInformation.java b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/model/PageInformation.java index 771b53a..ec4329a 100644 --- a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/model/PageInformation.java +++ b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/model/PageInformation.java @@ -1,12 +1,42 @@ package com.knecon.fforesight.service.ocr.processor.model; -import org.apache.pdfbox.pdmodel.PDPage; +import java.awt.geom.Rectangle2D; -public record PageInformation(int height, int width, int number, int rotationDegrees) { +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.common.PDRectangle; + +public record PageInformation(Rectangle2D mediabox, int number, int rotationDegrees) { public static PageInformation fromPDPage(int pageNum, PDPage page) { - return new PageInformation((int) page.getMediaBox().getHeight(), (int) page.getMediaBox().getWidth(), pageNum, page.getRotation()); + PDRectangle mediaBox = page.getMediaBox(); + return new PageInformation(new Rectangle2D.Double(mediaBox.getLowerLeftX(), mediaBox.getLowerLeftY(), mediaBox.getWidth(), mediaBox.getHeight()), + pageNum, + page.getRotation()); + } + + + public double height() { + + return mediabox.getHeight(); + } + + + public double width() { + + return mediabox.getWidth(); + } + + + public double minX() { + + return mediabox.getX(); + } + + + public double minY() { + + return mediabox.getY(); } } diff --git a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/model/RenderedPageOcrImage.java b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/model/RenderedPageOcrImage.java index efdadc7..e91456d 100644 --- a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/model/RenderedPageOcrImage.java +++ b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/model/RenderedPageOcrImage.java @@ -1,20 +1,12 @@ package com.knecon.fforesight.service.ocr.processor.model; import java.awt.geom.AffineTransform; -import java.awt.geom.Point2D; - -import org.apache.pdfbox.pdmodel.PDDocument; -import org.apache.pdfbox.pdmodel.PDPage; import lombok.AccessLevel; import lombok.Getter; import lombok.RequiredArgsConstructor; -import lombok.Setter; -import lombok.SneakyThrows; import lombok.experimental.FieldDefaults; -import net.sourceforge.lept4j.Leptonica1; import net.sourceforge.lept4j.Pix; -import net.sourceforge.tess4j.ITessAPI; @Getter @RequiredArgsConstructor @@ -32,7 +24,7 @@ public class RenderedPageOcrImage implements OcrImage { public AffineTransform getImageCTM() { double scalingFactor = calculateScalingFactor(); - AffineTransform imageToCropBoxScaling = new AffineTransform(scalingFactor, 0, 0, scalingFactor, 0, 0); + AffineTransform imageToCropBoxScaling = new AffineTransform(scalingFactor, 0, 0, scalingFactor, -pageInformation.minX(), -pageInformation.minY()); AffineTransform mirrorMatrix = new AffineTransform(1, 0, 0, -1, 0, pageInformation.height()); diff --git a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/threads/ImageProcessingThread.java b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/threads/ImageProcessingThread.java index 8825b45..1f233e5 100644 --- a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/threads/ImageProcessingThread.java +++ b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/threads/ImageProcessingThread.java @@ -45,7 +45,7 @@ public class ImageProcessingThread extends Thread { final BlockingQueue imageInputQueue; final BlockingQueue imageOutputQueue; final ITessAPI.TessBaseAPI detectionScriptHandle = initDetectionScriptHandle(); - final L_Kernel gaussianKernel = Leptonica1.makeGaussianKernel(2, 2, 1.2f, 1); + final L_Kernel gaussianKernel = Leptonica1.makeGaussianKernel(2, 2, 1.0f, 1); final Statistics stats; final OcrServiceSettings settings; final PDDocument document;