RED-8800: fix text location for weird mediaboxes

This commit is contained in:
Kilian Schuettler 2024-04-04 17:03:37 +02:00
parent ea11013132
commit 7f0fb149a9
3 changed files with 35 additions and 13 deletions

View File

@ -1,12 +1,42 @@
package com.knecon.fforesight.service.ocr.processor.model;
import org.apache.pdfbox.pdmodel.PDPage;
import java.awt.geom.Rectangle2D;
public record PageInformation(int height, int width, int number, int rotationDegrees) {
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
/**
 * Geometry of a single PDF page: its media box rectangle, the page number supplied by the caller
 * and the rotation reported by the page.
 *
 * <p>NOTE(review): {@link Rectangle2D} is a mutable JDK type, so the stored box is only as
 * immutable as its callers treat it — confirm nobody modifies it after construction.
 *
 * @param mediabox        media box rectangle (origin and size) as built in {@link #fromPDPage}
 * @param number          page number handed in by the caller (0- or 1-based is not visible here)
 * @param rotationDegrees value of {@code PDPage.getRotation()} at construction time
 */
public record PageInformation(Rectangle2D mediabox, int number, int rotationDegrees) {
/**
 * Creates a {@code PageInformation} from a PDFBox page.
 *
 * @param pageNum page number to store alongside the geometry
 * @param page    page whose media box and rotation are read
 * @return the page information for {@code page}
 */
public static PageInformation fromPDPage(int pageNum, PDPage page) {
// NOTE(review): the following return uses the four-int constructor of the previous record
// signature and truncates the box size to int; it appears to be the removed side of this
// diff hunk rather than live code — confirm against the committed file.
return new PageInformation((int) page.getMediaBox().getHeight(), (int) page.getMediaBox().getWidth(), pageNum, page.getRotation());
PDRectangle mediaBox = page.getMediaBox();
// Keep the lower-left corner as the rectangle origin so a media box that does not start at
// (0, 0) retains its offset; width and height are stored as doubles without truncation.
return new PageInformation(new Rectangle2D.Double(mediaBox.getLowerLeftX(), mediaBox.getLowerLeftY(), mediaBox.getWidth(), mediaBox.getHeight()),
pageNum,
page.getRotation());
}
/**
 * @return height of the stored media box
 */
public double height() {
return mediabox.getHeight();
}
/**
 * @return width of the stored media box
 */
public double width() {
return mediabox.getWidth();
}
/**
 * @return X coordinate of the media box origin (its lower-left X when built via {@link #fromPDPage})
 */
public double minX() {
return mediabox.getX();
}
/**
 * @return Y coordinate of the media box origin (its lower-left Y when built via {@link #fromPDPage})
 */
public double minY() {
return mediabox.getY();
}
}

View File

@ -1,20 +1,12 @@
package com.knecon.fforesight.service.ocr.processor.model;
import java.awt.geom.AffineTransform;
import java.awt.geom.Point2D;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import lombok.AccessLevel;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import lombok.Setter;
import lombok.SneakyThrows;
import lombok.experimental.FieldDefaults;
import net.sourceforge.lept4j.Leptonica1;
import net.sourceforge.lept4j.Pix;
import net.sourceforge.tess4j.ITessAPI;
@Getter
@RequiredArgsConstructor
@ -32,7 +24,7 @@ public class RenderedPageOcrImage implements OcrImage {
public AffineTransform getImageCTM() {
double scalingFactor = calculateScalingFactor();
AffineTransform imageToCropBoxScaling = new AffineTransform(scalingFactor, 0, 0, scalingFactor, 0, 0);
AffineTransform imageToCropBoxScaling = new AffineTransform(scalingFactor, 0, 0, scalingFactor, -pageInformation.minX(), -pageInformation.minY());
AffineTransform mirrorMatrix = new AffineTransform(1, 0, 0, -1, 0, pageInformation.height());

View File

@ -45,7 +45,7 @@ public class ImageProcessingThread extends Thread {
final BlockingQueue<UnprocessedImage> imageInputQueue;
final BlockingQueue<OcrImage> imageOutputQueue;
final ITessAPI.TessBaseAPI detectionScriptHandle = initDetectionScriptHandle();
final L_Kernel gaussianKernel = Leptonica1.makeGaussianKernel(2, 2, 1.2f, 1);
final L_Kernel gaussianKernel = Leptonica1.makeGaussianKernel(2, 2, 1.0f, 1);
final Statistics stats;
final OcrServiceSettings settings;
final PDDocument document;