Compare commits

...

6 Commits

Author SHA1 Message Date
Kilian Schuettler
84abba46a5 RED-8670: write IdpResult to file 2024-12-06 10:08:13 +01:00
Kilian Schuettler
c3f3bb47dd RED-8670: write IdpResult to file 2024-12-06 10:06:38 +01:00
Kilian Schuettler
342a64bc6d upgrade ghostscript version 2024-11-27 10:49:22 +01:00
Kilian Schuettler
8aa4540d89 upgrade ghostscript version 2024-11-27 10:43:27 +01:00
Kilian Schuettler
429e071408 change loglevel 2024-11-26 19:45:43 +01:00
Kilian Schuettler
cd3cc63291 change loglevel 2024-11-26 17:34:35 +01:00
11 changed files with 254 additions and 37 deletions

View File

@ -5,9 +5,10 @@ import java.awt.geom.Line2D;
import java.awt.geom.Point2D;
import java.awt.geom.Rectangle2D;
import java.util.List;
import java.util.Objects;
import java.util.stream.Stream;
public record QuadPoint(Point2D a, Point2D b, Point2D c, Point2D d) {
public final class QuadPoint {
public enum Direction {
RIGHT,
@ -41,7 +42,23 @@ public record QuadPoint(Point2D a, Point2D b, Point2D c, Point2D d) {
* ?|_____|?
*/
}
private static final double THRESHOLD_ANGLE = Math.toRadians(5); // QuadPoint is considered straight, when its angles are below this threshold
private static final double THRESHOLD_ANGLE = Math.toRadians(5); // QuadPoint is considered straight, when its angles are below this threshold.
private final Point2D a;
private final Point2D b;
private final Point2D c;
private final Point2D d;
/**
 * Creates a QuadPoint from its four points.
 * <p>
 * The points are expected to form a convex polygon. This precondition is deliberately not
 * asserted here for performance reasons, so callers are responsible for passing valid input.
 *
 * @param a point stored as {@code a}
 * @param b point stored as {@code b}
 * @param c point stored as {@code c}
 * @param d point stored as {@code d}
 */
public QuadPoint(Point2D a, Point2D b, Point2D c, Point2D d) {

    this.a = a;
    this.b = b;
    this.c = c;
    this.d = d;
}
public static QuadPoint fromRectangle2D(Rectangle2D rectangle2D) {
@ -146,6 +163,74 @@ public record QuadPoint(Point2D a, Point2D b, Point2D c, Point2D d) {
}
/**
 * Checks whether the point {@code (x, y)} lies within this QuadPoint.
 * <p>
 * The quadrilateral is split along the diagonal a-c into the triangles (a, b, c) and (a, c, d);
 * the point is contained if either triangle contains it. This only works if the QuadPoint is
 * convex and its points are stored in the correct order.
 *
 * @param x x coordinate of the point to test
 * @param y y coordinate of the point to test
 * @return {@code true} if one of the two triangles contains the point
 */
public boolean contains(double x, double y) {

    if (triangleContains(a, b, c, x, y)) {
        return true;
    }
    return triangleContains(a, c, d, x, y);
}
/*
 * Tests whether the point (x, y) lies inside, or on the border of, the triangle (a, b, c).
 *
 * The point is converted to barycentric coordinates relative to vertex c using Cramer's rule.
 * It belongs to the triangle when both computed weights are non-negative and their sum does
 * not exceed 1 (which makes the implicit third weight non-negative as well).
 * https://en.wikipedia.org/wiki/Barycentric_coordinate_system#Barycentric_coordinates_on_triangles
 *
 * For a degenerate triangle the determinant is 0; the resulting infinite/NaN weights fail the
 * comparisons below, so the method returns false in that case.
 */
private boolean triangleContains(Point2D a, Point2D b, Point2D c, double x, double y) {

    // edge differences used by both the determinant and the weights
    double deltaBcY = b.getY() - c.getY();
    double deltaCbX = c.getX() - b.getX();
    double deltaAcX = a.getX() - c.getX();
    double deltaAcY = a.getY() - c.getY();
    double deltaCaY = c.getY() - a.getY();

    // position of the tested point relative to vertex c
    double relX = x - c.getX();
    double relY = y - c.getY();

    // determinant of the linear system, i.e. twice the signed area of the triangle
    double denominator = (deltaBcY * deltaAcX + deltaCbX * deltaAcY);
    double invertedDenominator = 1.0 / denominator;

    double alpha = (deltaBcY * relX + deltaCbX * relY) * invertedDenominator;
    double beta = (deltaCaY * relX + deltaAcX * relY) * invertedDenominator;

    boolean alphaInRange = alpha >= 0;
    boolean betaInRange = beta >= 0;
    return alphaInRange && betaInRange && alpha + beta <= 1;
}
/**
 * Checks whether the given point lies within this QuadPoint.
 *
 * @param p the point to test
 * @return the result of {@link #contains(double, double)} for the point's coordinates
 */
public boolean contains(Point2D p) {

    double px = p.getX();
    double py = p.getY();
    return contains(px, py);
}
/**
 * Checks whether the given rectangle lies completely within this QuadPoint.
 * <p>
 * The rectangle counts as contained when each of its four corners is contained.
 *
 * @param r the rectangle to test
 * @return {@code true} if all four corners of {@code r} are contained in this QuadPoint
 */
public boolean contains(Rectangle2D r) {

    double minX = r.getX();
    double minY = r.getY();
    double maxY = r.getMaxY();
    double maxX = r.getMaxX();

    Point2D[] corners = {new Point2D.Double(minX, minY), new Point2D.Double(minX, maxY), new Point2D.Double(maxX, maxY), new Point2D.Double(maxX, minY)};

    // stop at the first corner that is outside
    for (Point2D corner : corners) {
        if (!contains(corner)) {
            return false;
        }
    }
    return true;
}
/**
 * Computes the x coordinate of the center of this QuadPoint.
 *
 * @return the arithmetic mean of the x coordinates of a, b, c and d
 */
public double getCenterX() {

    double sumX = a.getX() + b.getX() + c.getX() + d.getX();
    return sumX / 4;
}
/**
 * Computes the y coordinate of the center of this QuadPoint.
 *
 * @return the arithmetic mean of the y coordinates of a, b, c and d
 */
public double getCenterY() {

    double sumY = a.getY() + b.getY() + c.getY() + d.getY();
    return sumY / 4;
}
/**
 * Computes the center of this QuadPoint.
 *
 * @return a new point built from {@link #getCenterX()} and {@link #getCenterY()}
 */
public Point2D getCenter() {

    double centerX = getCenterX();
    double centerY = getCenterY();
    return new Point2D.Double(centerX, centerY);
}
/**
 * Checks whether the given line intersects this QuadPoint.
 * <p>
 * This is the case when either end point of the line is contained in this QuadPoint, or when
 * the line intersects any of the lines returned by {@code asLines()}.
 *
 * @param line the line to test
 * @return {@code true} if the line touches or crosses this QuadPoint
 */
public boolean intersects(Line2D line) {

    if (contains(line.getP1())) {
        return true;
    }
    if (contains(line.getP2())) {
        return true;
    }
    return asLines().anyMatch(qLine -> qLine.intersectsLine(line));
}
/**
* Determines if the given QuadPoint aligns with this QuadPoint within a given threshold.
* It does so by trying every possible combination of aligning sides. It starts with the most likely combination of ab and cd.
@ -224,4 +309,37 @@ public record QuadPoint(Point2D a, Point2D b, Point2D c, Point2D d) {
return Math.atan2(deltaY, deltaX);
}
/**
 * @return point {@code a} of this QuadPoint
 */
public Point2D a() {

    return a;
}


/**
 * @return point {@code b} of this QuadPoint
 */
public Point2D b() {

    return b;
}


/**
 * @return point {@code c} of this QuadPoint
 */
public Point2D c() {

    return c;
}


/**
 * @return point {@code d} of this QuadPoint
 */
public Point2D d() {

    return d;
}
/**
 * Two QuadPoints are equal when they are of the same class and all four points are equal
 * pairwise (a with a, b with b, c with c, d with d).
 *
 * @param obj the object to compare with
 * @return {@code true} if {@code obj} is a QuadPoint with equal points
 */
@Override
public boolean equals(Object obj) {

    if (this == obj) {
        return true;
    }
    if (obj == null) {
        return false;
    }
    if (this.getClass() != obj.getClass()) {
        return false;
    }

    QuadPoint other = (QuadPoint) obj;
    if (!Objects.equals(a, other.a)) {
        return false;
    }
    if (!Objects.equals(b, other.b)) {
        return false;
    }
    if (!Objects.equals(c, other.c)) {
        return false;
    }
    return Objects.equals(d, other.d);
}
/**
 * Combines the hash codes of the four points, in the order a, b, c, d, using the usual
 * 31-based polynomial scheme (the same result {@code Objects.hash(a, b, c, d)} produces).
 *
 * @return hash code consistent with {@link #equals(Object)}
 */
@Override
public int hashCode() {

    int result = 1;
    result = 31 * result + Objects.hashCode(a);
    result = 31 * result + Objects.hashCode(b);
    result = 31 * result + Objects.hashCode(c);
    result = 31 * result + Objects.hashCode(d);
    return result;
}
}

View File

@ -5,4 +5,9 @@ import lombok.Builder;
@Builder
public record QuadPointData(float[] values) {
/**
 * Converts this data record into a {@link QuadPoint}.
 *
 * @return the result of {@code QuadPoint.fromData(this)}
 */
public QuadPoint get() {

    QuadPoint quadPoint = QuadPoint.fromData(this);
    return quadPoint;
}
}

View File

@ -13,7 +13,7 @@ import com.pdftron.pdf.Rect;
import lombok.SneakyThrows;
public record PageInformation(Rectangle2D mediabox, int number, int rotationDegrees, List<Rectangle2D> wordBBoxes) {
public record PageInformation(Rectangle2D mediabox, Rectangle2D cropBox, int number, int rotationDegrees, List<Rectangle2D> wordBBoxes) {
@SneakyThrows
public static Map<Integer, PageInformation> fromPDFDoc(PDFDoc pdfDoc) {
@ -34,8 +34,9 @@ public record PageInformation(Rectangle2D mediabox, int number, int rotationDegr
@SneakyThrows
public static PageInformation fromPage(int pageNum, Page page) {
try (Rect mediaBox = page.getCropBox()) {
try (Rect mediaBox = page.getCropBox(); Rect cropBox = page.getCropBox()) {
return new PageInformation(new Rectangle2D.Double(mediaBox.getX1(), mediaBox.getY1(), mediaBox.getWidth(), mediaBox.getHeight()),
new Rectangle2D.Double(cropBox.getX1(), cropBox.getY1(), cropBox.getWidth(), cropBox.getHeight()),
pageNum,
page.getRotation() * 90,
DocumentTextExtractor.getTextBBoxes(page));

View File

@ -126,7 +126,7 @@ public class OCRService {
File viewerDocumentFile,
String fileId,
String dossierId,
File analyzeResultFile,
File idpResultFile,
Set<AzureOcrFeature> features) {
try (var in = new FileInputStream(documentFile); PDFDoc pdfDoc = new PDFDoc(in)) {
@ -149,7 +149,7 @@ public class OCRService {
}
if (features.contains(AzureOcrFeature.IDP)) {
saveAnalyzeResultFile(analyzeResultFile, ocrResult);
saveIdpResultFile(idpResultFile, ocrResult);
}
supervisor.getStatistics().drawingPdfFinished();
@ -162,9 +162,9 @@ public class OCRService {
}
private void saveAnalyzeResultFile(File analyzeResultFile, OcrResult ocrResult) throws IOException {
private void saveIdpResultFile(File idpResultFile, OcrResult ocrResult) throws IOException {
try (var out = new FileOutputStream(analyzeResultFile)) {
try (var out = new FileOutputStream(idpResultFile)) {
mapper.writeValue(out, ocrResult.idpResult());
}
}

View File

@ -6,6 +6,7 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;
import java.util.function.Consumer;
import org.springframework.stereotype.Service;
@ -15,14 +16,12 @@ import com.knecon.fforesight.service.ocr.processor.model.ImageFile;
import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.experimental.FieldDefaults;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@Service
@RequiredArgsConstructor
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
@SuppressWarnings("PMD") // can't figure out how to safely close the stdOut and stdError streams in line 72/74
public class GhostScriptService {
@ -34,6 +33,37 @@ public class GhostScriptService {
private Semaphore concurrencySemaphore = new Semaphore(3);
/**
 * Creates the service and checks right away that Ghostscript can be launched, so a missing
 * installation is reported when the service is constructed instead of on first use.
 *
 * @param ocrServiceSettings the settings stored in this service
 */
public GhostScriptService(OcrServiceSettings ocrServiceSettings) {

    this.ocrServiceSettings = ocrServiceSettings;

    // fail fast: throws if the Ghostscript check does not succeed
    this.assertGhostscriptIsInstalled();
}
/**
 * Verifies that the {@code gs} executable can be launched by running {@code gs -v} and logging
 * its output. Standard output is logged line by line on info level, any non-blank error output
 * on error level.
 *
 * @throws RuntimeException if the process cannot be started, does not terminate within one
 *                          second, or the current thread is interrupted while waiting
 */
private void assertGhostscriptIsInstalled() {

    Process p = null;
    try {
        // Explicit argument list instead of Runtime.exec(String), which tokenizes a command string.
        p = new ProcessBuilder("gs", "-v").start();

        // Deliberately NOT an `assert`: assertions are disabled by default, in which case the wait
        // (a side effect) would silently never be executed. The version banner is small, so waiting
        // before draining the pipes cannot block the child on a full pipe buffer.
        if (!p.waitFor(1, TimeUnit.SECONDS)) {
            throw new IllegalStateException("'gs -v' did not terminate within 1 second.");
        }

        String out;
        String error;
        // Close both pipes once they have been drained.
        try (InputStream stdOut = p.getInputStream(); InputStream errOut = p.getErrorStream()) {
            out = new String(stdOut.readAllBytes());
            error = new String(errOut.readAllBytes());
        }

        log.info("Ghostscript is installed.");
        for (String line : out.split("\n")) {
            log.info(line);
        }
        if (!error.isBlank()) {
            log.error(error);
        }
    } catch (Exception e) {
        if (e instanceof InterruptedException) {
            // Preserve the interrupt status for callers further up the stack.
            Thread.currentThread().interrupt();
        }
        log.error("Ghostscript is not installed!");
        log.error(e.getMessage(), e);
        throw new RuntimeException(e);
    } finally {
        // Do not leave a hanging child process behind after a timeout or an interrupt.
        if (p != null && p.isAlive()) {
            p.destroyForcibly();
        }
    }
}
@SneakyThrows
public void startBatchRender(PageBatch batch, ImageProcessingSupervisor supervisor, Consumer<ImageFile> successHandler, Consumer<String> errorHandler) {

View File

@ -3,7 +3,6 @@ package com.knecon.fforesight.service.ocr.processor.visualizations.layers;
import static com.knecon.fforesight.service.ocr.processor.utils.StringCleaningUtility.cleanString;
import java.awt.geom.AffineTransform;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
@ -23,9 +22,9 @@ import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
import com.knecon.fforesight.service.ocr.processor.model.PageInformation;
import com.knecon.fforesight.service.ocr.processor.visualizations.utils.Rectangle2DBBoxCollector;
import com.knecon.fforesight.service.ocr.processor.visualizations.utils.RotationCorrectionUtility;
import com.knecon.fforesight.service.ocr.v1.api.model.IdpResult;
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
import com.knecon.fforesight.service.ocr.v1.api.model.Figure;
import com.knecon.fforesight.service.ocr.v1.api.model.IdpResult;
import com.knecon.fforesight.service.ocr.v1.api.model.KeyValuePair;
import com.knecon.fforesight.service.ocr.v1.api.model.QuadPoint;
import com.knecon.fforesight.service.ocr.v1.api.model.Region;
@ -65,11 +64,14 @@ public class IdpResultFactory {
public AffineTransform getResultToPageTransform(Integer pageNumber) {
AffineTransform transform = resultToPageTransforms.get(pageNumber);
AffineTransform transform;
if (rotationCorrection) {
PageInformation page = pageInformation.get(pageNumber);
transform.preConcatenate(RotationCorrectionUtility.buildTransform(angles.get(pageNumber), page.width(), page.height(), false));
transform = RotationCorrectionUtility.buildTransform(-angles.get(pageNumber), page.cropBox().getWidth(), page.cropBox().getHeight(), false);
} else {
transform = new AffineTransform();
}
transform.concatenate(resultToPageTransforms.get(pageNumber));
return transform;
}
@ -77,12 +79,18 @@ public class IdpResultFactory {
public void addAnalyzeResult(AnalyzeResult analyzeResult, PageBatch batch) {
DocumentSpanLookup words = new DocumentSpanLookup(analyzeResult);
analyzeResult.getTables()
.forEach(documentTable -> addTable(documentTable, words, batch));
analyzeResult.getKeyValuePairs()
.forEach(documentKeyValuePair -> addKeyValuePair(documentKeyValuePair, batch));
analyzeResult.getFigures()
.forEach(documentFigure -> addFigure(documentFigure, batch, words));
if (analyzeResult.getTables() != null) {
analyzeResult.getTables()
.forEach(documentTable -> addTable(documentTable, words, batch));
}
if (analyzeResult.getKeyValuePairs() != null) {
analyzeResult.getKeyValuePairs()
.forEach(documentKeyValuePair -> addKeyValuePair(documentKeyValuePair, batch));
}
if (analyzeResult.getFigures() != null) {
analyzeResult.getFigures()
.forEach(documentFigure -> addFigure(documentFigure, batch, words));
}
}
@ -204,6 +212,9 @@ public class IdpResultFactory {
private Region toRegionFromRegions(int pageNumber, List<BoundingRegion> regions) {
if (regions.size() == 1) {
return new Region(pageNumber, QuadPoint.fromPolygons(regions.get(0).getPolygon()).getTransformed(getResultToPageTransform(pageNumber)).toData());
}
QuadPoint bbox = QuadPoint.fromRectangle2D(regions.stream()
.map(BoundingRegion::getPolygon)
.map(QuadPoint::fromPolygons)
@ -224,13 +235,7 @@ public class IdpResultFactory {
throw new AssertionError();
}
int pageNumber = batch.getPageNumber(batchPageNumber);
QuadPoint bbox = QuadPoint.fromRectangle2D(regions.stream()
.map(BoundingRegion::getPolygon)
.map(QuadPoint::fromPolygons)
.map(qp -> qp.getTransformed(getResultToPageTransform(pageNumber)).getBounds2D())
.collect(new Rectangle2DBBoxCollector()));
return new Region(pageNumber, bbox.toData());
return toRegionFromRegions(pageNumber, regions);
}
}

View File

@ -85,12 +85,14 @@ public class RotationCorrectionUtility {
List<String> commands = new LinkedList<>();
double scale = getScalingFactor(angle, page);
double x = page.getCropBox().getWidth() / 2;
double y = page.getCropBox().getHeight() / 2;
commands.add("q");
commands.add("/%s <<>> BDC".formatted(KNECON_ROTATION_CORRECTION.markedContentName()));
commands.add(buildMatrixCommands(AffineTransform.getTranslateInstance(page.getPageWidth() / 2, page.getPageHeight() / 2)));
commands.add(buildMatrixCommands(AffineTransform.getTranslateInstance(x, y)));
commands.add(buildMatrixCommands(AffineTransform.getRotateInstance(Math.toRadians(angle))));
commands.add(buildMatrixCommands(AffineTransform.getScaleInstance(scale, scale)));
commands.add(buildMatrixCommands(AffineTransform.getTranslateInstance(-page.getPageWidth() / 2, -page.getPageHeight() / 2)));
commands.add(buildMatrixCommands(AffineTransform.getTranslateInstance(-x, -y)));
commands.add("EMC");
return String.join("\n", commands);
}
@ -197,7 +199,7 @@ public class RotationCorrectionUtility {
public static double getRemainingAngle(double angle, int quadrants) {
double referenceAngle = 90 * quadrants;
return angle - referenceAngle;
return (angle - referenceAngle) % 360;
}

View File

@ -34,6 +34,9 @@ import lombok.extern.slf4j.Slf4j;
public class OcrMessageReceiver {
public static final String OCR_REQUEST_LISTENER_ID = "ocr-request-listener";
public static final String IDP_RESULT_FILE_NAME = "idpResult.json";
public static final String VIEWER_DOCUMENT_FILE_NAME = "viewerDocument.pdf";
public static final String DOCUMENT_FILE_NAME = "document.pdf";
FileStorageService fileStorageService;
ObjectMapper objectMapper;
@ -60,9 +63,9 @@ public class OcrMessageReceiver {
ocrMessageSender.sendOCRStarted(fileId);
File documentFile = runDir.resolve("document.pdf").toFile();
File viewerDocumentFile = runDir.resolve("viewerDocument.pdf").toFile();
File analyzeResultFile = runDir.resolve("azureAnalysisResult.json").toFile();
File documentFile = runDir.resolve(DOCUMENT_FILE_NAME).toFile();
File viewerDocumentFile = runDir.resolve(VIEWER_DOCUMENT_FILE_NAME).toFile();
File analyzeResultFile = runDir.resolve(IDP_RESULT_FILE_NAME).toFile();
fileStorageService.downloadFiles(request, documentFile);

View File

@ -1,5 +1,5 @@
# you can list packages
ghostscript=9.55.0~dfsg1-0ubuntu5.9
ghostscript=9.55.0~dfsg1-0ubuntu5.10
pkg-config
zip
unzip

View File

@ -0,0 +1,50 @@
package com.knecon.fforesight.service.ocr.v1.api.model;
import static org.junit.jupiter.api.Assertions.*;
import java.awt.geom.Point2D;
import java.awt.geom.Rectangle2D;
import org.junit.jupiter.api.Test;
/**
 * Unit tests for the containment and center calculations of {@link QuadPoint}.
 */
class QuadPointTest {

    /**
     * Uses the unit square to check that corners, an inner point and an inner rectangle are
     * contained, and that points and rectangles reaching outside the square are rejected.
     */
    @Test
    public void testContains() {

        var a = new Point2D.Double(0, 0);
        var b = new Point2D.Double(0, 1);
        var c = new Point2D.Double(1, 1);
        var d = new Point2D.Double(1, 0);
        var q = new QuadPoint(a, b, c, d);

        assertTrue(q.isHorizontal());
        assertFalse(q.isVertical());

        // the corners themselves count as contained
        assertTrue(q.contains(a));
        assertTrue(q.contains(b));
        assertTrue(q.contains(c));
        assertTrue(q.contains(d));

        var p = new Point2D.Double(0.5, 0.5);
        assertTrue(q.contains(p));

        var r = new Rectangle2D.Double(0.5, 0.5, 0.1, 0.1);
        assertTrue(q.contains(r));

        // negative cases: without them an implementation that always returns true would pass
        assertFalse(q.contains(new Point2D.Double(1.5, 0.5)));
        assertFalse(q.contains(new Point2D.Double(-0.1, 0.5)));
        assertFalse(q.contains(new Point2D.Double(2, 2)));
        assertFalse(q.contains(2, 2));

        // starts inside the square but reaches beyond it
        var overlapping = new Rectangle2D.Double(0.5, 0.5, 1, 1);
        assertFalse(q.contains(overlapping));
    }


    /**
     * Uses a parallelogram to check orientation, direction and the averaged center point.
     */
    @Test
    public void testCenter() {

        var a = new Point2D.Double(0, 0);
        var b = new Point2D.Double(1, 1);
        var c = new Point2D.Double(2, 1);
        var d = new Point2D.Double(1, 0);
        var q = new QuadPoint(a, b, c, d);

        assertTrue(q.isHorizontal());
        assertFalse(q.isVertical());
        assertEquals(QuadPoint.Direction.RIGHT, q.getDirection());

        // mean of the x coordinates (0 + 1 + 2 + 1) / 4 and of the y coordinates (0 + 1 + 1 + 0) / 4
        assertEquals(new Point2D.Double(1, 0.5), q.getCenter());
    }

}

View File

@ -1,6 +1,9 @@
package com.knecon.fforesight.service.ocr.v1.server;
import static com.iqser.red.pdftronlogic.commons.PdfTextExtraction.extractAllTextFromDocument;
import static com.knecon.fforesight.service.ocr.v1.server.queue.OcrMessageReceiver.DOCUMENT_FILE_NAME;
import static com.knecon.fforesight.service.ocr.v1.server.queue.OcrMessageReceiver.IDP_RESULT_FILE_NAME;
import static com.knecon.fforesight.service.ocr.v1.server.queue.OcrMessageReceiver.VIEWER_DOCUMENT_FILE_NAME;
import java.io.File;
import java.io.FileInputStream;
@ -108,9 +111,9 @@ public class OcrServiceIntegrationTest extends AbstractTest {
assert tmpDir.toFile().exists() || tmpDir.toFile().mkdirs();
var documentFile = tmpDir.resolve(Path.of("document.pdf"));
var viewerDocumentFile = tmpDir.resolve(Path.of("viewerDocument.pdf"));
var analyzeResultFile = tmpDir.resolve(Path.of("azureAnalysisResult.json"));
var documentFile = tmpDir.resolve(Path.of(DOCUMENT_FILE_NAME));
var viewerDocumentFile = tmpDir.resolve(Path.of(VIEWER_DOCUMENT_FILE_NAME));
var analyzeResultFile = tmpDir.resolve(Path.of(IDP_RESULT_FILE_NAME));
Files.copy(file.toPath(), documentFile, StandardCopyOption.REPLACE_EXISTING);
Files.copy(file.toPath(), viewerDocumentFile, StandardCopyOption.REPLACE_EXISTING);