diff --git a/azure-ocr-service/azure-ocr-service-api/src/main/java/com/knecon/fforesight/service/ocr/v1/api/model/QuadPoint.java b/azure-ocr-service/azure-ocr-service-api/src/main/java/com/knecon/fforesight/service/ocr/v1/api/model/QuadPoint.java index 3fc7d66..0662724 100644 --- a/azure-ocr-service/azure-ocr-service-api/src/main/java/com/knecon/fforesight/service/ocr/v1/api/model/QuadPoint.java +++ b/azure-ocr-service/azure-ocr-service-api/src/main/java/com/knecon/fforesight/service/ocr/v1/api/model/QuadPoint.java @@ -5,9 +5,10 @@ import java.awt.geom.Line2D; import java.awt.geom.Point2D; import java.awt.geom.Rectangle2D; import java.util.List; +import java.util.Objects; import java.util.stream.Stream; -public record QuadPoint(Point2D a, Point2D b, Point2D c, Point2D d) { +public final class QuadPoint { public enum Direction { RIGHT, @@ -41,7 +42,23 @@ public record QuadPoint(Point2D a, Point2D b, Point2D c, Point2D d) { * ?|_____|? */ } - private static final double THRESHOLD_ANGLE = Math.toRadians(5); // QuadPoint is considered straight, when its angles are below this threshold + + private static final double THRESHOLD_ANGLE = Math.toRadians(5); // QuadPoint is considered straight, when its angles are below this threshold. + + private final Point2D a; + private final Point2D b; + private final Point2D c; + private final Point2D d; + + + // This constructor assumes, the points form a convex polygon, I will omit the assertion for performance reasons. + public QuadPoint(Point2D a, Point2D b, Point2D c, Point2D d) { + + this.a = a; + this.b = b; + this.c = c; + this.d = d; + } public static QuadPoint fromRectangle2D(Rectangle2D rectangle2D) { @@ -146,6 +163,74 @@ public record QuadPoint(Point2D a, Point2D b, Point2D c, Point2D d) { } + public boolean contains(double x, double y) { + // split into two triangles, test if either contains the point, assumes the QuadPoint is convex and created correctly. More specifically, the points must be in the correct order. 
+ return triangleContains(a, b, c, x, y) || triangleContains(a, c, d, x, y); + } + + + /* + Checks if a triangle contains a point by converting the point to barycentric coordinates using Cramer's rule and then checking that every coordinate is non-negative, i.e. the point is a convex combination of the triangle's corners. + https://en.wikipedia.org/wiki/Barycentric_coordinate_system#Barycentric_coordinates_on_triangles + */ + private boolean triangleContains(Point2D a, Point2D b, Point2D c, double x, double y) { + + // twice the signed area of the triangle (the determinant of the barycentric system) + double denominator = ((b.getY() - c.getY()) * (a.getX() - c.getX()) + (c.getX() - b.getX()) * (a.getY() - c.getY())); + double invertedDenominator = 1.0 / denominator; + double alpha = ((b.getY() - c.getY()) * (x - c.getX()) + (c.getX() - b.getX()) * (y - c.getY())) * invertedDenominator; + double beta = ((c.getY() - a.getY()) * (x - c.getX()) + (a.getX() - c.getX()) * (y - c.getY())) * invertedDenominator; + + return alpha >= 0 && beta >= 0 && alpha + beta <= 1; + } + + + public boolean contains(Point2D p) { + + return contains(p.getX(), p.getY()); + } + + + public boolean contains(Rectangle2D r) { + + double x = r.getX(); + double y = r.getY(); + double maxY = r.getMaxY(); + double maxX = r.getMaxX(); + + Point2D p1 = new Point2D.Double(x, y); + Point2D p2 = new Point2D.Double(x, maxY); + Point2D p3 = new Point2D.Double(maxX, maxY); + Point2D p4 = new Point2D.Double(maxX, y); + + return contains(p1) && contains(p2) && contains(p3) && contains(p4); + } + + + public double getCenterX() { + + return (a.getX() + b.getX() + c.getX() + d.getX()) / 4; + } + + + public double getCenterY() { + + return (a.getY() + b.getY() + c.getY() + d.getY()) / 4; + } + + + public Point2D getCenter() { + + return new Point2D.Double(getCenterX(), getCenterY()); + } + + + public boolean intersects(Line2D line) { + + return contains(line.getP1()) || contains(line.getP2()) || asLines().anyMatch(qLine -> qLine.intersectsLine(line)); + } + + /** * Determines if the given QuadPoint aligns with 
this QuadPoint within a given threshold. * It does os by trying every possible combination of aligning sides. It starts with the most likely combination of ab and cd. @@ -224,4 +309,44 @@ public record QuadPoint(Point2D a, Point2D b, Point2D c, Point2D d) { return Math.atan2(deltaY, deltaX); } + + public Point2D a() {return a;} + + + public Point2D b() {return b;} + + + public Point2D c() {return c;} + + + public Point2D d() {return d;} + + + @Override + public boolean equals(Object obj) { + + if (obj == this) { + return true; + } + if (obj == null || obj.getClass() != this.getClass()) { + return false; + } + var that = (QuadPoint) obj; + return Objects.equals(this.a, that.a) && Objects.equals(this.b, that.b) && Objects.equals(this.c, that.c) && Objects.equals(this.d, that.d); + } + + + @Override + public int hashCode() { + + return Objects.hash(a, b, c, d); + } + + + @Override + public String toString() { + + return "QuadPoint[a=%s, b=%s, c=%s, d=%s]".formatted(a, b, c, d); + } + } diff --git a/azure-ocr-service/azure-ocr-service-api/src/main/java/com/knecon/fforesight/service/ocr/v1/api/model/QuadPointData.java b/azure-ocr-service/azure-ocr-service-api/src/main/java/com/knecon/fforesight/service/ocr/v1/api/model/QuadPointData.java index 0d22bf4..45e1bc6 100644 --- a/azure-ocr-service/azure-ocr-service-api/src/main/java/com/knecon/fforesight/service/ocr/v1/api/model/QuadPointData.java +++ b/azure-ocr-service/azure-ocr-service-api/src/main/java/com/knecon/fforesight/service/ocr/v1/api/model/QuadPointData.java @@ -5,4 +5,9 @@ import lombok.Builder; @Builder public record QuadPointData(float[] values) { + public QuadPoint get() { + + return QuadPoint.fromData(this); + } + } diff --git a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/model/PageInformation.java b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/model/PageInformation.java index 5f15461..3d4aae1 100644 --- 
a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/model/PageInformation.java +++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/model/PageInformation.java @@ -13,7 +13,7 @@ import com.pdftron.pdf.Rect; import lombok.SneakyThrows; -public record PageInformation(Rectangle2D mediabox, int number, int rotationDegrees, List wordBBoxes) { +public record PageInformation(Rectangle2D mediabox, Rectangle2D cropBox, int number, int rotationDegrees, List wordBBoxes) { @SneakyThrows public static Map fromPDFDoc(PDFDoc pdfDoc) { @@ -34,8 +34,9 @@ public record PageInformation(Rectangle2D mediabox, int number, int rotationDegr @SneakyThrows public static PageInformation fromPage(int pageNum, Page page) { - try (Rect mediaBox = page.getCropBox()) { + try (Rect mediaBox = page.getMediaBox(); Rect cropBox = page.getCropBox()) { return new PageInformation(new Rectangle2D.Double(mediaBox.getX1(), mediaBox.getY1(), mediaBox.getWidth(), mediaBox.getHeight()), + new Rectangle2D.Double(cropBox.getX1(), cropBox.getY1(), cropBox.getWidth(), cropBox.getHeight()), pageNum, page.getRotation() * 90, DocumentTextExtractor.getTextBBoxes(page)); diff --git a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/OCRService.java b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/OCRService.java index fd83348..65087e8 100644 --- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/OCRService.java +++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/OCRService.java @@ -126,7 +126,7 @@ public class OCRService { File viewerDocumentFile, String fileId, String dossierId, - File analyzeResultFile, + File idpResultFile, Set features) { try (var in = new 
FileInputStream(documentFile); PDFDoc pdfDoc = new PDFDoc(in)) { @@ -149,7 +149,7 @@ public class OCRService { } if (features.contains(AzureOcrFeature.IDP)) { - saveAnalyzeResultFile(analyzeResultFile, ocrResult); + saveIdpResultFile(idpResultFile, ocrResult); } supervisor.getStatistics().drawingPdfFinished(); @@ -162,9 +162,9 @@ public class OCRService { } - private void saveAnalyzeResultFile(File analyzeResultFile, OcrResult ocrResult) throws IOException { + private void saveIdpResultFile(File idpResultFile, OcrResult ocrResult) throws IOException { - try (var out = new FileOutputStream(analyzeResultFile)) { + try (var out = new FileOutputStream(idpResultFile)) { mapper.writeValue(out, ocrResult.idpResult()); } } diff --git a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/visualizations/layers/IdpResultFactory.java b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/visualizations/layers/IdpResultFactory.java index cfd0ad4..0822249 100644 --- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/visualizations/layers/IdpResultFactory.java +++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/visualizations/layers/IdpResultFactory.java @@ -3,7 +3,6 @@ package com.knecon.fforesight.service.ocr.processor.visualizations.layers; import static com.knecon.fforesight.service.ocr.processor.utils.StringCleaningUtility.cleanString; import java.awt.geom.AffineTransform; -import java.util.Collections; import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -23,9 +22,9 @@ import com.knecon.fforesight.service.ocr.processor.model.PageBatch; import com.knecon.fforesight.service.ocr.processor.model.PageInformation; import com.knecon.fforesight.service.ocr.processor.visualizations.utils.Rectangle2DBBoxCollector; import 
com.knecon.fforesight.service.ocr.processor.visualizations.utils.RotationCorrectionUtility; -import com.knecon.fforesight.service.ocr.v1.api.model.IdpResult; import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature; import com.knecon.fforesight.service.ocr.v1.api.model.Figure; +import com.knecon.fforesight.service.ocr.v1.api.model.IdpResult; import com.knecon.fforesight.service.ocr.v1.api.model.KeyValuePair; import com.knecon.fforesight.service.ocr.v1.api.model.QuadPoint; import com.knecon.fforesight.service.ocr.v1.api.model.Region; @@ -65,11 +64,14 @@ public class IdpResultFactory { public AffineTransform getResultToPageTransform(Integer pageNumber) { - AffineTransform transform = resultToPageTransforms.get(pageNumber); + AffineTransform transform; if (rotationCorrection) { PageInformation page = pageInformation.get(pageNumber); - transform.preConcatenate(RotationCorrectionUtility.buildTransform(angles.get(pageNumber), page.width(), page.height(), false)); + transform = RotationCorrectionUtility.buildTransform(-angles.get(pageNumber), page.cropBox().getWidth(), page.cropBox().getHeight(), false); + } else { + transform = new AffineTransform(); } + transform.concatenate(resultToPageTransforms.get(pageNumber)); return transform; } @@ -77,12 +79,18 @@ public class IdpResultFactory { public void addAnalyzeResult(AnalyzeResult analyzeResult, PageBatch batch) { DocumentSpanLookup words = new DocumentSpanLookup(analyzeResult); - analyzeResult.getTables() - .forEach(documentTable -> addTable(documentTable, words, batch)); - analyzeResult.getKeyValuePairs() - .forEach(documentKeyValuePair -> addKeyValuePair(documentKeyValuePair, batch)); - analyzeResult.getFigures() - .forEach(documentFigure -> addFigure(documentFigure, batch, words)); + if (analyzeResult.getTables() != null) { + analyzeResult.getTables() + .forEach(documentTable -> addTable(documentTable, words, batch)); + } + if (analyzeResult.getKeyValuePairs() != null) { + analyzeResult.getKeyValuePairs() 
+ .forEach(documentKeyValuePair -> addKeyValuePair(documentKeyValuePair, batch)); + } + if (analyzeResult.getFigures() != null) { + analyzeResult.getFigures() + .forEach(documentFigure -> addFigure(documentFigure, batch, words)); + } } @@ -204,6 +212,9 @@ public class IdpResultFactory { private Region toRegionFromRegions(int pageNumber, List regions) { + if (regions.size() == 1) { + return new Region(pageNumber, QuadPoint.fromPolygons(regions.get(0).getPolygon()).getTransformed(getResultToPageTransform(pageNumber)).toData()); + } QuadPoint bbox = QuadPoint.fromRectangle2D(regions.stream() .map(BoundingRegion::getPolygon) .map(QuadPoint::fromPolygons) @@ -224,13 +235,7 @@ public class IdpResultFactory { throw new AssertionError(); } int pageNumber = batch.getPageNumber(batchPageNumber); - QuadPoint bbox = QuadPoint.fromRectangle2D(regions.stream() - .map(BoundingRegion::getPolygon) - .map(QuadPoint::fromPolygons) - .map(qp -> qp.getTransformed(getResultToPageTransform(pageNumber)).getBounds2D()) - .collect(new Rectangle2DBBoxCollector())); - - return new Region(pageNumber, bbox.toData()); + return toRegionFromRegions(pageNumber, regions); } } diff --git a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/visualizations/utils/RotationCorrectionUtility.java b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/visualizations/utils/RotationCorrectionUtility.java index 88371aa..6798a70 100644 --- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/visualizations/utils/RotationCorrectionUtility.java +++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/visualizations/utils/RotationCorrectionUtility.java @@ -85,12 +85,14 @@ public class RotationCorrectionUtility { List commands = new LinkedList<>(); double scale = getScalingFactor(angle, page); + double x = 
page.getCropBox().getWidth() / 2; + double y = page.getCropBox().getHeight() / 2; commands.add("q"); commands.add("/%s <<>> BDC".formatted(KNECON_ROTATION_CORRECTION.markedContentName())); - commands.add(buildMatrixCommands(AffineTransform.getTranslateInstance(page.getPageWidth() / 2, page.getPageHeight() / 2))); + commands.add(buildMatrixCommands(AffineTransform.getTranslateInstance(x, y))); commands.add(buildMatrixCommands(AffineTransform.getRotateInstance(Math.toRadians(angle)))); commands.add(buildMatrixCommands(AffineTransform.getScaleInstance(scale, scale))); - commands.add(buildMatrixCommands(AffineTransform.getTranslateInstance(-page.getPageWidth() / 2, -page.getPageHeight() / 2))); + commands.add(buildMatrixCommands(AffineTransform.getTranslateInstance(-x, -y))); commands.add("EMC"); return String.join("\n", commands); } @@ -197,7 +199,7 @@ public class RotationCorrectionUtility { public static double getRemainingAngle(double angle, int quadrants) { double referenceAngle = 90 * quadrants; - return angle - referenceAngle; + return (angle - referenceAngle) % 360; } diff --git a/azure-ocr-service/azure-ocr-service-server/src/main/java/com/knecon/fforesight/service/ocr/v1/server/queue/OcrMessageReceiver.java b/azure-ocr-service/azure-ocr-service-server/src/main/java/com/knecon/fforesight/service/ocr/v1/server/queue/OcrMessageReceiver.java index 03ae27d..64ea98e 100644 --- a/azure-ocr-service/azure-ocr-service-server/src/main/java/com/knecon/fforesight/service/ocr/v1/server/queue/OcrMessageReceiver.java +++ b/azure-ocr-service/azure-ocr-service-server/src/main/java/com/knecon/fforesight/service/ocr/v1/server/queue/OcrMessageReceiver.java @@ -34,6 +34,9 @@ import lombok.extern.slf4j.Slf4j; public class OcrMessageReceiver { public static final String OCR_REQUEST_LISTENER_ID = "ocr-request-listener"; + public static final String IDP_RESULT_FILE_NAME = "idpResult.json"; + public static final String VIEWER_DOCUMENT_FILE_NAME = "viewerDocument.pdf"; + public static 
final String DOCUMENT_FILE_NAME = "document.pdf"; FileStorageService fileStorageService; ObjectMapper objectMapper; @@ -60,9 +63,9 @@ public class OcrMessageReceiver { ocrMessageSender.sendOCRStarted(fileId); - File documentFile = runDir.resolve("document.pdf").toFile(); - File viewerDocumentFile = runDir.resolve("viewerDocument.pdf").toFile(); - File analyzeResultFile = runDir.resolve("azureAnalysisResult.json").toFile(); + File documentFile = runDir.resolve(DOCUMENT_FILE_NAME).toFile(); + File viewerDocumentFile = runDir.resolve(VIEWER_DOCUMENT_FILE_NAME).toFile(); + File analyzeResultFile = runDir.resolve(IDP_RESULT_FILE_NAME).toFile(); fileStorageService.downloadFiles(request, documentFile); diff --git a/azure-ocr-service/azure-ocr-service-server/src/test/java/com/knecon/fforesight/service/ocr/v1/api/model/QuadPointTest.java b/azure-ocr-service/azure-ocr-service-server/src/test/java/com/knecon/fforesight/service/ocr/v1/api/model/QuadPointTest.java new file mode 100644 index 0000000..6f94d11 --- /dev/null +++ b/azure-ocr-service/azure-ocr-service-server/src/test/java/com/knecon/fforesight/service/ocr/v1/api/model/QuadPointTest.java @@ -0,0 +1,50 @@ +package com.knecon.fforesight.service.ocr.v1.api.model; + +import static org.junit.jupiter.api.Assertions.*; + +import java.awt.geom.Point2D; +import java.awt.geom.Rectangle2D; + +import org.junit.jupiter.api.Test; + +class QuadPointTest { + + @Test + public void testContains() { + + var a = new Point2D.Double(0, 0); + var b = new Point2D.Double(0, 1); + var c = new Point2D.Double(1, 1); + var d = new Point2D.Double(1, 0); + var q = new QuadPoint(a, b, c, d); + assertTrue(q.isHorizontal()); + assertFalse(q.isVertical()); + + assertTrue(q.contains(a)); + assertTrue(q.contains(b)); + assertTrue(q.contains(c)); + assertTrue(q.contains(d)); + + var p = new Point2D.Double(0.5, 0.5); + assertTrue(q.contains(p)); + + var r = new Rectangle2D.Double(0.5, 0.5, 0.1, 0.1); + assertTrue(q.contains(r)); + } + + + @Test + public 
void testCenter() { + + var a = new Point2D.Double(0, 0); + var b = new Point2D.Double(1, 1); + var c = new Point2D.Double(2, 1); + var d = new Point2D.Double(1, 0); + var q = new QuadPoint(a, b, c, d); + assertTrue(q.isHorizontal()); + assertFalse(q.isVertical()); + assertEquals(QuadPoint.Direction.RIGHT, q.getDirection()); + assertEquals(new Point2D.Double(1, 0.5), q.getCenter()); + } + +} \ No newline at end of file diff --git a/azure-ocr-service/azure-ocr-service-server/src/test/java/com/knecon/fforesight/service/ocr/v1/server/OcrServiceIntegrationTest.java b/azure-ocr-service/azure-ocr-service-server/src/test/java/com/knecon/fforesight/service/ocr/v1/server/OcrServiceIntegrationTest.java index 24f98d3..62b4a21 100644 --- a/azure-ocr-service/azure-ocr-service-server/src/test/java/com/knecon/fforesight/service/ocr/v1/server/OcrServiceIntegrationTest.java +++ b/azure-ocr-service/azure-ocr-service-server/src/test/java/com/knecon/fforesight/service/ocr/v1/server/OcrServiceIntegrationTest.java @@ -1,6 +1,9 @@ package com.knecon.fforesight.service.ocr.v1.server; import static com.iqser.red.pdftronlogic.commons.PdfTextExtraction.extractAllTextFromDocument; +import static com.knecon.fforesight.service.ocr.v1.server.queue.OcrMessageReceiver.DOCUMENT_FILE_NAME; +import static com.knecon.fforesight.service.ocr.v1.server.queue.OcrMessageReceiver.IDP_RESULT_FILE_NAME; +import static com.knecon.fforesight.service.ocr.v1.server.queue.OcrMessageReceiver.VIEWER_DOCUMENT_FILE_NAME; import java.io.File; import java.io.FileInputStream;
@@ -108,9 +111,9 @@ public class OcrServiceIntegrationTest extends AbstractTest { assert tmpDir.toFile().exists() || tmpDir.toFile().mkdirs(); - var documentFile = tmpDir.resolve(Path.of("document.pdf")); - var viewerDocumentFile = tmpDir.resolve(Path.of("viewerDocument.pdf")); - var analyzeResultFile = tmpDir.resolve(Path.of("azureAnalysisResult.json")); + var documentFile = tmpDir.resolve(Path.of(DOCUMENT_FILE_NAME)); + var viewerDocumentFile = tmpDir.resolve(Path.of(VIEWER_DOCUMENT_FILE_NAME)); + var analyzeResultFile = tmpDir.resolve(Path.of(IDP_RESULT_FILE_NAME)); Files.copy(file.toPath(), documentFile, StandardCopyOption.REPLACE_EXISTING); Files.copy(file.toPath(), viewerDocumentFile, StandardCopyOption.REPLACE_EXISTING);