Compare commits

...

50 Commits

Author SHA1 Message Date
Kilian Schüttler
23e63db6c5 Merge branch 'RED-8670' into 'main'
RED-8670: add features to status update

See merge request fforesight/azure-ocr-service!23
2025-01-09 11:27:33 +01:00
Kilian Schüttler
635fd4abf8 RED-8670: add features to status update 2025-01-09 11:27:33 +01:00
Kilian Schuettler
98123a5938 RED-8670: add features to status update 2024-12-17 12:33:32 +01:00
Kilian Schuettler
9bbeaf2335 RED-8670: add features to status update 2024-12-17 12:32:34 +01:00
Kilian Schuettler
b6666f6953 RED-8670: add features to status update 2024-12-17 12:30:26 +01:00
Kilian Schuettler
80dfa16103 RED-8670: add features to status update 2024-12-17 12:20:19 +01:00
Kilian Schüttler
76c8e98384 Merge branch 'RED-8670' into 'main'
RED-8670: write IDP results to file

See merge request fforesight/azure-ocr-service!22
2024-12-06 11:02:03 +01:00
Kilian Schüttler
df154cfe9c RED-8670: write IDP results to file 2024-12-06 11:02:03 +01:00
Kilian Schüttler
e7b61353bf Merge branch 'serverbuild-test' into 'main'
fix ghostscript

See merge request fforesight/azure-ocr-service!21
2024-11-27 10:53:05 +01:00
Kilian Schüttler
d63562ad24 fix ghostscript apt package 2024-11-27 10:53:05 +01:00
Kilian Schuettler
14bd6cf5c8 fix tests 2024-11-26 17:06:31 +01:00
Kilian Schuettler
ebfa55d3d1 RED-8670: add tables to idp result
* apparently i've fixed some error, where the ocr-service sometimes hangs
2024-11-26 17:05:06 +01:00
Kilian Schuettler
f06e5779f3 RED-8670: add tables to idp result
* apparently i've fixed some error, where the ocr-service sometimes hangs
2024-11-26 16:59:26 +01:00
Kilian Schuettler
1d1bd321c2 RED-8670: add tables to idp result
* apparently i've fixed some error, where the ocr-service sometimes hangs
2024-11-26 16:58:03 +01:00
Kilian Schuettler
9ed9a3c37c RED-10477: update api version
* introduce file-based caches for faster recovery after retries
* some refactoring
2024-11-26 16:57:48 +01:00
Kilian Schüttler
eac6a49100 Merge branch 'RED-10477' into 'main'
RED-10477: update api version

See merge request fforesight/azure-ocr-service!19
2024-11-20 11:44:55 +01:00
Kilian Schüttler
0b8de28823 RED-10477: update api version 2024-11-20 11:44:55 +01:00
Kilian Schuettler
8bbc33e01b enable snuggification again, but with min size 2024-11-06 16:43:04 +01:00
Kilian Schuettler
e8483a8352 disable snuggification by default 2024-11-06 15:57:31 +01:00
Kilian Schuettler
6d6a0adcd4 upgrade pdftron-logic-commons version 2024-11-05 16:45:58 +01:00
Kilian Schüttler
f35fb2fce0 Merge branch 'improve-logging' into 'main'
improve logging

See merge request fforesight/azure-ocr-service!18
2024-10-25 21:19:12 +02:00
Kilian Schuettler
903e60a1f3 improve logging 2024-10-25 21:16:07 +02:00
Kilian Schuettler
b5523842dd fix short word snuggification 2024-10-25 19:20:27 +02:00
Kilian Schuettler
0d0942ad46 set fallback value for message 2024-10-25 18:33:42 +02:00
Kilian Schüttler
6845afb1dd Merge branch 'RED-10127' into 'main'
RED-10127: improve headline detection by fitting BBoxes tightly and therefore...

See merge request fforesight/azure-ocr-service!17
2024-10-22 17:03:29 +02:00
Kilian Schüttler
e78771d65f RED-10127: improve headline detection by fitting BBoxes tightly and therefore... 2024-10-22 17:03:29 +02:00
Kilian Schüttler
9219f723f8 Merge branch 'hotfix-fp' into 'main'
hotfix: properly remove layers when overwriting

See merge request fforesight/azure-ocr-service!16
2024-09-12 10:18:08 +02:00
Kilian Schuettler
55e1bc00af hotfix: properly remove layers when overwriting 2024-09-12 10:12:21 +02:00
Kilian Schüttler
750d7f96c5 Merge branch 'hotfix-fp' into 'main'
hotfix: reduce file size using Optimizer.optimize

See merge request fforesight/azure-ocr-service!14
2024-09-11 15:54:31 +02:00
Kilian Schuettler
469d04622b hotfix: reduce file size using Optimizer.optimize 2024-09-11 15:48:58 +02:00
Maverick Studer
77b443e6e6 Merge branch 'update-tc' into 'main'
Update tenant-commons for dlq fix

See merge request fforesight/azure-ocr-service!12
2024-09-03 13:43:25 +02:00
maverickstuder
d7255119fb Update tenant-commons for dlq fix 2024-09-03 13:14:37 +02:00
Maverick Studer
4afe8e7555 Merge branch 'feign-exeception-workaround' into 'main'
Feign exception workaround

See merge request fforesight/azure-ocr-service!11
2024-08-30 10:27:05 +02:00
Maverick Studer
e769248ca5 Feign exception workaround 2024-08-30 10:27:04 +02:00
Maverick Studer
989430f0ac Merge branch 'tenants-retry' into 'main'
Tenants retry logic and queue renames

See merge request fforesight/azure-ocr-service!10
2024-08-29 13:14:46 +02:00
Maverick Studer
30536d2b43 Tenants retry logic and queue renames 2024-08-29 13:14:45 +02:00
Maverick Studer
d2d8544439 Merge branch 'RED-9331' into 'main'
RED-9331: Explore possibilities for fair upload / analysis processing per tenant

See merge request fforesight/azure-ocr-service!8
2024-08-27 09:26:21 +02:00
Maverick Studer
f56ce7023d RED-9331: Explore possibilities for fair upload / analysis processing per tenant 2024-08-27 09:26:20 +02:00
Kilian Schüttler
412edec340 Merge branch 'RED-9864' into 'main'
RED-9746: sped up invisible element removal, fixed crash

See merge request fforesight/azure-ocr-service!9
2024-08-26 15:24:43 +02:00
Kilian Schuettler
242b3ef3b8 RED-9746: sped up invisible element removal, fixed crash 2024-08-26 15:20:38 +02:00
Christoph Schabert
e4aaecc750 Update .gitlab-ci.yml file 2024-08-22 14:16:02 +02:00
Kilian Schüttler
c7be843a6f Merge branch 'RED-9746' into 'main'
RED-9746: improve invisible element removal

See merge request fforesight/azure-ocr-service!6
2024-08-19 14:47:48 +02:00
Kilian Schüttler
2cadfdb8bf RED-9746: improve invisible element removal 2024-08-19 14:47:48 +02:00
Dominique Eifländer
b019ca8e2d Merge branch 'RED-9760-watermark' into 'main'
RED-9760: Fixed not working remove watermarks

See merge request fforesight/azure-ocr-service!5
2024-08-09 11:47:20 +02:00
Dominique Eifländer
670c505042 RED-9760: Fixed not working remove watermarks 2024-08-09 11:44:13 +02:00
Dominique Eifländer
22049d81c2 Merge branch 'RED-9760-2' into 'main'
RED-9760: Fixed bug in image detection

See merge request fforesight/azure-ocr-service!4
2024-07-29 11:14:06 +02:00
Dominique Eifländer
050c399270 RED-9760: Fixed bug in image detection 2024-07-29 11:13:02 +02:00
Dominique Eifländer
8f123fb865 Merge branch 'RED-9760' into 'main'
RED-9760: Fixed send updates to wrong exchange

See merge request fforesight/azure-ocr-service!3
2024-07-29 10:32:06 +02:00
Dominique Eifländer
21523a7796 RED-9760: Fixed send updates to wrong exchange 2024-07-29 10:00:09 +02:00
Kilian Schüttler
fdb3ae46dc Merge branch 'RED-9353' into 'main'
RED-9353: undo tenant exchange

See merge request fforesight/azure-ocr-service!2
2024-07-25 13:11:40 +02:00
77 changed files with 3111 additions and 935 deletions

View File

@@ -22,4 +22,5 @@ deploy:
rules:
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
- if: $CI_COMMIT_BRANCH =~ /^release/
- if: $CI_COMMIT_BRANCH =~ /^feature/
- if: $CI_COMMIT_TAG

View File

@@ -70,7 +70,7 @@ int concurrency = 8;
int batchSize = 128;
boolean debug; // writes the ocr layer visibly to the viewer doc pdf
boolean idpEnabled; // Enables table detection, paragraph classification, section detection, key-value detection.
boolean tableDetection; // writes the tables to the PDF as invisible lines.
boolean drawTablesAsLines; // writes the tables to the PDF as invisible lines.
boolean processAllPages; // if this parameter is set, ocr will be performed on every page, regardless of whether it has images
boolean fontStyleDetection; // Enables bold detection using ghostscript and leptonica
String contentFormat; // Either markdown or text. For whatever reason, with markdown enabled, Azure does not write key-values....

View File

@@ -1,25 +0,0 @@
package com.knecon.fforesight.service.ocr.v1.api.model;
import java.util.ArrayList;
import java.util.List;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Getter;
import lombok.experimental.FieldDefaults;
@Getter
@Builder
@AllArgsConstructor
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class AzureAnalyzeResult {
@Builder.Default
List<KeyValuePair> keyValuePairs = new ArrayList<>();
@Builder.Default
List<TextRegion> handWrittenText = new ArrayList<>();
@Builder.Default
List<Figure> figures = new ArrayList<>();
}

View File

@@ -0,0 +1,11 @@
package com.knecon.fforesight.service.ocr.v1.api.model;
public enum AzureOcrFeature {
ROTATION_CORRECTION,
IDP,
FONT_STYLE_DETECTION,
ALL_PAGES,
REMOVE_WATERMARKS
}

View File

@@ -1,6 +1,8 @@
package com.knecon.fforesight.service.ocr.v1.api.model;
import java.util.Collections;
import java.util.Optional;
import java.util.Set;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
@@ -18,12 +20,13 @@ public class DocumentRequest {
String dossierId;
String fileId;
boolean removeWatermark;
String originDocumentId;
String viewerDocId;
String idpResultId;
boolean removeWatermarks;
Set<AzureOcrFeature> features;
public DocumentRequest(String dossierId, String fileId) {
@@ -33,18 +36,23 @@ public class DocumentRequest {
originDocumentId = null;
viewerDocId = null;
idpResultId = null;
removeWatermarks = false;
features = Collections.emptySet();
}
// needed for backwards compatibility
public DocumentRequest(String dossierId, String fileId, boolean removeWatermarks) {
public DocumentRequest(String dossierId, String fileId, boolean removeWatermark) {
this.dossierId = dossierId;
this.fileId = fileId;
this.removeWatermarks = removeWatermarks;
originDocumentId = null;
viewerDocId = null;
idpResultId = null;
if (removeWatermark) {
features = Set.of(AzureOcrFeature.REMOVE_WATERMARKS);
} else {
features = Collections.emptySet();
}
}
@@ -65,4 +73,10 @@ public class DocumentRequest {
return Optional.ofNullable(originDocumentId);
}
public Set<AzureOcrFeature> getFeatures() {
return features == null ? Collections.emptySet() : features;
}
}
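The DocumentRequest diff above keeps a legacy boolean constructor alive by translating the flag into the new feature-set representation. A minimal standalone sketch of that mapping (class and method names here are hypothetical, not part of the service):

```java
import java.util.Collections;
import java.util.Set;

public class FeatureMappingSketch {

    enum AzureOcrFeature { ROTATION_CORRECTION, IDP, FONT_STYLE_DETECTION, ALL_PAGES, REMOVE_WATERMARKS }

    // Mirrors the backwards-compatible constructor: a true flag yields a
    // one-element feature set, a false flag yields the empty set.
    static Set<AzureOcrFeature> fromLegacyFlag(boolean removeWatermark) {
        return removeWatermark ? Set.of(AzureOcrFeature.REMOVE_WATERMARKS) : Collections.emptySet();
    }
}
```

Callers that still pass the boolean keep working, while new callers can supply any combination of features directly.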

View File

@@ -1,10 +1,11 @@
package com.knecon.fforesight.service.ocr.v1.api.model;
import java.util.List;
import java.util.Optional;
import lombok.Builder;
@Builder
public record Figure(Optional<TextRegion> caption, Region image) {
public record Figure(TextRegion caption, Region image, List<TextRegion> footnotes) {
}

View File

@@ -0,0 +1,23 @@
package com.knecon.fforesight.service.ocr.v1.api.model;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
public record IdpResult(List<KeyValuePair> keyValuePairs, List<TextRegion> handWrittenText, List<Figure> figures, List<Table> tables) {
public static IdpResult initSynchronized() {
return new IdpResult(Collections.synchronizedList(new LinkedList<>()),
Collections.synchronizedList(new LinkedList<>()),
Collections.synchronizedList(new LinkedList<>()),
Collections.synchronizedList(new LinkedList<>()));
}
public static IdpResult empty() {
return new IdpResult(Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), Collections.emptyList());
}
}
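IdpResult.initSynchronized wraps each list with Collections.synchronizedList so that worker threads processing different page batches can append results concurrently without losing writes. A small sketch of that pattern (the class name and thread counts are illustrative, not from the diff):

```java
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;

public class SynchronizedAccumulationSketch {

    // Several threads append to one synchronized list; every add() is atomic,
    // so no element is lost even under contention.
    public static int accumulateConcurrently(int threads, int itemsPerThread) {
        List<Integer> results = Collections.synchronizedList(new LinkedList<>());
        Thread[] workers = new Thread[threads];
        for (int t = 0; t < threads; t++) {
            workers[t] = new Thread(() -> {
                for (int i = 0; i < itemsPerThread; i++) {
                    results.add(i);
                }
            });
            workers[t].start();
        }
        for (Thread worker : workers) {
            try {
                worker.join();
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            }
        }
        return results.size();
    }
}
```

Note that only individual operations are synchronized; iterating such a list still requires an external synchronized block.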

View File

@@ -1,5 +1,8 @@
package com.knecon.fforesight.service.ocr.v1.api.model;
import java.util.Collections;
import java.util.Set;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
@@ -12,9 +15,16 @@ import lombok.NoArgsConstructor;
public class OCRStatusUpdateResponse {
private String fileId;
private Set<AzureOcrFeature> features;
private int numberOfPagesToOCR;
private int numberOfOCRedPages;
private boolean ocrFinished;
private boolean ocrStarted;
public Set<AzureOcrFeature> getFeatures() {
return features == null ? Collections.emptySet() : features;
}
}
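Both DocumentRequest and OCRStatusUpdateResponse guard their getters with a null-to-empty-set substitution, since payloads serialized by an older service version simply omit the new features field. A minimal sketch of the pattern (names here are hypothetical):

```java
import java.util.Collections;
import java.util.Set;

public class NullSafeFeaturesSketch {

    enum Feature { IDP, ALL_PAGES }

    // May be null after deserializing a payload from an older service version.
    private Set<Feature> features;

    // Substitute the empty set so callers never have to null-check.
    public Set<Feature> getFeatures() {
        return features == null ? Collections.emptySet() : features;
    }

    public void setFeatures(Set<Feature> features) {
        this.features = features;
    }
}
```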

View File

@@ -5,29 +5,107 @@ import java.awt.geom.Line2D;
import java.awt.geom.Point2D;
import java.awt.geom.Rectangle2D;
import java.util.List;
import java.util.Objects;
import java.util.stream.Stream;
public record QuadPoint(Point2D a, Point2D b, Point2D c, Point2D d) {
import lombok.Getter;
/*
B _____ C
| |
A|_____|D
*/
public final class QuadPoint {
public enum Direction {
RIGHT,
/*
B _____ C
| |
A|_____|D
*/
DOWN,
/*
* A _____ B
* | |
* D|_____|C
*/
LEFT,
/*
* D _____ A
* | |
* C|_____|B
* */
UP,
/*
* C _____ D
* | |
* B|_____|A
*/
NONE
/*
* ? _____ ?
* | |
* ?|_____|?
*/
}
private static final double THRESHOLD_ANGLE = Math.toRadians(5); // A QuadPoint is considered axis-aligned when its angles are below this threshold.
private final Point2D a;
private final Point2D b;
private final Point2D c;
private final Point2D d;
@Getter
private final Direction direction;
// This constructor assumes the points form a convex polygon; the assertion is omitted for performance reasons.
public QuadPoint(Point2D a, Point2D b, Point2D c, Point2D d) {
this.a = a;
this.b = b;
this.c = c;
this.d = d;
this.direction = calculateDirection();
}
private Direction calculateDirection() {
if (isHorizontal()) {
return a.getX() < d.getX() ? Direction.RIGHT : Direction.LEFT;
}
if (isVertical()) {
return a.getY() < d.getY() ? Direction.UP : Direction.DOWN;
}
return Direction.NONE;
}
public static QuadPoint fromRectangle2D(Rectangle2D rectangle2D) {
return new QuadPoint(new Point2D.Double(rectangle2D.getX(), rectangle2D.getY()),
new Point2D.Double(rectangle2D.getX(), rectangle2D.getMaxY()),
new Point2D.Double(rectangle2D.getMaxX(), rectangle2D.getMaxY()),
new Point2D.Double(rectangle2D.getMaxX(), rectangle2D.getY()));
return fromRectangle2D(rectangle2D, Direction.NONE);
}
public static QuadPoint fromRectangle2D(Rectangle2D rectangle2D, Direction direction) {
var lowerLeft = new Point2D.Double(rectangle2D.getX(), rectangle2D.getY());
var upperLeft = new Point2D.Double(rectangle2D.getX(), rectangle2D.getMaxY());
var upperRight = new Point2D.Double(rectangle2D.getMaxX(), rectangle2D.getMaxY());
var lowerRight = new Point2D.Double(rectangle2D.getMaxX(), rectangle2D.getY());
return switch (direction) {
case DOWN -> new QuadPoint(upperLeft, upperRight, lowerRight, lowerLeft);
case LEFT -> new QuadPoint(upperRight, lowerRight, lowerLeft, upperLeft);
case UP -> new QuadPoint(lowerRight, lowerLeft, upperLeft, upperRight);
default -> new QuadPoint(lowerLeft, upperLeft, upperRight, lowerRight);
};
}
public static QuadPoint fromPolygons(List<Double> polygon) {
assert polygon.size() == 8;
if (polygon.size() != 8) {
throw new AssertionError("polygon must contain exactly 8 coordinates, but got " + polygon.size());
}
return new QuadPoint(new Point2D.Double(polygon.get(0), polygon.get(1)),
new Point2D.Double(polygon.get(6), polygon.get(7)),
new Point2D.Double(polygon.get(4), polygon.get(5)),
@@ -56,6 +134,23 @@ public record QuadPoint(Point2D a, Point2D b, Point2D c, Point2D d) {
}
public boolean isHorizontal() {
double angle = calculateAngle(a, d);
double angle2 = calculateAngle(b, c);
return Math.abs(angle) <= THRESHOLD_ANGLE || Math.abs(angle2) <= THRESHOLD_ANGLE;
}
public boolean isVertical() {
double rightAngle = Math.PI / 2;
double angle = calculateAngle(a, d);
double angle2 = calculateAngle(b, c);
return Math.abs(rightAngle - Math.abs(angle)) <= THRESHOLD_ANGLE || Math.abs(rightAngle - Math.abs(angle2)) <= THRESHOLD_ANGLE;
}
public Stream<Line2D> asLines() {
return Stream.of(new Line2D.Double(a(), b()), new Line2D.Double(b(), c()), new Line2D.Double(c(), d()), new Line2D.Double(d(), a()));
@@ -63,7 +158,7 @@ public record QuadPoint(Point2D a, Point2D b, Point2D c, Point2D d) {
}
public QuadPointData data() {
public QuadPointData toData() {
return new QuadPointData(new float[]{(float) a.getX(), (float) a.getY(), (float) b.getX(), (float) b.getY(), (float) c.getX(), (float) c.getY(), (float) d.getX(), (float) d.getY()});
}
@@ -75,6 +170,142 @@ public record QuadPoint(Point2D a, Point2D b, Point2D c, Point2D d) {
}
public boolean contains(double x, double y) {
// Split into two triangles and test whether either contains the point. Assumes the QuadPoint is convex and created correctly; more specifically, the points must be in the correct order.
return triangleContains(a, b, c, x, y) || triangleContains(a, c, d, x, y);
}
/*
Checks whether a triangle contains a point by converting the point to barycentric coordinates using Cramer's rule and then checking that the linear combination lies within the triangle's bounds.
https://en.wikipedia.org/wiki/Barycentric_coordinate_system#Barycentric_coordinates_on_triangles
*/
private boolean triangleContains(Point2D a, Point2D b, Point2D c, double x, double y) {
// area of the triangle
double denominator = ((b.getY() - c.getY()) * (a.getX() - c.getX()) + (c.getX() - b.getX()) * (a.getY() - c.getY()));
double invertedDenominator = 1.0 / denominator;
double alpha = ((b.getY() - c.getY()) * (x - c.getX()) + (c.getX() - b.getX()) * (y - c.getY())) * invertedDenominator;
double beta = ((c.getY() - a.getY()) * (x - c.getX()) + (a.getX() - c.getX()) * (y - c.getY())) * invertedDenominator;
return alpha >= 0 && beta >= 0 && alpha + beta <= 1;
}
public boolean contains(Point2D p) {
return contains(p.getX(), p.getY());
}
public boolean contains(Rectangle2D r) {
double x = r.getX();
double y = r.getY();
double maxY = r.getMaxY();
double maxX = r.getMaxX();
Point2D p1 = new Point2D.Double(x, y);
Point2D p2 = new Point2D.Double(x, maxY);
Point2D p3 = new Point2D.Double(maxX, maxY);
Point2D p4 = new Point2D.Double(maxX, y);
return contains(p1) && contains(p2) && contains(p3) && contains(p4);
}
public double getCenterX() {
return (a.getX() + b.getX() + c.getX() + d.getX()) / 4;
}
public double getCenterY() {
return (a.getY() + b.getY() + c.getY() + d.getY()) / 4;
}
public Point2D getCenter() {
return new Point2D.Double(getCenterX(), getCenterY());
}
public boolean intersects(Line2D line) {
return contains(line.getP1()) || contains(line.getP2()) || asLines().anyMatch(qLine -> qLine.intersectsLine(line));
}
public Line2D getRightLine() {
return new Line2D.Double(getTopRight(), getLowerRight());
}
public Line2D getLeftLine() {
return new Line2D.Double(getTopLeft(), getLowerLeft());
}
public Line2D getBottomLine() {
return new Line2D.Double(getLowerLeft(), getLowerRight());
}
public Line2D getTopLine() {
return new Line2D.Double(getTopLeft(), getTopRight());
}
public Point2D getTopLeft() {
return switch (direction) {
case DOWN -> a;
case LEFT -> d;
case UP -> c;
default -> b;
};
}
public Point2D getTopRight() {
return switch (direction) {
case DOWN -> b;
case LEFT -> a;
case UP -> d;
default -> c;
};
}
public Point2D getLowerRight() {
return switch (direction) {
case DOWN -> c;
case LEFT -> b;
case UP -> a;
default -> d;
};
}
public Point2D getLowerLeft() {
return switch (direction) {
case DOWN -> d;
case LEFT -> c;
case UP -> b;
default -> a;
};
}
/**
* Determines if the given QuadPoint aligns with this QuadPoint within a given threshold.
* It does so by trying every possible combination of aligning sides, starting with the most likely combination of ab and cd.
@@ -134,17 +365,56 @@ public record QuadPoint(Point2D a, Point2D b, Point2D c, Point2D d) {
}
public double size() {
public double getRectangularSize() {
return a().distance(b()) * a().distance(d());
}
public double angle() {
public double getAngle() {
return calculateAngle(a, d);
}
private static double calculateAngle(Point2D a, Point2D d) {
double deltaY = d.getY() - a.getY();
double deltaX = d.getX() - a.getX();
return Math.atan2(deltaY, deltaX);
}
public Point2D a() {return a;}
public Point2D b() {return b;}
public Point2D c() {return c;}
public Point2D d() {return d;}
@Override
public boolean equals(Object obj) {
if (obj == this) {
return true;
}
if (obj == null || obj.getClass() != this.getClass()) {
return false;
}
var that = (QuadPoint) obj;
return Objects.equals(this.a, that.a) && Objects.equals(this.b, that.b) && Objects.equals(this.c, that.c) && Objects.equals(this.d, that.d);
}
@Override
public int hashCode() {
return Objects.hash(a, b, c, d);
}
}
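The triangleContains method in the QuadPoint diff above solves for the point's barycentric coordinates via Cramer's rule and accepts the point iff all coefficients stay within [0, 1]. A self-contained sketch of that same test, extracted for clarity (the wrapping class name is hypothetical):

```java
import java.awt.geom.Point2D;

public class BarycentricSketch {

    // The point (x, y) is expressed as a linear combination alpha*a + beta*b +
    // gamma*c of the triangle's vertices; it lies inside iff alpha, beta,
    // gamma = 1 - alpha - beta are all non-negative.
    static boolean triangleContains(Point2D a, Point2D b, Point2D c, double x, double y) {
        // Twice the signed area of the triangle (the system's determinant).
        double denominator = (b.getY() - c.getY()) * (a.getX() - c.getX())
                + (c.getX() - b.getX()) * (a.getY() - c.getY());
        double invertedDenominator = 1.0 / denominator;
        double alpha = ((b.getY() - c.getY()) * (x - c.getX()) + (c.getX() - b.getX()) * (y - c.getY())) * invertedDenominator;
        double beta = ((c.getY() - a.getY()) * (x - c.getX()) + (a.getX() - c.getX()) * (y - c.getY())) * invertedDenominator;
        return alpha >= 0 && beta >= 0 && alpha + beta <= 1;
    }
}
```

Splitting the quad into triangles a-b-c and a-c-d and running this test twice is what makes QuadPoint.contains correct for any convex quad, not just axis-aligned rectangles.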

View File

@@ -5,4 +5,9 @@ import lombok.Builder;
@Builder
public record QuadPointData(float[] values) {
public QuadPoint get() {
return QuadPoint.fromData(this);
}
}

View File

@@ -0,0 +1,7 @@
package com.knecon.fforesight.service.ocr.v1.api.model;
import java.util.List;
public record Table(TextRegion caption, int numberOfCols, int numberOfRows, List<TableCell> cells, List<TextRegion> footnotes, List<Region> bboxes) {
}

View File

@@ -0,0 +1,5 @@
package com.knecon.fforesight.service.ocr.v1.api.model;
public record TableCell(TextRegion textRegion, int row, int col, TableCellType kind) {
}

View File

@@ -0,0 +1,5 @@
package com.knecon.fforesight.service.ocr.v1.api.model;
public enum TableCellType {
ROW_HEADER, COLUMN_HEADER, CONTENT, STUB_HEAD, DESCRIPTION
}

View File

@@ -10,19 +10,18 @@ configurations {
}
dependencies {
api(project(":azure-ocr-service-api"))
api("com.iqser.red.service:persistence-service-internal-api-v1:2.224.0")
api("net.sourceforge.tess4j:tess4j:5.8.0")
api("com.iqser.red.commons:metric-commons:2.1.0")
api("com.iqser.red.commons:storage-commons:2.49.0")
api("com.knecon.fforesight:tenant-commons:0.21.0")
api("com.pdftron:PDFNet:10.7.0")
api("org.apache.pdfbox:pdfbox:3.0.0")
api("org.apache.commons:commons-math3:3.6.1")
api("com.amazonaws:aws-java-sdk-kms:1.12.440")
api("com.google.guava:guava:31.1-jre")
api("com.iqser.red.commons:pdftron-logic-commons:2.27.0")
api("com.knecon.fforesight:viewer-doc-processor:0.148.0")
api("com.azure:azure-ai-documentintelligence:1.0.0-beta.3")
implementation(project(":azure-ocr-service-api"))
implementation("net.sourceforge.tess4j:tess4j:5.8.0")
implementation("com.iqser.red.commons:metric-commons:2.1.0")
implementation("com.pdftron:PDFNet:11.0.0")
implementation("org.apache.pdfbox:pdfbox:3.0.0")
implementation("org.apache.commons:commons-math3:3.6.1")
implementation("com.amazonaws:aws-java-sdk-kms:1.12.440")
implementation("com.google.guava:guava:31.1-jre")
implementation("com.knecon.fforesight:viewer-doc-processor:0.193.0")
implementation("com.azure:azure-ai-documentintelligence:1.0.0")
implementation("com.iqser.red.commons:pdftron-logic-commons:2.32.0")
testImplementation("org.junit.jupiter:junit-jupiter:5.8.1")
}

View File

@@ -6,6 +6,8 @@ import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.ComponentScan;
import org.springframework.context.annotation.Configuration;
import com.iqser.red.pdftronlogic.commons.InvisibleElementRemovalService;
import com.iqser.red.pdftronlogic.commons.WatermarkRemovalService;
import com.knecon.fforesight.service.viewerdoc.service.PDFTronViewerDocumentService;
import io.micrometer.observation.ObservationRegistry;
@@ -22,4 +24,18 @@ public class OcrServiceProcessorConfiguration {
return new PDFTronViewerDocumentService(registry);
}
@Bean
public InvisibleElementRemovalService invisibleElementRemovalService() {
return new InvisibleElementRemovalService();
}
@Bean
public WatermarkRemovalService watermarkRemovalService() {
return new WatermarkRemovalService();
}
}

View File

@@ -11,16 +11,16 @@ import lombok.experimental.FieldDefaults;
@FieldDefaults(level = AccessLevel.PRIVATE)
public class OcrServiceSettings {
// Limits the number of concurrent calls to the Azure API. In very rudimentary testing, Azure starts throwing "too many requests" errors at around 80/s. Higher values greatly improve throughput.
int concurrency = 8;
// Limits the number of concurrent calls to azure
int concurrency = 2;
// Limits the number of pages per call.
int batchSize = 128;
int batchSize = 32;
boolean debug; // writes the ocr layer visibly to the viewer doc pdf
boolean idpEnabled; // Enables table detection, paragraph classification, section detection, key-value detection.
boolean tableDetection; // writes the tables to the PDF as invisible lines.
boolean processAllPages; // if this parameter is set, ocr will be performed on every page, regardless of whether it has images
boolean fontStyleDetection; // Enables bold detection using ghostscript and leptonica
boolean drawTablesAsLines; // writes the tables to the PDF as invisible lines.
boolean snuggify = true; // attempts to shrink the word boxes returned by Azure to fit the actual word pixels snugly
boolean useCaches; // skips azure api, pdf rendering and image processing, when the files are already present
boolean azureFontStyleDetection; // omits all image processing and uses Azure's FONT_STYLE feature (costs 0.6ct per page)
String contentFormat; // Either markdown or text. For whatever reason, with markdown enabled, Azure does not write key-values....
}
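The concurrency and batchSize settings above interact in a simple way: a document's pages are split into ceil(pages / batchSize) batches, and at most `concurrency` of those batches are in flight against the Azure API at once. A sketch of the batch-count arithmetic (the class name is illustrative):

```java
public class BatchingSketch {

    // Integer ceiling division: the number of API calls needed to cover all
    // pages when each call carries at most batchSize pages.
    static int numberOfBatches(int pages, int batchSize) {
        return (pages + batchSize - 1) / batchSize;
    }
}
```

With the new defaults (concurrency 2, batchSize 32), a 100-page document becomes 4 batches, processed at most 2 at a time.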

View File

@@ -7,6 +7,7 @@ import com.pdftron.pdf.PDFNet;
import com.sun.jna.NativeLibrary;
import jakarta.annotation.PostConstruct;
import lombok.AllArgsConstructor;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
@@ -14,11 +15,14 @@ import lombok.extern.slf4j.Slf4j;
@Slf4j
@Component
@RequiredArgsConstructor
@AllArgsConstructor
public class NativeLibrariesInitializer {
@Value("${pdftron.license:}")
private String pdftronLicense;
@Value("${native-libs.path:}")
private String nativeLibsPath;
@SneakyThrows
@PostConstruct
@@ -30,8 +34,8 @@ public class NativeLibrariesInitializer {
PDFNet.setTempPath("/tmp/pdftron");
PDFNet.initialize(pdftronLicense);
log.info("Setting jna.library.path: {}", System.getenv("VCPKG_DYNAMIC_LIB"));
System.setProperty("jna.library.path", System.getenv("VCPKG_DYNAMIC_LIB"));
log.info("Setting jna.library.path: {}", nativeLibsPath);
System.setProperty("jna.library.path", nativeLibsPath);
log.info("Asserting Native Libraries loaded");

View File

@@ -0,0 +1,102 @@
package com.knecon.fforesight.service.ocr.processor.model;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.stream.Collectors;
import com.azure.ai.documentintelligence.models.AnalyzeResult;
import com.azure.ai.documentintelligence.models.DocumentPage;
import com.azure.ai.documentintelligence.models.DocumentSpan;
import com.azure.ai.documentintelligence.models.DocumentWord;
public class DocumentSpanLookup {
List<PageSpanLookup> documentWordLookup;
public DocumentSpanLookup(AnalyzeResult analyzeResult) {
documentWordLookup = new ArrayList<>(analyzeResult.getPages().size());
int offset = 0;
for (DocumentPage page : analyzeResult.getPages()) {
if (page.getWords() == null || page.getWords().isEmpty()) {
documentWordLookup.add(new PageSpanLookup(offset, offset, null));
continue; // without this, the get(0) below would fail on an empty page
}
int start = page.getWords()
.get(0).getSpan().getOffset();
DocumentSpan span = page.getWords()
.get(page.getWords().size() - 1).getSpan();
int end = span.getOffset() + span.getLength();
SpanLookup<DocumentWord> pageWords = new SpanLookup<>(page.getWords()
.stream(), DocumentWord::getSpan);
documentWordLookup.add(new PageSpanLookup(start, end, pageWords));
offset = end + 1;
}
}
public List<WordOnPage> findWordsOnPages(DocumentSpan documentSpan) {
if (documentSpan == null) {
return Collections.emptyList();
}
int startIdx = Math.max(0, findIdxOfFirstSmallerObject(documentSpan));
List<WordOnPage> wordsOnPages = new ArrayList<>();
// Iterate the pages overlapping the span; stop once a page starts past the span's end.
for (int pageNumber = startIdx; pageNumber < documentWordLookup.size(); pageNumber++) {
PageSpanLookup page = documentWordLookup.get(pageNumber);
if (page.start > documentSpan.getOffset() + documentSpan.getLength()) {
break;
}
if (page.wordSpanLookup == null) {
continue; // page without words
}
int pageIdx = pageNumber;
page.wordSpanLookup.findElementsContainedInSpan(documentSpan)
.stream()
.map(documentWord -> new WordOnPage(documentWord, pageIdx))
.forEach(wordsOnPages::add);
}
return wordsOnPages;
}
private int findIdxOfFirstSmallerObject(DocumentSpan documentSpan) {
int idx = Collections.binarySearch(documentWordLookup, new PageSpanLookup(documentSpan.getOffset(), -1, null), Comparator.comparing(PageSpanLookup::start));
if (idx >= 0) {
return idx;
} else {
int insertionPoint = -(idx + 1);
if (insertionPoint == 0) {
return -1;
}
var lastSmaller = documentWordLookup.get(insertionPoint - 1);
for (int resultIdx = insertionPoint - 2; resultIdx >= 0; resultIdx--) {
if (documentWordLookup.get(resultIdx).compareTo(lastSmaller) == 0) {
return resultIdx + 1;
}
}
return 0;
}
}
public record WordOnPage(DocumentWord documentWord, int pageNumber) {
}
private record PageSpanLookup(int start, int end, SpanLookup<DocumentWord> wordSpanLookup) implements Comparable<PageSpanLookup> {
@Override
public int compareTo(PageSpanLookup o) {
return Integer.compare(start, o.start);
}
}
}
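findIdxOfFirstSmallerObject above leans on the Collections.binarySearch contract: a negative return value encodes the insertion point as -(insertionPoint) - 1, so the index of the last element not greater than the key is insertionPoint - 1. A self-contained sketch of that idiom (the class and method names are illustrative):

```java
import java.util.Collections;
import java.util.List;

public class BinarySearchSketch {

    // Returns the index of the last element <= key in a sorted list,
    // or -1 when the key is smaller than every element.
    static int indexOfLastNotGreater(List<Integer> sortedStarts, int key) {
        int idx = Collections.binarySearch(sortedStarts, key);
        if (idx >= 0) {
            return idx; // exact match
        }
        int insertionPoint = -(idx + 1); // decode the negative return value
        return insertionPoint - 1;
    }
}
```

This is exactly the shape of the page lookup: given a span offset, find the page whose start offset is the greatest one not exceeding it.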

View File

@@ -1,5 +1,7 @@
package com.knecon.fforesight.service.ocr.processor.model;
import java.io.File;
import net.sourceforge.lept4j.Leptonica1;
import net.sourceforge.lept4j.Pix;
@@ -10,4 +12,10 @@ public record ImageFile(int pageNumber, String absoluteFilePath) {
return Leptonica1.pixRead(absoluteFilePath);
}
public boolean exists() {
return new File(absoluteFilePath).exists();
}
}

View File

@@ -2,29 +2,129 @@ package com.knecon.fforesight.service.ocr.processor.model;
import static com.knecon.fforesight.service.ocr.processor.utils.ListSplittingUtils.formatIntervals;
import java.io.File;
import java.io.FileInputStream;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Consumer;
import com.azure.ai.documentintelligence.models.AnalyzeResult;
import com.azure.core.util.BinaryData;
import com.azure.json.JsonOptions;
import com.azure.json.JsonReader;
import com.azure.json.implementation.DefaultJsonReader;
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.GhostScriptService;
import lombok.AccessLevel;
import lombok.Getter;
import lombok.NonNull;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.experimental.FieldDefaults;
@RequiredArgsConstructor
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public final class PageBatch implements Comparable<PageBatch> {
@Getter
int index;
@NonNull
List<Integer> lookup = new ArrayList<>();
List<Integer> batchPageToOriginPageLookup;
@NonNull
@Getter
Path batchDoc;
@NonNull
@Getter
Path batchDir;
@SneakyThrows
public AnalyzeResult getAzureResultCache() {
try (var in = new FileInputStream(getAzureResultCacheFile()); JsonReader reader = DefaultJsonReader.fromStream(in, new JsonOptions());) {
return AnalyzeResult.fromJson(reader);
}
}
@SneakyThrows
public File getAzureResultCacheFile() {
return batchDir.resolve("analyzeResult.json").toFile();
}
public List<ImageFile> getRenderedImageFiles() {
List<ImageFile> renderedImageFiles = new ArrayList<>();
for (int i = 0; i < batchPageToOriginPageLookup.size(); i++) {
renderedImageFiles.add(getRenderedImageFile(batchPageToOriginPageLookup.get(i), i + 1));
}
return renderedImageFiles;
}
public ImageFile getRenderedImageFile(int pageNumber, int numberInBatch) {
return new ImageFile(pageNumber, getRenderedImageNameFormat().formatted(numberInBatch));
}
public ImageFile getProcessedImageFile(int pageNumber, int numberInBatch) {
return new ImageFile(pageNumber, getProcessedImageNameFormat().formatted(numberInBatch));
}
public List<ImageFile> getProcessedImageFiles() {
List<ImageFile> processedImageFiles = new ArrayList<>();
for (int i = 0; i < batchPageToOriginPageLookup.size(); i++) {
processedImageFiles.add(getProcessedImageFile(batchPageToOriginPageLookup.get(i), i + 1));
}
return processedImageFiles;
}
public String getRenderedImageNameFormat() {
return getRenderedImageDir().resolve(getImageFormat()).toFile().toString();
}
public String getProcessedImageNameFormat() {
return getProcessedImageDir().resolve(getImageFormat()).toFile().toString();
}
private String getImageFormat() {
return "output_" + index + ".%04d" + GhostScriptService.FORMAT;
}
public Path getRenderedImageDir() {
return batchDir.resolve("rendered");
}
public Path getProcessedImageDir() {
return batchDir.resolve("processed");
}
@Override
public String toString() {
if (size() == 1) {
return String.format("%d", lookup.get(0));
return String.format("%d", batchPageToOriginPageLookup.get(0));
}
List<String> intervals = formatIntervals(lookup);
List<String> intervals = formatIntervals(batchPageToOriginPageLookup);
if (intervals.size() > 4) {
intervals = intervals.subList(0, 4);
intervals.add("...");
@@ -34,54 +134,54 @@ public final class PageBatch implements Comparable<PageBatch> {
}
-public void add(Integer pageNumber) {
-lookup.add(pageNumber);
-}
public void forEach(Consumer<? super Integer> consumer) {
-lookup.forEach(consumer);
+batchPageToOriginPageLookup.forEach(consumer);
}
public List<Integer> getAllPageNumbers() {
-return lookup;
+return batchPageToOriginPageLookup;
}
public int size() {
-return lookup.size();
+return batchPageToOriginPageLookup.size();
}
public boolean isEmpty() {
-return lookup.isEmpty();
+return batchPageToOriginPageLookup.isEmpty();
}
public int getPageNumber(int pageNumber) {
-return lookup.get(pageNumber - 1);
+return batchPageToOriginPageLookup.get(pageNumber - 1);
}
@Override
public int compareTo(PageBatch o) {
-if (lookup.isEmpty() && o.lookup.isEmpty()) {
+if (batchPageToOriginPageLookup.isEmpty() && o.batchPageToOriginPageLookup.isEmpty()) {
return 0;
-} else if (lookup.isEmpty()) {
+} else if (batchPageToOriginPageLookup.isEmpty()) {
return 1;
-} else if (o.lookup.isEmpty()) {
+} else if (o.batchPageToOriginPageLookup.isEmpty()) {
return -1;
}
-return Integer.compare(lookup.get(0), o.lookup.get(0));
+return Integer.compare(batchPageToOriginPageLookup.get(0), o.batchPageToOriginPageLookup.get(0));
}
+public BinaryData render() {
+return BinaryData.fromFile(batchDoc);
+}
}
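The rendered and processed image names above are built from a printf-style format string: `getImageFormat()` combines the batch index with a `%04d` placeholder and the GhostScript extension, and `formatted(numberInBatch)` fills in the 1-based page number within the batch. A minimal sketch of that naming scheme (the `.png` extension stands in for `GhostScriptService.FORMAT`, whose value is not shown in this diff):

```java
public class ImageNameFormatSketch {
    // Mirrors PageBatch.getImageFormat(): "output_" + index + ".%04d" + FORMAT.
    // ".png" is an assumption standing in for GhostScriptService.FORMAT.
    static String imageNameFormat(int batchIndex) {
        return "output_" + batchIndex + ".%04d" + ".png";
    }

    // Mirrors getRenderedImageFile()/getProcessedImageFile(): fill in the
    // 1-based number of the page within the batch.
    static String imageName(int batchIndex, int numberInBatch) {
        return imageNameFormat(batchIndex).formatted(numberInBatch);
    }

    public static void main(String[] args) {
        // %04d zero-pads to four digits, matching GhostScript's output numbering.
        System.out.println(imageName(3, 12)); // output_3.0012.png
    }
}
```

The zero-padded counter keeps the rendered files in lexicographic page order on disk.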


@@ -13,17 +13,19 @@ import com.pdftron.pdf.Rect;
import lombok.SneakyThrows;
-public record PageInformation(Rectangle2D mediabox, int number, int rotationDegrees, List<Rectangle2D> wordBBoxes) {
+public record PageInformation(Rectangle2D mediabox, Rectangle2D cropBox, int number, int rotationDegrees, List<Rectangle2D> wordBBoxes) {
@SneakyThrows
public static Map<Integer, PageInformation> fromPDFDoc(PDFDoc pdfDoc) {
ConcurrentHashMap<Integer, PageInformation> pageInformationMap = new ConcurrentHashMap<>();
int pageNumber = 1;
-for (PageIterator iterator = pdfDoc.getPageIterator(); iterator.hasNext(); pageNumber++) {
-Page page = iterator.next();
-pageInformationMap.put(pageNumber, PageInformation.fromPage(pageNumber, page));
+try (PageIterator iterator = pdfDoc.getPageIterator()) {
+while (iterator.hasNext()) {
+Page page = iterator.next();
+pageInformationMap.put(pageNumber, PageInformation.fromPage(pageNumber, page));
+pageNumber++;
+}
+}
return pageInformationMap;
}
@@ -32,8 +34,9 @@ public record PageInformation(Rectangle2D mediabox, int number, int rotationDegr
@SneakyThrows
public static PageInformation fromPage(int pageNum, Page page) {
-try (Rect mediaBox = page.getCropBox()) {
+try (Rect mediaBox = page.getCropBox(); Rect cropBox = page.getCropBox()) {
return new PageInformation(new Rectangle2D.Double(mediaBox.getX1(), mediaBox.getY1(), mediaBox.getWidth(), mediaBox.getHeight()),
+new Rectangle2D.Double(cropBox.getX1(), cropBox.getY1(), cropBox.getWidth(), cropBox.getHeight()),
pageNum,
page.getRotation() * 90,
DocumentTextExtractor.getTextBBoxes(page));


@@ -162,7 +162,7 @@ public class Statistics {
return batchStats.values()
.stream()
-.mapToLong(BatchStats::getWritingTextDuration)
+.mapToLong(BatchStats::getMappingResultDuration)
.toArray();
}


@@ -3,7 +3,6 @@ package com.knecon.fforesight.service.ocr.processor.model;
import java.awt.geom.AffineTransform;
import java.awt.geom.Point2D;
-import com.azure.ai.documentintelligence.models.DocumentWord;
import com.knecon.fforesight.service.ocr.processor.visualizations.fonts.FontMetrics;
import com.knecon.fforesight.service.ocr.processor.visualizations.fonts.FontMetricsProvider;
import com.knecon.fforesight.service.ocr.processor.visualizations.fonts.FontStyle;
@@ -20,7 +19,8 @@ public class TextPositionInImage {
final QuadPoint position;
final String text;
-final AffineTransform imageCTM;
+final AffineTransform resultToPageTransform;
+final boolean snugBBox;
@Setter
boolean overlapsIgnoreZone;
@@ -30,33 +30,34 @@
FontStyle fontStyle;
-public TextPositionInImage(DocumentWord word, AffineTransform imageCTM, FontMetricsProvider fontMetricsProvider, FontStyle fontStyle) {
+public TextPositionInImage(QuadPoint position, String text, AffineTransform resultToPageTransform, FontMetricsProvider fontMetricsProvider, FontStyle fontStyle, boolean snugBBox) {
-this.position = QuadPoint.fromPolygons(word.getPolygon());
-this.text = word.getContent();
-this.imageCTM = imageCTM;
+this.position = position;
+this.text = text;
+this.resultToPageTransform = resultToPageTransform;
this.fontMetricsProvider = fontMetricsProvider;
this.fontStyle = fontStyle;
+this.snugBBox = snugBBox;
}
public QuadPoint getTransformedTextBBox() {
-return position.getTransformed(imageCTM);
+return position.getTransformed(resultToPageTransform);
}
public AffineTransform getTextMatrix() {
-FontMetrics metrics = fontMetricsProvider.calculateMetrics(text, getTransformedWidth(), getTransformedHeight());
+FontMetrics metrics = getMetrics();
// Matrix multiplication is from right to left:
// convert to image coords -> subtract descent -> scale height -> reverse imageCTM scaling -> translate to coordinates in image -> convert to pdf coords
// width must not be set, since it is scaled with the fontsize attribute
-double rotation = position.angle();
+double rotation = position.getAngle();
Point2D anchor = new Point2D.Double(position.b().getX(), position.b().getY());
AffineTransform ctm = new AffineTransform();
-ctm.concatenate(imageCTM);
+ctm.concatenate(resultToPageTransform);
ctm.translate(anchor.getX(), anchor.getY());
ctm.scale(getWidth() / getTransformedWidth(),
getHeight() / getTransformedHeight()); // scale with transformation coefficient, such that fontsize may be set with transformed width.
@@ -69,6 +70,15 @@ public class TextPositionInImage {
}
+private FontMetrics getMetrics() {
+if (snugBBox) {
+return fontMetricsProvider.calculateMetricsForTightBBox(text, getTransformedWidth(), getTransformedHeight());
+}
+return fontMetricsProvider.calculateMetricsForAzureBBox(text, getTransformedWidth(), getTransformedHeight());
+}
public double getFontSize() {
// The fontsize as estimated by the word width
return fontMetricsProvider.calculateFontSize(text, getTransformedWidth());
@@ -95,7 +105,7 @@ public class TextPositionInImage {
public double getFontSizeByHeight() {
// The fontsize as estimated by the word height, only used for font style detection
-var metrics = fontMetricsProvider.calculateMetrics(text, getTransformedWidth(), getTransformedHeight());
+var metrics = getMetrics();
return fontMetricsProvider.calculateFontSize(text, getTransformedWidth()) * metrics.getHeightScaling();
}
@@ -108,25 +118,25 @@
public Point2D transformedA() {
-return imageCTM.transform(position.a(), null);
+return resultToPageTransform.transform(position.a(), null);
}
public Point2D transformedB() {
-return imageCTM.transform(position.b(), null);
+return resultToPageTransform.transform(position.b(), null);
}
public Point2D transformedC() {
-return imageCTM.transform(position.c(), null);
+return resultToPageTransform.transform(position.c(), null);
}
public Point2D transformedD() {
-return imageCTM.transform(position.d(), null);
+return resultToPageTransform.transform(position.d(), null);
}
}


@@ -1,23 +1,25 @@
package com.knecon.fforesight.service.ocr.processor.service;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.function.Supplier;
+import org.slf4j.MDC;
import org.springframework.stereotype.Service;
import com.azure.ai.documentintelligence.models.AnalyzeResult;
import com.azure.core.util.BinaryData;
import com.azure.core.util.polling.LongRunningOperationStatus;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
import com.knecon.fforesight.service.ocr.processor.model.PageInformation;
-import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingSupervisor;
+import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingPipeline;
import com.knecon.fforesight.service.ocr.processor.visualizations.layers.LayerFactory;
import com.knecon.fforesight.service.ocr.processor.visualizations.layers.OcrResult;
import com.pdftron.common.PDFNetException;
+import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
import com.pdftron.pdf.PDFDoc;
import com.pdftron.sdf.SDFDoc;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
@@ -34,16 +36,13 @@ public class AsyncOcrService {
AzureOcrResource azureOcrResource;
OcrServiceSettings settings;
ImageProcessingPipeline imageProcessingPipeline;
ObjectMapper mapper;
-public OcrResult awaitOcr(PDFDoc pdfDoc,
-OcrExecutionSupervisor supervisor,
-Set<Integer> pagesWithImages,
-ImageProcessingSupervisor imageSupervisor) throws InterruptedException, PDFNetException {
+public OcrResult awaitOcr(PDFDoc pdfDoc, OcrExecutionSupervisor supervisor, Set<AzureOcrFeature> features, List<PageBatch> batches) throws InterruptedException {
-LayerFactory layerFactory = new LayerFactory(settings, supervisor, imageSupervisor, PageInformation.fromPDFDoc(pdfDoc));
-List<PageBatch> batches = splitIntoBatches(pdfDoc, supervisor, pagesWithImages);
+LayerFactory layerFactory = new LayerFactory(settings, features, supervisor, PageInformation.fromPDFDoc(pdfDoc), imageProcessingPipeline);
for (PageBatch batch : batches) {
@@ -56,12 +55,10 @@ public class AsyncOcrService {
supervisor.requireNoErrors();
batchContext.batchStats().start();
-BinaryData data = renderBatch(pdfDoc, batch);
+BinaryData data = batch.render();
batchContext.batchStats().batchRenderFinished();
-beginAnalysis(data, batchContext);
+beginAnalysis(data, batchContext, features);
}
supervisor.awaitAllPagesProcessed();
@@ -70,44 +67,21 @@
}
-private static BinaryData renderBatch(PDFDoc pdfDoc, PageBatch batch) throws PDFNetException {
+private void beginAnalysis(BinaryData data, BatchContext batchContext, Set<AzureOcrFeature> features) throws InterruptedException {
-BinaryData docData;
-try (var smallerDoc = extractBatchDocument(pdfDoc, batch)) {
-docData = BinaryData.fromBytes(smallerDoc.save(SDFDoc.SaveMode.LINEARIZED, null));
+if (settings.isUseCaches() && batchContext.batch().getAzureResultCacheFile().exists()) {
+handleCached(batchContext);
}
-return docData;
-}
-private List<PageBatch> splitIntoBatches(PDFDoc pdfDoc, OcrExecutionSupervisor supervisor, Set<Integer> pagesWithImages) throws PDFNetException {
-List<PageBatch> batches = new ArrayList<>();
-PageBatch currentBatch = new PageBatch();
-batches.add(currentBatch);
-for (int pageNumber = 1; pageNumber <= pdfDoc.getPageCount(); pageNumber++) {
-if (!settings.isProcessAllPages() && !pagesWithImages.contains(pageNumber)) {
-supervisor.logPageSkipped(pageNumber);
-continue;
-}
-currentBatch.add(pageNumber);
-if (currentBatch.size() == settings.getBatchSize()) {
-currentBatch = new PageBatch();
-batches.add(currentBatch);
-}
-}
-return batches;
-}
-private void beginAnalysis(BinaryData data, BatchContext batchContext) throws InterruptedException {
batchContext.supervisor.enterConcurrency(batchContext.batch);
batchContext.supervisor.logUploadStart(batchContext.batch, data.getLength());
-azureOcrResource.callAzureAsync(data)
+var mdcContext = MDC.getCopyOfContextMap();
+azureOcrResource.callAzureAsync(data, features)
.flatMap(response -> {
+MDC.setContextMap(mdcContext);
if (response.getStatus().equals(LongRunningOperationStatus.IN_PROGRESS)) {
batchContext.supervisor.logInProgress(batchContext.batch);
}
@@ -117,54 +91,62 @@
if (LongRunningOperationStatus.SUCCESSFULLY_COMPLETED == response.getStatus()) {
return response.getFinalResult();
}
-return Mono.error(new IllegalStateException("Polling completed unsuccessfully with status: " + response.getStatus()));
+String message = "Polling completed unsuccessfully with status: " + response.getStatus();
+log.error(message);
+return Mono.error(new IllegalStateException(message));
}).subscribe(finalResult -> handleSuccessful(finalResult, batchContext),//
ex -> handleError(ex, batchContext),//
() -> handleCompleted(batchContext));
}
+@SneakyThrows
+private static void handleCached(BatchContext batchContext) {
+var mdcContext = MDC.getCopyOfContextMap();
+Thread thread = new Thread(() -> {
+MDC.setContextMap(mdcContext);
+log.info("Batch {}: Using cached ocr result", batchContext.batch.getIndex());
+batchContext.batchStats().finishUpload();
+batchContext.batchStats().finishApiWait();
+batchContext.supervisor.logPageSuccess(batchContext.batch());
+try {
+batchContext.layerFactory.processAnalyzeResult(batchContext.batch(), batchContext.batch().getAzureResultCache());
+} catch (InterruptedException e) {
+batchContext.supervisor.logPageError(batchContext.batch, e);
+}
+});
+thread.start();
+}
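Both `beginAnalysis` and `handleCached` snapshot the SLF4J logging context with `MDC.getCopyOfContextMap()` and restore it with `MDC.setContextMap(...)` on the thread that does the work. The reason is that MDC is backed by a thread-local, so context set on the calling thread is invisible to the Reactor or worker thread that runs the continuation. A minimal sketch of the mechanics, using a plain `ThreadLocal` map as a stand-in for MDC to stay dependency-free:

```java
import java.util.HashMap;
import java.util.Map;

public class ContextCopySketch {
    // Stand-in for SLF4J's MDC: logging context held in a thread-local map.
    static final ThreadLocal<Map<String, String>> CONTEXT =
            ThreadLocal.withInitial(HashMap::new);

    // Without copying: a fresh thread starts with its own empty map.
    static String readOnFreshThread() throws InterruptedException {
        String[] seen = new String[1];
        Thread t = new Thread(() -> seen[0] = CONTEXT.get().get("fileId"));
        t.start();
        t.join();
        return seen[0]; // null: thread-locals do not cross threads
    }

    // The pattern used above: snapshot on the caller, restore on the worker
    // before doing anything that logs.
    static String readWithCopiedContext() throws InterruptedException {
        Map<String, String> snapshot = new HashMap<>(CONTEXT.get());
        String[] seen = new String[1];
        Thread t = new Thread(() -> {
            CONTEXT.set(snapshot);
            seen[0] = CONTEXT.get().get("fileId");
        });
        t.start();
        t.join();
        return seen[0];
    }

    public static void main(String[] args) throws InterruptedException {
        CONTEXT.get().put("fileId", "doc-42");
        System.out.println(readOnFreshThread());
        System.out.println(readWithCopiedContext());
    }
}
```

The same reasoning applies inside the `flatMap` callback, which Reactor may execute on a scheduler thread rather than the caller.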
private static void handleCompleted(BatchContext batchContext) {
batchContext.supervisor.leaveConcurrency(batchContext.batch);
log.info("Batch {}: Completed with pages {}", batchContext.batch.getIndex(), batchContext.batch);
}
private void handleError(Throwable ex, BatchContext batchContext) {
batchContext.supervisor.leaveConcurrency(batchContext.batch);
batchContext.supervisor.logPageError(batchContext.batch, ex);
}
private void handleSuccessful(AnalyzeResult finalResult, BatchContext batchContext) {
batchContext.supervisor.leaveConcurrency(batchContext.batch);
try {
-batchContext.layerFactory.addAnalyzeResult(batchContext.batch, finalResult);
-batchContext.supervisor.logPageSuccess(batchContext.batch);
+mapper.writeValue(batchContext.batch().getAzureResultCacheFile(), finalResult);
+batchContext.supervisor.logPageSuccess(batchContext.batch());
+batchContext.layerFactory.processAnalyzeResult(batchContext.batch(), finalResult);
} catch (Exception e) {
handleError(e, batchContext);
}
}
-private static PDFDoc extractBatchDocument(PDFDoc pdfDoc, PageBatch pageBatch) throws PDFNetException {
-if (pageBatch.size() < 0) {
-throw new IllegalArgumentException();
-}
-PDFDoc singlePagePdfDoc = new PDFDoc();
-pageBatch.forEach(pageNumber -> addPageToNewDoc(pageNumber, pdfDoc, singlePagePdfDoc));
-return singlePagePdfDoc;
-}
-@SneakyThrows
-private static void addPageToNewDoc(Integer pageNumber, PDFDoc pdfDoc, PDFDoc singlePagePdfDoc) {
-singlePagePdfDoc.pagePushBack(pdfDoc.getPage(pageNumber));
-}
private record BatchContext(LayerFactory layerFactory, OcrExecutionSupervisor supervisor, PageBatch batch) {
BatchStats batchStats() {


@@ -2,23 +2,25 @@ package com.knecon.fforesight.service.ocr.processor.service;
import java.util.ArrayList;
import java.util.List;
+import java.util.Set;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import com.azure.ai.documentintelligence.DocumentIntelligenceAsyncClient;
import com.azure.ai.documentintelligence.DocumentIntelligenceClientBuilder;
-import com.azure.ai.documentintelligence.models.AnalyzeDocumentRequest;
+import com.azure.ai.documentintelligence.models.AnalyzeDocumentOptions;
+import com.azure.ai.documentintelligence.models.AnalyzeOperationDetails;
import com.azure.ai.documentintelligence.models.AnalyzeResult;
-import com.azure.ai.documentintelligence.models.AnalyzeResultOperation;
-import com.azure.ai.documentintelligence.models.ContentFormat;
import com.azure.ai.documentintelligence.models.DocumentAnalysisFeature;
+import com.azure.ai.documentintelligence.models.DocumentContentFormat;
import com.azure.ai.documentintelligence.models.StringIndexType;
import com.azure.core.credential.AzureKeyCredential;
import com.azure.core.util.BinaryData;
import com.azure.core.util.polling.PollerFlux;
import com.google.common.base.Objects;
import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
+import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
import lombok.AccessLevel;
import lombok.SneakyThrows;
@@ -42,43 +44,48 @@ public class AzureOcrResource {
@SneakyThrows
-public PollerFlux<AnalyzeResultOperation, AnalyzeResult> callAzureAsync(BinaryData data) {
+public PollerFlux<AnalyzeOperationDetails, AnalyzeResult> callAzureAsync(BinaryData data, Set<AzureOcrFeature> features) {
-AnalyzeDocumentRequest analyzeRequest = new AnalyzeDocumentRequest().setBase64Source(data.toBytes());
-return asyncClient.beginAnalyzeDocument(getModelId(), null, null, StringIndexType.UTF16CODE_UNIT, buildFeatures(), null, buildContentFormat(), analyzeRequest);
+AnalyzeDocumentOptions analyzeDocumentOptions = new AnalyzeDocumentOptions(data.toBytes());
+analyzeDocumentOptions.setStringIndexType(StringIndexType.UTF16_CODE_UNIT);
+analyzeDocumentOptions.setDocumentAnalysisFeatures(buildFeatures(features));
+analyzeDocumentOptions.setOutputContentFormat(buildContentFormat());
+return asyncClient.beginAnalyzeDocument(getModelId(features), analyzeDocumentOptions);
}
-private ContentFormat buildContentFormat() {
+private DocumentContentFormat buildContentFormat() {
if (Objects.equal(settings.getContentFormat(), "markdown")) {
-return ContentFormat.MARKDOWN;
+return DocumentContentFormat.MARKDOWN;
}
-return ContentFormat.TEXT;
+return DocumentContentFormat.TEXT;
}
-private String getModelId() {
+private String getModelId(Set<AzureOcrFeature> features) {
-if (settings.isIdpEnabled()) {
+if (features.contains(AzureOcrFeature.IDP)) {
return "prebuilt-layout";
}
return "prebuilt-read";
}
-private List<DocumentAnalysisFeature> buildFeatures() {
+private List<DocumentAnalysisFeature> buildFeatures(Set<AzureOcrFeature> features) {
-var features = new ArrayList<DocumentAnalysisFeature>();
+var azureFeatures = new ArrayList<DocumentAnalysisFeature>();
-if (settings.isIdpEnabled()) {
-features.add(DocumentAnalysisFeature.KEY_VALUE_PAIRS);
+if (features.contains(AzureOcrFeature.IDP)) {
+azureFeatures.add(DocumentAnalysisFeature.KEY_VALUE_PAIRS);
}
-features.add(DocumentAnalysisFeature.BARCODES);
+if (settings.isAzureFontStyleDetection() && features.contains(AzureOcrFeature.FONT_STYLE_DETECTION)) {
+azureFeatures.add(DocumentAnalysisFeature.STYLE_FONT);
+}
+azureFeatures.add(DocumentAnalysisFeature.BARCODES);
-return features;
+return azureFeatures;
}
}


@@ -0,0 +1,144 @@
package com.knecon.fforesight.service.ocr.processor.service;
import static com.knecon.fforesight.service.ocr.processor.utils.ListSplittingUtils.formatIntervals;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.springframework.stereotype.Service;
import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
import com.pdftron.common.PDFNetException;
import com.pdftron.pdf.Optimizer;
import com.pdftron.pdf.PDFDoc;
import com.pdftron.sdf.SDFDoc;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.experimental.FieldDefaults;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@Service
@RequiredArgsConstructor
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class BatchFactory {
OcrServiceSettings settings;
@SneakyThrows
public List<PageBatch> splitIntoBatches(PDFDoc pdfDoc, OcrExecutionSupervisor supervisor, Set<AzureOcrFeature> features, Path runDir) {
Set<Integer> pagesToProcess = findPagesToProcess(pdfDoc, features);
supervisor.logImageExtractionFinished(pdfDoc.getPageCount(), pagesToProcess.size());
List<PageBatch> batches = buildBatches(pdfDoc, supervisor, features, runDir, pagesToProcess);
if (batches.size() > 1) {
log.info("Split {} pages to process into {} batches", pagesToProcess.size(), batches.size());
}
return batches;
}
@SneakyThrows
public Set<Integer> findPagesToProcess(PDFDoc pdfDoc, Set<AzureOcrFeature> features) {
if (features.contains(AzureOcrFeature.ALL_PAGES)) {
Set<Integer> pages = new HashSet<>();
for (int i = 1; i <= pdfDoc.getPageCount(); i++) {
pages.add(i);
}
return Collections.unmodifiableSet(pages);
}
return ImageDetectionService.findPagesWithImages(pdfDoc);
}
public List<PageBatch> buildBatches(PDFDoc pdfDoc,
OcrExecutionSupervisor supervisor,
Set<AzureOcrFeature> features,
Path runDir,
Set<Integer> pagesWithImages) throws PDFNetException {
List<PageBatch> batches = new ArrayList<>();
List<Integer> numbersForCurrentBatch = new ArrayList<>();
for (int pageNumber = 1; pageNumber <= pdfDoc.getPageCount(); pageNumber++) {
if (!features.contains(AzureOcrFeature.ALL_PAGES) && !pagesWithImages.contains(pageNumber)) {
supervisor.logPageSkipped(pageNumber);
continue;
}
numbersForCurrentBatch.add(pageNumber);
if (numbersForCurrentBatch.size() == settings.getBatchSize()) {
batches.add(create(batches.size(), pdfDoc, numbersForCurrentBatch, runDir));
numbersForCurrentBatch = new ArrayList<>();
}
}
if (!numbersForCurrentBatch.isEmpty()) {
batches.add(create(batches.size(), pdfDoc, numbersForCurrentBatch, runDir));
}
return batches;
}
@SneakyThrows
public static PageBatch create(int number, PDFDoc pdfDoc, List<Integer> pageNumbers, Path runDir) {
if (pageNumbers.isEmpty()) {
throw new IllegalArgumentException("pageNumbers must not be empty");
}
Path batchDir = formatBatchDir(number, pageNumbers, runDir);
Files.createDirectories(batchDir);
Path batchDocPath = batchDir.resolve("batch.pdf");
try (var batchDoc = extractBatchDocument(pdfDoc, pageNumbers)) {
Optimizer.optimize(batchDoc);
batchDoc.save(batchDocPath.toFile().toString(), SDFDoc.SaveMode.LINEARIZED, null);
}
PageBatch batch = new PageBatch(number, pageNumbers, batchDocPath, batchDir);
Files.createDirectories(batch.getRenderedImageDir());
Files.createDirectories(batch.getProcessedImageDir());
return batch;
}
private static Path formatBatchDir(int number, List<Integer> pageNumbers, Path runDir) {
List<String> intervals = formatIntervals(pageNumbers);
if (intervals.size() > 4) {
intervals = intervals.subList(0, 4);
intervals.add("...");
}
String batchName = String.join(", ", intervals);
return runDir.resolve("batch_%04d_%s".formatted(number, batchName));
}
private static PDFDoc extractBatchDocument(PDFDoc pdfDoc, List<Integer> pageBatch) throws PDFNetException {
if (pageBatch.isEmpty()) {
throw new IllegalArgumentException();
}
PDFDoc batchDoc = new PDFDoc();
pageBatch.forEach(pageNumber -> addPageToNewDoc(pageNumber, pdfDoc, batchDoc));
return batchDoc;
}
@SneakyThrows
private static void addPageToNewDoc(Integer pageNumber, PDFDoc pdfDoc, PDFDoc batchDoc) {
batchDoc.pagePushBack(pdfDoc.getPage(pageNumber));
}
}
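`BatchFactory.buildBatches` above walks pages 1..N, skips pages without images unless `ALL_PAGES` is requested, closes a batch whenever it reaches `settings.getBatchSize()`, and keeps the trailing partial batch without ever emitting an empty one (unlike the removed `splitIntoBatches`, which pre-added an empty batch). A minimal sketch of just that grouping arithmetic, with plain lists in place of `PageBatch` (names here are illustrative, not the service's API):

```java
import java.util.ArrayList;
import java.util.List;
import java.util.Set;

public class BatchSplitSketch {
    // Group the pages to process into batches of at most batchSize,
    // preserving page order and keeping the trailing partial batch.
    static List<List<Integer>> split(int pageCount, Set<Integer> pagesToProcess, int batchSize) {
        List<List<Integer>> batches = new ArrayList<>();
        List<Integer> current = new ArrayList<>();
        for (int page = 1; page <= pageCount; page++) {
            if (!pagesToProcess.contains(page)) {
                continue; // corresponds to supervisor.logPageSkipped(pageNumber)
            }
            current.add(page);
            if (current.size() == batchSize) {
                batches.add(current);
                current = new ArrayList<>();
            }
        }
        if (!current.isEmpty()) { // no empty batch is ever emitted
            batches.add(current);
        }
        return batches;
    }

    public static void main(String[] args) {
        // 7 pages, page 4 has no images, batch size 3 -> [[1, 2, 3], [5, 6, 7]]
        System.out.println(split(7, Set.of(1, 2, 3, 5, 6, 7), 3));
    }
}
```

Because batches carry only the pages worth processing, the origin page numbers must be recorded per batch, which is what `batchPageToOriginPageLookup` in `PageBatch` does.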


@@ -10,7 +10,7 @@ public class BatchStats {
private long apiWaitTimestamp = -1;
private long imageUploadTimestamp = -1;
-private long writingTextTimestamp = -1;
+private long mappingResultTimestamp = -1;
private long batchRenderTimestamp = -1;
@@ -38,9 +38,9 @@
}
-public void finishWritingText() {
+public void finishMappingResult() {
-writingTextTimestamp = System.currentTimeMillis();
+mappingResultTimestamp = System.currentTimeMillis();
}
@@ -50,15 +50,33 @@
}
public boolean isApiWaitFinished() {
return apiWaitTimestamp > 0;
}
+public boolean isMappingResultFinished() {
+return mappingResultTimestamp > 0;
+}
+public boolean isBatchRenderFinished() {
+return batchRenderTimestamp > 0;
+}
public long getApiWaitDuration() {return this.apiWaitTimestamp - imageUploadTimestamp;}
public long getImageUploadDuration() {return this.imageUploadTimestamp - batchRenderTimestamp;}
-public long getWritingTextDuration() {return this.writingTextTimestamp - apiWaitTimestamp;}
+public long getMappingResultDuration() {return this.mappingResultTimestamp - apiWaitTimestamp;}
-public long getBatchRenderDuration() {return this.batchRenderTimestamp - startTimestamp;}
+public long getBatchRenderDuration() {return startTimestamp - this.batchRenderTimestamp;}
}
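`BatchStats` records one timestamp per pipeline stage (start, batch render, image upload, API wait, mapping result), and each duration getter is the difference between a stage's timestamp and its predecessor's. A minimal sketch of that bookkeeping with injected clock values; note it uses the stage-minus-predecessor convention consistently, whereas the revised `getBatchRenderDuration` above subtracts in the opposite order:

```java
public class BatchStatsSketch {
    long startTimestamp = -1;
    long batchRenderTimestamp = -1;
    long imageUploadTimestamp = -1;
    long apiWaitTimestamp = -1;
    long mappingResultTimestamp = -1;

    // Each stage stamps the (injected) current time when it finishes.
    void start(long now) { startTimestamp = now; }
    void batchRenderFinished(long now) { batchRenderTimestamp = now; }
    void finishUpload(long now) { imageUploadTimestamp = now; }
    void finishApiWait(long now) { apiWaitTimestamp = now; }
    void finishMappingResult(long now) { mappingResultTimestamp = now; }

    // Durations are successive differences between adjacent stage timestamps.
    long getBatchRenderDuration() { return batchRenderTimestamp - startTimestamp; }
    long getImageUploadDuration() { return imageUploadTimestamp - batchRenderTimestamp; }
    long getApiWaitDuration() { return apiWaitTimestamp - imageUploadTimestamp; }
    long getMappingResultDuration() { return mappingResultTimestamp - apiWaitTimestamp; }

    public static void main(String[] args) {
        BatchStatsSketch s = new BatchStatsSketch();
        s.start(1000);
        s.batchRenderFinished(1300);
        s.finishUpload(1800);
        s.finishApiWait(4000);
        s.finishMappingResult(4500);
        System.out.println(s.getApiWaitDuration()); // 2200
    }
}
```

Passing the clock in rather than calling `System.currentTimeMillis()` directly is only for testability of the sketch; the real class stamps the wall clock.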


@@ -1,16 +1,24 @@
package com.knecon.fforesight.service.ocr.processor.service;
+import java.util.Set;
import org.springframework.stereotype.Service;
+import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
+import com.knecon.fforesight.service.ocr.v1.api.model.DocumentRequest;
@Service
public interface IOcrMessageSender {
-void sendUpdate(String fileId, int finishedImages, int totalImages);
+void sendUpdate(String fileId, int finishedImages, int totalImages, Set<AzureOcrFeature> features);
-void sendOCRStarted(String fileId);
-void sendOcrFinished(String fileId, int totalImages);
+void sendOCRStarted(String fileId, Set<AzureOcrFeature> features);
-void sendOcrResponse(String dossierId, String fileId);
+void sendOcrFinished(String fileId, int totalImages, Set<AzureOcrFeature> features);
+void sendOcrResponse(DocumentRequest request);
}


@@ -7,40 +7,24 @@ import java.util.Set;
import org.springframework.stereotype.Service;
import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
import com.pdftron.common.PDFNetException;
import com.pdftron.pdf.Element;
import com.pdftron.pdf.ElementReader;
import com.pdftron.pdf.PDFDoc;
import lombok.SneakyThrows;
+import lombok.experimental.UtilityClass;
-@Service
+@UtilityClass
public class ImageDetectionService {
// any image with smaller height and width than this gets thrown out, see everyPointInDashedLineIsImage.pdf
private static final int PIXEL_THRESHOLD = 0;
-private final OcrServiceSettings ocrServiceSettings;
-public ImageDetectionService(OcrServiceSettings ocrServiceSettings) {this.ocrServiceSettings = ocrServiceSettings;}
@SneakyThrows
-public Set<Integer> findPagesToProcess(PDFDoc pdfDoc) {
-if (ocrServiceSettings.isProcessAllPages()) {
-Set<Integer> pages = new HashSet<>();
-for (int i = 1; i <= pdfDoc.getPageCount(); i++) {
-pages.add(i);
-}
-return Collections.unmodifiableSet(pages);
-}
-return findPagesWithImages(pdfDoc);
-}
-private Set<Integer> findPagesWithImages(PDFDoc pdfDoc) throws PDFNetException {
+public Set<Integer> findPagesWithImages(PDFDoc pdfDoc) {
Set<Integer> pagesWithImages = new HashSet<>();
try (ElementReader reader = new ElementReader()) {
@@ -72,8 +56,11 @@ public class ImageDetectionService {
}
case Element.e_form -> {
reader.formBegin();
-findImagePositionsOnPage(reader);
+var found = findImagePositionsOnPage(reader);
reader.end();
+if (found) {
+return true;
+}
}
}
}


@@ -5,21 +5,25 @@ import static com.knecon.fforesight.service.ocr.processor.model.Statistics.human
import java.io.File;
import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.List;
import java.util.Set;
import org.springframework.stereotype.Service;
+import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.pdftronlogic.commons.InvisibleElementRemovalService;
import com.iqser.red.pdftronlogic.commons.OCGWatermarkRemovalService;
import com.iqser.red.pdftronlogic.commons.WatermarkRemovalService;
import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
+import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
import com.knecon.fforesight.service.ocr.processor.model.Statistics;
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingPipeline;
-import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingSupervisor;
import com.knecon.fforesight.service.ocr.processor.visualizations.layers.OcrResult;
import com.knecon.fforesight.service.ocr.processor.visualizations.utils.RotationCorrectionUtility;
+import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
import com.knecon.fforesight.service.viewerdoc.service.PDFTronViewerDocumentService;
import com.pdftron.pdf.PDFDoc;
@@ -40,10 +44,10 @@ public class OCRService {
WatermarkRemovalService watermarkRemovalService;
InvisibleElementRemovalService invisibleElementRemovalService;
PDFTronViewerDocumentService viewerDocumentService;
-ImageDetectionService imageDetectionService;
+BatchFactory batchFactory;
AsyncOcrService asyncOcrService;
OcrServiceSettings settings;
ImageProcessingPipeline imageProcessingPipeline;
+ObjectMapper mapper;
/**
@@ -56,24 +60,23 @@
* @param tmpDir working directory for all files
* @param documentFile the file to perform ocr on, results are written invisibly
* @param viewerDocumentFile debugging file, results are written visibly in an optional content group
-* @param analyzeResultFile result file with additional information
+* @param idpResultFile result file with additional information
*/
@Observed(name = "OCRService", contextualName = "run-ocr-on-document")
-public void runOcrOnDocument(String dossierId, String fileId, boolean removeWatermark, Path tmpDir, File documentFile, File viewerDocumentFile, File analyzeResultFile) {
+public void runOcrOnDocument(String dossierId, String fileId, Set<AzureOcrFeature> features, Path tmpDir, File documentFile, File viewerDocumentFile, File idpResultFile) {
-if (removeWatermark) {
+if (features.contains(AzureOcrFeature.REMOVE_WATERMARKS)) {
removeWatermark(documentFile);
}
removeInvisibleElements(documentFile);
log.info("Starting OCR for file {}", fileId);
long ocrStart = System.currentTimeMillis();
-Statistics stats = runOcr(tmpDir, documentFile, viewerDocumentFile, fileId, dossierId, analyzeResultFile).getStatistics();
+Statistics stats = runOcr(tmpDir, documentFile, viewerDocumentFile, fileId, dossierId, idpResultFile, features).getStatistics();
long ocrEnd = System.currentTimeMillis();
-log.info("ocr successful for file with dossierId {} and fileId {}, took {}", dossierId, fileId, humanizeDuration(ocrEnd - ocrStart));
+log.info("OCR successful, took {}", humanizeDuration(ocrEnd - ocrStart));
if (settings.isDebug()) {
logRuntimeBreakdown(ocrEnd, ocrStart, stats);
@@ -117,34 +120,37 @@
@SneakyThrows
-public OcrExecutionSupervisor runOcr(Path tmpDir, File documentFile, File viewerDocumentFile, String fileId, String dossierId, File analyzeResultFile) {
-Path tmpImageDir = tmpDir.resolve("images");
-Path azureOutputDir = tmpDir.resolve("azure_output");
-Files.createDirectories(azureOutputDir);
-Files.createDirectories(tmpImageDir);
+public OcrExecutionSupervisor runOcr(Path runDir,
+File documentFile,
+File viewerDocumentFile,
+String fileId,
+String dossierId,
+File idpResultFile,
+Set<AzureOcrFeature> features) {
try (var in = new FileInputStream(documentFile); PDFDoc pdfDoc = new PDFDoc(in)) {
OCGWatermarkRemovalService.removeWatermarks(pdfDoc);
-OcrExecutionSupervisor supervisor = new OcrExecutionSupervisor(pdfDoc.getPageCount(), ocrMessageSender, fileId, settings);
+OcrExecutionSupervisor supervisor = new OcrExecutionSupervisor(pdfDoc.getPageCount(), ocrMessageSender, fileId, settings, features);
supervisor.getStatistics().setStart();
-Set<Integer> pagesWithImages = imageDetectionService.findPagesToProcess(pdfDoc);
-ImageProcessingSupervisor imageSupervisor = null;
-if (settings.isFontStyleDetection()) {
-imageSupervisor = imageProcessingPipeline.run(pagesWithImages, tmpImageDir, documentFile);
-}
+List<PageBatch> batches = batchFactory.splitIntoBatches(pdfDoc, supervisor, features, runDir);
-supervisor.logImageExtractionFinished(pdfDoc.getPageCount(), pagesWithImages.size());
-OcrResult ocrResult = asyncOcrService.awaitOcr(pdfDoc, supervisor, pagesWithImages, imageSupervisor);
+OcrResult ocrResult = asyncOcrService.awaitOcr(pdfDoc, supervisor, features, batches);
viewerDocumentService.addLayerGroups(documentFile, documentFile, ocrResult.regularLayers());
viewerDocumentService.addLayerGroups(documentFile, viewerDocumentFile, ocrResult.debugLayers());
+if (features.contains(AzureOcrFeature.ROTATION_CORRECTION)) {
+RotationCorrectionUtility.rotatePages(documentFile.toPath(), documentFile.toPath(), ocrResult.anglesPerPage());
+RotationCorrectionUtility.rotatePages(viewerDocumentFile.toPath(), viewerDocumentFile.toPath(), ocrResult.anglesPerPage());
+}
+if (features.contains(AzureOcrFeature.IDP)) {
+saveIdpResultFile(idpResultFile, ocrResult);
+}
supervisor.getStatistics().drawingPdfFinished();
supervisor.sendFinished();
@@ -154,4 +160,12 @@ public class OCRService {
}
private void saveIdpResultFile(File idpResultFile, OcrResult ocrResult) throws IOException {
try (var out = new FileOutputStream(idpResultFile)) {
mapper.writeValue(out, ocrResult.idpResult());
}
}
}


@@ -5,6 +5,7 @@ import static com.knecon.fforesight.service.ocr.processor.model.Statistics.human
import java.util.Collections;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
@@ -13,6 +14,7 @@ import java.util.concurrent.CountDownLatch;
import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
import com.knecon.fforesight.service.ocr.processor.model.Statistics;
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
import lombok.AccessLevel;
import lombok.Getter;
@@ -38,12 +40,15 @@ public class OcrExecutionSupervisor {
String fileId;
Set<AzureOcrFeature> features;
-public OcrExecutionSupervisor(int totalPageCount, IOcrMessageSender ocrMessageSender, String fileId, OcrServiceSettings settings) {
+public OcrExecutionSupervisor(int totalPageCount, IOcrMessageSender ocrMessageSender, String fileId, OcrServiceSettings settings, Set<AzureOcrFeature> features) {
this.totalPageCount = totalPageCount;
this.ocrMessageSender = ocrMessageSender;
this.fileId = fileId;
this.features = features;
this.errorPages = Collections.synchronizedSet(new HashSet<>());
this.countDownPagesToProcess = new CountDownLatch(totalPageCount);
this.statistics = new Statistics();
@@ -65,16 +70,16 @@ public class OcrExecutionSupervisor {
}
-public void logImageExtractionFinished(int numberOfPages, int numberOfImages) {
+public void logImageExtractionFinished(int numberOfPages, int numberOfPagesToProcess) {
statistics.imageExtractionFinished();
-log.info("Images found on {}/{} pages in {}", numberOfImages, numberOfPages, humanizeDuration(statistics.getImageExtractionDuration()));
+log.info("Images found on {}/{} pages in {}", numberOfPagesToProcess, numberOfPages, humanizeDuration(statistics.getImageExtractionDuration()));
}
public void logUploadStart(PageBatch pageRange, long bytes) {
-log.info("Start uploading pages {} with {}", pageRange, humanizeBytes(bytes));
+log.info("Batch {}: Start uploading pages {} with {}", pageRange.getIndex(), pageRange, humanizeBytes(bytes));
statistics.getBatchStats(pageRange).start();
statistics.increaseTotalBytes(pageRange, bytes);
}
@@ -83,27 +88,28 @@ public class OcrExecutionSupervisor {
public void logInProgress(PageBatch pageRange) {
if (!statistics.getBatchStats(pageRange).isUploadFinished()) {
-log.info("Pages {} is in progress", pageRange);
+log.info("Batch {}: Pages {} is in progress", pageRange.getIndex(), pageRange);
statistics.getBatchStats(pageRange).finishUpload();
-ocrMessageSender.sendUpdate(fileId, processedPages(), getTotalPageCount());
+ocrMessageSender.sendUpdate(fileId, processedPages(), getTotalPageCount(), features);
} else {
-log.debug("Pages {} still in progress", pageRange);
+log.debug("Batch {}: Pages {} still in progress", pageRange.getIndex(), pageRange);
}
}
-public void finishMappingResult(PageBatch pageRange) {
+public void finishMappingResult(PageBatch batch) {
-pageRange.forEach(pageIndex -> countDownPagesToProcess.countDown());
-statistics.getBatchStats(pageRange).finishWritingText();
-ocrMessageSender.sendUpdate(fileId, this.processedPages(), getTotalPageCount());
+batch.forEach(pageIndex -> countDownPagesToProcess.countDown());
+statistics.getBatchStats(batch).finishMappingResult();
+ocrMessageSender.sendUpdate(fileId, this.processedPages(), getTotalPageCount(), features);
+log.info("Batch {}: Finished mapping result with pages {}", batch.getIndex(), batch);
}
public void logPageSkipped(Integer pageIndex) {
this.countDownPagesToProcess.countDown();
-ocrMessageSender.sendUpdate(fileId, this.processedPages(), getTotalPageCount());
+ocrMessageSender.sendUpdate(fileId, this.processedPages(), getTotalPageCount(), features);
log.debug("{}/{}: No images to ocr on page {}", processedPages(), getTotalPageCount(), pageIndex);
}
@@ -113,21 +119,43 @@ public class OcrExecutionSupervisor {
this.errorPages.add(batch);
batch.forEach(pageIndex -> this.countDownPagesToProcess.countDown());
-ocrMessageSender.sendUpdate(fileId, this.processedPages(), getTotalPageCount());
-log.error("{}/{}: Error occurred on pages {}", processedPages(), getTotalPageCount(), batch, e);
+ocrMessageSender.sendUpdate(fileId, this.processedPages(), getTotalPageCount(), features);
+log.error("{}/{}: Error occurred in batch {} with pages {}", processedPages(), getTotalPageCount(), batch.getIndex(), batch, e);
}
public void logPageSuccess(PageBatch batch) {
statistics.getBatchStats(batch).finishApiWait();
-log.info("{}/{}: Finished OCR on pages {}", processedPages(), getTotalPageCount(), batch);
+log.info("{}/{}: Finished OCR in batch {} with pages {}", processedPages(), getTotalPageCount(), batch.getIndex(), batch);
}
private int processedPages() {
-return (int) (totalPageCount - countDownPagesToProcess.getCount());
+if (countDownPagesToProcess.getCount() == 0) {
+return totalPageCount;
+}
+int processedPages = 0;
+for (Map.Entry<PageBatch, BatchStats> entry : statistics.getBatchStats().entrySet()) {
+PageBatch pageBatch = entry.getKey();
+BatchStats batchStats = entry.getValue();
+float percentage = 0;
+if (batchStats.isBatchRenderFinished()) {
+percentage += 0.1f;
+}
+if (batchStats.isUploadFinished()) {
+percentage += 0.3f;
+}
+if (batchStats.isApiWaitFinished()) {
+percentage += 0.3f;
+}
+if (batchStats.isMappingResultFinished()) {
+percentage += 0.3f;
+}
+processedPages += (int) (pageBatch.size() * percentage);
+}
+return processedPages;
}
@@ -144,7 +172,7 @@ public class OcrExecutionSupervisor {
requireNoErrors();
log.info("{}/{}: Finished OCR on all pages", getTotalPageCount(), getTotalPageCount());
-ocrMessageSender.sendOcrFinished(fileId, getTotalPageCount());
+ocrMessageSender.sendOcrFinished(fileId, getTotalPageCount(), features);
}
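The weighted progress heuristic in processedPages() above can be sketched in isolation. This is a simplified stand-in, not the service code: the Stages record and estimate() helper are hypothetical substitutes for BatchStats and its flags; only the 10/30/30/30 stage weighting mirrors the change above.

```java
// Standalone sketch (hypothetical types) of the progress heuristic:
// each batch contributes a fraction of its page count, weighted by
// which pipeline stages have already completed.
public class ProgressDemo {

    // hypothetical stand-in for BatchStats
    record Stages(boolean rendered, boolean uploaded, boolean apiDone, boolean mapped) {}

    static int estimate(int[] batchSizes, Stages[] stages) {
        int processed = 0;
        for (int i = 0; i < batchSizes.length; i++) {
            float pct = 0;
            if (stages[i].rendered()) pct += 0.1f; // rendering: 10 %
            if (stages[i].uploaded()) pct += 0.3f; // upload: 30 %
            if (stages[i].apiDone()) pct += 0.3f;  // API wait: 30 %
            if (stages[i].mapped()) pct += 0.3f;   // result mapping: 30 %
            processed += (int) (batchSizes[i] * pct);
        }
        return processed;
    }

    public static void main(String[] args) {
        int[] sizes = {10, 10};
        Stages[] s = {
            new Stages(true, true, false, false),  // render + upload -> 4 pages
            new Stages(false, true, true, false)   // upload + API wait -> 6 pages
        };
        System.out.println(estimate(sizes, s)); // prints 10
    }
}
```

Unlike a plain countdown, this lets the progress bar advance while a batch is still in flight.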


@@ -0,0 +1,510 @@
package com.knecon.fforesight.service.ocr.processor.service;
import java.awt.geom.AffineTransform;
import java.awt.geom.Line2D;
import java.awt.geom.Rectangle2D;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import com.azure.ai.documentintelligence.models.AnalyzeResult;
import com.azure.ai.documentintelligence.models.BoundingRegion;
import com.azure.ai.documentintelligence.models.DocumentFontStyle;
import com.azure.ai.documentintelligence.models.DocumentPage;
import com.azure.ai.documentintelligence.models.DocumentSpan;
import com.azure.ai.documentintelligence.models.DocumentStyle;
import com.azure.ai.documentintelligence.models.DocumentTable;
import com.azure.ai.documentintelligence.models.DocumentTableCell;
import com.azure.ai.documentintelligence.models.DocumentWord;
import com.google.common.base.Functions;
import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
import com.knecon.fforesight.service.ocr.processor.model.ImageFile;
import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
import com.knecon.fforesight.service.ocr.processor.model.PageInformation;
import com.knecon.fforesight.service.ocr.processor.model.SpanLookup;
import com.knecon.fforesight.service.ocr.processor.model.TextPositionInImage;
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.BBoxSnuggificationService;
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.FontStyleDetector;
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingPipeline;
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingSupervisor;
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.StrokeWidthCalculator;
import com.knecon.fforesight.service.ocr.processor.visualizations.WritableOcrResult;
import com.knecon.fforesight.service.ocr.processor.visualizations.fonts.FontMetricsProvider;
import com.knecon.fforesight.service.ocr.processor.visualizations.fonts.FontStyle;
import com.knecon.fforesight.service.ocr.processor.visualizations.fonts.Type0FontMetricsProvider;
import com.knecon.fforesight.service.ocr.processor.visualizations.utils.RotationCorrectionUtility;
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
import com.knecon.fforesight.service.ocr.v1.api.model.QuadPoint;
import lombok.AccessLevel;
import lombok.Getter;
import lombok.SneakyThrows;
import lombok.experimental.FieldDefaults;
import lombok.extern.slf4j.Slf4j;
import net.sourceforge.lept4j.Box;
import net.sourceforge.lept4j.Leptonica1;
import net.sourceforge.lept4j.Pix;
import net.sourceforge.lept4j.util.LeptUtils;
@Slf4j
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class OcrResultPostProcessingPipeline {
@Getter
Map<Integer, AffineTransform> resultToPageTransforms;
Map<Integer, PageInformation> pageInformation;
ImageProcessingPipeline imageProcessingPipeline;
OcrServiceSettings settings;
Set<AzureOcrFeature> features;
@SneakyThrows
public OcrResultPostProcessingPipeline(Map<Integer, PageInformation> pageInformation,
ImageProcessingPipeline imageProcessingPipeline,
OcrServiceSettings settings,
Set<AzureOcrFeature> features) {
this.imageProcessingPipeline = imageProcessingPipeline;
this.pageInformation = pageInformation;
resultToPageTransforms = Collections.synchronizedMap(new HashMap<>());
this.settings = settings;
this.features = features;
}
public List<WritableOcrResult> processAnalyzeResult(AnalyzeResult analyzeResult, PageBatch batch) throws InterruptedException {
ImageProcessingSupervisor imageSupervisor = renderImagesIfNecessary(analyzeResult, batch);
List<WritableOcrResult> writableOcrResultList = new ArrayList<>();
Lookups lookups = getLookups(analyzeResult);
for (DocumentPage resultPage : analyzeResult.getPages()) {
PageInformation pageInformation = getPageInformation(getPageNumber(batch, resultPage));
AffineTransform resultToPageTransform = buildResultToPageTransform(pageInformation, resultPage.getWidth());
resultToPageTransforms.put(getPageNumber(batch, resultPage), resultToPageTransform);
List<TextPositionInImage> words = buildTextPositionsInImage(batch, resultPage, resultToPageTransform, lookups, pageInformation, imageSupervisor);
var builder = WritableOcrResult.builder().pageNumber(pageInformation.number()).textPositionInImage(words).angle(-resultPage.getAngle());
if (settings.isDrawTablesAsLines()) {
builder.tableLines(getTableLines(analyzeResult, pageInformation, resultToPageTransform));
}
writableOcrResultList.add(builder.build());
}
log.debug("Batch {}: finished post-processing.", batch.getIndex());
return writableOcrResultList;
}
private ImageProcessingSupervisor renderImagesIfNecessary(AnalyzeResult analyzeResult, PageBatch batch) {
ImageProcessingSupervisor imageSupervisor = null;
if (useRenderedImages()) {
Map<Integer, Double> anglesPerPage = analyzeResult.getPages()
.stream()
.collect(Collectors.toMap(DocumentPage::getPageNumber, documentPage -> -documentPage.getAngle()));
RotationCorrectionUtility.rotatePages(batch.getBatchDoc(), batch.getBatchDoc(), anglesPerPage);
imageSupervisor = imageProcessingPipeline.addToPipeline(batch);
}
return imageSupervisor;
}
private boolean useRenderedImages() {
if (settings.isAzureFontStyleDetection() && features.contains(AzureOcrFeature.FONT_STYLE_DETECTION)) {
return false;
}
return settings.isSnuggify() || features.contains(AzureOcrFeature.FONT_STYLE_DETECTION);
}
private List<TextPositionInImage> buildTextPositionsInImage(PageBatch pageOffset,
DocumentPage resultPage,
AffineTransform resultToPageTransform,
Lookups lookups,
PageInformation pageInformation,
ImageProcessingSupervisor imageSupervisor) throws InterruptedException {
if (!useRenderedImages()) {
return buildText(resultPage, resultToPageTransform, lookups, pageInformation);
}
ImageFile imageFile = imageSupervisor.awaitProcessedPage(getPageNumber(pageOffset, resultPage));
if (imageFile == null) {
return buildText(resultPage, resultToPageTransform, lookups, pageInformation);
}
synchronized (ImageProcessingSupervisor.class) {
// Leptonica is not thread-safe and is also called from ImageProcessingService, so both call sites synchronize on the same monitor
if (features.contains(AzureOcrFeature.FONT_STYLE_DETECTION)) {
return buildTextWithBoldDetection(resultPage, resultToPageTransform, pageInformation, imageFile);
}
return buildTextWithSnugBBoxes(resultPage, imageFile, resultToPageTransform, lookups, pageInformation);
}
}
@SneakyThrows
private List<TextPositionInImage> buildTextWithBoldDetection(DocumentPage resultPage,
AffineTransform resultToPageTransform,
PageInformation pageInformation,
ImageFile imageFile) {
Pix pageImage = imageFile.readPix();
List<TextPositionInImage> words = new ArrayList<>();
try (FontStyleDetector fontStyleDetector = new FontStyleDetector()) {
AffineTransform resultToImageTransform = buildResultToImageTransform(resultPage, pageImage);
for (DocumentWord word : resultPage.getWords()) {
TextPositionInImage textPosition;
if (canBeSnuggified(resultPage, resultToImageTransform)) {
textPosition = buildTextPositionInImageWithSnugBBox(word,
resultToPageTransform,
new FontInformation(FontStyle.REGULAR, Type0FontMetricsProvider.REGULAR_INSTANCE),
pageImage,
resultToImageTransform);
} else {
textPosition = new TextPositionInImage(QuadPoint.fromPolygons(word.getPolygon()),
word.getContent(),
resultToPageTransform,
new FontInformation(FontStyle.REGULAR, Type0FontMetricsProvider.REGULAR_INSTANCE).font(),
new FontInformation(FontStyle.REGULAR, Type0FontMetricsProvider.REGULAR_INSTANCE).fontStyle(),
false);
}
if (intersectsIgnoreZone(pageInformation.wordBBoxes(), textPosition)) {
textPosition.setOverlapsIgnoreZone(true);
}
QuadPoint originTransformed = QuadPoint.fromPolygons(word.getPolygon()).getTransformed(resultToImageTransform);
Pix wordImage = extractWordImage(originTransformed, pageImage);
if (wordImage == null) {
log.debug("Unable to extract word image! wordImage: {}, pageImage {}", originTransformed.getBounds2D(), new Rectangle2D.Float(0, 0, pageImage.w, pageImage.h));
continue;
}
if (StrokeWidthCalculator.wordImageHasMinimumPixelDensity(wordImage)) {
fontStyleDetector.add(textPosition, wordImage, textPosition.getFontSizeByHeight());
}
words.add(textPosition);
}
fontStyleDetector.classifyWords();
} finally {
LeptUtils.disposePix(pageImage);
}
return words;
}
@SneakyThrows
public static AffineTransform buildResultToImageTransform(DocumentPage resultPage, Pix pageImage) {
int quadrant = RotationCorrectionUtility.getQuadrantRotation(-resultPage.getAngle());
AffineTransform rotationCorrection = RotationCorrectionUtility.buildTransform(-resultPage.getAngle(), pageImage.w, pageImage.h);
AffineTransform imageTransform = new AffineTransform();
double scalingFactor = switch (quadrant) {
case 1, 3 -> pageImage.h / resultPage.getWidth();
default -> pageImage.w / resultPage.getWidth();
};
imageTransform.concatenate(rotationCorrection);
imageTransform.scale(scalingFactor, scalingFactor);
return imageTransform;
}
public static Pix extractWordImage(QuadPoint wordPosition, Pix pageImage) {
Rectangle2D wordBBox = wordPosition.getBounds2D();
Rectangle2D pageBBox = new Rectangle2D.Double(0, 0, pageImage.w, pageImage.h);
if (!pageBBox.contains(wordBBox)) {
return null;
}
Box box = new Box((int) wordBBox.getX(), (int) wordBBox.getY(), (int) wordBBox.getWidth(), (int) wordBBox.getHeight(), 1);
Pix wordImage = Leptonica1.pixClipRectangle(pageImage, box, null);
box.clear();
return wordImage;
}
public List<TextPositionInImage> buildTextWithSnugBBoxes(DocumentPage resultPage,
ImageFile imageFile,
AffineTransform pageCtm,
Lookups lookups,
PageInformation pageInformation) {
Pix pageImage = imageFile.readPix();
AffineTransform resultToImageTransform = buildResultToImageTransform(resultPage, pageImage);
boolean snuggify = canBeSnuggified(resultPage, resultToImageTransform);
List<TextPositionInImage> list = new ArrayList<>();
for (DocumentWord word : resultPage.getWords()) {
FontInformation fontInformation = FontInformation.determineStyle(word, lookups);
TextPositionInImage textPositionInImage;
if (snuggify) {
textPositionInImage = buildTextPositionInImageWithSnugBBox(word, pageCtm, fontInformation, pageImage, resultToImageTransform);
} else {
textPositionInImage = new TextPositionInImage(QuadPoint.fromPolygons(word.getPolygon()),
word.getContent(),
pageCtm,
fontInformation.font(),
fontInformation.fontStyle(),
false);
}
markTextOverlappingIgnoreZone(textPositionInImage, pageInformation.wordBBoxes());
list.add(textPositionInImage);
}
LeptUtils.disposePix(pageImage);
return list;
}
private boolean canBeSnuggified(DocumentPage resultPage, AffineTransform resultToImageTransform) {
return settings.isSnuggify() && BBoxSnuggificationService.canBeSnuggified(resultPage, resultToImageTransform);
}
public List<TextPositionInImage> buildText(DocumentPage resultPage, AffineTransform pageCtm, Lookups lookups, PageInformation pageInformation) {
return resultPage.getWords()
.stream()
.map(word -> new TextPositionInImage(QuadPoint.fromPolygons(word.getPolygon()),
word.getContent(),
pageCtm,
FontInformation.determineStyle(word, lookups).font(),
FontInformation.determineStyle(word, lookups).fontStyle(),
false))
.map(textPositionInImage -> markTextOverlappingIgnoreZone(textPositionInImage, pageInformation.wordBBoxes()))
.collect(Collectors.toList());
}
private static int getPageNumber(PageBatch pageBatch, DocumentPage resultPage) {
return pageBatch.getPageNumber(resultPage.getPageNumber());
}
private static Lookups getLookups(AnalyzeResult analyzeResult) {
if (analyzeResult.getStyles() == null || analyzeResult.getStyles().isEmpty()) {
return Lookups.empty();
}
// Azure stopped supporting bold text detection in the 1.0.0 release
SpanLookup<DocumentSpan> boldLookup = new SpanLookup<>(Stream.empty(), Function.identity());
SpanLookup<DocumentSpan> italicLookup = new SpanLookup<>(analyzeResult.getStyles()
.stream()
.filter(style -> Objects.equals(style.getFontStyle(),
DocumentFontStyle.ITALIC))
.map(DocumentStyle::getSpans)
.flatMap(Collection::stream), Functions.identity());
SpanLookup<DocumentSpan> handWrittenLookup = new SpanLookup<>(analyzeResult.getStyles()
.stream()
.filter(documentStyle -> documentStyle.isHandwritten() != null && documentStyle.isHandwritten())
.map(DocumentStyle::getSpans)
.flatMap(Collection::stream), Functions.identity());
return new Lookups(boldLookup, italicLookup, handWrittenLookup);
}
@SneakyThrows
private TextPositionInImage buildTextPositionInImageWithSnugBBox(DocumentWord dw,
AffineTransform imageCTM,
FontInformation fontInformation,
Pix pageImage,
AffineTransform resultToImageTransform) {
QuadPoint origin = QuadPoint.fromPolygons(dw.getPolygon());
Optional<QuadPoint> snugBBox = BBoxSnuggificationService.snuggify(pageImage, dw, resultToImageTransform);
return new TextPositionInImage(snugBBox.orElse(origin), dw.getContent(), imageCTM, fontInformation.font(), fontInformation.fontStyle(), snugBBox.isPresent());
}
private record FontInformation(FontStyle fontStyle, FontMetricsProvider font) {
public static FontInformation determineStyle(DocumentWord dw, Lookups lookups) {
boolean bold = lookups.bold().containedInAnySpan(dw.getSpan());
boolean italic = lookups.italic().containedInAnySpan(dw.getSpan());
boolean handwritten = lookups.handwritten().containedInAnySpan(dw.getSpan());
FontStyle fontStyle;
FontMetricsProvider font;
if (handwritten) {
fontStyle = FontStyle.HANDWRITTEN;
font = Type0FontMetricsProvider.REGULAR_INSTANCE;
} else if (italic && bold) {
fontStyle = FontStyle.BOLD_ITALIC;
font = Type0FontMetricsProvider.BOLD_ITALIC_INSTANCE;
} else if (bold) {
fontStyle = FontStyle.BOLD;
font = Type0FontMetricsProvider.BOLD_INSTANCE;
} else if (italic) {
fontStyle = FontStyle.ITALIC;
font = Type0FontMetricsProvider.ITALIC_INSTANCE;
} else {
fontStyle = FontStyle.REGULAR;
font = Type0FontMetricsProvider.REGULAR_INSTANCE;
}
return new FontInformation(fontStyle, font);
}
}
private static List<Line2D> getTableLines(AnalyzeResult analyzeResult, PageInformation pageInformation, AffineTransform imageCTM) {
if (analyzeResult.getTables() == null || analyzeResult.getTables().isEmpty()) {
return Collections.emptyList();
}
return analyzeResult.getTables()
.stream()
.map(DocumentTable::getCells)
.flatMap(Collection::stream)
.map(DocumentTableCell::getBoundingRegions)
.flatMap(Collection::stream)
.filter(table -> table.getPageNumber() == pageInformation.number())
.map(BoundingRegion::getPolygon)
.map(QuadPoint::fromPolygons)
.map(qp -> qp.getTransformed(imageCTM))
.flatMap(QuadPoint::asLines)
.toList();
}
private static TextPositionInImage markTextOverlappingIgnoreZone(TextPositionInImage textPositionInImage, List<Rectangle2D> ignoreZones) {
if (intersectsIgnoreZone(ignoreZones, textPositionInImage)) {
textPositionInImage.setOverlapsIgnoreZone(true);
}
return textPositionInImage;
}
private static boolean intersectsIgnoreZone(List<Rectangle2D> ignoreZones, TextPositionInImage textPositionInImage) {
for (Rectangle2D ignoreZone : ignoreZones) {
Rectangle2D textBBox = textPositionInImage.getTransformedTextBBox().getBounds2D();
if (textBBox.intersects(ignoreZone)) {
double intersectedArea = calculateIntersectedArea(textBBox, ignoreZone);
double textArea = textBBox.getWidth() * textBBox.getHeight();
if (intersectedArea / textArea > 0.5) {
return true;
}
double ignoreZoneArea = ignoreZone.getWidth() * ignoreZone.getHeight();
if (intersectedArea / ignoreZoneArea > 0.5) {
return true;
}
}
}
return false;
}
public static double calculateIntersectedArea(Rectangle2D r1, Rectangle2D r2) {
double xOverlap = Math.max(0, Math.min(r1.getMaxX(), r2.getMaxX()) - Math.max(r1.getMinX(), r2.getMinX()));
double yOverlap = Math.max(0, Math.min(r1.getMaxY(), r2.getMaxY()) - Math.max(r1.getMinY(), r2.getMinY()));
return xOverlap * yOverlap;
}
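The axis-aligned overlap formula above can be exercised on its own. This minimal sketch duplicates the per-axis rule (overlap = max(0, min of maxes minus max of mins)) against two hypothetical rectangles; it is an illustration, not the service class.

```java
import java.awt.geom.Rectangle2D;

// Standalone check of the axis-aligned overlap-area formula: per axis the
// overlap is max(0, min(maxes) - max(mins)); the intersected area is the product.
public class OverlapDemo {

    static double intersectedArea(Rectangle2D r1, Rectangle2D r2) {
        double xOverlap = Math.max(0, Math.min(r1.getMaxX(), r2.getMaxX()) - Math.max(r1.getMinX(), r2.getMinX()));
        double yOverlap = Math.max(0, Math.min(r1.getMaxY(), r2.getMaxY()) - Math.max(r1.getMinY(), r2.getMinY()));
        return xOverlap * yOverlap;
    }

    public static void main(String[] args) {
        Rectangle2D a = new Rectangle2D.Double(0, 0, 10, 10);
        Rectangle2D b = new Rectangle2D.Double(5, 5, 10, 10); // overlaps a in [5,10]x[5,10]
        Rectangle2D c = new Rectangle2D.Double(20, 20, 5, 5); // disjoint from a
        System.out.println(intersectedArea(a, b)); // 25.0
        System.out.println(intersectedArea(a, c)); // 0.0
    }
}
```

The max(0, …) clamp makes disjoint rectangles yield zero instead of a negative product.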
public static AffineTransform buildResultToPageTransform(PageInformation pageInformation, double imageWidth) {
double scalingFactor = calculateScalingFactor(imageWidth, pageInformation);
AffineTransform imageToCropBoxScaling = new AffineTransform(scalingFactor, 0, 0, scalingFactor, 0, 0);
AffineTransform mirrorMatrix = new AffineTransform(1, 0, 0, -1, 0, pageInformation.height());
AffineTransform rotationMatrix = switch (pageInformation.rotationDegrees()) {
case 90 -> new AffineTransform(0, 1, -1, 0, pageInformation.height(), 0);
case 180 -> new AffineTransform(-1, 0, 0, -1, pageInformation.width(), pageInformation.height());
case 270 -> new AffineTransform(0, -1, 1, 0, pageInformation.width() - pageInformation.height(), pageInformation.height());
default -> new AffineTransform();
};
// matrix multiplication is performed from right to left, so the order is reversed.
// scaling -> mirror -> rotation
AffineTransform resultMatrix = new AffineTransform();
resultMatrix.concatenate(rotationMatrix);
resultMatrix.concatenate(mirrorMatrix);
resultMatrix.concatenate(imageToCropBoxScaling);
return resultMatrix;
}
private static double calculateScalingFactor(double width, PageInformation pageInformation) {
// PDFBox always returns page height and width based on rotation
double pageWidth;
if (pageInformation.rotationDegrees() == 90 || pageInformation.rotationDegrees() == 270) {
pageWidth = pageInformation.height();
} else {
pageWidth = pageInformation.width();
}
return pageWidth / width;
}
@SneakyThrows
private PageInformation getPageInformation(Integer pageNumber) {
return pageInformation.get(pageNumber);
}
public record Lookups(SpanLookup<DocumentSpan> bold, SpanLookup<DocumentSpan> italic, SpanLookup<DocumentSpan> handwritten) {
public static Lookups empty() {
return new Lookups(new SpanLookup<>(Stream.empty(), Function.identity()),
new SpanLookup<>(Stream.empty(), Function.identity()),
new SpanLookup<>(Stream.empty(), Function.identity()));
}
}
}
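buildResultToPageTransform above relies on AffineTransform.concatenate() applying right-to-left. A minimal sketch of that ordering with hypothetical values (plain java.awt.geom, not the service code):

```java
import java.awt.geom.AffineTransform;
import java.awt.geom.Point2D;

// Minimal sketch of concatenate() ordering: the transform concatenated
// LAST is applied to the point FIRST (matrix multiplication right-to-left).
public class TransformOrderDemo {

    // scale by 2, then mirror vertically within a page of the given height
    static Point2D scaleThenMirror(double x, double y, double pageHeight) {
        AffineTransform scale = AffineTransform.getScaleInstance(2, 2);
        AffineTransform mirror = new AffineTransform(1, 0, 0, -1, 0, pageHeight);
        AffineTransform result = new AffineTransform();
        result.concatenate(mirror); // applied second
        result.concatenate(scale);  // applied first
        return result.transform(new Point2D.Double(x, y), null);
    }

    public static void main(String[] args) {
        Point2D p = scaleThenMirror(10, 10, 100);
        // scale first: (20, 20); then mirror y: (20, 100 - 20)
        System.out.println(p.getX() + "," + p.getY()); // prints 20.0,80.0
    }
}
```

Reversing the two concatenate() calls would mirror before scaling and land the point elsewhere, which is why the pipeline comments stress the right-to-left order.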


@@ -0,0 +1,215 @@
package com.knecon.fforesight.service.ocr.processor.service.imageprocessing;
import java.awt.geom.AffineTransform;
import java.awt.geom.Rectangle2D;
import java.nio.IntBuffer;
import java.util.Optional;
import com.azure.ai.documentintelligence.models.DocumentPage;
import com.azure.ai.documentintelligence.models.DocumentWord;
import com.knecon.fforesight.service.ocr.processor.service.OcrResultPostProcessingPipeline;
import com.knecon.fforesight.service.ocr.processor.visualizations.utils.RotationCorrectionUtility;
import com.knecon.fforesight.service.ocr.v1.api.model.QuadPoint;
import com.sun.jna.Pointer;
import lombok.SneakyThrows;
import lombok.experimental.UtilityClass;
import lombok.extern.slf4j.Slf4j;
import net.sourceforge.lept4j.Leptonica1;
import net.sourceforge.lept4j.Numa;
import net.sourceforge.lept4j.Pix;
import net.sourceforge.lept4j.util.LeptUtils;
/**
* Attempts to shrink the BBox of a word to the word's exact pixel extent. This is only attempted for horizontal or vertical words; skewed text is left as-is.
*/
@Slf4j
@UtilityClass
public class BBoxSnuggificationService {
public static final int PIXEL_COUNT_THRESHOLD = 2; // minimum active pixels per row/column for shrinking to stop
private static final double AVERAGE_ANGLE_THRESHOLD = 0.2; // skips snuggification if the average remaining rotation of left-to-right words on a page exceeds this
public static final int INDIVIDUAL_ANGLE_THRESHOLD = 5; // skips snuggification for a word if its remaining rotation exceeds this angle
public static final int MAX_SHRINK_PIXELS = 40; // maximum number of pixels that may be removed from each edge of a word image
private static final int MINIMUM_WORD_PIXELS = 5; // minimum remaining word extent in pixels required for snuggification
private enum Operation {
HORIZONTAL,
VERTICAL,
BOTH,
NONE
}
@SneakyThrows
public Optional<QuadPoint> snuggify(Pix pageImage, DocumentWord origin, AffineTransform resultToImageTransform) {
if (pageImage == null) {
return Optional.empty();
}
if (origin.getContent().equals("-") || origin.getContent().equals(",")) {
// Very slim characters should not be snuggified, or the font size may be off significantly
return Optional.empty();
}
QuadPoint originTransformed = QuadPoint.fromPolygons(origin.getPolygon()).getTransformed(resultToImageTransform);
double remainingAngle = Math.abs(RotationCorrectionUtility.getRemainingAngle(originTransformed.getAngle()));
QuadPoint.Direction direction = originTransformed.getDirection();
Operation operation = determineOperation(origin, direction, remainingAngle, originTransformed);
if (operation == Operation.NONE) {
return Optional.empty();
}
Pix wordImage = OcrResultPostProcessingPipeline.extractWordImage(originTransformed, pageImage);
if (wordImage == null) {
log.debug("Unable to extract word image! wordImage: {}, pageImage {}", originTransformed.getBounds2D(), new Rectangle2D.Float(0, 0, pageImage.w, pageImage.h));
return Optional.empty();
}
if (!StrokeWidthCalculator.wordImageHasMinimumPixelDensity(wordImage)) {
return Optional.empty();
}
Optional<Rectangle2D> snugBox = switch (operation) {
case HORIZONTAL -> snuggifyY(wordImage, originTransformed.getBounds2D());
case VERTICAL -> snuggifyX(wordImage, originTransformed.getBounds2D());
case BOTH -> snuggifyBoth(wordImage, originTransformed);
default -> Optional.empty();
};
LeptUtils.disposePix(wordImage);
AffineTransform imageToResultTransform = resultToImageTransform.createInverse();
return snugBox.map(snugBBox -> QuadPoint.fromRectangle2D(snugBBox, direction))
.map(bbox -> bbox.getTransformed(imageToResultTransform));
}
private Optional<Rectangle2D> snuggifyBoth(Pix wordImage, QuadPoint originTransformed) {
Optional<Rectangle2D> snugY = snuggifyY(wordImage, originTransformed.getBounds2D());
Optional<Rectangle2D> snugX = snuggifyX(wordImage, originTransformed.getBounds2D());
if (snugY.isPresent() && snugX.isPresent()) {
return Optional.of(snugY.get().createIntersection(snugX.get()).getBounds2D());
} else if (snugY.isPresent()) {
return snugY;
} else {
return snugX;
}
}
private Operation determineOperation(DocumentWord origin, QuadPoint.Direction direction, double remainingAngle, QuadPoint originTransformed) {
Operation operation = Operation.NONE;
if (((direction.equals(QuadPoint.Direction.RIGHT) || direction.equals(QuadPoint.Direction.LEFT)) && remainingAngle < INDIVIDUAL_ANGLE_THRESHOLD)) {
operation = Operation.HORIZONTAL;
} else if ((direction.equals(QuadPoint.Direction.UP) || direction.equals(QuadPoint.Direction.DOWN)) && remainingAngle < INDIVIDUAL_ANGLE_THRESHOLD) {
operation = Operation.VERTICAL;
} else if ((origin.getContent().length() < 4 || Math.abs(originTransformed.getAngle()) < AVERAGE_ANGLE_THRESHOLD * 3)) {
return Operation.BOTH;
}
return operation;
}
private Optional<Rectangle2D> snuggifyX(Pix wordImage, Rectangle2D origin) {
Numa colCounts = Leptonica1.pixCountPixelsByColumn(wordImage);
int start = 0;
int end = wordImage.w - PIXEL_COUNT_THRESHOLD;
for (int i = start; i < Math.min(wordImage.w, MAX_SHRINK_PIXELS); i++) {
if (pixCountPerColumn(i, colCounts) > PIXEL_COUNT_THRESHOLD) {
start = i;
break;
}
}
for (int i = end; i > Math.max(0, wordImage.w - MAX_SHRINK_PIXELS); i--) {
if (pixCountPerColumn(i, colCounts) > PIXEL_COUNT_THRESHOLD) {
end = i;
break;
}
}
if (start == 0 && end == wordImage.w) {
return Optional.empty();
}
if (Math.abs(start - end) < MINIMUM_WORD_PIXELS) {
return Optional.empty();
}
return Optional.of(new Rectangle2D.Double(origin.getX() + start, origin.getY(), origin.getWidth() - start - (wordImage.w - end), origin.getHeight()));
}
private Optional<Rectangle2D> snuggifyY(Pix wordImage, Rectangle2D origin) {
int start = 0;
int end = wordImage.h - 1;
for (int i = start; i < Math.min(wordImage.h, MAX_SHRINK_PIXELS); i++) {
if (pixCountPerRow(i, wordImage) > PIXEL_COUNT_THRESHOLD) {
start = i;
break;
}
}
for (int i = end; i > Math.max(0, wordImage.h - MAX_SHRINK_PIXELS); i--) {
if (pixCountPerRow(i, wordImage) > PIXEL_COUNT_THRESHOLD) {
end = i;
break;
}
}
if (start == 0 && end == wordImage.h) {
return Optional.empty();
}
if (Math.abs(start - end) < MINIMUM_WORD_PIXELS) {
return Optional.empty();
}
return Optional.of(new Rectangle2D.Double(origin.getX(), origin.getY() + start, origin.getWidth(), origin.getHeight() - start - (wordImage.h - end)));
}
private int pixCountPerRow(int row, Pix pix) {
IntBuffer result = IntBuffer.allocate(1);
int success = Leptonica1.pixCountPixelsInRow(pix, row, result, null);
if (success == 0) {
return result.get();
} else {
return -1;
}
}
private int pixCountPerColumn(int column, Numa colCounts) {
if (column >= colCounts.n) {
throw new IndexOutOfBoundsException("column " + column + " is out of bounds for column count " + colCounts.n);
}
Pointer pointer = colCounts.array.getPointer();
// Read the float value at the given column index; each float occupies 4 bytes.
return (int) pointer.getFloat((long) column * Float.BYTES);
}
public boolean canBeSnuggified(DocumentPage resultPage, AffineTransform imageTransform) {
double averageAngle = resultPage.getWords()
.stream()
.filter(word -> word.getContent().length() >= 4)
.map(DocumentWord::getPolygon)
.map(QuadPoint::fromPolygons)
.map(qp -> qp.getTransformed(imageTransform))
.filter(qp -> qp.getDirection().equals(QuadPoint.Direction.RIGHT))
.mapToDouble(QuadPoint::getAngle)
.map(Math::toDegrees)
.map(RotationCorrectionUtility::getRemainingAngle).average()
.orElse(Double.MAX_VALUE);
return Math.abs(averageAngle) < AVERAGE_ANGLE_THRESHOLD;
}
}
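The row-shrinking loops in snuggifyY can be illustrated without Leptonica. This toy sketch works on a hypothetical int[][] bitmap and keeps only the threshold idea; it omits the MAX_SHRINK_PIXELS cap and the MINIMUM_WORD_PIXELS guard of the real service.

```java
// Toy sketch of the snuggifyY idea (no Leptonica): shrink a word's row range
// from the top and bottom until a row has more than THRESHOLD active pixels.
public class SnugDemo {

    static final int THRESHOLD = 2;

    // img[row][col], 1 = active pixel; returns {firstDenseRow, lastDenseRow}
    static int[] snugRows(int[][] img) {
        int start = 0, end = img.length - 1;
        for (int i = 0; i < img.length; i++) {
            if (rowCount(img[i]) > THRESHOLD) { start = i; break; }
        }
        for (int i = img.length - 1; i >= 0; i--) {
            if (rowCount(img[i]) > THRESHOLD) { end = i; break; }
        }
        return new int[] {start, end};
    }

    static int rowCount(int[] row) {
        int c = 0;
        for (int v : row) c += v;
        return c;
    }

    public static void main(String[] args) {
        int[][] img = {
            {0, 0, 0, 0, 0, 0},
            {0, 1, 0, 0, 0, 0}, // 1 active pixel: below threshold, still trimmed
            {1, 1, 1, 1, 0, 0}, // first dense row -> start = 2
            {1, 1, 1, 1, 1, 1},
            {0, 0, 1, 0, 0, 0}  // sparse -> end stays at row 3
        };
        int[] se = snugRows(img);
        System.out.println(se[0] + "-" + se[1]); // prints 2-3
    }
}
```

The threshold tolerates isolated noise pixels, so a stray speckle above or below the word does not prevent the BBox from shrinking.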


@@ -84,6 +84,7 @@ public class FontStyleDetector implements Closeable {
wordImage.textPosition().setFontMetricsProvider(Type0FontMetricsProvider.BOLD_INSTANCE);
wordImage.textPosition().setFontStyle(FontStyle.BOLD);
} else {
wordImage.textPosition().setFontMetricsProvider(Type0FontMetricsProvider.REGULAR_INSTANCE);
wordImage.textPosition().setFontStyle(FontStyle.REGULAR);
}
}

View File

@ -1,16 +1,20 @@
package com.knecon.fforesight.service.ocr.processor.service.imageprocessing;
import static com.knecon.fforesight.service.ocr.processor.utils.ListSplittingUtils.formatIntervals;
import java.io.BufferedReader;
import java.io.File;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.function.Consumer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.slf4j.MDC;
import com.knecon.fforesight.service.ocr.processor.model.ImageFile;
import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
@ -27,7 +31,7 @@ public class GhostScriptOutputHandler extends Thread {
// If the stdout or stderr pipe of the Ghostscript process is not drained, the process can block as soon as the pipe buffer fills, so we must drain both streams to prevent a deadlock.
// Since both streams must be read simultaneously, the readers are implemented as separate threads.
final int batchIdx;
final InputStream is;
final String processName;
final Type type;
@ -36,24 +40,32 @@ public class GhostScriptOutputHandler extends Thread {
final Consumer<ImageFile> outputHandler;
final Consumer<String> errorHandler;
final Map<String, String> parentMdcContext;
int currentPageNumber;
public static GhostScriptOutputHandler stdError(InputStream is, Consumer<String> errorHandler) {
public static GhostScriptOutputHandler stdError(int batchIdx, InputStream is, Consumer<String> errorHandler) {
return new GhostScriptOutputHandler(is, "GS", Type.ERROR, null, null, errorHandler);
return new GhostScriptOutputHandler(batchIdx, is, "GS", Type.ERROR, null, null, errorHandler, MDC.getCopyOfContextMap());
}
public static GhostScriptOutputHandler stdOut(InputStream is, Map<Integer, ImageFile> pagesToProcess, Consumer<ImageFile> imageFileOutput, Consumer<String> errorHandler) {
public static GhostScriptOutputHandler stdOut(int batchIdx,
InputStream is,
Map<Integer, ImageFile> pagesToProcess,
Consumer<ImageFile> imageFileOutput,
Consumer<String> errorHandler) {
return new GhostScriptOutputHandler(is, "GS", Type.STD_OUT, pagesToProcess, imageFileOutput, errorHandler);
return new GhostScriptOutputHandler(batchIdx, is, "GS", Type.STD_OUT, pagesToProcess, imageFileOutput, errorHandler, MDC.getCopyOfContextMap());
}
@SneakyThrows
public void run() {
MDC.setContextMap(parentMdcContext);
try (InputStreamReader isr = new InputStreamReader(is); BufferedReader br = new BufferedReader(isr)) {
String line;
@ -63,13 +75,14 @@ public class GhostScriptOutputHandler extends Thread {
if (line == null) {
break;
}
if (type.equals(Type.ERROR)) {
log.error("{}_{}>{}", processName, type.name(), line);
} else {
log.debug("{}_{}>{}", processName, type.name(), line);
addProcessedImageToQueue(line);
switch (type) {
case STD_OUT -> {
log.debug("Batch {}: {}_{}>{}", batchIdx, processName, type.name(), line);
addProcessedImageToQueue(line);
}
case ERROR -> log.error("Batch {}: {}_{}>{}", batchIdx, processName, type.name(), line);
}
}
}
is.close();
@ -77,7 +90,9 @@ public class GhostScriptOutputHandler extends Thread {
queueFinishedPage(currentPageNumber);
if (!pagesToProcess.isEmpty()) {
errorHandler.accept(String.format("Ghostscript finished for batch, but pages %s remain unprocessed.", formatPagesToProcess()));
errorHandler.accept(String.format("Ghostscript finished for batch %d, but pages %s remain unprocessed.", batchIdx, formatPagesToProcess()));
} else {
log.info("Batch {}: rendered successfully!", batchIdx);
}
}
@ -86,10 +101,16 @@ public class GhostScriptOutputHandler extends Thread {
private String formatPagesToProcess() {
var pages = new PageBatch();
pagesToProcess.keySet()
.forEach(pages::add);
return pages.toString();
List<String> intervals = formatIntervals(pagesToProcess.keySet()
.stream()
.sorted()
.toList());
if (intervals.size() > 4) {
intervals = new ArrayList<>(intervals.subList(0, 4));
intervals.add("...");
}
return String.join(", ", intervals);
}
@ -106,7 +127,6 @@ public class GhostScriptOutputHandler extends Thread {
currentPageNumber = pageNumber;
return;
}
queueFinishedPage(currentPageNumber);
currentPageNumber = pageNumber;
}
@ -117,10 +137,10 @@ public class GhostScriptOutputHandler extends Thread {
var imageFile = this.pagesToProcess.remove(pageNumber);
if (imageFile == null) {
errorHandler.accept(String.format("Page number %d does not exist in this thread. It only has pagenumbers %s", pageNumber, pagesToProcess.keySet()));
errorHandler.accept(String.format("%d: Page number %d does not exist in this thread. It only has page numbers %s", batchIdx, pageNumber, pagesToProcess.keySet()));
return;
} else {
if (!new File(imageFile.absoluteFilePath()).exists()) {
errorHandler.accept(String.format("Rendered page with number %d does not exist!", pageNumber));
errorHandler.accept(String.format("%d: Rendered page with number %d does not exist!", batchIdx, pageNumber));
}
}
outputHandler.accept(imageFile);
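The deadlock described in the handler's comment (a child process blocking once its unread stdout or stderr pipe buffer fills) is avoided by draining each stream on its own thread. A minimal sketch under that assumption, with `StreamDrainer` as a hypothetical stand-in for `GhostScriptOutputHandler`:

```java
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.function.Consumer;

public class StreamDrainer {

    // Start a daemon thread that reads `in` line by line until EOF and hands
    // each line to `sink`. Both stdout and stderr of a child process must be
    // drained this way, or the child can block writing to a full pipe buffer.
    public static Thread drain(InputStream in, Consumer<String> sink) {
        Thread t = new Thread(() -> {
            try (BufferedReader br = new BufferedReader(new InputStreamReader(in))) {
                String line;
                while ((line = br.readLine()) != null) {
                    sink.accept(line);
                }
            } catch (Exception e) {
                sink.accept("ERROR: " + e.getMessage());
            }
        });
        t.setDaemon(true);
        t.start();
        return t;
    }

    public static void main(String[] args) throws Exception {
        // Fake process output in place of a real Ghostscript stdout stream.
        List<String> lines = new CopyOnWriteArrayList<>();
        Thread t = drain(new ByteArrayInputStream("Page 1\nPage 2\n".getBytes()), lines::add);
        t.join();
        System.out.println(lines); // [Page 1, Page 2]
    }
}
```

In the service the same idea is applied twice per process, once for `getInputStream()` and once for `getErrorStream()`.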

View File

@ -2,155 +2,136 @@ package com.knecon.fforesight.service.ocr.processor.service.imageprocessing;
import java.io.InputStream;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;
import java.util.function.Consumer;
import java.util.stream.Collectors;
import org.springframework.stereotype.Service;
import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
import com.knecon.fforesight.service.ocr.processor.model.ImageFile;
import com.knecon.fforesight.service.ocr.processor.utils.ListSplittingUtils;
import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.experimental.FieldDefaults;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@Service
@RequiredArgsConstructor
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
@SuppressWarnings("PMD") // can't figure out how to safely close the stdOut and stdError streams in line 142/144
@SuppressWarnings("PMD") // can't figure out how to safely close the stdOut and stdError streams in line 72/74
public class GhostScriptService {
public static final int BATCH_SIZE = 256;
static String FORMAT = ".tiff";
private OcrServiceSettings ocrServiceSettings;
public static String FORMAT = ".tiff";
static String DEVICE = "tiffgray";
static int DPI = 300;
static int PROCESS_COUNT = 1;
private Semaphore concurrencySemaphore = new Semaphore(3);
@SneakyThrows
public void renderPagesBatched(List<Integer> pagesToProcess,
String documentAbsolutePath,
Path tmpImageDir,
ImageProcessingSupervisor supervisor,
Consumer<ImageFile> successHandler,
Consumer<String> errorHandler) {
public GhostScriptService(OcrServiceSettings ocrServiceSettings) {
List<List<ProcessInfo>> processInfoBatches = buildSubListForEachProcess(pagesToProcess,
PROCESS_COUNT,
BATCH_SIZE
* PROCESS_COUNT); // GS has a limit on how many pageIndices per call are possible, so we limit it to 256 pages per process
for (int batchIdx = 0; batchIdx < processInfoBatches.size(); batchIdx++) {
this.ocrServiceSettings = ocrServiceSettings;
assertGhostscriptIsInstalled();
}
supervisor.requireNoErrors();
List<ProcessInfo> processInfos = processInfoBatches.get(batchIdx);
private void assertGhostscriptIsInstalled() {
log.info("Batch {}: Running {} gs processes with ({}) pages each",
batchIdx,
processInfos.size(),
processInfos.stream()
.map(info -> info.pageNumbers().size())
.map(String::valueOf)
.collect(Collectors.joining(", ")));
int finalBatchIdx = batchIdx;
List<Process> processes = processInfos.stream()
.parallel()
.map(info -> buildCmdArgs(info.processIdx(), finalBatchIdx, info.pageNumbers(), tmpImageDir, documentAbsolutePath))
.peek(s -> log.debug(String.join(" ", s.cmdArgs())))
.map(processInfo -> executeProcess(processInfo, successHandler, errorHandler))
.toList();
List<Integer> processExitCodes = new LinkedList<>();
for (Process process : processes) {
processExitCodes.add(process.waitFor());
try {
Process p = new ProcessBuilder("gs", "-v").start();
InputStream stdOut = p.getInputStream();
InputStream errOut = p.getErrorStream();
if (!p.waitFor(1, TimeUnit.SECONDS)) {
throw new IllegalStateException("'gs -v' did not terminate within 1 second");
}
log.info("Ghostscript is installed.");
String out = new String(stdOut.readAllBytes());
String error = new String(errOut.readAllBytes());
for (String line : out.split("\n")) {
log.info(line);
}
log.info("Batch {}: Ghostscript processes finished with exit codes {}", batchIdx, processExitCodes);
if (!error.isBlank()) {
log.error(error);
}
} catch (Exception e) {
log.error("Ghostscript is not installed!");
log.error(e.getMessage(), e);
throw new RuntimeException(e);
}
}
private List<List<ProcessInfo>> buildSubListForEachProcess(List<Integer> stitchedPageNumbers, int processCount, int batchSize) {
@SneakyThrows
public void startBatchRender(PageBatch batch, ImageProcessingSupervisor supervisor, Consumer<ImageFile> successHandler, Consumer<String> errorHandler) {
// GhostScript command line can only handle so many page numbers at once, so we split it into batches
int batchCount = (int) Math.ceil((double) stitchedPageNumbers.size() / batchSize);
supervisor.requireNoErrors();
log.info("Splitting {} page renderings across {} process(es) in {} batch(es) with size {}", stitchedPageNumbers.size(), processCount, batchCount, batchSize);
List<List<ProcessInfo>> processInfoBatches = new ArrayList<>(batchCount);
List<List<List<Integer>>> batchedBalancedSublist = ListSplittingUtils.buildBatchedBalancedSublist(stitchedPageNumbers.stream()
.sorted()
.toList(), processCount, batchCount);
for (var batch : batchedBalancedSublist) {
List<ProcessInfo> processInfos = new ArrayList<>(processCount);
for (int threadIdx = 0; threadIdx < batch.size(); threadIdx++) {
List<Integer> balancedPageNumbersSubList = batch.get(threadIdx);
processInfos.add(new ProcessInfo(threadIdx, balancedPageNumbersSubList));
}
processInfoBatches.add(processInfos);
List<ImageFile> renderedImageFiles = batch.getRenderedImageFiles();
if (ocrServiceSettings.isUseCaches() && renderedImageFiles.stream()
.allMatch(ImageFile::exists)) {
log.info("Batch {}: Using cached GhostScript rendering with page(s) {}", batch.getIndex(), batch);
renderedImageFiles.forEach(successHandler);
return;
}
return processInfoBatches;
concurrencySemaphore.acquire();
log.info("Batch {}: starting GhostScript rendering with page(s) {}", batch.getIndex(), batch);
executeProcess(batch, buildCmdArgs(batch, batch.getBatchDoc()), successHandler, errorHandler);
}
@SneakyThrows
private ProcessCmdsAndRenderedImageFiles buildCmdArgs(Integer processIdx,
Integer batchIdx,
List<Integer> stitchedImagePageIndices,
Path outputDir,
String documentAbsolutePath) {
String imagePathFormat = outputDir.resolve("output_" + processIdx + "_" + batchIdx + ".%04d" + FORMAT).toFile().toString();
private ProcessCmdsAndRenderedImageFiles buildCmdArgs(PageBatch batch, Path document) {
Map<Integer, ImageFile> fullPageImages = new HashMap<>();
for (int i = 0; i < stitchedImagePageIndices.size(); i++) {
Integer pageNumber = stitchedImagePageIndices.get(i);
fullPageImages.put(pageNumber, new ImageFile(pageNumber, String.format(imagePathFormat, i + 1)));
List<ImageFile> renderedImageFiles = batch.getRenderedImageFiles();
for (int i = 1; i <= renderedImageFiles.size(); i++) {
ImageFile renderedImageFile = renderedImageFiles.get(i - 1);
fullPageImages.put(i, renderedImageFile);
}
String[] cmdArgs = buildCmdArgs(stitchedImagePageIndices, documentAbsolutePath, imagePathFormat);
String[] cmdArgs = buildCmdArgs(document, batch.getRenderedImageNameFormat());
return new ProcessCmdsAndRenderedImageFiles(cmdArgs, fullPageImages);
}
private String[] buildCmdArgs(List<Integer> pageNumbers, String documentAbsolutePath, String imagePathFormat) {
private String[] buildCmdArgs(Path document, String imagePathFormat) {
StringBuilder sPageList = new StringBuilder();
int i = 1;
for (Integer integer : pageNumbers) {
sPageList.append(integer);
if (i < pageNumbers.size()) {
sPageList.append(",");
}
i++;
}
return new String[]{"gs", "-dNOPAUSE", "-sDEVICE=" + DEVICE, "-r" + DPI, "-sPageList=" + sPageList, "-sOutputFile=" + imagePathFormat, documentAbsolutePath, "-c", "quit"};
return new String[]{"gs", "-dNOPAUSE", "-sDEVICE=" + DEVICE, "-r" + DPI, "-sOutputFile=" + imagePathFormat, document.toFile().toString(), "-c", "quit"};
}
@SneakyThrows
private Process executeProcess(ProcessCmdsAndRenderedImageFiles processInfo, Consumer<ImageFile> successHandler, Consumer<String> errorHandler) {
private void executeProcess(PageBatch batch, ProcessCmdsAndRenderedImageFiles processInfo, Consumer<ImageFile> successHandler, Consumer<String> errorHandler) {
Process p = Runtime.getRuntime().exec(processInfo.cmdArgs());
InputStream stdOut = p.getInputStream();
GhostScriptOutputHandler stdOutLogger = GhostScriptOutputHandler.stdOut(stdOut, processInfo.renderedPageImageFiles(), successHandler, errorHandler);
GhostScriptOutputHandler stdOutLogger = GhostScriptOutputHandler.stdOut(batch.getIndex(), stdOut, processInfo.renderedPageImageFiles(), successHandler, errorHandler);
InputStream stdError = p.getErrorStream();
GhostScriptOutputHandler stdErrorLogger = GhostScriptOutputHandler.stdError(stdError, errorHandler);
GhostScriptOutputHandler stdErrorLogger = GhostScriptOutputHandler.stdError(batch.getIndex(), stdError, errorHandler);
stdOutLogger.start();
stdErrorLogger.start();
return p;
handleFinished(p, errorHandler, batch, successHandler);
}
private void handleFinished(Process p, Consumer<String> errorHandler, PageBatch batch, Consumer<ImageFile> successHandler) {
Thread finishedThread = new Thread(() -> {
try {
if (!p.waitFor(2, TimeUnit.MINUTES)) {
p.destroyForcibly();
errorHandler.accept("Batch %d: Ghostscript rendering timed out and was terminated after 2 minutes".formatted(batch.getIndex()));
}
} catch (InterruptedException e) {
errorHandler.accept("Batch %d: Ghostscript rendering was interrupted: %s".formatted(batch.getIndex(), e.getMessage()));
} finally {
concurrencySemaphore.release();
}
});
finishedThread.start();
}
@ -158,8 +139,4 @@ public class GhostScriptService {
}
private record ProcessInfo(Integer processIdx, List<Integer> pageNumbers) {
}
}
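The `concurrencySemaphore` in `startBatchRender` caps concurrent Ghostscript processes at three, with the permit released from a watcher thread once the process ends (`handleFinished`). A reduced sketch of that pattern, with sleeping tasks in place of real processes; `BoundedLauncher` and its counters are hypothetical:

```java
import java.util.concurrent.Semaphore;
import java.util.concurrent.atomic.AtomicInteger;

public class BoundedLauncher {

    static final Semaphore PERMITS = new Semaphore(3); // same limit as the service
    static final AtomicInteger RUNNING = new AtomicInteger();
    static final AtomicInteger MAX_SEEN = new AtomicInteger();

    // Acquire a permit before launching, run the task on its own thread,
    // and release the permit when the task ends -- mirroring handleFinished().
    static Thread launch(Runnable task) throws InterruptedException {
        PERMITS.acquire(); // blocks the caller once 3 tasks are in flight
        Thread watcher = new Thread(() -> {
            try {
                MAX_SEEN.accumulateAndGet(RUNNING.incrementAndGet(), Math::max);
                task.run();
            } finally {
                RUNNING.decrementAndGet();
                PERMITS.release();
            }
        });
        watcher.start();
        return watcher;
    }

    public static void main(String[] args) throws Exception {
        for (int i = 0; i < 10; i++) {
            launch(() -> { try { Thread.sleep(20); } catch (InterruptedException ignored) {} });
        }
        PERMITS.acquire(3); // all permits back means every task finished
        PERMITS.release(3);
        System.out.println(MAX_SEEN.get() <= 3); // true
    }
}
```

Because the permit is only released after the process terminates (or times out), a crashed renderer cannot starve later batches.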

View File

@ -1,15 +1,12 @@
package com.knecon.fforesight.service.ocr.processor.service.imageprocessing;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.Set;
import java.util.function.Consumer;
import org.springframework.stereotype.Service;
import com.knecon.fforesight.service.ocr.processor.model.ImageFile;
import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
@ -26,24 +23,16 @@ public class ImageProcessingPipeline {
@SneakyThrows
public ImageProcessingSupervisor run(Set<Integer> pageNumberSet, Path imageDir, File document) {
public ImageProcessingSupervisor addToPipeline(PageBatch batch) {
Path processedImageDir = imageDir.resolve("processed");
Path renderedImageDir = imageDir.resolve("rendered");
Files.createDirectories(renderedImageDir);
Files.createDirectories(processedImageDir);
List<Integer> pageNumbers = pageNumberSet.stream()
.sorted()
.toList();
List<Integer> pageNumbers = batch.getAllPageNumbers();
ImageProcessingSupervisor supervisor = new ImageProcessingSupervisor(pageNumbers);
Consumer<ImageFile> renderingSuccessConsumer = imageFile -> imageProcessingService.addToProcessingQueue(imageFile, processedImageDir, supervisor);
Consumer<ImageFile> renderingSuccessConsumer = imageFile -> imageProcessingService.addToProcessingQueue(imageFile, batch.getProcessedImageDir(), supervisor);
Consumer<String> renderingErrorConsumer = supervisor::markError;
ghostScriptService.renderPagesBatched(pageNumbers, document.toString(), renderedImageDir, supervisor, renderingSuccessConsumer, renderingErrorConsumer);
ghostScriptService.startBatchRender(batch, supervisor, renderingSuccessConsumer, renderingErrorConsumer);
return supervisor;
}

View File

@ -1,11 +1,13 @@
package com.knecon.fforesight.service.ocr.processor.service.imageprocessing;
import java.io.File;
import java.nio.file.Path;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import org.springframework.stereotype.Service;
import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
import com.knecon.fforesight.service.ocr.processor.model.ImageFile;
import lombok.AccessLevel;
@ -23,9 +25,10 @@ import net.sourceforge.lept4j.util.LeptUtils;
public class ImageProcessingService {
BlockingQueue<ProcessParams> queue = new LinkedBlockingQueue<>();
private final OcrServiceSettings ocrServiceSettings;
public ImageProcessingService() {
public ImageProcessingService(OcrServiceSettings ocrServiceSettings) {
Thread queueConsumerThread = new Thread(() -> {
while (true) {
@ -38,12 +41,13 @@ public class ImageProcessingService {
try {
process(processParams.unprocessedImage(), processParams.outputDir, processParams.supervisor());
} catch (Exception e) {
processParams.supervisor.markPageFinished(processParams.unprocessedImage());
log.error(e.getMessage(), e);
}
}
});
queueConsumerThread.start();
this.ocrServiceSettings = ocrServiceSettings;
}
@ -54,31 +58,43 @@ public class ImageProcessingService {
}
@SneakyThrows
private void process(ImageFile unprocessedImage, Path outputDir, ImageProcessingSupervisor supervisor) {
supervisor.requireNoErrors();
String absoluteFilePath = outputDir.resolve(Path.of(unprocessedImage.absoluteFilePath()).getFileName()).toFile().toString();
ImageFile processedImage = new ImageFile(unprocessedImage.pageNumber(), absoluteFilePath);
synchronized (ImageProcessingSupervisor.class) {
// Leptonica is not thread safe, but is being called in WritableOcrResultFactory as well
Pix processedPix;
Pix pix = unprocessedImage.readPix();
if (ocrServiceSettings.isUseCaches() && processedImage.exists()) {
supervisor.markPageFinished(processedImage);
return;
}
String absoluteFilePath = outputDir.resolve(Path.of(unprocessedImage.absoluteFilePath()).getFileName()).toFile().toString();
try {
if (!unprocessedImage.exists()) {
log.error("ERROR, rendered image {} does not exist", unprocessedImage.absoluteFilePath());
throw new AssertionError("Rendered image " + unprocessedImage.absoluteFilePath() + " does not exist");
}
synchronized (ImageProcessingSupervisor.class) {
// Leptonica is not thread safe, but is being called in WritableOcrResultFactory as well
Pix processedPix;
Pix pix = unprocessedImage.readPix();
processedPix = processPix(pix);
Leptonica1.pixWrite(absoluteFilePath, processedPix, ILeptonica.IFF_TIFF_PACKBITS);
assert pix != null;
LeptUtils.disposePix(pix);
LeptUtils.disposePix(processedPix);
processedPix = processPix(pix);
Leptonica1.pixWrite(processedImage.absoluteFilePath(), processedPix, ILeptonica.IFF_TIFF_PACKBITS);
ImageFile imageFile = new ImageFile(unprocessedImage.pageNumber(), absoluteFilePath);
supervisor.markPageFinished(imageFile);
LeptUtils.disposePix(pix);
LeptUtils.disposePix(processedPix);
}
} catch (Exception e) {
supervisor.markError("Page %d could not be processed due to: %s".formatted(unprocessedImage.pageNumber(), e.getMessage()));
} finally {
supervisor.markPageFinished(processedImage);
log.debug("Finished page: {}", processedImage.pageNumber());
}
}
@SneakyThrows
private Pix processPix(Pix pix) {
Pix binarized;
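`ImageProcessingService` decouples submission from processing with a single consumer thread draining a `LinkedBlockingQueue`. A minimal sketch of that shape, with strings standing in for `ProcessParams` (`QueueWorker` is hypothetical):

```java
import java.util.List;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.LinkedBlockingQueue;

public class QueueWorker {

    public final BlockingQueue<String> queue = new LinkedBlockingQueue<>();
    public final List<String> processed = new CopyOnWriteArrayList<>();

    public QueueWorker() {
        // One consumer thread drains the queue forever, as in the service's constructor.
        Thread consumer = new Thread(() -> {
            while (true) {
                try {
                    String item = queue.take();        // blocks until work arrives
                    processed.add(item.toUpperCase()); // stand-in for process(...)
                } catch (InterruptedException e) {
                    return;
                }
            }
        });
        consumer.setDaemon(true);
        consumer.start();
    }

    // Stand-in for addToProcessingQueue: producers never block on the work itself.
    public void submit(String item) {
        queue.add(item);
    }
}
```

Serializing the work through one thread is also what keeps the non-thread-safe Leptonica calls from overlapping.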

View File

@ -53,7 +53,7 @@ public class ImageProcessingSupervisor {
public ImageFile awaitProcessedPage(Integer pageNumber) throws InterruptedException {
if (hasErros()) {
if (hasErrors()) {
return null;
}
getPageLatch(pageNumber).await();
@ -61,14 +61,15 @@ public class ImageProcessingSupervisor {
}
private boolean hasErros() {
private boolean hasErrors() {
return errors.isEmpty();
return !errors.isEmpty();
}
public void markError(String errorMessage) {
log.error(errorMessage);
this.errors.add(errorMessage);
}
@ -86,7 +87,7 @@ public class ImageProcessingSupervisor {
if (this.errors.isEmpty()) {
return;
}
throw new IllegalStateException("Error(s) occurred during image processing: " + String.join("\n", errors));
throw new IllegalStateException("Error(s) occurred during image processing: " + String.join("\n", errors.subList(0, Math.min(errors.size(), 3))));
}
}
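`awaitProcessedPage` blocks on a per-page latch that `markPageFinished` releases. The coordination can be sketched with one `CountDownLatch` per page number; `PageLatches` is a reduced, hypothetical stand-in for the supervisor with error handling omitted:

```java
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CountDownLatch;

public class PageLatches {

    final Map<Integer, CountDownLatch> latches = new ConcurrentHashMap<>();
    final Map<Integer, String> results = new ConcurrentHashMap<>();

    // Either side may touch a page first, so the latch is created lazily.
    private CountDownLatch latchFor(int page) {
        return latches.computeIfAbsent(page, p -> new CountDownLatch(1));
    }

    // Called by the image-processing side once a page's image is ready.
    public void markPageFinished(int page, String imageFile) {
        results.put(page, imageFile);
        latchFor(page).countDown();
    }

    // Called by the OCR side; blocks until the page has been processed.
    public String awaitProcessedPage(int page) throws InterruptedException {
        latchFor(page).await();
        return results.get(page);
    }
}
```

A one-shot latch per page lets any number of waiters proceed once the page is done, without polling.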

View File

@ -3,13 +3,9 @@ package com.knecon.fforesight.service.ocr.processor.service.imageprocessing;
import static net.sourceforge.lept4j.ILeptonica.L_THIN_FG;
import java.io.Closeable;
import java.io.IOException;
import java.nio.IntBuffer;
import org.springframework.stereotype.Service;
import lombok.AccessLevel;
import lombok.NoArgsConstructor;
import lombok.experimental.FieldDefaults;
import net.sourceforge.lept4j.Leptonica1;
import net.sourceforge.lept4j.Pix;
@ -19,6 +15,7 @@ import net.sourceforge.lept4j.util.LeptUtils;
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class StrokeWidthCalculator implements Closeable {
public static final double MINIMUM_PIXEL_DENSITY = 0.05;
Sela thinningSel = Leptonica1.selaMakeThinSets(1, 0);
@ -46,6 +43,14 @@ public class StrokeWidthCalculator implements Closeable {
}
public static boolean wordImageHasMinimumPixelDensity(Pix wordImage) {
IntBuffer pixelCount = IntBuffer.allocate(1);
Leptonica1.pixCountPixels(wordImage, pixelCount, null);
return (double) pixelCount.get(0) / (wordImage.w * wordImage.h) >= MINIMUM_PIXEL_DENSITY;
}
public boolean hasLargerStrokeWidth(Pix pix, double strokeWidth, double threshold) {
int roundedStrokeWidth = (int) Math.round(strokeWidth);

View File

@ -1,25 +1,26 @@
package com.knecon.fforesight.service.ocr.processor.utils;
import org.apache.commons.lang3.StringUtils;
import java.util.Locale;
import lombok.experimental.UtilityClass;
@UtilityClass
public final class OsUtils {
private static final String SERVICE_NAME = "azure-ocr-service";
private static boolean isWindows() {
return StringUtils.containsIgnoreCase(System.getProperty("os.name"), "Windows");
String osName = System.getProperty("os.name");
if (osName == null) {
return false;
}
return osName.toLowerCase(Locale.ENGLISH).contains("windows");
}
public static String getTemporaryDirectory() {
String tmpdir = System.getProperty("java.io.tmpdir");
if (isWindows() && StringUtils.isNotBlank(tmpdir)) {
if (isWindows() && tmpdir != null && !tmpdir.isBlank()) {
return tmpdir;
}
return "/tmp";

View File

@ -0,0 +1,40 @@
package com.knecon.fforesight.service.ocr.processor.utils;
import java.util.regex.Pattern;
import lombok.experimental.UtilityClass;
@UtilityClass
public class StringCleaningUtility {
public static final Pattern hyphenLineBreaks = Pattern.compile("[-~‐‒⁻−﹣゠⁓‑\\u00AD][\\r\\n]+");
public static final Pattern linebreaks = Pattern.compile("[\\r\\n]+");
public static final Pattern doubleWhitespaces = Pattern.compile("\\s{2,}");
public static String cleanString(String value) {
String noHyphenLinebreaks = removeHyphenLinebreaks(value);
String noLinebreaks = removeLinebreaks(noHyphenLinebreaks);
return removeMultipleWhitespaces(noLinebreaks);
}
private String removeHyphenLinebreaks(String value) {
return hyphenLineBreaks.matcher(value).replaceAll("");
}
private String removeMultipleWhitespaces(String value) {
return doubleWhitespaces.matcher(value).replaceAll(" ");
}
private String removeLinebreaks(String value) {
return linebreaks.matcher(value).replaceAll(" ");
}
}
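The three patterns in `StringCleaningUtility` compose in order: hyphen-plus-linebreak sequences are joined, remaining linebreaks become spaces, and whitespace runs collapse. A self-contained copy of that logic (`StringCleaningDemo` is just a demo wrapper around the same regexes):

```java
import java.util.regex.Pattern;

public class StringCleaningDemo {

    static final Pattern HYPHEN_LINE_BREAKS = Pattern.compile("[-~‐‒⁻−﹣゠⁓‑\\u00AD][\\r\\n]+");
    static final Pattern LINEBREAKS = Pattern.compile("[\\r\\n]+");
    static final Pattern DOUBLE_WHITESPACES = Pattern.compile("\\s{2,}");

    // Order matters: joining hyphenated words first keeps "docu-\nment"
    // from becoming "docu ment" in the linebreak step.
    public static String cleanString(String value) {
        String noHyphens = HYPHEN_LINE_BREAKS.matcher(value).replaceAll("");
        String noBreaks = LINEBREAKS.matcher(noHyphens).replaceAll(" ");
        return DOUBLE_WHITESPACES.matcher(noBreaks).replaceAll(" ");
    }

    public static void main(String[] args) {
        // "docu-" + newline + "ment" rejoins; the other newline becomes a space.
        System.out.println(cleanString("docu-\nment is  split\nacross lines"));
        // -> "document is split across lines"
    }
}
```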

View File

@ -1,14 +1,14 @@
package com.knecon.fforesight.service.ocr.processor.visualizations;
import com.azure.ai.documentintelligence.models.AnalyzeResult;
import com.knecon.fforesight.service.ocr.v1.api.model.AzureAnalyzeResult;
import com.knecon.fforesight.service.ocr.v1.api.model.IdpResult;
import lombok.experimental.UtilityClass;
@UtilityClass
public class AnalyzeResultMapper {
public AzureAnalyzeResult map(AnalyzeResult analyzeResult) {
public IdpResult map(AnalyzeResult analyzeResult) {
return null;
}

View File

@ -0,0 +1,23 @@
package com.knecon.fforesight.service.ocr.processor.visualizations;
import java.util.function.Function;
import java.util.stream.Stream;
import com.azure.ai.documentintelligence.models.DocumentSpan;
import com.knecon.fforesight.service.ocr.processor.model.SpanLookup;
public class FontStyler {
public record Lookups(SpanLookup<DocumentSpan> bold, SpanLookup<DocumentSpan> italic, SpanLookup<DocumentSpan> handwritten) {
public static Lookups empty() {
return new Lookups(new SpanLookup<>(Stream.empty(), Function.identity()),
new SpanLookup<>(Stream.empty(), Function.identity()),
new SpanLookup<>(Stream.empty(), Function.identity()));
}
}
}

View File

@ -20,6 +20,7 @@ import lombok.experimental.FieldDefaults;
public final class WritableOcrResult {
int pageNumber;
double angle;
@Builder.Default
List<TextPositionInImage> textPositionInImage = Collections.emptyList();
@Builder.Default

View File

@ -1,367 +0,0 @@
package com.knecon.fforesight.service.ocr.processor.visualizations;
import java.awt.geom.AffineTransform;
import java.awt.geom.Line2D;
import java.awt.geom.Rectangle2D;
import java.nio.IntBuffer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import com.azure.ai.documentintelligence.models.AnalyzeResult;
import com.azure.ai.documentintelligence.models.BoundingRegion;
import com.azure.ai.documentintelligence.models.DocumentPage;
import com.azure.ai.documentintelligence.models.DocumentSpan;
import com.azure.ai.documentintelligence.models.DocumentStyle;
import com.azure.ai.documentintelligence.models.DocumentTable;
import com.azure.ai.documentintelligence.models.DocumentTableCell;
import com.azure.ai.documentintelligence.models.DocumentWord;
import com.azure.ai.documentintelligence.models.FontWeight;
import com.google.common.base.Functions;
import com.knecon.fforesight.service.ocr.processor.model.ImageFile;
import com.knecon.fforesight.service.ocr.processor.model.PageInformation;
import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
import com.knecon.fforesight.service.ocr.processor.model.SpanLookup;
import com.knecon.fforesight.service.ocr.processor.model.TextPositionInImage;
import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.FontStyleDetector;
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingSupervisor;
import com.knecon.fforesight.service.ocr.processor.visualizations.fonts.FontMetricsProvider;
import com.knecon.fforesight.service.ocr.processor.visualizations.fonts.FontStyle;
import com.knecon.fforesight.service.ocr.processor.visualizations.fonts.Type0FontMetricsProvider;
import com.knecon.fforesight.service.ocr.v1.api.model.QuadPoint;
import lombok.AccessLevel;
import lombok.Getter;
import lombok.SneakyThrows;
import lombok.experimental.FieldDefaults;
import net.sourceforge.lept4j.Box;
import net.sourceforge.lept4j.Leptonica1;
import net.sourceforge.lept4j.Pix;
import net.sourceforge.lept4j.util.LeptUtils;
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class WritableOcrResultFactory {
FontMetricsProvider regularFont = Type0FontMetricsProvider.REGULAR_INSTANCE;
FontMetricsProvider boldFont = Type0FontMetricsProvider.BOLD_INSTANCE;
FontMetricsProvider italicFont = Type0FontMetricsProvider.ITALIC_INSTANCE;
FontMetricsProvider boldItalicFont = Type0FontMetricsProvider.BOLD_ITALIC_INSTANCE;
@Getter
Map<Integer, AffineTransform> pageCtms;
Map<Integer, PageInformation> pageInformation;
OcrServiceSettings settings;
ImageProcessingSupervisor imageSupervisor;
@SneakyThrows
public WritableOcrResultFactory(Map<Integer, PageInformation> pageInformation, OcrServiceSettings settings, ImageProcessingSupervisor imageSupervisor) {
this.pageInformation = pageInformation;
pageCtms = Collections.synchronizedMap(new HashMap<>());
this.settings = settings;
this.imageSupervisor = imageSupervisor;
}
public List<WritableOcrResult> buildOcrResultToWrite(AnalyzeResult analyzeResult, PageBatch pageOffset) throws InterruptedException {
List<WritableOcrResult> writableOcrResultList = new ArrayList<>();
Lookups lookups = getLookups(analyzeResult);
for (DocumentPage resultPage : analyzeResult.getPages()) {
PageInformation pageInformation = getPageInformation(getPageNumber(pageOffset, resultPage));
AffineTransform pageCtm = getPageCTM(pageInformation, resultPage.getWidth());
pageCtms.put(getPageNumber(pageOffset, resultPage), pageCtm);
List<TextPositionInImage> words = buildTextPositionsInImage(pageOffset, resultPage, pageCtm, lookups, pageInformation);
var builder = WritableOcrResult.builder().pageNumber(pageInformation.number()).textPositionInImage(words);
if (settings.isTableDetection()) {
builder.tableLines(getTableLines(analyzeResult, pageInformation, pageCtm));
}
writableOcrResultList.add(builder.build());
}
return writableOcrResultList;
}
private List<TextPositionInImage> buildTextPositionsInImage(PageBatch pageOffset,
DocumentPage resultPage,
AffineTransform pageCtm,
Lookups lookups,
PageInformation pageInformation) throws InterruptedException {
if (!settings.isFontStyleDetection()) {
return buildText(resultPage, pageCtm, lookups, pageInformation);
}
ImageFile imageFile = imageSupervisor.awaitProcessedPage(getPageNumber(pageOffset, resultPage));
if (imageFile == null) {
return buildText(resultPage, pageCtm, lookups, pageInformation);
}
synchronized (ImageProcessingSupervisor.class) {
return buildTextWithBoldDetection(resultPage, pageCtm, pageInformation, imageFile);
}
}
private static List<TextPositionInImage> buildTextWithBoldDetection(DocumentPage resultPage, AffineTransform pageCtm, PageInformation pageInformation, ImageFile imageFile) {
// Leptonica is not thread safe, but is being called in ImageProcessingService as well
Pix pageImage = imageFile.readPix();
List<TextPositionInImage> words = new ArrayList<>();
try (FontStyleDetector fontStyleDetector = new FontStyleDetector()) {
AffineTransform imageTransform = new AffineTransform();
double scalingFactor = pageImage.w / resultPage.getWidth();
imageTransform.scale(scalingFactor, scalingFactor);
for (DocumentWord word : resultPage.getWords()) {
TextPositionInImage textPosition = new TextPositionInImage(word, pageCtm, Type0FontMetricsProvider.REGULAR_INSTANCE, FontStyle.REGULAR);
if (intersectsIgnoreZone(pageInformation.wordBBoxes(), textPosition)) {
textPosition.setOverlapsIgnoreZone(true);
}
Pix wordImage = extractWordImage(word, imageTransform, pageImage);
IntBuffer pixelCount = IntBuffer.allocate(1);
Leptonica1.pixCountPixels(wordImage, pixelCount, null);
if (pixelCount.get(0) > 3) {
fontStyleDetector.add(textPosition, wordImage, textPosition.getFontSizeByHeight());
}
words.add(textPosition);
}
fontStyleDetector.classifyWords();
} finally {
LeptUtils.disposePix(pageImage);
}
return words;
}
private static Pix extractWordImage(DocumentWord word, AffineTransform imageTransform, Pix pageImage) {
Rectangle2D wordBBox = QuadPoint.fromPolygons(word.getPolygon()).getTransformed(imageTransform).getBounds2D();
Box box = new Box((int) wordBBox.getX(), (int) wordBBox.getY(), (int) wordBBox.getWidth(), (int) wordBBox.getHeight(), 1);
Pix wordImage = Leptonica1.pixClipRectangle(pageImage, box, null);
box.clear();
return wordImage;
}
private List<TextPositionInImage> buildText(DocumentPage resultPage, AffineTransform pageCtm, Lookups lookups, PageInformation pageInformation) {
return resultPage.getWords()
.stream()
.map(word -> buildTextPositionInImage(word, pageCtm, lookups))
.map(textPositionInImage -> markTextOverlappingIgnoreZone(textPositionInImage, pageInformation.wordBBoxes()))
.collect(Collectors.toList());
}
private static int getPageNumber(PageBatch pageOffset, DocumentPage resultPage) {
return pageOffset.getPageNumber(resultPage.getPageNumber());
}
private static Lookups getLookups(AnalyzeResult analyzeResult) {
if (analyzeResult.getStyles() == null || analyzeResult.getStyles().isEmpty()) {
return Lookups.empty();
}
SpanLookup<DocumentSpan> boldLookup = new SpanLookup<>(analyzeResult.getStyles()
.stream()
.filter(style -> Objects.equals(style.getFontWeight(), FontWeight.BOLD))
.map(DocumentStyle::getSpans)
.flatMap(Collection::stream), Function.identity());
SpanLookup<DocumentSpan> italicLookup = new SpanLookup<>(analyzeResult.getStyles()
.stream()
.filter(style -> Objects.equals(style.getFontStyle(),
com.azure.ai.documentintelligence.models.FontStyle.ITALIC))
.map(DocumentStyle::getSpans)
.flatMap(Collection::stream), Function.identity());
SpanLookup<DocumentSpan> handWrittenLookup = new SpanLookup<>(analyzeResult.getStyles()
.stream()
.filter(documentStyle -> documentStyle.isHandwritten() != null && documentStyle.isHandwritten())
.map(DocumentStyle::getSpans)
.flatMap(Collection::stream), Function.identity());
return new Lookups(boldLookup, italicLookup, handWrittenLookup);
}
private TextPositionInImage buildTextPositionInImage(DocumentWord dw, AffineTransform imageCTM, Lookups lookups) {
boolean bold = lookups.bold().containedInAnySpan(dw.getSpan());
boolean italic = lookups.italic().containedInAnySpan(dw.getSpan());
boolean handwritten = lookups.handwritten().containedInAnySpan(dw.getSpan());
FontStyle fontStyle;
FontMetricsProvider font;
if (handwritten) {
fontStyle = FontStyle.HANDWRITTEN;
font = regularFont;
} else if (italic && bold) {
fontStyle = FontStyle.BOLD_ITALIC;
font = boldItalicFont;
} else if (bold) {
fontStyle = FontStyle.BOLD;
font = boldFont;
} else if (italic) {
fontStyle = FontStyle.ITALIC;
font = italicFont;
} else {
fontStyle = FontStyle.REGULAR;
font = regularFont;
}
return new TextPositionInImage(dw, imageCTM, font, fontStyle);
}
private static List<Line2D> getTableLines(AnalyzeResult analyzeResult, PageInformation pageInformation, AffineTransform imageCTM) {
if (analyzeResult.getTables() == null || analyzeResult.getTables().isEmpty()) {
return Collections.emptyList();
}
return analyzeResult.getTables()
.stream()
.map(DocumentTable::getCells)
.flatMap(Collection::stream)
.map(DocumentTableCell::getBoundingRegions)
.flatMap(Collection::stream)
.filter(region -> region.getPageNumber() == pageInformation.number())
.map(BoundingRegion::getPolygon)
.map(QuadPoint::fromPolygons)
.map(qp -> qp.getTransformed(imageCTM))
.flatMap(QuadPoint::asLines)
.toList();
}
private static TextPositionInImage markTextOverlappingIgnoreZone(TextPositionInImage textPositionInImage, List<Rectangle2D> ignoreZones) {
if (intersectsIgnoreZone(ignoreZones, textPositionInImage)) {
textPositionInImage.setOverlapsIgnoreZone(true);
}
return textPositionInImage;
}
private static boolean intersectsIgnoreZone(List<Rectangle2D> ignoreZones, TextPositionInImage textPositionInImage) {
for (Rectangle2D ignoreZone : ignoreZones) {
Rectangle2D textBBox = textPositionInImage.getTransformedTextBBox().getBounds2D();
if (textBBox.intersects(ignoreZone)) {
double intersectedArea = calculateIntersectedArea(textBBox, ignoreZone);
double textArea = textBBox.getWidth() * textBBox.getHeight();
if (intersectedArea / textArea > 0.5) {
return true;
}
double ignoreZoneArea = ignoreZone.getWidth() * ignoreZone.getHeight();
if (intersectedArea / ignoreZoneArea > 0.5) {
return true;
}
}
}
return false;
}
public static double calculateIntersectedArea(Rectangle2D r1, Rectangle2D r2) {
double xOverlap = Math.max(0, Math.min(r1.getMaxX(), r2.getMaxX()) - Math.max(r1.getMinX(), r2.getMinX()));
double yOverlap = Math.max(0, Math.min(r1.getMaxY(), r2.getMaxY()) - Math.max(r1.getMinY(), r2.getMinY()));
return xOverlap * yOverlap;
}
public static AffineTransform getPageCTM(PageInformation pageInformation, double imageWidth) {
double scalingFactor = calculateScalingFactor(imageWidth, pageInformation);
AffineTransform imageToCropBoxScaling = new AffineTransform(scalingFactor, 0, 0, scalingFactor, 0, 0);
AffineTransform mirrorMatrix = new AffineTransform(1, 0, 0, -1, 0, pageInformation.height());
AffineTransform rotationMatrix = switch (pageInformation.rotationDegrees()) {
case 90 -> new AffineTransform(0, 1, -1, 0, pageInformation.height(), 0);
case 180 -> new AffineTransform(-1, 0, 0, -1, pageInformation.width(), pageInformation.height());
case 270 -> new AffineTransform(0, -1, 1, 0, pageInformation.width() - pageInformation.height(), pageInformation.height());
default -> new AffineTransform();
};
// matrix multiplication is performed from right to left, so the order is reversed.
// scaling -> mirror -> rotation
AffineTransform resultMatrix = new AffineTransform();
resultMatrix.concatenate(rotationMatrix);
resultMatrix.concatenate(mirrorMatrix);
resultMatrix.concatenate(imageToCropBoxScaling);
return resultMatrix;
}
private static double calculateScalingFactor(double width, PageInformation pageInformation) {
// PDFBox always returns page height and width based on rotation
double pageWidth;
if (pageInformation.rotationDegrees() == 90 || pageInformation.rotationDegrees() == 270) {
pageWidth = pageInformation.height();
} else {
pageWidth = pageInformation.width();
}
return pageWidth / width;
}
@SneakyThrows
private PageInformation getPageInformation(Integer pageNumber) {
return pageInformation.get(pageNumber);
}
private record Lookups(SpanLookup<DocumentSpan> bold, SpanLookup<DocumentSpan> italic, SpanLookup<DocumentSpan> handwritten) {
public static Lookups empty() {
return new Lookups(new SpanLookup<>(Stream.empty(), Function.identity()),
new SpanLookup<>(Stream.empty(), Function.identity()),
new SpanLookup<>(Stream.empty(), Function.identity()));
}
}
}
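The ignore-zone test in `intersectsIgnoreZone` relies on `calculateIntersectedArea`, which is the standard overlap formula for axis-aligned rectangles. A minimal standalone sketch of the same computation (the class name is illustrative, not part of the service):

```java
import java.awt.geom.Rectangle2D;

public class OverlapSketch {

    // Overlap area of two axis-aligned rectangles; 0 when they do not intersect.
    public static double intersectedArea(Rectangle2D r1, Rectangle2D r2) {
        double xOverlap = Math.max(0, Math.min(r1.getMaxX(), r2.getMaxX()) - Math.max(r1.getMinX(), r2.getMinX()));
        double yOverlap = Math.max(0, Math.min(r1.getMaxY(), r2.getMaxY()) - Math.max(r1.getMinY(), r2.getMinY()));
        return xOverlap * yOverlap;
    }

    public static void main(String[] args) {
        Rectangle2D a = new Rectangle2D.Double(0, 0, 10, 10);
        Rectangle2D b = new Rectangle2D.Double(5, 5, 10, 10);
        System.out.println(intersectedArea(a, b)); // 25.0: a 5x5 corner overlap
        System.out.println(intersectedArea(a, new Rectangle2D.Double(20, 20, 5, 5))); // 0.0: disjoint
    }
}
```

A word is marked as overlapping when the intersection covers more than half of either the word box or the ignore zone, so a small ignore zone swallowed by a large word box is caught as well.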

View File

@@ -8,7 +8,7 @@ import lombok.SneakyThrows;
public interface FontMetricsProvider extends EmbeddableFont {
default FontMetrics calculateMetrics(String text, double textWidth, double textHeight) {
default FontMetrics calculateMetricsForAzureBBox(String text, double textWidth, double textHeight) {
HeightAndDescent heightAndDescent = calculateHeightAndDescent(text);
float fontSize = calculateFontSize(text, textWidth);
@@ -18,6 +18,16 @@ public interface FontMetricsProvider extends EmbeddableFont {
}
default FontMetrics calculateMetricsForTightBBox(String text, double textWidth, double textHeight) {
HeightAndDescent heightAndDescent = calculateHeightAndDescent(text);
float fontSize = calculateFontSize(text, textWidth);
float heightScaling = (float) ((textHeight / (heightAndDescent.height() - heightAndDescent.descent())) * 1000) / fontSize;
return new FontMetrics((heightAndDescent.descent() / 1000) * fontSize, fontSize, heightScaling);
}
@SneakyThrows
default float calculateFontSize(String text, double textWidth) {

View File

@@ -12,8 +12,6 @@ import com.azure.ai.documentintelligence.models.DocumentBarcode;
import com.azure.ai.documentintelligence.models.DocumentFigure;
import com.azure.ai.documentintelligence.models.DocumentKeyValuePair;
import com.azure.ai.documentintelligence.models.DocumentLine;
import com.azure.ai.documentintelligence.models.DocumentList;
import com.azure.ai.documentintelligence.models.DocumentListItem;
import com.azure.ai.documentintelligence.models.DocumentParagraph;
import com.azure.ai.documentintelligence.models.DocumentSection;
import com.azure.ai.documentintelligence.models.DocumentTable;
@@ -23,8 +21,8 @@ import com.azure.ai.documentintelligence.models.DocumentWord;
import com.azure.ai.documentintelligence.models.ParagraphRole;
import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
import com.knecon.fforesight.service.ocr.processor.model.SpanLookup;
import com.knecon.fforesight.service.ocr.processor.visualizations.utils.Rectangle2DBBoxCollector;
import com.knecon.fforesight.service.ocr.processor.visualizations.utils.LineUtils;
import com.knecon.fforesight.service.ocr.processor.visualizations.utils.Rectangle2DBBoxCollector;
import com.knecon.fforesight.service.ocr.v1.api.model.QuadPoint;
import com.knecon.fforesight.service.viewerdoc.layers.IdpLayerConfig;
import com.knecon.fforesight.service.viewerdoc.model.ColoredLine;
@@ -42,7 +40,7 @@ import lombok.experimental.FieldDefaults;
public class IdpLayer extends IdpLayerConfig {
public static final int LINE_WIDTH = 1;
private Map<Integer, AffineTransform> pageCtms;
private Map<Integer, AffineTransform> resultToPageTransform;
public void addSection(int pageNumber, DocumentSection section, SpanLookup<DocumentWord> wordsOnPage) {
@@ -65,15 +63,7 @@ public class IdpLayer extends IdpLayerConfig {
var sectionsOnPage = getOrCreateVisualizationsOnPage(pageNumber, vis);
sectionsOnPage.getColoredRectangles().add(new ColoredRectangle(bbox.getTransformed(pageCtms.get(pageNumber)).getBounds2D(), color, LINE_WIDTH));
}
public void addList(DocumentList list, PageBatch pageOffset) {
for (DocumentListItem item : list.getItems()) {
addBoundingRegion(item.getBoundingRegions(), lists, PARAGRAPH_COLOR, pageOffset);
}
sectionsOnPage.getColoredRectangles().add(new ColoredRectangle(bbox.getTransformed(resultToPageTransform.get(pageNumber)).getBounds2D(), color, LINE_WIDTH));
}
@@ -85,19 +75,27 @@ public class IdpLayer extends IdpLayerConfig {
public void addKeyValue(DocumentKeyValuePair keyValue, PageBatch pageOffset) {
if (keyValue.getKey() == null || keyValue.getKey().getContent().isEmpty()) {
return;
}
addBoundingRegion(keyValue.getKey().getBoundingRegions(), keyValuePairs, KEY_COLOR, pageOffset);
if (keyValue.getValue() != null) {
if (keyValue.getValue() != null && !keyValue.getValue().getContent().isEmpty()) {
addBoundingRegion(keyValue.getValue().getBoundingRegions(), keyValuePairs, VALUE_COLOR, pageOffset);
if (keyValue.getKey().getBoundingRegions().get(0).getPageNumber() != keyValue.getValue().getBoundingRegions().get(0).getPageNumber()) {
if (keyValue.getKey().getBoundingRegions()
.get(0).getPageNumber() != keyValue.getValue().getBoundingRegions()
.get(0).getPageNumber()) {
return;
}
int pageNumberWithOffset = pageOffset.getPageNumber(keyValue.getKey().getBoundingRegions().get(0).getPageNumber());
QuadPoint key = QuadPoint.fromPolygons(keyValue.getKey().getBoundingRegions().get(0).getPolygon());
QuadPoint value = QuadPoint.fromPolygons(keyValue.getValue().getBoundingRegions().get(0).getPolygon());
int pageNumberWithOffset = pageOffset.getPageNumber(keyValue.getKey().getBoundingRegions()
.get(0).getPageNumber());
QuadPoint key = QuadPoint.fromPolygons(keyValue.getKey().getBoundingRegions()
.get(0).getPolygon());
QuadPoint value = QuadPoint.fromPolygons(keyValue.getValue().getBoundingRegions()
.get(0).getPolygon());
var line = LineUtils.findClosestMidpointLine(key, value);
line = LineUtils.transform(line, pageCtms.get(pageNumberWithOffset));
line = LineUtils.transform(line, resultToPageTransform.get(pageNumberWithOffset));
var arrowHead = LineUtils.createArrowHead(line, Math.min(LineUtils.length(line), 5));
var linesOnPage = getOrCreateVisualizationsOnPage(pageNumberWithOffset, keyValuePairs).getColoredLines();
linesOnPage.add(new ColoredLine(line, KEY_VALUE_BBOX_COLOR, LINE_WIDTH));
@@ -142,7 +140,7 @@ public class IdpLayer extends IdpLayerConfig {
private void addPolygon(int pageNumber, List<Double> polygon, Visualizations visualizations, Color color) {
VisualizationsOnPage visualizationsOnPage = getOrCreateVisualizationsOnPage(pageNumber, visualizations);
visualizationsOnPage.getColoredLines().addAll(LineUtils.quadPointAsLines(QuadPoint.fromPolygons(polygon).getTransformed(pageCtms.get(pageNumber)), color));
visualizationsOnPage.getColoredLines().addAll(LineUtils.quadPointAsLines(QuadPoint.fromPolygons(polygon).getTransformed(resultToPageTransform.get(pageNumber)), color));
}
@@ -181,7 +179,8 @@ public class IdpLayer extends IdpLayerConfig {
var vis = getOrCreateVisualizationsOnPage(pageOffset.getPageNumber(boundingRegion.getPageNumber()), tables);
QuadPoint qp = QuadPoint.fromPolygons(boundingRegion.getPolygon()).getTransformed(pageCtms.get(pageOffset.getPageNumber(boundingRegion.getPageNumber())));
QuadPoint qp = QuadPoint.fromPolygons(boundingRegion.getPolygon())
.getTransformed(resultToPageTransform.get(pageOffset.getPageNumber(boundingRegion.getPageNumber())));
vis.getFilledRectangles().add(new FilledRectangle(qp.getBounds2D(), TITLE_COLOR, 0.2f));

View File

@@ -20,9 +20,9 @@ public class IdpLayerFactory {
private final IdpLayer idpLayer;
IdpLayerFactory(Map<Integer, AffineTransform> pageCtms) {
IdpLayerFactory(Map<Integer, AffineTransform> resultToPageTransform) {
this.idpLayer = new IdpLayer(pageCtms);
this.idpLayer = new IdpLayer(resultToPageTransform);
}
@@ -65,10 +65,6 @@ public class IdpLayerFactory {
analyzeResult.getTables()
.forEach(documentTable -> idpLayer.addTable(documentTable, pageOffset));
}
if (analyzeResult.getLists() != null) {
analyzeResult.getLists()
.forEach(list -> idpLayer.addList(list, pageOffset));
}
if (analyzeResult.getKeyValuePairs() != null) {
analyzeResult.getKeyValuePairs()
.forEach(keyValue -> idpLayer.addKeyValue(keyValue, pageOffset));

View File

@@ -0,0 +1,241 @@
package com.knecon.fforesight.service.ocr.processor.visualizations.layers;
import static com.knecon.fforesight.service.ocr.processor.utils.StringCleaningUtility.cleanString;
import java.awt.geom.AffineTransform;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import com.azure.ai.documentintelligence.models.AnalyzeResult;
import com.azure.ai.documentintelligence.models.BoundingRegion;
import com.azure.ai.documentintelligence.models.DocumentCaption;
import com.azure.ai.documentintelligence.models.DocumentFigure;
import com.azure.ai.documentintelligence.models.DocumentFootnote;
import com.azure.ai.documentintelligence.models.DocumentKeyValuePair;
import com.azure.ai.documentintelligence.models.DocumentTable;
import com.azure.ai.documentintelligence.models.DocumentTableCell;
import com.knecon.fforesight.service.ocr.processor.model.DocumentSpanLookup;
import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
import com.knecon.fforesight.service.ocr.processor.model.PageInformation;
import com.knecon.fforesight.service.ocr.processor.visualizations.utils.Rectangle2DBBoxCollector;
import com.knecon.fforesight.service.ocr.processor.visualizations.utils.RotationCorrectionUtility;
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
import com.knecon.fforesight.service.ocr.v1.api.model.Figure;
import com.knecon.fforesight.service.ocr.v1.api.model.IdpResult;
import com.knecon.fforesight.service.ocr.v1.api.model.KeyValuePair;
import com.knecon.fforesight.service.ocr.v1.api.model.QuadPoint;
import com.knecon.fforesight.service.ocr.v1.api.model.Region;
import com.knecon.fforesight.service.ocr.v1.api.model.Table;
import com.knecon.fforesight.service.ocr.v1.api.model.TableCell;
import com.knecon.fforesight.service.ocr.v1.api.model.TableCellType;
import com.knecon.fforesight.service.ocr.v1.api.model.TextRegion;
import lombok.AccessLevel;
import lombok.Getter;
import lombok.experimental.FieldDefaults;
@Getter
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class IdpResultFactory {
IdpResult idpResult;
Map<Integer, AffineTransform> resultToPageTransforms;
Map<Integer, PageInformation> pageInformation;
Map<Integer, Double> angles;
boolean rotationCorrection;
public IdpResultFactory(Map<Integer, AffineTransform> resultToPageTransforms,
Map<Integer, PageInformation> pageInformation,
Map<Integer, Double> angles,
Set<AzureOcrFeature> features) {
this.angles = angles;
this.rotationCorrection = features.contains(AzureOcrFeature.ROTATION_CORRECTION);
this.resultToPageTransforms = resultToPageTransforms;
this.pageInformation = pageInformation;
this.idpResult = IdpResult.initSynchronized();
}
public AffineTransform getResultToPageTransform(Integer pageNumber) {
AffineTransform transform;
if (rotationCorrection) {
PageInformation page = pageInformation.get(pageNumber);
transform = RotationCorrectionUtility.buildTransform(-angles.get(pageNumber), page.cropBox().getWidth(), page.cropBox().getHeight(), false);
} else {
transform = new AffineTransform();
}
transform.concatenate(resultToPageTransforms.get(pageNumber));
return transform;
}
public void addAnalyzeResult(AnalyzeResult analyzeResult, PageBatch batch) {
DocumentSpanLookup words = new DocumentSpanLookup(analyzeResult);
if (analyzeResult.getTables() != null) {
analyzeResult.getTables()
.forEach(documentTable -> addTable(documentTable, words, batch));
}
if (analyzeResult.getKeyValuePairs() != null) {
analyzeResult.getKeyValuePairs()
.forEach(documentKeyValuePair -> addKeyValuePair(documentKeyValuePair, batch));
}
if (analyzeResult.getFigures() != null) {
analyzeResult.getFigures()
.forEach(documentFigure -> addFigure(documentFigure, batch, words));
}
}
private void addFigure(DocumentFigure documentFigure, PageBatch batch, DocumentSpanLookup words) {
List<TextRegion> footNotes = new LinkedList<>();
if (documentFigure.getFootnotes() != null) {
documentFigure.getFootnotes()
.stream()
.map(footNote -> toTextRegion(footNote, batch))
.filter(Objects::nonNull)
.forEach(footNotes::add);
}
int batchPageNumber = documentFigure.getBoundingRegions()
.get(0).getPageNumber();
Region bbox = toRegionFromRegions(batch.getPageNumber(batchPageNumber), documentFigure.getBoundingRegions());
TextRegion caption = toTextRegion(documentFigure.getCaption(), batch);
idpResult.figures().add(new Figure(caption, bbox, footNotes));
}
private void addKeyValuePair(DocumentKeyValuePair documentKeyValuePair, PageBatch batch) {
TextRegion key = null;
if (documentKeyValuePair.getKey() != null && !documentKeyValuePair.getKey().getContent().isEmpty()) {
Region region = toRegionFromRegions(batch, documentKeyValuePair.getKey().getBoundingRegions());
key = new TextRegion(region, cleanString(documentKeyValuePair.getKey().getContent()));
}
TextRegion value = null;
if (documentKeyValuePair.getValue() != null && !documentKeyValuePair.getValue().getContent().isEmpty()) {
Region region = toRegionFromRegions(batch, documentKeyValuePair.getValue().getBoundingRegions());
value = new TextRegion(region, cleanString(documentKeyValuePair.getValue().getContent()));
}
idpResult.keyValuePairs().add(new KeyValuePair(key, value));
}
private void addTable(DocumentTable documentTable, DocumentSpanLookup words, PageBatch batch) {
TextRegion caption = toTextRegion(documentTable.getCaption(), batch);
List<TableCell> tableCells = documentTable.getCells()
.stream()
.map(documentTableCell -> toTableCell(documentTableCell, words, batch))
.toList();
List<TextRegion> footNotes = new LinkedList<>();
if (documentTable.getFootnotes() != null) {
documentTable.getFootnotes()
.stream()
.map(footNote -> toTextRegion(footNote, batch))
.filter(Objects::nonNull)
.forEach(footNotes::add);
}
List<Region> bbox = documentTable.getBoundingRegions()
.stream()
.map(b -> toRegion(b, batch))
.toList();
Table table = new Table(caption, documentTable.getColumnCount(), documentTable.getRowCount(), tableCells, footNotes, bbox);
idpResult.tables().add(table);
}
private TextRegion toTextRegion(DocumentFootnote footNote, PageBatch batch) {
if (footNote == null || footNote.getBoundingRegions().isEmpty()) {
return null;
}
Region region = toRegionFromRegions(batch, footNote.getBoundingRegions());
return new TextRegion(region, cleanString(footNote.getContent()));
}
private TextRegion toTextRegion(DocumentCaption caption, PageBatch batch) {
if (caption == null || caption.getBoundingRegions().isEmpty()) {
return null;
}
Region region = toRegionFromRegions(batch, caption.getBoundingRegions());
return new TextRegion(region, cleanString(caption.getContent()));
}
private TableCell toTableCell(DocumentTableCell documentTableCell, DocumentSpanLookup words, PageBatch batch) {
int batchPageNumber = documentTableCell.getBoundingRegions()
.get(0).getPageNumber();
Region region = toRegionFromRegions(batch.getPageNumber(batchPageNumber), documentTableCell.getBoundingRegions());
TableCellType kind = mapTableCellType(documentTableCell);
return new TableCell(new TextRegion(region, cleanString(documentTableCell.getContent())), documentTableCell.getRowIndex(), documentTableCell.getColumnIndex(), kind);
}
private static TableCellType mapTableCellType(DocumentTableCell documentTableCell) {
if (documentTableCell.getKind() == null) {
return TableCellType.CONTENT;
}
return switch (documentTableCell.getKind().toString()) {
case "columnHeader" -> TableCellType.COLUMN_HEADER;
case "rowHeader" -> TableCellType.ROW_HEADER;
case "description" -> TableCellType.DESCRIPTION;
case "stubHead" -> TableCellType.STUB_HEAD;
default -> TableCellType.CONTENT;
};
}
private Region toRegion(BoundingRegion boundingRegion, PageBatch batch) {
int pageNumber = batch.getPageNumber(boundingRegion.getPageNumber());
QuadPoint qp = QuadPoint.fromPolygons(boundingRegion.getPolygon()).getTransformed(getResultToPageTransform(pageNumber));
return new Region(pageNumber, qp.toData());
}
private Region toRegionFromRegions(int pageNumber, List<BoundingRegion> regions) {
if (regions.size() == 1) {
return new Region(pageNumber, QuadPoint.fromPolygons(regions.get(0).getPolygon()).getTransformed(getResultToPageTransform(pageNumber)).toData());
}
QuadPoint bbox = QuadPoint.fromRectangle2D(regions.stream()
.map(BoundingRegion::getPolygon)
.map(QuadPoint::fromPolygons)
.map(qp -> qp.getTransformed(getResultToPageTransform(pageNumber)).getBounds2D())
.collect(new Rectangle2DBBoxCollector()));
return new Region(pageNumber, bbox.toData());
}
private Region toRegionFromRegions(PageBatch batch, List<BoundingRegion> regions) {
assert !regions.isEmpty();
int batchPageNumber = regions.get(0).getPageNumber();
if (!regions.stream()
.map(BoundingRegion::getPageNumber)
.allMatch(number -> number == batchPageNumber)) {
throw new AssertionError();
}
int pageNumber = batch.getPageNumber(batchPageNumber);
return toRegionFromRegions(pageNumber, regions);
}
}
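`toRegionFromRegions` collapses several bounding regions on the same page into a single box via `Rectangle2DBBoxCollector`, whose source is not part of this diff. A minimal equivalent using `Rectangle2D.createUnion` (a hypothetical helper, shown only to illustrate the merge step; the collector's actual implementation may differ):

```java
import java.awt.geom.Rectangle2D;
import java.util.List;

public class BBoxUnionSketch {

    // Union of several axis-aligned boxes into one bounding box,
    // mirroring the multi-region merge in toRegionFromRegions.
    public static Rectangle2D boundingBox(List<Rectangle2D> boxes) {
        Rectangle2D result = null;
        for (Rectangle2D box : boxes) {
            result = (result == null) ? (Rectangle2D) box.clone() : result.createUnion(box);
        }
        return result;
    }

    public static void main(String[] args) {
        Rectangle2D union = boundingBox(List.of(
                new Rectangle2D.Double(0, 0, 10, 10),
                new Rectangle2D.Double(20, 5, 10, 10)));
        System.out.println(union); // spans x 0..30, y 0..15
    }
}
```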

View File

@@ -1,57 +1,77 @@
package com.knecon.fforesight.service.ocr.processor.visualizations.layers;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import com.azure.ai.documentintelligence.models.AnalyzeResult;
import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
import com.knecon.fforesight.service.ocr.processor.model.PageInformation;
import com.knecon.fforesight.service.ocr.processor.service.OcrExecutionSupervisor;
import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingSupervisor;
import com.knecon.fforesight.service.ocr.processor.service.OcrResultPostProcessingPipeline;
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingPipeline;
import com.knecon.fforesight.service.ocr.processor.visualizations.WritableOcrResult;
import com.knecon.fforesight.service.ocr.processor.visualizations.WritableOcrResultFactory;
import com.knecon.fforesight.service.ocr.v1.api.model.IdpResult;
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
import com.knecon.fforesight.service.viewerdoc.layers.LayerGroup;
import lombok.AccessLevel;
import lombok.experimental.FieldDefaults;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class LayerFactory {
OcrExecutionSupervisor supervisor;
WritableOcrResultFactory writableOcrResultFactory;
OcrResultPostProcessingPipeline ocrResultPostProcessingPipeline;
IdpLayerFactory idpLayerFactory;
OcrDebugLayerFactory ocrDebugLayerFactory;
OcrTextLayerFactory ocrTextLayerFactory;
IdpResultFactory idpResultFactory;
OcrServiceSettings settings;
Set<AzureOcrFeature> features;
Map<Integer, Double> angles;
public LayerFactory(OcrServiceSettings settings, OcrExecutionSupervisor supervisor, ImageProcessingSupervisor imageSupervisor, Map<Integer, PageInformation> pageInformation) {
public LayerFactory(OcrServiceSettings settings,
Set<AzureOcrFeature> features,
OcrExecutionSupervisor supervisor,
Map<Integer, PageInformation> pageInformation,
ImageProcessingPipeline imageProcessingPipeline) {
this.writableOcrResultFactory = new WritableOcrResultFactory(pageInformation, settings, imageSupervisor);
this.idpLayerFactory = new IdpLayerFactory(writableOcrResultFactory.getPageCtms());
this.ocrResultPostProcessingPipeline = new OcrResultPostProcessingPipeline(pageInformation, imageProcessingPipeline, settings, features);
this.idpLayerFactory = new IdpLayerFactory(ocrResultPostProcessingPipeline.getResultToPageTransforms());
this.ocrDebugLayerFactory = new OcrDebugLayerFactory();
this.ocrTextLayerFactory = new OcrTextLayerFactory();
this.settings = settings;
this.features = features;
this.supervisor = supervisor;
this.angles = Collections.synchronizedMap(new HashMap<>());
this.idpResultFactory = new IdpResultFactory(ocrResultPostProcessingPipeline.getResultToPageTransforms(), pageInformation, angles, features);
}
public void addAnalyzeResult(PageBatch pageRange, AnalyzeResult analyzeResult) throws InterruptedException {
public void processAnalyzeResult(PageBatch batch, AnalyzeResult analyzeResult) throws InterruptedException {
List<WritableOcrResult> results = ocrResultPostProcessingPipeline.processAnalyzeResult(analyzeResult, batch);
results.forEach(result -> angles.put(result.getPageNumber(), result.getAngle()));
List<WritableOcrResult> results = writableOcrResultFactory.buildOcrResultToWrite(analyzeResult, pageRange);
ocrTextLayerFactory.addWritableOcrResult(results);
if (settings.isDebug()) {
ocrDebugLayerFactory.addAnalysisResult(results);
}
if (settings.isIdpEnabled()) {
idpLayerFactory.addAnalyzeResult(analyzeResult, pageRange);
if (features.contains(AzureOcrFeature.IDP)) {
idpLayerFactory.addAnalyzeResult(analyzeResult, batch);
idpResultFactory.addAnalyzeResult(analyzeResult, batch);
}
this.supervisor.finishMappingResult(pageRange);
this.supervisor.finishMappingResult(batch);
}
@@ -64,10 +84,11 @@ public class LayerFactory {
if (settings.isDebug()) {
debugLayers.add(ocrDebugLayerFactory.getOcrDebugLayer());
}
if (settings.isIdpEnabled()) {
if (features.contains(AzureOcrFeature.IDP)) {
debugLayers.add(idpLayerFactory.getIdpLayer());
}
return new OcrResult(List.of(ocrTextLayer), debugLayers);
IdpResult idpResult = features.contains(AzureOcrFeature.IDP) ? idpResultFactory.getIdpResult() : null;
return new OcrResult(List.of(ocrTextLayer), debugLayers, angles, idpResult);
}
}

View File

@@ -31,7 +31,7 @@ public class OcrDebugLayer extends OcrDebugLayerConfig {
word.getFontMetricsProvider(),
Optional.of(word.getTextMatrix()),
Optional.of(RenderingMode.FILL)));
bboxOnPage.getColoredLines().addAll(LineUtils.quadPointAsLines(word.getTransformedTextBBox()));
bboxOnPage.getColoredLines().addAll(LineUtils.quadPointAsLines(word.getTransformedTextBBox(), word.isSnugBBox()));
}
@@ -57,4 +57,11 @@ public class OcrDebugLayer extends OcrDebugLayerConfig {
};
}
@Override
public boolean isVisibleByDefault() {
return true;
}
}

View File

@@ -1,9 +1,11 @@
package com.knecon.fforesight.service.ocr.processor.visualizations.layers;
import java.util.List;
import java.util.Map;
import com.knecon.fforesight.service.ocr.v1.api.model.IdpResult;
import com.knecon.fforesight.service.viewerdoc.layers.LayerGroup;
public record OcrResult(List<LayerGroup> regularLayers, List<LayerGroup> debugLayers) {
public record OcrResult(List<LayerGroup> regularLayers, List<LayerGroup> debugLayers, Map<Integer, Double> anglesPerPage, IdpResult idpResult) {
}

View File

@@ -14,12 +14,19 @@ import lombok.experimental.UtilityClass;
@UtilityClass
public class LineUtils {
public List<ColoredLine> quadPointAsLines(QuadPoint rect) {
public List<ColoredLine> quadPointAsLines(QuadPoint rect, boolean tight) {
return List.of(new ColoredLine(new Line2D.Double(rect.a(), rect.b()), Color.ORANGE, 1),
if (tight) {
return List.of(new ColoredLine(new Line2D.Double(rect.a(), rect.b()), Color.GREEN, 1),
new ColoredLine(new Line2D.Double(rect.b(), rect.c()), Color.GREEN, 1),
new ColoredLine(new Line2D.Double(rect.c(), rect.d()), Color.GREEN, 1),
new ColoredLine(new Line2D.Double(rect.d(), rect.a()), Color.GREEN, 1));
}
return List.of(new ColoredLine(new Line2D.Double(rect.a(), rect.b()), Color.BLUE, 1),
new ColoredLine(new Line2D.Double(rect.b(), rect.c()), Color.BLUE, 1),
new ColoredLine(new Line2D.Double(rect.c(), rect.d()), Color.GREEN, 1),
new ColoredLine(new Line2D.Double(rect.d(), rect.a()), Color.MAGENTA, 1));
new ColoredLine(new Line2D.Double(rect.c(), rect.d()), Color.BLUE, 1),
new ColoredLine(new Line2D.Double(rect.d(), rect.a()), Color.BLUE, 1));
}

View File

@@ -0,0 +1,217 @@
package com.knecon.fforesight.service.ocr.processor.visualizations.utils;
import java.awt.geom.AffineTransform;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import com.knecon.fforesight.service.viewerdoc.LayerIdentifier;
import com.pdftron.common.PDFNetException;
import com.pdftron.pdf.PDFDoc;
import com.pdftron.pdf.Page;
import com.pdftron.sdf.Obj;
import com.pdftron.sdf.SDFDoc;
import lombok.SneakyThrows;
import lombok.experimental.UtilityClass;
@UtilityClass
public class RotationCorrectionUtility {
public static final LayerIdentifier KNECON_ROTATION_CORRECTION = new LayerIdentifier(null, "ROTATION_CORRECTION");
@SneakyThrows
public void rotatePages(Path inputFile, Path outputFile, Map<Integer, Double> anglesPerPage) {
Path tmp = Files.createTempFile("tempDocument", ".pdf");
Files.copy(inputFile, tmp, StandardCopyOption.REPLACE_EXISTING);
try (var in = new FileInputStream(tmp.toFile()); var out = new FileOutputStream(outputFile.toFile())) {
rotatePages(in, out, anglesPerPage);
}
Files.deleteIfExists(tmp);
}
@SneakyThrows
public void rotatePages(InputStream in, OutputStream out, Map<Integer, Double> anglesPerPage) {
try (PDFDoc doc = new PDFDoc(in)) {
anglesPerPage.forEach((pageNumber, angle) -> rotatePage(pageNumber, doc, angle));
doc.save(out, SDFDoc.SaveMode.LINEARIZED, null);
}
}
@SneakyThrows
public void rotatePage(int pageNumber, PDFDoc doc, double angle) {
int quadrants = getQuadrantRotation(angle);
Page page = doc.getPage(pageNumber);
page.setRotation((quadrants + page.getRotation()) % 4);
double remainingAngle = getRemainingAngle(angle, quadrants);
Obj contents = page.getContents();
String content = buildRotationContent(remainingAngle, page);
Obj rotationStream = doc.createIndirectStream(content.getBytes());
Obj newContentsArray = doc.createIndirectArray();
newContentsArray.pushBack(rotationStream);
addPreviousContents(contents, newContentsArray);
String closingContent = buildClosingContent();
Obj closingStream = doc.createIndirectStream(closingContent.getBytes());
newContentsArray.pushBack(closingStream);
page.getSDFObj().erase("Contents");
page.getSDFObj().put("Contents", newContentsArray);
}
private String buildClosingContent() {
List<String> closingCommands = new LinkedList<>();
closingCommands.add("Q");
return String.join("\n", closingCommands);
}
private String buildRotationContent(double angle, Page page) throws PDFNetException {
List<String> commands = new LinkedList<>();
double scale = getScalingFactor(angle, page);
double x = page.getCropBox().getWidth() / 2;
double y = page.getCropBox().getHeight() / 2;
commands.add("q");
commands.add("/%s <<>> BDC".formatted(KNECON_ROTATION_CORRECTION.markedContentName()));
commands.add(buildMatrixCommands(AffineTransform.getTranslateInstance(x, y)));
commands.add(buildMatrixCommands(AffineTransform.getRotateInstance(Math.toRadians(angle))));
commands.add(buildMatrixCommands(AffineTransform.getScaleInstance(scale, scale)));
commands.add(buildMatrixCommands(AffineTransform.getTranslateInstance(-x, -y)));
commands.add("EMC");
return String.join("\n", commands);
}
private void addPreviousContents(Obj contents, Obj newContentsArray) throws PDFNetException {
switch (contents.getType()) {
case Obj.e_array -> {
for (int i = 0; i < contents.size(); i++) {
newContentsArray.pushBack(contents.getAt(i));
}
}
case Obj.e_stream -> newContentsArray.pushBack(contents);
default -> throw new IllegalStateException("Unexpected value: " + contents.getType());
}
}
public static double getScalingFactor(double angle, Page page) throws PDFNetException {
double width = page.getPageWidth();
double height = page.getPageHeight();
return getScalingFactor(angle, width, height);
}
public static double getScalingFactor(double angle, double w, double h) {
if (Math.abs(angle) < 20) {
return 1;
}
double sin = Math.abs(Math.sin(Math.toRadians(angle)));
double cos = Math.abs(Math.cos(Math.toRadians(angle)));
double newWidth = w * cos + h * sin;
double newHeight = h * cos + w * sin;
return Math.min(w / newWidth, h / newHeight);
}
public static AffineTransform buildTransform(double angle, double originalWidth, double originalHeight) {
return buildTransform(angle, originalWidth, originalHeight, true);
}
public static AffineTransform buildTransform(double angle, double originalWidth, double originalHeight, boolean quadrantRotation) {
int quadrants = getQuadrantRotation(angle);
double h = originalHeight;
double w = originalWidth;
AffineTransform quadrantRotationTransform = new AffineTransform();
if (quadrantRotation) {
if (quadrants == 1 || quadrants == 3) {
w = originalHeight;
h = originalWidth;
}
quadrantRotationTransform = switch (quadrants) {
case 1 -> new AffineTransform(0, 1, -1, 0, h, 0);
case 2 -> new AffineTransform(-1, 0, 0, -1, w, h);
case 3 -> new AffineTransform(0, -1, 1, 0, w - h, h);
default -> new AffineTransform();
};
}
double remainder = getRemainingAngle(angle, quadrants);
double scale = getScalingFactor(remainder, w, h);
AffineTransform transform = new AffineTransform();
transform.translate(w / 2, h / 2);
transform.rotate(Math.toRadians(remainder));
transform.scale(scale, scale);
transform.translate(-w / 2, -h / 2);
transform.concatenate(quadrantRotationTransform);
return transform;
}
public static int getQuadrantRotation(double angle) {
double remainder = angle % 360;
if (remainder < 0) {
remainder += 360;
}
if (remainder > 315 || remainder <= 45) {
return 0;
} else if (remainder > 45 && remainder <= 135) {
return 1;
} else if (remainder > 135 && remainder <= 225) {
return 2;
} else {
return 3;
}
}
public static double getRemainingAngle(double angle, int quadrants) {
double referenceAngle = 90 * quadrants;
return (angle - referenceAngle) % 360;
}
public static double getRemainingAngle(double angle) {
return getRemainingAngle(angle, getQuadrantRotation(angle));
}
private String buildMatrixCommands(AffineTransform at) {
// PDF content streams require '.' as the decimal separator, so format with Locale.ROOT instead of the default locale
return String.format(Locale.ROOT, "%f %f %f %f %f %f cm", at.getScaleX(), at.getShearX(), at.getShearY(), at.getScaleY(), at.getTranslateX(), at.getTranslateY());
}
}
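The rotation helpers above split an arbitrary angle into a lossless quadrant rotation (handled by the page's `/Rotate` entry) plus a small residual handled in the content stream, with a scaling factor so the rotated content still fits the page. A minimal, self-contained sketch of that math (the three helpers are reimplemented here for illustration only, not the production class):

```java
public final class RotationMathSketch {

    // Mirrors getQuadrantRotation: snap an arbitrary angle to the nearest 90-degree quadrant (0..3).
    public static int quadrant(double angle) {
        double r = angle % 360;
        if (r < 0) {
            r += 360;
        }
        if (r > 315 || r <= 45) {
            return 0;
        } else if (r <= 135) {
            return 1;
        } else if (r <= 225) {
            return 2;
        }
        return 3;
    }

    // Mirrors getRemainingAngle: the residual rotation left after the quadrant snap.
    public static double remaining(double angle) {
        return (angle - 90.0 * quadrant(angle)) % 360;
    }

    // Mirrors getScalingFactor: shrink so the rotated bounding box (w*cos + h*sin by h*cos + w*sin) fits.
    public static double scale(double angle, double w, double h) {
        if (Math.abs(angle) < 20) {
            return 1;
        }
        double sin = Math.abs(Math.sin(Math.toRadians(angle)));
        double cos = Math.abs(Math.cos(Math.toRadians(angle)));
        return Math.min(w / (w * cos + h * sin), h / (h * cos + w * sin));
    }

    public static void main(String[] args) {
        // 100 degrees = one quadrant (90) plus a 10-degree residual for the content stream.
        assert quadrant(100) == 1;
        assert Math.abs(remaining(100) - 10) < 1e-9;
        // Residuals under 20 degrees are left unscaled; a 30-degree residual on A4 shrinks the page.
        assert scale(10, 595, 842) == 1.0;
        assert scale(30, 595, 842) < 1.0;
        System.out.println("ok");
    }
}
```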

View File

@ -4,8 +4,8 @@ import java.io.File;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.HashSet;
import java.util.Set;
import java.util.LinkedList;
import java.util.List;
import org.apache.pdfbox.Loader;
import org.junit.jupiter.api.BeforeEach;
@ -13,11 +13,16 @@ import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.springframework.core.io.ClassPathResource;
import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
import com.knecon.fforesight.service.ocr.processor.initializer.NativeLibrariesInitializer;
import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.GhostScriptService;
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingPipeline;
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingService;
import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingSupervisor;
import com.knecon.fforesight.service.ocr.processor.utils.OsUtils;
import com.pdftron.pdf.PDFDoc;
import com.pdftron.pdf.PDFNet;
import com.sun.jna.NativeLibrary;
import lombok.SneakyThrows;
@ -31,13 +36,11 @@ class ImageProcessingPipelineTest {
@BeforeEach
public void setup() {
System.setProperty("jna.library.path", System.getenv("VCPKG_DYNAMIC_LIB"));
try (NativeLibrary leptonicaLib = NativeLibrary.getInstance("leptonica")) {
assert leptonicaLib != null;
}
new NativeLibrariesInitializer("demo:1650351709282:7bd235e003000000004ec28a6743e1163a085e2115de2536ab6e2cfe5a", "/home/kschuettler/software/leptonica/vcpkg/installed/x64-linux-dynamic/lib/").init();
ImageProcessingService imageProcessingService = new ImageProcessingService();
GhostScriptService ghostScriptService = new GhostScriptService();
OcrServiceSettings settings = new OcrServiceSettings();
ImageProcessingService imageProcessingService = new ImageProcessingService(settings);
GhostScriptService ghostScriptService = new GhostScriptService(settings);
imageProcessingPipeline = new ImageProcessingPipeline(ghostScriptService, imageProcessingService);
}
@ -46,7 +49,7 @@ class ImageProcessingPipelineTest {
@SneakyThrows
public void testImageProcessingPipeline() {
String fileName = "/home/kschuettler/Dokumente/TestFiles/OCR/VV-331340.pdf";
String fileName = "/home/kschuettler/Dokumente/TestFiles/OCR/VV-331340/VV-331340_OCRED_first15.pdf";
File file;
if (fileName.startsWith("files")) {
@ -63,21 +66,26 @@ class ImageProcessingPipelineTest {
Files.copy(file.toPath(), documentFile, StandardCopyOption.REPLACE_EXISTING);
int numberOfpages;
try (var doc = Loader.loadPDF(file)) {
numberOfpages = doc.getNumberOfPages();
}
Set<Integer> pageNumbers = new HashSet<>();
for (int i = 1; i <= numberOfpages; i++) {
if (i % 2 == 0) {
continue;
try (var doc = new PDFDoc(fileName)) {
List<Integer> pageNumbers = new LinkedList<>();
for (int i = 1; i <= doc.getPageCount(); i++) {
if (i % 2 == 0) {
continue;
}
pageNumbers.add(i);
}
pageNumbers.add(i);
PageBatch batch = BatchFactory.create(0, doc, pageNumbers, tmpDir);
ImageProcessingSupervisor supervisor = imageProcessingPipeline.addToPipeline(batch);
batch.forEach(pageNumber -> {
try {
assert supervisor.awaitProcessedPage(pageNumber) != null;
} catch (Exception e) {
e.printStackTrace();
}
});
}
ImageProcessingSupervisor supervisor = imageProcessingPipeline.run(pageNumbers, tmpDir.resolve("images"), documentFile.toFile());
supervisor.awaitAll();
}
}

View File

@ -0,0 +1,70 @@
package com.knecon.fforesight.service.ocr.processor.service;
import static com.knecon.fforesight.service.ocr.processor.visualizations.utils.RotationCorrectionUtility.KNECON_ROTATION_CORRECTION;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import com.knecon.fforesight.service.ocr.processor.visualizations.utils.RotationCorrectionUtility;
import com.knecon.fforesight.service.viewerdoc.service.PageContentCleaner;
import com.pdftron.pdf.ElementReader;
import com.pdftron.pdf.ElementWriter;
import com.pdftron.pdf.PDFDoc;
import com.pdftron.pdf.PDFNet;
import com.pdftron.pdf.Page;
import com.pdftron.pdf.PageIterator;
import com.pdftron.sdf.SDFDoc;
import lombok.SneakyThrows;
@Disabled // leptonica is not available on the build server
public class PageRotationTest {
@BeforeAll
public static void setUp() {
PDFNet.initialize("demo:1650351709282:7bd235e003000000004ec28a6743e1163a085e2115de2536ab6e2cfe5a");
}
@Test
public void putRotation() {
Map<Integer, Double> angles = new HashMap<>();
for (int i = 1; i <= 100; i++) {
double a = -90 + (i * ((double) 180 / 100));
angles.put(i, a);
}
Path inputFile = Path.of("/home/kschuettler/Dokumente/TestFiles/OCR/VV-331340-first100.pdf");
RotationCorrectionUtility.rotatePages(inputFile, Path.of("/tmp").resolve(inputFile.getFileName() + "_rotated.pdf"), angles);
}
@Test
@SneakyThrows
public void removeRotation() {
Path inputFile = Path.of("/tmp/VV-331340-first100.pdf_rotated.pdf");
try (var doc = new PDFDoc(inputFile.toFile()
.toString()); var reader = new ElementReader(); var writer = new ElementWriter(); PageIterator pageIterator = doc.getPageIterator()) {
PageContentCleaner cleaner = PageContentCleaner.builder()
.reader(reader)
.writer(writer)
.markedContentToRemove(Set.of(KNECON_ROTATION_CORRECTION.markedContentName()))
.build();
while (pageIterator.hasNext()) {
Page page = pageIterator.next();
cleaner.removeMarkedContent(page);
}
doc.save(inputFile.resolveSibling(inputFile.getFileName() + "_derotated.pdf").toFile().toString(), SDFDoc.SaveMode.LINEARIZED, null);
}
}
}
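The `removeRotation` test works because `buildRotationContent` wraps the injected transform in a `BDC`/`EMC` marked-content section, which `PageContentCleaner` can later strip by name. A standalone sketch of the generated content-stream prefix (the tag string is illustrative; the real name comes from `KNECON_ROTATION_CORRECTION`, and the operand order matches `buildMatrixCommands` above):

```java
import java.awt.geom.AffineTransform;
import java.util.Locale;

public final class RotationContentSketch {

    // Hypothetical marked-content tag; the production code uses KNECON_ROTATION_CORRECTION.markedContentName().
    static final String TAG = "KneconRotationCorrection";

    // PDF "cm" takes six operands; Locale.ROOT keeps '.' as the decimal separator.
    public static String cm(AffineTransform at) {
        return String.format(Locale.ROOT, "%f %f %f %f %f %f cm",
                at.getScaleX(), at.getShearX(), at.getShearY(), at.getScaleY(),
                at.getTranslateX(), at.getTranslateY());
    }

    // Rotate around the page centre (x, y) by angle degrees, scaled so content keeps fitting.
    public static String rotationContent(double angle, double scale, double x, double y) {
        return String.join("\n",
                "q",                                     // save graphics state
                "/" + TAG + " <<>> BDC",                 // open the removable marked-content section
                cm(AffineTransform.getTranslateInstance(x, y)),
                cm(AffineTransform.getRotateInstance(Math.toRadians(angle))),
                cm(AffineTransform.getScaleInstance(scale, scale)),
                cm(AffineTransform.getTranslateInstance(-x, -y)),
                "EMC");                                  // close the marked-content section
    }

    public static void main(String[] args) {
        String content = rotationContent(10, 0.95, 297.5, 421.0);
        // The original page streams follow this prefix; a separate trailing stream restores state with "Q".
        assert content.startsWith("q\n/" + TAG);
        assert content.endsWith("EMC");
        System.out.println(content);
    }
}
```

Because the whole transform lives between `BDC` and `EMC`, de-rotation is a pure content-stream edit: no geometry has to be inverted, the section is simply removed.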

View File

@ -0,0 +1,232 @@
package com.knecon.fforesight.service.ocr.processor.service;
import java.awt.Color;
import java.awt.geom.AffineTransform;
import java.awt.geom.Line2D;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.nio.file.Path;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import com.azure.ai.documentintelligence.models.AnalyzeResult;
import com.azure.json.JsonOptions;
import com.azure.json.JsonReader;
import com.azure.json.implementation.DefaultJsonReader;
import com.iqser.red.pdftronlogic.commons.InvisibleElementRemovalService;
import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
import com.knecon.fforesight.service.ocr.processor.initializer.NativeLibrariesInitializer;
import com.knecon.fforesight.service.ocr.processor.model.ImageFile;
import com.knecon.fforesight.service.ocr.processor.model.PageInformation;
import com.knecon.fforesight.service.ocr.processor.model.TextPositionInImage;
import com.knecon.fforesight.service.ocr.processor.visualizations.WritableOcrResult;
import com.knecon.fforesight.service.ocr.processor.visualizations.layers.OcrDebugLayerFactory;
import com.knecon.fforesight.service.ocr.processor.visualizations.utils.RotationCorrectionUtility;
import com.knecon.fforesight.service.ocr.v1.api.model.QuadPoint;
import com.knecon.fforesight.service.viewerdoc.service.PDFTronViewerDocumentService;
import com.pdftron.pdf.ColorPt;
import com.pdftron.pdf.ColorSpace;
import com.pdftron.pdf.Element;
import com.pdftron.pdf.ElementBuilder;
import com.pdftron.pdf.ElementWriter;
import com.pdftron.pdf.PDFDoc;
import com.pdftron.pdf.Page;
import lombok.SneakyThrows;
@Disabled // leptonica is not available on the build server
public class SnugBoxesTest {
public static final int PAGE_NUMBER = 41;
public static final Path ORIGIN_FILE = Path.of("/home/kschuettler/Dokumente/TestFiles/OCR/VV-331340-first100.pdf");
public static final Path TEST_FOLDER = Path.of("/tmp/OCR_TEST/").resolve(ORIGIN_FILE.getFileName());
public static final Path BATCH_FOLDER = TEST_FOLDER.resolve("batch_0");
public static final Path DESTINATION_FILE = BATCH_FOLDER.resolve("SnugBoxesTest.pdf");
PDFTronViewerDocumentService viewerDocumentService = new PDFTronViewerDocumentService(null);
@BeforeAll
public static void setUp() {
new NativeLibrariesInitializer("demo:1650351709282:7bd235e003000000004ec28a6743e1163a085e2115de2536ab6e2cfe5a", "/home/kschuettler/software/leptonica/vcpkg/installed/x64-linux-dynamic/lib/").init();
}
@Test
@SneakyThrows
public void snugBoxes() {
String filePath = ORIGIN_FILE.toFile().toString();
File file = new File(filePath);
assert file.exists();
ImageFile imageFile = new ImageFile(PAGE_NUMBER, file.toString());
AnalyzeResult result = null;
try (var in = new FileInputStream(BATCH_FOLDER.resolve("analyzeResult.json").toFile()); JsonReader reader = DefaultJsonReader.fromStream(in, new JsonOptions());) {
result = AnalyzeResult.fromJson(reader);
}
var resultPage = result.getPages()
.get(PAGE_NUMBER - 1);
OcrResultPostProcessingPipeline ocrResultPostProcessingPipeline = new OcrResultPostProcessingPipeline(null, null, new OcrServiceSettings(), Set.of());
OcrDebugLayerFactory debugLayerFactory = new OcrDebugLayerFactory();
InvisibleElementRemovalService invisibleElementRemovalService = new InvisibleElementRemovalService();
try (var in = new FileInputStream(ORIGIN_FILE.toFile()); var out = new FileOutputStream(DESTINATION_FILE.toFile())) {
invisibleElementRemovalService.removeInvisibleElements(in, out, false);
}
PageInformation pageInformation = getPageInformation(PAGE_NUMBER, DESTINATION_FILE.toFile().toString());
OcrResultPostProcessingPipeline.Lookups empty = OcrResultPostProcessingPipeline.Lookups.empty();
AffineTransform pageCtm = getPageCtm(PAGE_NUMBER, filePath, resultPage.getWidth());
// pageCtm.preConcatenate(rotationCorrection);
// pageCtm.preConcatenate(quadrantTransform);
// Pix pageImage = imageFile.readPix();
// AffineTransform imageTransform = WritableOcrResultFactory.buildImageTransform(resultPage, pageImage);
// List<Rectangle2D> rects = new LinkedList<>();
// for (DocumentWord word : resultPage.getWords()) {
// QuadPoint quadPoint = QuadPoint.fromPolygons(word.getPolygon());
// Rectangle2D rect = quadPoint.getTransformed(imageTransform).getBounds2D();
// if (rect.getX() > 0 && rect.getY() > 0 && rect.getMaxX() < pageImage.w && rect.getMaxY() < pageImage.h) {
// rects.add(rect);
// }
// }
// Boxa boxa = createBoxaFromRectangles(rects);
// Pix drawedPix = Leptonica1.pixDrawBoxa(pageImage, boxa, 5, 1);
// Leptonica1.pixWrite("/tmp/OCR_TEST/VV-331340-first100.pdf/image_pipeline/page_" + PAGE_NUMBER + ".tiff", drawedPix, 5);
//
List<TextPositionInImage> words = ocrResultPostProcessingPipeline.buildTextWithSnugBBoxes(resultPage, imageFile, pageCtm, empty, pageInformation);
var results = new WritableOcrResult(PAGE_NUMBER, -resultPage.getAngle(), words, Collections.emptyList());
debugLayerFactory.addAnalysisResult(List.of(results));
// try (var doc = new PDFDoc(tmpFile.toString()); var out = new FileOutputStream(DESTINATION_FILE.toFile())) {
// PageRotationHelper.rotatePage(PAGE_NUMBER, doc, -resultPage.getAngle());
// var rects = resultPage.getWords()
// .stream()
// .map(DocumentWord::getPolygon)
// .map(QuadPoint::fromPolygons)
// .map(qp -> qp.getTransformed(pageCtm))
// .toList();
// drawRects(doc, rects, PAGE_NUMBER);
// doc.save(out, SDFDoc.SaveMode.LINEARIZED, null);
// }
// Files.deleteIfExists(tmpFile);
viewerDocumentService.addLayerGroups(DESTINATION_FILE.toFile(), DESTINATION_FILE.toFile(), List.of(debugLayerFactory.getOcrDebugLayer()));
RotationCorrectionUtility.rotatePages(DESTINATION_FILE, DESTINATION_FILE, Map.of(PAGE_NUMBER, -resultPage.getAngle()));
}
//
// private static List<Rectangle2D> readRectsFromBoxa(Boxa boxa) {
//
// Pointer[] pointers = boxa.box.getPointer().getPointerArray(0, boxa.n);
// List<Rectangle2D> boxes = new ArrayList<>(boxa.n);
// for (int i = 0; i < boxa.n; i++) {
// Box box = new Box(pointers[i]);
// boxes.add(new Rectangle2D.Double(box.x, box.y, box.w, box.h));
// LeptUtils.dispose(box);
// }
// return boxes;
// }
//
//
// @SuppressWarnings("PMD") // Memory will be de-allocated with boxa
// public static Boxa createBoxaFromRectangles(List<Rectangle2D> rectangles) {
//
// if (rectangles.isEmpty()) {
// return new Boxa();
// }
//
// int n = rectangles.size(); // Number of rectangles
// int nalloc = n; // Allocating memory for exactly 'n' boxes
// int refcount = 1; // Default refcount
//
// Pointer boxPointerArray = new Memory((long) Native.POINTER_SIZE * n); // Memory for n pointers
//
// for (int i = 0; i < n; i++) {
//
// Rectangle2D rect = rectangles.get(i);
// var mem = new Memory(20L);
// mem.setInt(0, (int) rect.getX());
// mem.setInt(4, (int) rect.getY());
// mem.setInt(8, (int) rect.getWidth());
// mem.setInt(12, (int) rect.getHeight());
// mem.setInt(16, refcount);
//
// // Write the pointer of each Box into the native memory
// boxPointerArray.setPointer((long) Native.POINTER_SIZE * i, mem);
// }
//
// // Create a PointerByReference pointing to the native memory of the array
// PointerByReference boxPointerRef = new PointerByReference();
// boxPointerRef.setPointer(boxPointerArray);
//
// // Create the Boxa instance
//
// return new Boxa(n, nalloc, refcount, boxPointerRef);
// }
@SneakyThrows
private void drawRects(PDFDoc doc, List<QuadPoint> quadPoints, int pageNumber) {
try (ElementWriter writer = new ElementWriter(); ElementBuilder builder = new ElementBuilder()) {
Page page = doc.getPage(pageNumber);
writer.begin(page, ElementWriter.e_overlay);
for (QuadPoint quadPoint : quadPoints) {
quadPoint.asLines()
.forEach(line -> {
drawLine(line, builder, writer);
});
}
writer.end();
}
}
@SneakyThrows
private static void drawLine(Line2D l, ElementBuilder builder, ElementWriter writer) {
float[] rgbComponents = Color.BLUE.getRGBColorComponents(null);
builder.pathBegin();
builder.moveTo(l.getX1(), l.getY1());
builder.lineTo(l.getX2(), l.getY2());
Element line = builder.pathEnd();
line.setPathStroke(true);
line.setPathFill(false);
line.getGState().setLineWidth(1);
line.getGState().setStrokeColorSpace(ColorSpace.createDeviceRGB());
try (ColorPt color = new ColorPt(rgbComponents[0], rgbComponents[1], rgbComponents[2])) {
line.getGState().setStrokeColor(color);
}
writer.writeElement(line);
}
@SneakyThrows
private static AffineTransform getPageCtm(int pageNumber, String file, double imageWidth) {
return OcrResultPostProcessingPipeline.buildResultToPageTransform(getPageInformation(pageNumber, file), imageWidth);
}
@SneakyThrows
private static PageInformation getPageInformation(int pageNumber, String file) {
try (var in = new FileInputStream(file); var doc = new PDFDoc(in)) {
return PageInformation.fromPage(pageNumber, doc.getPage(pageNumber));
}
}
}

View File

@ -20,7 +20,7 @@ class Type0FontMetricsProviderTest {
try (PDDocument document = Loader.loadPDF(new File(Type0FontMetricsProviderTest.class.getClassLoader().getResource("InvisibleText.pdf").getPath()))) {
Type0FontMetricsProvider metricsFactory = Type0FontMetricsProvider.regular(document);
FontMetrics fontMetrics = metricsFactory.calculateMetrics("deine mutter", 100, 50);
FontMetrics fontMetrics = metricsFactory.calculateMetricsForAzureBBox("deine mutter", 100, 50);
}
}

View File

@ -8,6 +8,9 @@ plugins {
id("org.sonarqube") version "4.3.0.3225"
id("io.freefair.lombok") version "8.4"
}
pmd {
isConsoleOutput = true
}
configurations {
all {
@ -24,15 +27,21 @@ dependencies {
implementation(project(":azure-ocr-service-api"))
implementation("com.knecon.fforesight:tracing-commons:0.5.0")
implementation("io.github.openfeign:feign-core:12.4")
implementation("org.springframework.cloud:spring-cloud-starter-openfeign:4.1.1")
implementation("org.springframework.boot:spring-boot-starter-amqp:${springBootStarterVersion}")
implementation("com.iqser.red.service:persistence-service-internal-api-v1:2.224.0")
implementation("com.knecon.fforesight:tenant-commons:0.31.0")
implementation("com.iqser.red.commons:storage-commons:2.50.0")
implementation("net.logstash.logback:logstash-logback-encoder:7.4")
implementation("ch.qos.logback:logback-classic")
testImplementation("org.springframework.boot:spring-boot-starter-test:${springBootStarterVersion}")
testImplementation("com.iqser.red.commons:test-commons:2.1.0")
testImplementation("org.springframework.amqp:spring-rabbit-test:3.0.2")
testImplementation("com.iqser.red.commons:pdftron-logic-commons:2.32.0")
}
tasks.named<BootBuildImage>("bootBuildImage") {

View File

@ -9,11 +9,9 @@ import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Import;
import org.springframework.scheduling.annotation.EnableAsync;
import com.iqser.red.pdftronlogic.commons.InvisibleElementRemovalService;
import com.iqser.red.pdftronlogic.commons.WatermarkRemovalService;
import com.iqser.red.storage.commons.StorageAutoConfiguration;
import com.knecon.fforesight.service.ocr.processor.OcrServiceProcessorConfiguration;
import com.knecon.fforesight.service.ocr.v1.server.configuration.MessagingConfiguration;
import com.iqser.red.storage.commons.StorageAutoConfiguration;
import com.knecon.fforesight.tenantcommons.MultiTenancyAutoConfiguration;
import io.micrometer.core.aop.TimedAspect;
@ -43,17 +41,4 @@ public class Application {
}
@Bean
public InvisibleElementRemovalService invisibleElementRemovalService() {
return new InvisibleElementRemovalService();
}
@Bean
public WatermarkRemovalService watermarkRemovalService() {
return new WatermarkRemovalService();
}
}

View File

@ -1,4 +1,4 @@
package com.knecon.fforesight.service.ocr.processor.service;
package com.knecon.fforesight.service.ocr.v1.server;
import java.io.File;
import java.io.FileInputStream;
@ -33,20 +33,23 @@ public class FileStorageService {
public void storeFiles(DocumentRequest request, File documentFile, File viewerDocumentFile, File analyzeResultFile) {
try (var in = new FileInputStream(viewerDocumentFile)) {
if (request.optionalViewerDocumentId().isPresent()) {
if (request.optionalViewerDocumentId()
.isPresent()) {
storageService.storeObject(TenantContext.getTenantId(), request.getViewerDocId(), in);
} else {
storageService.storeObject(TenantContext.getTenantId(), getStorageId(request.getDossierId(), request.getFileId(), FileType.VIEWER_DOCUMENT), in);
}
}
try (var in = new FileInputStream(documentFile)) {
if (request.optionalOriginDocumentId().isPresent()) {
if (request.optionalOriginDocumentId()
.isPresent()) {
storageService.storeObject(TenantContext.getTenantId(), request.getOriginDocumentId(), in);
} else {
storageService.storeObject(TenantContext.getTenantId(), getStorageId(request.getDossierId(), request.getFileId(), FileType.ORIGIN), in);
}
}
if (request.optionalIdpResultId().isPresent()) {
if (request.optionalIdpResultId()
.isPresent() && analyzeResultFile.exists()) {
try (var in = new FileInputStream(analyzeResultFile)) {
storageService.storeObject(TenantContext.getTenantId(), request.getIdpResultId(), in);
}
@ -59,7 +62,8 @@ public class FileStorageService {
Files.createDirectories(documentFile.getParentFile().toPath());
String originDocumentId = request.optionalOriginDocumentId().orElse(getStorageId(request.getDossierId(), request.getFileId(), FileType.ORIGIN));
String originDocumentId = request.optionalOriginDocumentId()
.orElse(getStorageId(request.getDossierId(), request.getFileId(), FileType.ORIGIN));
storageService.downloadTo(TenantContext.getTenantId(), originDocumentId, documentFile);

View File

@ -1,5 +1,9 @@
package com.knecon.fforesight.service.ocr.v1.server.configuration;
import org.springframework.amqp.core.DirectExchange;
import org.springframework.amqp.core.Queue;
import org.springframework.amqp.core.QueueBuilder;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import lombok.RequiredArgsConstructor;
@ -8,10 +12,26 @@ import lombok.RequiredArgsConstructor;
@RequiredArgsConstructor
public class MessagingConfiguration {
public static final String OCR_REQUEST_QUEUE = "ocr_request_queue";
public static final String OCR_RESPONSE_QUEUE = "ocr_response_queue";
public static final String OCR_STATUS_UPDATE_RESPONSE_QUEUE = "ocr_status_update_response_queue";
public static final String OCR_REQUEST_QUEUE_PREFIX = "ocr_request";
public static final String OCR_REQUEST_EXCHANGE = "ocr_request_exchange";
public static final String OCR_DLQ = "ocr_error";
public static final String OCR_RESPONSE_EXCHANGE = "ocr_response_exchange";
public static final String OCR_STATUS_UPDATE_RESPONSE_EXCHANGE = "ocr_status_update_response_exchange";
public static final String OCR_STATUS_UPDATE_DLQ = "ocr_status_update_error";
public static final String X_ERROR_INFO_HEADER = "x-error-message";
public static final String X_ERROR_INFO_TIMESTAMP_HEADER = "x-error-message-timestamp";
@Bean
public DirectExchange ocrRequestExchange() {
return new DirectExchange(OCR_REQUEST_EXCHANGE);
}
@Bean
public Queue ocrDLQ() {
return QueueBuilder.durable(OCR_DLQ).build();
}
}

View File

@ -0,0 +1,32 @@
package com.knecon.fforesight.service.ocr.v1.server.configuration;
import static com.knecon.fforesight.service.ocr.v1.server.configuration.MessagingConfiguration.OCR_DLQ;
import static com.knecon.fforesight.service.ocr.v1.server.configuration.MessagingConfiguration.OCR_REQUEST_EXCHANGE;
import static com.knecon.fforesight.service.ocr.v1.server.configuration.MessagingConfiguration.OCR_REQUEST_QUEUE_PREFIX;
import java.util.Map;
import java.util.Set;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import com.knecon.fforesight.service.ocr.v1.server.queue.OcrMessageReceiver;
import com.knecon.fforesight.tenantcommons.model.TenantQueueConfiguration;
import com.knecon.fforesight.tenantcommons.model.TenantQueueProvider;
@Configuration
public class TenantQueueProviderConfig {
@Bean
protected TenantQueueProvider getTenantQueueConfigs() {
return new TenantQueueProvider(Set.of(TenantQueueConfiguration.builder()
.listenerId(OcrMessageReceiver.OCR_REQUEST_LISTENER_ID)
.exchangeName(OCR_REQUEST_EXCHANGE)
.queuePrefix(OCR_REQUEST_QUEUE_PREFIX)
.dlqName(OCR_DLQ)
.arguments(Map.of("x-max-priority", 2))
.build()));
}
}
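`TenantQueueProviderConfig` only declares the shared pieces (exchange, queue prefix, DLQ, priority argument); the per-tenant queue itself is derived from the prefix by `TenantQueueProvider`. A rough sketch of that derivation, assuming a `<prefix>_<tenantId>` naming convention (the helper and the convention are assumptions for illustration, not the actual tenant-commons API):

```java
import java.util.Map;

public final class TenantQueueNamingSketch {

    // Assumed convention: one durable queue per tenant, named from the shared prefix.
    public static String queueName(String prefix, String tenantId) {
        return prefix + "_" + tenantId;
    }

    public static void main(String[] args) {
        // Mirrors the OCR request configuration: shared prefix plus a priority argument
        // so urgent documents can jump the queue (x-max-priority = 2).
        String prefix = "ocr_request";
        Map<String, Object> arguments = Map.of("x-max-priority", 2);

        assert queueName(prefix, "tenant-a").equals("ocr_request_tenant-a");
        assert arguments.get("x-max-priority").equals(2);
        System.out.println(queueName(prefix, "tenant-a"));
    }
}
```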

View File

@ -1,10 +1,13 @@
package com.knecon.fforesight.service.ocr.v1.server.queue;
import java.util.Set;
import org.springframework.amqp.rabbit.core.RabbitTemplate;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.stereotype.Service;
import com.knecon.fforesight.service.ocr.processor.service.IOcrMessageSender;
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
import com.knecon.fforesight.service.ocr.v1.api.model.DocumentRequest;
import com.knecon.fforesight.service.ocr.v1.server.configuration.MessagingConfiguration;
import com.knecon.fforesight.tenantcommons.TenantContext;
@ -22,24 +25,24 @@ public class NoStatusUpdateOcrMessageSender implements IOcrMessageSender {
RabbitTemplate rabbitTemplate;
public void sendOcrFinished(String fileId, int totalImages) {
public void sendOcrFinished(String fileId, int totalImages, Set<AzureOcrFeature> features) {
}
public void sendOCRStarted(String fileId) {
public void sendOCRStarted(String fileId, Set<AzureOcrFeature> features) {
}
public void sendUpdate(String fileId, int finishedImages, int totalImages) {
public void sendUpdate(String fileId, int finishedImages, int totalImages, Set<AzureOcrFeature> features) {
}
public void sendOcrResponse(String dossierId, String fileId) {
public void sendOcrResponse(DocumentRequest request) {
rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_RESPONSE_QUEUE, TenantContext.getTenantId(), new DocumentRequest(dossierId, fileId));
rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_RESPONSE_EXCHANGE, TenantContext.getTenantId(), request);
}
}

View File

@ -2,11 +2,13 @@ package com.knecon.fforesight.service.ocr.v1.server.queue;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.OffsetDateTime;
import java.time.temporal.ChronoUnit;
import java.util.Objects;
import java.util.stream.Collectors;
import org.slf4j.MDC;
import org.springframework.amqp.AmqpRejectAndDontRequeueException;
import org.springframework.amqp.core.Message;
import org.springframework.amqp.rabbit.annotation.RabbitHandler;
@ -15,7 +17,8 @@ import org.springframework.stereotype.Service;
import org.springframework.util.FileSystemUtils;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.knecon.fforesight.service.ocr.processor.service.FileStorageService;
import com.knecon.fforesight.service.ocr.processor.utils.OsUtils;
import com.knecon.fforesight.service.ocr.v1.server.FileStorageService;
import com.knecon.fforesight.service.ocr.processor.service.IOcrMessageSender;
import com.knecon.fforesight.service.ocr.processor.service.OCRService;
import com.knecon.fforesight.service.ocr.v1.api.model.DocumentRequest;
@ -32,6 +35,11 @@ import lombok.extern.slf4j.Slf4j;
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class OcrMessageReceiver {
public static final String OCR_REQUEST_LISTENER_ID = "ocr-request-listener";
public static final String IDP_RESULT_FILE_NAME = "idpResult.json";
public static final String VIEWER_DOCUMENT_FILE_NAME = "viewerDocument.pdf";
public static final String DOCUMENT_FILE_NAME = "document.pdf";
FileStorageService fileStorageService;
ObjectMapper objectMapper;
OCRService ocrService;
@ -39,7 +47,7 @@ public class OcrMessageReceiver {
@RabbitHandler
@RabbitListener(queues = MessagingConfiguration.OCR_REQUEST_QUEUE, concurrency = "1")
@RabbitListener(id = OCR_REQUEST_LISTENER_ID, concurrency = "1")
public void receiveOcr(Message in) throws IOException {
if (in.getMessageProperties().isRedelivered()) {
@ -49,32 +57,35 @@ public class OcrMessageReceiver {
DocumentRequest request = objectMapper.readValue(in.getBody(), DocumentRequest.class);
String dossierId = request.getDossierId();
String fileId = request.getFileId();
Path tmpDir = Files.createTempDirectory(null);
Path runDir = Path.of(OsUtils.getTemporaryDirectory()).resolve(request.getDossierId()).resolve(request.getFileId());
try {
log.info("--------------------------------------------------------------------------");
log.info("Start ocr for file with dossierId {} and fileId {}", dossierId, fileId);
MDC.put("fileId", fileId);
log.info("--------------------------------- Starting OCR ---------------------------------");
log.info("Features: {}", request.getFeatures().stream().map(Objects::toString).collect(Collectors.joining(", ")));
ocrMessageSender.sendOCRStarted(fileId, request.getFeatures());
ocrMessageSender.sendOCRStarted(fileId);
File documentFile = tmpDir.resolve("document.pdf").toFile();
File viewerDocumentFile = tmpDir.resolve("viewerDocument.pdf").toFile();
File analyzeResultFile = tmpDir.resolve("azureAnalysisResult.json").toFile();
File documentFile = runDir.resolve(DOCUMENT_FILE_NAME).toFile();
File viewerDocumentFile = runDir.resolve(VIEWER_DOCUMENT_FILE_NAME).toFile();
File idpResultFile = runDir.resolve(IDP_RESULT_FILE_NAME).toFile();
fileStorageService.downloadFiles(request, documentFile);
ocrService.runOcrOnDocument(dossierId, fileId, request.isRemoveWatermarks(), tmpDir, documentFile, viewerDocumentFile, analyzeResultFile);
ocrService.runOcrOnDocument(dossierId, fileId, request.getFeatures(), runDir, documentFile, viewerDocumentFile, idpResultFile);
fileStorageService.storeFiles(request, documentFile, viewerDocumentFile, analyzeResultFile);
fileStorageService.storeFiles(request, documentFile, viewerDocumentFile, idpResultFile);
ocrMessageSender.sendOcrResponse(dossierId, fileId);
ocrMessageSender.sendOcrResponse(request);
} catch (Exception e) {
log.warn("An exception occurred in ocr file stage: {}", e.getMessage());
in.getMessageProperties().getHeaders().put(MessagingConfiguration.X_ERROR_INFO_HEADER, e.getMessage());
in.getMessageProperties().getHeaders().put(MessagingConfiguration.X_ERROR_INFO_TIMESTAMP_HEADER, OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS));
throw new RuntimeException(e);
} finally {
FileSystemUtils.deleteRecursively(tmpDir);
log.info("Done");
log.info("--------------------------------- Done ---------------------------------");
MDC.remove("fileId");
FileSystemUtils.deleteRecursively(runDir);
}
}
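The receiver change above replaces the anonymous temp directory with a deterministic per-document run directory (`<tmp>/<dossierId>/<fileId>`), which makes the file-based caches survive a retry while the `finally` block still guarantees cleanup on completion. A minimal standalone sketch of that lifecycle (directory names and the cleanup helper are illustrative, not the production code):

```java
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Comparator;
import java.util.stream.Stream;

public final class RunDirSketch {

    // Per-request working directory, mirroring OcrMessageReceiver's runDir layout.
    public static Path runDir(Path tmpRoot, String dossierId, String fileId) {
        return tmpRoot.resolve(dossierId).resolve(fileId);
    }

    // Stand-in for Spring's FileSystemUtils.deleteRecursively: delete children before parents.
    public static void deleteRecursively(Path dir) throws IOException {
        if (!Files.exists(dir)) {
            return;
        }
        try (Stream<Path> walk = Files.walk(dir)) {
            walk.sorted(Comparator.reverseOrder()).forEach(p -> p.toFile().delete());
        }
    }

    public static void main(String[] args) throws IOException {
        Path root = Files.createTempDirectory("ocr-sketch");
        Path dir = runDir(root, "dossier-1", "file-1");
        Files.createDirectories(dir);
        Files.writeString(dir.resolve("document.pdf"), "placeholder");
        try {
            // ... download, OCR and upload would happen here; a retry finds cached files ...
            assert Files.exists(dir.resolve("document.pdf"));
        } finally {
            // Cleanup always runs, like FileSystemUtils.deleteRecursively(runDir) in the finally block.
            deleteRecursively(root);
        }
        assert !Files.exists(root);
        System.out.println("cleaned up");
    }
}
```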

View File

@ -1,10 +1,13 @@
package com.knecon.fforesight.service.ocr.v1.server.queue;
import java.util.Set;
import org.springframework.amqp.rabbit.core.RabbitTemplate;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.stereotype.Service;
import com.knecon.fforesight.service.ocr.processor.service.IOcrMessageSender;
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
import com.knecon.fforesight.service.ocr.v1.api.model.DocumentRequest;
import com.knecon.fforesight.service.ocr.v1.api.model.OCRStatusUpdateResponse;
import com.knecon.fforesight.service.ocr.v1.server.configuration.MessagingConfiguration;
@ -25,35 +28,46 @@ public class OcrMessageSender implements IOcrMessageSender {
RabbitTemplate rabbitTemplate;
public void sendOcrFinished(String fileId, int totalImages) {
public void sendOcrFinished(String fileId, int totalImages, Set<AzureOcrFeature> features) {
rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_QUEUE,
rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_EXCHANGE,
TenantContext.getTenantId(),
OCRStatusUpdateResponse.builder().fileId(fileId).numberOfPagesToOCR(totalImages).numberOfOCRedPages(totalImages).ocrFinished(true).build());
OCRStatusUpdateResponse.builder()
.fileId(fileId)
.numberOfPagesToOCR(totalImages)
.numberOfOCRedPages(totalImages)
.ocrFinished(true)
.features(features)
.build());
}
public void sendOCRStarted(String fileId) {
public void sendOCRStarted(String fileId, Set<AzureOcrFeature> features) {
rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_QUEUE,
rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_EXCHANGE,
TenantContext.getTenantId(),
OCRStatusUpdateResponse.builder().fileId(fileId).ocrStarted(true).build());
OCRStatusUpdateResponse.builder().fileId(fileId).features(features).ocrStarted(true).build());
}
public void sendUpdate(String fileId, int finishedImages, int totalImages) {
public void sendUpdate(String fileId, int finishedImages, int totalImages, Set<AzureOcrFeature> features) {
rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_QUEUE,
rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_EXCHANGE,
TenantContext.getTenantId(),
OCRStatusUpdateResponse.builder().fileId(fileId).numberOfPagesToOCR(totalImages).numberOfOCRedPages(finishedImages).build());
OCRStatusUpdateResponse.builder()
.fileId(fileId)
.features(features)
.numberOfPagesToOCR(totalImages)
.numberOfOCRedPages(finishedImages)
.build());
}
public void sendOcrResponse(String dossierId, String fileId) {
public void sendOcrResponse(DocumentRequest request) {
rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_RESPONSE_QUEUE, TenantContext.getTenantId(), new DocumentRequest(dossierId, fileId));
rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_RESPONSE_EXCHANGE, TenantContext.getTenantId(), request);
}
}
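The change above in `OcrMessageSender` switches every `convertAndSend` call from publishing directly to a named queue to publishing through an exchange, with the tenant id as routing key. A minimal sketch of what that publish call now carries — the exchange name and tenant id below are illustrative placeholders, not the real `MessagingConfiguration` constants or `TenantContext` lookup:

```java
import java.util.ArrayList;
import java.util.List;

public class ExchangeRoutingSketch {

    // records one publish: (exchange, routingKey, payload)
    record Sent(String exchange, String routingKey, Object payload) {}

    // stand-in for RabbitTemplate, recording instead of sending
    static class RecordingTemplate {
        final List<Sent> sent = new ArrayList<>();

        // mirrors RabbitTemplate.convertAndSend(exchange, routingKey, payload)
        void convertAndSend(String exchange, String routingKey, Object payload) {
            sent.add(new Sent(exchange, routingKey, payload));
        }
    }

    public static void main(String[] args) {
        var template = new RecordingTemplate();
        String tenantId = "tenant-a"; // would come from TenantContext.getTenantId()

        // before the change: convertAndSend(QUEUE_NAME, payload)
        // after the change:  route through the exchange, keyed by tenant
        template.convertAndSend("ocr.status.update.response", tenantId, "status-update");

        Sent s = template.sent.get(0);
        System.out.println(s.exchange() + " / " + s.routingKey());
    }
}
```

Routing by tenant id lets a per-tenant binding decide which queue receives the status update, instead of hard-wiring a single queue in the sender.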

View File

@@ -1,5 +1,5 @@
# you can list packages
ghostscript=9.55.0~dfsg1-0ubuntu5.9
ghostscript=9.55.0~dfsg1-0ubuntu5.10
pkg-config
zip
unzip

View File

@@ -63,3 +63,5 @@ azure:
ocrService:
sendStatusUpdates: true
native-libs.path: ${VCPKG_DYNAMIC_LIB}

View File

@@ -7,11 +7,21 @@
<include resource="org/springframework/boot/logging/logback/console-appender.xml"/>
<appender name="JSON" class="ch.qos.logback.core.ConsoleAppender">
<encoder class="net.logstash.logback.encoder.LogstashEncoder"/>
<encoder class="net.logstash.logback.encoder.LogstashEncoder">
<pattern>%d{yyyy-MM-dd HH:mm:ss}%replace( [file:%X{fileId}]){' \[file:\]', ''} [%thread] %-5level%logger{36} - %msg%n</pattern>
</encoder>
</appender>
<appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender">
<encoder>
<pattern>%d{yyyy-MM-dd HH:mm:ss}%replace( [file:%X{fileId}]){' \[file:\]', ''} [%thread] %-5level%logger{36} - %msg%n</pattern>
</encoder>
</appender>
<root level="INFO">
<appender-ref ref="${logType}"/>
</root>
<logger name="com.iqser.red.pdftronlogic.commons" level="ERROR"/>
</configuration>
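The `%replace( [file:%X{fileId}]){' \[file:\]', ''}` conversion added to both appenders appends a ` [file:<id>]` tag from the MDC and strips it again when `fileId` is unset, so lines without a file context stay clean. An illustrative Java sketch of that substitution (the helper name is hypothetical; logback applies the same regex internally):

```java
public class LogPatternSketch {

    // mimics: %replace( [file:%X{fileId}]){' \[file:\]', ''}
    static String applyReplace(String fileId) {
        String fragment = " [file:" + fileId + "]";
        // same regex as the logback pattern: ' \[file:\]' -> ''
        return fragment.replaceAll(" \\[file:\\]", "");
    }

    public static void main(String[] args) {
        // empty MDC value: the whole " [file:]" fragment is removed
        System.out.println("'" + applyReplace("") + "'");
        // populated MDC value: the tag survives untouched
        System.out.println("'" + applyReplace("abc123") + "'");
    }
}
```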

View File

@@ -0,0 +1,50 @@
package com.knecon.fforesight.service.ocr.v1.api.model;
import static org.junit.jupiter.api.Assertions.*;
import java.awt.geom.Point2D;
import java.awt.geom.Rectangle2D;
import org.junit.jupiter.api.Test;
class QuadPointTest {
@Test
public void testContains() {
var a = new Point2D.Double(0, 0);
var b = new Point2D.Double(0, 1);
var c = new Point2D.Double(1, 1);
var d = new Point2D.Double(1, 0);
var q = new QuadPoint(a, b, c, d);
assertTrue(q.isHorizontal());
assertFalse(q.isVertical());
assertTrue(q.contains(a));
assertTrue(q.contains(b));
assertTrue(q.contains(c));
assertTrue(q.contains(d));
var p = new Point2D.Double(0.5, 0.5);
assertTrue(q.contains(p));
var r = new Rectangle2D.Double(0.5, 0.5, 0.1, 0.1);
assertTrue(q.contains(r));
}
@Test
public void testCenter() {
var a = new Point2D.Double(0, 0);
var b = new Point2D.Double(1, 1);
var c = new Point2D.Double(2, 1);
var d = new Point2D.Double(1, 0);
var q = new QuadPoint(a, b, c, d);
assertTrue(q.isHorizontal());
assertFalse(q.isVertical());
assertEquals(QuadPoint.Direction.RIGHT, q.getDirection());
assertEquals(new Point2D.Double(1, 0.5), q.getCenter());
}
}

View File

@@ -1,6 +1,9 @@
package com.knecon.fforesight.service.ocr.v1.server;
import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.AfterEach;
@@ -8,7 +11,10 @@ import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.MockitoAnnotations;
import org.mockito.junit.jupiter.MockitoExtension;
import org.springframework.amqp.rabbit.core.RabbitAdmin;
import org.springframework.amqp.rabbit.core.RabbitTemplate;
import org.springframework.amqp.rabbit.listener.MessageListenerContainer;
import org.springframework.amqp.rabbit.listener.RabbitListenerEndpointRegistry;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
@@ -52,6 +58,9 @@ public class AbstractTest {
@MockBean
protected RabbitTemplate rabbitTemplate;
@MockBean
private RabbitAdmin rabbitAdmin;
private static String pdftronLicense;
@@ -100,6 +109,16 @@
@ComponentScan(excludeFilters = {@ComponentScan.Filter(type = FilterType.ASSIGNABLE_TYPE, value = StorageAutoConfiguration.class)})
public static class TestConfiguration {
@Bean
public RabbitListenerEndpointRegistry rabbitListenerEndpointRegistry() {
var mock = mock(RabbitListenerEndpointRegistry.class);
when(mock.getListenerContainer(any())).thenReturn(mock(MessageListenerContainer.class));
return mock;
}
@Bean
@Primary
public StorageService inMemoryStorage() {

View File

@@ -1,6 +1,9 @@
package com.knecon.fforesight.service.ocr.v1.server;
import static com.iqser.red.pdftronlogic.commons.PdfTextExtraction.extractAllTextFromDocument;
import static com.knecon.fforesight.service.ocr.v1.server.queue.OcrMessageReceiver.DOCUMENT_FILE_NAME;
import static com.knecon.fforesight.service.ocr.v1.server.queue.OcrMessageReceiver.IDP_RESULT_FILE_NAME;
import static com.knecon.fforesight.service.ocr.v1.server.queue.OcrMessageReceiver.VIEWER_DOCUMENT_FILE_NAME;
import java.io.File;
import java.io.FileInputStream;
@@ -9,23 +12,28 @@ import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.Comparator;
import java.util.List;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.slf4j.MDC;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.core.io.ClassPathResource;
import com.knecon.fforesight.service.ocr.processor.service.OCRService;
import com.knecon.fforesight.service.ocr.processor.utils.OsUtils;
import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
import lombok.SneakyThrows;
@Disabled // in order to run, the azure.key must be set first in the application.yml
// in order to run, the azure.key must be set first in the application.yml and you must set the env variable VCPKG_DYNAMIC_LIB to your tesseract and leptonica installation folder
@Disabled
@SpringBootTest()
public class OcrServiceIntegrationTest extends AbstractTest {
public static final Set<AzureOcrFeature> FEATURES = Set.of(AzureOcrFeature.ROTATION_CORRECTION, AzureOcrFeature.FONT_STYLE_DETECTION, AzureOcrFeature.IDP);
@Autowired
private OCRService ocrService;
@@ -34,7 +42,7 @@ public class OcrServiceIntegrationTest extends AbstractTest {
@SneakyThrows
public void testOcrWith2000PageFile() {
testOCR("/home/kschuettler/Dokumente/TestFiles/OCR/VV-331340-first100.pdf");
testOCR("/home/kschuettler/Dokumente/TestFiles/OCR/brokenText.pdf");
}
@@ -50,7 +58,7 @@ public class OcrServiceIntegrationTest extends AbstractTest {
@SneakyThrows
public void testOcrWithFile() {
testOCR("/home/kschuettler/Dokumente/TestFiles/syn-dm-testfiles/1.A16148F - Toxicidade oral aguda.pdf");
testOCR("/home/kschuettler/Dokumente/TestFiles/OCR/TestSet/VV-331340-first100.pdf");
}
@@ -58,7 +66,7 @@ public class OcrServiceIntegrationTest extends AbstractTest {
@SneakyThrows
public void testOcrWithFolder() {
String dir = "/home/kschuettler/Dokumente/TestFiles/BASF/Documine_Test_docs/2013-1110704.pdf";
String dir = "/home/kschuettler/Dokumente/TestFiles/OCR/TestSet";
List<File> foundFiles = Files.walk(Path.of(dir))
.sorted(Comparator.comparingLong(this::getFileSize))
.map(Path::toFile)
@@ -97,18 +105,21 @@ public class OcrServiceIntegrationTest extends AbstractTest {
@SneakyThrows
private String testOCR(File file) {
MDC.put("fileId", "test");
Path tmpDir = Path.of(OsUtils.getTemporaryDirectory()).resolve("OCR_TEST").resolve(file.toPath().getFileName());
assert tmpDir.toFile().exists() || tmpDir.toFile().mkdirs();
var documentFile = tmpDir.resolve(Path.of("document.pdf"));
var viewerDocumentFile = tmpDir.resolve(Path.of("viewerDocument.pdf"));
var analyzeResultFile = tmpDir.resolve(Path.of("azureAnalysisResult.json"));
var documentFile = tmpDir.resolve(Path.of(DOCUMENT_FILE_NAME));
var viewerDocumentFile = tmpDir.resolve(Path.of(VIEWER_DOCUMENT_FILE_NAME));
var analyzeResultFile = tmpDir.resolve(Path.of(IDP_RESULT_FILE_NAME));
Files.copy(file.toPath(), documentFile, StandardCopyOption.REPLACE_EXISTING);
Files.copy(file.toPath(), viewerDocumentFile, StandardCopyOption.REPLACE_EXISTING);
ocrService.runOcrOnDocument(TEST_DOSSIER_ID, "file", false, tmpDir, documentFile.toFile(), viewerDocumentFile.toFile(), analyzeResultFile.toFile());
ocrService.runOcrOnDocument(TEST_DOSSIER_ID, "file", FEATURES, tmpDir, documentFile.toFile(), viewerDocumentFile.toFile(), analyzeResultFile.toFile());
MDC.remove("fileId");
System.out.println("File:" + documentFile);
System.out.println("\n\n");
try (var fileStream = new FileInputStream(documentFile.toFile())) {

View File

@@ -2,12 +2,16 @@ persistence-service.url: "http://persistence-service-v1:8080"
pdftron.license: demo:1650351709282:7bd235e003000000004ec28a6743e1163a085e2115de2536ab6e2cfe5a
azure:
endpoint: https://ff-ocr-test.cognitiveservices.azure.com/
key: # find key in Bitwarden under: Azure IDP Test Key
endpoint: https://ff-ocr-dev.cognitiveservices.azure.com/
key: 444fe2f83e9c48da8e588c7bd5295309 # find key in Bitwarden under: Azure IDP Test Key
native-libs:
logging.type: ${LOGGING_TYPE:CONSOLE}
ocrService.sendStatusUpdates: false
ocrService:
sendStatusUpdates: false
debug: true
management:
endpoint:
@@ -17,4 +21,5 @@ management:
endpoints.web.exposure.include: prometheus, health, metrics
metrics.export.prometheus.enabled: true
POD_NAME: azure-ocr-service
POD_NAME: azure-ocr-service
native-libs.path: /home/kschuettler/software/leptonica/vcpkg/installed/x64-linux-dynamic/lib/

View File

@@ -15,6 +15,7 @@
<exclude name="NullAssignment"/>
<exclude name="AssignmentInOperand"/>
<exclude name="BeanMembersShouldSerialize"/>
<exclude name="AvoidFieldNameMatchingMethodName"/>
</rule>
</ruleset>

View File

@@ -17,6 +17,7 @@
<exclude name="AssignmentInOperand"/>
<exclude name="TestClassWithoutTestCases"/>
<exclude name="BeanMembersShouldSerialize"/>
<exclude name="AvoidFieldNameMatchingMethodName"/>
</rule>
</ruleset>

View File

@@ -28,7 +28,7 @@ if [ -z "$1" ]; then
fi
namespace=${1}
deployment_name="ocr-service-v1"
deployment_name="azure-ocr-service"
echo "deploying to ${namespace}"