Merge branch 'RED-10127' into 'main'

RED-10127: improve headline detection by fitting BBoxes tightly and therefore... See merge request fforesight/azure-ocr-service!17
2024-10-22 17:03:29 +02:00 · 2024-10-22 17:03:29 +02:00 · 6845afb1dd
commit 6845afb1dd
parent 9219f723f8 e78771d65f
41 changed files with 1485 additions and 413 deletions
--- a/azure-ocr-service/azure-ocr-service-api/src/main/java/com/knecon/fforesight/service/ocr/v1/api/model/AzureOcrFeature.java
+++ b/azure-ocr-service/azure-ocr-service-api/src/main/java/com/knecon/fforesight/service/ocr/v1/api/model/AzureOcrFeature.java
@ -0,0 +1,11 @@
+package com.knecon.fforesight.service.ocr.v1.api.model;
+
+public enum AzureOcrFeature { // optional per-request processing switches, carried in DocumentRequest.features
+
+    ROTATION_CORRECTION, // NOTE(review): presumably enables page rotation correction; no usage visible in this change set - confirm
+    IDP, // selects the Azure "prebuilt-layout" model and key-value pair extraction instead of "prebuilt-read"
+    FONT_STYLE_DETECTION, // NOTE(review): presumably replaces the former fontStyleDetection setting (bold detection) - confirm
+    ALL_PAGES, // process every page, not only the pages found to contain images
+    REMOVE_WATERMARKS // set by the legacy boolean removeWatermark constructor of DocumentRequest
+
+}
--- a/azure-ocr-service/azure-ocr-service-api/src/main/java/com/knecon/fforesight/service/ocr/v1/api/model/DocumentRequest.java
+++ b/azure-ocr-service/azure-ocr-service-api/src/main/java/com/knecon/fforesight/service/ocr/v1/api/model/DocumentRequest.java
@ -1,6 +1,8 @@
 package com.knecon.fforesight.service.ocr.v1.api.model;

+import java.util.Collections;
 import java.util.Optional;
+import java.util.Set;

 import lombok.AccessLevel;
 import lombok.AllArgsConstructor;
@ -18,12 +20,13 @@ public class DocumentRequest {

    String dossierId;
    String fileId;
+    boolean removeWatermark;

    String originDocumentId;
    String viewerDocId;
    String idpResultId;

-    boolean removeWatermark;
+    Set<AzureOcrFeature> features;


    public DocumentRequest(String dossierId, String fileId) {
@ -33,18 +36,23 @@ public class DocumentRequest {
        originDocumentId = null;
        viewerDocId = null;
        idpResultId = null;
-        removeWatermark = false;
+        features = Collections.emptySet();
    }

+
    // needed for backwards compatibility
    public DocumentRequest(String dossierId, String fileId, boolean removeWatermark) {

        this.dossierId = dossierId;
        this.fileId = fileId;
-        this.removeWatermark = removeWatermark;
        originDocumentId = null;
        viewerDocId = null;
        idpResultId = null;
+        if (removeWatermark) { // the legacy flag is translated into the feature set
+            features = Set.of(AzureOcrFeature.REMOVE_WATERMARKS); // immutable single-element set
+        } else {
+            features = Collections.emptySet(); // NOTE(review): the removeWatermark field itself is no longer assigned in this constructor - confirm that is intended
+        }
    }


--- a/azure-ocr-service/azure-ocr-service-api/src/main/java/com/knecon/fforesight/service/ocr/v1/api/model/QuadPoint.java
+++ b/azure-ocr-service/azure-ocr-service-api/src/main/java/com/knecon/fforesight/service/ocr/v1/api/model/QuadPoint.java
@ -9,19 +9,61 @@ import java.util.stream.Stream;

 public record QuadPoint(Point2D a, Point2D b, Point2D c, Point2D d) {

-    /*
-    B _____ C
-     |     |
-    A|_____|D
-    */
+    public enum Direction { // orientation of the a->d edge; NOTE: each diagram FOLLOWS the constant it illustrates
+        RIGHT, // a->d points towards +x (diagram below)
+        /*
+        B _____ C
+         |     |
+        A|_____|D
+        */
+        DOWN, // a->d points towards smaller y (diagram below)
+        /*
+         * A _____ B
+         *  |     |
+         * D|_____|C
+         */
+        LEFT, // a->d points towards -x (diagram below)
+        /*
+         * D _____ A
+         *  |     |
+         * C|_____|B
+         * */
+        UP, // a->d points towards larger y (diagram below)
+        /*
+         * C _____ D
+         *  |     |
+         * B|_____|A
+         */
+        NONE // neither horizontal nor vertical within the threshold; corner order unknown
+        /*
+         * ? _____ ?
+         *  |     |
+         * ?|_____|?
+         */
+    }
+    private static final double THRESHOLD_ANGLE = Math.toRadians(5); // QuadPoint is considered straight, when its angles are below this threshold


    public static QuadPoint fromRectangle2D(Rectangle2D rectangle2D) {

-        return new QuadPoint(new Point2D.Double(rectangle2D.getX(), rectangle2D.getY()),
-                             new Point2D.Double(rectangle2D.getX(), rectangle2D.getMaxY()),
-                             new Point2D.Double(rectangle2D.getMaxX(), rectangle2D.getMaxY()),
-                             new Point2D.Double(rectangle2D.getMaxX(), rectangle2D.getY()));
+        return fromRectangle2D(rectangle2D, Direction.NONE); // NONE falls into the default branch, which yields the same corner order as the removed code
+    }
+
+
+    public static QuadPoint fromRectangle2D(Rectangle2D rectangle2D, Direction direction) { // builds a QuadPoint whose a->d edge points in the given direction
+
+        var lowerLeft = new Point2D.Double(rectangle2D.getX(), rectangle2D.getY()); // "lower" = minimum y, i.e. the names assume a y-up coordinate system
+        var upperLeft = new Point2D.Double(rectangle2D.getX(), rectangle2D.getMaxY());
+        var upperRight = new Point2D.Double(rectangle2D.getMaxX(), rectangle2D.getMaxY());
+        var lowerRight = new Point2D.Double(rectangle2D.getMaxX(), rectangle2D.getY());
+
+        return switch (direction) { // arguments are passed in record order a, b, c, d
+            case DOWN -> new QuadPoint(upperLeft, upperRight, lowerRight, lowerLeft);
+            case LEFT -> new QuadPoint(upperRight, lowerRight, lowerLeft, upperLeft);
+            case UP -> new QuadPoint(lowerRight, lowerLeft, upperLeft, upperRight);
+            default -> new QuadPoint(lowerLeft, upperLeft, upperRight, lowerRight); // covers RIGHT and NONE
+        };
+
    }


@ -56,6 +98,35 @@ public record QuadPoint(Point2D a, Point2D b, Point2D c, Point2D d) {
    }


+    public boolean isHorizontal() { // true when the a->d edge OR the b->c edge is within THRESHOLD_ANGLE of angle 0
+
+        double angle = calculateAngle(a, d);
+        double angle2 = calculateAngle(b, c);
+        return Math.abs(angle) <= THRESHOLD_ANGLE || Math.abs(angle2) <= THRESHOLD_ANGLE; // NOTE(review): if calculateAngle returns a full-circle angle (e.g. atan2), edges pointing towards -x have |angle| near PI and fail this test - confirm
+    }
+
+
+    public boolean isVertical() { // true when the a->d edge OR the b->c edge is within THRESHOLD_ANGLE of +/-90 degrees
+
+        double rightAngle = Math.PI / 2;
+        double angle = calculateAngle(a, d);
+        double angle2 = calculateAngle(b, c);
+        return Math.abs(rightAngle - Math.abs(angle)) <= THRESHOLD_ANGLE || Math.abs(rightAngle - Math.abs(angle2)) <= THRESHOLD_ANGLE; // Math.abs(angle) folds the upward and downward cases together
+    }
+
+
+    public Direction getDirection() { // classifies the quad by the orientation of its a->d edge; the horizontal test takes precedence
+
+        if (isHorizontal()) {
+            return a.getX() < d.getX() ? Direction.RIGHT : Direction.LEFT; // NOTE(review): LEFT is only reachable if isHorizontal() accepts edges pointing towards -x - confirm against calculateAngle
+        }
+        if (isVertical()) {
+            return a.getY() < d.getY() ? Direction.UP : Direction.DOWN; // UP means y grows from a to d
+        }
+        return Direction.NONE; // skewed beyond the threshold in both tests
+    }
+
+
    public Stream<Line2D> asLines() {

        return Stream.of(new Line2D.Double(a(), b()), new Line2D.Double(b(), c()), new Line2D.Double(c(), d()), new Line2D.Double(d(), a()));
@ -63,7 +134,7 @@ public record QuadPoint(Point2D a, Point2D b, Point2D c, Point2D d) {
    }


-    public QuadPointData data() {
+    public QuadPointData toData() { // flattens the four corners to floats in the order ax, ay, bx, by, cx, cy, dx, dy

        return new QuadPointData(new float[]{(float) a.getX(), (float) a.getY(), (float) b.getX(), (float) b.getY(), (float) c.getX(), (float) c.getY(), (float) d.getX(), (float) d.getY()});
    }
@ -134,13 +205,19 @@ public record QuadPoint(Point2D a, Point2D b, Point2D c, Point2D d) {
    }


-    public double size() {
+    public double getRectangularSize() { // product of the edge lengths |ab| and |ad|; equals the area only if those edges are perpendicular

        return a().distance(b()) * a().distance(d());
    }


-    public double angle() {
+    public double getAngle() { // angle of the a->d edge, as computed by calculateAngle
+
+        return calculateAngle(a, d);
+    }
+
+
+    private static double calculateAngle(Point2D a, Point2D d) {

        double deltaY = d.getY() - a.getY();
        double deltaX = d.getX() - a.getX();
--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/OcrServiceSettings.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/OcrServiceSettings.java
@ -17,10 +17,8 @@ public class OcrServiceSettings {
    int batchSize = 128;

    boolean debug; // writes the ocr layer visibly to the viewer doc pdf
-    boolean idpEnabled; // Enables table detection, paragraph classification, section detection, key-value detection.
    boolean drawTablesAsLines; // writes the tables to the PDF as invisible lines.
-    boolean processAllPages; // if this parameter is set, ocr will be performed on any page, regardless if it has images or not
-    boolean fontStyleDetection; // Enables bold detection using ghostscript and leptonica
+    boolean snuggify = true; // NOTE(review): previous comment was copied from the removed fontStyleDetection flag; presumably this toggles tight ("snug") bounding-box fitting - confirm
    String contentFormat; // Either markdown or text. But, for whatever reason, with markdown enabled, key-values are not written by azure....

 }
--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/initializer/NativeLibrariesInitializer.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/initializer/NativeLibrariesInitializer.java
@ -7,6 +7,7 @@ import com.pdftron.pdf.PDFNet;
 import com.sun.jna.NativeLibrary;

 import jakarta.annotation.PostConstruct;
+import lombok.AllArgsConstructor;
 import lombok.RequiredArgsConstructor;
 import lombok.SneakyThrows;
 import lombok.extern.slf4j.Slf4j;
@ -14,6 +15,7 @@ import lombok.extern.slf4j.Slf4j;
@Slf4j
@Component
@RequiredArgsConstructor
+@AllArgsConstructor
 public class NativeLibrariesInitializer {

    @Value("${pdftron.license:}")
--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/model/PageBatch.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/model/PageBatch.java
@ -2,19 +2,32 @@ package com.knecon.fforesight.service.ocr.processor.model;

 import static com.knecon.fforesight.service.ocr.processor.utils.ListSplittingUtils.formatIntervals;

-import java.util.ArrayList;
+import java.nio.file.Path;
 import java.util.List;
 import java.util.function.Consumer;

+import com.azure.core.util.BinaryData;
+
 import lombok.AccessLevel;
+import lombok.Getter;
 import lombok.NonNull;
+import lombok.RequiredArgsConstructor;
 import lombok.experimental.FieldDefaults;

+@RequiredArgsConstructor
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
 public final class PageBatch implements Comparable<PageBatch> {

+    @Getter
+    int index;
    @NonNull
-    List<Integer> lookup = new ArrayList<>();
+    List<Integer> lookup;
+    @NonNull
+    @Getter
+    Path batchDoc;
+    @NonNull
+    @Getter
+    Path imagePipelineDir;


    @Override
@ -34,12 +47,6 @@ public final class PageBatch implements Comparable<PageBatch> {
    }


-    public void add(Integer pageNumber) {
-
-        lookup.add(pageNumber);
-    }
-
-
    public void forEach(Consumer<? super Integer> consumer) {

        lookup.forEach(consumer);
@ -84,4 +91,10 @@ public final class PageBatch implements Comparable<PageBatch> {
        return Integer.compare(lookup.get(0), o.lookup.get(0));
    }

+
+    public BinaryData render() { // no rendering happens here: returns a BinaryData created from the batchDoc path
+
+        return BinaryData.fromFile(batchDoc);
+    }
+
 }
--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/model/TextPositionInImage.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/model/TextPositionInImage.java
@ -3,7 +3,6 @@ package com.knecon.fforesight.service.ocr.processor.model;
 import java.awt.geom.AffineTransform;
 import java.awt.geom.Point2D;

-import com.azure.ai.documentintelligence.models.DocumentWord;
 import com.knecon.fforesight.service.ocr.processor.visualizations.fonts.FontMetrics;
 import com.knecon.fforesight.service.ocr.processor.visualizations.fonts.FontMetricsProvider;
 import com.knecon.fforesight.service.ocr.processor.visualizations.fonts.FontStyle;
@ -20,7 +19,8 @@ public class TextPositionInImage {

    final QuadPoint position;
    final String text;
-    final AffineTransform imageCTM;
+    final AffineTransform resultToPageTransform;
+    final boolean snugBBox;

    @Setter
    boolean overlapsIgnoreZone;
@ -30,33 +30,34 @@ public class TextPositionInImage {
    FontStyle fontStyle;


-    public TextPositionInImage(DocumentWord word, AffineTransform imageCTM, FontMetricsProvider fontMetricsProvider, FontStyle fontStyle) {
+    public TextPositionInImage(QuadPoint position, String text, AffineTransform resultToPageTransform, FontMetricsProvider fontMetricsProvider, FontStyle fontStyle, boolean snugBBox) { // callers now pass the quad and text directly instead of an Azure DocumentWord

-        this.position = QuadPoint.fromPolygons(word.getPolygon());
-        this.text = word.getContent();
-        this.imageCTM = imageCTM;
+        this.position = position;
+        this.text = text;
+        this.resultToPageTransform = resultToPageTransform; // applied to position to obtain the transformed bbox and corners
        this.fontMetricsProvider = fontMetricsProvider;
        this.fontStyle = fontStyle;
+        this.snugBBox = snugBBox; // selects tight-bbox metrics over Azure-bbox metrics in getMetrics()
    }


    public QuadPoint getTransformedTextBBox() {

-        return position.getTransformed(imageCTM);
+        return position.getTransformed(resultToPageTransform); // the word quad with resultToPageTransform applied
    }


    public AffineTransform getTextMatrix() {

-        FontMetrics metrics = fontMetricsProvider.calculateMetrics(text, getTransformedWidth(), getTransformedHeight());
+        FontMetrics metrics = getMetrics();

        // Matrix multiplication is from right to left:
        // convert to image coords -> subtract descent -> scale height -> reverse imageCTM scaling -> translate to coordinates in image -> convert to pdf coords
        // width must not be set, since it is scaled with the fontsize attribute
-        double rotation = position.angle();
+        double rotation = position.getAngle();
        Point2D anchor = new Point2D.Double(position.b().getX(), position.b().getY());
        AffineTransform ctm = new AffineTransform();
-        ctm.concatenate(imageCTM);
+        ctm.concatenate(resultToPageTransform);
        ctm.translate(anchor.getX(), anchor.getY());
        ctm.scale(getWidth() / getTransformedWidth(),
                  getHeight() / getTransformedHeight()); // scale with transformation coefficient, such that fontsize may be set with transformed width.
@ -69,6 +70,15 @@ public class TextPositionInImage {
    }


+    private FontMetrics getMetrics() { // picks the metrics calculation that matches how the bbox was produced
+
+        if (snugBBox) {
+            return fontMetricsProvider.calculateMetricsForTightBBox(text, getTransformedWidth(), getTransformedHeight()); // bbox was fitted tightly around the text
+        }
+        return fontMetricsProvider.calculateMetricsForAzureBBox(text, getTransformedWidth(), getTransformedHeight()); // bbox is used as delivered by Azure
+    }
+
+
    public double getFontSize() {
        // The fontsize as estimated by the word width
        return fontMetricsProvider.calculateFontSize(text, getTransformedWidth());
@ -95,7 +105,7 @@ public class TextPositionInImage {

    public double getFontSizeByHeight() {
        // The fontsize as estimated by the word height, only used for font style detection
-        var metrics = fontMetricsProvider.calculateMetrics(text, getTransformedWidth(), getTransformedHeight());
+        var metrics = getMetrics(); // tight-bbox or Azure-bbox metrics, depending on snugBBox
        return fontMetricsProvider.calculateFontSize(text, getTransformedWidth()) * metrics.getHeightScaling();
    }

@ -108,25 +118,25 @@ public class TextPositionInImage {

    public Point2D transformedA() {

-        return imageCTM.transform(position.a(), null);
+        return resultToPageTransform.transform(position.a(), null); // null destination: transform allocates and returns a new point
    }


    public Point2D transformedB() {

-        return imageCTM.transform(position.b(), null);
+        return resultToPageTransform.transform(position.b(), null); // corner b with resultToPageTransform applied
    }


    public Point2D transformedC() {

-        return imageCTM.transform(position.c(), null);
+        return resultToPageTransform.transform(position.c(), null); // corner c with resultToPageTransform applied
    }


    public Point2D transformedD() {

-        return imageCTM.transform(position.d(), null);
+        return resultToPageTransform.transform(position.d(), null); // corner d with resultToPageTransform applied
    }

 }
--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/AsyncOcrService.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/AsyncOcrService.java
@ -1,28 +1,27 @@
 package com.knecon.fforesight.service.ocr.processor.service;

-import java.util.ArrayList;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;

+import org.slf4j.MDC;
 import org.springframework.stereotype.Service;

 import com.azure.ai.documentintelligence.models.AnalyzeResult;
 import com.azure.core.util.BinaryData;
 import com.azure.core.util.polling.LongRunningOperationStatus;
+import com.fasterxml.jackson.databind.ObjectMapper;
 import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
 import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
 import com.knecon.fforesight.service.ocr.processor.model.PageInformation;
-import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingSupervisor;
+import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingPipeline;
 import com.knecon.fforesight.service.ocr.processor.visualizations.layers.LayerFactory;
 import com.knecon.fforesight.service.ocr.processor.visualizations.layers.OcrResult;
-import com.pdftron.common.PDFNetException;
-import com.pdftron.pdf.Optimizer;
+import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
 import com.pdftron.pdf.PDFDoc;
-import com.pdftron.sdf.SDFDoc;

 import lombok.AccessLevel;
 import lombok.RequiredArgsConstructor;
-import lombok.SneakyThrows;
 import lombok.experimental.FieldDefaults;
 import lombok.extern.slf4j.Slf4j;
 import reactor.core.publisher.Mono;
@ -35,16 +34,13 @@ public class AsyncOcrService {

    AzureOcrResource azureOcrResource;
    OcrServiceSettings settings;
+    ImageProcessingPipeline imageProcessingPipeline;
+    ObjectMapper mapper;


-    public OcrResult awaitOcr(PDFDoc pdfDoc,
-                              OcrExecutionSupervisor supervisor,
-                              Set<Integer> pagesWithImages,
-                              ImageProcessingSupervisor imageSupervisor) throws InterruptedException, PDFNetException {
+    public OcrResult awaitOcr(PDFDoc pdfDoc, OcrExecutionSupervisor supervisor, Set<AzureOcrFeature> features, List<PageBatch> batches) throws InterruptedException {

-        LayerFactory layerFactory = new LayerFactory(settings, supervisor, imageSupervisor, PageInformation.fromPDFDoc(pdfDoc));
-
-        List<PageBatch> batches = splitIntoBatches(pdfDoc, supervisor, pagesWithImages);
+        LayerFactory layerFactory = new LayerFactory(settings, features, supervisor, PageInformation.fromPDFDoc(pdfDoc), imageProcessingPipeline);

        for (PageBatch batch : batches) {

@ -57,12 +53,10 @@ public class AsyncOcrService {
            supervisor.requireNoErrors();

            batchContext.batchStats().start();
-
-            BinaryData data = renderBatch(pdfDoc, batch);
-
+            BinaryData data = batch.render();
            batchContext.batchStats().batchRenderFinished();

-            beginAnalysis(data, batchContext);
+            beginAnalysis(data, batchContext, features);
        }

        supervisor.awaitAllPagesProcessed();
@ -71,45 +65,17 @@ public class AsyncOcrService {
    }


-    private static BinaryData renderBatch(PDFDoc pdfDoc, PageBatch batch) throws PDFNetException {
-
-        BinaryData docData;
-        try (var smallerDoc = extractBatchDocument(pdfDoc, batch)) {
-            Optimizer.optimize(smallerDoc);
-            docData = BinaryData.fromBytes(smallerDoc.save(SDFDoc.SaveMode.LINEARIZED, null));
-        }
-        return docData;
-    }
-
-
-    private List<PageBatch> splitIntoBatches(PDFDoc pdfDoc, OcrExecutionSupervisor supervisor, Set<Integer> pagesWithImages) throws PDFNetException {
-
-        List<PageBatch> batches = new ArrayList<>();
-        PageBatch currentBatch = new PageBatch();
-        batches.add(currentBatch);
-        for (int pageNumber = 1; pageNumber <= pdfDoc.getPageCount(); pageNumber++) {
-            if (!settings.isProcessAllPages() && !pagesWithImages.contains(pageNumber)) {
-                supervisor.logPageSkipped(pageNumber);
-                continue;
-            }
-            currentBatch.add(pageNumber);
-            if (currentBatch.size() == settings.getBatchSize()) {
-                currentBatch = new PageBatch();
-                batches.add(currentBatch);
-            }
-        }
-        return batches;
-    }
-
-
-    private void beginAnalysis(BinaryData data, BatchContext batchContext) throws InterruptedException {
+    private void beginAnalysis(BinaryData data, BatchContext batchContext, Set<AzureOcrFeature> features) throws InterruptedException {

        batchContext.supervisor.enterConcurrency(batchContext.batch);

        batchContext.supervisor.logUploadStart(batchContext.batch, data.getLength());

-        azureOcrResource.callAzureAsync(data)
+        var mdcContext = MDC.getCopyOfContextMap();
+
+        azureOcrResource.callAzureAsync(data, features)
                .flatMap(response -> {
+                    MDC.setContextMap(mdcContext);
                    if (response.getStatus().equals(LongRunningOperationStatus.IN_PROGRESS)) {
                        batchContext.supervisor.logInProgress(batchContext.batch);
                    }
@ -128,6 +94,7 @@ public class AsyncOcrService {

    private static void handleCompleted(BatchContext batchContext) {

+        log.info("Completed : {}", batchContext.batch); // logged before the concurrency slot is released
        batchContext.supervisor.leaveConcurrency(batchContext.batch);
    }

@ -141,32 +108,17 @@ public class AsyncOcrService {
    private void handleSuccessful(AnalyzeResult finalResult, BatchContext batchContext) {

        try {
-            batchContext.layerFactory.addAnalyzeResult(batchContext.batch, finalResult);
-            batchContext.supervisor.logPageSuccess(batchContext.batch);
+            batchContext.supervisor.logPageSuccess(batchContext.batch()); // NOTE(review): success is now logged BEFORE the result is processed; a later failure reports both success and error - confirm the reordering is intended
+            if (settings.isDebug()) { // debug only: dump the raw Azure result as JSON next to the batch files
+                mapper.writeValue(batchContext.batch().getImagePipelineDir().resolve("azure_result_%d.json".formatted(batchContext.batch().getIndex())).toFile(), finalResult);
+            }
+            batchContext.layerFactory.processAnalyzeResult(batchContext.batch(), finalResult); // skipped if the debug dump above throws
        } catch (Exception e) {
            handleError(e, batchContext);
        }
    }


-    private static PDFDoc extractBatchDocument(PDFDoc pdfDoc, PageBatch pageBatch) throws PDFNetException {
-
-        if (pageBatch.size() < 0) {
-            throw new IllegalArgumentException();
-        }
-        PDFDoc singlePagePdfDoc = new PDFDoc();
-        pageBatch.forEach(pageNumber -> addPageToNewDoc(pageNumber, pdfDoc, singlePagePdfDoc));
-        return singlePagePdfDoc;
-    }
-
-
-    @SneakyThrows
-    private static void addPageToNewDoc(Integer pageNumber, PDFDoc pdfDoc, PDFDoc singlePagePdfDoc) {
-
-        singlePagePdfDoc.pagePushBack(pdfDoc.getPage(pageNumber));
-    }
-
-
    private record BatchContext(LayerFactory layerFactory, OcrExecutionSupervisor supervisor, PageBatch batch) {

        BatchStats batchStats() {
--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/AzureOcrResource.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/AzureOcrResource.java
@ -2,9 +2,11 @@ package com.knecon.fforesight.service.ocr.processor.service;

 import java.util.ArrayList;
 import java.util.List;
+import java.util.Set;

 import org.springframework.beans.factory.annotation.Value;
 import org.springframework.stereotype.Service;
+import org.yaml.snakeyaml.events.Event;

 import com.azure.ai.documentintelligence.DocumentIntelligenceAsyncClient;
 import com.azure.ai.documentintelligence.DocumentIntelligenceClientBuilder;
@ -19,6 +21,7 @@ import com.azure.core.util.BinaryData;
 import com.azure.core.util.polling.PollerFlux;
 import com.google.common.base.Objects;
 import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
+import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;

 import lombok.AccessLevel;
 import lombok.SneakyThrows;
@ -42,11 +45,18 @@ public class AzureOcrResource {


    @SneakyThrows
-    public PollerFlux<AnalyzeResultOperation, AnalyzeResult> callAzureAsync(BinaryData data) {
+    public PollerFlux<AnalyzeResultOperation, AnalyzeResult> callAzureAsync(BinaryData data, Set<AzureOcrFeature> features) { // model and analysis features are now chosen per request

        AnalyzeDocumentRequest analyzeRequest = new AnalyzeDocumentRequest().setBase64Source(data.toBytes());

-        return asyncClient.beginAnalyzeDocument(getModelId(), null, null, StringIndexType.UTF16CODE_UNIT, buildFeatures(), null, buildContentFormat(), analyzeRequest);
+        return asyncClient.beginAnalyzeDocument(getModelId(features), // "prebuilt-layout" when IDP is requested, otherwise "prebuilt-read"
+                                                null,
+                                                null,
+                                                StringIndexType.UTF16CODE_UNIT,
+                                                buildFeatures(features), // BARCODES always, KEY_VALUE_PAIRS only with IDP
+                                                null,
+                                                buildContentFormat(),
+                                                analyzeRequest);

    }

@ -60,25 +70,25 @@ public class AzureOcrResource {
    }


-    private String getModelId() {
+    private String getModelId(Set<AzureOcrFeature> features) { // chooses the Azure model id from the requested features

-        if (settings.isIdpEnabled()) {
+        if (features.contains(AzureOcrFeature.IDP)) { // IDP switches from the read model to the layout model
            return "prebuilt-layout";
        }
        return "prebuilt-read";
    }


-    private List<DocumentAnalysisFeature> buildFeatures() {
+    private List<DocumentAnalysisFeature> buildFeatures(Set<AzureOcrFeature> features) { // maps the service-level features to Azure analysis features

-        var features = new ArrayList<DocumentAnalysisFeature>();
+        var azureFeatures = new ArrayList<DocumentAnalysisFeature>(); // renamed so it no longer collides with the features parameter

-        if (settings.isIdpEnabled()) {
-            features.add(DocumentAnalysisFeature.KEY_VALUE_PAIRS);
+        if (features.contains(AzureOcrFeature.IDP)) {
+            azureFeatures.add(DocumentAnalysisFeature.KEY_VALUE_PAIRS);
        }
-        features.add(DocumentAnalysisFeature.BARCODES);
+        azureFeatures.add(DocumentAnalysisFeature.BARCODES); // added unconditionally

-        return features;
+        return azureFeatures;
    }

 }
--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/BatchFactory.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/BatchFactory.java
@ -0,0 +1,107 @@
+package com.knecon.fforesight.service.ocr.processor.service;
+
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+
+import org.springframework.stereotype.Service;
+
+import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
+import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
+import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
+import com.pdftron.common.PDFNetException;
+import com.pdftron.pdf.Optimizer;
+import com.pdftron.pdf.PDFDoc;
+import com.pdftron.sdf.SDFDoc;
+
+import lombok.AccessLevel;
+import lombok.RequiredArgsConstructor;
+import lombok.SneakyThrows;
+
+import lombok.experimental.FieldDefaults;
+
+@Service
+@RequiredArgsConstructor
+@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
+public class BatchFactory { // splits a PDF into page batches and writes each batch to its own PDF file
+
+    OcrServiceSettings settings;
+
+
+    public static String formatBatchFilename(int number) { // file name of the batch PDF for the given batch index
+
+        return "batch_%d.pdf".formatted(number);
+    }
+
+
+    @SneakyThrows
+    public List<PageBatch> splitIntoBatches(PDFDoc pdfDoc, OcrExecutionSupervisor supervisor, Set<AzureOcrFeature> features, Path fileDir) { // entry point: detect the pages to process, then build the batches
+
+        Set<Integer> pagesWithImages = ImageDetectionService.findPagesToProcess(pdfDoc, features);
+        supervisor.logImageExtractionFinished(pdfDoc.getPageCount(), pagesWithImages.size());
+
+        return buildBatches(pdfDoc, supervisor, features, fileDir, pagesWithImages);
+    }
+
+
+    public List<PageBatch> buildBatches(PDFDoc pdfDoc,
+                                        OcrExecutionSupervisor supervisor,
+                                        Set<AzureOcrFeature> features,
+                                        Path fileDir,
+                                        Set<Integer> pagesWithImages) throws PDFNetException {
+
+        List<PageBatch> batches = new ArrayList<>();
+        List<Integer> numbersForCurrentBatch = new ArrayList<>();
+        for (int pageNumber = 1; pageNumber <= pdfDoc.getPageCount(); pageNumber++) { // page numbers are 1-based
+            if (!features.contains(AzureOcrFeature.ALL_PAGES) && !pagesWithImages.contains(pageNumber)) { // skip pages without images unless ALL_PAGES is requested
+                supervisor.logPageSkipped(pageNumber);
+                continue;
+            }
+            numbersForCurrentBatch.add(pageNumber);
+            if (numbersForCurrentBatch.size() == settings.getBatchSize()) { // batch is full: write it out and start a new one
+                batches.add(create(batches.size(), pdfDoc, numbersForCurrentBatch, fileDir)); // batches.size() serves as the 0-based batch index
+                numbersForCurrentBatch = new ArrayList<>(); // fresh list, because the previous one is now held by the PageBatch
+            }
+        }
+        if (!numbersForCurrentBatch.isEmpty()) { // flush the trailing, partially filled batch
+            batches.add(create(batches.size(), pdfDoc, numbersForCurrentBatch, fileDir));
+        }
+        return batches; // empty when no page qualifies
+    }
+
+
+    @SneakyThrows
+    public static PageBatch create(int number, PDFDoc pdfDoc, List<Integer> pageNumbers, Path fileDir) { // writes the batch PDF into fileDir and returns the PageBatch describing it
+
+        if (pageNumbers.isEmpty()) {
+            throw new IllegalArgumentException("pageNumbers must not be empty");
+        }
+
+        Path batchDocPath = fileDir.resolve(formatBatchFilename(number));
+        try (var batchDoc = extractBatchDocument(pdfDoc, pageNumbers)) { // the temporary PDFDoc is closed once it has been saved
+            Optimizer.optimize(batchDoc);
+            batchDoc.save(batchDocPath.toFile().toString(), SDFDoc.SaveMode.LINEARIZED, null);
+        }
+        return new PageBatch(number, pageNumbers, batchDocPath, fileDir); // fileDir doubles as the batch's imagePipelineDir
+    }
+
+
+    private static PDFDoc extractBatchDocument(PDFDoc pdfDoc, List<Integer> pageBatch) throws PDFNetException { // copies the listed pages, in list order, into a new PDFDoc that the caller must close
+
+        if (pageBatch.isEmpty()) { // redundant when called from create(), which already rejects empty lists
+            throw new IllegalArgumentException();
+        }
+        PDFDoc batchDoc = new PDFDoc();
+        pageBatch.forEach(pageNumber -> addPageToNewDoc(pageNumber, pdfDoc, batchDoc));
+        return batchDoc;
+    }
+
+
+    @SneakyThrows
+    private static void addPageToNewDoc(Integer pageNumber, PDFDoc pdfDoc, PDFDoc batchDoc) { // @SneakyThrows lets this be used inside the forEach lambda despite the checked PDFNetException
+
+        batchDoc.pagePushBack(pdfDoc.getPage(pageNumber));
+    }
+
+}
--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/ImageDetectionService.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/ImageDetectionService.java
@ -7,28 +7,26 @@ import java.util.Set;
 import org.springframework.stereotype.Service;

 import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
+import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
 import com.pdftron.common.PDFNetException;
 import com.pdftron.pdf.Element;
 import com.pdftron.pdf.ElementReader;
 import com.pdftron.pdf.PDFDoc;

 import lombok.SneakyThrows;
+import lombok.experimental.UtilityClass;

-@Service
+@UtilityClass
 public class ImageDetectionService {

    // any image with smaller height and width than this gets thrown out, see everyPointInDashedLineIsImage.pdf
    private static final int PIXEL_THRESHOLD = 0;
-    private final OcrServiceSettings ocrServiceSettings;
-
-
-    public ImageDetectionService(OcrServiceSettings ocrServiceSettings) {this.ocrServiceSettings = ocrServiceSettings;}


    @SneakyThrows
-    public Set<Integer> findPagesToProcess(PDFDoc pdfDoc) {
+    public Set<Integer> findPagesToProcess(PDFDoc pdfDoc, Set<AzureOcrFeature> features) {

-        if (ocrServiceSettings.isProcessAllPages()) {
+        if (features.contains(AzureOcrFeature.ALL_PAGES)) {
            Set<Integer> pages = new HashSet<>();
            for (int i = 1; i <= pdfDoc.getPageCount(); i++) {
                pages.add(i);
--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/OCRService.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/OCRService.java
@ -8,6 +8,7 @@ import java.io.FileOutputStream;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.StandardCopyOption;
+import java.util.List;
 import java.util.Set;

 import org.springframework.stereotype.Service;
@ -16,10 +17,11 @@ import com.iqser.red.pdftronlogic.commons.InvisibleElementRemovalService;
 import com.iqser.red.pdftronlogic.commons.OCGWatermarkRemovalService;
 import com.iqser.red.pdftronlogic.commons.WatermarkRemovalService;
 import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
+import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
 import com.knecon.fforesight.service.ocr.processor.model.Statistics;
-import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingPipeline;
-import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingSupervisor;
 import com.knecon.fforesight.service.ocr.processor.visualizations.layers.OcrResult;
+import com.knecon.fforesight.service.ocr.processor.visualizations.utils.RotationCorrectionUtility;
+import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
 import com.knecon.fforesight.service.viewerdoc.service.PDFTronViewerDocumentService;
 import com.pdftron.pdf.PDFDoc;

@ -36,14 +38,15 @@ import lombok.extern.slf4j.Slf4j;
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
 public class OCRService {

+    public static final String IMAGE_PIPELINE_DIR = "image_pipeline";
+    public static final String AZURE_OUTPUT_DIR = "azure_output";
    IOcrMessageSender ocrMessageSender;
    WatermarkRemovalService watermarkRemovalService;
    InvisibleElementRemovalService invisibleElementRemovalService;
    PDFTronViewerDocumentService viewerDocumentService;
-    ImageDetectionService imageDetectionService;
+    BatchFactory batchFactory;
    AsyncOcrService asyncOcrService;
    OcrServiceSettings settings;
-    ImageProcessingPipeline imageProcessingPipeline;


    /**
@ -59,21 +62,21 @@ public class OCRService {
     * @param analyzeResultFile  result file with additional information
     */
    @Observed(name = "OCRService", contextualName = "run-ocr-on-document")
-    public void runOcrOnDocument(String dossierId, String fileId, boolean removeWatermark, Path tmpDir, File documentFile, File viewerDocumentFile, File analyzeResultFile) {
+    public void runOcrOnDocument(String dossierId, String fileId, Set<AzureOcrFeature> features, Path tmpDir, File documentFile, File viewerDocumentFile, File analyzeResultFile) {

-        if (removeWatermark) {
+        if (features.contains(AzureOcrFeature.REMOVE_WATERMARKS)) {
            removeWatermark(documentFile);
        }

        removeInvisibleElements(documentFile);

-        log.info("Starting OCR for file {}", fileId);
+        log.info("Starting OCR");
        long ocrStart = System.currentTimeMillis();

-        Statistics stats = runOcr(tmpDir, documentFile, viewerDocumentFile, fileId, dossierId, analyzeResultFile).getStatistics();
+        Statistics stats = runOcr(tmpDir, documentFile, viewerDocumentFile, fileId, dossierId, analyzeResultFile, features).getStatistics();

        long ocrEnd = System.currentTimeMillis();
-        log.info("ocr successful for file with dossierId {} and fileId {}, took {}", dossierId, fileId, humanizeDuration(ocrEnd - ocrStart));
+        log.info("OCR successful, took {}", humanizeDuration(ocrEnd - ocrStart));

        if (settings.isDebug()) {
            logRuntimeBreakdown(ocrEnd, ocrStart, stats);
@ -117,10 +120,16 @@ public class OCRService {


    @SneakyThrows
-    public OcrExecutionSupervisor runOcr(Path tmpDir, File documentFile, File viewerDocumentFile, String fileId, String dossierId, File analyzeResultFile) {
+    public OcrExecutionSupervisor runOcr(Path tmpDir,
+                                         File documentFile,
+                                         File viewerDocumentFile,
+                                         String fileId,
+                                         String dossierId,
+                                         File analyzeResultFile,
+                                         Set<AzureOcrFeature> features) {

-        Path tmpImageDir = tmpDir.resolve("images");
-        Path azureOutputDir = tmpDir.resolve("azure_output");
+        Path tmpImageDir = tmpDir.resolve(IMAGE_PIPELINE_DIR);
+        Path azureOutputDir = tmpDir.resolve(AZURE_OUTPUT_DIR);

        Files.createDirectories(azureOutputDir);
        Files.createDirectories(tmpImageDir);
@ -132,19 +141,18 @@ public class OCRService {
            OcrExecutionSupervisor supervisor = new OcrExecutionSupervisor(pdfDoc.getPageCount(), ocrMessageSender, fileId, settings);
            supervisor.getStatistics().setStart();

-            Set<Integer> pagesWithImages = imageDetectionService.findPagesToProcess(pdfDoc);
-            ImageProcessingSupervisor imageSupervisor = null;
-            if (settings.isFontStyleDetection()) {
-                imageSupervisor = imageProcessingPipeline.run(pagesWithImages, tmpImageDir, documentFile);
-            }
+            List<PageBatch> batches = batchFactory.splitIntoBatches(pdfDoc, supervisor, features, tmpImageDir);

-            supervisor.logImageExtractionFinished(pdfDoc.getPageCount(), pagesWithImages.size());
-
-            OcrResult ocrResult = asyncOcrService.awaitOcr(pdfDoc, supervisor, pagesWithImages, imageSupervisor);
+            OcrResult ocrResult = asyncOcrService.awaitOcr(pdfDoc, supervisor, features, batches);

            viewerDocumentService.addLayerGroups(documentFile, documentFile, ocrResult.regularLayers());
            viewerDocumentService.addLayerGroups(documentFile, viewerDocumentFile, ocrResult.debugLayers());

+            if (features.contains(AzureOcrFeature.ROTATION_CORRECTION)) {
+                RotationCorrectionUtility.rotatePages(documentFile.toPath(), documentFile.toPath(), ocrResult.anglesPerPage());
+                RotationCorrectionUtility.rotatePages(viewerDocumentFile.toPath(), viewerDocumentFile.toPath(), ocrResult.anglesPerPage());
+            }
+
            supervisor.getStatistics().drawingPdfFinished();

            supervisor.sendFinished();
--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/OcrExecutionSupervisor.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/OcrExecutionSupervisor.java
@ -92,10 +92,10 @@ public class OcrExecutionSupervisor {
    }


-    public void finishMappingResult(PageBatch pageRange) {
+    /**
+     * Marks the result mapping of a batch as finished: counts down {@code countDownPagesToProcess} once for every
+     * element of the batch, records the end of text writing in the statistics of that batch and sends a progress
+     * update (processed pages / total pages) for the file.
+     *
+     * @param batch the batch whose result mapping has finished
+     */
+    public void finishMappingResult(PageBatch batch) {

-        pageRange.forEach(pageIndex -> countDownPagesToProcess.countDown());
-        statistics.getBatchStats(pageRange).finishWritingText();
+        batch.forEach(pageIndex -> countDownPagesToProcess.countDown());
+        statistics.getBatchStats(batch).finishWritingText();
        ocrMessageSender.sendUpdate(fileId, this.processedPages(), getTotalPageCount());
    }

--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/imageprocessing/BBoxSnuggificationService.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/imageprocessing/BBoxSnuggificationService.java
@ -0,0 +1,179 @@
+package com.knecon.fforesight.service.ocr.processor.service.imageprocessing;
+
+import java.awt.geom.AffineTransform;
+import java.awt.geom.Rectangle2D;
+import java.nio.IntBuffer;
+import java.util.Optional;
+
+import com.azure.ai.documentintelligence.models.DocumentPage;
+import com.azure.ai.documentintelligence.models.DocumentWord;
+import com.knecon.fforesight.service.ocr.processor.visualizations.WritableOcrResultFactory;
+import com.knecon.fforesight.service.ocr.processor.visualizations.utils.RotationCorrectionUtility;
+import com.knecon.fforesight.service.ocr.v1.api.model.QuadPoint;
+import com.sun.jna.Pointer;
+
+import lombok.SneakyThrows;
+import lombok.extern.slf4j.Slf4j;
+import net.sourceforge.lept4j.Leptonica1;
+import net.sourceforge.lept4j.Numa;
+import net.sourceforge.lept4j.Pix;
+import net.sourceforge.lept4j.util.LeptUtils;
+
+/**
+ * This class attempts to shrink the BBox of a word so that it tightly fits the pixels of the rendered word.
+ * Horizontal words are tightened along the y-axis, vertical words along the x-axis. Any askew text is left as is.
+ */
+@Slf4j
+public class BBoxSnuggificationService {
+
+    public static final int PIXEL_COUNT_THRESHOLD = 2; // shrinking stops at the first row/column with more active pixels than this
+    private static final double AVERAGE_ANGLE_THRESHOLD = 0.2; // Skips snuggification, if the average remaining rotation of the words written from left-to-right is bigger than this
+    public static final int INDIVIDUAL_ANGLE_THRESHOLD = 5; // skips snuggification for a word, if its remaining rotation is larger than this angle
+    private static final int MAX_SCAN_DISTANCE = 25; // only this many pixels from each edge are searched for the first/last active row or column
+
+    private enum Operation {
+        HORIZONTAL,
+        VERTICAL,
+        NONE
+    }
+
+
+    /**
+     * Tries to compute a tighter bounding box for a single word.
+     *
+     * @param pageImage              image of the page the word is located on, may be {@code null}
+     * @param origin                 the word as returned by the OCR result
+     * @param resultToImageTransform transform from result coordinates to image coordinates; its inverse is used to map the snug box back
+     * @return the tightened box in result coordinates, or empty if there is no page image, the word is askew,
+     * the word image could not be extracted or nothing could be trimmed
+     */
+    @SneakyThrows
+    public static Optional<QuadPoint> snuggify(Pix pageImage, DocumentWord origin, AffineTransform resultToImageTransform) {
+
+        if (pageImage == null) {
+            return Optional.empty();
+        }
+
+        QuadPoint originTransformed = QuadPoint.fromPolygons(origin.getPolygon()).getTransformed(resultToImageTransform);
+        // NOTE(review): canBeSnuggified applies Math.toDegrees to getAngle() before calling getRemainingAngle, this call does not - confirm the expected unit.
+        double remainingAngle = Math.abs(RotationCorrectionUtility.getRemainingAngle(originTransformed.getAngle()));
+        QuadPoint.Direction direction = originTransformed.getDirection();
+
+        Operation operation = determineOperation(origin, direction, remainingAngle, originTransformed);
+
+        if (operation == Operation.NONE) {
+            return Optional.empty();
+        }
+
+        Pix wordImage = WritableOcrResultFactory.extractWordImage(originTransformed, pageImage);
+
+        if (wordImage == null) {
+            log.debug("Unable to extract word image! wordImage: {}, pageImage {}", originTransformed.getBounds2D(), new Rectangle2D.Float(0, 0, pageImage.w, pageImage.h));
+            return Optional.empty();
+        }
+
+        Optional<Rectangle2D> snugBox;
+        try {
+            snugBox = switch (operation) {
+                case HORIZONTAL -> snuggifyY(wordImage, originTransformed.getBounds2D());
+                case VERTICAL -> snuggifyX(wordImage, originTransformed.getBounds2D());
+                default -> Optional.empty();
+            };
+        } finally {
+            // the word image lives in native memory, release it even if the pixel counting fails
+            LeptUtils.disposePix(wordImage);
+        }
+
+        AffineTransform imageToResultTransform = resultToImageTransform.createInverse();
+        return snugBox.map(snugBBox -> QuadPoint.fromRectangle2D(snugBBox, direction))
+                .map(bbox -> bbox.getTransformed(imageToResultTransform));
+
+    }
+
+
+    /**
+     * Selects the axis along which a word is tightened.
+     * HORIZONTAL is chosen if the word reads left/right with a remaining angle below {@link #INDIVIDUAL_ANGLE_THRESHOLD}, or,
+     * independent of its direction, if its content is shorter than 4 characters or its absolute angle is below three times
+     * {@link #AVERAGE_ANGLE_THRESHOLD}. Otherwise VERTICAL is chosen for words reading up/down with a remaining angle below
+     * {@link #INDIVIDUAL_ANGLE_THRESHOLD}. Everything else yields NONE.
+     */
+    private static Operation determineOperation(DocumentWord origin, QuadPoint.Direction direction, double remainingAngle, QuadPoint originTransformed) {
+
+        // NOTE(review): the second half of this condition ignores the direction, so short words are always treated as horizontal - confirm this is intended.
+        Operation operation = Operation.NONE;
+        if (((direction.equals(QuadPoint.Direction.RIGHT) || direction.equals(QuadPoint.Direction.LEFT)) && remainingAngle < INDIVIDUAL_ANGLE_THRESHOLD) //
+            || (origin.getContent().length() < 4 || Math.abs(originTransformed.getAngle()) < AVERAGE_ANGLE_THRESHOLD * 3)) {
+            operation = Operation.HORIZONTAL;
+        } else if ((direction.equals(QuadPoint.Direction.UP) || direction.equals(QuadPoint.Direction.DOWN)) && remainingAngle < INDIVIDUAL_ANGLE_THRESHOLD) {
+            operation = Operation.VERTICAL;
+        }
+        return operation;
+    }
+
+
+    /**
+     * Trims empty columns from the left and right edge of the word image.
+     *
+     * @return the box narrowed to the first/last active column, or empty if the columns could not be counted or nothing can be trimmed
+     */
+    private static Optional<Rectangle2D> snuggifyX(Pix wordImage, Rectangle2D origin) {
+
+        Numa colCounts = Leptonica1.pixCountPixelsByColumn(wordImage);
+        if (colCounts == null) {
+            return Optional.empty();
+        }
+        try {
+            int lastColumn = wordImage.w - 1;
+            int start = 0;
+            int end = lastColumn;
+            for (int i = start; i < Math.min(wordImage.w, MAX_SCAN_DISTANCE); i++) {
+                if (pixCountPerColumn(i, colCounts) > PIXEL_COUNT_THRESHOLD) {
+                    start = i;
+                    break;
+                }
+            }
+            for (int i = end; i > Math.max(0, wordImage.w - MAX_SCAN_DISTANCE); i--) {
+                if (pixCountPerColumn(i, colCounts) > PIXEL_COUNT_THRESHOLD) {
+                    end = i;
+                    break;
+                }
+            }
+            if (start == 0 && end == lastColumn) {
+                return Optional.empty();
+            }
+
+            // columns [0, start) and (end, lastColumn] are cut off, the active columns start and end stay inside the box
+            return Optional.of(new Rectangle2D.Double(origin.getX() + start, origin.getY(), origin.getWidth() - start - (lastColumn - end), origin.getHeight()));
+        } finally {
+            // the Numa is allocated by Leptonica in native memory and must be released explicitly
+            LeptUtils.dispose(colCounts);
+        }
+    }
+
+
+    /**
+     * Trims empty rows from the top and bottom edge of the word image.
+     *
+     * @return the box shortened to the first/last active row, or empty if nothing can be trimmed
+     */
+    private static Optional<Rectangle2D> snuggifyY(Pix wordImage, Rectangle2D origin) {
+
+        int lastRow = wordImage.h - 1;
+        int start = 0;
+        int end = lastRow;
+        for (int i = start; i < Math.min(wordImage.h, MAX_SCAN_DISTANCE); i++) {
+            if (pixCountPerRow(i, wordImage) > PIXEL_COUNT_THRESHOLD) {
+                start = i;
+                break;
+            }
+        }
+        for (int i = end; i > Math.max(0, wordImage.h - MAX_SCAN_DISTANCE); i--) {
+            if (pixCountPerRow(i, wordImage) > PIXEL_COUNT_THRESHOLD) {
+                end = i;
+                break;
+            }
+        }
+        if (start == 0 && end == lastRow) {
+            return Optional.empty();
+        }
+        // rows [0, start) and (end, lastRow] are cut off, the active rows start and end stay inside the box
+        return Optional.of(new Rectangle2D.Double(origin.getX(), origin.getY() + start, origin.getWidth(), origin.getHeight() - start - (lastRow - end)));
+    }
+
+
+    /**
+     * Counts the active pixels of one row of the image.
+     *
+     * @return the pixel count, or -1 if Leptonica reports an error for this row
+     */
+    private static int pixCountPerRow(int row, Pix pix) {
+
+        IntBuffer result = IntBuffer.allocate(1);
+        int success = Leptonica1.pixCountPixelsInRow(pix, row, result, null);
+        if (success == 0) {
+            return result.get();
+        } else {
+            return -1;
+        }
+    }
+
+
+    /**
+     * Reads the pixel count of one column from the counts returned by {@code pixCountPixelsByColumn}.
+     *
+     * @throws IndexOutOfBoundsException if the column is negative or not smaller than the number of counts
+     */
+    private static int pixCountPerColumn(int column, Numa colCounts) {
+
+        if (column < 0 || column >= colCounts.n) {
+            throw new IndexOutOfBoundsException("column " + column + " is out of bounds for column count " + colCounts.n);
+        }
+        Pointer pointer = colCounts.array.getPointer();
+
+        // Read the float value at position i. Each float takes 4 bytes.
+        return (int) pointer.getFloat((long) column * Float.BYTES);
+    }
+
+
+    /**
+     * Checks whether the words of a page are aligned well enough for snuggification.
+     * The remaining rotation (in degrees) is averaged over all words with at least 4 characters which read left-to-right
+     * after applying the image transform. If no such word exists the page can not be snuggified.
+     *
+     * @return {@code true} if the absolute average remaining rotation is below {@link #AVERAGE_ANGLE_THRESHOLD}
+     */
+    public static boolean canBeSnuggified(DocumentPage resultPage, AffineTransform imageTransform) {
+
+        double averageAngle = resultPage.getWords()
+                .stream()
+                .filter(word -> word.getContent().length() >= 4)
+                .map(DocumentWord::getPolygon)
+                .map(QuadPoint::fromPolygons)
+                .map(qp -> qp.getTransformed(imageTransform))
+                .filter(qp -> qp.getDirection().equals(QuadPoint.Direction.RIGHT))
+                .mapToDouble(QuadPoint::getAngle)
+                .map(Math::toDegrees)
+                .map(RotationCorrectionUtility::getRemainingAngle)
+                .average()
+                .orElse(Double.MAX_VALUE);
+
+        return Math.abs(averageAngle) < AVERAGE_ANGLE_THRESHOLD;
+    }
+
+}
--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/imageprocessing/FontStyleDetector.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/imageprocessing/FontStyleDetector.java
@ -84,6 +84,7 @@ public class FontStyleDetector implements Closeable {
                wordImage.textPosition().setFontMetricsProvider(Type0FontMetricsProvider.BOLD_INSTANCE);
                wordImage.textPosition().setFontStyle(FontStyle.BOLD);
            } else {
+                wordImage.textPosition().setFontMetricsProvider(Type0FontMetricsProvider.REGULAR_INSTANCE);
                wordImage.textPosition().setFontStyle(FontStyle.REGULAR);
            }
        }
--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/imageprocessing/GhostScriptOutputHandler.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/imageprocessing/GhostScriptOutputHandler.java
@ -1,16 +1,20 @@
 package com.knecon.fforesight.service.ocr.processor.service.imageprocessing;

+import static com.knecon.fforesight.service.ocr.processor.utils.ListSplittingUtils.formatIntervals;
+
 import java.io.BufferedReader;
 import java.io.File;
 import java.io.InputStream;
 import java.io.InputStreamReader;
+import java.util.List;
 import java.util.Map;
 import java.util.function.Consumer;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;

+import org.slf4j.MDC;
+
 import com.knecon.fforesight.service.ocr.processor.model.ImageFile;
-import com.knecon.fforesight.service.ocr.processor.model.PageBatch;

 import lombok.AccessLevel;
 import lombok.RequiredArgsConstructor;
@ -27,7 +31,7 @@ public class GhostScriptOutputHandler extends Thread {

    // If the stdError or stdOut buffer of a thread is not being emptied it might lock the process in case of errors, so we need to empty both streams to prevent a deadlock.
    // Since both need to read simultaneously we need to implement the readers as separate threads.
-
+    final int batchIdx;
    final InputStream is;
    final String processName;
    final Type type;
@ -36,24 +40,32 @@ public class GhostScriptOutputHandler extends Thread {
    final Consumer<ImageFile> outputHandler;
    final Consumer<String> errorHandler;

+    final Map<String, String> parentMdcContext;
+
    int currentPageNumber;


-    public static GhostScriptOutputHandler stdError(InputStream is, Consumer<String> errorHandler) {
+    public static GhostScriptOutputHandler stdError(int batchIdx, InputStream is, Consumer<String> errorHandler) {

-        return new GhostScriptOutputHandler(is, "GS", Type.ERROR, null, null, errorHandler);
+        return new GhostScriptOutputHandler(batchIdx, is, "GS", Type.ERROR, null, null, errorHandler, MDC.getCopyOfContextMap());
    }


-    public static GhostScriptOutputHandler stdOut(InputStream is, Map<Integer, ImageFile> pagesToProcess, Consumer<ImageFile> imageFileOutput, Consumer<String> errorHandler) {
+    public static GhostScriptOutputHandler stdOut(int batchIdx,
+                                                  InputStream is,
+                                                  Map<Integer, ImageFile> pagesToProcess,
+                                                  Consumer<ImageFile> imageFileOutput,
+                                                  Consumer<String> errorHandler) {

-        return new GhostScriptOutputHandler(is, "GS", Type.STD_OUT, pagesToProcess, imageFileOutput, errorHandler);
+        return new GhostScriptOutputHandler(batchIdx, is, "GS", Type.STD_OUT, pagesToProcess, imageFileOutput, errorHandler, MDC.getCopyOfContextMap());
    }


    @SneakyThrows
    public void run() {

+        MDC.setContextMap(parentMdcContext);
+
        try (InputStreamReader isr = new InputStreamReader(is); BufferedReader br = new BufferedReader(isr)) {

            String line;
@ -77,7 +89,9 @@ public class GhostScriptOutputHandler extends Thread {
            queueFinishedPage(currentPageNumber);

            if (!pagesToProcess.isEmpty()) {
-                errorHandler.accept(String.format("Ghostscript finished for batch, but pages %s remain unprocessed.", formatPagesToProcess()));
+                errorHandler.accept(String.format("Ghostscript finished for batch %d, but pages %s remain unprocessed.", batchIdx, formatPagesToProcess()));
+            } else {
+                log.info("{}: Batch rendered successfully!", batchIdx);
            }
        }

@ -86,10 +100,16 @@ public class GhostScriptOutputHandler extends Thread {

    private String formatPagesToProcess() {

-        var pages = new PageBatch();
-        pagesToProcess.keySet()
-                .forEach(pages::add);
-        return pages.toString();
+        // Formats the remaining page numbers as sorted intervals, abbreviated with "..." after the first four intervals.
+        List<String> intervals = formatIntervals(pagesToProcess.keySet()
+                                                         .stream()
+                                                         .sorted()
+                                                         .toList());
+        if (intervals.size() <= 4) {
+            return String.join(", ", intervals);
+        }
+
+        // The list returned by formatIntervals is not modified here: it may be unmodifiable, and adding to a subList view would also change the backing list.
+        return String.join(", ", intervals.subList(0, 4)) + ", ...";
    }


@ -106,7 +126,6 @@ public class GhostScriptOutputHandler extends Thread {
                currentPageNumber = pageNumber;
                return;
            }
-
            queueFinishedPage(currentPageNumber);
            currentPageNumber = pageNumber;
        }
@ -117,10 +136,10 @@ public class GhostScriptOutputHandler extends Thread {

        var imageFile = this.pagesToProcess.remove(pageNumber);
        if (imageFile == null) {
-            errorHandler.accept(String.format("Page number %d does not exist in this thread. It only has pagenumbers %s", pageNumber, pagesToProcess.keySet()));
+            errorHandler.accept(String.format("%d: Page number %d does not exist in this thread. It only has pagenumbers %s", batchIdx, pageNumber, pagesToProcess.keySet()));
        } else {
            if (!new File(imageFile.absoluteFilePath()).exists()) {
-                errorHandler.accept(String.format("Rendered page with number %d does not exist!", pageNumber));
+                errorHandler.accept(String.format("%d: Rendered page with number %d does not exist!", batchIdx, pageNumber));
            }
        }
        outputHandler.accept(imageFile);
--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/imageprocessing/GhostScriptService.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/imageprocessing/GhostScriptService.java
@ -2,18 +2,15 @@ package com.knecon.fforesight.service.ocr.processor.service.imageprocessing;

 import java.io.InputStream;
 import java.nio.file.Path;
-import java.util.ArrayList;
 import java.util.HashMap;
-import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 import java.util.function.Consumer;
-import java.util.stream.Collectors;

 import org.springframework.stereotype.Service;

 import com.knecon.fforesight.service.ocr.processor.model.ImageFile;
-import com.knecon.fforesight.service.ocr.processor.utils.ListSplittingUtils;
+import com.knecon.fforesight.service.ocr.processor.model.PageBatch;

 import lombok.AccessLevel;
 import lombok.RequiredArgsConstructor;
@ -25,132 +22,60 @@ import lombok.extern.slf4j.Slf4j;
@Service
@RequiredArgsConstructor
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
-@SuppressWarnings("PMD") // can't figure out how to safely close the stdOut and stdError streams in line 142/144
+@SuppressWarnings("PMD") // can't figure out how to safely close the stdOut and stdError streams in line 72/74
 public class GhostScriptService {

-    public static final int BATCH_SIZE = 256;
    static String FORMAT = ".tiff";
    static String DEVICE = "tiffgray";
    static int DPI = 300;
-    static int PROCESS_COUNT = 1;


    @SneakyThrows
-    public void renderPagesBatched(List<Integer> pagesToProcess,
-                                   String documentAbsolutePath,
-                                   Path tmpImageDir,
-                                   ImageProcessingSupervisor supervisor,
-                                   Consumer<ImageFile> successHandler,
-                                   Consumer<String> errorHandler) {
+    public void startBatchRender(PageBatch batch, ImageProcessingSupervisor supervisor, Path renderedImageDir, Consumer<ImageFile> successHandler, Consumer<String> errorHandler) {

-        List<List<ProcessInfo>> processInfoBatches = buildSubListForEachProcess(pagesToProcess,
-                                                                                PROCESS_COUNT,
-                                                                                BATCH_SIZE
-                                                                                * PROCESS_COUNT); // GS has a limit on how many pageIndices per call are possible, so we limit it to 256 pages per process
-        for (int batchIdx = 0; batchIdx < processInfoBatches.size(); batchIdx++) {
+        supervisor.requireNoErrors();

-            supervisor.requireNoErrors();
-
-            List<ProcessInfo> processInfos = processInfoBatches.get(batchIdx);
-
-            log.info("Batch {}: Running {} gs processes with ({}) pages each",
-                     batchIdx,
-                     processInfos.size(),
-                     processInfos.stream()
-                             .map(info -> info.pageNumbers().size())
-                             .map(String::valueOf)
-                             .collect(Collectors.joining(", ")));
-
-            int finalBatchIdx = batchIdx;
-            List<Process> processes = processInfos.stream()
-                    .parallel()
-                    .map(info -> buildCmdArgs(info.processIdx(), finalBatchIdx, info.pageNumbers(), tmpImageDir, documentAbsolutePath))
-                    .peek(s -> log.debug(String.join(" ", s.cmdArgs())))
-                    .map(processInfo -> executeProcess(processInfo, successHandler, errorHandler))
-                    .toList();
-
-            List<Integer> processExitCodes = new LinkedList<>();
-            for (Process process : processes) {
-                processExitCodes.add(process.waitFor());
-            }
-            log.info("Batch {}: Ghostscript processes finished with exit codes {}", batchIdx, processExitCodes);
-        }
-    }
-
-
-    private List<List<ProcessInfo>> buildSubListForEachProcess(List<Integer> stitchedPageNumbers, int processCount, int batchSize) {
-
-        // GhostScript command line can only handle so many page numbers at once, so we split it into batches
-        int batchCount = (int) Math.ceil((double) stitchedPageNumbers.size() / batchSize);
-
-        log.info("Splitting {} page renderings across {} process(es) in {} batch(es) with size {}", stitchedPageNumbers.size(), processCount, batchCount, batchSize);
-
-        List<List<ProcessInfo>> processInfoBatches = new ArrayList<>(batchCount);
-        List<List<List<Integer>>> batchedBalancedSublist = ListSplittingUtils.buildBatchedBalancedSublist(stitchedPageNumbers.stream()
-                                                                                                                  .sorted()
-                                                                                                                  .toList(), processCount, batchCount);
-
-        for (var batch : batchedBalancedSublist) {
-            List<ProcessInfo> processInfos = new ArrayList<>(processCount);
-            for (int threadIdx = 0; threadIdx < batch.size(); threadIdx++) {
-                List<Integer> balancedPageNumbersSubList = batch.get(threadIdx);
-                processInfos.add(new ProcessInfo(threadIdx, balancedPageNumbersSubList));
-            }
-            processInfoBatches.add(processInfos);
-        }
-        return processInfoBatches;
+        log.info("Batch {}: starting GhostScript rendering with {} pages", batch, batch.size());
+        executeProcess(batch.getIndex(), buildCmdArgs(batch, renderedImageDir, batch.getBatchDoc()), successHandler, errorHandler);
    }


    @SneakyThrows
-    private ProcessCmdsAndRenderedImageFiles buildCmdArgs(Integer processIdx,
-                                                          Integer batchIdx,
-                                                          List<Integer> stitchedImagePageIndices,
-                                                          Path outputDir,
-                                                          String documentAbsolutePath) {
+    // Builds the GhostScript command line for one batch together with the image files the rendering is expected to produce.
+    private ProcessCmdsAndRenderedImageFiles buildCmdArgs(PageBatch batch, Path outputDir, Path document) {

-        String imagePathFormat = outputDir.resolve("output_" + processIdx + "_" + batchIdx + ".%04d" + FORMAT).toFile().toString();
+        // "%04d" is filled with the 1-based position of a page inside the batch, the batch index keeps file names of different batches apart
+        String imagePathFormat = outputDir.resolve("output_" + batch.getIndex() + ".%04d" + FORMAT).toFile().toString();

        Map<Integer, ImageFile> fullPageImages = new HashMap<>();
-        for (int i = 0; i < stitchedImagePageIndices.size(); i++) {
-            Integer pageNumber = stitchedImagePageIndices.get(i);
-            fullPageImages.put(pageNumber, new ImageFile(pageNumber, String.format(imagePathFormat, i + 1)));
+        List<Integer> allPageNumbers = batch.getAllPageNumbers();
+
+        // key: 1-based position of the page in the list of batch pages, value: image file carrying the page number taken from the batch
+        for (int i = 0; i < allPageNumbers.size(); i++) {
+            Integer pageNumber = allPageNumbers.get(i);
+            fullPageImages.put(i + 1, new ImageFile(pageNumber, String.format(imagePathFormat, i + 1)));
        }

-        String[] cmdArgs = buildCmdArgs(stitchedImagePageIndices, documentAbsolutePath, imagePathFormat);
+        String[] cmdArgs = buildCmdArgs(document, imagePathFormat);

        return new ProcessCmdsAndRenderedImageFiles(cmdArgs, fullPageImages);
    }


-    private String[] buildCmdArgs(List<Integer> pageNumbers, String documentAbsolutePath, String imagePathFormat) {
+    // Assembles the "gs" call: device and resolution come from DEVICE and DPI, output files follow imagePathFormat.
+    // No page list is passed anymore, only the document itself.
+    private String[] buildCmdArgs(Path document, String imagePathFormat) {

-        StringBuilder sPageList = new StringBuilder();
-        int i = 1;
-        for (Integer integer : pageNumbers) {
-            sPageList.append(integer);
-            if (i < pageNumbers.size()) {
-                sPageList.append(",");
-            }
-            i++;
-        }
-
-        return new String[]{"gs", "-dNOPAUSE", "-sDEVICE=" + DEVICE, "-r" + DPI, "-sPageList=" + sPageList, "-sOutputFile=" + imagePathFormat, documentAbsolutePath, "-c", "quit"};
+        return new String[]{"gs", "-dNOPAUSE", "-sDEVICE=" + DEVICE, "-r" + DPI, "-sOutputFile=" + imagePathFormat, document.toFile().toString(), "-c", "quit"};
    }


    @SneakyThrows
-    private Process executeProcess(ProcessCmdsAndRenderedImageFiles processInfo, Consumer<ImageFile> successHandler, Consumer<String> errorHandler) {
+    // Starts the GhostScript process for one batch and returns without waiting for it to exit.
+    // stdOut and stdError are each consumed by their own GhostScriptOutputHandler thread, which report through the given handlers.
+    private void executeProcess(int batchIdx, ProcessCmdsAndRenderedImageFiles processInfo, Consumer<ImageFile> successHandler, Consumer<String> errorHandler) {

        Process p = Runtime.getRuntime().exec(processInfo.cmdArgs());
        InputStream stdOut = p.getInputStream();
-        GhostScriptOutputHandler stdOutLogger = GhostScriptOutputHandler.stdOut(stdOut, processInfo.renderedPageImageFiles(), successHandler, errorHandler);
+        GhostScriptOutputHandler stdOutLogger = GhostScriptOutputHandler.stdOut(batchIdx, stdOut, processInfo.renderedPageImageFiles(), successHandler, errorHandler);
        InputStream stdError = p.getErrorStream();
-        GhostScriptOutputHandler stdErrorLogger = GhostScriptOutputHandler.stdError(stdError, errorHandler);
+        GhostScriptOutputHandler stdErrorLogger = GhostScriptOutputHandler.stdError(batchIdx, stdError, errorHandler);

+        // both readers have to run at the same time, see the class comment of GhostScriptOutputHandler
        stdOutLogger.start();
        stdErrorLogger.start();
-        return p;
    }


@ -158,8 +83,4 @@ public class GhostScriptService {

    }

-    private record ProcessInfo(Integer processIdx, List<Integer> pageNumbers) {
-
-    }
-
 }
--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/imageprocessing/ImageProcessingPipeline.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/imageprocessing/ImageProcessingPipeline.java
@ -1,15 +1,14 @@
 package com.knecon.fforesight.service.ocr.processor.service.imageprocessing;

-import java.io.File;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.List;
-import java.util.Set;
 import java.util.function.Consumer;

 import org.springframework.stereotype.Service;

 import com.knecon.fforesight.service.ocr.processor.model.ImageFile;
+import com.knecon.fforesight.service.ocr.processor.model.PageBatch;

 import lombok.AccessLevel;
 import lombok.RequiredArgsConstructor;
@ -21,29 +20,30 @@ import lombok.experimental.FieldDefaults;
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
 public class ImageProcessingPipeline {

+    public static final String PROCESSED_DIR = "processed";
+    public static final String RENDERED_DIR = "rendered";
+
    GhostScriptService ghostScriptService;
    ImageProcessingService imageProcessingService;


    @SneakyThrows
-    public ImageProcessingSupervisor run(Set<Integer> pageNumberSet, Path imageDir, File document) {
+    public ImageProcessingSupervisor addToPipeline(PageBatch batch) {

-        Path processedImageDir = imageDir.resolve("processed");
-        Path renderedImageDir = imageDir.resolve("rendered");
+        Path processedImageDir = batch.getImagePipelineDir().resolve(PROCESSED_DIR);
+        Path renderedImageDir = batch.getImagePipelineDir().resolve(RENDERED_DIR);

        Files.createDirectories(renderedImageDir);
        Files.createDirectories(processedImageDir);

-        List<Integer> pageNumbers = pageNumberSet.stream()
-                .sorted()
-                .toList();
+        List<Integer> pageNumbers = batch.getAllPageNumbers();

        ImageProcessingSupervisor supervisor = new ImageProcessingSupervisor(pageNumbers);

        Consumer<ImageFile> renderingSuccessConsumer = imageFile -> imageProcessingService.addToProcessingQueue(imageFile, processedImageDir, supervisor);
        Consumer<String> renderingErrorConsumer = supervisor::markError;

-        ghostScriptService.renderPagesBatched(pageNumbers, document.toString(), renderedImageDir, supervisor, renderingSuccessConsumer, renderingErrorConsumer);
+        ghostScriptService.startBatchRender(batch, supervisor, renderedImageDir, renderingSuccessConsumer, renderingErrorConsumer);

        return supervisor;
    }
--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/imageprocessing/ImageProcessingService.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/imageprocessing/ImageProcessingService.java
@ -38,6 +38,7 @@ public class ImageProcessingService {
                try {
                    process(processParams.unprocessedImage(), processParams.outputDir, processParams.supervisor());
                } catch (Exception e) {
+                    processParams.supervisor.markPageFinished(processParams.unprocessedImage());
                    log.error(e.getMessage(), e);
                }
            }
@ -54,31 +55,31 @@ public class ImageProcessingService {
    }


-    @SneakyThrows
+    /**
+     * Processes one rendered page image and reports the outcome to the supervisor.
+     * <p>
+     * The image is read as a {@link Pix}, run through {@code processPix} and written with
+     * {@code IFF_TIFF_PACKBITS} into {@code outputDir} under the file name of the unprocessed image.
+     * Any exception raised while doing so is logged and recorded via {@code supervisor.markError};
+     * the page is marked as finished in every case.
+     *
+     * @param unprocessedImage rendered image of a single page
+     * @param outputDir        directory that receives the processed image
+     * @param supervisor       receives errors and the finished page
+     */
     private void process(ImageFile unprocessedImage, Path outputDir, ImageProcessingSupervisor supervisor) {

-        supervisor.requireNoErrors();
-
-        synchronized (ImageProcessingSupervisor.class) {
-            // Leptonica is not thread safe, but is being called in WritableOcrResultFactory as well
-            Pix processedPix;
-            Pix pix = unprocessedImage.readPix();
-
-            String absoluteFilePath = outputDir.resolve(Path.of(unprocessedImage.absoluteFilePath()).getFileName()).toFile().toString();
-
-            processedPix = processPix(pix);
-            Leptonica1.pixWrite(absoluteFilePath, processedPix, ILeptonica.IFF_TIFF_PACKBITS);
-
-            LeptUtils.disposePix(pix);
-            LeptUtils.disposePix(processedPix);
-
-            ImageFile imageFile = new ImageFile(unprocessedImage.pageNumber(), absoluteFilePath);
+        String absoluteFilePath = outputDir.resolve(Path.of(unprocessedImage.absoluteFilePath()).getFileName()).toFile().toString();
+        ImageFile imageFile = new ImageFile(unprocessedImage.pageNumber(), absoluteFilePath);
+        try {
+            synchronized (ImageProcessingSupervisor.class) {
+                // Leptonica is not thread safe, but is being called in WritableOcrResultFactory as well
+                Pix pix = null;
+                Pix processedPix = null;
+                try {
+                    pix = unprocessedImage.readPix();
+                    processedPix = processPix(pix);
+                    Leptonica1.pixWrite(absoluteFilePath, processedPix, ILeptonica.IFF_TIFF_PACKBITS);
+                } finally {
+                    // Dispose on every path; previously a failing processPix/pixWrite skipped the disposal.
+                    if (pix != null) {
+                        LeptUtils.disposePix(pix);
+                    }
+                    if (processedPix != null) {
+                        LeptUtils.disposePix(processedPix);
+                    }
+                }
+            }
+        } catch (Exception e) {
+            // Keep the stack trace in the log; the supervisor only stores the message.
+            log.error(e.getMessage(), e);
+            supervisor.markError(e.getMessage());
+        } finally {
             supervisor.markPageFinished(imageFile);
         }
     }


-    @SneakyThrows
    private Pix processPix(Pix pix) {

        Pix binarized;
--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/imageprocessing/ImageProcessingSupervisor.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/imageprocessing/ImageProcessingSupervisor.java
@ -63,7 +63,7 @@ public class ImageProcessingSupervisor {

    private boolean hasErrors() {

-        return errors.isEmpty();
+        return !errors.isEmpty();
    }


@ -86,7 +86,7 @@ public class ImageProcessingSupervisor {
        if (this.errors.isEmpty()) {
            return;
        }
-        throw new IllegalStateException("Error(s) occurred during image processing: " + String.join("\n", errors));
+        throw new IllegalStateException("Error(s) occurred during image processing: " + String.join("\n", errors.subList(0, Math.min(errors.size(), 3))));
    }

 }
--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/visualizations/FontStyler.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/visualizations/FontStyler.java
@ -0,0 +1,23 @@
+package com.knecon.fforesight.service.ocr.processor.visualizations;
+
+import java.util.function.Function;
+import java.util.stream.Stream;
+
+import com.azure.ai.documentintelligence.models.DocumentSpan;
+import com.knecon.fforesight.service.ocr.processor.model.SpanLookup;
+
+/**
+ * Container type for font style lookup data; at present it only declares the {@link Lookups} record.
+ */
+public class FontStyler {
+
+
+    /**
+     * Bundles three span lookups, one each for the bold, italic and handwritten components.
+     */
+    public record Lookups(SpanLookup<DocumentSpan> bold, SpanLookup<DocumentSpan> italic, SpanLookup<DocumentSpan> handwritten) {
+
+        /**
+         * Creates a {@code Lookups} whose three lookups are each built from an empty stream.
+         */
+        public static Lookups empty() {
+
+            SpanLookup<DocumentSpan> noBold = new SpanLookup<>(Stream.empty(), Function.identity());
+            SpanLookup<DocumentSpan> noItalic = new SpanLookup<>(Stream.empty(), Function.identity());
+            SpanLookup<DocumentSpan> noHandwritten = new SpanLookup<>(Stream.empty(), Function.identity());
+            return new Lookups(noBold, noItalic, noHandwritten);
+        }
+
+    }
+
+}
--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/visualizations/WritableOcrResult.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/visualizations/WritableOcrResult.java
@ -20,6 +20,7 @@ import lombok.experimental.FieldDefaults;
 public final class WritableOcrResult {

    int pageNumber;
+    double angle;
    @Builder.Default
    List<TextPositionInImage> textPositionInImage = Collections.emptyList();
    @Builder.Default
--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/visualizations/WritableOcrResultFactory.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/visualizations/WritableOcrResultFactory.java
@ -11,6 +11,8 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
+import java.util.Optional;
+import java.util.Set;
 import java.util.function.Function;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
@ -25,54 +27,67 @@ import com.azure.ai.documentintelligence.models.DocumentTableCell;
 import com.azure.ai.documentintelligence.models.DocumentWord;
 import com.azure.ai.documentintelligence.models.FontWeight;
 import com.google.common.base.Functions;
+import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
 import com.knecon.fforesight.service.ocr.processor.model.ImageFile;
-import com.knecon.fforesight.service.ocr.processor.model.PageInformation;
 import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
+import com.knecon.fforesight.service.ocr.processor.model.PageInformation;
 import com.knecon.fforesight.service.ocr.processor.model.SpanLookup;
 import com.knecon.fforesight.service.ocr.processor.model.TextPositionInImage;
-import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
 import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.FontStyleDetector;
+import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingPipeline;
 import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingSupervisor;
+import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.BBoxSnuggificationService;
 import com.knecon.fforesight.service.ocr.processor.visualizations.fonts.FontMetricsProvider;
 import com.knecon.fforesight.service.ocr.processor.visualizations.fonts.FontStyle;
 import com.knecon.fforesight.service.ocr.processor.visualizations.fonts.Type0FontMetricsProvider;
+import com.knecon.fforesight.service.ocr.processor.visualizations.utils.RotationCorrectionUtility;
+import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
 import com.knecon.fforesight.service.ocr.v1.api.model.QuadPoint;

 import lombok.AccessLevel;
 import lombok.Getter;
 import lombok.SneakyThrows;
 import lombok.experimental.FieldDefaults;
+import lombok.extern.slf4j.Slf4j;
 import net.sourceforge.lept4j.Box;
 import net.sourceforge.lept4j.Leptonica1;
 import net.sourceforge.lept4j.Pix;
 import net.sourceforge.lept4j.util.LeptUtils;

+@Slf4j
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
 public class WritableOcrResultFactory {

-    FontMetricsProvider regularFont = Type0FontMetricsProvider.REGULAR_INSTANCE;
-    FontMetricsProvider boldFont = Type0FontMetricsProvider.BOLD_INSTANCE;
-    FontMetricsProvider italicFont = Type0FontMetricsProvider.ITALIC_INSTANCE;
-    FontMetricsProvider boldItalicFont = Type0FontMetricsProvider.BOLD_ITALIC_INSTANCE;
-
    @Getter
-    Map<Integer, AffineTransform> pageCtms;
+    Map<Integer, AffineTransform> resultToPageTransforms;
    Map<Integer, PageInformation> pageInformation;
+    ImageProcessingPipeline imageProcessingPipeline;
    OcrServiceSettings settings;
-    ImageProcessingSupervisor imageSupervisor;
+    Set<AzureOcrFeature> features;


    @SneakyThrows
-    public WritableOcrResultFactory(Map<Integer, PageInformation> pageInformation, OcrServiceSettings settings, ImageProcessingSupervisor imageSupervisor) {
+    public WritableOcrResultFactory(Map<Integer, PageInformation> pageInformation,
+                                    ImageProcessingPipeline imageProcessingPipeline,
+                                    OcrServiceSettings settings,
+                                    Set<AzureOcrFeature> features) {

+        this.imageProcessingPipeline = imageProcessingPipeline;
        this.pageInformation = pageInformation;
-        pageCtms = Collections.synchronizedMap(new HashMap<>());
+        resultToPageTransforms = Collections.synchronizedMap(new HashMap<>());
        this.settings = settings;
-        this.imageSupervisor = imageSupervisor;
+        this.features = features;
    }


-    public List<WritableOcrResult> buildOcrResultToWrite(AnalyzeResult analyzeResult, PageBatch pageOffset) throws InterruptedException {
+    public List<WritableOcrResult> buildOcrResultToWrite(AnalyzeResult analyzeResult, PageBatch batch) throws InterruptedException {
+
+        Map<Integer, Double> anglesPerPage = analyzeResult.getPages()
+                .stream()
+                .collect(Collectors.toMap(DocumentPage::getPageNumber, documentPage -> -documentPage.getAngle()));
+        RotationCorrectionUtility.rotatePages(batch.getBatchDoc(), batch.getBatchDoc(), anglesPerPage);
+
+        ImageProcessingSupervisor imageSupervisor = imageProcessingPipeline.addToPipeline(batch);

        List<WritableOcrResult> writableOcrResultList = new ArrayList<>();

@ -80,16 +95,16 @@ public class WritableOcrResultFactory {

        for (DocumentPage resultPage : analyzeResult.getPages()) {

-            PageInformation pageInformation = getPageInformation(getPageNumber(pageOffset, resultPage));
-            AffineTransform pageCtm = getPageCTM(pageInformation, resultPage.getWidth());
-            pageCtms.put(getPageNumber(pageOffset, resultPage), pageCtm);
+            PageInformation pageInformation = getPageInformation(getPageNumber(batch, resultPage));
+            AffineTransform resultToPageTransform = buildResultToPageTransform(pageInformation, resultPage.getWidth());
+            resultToPageTransforms.put(getPageNumber(batch, resultPage), resultToPageTransform);

-            List<TextPositionInImage> words = buildTextPositionsInImage(pageOffset, resultPage, pageCtm, lookups, pageInformation);
+            List<TextPositionInImage> words = buildTextPositionsInImage(batch, resultPage, resultToPageTransform, lookups, pageInformation, imageSupervisor);

-            var builder = WritableOcrResult.builder().pageNumber(pageInformation.number()).textPositionInImage(words);
+            var builder = WritableOcrResult.builder().pageNumber(pageInformation.number()).textPositionInImage(words).angle(-resultPage.getAngle());

            if (settings.isDrawTablesAsLines()) {
-                builder.tableLines(getTableLines(analyzeResult, pageInformation, pageCtm));
+                builder.tableLines(getTableLines(analyzeResult, pageInformation, resultToPageTransform));
            }

            writableOcrResultList.add(builder.build());
@ -101,46 +116,74 @@ public class WritableOcrResultFactory {

    private List<TextPositionInImage> buildTextPositionsInImage(PageBatch pageOffset,
                                                                DocumentPage resultPage,
-                                                                AffineTransform pageCtm,
+                                                                AffineTransform resultToPageTransform,
                                                                Lookups lookups,
-                                                                PageInformation pageInformation) throws InterruptedException {
+                                                                PageInformation pageInformation,
+                                                                ImageProcessingSupervisor imageSupervisor) throws InterruptedException {

-        if (!settings.isFontStyleDetection()) {
-            return buildText(resultPage, pageCtm, lookups, pageInformation);
+        if (!settings.isSnuggify() && !features.contains(AzureOcrFeature.FONT_STYLE_DETECTION)) {
+            return buildText(resultPage, resultToPageTransform, lookups, pageInformation);
        }

        ImageFile imageFile = imageSupervisor.awaitProcessedPage(getPageNumber(pageOffset, resultPage));

        if (imageFile == null) {
-            return buildText(resultPage, pageCtm, lookups, pageInformation);
+            return buildText(resultPage, resultToPageTransform, lookups, pageInformation);
        }

        synchronized (ImageProcessingSupervisor.class) {
-            return buildTextWithBoldDetection(resultPage, pageCtm, pageInformation, imageFile);
+            // Leptonica is not thread safe, but is being called in ImageProcessingService as well
+
+            if (features.contains(AzureOcrFeature.FONT_STYLE_DETECTION)) {
+                return buildTextWithBoldDetection(resultPage, resultToPageTransform, pageInformation, imageFile);
+            }
+
+            return buildTextWithSnugBBoxes(resultPage, imageFile, resultToPageTransform, lookups, pageInformation);
        }
    }


-    private static List<TextPositionInImage> buildTextWithBoldDetection(DocumentPage resultPage, AffineTransform pageCtm, PageInformation pageInformation, ImageFile imageFile) {
-        // Leptonica is not thread safe, but is being called in ImageProcessingService as well
+    @SneakyThrows
+    private List<TextPositionInImage> buildTextWithBoldDetection(DocumentPage resultPage,
+                                                                 AffineTransform resultToPageTransform,
+                                                                 PageInformation pageInformation,
+                                                                 ImageFile imageFile) {
+
        Pix pageImage = imageFile.readPix();
        List<TextPositionInImage> words = new ArrayList<>();

        try (FontStyleDetector fontStyleDetector = new FontStyleDetector()) {

-            AffineTransform imageTransform = new AffineTransform();
-            double scalingFactor = pageImage.w / resultPage.getWidth();
-            imageTransform.scale(scalingFactor, scalingFactor);
+            AffineTransform resultToImageTransform = buildResultToImageTransform(resultPage, pageImage);

            for (DocumentWord word : resultPage.getWords()) {
-
-                TextPositionInImage textPosition = new TextPositionInImage(word, pageCtm, Type0FontMetricsProvider.REGULAR_INSTANCE, FontStyle.REGULAR);
+                TextPositionInImage textPosition;
+                if (canBeSnuggified(resultPage, resultToImageTransform)) {
+                    textPosition = buildTextPositionInImageWithSnugBBox(word,
+                                                                        resultToPageTransform,
+                                                                        new FontInformation(FontStyle.REGULAR, Type0FontMetricsProvider.REGULAR_INSTANCE),
+                                                                        pageImage,
+                                                                        resultToImageTransform);
+                } else {
+                    textPosition = new TextPositionInImage(QuadPoint.fromPolygons(word.getPolygon()),
+                                                           word.getContent(),
+                                                           resultToPageTransform,
+                                                           new FontInformation(FontStyle.REGULAR, Type0FontMetricsProvider.REGULAR_INSTANCE).font(),
+                                                           new FontInformation(FontStyle.REGULAR, Type0FontMetricsProvider.REGULAR_INSTANCE).fontStyle(),
+                                                           false);
+                }

                if (intersectsIgnoreZone(pageInformation.wordBBoxes(), textPosition)) {
                    textPosition.setOverlapsIgnoreZone(true);
                }

-                Pix wordImage = extractWordImage(word, imageTransform, pageImage);
+                QuadPoint originTransformed = QuadPoint.fromPolygons(word.getPolygon()).getTransformed(resultToImageTransform);
+                Pix wordImage = extractWordImage(originTransformed, pageImage);
+
+                if (wordImage == null) {
+                    log.debug("Unable to extract word image! wordImage: {}, pageImage {}", originTransformed.getBounds2D(), new Rectangle2D.Float(0, 0, pageImage.w, pageImage.h));
+                    continue;
+                }

                IntBuffer pixelCount = IntBuffer.allocate(1);
                Leptonica1.pixCountPixels(wordImage, pixelCount, null);
@ -162,9 +205,31 @@ public class WritableOcrResultFactory {
    }


-    private static Pix extractWordImage(DocumentWord word, AffineTransform imageTransform, Pix pageImage) {
+    /**
+     * Builds the transform from coordinates of the analysis result page to pixel coordinates of the page image.
+     * <p>
+     * A point is first scaled uniformly and then passed through the rotation correction that
+     * {@link RotationCorrectionUtility} builds from the negated page angle and the image dimensions.
+     *
+     * @param resultPage page of the analysis result; supplies angle and width
+     * @param pageImage  image of the same page; supplies pixel width and height
+     * @return a new transform, independent of the one returned by the utility
+     */
+    @SneakyThrows
+    public static AffineTransform buildResultToImageTransform(DocumentPage resultPage, Pix pageImage) {
+
+        double correctionAngle = -resultPage.getAngle();
+        int quadrant = RotationCorrectionUtility.getQuadrantRotation(correctionAngle);
+
+        // Copying the rotation correction is the same as concatenating it onto a fresh identity transform.
+        AffineTransform imageTransform = new AffineTransform(RotationCorrectionUtility.buildTransform(correctionAngle, pageImage.w, pageImage.h));
+
+        // Quadrants 1 and 3 match the image height against the result width
+        // (presumably because the page lies sideways in the image - confirm against RotationCorrectionUtility).
+        boolean useImageHeight = quadrant == 1 || quadrant == 3;
+        int imageExtent = useImageHeight ? pageImage.h : pageImage.w;
+        double scalingFactor = imageExtent / resultPage.getWidth();
+
+        imageTransform.scale(scalingFactor, scalingFactor);
+        return imageTransform;
+    }
+
+
+    public static Pix extractWordImage(QuadPoint wordPosition, Pix pageImage) {
+
+        Rectangle2D wordBBox = wordPosition.getBounds2D();
+        Rectangle2D pageBBox = new Rectangle2D.Double(0, 0, pageImage.w, pageImage.h);
+
+        if (!pageBBox.contains(wordBBox)) {
+            return null;
+        }

-        Rectangle2D wordBBox = QuadPoint.fromPolygons(word.getPolygon()).getTransformed(imageTransform).getBounds2D();
        Box box = new Box((int) wordBBox.getX(), (int) wordBBox.getY(), (int) wordBBox.getWidth(), (int) wordBBox.getHeight(), 1);
        Pix wordImage = Leptonica1.pixClipRectangle(pageImage, box, null);
        box.clear();
@ -172,19 +237,65 @@ public class WritableOcrResultFactory {
    }


-    private List<TextPositionInImage> buildText(DocumentPage resultPage, AffineTransform pageCtm, Lookups lookups, PageInformation pageInformation) {
+    /**
+     * Builds the text positions of all words on a page, using tightly fitted bounding boxes when the page allows it.
+     * <p>
+     * Whether snug boxes are attempted is decided once per page via {@code canBeSnuggified}; otherwise each word keeps
+     * the polygon from the analysis result. Every text position is passed through
+     * {@code markTextOverlappingIgnoreZone} together with the page's word boxes.
+     *
+     * @param resultPage      page of the analysis result whose words are converted
+     * @param imageFile       image of the page; read as a {@link Pix} and disposed before this method returns
+     * @param pageCtm         transform handed to every created {@link TextPositionInImage}
+     * @param lookups         style lookups used to determine the font of each word
+     * @param pageInformation supplies the word boxes for the ignore-zone check
+     * @return one text position per word, in the order of {@code resultPage.getWords()}
+     */
+    public List<TextPositionInImage> buildTextWithSnugBBoxes(DocumentPage resultPage,
+                                                             ImageFile imageFile,
+                                                             AffineTransform pageCtm,
+                                                             Lookups lookups,
+                                                             PageInformation pageInformation) {
+
+        Pix pageImage = imageFile.readPix();
+        try {
+            AffineTransform resultToImageTransform = buildResultToImageTransform(resultPage, pageImage);
+
+            boolean snuggify = canBeSnuggified(resultPage, resultToImageTransform);
+
+            List<TextPositionInImage> list = new ArrayList<>();
+            for (DocumentWord word : resultPage.getWords()) {
+
+                FontInformation fontInformation = FontInformation.determineStyle(word, lookups);
+
+                TextPositionInImage textPositionInImage;
+                if (snuggify) {
+                    textPositionInImage = buildTextPositionInImageWithSnugBBox(word, pageCtm, fontInformation, pageImage, resultToImageTransform);
+                } else {
+                    textPositionInImage = new TextPositionInImage(QuadPoint.fromPolygons(word.getPolygon()),
+                                                                  word.getContent(),
+                                                                  pageCtm,
+                                                                  fontInformation.font(),
+                                                                  fontInformation.fontStyle(),
+                                                                  false);
+                }
+                markTextOverlappingIgnoreZone(textPositionInImage, pageInformation.wordBBoxes());
+                list.add(textPositionInImage);
+            }
+            return list;
+        } finally {
+            // Release the page image even when building a word fails; before, an exception skipped the disposal.
+            LeptUtils.disposePix(pageImage);
+        }
+    }
+
+
+    /**
+     * Tells whether bounding boxes on the given page may be fitted tightly: the snuggify setting must be enabled
+     * and {@code BBoxSnuggificationService.canBeSnuggified} must accept the page with this transform.
+     */
+    private boolean canBeSnuggified(DocumentPage resultPage, AffineTransform resultToImageTransform) {
+
+        if (!settings.isSnuggify()) {
+            return false;
+        }
+        return BBoxSnuggificationService.canBeSnuggified(resultPage, resultToImageTransform);
+    }
+
+
+    /**
+     * Builds the text positions of all words on a page directly from the result polygons, without using a page image.
+     * <p>
+     * The font of each word is determined from {@code lookups}; every text position is then passed through
+     * {@code markTextOverlappingIgnoreZone} together with the page's word boxes.
+     *
+     * @param resultPage      page of the analysis result whose words are converted
+     * @param pageCtm         transform handed to every created {@link TextPositionInImage}
+     * @param lookups         style lookups used to determine the font of each word
+     * @param pageInformation supplies the word boxes for the ignore-zone check
+     * @return one text position per word, in the order of {@code resultPage.getWords()}
+     */
+    public List<TextPositionInImage> buildText(DocumentPage resultPage, AffineTransform pageCtm, Lookups lookups, PageInformation pageInformation) {

        return resultPage.getWords()
                .stream()
-                .map(word -> buildTextPositionInImage(word, pageCtm, lookups))
+                .map(word -> {
+                    // Resolve the style once per word; it was previously computed separately for font and fontStyle.
+                    FontInformation fontInformation = FontInformation.determineStyle(word, lookups);
+                    return new TextPositionInImage(QuadPoint.fromPolygons(word.getPolygon()),
+                                                   word.getContent(),
+                                                   pageCtm,
+                                                   fontInformation.font(),
+                                                   fontInformation.fontStyle(),
+                                                   false);
+                })
                .map(textPositionInImage -> markTextOverlappingIgnoreZone(textPositionInImage, pageInformation.wordBBoxes()))
                .collect(Collectors.toList());
    }


-    private static int getPageNumber(PageBatch pageOffset, DocumentPage resultPage) {
+    private static int getPageNumber(PageBatch pageBatch, DocumentPage resultPage) {

-        return pageOffset.getPageNumber(resultPage.getPageNumber());
+        return pageBatch.getPageNumber(resultPage.getPageNumber());
    }


@ -214,36 +325,53 @@ public class WritableOcrResultFactory {
                                                                              .flatMap(Collection::stream), Functions.identity());

        return new Lookups(boldLookup, italicLookup, handWrittenLookup);
-
    }


-    private TextPositionInImage buildTextPositionInImage(DocumentWord dw, AffineTransform imageCTM, Lookups lookups) {
+    @SneakyThrows
+    private TextPositionInImage buildTextPositionInImageWithSnugBBox(DocumentWord dw,
+                                                                     AffineTransform imageCTM,
+                                                                     FontInformation fontInformation,
+                                                                     Pix pageImage,
+                                                                     AffineTransform resultToImageTransform) {

-        boolean bold = lookups.bold().containedInAnySpan(dw.getSpan());
-        boolean italic = lookups.italic().containedInAnySpan(dw.getSpan());
-        boolean handwritten = lookups.handwritten().containedInAnySpan(dw.getSpan());
+        QuadPoint origin = QuadPoint.fromPolygons(dw.getPolygon());

-        FontStyle fontStyle;
-        FontMetricsProvider font;
-        if (handwritten) {
-            fontStyle = FontStyle.HANDWRITTEN;
-            font = regularFont;
-        } else if (italic && bold) {
-            fontStyle = FontStyle.BOLD_ITALIC;
-            font = boldItalicFont;
-        } else if (bold) {
-            fontStyle = FontStyle.BOLD;
-            font = boldFont;
-        } else if (italic) {
-            fontStyle = FontStyle.ITALIC;
-            font = italicFont;
-        } else {
-            fontStyle = FontStyle.REGULAR;
-            font = regularFont;
+        Optional<QuadPoint> snugBBox = BBoxSnuggificationService.snuggify(pageImage, dw, resultToImageTransform);
+
+        return new TextPositionInImage(snugBBox.orElse(origin), dw.getContent(), imageCTM, fontInformation.font(), fontInformation.fontStyle(), snugBBox.isPresent());
+    }
+
+
+    private record FontInformation(FontStyle fontStyle, FontMetricsProvider font) {
+
+        public static FontInformation determineStyle(DocumentWord dw, Lookups lookups) {
+
+            boolean bold = lookups.bold().containedInAnySpan(dw.getSpan());
+            boolean italic = lookups.italic().containedInAnySpan(dw.getSpan());
+            boolean handwritten = lookups.handwritten().containedInAnySpan(dw.getSpan());
+
+            FontStyle fontStyle;
+            FontMetricsProvider font;
+            if (handwritten) {
+                fontStyle = FontStyle.HANDWRITTEN;
+                font = Type0FontMetricsProvider.REGULAR_INSTANCE;
+            } else if (italic && bold) {
+                fontStyle = FontStyle.BOLD_ITALIC;
+                font = Type0FontMetricsProvider.BOLD_ITALIC_INSTANCE;
+            } else if (bold) {
+                fontStyle = FontStyle.BOLD;
+                font = Type0FontMetricsProvider.BOLD_INSTANCE;
+            } else if (italic) {
+                fontStyle = FontStyle.ITALIC;
+                font = Type0FontMetricsProvider.ITALIC_INSTANCE;
+            } else {
+                fontStyle = FontStyle.REGULAR;
+                font = Type0FontMetricsProvider.REGULAR_INSTANCE;
+            }
+            return new FontInformation(fontStyle, font);
        }

-        return new TextPositionInImage(dw, imageCTM, font, fontStyle);
    }


@ -307,7 +435,7 @@ public class WritableOcrResultFactory {
    }


-    public static AffineTransform getPageCTM(PageInformation pageInformation, double imageWidth) {
+    public static AffineTransform buildResultToPageTransform(PageInformation pageInformation, double imageWidth) {

        double scalingFactor = calculateScalingFactor(imageWidth, pageInformation);
        AffineTransform imageToCropBoxScaling = new AffineTransform(scalingFactor, 0, 0, scalingFactor, 0, 0);
@ -353,7 +481,7 @@ public class WritableOcrResultFactory {
    }


-    private record Lookups(SpanLookup<DocumentSpan> bold, SpanLookup<DocumentSpan> italic, SpanLookup<DocumentSpan> handwritten) {
+    public record Lookups(SpanLookup<DocumentSpan> bold, SpanLookup<DocumentSpan> italic, SpanLookup<DocumentSpan> handwritten) {

        public static Lookups empty() {

--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/visualizations/fonts/FontMetricsProvider.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/visualizations/fonts/FontMetricsProvider.java
@ -8,7 +8,7 @@ import lombok.SneakyThrows;

 public interface FontMetricsProvider extends EmbeddableFont {

-    default FontMetrics calculateMetrics(String text, double textWidth, double textHeight) {
+    default FontMetrics calculateMetricsForAzureBBox(String text, double textWidth, double textHeight) {

        HeightAndDescent heightAndDescent = calculateHeightAndDescent(text);
        float fontSize = calculateFontSize(text, textWidth);
@ -18,6 +18,16 @@ public interface FontMetricsProvider extends EmbeddableFont {
    }


+    /**
+     * Computes font metrics for a text and the width and height of its bounding box.
+     * <p>
+     * The font size comes from {@link #calculateFontSize(String, double)}. The height scaling is
+     * {@code textHeight / (height - descent) * 1000 / fontSize}, and the first metric is
+     * {@code descent / 1000 * fontSize}, with height and descent taken from {@code calculateHeightAndDescent}.
+     * NOTE(review): the constant 1000 is presumably the number of glyph-space units per em - confirm.
+     *
+     * @param text       text the metrics are calculated for
+     * @param textWidth  width used to derive the font size
+     * @param textHeight height used to derive the height scaling
+     * @return the metrics built from scaled descent, font size and height scaling
+     */
+    default FontMetrics calculateMetricsForTightBBox(String text, double textWidth, double textHeight) {
+
+        HeightAndDescent glyphBounds = calculateHeightAndDescent(text);
+        float fontSize = calculateFontSize(text, textWidth);
+
+        var heightWithoutDescent = glyphBounds.height() - glyphBounds.descent();
+        float heightScaling = (float) ((textHeight / heightWithoutDescent) * 1000) / fontSize;
+        var scaledDescent = (glyphBounds.descent() / 1000) * fontSize;
+
+        return new FontMetrics(scaledDescent, fontSize, heightScaling);
+    }
+
+
    @SneakyThrows
    default float calculateFontSize(String text, double textWidth) {

--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/visualizations/layers/IdpLayer.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/visualizations/layers/IdpLayer.java
@ -42,7 +42,7 @@ import lombok.experimental.FieldDefaults;
 public class IdpLayer extends IdpLayerConfig {

    public static final int LINE_WIDTH = 1;
-    private Map<Integer, AffineTransform> pageCtms;
+    private Map<Integer, AffineTransform> resultToPageTransform;


    public void addSection(int pageNumber, DocumentSection section, SpanLookup<DocumentWord> wordsOnPage) {
@ -65,7 +65,7 @@ public class IdpLayer extends IdpLayerConfig {

        var sectionsOnPage = getOrCreateVisualizationsOnPage(pageNumber, vis);

-        sectionsOnPage.getColoredRectangles().add(new ColoredRectangle(bbox.getTransformed(pageCtms.get(pageNumber)).getBounds2D(), color, LINE_WIDTH));
+        sectionsOnPage.getColoredRectangles().add(new ColoredRectangle(bbox.getTransformed(resultToPageTransform.get(pageNumber)).getBounds2D(), color, LINE_WIDTH));
    }


@ -89,15 +89,20 @@ public class IdpLayer extends IdpLayerConfig {
        if (keyValue.getValue() != null) {
            addBoundingRegion(keyValue.getValue().getBoundingRegions(), keyValuePairs, VALUE_COLOR, pageOffset);

-            if (keyValue.getKey().getBoundingRegions().get(0).getPageNumber() != keyValue.getValue().getBoundingRegions().get(0).getPageNumber()) {
+            if (keyValue.getKey().getBoundingRegions()
+                        .get(0).getPageNumber() != keyValue.getValue().getBoundingRegions()
+                        .get(0).getPageNumber()) {
                return;
            }
-            int pageNumberWithOffset = pageOffset.getPageNumber(keyValue.getKey().getBoundingRegions().get(0).getPageNumber());
-            QuadPoint key = QuadPoint.fromPolygons(keyValue.getKey().getBoundingRegions().get(0).getPolygon());
-            QuadPoint value = QuadPoint.fromPolygons(keyValue.getValue().getBoundingRegions().get(0).getPolygon());
+            int pageNumberWithOffset = pageOffset.getPageNumber(keyValue.getKey().getBoundingRegions()
+                                                                        .get(0).getPageNumber());
+            QuadPoint key = QuadPoint.fromPolygons(keyValue.getKey().getBoundingRegions()
+                                                           .get(0).getPolygon());
+            QuadPoint value = QuadPoint.fromPolygons(keyValue.getValue().getBoundingRegions()
+                                                             .get(0).getPolygon());

            var line = LineUtils.findClosestMidpointLine(key, value);
-            line = LineUtils.transform(line, pageCtms.get(pageNumberWithOffset));
+            line = LineUtils.transform(line, resultToPageTransform.get(pageNumberWithOffset));
            var arrowHead = LineUtils.createArrowHead(line, Math.min(LineUtils.length(line), 5));
            var linesOnPage = getOrCreateVisualizationsOnPage(pageNumberWithOffset, keyValuePairs).getColoredLines();
            linesOnPage.add(new ColoredLine(line, KEY_VALUE_BBOX_COLOR, LINE_WIDTH));
@ -142,7 +147,7 @@ public class IdpLayer extends IdpLayerConfig {
    private void addPolygon(int pageNumber, List<Double> polygon, Visualizations visualizations, Color color) {

        VisualizationsOnPage visualizationsOnPage = getOrCreateVisualizationsOnPage(pageNumber, visualizations);
-        visualizationsOnPage.getColoredLines().addAll(LineUtils.quadPointAsLines(QuadPoint.fromPolygons(polygon).getTransformed(pageCtms.get(pageNumber)), color));
+        visualizationsOnPage.getColoredLines().addAll(LineUtils.quadPointAsLines(QuadPoint.fromPolygons(polygon).getTransformed(resultToPageTransform.get(pageNumber)), color));
    }


@ -181,7 +186,8 @@ public class IdpLayer extends IdpLayerConfig {

                var vis = getOrCreateVisualizationsOnPage(pageOffset.getPageNumber(boundingRegion.getPageNumber()), tables);

-                QuadPoint qp = QuadPoint.fromPolygons(boundingRegion.getPolygon()).getTransformed(pageCtms.get(pageOffset.getPageNumber(boundingRegion.getPageNumber())));
+                QuadPoint qp = QuadPoint.fromPolygons(boundingRegion.getPolygon())
+                        .getTransformed(resultToPageTransform.get(pageOffset.getPageNumber(boundingRegion.getPageNumber())));

                vis.getFilledRectangles().add(new FilledRectangle(qp.getBounds2D(), TITLE_COLOR, 0.2f));

--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/visualizations/layers/IdpLayerFactory.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/visualizations/layers/IdpLayerFactory.java
@ -20,9 +20,9 @@ public class IdpLayerFactory {
    private final IdpLayer idpLayer;


-    IdpLayerFactory(Map<Integer, AffineTransform> pageCtms) {
+    IdpLayerFactory(Map<Integer, AffineTransform> resultToPageTransform) {

-        this.idpLayer = new IdpLayer(pageCtms);
+        this.idpLayer = new IdpLayer(resultToPageTransform);
    }


--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/visualizations/layers/LayerFactory.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/visualizations/layers/LayerFactory.java
@ -1,17 +1,21 @@
 package com.knecon.fforesight.service.ocr.processor.visualizations.layers;

+import java.util.Collections;
+import java.util.HashMap;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;

 import com.azure.ai.documentintelligence.models.AnalyzeResult;
 import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
 import com.knecon.fforesight.service.ocr.processor.model.PageInformation;
 import com.knecon.fforesight.service.ocr.processor.service.OcrExecutionSupervisor;
 import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
-import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingSupervisor;
+import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingPipeline;
 import com.knecon.fforesight.service.ocr.processor.visualizations.WritableOcrResult;
 import com.knecon.fforesight.service.ocr.processor.visualizations.WritableOcrResultFactory;
+import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;
 import com.knecon.fforesight.service.viewerdoc.layers.LayerGroup;

 import lombok.AccessLevel;
@ -26,32 +30,43 @@ public class LayerFactory {
    OcrDebugLayerFactory ocrDebugLayerFactory;
    OcrTextLayerFactory ocrTextLayerFactory;
    OcrServiceSettings settings;
+    Set<AzureOcrFeature> features;
+    Map<Integer, Double> angles;


-    public LayerFactory(OcrServiceSettings settings, OcrExecutionSupervisor supervisor, ImageProcessingSupervisor imageSupervisor, Map<Integer, PageInformation> pageInformation) {
+    public LayerFactory(OcrServiceSettings settings,
+                        Set<AzureOcrFeature> features,
+                        OcrExecutionSupervisor supervisor,
+                        Map<Integer, PageInformation> pageInformation,
+                        ImageProcessingPipeline imageProcessingPipeline) {

-        this.writableOcrResultFactory = new WritableOcrResultFactory(pageInformation, settings, imageSupervisor);
-        this.idpLayerFactory = new IdpLayerFactory(writableOcrResultFactory.getPageCtms());
+        this.writableOcrResultFactory = new WritableOcrResultFactory(pageInformation, imageProcessingPipeline, settings, features);
+        this.idpLayerFactory = new IdpLayerFactory(writableOcrResultFactory.getResultToPageTransforms());
        this.ocrDebugLayerFactory = new OcrDebugLayerFactory();
        this.ocrTextLayerFactory = new OcrTextLayerFactory();
        this.settings = settings;
+        this.features = features;
        this.supervisor = supervisor;
+        this.angles = Collections.synchronizedMap(new HashMap<>());
    }


-    public void addAnalyzeResult(PageBatch pageRange, AnalyzeResult analyzeResult) throws InterruptedException {
+    public void processAnalyzeResult(PageBatch batch, AnalyzeResult analyzeResult) throws InterruptedException {
+
+        List<WritableOcrResult> results = writableOcrResultFactory.buildOcrResultToWrite(analyzeResult, batch);
+
+        results.forEach(result -> angles.put(result.getPageNumber(), result.getAngle()));

-        List<WritableOcrResult> results = writableOcrResultFactory.buildOcrResultToWrite(analyzeResult, pageRange);
        ocrTextLayerFactory.addWritableOcrResult(results);

        if (settings.isDebug()) {
            ocrDebugLayerFactory.addAnalysisResult(results);
        }
-        if (settings.isIdpEnabled()) {
-            idpLayerFactory.addAnalyzeResult(analyzeResult, pageRange);
+        if (features.contains(AzureOcrFeature.IDP)) {
+            idpLayerFactory.addAnalyzeResult(analyzeResult, batch);
        }

-        this.supervisor.finishMappingResult(pageRange);
+        this.supervisor.finishMappingResult(batch);
    }


@ -64,10 +79,10 @@ public class LayerFactory {
        if (settings.isDebug()) {
            debugLayers.add(ocrDebugLayerFactory.getOcrDebugLayer());
        }
-        if (settings.isIdpEnabled()) {
+        if (features.contains(AzureOcrFeature.IDP)) {
            debugLayers.add(idpLayerFactory.getIdpLayer());
        }
-        return new OcrResult(List.of(ocrTextLayer), debugLayers);
+        return new OcrResult(List.of(ocrTextLayer), debugLayers, angles);
    }

 }
--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/visualizations/layers/OcrDebugLayer.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/visualizations/layers/OcrDebugLayer.java
@ -31,7 +31,7 @@ public class OcrDebugLayer extends OcrDebugLayerConfig {
                                    word.getFontMetricsProvider(),
                                    Optional.of(word.getTextMatrix()),
                                    Optional.of(RenderingMode.FILL)));
-        bboxOnPage.getColoredLines().addAll(LineUtils.quadPointAsLines(word.getTransformedTextBBox()));
+        bboxOnPage.getColoredLines().addAll(LineUtils.quadPointAsLines(word.getTransformedTextBBox(), word.isSnugBBox()));
    }


@ -57,4 +57,11 @@ public class OcrDebugLayer extends OcrDebugLayerConfig {
        };
    }

+
+    /**
+     * Reports this layer as visible by default.
+     *
+     * @return always {@code true}
+     */
+    @Override
+    public boolean isVisibleByDefault() {
+
+        return true;
+    }
+
 }
--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/visualizations/layers/OcrResult.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/visualizations/layers/OcrResult.java
@ -1,9 +1,10 @@
 package com.knecon.fforesight.service.ocr.processor.visualizations.layers;

 import java.util.List;
+import java.util.Map;

 import com.knecon.fforesight.service.viewerdoc.layers.LayerGroup;

-public record OcrResult(List<LayerGroup> regularLayers, List<LayerGroup> debugLayers) {
+public record OcrResult(List<LayerGroup> regularLayers, List<LayerGroup> debugLayers, Map<Integer, Double> anglesPerPage) {

 }
--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/visualizations/utils/LineUtils.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/visualizations/utils/LineUtils.java
@ -14,12 +14,19 @@ import lombok.experimental.UtilityClass;
@UtilityClass
 public class LineUtils {

-    public List<ColoredLine> quadPointAsLines(QuadPoint rect) {
+    public List<ColoredLine> quadPointAsLines(QuadPoint rect, boolean tight) {

-        return List.of(new ColoredLine(new Line2D.Double(rect.a(), rect.b()), Color.ORANGE, 1),
+        if (tight) {
+            return List.of(new ColoredLine(new Line2D.Double(rect.a(), rect.b()), Color.GREEN, 1),
+                           new ColoredLine(new Line2D.Double(rect.b(), rect.c()), Color.GREEN, 1),
+                           new ColoredLine(new Line2D.Double(rect.c(), rect.d()), Color.GREEN, 1),
+                           new ColoredLine(new Line2D.Double(rect.d(), rect.a()), Color.GREEN, 1));
+        }
+
+        return List.of(new ColoredLine(new Line2D.Double(rect.a(), rect.b()), Color.BLUE, 1),
                       new ColoredLine(new Line2D.Double(rect.b(), rect.c()), Color.BLUE, 1),
-                       new ColoredLine(new Line2D.Double(rect.c(), rect.d()), Color.GREEN, 1),
-                       new ColoredLine(new Line2D.Double(rect.d(), rect.a()), Color.MAGENTA, 1));
+                       new ColoredLine(new Line2D.Double(rect.c(), rect.d()), Color.BLUE, 1),
+                       new ColoredLine(new Line2D.Double(rect.d(), rect.a()), Color.BLUE, 1));
    }


--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/visualizations/utils/RotationCorrectionUtility.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/visualizations/utils/RotationCorrectionUtility.java
@ -0,0 +1,205 @@
+package com.knecon.fforesight.service.ocr.processor.visualizations.utils;
+
+import java.awt.geom.AffineTransform;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+
+import com.knecon.fforesight.service.viewerdoc.LayerIdentifier;
+import com.pdftron.common.PDFNetException;
+import com.pdftron.pdf.PDFDoc;
+import com.pdftron.pdf.Page;
+import com.pdftron.sdf.Obj;
+import com.pdftron.sdf.SDFDoc;
+
+import lombok.SneakyThrows;
+import lombok.experimental.UtilityClass;
+
+@UtilityClass
+public class RotationCorrectionUtility {
+
+    public static final LayerIdentifier KNECON_ROTATION_CORRECTION = new LayerIdentifier(null, "ROTATION_CORRECTION");
+
+
+    /**
+     * Applies the rotation correction to the given pages of {@code inputFile} and writes the result to {@code outputFile}.
+     * <p>
+     * The input is first copied to a temporary file and read from there; presumably this allows
+     * {@code outputFile} to be the same path as {@code inputFile} - TODO confirm with callers.
+     * The temporary file is removed even when copying or rotating fails.
+     *
+     * @param inputFile      the PDF to read
+     * @param outputFile     the path the rotated PDF is written to
+     * @param anglesPerPage  rotation angle in degrees per page number
+     */
+    @SneakyThrows
+    public void rotatePages(Path inputFile, Path outputFile, Map<Integer, Double> anglesPerPage) {
+
+        Path tmp = Files.createTempFile("tempDocument", ".pdf");
+        try {
+            Files.copy(inputFile, tmp, StandardCopyOption.REPLACE_EXISTING);
+            try (var in = new FileInputStream(tmp.toFile()); var out = new FileOutputStream(outputFile.toFile())) {
+                rotatePages(in, out, anglesPerPage);
+            }
+        } finally {
+            // previously skipped when an exception was thrown above, leaking the temp file
+            Files.deleteIfExists(tmp);
+        }
+    }
+
+
+    /**
+     * Reads a PDF from {@code in}, applies {@link #rotatePage(int, PDFDoc, double)} for every entry of
+     * {@code anglesPerPage} and saves the document linearized to {@code out}.
+     * <p>
+     * The document is closed by this method; the streams are not closed here.
+     *
+     * @param in             stream the PDF is read from
+     * @param out            stream the rotated PDF is written to
+     * @param anglesPerPage  rotation angle in degrees per page number (passed to {@code PDFDoc#getPage})
+     */
+    @SneakyThrows
+    public void rotatePages(InputStream in, OutputStream out, Map<Integer, Double> anglesPerPage) {
+
+        try (PDFDoc doc = new PDFDoc(in)) {
+            anglesPerPage.forEach((pageNumber, angle) -> rotatePage(pageNumber, doc, angle));
+            doc.save(out, SDFDoc.SaveMode.LINEARIZED, null);
+        }
+    }
+
+
+    /**
+     * Applies the rotation correction to a single page.
+     * <p>
+     * The quadrant part of {@code angle} is added to the page's rotation entry (modulo 4). The remaining angle
+     * is applied by wrapping the page content: the new Contents array consists of a stream with the rotation
+     * commands, the previous content stream(s), and a closing stream.
+     *
+     * @param pageNumber number of the page, as understood by {@code PDFDoc#getPage}
+     * @param doc        the document containing the page
+     * @param angle      rotation angle in degrees
+     */
+    @SneakyThrows
+    public void rotatePage(int pageNumber, PDFDoc doc, double angle) {
+
+        int quadrants = getQuadrantRotation(angle);
+        Page page = doc.getPage(pageNumber);
+        page.setRotation((quadrants + page.getRotation()) % 4);
+        double remainingAngle = getRemainingAngle(angle, quadrants);
+
+        Obj contents = page.getContents();
+        // built after setRotation on purpose: the page dimensions are read while building the commands
+        String content = buildRotationContent(remainingAngle, page);
+        // explicit charset: the content stream bytes must not depend on the platform default encoding
+        Obj rotationStream = doc.createIndirectStream(content.getBytes(java.nio.charset.StandardCharsets.UTF_8));
+        Obj newContentsArray = doc.createIndirectArray();
+        newContentsArray.pushBack(rotationStream);
+        if (contents != null) {
+            // a page without a Contents entry has nothing to carry over
+            addPreviousContents(contents, newContentsArray);
+        }
+        String closingContent = buildClosingContent();
+        Obj closingStream = doc.createIndirectStream(closingContent.getBytes(java.nio.charset.StandardCharsets.UTF_8));
+        newContentsArray.pushBack(closingStream);
+        page.getSDFObj().erase("Contents");
+        page.getSDFObj().put("Contents", newContentsArray);
+    }
+
+
+    /**
+     * Returns the content appended after the original page content: a single {@code Q} operator,
+     * matching the {@code q} emitted at the start of the rotation content.
+     */
+    private String buildClosingContent() {
+
+        return "Q";
+    }
+
+
+    /**
+     * Builds the content stream fragment that is placed in front of the page content.
+     * <p>
+     * It opens a graphics state ({@code q}) and, inside a marked-content section named after
+     * {@link #KNECON_ROTATION_CORRECTION}, emits four {@code cm} commands: translate to the page centre,
+     * rotate by {@code angle}, scale uniformly, translate back.
+     *
+     * @param angle rotation angle in degrees
+     * @param page  page whose width and height define the centre and the scaling factor
+     * @return the commands joined by line feeds
+     */
+    private String buildRotationContent(double angle, Page page) throws PDFNetException {
+
+        double scale = getScalingFactor(angle, page);
+        String openMarkedContent = "/%s <<>> BDC".formatted(KNECON_ROTATION_CORRECTION.markedContentName());
+
+        AffineTransform toCentre = AffineTransform.getTranslateInstance(page.getPageWidth() / 2, page.getPageHeight() / 2);
+        AffineTransform rotation = AffineTransform.getRotateInstance(Math.toRadians(angle));
+        AffineTransform scaling = AffineTransform.getScaleInstance(scale, scale);
+        AffineTransform fromCentre = AffineTransform.getTranslateInstance(-page.getPageWidth() / 2, -page.getPageHeight() / 2);
+
+        return String.join("\n",
+                           "q",
+                           openMarkedContent,
+                           buildMatrixCommands(toCentre),
+                           buildMatrixCommands(rotation),
+                           buildMatrixCommands(scaling),
+                           buildMatrixCommands(fromCentre),
+                           "EMC");
+    }
+
+
+    /**
+     * Appends the page's existing content to {@code newContentsArray}: a single stream is appended as is,
+     * an array is appended element by element.
+     *
+     * @param contents         the page's current Contents object
+     * @param newContentsArray the array being assembled as the new Contents
+     * @throws IllegalStateException if {@code contents} is neither a stream nor an array
+     */
+    private void addPreviousContents(Obj contents, Obj newContentsArray) throws PDFNetException {
+
+        int type = contents.getType();
+
+        if (type == Obj.e_stream) {
+            newContentsArray.pushBack(contents);
+            return;
+        }
+        if (type != Obj.e_array) {
+            throw new IllegalStateException("Unexpected value: " + type);
+        }
+        for (int index = 0; index < contents.size(); index++) {
+            newContentsArray.pushBack(contents.getAt(index));
+        }
+    }
+
+
+    /**
+     * Computes the scaling factor for rotating the given page by {@code angle}, using the page's width and height.
+     *
+     * @param angle rotation angle in degrees
+     * @param page  page providing width and height
+     * @return the factor computed by {@link #getScalingFactor(double, double, double)}
+     */
+    public static double getScalingFactor(double angle, Page page) throws PDFNetException {
+
+        return getScalingFactor(angle, page.getPageWidth(), page.getPageHeight());
+    }
+
+
+    /**
+     * Computes the uniform scale at which a {@code w} x {@code h} rectangle, rotated by {@code angle},
+     * has an axis-aligned bounding box that fits into the original {@code w} x {@code h}.
+     * <p>
+     * Angles with an absolute value below 20 degrees are not scaled at all.
+     *
+     * @param angle rotation angle in degrees
+     * @param w     width of the rectangle
+     * @param h     height of the rectangle
+     * @return 1 for small angles, otherwise the smaller of the two axis ratios
+     */
+    public static double getScalingFactor(double angle, double w, double h) {
+
+        if (Math.abs(angle) < 20) {
+            return 1;
+        }
+
+        double radians = Math.toRadians(angle);
+        double absSin = Math.abs(Math.sin(radians));
+        double absCos = Math.abs(Math.cos(radians));
+
+        // bounding box of the rotated rectangle
+        double rotatedWidth = w * absCos + h * absSin;
+        double rotatedHeight = h * absCos + w * absSin;
+
+        double horizontalRatio = w / rotatedWidth;
+        double verticalRatio = h / rotatedHeight;
+        return Math.min(horizontalRatio, verticalRatio);
+    }
+
+
+    /**
+     * Builds the transform corresponding to a rotation by {@code angle} for a page of the given size.
+     * <p>
+     * The angle is split into a quadrant rotation (see {@link #getQuadrantRotation(double)}) and a remaining
+     * angle. The result first applies the quadrant rotation to a point and then rotates by the remaining angle
+     * around the centre of the (possibly swapped) page, scaled by
+     * {@link #getScalingFactor(double, double, double)}.
+     *
+     * @param angle          rotation angle in degrees
+     * @param originalWidth  page width before rotation
+     * @param originalHeight page height before rotation
+     * @return the combined transform
+     */
+    public static AffineTransform buildTransform(double angle, double originalWidth, double originalHeight) {
+
+        int quadrants = getQuadrantRotation(angle);
+
+        double h = originalHeight;
+        double w = originalWidth;
+
+        // an odd number of quarter turns swaps width and height
+        if (quadrants == 1 || quadrants == 3) {
+            w = originalHeight;
+            h = originalWidth;
+        }
+
+        // NOTE(review): the translation components use the swapped w/h (h for case 1, w - h for case 3);
+        // confirm these against the coordinate space the transform is applied to.
+        AffineTransform quadrantRotation = switch (quadrants) {
+            case 1 -> new AffineTransform(0, 1, -1, 0, h, 0);
+            case 2 -> new AffineTransform(-1, 0, 0, -1, w, h);
+            case 3 -> new AffineTransform(0, -1, 1, 0, w - h, h);
+            default -> new AffineTransform();
+        };
+
+        double remainder = getRemainingAngle(angle, quadrants);
+        double scale = getScalingFactor(remainder, w, h);
+
+        // AffineTransform appends each operation on the right, so for a point the operations take effect
+        // in reverse order: quadrant rotation, move centre to origin, scale, rotate, move back.
+        AffineTransform transform = new AffineTransform();
+        transform.translate(w / 2, h / 2);
+        transform.rotate(Math.toRadians(remainder));
+        transform.scale(scale, scale);
+        transform.translate(-w / 2, -h / 2);
+        transform.concatenate(quadrantRotation);
+
+        return transform;
+    }
+
+
+    /**
+     * Maps an angle to the nearest number of quarter turns.
+     * <p>
+     * The angle is normalised to [0, 360); (315, 360) and [0, 45] map to 0, (45, 135] to 1,
+     * (135, 225] to 2 and (225, 315] to 3.
+     *
+     * @param angle angle in degrees, may be negative or larger than 360
+     * @return the number of quarter turns, 0 to 3
+     */
+    public static int getQuadrantRotation(double angle) {
+
+        double normalized = angle % 360;
+        if (normalized < 0) {
+            normalized += 360;
+        }
+
+        if (normalized <= 45 || normalized > 315) {
+            return 0;
+        }
+        if (normalized <= 135) {
+            return 1;
+        }
+        if (normalized <= 225) {
+            return 2;
+        }
+        return 3;
+    }
+
+
+    /**
+     * Returns the angle that is left after removing {@code quadrants} quarter turns from {@code angle},
+     * normalised to the range (-180, 180].
+     * <p>
+     * Without the normalisation, an input such as -80 degrees with 3 quadrants yields -350 instead of the
+     * equivalent 10 degrees, which bypasses the small-angle rule of
+     * {@link #getScalingFactor(double, double, double)} and scales the page although an equivalent positive
+     * input (100 degrees, 1 quadrant) would not.
+     *
+     * @param angle     angle in degrees
+     * @param quadrants number of quarter turns to subtract
+     * @return the remaining angle in degrees, equivalent to {@code angle - 90 * quadrants} modulo 360
+     */
+    public static double getRemainingAngle(double angle, int quadrants) {
+
+        double remainder = (angle - 90 * quadrants) % 360;
+        if (remainder > 180) {
+            remainder -= 360;
+        } else if (remainder <= -180) {
+            remainder += 360;
+        }
+        return remainder;
+    }
+
+
+    /**
+     * Returns the angle that is left after removing the quarter turns determined by
+     * {@link #getQuadrantRotation(double)}.
+     *
+     * @param angle angle in degrees
+     * @return the remaining angle in degrees
+     */
+    public static double getRemainingAngle(double angle) {
+
+        int quadrants = getQuadrantRotation(angle);
+        return getRemainingAngle(angle, quadrants);
+    }
+
+
+    /**
+     * Renders a transform as a PDF {@code cm} command.
+     * <p>
+     * Numbers are formatted with {@link java.util.Locale#ROOT}: with the default locale, {@code %f} emits a
+     * decimal comma on e.g. German systems, which is not a valid number in a PDF content stream.
+     * <p>
+     * NOTE(review): the operands are written as scaleX, shearX, shearY, scaleY. The PDF {@code cm} operands
+     * {@code a b c d} correspond to m00, m10, m01, m11, so shearX/shearY appear swapped relative to that
+     * convention (i.e. the rotation direction is inverted). Left unchanged - confirm whether this is intended.
+     *
+     * @param at the transform to render
+     * @return the {@code cm} command with six operands
+     */
+    private String buildMatrixCommands(AffineTransform at) {
+
+        return String.format(java.util.Locale.ROOT,
+                             "%f %f %f %f %f %f cm",
+                             at.getScaleX(),
+                             at.getShearX(),
+                             at.getShearY(),
+                             at.getScaleY(),
+                             at.getTranslateX(),
+                             at.getTranslateY());
+    }
+
+}
--- a/azure-ocr-service/azure-ocr-service-processor/src/test/java/com/knecon/fforesight/service/ocr/processor/service/ImageProcessingPipelineTest.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/test/java/com/knecon/fforesight/service/ocr/processor/service/ImageProcessingPipelineTest.java
@ -4,8 +4,8 @@ import java.io.File;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.StandardCopyOption;
-import java.util.HashSet;
-import java.util.Set;
+import java.util.LinkedList;
+import java.util.List;

 import org.apache.pdfbox.Loader;
 import org.junit.jupiter.api.BeforeEach;
@ -13,11 +13,16 @@ import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;
 import org.springframework.core.io.ClassPathResource;

+import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
+import com.knecon.fforesight.service.ocr.processor.initializer.NativeLibrariesInitializer;
+import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
 import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.GhostScriptService;
 import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingPipeline;
 import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingService;
 import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingSupervisor;
 import com.knecon.fforesight.service.ocr.processor.utils.OsUtils;
+import com.pdftron.pdf.PDFDoc;
+import com.pdftron.pdf.PDFNet;
 import com.sun.jna.NativeLibrary;

 import lombok.SneakyThrows;
@ -31,10 +36,7 @@ class ImageProcessingPipelineTest {
    @BeforeEach
    public void setup() {

-        System.setProperty("jna.library.path", System.getenv("VCPKG_DYNAMIC_LIB"));
-        try (NativeLibrary leptonicaLib = NativeLibrary.getInstance("leptonica")) {
-            assert leptonicaLib != null;
-        }
+        new NativeLibrariesInitializer("demo:1650351709282:7bd235e003000000004ec28a6743e1163a085e2115de2536ab6e2cfe5a").init();

        ImageProcessingService imageProcessingService = new ImageProcessingService();
        GhostScriptService ghostScriptService = new GhostScriptService();
@ -46,7 +48,7 @@ class ImageProcessingPipelineTest {
    @SneakyThrows
    public void testImageProcessingPipeline() {

-        String fileName = "/home/kschuettler/Dokumente/TestFiles/OCR/VV-331340.pdf";
+        String fileName = "/home/kschuettler/Dokumente/TestFiles/OCR/VV-331340/VV-331340_OCRED_first15.pdf";

        File file;
        if (fileName.startsWith("files")) {
@ -63,21 +65,26 @@ class ImageProcessingPipelineTest {

        Files.copy(file.toPath(), documentFile, StandardCopyOption.REPLACE_EXISTING);

-        int numberOfpages;
-        try (var doc = Loader.loadPDF(file)) {
-            numberOfpages = doc.getNumberOfPages();
-        }
-        Set<Integer> pageNumbers = new HashSet<>();
-        for (int i = 1; i <= numberOfpages; i++) {
-            if (i % 2 == 0) {
-                continue;
+        try (var doc = new PDFDoc(fileName)) {
+            List<Integer> pageNumbers = new LinkedList<>();
+            for (int i = 1; i <= doc.getPageCount(); i++) {
+                if (i % 2 == 0) {
+                    continue;
+                }
+                pageNumbers.add(i);
            }
-            pageNumbers.add(i);
+            PageBatch batch = BatchFactory.create(0, doc, pageNumbers, tmpDir);
+
+            ImageProcessingSupervisor supervisor = imageProcessingPipeline.addToPipeline(batch);
+
+            batch.forEach(pageNumber -> {
+                try {
+                    assert supervisor.awaitProcessedPage(pageNumber) != null;
+                } catch (Exception e) {
+                    e.printStackTrace();
+                }
+            });
        }
-
-        ImageProcessingSupervisor supervisor = imageProcessingPipeline.run(pageNumbers, tmpDir.resolve("images"), documentFile.toFile());
-
-        supervisor.awaitAll();
    }

 }
--- a/azure-ocr-service/azure-ocr-service-processor/src/test/java/com/knecon/fforesight/service/ocr/processor/service/PageRotationTest.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/test/java/com/knecon/fforesight/service/ocr/processor/service/PageRotationTest.java
@ -0,0 +1,70 @@
+package com.knecon.fforesight.service.ocr.processor.service;
+
+import static com.knecon.fforesight.service.ocr.processor.visualizations.utils.RotationCorrectionUtility.KNECON_ROTATION_CORRECTION;
+
+import java.nio.file.Path;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+
+import com.knecon.fforesight.service.ocr.processor.visualizations.utils.RotationCorrectionUtility;
+import com.knecon.fforesight.service.viewerdoc.service.PageContentCleaner;
+import com.pdftron.pdf.ElementReader;
+import com.pdftron.pdf.ElementWriter;
+import com.pdftron.pdf.PDFDoc;
+import com.pdftron.pdf.PDFNet;
+import com.pdftron.pdf.Page;
+import com.pdftron.pdf.PageIterator;
+import com.pdftron.sdf.SDFDoc;
+
+import lombok.SneakyThrows;
+
+@Disabled // needs the PDFTron native library and developer-local test files; not runnable on the build server
+public class PageRotationTest {
+
+    /**
+     * Initializes PDFNet once before the tests of this class run.
+     */
+    @BeforeAll
+    public static void setUp() {
+
+        // NOTE(review): the license key is hard-coded here; consider reading it from configuration.
+        PDFNet.initialize("demo:1650351709282:7bd235e003000000004ec28a6743e1163a085e2115de2536ab6e2cfe5a");
+    }
+
+
+    /**
+     * Rotates pages 1 to 100 of a local test document by angles rising in equal steps from -88.2 to 90 degrees
+     * and writes the result to {@code /tmp}.
+     */
+    @Test
+    public void putRotation() {
+
+        double step = (double) 180 / 100;
+        Map<Integer, Double> angles = new HashMap<>();
+        int page = 1;
+        while (page <= 100) {
+            angles.put(page, -90 + (page * step));
+            page++;
+        }
+
+        Path inputFile = Path.of("/home/kschuettler/Dokumente/TestFiles/OCR/VV-331340-first100.pdf");
+        Path outputFile = Path.of("/tmp").resolve(inputFile.getFileName() + "_rotated.pdf");
+        RotationCorrectionUtility.rotatePages(inputFile, outputFile, angles);
+    }
+
+
+    /**
+     * Removes the rotation-correction marked content from every page of the rotated document in {@code /tmp}
+     * and saves the result next to it with the suffix {@code _derotated.pdf}.
+     * <p>
+     * The input path matches the file written by {@code putRotation()}, so that test has to run first.
+     */
+    @Test
+    @SneakyThrows
+    public void removeRotation() {
+
+        Path inputFile = Path.of("/tmp/VV-331340-first100.pdf_rotated.pdf");
+        try (var doc = new PDFDoc(inputFile.toFile()
+                                          .toString()); var reader = new ElementReader(); var writer = new ElementWriter(); PageIterator pageIterator = doc.getPageIterator()) {
+            // cleaner configured to strip only the marked content written by RotationCorrectionUtility
+            PageContentCleaner cleaner = PageContentCleaner.builder()
+                    .reader(reader)
+                    .writer(writer)
+                    .markedContentToRemove(Set.of(KNECON_ROTATION_CORRECTION.markedContentName()))
+                    .build();
+
+            while (pageIterator.hasNext()) {
+                Page page = pageIterator.next();
+                cleaner.removeMarkedContent(page);
+            }
+            doc.save(inputFile.resolveSibling(inputFile.getFileName() + "_derotated.pdf").toFile().toString(), SDFDoc.SaveMode.LINEARIZED, null);
+        }
+    }
+
+}
--- a/azure-ocr-service/azure-ocr-service-processor/src/test/java/com/knecon/fforesight/service/ocr/processor/service/SnugBoxesTest.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/test/java/com/knecon/fforesight/service/ocr/processor/service/SnugBoxesTest.java
@ -0,0 +1,246 @@
+package com.knecon.fforesight.service.ocr.processor.service;
+
+import static com.knecon.fforesight.service.ocr.processor.service.OCRService.IMAGE_PIPELINE_DIR;
+import static com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingPipeline.PROCESSED_DIR;
+
+import java.awt.Color;
+import java.awt.geom.AffineTransform;
+import java.awt.geom.Line2D;
+import java.awt.geom.Rectangle2D;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+
+import com.azure.ai.documentintelligence.models.AnalyzeResult;
+import com.azure.json.JsonOptions;
+import com.azure.json.JsonReader;
+import com.azure.json.implementation.DefaultJsonReader;
+import com.iqser.red.pdftronlogic.commons.InvisibleElementRemovalService;
+import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
+import com.knecon.fforesight.service.ocr.processor.initializer.NativeLibrariesInitializer;
+import com.knecon.fforesight.service.ocr.processor.model.ImageFile;
+import com.knecon.fforesight.service.ocr.processor.model.PageInformation;
+import com.knecon.fforesight.service.ocr.processor.model.TextPositionInImage;
+import com.knecon.fforesight.service.ocr.processor.visualizations.WritableOcrResult;
+import com.knecon.fforesight.service.ocr.processor.visualizations.WritableOcrResultFactory;
+import com.knecon.fforesight.service.ocr.processor.visualizations.layers.OcrDebugLayerFactory;
+import com.knecon.fforesight.service.ocr.processor.visualizations.utils.RotationCorrectionUtility;
+import com.knecon.fforesight.service.ocr.v1.api.model.QuadPoint;
+import com.knecon.fforesight.service.viewerdoc.service.PDFTronViewerDocumentService;
+import com.pdftron.pdf.ColorPt;
+import com.pdftron.pdf.ColorSpace;
+import com.pdftron.pdf.Element;
+import com.pdftron.pdf.ElementBuilder;
+import com.pdftron.pdf.ElementWriter;
+import com.pdftron.pdf.PDFDoc;
+import com.pdftron.pdf.Page;
+import com.sun.jna.Memory;
+import com.sun.jna.Native;
+import com.sun.jna.Pointer;
+import com.sun.jna.ptr.PointerByReference;
+
+import lombok.SneakyThrows;
+import net.sourceforge.lept4j.Box;
+import net.sourceforge.lept4j.Boxa;
+import net.sourceforge.lept4j.util.LeptUtils;
+
+@Disabled // leptonica is not available in build server
+public class SnugBoxesTest {
+
+    public static final int PAGE_NUMBER = 41;
+    public static final Path ORIGIN_FILE = Path.of("/home/kschuettler/Dokumente/TestFiles/OCR/VV-331340-first100.pdf");
+    public static final Path TEST_FOLDER = Path.of("/tmp/OCR_TEST/").resolve(ORIGIN_FILE.getFileName());
+    public static final Path PROCESSED_FOLDER = TEST_FOLDER.resolve(IMAGE_PIPELINE_DIR).resolve(PROCESSED_DIR);
+    public static final Path DESTINATION_FILE = TEST_FOLDER.resolve("SnugBoxesTest.pdf");
+    public static final Path RESULT_FILE = TEST_FOLDER.resolve(IMAGE_PIPELINE_DIR).resolve("azure_result_0.json");
+
+    PDFTronViewerDocumentService viewerDocumentService = new PDFTronViewerDocumentService(null);
+
+
+    @BeforeAll
+    public static void setUp() {
+
+        new NativeLibrariesInitializer("demo:1650351709282:7bd235e003000000004ec28a6743e1163a085e2115de2536ab6e2cfe5a").init();
+    }
+
+
+    @Test
+    @SneakyThrows
+    public void snugBoxes() {
+
+        String filePath = ORIGIN_FILE.toFile().toString();
+        File file = PROCESSED_FOLDER.resolve("output_0.%04d.tiff".formatted(PAGE_NUMBER)).toFile();
+        assert file.exists();
+        ImageFile imageFile = new ImageFile(PAGE_NUMBER, file.toString());
+        AnalyzeResult result = null;
+        try (var in = new FileInputStream(RESULT_FILE.toFile()); JsonReader reader = DefaultJsonReader.fromStream(in, new JsonOptions());) {
+            result = AnalyzeResult.fromJson(reader);
+        }
+
+        var resultPage = result.getPages()
+                .get(PAGE_NUMBER - 1);
+        WritableOcrResultFactory writableOcrResultFactory = new WritableOcrResultFactory(null, null, new OcrServiceSettings(), Set.of());
+        OcrDebugLayerFactory debugLayerFactory = new OcrDebugLayerFactory();
+        InvisibleElementRemovalService invisibleElementRemovalService = new InvisibleElementRemovalService();
+        try (var in = new FileInputStream(ORIGIN_FILE.toFile()); var out = new FileOutputStream(DESTINATION_FILE.toFile())) {
+            invisibleElementRemovalService.removeInvisibleElements(in, out, false);
+        }
+        PageInformation pageInformation = getPageInformation(PAGE_NUMBER, DESTINATION_FILE.toFile().toString());
+        WritableOcrResultFactory.Lookups empty = WritableOcrResultFactory.Lookups.empty();
+
+        AffineTransform pageCtm = getPageCtm(PAGE_NUMBER, filePath, resultPage.getWidth());
+//        pageCtm.preConcatenate(rotationCorrection);
+//        pageCtm.preConcatenate(quadrantTransform);
+//        Pix pageImage = imageFile.readPix();
+//        AffineTransform imageTransform = WritableOcrResultFactory.buildImageTransform(resultPage, pageImage);
+//        List<Rectangle2D> rects = new LinkedList<>();
+//        for (DocumentWord word : resultPage.getWords()) {
+//            QuadPoint quadPoint = QuadPoint.fromPolygons(word.getPolygon());
+//            Rectangle2D rect = quadPoint.getTransformed(imageTransform).getBounds2D();
+//            if (rect.getX() > 0 && rect.getY() > 0 && rect.getMaxX() < pageImage.w && rect.getMaxY() < pageImage.h) {
+//                rects.add(rect);
+//            }
+//        }
+//        Boxa boxa = createBoxaFromRectangles(rects);
+//        Pix drawedPix = Leptonica1.pixDrawBoxa(pageImage, boxa, 5, 1);
+//        Leptonica1.pixWrite("/tmp/OCR_TEST/VV-331340-first100.pdf/image_pipeline/page_" + PAGE_NUMBER + ".tiff", drawedPix, 5);
+
+//
+
+        List<TextPositionInImage> words = writableOcrResultFactory.buildTextWithSnugBBoxes(resultPage, imageFile, pageCtm, empty, pageInformation);
+        var results = new WritableOcrResult(PAGE_NUMBER, -resultPage.getAngle(), words, Collections.emptyList());
+        debugLayerFactory.addAnalysisResult(List.of(results));
+
+//        try (var doc = new PDFDoc(tmpFile.toString()); var out = new FileOutputStream(DESTINATION_FILE.toFile())) {
+//            PageRotationHelper.rotatePage(PAGE_NUMBER, doc, -resultPage.getAngle());
+//            var rects = resultPage.getWords()
+//                    .stream()
+//                    .map(DocumentWord::getPolygon)
+//                    .map(QuadPoint::fromPolygons)
+//                    .map(qp -> qp.getTransformed(pageCtm))
+//                    .toList();
+//            drawRects(doc, rects, PAGE_NUMBER);
+//            doc.save(out, SDFDoc.SaveMode.LINEARIZED, null);
+//        }
+//        Files.deleteIfExists(tmpFile);
+
+        viewerDocumentService.addLayerGroups(DESTINATION_FILE.toFile(), DESTINATION_FILE.toFile(), List.of(debugLayerFactory.getOcrDebugLayer()));
+        RotationCorrectionUtility.rotatePages(DESTINATION_FILE, DESTINATION_FILE, Map.of(PAGE_NUMBER, -resultPage.getAngle()));
+    }
+
+//
+//    private static List<Rectangle2D> readRectsFromBoxa(Boxa boxa) {
+//
+//        Pointer[] pointers = boxa.box.getPointer().getPointerArray(0, boxa.n);
+//        List<Rectangle2D> boxes = new ArrayList<>(boxa.n);
+//        for (int i = 0; i < boxa.n; i++) {
+//            Box box = new Box(pointers[i]);
+//            boxes.add(new Rectangle2D.Double(box.x, box.y, box.w, box.h));
+//            LeptUtils.dispose(box);
+//        }
+//        return boxes;
+//    }
+//
+//
+//    @SuppressWarnings("PMD") // Memory will be de-allocated with boxa
+//    public static Boxa createBoxaFromRectangles(List<Rectangle2D> rectangles) {
+//
+//        if (rectangles.isEmpty()) {
+//            return new Boxa();
+//        }
+//
+//        int n = rectangles.size();  // Number of rectangles
+//        int nalloc = n;             // Allocating memory for exactly 'n' boxes
+//        int refcount = 1;           // Default refcount
+//
+//        Pointer boxPointerArray = new Memory((long) Native.POINTER_SIZE * n);  // Memory for n pointers
+//
+//        for (int i = 0; i < n; i++) {
+//
+//            Rectangle2D rect = rectangles.get(i);
+//            var mem = new Memory(20L);
+//            mem.setInt(0, (int) rect.getX());
+//            mem.setInt(4, (int) rect.getY());
+//            mem.setInt(8, (int) rect.getWidth());
+//            mem.setInt(12, (int) rect.getHeight());
+//            mem.setInt(16, refcount);
+//
+//            // Write the pointer of each Box into the native memory
+//            boxPointerArray.setPointer((long) Native.POINTER_SIZE * i, mem);
+//        }
+//
+//        // Create a PointerByReference pointing to the native memory of the array
+//        PointerByReference boxPointerRef = new PointerByReference();
+//        boxPointerRef.setPointer(boxPointerArray);
+//
+//        // Create the Boxa instance
+//
+//        return new Boxa(n, nalloc, refcount, boxPointerRef);
+//    }
+
+
+    @SneakyThrows
+    private void drawRects(PDFDoc doc, List<QuadPoint> quadPoints, int pageNumber) {
+
+        try (ElementWriter writer = new ElementWriter(); ElementBuilder builder = new ElementBuilder()) {
+            Page page = doc.getPage(pageNumber);
+            writer.begin(page, ElementWriter.e_overlay);
+            for (QuadPoint quadPoint : quadPoints) {
+                quadPoint.asLines()
+                        .forEach(line -> {
+                            drawLine(line, builder, writer);
+                        });
+            }
+            writer.end();
+        }
+    }
+
+
+    @SneakyThrows
+    private static void drawLine(Line2D l, ElementBuilder builder, ElementWriter writer) {
+
+        float[] rgbComponents = Color.BLUE.getRGBColorComponents(null);
+
+        builder.pathBegin();
+        builder.moveTo(l.getX1(), l.getY1());
+        builder.lineTo(l.getX2(), l.getY2());
+        Element line = builder.pathEnd();
+
+        line.setPathStroke(true);
+        line.setPathFill(false);
+        line.getGState().setLineWidth(1);
+        line.getGState().setStrokeColorSpace(ColorSpace.createDeviceRGB());
+
+        try (ColorPt color = new ColorPt(rgbComponents[0], rgbComponents[1], rgbComponents[2])) {
+            line.getGState().setStrokeColor(color);
+        }
+        writer.writeElement(line);
+    }
+
+
+    @SneakyThrows
+    private static AffineTransform getPageCtm(int pageNumber, String file, double imageWidh) {
+
+        return WritableOcrResultFactory.buildResultToPageTransform(getPageInformation(pageNumber, file), imageWidh);
+    }
+
+
+    @SneakyThrows
+    private static PageInformation getPageInformation(int pageNumber, String file) {
+
+        try (var in = new FileInputStream(file); var doc = new PDFDoc(in)) {
+            return PageInformation.fromPage(pageNumber, doc.getPage(pageNumber));
+        }
+    }
+
+}
--- a/azure-ocr-service/azure-ocr-service-processor/src/test/java/com/knecon/fforesight/service/ocr/processor/service/Type0FontMetricsProviderTest.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/test/java/com/knecon/fforesight/service/ocr/processor/service/Type0FontMetricsProviderTest.java
@ -20,7 +20,7 @@ class Type0FontMetricsProviderTest {

        try (PDDocument document = Loader.loadPDF(new File(Type0FontMetricsProviderTest.class.getClassLoader().getResource("InvisibleText.pdf").getPath()))) {
            Type0FontMetricsProvider metricsFactory = Type0FontMetricsProvider.regular(document);
-            FontMetrics fontMetrics = metricsFactory.calculateMetrics("deine mutter", 100, 50);
+            FontMetrics fontMetrics = metricsFactory.calculateMetricsForAzureBBox("deine mutter", 100, 50);
        }

    }
--- a/azure-ocr-service/azure-ocr-service-server/build.gradle.kts
+++ b/azure-ocr-service/azure-ocr-service-server/build.gradle.kts
@ -8,6 +8,9 @@ plugins {
    id("org.sonarqube") version "4.3.0.3225"
    id("io.freefair.lombok") version "8.4"
 }
+pmd {
+    isConsoleOutput = true // write PMD results to the console (System.out)
+}

 configurations {
    all {
--- a/azure-ocr-service/azure-ocr-service-server/src/main/java/com/knecon/fforesight/service/ocr/v1/server/queue/OcrMessageReceiver.java
+++ b/azure-ocr-service/azure-ocr-service-server/src/main/java/com/knecon/fforesight/service/ocr/v1/server/queue/OcrMessageReceiver.java
@ -7,6 +7,7 @@ import java.nio.file.Path;
 import java.time.OffsetDateTime;
 import java.time.temporal.ChronoUnit;

+import org.slf4j.MDC;
 import org.springframework.amqp.AmqpRejectAndDontRequeueException;
 import org.springframework.amqp.core.Message;
 import org.springframework.amqp.rabbit.annotation.RabbitHandler;
@ -54,8 +55,9 @@ public class OcrMessageReceiver {
        Path tmpDir = Files.createTempDirectory(null);

        try {
+            MDC.put("fileId", fileId);
            log.info("--------------------------------------------------------------------------");
-            log.info("Start ocr for file with dossierId {} and fileId {}", dossierId, fileId);
+            log.info("Starting OCR");

            ocrMessageSender.sendOCRStarted(fileId);

@ -65,7 +67,7 @@ public class OcrMessageReceiver {

            fileStorageService.downloadFiles(request, documentFile);

-            ocrService.runOcrOnDocument(dossierId, fileId, request.isRemoveWatermark(), tmpDir, documentFile, viewerDocumentFile, analyzeResultFile);
+            ocrService.runOcrOnDocument(dossierId, fileId, request.getFeatures(), tmpDir, documentFile, viewerDocumentFile, analyzeResultFile);

            fileStorageService.storeFiles(request, documentFile, viewerDocumentFile, analyzeResultFile);

@ -76,6 +78,7 @@ public class OcrMessageReceiver {
            in.getMessageProperties().getHeaders().put(MessagingConfiguration.X_ERROR_INFO_TIMESTAMP_HEADER, OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS));
            throw new RuntimeException(e);
        } finally {
+            MDC.remove("fileId");
            FileSystemUtils.deleteRecursively(tmpDir);
        }
    }
--- a/azure-ocr-service/azure-ocr-service-server/src/main/resources/logback-spring.xml
+++ b/azure-ocr-service/azure-ocr-service-server/src/main/resources/logback-spring.xml
@ -7,11 +7,21 @@
    <include resource="org/springframework/boot/logging/logback/console-appender.xml"/>

    <appender name="JSON" class="ch.qos.logback.core.ConsoleAppender">
-        <encoder class="net.logstash.logback.encoder.LogstashEncoder"/>
+        <encoder class="net.logstash.logback.encoder.LogstashEncoder">
+            <!-- NOTE(review): LogstashEncoder emits JSON and presumably has no "pattern" property, so this element may be ignored (MDC values such as fileId should already appear in the JSON output) - confirm -->
+            <pattern>%d{yyyy-MM-dd HH:mm:ss}%replace( [file:%X{fileId}]){' \[file:\]', ''} [%thread] %-5level %logger{36} - %msg%n</pattern>
+        </encoder>
+    </appender>
+
+    <!-- Plain-text console output; the " [file:...]" part is dropped when no fileId is present in the MDC. -->
+    <!-- NOTE(review): the included Spring Boot console-appender.xml presumably also declares an appender named CONSOLE; confirm that this definition is meant to replace it. -->
+    <appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender">
+        <encoder>
+            <pattern>%d{yyyy-MM-dd HH:mm:ss}%replace( [file:%X{fileId}]){' \[file:\]', ''} [%thread] %-5level %logger{36} - %msg%n</pattern>
+        </encoder>
+    </appender>

    <root level="INFO">
        <appender-ref ref="${logType}"/>
    </root>

+    <logger name="com.iqser.red.pdftronlogic.commons" level="ERROR"/> <!-- only ERROR events from this package are logged -->
+
 </configuration>
--- a/azure-ocr-service/azure-ocr-service-server/src/test/java/com/knecon/fforesight/service/ocr/v1/server/AbstractTest.java
+++ b/azure-ocr-service/azure-ocr-service-server/src/test/java/com/knecon/fforesight/service/ocr/v1/server/AbstractTest.java
@ -8,7 +8,9 @@ import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.extension.ExtendWith;
 import org.mockito.MockitoAnnotations;
 import org.mockito.junit.jupiter.MockitoExtension;
+import org.springframework.amqp.rabbit.core.RabbitAdmin;
 import org.springframework.amqp.rabbit.core.RabbitTemplate;
+import org.springframework.amqp.rabbit.listener.RabbitListenerEndpointRegistry;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
 import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
@ -52,8 +54,13 @@ public class AbstractTest {
    @MockBean
    protected RabbitTemplate rabbitTemplate;

-    private static String pdftronLicense;
+    @MockBean
+    private RabbitAdmin rabbitAdmin;

+    @MockBean
+    private RabbitListenerEndpointRegistry rabbitListenerEndpointRegistry;
+
+    private static String pdftronLicense;

    @BeforeEach
    public void openMocks() {
--- a/azure-ocr-service/azure-ocr-service-server/src/test/java/com/knecon/fforesight/service/ocr/v1/server/OcrServiceIntegrationTest.java
+++ b/azure-ocr-service/azure-ocr-service-server/src/test/java/com/knecon/fforesight/service/ocr/v1/server/OcrServiceIntegrationTest.java
@ -9,23 +9,28 @@ import java.nio.file.Path;
 import java.nio.file.StandardCopyOption;
 import java.util.Comparator;
 import java.util.List;
+import java.util.Set;
 import java.util.concurrent.atomic.AtomicInteger;

 import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;
+import org.slf4j.MDC;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.boot.test.context.SpringBootTest;
 import org.springframework.core.io.ClassPathResource;

 import com.knecon.fforesight.service.ocr.processor.service.OCRService;
 import com.knecon.fforesight.service.ocr.processor.utils.OsUtils;
+import com.knecon.fforesight.service.ocr.v1.api.model.AzureOcrFeature;

 import lombok.SneakyThrows;

-@Disabled // in order to run, the azure.key must be set first in the application.yml
+@Disabled
+// in order to run, the azure.key must be set first in the application.yml and you must set the env variable VCPKG_DYNAMIC_LIB to your tesseract and leptonica installation folder
@SpringBootTest()
 public class OcrServiceIntegrationTest extends AbstractTest {

+    public static final Set<AzureOcrFeature> FEATURES = Set.of(AzureOcrFeature.ROTATION_CORRECTION, AzureOcrFeature.FONT_STYLE_DETECTION);
    @Autowired
    private OCRService ocrService;

@ -34,7 +39,7 @@ public class OcrServiceIntegrationTest extends AbstractTest {
    @SneakyThrows
    public void testOcrWith2000PageFile() {

-        testOCR("/home/kschuettler/Dokumente/TestFiles/OCR/VV-331340-first100.pdf");
+        testOCR("/home/kschuettler/Dokumente/TestFiles/OCR/brokenText.pdf");
    }


@ -50,7 +55,7 @@ public class OcrServiceIntegrationTest extends AbstractTest {
    @SneakyThrows
    public void testOcrWithFile() {

-        testOCR("/home/kschuettler/Dokumente/TestFiles/syn-dm-testfiles/1.A16148F - Toxicidade oral aguda.pdf");
+        testOCR("/home/kschuettler/Dokumente/LayoutparsingEvaluation/RAW_FILES/Difficult Headlines/VV-284053.pdf/VV-284053.pdf.ORIGIN.pdf");
    }


@ -58,7 +63,7 @@ public class OcrServiceIntegrationTest extends AbstractTest {
    @SneakyThrows
    public void testOcrWithFolder() {

-        String dir = "/home/kschuettler/Dokumente/TestFiles/BASF/Documine_Test_docs/2013-1110704.pdf";
+        String dir = "/home/kschuettler/Dokumente/TestFiles/OCR/TestSet";
        List<File> foundFiles = Files.walk(Path.of(dir))
                .sorted(Comparator.comparingLong(this::getFileSize))
                .map(Path::toFile)
@ -97,6 +102,8 @@ public class OcrServiceIntegrationTest extends AbstractTest {
    @SneakyThrows
    private String testOCR(File file) {

+        MDC.put("fileId", "test");
+
        Path tmpDir = Path.of(OsUtils.getTemporaryDirectory()).resolve("OCR_TEST").resolve(file.toPath().getFileName());

        assert tmpDir.toFile().exists() || tmpDir.toFile().mkdirs();
@ -108,7 +115,8 @@ public class OcrServiceIntegrationTest extends AbstractTest {
        Files.copy(file.toPath(), documentFile, StandardCopyOption.REPLACE_EXISTING);
        Files.copy(file.toPath(), viewerDocumentFile, StandardCopyOption.REPLACE_EXISTING);

-        ocrService.runOcrOnDocument(TEST_DOSSIER_ID, "file", false, tmpDir, documentFile.toFile(), viewerDocumentFile.toFile(), analyzeResultFile.toFile());
+        ocrService.runOcrOnDocument(TEST_DOSSIER_ID, "file", FEATURES, tmpDir, documentFile.toFile(), viewerDocumentFile.toFile(), analyzeResultFile.toFile());
+        MDC.remove("fileId");
        System.out.println("File:" + documentFile);
        System.out.println("\n\n");
        try (var fileStream = new FileInputStream(documentFile.toFile())) {
--- a/publish-custom-image.sh
+++ b/publish-custom-image.sh
@ -28,7 +28,7 @@ if [ -z "$1" ]; then
 fi

 namespace=${1}
-deployment_name="ocr-service-v1"
+deployment_name="azure-ocr-service"

 echo "deploying to ${namespace}"