Merge branch 'RED-8670' into 'main'

RED-8670: add features to status update See merge request fforesight/azure-ocr-service!23
2025-01-09 11:27:33 +01:00 · 2025-01-09 11:27:33 +01:00 · 23e63db6c5
commit 23e63db6c5
parent 98123a5938 635fd4abf8
18 changed files with 154 additions and 90 deletions
--- a/azure-ocr-service/azure-ocr-service-api/src/main/java/com/knecon/fforesight/service/ocr/v1/api/model/QuadPoint.java
+++ b/azure-ocr-service/azure-ocr-service-api/src/main/java/com/knecon/fforesight/service/ocr/v1/api/model/QuadPoint.java
@ -8,6 +8,8 @@ import java.util.List;
 import java.util.Objects;
 import java.util.stream.Stream;

+import lombok.Getter;
+
 public final class QuadPoint {

    public enum Direction {
@ -49,6 +51,8 @@ public final class QuadPoint {
    private final Point2D b;
    private final Point2D c;
    private final Point2D d;
+    @Getter
+    private final Direction direction;


    // This constructor assumes the points form a convex polygon; the assertion is omitted for performance reasons.
@ -58,6 +62,19 @@ public final class QuadPoint {
        this.b = b;
        this.c = c;
        this.d = d;
+        this.direction = calculateDirection();
+    }
+
+
+    private Direction calculateDirection() { // classifies the quad's orientation; the constructor stores the result in the final 'direction' field
+
+        if (isHorizontal()) {
+            return a.getX() < d.getX() ? Direction.RIGHT : Direction.LEFT; // horizontal: RIGHT when a has the smaller x than d, otherwise LEFT
+        }
+        if (isVertical()) {
+            return a.getY() < d.getY() ? Direction.UP : Direction.DOWN; // vertical: UP when a has the smaller y than d, otherwise DOWN
+        }
+        return Direction.NONE; // neither isHorizontal() nor isVertical() held
    }


@ -86,7 +103,9 @@ public final class QuadPoint {

    public static QuadPoint fromPolygons(List<Double> polygon) {

-        assert polygon.size() == 8;
+        if (polygon.size() != 8) {
+            throw new AssertionError();
+        }
        return new QuadPoint(new Point2D.Double(polygon.get(0), polygon.get(1)),
                             new Point2D.Double(polygon.get(6), polygon.get(7)),
                             new Point2D.Double(polygon.get(4), polygon.get(5)),
@ -132,18 +151,6 @@ public final class QuadPoint {
    }


-    public Direction getDirection() {
-
-        if (isHorizontal()) {
-            return a.getX() < d.getX() ? Direction.RIGHT : Direction.LEFT;
-        }
-        if (isVertical()) {
-            return a.getY() < d.getY() ? Direction.UP : Direction.DOWN;
-        }
-        return Direction.NONE;
-    }
-
-
    public Stream<Line2D> asLines() {

        return Stream.of(new Line2D.Double(a(), b()), new Line2D.Double(b(), c()), new Line2D.Double(c(), d()), new Line2D.Double(d(), a()));
@ -231,6 +238,74 @@ public final class QuadPoint {
    }


+    public Line2D getRightLine() { // right edge: new segment from the top-right corner to the lower-right corner
+
+        return new Line2D.Double(getTopRight(), getLowerRight());
+    }
+
+
+    public Line2D getLeftLine() { // left edge: new segment from the top-left corner to the lower-left corner
+
+        return new Line2D.Double(getTopLeft(), getLowerLeft());
+    }
+
+
+    public Line2D getBottomLine() { // bottom edge: new segment from the lower-left corner to the lower-right corner
+
+        return new Line2D.Double(getLowerLeft(), getLowerRight());
+    }
+
+
+    public Line2D getTopLine() { // top edge: new segment from the top-left corner to the top-right corner
+
+        return new Line2D.Double(getTopLeft(), getTopRight());
+    }
+
+
+    public Point2D getTopLeft() { // selects which of the stored points a/b/c/d acts as the top-left corner for the precomputed direction
+
+        return switch (direction) {
+            case DOWN -> a;
+            case LEFT -> d;
+            case UP -> c;
+            default -> b; // remaining directions; calculateDirection() otherwise yields RIGHT or NONE
+        };
+    }
+
+
+    public Point2D getTopRight() { // selects which of the stored points a/b/c/d acts as the top-right corner for the precomputed direction
+
+        return switch (direction) {
+            case DOWN -> b;
+            case LEFT -> a;
+            case UP -> d;
+            default -> c; // remaining directions; calculateDirection() otherwise yields RIGHT or NONE
+        };
+    }
+
+
+    public Point2D getLowerRight() { // selects which of the stored points a/b/c/d acts as the lower-right corner for the precomputed direction
+
+        return switch (direction) {
+            case DOWN -> c;
+            case LEFT -> b;
+            case UP -> a;
+            default -> d; // remaining directions; calculateDirection() otherwise yields RIGHT or NONE
+        };
+    }
+
+
+    public Point2D getLowerLeft() { // selects which of the stored points a/b/c/d acts as the lower-left corner for the precomputed direction
+
+        return switch (direction) {
+            case DOWN -> d;
+            case LEFT -> c;
+            case UP -> b;
+            default -> a; // remaining directions; calculateDirection() otherwise yields RIGHT or NONE
+        };
+    }
+
+
    /**
     * Determines if the given QuadPoint aligns with this QuadPoint within a given threshold.
     * It does so by trying every possible combination of aligning sides. It starts with the most likely combination of ab and cd.
--- a/azure-ocr-service/azure-ocr-service-processor/build.gradle.kts
+++ b/azure-ocr-service/azure-ocr-service-processor/build.gradle.kts
@ -19,7 +19,7 @@ dependencies {
    implementation("com.amazonaws:aws-java-sdk-kms:1.12.440")
    implementation("com.google.guava:guava:31.1-jre")
    implementation("com.knecon.fforesight:viewer-doc-processor:0.193.0")
-    implementation("com.azure:azure-ai-documentintelligence:1.0.0-beta.4")
+    implementation("com.azure:azure-ai-documentintelligence:1.0.0")

    implementation("com.iqser.red.commons:pdftron-logic-commons:2.32.0")

--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/OcrServiceSettings.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/OcrServiceSettings.java
@ -19,7 +19,7 @@ public class OcrServiceSettings {
    boolean debug; // writes the ocr layer visibly to the viewer doc pdf
    boolean drawTablesAsLines; // writes the tables to the PDF as invisible lines.
    boolean snuggify = true; // attempts to shrink the word boxes returned by azure to fit the actual word pixels snug
-    boolean useCaches = true; // skips azure api, pdf rendering and image processing, when the files are already present
+    boolean useCaches; // skips azure api, pdf rendering and image processing, when the files are already present
    boolean azureFontStyleDetection; // omits all image processing and uses azures FONT_STYLE feature (costs 0.6ct per page)
    String contentFormat; // Either markdown or text. But, for whatever reason, with markdown enabled, key-values are not written by azure....

--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/AsyncOcrService.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/AsyncOcrService.java
@ -123,7 +123,7 @@ public class AsyncOcrService {

    private static void handleCompleted(BatchContext batchContext) {

-        log.info("Completed batch {} with pages {}", batchContext.batch.getIndex(), batchContext.batch);
+        log.info("Batch {}: Completed with pages {}", batchContext.batch.getIndex(), batchContext.batch);
    }


--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/AzureOcrResource.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/AzureOcrResource.java
@ -1,21 +1,19 @@
 package com.knecon.fforesight.service.ocr.processor.service;

 import java.util.ArrayList;
-import java.util.Collections;
 import java.util.List;
 import java.util.Set;

 import org.springframework.beans.factory.annotation.Value;
 import org.springframework.stereotype.Service;
-import org.yaml.snakeyaml.events.Event;

 import com.azure.ai.documentintelligence.DocumentIntelligenceAsyncClient;
 import com.azure.ai.documentintelligence.DocumentIntelligenceClientBuilder;
-import com.azure.ai.documentintelligence.models.AnalyzeDocumentRequest;
+import com.azure.ai.documentintelligence.models.AnalyzeDocumentOptions;
+import com.azure.ai.documentintelligence.models.AnalyzeOperationDetails;
 import com.azure.ai.documentintelligence.models.AnalyzeResult;
-import com.azure.ai.documentintelligence.models.AnalyzeResultOperation;
-import com.azure.ai.documentintelligence.models.ContentFormat;
 import com.azure.ai.documentintelligence.models.DocumentAnalysisFeature;
+import com.azure.ai.documentintelligence.models.DocumentContentFormat;
 import com.azure.ai.documentintelligence.models.StringIndexType;
 import com.azure.core.credential.AzureKeyCredential;
 import com.azure.core.util.BinaryData;
@ -46,29 +44,23 @@ public class AzureOcrResource {


    @SneakyThrows
-    public PollerFlux<AnalyzeResultOperation, AnalyzeResult> callAzureAsync(BinaryData data, Set<AzureOcrFeature> features) {
+    public PollerFlux<AnalyzeOperationDetails, AnalyzeResult> callAzureAsync(BinaryData data, Set<AzureOcrFeature> features) {

-        AnalyzeDocumentRequest analyzeRequest = new AnalyzeDocumentRequest().setBase64Source(data.toBytes());
-
-        return asyncClient.beginAnalyzeDocument(getModelId(features),
-                                                null,
-                                                null,
-                                                StringIndexType.UTF16CODE_UNIT,
-                                                buildFeatures(features),
-                                                null,
-                                                buildContentFormat(),
-                                                Collections.emptyList(),
-                                                analyzeRequest);
+        AnalyzeDocumentOptions analyzeDocumentOptions = new AnalyzeDocumentOptions(data.toBytes());
+        analyzeDocumentOptions.setStringIndexType(StringIndexType.UTF16_CODE_UNIT);
+        analyzeDocumentOptions.setDocumentAnalysisFeatures(buildFeatures(features));
+        analyzeDocumentOptions.setOutputContentFormat(buildContentFormat());
+        return asyncClient.beginAnalyzeDocument(getModelId(features), analyzeDocumentOptions);

    }


-    private ContentFormat buildContentFormat() {
+    private DocumentContentFormat buildContentFormat() {

        if (Objects.equal(settings.getContentFormat(), "markdown")) {
-            return ContentFormat.MARKDOWN;
+            return DocumentContentFormat.MARKDOWN;
        }
-        return ContentFormat.TEXT;
+        return DocumentContentFormat.TEXT;
    }


--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/OCRService.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/OCRService.java
@ -60,10 +60,10 @@ public class OCRService {
     * @param tmpDir             working directory for all files
     * @param documentFile       the file to perform ocr on, results are written invisibly
     * @param viewerDocumentFile debugging file, results are written visibly in an optional content group
-     * @param analyzeResultFile  result file with additional information
+     * @param idpResultFile  result file with additional information
     */
    @Observed(name = "OCRService", contextualName = "run-ocr-on-document")
-    public void runOcrOnDocument(String dossierId, String fileId, Set<AzureOcrFeature> features, Path tmpDir, File documentFile, File viewerDocumentFile, File analyzeResultFile) {
+    public void runOcrOnDocument(String dossierId, String fileId, Set<AzureOcrFeature> features, Path tmpDir, File documentFile, File viewerDocumentFile, File idpResultFile) {

        if (features.contains(AzureOcrFeature.REMOVE_WATERMARKS)) {
            removeWatermark(documentFile);
@ -71,10 +71,9 @@ public class OCRService {

        removeInvisibleElements(documentFile);

-        log.info("Starting OCR");
        long ocrStart = System.currentTimeMillis();

-        Statistics stats = runOcr(tmpDir, documentFile, viewerDocumentFile, fileId, dossierId, analyzeResultFile, features).getStatistics();
+        Statistics stats = runOcr(tmpDir, documentFile, viewerDocumentFile, fileId, dossierId, idpResultFile, features).getStatistics();

        long ocrEnd = System.currentTimeMillis();
        log.info("OCR successful, took {}", humanizeDuration(ocrEnd - ocrStart));
--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/OcrExecutionSupervisor.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/OcrExecutionSupervisor.java
@ -102,6 +102,7 @@ public class OcrExecutionSupervisor {
        batch.forEach(pageIndex -> countDownPagesToProcess.countDown());
        statistics.getBatchStats(batch).finishMappingResult();
        ocrMessageSender.sendUpdate(fileId, this.processedPages(), getTotalPageCount(), features);
+        log.info("Batch {}: Finished mapping result with pages {}", batch.getIndex(), batch);
    }


--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/OcrResultPostProcessingPipeline.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/OcrResultPostProcessingPipeline.java
@ -18,13 +18,13 @@ import java.util.stream.Stream;

 import com.azure.ai.documentintelligence.models.AnalyzeResult;
 import com.azure.ai.documentintelligence.models.BoundingRegion;
+import com.azure.ai.documentintelligence.models.DocumentFontStyle;
 import com.azure.ai.documentintelligence.models.DocumentPage;
 import com.azure.ai.documentintelligence.models.DocumentSpan;
 import com.azure.ai.documentintelligence.models.DocumentStyle;
 import com.azure.ai.documentintelligence.models.DocumentTable;
 import com.azure.ai.documentintelligence.models.DocumentTableCell;
 import com.azure.ai.documentintelligence.models.DocumentWord;
-import com.azure.ai.documentintelligence.models.FontWeight;
 import com.google.common.base.Functions;
 import com.knecon.fforesight.service.ocr.processor.OcrServiceSettings;
 import com.knecon.fforesight.service.ocr.processor.model.ImageFile;
@ -32,10 +32,10 @@ import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
 import com.knecon.fforesight.service.ocr.processor.model.PageInformation;
 import com.knecon.fforesight.service.ocr.processor.model.SpanLookup;
 import com.knecon.fforesight.service.ocr.processor.model.TextPositionInImage;
+import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.BBoxSnuggificationService;
 import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.FontStyleDetector;
 import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingPipeline;
 import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.ImageProcessingSupervisor;
-import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.BBoxSnuggificationService;
 import com.knecon.fforesight.service.ocr.processor.service.imageprocessing.StrokeWidthCalculator;
 import com.knecon.fforesight.service.ocr.processor.visualizations.WritableOcrResult;
 import com.knecon.fforesight.service.ocr.processor.visualizations.fonts.FontMetricsProvider;
@ -323,16 +323,13 @@ public class OcrResultPostProcessingPipeline {
            return Lookups.empty();
        }

-        SpanLookup<DocumentSpan> boldLookup = new SpanLookup<>(analyzeResult.getStyles()
-                                                                       .stream()
-                                                                       .filter(style -> Objects.equals(style.getFontWeight(), FontWeight.BOLD))
-                                                                       .map(DocumentStyle::getSpans)
-                                                                       .flatMap(Collection::stream), Function.identity());
+        // Azure stopped supporting bold text detection in 1.0.0 release
+        SpanLookup<DocumentSpan> boldLookup = new SpanLookup<>(Stream.empty(), Function.identity());

        SpanLookup<DocumentSpan> italicLookup = new SpanLookup<>(analyzeResult.getStyles()
                                                                         .stream()
                                                                         .filter(style -> Objects.equals(style.getFontStyle(),
-                                                                                                         com.azure.ai.documentintelligence.models.FontStyle.ITALIC))
+                                                                                                         DocumentFontStyle.ITALIC))
                                                                         .map(DocumentStyle::getSpans)
                                                                         .flatMap(Collection::stream), Functions.identity());

--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/imageprocessing/BBoxSnuggificationService.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/imageprocessing/BBoxSnuggificationService.java
@ -31,7 +31,7 @@ public class BBoxSnuggificationService {
    private static final double AVERAGE_ANGLE_THRESHOLD = 0.2; // Skips snuggification, if the average remaining word rotation of a word, written from left-to-right is bigger than this
    public static final int INDIVIDUAL_ANGLE_THRESHOLD = 5; // skips snuggification for word, if the remaining rotation is larger than this angle
    public static final int MAX_SHRINK_PIXELS = 40; // Number of pixels that are allowed to be removed from the top or bottom of an image
-    private static final int MINIMUM_WORD_Pixels = 5;
+    private static final int MINIMUM_WORD_PIXELS = 5; // Number of pixels that are required for snuggification

    private enum Operation {
        HORIZONTAL,
@ -48,6 +48,11 @@ public class BBoxSnuggificationService {
            return Optional.empty();
        }

+        if (origin.getContent().equals("-") || origin.getContent().equals(",")) {
+            // very slim characters should not be snuggified, or the fontsize may be off significantly
+            return Optional.empty();
+        }
+
        QuadPoint originTransformed = QuadPoint.fromPolygons(origin.getPolygon()).getTransformed(resultToImageTransform);
        double remainingAngle = Math.abs(RotationCorrectionUtility.getRemainingAngle(originTransformed.getAngle()));
        QuadPoint.Direction direction = originTransformed.getDirection();
@ -133,7 +138,7 @@ public class BBoxSnuggificationService {
        if (start == 0 && end == wordImage.w) {
            return Optional.empty();
        }
-        if (Math.abs(start - end) < MINIMUM_WORD_Pixels) {
+        if (Math.abs(start - end) < MINIMUM_WORD_PIXELS) {
            return Optional.empty();
        }
        return Optional.of(new Rectangle2D.Double(origin.getX() + start, origin.getY(), origin.getWidth() - start - (wordImage.w - end), origin.getHeight()));
@ -159,7 +164,7 @@ public class BBoxSnuggificationService {
        if (start == 0 && end == wordImage.h) {
            return Optional.empty();
        }
-        if (Math.abs(start - end) < MINIMUM_WORD_Pixels) {
+        if (Math.abs(start - end) < MINIMUM_WORD_PIXELS) {
            return Optional.empty();
        }
        return Optional.of(new Rectangle2D.Double(origin.getX(), origin.getY() + start, origin.getWidth(), origin.getHeight() - start - (wordImage.h - end)));
--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/imageprocessing/GhostScriptOutputHandler.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/imageprocessing/GhostScriptOutputHandler.java
@ -15,7 +15,6 @@ import java.util.regex.Pattern;
 import org.slf4j.MDC;

 import com.knecon.fforesight.service.ocr.processor.model.ImageFile;
-import com.knecon.fforesight.service.ocr.processor.model.PageBatch;

 import lombok.AccessLevel;
 import lombok.RequiredArgsConstructor;
@ -76,13 +75,14 @@ public class GhostScriptOutputHandler extends Thread {
                if (line == null) {
                    break;
                }
-
-                if (type.equals(Type.ERROR)) {
-                    log.error("{}_{}>{}", processName, type.name(), line);
-                } else {
-                    log.debug("{}_{}>{}", processName, type.name(), line);
-                    addProcessedImageToQueue(line);
+                switch (type) {
+                    case STD_OUT -> {
+                        log.debug("Batch {}: {}_{}>{}", batchIdx, processName, type.name(), line);
+                        addProcessedImageToQueue(line);
+                    }
+                    case ERROR -> log.error("Batch {}: {}_{}>{}", batchIdx, processName, type.name(), line);
                }
+
            }
        }
        is.close();
@ -92,7 +92,7 @@ public class GhostScriptOutputHandler extends Thread {
            if (!pagesToProcess.isEmpty()) {
                errorHandler.accept(String.format("Ghostscript finished for batch %d, but pages %s remain unprocessed.", batchIdx, formatPagesToProcess()));
            } else {
-                log.info("Batch: {} rendered successfully!", batchIdx);
+                log.info("Batch {}: rendered successfully!", batchIdx);
            }
        }

--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/imageprocessing/GhostScriptService.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/imageprocessing/GhostScriptService.java
@ -79,7 +79,7 @@ public class GhostScriptService {

        concurrencySemaphore.acquire();
        log.info("Batch {}: starting GhostScript rendering with page(s) {}", batch.getIndex(), batch);
-        executeProcess(batch.getIndex(), buildCmdArgs(batch, batch.getBatchDoc()), successHandler, errorHandler);
+        executeProcess(batch, buildCmdArgs(batch, batch.getBatchDoc()), successHandler, errorHandler);
    }


@ -106,27 +106,27 @@ public class GhostScriptService {


    @SneakyThrows
-    private void executeProcess(int batchIdx, ProcessCmdsAndRenderedImageFiles processInfo, Consumer<ImageFile> successHandler, Consumer<String> errorHandler) {
+    private void executeProcess(PageBatch batch, ProcessCmdsAndRenderedImageFiles processInfo, Consumer<ImageFile> successHandler, Consumer<String> errorHandler) {

        Process p = Runtime.getRuntime().exec(processInfo.cmdArgs());
        InputStream stdOut = p.getInputStream();
-        GhostScriptOutputHandler stdOutLogger = GhostScriptOutputHandler.stdOut(batchIdx, stdOut, processInfo.renderedPageImageFiles(), successHandler, errorHandler);
+        GhostScriptOutputHandler stdOutLogger = GhostScriptOutputHandler.stdOut(batch.getIndex(), stdOut, processInfo.renderedPageImageFiles(), successHandler, errorHandler);
        InputStream stdError = p.getErrorStream();
-        GhostScriptOutputHandler stdErrorLogger = GhostScriptOutputHandler.stdError(batchIdx, stdError, errorHandler);
+        GhostScriptOutputHandler stdErrorLogger = GhostScriptOutputHandler.stdError(batch.getIndex(), stdError, errorHandler);

        stdOutLogger.start();
        stdErrorLogger.start();
-        handleFinished(p);
+        handleFinished(p, errorHandler, batch, successHandler);
    }


-    private void handleFinished(Process p) {
+    private void handleFinished(Process p, Consumer<String> errorHandler, PageBatch batch, Consumer<ImageFile> successHandler) {

        Thread finishedThread = new Thread(() -> {
            try {
-                p.waitFor();
+                p.waitFor(2, TimeUnit.MINUTES);
            } catch (InterruptedException e) {
-                log.error("GhostScript process was interrupted", e);
+                errorHandler.accept("Batch %d: Ghostscript rendering has been terminated after 2 minutes \n %s".formatted(batch.getIndex(), e.getMessage()));
            } finally {
                concurrencySemaphore.release();
            }
--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/imageprocessing/ImageProcessingService.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/imageprocessing/ImageProcessingService.java
@ -87,7 +87,7 @@ public class ImageProcessingService {
                LeptUtils.disposePix(processedPix);
            }
        } catch (Exception e) {
-            supervisor.markError(e.getMessage());
+            supervisor.markError("Page %d could not be processed due to: %s".formatted(unprocessedImage.pageNumber(), e.getMessage()));
        } finally {
            supervisor.markPageFinished(processedImage);
            log.debug("Finished page: {}", processedImage.pageNumber());
--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/imageprocessing/ImageProcessingSupervisor.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/imageprocessing/ImageProcessingSupervisor.java
@ -69,6 +69,7 @@ public class ImageProcessingSupervisor {

    public void markError(String errorMessage) { // reports a processing error: logs it and records it in this.errors

+        log.error(errorMessage); // the message is passed as the log format string, with no arguments
        this.errors.add(errorMessage);
    }

--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/visualizations/layers/IdpLayer.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/visualizations/layers/IdpLayer.java
@ -12,8 +12,6 @@ import com.azure.ai.documentintelligence.models.DocumentBarcode;
 import com.azure.ai.documentintelligence.models.DocumentFigure;
 import com.azure.ai.documentintelligence.models.DocumentKeyValuePair;
 import com.azure.ai.documentintelligence.models.DocumentLine;
-import com.azure.ai.documentintelligence.models.DocumentList;
-import com.azure.ai.documentintelligence.models.DocumentListItem;
 import com.azure.ai.documentintelligence.models.DocumentParagraph;
 import com.azure.ai.documentintelligence.models.DocumentSection;
 import com.azure.ai.documentintelligence.models.DocumentTable;
@ -23,8 +21,8 @@ import com.azure.ai.documentintelligence.models.DocumentWord;
 import com.azure.ai.documentintelligence.models.ParagraphRole;
 import com.knecon.fforesight.service.ocr.processor.model.PageBatch;
 import com.knecon.fforesight.service.ocr.processor.model.SpanLookup;
-import com.knecon.fforesight.service.ocr.processor.visualizations.utils.Rectangle2DBBoxCollector;
 import com.knecon.fforesight.service.ocr.processor.visualizations.utils.LineUtils;
+import com.knecon.fforesight.service.ocr.processor.visualizations.utils.Rectangle2DBBoxCollector;
 import com.knecon.fforesight.service.ocr.v1.api.model.QuadPoint;
 import com.knecon.fforesight.service.viewerdoc.layers.IdpLayerConfig;
 import com.knecon.fforesight.service.viewerdoc.model.ColoredLine;
@ -69,14 +67,6 @@ public class IdpLayer extends IdpLayerConfig {
    }


-    public void addList(DocumentList list, PageBatch pageOffset) {
-
-        for (DocumentListItem item : list.getItems()) {
-            addBoundingRegion(item.getBoundingRegions(), lists, PARAGRAPH_COLOR, pageOffset);
-        }
-    }
-
-
    public void addBarcode(int pageNumber, DocumentBarcode barcode) {

        addPolygon(pageNumber, barcode.getPolygon(), barcodes, IMAGE_COLOR);
@ -85,8 +75,11 @@ public class IdpLayer extends IdpLayerConfig {

    public void addKeyValue(DocumentKeyValuePair keyValue, PageBatch pageOffset) {

+        if (keyValue.getKey() == null || keyValue.getKey().getContent().isEmpty()) {
+            return;
+        }
        addBoundingRegion(keyValue.getKey().getBoundingRegions(), keyValuePairs, KEY_COLOR, pageOffset);
-        if (keyValue.getValue() != null) {
+        if (keyValue.getValue() != null && !keyValue.getValue().getContent().isEmpty()) {
            addBoundingRegion(keyValue.getValue().getBoundingRegions(), keyValuePairs, VALUE_COLOR, pageOffset);

            if (keyValue.getKey().getBoundingRegions()
--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/visualizations/layers/IdpResultFactory.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/visualizations/layers/IdpResultFactory.java
@ -116,12 +116,12 @@ public class IdpResultFactory {
    private void addKeyValuePair(DocumentKeyValuePair documentKeyValuePair, PageBatch batch) {

        TextRegion key = null;
-        if (documentKeyValuePair.getKey() != null) {
+        if (documentKeyValuePair.getKey() != null && !documentKeyValuePair.getKey().getContent().isEmpty()) {
            Region region = toRegionFromRegions(batch, documentKeyValuePair.getKey().getBoundingRegions());
            key = new TextRegion(region, cleanString(documentKeyValuePair.getKey().getContent()));
        }
        TextRegion value = null;
-        if (documentKeyValuePair.getValue() != null) {
+        if (documentKeyValuePair.getValue() != null && !documentKeyValuePair.getValue().getContent().isEmpty()) {
            Region region = toRegionFromRegions(batch, documentKeyValuePair.getValue().getBoundingRegions());
            value = new TextRegion(region, cleanString(documentKeyValuePair.getValue().getContent()));
        }
--- a/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/visualizations/layers/LayerFactory.java
+++ b/azure-ocr-service/azure-ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/visualizations/layers/LayerFactory.java
@ -67,10 +67,8 @@ public class LayerFactory {
            ocrDebugLayerFactory.addAnalysisResult(results);
        }
        if (features.contains(AzureOcrFeature.IDP)) {
-            log.info("Batch {}: Start building IDP stuff", batch.getIndex());
            idpLayerFactory.addAnalyzeResult(analyzeResult, batch);
            idpResultFactory.addAnalyzeResult(analyzeResult, batch);
-            log.info("Batch {}: Finished building IDP stuff", batch.getIndex());
        }

        this.supervisor.finishMappingResult(batch);
--- a/azure-ocr-service/azure-ocr-service-server/src/main/java/com/knecon/fforesight/service/ocr/v1/server/queue/OcrMessageReceiver.java
+++ b/azure-ocr-service/azure-ocr-service-server/src/main/java/com/knecon/fforesight/service/ocr/v1/server/queue/OcrMessageReceiver.java
@ -5,6 +5,8 @@ import java.io.IOException;
 import java.nio.file.Path;
 import java.time.OffsetDateTime;
 import java.time.temporal.ChronoUnit;
+import java.util.Objects;
+import java.util.stream.Collectors;

 import org.slf4j.MDC;
 import org.springframework.amqp.AmqpRejectAndDontRequeueException;
@ -59,19 +61,19 @@ public class OcrMessageReceiver {

        try {
            MDC.put("fileId", fileId);
-            log.info("--------------------------------------------------------------------------");
-
+            log.info("--------------------------------- Starting OCR ---------------------------------");
+            log.info("Features: {}", request.getFeatures().stream().map(Objects::toString).collect(Collectors.joining(", ")));
            ocrMessageSender.sendOCRStarted(fileId, request.getFeatures());

            File documentFile = runDir.resolve(DOCUMENT_FILE_NAME).toFile();
            File viewerDocumentFile = runDir.resolve(VIEWER_DOCUMENT_FILE_NAME).toFile();
-            File analyzeResultFile = runDir.resolve(IDP_RESULT_FILE_NAME).toFile();
+            File idpResultFile = runDir.resolve(IDP_RESULT_FILE_NAME).toFile();

            fileStorageService.downloadFiles(request, documentFile);

-            ocrService.runOcrOnDocument(dossierId, fileId, request.getFeatures(), runDir, documentFile, viewerDocumentFile, analyzeResultFile);
+            ocrService.runOcrOnDocument(dossierId, fileId, request.getFeatures(), runDir, documentFile, viewerDocumentFile, idpResultFile);

-            fileStorageService.storeFiles(request, documentFile, viewerDocumentFile, analyzeResultFile);
+            fileStorageService.storeFiles(request, documentFile, viewerDocumentFile, idpResultFile);

            ocrMessageSender.sendOcrResponse(request);
        } catch (Exception e) {
@ -81,6 +83,7 @@ public class OcrMessageReceiver {
            throw new RuntimeException(e);
        } finally {
            log.info("Done");
+            log.info("--------------------------------- Done ---------------------------------");
            MDC.remove("fileId");
            FileSystemUtils.deleteRecursively(runDir);
        }
--- a/azure-ocr-service/azure-ocr-service-server/src/test/java/com/knecon/fforesight/service/ocr/v1/server/OcrServiceIntegrationTest.java
+++ b/azure-ocr-service/azure-ocr-service-server/src/test/java/com/knecon/fforesight/service/ocr/v1/server/OcrServiceIntegrationTest.java
@ -58,7 +58,7 @@ public class OcrServiceIntegrationTest extends AbstractTest {
    @SneakyThrows
    public void testOcrWithFile() {

-        testOCR("/home/kschuettler/Dokumente/402Study.pdf");
+        testOCR("/home/kschuettler/Dokumente/TestFiles/OCR/TestSet/VV-331340-first100.pdf");
    }