Merge branch 'RED-8156' into 'master'

RED-8156: add debug layers to viewer document. See merge request fforesight/ocr-service!40
2024-02-08 11:43:15 +01:00 · 2024-02-08 11:43:15 +01:00 · c4c20d15ae
commit c4c20d15ae
parent 724bb58969 d2f2def1c2
12 changed files with 316 additions and 325 deletions
--- a/ocr-service-v1/ocr-service-processor/build.gradle.kts
+++ b/ocr-service-v1/ocr-service-processor/build.gradle.kts
@ -25,5 +25,6 @@ dependencies {
    api("com.amazonaws:aws-java-sdk-kms:1.12.440")
    api("com.google.guava:guava:31.1-jre")
    api("com.iqser.red.commons:pdftron-logic-commons:2.23.0")
+    api("com.knecon.fforesight:viewer-doc-processor:0.89.0")
    testImplementation("org.junit.jupiter:junit-jupiter:5.8.1")
 }
--- a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/OcrServiceProcessorConfiguration.java
+++ b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/OcrServiceProcessorConfiguration.java
@ -1,14 +1,26 @@
 package com.knecon.fforesight.service.ocr.processor;

+import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.boot.context.properties.EnableConfigurationProperties;
+import org.springframework.context.annotation.Bean;
 import org.springframework.context.annotation.ComponentScan;
 import org.springframework.context.annotation.Configuration;

 import com.knecon.fforesight.service.ocr.processor.settings.OcrServiceSettings;
+import com.knecon.fforesight.service.viewerdoc.service.ViewerDocumentService;
+
+import io.micrometer.observation.ObservationRegistry;

@Configuration
@ComponentScan
@EnableConfigurationProperties(OcrServiceSettings.class)
 public class OcrServiceProcessorConfiguration {

+    /**
+     * Registers the {@link ViewerDocumentService} as a Spring bean.
+     * <p>
+     * Parameters of a {@code @Bean} method are resolved from the application context by Spring
+     * itself, so the method carries no {@code @Autowired}; that annotation would additionally
+     * mark the method as an injection method of this configuration bean.
+     *
+     * @param registry observation registry handed to the service's constructor
+     * @return a new {@link ViewerDocumentService}
+     */
+    @Bean
+    public ViewerDocumentService viewerDocumentService(ObservationRegistry registry) {
+
+        return new ViewerDocumentService(registry);
+    }
+
 }
--- a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/model/OcrResultToWrite.java
+++ b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/model/OcrResultToWrite.java
@ -25,11 +25,18 @@ public record OcrResultToWrite(List<TextPositionInImage> textPositionInImage, Qu
                .collect(Collectors.toMap(Map.Entry::getKey,
                        entry -> entry.getValue()
                                .stream()
-                                .map(ocrResult -> new OcrResultToWrite(ocrResult.getAllWords()
-                                        .stream()
-                                        .filter(word -> !word.isBlank())
-                                        .map(word -> new TextPositionInImage(word, ocrResult.image().getImageCTM(), fontMetricsFactory, FontStyle.REGULAR))
-                                        .toList(), ocrResult.image().getImageCoordinatesInInitialUserSpace()))
+                                .map(ocrResult -> new OcrResultToWrite(toTextPositionInImage(ocrResult, fontMetricsFactory), ocrResult.image().getImageCoordinatesInInitialUserSpace()))
                                .toList()));
    }
+
+
+    /**
+     * Turns every non-blank word of an OCR result into a {@link TextPositionInImage}, positioned
+     * with the CTM of the result's image and tagged with {@link FontStyle#REGULAR}.
+     *
+     * @param ocrResult          OCR result of a single image
+     * @param fontMetricsFactory font metrics used to lay out each word
+     * @return the text positions of all non-blank words, in the order the words are returned
+     */
+    private static List<TextPositionInImage> toTextPositionInImage(OcrResult ocrResult, FontMetricsFactory fontMetricsFactory) {
+
+        var nonBlankWords = ocrResult.getAllWords().stream().filter(candidate -> !candidate.isBlank());
+        return nonBlankWords.map(candidate -> new TextPositionInImage(candidate, ocrResult.image().getImageCTM(), fontMetricsFactory, FontStyle.REGULAR)).toList();
+    }
+
 }
--- a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/FileStorageService.java
+++ b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/FileStorageService.java
@ -1,13 +1,11 @@
 package com.knecon.fforesight.service.ocr.processor.service;

-import java.io.ByteArrayInputStream;
 import java.io.File;
+import java.io.FileInputStream;
 import java.io.InputStream;
 import java.nio.file.Files;
-import java.nio.file.Paths;
-import java.nio.file.StandardOpenOption;
+import java.nio.file.StandardCopyOption;

-import org.apache.commons.io.IOUtils;
 import org.springframework.stereotype.Service;

 import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
@ -31,47 +29,38 @@ public class FileStorageService {
        return dossierId + "/" + fileId + "." + fileType.name() + fileType.getExtension();
    }

-
-    @SneakyThrows
-    public byte[] getOriginalFile(String dossierId, String fileId) {
-
-        try (InputStream inputStream = getInputStream(getStorageId(dossierId, fileId, FileType.ORIGIN))) {
-            return IOUtils.toByteArray(inputStream);
-        }
-    }
-
-
-    @SneakyThrows
-    public InputStream getOriginalFileAsStream(String dossierId, String fileId) {
-
-        return getInputStream(getStorageId(dossierId, fileId, FileType.ORIGIN));
-    }
-
-
-    public void storeOriginalFile(String dossierId, String fileId, InputStream stream) {
-
-        storageService.storeObject(TenantContext.getTenantId(), getStorageId(dossierId, fileId, FileType.ORIGIN), stream);
-    }
-
-
    public boolean untouchedFileExists(String dossierId, String fileId) {

        return storageService.objectExists(TenantContext.getTenantId(), getStorageId(dossierId, fileId, FileType.UNTOUCHED));
    }

+    @SneakyThrows
+    public void storeFiles(String dossierId, String fileId, File documentFile, File viewerDocumentFile) { // uploads both local files to storage for the current tenant

-    public void storeUntouchedFile(String dossierId, String fileId, byte[] data) {
-
-        storageService.storeObject(TenantContext.getTenantId(), getStorageId(dossierId, fileId, FileType.UNTOUCHED), new ByteArrayInputStream(data));
+        try (var in = new FileInputStream(documentFile)) { // documentFile is stored as the ORIGIN object
+            storageService.storeObject(TenantContext.getTenantId(), getStorageId(dossierId, fileId, FileType.ORIGIN), in);
+        }
+        try (var in = new FileInputStream(viewerDocumentFile)) { // viewerDocumentFile is stored as the VIEWER_DOCUMENT object
+            storageService.storeObject(TenantContext.getTenantId(), getStorageId(dossierId, fileId, FileType.VIEWER_DOCUMENT), in);
+        }
    }


    @SneakyThrows
-    private InputStream getInputStream(String storageId) {
+    public void downloadFiles(String dossierId, String fileId, File documentFile, File viewerDocumentFile) { // fills both local files from storage; also stores an UNTOUCHED copy if none exists (see below)

-        File tempFile = File.createTempFile("temp", ".data");
-        storageService.downloadTo(TenantContext.getTenantId(), storageId, tempFile);
-        return Files.newInputStream(Paths.get(tempFile.getPath()), StandardOpenOption.DELETE_ON_CLOSE);
+        storageService.downloadTo(TenantContext.getTenantId(), getStorageId(dossierId, fileId, FileType.ORIGIN), documentFile);
+        if (storageService.objectExists(TenantContext.getTenantId(), getStorageId(dossierId, fileId, FileType.VIEWER_DOCUMENT))) {
+            storageService.downloadTo(TenantContext.getTenantId(), getStorageId(dossierId, fileId, FileType.VIEWER_DOCUMENT), viewerDocumentFile);
+        } else { // no viewer document in storage: start from a copy of the downloaded origin file
+            Files.copy(documentFile.toPath(), viewerDocumentFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
+        }
+
+        if (!untouchedFileExists(dossierId, fileId)) { // side effect: the downloaded origin is uploaded as UNTOUCHED, but only when that object is absent
+            try (var in = new FileInputStream(documentFile)) {
+                storageService.storeObject(TenantContext.getTenantId(), getStorageId(dossierId, fileId, FileType.UNTOUCHED), in);
+            }
+        }
    }

 }
--- a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/OCRService.java
+++ b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/OCRService.java
@ -1,12 +1,12 @@
 package com.knecon.fforesight.service.ocr.processor.service;

-import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.nio.file.Files;
 import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
@ -27,6 +27,7 @@ import com.knecon.fforesight.service.ocr.processor.model.OcrResult;
 import com.knecon.fforesight.service.ocr.processor.service.scriptdetection.FontStyleDetector;
 import com.knecon.fforesight.service.ocr.processor.service.threads.OCRThread;
 import com.knecon.fforesight.service.ocr.processor.settings.OcrServiceSettings;
+import com.pdftron.pdf.PDFDoc;

 import io.micrometer.observation.ObservationRegistry;
 import io.micrometer.observation.annotation.Observed;
@ -58,55 +59,66 @@ public class OCRService {
     * looking for stitchedImages (if so converting the current page to an image with ghostscript and work on this instead),
     * perform tesseract-ocr on these images (via threads) and write the generated ocr-text as invisible elements.
     *
-     * @param dossierId Id of dossier
-     * @param fileId    Id of file
-     * @param out       OutputStream where to write to
+     * @param dossierId          Id of dossier
+     * @param fileId             Id of file
+     * @param tmpDir             working directory for all files
+     * @param documentFile       the file to perform ocr on, results are written invisibly
+     * @param viewerDocumentFile debugging file, results are written visibly in an optional content group
     */
    @Observed(name = "OCRService", contextualName = "run-ocr-on-document")
    @SneakyThrows
-    public void runOcrOnDocument(String dossierId, String fileId, OutputStream out) {
-
-        try (InputStream fileStream = removeWatermarkIfEnabled(dossierId, fileId); ByteArrayOutputStream transferOutputStream = new ByteArrayOutputStream()) {
-
-            invisibleElementRemovalService.removeInvisibleElements(fileStream, transferOutputStream, false, false);
-
-            try (InputStream transferInputStream = new ByteArrayInputStream(transferOutputStream.toByteArray())) {
-                log.info("Starting OCR for file {}", fileId);
-                long ocrStart = System.currentTimeMillis();
-                Statistics stats = runOcr(transferInputStream, out, fileId, dossierId);
-                long ocrEnd = System.currentTimeMillis();
-                log.info("ocr successful for file with dossierId {} and fileId {}, took {}s", dossierId, fileId, String.format("%.1f", (ocrEnd - ocrStart) / 1000.0));
-                log.info("Runtime breakdown: {}", stats);
-            }
-        }
-    }
-
-
-    private InputStream removeWatermarkIfEnabled(String dossierId, String fileId) throws IOException {
+    public void runOcrOnDocument(String dossierId, String fileId, Path tmpDir, File documentFile, File viewerDocumentFile) { // pre-processes documentFile, then runs OCR and logs the elapsed time

        if (settings.isRemoveWatermark()) {
-            try (var in = fileStorageService.getOriginalFileAsStream(dossierId, fileId); var transferOutputStream = new ByteArrayOutputStream()) {
-                watermarkRemovalService.removeWatermarks(in, transferOutputStream);
-                return new ByteArrayInputStream(transferOutputStream.toByteArray());
-            }
+            removeWatermarkIfEnabled(documentFile); // overwrites documentFile with the watermark-free result
        }
-        return fileStorageService.getOriginalFileAsStream(dossierId, fileId);
+        removeInvisibleElements(documentFile); // overwrites documentFile; viewerDocumentFile is not pre-processed in this method
+
+        log.info("Starting OCR for file {}", fileId);
+        long ocrStart = System.currentTimeMillis();
+        Statistics stats = runOcr(tmpDir, documentFile, viewerDocumentFile, fileId, dossierId);
+        long ocrEnd = System.currentTimeMillis();
+        log.info("ocr successful for file with dossierId {} and fileId {}, took {}s", dossierId, fileId, String.format("%.1f", (ocrEnd - ocrStart) / 1000.0)); // wall-clock seconds, one decimal place
+        log.info("Runtime breakdown: {}", stats);
+
    }


    @SneakyThrows
-    public Statistics runOcr(InputStream in, OutputStream out, String fileId, String dossierId) {
+    private void removeInvisibleElements(File originFile) {
+
+        // Strips invisible elements from originFile and overwrites the file with the result.
+        // The result goes to a temporary file first because originFile is still being read
+        // while the output is produced.
+        Path tmpFile = Files.createTempFile("invisibleElements", ".pdf");
+        try {
+            try (var in = new FileInputStream(originFile); var out = new FileOutputStream(tmpFile.toFile())) {
+                invisibleElementRemovalService.removeInvisibleElements(in, out, false, false);
+            }
+            Files.copy(tmpFile, originFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
+        } finally {
+            // Cleanup must not live inside an assert: assertions are disabled by default, so the
+            // delete would never run. The finally block also covers failures of the steps above.
+            Files.deleteIfExists(tmpFile);
+        }
+    }
+
+
+    /**
+     * Removes watermarks from the given PDF and overwrites the file with the result.
+     * <p>
+     * Despite its name, this method does not check the setting itself; the caller decides whether
+     * watermark removal is enabled.
+     * <p>
+     * The result is written to a temporary file first because the input is still being read while
+     * the output is produced. The temporary file is deleted in a {@code finally} block: a delete
+     * placed inside an {@code assert} is skipped whenever assertions are disabled (the JVM
+     * default) and would leak one file per processed document.
+     *
+     * @param originFile PDF to process; replaced in place
+     */
+    @SneakyThrows
+    private void removeWatermarkIfEnabled(File originFile) {
+
+        Path tmpFile = Files.createTempFile("removeWatermarks", ".pdf");
+        try {
+            try (var in = new FileInputStream(originFile); var out = new FileOutputStream(tmpFile.toFile())) {
+                watermarkRemovalService.removeWatermarks(in, out);
+            }
+            Files.copy(tmpFile, originFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
+        } finally {
+            Files.deleteIfExists(tmpFile);
+        }
+    }
+
+
+    @SneakyThrows
+    public Statistics runOcr(Path tmpDir, File documentFile, File viewerDocumentFile, String fileId, String dossierId) {

        long timestamp;
-        Path tmpDir = Path.of(OsUtils.getTemporaryDirectory()).resolve(dossierId + "-" + fileId);
+
        Path tmpImageDir = tmpDir.resolve("images");
        Path tesseractOutputDir = tmpDir.resolve("tesseract_output");

        tesseractOutputDir.toFile().mkdirs();
        tmpImageDir.toFile().mkdirs();

-        File documentFile = OsUtils.writeFileToTmpFolder(in, tmpDir);
-
        Statistics stats;
        try (PDDocument document = Loader.loadPDF(documentFile)) {
            OcrProgressLogger logger = new OcrProgressLogger(document.getNumberOfPages(), ocrMessageSender, fileId);
@ -150,12 +162,11 @@ public class OCRService {
            stats.increaseFontStyleDetectionDuration(System.currentTimeMillis() - timestamp);

            timestamp = System.currentTimeMillis();
-            var dictionariesToUpdate = ocrResultWriter.drawOcrResultsToPdf(document, imageWithTextPositionsPerPage);
+            ocrResultWriter.drawOcrResultsToPdf(documentFile, viewerDocumentFile, imageWithTextPositionsPerPage);
+
            log.info("Saving document");
-            document.saveIncremental(out, dictionariesToUpdate);
            stats.increaseWritingTextDuration(System.currentTimeMillis() - timestamp);

-            FileSystemUtils.deleteRecursively(tmpDir);
            logger.sendFinished();
            return stats;
        }
--- a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/OcrResultWriter.java
+++ b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/OcrResultWriter.java
@ -1,29 +1,31 @@
 package com.knecon.fforesight.service.ocr.processor.service;

 import java.awt.Color;
+import java.awt.geom.Line2D;
 import java.awt.geom.Point2D;
+import java.io.File;
 import java.util.Collection;
-import java.util.HashSet;
+import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
+import java.util.Optional;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;

-import org.apache.pdfbox.cos.COSDictionary;
-import org.apache.pdfbox.cos.COSName;
-import org.apache.pdfbox.pdmodel.PDDocument;
-import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
-import org.apache.pdfbox.pdmodel.PDPage;
-import org.apache.pdfbox.pdmodel.PDPageContentStream;
-import org.apache.pdfbox.pdmodel.PDResources;
-import org.apache.pdfbox.pdmodel.graphics.optionalcontent.PDOptionalContentGroup;
-import org.apache.pdfbox.pdmodel.graphics.optionalcontent.PDOptionalContentProperties;
 import org.apache.pdfbox.pdmodel.graphics.state.RenderingMode;
 import org.springframework.stereotype.Service;

 import com.knecon.fforesight.service.ocr.processor.model.OcrResultToWrite;
 import com.knecon.fforesight.service.ocr.processor.model.QuadPoint;
 import com.knecon.fforesight.service.ocr.processor.model.TextPositionInImage;
-import com.knecon.fforesight.service.ocr.processor.settings.OcrServiceSettings;
+import com.knecon.fforesight.service.ocr.processor.service.fonts.FontStyle;
+import com.knecon.fforesight.service.viewerdoc.ContentStreams;
+import com.knecon.fforesight.service.viewerdoc.model.ColoredLine;
+import com.knecon.fforesight.service.viewerdoc.model.PlacedText;
+import com.knecon.fforesight.service.viewerdoc.model.Visualizations;
+import com.knecon.fforesight.service.viewerdoc.model.VisualizationsOnPage;
+import com.knecon.fforesight.service.viewerdoc.service.ViewerDocumentService;

 import lombok.AccessLevel;
 import lombok.RequiredArgsConstructor;
@ -37,180 +39,114 @@ import lombok.extern.slf4j.Slf4j;
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
 public class OcrResultWriter {

-    static String ocrLayerName = "knecon OCR";
-    OcrServiceSettings settings;
+    ViewerDocumentService viewerDocumentService;


    @SneakyThrows
-    public Set<COSDictionary> drawOcrResultsToPdf(PDDocument document, Map<Integer, List<OcrResultToWrite>> imagesWithResultsPerPage) {
+    public void drawOcrResultsToPdf(File document, File viewerDocument, Map<Integer, List<OcrResultToWrite>> imagesWithResultsPerPage) { // writes the OCR layer to document and OCR + debug layers to viewerDocument

-        Set<COSDictionary> dictionariesToUpdate = new HashSet<>();
-        imagesWithResultsPerPage.keySet().forEach(pageNumber -> drawResultsPerPage(document, pageNumber, imagesWithResultsPerPage.get(pageNumber), dictionariesToUpdate));
-        dictionariesToUpdate.add(document.getDocumentInformation().getCOSObject());
-        return dictionariesToUpdate;
+        Map<Integer, VisualizationsOnPage> ocrVisualizationsOnPages = createVisualizations(imagesWithResultsPerPage); // words as text with RenderingMode.NEITHER
+        Map<Integer, VisualizationsOnPage> ocrTextDebugVisualizationsOnPages = createDebugTextVisualizations(imagesWithResultsPerPage); // same words, filled and colored by font style
+        Map<Integer, VisualizationsOnPage> ocrBBoxDebugVisualizationsOnPages = createDebugBBoxVisualizations(imagesWithResultsPerPage); // word bounding boxes and image grids as lines
+
+        Visualizations visualizations = new Visualizations(ContentStreams.KNECON_OCR, ocrVisualizationsOnPages, false);
+
+        List<Visualizations> debugVisualizations = List.of(visualizations,
+                new Visualizations(ContentStreams.KNECON_OCR_TEXT_DEBUG, ocrTextDebugVisualizationsOnPages, false),
+                new Visualizations(ContentStreams.KNECON_OCR_BBOX_DEBUG, ocrBBoxDebugVisualizationsOnPages, false));
+
+        viewerDocumentService.addVisualizationsOnPage(document, document, visualizations); // same file passed twice - presumably source and destination, i.e. edited in place; TODO confirm against ViewerDocumentService
+        viewerDocumentService.addVisualizationsOnPage(viewerDocument, viewerDocument, debugVisualizations); // viewer document gets the OCR layer plus both debug layers
+    }
+
+
+    /**
+     * Builds the OCR text layer for every page that has OCR results.
+     *
+     * @param imagesWithResultsPerPage OCR results keyed by 1-based page number
+     * @return per-page visualizations keyed by 0-based page index
+     */
+    private Map<Integer, VisualizationsOnPage> createVisualizations(Map<Integer, List<OcrResultToWrite>> imagesWithResultsPerPage) {
+
+        Function<Integer, Integer> toZeroBasedIndex = pageNumber1IdxTo0IdxMapper();
+        return imagesWithResultsPerPage.entrySet()
+                .stream()
+                .collect(Collectors.toMap(entry -> toZeroBasedIndex.apply(entry.getKey()), entry -> createVisualizations(entry.getValue())));
+    }
+
+
+    /**
+     * Returns a function converting our 1-based page numbers into the 0-based page indices that
+     * PDFBox uses internally.
+     */
+    private static Function<Integer, Integer> pageNumber1IdxTo0IdxMapper() {
+
+        return oneBasedPageNumber -> oneBasedPageNumber - 1;
+    }
+
+
+    /**
+     * Creates the OCR text layer of a single page: every word becomes a black {@link PlacedText}
+     * at the word's text matrix, using {@link RenderingMode#NEITHER}.
+     *
+     * @param ocrResultsToWrite OCR results of all images on the page
+     * @return the page's visualizations, containing placed texts only
+     */
+    private VisualizationsOnPage createVisualizations(List<OcrResultToWrite> ocrResultsToWrite) {
+
+        List<PlacedText> invisibleTexts = ocrResultsToWrite.stream()
+                .flatMap(result -> result.textPositionInImage().stream())
+                .map(position -> new PlacedText(position.getText(),
+                        null,
+                        Color.BLACK,
+                        (float) position.getFontSize(),
+                        position.getFontMetricsFactory(),
+                        Optional.of(position.getTextMatrix()),
+                        Optional.of(RenderingMode.NEITHER)))
+                .toList();
+        return VisualizationsOnPage.builder().placedTexts(invisibleTexts).build();
+    }
+
+
+    /**
+     * Builds the visible debug text layer for every page that has OCR results.
+     *
+     * @param imagesWithResultsPerPage OCR results keyed by 1-based page number
+     * @return per-page visualizations keyed by 0-based page index
+     */
+    private Map<Integer, VisualizationsOnPage> createDebugTextVisualizations(Map<Integer, List<OcrResultToWrite>> imagesWithResultsPerPage) {
+
+        Function<Integer, Integer> toZeroBasedIndex = pageNumber1IdxTo0IdxMapper();
+        return imagesWithResultsPerPage.entrySet()
+                .stream()
+                .collect(Collectors.toMap(entry -> toZeroBasedIndex.apply(entry.getKey()), entry -> createDebugTextVisualizations(entry.getValue())));
+    }
+
+
+    /**
+     * Creates the debug text layer of a single page: every word becomes a filled
+     * {@link PlacedText} at the word's text matrix, blue for {@link FontStyle#REGULAR} and red
+     * for any other font style.
+     *
+     * @param ocrResultsToWrite OCR results of all images on the page
+     * @return the page's visualizations, containing placed texts only
+     */
+    private VisualizationsOnPage createDebugTextVisualizations(List<OcrResultToWrite> ocrResultsToWrite) {
+
+        List<PlacedText> visibleTexts = ocrResultsToWrite.stream()
+                .flatMap(result -> result.textPositionInImage().stream())
+                .map(position -> new PlacedText(position.getText(),
+                        null,
+                        position.getFontStyle().equals(FontStyle.REGULAR) ? Color.BLUE : Color.RED,
+                        (float) position.getFontSize(),
+                        position.getFontMetricsFactory(),
+                        Optional.of(position.getTextMatrix()),
+                        Optional.of(RenderingMode.FILL)))
+                .toList();
+        return VisualizationsOnPage.builder().placedTexts(visibleTexts).build();
+    }
+
+
+    /**
+     * Builds the bounding-box debug layer for every page that has OCR results.
+     *
+     * @param imagesWithResultsPerPage OCR results keyed by 1-based page number
+     * @return per-page visualizations keyed by 0-based page index
+     */
+    private Map<Integer, VisualizationsOnPage> createDebugBBoxVisualizations(Map<Integer, List<OcrResultToWrite>> imagesWithResultsPerPage) {
+
+        Function<Integer, Integer> toZeroBasedIndex = pageNumber1IdxTo0IdxMapper();
+        return imagesWithResultsPerPage.entrySet()
+                .stream()
+                .collect(Collectors.toMap(entry -> toZeroBasedIndex.apply(entry.getKey()), entry -> createDebugBBoxVisualizations(entry.getValue())));
+    }
+
+
+    /**
+     * Creates the bounding-box debug layer of a single page: the outline of every word's
+     * transformed text bounding box, followed by a grid over every image's bounding box.
+     *
+     * @param ocrResultsToWrite OCR results of all images on the page
+     * @return the page's visualizations, containing colored lines only
+     */
+    private VisualizationsOnPage createDebugBBoxVisualizations(List<OcrResultToWrite> ocrResultsToWrite) {
+
+        Stream<List<ColoredLine>> wordOutlines = ocrResultsToWrite.stream()
+                .flatMap(result -> result.textPositionInImage().stream())
+                .map(TextPositionInImage::getTransformedTextBBox)
+                .map(this::quadPointAsLines);
+        Stream<List<ColoredLine>> imageGrids = ocrResultsToWrite.stream()
+                .map(OcrResultToWrite::imageBoundingBox)
+                .map(this::createGrid);
+        List<ColoredLine> debugLines = Stream.concat(wordOutlines, imageGrids).flatMap(Collection::stream).toList();
+        return VisualizationsOnPage.builder().coloredLines(debugLines).build();
+    }
+
+
+    /**
+     * Converts a quadrilateral into its four edges, each with line width 1 and its own color so
+     * the orientation stays visible: a-b orange, b-c blue, c-d green, d-a magenta.
+     *
+     * @param rect quadrilateral with corners a, b, c, d
+     * @return the four edges in the order a-b, b-c, c-d, d-a
+     */
+    private List<ColoredLine> quadPointAsLines(QuadPoint rect) {
+
+        var edgeAb = new Line2D.Double(rect.a(), rect.b());
+        var edgeBc = new Line2D.Double(rect.b(), rect.c());
+        var edgeCd = new Line2D.Double(rect.c(), rect.d());
+        var edgeDa = new Line2D.Double(rect.d(), rect.a());
+        return List.of(new ColoredLine(edgeAb, Color.ORANGE, 1),
+                new ColoredLine(edgeBc, Color.BLUE, 1),
+                new ColoredLine(edgeCd, Color.GREEN, 1),
+                new ColoredLine(edgeDa, Color.MAGENTA, 1));
    }


    @SneakyThrows
-    private void drawResultsPerPage(PDDocument document, Integer pageNumber, List<OcrResultToWrite> ocrResultToWrite, Set<COSDictionary> dictionariesToUpdate) {
+    private List<ColoredLine> createGrid(QuadPoint rect) {

-        var pdPage = document.getPage(pageNumber - 1);
+        List<ColoredLine> lines = new LinkedList<>(quadPointAsLines(rect));

-        PDOptionalContentGroup textDebugLayer = new PDOptionalContentGroup(ocrLayerName);
-        PDOptionalContentGroup bBoxDebugLayer = new PDOptionalContentGroup(ocrLayerName + "BBox");
-        if (settings.isDebug()) {
-            textDebugLayer = addOptionalGroup(ocrLayerName, document, pdPage, dictionariesToUpdate);
-            bBoxDebugLayer = addOptionalGroup(ocrLayerName + " BBox", document, pdPage, dictionariesToUpdate);
-        }
-
-        escapeContentStreams(document, pdPage);
-
-        List<TextPositionInImage> words = ocrResultToWrite.stream().map(OcrResultToWrite::textPositionInImage).flatMap(Collection::stream).toList();
-        try (var contentStream = new PDPageContentStream(document, pdPage, PDPageContentStream.AppendMode.APPEND, true)) {
-
-            // write invisible ocr text inside tagged content
-            contentStream.beginMarkedContent(settings.getOcrMarkedContentTag());
-            contentStream.saveGraphicsState();
-            contentStream.setNonStrokingColor(Color.BLUE);
-            contentStream.setStrokingColor(Color.BLUE);
-            contentStream.setLineWidth(1);
-            words.forEach(word -> drawInvisibleWord(word, contentStream));
-            contentStream.restoreGraphicsState();
-            contentStream.endMarkedContent();
-
-            if (settings.isDebug()) { // must not be written, as it will interfere with layout parsing
-                // write visible ocr text inside optional group
-                contentStream.beginMarkedContent(COSName.OC, textDebugLayer);
-                contentStream.saveGraphicsState();
-                words.forEach(word -> drawVisibleWord(word, contentStream));
-                contentStream.restoreGraphicsState();
-                contentStream.endMarkedContent();
-
-                // write word bounding boxes (tesseract output) inside optional group
-                contentStream.beginMarkedContent(COSName.OC, bBoxDebugLayer);
-                contentStream.saveGraphicsState();
-                ocrResultToWrite.stream()
-                        .map(OcrResultToWrite::imageBoundingBox)
-                        .forEach(imagePosition -> drawGrid(contentStream, imagePosition));
-                words.stream().map(TextPositionInImage::getTransformedTextBBox).forEach(word -> drawRectangle(contentStream, word));
-                contentStream.restoreGraphicsState();
-                contentStream.endMarkedContent();
-            }
-        }
-        dictionariesToUpdate.add(pdPage.getCOSObject());
-        dictionariesToUpdate.add(pdPage.getResources().getCOSObject());
-    }
-
-
-    @SneakyThrows
-    private static void escapeContentStreams(PDDocument document, PDPage pdPage) {
-        // We need to append to the contentstream, otherwise the content could be overlapped by images
-        // But we also need to save the graphics state before, such that our appended content cannot be affected by previous contentstreams with side-effects, such as not escaped matrix transformations
-        try (var contentStream = new PDPageContentStream(document, pdPage, PDPageContentStream.AppendMode.PREPEND, false)) {
-            contentStream.saveGraphicsState();
-        }
-        try (var contentStream = new PDPageContentStream(document, pdPage, PDPageContentStream.AppendMode.APPEND, false)) {
-            contentStream.restoreGraphicsState();
-        }
-    }
-
-
-    private PDOptionalContentGroup addOptionalGroup(String ocrLayerName, PDDocument document, PDPage pdPage, Set<COSDictionary> dictionariesToUpdate) {
-
-        PDDocumentCatalog catalog = document.getDocumentCatalog();
-        PDOptionalContentProperties ocprops = catalog.getOCProperties();
-        if (ocprops == null) {
-            ocprops = new PDOptionalContentProperties();
-            catalog.setOCProperties(ocprops);
-        }
-        PDOptionalContentGroup layer = null;
-        if (ocprops.hasGroup(ocrLayerName)) {
-            layer = ocprops.getGroup(ocrLayerName);
-        } else {
-            layer = new PDOptionalContentGroup(ocrLayerName);
-            ocprops.addGroup(layer);
-        }
-
-        // enable debug layers by default only when DEBUG flag is set.
-        ocprops.setGroupEnabled(layer, settings.isDebug());
-        PDResources resources = pdPage.getResources();
-        if (resources == null) {
-            resources = new PDResources();
-            pdPage.setResources(resources);
-        }
-        dictionariesToUpdate.add(catalog.getCOSObject());
-        return layer;
-    }
-
-
-    @SneakyThrows
-    private void drawRectangle(PDPageContentStream contentStream, QuadPoint rect) {
-
-        contentStream.saveGraphicsState();
-        contentStream.setLineWidth(1);
-        contentStream.moveTo((float) rect.a().getX(), (float) rect.a().getY());
-        contentStream.lineTo((float) rect.b().getX(), (float) rect.b().getY());
-        contentStream.setStrokingColor(Color.ORANGE);
-        contentStream.stroke();
-        contentStream.moveTo((float) rect.b().getX(), (float) rect.b().getY());
-        contentStream.lineTo((float) rect.c().getX(), (float) rect.c().getY());
-        contentStream.setStrokingColor(Color.BLUE);
-        contentStream.stroke();
-        contentStream.moveTo((float) rect.c().getX(), (float) rect.c().getY());
-        contentStream.lineTo((float) rect.d().getX(), (float) rect.d().getY());
-        contentStream.setStrokingColor(Color.GREEN);
-        contentStream.stroke();
-        contentStream.moveTo((float) rect.d().getX(), (float) rect.d().getY());
-        contentStream.lineTo((float) rect.a().getX(), (float) rect.a().getY());
-        contentStream.setStrokingColor(Color.MAGENTA);
-        contentStream.stroke();
-        contentStream.restoreGraphicsState();
-    }
-
-
-    private void drawInvisibleWord(TextPositionInImage word, PDPageContentStream contentStream) {
-
-        drawWord(word, contentStream, RenderingMode.NEITHER);
-    }
-
-
-    private void drawVisibleWord(TextPositionInImage word, PDPageContentStream contentStream) {
-
-        drawWord(word, contentStream, RenderingMode.FILL);
-    }
-
-
-    //    @SneakyThrows
-    private void drawWord(TextPositionInImage position, PDPageContentStream contentStream, RenderingMode renderingMode) {
-
-        try {
-            contentStream.setNonStrokingColor(switch (position.getFontStyle()) {
-                case BOLD -> Color.RED;
-                case ITALIC -> Color.GREEN;
-                default -> Color.BLUE;
-            });
-            contentStream.beginText();
-            contentStream.setRenderingMode(renderingMode);
-            contentStream.setFont(position.getFont(), (float) position.getFontSize());
-            contentStream.setTextMatrix(position.getTextMatrix());
-            contentStream.showText(position.getText());
-            contentStream.endText();
-
-        } catch (Exception e) {
-            log.error("Failed to write text {}", position.getText());
-            log.error(e.getMessage());
-        }
-    }
-
-
-    @SneakyThrows
-    private void drawGrid(PDPageContentStream contentStream, QuadPoint rect) {
-
-        drawRectangle(contentStream, rect);
-
-        contentStream.saveGraphicsState();
-        contentStream.setStrokingColor(Color.BLACK);
-        contentStream.setLineWidth(0.2F);
        int nRows = 8;
        int nCols = 8;

@ -218,7 +154,7 @@ public class OcrResultWriter {
        Point2D start = add(rect.a(), abStep);
        Point2D end = add(rect.d(), abStep);
        for (int row = 0; row < nRows; ++row) {
-            drawLine(start, end, contentStream);
+            lines.add(new ColoredLine(new Line2D.Double(start, end), Color.BLACK, 0.2f));
            start = add(start, abStep);
            end = add(end, abStep);
        }
@ -226,21 +162,12 @@ public class OcrResultWriter {
        start = add(rect.a(), adStep);
        end = add(rect.b(), adStep);
        for (int col = 0; col < nCols; ++col) {
-            drawLine(start, end, contentStream);
+            lines.add(new ColoredLine(new Line2D.Double(start, end), Color.BLACK, 0.2f));
            start = add(start, adStep);
            end = add(end, adStep);
        }
-        contentStream.restoreGraphicsState();

-    }
-
-
-    @SneakyThrows
-    private void drawLine(Point2D a, Point2D b, PDPageContentStream contentStream) {
-
-        contentStream.moveTo((float) a.getX(), (float) a.getY());
-        contentStream.lineTo((float) b.getX(), (float) b.getY());
-        contentStream.stroke();
+        return lines;
    }


--- a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/fonts/FontMetricsFactory.java
+++ b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/fonts/FontMetricsFactory.java
@ -4,11 +4,12 @@ import org.apache.pdfbox.pdmodel.font.PDFont;

 import com.knecon.fforesight.service.ocr.processor.model.FontMetrics;
 import com.knecon.fforesight.service.ocr.processor.model.HeightAndDescent;
+import com.knecon.fforesight.service.viewerdoc.model.EmbeddableFont;

 import lombok.SneakyThrows;
 import lombok.extern.slf4j.Slf4j;

-public interface FontMetricsFactory {
+public interface FontMetricsFactory extends EmbeddableFont {

    default FontMetrics calculateMetrics(String text, double textWidth, double textHeight) {

--- a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/fonts/Type0FontMetricsFactory.java
+++ b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/fonts/Type0FontMetricsFactory.java
@ -1,8 +1,6 @@
 package com.knecon.fforesight.service.ocr.processor.service.fonts;

 import java.io.ByteArrayInputStream;
-import java.util.Collections;
-import java.util.List;
 import java.util.Set;

 import org.apache.fontbox.ttf.GlyphData;
@ -15,47 +13,63 @@ import org.apache.pdfbox.pdmodel.font.PDType0Font;

 import com.knecon.fforesight.service.ocr.processor.model.HeightAndDescent;

+import lombok.AllArgsConstructor;
 import lombok.RequiredArgsConstructor;
 import lombok.SneakyThrows;
 import lombok.extern.slf4j.Slf4j;
-import software.amazon.awssdk.services.s3.endpoints.internal.Value;

@Slf4j
@RequiredArgsConstructor
+@AllArgsConstructor
 public class Type0FontMetricsFactory implements FontMetricsFactory {

-    private final PDType0Font type0Font;
-    private final TrueTypeFont trueTypeFont;
+    private final String resourcePath;
+    private PDType0Font type0Font;
+    private TrueTypeFont trueTypeFont;
+    private PDDocument documentThisIsEmbeddedIn;

    // for this specific font back-/forward-slashes have a lot of descent screwing up the font size and therefore bold detection. So if we find such a character we ignore its descent.
    private static final Set<Integer> slashGlyphIds = Set.of(18, 63);


+    @SneakyThrows
    public static Type0FontMetricsFactory regular(PDDocument document) {

-        return createFromResource("fonts/cmu-regular.ttf", document);
+        String resourcePath = "fonts/cmu-regular.ttf";
+        return createFromResourcePath(resourcePath, document);
    }


+    @SneakyThrows
    public static Type0FontMetricsFactory bold(PDDocument document) {

-        return createFromResource("fonts/cmu-bold.ttf", document);
+        String resourcePath = "fonts/cmu-bold.ttf";
+        return createFromResourcePath(resourcePath, document);
    }


    @SneakyThrows
    @SuppressWarnings("PMD.CloseResource")
-    // Todo i think this is not ok to never close the font...
-    private static Type0FontMetricsFactory createFromResource(String resourcePath, PDDocument document) {
+    private static TrueTypeFont readFromResourcePath(String resourcePath) {

+        // The ttf is closed with the document, see PDType0Font line 134
        try (var in = Thread.currentThread().getContextClassLoader().getResourceAsStream(resourcePath); var buffer = new RandomAccessReadBuffer(in)) {
-            TrueTypeFont trueTypeFont = new TTFParser().parse(buffer); // since Type0Font can be descendant from any font, we need to remember the original TrueTypeFont for the glyph information
-            PDType0Font type0Font = PDType0Font.load(document, trueTypeFont, true); // use Type0Font for unicode support
-            return new Type0FontMetricsFactory(type0Font, trueTypeFont);
+            return new TTFParser().parse(buffer);
        }
    }


+    /**
+     * Reads the TrueType font at the given resource path and loads it into the document as a Type0 font.
+     */
+    @SneakyThrows
+    @SuppressWarnings("PMD.CloseResource")
+    private static Type0FontMetricsFactory createFromResourcePath(String resourcePath, PDDocument document) {
+
+        // since Type0Font can be descendant from any font, we need to remember the original TrueTypeFont for the glyph information
+        TrueTypeFont parsedTtf = readFromResourcePath(resourcePath);
+        // use Type0Font for unicode support
+        PDType0Font unicodeFont = PDType0Font.load(document, parsedTtf, true);
+        return new Type0FontMetricsFactory(resourcePath, unicodeFont, parsedTtf, document);
+    }
+
+
    @SneakyThrows
    public HeightAndDescent calculateHeightAndDescent(String text) {

@ -99,4 +113,28 @@ public class Type0FontMetricsFactory implements FontMetricsFactory {
        return type0Font;
    }

+
+    /**
+     * Embeds this font into the given document and returns the embedded font.
+     * <p>
+     * If the font is already embedded in {@code document}, the existing font is returned unchanged.
+     * Otherwise the TrueType font is re-read from {@code resourcePath}, loaded into {@code document},
+     * and this factory is re-pointed at the new document.
+     *
+     * @param document the document to embed the font in
+     * @return the font embedded in {@code document}
+     */
+    @Override
+    @SneakyThrows
+    public PDFont embed(PDDocument document) {
+
+        // documentThisIsEmbeddedIn is null when the factory was created with the resourcePath-only constructor,
+        // so guard against it instead of dereferencing it directly.
+        if (documentThisIsEmbeddedIn != null && documentThisIsEmbeddedIn.equals(document)) {
+            return getFont();
+        }
+
+        // no need to close, the font will be closed with the document it is embedded in
+
+        this.trueTypeFont = readFromResourcePath(resourcePath);
+        this.type0Font = PDType0Font.load(document, trueTypeFont, true);
+        this.documentThisIsEmbeddedIn = document;
+        return getFont();
+    }
+
+
+    /**
+     * Closes the underlying TrueType font, if one has been loaded.
+     */
+    @SneakyThrows
+    public void close() {
+
+        // trueTypeFont is unset when the factory was created from a resource path only and never embedded
+        if (trueTypeFont != null) {
+            trueTypeFont.close();
+        }
+    }
+
 }
--- a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/scriptdetection/FontStyleDetector.java
+++ b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/scriptdetection/FontStyleDetector.java
@ -49,11 +49,11 @@ public class FontStyleDetector {
     * (<a href="https://en.wikipedia.org/wiki/Opening_(morphology)">Opening (Morphology)</a>).
     * We then threshold the ratio of remaining pixels to determine whether a word is bold or not.
     * <p>
-     * I did take some liberties though. Firstly, the paper uses text height without ascender/descender height for the clustering. I'm using the previously implemented font size.
-     * But this is based on text width. Thus, I'm also using the height scaling factor to scale the font size by the text height.
+     * I did take some liberties though. Firstly, the paper uses text height without ascender/descender height for the clustering. I'm using the previously implemented font size estimation.
+     * But that is calculated based on text width. Thus, I'm also using the height scaling factor to scale the font size by the text height.
     * The paper does not describe its clustering algorithm, so I've decided on DBSCAN due to its good runtime and readily available implementation by apache commons math.
     * Moreover, the paper states that stroke width scales linearly with text height. I've come to the conclusion this is not the case.
-     * It seems it scales with the square root of the text height. Or at least this seemed to give the best results.
+     * It seems it scales with the square root of the text height. Or at least this seemed to give the best results for me.
     */
    public Map<Integer, List<OcrResultToWrite>> detectBold(List<OcrResult> ocrResults, PDDocument document) {

--- a/ocr-service-v1/ocr-service-server/src/main/java/com/knecon/fforesight/service/ocr/v1/server/queue/OcrMessageReceiver.java
+++ b/ocr-service-v1/ocr-service-server/src/main/java/com/knecon/fforesight/service/ocr/v1/server/queue/OcrMessageReceiver.java
@ -2,18 +2,23 @@ package com.knecon.fforesight.service.ocr.v1.server.queue;

 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
+import java.io.File;
 import java.io.IOException;
+import java.nio.file.Path;
 import java.time.OffsetDateTime;
 import java.time.temporal.ChronoUnit;

+import org.apache.commons.io.FileUtils;
 import org.springframework.amqp.AmqpRejectAndDontRequeueException;
 import org.springframework.amqp.core.Message;
 import org.springframework.amqp.rabbit.annotation.RabbitHandler;
 import org.springframework.amqp.rabbit.annotation.RabbitListener;
 import org.springframework.http.HttpStatus;
 import org.springframework.stereotype.Service;
+import org.springframework.util.FileSystemUtils;

 import com.fasterxml.jackson.databind.ObjectMapper;
+import com.knecon.fforesight.service.ocr.processor.service.OsUtils;
 import com.knecon.fforesight.service.ocr.v1.server.client.FileStatusProcessingUpdateClient;
 import com.knecon.fforesight.service.ocr.processor.service.FileStorageService;
 import com.knecon.fforesight.service.ocr.processor.service.OCRService;
@ -21,7 +26,6 @@ import com.knecon.fforesight.service.ocr.v1.api.model.DocumentRequest;
 import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;

 import feign.FeignException;
-import io.micrometer.observation.annotation.Observed;
 import lombok.AccessLevel;
 import lombok.RequiredArgsConstructor;
 import lombok.experimental.FieldDefaults;
@ -33,10 +37,10 @@ import lombok.extern.slf4j.Slf4j;
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
 public class OcrMessageReceiver {

-     FileStorageService fileStorageService;
-     ObjectMapper objectMapper;
-     FileStatusProcessingUpdateClient fileStatusProcessingUpdateClient;
-     OCRService ocrService;
+    FileStorageService fileStorageService;
+    ObjectMapper objectMapper;
+    FileStatusProcessingUpdateClient fileStatusProcessingUpdateClient;
+    OCRService ocrService;


    @RabbitHandler
@ -44,33 +48,34 @@ public class OcrMessageReceiver {
    public void receiveOcr(Message in) throws IOException {

        DocumentRequest ocrRequestMessage = objectMapper.readValue(in.getBody(), DocumentRequest.class);
-        log.info("--------------------------------------------------------------------------");
-        log.info("Start ocr for file with dossierId {} and fileId {}", ocrRequestMessage.getDossierId(), ocrRequestMessage.getFileId());
+        String dossierId = ocrRequestMessage.getDossierId();
+        String fileId = ocrRequestMessage.getFileId();
+        Path tmpDir = Path.of(OsUtils.getTemporaryDirectory()).resolve(dossierId + "-" + fileId);

        try {
-            setStatusOcrProcessing(ocrRequestMessage.getDossierId(), ocrRequestMessage.getFileId());
+            log.info("--------------------------------------------------------------------------");
+            log.info("Start ocr for file with dossierId {} and fileId {}", dossierId, fileId);

-            if (!fileStorageService.untouchedFileExists(ocrRequestMessage.getDossierId(), ocrRequestMessage.getFileId())) {
-                byte[] originalFile = fileStorageService.getOriginalFile(ocrRequestMessage.getDossierId(), ocrRequestMessage.getFileId());
-                fileStorageService.storeUntouchedFile(ocrRequestMessage.getDossierId(), ocrRequestMessage.getFileId(), originalFile);
-            }
+            setStatusOcrProcessing(dossierId, fileId);

-            try (var transferStream = new ByteArrayOutputStream()) {
-                ocrService.runOcrOnDocument(ocrRequestMessage.getDossierId(), ocrRequestMessage.getFileId(), transferStream);
-                try (var inputStream = new ByteArrayInputStream(transferStream.toByteArray())) {
-                    fileStorageService.storeOriginalFile(ocrRequestMessage.getDossierId(), ocrRequestMessage.getFileId(), inputStream);
-                }
-            } catch (IOException e) {
-                log.error("Failed to store file with dossierId {} and fileId {}", ocrRequestMessage.getDossierId(), ocrRequestMessage.getFileId());
-                throw new RuntimeException(e);
-            }
+            tmpDir.toFile().mkdirs();
+            File documentFile = tmpDir.resolve("document.pdf").toFile();
+            File viewerDocumentFile = tmpDir.resolve("viewerDocument.pdf").toFile();

-            fileStatusProcessingUpdateClient.ocrSuccessful(ocrRequestMessage.getDossierId(), ocrRequestMessage.getFileId());
+            fileStorageService.downloadFiles(dossierId, fileId, documentFile, viewerDocumentFile);
+
+            ocrService.runOcrOnDocument(dossierId, fileId, tmpDir, documentFile, viewerDocumentFile);
+
+            fileStorageService.storeFiles(dossierId, fileId, documentFile, viewerDocumentFile);
+
+            fileStatusProcessingUpdateClient.ocrSuccessful(dossierId, fileId);
        } catch (Exception e) {
            log.warn("An exception occurred in ocr file stage: {}", e.getMessage());
            in.getMessageProperties().getHeaders().put(MessagingConfiguration.X_ERROR_INFO_HEADER, e.getMessage());
            in.getMessageProperties().getHeaders().put(MessagingConfiguration.X_ERROR_INFO_TIMESTAMP_HEADER, OffsetDateTime.now().truncatedTo(ChronoUnit.MILLIS));
            throw new RuntimeException(e);
+        } finally {
+            // Cleanup is best-effort: an IOException thrown from this finally block would replace the
+            // exception (or the successful result) of the OCR run, so it is logged instead of propagated.
+            try {
+                FileSystemUtils.deleteRecursively(tmpDir);
+            } catch (IOException cleanupException) {
+                log.warn("Failed to delete temporary directory {}", tmpDir, cleanupException);
+            }
        }
    }

@ -80,6 +85,7 @@ public class OcrMessageReceiver {
    public void receiveOcrDLQ(Message failedMessage) throws IOException {

        DocumentRequest ocrRequestMessage = objectMapper.readValue(failedMessage.getBody(), DocumentRequest.class);
+
        log.info("OCR DQL received: {}", ocrRequestMessage);
        String errorMessage = failedMessage.getMessageProperties().getHeader(MessagingConfiguration.X_ERROR_INFO_HEADER);
        OffsetDateTime timestamp = failedMessage.getMessageProperties().getHeader(MessagingConfiguration.X_ERROR_INFO_TIMESTAMP_HEADER);
--- a/ocr-service-v1/ocr-service-server/src/test/java/com/knecon/fforesight/service/ocr/v1/server/OcrServiceIntegrationTest.java
+++ b/ocr-service-v1/ocr-service-server/src/test/java/com/knecon/fforesight/service/ocr/v1/server/OcrServiceIntegrationTest.java
@ -9,6 +9,7 @@ import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.nio.file.Files;
 import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
 import java.util.Comparator;
 import java.util.List;
 import java.util.concurrent.TimeUnit;
@ -25,6 +26,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
 import com.knecon.fforesight.service.ocr.processor.service.FileStorageService;
 import com.knecon.fforesight.service.ocr.processor.service.OCRService;
 import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
+import com.knecon.fforesight.service.ocr.processor.service.OsUtils;
 import com.knecon.fforesight.tenantcommons.TenantContext;

 import io.micrometer.prometheus.PrometheusMeterRegistry;
@ -64,7 +66,7 @@ public class OcrServiceIntegrationTest extends AbstractTest {
    @SneakyThrows
    public void testOcr() {

-        String text = testOCR("files/UNAPPROVED_VV-331155 (1).pdf");
+        String text = testOCR("files/402Study.pdf");
    }


@ -116,18 +118,17 @@ public class OcrServiceIntegrationTest extends AbstractTest {
    private String testOCR(String fileName) {

        ClassPathResource pdfFileResource = new ClassPathResource(fileName);
-        var originId = FileStorageService.getStorageId(TEST_DOSSIER_ID, "file", FileType.ORIGIN);
-        try (var fileStream = pdfFileResource.getInputStream()) {
-            storageService.storeObject(TenantContext.getTenantId(), originId, fileStream);
-        }
+        Path tmpDir = Path.of(OsUtils.getTemporaryDirectory()).resolve("OCR_TEST").resolve(Path.of(fileName).getFileName());
+        tmpDir.toFile().mkdirs();
+        var documentFile = tmpDir.resolve(Path.of("document.pdf"));
+        var viewerDocumentFile = tmpDir.resolve(Path.of("viewerDocument.pdf"));
+        Files.copy(pdfFileResource.getFile().toPath(), documentFile, StandardCopyOption.REPLACE_EXISTING);
+        Files.copy(pdfFileResource.getFile().toPath(), viewerDocumentFile, StandardCopyOption.REPLACE_EXISTING);

-        Path tmpFileName = Path.of(getTemporaryDirectory()).resolve(Path.of(fileName).getFileName());
-        try (var out = new FileOutputStream(tmpFileName.toFile())) {
-            ocrService.runOcrOnDocument(TEST_DOSSIER_ID, "file", out);
-            System.out.println("File:" + tmpFileName);
-        }
+        ocrService.runOcrOnDocument(TEST_DOSSIER_ID, "file", tmpDir, documentFile.toFile(), viewerDocumentFile.toFile());
+        System.out.println("File:" + documentFile);

-        try (var fileStream = new FileInputStream(tmpFileName.toFile())) {
+        try (var fileStream = new FileInputStream(documentFile.toFile())) {
            return extractAllTextFromDocument(fileStream);
        }
    }
@ -166,20 +167,18 @@ public class OcrServiceIntegrationTest extends AbstractTest {
    }


-
    @SneakyThrows
    private void testOCRForFile(File file) {

-        var originId = FileStorageService.getStorageId(TEST_DOSSIER_ID, "file", FileType.ORIGIN);
-        try (var fileStream = new FileInputStream(file)) {
-            storageService.storeObject(TenantContext.getTenantId(), originId, fileStream);
-        }
+        Path tmpDir = Path.of(OsUtils.getTemporaryDirectory()).resolve("OCR_TEST").resolve(file.toPath().getFileName());
+        tmpDir.toFile().mkdirs();
+        var documentFile = tmpDir.resolve(Path.of("document.pdf"));
+        var viewerDocumentFile = tmpDir.resolve(Path.of("viewerDocument.pdf"));
+        Files.copy(file.toPath(), documentFile, StandardCopyOption.REPLACE_EXISTING);
+        Files.copy(file.toPath(), viewerDocumentFile, StandardCopyOption.REPLACE_EXISTING);

-        Path tmpFileName = Path.of(getTemporaryDirectory()).resolve(Path.of(file.getAbsolutePath()).getFileName());
-        try (var out = new FileOutputStream(tmpFileName.toFile())) {
-            ocrService.runOcrOnDocument(TEST_DOSSIER_ID, "file", out);
-            System.out.println("File:" + tmpFileName);
-        }
+        ocrService.runOcrOnDocument(TEST_DOSSIER_ID, "file", tmpDir, documentFile.toFile(), viewerDocumentFile.toFile());
+        System.out.println("File:" + documentFile);
        System.out.println("\n\n");
    }

--- a/publish-custom-image.sh
+++ b/publish-custom-image.sh
@ -11,5 +11,5 @@ commit_hash=$(git rev-parse --short=5 HEAD)
 # Combine branch and commit hash
 buildName="${USER}-${branch}-${commit_hash}"

-gradle bootBuildImage --publishImage -PbuildbootDockerHostNetwork=true -Pversion=$buildName --no-build-cache
+gradle bootBuildImage --publishImage -PbuildbootDockerHostNetwork=true -Pversion=$buildName
 echo "nexus.knecon.com:5001/ff/${dir}-server:$buildName"