From 9010ee86918b01ca48492b8e77fe6060bcf74493 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kilian=20Sch=C3=BCttler?= Date: Wed, 24 Jan 2024 13:40:17 +0100 Subject: [PATCH] RED-8212: Pageborders from scanned documents are used for tables --- ocr-service-v1/ocr-service-processor/build.gradle.kts | 2 +- .../fforesight/service/ocr/processor/service/OCRService.java | 2 +- .../ocr/processor/service/threads/ImageProcessingThread.java | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/ocr-service-v1/ocr-service-processor/build.gradle.kts b/ocr-service-v1/ocr-service-processor/build.gradle.kts index 8a974dc..580cc9b 100644 --- a/ocr-service-v1/ocr-service-processor/build.gradle.kts +++ b/ocr-service-v1/ocr-service-processor/build.gradle.kts @@ -24,6 +24,6 @@ dependencies { api("io.github.karols:hocr4j:0.2.0") api("com.amazonaws:aws-java-sdk-kms:1.12.440") api("com.google.guava:guava:31.1-jre") - api("com.iqser.red.commons:pdftron-logic-commons:2.20.0") + api("com.iqser.red.commons:pdftron-logic-commons:2.23.0") testImplementation("org.junit.jupiter:junit-jupiter:5.8.1") } diff --git a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/OCRService.java b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/OCRService.java index f98db93..b859051 100644 --- a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/OCRService.java +++ b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/OCRService.java @@ -68,7 +68,7 @@ public class OCRService { try (InputStream fileStream = removeWatermarkIfEnabled(dossierId, fileId); ByteArrayOutputStream transferOutputStream = new ByteArrayOutputStream()) { - invisibleElementRemovalService.removeInvisibleElements(fileStream, transferOutputStream, false); + invisibleElementRemovalService.removeInvisibleElements(fileStream, transferOutputStream, false, false); try (InputStream transferInputStream = new ByteArrayInputStream(transferOutputStream.toByteArray())) { log.info("Starting OCR for file {}", fileId); diff --git a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/threads/ImageProcessingThread.java b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/threads/ImageProcessingThread.java index 446f567..bc47241 100644 --- a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/threads/ImageProcessingThread.java +++ b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/threads/ImageProcessingThread.java @@ -237,6 +237,7 @@ public class ImageProcessingThread extends Thread { } + private static ITessAPI.TessBaseAPI initDetectionScriptHandle() { ITessAPI.TessBaseAPI handle = TessAPI1.TessBaseAPICreate();