Merge branch 'RED-8212' into 'master'

RED-8212: Page borders from scanned documents are used for tables

See merge request fforesight/ocr-service!35
This commit is contained in:
Kilian Schüttler 2024-01-24 13:40:17 +01:00
commit 75bd2142ec
3 changed files with 3 additions and 2 deletions

View File

@@ -24,6 +24,6 @@ dependencies {
api("io.github.karols:hocr4j:0.2.0")
api("com.amazonaws:aws-java-sdk-kms:1.12.440")
api("com.google.guava:guava:31.1-jre")
-api("com.iqser.red.commons:pdftron-logic-commons:2.20.0")
+api("com.iqser.red.commons:pdftron-logic-commons:2.23.0")
testImplementation("org.junit.jupiter:junit-jupiter:5.8.1")
}

View File

@@ -68,7 +68,7 @@ public class OCRService {
try (InputStream fileStream = removeWatermarkIfEnabled(dossierId, fileId); ByteArrayOutputStream transferOutputStream = new ByteArrayOutputStream()) {
-invisibleElementRemovalService.removeInvisibleElements(fileStream, transferOutputStream, false);
+invisibleElementRemovalService.removeInvisibleElements(fileStream, transferOutputStream, false, false);
try (InputStream transferInputStream = new ByteArrayInputStream(transferOutputStream.toByteArray())) {
log.info("Starting OCR for file {}", fileId);

View File

@@ -237,6 +237,7 @@ public class ImageProcessingThread extends Thread {
}
private static ITessAPI.TessBaseAPI initDetectionScriptHandle() {
ITessAPI.TessBaseAPI handle = TessAPI1.TessBaseAPICreate();