From 26f5395d0afb828ca9d3598dcc506dc57c53437a Mon Sep 17 00:00:00 2001 From: Kilian Schuettler Date: Fri, 3 May 2024 12:49:50 +0200 Subject: [PATCH] certificate: adjust some magic numbers --- ...er.java => NativeLibrariesInitializer.java} | 18 +++++++++++++++++- .../service/ocr/processor/model/OcrImage.java | 2 +- .../service/threads/ImageProcessingThread.java | 4 ++-- .../service/ocr/v1/server/AbstractTest.java | 8 ++++++-- 4 files changed, 26 insertions(+), 6 deletions(-) rename ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/initializer/{PDFNetInitializer.java => NativeLibrariesInitializer.java} (53%) diff --git a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/initializer/PDFNetInitializer.java b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/initializer/NativeLibrariesInitializer.java similarity index 53% rename from ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/initializer/PDFNetInitializer.java rename to ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/initializer/NativeLibrariesInitializer.java index fbd71a9..7523cf3 100644 --- a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/initializer/PDFNetInitializer.java +++ b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/initializer/NativeLibrariesInitializer.java @@ -6,12 +6,15 @@ import com.sun.jna.NativeLibrary; import jakarta.annotation.PostConstruct; import lombok.RequiredArgsConstructor; import lombok.SneakyThrows; +import lombok.extern.slf4j.Slf4j; + import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Component; +@Slf4j @Component @RequiredArgsConstructor -public class PDFNetInitializer { +public class NativeLibrariesInitializer { 
@Value("${pdftron.license:}") private String pdftronLicense; @@ -22,8 +25,21 @@ public class PDFNetInitializer { // Do not change back to application runner, if it is application runner it takes messages from the queue before PDFNet is initialized, that leads to UnsatisfiedLinkError. public void init() { + log.info("Initializing Native Libraries"); + log.info("Setting pdftron license: {}", pdftronLicense == null || pdftronLicense.isBlank() ? "not set" : "[redacted]"); PDFNet.setTempPath("/tmp/pdftron"); PDFNet.initialize(pdftronLicense); + + log.info("Setting jna.library.path: {}", System.getenv("VCPKG_DYNAMIC_LIB")); System.setProperty("jna.library.path", System.getenv("VCPKG_DYNAMIC_LIB")); + + log.info("Asserting Native Libraries loaded"); + NativeLibrary leptonicaLib = NativeLibrary.getInstance("leptonica"); + assert leptonicaLib != null; + log.info("Leptonica library loaded from {}", leptonicaLib.getFile().getAbsolutePath()); + NativeLibrary tesseractLib = NativeLibrary.getInstance("tesseract"); + assert tesseractLib != null; + log.info("Tesseract library loaded from {}", tesseractLib.getFile().getAbsolutePath()); } + } diff --git a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/model/OcrImage.java b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/model/OcrImage.java index 7873b36..9934cd7 100644 --- a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/model/OcrImage.java +++ b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/model/OcrImage.java @@ -93,7 +93,7 @@ public interface OcrImage { if (getWidth() < 200 || getHeight() < 200) { return ITessAPI.TessPageSegMode.PSM_SINGLE_BLOCK; } - return ITessAPI.TessPageSegMode.PSM_AUTO; + return ITessAPI.TessPageSegMode.PSM_SPARSE_TEXT; } // TODO: evaluate if PSM can be dynamically chosen to increase performance diff --git 
a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/threads/ImageProcessingThread.java b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/threads/ImageProcessingThread.java index 1f233e5..26a261b 100644 --- a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/threads/ImageProcessingThread.java +++ b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/threads/ImageProcessingThread.java @@ -45,7 +45,7 @@ public class ImageProcessingThread extends Thread { final BlockingQueue imageInputQueue; final BlockingQueue imageOutputQueue; final ITessAPI.TessBaseAPI detectionScriptHandle = initDetectionScriptHandle(); - final L_Kernel gaussianKernel = Leptonica1.makeGaussianKernel(2, 2, 1.0f, 1); + final L_Kernel gaussianKernel = Leptonica1.makeGaussianKernel(2, 2, 0.7f, 1); final Statistics stats; final OcrServiceSettings settings; final PDDocument document; @@ -227,7 +227,7 @@ public class ImageProcessingThread extends Thread { if (pix.w < 100 || pix.h < 100) { binarized = Leptonica1.pixThresholdToBinary(gaussian, 170); } else { - binarized = Leptonica1.pixOtsuThreshOnBackgroundNorm(gaussian, null, 50, 50, 165, 10, 100, 5, 5, 0.2f, null); + binarized = Leptonica1.pixOtsuThreshOnBackgroundNorm(gaussian, null, 50, 50, 165, 10, 100, 5, 5, 0.0f, null); if (binarized == null) { // Sometimes Otsu just fails, then we binarize directly binarized = Leptonica1.pixThresholdToBinary(gaussian, 170); } diff --git a/ocr-service-v1/ocr-service-server/src/test/java/com/knecon/fforesight/service/ocr/v1/server/AbstractTest.java b/ocr-service-v1/ocr-service-server/src/test/java/com/knecon/fforesight/service/ocr/v1/server/AbstractTest.java index affca4e..515a090 100644 --- a/ocr-service-v1/ocr-service-server/src/test/java/com/knecon/fforesight/service/ocr/v1/server/AbstractTest.java +++ 
b/ocr-service-v1/ocr-service-server/src/test/java/com/knecon/fforesight/service/ocr/v1/server/AbstractTest.java @@ -24,10 +24,11 @@ import org.springframework.context.annotation.Primary; import org.springframework.test.context.junit.jupiter.SpringExtension; import com.iqser.red.commons.jackson.ObjectMapperFactory; -import com.knecon.fforesight.service.ocr.processor.initializer.PDFNetInitializer; +import com.knecon.fforesight.service.ocr.processor.initializer.NativeLibrariesInitializer; import com.iqser.red.storage.commons.StorageAutoConfiguration; import com.iqser.red.storage.commons.service.StorageService; import com.iqser.red.storage.commons.utils.FileSystemBackedStorageService; +import com.knecon.fforesight.service.ocr.v1.server.queue.OcrMessageSender; import com.knecon.fforesight.tenantcommons.TenantsClient; import com.pdftron.pdf.PDFNet; @@ -36,7 +37,7 @@ import lombok.SneakyThrows; @ExtendWith({SpringExtension.class, MockitoExtension.class}) @SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT) -@Import({AbstractTest.TestConfiguration.class, PDFNetInitializer.class}) +@Import({AbstractTest.TestConfiguration.class, NativeLibrariesInitializer.class}) @AutoConfigureObservability public class AbstractTest { @@ -46,6 +47,9 @@ public class AbstractTest { @MockBean private TenantsClient tenantsClient; + @MockBean + private OcrMessageSender ocrMessageSender; + @Autowired protected StorageService storageService;