diff --git a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/OCRService.java b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/OCRService.java index ddef5ec..f98db93 100644 --- a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/OCRService.java +++ b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/OCRService.java @@ -28,6 +28,8 @@ import com.knecon.fforesight.service.ocr.processor.service.scriptdetection.FontS import com.knecon.fforesight.service.ocr.processor.service.threads.OCRThread; import com.knecon.fforesight.service.ocr.processor.settings.OcrServiceSettings; +import io.micrometer.observation.ObservationRegistry; +import io.micrometer.observation.annotation.Observed; import lombok.AccessLevel; import lombok.RequiredArgsConstructor; import lombok.SneakyThrows; @@ -48,6 +50,7 @@ public class OCRService { OcrResultWriter ocrResultWriter; GhostScriptService ghostScriptService; FontStyleDetector boldDetector; + ObservationRegistry registry; /** @@ -59,6 +62,7 @@ public class OCRService { * @param fileId Id of file * @param out OutputStream where to write to */ + @Observed(name = "OCRService", contextualName = "run-ocr-on-document") @SneakyThrows public void runOcrOnDocument(String dossierId, String fileId, OutputStream out) { diff --git a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/threads/GhostScriptOutputHandler.java b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/threads/GhostScriptOutputHandler.java index 0dd0c60..464a18c 100644 --- a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/threads/GhostScriptOutputHandler.java +++ b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/threads/GhostScriptOutputHandler.java @@ -109,7 +109,6 @@ public class GhostScriptOutputHandler extends Thread { if (imageFile == null) { throw new IllegalArgumentException(String.format("Page number %d does not exist in this thread. It only has pagenumbers %s", pageNumber, pagesToProcess.keySet())); } - assert new File(imageFile.absoluteFilePath()).isFile(); renderedPageImageFileOutput.add(imageFile); } diff --git a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/threads/ImageProcessingThread.java b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/threads/ImageProcessingThread.java index 35dc148..446f567 100644 --- a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/threads/ImageProcessingThread.java +++ b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/threads/ImageProcessingThread.java @@ -198,8 +198,10 @@ public class ImageProcessingThread extends Thread { grayScale = pix; } else if (pix.d == 32) { grayScale = Leptonica1.pixConvertRGBToGrayFast(pix); + LeptUtils.disposePix(pix); } else if (pix.d == 1) { grayScale = Leptonica1.pixConvert1To8(null, pix, (byte) 0, (byte) 255); + LeptUtils.disposePix(pix); } else { throw new UnsupportedOperationException(String.format("Unknown pix format with bpp of %d", pix.d)); } @@ -208,36 +210,33 @@ public class ImageProcessingThread extends Thread { float targetFactor = targetDpi / imageDpi; if (targetFactor > 2.1) { scaledUp = Leptonica1.pixScaleGray4xLI(grayScale); + LeptUtils.disposePix(grayScale); } else if (targetFactor > 1.1) { scaledUp = Leptonica1.pixScaleGray2xLI(grayScale); + LeptUtils.disposePix(grayScale); } else { scaledUp = grayScale; } // remove noise and prep for Otsu gaussian = Leptonica1.pixConvolve(scaledUp, gaussianKernel, 8, 1); + LeptUtils.disposePix(scaledUp); // Threshold to binary if (pix.w < 100 || pix.h < 100) { binarized = Leptonica1.pixThresholdToBinary(gaussian, 170); } else { binarized = Leptonica1.pixOtsuThreshOnBackgroundNorm(gaussian, null, 50, 50, 165, 10, 100, 5, 5, 0.2f, null); - if (binarized == null) { // Sometimes Otsu just fails, then we binarize directly binarized = Leptonica1.pixThresholdToBinary(gaussian, 170); } } - - LeptUtils.disposePix(pix); - LeptUtils.disposePix(grayScale); - LeptUtils.disposePix(scaledUp); LeptUtils.disposePix(gaussian); return binarized; } - private static ITessAPI.TessBaseAPI initDetectionScriptHandle() { ITessAPI.TessBaseAPI handle = TessAPI1.TessBaseAPICreate(); diff --git a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/utils/Tesseract2.java b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/utils/Tesseract2.java index 61870fa..d85dc46 100644 --- a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/utils/Tesseract2.java +++ b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/utils/Tesseract2.java @@ -138,11 +138,4 @@ public class Tesseract2 extends Tesseract1 { return renderer; } - @Override - protected void dispose() { - - TessBaseAPIEnd(getHandle()); - TessBaseAPIDelete(getHandle()); - } - } diff --git a/ocr-service-v1/ocr-service-server/src/main/java/com/knecon/fforesight/service/ocr/v1/server/queue/OcrMessageReceiver.java b/ocr-service-v1/ocr-service-server/src/main/java/com/knecon/fforesight/service/ocr/v1/server/queue/OcrMessageReceiver.java index 6c33d7a..56f74ef 100644 --- a/ocr-service-v1/ocr-service-server/src/main/java/com/knecon/fforesight/service/ocr/v1/server/queue/OcrMessageReceiver.java +++ b/ocr-service-v1/ocr-service-server/src/main/java/com/knecon/fforesight/service/ocr/v1/server/queue/OcrMessageReceiver.java @@ -21,6 +21,7 @@ import com.knecon.fforesight.service.ocr.v1.api.model.DocumentRequest; import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo; import feign.FeignException; +import io.micrometer.observation.annotation.Observed; import lombok.AccessLevel; import lombok.RequiredArgsConstructor; import lombok.experimental.FieldDefaults; diff --git a/ocr-service-v1/ocr-service-server/src/main/resources/vcpkg.json b/ocr-service-v1/ocr-service-server/src/main/resources/vcpkg.json index 1f3a0a1..da9da43 100644 --- a/ocr-service-v1/ocr-service-server/src/main/resources/vcpkg.json +++ b/ocr-service-v1/ocr-service-server/src/main/resources/vcpkg.json @@ -6,7 +6,7 @@ "overrides": [ { "name": "tesseract", - "version": "5.3.2" + "version": "5.3.3" }, { "name": "leptonica", diff --git a/ocr-service-v1/ocr-service-server/src/test/java/com/knecon/fforesight/service/ocr/v1/server/OcrServiceIntegrationTest.java b/ocr-service-v1/ocr-service-server/src/test/java/com/knecon/fforesight/service/ocr/v1/server/OcrServiceIntegrationTest.java index 6c4d124..2c3e40d 100644 --- a/ocr-service-v1/ocr-service-server/src/test/java/com/knecon/fforesight/service/ocr/v1/server/OcrServiceIntegrationTest.java +++ b/ocr-service-v1/ocr-service-server/src/test/java/com/knecon/fforesight/service/ocr/v1/server/OcrServiceIntegrationTest.java @@ -64,7 +64,7 @@ public class OcrServiceIntegrationTest extends AbstractTest { @SneakyThrows public void testOcr() { - String text = testOCR("files/402Study.pdf"); + String text = testOCR("files/UNAPPROVED_VV-331155 (1).pdf"); }