RED-7669: optimize OCR-module performance

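* clean up code: remove the duplicated image pre-processing helpers from ExtractedOcrImage and delegate to ImageProcessingUtils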
2023-11-15 15:55:26 +01:00 · 2023-11-15 15:55:26 +01:00 · 4c225c2219
commit 4c225c2219
parent 3d09f46844
2 changed files with 17 additions and 88 deletions
--- a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/model/ExtractedOcrImage.java
+++ b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/model/ExtractedOcrImage.java
@ -54,87 +54,10 @@ public class ExtractedOcrImage implements OcrImage {
    @SneakyThrows
    synchronized private Pix binarize(BufferedImage image, float imageDpi, int targetDpi) {

-        setAlphaChannelToWhite(image);
-        Pix grayScale = convertToGrayScale(image);
-        Pix scaledUp = scaleToTargetDpi(imageDpi, targetDpi, grayScale);
-        return despecklePix(scaledUp);
-    }
-
-
-    private static Pix despecklePix(Pix pix) {
-
-        assert pix.d == 8;
-        Pix despeckled;
-        if (pix.w < 100 || pix.h < 100) {
-            // too small to properly despeckle, just binarize instead.
-            despeckled = Leptonica1.pixThresholdToBinary(pix, 180);
-        } else {
-            despeckled = LeptUtils.despeckle(pix, LeptUtils.SEL_STR3, 3); // sometimes this fails and I can't figure out why. Then we skip the despeckling and just simply convert to binary. Might have something to do with Imagesize, not sure though...
-            if (despeckled == null) {
-                despeckled = Leptonica1.pixThresholdToBinary(pix, 180);
-            }
-        }
-        if (pix != despeckled) {
-            LeptUtils.disposePix(pix);
-        }
-        return despeckled;
-    }
-
-
-    private static Pix scaleToTargetDpi(float imageDpi, int targetDpi, Pix grayScale) {
-
-        float targetFactor = targetDpi / imageDpi;
-
-        if (targetFactor > 3) {
-            Pix scaledUp;
-            scaledUp = Leptonica1.pixScaleGray4xLI(grayScale);
-            LeptUtils.disposePix(grayScale);
-            return scaledUp;
-        } else if (targetFactor > 1.9) {
-            Pix scaledUp;
-            scaledUp = Leptonica1.pixScaleGray2xLI(grayScale);
-            LeptUtils.disposePix(grayScale);
-            return scaledUp;
-        } else {
-            return grayScale;
-        }
-    }
-
-
-    private static Pix convertToGrayScale(BufferedImage image) throws IOException {
-
-        Pix pix = LeptUtils.convertImageToPix(image);
-        if (pix.d == 8) {
-            return pix;
-        } else if (pix.d == 32) {
-            Pix grayScale = Leptonica1.pixConvertRGBToGrayFast(pix);
-            LeptUtils.disposePix(pix);
-            return grayScale;
-        } else {
-            Pix grayScale = Leptonica1.pixConvert1To8(null, pix, (byte) 0, (byte) 255);
-            LeptUtils.disposePix(pix);
-            return grayScale;
-        }
-    }
-
-
-    private static void setAlphaChannelToWhite(BufferedImage image) {
-
-        if (image.getTransparency() == Transparency.TRANSLUCENT) {
-            // NOTE: For BITMASK images, the color model is likely IndexColorModel,
-            // and this model will contain the "real" color of the transparent parts
-            // which is likely a better fit than unconditionally setting it to white.
-
-            // Fill background  with white
-            Graphics2D graphics = image.createGraphics();
-            try {
-                graphics.setComposite(AlphaComposite.DstOver); // Set composite rules to paint "behind"
-                graphics.setPaint(Color.WHITE);
-                graphics.fillRect(0, 0, image.getWidth(), image.getHeight());
-            } finally {
-                graphics.dispose();
-            }
-        }
+        ImageProcessingUtils.setAlphaChannelToWhite(image);
+        Pix grayScale = ImageProcessingUtils.convertToGrayScale(image);
+        Pix scaledUp = ImageProcessingUtils.scaleToTargetDpi(imageDpi, targetDpi, grayScale);
+        return ImageProcessingUtils.despecklePix(scaledUp);
    }


--- a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/utils/ImageProcessingUtils.java
+++ b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/utils/ImageProcessingUtils.java
@ -15,15 +15,21 @@ import net.sourceforge.lept4j.util.LeptUtils;
@UtilityClass
 public class ImageProcessingUtils {

-    public static Pix despecklePix(Pix scaledUp) {
+    public static Pix despecklePix(Pix pix) {

-        assert scaledUp.d == 8;
-        Pix despeckled = LeptUtils.despeckle(scaledUp, LeptUtils.SEL_STR3, 3);
-        if (despeckled == null) { // sometimes despeckle fails, and I wasn't able to figure out why. Then we skip the despeckling and just simply convert to binary. Might have something to do with too small images, not sure though...
-            despeckled = Leptonica1.pixThresholdToBinary(scaledUp, 180);
+        assert pix.d == 8;
+        Pix despeckled;
+        if (pix.w < 100 || pix.h < 100) {
+            // too small to properly despeckle, just binarize instead.
+            despeckled = Leptonica1.pixThresholdToBinary(pix, 180);
+        } else {
+            despeckled = LeptUtils.despeckle(pix, LeptUtils.SEL_STR3, 3); // sometimes this fails and I can't figure out why. Then we skip the despeckling and just simply convert to binary. Might have something to do with Imagesize, not sure though...
+            if (despeckled == null) {
+                despeckled = Leptonica1.pixThresholdToBinary(pix, 180);
+            }
        }
-        if (scaledUp != despeckled) {
-            LeptUtils.disposePix(scaledUp);
+        if (pix != despeckled) {
+            LeptUtils.disposePix(pix);
        }
        return despeckled;
    }