diff --git a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/model/ExtractedOcrImage.java b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/model/ExtractedOcrImage.java index 472287e..6455f97 100644 --- a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/model/ExtractedOcrImage.java +++ b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/model/ExtractedOcrImage.java @@ -54,87 +54,10 @@ public class ExtractedOcrImage implements OcrImage { @SneakyThrows synchronized private Pix binarize(BufferedImage image, float imageDpi, int targetDpi) { - setAlphaChannelToWhite(image); - Pix grayScale = convertToGrayScale(image); - Pix scaledUp = scaleToTargetDpi(imageDpi, targetDpi, grayScale); - return despecklePix(scaledUp); - } - - - private static Pix despecklePix(Pix pix) { - - assert pix.d == 8; - Pix despeckled; - if (pix.w < 100 || pix.h < 100) { - // too small to properly despeckle, just binarize instead. - despeckled = Leptonica1.pixThresholdToBinary(pix, 180); - } else { - despeckled = LeptUtils.despeckle(pix, LeptUtils.SEL_STR3, 3); // sometimes this fails and I can't figure out why. Then we skip the despeckling and just simply convert to binary. Might have something to do with Imagesize, not sure though... 
- if (despeckled == null) { - despeckled = Leptonica1.pixThresholdToBinary(pix, 180); - } - } - if (pix != despeckled) { - LeptUtils.disposePix(pix); - } - return despeckled; - } - - - private static Pix scaleToTargetDpi(float imageDpi, int targetDpi, Pix grayScale) { - - float targetFactor = targetDpi / imageDpi; - - if (targetFactor > 3) { - Pix scaledUp; - scaledUp = Leptonica1.pixScaleGray4xLI(grayScale); - LeptUtils.disposePix(grayScale); - return scaledUp; - } else if (targetFactor > 1.9) { - Pix scaledUp; - scaledUp = Leptonica1.pixScaleGray2xLI(grayScale); - LeptUtils.disposePix(grayScale); - return scaledUp; - } else { - return grayScale; - } - } - - - private static Pix convertToGrayScale(BufferedImage image) throws IOException { - - Pix pix = LeptUtils.convertImageToPix(image); - if (pix.d == 8) { - return pix; - } else if (pix.d == 32) { - Pix grayScale = Leptonica1.pixConvertRGBToGrayFast(pix); - LeptUtils.disposePix(pix); - return grayScale; - } else { - Pix grayScale = Leptonica1.pixConvert1To8(null, pix, (byte) 0, (byte) 255); - LeptUtils.disposePix(pix); - return grayScale; - } - } - - - private static void setAlphaChannelToWhite(BufferedImage image) { - - if (image.getTransparency() == Transparency.TRANSLUCENT) { - // NOTE: For BITMASK images, the color model is likely IndexColorModel, - // and this model will contain the "real" color of the transparent parts - // which is likely a better fit than unconditionally setting it to white. 
- - // Fill background with white - Graphics2D graphics = image.createGraphics(); - try { - graphics.setComposite(AlphaComposite.DstOver); // Set composite rules to paint "behind" - graphics.setPaint(Color.WHITE); - graphics.fillRect(0, 0, image.getWidth(), image.getHeight()); - } finally { - graphics.dispose(); - } - } + ImageProcessingUtils.setAlphaChannelToWhite(image); + Pix grayScale = ImageProcessingUtils.convertToGrayScale(image); + Pix scaledUp = ImageProcessingUtils.scaleToTargetDpi(imageDpi, targetDpi, grayScale); + return ImageProcessingUtils.despecklePix(scaledUp); } diff --git a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/utils/ImageProcessingUtils.java b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/utils/ImageProcessingUtils.java index 27621b3..1727113 100644 --- a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/utils/ImageProcessingUtils.java +++ b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/utils/ImageProcessingUtils.java @@ -15,15 +15,21 @@ import net.sourceforge.lept4j.util.LeptUtils; @UtilityClass public class ImageProcessingUtils { - public static Pix despecklePix(Pix scaledUp) { + public static Pix despecklePix(Pix pix) { - assert scaledUp.d == 8; - Pix despeckled = LeptUtils.despeckle(scaledUp, LeptUtils.SEL_STR3, 3); - if (despeckled == null) { // sometimes despeckle fails, and I wasn't able to figure out why. Then we skip the despeckling and just simply convert to binary. Might have something to do with too small images, not sure though... - despeckled = Leptonica1.pixThresholdToBinary(scaledUp, 180); + assert pix.d == 8; + Pix despeckled; + if (pix.w < 100 || pix.h < 100) { + // too small to properly despeckle, just binarize instead. 
+ despeckled = Leptonica1.pixThresholdToBinary(pix, 180); + } else { + despeckled = LeptUtils.despeckle(pix, LeptUtils.SEL_STR3, 3); // despeckle sometimes returns null for reasons not yet understood (possibly related to image size); in that case we skip the despeckling and simply threshold to binary. + if (despeckled == null) { + despeckled = Leptonica1.pixThresholdToBinary(pix, 180); + } } - if (scaledUp != despeckled) { - LeptUtils.disposePix(scaledUp); + if (pix != despeckled) { + LeptUtils.disposePix(pix); } return despeckled; }