RED-7669: optimize OCR-module performance

* clean up code
This commit is contained in:
Kilian Schuettler 2023-11-15 15:55:26 +01:00
parent 3d09f46844
commit 4c225c2219
2 changed files with 17 additions and 88 deletions

View File

@ -54,87 +54,10 @@ public class ExtractedOcrImage implements OcrImage {
/**
 * Runs the image preprocessing pipeline: fills translucent areas with white, converts the image
 * to gray scale, upscales it towards the target DPI and finally despeckles it.
 * <p>
 * The method is {@code synchronized}, so it holds this instance's monitor while it runs. The
 * {@code IOException} declared by {@code convertToGrayScale} is propagated via {@code @SneakyThrows}.
 *
 * @param image     the image to preprocess; its pixels may be modified in place by the alpha fill
 * @param imageDpi  the resolution of {@code image}
 * @param targetDpi the resolution the result should be scaled towards
 * @return the {@link Pix} produced by {@code despecklePix}
 */
@SneakyThrows
synchronized private Pix binarize(BufferedImage image, float imageDpi, int targetDpi) {

    // Must happen before the conversion, because it edits the BufferedImage itself.
    setAlphaChannelToWhite(image);

    // Each stage takes over the Pix of the previous one, so no intermediate has to be released here.
    return despecklePix(scaleToTargetDpi(imageDpi, targetDpi, convertToGrayScale(image)));
}
/**
 * Despeckles an 8 bpp {@link Pix}, falling back to plain thresholding when despeckling is not
 * attempted or yields nothing.
 * <p>
 * Images narrower or lower than 100 pixels are considered too small to despeckle properly and are
 * thresholded directly. Larger images are despeckled; the despeckle call occasionally returns
 * {@code null} for reasons that are not understood (possibly related to image size), in which case
 * the same thresholding is used instead.
 * <p>
 * The input is disposed unless it is the very instance being returned.
 *
 * @param pix the image to clean up; asserted to have a depth of 8
 * @return the despeckled image, or the thresholded image as a fallback
 */
private static Pix despecklePix(Pix pix) {

    assert pix.d == 8;

    boolean largeEnough = pix.w >= 100 && pix.h >= 100;

    // Only attempt despeckling on sufficiently large images; null marks "use the fallback".
    Pix result = largeEnough ? LeptUtils.despeckle(pix, LeptUtils.SEL_STR3, 3) : null;
    if (result == null) {
        result = Leptonica1.pixThresholdToBinary(pix, 180);
    }

    // Release the source unless it was handed back unchanged.
    if (result != pix) {
        LeptUtils.disposePix(pix);
    }
    return result;
}
/**
 * Upscales a gray scale {@link Pix} when the target resolution is sufficiently larger than the
 * image resolution.
 * <p>
 * With {@code factor = targetDpi / imageDpi}: a factor above 3 uses {@code pixScaleGray4xLI}, a
 * factor above 1.9 uses {@code pixScaleGray2xLI}, and anything else returns the input untouched.
 * When a scaled copy is produced, the input is disposed.
 *
 * @param imageDpi  the resolution of the image
 * @param targetDpi the desired resolution
 * @param grayScale the image to scale
 * @return the upscaled image, or {@code grayScale} itself if no scaling was applied
 */
private static Pix scaleToTargetDpi(float imageDpi, int targetDpi, Pix grayScale) {

    float ratio = targetDpi / imageDpi;

    // Written as a negated "greater than" so that a NaN ratio also takes the no-op path.
    if (!(ratio > 1.9)) {
        return grayScale;
    }

    Pix enlarged = ratio > 3
            ? Leptonica1.pixScaleGray4xLI(grayScale)
            : Leptonica1.pixScaleGray2xLI(grayScale);
    LeptUtils.disposePix(grayScale);
    return enlarged;
}
/**
 * Converts a {@link BufferedImage} into an 8 bpp gray scale {@link Pix}.
 * <p>
 * The image is first converted to a {@code Pix}. A depth of 8 is returned as is; a depth of 32 is
 * converted with {@code pixConvertRGBToGrayFast}; every other depth is passed to
 * {@code pixConvert1To8}. In the two conversion cases the intermediate {@code Pix} is disposed.
 *
 * @param image the image to convert
 * @return the gray scale image
 * @throws IOException if converting the image to a {@code Pix} fails
 */
private static Pix convertToGrayScale(BufferedImage image) throws IOException {

    Pix source = LeptUtils.convertImageToPix(image);
    if (source.d == 8) {
        return source;
    }

    Pix gray = source.d == 32
            ? Leptonica1.pixConvertRGBToGrayFast(source)
            : Leptonica1.pixConvert1To8(null, source, (byte) 0, (byte) 255);
    LeptUtils.disposePix(source);
    return gray;
}
/**
 * Paints a white background behind the existing pixels of a translucent image, modifying the
 * image in place. Images whose transparency is not {@code TRANSLUCENT} are left untouched.
 *
 * @param image the image whose transparent areas should become white
 */
private static void setAlphaChannelToWhite(BufferedImage image) {
if (image.getTransparency() == Transparency.TRANSLUCENT) {
// NOTE: For BITMASK images, the color model is likely IndexColorModel,
// and this model will contain the "real" color of the transparent parts
// which is likely a better fit than unconditionally setting it to white.
// Fill background with white
Graphics2D graphics = image.createGraphics();
try {
graphics.setComposite(AlphaComposite.DstOver); // Set composite rules to paint "behind"
graphics.setPaint(Color.WHITE);
graphics.fillRect(0, 0, image.getWidth(), image.getHeight());
} finally {
// Always release the graphics context, even if painting fails.
graphics.dispose();
}
}
// NOTE(review): the four statements below reference imageDpi/targetDpi and return a value, which
// does not fit this void method. They appear to be the added lines of this diff hunk (presumably
// the new body of binarize, delegating to ImageProcessingUtils) - confirm against the repository.
ImageProcessingUtils.setAlphaChannelToWhite(image);
Pix grayScale = ImageProcessingUtils.convertToGrayScale(image);
Pix scaledUp = ImageProcessingUtils.scaleToTargetDpi(imageDpi, targetDpi, grayScale);
return ImageProcessingUtils.despecklePix(scaledUp);
}

View File

@ -15,15 +15,21 @@ import net.sourceforge.lept4j.util.LeptUtils;
@UtilityClass
public class ImageProcessingUtils {
// NOTE(review): this hunk seems to interleave removed and added diff lines whose +/- markers were
// lost. Lines using `scaledUp` look like the previous version (always despeckle, threshold only
// when despeckle returns null); lines using `pix` look like the new version (threshold directly
// when width or height is below 100, otherwise despeckle with the same null fallback).
// Confirm against the repository before treating this text as compilable code.
public static Pix despecklePix(Pix scaledUp) {
public static Pix despecklePix(Pix pix) {
assert scaledUp.d == 8;
Pix despeckled = LeptUtils.despeckle(scaledUp, LeptUtils.SEL_STR3, 3);
if (despeckled == null) { // sometimes despeckle fails, and I wasn't able to figure out why. Then we skip the despeckling and just simply convert to binary. Might have something to do with too small images, not sure though...
despeckled = Leptonica1.pixThresholdToBinary(scaledUp, 180);
assert pix.d == 8;
Pix despeckled;
if (pix.w < 100 || pix.h < 100) {
// too small to properly despeckle, just binarize instead.
despeckled = Leptonica1.pixThresholdToBinary(pix, 180);
} else {
despeckled = LeptUtils.despeckle(pix, LeptUtils.SEL_STR3, 3); // sometimes this fails and I can't figure out why. Then we skip the despeckling and just simply convert to binary. Might have something to do with Imagesize, not sure though...
if (despeckled == null) {
despeckled = Leptonica1.pixThresholdToBinary(pix, 180);
}
}
// Dispose the input unless it is the instance being returned (old variant).
if (scaledUp != despeckled) {
LeptUtils.disposePix(scaledUp);
// Dispose the input unless it is the instance being returned (new variant).
if (pix != despeckled) {
LeptUtils.disposePix(pix);
}
return despeckled;
}