RED-7669: optimize OCR-module performance
* clean up code
This commit is contained in:
parent
3d09f46844
commit
4c225c2219
@ -54,87 +54,10 @@ public class ExtractedOcrImage implements OcrImage {
|
||||
@SneakyThrows
|
||||
synchronized private Pix binarize(BufferedImage image, float imageDpi, int targetDpi) {
|
||||
|
||||
setAlphaChannelToWhite(image);
|
||||
Pix grayScale = convertToGrayScale(image);
|
||||
Pix scaledUp = scaleToTargetDpi(imageDpi, targetDpi, grayScale);
|
||||
return despecklePix(scaledUp);
|
||||
}
|
||||
|
||||
|
||||
private static Pix despecklePix(Pix pix) {
|
||||
|
||||
assert pix.d == 8;
|
||||
Pix despeckled;
|
||||
if (pix.w < 100 || pix.h < 100) {
|
||||
// too small to properly despeckle, just binarize instead.
|
||||
despeckled = Leptonica1.pixThresholdToBinary(pix, 180);
|
||||
} else {
|
||||
despeckled = LeptUtils.despeckle(pix, LeptUtils.SEL_STR3, 3); // sometimes this fails and I can't figure out why. Then we skip the despeckling and just simply convert to binary. Might have something to do with Imagesize, not sure though...
|
||||
if (despeckled == null) {
|
||||
despeckled = Leptonica1.pixThresholdToBinary(pix, 180);
|
||||
}
|
||||
}
|
||||
if (pix != despeckled) {
|
||||
LeptUtils.disposePix(pix);
|
||||
}
|
||||
return despeckled;
|
||||
}
|
||||
|
||||
|
||||
private static Pix scaleToTargetDpi(float imageDpi, int targetDpi, Pix grayScale) {
|
||||
|
||||
float targetFactor = targetDpi / imageDpi;
|
||||
|
||||
if (targetFactor > 3) {
|
||||
Pix scaledUp;
|
||||
scaledUp = Leptonica1.pixScaleGray4xLI(grayScale);
|
||||
LeptUtils.disposePix(grayScale);
|
||||
return scaledUp;
|
||||
} else if (targetFactor > 1.9) {
|
||||
Pix scaledUp;
|
||||
scaledUp = Leptonica1.pixScaleGray2xLI(grayScale);
|
||||
LeptUtils.disposePix(grayScale);
|
||||
return scaledUp;
|
||||
} else {
|
||||
return grayScale;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private static Pix convertToGrayScale(BufferedImage image) throws IOException {
|
||||
|
||||
Pix pix = LeptUtils.convertImageToPix(image);
|
||||
if (pix.d == 8) {
|
||||
return pix;
|
||||
} else if (pix.d == 32) {
|
||||
Pix grayScale = Leptonica1.pixConvertRGBToGrayFast(pix);
|
||||
LeptUtils.disposePix(pix);
|
||||
return grayScale;
|
||||
} else {
|
||||
Pix grayScale = Leptonica1.pixConvert1To8(null, pix, (byte) 0, (byte) 255);
|
||||
LeptUtils.disposePix(pix);
|
||||
return grayScale;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private static void setAlphaChannelToWhite(BufferedImage image) {
|
||||
|
||||
if (image.getTransparency() == Transparency.TRANSLUCENT) {
|
||||
// NOTE: For BITMASK images, the color model is likely IndexColorModel,
|
||||
// and this model will contain the "real" color of the transparent parts
|
||||
// which is likely a better fit than unconditionally setting it to white.
|
||||
|
||||
// Fill background with white
|
||||
Graphics2D graphics = image.createGraphics();
|
||||
try {
|
||||
graphics.setComposite(AlphaComposite.DstOver); // Set composite rules to paint "behind"
|
||||
graphics.setPaint(Color.WHITE);
|
||||
graphics.fillRect(0, 0, image.getWidth(), image.getHeight());
|
||||
} finally {
|
||||
graphics.dispose();
|
||||
}
|
||||
}
|
||||
ImageProcessingUtils.setAlphaChannelToWhite(image);
|
||||
Pix grayScale = ImageProcessingUtils.convertToGrayScale(image);
|
||||
Pix scaledUp = ImageProcessingUtils.scaleToTargetDpi(imageDpi, targetDpi, grayScale);
|
||||
return ImageProcessingUtils.despecklePix(scaledUp);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -15,15 +15,21 @@ import net.sourceforge.lept4j.util.LeptUtils;
|
||||
@UtilityClass
|
||||
public class ImageProcessingUtils {
|
||||
|
||||
public static Pix despecklePix(Pix scaledUp) {
|
||||
public static Pix despecklePix(Pix pix) {
|
||||
|
||||
assert scaledUp.d == 8;
|
||||
Pix despeckled = LeptUtils.despeckle(scaledUp, LeptUtils.SEL_STR3, 3);
|
||||
if (despeckled == null) { // sometimes despeckle fails, and I wasn't able to figure out why. Then we skip the despeckling and just simply convert to binary. Might have something to do with too small images, not sure though...
|
||||
despeckled = Leptonica1.pixThresholdToBinary(scaledUp, 180);
|
||||
assert pix.d == 8;
|
||||
Pix despeckled;
|
||||
if (pix.w < 100 || pix.h < 100) {
|
||||
// too small to properly despeckle, just binarize instead.
|
||||
despeckled = Leptonica1.pixThresholdToBinary(pix, 180);
|
||||
} else {
|
||||
despeckled = LeptUtils.despeckle(pix, LeptUtils.SEL_STR3, 3); // sometimes this fails and I can't figure out why. Then we skip the despeckling and just simply convert to binary. Might have something to do with Imagesize, not sure though...
|
||||
if (despeckled == null) {
|
||||
despeckled = Leptonica1.pixThresholdToBinary(pix, 180);
|
||||
}
|
||||
}
|
||||
if (scaledUp != despeckled) {
|
||||
LeptUtils.disposePix(scaledUp);
|
||||
if (pix != despeckled) {
|
||||
LeptUtils.disposePix(pix);
|
||||
}
|
||||
return despeckled;
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user