RED-7669: optimize OCR-module performance

* move all critical stuff to its own singleton thread
* make gs process queue any image once the file has been written
This commit is contained in:
Kilian Schuettler 2023-11-23 15:21:07 +01:00
parent 955ff6281d
commit 880bebcafc
5 changed files with 67 additions and 50 deletions

View File

@ -16,20 +16,8 @@ import lombok.experimental.FieldDefaults;
import net.sourceforge.lept4j.Pix;
import net.sourceforge.lept4j.util.LeptUtils;
@Getter
@RequiredArgsConstructor
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class ExtractedImage {
int pageNumber;
QuadPoint position;
int height;
int width;
BufferedImage image;
Matrix ctm;
int numberOnPage;
PDColorSpace colorSpace;
public record ExtractedImage(
int pageNumber, QuadPoint position, int height, int width, BufferedImage image, Matrix ctm, int numberOnPage, PDColorSpace colorSpace) implements UnprocessedImage {
@SneakyThrows
public Pix asPix() {

View File

@ -63,7 +63,7 @@ public class ImageExtractionThread extends Thread {
for (ExtractedImage image : extractedImages) {
imageProcessingQueue.put((UnprocessedImage) image);
logger.addImagesToProcess(image.getPageNumber(), image.getNumberOnPage());
logger.addImagesToProcess(image.pageNumber(), image.numberOnPage());
}
}
}
@ -87,7 +87,7 @@ public class ImageExtractionThread extends Thread {
}
for (ExtractedImage imageOnPage : imagesOnCurrentPage) {
if (imageOnPage.getWidth() > FULL_PAGE_IMAGE_THRESHOLD * page.getCropBox().getWidth() && imageOnPage.getHeight() > FULL_PAGE_IMAGE_THRESHOLD * page.getCropBox().getHeight()) {
if (imageOnPage.width() > FULL_PAGE_IMAGE_THRESHOLD * page.getCropBox().getWidth() && imageOnPage.height() > FULL_PAGE_IMAGE_THRESHOLD * page.getCropBox().getHeight()) {
return true;
}
}

View File

@ -2,7 +2,6 @@ package com.knecon.fforesight.service.ocr.processor.service.threads;
import static net.sourceforge.tess4j.ITessAPI.TRUE;
import java.lang.annotation.Documented;
import java.nio.FloatBuffer;
import java.nio.IntBuffer;
import java.util.ArrayList;
@ -28,7 +27,6 @@ import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.experimental.FieldDefaults;
import lombok.extern.slf4j.Slf4j;
import net.sourceforge.lept4j.Leptonica1;
import net.sourceforge.lept4j.Pix;
import net.sourceforge.lept4j.util.LeptUtils;
import net.sourceforge.tess4j.ITessAPI;
@ -107,44 +105,39 @@ public class ImageProcessingThread extends Thread {
private OcrImage processRenderedPageImageFile(RenderedPageImageFile renderedPageImageFile) {
Pix grayScale = ImageProcessingUtils.convertToGrayScale(renderedPageImageFile.asPix());
Pix despeckled = ImageProcessingUtils.despecklePix(grayScale);
Pix pix = binarize(renderedPageImageFile.asPix(), settings.getDpi(), settings.getDpi());
int orientDegree = detectOrientation(despeckled, settings.getDpi(), detectionScriptHandle);
Pix rotatedPix = switch (360 - orientDegree) {
case 90 -> Leptonica1.pixRotateOrth(despeckled, 1);
case 180 -> Leptonica1.pixRotateOrth(despeckled, 2);
case 270 -> Leptonica1.pixRotateOrth(despeckled, 3);
default -> despeckled;
};
int orientDegree = detectOrientation(pix, settings.getDpi(), detectionScriptHandle);
Pix rotatedPix = ImageProcessingUtils.deRotatePix(orientDegree, pix);
OcrImage ocrImage = new RenderedPageOcrImage(despeckled.h,
despeckled.w,
OcrImage ocrImage = new RenderedPageOcrImage(pix.h,
pix.w,
PageInformation.fromPDPage(renderedPageImageFile.pageNumber(), document.getPage(renderedPageImageFile.pageNumber() - 1)),
rotatedPix,
orientDegree);
if (despeckled != rotatedPix) {
LeptUtils.disposePix(despeckled);
if (pix != rotatedPix) {
LeptUtils.disposePix(pix);
}
return ocrImage;
}
private OcrImage processExtractedImage(ExtractedImage extractedImage) {
float imageDPI = Math.abs(extractedImage.getImage().getWidth() / (extractedImage.getCtm().getScalingFactorX() / 72));
float imageDPI = Math.abs(extractedImage.image().getWidth() / (extractedImage.ctm().getScalingFactorX() / 72));
Pix pix = binarize(extractedImage.asPix(), imageDPI, settings.getDpi());
int orientDegree = detectOrientation(pix, settings.getDpi(), detectionScriptHandle);
Pix rotatedPix = getRotatedPix(orientDegree, pix);
Pix rotatedPix = ImageProcessingUtils.deRotatePix(orientDegree, pix);
OcrImage ocrImage = new ExtractedOcrImage(extractedImage.getPageNumber(),
extractedImage.getNumberOnPage(),
extractedImage.getHeight(),
extractedImage.getWidth(),
extractedImage.getCtm(),
OcrImage ocrImage = new ExtractedOcrImage(extractedImage.pageNumber(),
extractedImage.numberOnPage(),
extractedImage.height(),
extractedImage.width(),
extractedImage.ctm(),
rotatedPix,
pix.h,
pix.w,
@ -157,15 +150,6 @@ public class ImageProcessingThread extends Thread {
}
/**
 * Rotates the given image so that the detected orientation is undone.
 *
 * <p>The correction angle is {@code 360 - orientDegree}. Only corrections of exactly 90, 180 or
 * 270 degrees trigger a rotation; every other value (including the 360 that results from an
 * orientation of 0) leaves the image untouched.
 *
 * @param orientDegree detected orientation of the image in degrees
 * @param pix image to rotate
 * @return the result of {@code Leptonica1.pixRotateOrth} when a rotation applies, otherwise the
 *     very same {@code pix} instance that was passed in
 */
private static Pix getRotatedPix(int orientDegree, Pix pix) {
    int correctionDegrees = 360 - orientDegree;
    boolean rotationNeeded = correctionDegrees == 90 || correctionDegrees == 180 || correctionDegrees == 270;
    if (!rotationNeeded) {
        // Same instance is handed back, so callers can detect "no rotation" by reference comparison.
        return pix;
    }
    // 90 -> 1, 180 -> 2, 270 -> 3 quarter turns.
    return Leptonica1.pixRotateOrth(pix, correctionDegrees / 90);
}
static public int detectOrientation(Pix pix, int dpi, ITessAPI.TessBaseAPI detectionScriptHandle) {

View File

@ -24,10 +24,10 @@ public class ImageProcessingUtils {
public BufferedImage convertToDeviceColorSpace(ExtractedImage extractedImage) {
BufferedImage image;
if (extractedImage.getColorSpace() instanceof PDDeviceRGB || extractedImage.getColorSpace() instanceof PDDeviceGray) {
image = extractedImage.getImage();
if (extractedImage.colorSpace() instanceof PDDeviceRGB || extractedImage.colorSpace() instanceof PDDeviceGray) {
image = extractedImage.image();
} else {
BufferedImage pdfImage = extractedImage.getImage();
BufferedImage pdfImage = extractedImage.image();
image = new BufferedImage(pdfImage.getWidth(), pdfImage.getHeight(), BufferedImage.TYPE_BYTE_GRAY);
Graphics g = image.getGraphics();
g.drawImage(pdfImage, 0, 0, null);
@ -98,6 +98,17 @@ public class ImageProcessingUtils {
}
/**
 * Undoes a detected page/image orientation by rotating the image in 90 degree steps.
 *
 * <p>The rotation to apply is derived from {@code 360 - orientDegree}: 90, 180 and 270 map to
 * one, two and three quarter turns passed to {@code Leptonica1.pixRotateOrth}. Any other value
 * means no rotation is performed.
 *
 * @param orientDegree detected orientation in degrees
 * @param pix image to de-rotate
 * @return the rotated image returned by Leptonica, or the identical {@code pix} reference when
 *     no rotation is applied (callers in this module compare references to decide whether the
 *     input may be disposed)
 */
public Pix deRotatePix(int orientDegree, Pix pix) {
    final int correction = 360 - orientDegree;
    switch (correction) {
        case 90:
            return Leptonica1.pixRotateOrth(pix, 1);
        case 180:
            return Leptonica1.pixRotateOrth(pix, 2);
        case 270:
            return Leptonica1.pixRotateOrth(pix, 3);
        default:
            // Upright (or unsupported) orientation: return the input unchanged.
            return pix;
    }
}
public static void setAlphaChannelToWhite(BufferedImage image) {
if (image.getTransparency() == Transparency.TRANSLUCENT) {

View File

@ -0,0 +1,34 @@
package com.knecon.fforesight.service.ocr.processor.utils;
import static net.sourceforge.lept4j.ILeptonica.IFF_PNG;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import net.sourceforge.lept4j.Leptonica1;
import net.sourceforge.lept4j.Pix;
/**
 * Manual smoke test for {@link ImageProcessingUtils#deRotatePix(int, Pix)}.
 *
 * <p>The test reads an image from a developer-local path, de-rotates it for each supported
 * orientation and writes the results as PNG files to {@code /tmp}. It contains no assertions;
 * the written files have to be inspected by hand.
 */
class ImageProcessingUtilsTest {

    /** Name of the environment variable whose value is used as {@code jna.library.path}. */
    private static final String NATIVE_LIB_ENV = "VCPKG_DYNAMIC_LIB";

    /**
     * Points JNA at the native library directory taken from the environment.
     *
     * <p>{@link System#setProperty(String, String)} throws a {@link NullPointerException} for a
     * null value, so the property is only set when the environment variable is actually defined.
     * Without it, JNA falls back to its default library lookup.
     */
    @BeforeEach
    public void loadLeptonica() {

        String nativeLibDir = System.getenv(NATIVE_LIB_ENV);
        if (nativeLibDir != null) {
            System.setProperty("jna.library.path", nativeLibDir);
        }
    }


    /**
     * Writes the de-rotated variants of the sample image to {@code /tmp/<degree>.png} for the
     * orientations 0, 90, 180 and 270.
     */
    @Test
    public void testRotation() {

        // NOTE(review): developer-local input file; the test is not portable as written.
        Pix pix = Leptonica1.pixRead("/home/kschuettler/Downloads/painHarold.webp");

        // NOTE(review): neither the source Pix nor the rotated copies are disposed here.
        for (int orientDegree : new int[]{0, 90, 180, 270}) {
            Pix deRotated = ImageProcessingUtils.deRotatePix(orientDegree, pix);
            Leptonica1.pixWrite("/tmp/" + orientDegree + ".png", deRotated, IFF_PNG);
        }
    }

}