RED-7669: optimize OCR-module performance
* try and synchronize all malloc calls
This commit is contained in:
parent
574f7ac25e
commit
6f99664906
@ -2,10 +2,15 @@ package com.knecon.fforesight.service.ocr.processor.model;
|
|||||||
|
|
||||||
import java.awt.geom.AffineTransform;
|
import java.awt.geom.AffineTransform;
|
||||||
import java.awt.image.BufferedImage;
|
import java.awt.image.BufferedImage;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.IntBuffer;
|
||||||
|
import java.util.concurrent.Semaphore;
|
||||||
|
|
||||||
import org.apache.pdfbox.util.Matrix;
|
import org.apache.pdfbox.util.Matrix;
|
||||||
|
|
||||||
|
import com.knecon.fforesight.service.ocr.processor.service.threads.OCRThread;
|
||||||
import com.knecon.fforesight.service.ocr.processor.utils.ImageProcessingUtils;
|
import com.knecon.fforesight.service.ocr.processor.utils.ImageProcessingUtils;
|
||||||
|
import com.pdftron.sdf.Obj;
|
||||||
|
|
||||||
import lombok.AccessLevel;
|
import lombok.AccessLevel;
|
||||||
import lombok.Getter;
|
import lombok.Getter;
|
||||||
@ -52,12 +57,15 @@ public class ExtractedOcrImage implements OcrImage {
|
|||||||
|
|
||||||
|
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
synchronized private Pix binarize(BufferedImage image, float imageDpi, int targetDpi) {
|
private Pix binarize(BufferedImage image, float imageDpi, int targetDpi) {
|
||||||
|
|
||||||
ImageProcessingUtils.setAlphaChannelToWhite(image);
|
ImageProcessingUtils.setAlphaChannelToWhite(image);
|
||||||
Pix grayScale = ImageProcessingUtils.convertToGrayScale(image);
|
|
||||||
Pix scaledUp = ImageProcessingUtils.scaleToTargetDpi(imageDpi, targetDpi, grayScale);
|
synchronized (OCRThread.class) { // must synchronize the mallocs here with the mallocs tesseract detection script.
|
||||||
return ImageProcessingUtils.despecklePix(scaledUp);
|
Pix grayScale = ImageProcessingUtils.convertToGrayScale(image);
|
||||||
|
Pix scaledUp = ImageProcessingUtils.scaleToTargetDpi(imageDpi, targetDpi, grayScale);
|
||||||
|
return ImageProcessingUtils.despecklePix(scaledUp);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -3,6 +3,8 @@ package com.knecon.fforesight.service.ocr.processor.service.threads;
|
|||||||
import static net.sourceforge.tess4j.ITessAPI.TRUE;
|
import static net.sourceforge.tess4j.ITessAPI.TRUE;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
import java.nio.FloatBuffer;
|
||||||
|
import java.nio.IntBuffer;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.NoSuchElementException;
|
import java.util.NoSuchElementException;
|
||||||
@ -13,8 +15,8 @@ import com.knecon.fforesight.service.ocr.processor.model.OcrImage;
|
|||||||
import com.knecon.fforesight.service.ocr.processor.model.OcrResult;
|
import com.knecon.fforesight.service.ocr.processor.model.OcrResult;
|
||||||
import com.knecon.fforesight.service.ocr.processor.service.OcrProgressLogger;
|
import com.knecon.fforesight.service.ocr.processor.service.OcrProgressLogger;
|
||||||
import com.knecon.fforesight.service.ocr.processor.service.Statistics;
|
import com.knecon.fforesight.service.ocr.processor.service.Statistics;
|
||||||
import com.knecon.fforesight.service.ocr.processor.utils.NativeMemoryAllocationUtils;
|
|
||||||
import com.knecon.fforesight.service.ocr.processor.utils.Tesseract2;
|
import com.knecon.fforesight.service.ocr.processor.utils.Tesseract2;
|
||||||
|
import com.sun.jna.ptr.PointerByReference;
|
||||||
|
|
||||||
import lombok.AccessLevel;
|
import lombok.AccessLevel;
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
@ -126,15 +128,21 @@ public class OCRThread extends Thread {
|
|||||||
TessAPI1.TessBaseAPISetImage2(detectionScriptHandle, image.getPix());
|
TessAPI1.TessBaseAPISetImage2(detectionScriptHandle, image.getPix());
|
||||||
TessAPI1.TessBaseAPISetSourceResolution(detectionScriptHandle, image.getDpi());
|
TessAPI1.TessBaseAPISetSourceResolution(detectionScriptHandle, image.getDpi());
|
||||||
|
|
||||||
|
synchronized (OCRThread.class) { // must synchronize the mallocs here with the mallocs in leptonica binarization.
|
||||||
|
orientationDegreeResultBuffer = IntBuffer.allocate(1);
|
||||||
|
orientationDegreeConfidenceBuffer = FloatBuffer.allocate(1);
|
||||||
|
scriptureNameBuffer = new PointerByReference();
|
||||||
|
scriptureConfidenceBuffer = FloatBuffer.allocate(1);
|
||||||
|
}
|
||||||
|
|
||||||
int orient_deg = 0;
|
int orient_deg = 0;
|
||||||
int result = TessAPI1.TessBaseAPIDetectOrientationScript(detectionScriptHandle,
|
int result = TessAPI1.TessBaseAPIDetectOrientationScript(detectionScriptHandle,
|
||||||
buffers.orientationDegreeResultBuffer(),
|
orientationDegreeResultBuffer,
|
||||||
buffers.orientationDegreeConfidenceBuffer(),
|
orientationDegreeConfidenceBuffer,
|
||||||
buffers.scriptureNameBuffer(),
|
scriptureNameBuffer,
|
||||||
buffers.scriptureConfidenceBuffer());
|
scriptureConfidenceBuffer);
|
||||||
if (result == TRUE) {
|
if (result == TRUE) {
|
||||||
orient_deg = buffers.orientationDegreeResultBuffer().get();
|
orient_deg = orientationDegreeResultBuffer.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
synchronized (OCRThread.class) {
|
synchronized (OCRThread.class) {
|
||||||
|
|||||||
@ -1,26 +0,0 @@
|
|||||||
package com.knecon.fforesight.service.ocr.processor.utils;
|
|
||||||
|
|
||||||
import java.nio.FloatBuffer;
|
|
||||||
import java.nio.IntBuffer;
|
|
||||||
|
|
||||||
import com.sun.jna.ptr.PointerByReference;
|
|
||||||
|
|
||||||
import lombok.experimental.UtilityClass;
|
|
||||||
|
|
||||||
@UtilityClass
|
|
||||||
public class NativeMemoryAllocationUtils {
|
|
||||||
|
|
||||||
synchronized public static DetectionScriptBuffers getDetectionScriptBuffers() {
|
|
||||||
|
|
||||||
IntBuffer orient_degB = IntBuffer.allocate(1);
|
|
||||||
FloatBuffer orient_confB = FloatBuffer.allocate(1);
|
|
||||||
PointerByReference script_nameB = new PointerByReference();
|
|
||||||
FloatBuffer script_confB = FloatBuffer.allocate(1);
|
|
||||||
return new DetectionScriptBuffers(orient_degB, orient_confB, script_nameB, script_confB);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public record DetectionScriptBuffers(IntBuffer orientationDegreeResultBuffer, FloatBuffer orientationDegreeConfidenceBuffer, PointerByReference scriptureNameBuffer, FloatBuffer scriptureConfidenceBuffer) {
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Loading…
x
Reference in New Issue
Block a user