Merge branch 'RED-7669' into 'master'

RED-7669: optimize OCR-module performance

Closes RED-7669

See merge request redactmanager/ocr-service!27
This commit is contained in:
Dominique Eifländer 2023-12-12 15:27:00 +01:00
commit 80d38fb785
4 changed files with 5 additions and 16 deletions

View File

@@ -20,7 +20,7 @@ dependencies {
api("org.apache.pdfbox:jbig2-imageio:3.0.4")
api("com.github.jai-imageio:jai-imageio-core:1.4.0")
api("com.github.jai-imageio:jai-imageio-jpeg2000:1.4.0")
api("io.github.karols:hocr4j:0.1.2")
api("io.github.karols:hocr4j:0.2.0")
api("com.amazonaws:aws-java-sdk-kms:1.12.440")
api("com.google.guava:guava:31.1-jre")
api("com.iqser.red.commons:pdftron-logic-commons:2.20.0")

View File

@@ -58,11 +58,4 @@ public class ExtractedOcrImage implements OcrImage {
return affineTransform;
}
@Override
public int getOptimalPageSegmentationMode() {
return ITessAPI.TessPageSegMode.PSM_SINGLE_BLOCK;
}
}

View File

@@ -11,6 +11,7 @@ import lombok.SneakyThrows;
import net.sourceforge.lept4j.Leptonica1;
import net.sourceforge.lept4j.Pix;
import net.sourceforge.lept4j.util.LeptUtils;
import net.sourceforge.tess4j.ITessAPI;
public interface OcrImage {
@@ -84,7 +85,9 @@ public interface OcrImage {
*
* @return The optimal page segmentation mode.
*/
int getOptimalPageSegmentationMode(); // TODO: evaluate if PSM can be dynamically chosen to increase performance
default int getOptimalPageSegmentationMode() {
return ITessAPI.TessPageSegMode.PSM_AUTO;
} // TODO: evaluate if PSM can be dynamically chosen to increase performance
/**

View File

@@ -28,13 +28,6 @@ public class RenderedPageOcrImage implements OcrImage {
int rotationDegrees;
@Override
public int getOptimalPageSegmentationMode() {
return ITessAPI.TessPageSegMode.PSM_SINGLE_BLOCK;
}
@Override
public AffineTransform getImageCTM() {