Merge branch 'RED-8155' into 'master'

RED-8155: bold-detection in ocr-service

See merge request fforesight/ocr-service!34
This commit is contained in:
Dominique Eifländer 2024-01-17 13:54:00 +01:00
commit eaa6973a1f
7 changed files with 12 additions and 16 deletions

View File

@ -28,6 +28,8 @@ import com.knecon.fforesight.service.ocr.processor.service.scriptdetection.FontS
import com.knecon.fforesight.service.ocr.processor.service.threads.OCRThread;
import com.knecon.fforesight.service.ocr.processor.settings.OcrServiceSettings;
import io.micrometer.observation.ObservationRegistry;
import io.micrometer.observation.annotation.Observed;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
@ -48,6 +50,7 @@ public class OCRService {
OcrResultWriter ocrResultWriter;
GhostScriptService ghostScriptService;
FontStyleDetector boldDetector;
ObservationRegistry registry;
/**
@ -59,6 +62,7 @@ public class OCRService {
* @param fileId Id of file
* @param out OutputStream where to write to
*/
@Observed(name = "OCRService", contextualName = "run-ocr-on-document")
@SneakyThrows
public void runOcrOnDocument(String dossierId, String fileId, OutputStream out) {

View File

@ -109,7 +109,6 @@ public class GhostScriptOutputHandler extends Thread {
if (imageFile == null) {
throw new IllegalArgumentException(String.format("Page number %d does not exist in this thread. It only has pagenumbers %s", pageNumber, pagesToProcess.keySet()));
}
assert new File(imageFile.absoluteFilePath()).isFile();
renderedPageImageFileOutput.add(imageFile);
}

View File

@ -198,8 +198,10 @@ public class ImageProcessingThread extends Thread {
grayScale = pix;
} else if (pix.d == 32) {
grayScale = Leptonica1.pixConvertRGBToGrayFast(pix);
LeptUtils.disposePix(pix);
} else if (pix.d == 1) {
grayScale = Leptonica1.pixConvert1To8(null, pix, (byte) 0, (byte) 255);
LeptUtils.disposePix(pix);
} else {
throw new UnsupportedOperationException(String.format("Unknown pix format with bpp of %d", pix.d));
}
@ -208,36 +210,33 @@ public class ImageProcessingThread extends Thread {
float targetFactor = targetDpi / imageDpi;
if (targetFactor > 2.1) {
scaledUp = Leptonica1.pixScaleGray4xLI(grayScale);
LeptUtils.disposePix(grayScale);
} else if (targetFactor > 1.1) {
scaledUp = Leptonica1.pixScaleGray2xLI(grayScale);
LeptUtils.disposePix(grayScale);
} else {
scaledUp = grayScale;
}
// remove noise and prep for Otsu
gaussian = Leptonica1.pixConvolve(scaledUp, gaussianKernel, 8, 1);
LeptUtils.disposePix(scaledUp);
// Threshold to binary
if (pix.w < 100 || pix.h < 100) {
binarized = Leptonica1.pixThresholdToBinary(gaussian, 170);
} else {
binarized = Leptonica1.pixOtsuThreshOnBackgroundNorm(gaussian, null, 50, 50, 165, 10, 100, 5, 5, 0.2f, null);
if (binarized == null) { // Sometimes Otsu just fails, then we binarize directly
binarized = Leptonica1.pixThresholdToBinary(gaussian, 170);
}
}
LeptUtils.disposePix(pix);
LeptUtils.disposePix(grayScale);
LeptUtils.disposePix(scaledUp);
LeptUtils.disposePix(gaussian);
return binarized;
}
private static ITessAPI.TessBaseAPI initDetectionScriptHandle() {
ITessAPI.TessBaseAPI handle = TessAPI1.TessBaseAPICreate();

View File

@ -138,11 +138,4 @@ public class Tesseract2 extends Tesseract1 {
return renderer;
}
/**
 * Tears down the Tesseract handle held by this instance.
 *
 * Calls {@code TessBaseAPIEnd} and then {@code TessBaseAPIDelete}, each on the
 * value returned by {@code getHandle()}.
 *
 * NOTE(review): presumably End releases the engine's internal data and Delete
 * frees the handle itself; confirm against the Tesseract C API before relying
 * on this order. It is also assumed that {@code getHandle()} returns the same
 * handle on both calls; verify in the parent class.
 */
@Override
protected void dispose() {
// End the API session first, then delete the handle.
TessBaseAPIEnd(getHandle());
TessBaseAPIDelete(getHandle());
}
}

View File

@ -21,6 +21,7 @@ import com.knecon.fforesight.service.ocr.v1.api.model.DocumentRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;
import feign.FeignException;
import io.micrometer.observation.annotation.Observed;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
import lombok.experimental.FieldDefaults;

View File

@ -6,7 +6,7 @@
"overrides": [
{
"name": "tesseract",
"version": "5.3.2"
"version": "5.3.3"
},
{
"name": "leptonica",

View File

@ -64,7 +64,7 @@ public class OcrServiceIntegrationTest extends AbstractTest {
@SneakyThrows
public void testOcr() {
String text = testOCR("files/402Study.pdf");
String text = testOCR("files/UNAPPROVED_VV-331155 (1).pdf");
}