hotfix: Extend Tesseract instead of Tesseract1

This commit is contained in:
Dominique Eifländer 2024-01-15 16:13:43 +01:00
parent fb1fe35bc1
commit 8924e905ad

View File

@ -1,5 +1,25 @@
package com.knecon.fforesight.service.ocr.processor.utils;
import static net.sourceforge.tess4j.ITessAPI.TRUE;
import static net.sourceforge.tess4j.TessAPI1.TessBaseAPIDelete;
import static net.sourceforge.tess4j.TessAPI1.TessBaseAPIEnd;
import static net.sourceforge.tess4j.TessAPI1.TessBaseAPIGetIterator;
import static net.sourceforge.tess4j.TessAPI1.TessBaseAPIGetStringVariable;
import static net.sourceforge.tess4j.TessAPI1.TessBaseAPIMeanTextConf;
import static net.sourceforge.tess4j.TessAPI1.TessBaseAPIProcessPage;
import static net.sourceforge.tess4j.TessAPI1.TessDeleteResultRenderer;
import static net.sourceforge.tess4j.TessAPI1.TessHOcrRendererCreate;
import static net.sourceforge.tess4j.TessAPI1.TessPageIteratorBegin;
import static net.sourceforge.tess4j.TessAPI1.TessPageIteratorBoundingBox;
import static net.sourceforge.tess4j.TessAPI1.TessPageIteratorNext;
import static net.sourceforge.tess4j.TessAPI1.TessResultIteratorConfidence;
import static net.sourceforge.tess4j.TessAPI1.TessResultIteratorDelete;
import static net.sourceforge.tess4j.TessAPI1.TessResultIteratorGetPageIterator;
import static net.sourceforge.tess4j.TessAPI1.TessResultIteratorGetUTF8Text;
import static net.sourceforge.tess4j.TessAPI1.TessResultRendererBeginDocument;
import static net.sourceforge.tess4j.TessAPI1.TessResultRendererEndDocument;
import static net.sourceforge.tess4j.TessAPI1.TessResultRendererInsert;
import java.awt.Rectangle;
import java.nio.IntBuffer;
import java.util.ArrayList;
@ -9,20 +29,19 @@ import com.sun.jna.Pointer;
import lombok.extern.slf4j.Slf4j;
import net.sourceforge.lept4j.Pix;
import net.sourceforge.tess4j.ITessAPI;
import net.sourceforge.tess4j.OCRResult;
import net.sourceforge.tess4j.TessAPI1;
import net.sourceforge.tess4j.Tesseract1;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
import net.sourceforge.tess4j.Word;
@Slf4j
/**
* Overridden version that exists only so Tesseract1 can be used with Pix objects instead of BufferedImages. All functions are copied, and the BufferedImage -> Pix conversion is then removed.
*/
public class Tesseract2 extends Tesseract1 {
*/ public class Tesseract2 extends Tesseract {
private int createDocuments(Pix pix, String filename, TessResultRenderer renderer) {
private int createDocuments(Pix pix, String filename, ITessAPI.TessResultRenderer renderer) {
String title = TessBaseAPIGetStringVariable(getHandle(), DOCUMENT_TITLE);
TessResultRendererBeginDocument(renderer, title);
@ -62,7 +81,7 @@ public class Tesseract2 extends Tesseract1 {
try {
for (int i = 0; i < pixs.length; i++) {
try {
TessResultRenderer renderer = createRenderers(outputbases[i], formats);
ITessAPI.TessResultRenderer renderer = createRenderers(outputbases[i], formats);
int meanTextConfidence = createDocuments(pixs[i], filenames[i], renderer);
TessDeleteResultRenderer(renderer);
List<Word> words = meanTextConfidence > 0 ? getRecognizedWords(pageIteratorLevel) : new ArrayList<Word>();
@ -85,8 +104,8 @@ public class Tesseract2 extends Tesseract1 {
List<Word> words = new ArrayList<>();
try {
TessResultIterator ri = TessBaseAPIGetIterator(getHandle());
TessPageIterator pi = TessResultIteratorGetPageIterator(ri);
ITessAPI.TessResultIterator ri = TessBaseAPIGetIterator(getHandle());
ITessAPI.TessPageIterator pi = TessResultIteratorGetPageIterator(ri);
TessPageIteratorBegin(pi);
do {
@ -119,9 +138,9 @@ public class Tesseract2 extends Tesseract1 {
}
private TessResultRenderer createRenderers(String outputbase, List<RenderedFormat> formats) {
private ITessAPI.TessResultRenderer createRenderers(String outputbase, List<RenderedFormat> formats) {
TessResultRenderer renderer = null;
ITessAPI.TessResultRenderer renderer = null;
for (RenderedFormat format : formats) {
switch (format) {
@ -138,6 +157,7 @@ public class Tesseract2 extends Tesseract1 {
return renderer;
}
@Override
protected void dispose() {