diff --git a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/initializer/NativeLibrariesInitializer.java b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/initializer/NativeLibrariesInitializer.java index 8158ebc..387a7a1 100644 --- a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/initializer/NativeLibrariesInitializer.java +++ b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/initializer/NativeLibrariesInitializer.java @@ -34,12 +34,16 @@ public class NativeLibrariesInitializer { System.setProperty("jna.library.path", System.getenv("VCPKG_DYNAMIC_LIB")); log.info("Asserting Native Libraries loaded"); - NativeLibrary leptonicaLib = NativeLibrary.getInstance("leptonica"); - assert leptonicaLib != null; - log.info("Leptonica library loaded from {}", leptonicaLib.getFile().getAbsolutePath()); - NativeLibrary tesseractLib = NativeLibrary.getInstance("tesseract"); - assert tesseractLib != null; - log.info("Tesseract library loaded from {}", tesseractLib.getFile().getAbsolutePath()); + + try (NativeLibrary leptonicaLib = NativeLibrary.getInstance("leptonica")) { + assert leptonicaLib != null; + log.info("Leptonica library loaded from {}", leptonicaLib.getFile().getAbsolutePath()); + } + + try (NativeLibrary tesseractLib = NativeLibrary.getInstance("tesseract")) { + assert tesseractLib != null; + log.info("Tesseract library loaded from {}", tesseractLib.getFile().getAbsolutePath()); + } } } diff --git a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/OcrResultWriter.java b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/OcrResultWriter.java index e730ede..1895775 100644 --- a/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/OcrResultWriter.java +++ b/ocr-service-v1/ocr-service-processor/src/main/java/com/knecon/fforesight/service/ocr/processor/service/OcrResultWriter.java @@ -85,15 +85,16 @@ public class OcrResultWriter { } + @SuppressWarnings("PMD") private List getTextBBoxes(Page page) { List textBBoxes = new ArrayList<>(); try (var textExtractor = new TextExtractor()) { textExtractor.begin(page); - try { - for (TextExtractor.Line line = textExtractor.getFirstLine(); line.isValid(); line = line.getNextLine()) { - for (var word = line.getFirstWord(); word.isValid(); word = word.getNextWord()) { + + for (TextExtractor.Line line = textExtractor.getFirstLine(); line.isValid(); line = getNextLine(line)) { + for (TextExtractor.Word word = line.getFirstWord(); word.isValid(); word = getNextWord(word)) { textBBoxes.add(Converter.toRectangle2D(word.getBBox())); } } @@ -105,9 +106,19 @@ public class OcrResultWriter { } - private static Function pageNumber1IdxTo0IdxMapper() { - // PDFBox uses a 0-based index for page numbers internally, while we use a 1-based index - return p -> p - 1; + private static TextExtractor.Word getNextWord(TextExtractor.Word word) { + + TextExtractor.Word nextWord = word.getNextWord(); + word.close(); + return nextWord; + } + + + private static TextExtractor.Line getNextLine(TextExtractor.Line line) { + + TextExtractor.Line newLine = line.getNextLine(); + line.close(); + return newLine; } @@ -133,7 +144,6 @@ public class OcrResultWriter { } - private VisualizationsOnPage createDebugTextVisualizations(List ocrResultsToWrite, List textBBoxes) { List wordsToDraw = new ArrayList<>(); @@ -175,7 +185,6 @@ public class OcrResultWriter { } - private VisualizationsOnPage createDebugBBoxVisualizations(List ocrResultsToWrite) { List words = ocrResultsToWrite.stream() @@ -199,9 +208,9 @@ public class OcrResultWriter { private List quadPointAsLines(QuadPoint rect) { return List.of(new ColoredLine(new Line2D.Double(rect.a(), rect.b()), Color.ORANGE, 1), - new ColoredLine(new Line2D.Double(rect.b(), rect.c()), Color.BLUE, 1), - new ColoredLine(new Line2D.Double(rect.c(), rect.d()), Color.GREEN, 1), - new ColoredLine(new Line2D.Double(rect.d(), rect.a()), Color.MAGENTA, 1)); + new ColoredLine(new Line2D.Double(rect.b(), rect.c()), Color.BLUE, 1), + new ColoredLine(new Line2D.Double(rect.c(), rect.d()), Color.GREEN, 1), + new ColoredLine(new Line2D.Double(rect.d(), rect.a()), Color.MAGENTA, 1)); }