diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/OCRService.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/OCRService.java index d3b9324..b370016 100644 --- a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/OCRService.java +++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/OCRService.java @@ -69,10 +69,11 @@ public class OCRService { long start = System.currentTimeMillis(); try (InputStream fileStream = fileStorageService.getOriginalFileAsStream(dossierId, fileId)) { + log.info("Start invisible element removal for file with dossierId {} and fileId {}", dossierId, fileId); invisibleElementRemovalService.removeInvisibleElements(fileStream, transferOutputStream, false); } long end = System.currentTimeMillis(); - log.info("Invisible element removal successful for file with dossierId {} and fileId {}, took {}s", dossierId, fileId, format("%.1f", (float) ((end - start) / 1000))); + log.info("Invisible element removal successful for file with dossierId {} and fileId {}, took {}s", dossierId, fileId, format("%.1f", (end - start) / 1000.0)); try (InputStream transferInputStream = new ByteArrayInputStream(transferOutputStream.toByteArray())) { runOcr(transferInputStream, out, fileId); } @@ -93,12 +94,12 @@ public class OCRService { Map pageIdToRectCollection = imagePositionRetrievalService.getImagePositionPerPage(pdfDoc, true); int numProcessedPages = 0; + // optimization: only scanning pages that contain images for (Integer pageId : pageIdToRectCollection.keySet()) { try { // optimization: creating a new document is faster than reusing the same and adding/removing pages one by one OCROptions options = new OCROptions(); PDFDoc ocrPageDoc = new PDFDoc(); - // optimization: only scanning pages that contain images Page pdfPage = pdfDoc.getPage(pageId); // optimization: this line ensures the ocr text is placed correctly by PDFTron pdfPage.setMediaBox(pdfPage.getCropBox());