From 8db0b712f7baeae23e9abf8ef798c78d157b703f Mon Sep 17 00:00:00 2001 From: Kilian Schuettler Date: Thu, 9 Feb 2023 13:57:21 +0100 Subject: [PATCH] RED-6126: performance-test *improved error logging --- .../ocr/v1/server/service/OCRService.java | 54 +++++++++++++------ .../v1/server/service/OcrMessageReceiver.java | 1 + 2 files changed, 40 insertions(+), 15 deletions(-) diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/OCRService.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/OCRService.java index afa3b93..d3b9324 100644 --- a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/OCRService.java +++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/OCRService.java @@ -1,7 +1,10 @@ package com.iqser.red.service.ocr.v1.server.service; +import static java.lang.String.format; + import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; +import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.util.Map; @@ -9,6 +12,7 @@ import java.util.Map; import org.springframework.amqp.rabbit.core.RabbitTemplate; import org.springframework.stereotype.Service; +import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import com.iqser.red.service.ocr.v1.api.model.OCRStatusUpdateResponse; import com.iqser.red.service.ocr.v1.server.configuration.MessagingConfiguration; @@ -24,7 +28,6 @@ import com.pdftron.sdf.SDFDoc; import io.micrometer.core.annotation.Timed; import lombok.RequiredArgsConstructor; -import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; @Slf4j @@ -59,14 +62,17 @@ public class OCRService { * @param fileId The file id * @param out OutputStream to write the file to */ - @SneakyThrows @Timed("redactmanager_runOcrOnDocument") - public void 
runOcrOnDocument(String dossierId, String fileId, OutputStream out) { + public void runOcrOnDocument(String dossierId, String fileId, OutputStream out) throws IOException { try (ByteArrayOutputStream transferOutputStream = new ByteArrayOutputStream()) { + long start = System.currentTimeMillis(); + try (InputStream fileStream = fileStorageService.getOriginalFileAsStream(dossierId, fileId)) { invisibleElementRemovalService.removeInvisibleElements(fileStream, transferOutputStream, false); } + long end = System.currentTimeMillis(); + log.info("Invisible element removal successful for file with dossierId {} and fileId {}, took {}s", dossierId, fileId, format("%.1f", (end - start) / 1000.0)); try (InputStream transferInputStream = new ByteArrayInputStream(transferOutputStream.toByteArray())) { runOcr(transferInputStream, out, fileId); } @@ -74,10 +80,15 @@ public class OCRService { } - @SneakyThrows private void runOcr(InputStream fileStream, OutputStream out, String fileId) { - PDFDoc pdfDoc = new PDFDoc(fileStream); + PDFDoc pdfDoc; + try { + pdfDoc = new PDFDoc(fileStream); + } catch (Exception e) { + log.error("Couldn't parse file with fileId {} from InputStream ", fileId); + throw new RuntimeException(e); + } Map pageIdToRectCollection = imagePositionRetrievalService.getImagePositionPerPage(pdfDoc, true); @@ -102,7 +113,7 @@ public class OCRService { StringBuilder zonesString = new StringBuilder(); for (int j = 0; j < pageIdToRectCollection.get(pageId).getNumRects(); ++j) { var r = pageIdToRectCollection.get(pageId).getRectAt(j); - zonesString.append(String.format("[lower left (%.1f|%.1f) upper right (%.1f|%.1f)]", r.getX1(), r.getY1(), r.getX2(), r.getY2())); + zonesString.append(format("[lower left (%.1f|%.1f) upper right (%.1f|%.1f)]", r.getX1(), r.getY1(), r.getX2(), r.getY2())); } log.info("{}/{} Page {} done, OCR regions {}", numProcessedPages, pageIdToRectCollection.size(), pageId, zonesString); @@ -120,21 +131,34 @@ public class OCRService { 
.build())); } catch (PDFNetException e) { - log.error("failed to process page {}", pageId); + log.error("Failed to process page {}", pageId); + throw new RuntimeException(e); + } catch (JsonProcessingException e) { + log.error("Failed to send \"processed\" message to rabbitMQ for file with fileID {} on OCR page {}/{}", fileId, numProcessedPages, pageIdToRectCollection.size()); throw new RuntimeException(e); } } - rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_QUEUE, - objectMapper.writeValueAsString(OCRStatusUpdateResponse.builder() - .fileId(fileId) - .numberOfPagesToOCR(pageIdToRectCollection.size()) - .numberOfOCRedPages(numProcessedPages) - .ocrFinished(true) - .build())); + try { + rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_QUEUE, + objectMapper.writeValueAsString(OCRStatusUpdateResponse.builder() + .fileId(fileId) + .numberOfPagesToOCR(pageIdToRectCollection.size()) + .numberOfOCRedPages(numProcessedPages) + .ocrFinished(true) + .build())); + } catch (JsonProcessingException e) { + log.error("Failed to send message to rabbitMQ for file with fileID {} on OCR page {}/{}", fileId, numProcessedPages, pageIdToRectCollection.size()); + throw new RuntimeException(e); + } Optimizer.optimize(pdfDoc); - pdfDoc.save(out, SDFDoc.SaveMode.LINEARIZED, null); + try { + pdfDoc.save(out, SDFDoc.SaveMode.LINEARIZED, null); + } catch (Exception e) { + log.error("Processed File with fileId {} could not be saved", fileId); + throw new RuntimeException(e); + } } } diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/OcrMessageReceiver.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/OcrMessageReceiver.java index 72af005..7ae1443 100644 --- a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/OcrMessageReceiver.java +++ 
b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/OcrMessageReceiver.java @@ -53,6 +53,7 @@ public class OcrMessageReceiver { fileStorageService.storeOriginalFile(ocrRequestMessage.getDossierId(), ocrRequestMessage.getFileId(), new ByteArrayInputStream(out.toByteArray())); } catch (IOException e) { + log.error("Failed to store file with dossierId {} and fileId {}", ocrRequestMessage.getDossierId(), ocrRequestMessage.getFileId()); throw new RuntimeException(e); }