RED-6126: performance-test

* improved error logging
This commit is contained in:
Kilian Schuettler 2023-02-09 13:57:21 +01:00
parent 6ccf3f80fc
commit 8db0b712f7
2 changed files with 40 additions and 15 deletions

View File

@ -1,7 +1,10 @@
package com.iqser.red.service.ocr.v1.server.service;
import static java.lang.String.format;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Map;
@ -9,6 +12,7 @@ import java.util.Map;
import org.springframework.amqp.rabbit.core.RabbitTemplate;
import org.springframework.stereotype.Service;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.ocr.v1.api.model.OCRStatusUpdateResponse;
import com.iqser.red.service.ocr.v1.server.configuration.MessagingConfiguration;
@ -24,7 +28,6 @@ import com.pdftron.sdf.SDFDoc;
import io.micrometer.core.annotation.Timed;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@ -59,14 +62,17 @@ public class OCRService {
* @param fileId The file id
* @param out OutputStream to write the file to
*/
@SneakyThrows
@Timed("redactmanager_runOcrOnDocument")
public void runOcrOnDocument(String dossierId, String fileId, OutputStream out) {
public void runOcrOnDocument(String dossierId, String fileId, OutputStream out) throws IOException {
try (ByteArrayOutputStream transferOutputStream = new ByteArrayOutputStream()) {
long start = System.currentTimeMillis();
try (InputStream fileStream = fileStorageService.getOriginalFileAsStream(dossierId, fileId)) {
invisibleElementRemovalService.removeInvisibleElements(fileStream, transferOutputStream, false);
}
long end = System.currentTimeMillis();
// Divide by 1000.0 (floating point) so the elapsed time keeps its fractional seconds; a long/int division would truncate to whole seconds before formatting.
log.info("Invisible element removal successful for file with dossierId {} and fileId {}, took {}s", dossierId, fileId, format("%.1f", (end - start) / 1000.0));
try (InputStream transferInputStream = new ByteArrayInputStream(transferOutputStream.toByteArray())) {
runOcr(transferInputStream, out, fileId);
}
@ -74,10 +80,15 @@ public class OCRService {
}
@SneakyThrows
private void runOcr(InputStream fileStream, OutputStream out, String fileId) {
PDFDoc pdfDoc = new PDFDoc(fileStream);
PDFDoc pdfDoc;
try {
pdfDoc = new PDFDoc(fileStream);
} catch (Exception e) {
log.error("Couldn't parse file with fileId {} from InputStream ", fileId);
throw new RuntimeException(e);
}
Map<Integer, RectCollection> pageIdToRectCollection = imagePositionRetrievalService.getImagePositionPerPage(pdfDoc, true);
@ -102,7 +113,7 @@ public class OCRService {
StringBuilder zonesString = new StringBuilder();
for (int j = 0; j < pageIdToRectCollection.get(pageId).getNumRects(); ++j) {
var r = pageIdToRectCollection.get(pageId).getRectAt(j);
zonesString.append(String.format("[lower left (%.1f|%.1f) upper right (%.1f|%.1f)]", r.getX1(), r.getY1(), r.getX2(), r.getY2()));
zonesString.append(format("[lower left (%.1f|%.1f) upper right (%.1f|%.1f)]", r.getX1(), r.getY1(), r.getX2(), r.getY2()));
}
log.info("{}/{} Page {} done, OCR regions {}", numProcessedPages, pageIdToRectCollection.size(), pageId, zonesString);
@ -120,21 +131,34 @@ public class OCRService {
.build()));
} catch (PDFNetException e) {
log.error("failed to process page {}", pageId);
log.error("Failed to process page {}", pageId);
throw new RuntimeException(e);
} catch (JsonProcessingException e) {
log.error("Failed to send \"processed\" message to rabbitMQ for file with fileID {} on OCR page {}/{}", fileId, numProcessedPages, pageIdToRectCollection.size());
throw new RuntimeException(e);
}
}
rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_QUEUE,
objectMapper.writeValueAsString(OCRStatusUpdateResponse.builder()
.fileId(fileId)
.numberOfPagesToOCR(pageIdToRectCollection.size())
.numberOfOCRedPages(numProcessedPages)
.ocrFinished(true)
.build()));
try {
rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_QUEUE,
objectMapper.writeValueAsString(OCRStatusUpdateResponse.builder()
.fileId(fileId)
.numberOfPagesToOCR(pageIdToRectCollection.size())
.numberOfOCRedPages(numProcessedPages)
.ocrFinished(true)
.build()));
} catch (JsonProcessingException e) {
log.error("Failed to send message to rabbitMQ for file with fileID {} on OCR page {}/{}", fileId, numProcessedPages, pageIdToRectCollection.size());
throw new RuntimeException(e);
}
Optimizer.optimize(pdfDoc);
pdfDoc.save(out, SDFDoc.SaveMode.LINEARIZED, null);
try {
pdfDoc.save(out, SDFDoc.SaveMode.LINEARIZED, null);
} catch (Exception e) {
log.error("Processed File with fileId {} could not be saved", fileId);
throw new RuntimeException(e);
}
}
}

View File

@ -53,6 +53,7 @@ public class OcrMessageReceiver {
fileStorageService.storeOriginalFile(ocrRequestMessage.getDossierId(), ocrRequestMessage.getFileId(), new ByteArrayInputStream(out.toByteArray()));
} catch (IOException e) {
log.error("Failed to store file with dossierId {} and fileId {}", ocrRequestMessage.getDossierId(), ocrRequestMessage.getFileId());
throw new RuntimeException(e);
}