RED-6126: performance-test
* Improved error logging
This commit is contained in:
parent
6ccf3f80fc
commit
8db0b712f7
@ -1,7 +1,10 @@
|
||||
package com.iqser.red.service.ocr.v1.server.service;
|
||||
|
||||
import static java.lang.String.format;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.util.Map;
|
||||
@ -9,6 +12,7 @@ import java.util.Map;
|
||||
import org.springframework.amqp.rabbit.core.RabbitTemplate;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.iqser.red.service.ocr.v1.api.model.OCRStatusUpdateResponse;
|
||||
import com.iqser.red.service.ocr.v1.server.configuration.MessagingConfiguration;
|
||||
@ -24,7 +28,6 @@ import com.pdftron.sdf.SDFDoc;
|
||||
|
||||
import io.micrometer.core.annotation.Timed;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@ -59,14 +62,17 @@ public class OCRService {
|
||||
* @param fileId The file id
|
||||
* @param out OutputStream to write the file to
|
||||
*/
|
||||
@SneakyThrows
|
||||
@Timed("redactmanager_runOcrOnDocument")
|
||||
public void runOcrOnDocument(String dossierId, String fileId, OutputStream out) {
|
||||
public void runOcrOnDocument(String dossierId, String fileId, OutputStream out) throws IOException {
|
||||
|
||||
try (ByteArrayOutputStream transferOutputStream = new ByteArrayOutputStream()) {
|
||||
long start = System.currentTimeMillis();
|
||||
|
||||
try (InputStream fileStream = fileStorageService.getOriginalFileAsStream(dossierId, fileId)) {
|
||||
invisibleElementRemovalService.removeInvisibleElements(fileStream, transferOutputStream, false);
|
||||
}
|
||||
long end = System.currentTimeMillis();
|
||||
log.info("Invisible element removal successful for file with dossierId {} and fileId {}, took {}s", dossierId, fileId, format("%.1f", (float) ((end - start) / 1000)));
|
||||
try (InputStream transferInputStream = new ByteArrayInputStream(transferOutputStream.toByteArray())) {
|
||||
runOcr(transferInputStream, out, fileId);
|
||||
}
|
||||
@ -74,10 +80,15 @@ public class OCRService {
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private void runOcr(InputStream fileStream, OutputStream out, String fileId) {
|
||||
|
||||
PDFDoc pdfDoc = new PDFDoc(fileStream);
|
||||
PDFDoc pdfDoc;
|
||||
try {
|
||||
pdfDoc = new PDFDoc(fileStream);
|
||||
} catch (Exception e) {
|
||||
log.error("Couldn't parse file with fileId {} from InputStream ", fileId);
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
Map<Integer, RectCollection> pageIdToRectCollection = imagePositionRetrievalService.getImagePositionPerPage(pdfDoc, true);
|
||||
|
||||
@ -102,7 +113,7 @@ public class OCRService {
|
||||
StringBuilder zonesString = new StringBuilder();
|
||||
for (int j = 0; j < pageIdToRectCollection.get(pageId).getNumRects(); ++j) {
|
||||
var r = pageIdToRectCollection.get(pageId).getRectAt(j);
|
||||
zonesString.append(String.format("[lower left (%.1f|%.1f) upper right (%.1f|%.1f)]", r.getX1(), r.getY1(), r.getX2(), r.getY2()));
|
||||
zonesString.append(format("[lower left (%.1f|%.1f) upper right (%.1f|%.1f)]", r.getX1(), r.getY1(), r.getX2(), r.getY2()));
|
||||
}
|
||||
log.info("{}/{} Page {} done, OCR regions {}", numProcessedPages, pageIdToRectCollection.size(), pageId, zonesString);
|
||||
|
||||
@ -120,21 +131,34 @@ public class OCRService {
|
||||
.build()));
|
||||
|
||||
} catch (PDFNetException e) {
|
||||
log.error("failed to process page {}", pageId);
|
||||
log.error("Failed to process page {}", pageId);
|
||||
throw new RuntimeException(e);
|
||||
} catch (JsonProcessingException e) {
|
||||
log.error("Failed to send \"processed\" message to rabbitMQ for file with fileID {} on OCR page {}/{}", fileId, numProcessedPages, pageIdToRectCollection.size());
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_QUEUE,
|
||||
objectMapper.writeValueAsString(OCRStatusUpdateResponse.builder()
|
||||
.fileId(fileId)
|
||||
.numberOfPagesToOCR(pageIdToRectCollection.size())
|
||||
.numberOfOCRedPages(numProcessedPages)
|
||||
.ocrFinished(true)
|
||||
.build()));
|
||||
try {
|
||||
rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_QUEUE,
|
||||
objectMapper.writeValueAsString(OCRStatusUpdateResponse.builder()
|
||||
.fileId(fileId)
|
||||
.numberOfPagesToOCR(pageIdToRectCollection.size())
|
||||
.numberOfOCRedPages(numProcessedPages)
|
||||
.ocrFinished(true)
|
||||
.build()));
|
||||
} catch (JsonProcessingException e) {
|
||||
log.error("Failed to send message to rabbitMQ for file with fileID {} on OCR page {}/{}", fileId, numProcessedPages, pageIdToRectCollection.size());
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
Optimizer.optimize(pdfDoc);
|
||||
pdfDoc.save(out, SDFDoc.SaveMode.LINEARIZED, null);
|
||||
try {
|
||||
pdfDoc.save(out, SDFDoc.SaveMode.LINEARIZED, null);
|
||||
} catch (Exception e) {
|
||||
log.error("Processed File with fileId {} could not be saved", fileId);
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -53,6 +53,7 @@ public class OcrMessageReceiver {
|
||||
|
||||
fileStorageService.storeOriginalFile(ocrRequestMessage.getDossierId(), ocrRequestMessage.getFileId(), new ByteArrayInputStream(out.toByteArray()));
|
||||
} catch (IOException e) {
|
||||
log.error("Failed to store file with dossierId {} and fileId {}", ocrRequestMessage.getDossierId(), ocrRequestMessage.getFileId());
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user