Clari-30 ocr service compatibility

This commit is contained in:
Timo Bejan 2024-03-08 14:44:48 +02:00
parent 6d69b783f1
commit d8011bdba5
6 changed files with 16 additions and 13 deletions

View File

@ -13,5 +13,12 @@ public class DocumentRequest {
protected String dossierId;
protected String fileId;
protected boolean removeWatermark;
/**
 * Creates a request carrying the given dossier and file identifiers.
 * Only these two fields are assigned here; {@code removeWatermark} is not touched by this constructor.
 *
 * @param dossierId value stored in the {@code dossierId} field
 * @param fileId value stored in the {@code fileId} field
 */
public DocumentRequest(String dossierId, String fileId) {
this.dossierId = dossierId;
this.fileId = fileId;
}
}

View File

@ -67,9 +67,9 @@ public class OCRService {
*/
@Observed(name = "OCRService", contextualName = "run-ocr-on-document")
@SneakyThrows
public void runOcrOnDocument(String dossierId, String fileId, Path tmpDir, File documentFile, File viewerDocumentFile) {
public void runOcrOnDocument(String dossierId, String fileId, boolean removeWatermark, Path tmpDir, File documentFile, File viewerDocumentFile) {
if (settings.isRemoveWatermark()) {
if (removeWatermark) {
removeWatermarkIfEnabled(documentFile);
}
removeInvisibleElements(documentFile);

View File

@ -21,7 +21,6 @@ public class OcrServiceSettings {
int minImageWidth = 20; // Minimum width for images to be processed
float minRotationConfidence = 2; // Sets a lower bound for the confidence rating for rotated pages.
boolean debug; // If true, overlays OCR images with a grid and draws word bounding boxes
boolean removeWatermark; // If true, watermarks will be removed
String languages = "deu+eng"; // Defines languages loaded into Tesseract as 3-char codes, additional languages must also be installed in the docker environment
COSName ocrMarkedContentTag = COSName.getPDFName("KNECON_OCR");
boolean boldDetection = true; // if true, bold detection will be attempted

View File

@ -10,7 +10,6 @@ import org.springframework.amqp.AmqpRejectAndDontRequeueException;
import org.springframework.amqp.core.Message;
import org.springframework.amqp.rabbit.annotation.RabbitHandler;
import org.springframework.amqp.rabbit.annotation.RabbitListener;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.stereotype.Service;
import org.springframework.util.FileSystemUtils;
@ -62,7 +61,7 @@ public class OcrMessageReceiver {
fileStorageService.downloadFiles(dossierId, fileId, documentFile, viewerDocumentFile);
ocrService.runOcrOnDocument(dossierId, fileId, tmpDir, documentFile, viewerDocumentFile);
ocrService.runOcrOnDocument(dossierId, fileId, ocrRequestMessage.isRemoveWatermark(), tmpDir, documentFile, viewerDocumentFile);
fileStorageService.storeFiles(dossierId, fileId, documentFile, viewerDocumentFile);

View File

@ -23,16 +23,14 @@ public class OcrMessageSender implements IOcrMessageSender {
RabbitTemplate rabbitTemplate;
// NOTE(review): this @PostConstruct hook does nothing except log the literal "PC" at ERROR level.
// It looks like leftover debug output — confirm whether it is still needed.
@PostConstruct
protected void pc(){
log.error("PC");
}
/**
 * Sends a status update that marks OCR as finished for the given file.
 * The response is passed to {@code rabbitTemplate.convertAndSend} with
 * {@code MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_QUEUE}; both page counters are set to
 * {@code totalImages} and {@code ocrFinished} is {@code true}.
 *
 * @param fileId id placed in the response's {@code fileId}
 * @param totalImages value used for both {@code numberOfPagesToOCR} and {@code numberOfOCRedPages}
 */
public void sendOcrFinished(String fileId, int totalImages) {
rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_QUEUE,
OCRStatusUpdateResponse.builder().fileId(fileId).numberOfPagesToOCR(totalImages).numberOfOCRedPages(totalImages).ocrFinished(true).build());
}
public void sendOCRStarted(String fileId) {
rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_QUEUE,
@ -49,9 +47,9 @@ public class OcrMessageSender implements IOcrMessageSender {
}
public void sendOcrResponse(String dossierId, String fileId){
public void sendOcrResponse(String dossierId, String fileId) {
rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_RESPONSE_QUEUE, new DocumentRequest(dossierId,fileId));
rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_RESPONSE_QUEUE, new DocumentRequest(dossierId, fileId));
}
}

View File

@ -125,7 +125,7 @@ public class OcrServiceIntegrationTest extends AbstractTest {
Files.copy(pdfFileResource.getFile().toPath(), documentFile, StandardCopyOption.REPLACE_EXISTING);
Files.copy(pdfFileResource.getFile().toPath(), viewerDocumentFile, StandardCopyOption.REPLACE_EXISTING);
ocrService.runOcrOnDocument(TEST_DOSSIER_ID, "file", tmpDir, documentFile.toFile(), viewerDocumentFile.toFile());
ocrService.runOcrOnDocument(TEST_DOSSIER_ID, "file", false, tmpDir, documentFile.toFile(), viewerDocumentFile.toFile());
System.out.println("File:" + documentFile);
try (var fileStream = new FileInputStream(documentFile.toFile())) {
@ -177,7 +177,7 @@ public class OcrServiceIntegrationTest extends AbstractTest {
Files.copy(file.toPath(), documentFile, StandardCopyOption.REPLACE_EXISTING);
Files.copy(file.toPath(), viewerDocumentFile, StandardCopyOption.REPLACE_EXISTING);
ocrService.runOcrOnDocument(TEST_DOSSIER_ID, "file", tmpDir, documentFile.toFile(), viewerDocumentFile.toFile());
ocrService.runOcrOnDocument(TEST_DOSSIER_ID, "file", false, tmpDir, documentFile.toFile(), viewerDocumentFile.toFile());
System.out.println("File:" + documentFile);
System.out.println("\n\n");
}