Merge branch 'clari-30' into 'master'
Clari-30 ocr service compatibility
See merge request fforesight/ocr-service!44
This commit is contained in:
commit
f54f526f44
@ -13,5 +13,12 @@ public class DocumentRequest {
|
|||||||
|
|
||||||
protected String dossierId;
|
protected String dossierId;
|
||||||
protected String fileId;
|
protected String fileId;
|
||||||
|
protected boolean removeWatermark;
|
||||||
|
|
||||||
|
public DocumentRequest(String dossierId, String fileId) {
|
||||||
|
|
||||||
|
this.dossierId = dossierId;
|
||||||
|
this.fileId = fileId;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -67,9 +67,9 @@ public class OCRService {
|
|||||||
*/
|
*/
|
||||||
@Observed(name = "OCRService", contextualName = "run-ocr-on-document")
|
@Observed(name = "OCRService", contextualName = "run-ocr-on-document")
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
public void runOcrOnDocument(String dossierId, String fileId, Path tmpDir, File documentFile, File viewerDocumentFile) {
|
public void runOcrOnDocument(String dossierId, String fileId, boolean removeWatermark, Path tmpDir, File documentFile, File viewerDocumentFile) {
|
||||||
|
|
||||||
if (settings.isRemoveWatermark()) {
|
if (removeWatermark) {
|
||||||
removeWatermarkIfEnabled(documentFile);
|
removeWatermarkIfEnabled(documentFile);
|
||||||
}
|
}
|
||||||
removeInvisibleElements(documentFile);
|
removeInvisibleElements(documentFile);
|
||||||
|
|||||||
@ -21,7 +21,6 @@ public class OcrServiceSettings {
|
|||||||
int minImageWidth = 20; // Minimum width for images to be processed
|
int minImageWidth = 20; // Minimum width for images to be processed
|
||||||
float minRotationConfidence = 2; // Sets a lower bound for the confidence rating for rotated pages.
|
float minRotationConfidence = 2; // Sets a lower bound for the confidence rating for rotated pages.
|
||||||
boolean debug; // If true, overlays OCR images with a grid and draws word bounding boxes
|
boolean debug; // If true, overlays OCR images with a grid and draws word bounding boxes
|
||||||
boolean removeWatermark; // If true, watermarks will be removed
|
|
||||||
String languages = "deu+eng"; // Defines languages loaded into Tesseract as 3-char codes, additional languages must also be installed in the docker environment
|
String languages = "deu+eng"; // Defines languages loaded into Tesseract as 3-char codes, additional languages must also be installed in the docker environment
|
||||||
COSName ocrMarkedContentTag = COSName.getPDFName("KNECON_OCR");
|
COSName ocrMarkedContentTag = COSName.getPDFName("KNECON_OCR");
|
||||||
boolean boldDetection = true; // if true, bold detection will be attempted
|
boolean boldDetection = true; // if true, bold detection will be attempted
|
||||||
|
|||||||
@ -10,7 +10,6 @@ import org.springframework.amqp.AmqpRejectAndDontRequeueException;
|
|||||||
import org.springframework.amqp.core.Message;
|
import org.springframework.amqp.core.Message;
|
||||||
import org.springframework.amqp.rabbit.annotation.RabbitHandler;
|
import org.springframework.amqp.rabbit.annotation.RabbitHandler;
|
||||||
import org.springframework.amqp.rabbit.annotation.RabbitListener;
|
import org.springframework.amqp.rabbit.annotation.RabbitListener;
|
||||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
import org.springframework.util.FileSystemUtils;
|
import org.springframework.util.FileSystemUtils;
|
||||||
|
|
||||||
@ -62,7 +61,7 @@ public class OcrMessageReceiver {
|
|||||||
|
|
||||||
fileStorageService.downloadFiles(dossierId, fileId, documentFile, viewerDocumentFile);
|
fileStorageService.downloadFiles(dossierId, fileId, documentFile, viewerDocumentFile);
|
||||||
|
|
||||||
ocrService.runOcrOnDocument(dossierId, fileId, tmpDir, documentFile, viewerDocumentFile);
|
ocrService.runOcrOnDocument(dossierId, fileId, ocrRequestMessage.isRemoveWatermark(), tmpDir, documentFile, viewerDocumentFile);
|
||||||
|
|
||||||
fileStorageService.storeFiles(dossierId, fileId, documentFile, viewerDocumentFile);
|
fileStorageService.storeFiles(dossierId, fileId, documentFile, viewerDocumentFile);
|
||||||
|
|
||||||
|
|||||||
@ -23,16 +23,14 @@ public class OcrMessageSender implements IOcrMessageSender {
|
|||||||
|
|
||||||
RabbitTemplate rabbitTemplate;
|
RabbitTemplate rabbitTemplate;
|
||||||
|
|
||||||
@PostConstruct
|
|
||||||
protected void pc(){
|
|
||||||
log.error("PC");
|
|
||||||
}
|
|
||||||
public void sendOcrFinished(String fileId, int totalImages) {
|
public void sendOcrFinished(String fileId, int totalImages) {
|
||||||
|
|
||||||
rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_QUEUE,
|
rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_QUEUE,
|
||||||
OCRStatusUpdateResponse.builder().fileId(fileId).numberOfPagesToOCR(totalImages).numberOfOCRedPages(totalImages).ocrFinished(true).build());
|
OCRStatusUpdateResponse.builder().fileId(fileId).numberOfPagesToOCR(totalImages).numberOfOCRedPages(totalImages).ocrFinished(true).build());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public void sendOCRStarted(String fileId) {
|
public void sendOCRStarted(String fileId) {
|
||||||
|
|
||||||
rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_QUEUE,
|
rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_QUEUE,
|
||||||
|
|||||||
@ -125,7 +125,7 @@ public class OcrServiceIntegrationTest extends AbstractTest {
|
|||||||
Files.copy(pdfFileResource.getFile().toPath(), documentFile, StandardCopyOption.REPLACE_EXISTING);
|
Files.copy(pdfFileResource.getFile().toPath(), documentFile, StandardCopyOption.REPLACE_EXISTING);
|
||||||
Files.copy(pdfFileResource.getFile().toPath(), viewerDocumentFile, StandardCopyOption.REPLACE_EXISTING);
|
Files.copy(pdfFileResource.getFile().toPath(), viewerDocumentFile, StandardCopyOption.REPLACE_EXISTING);
|
||||||
|
|
||||||
ocrService.runOcrOnDocument(TEST_DOSSIER_ID, "file", tmpDir, documentFile.toFile(), viewerDocumentFile.toFile());
|
ocrService.runOcrOnDocument(TEST_DOSSIER_ID, "file", false, tmpDir, documentFile.toFile(), viewerDocumentFile.toFile());
|
||||||
System.out.println("File:" + documentFile);
|
System.out.println("File:" + documentFile);
|
||||||
|
|
||||||
try (var fileStream = new FileInputStream(documentFile.toFile())) {
|
try (var fileStream = new FileInputStream(documentFile.toFile())) {
|
||||||
@ -177,7 +177,7 @@ public class OcrServiceIntegrationTest extends AbstractTest {
|
|||||||
Files.copy(file.toPath(), documentFile, StandardCopyOption.REPLACE_EXISTING);
|
Files.copy(file.toPath(), documentFile, StandardCopyOption.REPLACE_EXISTING);
|
||||||
Files.copy(file.toPath(), viewerDocumentFile, StandardCopyOption.REPLACE_EXISTING);
|
Files.copy(file.toPath(), viewerDocumentFile, StandardCopyOption.REPLACE_EXISTING);
|
||||||
|
|
||||||
ocrService.runOcrOnDocument(TEST_DOSSIER_ID, "file", tmpDir, documentFile.toFile(), viewerDocumentFile.toFile());
|
ocrService.runOcrOnDocument(TEST_DOSSIER_ID, "file", false, tmpDir, documentFile.toFile(), viewerDocumentFile.toFile());
|
||||||
System.out.println("File:" + documentFile);
|
System.out.println("File:" + documentFile);
|
||||||
System.out.println("\n\n");
|
System.out.println("\n\n");
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user