diff --git a/ocr-service-v1/ocr-service-server-v1/pom.xml b/ocr-service-v1/ocr-service-server-v1/pom.xml index c3e739f..9b0247e 100644 --- a/ocr-service-v1/ocr-service-server-v1/pom.xml +++ b/ocr-service-v1/ocr-service-server-v1/pom.xml @@ -15,7 +15,7 @@ 0.10.0 2.118.0 - 2.10.0 + 2.14.0 diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/Application.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/Application.java index 89159d5..5408eb9 100644 --- a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/Application.java +++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/Application.java @@ -12,6 +12,7 @@ import org.springframework.context.annotation.Import; import org.springframework.scheduling.annotation.EnableAsync; import com.iqser.red.pdftronlogic.commons.InvisibleElementRemovalService; +import com.iqser.red.pdftronlogic.commons.WatermarkRemovalService; import com.iqser.red.service.ocr.v1.server.client.FileStatusProcessingUpdateClient; import com.iqser.red.service.ocr.v1.server.configuration.MessagingConfiguration; import com.iqser.red.service.ocr.v1.server.settings.OcrServiceSettings; @@ -25,7 +26,7 @@ import io.micrometer.core.instrument.MeterRegistry; @ImportAutoConfiguration({MultiTenancyAutoConfiguration.class}) @EnableConfigurationProperties(OcrServiceSettings.class) @SpringBootApplication(exclude = {SecurityAutoConfiguration.class, ManagementWebSecurityAutoConfiguration.class}) -@Import({ MessagingConfiguration.class, StorageAutoConfiguration.class}) +@Import({MessagingConfiguration.class, StorageAutoConfiguration.class}) @EnableFeignClients(basePackageClasses = FileStatusProcessingUpdateClient.class) public class Application { @@ -53,4 +54,11 @@ public class Application { return new InvisibleElementRemovalService(); } + + @Bean + public WatermarkRemovalService watermarkRemovalService() { + + return new WatermarkRemovalService(); + } + } diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/OCRService.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/OCRService.java index 2c224c7..cf928e9 100644 --- a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/OCRService.java +++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/OCRService.java @@ -14,9 +14,8 @@ import org.springframework.amqp.rabbit.core.RabbitTemplate; import org.springframework.stereotype.Service; import com.iqser.red.pdftronlogic.commons.InvisibleElementRemovalService; +import com.iqser.red.pdftronlogic.commons.WatermarkRemovalService; import com.iqser.red.service.ocr.v1.api.model.OCRStatusUpdateResponse; -import com.iqser.red.service.ocr.v1.server.client.DossierClient; -import com.iqser.red.service.ocr.v1.server.client.DossierTemplateClient; import com.iqser.red.service.ocr.v1.server.configuration.MessagingConfiguration; import com.iqser.red.service.ocr.v1.server.settings.OcrServiceSettings; import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.DossierTemplate; diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/WatermarkRemovalService.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/WatermarkRemovalService.java deleted file mode 100644 index 2558b8b..0000000 --- a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/WatermarkRemovalService.java +++ /dev/null @@ -1,125 +0,0 @@ -package com.iqser.red.service.ocr.v1.server.service; - -import java.io.InputStream; -import java.io.OutputStream; -import java.util.Set; -import java.util.TreeSet; - -import org.springframework.stereotype.Service; - -import com.pdftron.pdf.Element; -import com.pdftron.pdf.ElementReader; -import com.pdftron.pdf.ElementWriter; -import com.pdftron.pdf.PDFDoc; -import com.pdftron.pdf.Page; -import com.pdftron.pdf.PageIterator; -import com.pdftron.sdf.Obj; -import com.pdftron.sdf.SDFDoc; - -import lombok.SneakyThrows; -import lombok.extern.slf4j.Slf4j; - -@Slf4j -@Service -public class WatermarkRemovalService { - - /** - * !!!Warning!! This logic is definitive wrong and should NEVER run in production, - * however it was used in second DocuMine (SCM) prototype and we currently need it to compare the results. - * - * @param pdfFile the file as Inputstream. - * @param transferOutputStream the resulting file as Outputstream. - */ - @SneakyThrows - public void removeWatermarks(InputStream pdfFile, OutputStream transferOutputStream) { - - PDFDoc pdfDoc = new PDFDoc(pdfFile); - this.execute(pdfDoc); - - try { - pdfDoc.save(transferOutputStream, SDFDoc.SaveMode.LINEARIZED, null); - } catch (Exception var10) { - log.error("File could not be saved after watermark removal"); - throw new RuntimeException(var10); - } finally { - pdfDoc.close(); - } - } - - - @SneakyThrows - private void execute(PDFDoc pdfDoc) { - - ElementWriter writer = new ElementWriter(); - ElementReader reader = new ElementReader(); - Set visited = new TreeSet<>(); - - for (PageIterator iterator = pdfDoc.getPageIterator(); iterator.hasNext(); ) { - Page page = iterator.next(); - removeOverlapText(page, reader, writer, visited); - } - - reader.destroy(); - writer.destroy(); - } - - - @SneakyThrows - private void removeOverlapText(Page page, ElementReader reader, ElementWriter writer, Set visited) { - - visited.add((int) page.getSDFObj().getObjNum()); - reader.begin(page); - writer.begin(page, ElementWriter.e_replacement, false, true, page.getResourceDict()); - processElements(reader, writer, visited, false); - writer.end(); - reader.end(); - } - - - @SneakyThrows - private void processElements(ElementReader reader, ElementWriter writer, Set visited, boolean isInForm) { - - for (Element element = reader.next(); element != null; element = reader.next()) - switch (element.getType()) { - case Element.e_image, Element.e_inline_image -> processImage(element, writer, isInForm); - case Element.e_form -> processForm(reader, writer, element, visited); - default -> writer.writeElement(element); - } - } - - - @SneakyThrows - private void processForm(ElementReader reader, ElementWriter writer, Element element, Set visited) { - - writer.writeElement(element); - Obj formObj = element.getXObject(); - - if (!visited.contains((int) formObj.getObjNum())) { - visited.add((int) formObj.getObjNum()); - ElementWriter formWriter = new ElementWriter(); - reader.formBegin(); - formWriter.begin(formObj); - - reader.clearChangeList(); - formWriter.setDefaultGState(reader); - - processElements(reader, formWriter, visited, true); - formWriter.end(); - formWriter.destroy(); - reader.end(); - } - } - - - @SneakyThrows - private void processImage(Element element, ElementWriter writer, boolean isInForm) { - - // !!! Warning, this will also remove none watermark images form files. - // Idea: Remove watermarks by comparing (hash values) images. Watermarks to remove should be uploaded in dossier/dossierTemplate. - // Removing watermarks should be done in preprocessing, not at ocr. - if (!isInForm) { - writer.writeElement(element); - } - } - -} diff --git a/ocr-service-v1/ocr-service-server-v1/src/test/java/com/iqser/red/service/ocr/v1/server/service/WatermarkRemovalServiceTest.java b/ocr-service-v1/ocr-service-server-v1/src/test/java/com/iqser/red/service/ocr/v1/server/service/WatermarkRemovalServiceTest.java index ba4ff25..b985fe3 100644 --- a/ocr-service-v1/ocr-service-server-v1/src/test/java/com/iqser/red/service/ocr/v1/server/service/WatermarkRemovalServiceTest.java +++ b/ocr-service-v1/ocr-service-server-v1/src/test/java/com/iqser/red/service/ocr/v1/server/service/WatermarkRemovalServiceTest.java @@ -1,13 +1,12 @@ package com.iqser.red.service.ocr.v1.server.service; -import static org.junit.jupiter.api.Assertions.*; - import java.io.FileOutputStream; import org.junit.jupiter.api.Test; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.core.io.ClassPathResource; +import com.iqser.red.pdftronlogic.commons.WatermarkRemovalService; import com.iqser.red.service.ocr.v1.server.AbstractTest; import com.iqser.red.service.ocr.v1.server.utils.OsUtils; @@ -29,5 +28,4 @@ class WatermarkRemovalServiceTest extends AbstractTest { } } - } \ No newline at end of file