From caff5580dda644451433921a6a66dafe1cfa5dca Mon Sep 17 00:00:00 2001 From: Kilian Schuettler Date: Wed, 22 Feb 2023 11:57:41 +0100 Subject: [PATCH] RED-6126: In the OCRService, OCR Text is not applied to Document *refactored Tests with inheritance *called PDFNet init/terminate in tests *don't call init on startup --- .../server/initializer/PDFNetInitializer.java | 4 +- .../service/ocr/v1/server/AbstractTest.java | 77 +++++++++++++ .../v1/server/OcrServiceIntegrationTest.java | 109 ++++-------------- .../ImagePositionRetrievalServiceTest.java | 43 +++---- .../InvisibleElementRemovalServiceTest.java | 36 +++--- 5 files changed, 129 insertions(+), 140 deletions(-) create mode 100644 ocr-service-v1/ocr-service-server-v1/src/test/java/com/iqser/red/service/ocr/v1/server/AbstractTest.java diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/initializer/PDFNetInitializer.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/initializer/PDFNetInitializer.java index 2ad2450..a432800 100644 --- a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/initializer/PDFNetInitializer.java +++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/initializer/PDFNetInitializer.java @@ -6,8 +6,6 @@ import lombok.SneakyThrows; import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Component; -import javax.annotation.PostConstruct; - @Component @RequiredArgsConstructor public class PDFNetInitializer { @@ -23,9 +21,9 @@ public class PDFNetInitializer { // Do not change back to application runner, if it is application runner it takes messages from the queue before PDFNet is initialized, that leads to UnsatisfiedLinkError. public void init() { + PDFNet.initialize(pdftronLicense); PDFNet.setTempPath("/tmp/pdftron"); PDFNet.addResourceSearchPath(ocrModulePath); - PDFNet.initialize(pdftronLicense); } diff --git a/ocr-service-v1/ocr-service-server-v1/src/test/java/com/iqser/red/service/ocr/v1/server/AbstractTest.java b/ocr-service-v1/ocr-service-server-v1/src/test/java/com/iqser/red/service/ocr/v1/server/AbstractTest.java new file mode 100644 index 0000000..46e684c --- /dev/null +++ b/ocr-service-v1/ocr-service-server-v1/src/test/java/com/iqser/red/service/ocr/v1/server/AbstractTest.java @@ -0,0 +1,77 @@ +package com.iqser.red.service.ocr.v1.server; + +import com.iqser.red.service.ocr.v1.server.initializer.PDFNetInitializer; +import com.iqser.red.service.ocr.v1.server.utils.FileSystemBackedStorageService; +import com.iqser.red.storage.commons.StorageAutoConfiguration; +import com.iqser.red.storage.commons.service.StorageService; +import com.pdftron.pdf.PDFNet; +import lombok.SneakyThrows; +import org.junit.jupiter.api.*; +import org.junit.jupiter.api.extension.ExtendWith; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.autoconfigure.EnableAutoConfiguration; +import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.context.annotation.Import; +import org.springframework.context.annotation.Primary; +import org.springframework.test.context.junit.jupiter.SpringExtension; + +import static org.assertj.core.api.Assertions.assertThat; + +@ExtendWith(SpringExtension.class) +@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT) +@Import(AbstractTest.TestConfiguration.class) +public class AbstractTest { + + @Autowired + protected StorageService storageService; + + @Autowired + private PDFNetInitializer pdfNetInitializer; + + @BeforeEach + @SneakyThrows + @Disabled // OCRModule is not available on build server. If you want to run the test set the property at the top. + public void initPDFNet() { + pdfNetInitializer.init(); + } + + @AfterAll + public static void terminatePDFNet() { + PDFNet.terminate(); + } + + + @SneakyThrows + public void dummyTest() { + + // Build needs one test to not fail. + assertThat(1).isEqualTo(1); + } + + + @AfterEach + public void cleanupStorage() { + + if (this.storageService instanceof FileSystemBackedStorageService) { + ((FileSystemBackedStorageService) this.storageService).clearStorage(); + } + } + + + + @Configuration + @EnableAutoConfiguration(exclude = {StorageAutoConfiguration.class, RabbitAutoConfiguration.class}) + public static class TestConfiguration { + + @Bean + @Primary + public StorageService inMemoryStorage() { + + return new FileSystemBackedStorageService(); + } + + } +} diff --git a/ocr-service-v1/ocr-service-server-v1/src/test/java/com/iqser/red/service/ocr/v1/server/OcrServiceIntegrationTest.java b/ocr-service-v1/ocr-service-server-v1/src/test/java/com/iqser/red/service/ocr/v1/server/OcrServiceIntegrationTest.java index cbd57ff..afef495 100644 --- a/ocr-service-v1/ocr-service-server-v1/src/test/java/com/iqser/red/service/ocr/v1/server/OcrServiceIntegrationTest.java +++ b/ocr-service-v1/ocr-service-server-v1/src/test/java/com/iqser/red/service/ocr/v1/server/OcrServiceIntegrationTest.java @@ -1,7 +1,22 @@ package com.iqser.red.service.ocr.v1.server; -import static com.iqser.red.service.ocr.v1.server.utils.OsUtils.getTemporaryDirectory; -import static org.assertj.core.api.Assertions.assertThat; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.iqser.red.service.ocr.v1.server.service.FileStorageService; +import com.iqser.red.service.ocr.v1.server.service.OCRService; +import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType; +import com.pdftron.common.PDFNetException; +import com.pdftron.pdf.*; +import io.micrometer.prometheus.PrometheusMeterRegistry; +import io.micrometer.prometheus.PrometheusTimer; +import lombok.SneakyThrows; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.springframework.amqp.rabbit.core.RabbitTemplate; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.boot.test.mock.mockito.MockBean; +import org.springframework.core.io.ClassPathResource; import java.io.FileInputStream; import java.io.FileOutputStream; @@ -11,53 +26,11 @@ import java.util.ArrayList; import java.util.List; import java.util.concurrent.TimeUnit; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import org.springframework.amqp.rabbit.core.RabbitTemplate; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.boot.autoconfigure.EnableAutoConfiguration; -import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration; -import org.springframework.boot.test.context.SpringBootTest; -import org.springframework.boot.test.mock.mockito.MockBean; -import org.springframework.context.annotation.Bean; -import org.springframework.context.annotation.Configuration; -import org.springframework.context.annotation.Import; -import org.springframework.context.annotation.Primary; -import org.springframework.core.io.ClassPathResource; -import org.springframework.test.context.junit.jupiter.SpringExtension; +import static com.iqser.red.service.ocr.v1.server.utils.OsUtils.getTemporaryDirectory; +import static org.assertj.core.api.Assertions.assertThat; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.iqser.red.service.ocr.v1.server.service.FileStorageService; -import com.iqser.red.service.ocr.v1.server.service.OCRService; -import com.iqser.red.service.ocr.v1.server.utils.FileSystemBackedStorageService; -import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType; -import com.iqser.red.storage.commons.StorageAutoConfiguration; -import com.iqser.red.storage.commons.service.StorageService; -import com.pdftron.common.PDFNetException; -import com.pdftron.pdf.OCRModule; -import com.pdftron.pdf.PDFDoc; -import com.pdftron.pdf.Page; -import com.pdftron.pdf.PageIterator; -import com.pdftron.pdf.TextExtractor; - -import io.micrometer.prometheus.PrometheusMeterRegistry; -import io.micrometer.prometheus.PrometheusTimer; -import lombok.SneakyThrows; - -@ExtendWith(SpringExtension.class) -@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT // - , properties = {"pdftron.ocrmodule.path=/YourOCRModulePath"}) -@Import(OcrServiceIntegrationTest.TestConfiguration.class) -public class OcrServiceIntegrationTest { - - @Autowired - protected StorageService storageService; - - @Autowired - protected FileStorageService fileStorageService; +@SpringBootTest(properties = {"pdftron.ocrmodule.path=/YourOCRModulePath"}) +public class OcrServiceIntegrationTest extends AbstractTest { @Autowired protected ObjectMapper objectMapper; @@ -71,16 +44,13 @@ public class OcrServiceIntegrationTest { @Autowired private PrometheusMeterRegistry registry; - @BeforeEach @SneakyThrows - @Disabled // OCRModule is not available on build server. If you want to run the test set the property at the top. + @Disabled public void assertOCRModuleIsLoaded() { - assert OCRModule.isModuleAvailable(); } - @Test @Disabled // OCRModule is not available on build server. If you want to run the test set the property at the top. public void testOCRMetrics() { @@ -163,15 +133,16 @@ public class OcrServiceIntegrationTest { } try (var out = new FileOutputStream(getTemporaryDirectory() + "/" + fileName + ".pdf")) { ocrService.runOcrOnDocument("dossier", "file", out); + System.out.println("File:" + getTemporaryDirectory() + "/" + fileName + ".pdf"); } - System.out.println("File:" + getTemporaryDirectory() + "/" + fileName + ".pdf"); try (var fileStream = new FileInputStream(getTemporaryDirectory() + "/" + fileName + ".pdf")) { return extractAllTextFromDocument(fileStream); } - } + } + private static String extractAllTextFromDocument(InputStream fileStream) throws IOException, PDFNetException { PDFDoc pdfDoc = new PDFDoc(fileStream); @@ -189,34 +160,4 @@ public class OcrServiceIntegrationTest { } - @SneakyThrows - public void dummyTest() { - - // Build needs one test to not fail. - assertThat(1).isEqualTo(1); - } - - - @AfterEach - public void cleanupStorage() { - - if (this.storageService instanceof FileSystemBackedStorageService) { - ((FileSystemBackedStorageService) this.storageService).clearStorage(); - } - } - - - @Configuration - @EnableAutoConfiguration(exclude = {StorageAutoConfiguration.class, RabbitAutoConfiguration.class}) - public static class TestConfiguration { - - @Bean - @Primary - public StorageService inMemoryStorage() { - - return new FileSystemBackedStorageService(); - } - - } - } diff --git a/ocr-service-v1/ocr-service-server-v1/src/test/java/com/iqser/red/service/ocr/v1/server/service/ImagePositionRetrievalServiceTest.java b/ocr-service-v1/ocr-service-server-v1/src/test/java/com/iqser/red/service/ocr/v1/server/service/ImagePositionRetrievalServiceTest.java index d4eb08e..698e5de 100644 --- a/ocr-service-v1/ocr-service-server-v1/src/test/java/com/iqser/red/service/ocr/v1/server/service/ImagePositionRetrievalServiceTest.java +++ b/ocr-service-v1/ocr-service-server-v1/src/test/java/com/iqser/red/service/ocr/v1/server/service/ImagePositionRetrievalServiceTest.java @@ -1,7 +1,15 @@ package com.iqser.red.service.ocr.v1.server.service; -import static com.iqser.red.service.ocr.v1.server.utils.OsUtils.getTemporaryDirectory; -import static org.assertj.core.api.Assertions.assertThat; +import com.iqser.red.service.ocr.v1.server.AbstractTest; +import com.pdftron.common.PDFNetException; +import com.pdftron.pdf.*; +import com.pdftron.sdf.SDFDoc; +import lombok.SneakyThrows; +import org.junit.jupiter.api.Test; +import org.springframework.amqp.rabbit.core.RabbitTemplate; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.mock.mockito.MockBean; +import org.springframework.core.io.ClassPathResource; import java.io.FileInputStream; import java.io.FileOutputStream; @@ -12,36 +20,11 @@ import java.util.List; import java.util.Map; import java.util.stream.Collectors; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import org.springframework.amqp.rabbit.core.RabbitTemplate; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.boot.test.context.SpringBootTest; -import org.springframework.boot.test.mock.mockito.MockBean; -import org.springframework.context.annotation.Import; -import org.springframework.core.io.ClassPathResource; -import org.springframework.test.context.junit.jupiter.SpringExtension; +import static com.iqser.red.service.ocr.v1.server.utils.OsUtils.getTemporaryDirectory; +import static org.assertj.core.api.Assertions.assertThat; -import com.iqser.red.service.ocr.v1.server.Application; -import com.iqser.red.service.ocr.v1.server.OcrServiceIntegrationTest; -import com.pdftron.common.PDFNetException; -import com.pdftron.pdf.ColorPt; -import com.pdftron.pdf.ColorSpace; -import com.pdftron.pdf.Element; -import com.pdftron.pdf.ElementBuilder; -import com.pdftron.pdf.ElementWriter; -import com.pdftron.pdf.PDFDoc; -import com.pdftron.pdf.Page; -import com.pdftron.pdf.Rect; -import com.pdftron.pdf.RectCollection; -import com.pdftron.sdf.SDFDoc; -import lombok.SneakyThrows; - -@ExtendWith(SpringExtension.class) -@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT) -@Import(OcrServiceIntegrationTest.TestConfiguration.class) -class ImagePositionRetrievalServiceTest { +class ImagePositionRetrievalServiceTest extends AbstractTest { @Autowired private ImagePositionRetrievalService imagePositionRetrievalService; diff --git a/ocr-service-v1/ocr-service-server-v1/src/test/java/com/iqser/red/service/ocr/v1/server/service/InvisibleElementRemovalServiceTest.java b/ocr-service-v1/ocr-service-server-v1/src/test/java/com/iqser/red/service/ocr/v1/server/service/InvisibleElementRemovalServiceTest.java index 9aa1f26..af2ecb2 100644 --- a/ocr-service-v1/ocr-service-server-v1/src/test/java/com/iqser/red/service/ocr/v1/server/service/InvisibleElementRemovalServiceTest.java +++ b/ocr-service-v1/ocr-service-server-v1/src/test/java/com/iqser/red/service/ocr/v1/server/service/InvisibleElementRemovalServiceTest.java @@ -1,34 +1,24 @@ package com.iqser.red.service.ocr.v1.server.service; -import static com.iqser.red.service.ocr.v1.server.utils.OsUtils.getTemporaryDirectory; -import static org.assertj.core.api.AssertionsForClassTypes.assertThat; - -import java.io.FileInputStream; -import java.io.FileOutputStream; - -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import org.springframework.amqp.rabbit.core.RabbitTemplate; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.boot.test.context.SpringBootTest; -import org.springframework.boot.test.mock.mockito.MockBean; -import org.springframework.context.annotation.Import; -import org.springframework.core.io.ClassPathResource; -import org.springframework.test.context.junit.jupiter.SpringExtension; - -import com.iqser.red.service.ocr.v1.server.Application; -import com.iqser.red.service.ocr.v1.server.OcrServiceIntegrationTest; +import com.iqser.red.service.ocr.v1.server.AbstractTest; import com.pdftron.pdf.PDFDoc; import com.pdftron.pdf.Page; import com.pdftron.pdf.PageIterator; import com.pdftron.pdf.TextExtractor; - import lombok.SneakyThrows; +import org.junit.jupiter.api.Test; +import org.springframework.amqp.rabbit.core.RabbitTemplate; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.mock.mockito.MockBean; +import org.springframework.core.io.ClassPathResource; -@ExtendWith(SpringExtension.class) -@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT) -@Import(OcrServiceIntegrationTest.TestConfiguration.class) -public class InvisibleElementRemovalServiceTest { +import java.io.FileInputStream; +import java.io.FileOutputStream; + +import static com.iqser.red.service.ocr.v1.server.utils.OsUtils.getTemporaryDirectory; +import static org.assertj.core.api.AssertionsForClassTypes.assertThat; + +public class InvisibleElementRemovalServiceTest extends AbstractTest { @Autowired private InvisibleElementRemovalService invisibleElementRemovalService;