RED-6126: In the OCRService, OCR Text is not applied to Document
* Refactor tests to use inheritance (shared AbstractTest base class) * Call PDFNet init/terminate in tests * Don't call PDFNet init on startup
This commit is contained in:
parent
430ad45a67
commit
caff5580dd
@ -6,8 +6,6 @@ import lombok.SneakyThrows;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import javax.annotation.PostConstruct;
|
||||
|
||||
@Component
|
||||
@RequiredArgsConstructor
|
||||
public class PDFNetInitializer {
|
||||
@ -23,9 +21,9 @@ public class PDFNetInitializer {
|
||||
// Do not change this back to an application runner: as an application runner, messages are taken from the queue before PDFNet is initialized, which leads to an UnsatisfiedLinkError.
|
||||
public void init() {
|
||||
|
||||
PDFNet.initialize(pdftronLicense);
|
||||
PDFNet.setTempPath("/tmp/pdftron");
|
||||
PDFNet.addResourceSearchPath(ocrModulePath);
|
||||
PDFNet.initialize(pdftronLicense);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -0,0 +1,77 @@
|
||||
package com.iqser.red.service.ocr.v1.server;
|
||||
|
||||
import com.iqser.red.service.ocr.v1.server.initializer.PDFNetInitializer;
|
||||
import com.iqser.red.service.ocr.v1.server.utils.FileSystemBackedStorageService;
|
||||
import com.iqser.red.storage.commons.StorageAutoConfiguration;
|
||||
import com.iqser.red.storage.commons.service.StorageService;
|
||||
import com.pdftron.pdf.PDFNet;
|
||||
import lombok.SneakyThrows;
|
||||
import org.junit.jupiter.api.*;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.context.annotation.Primary;
|
||||
import org.springframework.test.context.junit.jupiter.SpringExtension;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
@ExtendWith(SpringExtension.class)
|
||||
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
|
||||
@Import(AbstractTest.TestConfiguration.class)
|
||||
public class AbstractTest {
|
||||
|
||||
@Autowired
|
||||
protected StorageService storageService;
|
||||
|
||||
@Autowired
|
||||
private PDFNetInitializer pdfNetInitializer;
|
||||
|
||||
@BeforeEach
|
||||
@SneakyThrows
|
||||
@Disabled // OCRModule is not available on build server. If you want to run the test set the property at the top.
|
||||
public void initPDFNet() {
|
||||
pdfNetInitializer.init();
|
||||
}
|
||||
|
||||
@AfterAll
|
||||
public static void terminatePDFNet() {
|
||||
PDFNet.terminate();
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public void dummyTest() {
|
||||
|
||||
// Build needs one test to not fail.
|
||||
assertThat(1).isEqualTo(1);
|
||||
}
|
||||
|
||||
|
||||
@AfterEach
|
||||
public void cleanupStorage() {
|
||||
|
||||
if (this.storageService instanceof FileSystemBackedStorageService) {
|
||||
((FileSystemBackedStorageService) this.storageService).clearStorage();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Configuration
|
||||
@EnableAutoConfiguration(exclude = {StorageAutoConfiguration.class, RabbitAutoConfiguration.class})
|
||||
public static class TestConfiguration {
|
||||
|
||||
@Bean
|
||||
@Primary
|
||||
public StorageService inMemoryStorage() {
|
||||
|
||||
return new FileSystemBackedStorageService();
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
@ -1,7 +1,22 @@
|
||||
package com.iqser.red.service.ocr.v1.server;
|
||||
|
||||
import static com.iqser.red.service.ocr.v1.server.utils.OsUtils.getTemporaryDirectory;
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.iqser.red.service.ocr.v1.server.service.FileStorageService;
|
||||
import com.iqser.red.service.ocr.v1.server.service.OCRService;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
|
||||
import com.pdftron.common.PDFNetException;
|
||||
import com.pdftron.pdf.*;
|
||||
import io.micrometer.prometheus.PrometheusMeterRegistry;
|
||||
import io.micrometer.prometheus.PrometheusTimer;
|
||||
import lombok.SneakyThrows;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Disabled;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.amqp.rabbit.core.RabbitTemplate;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.boot.test.mock.mockito.MockBean;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
@ -11,53 +26,11 @@ import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Disabled;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.springframework.amqp.rabbit.core.RabbitTemplate;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.boot.test.mock.mockito.MockBean;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.context.annotation.Primary;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
import org.springframework.test.context.junit.jupiter.SpringExtension;
|
||||
import static com.iqser.red.service.ocr.v1.server.utils.OsUtils.getTemporaryDirectory;
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.iqser.red.service.ocr.v1.server.service.FileStorageService;
|
||||
import com.iqser.red.service.ocr.v1.server.service.OCRService;
|
||||
import com.iqser.red.service.ocr.v1.server.utils.FileSystemBackedStorageService;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
|
||||
import com.iqser.red.storage.commons.StorageAutoConfiguration;
|
||||
import com.iqser.red.storage.commons.service.StorageService;
|
||||
import com.pdftron.common.PDFNetException;
|
||||
import com.pdftron.pdf.OCRModule;
|
||||
import com.pdftron.pdf.PDFDoc;
|
||||
import com.pdftron.pdf.Page;
|
||||
import com.pdftron.pdf.PageIterator;
|
||||
import com.pdftron.pdf.TextExtractor;
|
||||
|
||||
import io.micrometer.prometheus.PrometheusMeterRegistry;
|
||||
import io.micrometer.prometheus.PrometheusTimer;
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
@ExtendWith(SpringExtension.class)
|
||||
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT //
|
||||
, properties = {"pdftron.ocrmodule.path=/YourOCRModulePath"})
|
||||
@Import(OcrServiceIntegrationTest.TestConfiguration.class)
|
||||
public class OcrServiceIntegrationTest {
|
||||
|
||||
@Autowired
|
||||
protected StorageService storageService;
|
||||
|
||||
@Autowired
|
||||
protected FileStorageService fileStorageService;
|
||||
@SpringBootTest(properties = {"pdftron.ocrmodule.path=/YourOCRModulePath"})
|
||||
public class OcrServiceIntegrationTest extends AbstractTest {
|
||||
|
||||
@Autowired
|
||||
protected ObjectMapper objectMapper;
|
||||
@ -71,16 +44,13 @@ public class OcrServiceIntegrationTest {
|
||||
@Autowired
|
||||
private PrometheusMeterRegistry registry;
|
||||
|
||||
|
||||
@BeforeEach
|
||||
@SneakyThrows
|
||||
@Disabled // OCRModule is not available on build server. If you want to run the test set the property at the top.
|
||||
@Disabled
|
||||
public void assertOCRModuleIsLoaded() {
|
||||
|
||||
assert OCRModule.isModuleAvailable();
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
@Disabled // OCRModule is not available on build server. If you want to run the test set the property at the top.
|
||||
public void testOCRMetrics() {
|
||||
@ -163,15 +133,16 @@ public class OcrServiceIntegrationTest {
|
||||
}
|
||||
try (var out = new FileOutputStream(getTemporaryDirectory() + "/" + fileName + ".pdf")) {
|
||||
ocrService.runOcrOnDocument("dossier", "file", out);
|
||||
System.out.println("File:" + getTemporaryDirectory() + "/" + fileName + ".pdf");
|
||||
}
|
||||
|
||||
System.out.println("File:" + getTemporaryDirectory() + "/" + fileName + ".pdf");
|
||||
try (var fileStream = new FileInputStream(getTemporaryDirectory() + "/" + fileName + ".pdf")) {
|
||||
return extractAllTextFromDocument(fileStream);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
private static String extractAllTextFromDocument(InputStream fileStream) throws IOException, PDFNetException {
|
||||
|
||||
PDFDoc pdfDoc = new PDFDoc(fileStream);
|
||||
@ -189,34 +160,4 @@ public class OcrServiceIntegrationTest {
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public void dummyTest() {
|
||||
|
||||
// Build needs one test to not fail.
|
||||
assertThat(1).isEqualTo(1);
|
||||
}
|
||||
|
||||
|
||||
@AfterEach
|
||||
public void cleanupStorage() {
|
||||
|
||||
if (this.storageService instanceof FileSystemBackedStorageService) {
|
||||
((FileSystemBackedStorageService) this.storageService).clearStorage();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Configuration
|
||||
@EnableAutoConfiguration(exclude = {StorageAutoConfiguration.class, RabbitAutoConfiguration.class})
|
||||
public static class TestConfiguration {
|
||||
|
||||
@Bean
|
||||
@Primary
|
||||
public StorageService inMemoryStorage() {
|
||||
|
||||
return new FileSystemBackedStorageService();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -1,7 +1,15 @@
|
||||
package com.iqser.red.service.ocr.v1.server.service;
|
||||
|
||||
import static com.iqser.red.service.ocr.v1.server.utils.OsUtils.getTemporaryDirectory;
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import com.iqser.red.service.ocr.v1.server.AbstractTest;
|
||||
import com.pdftron.common.PDFNetException;
|
||||
import com.pdftron.pdf.*;
|
||||
import com.pdftron.sdf.SDFDoc;
|
||||
import lombok.SneakyThrows;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.amqp.rabbit.core.RabbitTemplate;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.mock.mockito.MockBean;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
@ -12,36 +20,11 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.springframework.amqp.rabbit.core.RabbitTemplate;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.boot.test.mock.mockito.MockBean;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
import org.springframework.test.context.junit.jupiter.SpringExtension;
|
||||
import static com.iqser.red.service.ocr.v1.server.utils.OsUtils.getTemporaryDirectory;
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
import com.iqser.red.service.ocr.v1.server.Application;
|
||||
import com.iqser.red.service.ocr.v1.server.OcrServiceIntegrationTest;
|
||||
import com.pdftron.common.PDFNetException;
|
||||
import com.pdftron.pdf.ColorPt;
|
||||
import com.pdftron.pdf.ColorSpace;
|
||||
import com.pdftron.pdf.Element;
|
||||
import com.pdftron.pdf.ElementBuilder;
|
||||
import com.pdftron.pdf.ElementWriter;
|
||||
import com.pdftron.pdf.PDFDoc;
|
||||
import com.pdftron.pdf.Page;
|
||||
import com.pdftron.pdf.Rect;
|
||||
import com.pdftron.pdf.RectCollection;
|
||||
import com.pdftron.sdf.SDFDoc;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
@ExtendWith(SpringExtension.class)
|
||||
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
|
||||
@Import(OcrServiceIntegrationTest.TestConfiguration.class)
|
||||
class ImagePositionRetrievalServiceTest {
|
||||
class ImagePositionRetrievalServiceTest extends AbstractTest {
|
||||
|
||||
@Autowired
|
||||
private ImagePositionRetrievalService imagePositionRetrievalService;
|
||||
|
||||
@ -1,34 +1,24 @@
|
||||
package com.iqser.red.service.ocr.v1.server.service;
|
||||
|
||||
import static com.iqser.red.service.ocr.v1.server.utils.OsUtils.getTemporaryDirectory;
|
||||
import static org.assertj.core.api.AssertionsForClassTypes.assertThat;
|
||||
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.springframework.amqp.rabbit.core.RabbitTemplate;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.boot.test.mock.mockito.MockBean;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
import org.springframework.test.context.junit.jupiter.SpringExtension;
|
||||
|
||||
import com.iqser.red.service.ocr.v1.server.Application;
|
||||
import com.iqser.red.service.ocr.v1.server.OcrServiceIntegrationTest;
|
||||
import com.iqser.red.service.ocr.v1.server.AbstractTest;
|
||||
import com.pdftron.pdf.PDFDoc;
|
||||
import com.pdftron.pdf.Page;
|
||||
import com.pdftron.pdf.PageIterator;
|
||||
import com.pdftron.pdf.TextExtractor;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.amqp.rabbit.core.RabbitTemplate;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.mock.mockito.MockBean;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
|
||||
@ExtendWith(SpringExtension.class)
|
||||
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
|
||||
@Import(OcrServiceIntegrationTest.TestConfiguration.class)
|
||||
public class InvisibleElementRemovalServiceTest {
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
|
||||
import static com.iqser.red.service.ocr.v1.server.utils.OsUtils.getTemporaryDirectory;
|
||||
import static org.assertj.core.api.AssertionsForClassTypes.assertThat;
|
||||
|
||||
public class InvisibleElementRemovalServiceTest extends AbstractTest {
|
||||
|
||||
@Autowired
|
||||
private InvisibleElementRemovalService invisibleElementRemovalService;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user