diff --git a/.dev/docker-compose.yaml b/.dev/docker-compose.yaml
new file mode 100644
index 0000000..af378d3
--- /dev/null
+++ b/.dev/docker-compose.yaml
@@ -0,0 +1,19 @@
+version: '2'
+# Local development dependencies: a RabbitMQ broker and a MinIO object store.
+# The credentials below are throwaway development values only.
+services:
+  rabbitmq:
+    image: 'rabbitmq:3.9-management-alpine'  # "-management" variant: enables the management plugin behind the mapped port 15672
+    mem_limit: 500m
+    environment:
+      - RABBITMQ_DEFAULT_USER=user
+      - RABBITMQ_DEFAULT_PASS=rabbitmq
+    ports:
+      - "5672:5672"
+      - "15672:15672"
+  minio:
+    mem_limit: 500m
+    image: 'minio/minio:latest'
+    command: server /entity
+    ports:
+      - "9000:9000"
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..38b5906
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,13 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+## [Unreleased]
+
+### Fixed
+
+### Added
+
+### Changed
+
+### Removed
\ No newline at end of file
diff --git a/bamboo-specs/pom.xml b/bamboo-specs/pom.xml
new file mode 100644
index 0000000..d35f676
--- /dev/null
+++ b/bamboo-specs/pom.xml
@@ -0,0 +1,37 @@
+
+ 4.0.0
+
+
+ com.atlassian.bamboo
+ bamboo-specs-parent
+ 8.1.3
+
+
+
+ bamboo-specs
+ 1.0.0-SNAPSHOT
+ jar
+
+
+
+ com.atlassian.bamboo
+ bamboo-specs-api
+
+
+ com.atlassian.bamboo
+ bamboo-specs
+
+
+
+
+ junit
+ junit
+ test
+
+
+
+
+
+
+
diff --git a/bamboo-specs/src/main/java/buildjob/PlanSpec.java b/bamboo-specs/src/main/java/buildjob/PlanSpec.java
new file mode 100644
index 0000000..ec40b3a
--- /dev/null
+++ b/bamboo-specs/src/main/java/buildjob/PlanSpec.java
@@ -0,0 +1,125 @@
+package buildjob;
+
+import static com.atlassian.bamboo.specs.builders.task.TestParserTask.createJUnitParserTask;
+
+import java.time.LocalTime;
+
+import com.atlassian.bamboo.specs.api.BambooSpec;
+import com.atlassian.bamboo.specs.api.builders.BambooKey;
+import com.atlassian.bamboo.specs.api.builders.Variable;
+import com.atlassian.bamboo.specs.api.builders.docker.DockerConfiguration;
+import com.atlassian.bamboo.specs.api.builders.permission.PermissionType;
+import com.atlassian.bamboo.specs.api.builders.permission.Permissions;
+import com.atlassian.bamboo.specs.api.builders.permission.PlanPermissions;
+import com.atlassian.bamboo.specs.api.builders.plan.Job;
+import com.atlassian.bamboo.specs.api.builders.plan.Plan;
+import com.atlassian.bamboo.specs.api.builders.plan.PlanIdentifier;
+import com.atlassian.bamboo.specs.api.builders.plan.Stage;
+import com.atlassian.bamboo.specs.api.builders.plan.branches.BranchCleanup;
+import com.atlassian.bamboo.specs.api.builders.plan.branches.PlanBranchManagement;
+import com.atlassian.bamboo.specs.api.builders.project.Project;
+import com.atlassian.bamboo.specs.builders.task.CheckoutItem;
+import com.atlassian.bamboo.specs.builders.task.InjectVariablesTask;
+import com.atlassian.bamboo.specs.builders.task.ScriptTask;
+import com.atlassian.bamboo.specs.builders.task.VcsCheckoutTask;
+import com.atlassian.bamboo.specs.builders.task.VcsTagTask;
+import com.atlassian.bamboo.specs.builders.trigger.BitbucketServerTrigger;
+import com.atlassian.bamboo.specs.builders.trigger.ScheduledTrigger;
+import com.atlassian.bamboo.specs.model.task.InjectVariablesScope;
+import com.atlassian.bamboo.specs.model.task.ScriptTaskProperties.Location;
+import com.atlassian.bamboo.specs.util.BambooServer;
+
+/**
+ * Bamboo Specs for the OCR service: the regular build plan and the daily security-analysis plan.
+ * Learn more on: https://confluence.atlassian.com/display/BAMBOO/Bamboo+Specs
+ */
+@BambooSpec
+public class PlanSpec {
+
+    private static final String SERVICE_NAME = "ocr-service";
+
+    // Locale.ROOT keeps the plan key locale-independent (a Turkish default locale upper-cases "i" to a dotted capital); replace() drops the literal "-" without a regex.
+    private static final String SERVICE_KEY = SERVICE_NAME.toUpperCase(java.util.Locale.ROOT).replace("-", "");
+
+    /**
+     * Run main to publish both plans, each followed by its permissions, to the Bamboo server at http://localhost:8085.
+     */
+    public static void main(final String[] args) throws Exception {
+        //By default credentials are read from the '.credentials' file.
+        BambooServer bambooServer = new BambooServer("http://localhost:8085");
+        PlanSpec spec = new PlanSpec();
+
+        Plan plan = spec.createPlan();
+        bambooServer.publish(plan);
+        bambooServer.publish(spec.createPlanPermission(plan.getIdentifier()));
+
+        Plan secPlan = spec.createSecBuild();
+        bambooServer.publish(secPlan);
+        bambooServer.publish(spec.createPlanPermission(secPlan.getIdentifier()));
+    }
+
+
+    /** Builds the permissions published for the plan identified by {@code planIdentifier}. */
+    private PlanPermissions createPlanPermission(PlanIdentifier planIdentifier) {
+
+        Permissions permission = new Permissions()
+                .userPermissions("atlbamboo", PermissionType.EDIT, PermissionType.VIEW, PermissionType.ADMIN, PermissionType.CLONE, PermissionType.BUILD)
+                .groupPermissions("Development", PermissionType.EDIT, PermissionType.VIEW, PermissionType.CLONE, PermissionType.BUILD)
+                .groupPermissions("devplant", PermissionType.EDIT, PermissionType.VIEW, PermissionType.CLONE, PermissionType.BUILD)
+                .loggedInUserPermissions(PermissionType.VIEW)
+                .anonymousUserPermissionView();
+        return new PlanPermissions(planIdentifier.getProjectKey(), planIdentifier.getPlanKey()).permissions(permission);
+    }
+
+
+    /** Returns the Bamboo project (name and key "RED") that both plans belong to. */
+    private Project project() {
+
+        return new Project().name("RED").key(new BambooKey("RED"));
+    }
+
+
+    /** Creates the build plan: clean, checkout, build-java.sh, JUnit result parsing and VCS tagging, run in a Maven Docker container. */
+    public Plan createPlan() {
+
+        return new Plan(project(), SERVICE_NAME, new BambooKey(SERVICE_KEY)).description("Plan created from (enter repository url of your plan)")
+                .variables(new Variable("maven_add_param", ""))
+                .stages(new Stage("Default Stage").jobs(new Job("Default Job", new BambooKey("JOB1")).tasks(
+                        new ScriptTask().description("Clean").inlineBody("#!/bin/bash\n" + "set -e\n" + "rm -rf ./*"),
+                        new VcsCheckoutTask().description("Checkout Default Repository").cleanCheckout(true).checkoutItems(new CheckoutItem().defaultRepository()),
+                        new ScriptTask().description("Build").location(Location.FILE).fileFromPath("bamboo-specs/src/main/resources/scripts/build-java.sh").argument(SERVICE_NAME),
+                        createJUnitParserTask().description("Resultparser")
+                                .resultDirectories("**/test-reports/*.xml, **/target/surefire-reports/*.xml, **/target/failsafe-reports/*.xml")
+                                .enabled(true),
+                        // The build script writes a gitTag entry into git.tag; it is injected under namespace "g" and reused below as the tag name.
+                        new InjectVariablesTask().description("Inject git Tag").path("git.tag").namespace("g").scope(InjectVariablesScope.LOCAL),
+                        new VcsTagTask().description("${bamboo.g.gitTag}").tagName("${bamboo.g.gitTag}").defaultRepository())
+                        .dockerConfiguration(new DockerConfiguration().image("nexus.iqser.com:5001/infra/maven:3.8.4-openjdk-17-slim")
+                                .volume("/etc/maven/settings.xml", "/usr/share/maven/conf/settings.xml")
+                                .volume("/var/run/docker.sock", "/var/run/docker.sock"))))
+                .linkedRepositories("RED / " + SERVICE_NAME)
+                .triggers(new BitbucketServerTrigger())
+                .planBranchManagement(new PlanBranchManagement().createForVcsBranch()
+                        .delete(new BranchCleanup().whenInactiveInRepositoryAfterDays(14))
+                        .notificationForCommitters());
+    }
+
+
+    /** Creates the security-analysis plan: runs sonar-java.sh once daily at 23:00, with plan branches for VCS branches matching "release.*". */
+    public Plan createSecBuild() {
+
+        return new Plan(project(), SERVICE_NAME + "-Sec", new BambooKey(SERVICE_KEY + "SEC")).description("Security Analysis Plan")
+                .stages(new Stage("Default Stage").jobs(new Job("Default Job", new BambooKey("JOB1")).tasks(
+                        new ScriptTask().description("Clean").inlineBody("#!/bin/bash\n" + "set -e\n" + "rm -rf ./*"),
+                        new VcsCheckoutTask().description("Checkout Default Repository").checkoutItems(new CheckoutItem().defaultRepository()),
+                        new ScriptTask().description("Sonar").location(Location.FILE).fileFromPath("bamboo-specs/src/main/resources/scripts/sonar-java.sh").argument(SERVICE_NAME))
+                        .dockerConfiguration(new DockerConfiguration().image("nexus.iqser.com:5001/infra/maven:3.6.2-jdk-13-3.0.0")
+                                .dockerRunArguments("--net=host")
+                                .volume("/etc/maven/settings.xml", "/usr/share/maven/ref/settings.xml")
+                                .volume("/var/run/docker.sock", "/var/run/docker.sock"))))
+                .linkedRepositories("RED / " + SERVICE_NAME)
+                .triggers(new ScheduledTrigger().scheduleOnceDaily(LocalTime.of(23, 0)))
+                .planBranchManagement(new PlanBranchManagement().createForVcsBranchMatching("release.*").notificationForCommitters());
+    }
+
+}
diff --git a/bamboo-specs/src/main/resources/scripts/build-java.sh b/bamboo-specs/src/main/resources/scripts/build-java.sh
new file mode 100755
index 0000000..59daedc
--- /dev/null
+++ b/bamboo-specs/src/main/resources/scripts/build-java.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+set -e
+
+SERVICE_NAME=$1
+
+if [[ "$bamboo_planRepository_branchName" == "master" ]]
+then
+ branchVersion=$(cat pom.xml | grep -Eo ".*" | sed -s 's|\(.*\)\..*\(-*.*\)|\1|')
+ latestVersion=$(semver $( git tag -l $branchVersion.* ) | tail -n1)
+ newVersion="$(semver $latestVersion -p -i minor)"
+elif [[ "$bamboo_planRepository_branchName" == release* ]]
+then
+ branchVersion=$(echo $bamboo_planRepository_branchName | sed -s 's|release\/\([0-9]\+\.[0-9]\+\)\.x|\1|')
+ latestVersion=$(semver $( git tag -l $branchVersion.* ) | tail -n1)
+ newVersion="$(semver $latestVersion -p -i patch)"
+elif [[ "${bamboo_version_tag}" != "dev" ]]
+then
+ newVersion="${bamboo_version_tag}"
+else
+ mvn -f ${bamboo_build_working_directory}/$SERVICE_NAME-v1/pom.xml \
+ --no-transfer-progress \
+ ${bamboo_maven_add_param} \
+ clean install \
+ -Djava.security.egd=file:/dev/./urandomelse
+ echo "gitTag=${bamboo_planRepository_1_branch}_${bamboo_buildNumber}" > git.tag
+ exit 0
+fi
+
+echo "gitTag=${newVersion}" > git.tag
+
+mvn --no-transfer-progress \
+ -f ${bamboo_build_working_directory}/$SERVICE_NAME-v1/pom.xml \
+ versions:set \
+ -DnewVersion=${newVersion}
+
+mvn --no-transfer-progress \
+ -f ${bamboo_build_working_directory}/$SERVICE_NAME-image-v1/pom.xml \
+ versions:set \
+ -DnewVersion=${newVersion}
+
+mvn -f ${bamboo_build_working_directory}/$SERVICE_NAME-v1/pom.xml \
+ --no-transfer-progress \
+ clean deploy \
+ ${bamboo_maven_add_param} \
+ -e \
+ -DdeployAtEnd=true \
+ -Dmaven.wagon.http.ssl.insecure=true \
+ -Dmaven.wagon.http.ssl.allowall=true \
+ -Dmaven.wagon.http.ssl.ignore.validity.dates=true \
+ -DaltDeploymentRepository=iqser_release::default::https://nexus.iqser.com/repository/red-platform-releases
+
+mvn --no-transfer-progress \
+ -f ${bamboo_build_working_directory}/$SERVICE_NAME-image-v1/pom.xml \
+ package
+
+mvn --no-transfer-progress \
+ -f ${bamboo_build_working_directory}/$SERVICE_NAME-image-v1/pom.xml \
+ docker:push
\ No newline at end of file
diff --git a/bamboo-specs/src/main/resources/scripts/sonar-java.sh b/bamboo-specs/src/main/resources/scripts/sonar-java.sh
new file mode 100755
index 0000000..b7ade4d
--- /dev/null
+++ b/bamboo-specs/src/main/resources/scripts/sonar-java.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+set -e
+
+SERVICE_NAME=$1
+
+echo "build jar binaries"
+mvn -f ${bamboo_build_working_directory}/$SERVICE_NAME-v1/pom.xml \
+ --no-transfer-progress \
+ clean install \
+ -Djava.security.egd=file:/dev/./urandomelse
+
+echo "dependency-check:aggregate"
+mvn --no-transfer-progress \
+ -f ${bamboo_build_working_directory}/$SERVICE_NAME-v1/pom.xml \
+ org.owasp:dependency-check-maven:aggregate
+
+if [[ -z "${bamboo_repository_pr_key}" ]]
+then
+ echo "Sonar Scan for branch: ${bamboo_planRepository_1_branch}"
+ mvn --no-transfer-progress \
+ -f ${bamboo_build_working_directory}/$SERVICE_NAME-v1/pom.xml \
+ sonar:sonar \
+ -Dsonar.projectKey=RED_$SERVICE_NAME \
+ -Dsonar.host.url=https://sonarqube.iqser.com \
+ -Dsonar.login=${bamboo_sonarqube_api_token_secret} \
+ -Dsonar.branch.name=${bamboo_planRepository_1_branch} \
+ -Dsonar.dependencyCheck.jsonReportPath=target/dependency-check-report.json \
+ -Dsonar.dependencyCheck.xmlReportPath=target/dependency-check-report.xml \
+ -Dsonar.dependencyCheck.htmlReportPath=target/dependency-check-report.html
+else
+ echo "Sonar Scan for PR with key1: ${bamboo_repository_pr_key}"
+ mvn --no-transfer-progress \
+ -f ${bamboo_build_working_directory}/$SERVICE_NAME-v1/pom.xml \
+ sonar:sonar \
+ -Dsonar.projectKey=RED_$SERVICE_NAME \
+ -Dsonar.host.url=https://sonarqube.iqser.com \
+ -Dsonar.login=${bamboo_sonarqube_api_token_secret} \
+ -Dsonar.pullrequest.key=${bamboo_repository_pr_key} \
+ -Dsonar.pullrequest.branch=${bamboo_repository_pr_sourceBranch} \
+ -Dsonar.pullrequest.base=${bamboo_repository_pr_targetBranch} \
+ -Dsonar.dependencyCheck.jsonReportPath=target/dependency-check-report.json \
+ -Dsonar.dependencyCheck.xmlReportPath=target/dependency-check-report.xml \
+ -Dsonar.dependencyCheck.htmlReportPath=target/dependency-check-report.html
+fi
\ No newline at end of file
diff --git a/bamboo-specs/src/test/java/buildjob/PlanSpecTest.java b/bamboo-specs/src/test/java/buildjob/PlanSpecTest.java
new file mode 100644
index 0000000..6e1b615
--- /dev/null
+++ b/bamboo-specs/src/test/java/buildjob/PlanSpecTest.java
@@ -0,0 +1,22 @@
+package buildjob;
+
+import org.junit.Test;
+
+import com.atlassian.bamboo.specs.api.builders.plan.Plan;
+import com.atlassian.bamboo.specs.api.exceptions.PropertiesValidationException;
+import com.atlassian.bamboo.specs.api.util.EntityPropertiesBuilders;
+
+/** Validates the plans defined in {@link PlanSpec} by building their entity properties. */
+public class PlanSpecTest {
+
+    @Test
+    public void checkYourPlanOffline() throws PropertiesValidationException {
+        // The build plan is checked first, then the security-analysis plan.
+        final Plan buildPlan = new PlanSpec().createPlan();
+        EntityPropertiesBuilders.build(buildPlan);
+
+        final Plan securityPlan = new PlanSpec().createSecBuild();
+        EntityPropertiesBuilders.build(securityPlan);
+    }
+
+}
\ No newline at end of file
diff --git a/ocr-service-image-v1/libs/pdftron/OCRModuleLinux.tar.gz b/ocr-service-image-v1/libs/pdftron/OCRModuleLinux.tar.gz
new file mode 100644
index 0000000..1fee039
Binary files /dev/null and b/ocr-service-image-v1/libs/pdftron/OCRModuleLinux.tar.gz differ
diff --git a/ocr-service-image-v1/pom.xml b/ocr-service-image-v1/pom.xml
new file mode 100644
index 0000000..3d6d18d
--- /dev/null
+++ b/ocr-service-image-v1/pom.xml
@@ -0,0 +1,118 @@
+
+
+
+ com.iqser.red
+ platform-docker-dependency
+ 1.2.0
+
+
+ 4.0.0
+
+ ocr-service-image-v1
+ com.iqser.red.service
+ 1.0-SNAPSHOT
+ pom
+
+
+ ocr-service-server-v1
+ ${service.server}.jar
+ false
+ ${docker.image.prefix}/${service.server}
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-dependency-plugin
+
+
+ org.apache.maven.plugins
+ maven-resources-plugin
+
+
+ org.codehaus.mojo
+ exec-maven-plugin
+
+
+ io.fabric8
+ docker-maven-plugin
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-dependency-plugin
+
+
+ download-platform-jar
+ prepare-package
+
+ copy
+
+
+
+
+ ${project.groupId}
+ ${service.server}
+ ${version}
+ jar
+ true
+ ${platform.jar}
+
+
+ ${docker.build.directory}
+
+
+
+
+
+ io.fabric8
+ docker-maven-plugin
+
+
+
+ ${docker.image.name}
+
+ ${docker.build.directory}
+
+ ${platform.jar}
+
+
+ ${docker.image.version}
+ latest
+
+
+
+
+
+
+
+ maven-resources-plugin
+
+
+ copy-resources
+ prepare-package
+
+ copy-resources
+
+
+ ${basedir}/target/build/libs/
+
+
+ libs
+ false
+
+
+
+
+
+
+
+
+
+
diff --git a/ocr-service-image-v1/src/main/docker/Dockerfile b/ocr-service-image-v1/src/main/docker/Dockerfile
new file mode 100644
index 0000000..9efc0e2
--- /dev/null
+++ b/ocr-service-image-v1/src/main/docker/Dockerfile
@@ -0,0 +1,14 @@
+FROM red/base-image:2.0.0
+
+# Unpack the PDFTron OCR module into /OCRModule in one layer; the archive is removed once extracted.
+COPY "libs/pdftron/OCRModuleLinux.tar.gz" .
+RUN tar xzf OCRModuleLinux.tar.gz \
+    && mkdir /OCRModule \
+    && mv Lib/* /OCRModule/ \
+    && rm OCRModuleLinux.tar.gz
+
+# PLATFORM_JAR is passed as a build argument and re-exported so it is also set in the running container.
+ARG PLATFORM_JAR
+ENV PLATFORM_JAR=${PLATFORM_JAR}
+ENV USES_ELASTICSEARCH=false
+COPY ["${PLATFORM_JAR}", "/"]
diff --git a/ocr-service-v1/ocr-service-api-v1/pom.xml b/ocr-service-v1/ocr-service-api-v1/pom.xml
new file mode 100644
index 0000000..783322f
--- /dev/null
+++ b/ocr-service-v1/ocr-service-api-v1/pom.xml
@@ -0,0 +1,68 @@
+
+
+ 4.0.0
+
+
+ com.iqser.red.service
+ ocr-service-v1
+ 1.0-SNAPSHOT
+
+
+ ocr-service-api-v1
+ 1.0-SNAPSHOT
+
+
+ 1.269.0
+ 3.155.0
+ 1.9.9
+
+
+
+
+
+
+ com.dslplatform
+ dsl-json-java8
+ ${dsljson.version}
+
+
+
+
+
+ io.github.openfeign
+ feign-core
+ true
+
+
+
+ org.springframework
+ spring-web
+
+
+
+ com.iqser.red.service
+ persistence-service-api-v1
+
+
+ com.iqser.red.service
+ redaction-service-api-v1
+
+
+ ${persistence-service.version}
+
+
+ com.iqser.red.service
+ redaction-service-api-v1
+
+
+ com.iqser.red.service
+ persistence-service-api-v1
+
+
+ ${redaction-service.version}
+
+
+
+
diff --git a/ocr-service-v1/ocr-service-api-v1/src/main/java/com/iqser/red/service/ocr/v1/api/model/DocumentRequest.java b/ocr-service-v1/ocr-service-api-v1/src/main/java/com/iqser/red/service/ocr/v1/api/model/DocumentRequest.java
new file mode 100644
index 0000000..421d7b8
--- /dev/null
+++ b/ocr-service-v1/ocr-service-api-v1/src/main/java/com/iqser/red/service/ocr/v1/api/model/DocumentRequest.java
@@ -0,0 +1,16 @@
+package com.iqser.red.service.ocr.v1.api.model;
+
+import lombok.AllArgsConstructor;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+import lombok.experimental.SuperBuilder;
+
+/** Dossier id and file id pair; Lombok generates accessors, equals/hashCode/toString and the no-args and all-args constructors. */
+@Data
+@NoArgsConstructor
+@AllArgsConstructor
+public class DocumentRequest {
+ protected String dossierId;
+ protected String fileId;
+
+}
diff --git a/ocr-service-v1/ocr-service-api-v1/src/main/java/com/iqser/red/service/ocr/v1/api/model/OCRStatusUpdateResponse.java b/ocr-service-v1/ocr-service-api-v1/src/main/java/com/iqser/red/service/ocr/v1/api/model/OCRStatusUpdateResponse.java
new file mode 100644
index 0000000..24aa315
--- /dev/null
+++ b/ocr-service-v1/ocr-service-api-v1/src/main/java/com/iqser/red/service/ocr/v1/api/model/OCRStatusUpdateResponse.java
@@ -0,0 +1,19 @@
+package com.iqser.red.service.ocr.v1.api.model;
+
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+/** OCR progress for one file id: pages to OCR, pages already OCRed and a finished flag; also constructible through the Lombok builder. */
+@Data
+@NoArgsConstructor
+@AllArgsConstructor
+@Builder
+public class OCRStatusUpdateResponse {
+ private String fileId;
+ private int numberOfPagesToOCR;
+ private int numberOfOCRedPages;
+ private boolean ocrFinished;
+
+}
diff --git a/ocr-service-v1/ocr-service-server-v1/pom.xml b/ocr-service-v1/ocr-service-server-v1/pom.xml
new file mode 100644
index 0000000..8085f1e
--- /dev/null
+++ b/ocr-service-v1/ocr-service-server-v1/pom.xml
@@ -0,0 +1,145 @@
+
+
+ 4.0.0
+
+
+ com.iqser.red.service
+ ocr-service-v1
+ 1.0-SNAPSHOT
+
+
+ ocr-service-server-v1
+ 1.0-SNAPSHOT
+
+
+
+ com.iqser.red.service
+ ocr-service-api-v1
+ ${project.version}
+
+
+
+ com.iqser.red.commons
+ storage-commons
+
+
+ com.iqser.red.commons
+ spring-commons
+
+
+ com.iqser.red.commons
+ metric-commons
+
+
+ org.springframework.cloud
+ spring-cloud-starter-openfeign
+
+
+ com.pdftron
+ PDFNet
+ 9.4.0
+
+
+ org.springframework.boot
+ spring-boot-starter-amqp
+ 2.3.1.RELEASE
+
+
+ com.amazonaws
+ aws-java-sdk-kms
+ 1.12.158
+
+
+ com.google.guava
+ guava
+
+
+
+
+ com.iqser.red.commons
+ test-commons
+ test
+
+
+ org.springframework.amqp
+ spring-rabbit-test
+ 2.3.1
+ test
+
+
+ org.springframework.boot
+ spring-boot-starter-test
+
+
+ org.springframework.boot
+ spring-boot-starter-tomcat
+
+
+
+
+ org.apache.commons
+ commons-lang3
+ 3.12.0
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-compiler-plugin
+
+
+ lombok.launch.AnnotationProcessorHider$AnnotationProcessor
+ com.dslplatform.json.processor.CompiledJsonAnnotationProcessor
+
+
+
+
+
+ pl.project13.maven
+ git-commit-id-plugin
+
+
+
+ revision
+
+
+ true
+
+ true
+
+
+
+
+
+
+
+ org.springframework.boot
+ spring-boot-maven-plugin
+
+
+
+ repackage
+
+
+ true
+
+
+
+
+
+
+
+
+
+ pdftron
+ PDFNet Maven
+ https://pdftron.com/maven/release
+
+
+
+
+
diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/Application.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/Application.java
new file mode 100644
index 0000000..1150ef1
--- /dev/null
+++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/Application.java
@@ -0,0 +1,45 @@
+package com.iqser.red.service.ocr.v1.server;
+
+import org.springframework.boot.SpringApplication;
+import org.springframework.boot.actuate.autoconfigure.security.servlet.ManagementWebSecurityAutoConfiguration;
+import org.springframework.boot.autoconfigure.SpringBootApplication;
+import org.springframework.boot.autoconfigure.security.servlet.SecurityAutoConfiguration;
+import org.springframework.boot.context.properties.EnableConfigurationProperties;
+import org.springframework.cloud.openfeign.EnableFeignClients;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Import;
+import org.springframework.scheduling.annotation.EnableAsync;
+
+import com.iqser.red.commons.spring.DefaultWebMvcConfiguration;
+import com.iqser.red.service.ocr.v1.server.client.FileStatusProcessingUpdateClient;
+import com.iqser.red.service.ocr.v1.server.configuration.MessagingConfiguration;
+import com.iqser.red.service.ocr.v1.server.settings.OcrServiceSettings;
+
+import io.micrometer.core.aop.TimedAspect;
+import io.micrometer.core.instrument.MeterRegistry;
+
+/** Spring Boot application class of the OCR service; excludes the security auto-configurations and enables async execution, configuration properties and Feign clients. */
+@EnableAsync
+@EnableConfigurationProperties(OcrServiceSettings.class)
+@SpringBootApplication(exclude = {SecurityAutoConfiguration.class, ManagementWebSecurityAutoConfiguration.class})
+@Import({DefaultWebMvcConfiguration.class, MessagingConfiguration.class})
+@EnableFeignClients(basePackageClasses = FileStatusProcessingUpdateClient.class)
+public class Application {
+ /**
+ * Entry point to the service application.
+ *
+ * @param args Any command line parameter given upon startup.
+ */
+ public static void main(String[] args) {
+
+ SpringApplication.run(Application.class, args);
+ }
+
+ /** Exposes a Micrometer {@link TimedAspect} bound to the given registry as a bean. */
+ @Bean
+ public TimedAspect timedAspect(MeterRegistry registry) {
+
+ return new TimedAspect(registry);
+ }
+
+}
diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/client/FileStatusProcessingUpdateClient.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/client/FileStatusProcessingUpdateClient.java
new file mode 100644
index 0000000..b0c51e8
--- /dev/null
+++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/client/FileStatusProcessingUpdateClient.java
@@ -0,0 +1,10 @@
+package com.iqser.red.service.ocr.v1.server.client;
+
+import org.springframework.cloud.openfeign.FeignClient;
+
+import com.iqser.red.service.persistence.service.v1.api.resources.FileStatusProcessingUpdateResource;
+
+/** Feign client exposing {@link FileStatusProcessingUpdateResource}; its base URL is read from the {@code persistence-service.url} property. */
+@FeignClient(name = "FileStatusProcessingUpdateResource", url = "${persistence-service.url}")
+public interface FileStatusProcessingUpdateClient extends FileStatusProcessingUpdateResource {
+}
diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/configuration/MessagingConfiguration.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/configuration/MessagingConfiguration.java
new file mode 100644
index 0000000..2c298dd
--- /dev/null
+++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/configuration/MessagingConfiguration.java
@@ -0,0 +1,42 @@
+package com.iqser.red.service.ocr.v1.server.configuration;
+
+import org.springframework.amqp.core.Queue;
+import org.springframework.amqp.core.QueueBuilder;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+
+import lombok.RequiredArgsConstructor;
+
+/** Declares the durable OCR work queue and its dead-letter queue. */
+@Configuration
+@RequiredArgsConstructor
+public class MessagingConfiguration {
+
+    public static final String OCR_QUEUE = "ocrQueue";
+    public static final String OCR_DLQ = "ocrDLQ";
+
+    public static final String X_DEAD_LETTER_EXCHANGE = "x-dead-letter-exchange";
+    public static final String X_DEAD_LETTER_ROUTING_KEY = "x-dead-letter-routing-key";
+    public static final String X_MAX_PRIORITY = "x-max-priority";
+
+    public static final String OCR_STATUS_UPDATE_RESPONSE_QUEUE = "ocr_status_update_response_queue";
+
+    /**
+     * Durable OCR queue with a maximum priority of 2. Messages dead-lettered from it are routed
+     * through the default exchange ("") with routing key {@link #OCR_DLQ}.
+     */
+    @Bean
+    public Queue ocrQueue() {
+        // x-max-priority is set once here; QueueBuilder.maxPriority(2) would only write the same argument again.
+        return QueueBuilder.durable(OCR_QUEUE)
+                .withArgument(X_DEAD_LETTER_EXCHANGE, "")
+                .withArgument(X_DEAD_LETTER_ROUTING_KEY, OCR_DLQ)
+                .withArgument(X_MAX_PRIORITY, 2)
+                .build();
+    }
+
+    /** Durable queue named {@link #OCR_DLQ}, the dead-letter target configured on the OCR queue. */
+    @Bean
+    public Queue ocrDeadLetterQueue() {
+        return QueueBuilder.durable(OCR_DLQ).build();
+    }
+}
diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/initializer/PDFNetInitializer.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/initializer/PDFNetInitializer.java
new file mode 100644
index 0000000..babf7b3
--- /dev/null
+++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/initializer/PDFNetInitializer.java
@@ -0,0 +1,35 @@
+package com.iqser.red.service.ocr.v1.server.initializer;
+
+import javax.annotation.PostConstruct;
+
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.stereotype.Component;
+
+import com.pdftron.pdf.PDFNet;
+
+import lombok.RequiredArgsConstructor;
+import lombok.SneakyThrows;
+
+/** Initializes the PDFTron PDFNet library once, right after this bean has been constructed. */
+@Component
+@RequiredArgsConstructor
+public class PDFNetInitializer {
+ @Value("${pdftron.license:}")
+ private String pdftronLicense;
+
+ @Value("${pdftron.ocrmodule.path:/tmp}")
+ private String ocrModulePath;
+
+ /** Sets the PDFNet temp path, adds the OCR module directory to the resource search path and initializes PDFNet with the configured license. */
+ @SneakyThrows
+ @PostConstruct
+ // Keep this as @PostConstruct. As an application runner, messages are taken from the queue before PDFNet is initialized, which leads to UnsatisfiedLinkError.
+ public void init() {
+
+ PDFNet.setTempPath("/tmp/pdftron");
+ PDFNet.addResourceSearchPath(ocrModulePath);
+ PDFNet.initialize(pdftronLicense);
+
+ }
+
+}
diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/ImagePosition.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/ImagePosition.java
new file mode 100644
index 0000000..72fa107
--- /dev/null
+++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/ImagePosition.java
@@ -0,0 +1,15 @@
+package com.iqser.red.service.ocr.v1.server.model;
+
+import com.iqser.red.service.redaction.v1.model.Rectangle;
+
+import lombok.AllArgsConstructor;
+import lombok.Data;
+
+/** A rectangle paired with a transparency flag; Lombok generates the accessors and the all-args constructor. */
+@Data
+@AllArgsConstructor
+public class ImagePosition {
+ private Rectangle rectangle;
+ private boolean hasTransparency;
+
+}
diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/Classification.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/Classification.java
new file mode 100644
index 0000000..15c018f
--- /dev/null
+++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/Classification.java
@@ -0,0 +1,14 @@
+package com.iqser.red.service.ocr.v1.server.model.image;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import lombok.Data;
+
+/** Classification part of {@link ImageMetadata}: a label and a probabilities map. NOTE(review): the map is declared as a raw Map; confirm the intended key and value types. */
+@Data
+public class Classification {
+ private Map probabilities = new HashMap<>();
+ private String label;
+
+}
diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/FilterGeometry.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/FilterGeometry.java
new file mode 100644
index 0000000..19d54b8
--- /dev/null
+++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/FilterGeometry.java
@@ -0,0 +1,11 @@
+package com.iqser.red.service.ocr.v1.server.model.image;
+
+import lombok.Data;
+
+/** Geometry part of {@link Filters}: groups the {@link ImageSize} and {@link ImageFormat} results. */
+@Data
+public class FilterGeometry {
+ private ImageSize imageSize;
+ private ImageFormat imageFormat;
+
+}
diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/Filters.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/Filters.java
new file mode 100644
index 0000000..31143c7
--- /dev/null
+++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/Filters.java
@@ -0,0 +1,12 @@
+package com.iqser.red.service.ocr.v1.server.model.image;
+
+import lombok.Data;
+
+/** Filter results carried in {@link ImageMetadata}: geometry results, probability result and an overall allPassed flag. */
+@Data
+public class Filters {
+ private FilterGeometry geometry;
+ private Probability probability;
+ private boolean allPassed;
+
+}
diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/Geometry.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/Geometry.java
new file mode 100644
index 0000000..9e61053
--- /dev/null
+++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/Geometry.java
@@ -0,0 +1,11 @@
+package com.iqser.red.service.ocr.v1.server.model.image;
+
+import lombok.Data;
+
+/** Width and height pair carried in {@link ImageMetadata}; the unit is not visible here. */
+@Data
+public class Geometry {
+ private float width;
+ private float height;
+
+}
\ No newline at end of file
diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/ImageFormat.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/ImageFormat.java
new file mode 100644
index 0000000..94424a0
--- /dev/null
+++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/ImageFormat.java
@@ -0,0 +1,12 @@
+package com.iqser.red.service.ocr.v1.server.model.image;
+
+import lombok.Data;
+
+/** Format result inside {@link FilterGeometry}: a quotient plus tooTall and tooWide flags. */
+@Data
+public class ImageFormat {
+ private float quotient;
+ private boolean tooTall;
+ private boolean tooWide;
+
+}
diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/ImageMetadata.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/ImageMetadata.java
new file mode 100644
index 0000000..5a4c6c6
--- /dev/null
+++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/ImageMetadata.java
@@ -0,0 +1,14 @@
+package com.iqser.red.service.ocr.v1.server.model.image;
+
+import lombok.Data;
+
+/** Metadata of one image: classification, position, geometry, filter results and an alpha flag. */
+@Data
+public class ImageMetadata {
+ private Classification classification;
+ private Position position;
+ private Geometry geometry;
+ private Filters filters;
+ private boolean alpha;
+
+}
diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/ImageServiceResponse.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/ImageServiceResponse.java
new file mode 100644
index 0000000..4bc072e
--- /dev/null
+++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/ImageServiceResponse.java
@@ -0,0 +1,31 @@
+package com.iqser.red.service.ocr.v1.server.model.image;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import com.dslplatform.json.CompiledJson;
+import com.dslplatform.json.JsonAttribute;
+import com.fasterxml.jackson.annotation.JsonAlias;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+import lombok.Data;
+
+/** Response for one dossier/file pair. The list is named "imageMetadata" for Jackson (alias "data"); DSL-JSON also accepts "imageMetadata" as an alternative name. */
+@Data
+@CompiledJson
+public class ImageServiceResponse {
+ private String dossierId;
+ private String fileId;
+
+ @JsonProperty(value = "imageMetadata")
+ @JsonAlias("data")
+ @JsonAttribute(alternativeNames = {"imageMetadata"})
+ private List data = new ArrayList<>();
+
+ // Hand-written setter carrying the same JSON annotations as the field; presumably so they also apply to the mutator - confirm.
+ @JsonProperty(value = "imageMetadata")
+ @JsonAlias("data")
+ @JsonAttribute(alternativeNames = {"imageMetadata"})
+ public void setData(List data) {this.data = data;}
+
+}
diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/ImageSize.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/ImageSize.java
new file mode 100644
index 0000000..ecc740f
--- /dev/null
+++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/ImageSize.java
@@ -0,0 +1,12 @@
+package com.iqser.red.service.ocr.v1.server.model.image;
+
+import lombok.Data;
+
+/** Size result inside {@link FilterGeometry}: a quotient plus tooLarge and tooSmall flags. */
+@Data
+public class ImageSize {
+ private float quotient;
+ private boolean tooLarge;
+ private boolean tooSmall;
+
+}
diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/Position.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/Position.java
new file mode 100644
index 0000000..b4feb05
--- /dev/null
+++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/Position.java
@@ -0,0 +1,14 @@
+package com.iqser.red.service.ocr.v1.server.model.image;
+
+import lombok.Data;
+
+@Data
+public class Position {
+ // Coordinates of an image on a page, as found in the "position" object of the IMAGE_INFO JSON.
+ private float x1; // OCRService builds its rectangle from the point (x1, y1) and takes width/height from the geometry, not from x2/y2
+ private float x2;
+ private float y1;
+ private float y2;
+ private int pageNumber; // passed to PDFDoc.getPageIterator(...) by OCRService; the fixtures use 1 for their single page
+
+}
\ No newline at end of file
diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/Probability.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/Probability.java
new file mode 100644
index 0000000..30173a7
--- /dev/null
+++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/Probability.java
@@ -0,0 +1,10 @@
+package com.iqser.red.service.ocr.v1.server.model.image;
+
+import lombok.Data;
+
+@Data
+public class Probability {
+ // Mirrors the "probability" filter object of the IMAGE_INFO JSON, which carries a single "unconfident" flag in the fixtures.
+ private boolean unconfident;
+
+}
diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/FileStorageService.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/FileStorageService.java
new file mode 100644
index 0000000..fa8d6e3
--- /dev/null
+++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/FileStorageService.java
@@ -0,0 +1,69 @@
+package com.iqser.red.service.ocr.v1.server.service;
+
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+
+import org.apache.commons.io.IOUtils;
+import org.springframework.stereotype.Service;
+
+import com.iqser.red.service.ocr.v1.server.model.image.ImageServiceResponse;
+import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
+import com.iqser.red.storage.commons.service.StorageService;
+
+import lombok.RequiredArgsConstructor;
+import lombok.SneakyThrows;
+import lombok.extern.slf4j.Slf4j;
+
+@Slf4j
+@Service
+@RequiredArgsConstructor
+public class FileStorageService {
+
+ private final StorageService storageService;
+
+
+ /**
+  * Builds the storage key of one artefact of a file.
+  * The key has the form {@code <dossierId>/<fileId>.<FILE_TYPE_NAME><extension>}, where the last two parts
+  * come from {@code fileType.name()} and {@code fileType.getExtension()}.
+  *
+  * @param dossierId id of the dossier the file belongs to
+  * @param fileId id of the file
+  * @param fileType kind of artefact (origin, untouched copy, image info, ...)
+  * @return the storage key
+  */
+ public static String getStorageId(String dossierId, String fileId, FileType fileType) {
+
+     return String.format("%s/%s.%s%s", dossierId, fileId, fileType.name(), fileType.getExtension());
+ }
+
+
+ /**
+  * Loads the ORIGIN object of a file completely into memory.
+  *
+  * @param dossierId id of the dossier the file belongs to
+  * @param fileId id of the file
+  * @return the bytes of the stored original
+  */
+ @SneakyThrows
+ public byte[] getOriginalFile(String dossierId, String fileId) {
+
+     // IOUtils.toByteArray does not close its input, so the storage stream is closed here instead of being leaked.
+     try (InputStream originStream = storageService.getObject(getStorageId(dossierId, fileId, FileType.ORIGIN)).getInputStream()) {
+         return IOUtils.toByteArray(originStream);
+     }
+ }
+
+
+ @SneakyThrows
+ public InputStream getOriginalFileAsStream(String dossierId, String fileId) {
+
+ return storageService.getObject(getStorageId(dossierId, fileId, FileType.ORIGIN)).getInputStream();
+ }
+
+
+ public void storeOriginalFile(String dossierId, String fileId, InputStream stream) {
+
+ storageService.storeObject(getStorageId(dossierId, fileId, FileType.ORIGIN), stream);
+ }
+
+
+ public boolean untouchedFileExists(String dossierId, String fileId) {
+
+ return storageService.objectExists(getStorageId(dossierId, fileId, FileType.UNTOUCHED));
+ }
+
+
+ public void storeUntouchedFile(String dossierId, String fileId, byte[] data) {
+
+ storageService.storeObject(getStorageId(dossierId, fileId, FileType.UNTOUCHED), new ByteArrayInputStream(data));
+ }
+
+
+ @SneakyThrows
+ public ImageServiceResponse getImageServiceResponse(String dossierId, String fileId) {
+
+ return storageService.readJSONObject(getStorageId(dossierId, fileId, FileType.IMAGE_INFO), ImageServiceResponse.class);
+ }
+
+}
diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/OCRService.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/OCRService.java
new file mode 100644
index 0000000..1b00f0c
--- /dev/null
+++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/OCRService.java
@@ -0,0 +1,325 @@
+package com.iqser.red.service.ocr.v1.server.service;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
+
+import org.apache.commons.io.IOUtils;
+import org.springframework.amqp.rabbit.core.RabbitTemplate;
+import org.springframework.stereotype.Service;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.iqser.red.service.ocr.v1.api.model.OCRStatusUpdateResponse;
+import com.iqser.red.service.ocr.v1.server.configuration.MessagingConfiguration;
+import com.iqser.red.service.ocr.v1.server.model.ImagePosition;
+import com.iqser.red.service.ocr.v1.server.model.image.ImageServiceResponse;
+import com.iqser.red.service.ocr.v1.server.settings.OcrServiceSettings;
+import com.iqser.red.service.persistence.service.v1.api.utils.SuppressFBWarnings;
+import com.iqser.red.service.redaction.v1.model.Point;
+import com.iqser.red.service.redaction.v1.model.Rectangle;
+import com.pdftron.common.PDFNetException;
+import com.pdftron.pdf.Element;
+import com.pdftron.pdf.ElementReader;
+import com.pdftron.pdf.ElementWriter;
+import com.pdftron.pdf.OCRModule;
+import com.pdftron.pdf.OCROptions;
+import com.pdftron.pdf.Optimizer;
+import com.pdftron.pdf.PDFDoc;
+import com.pdftron.pdf.Page;
+import com.pdftron.pdf.PageIterator;
+import com.pdftron.pdf.Rect;
+import com.pdftron.pdf.RectCollection;
+import com.pdftron.sdf.Obj;
+import com.pdftron.sdf.SDFDoc;
+
+import io.micrometer.core.annotation.Timed;
+import lombok.RequiredArgsConstructor;
+import lombok.SneakyThrows;
+import lombok.extern.slf4j.Slf4j;
+
+@Slf4j
+@Service
+@RequiredArgsConstructor
+public class OCRService {
+
+ public static final String ENGLISH = "eng";
+
+ private final FileStorageService fileStorageService;
+ private final OcrServiceSettings settings;
+
+ private final RabbitTemplate rabbitTemplate;
+
+ private final ObjectMapper objectMapper;
+
+
+ /**
+  * Runs OCR on the stored original of a file and returns the resulting PDF.
+  * The original and its image metadata are both loaded from storage; the result is held fully in memory.
+  *
+  * @param dossierId id of the dossier the file belongs to
+  * @param fileId id of the file; also used in the published status updates
+  * @return stream over the bytes of the OCRed PDF
+  */
+ @Timed("redactmanager_PDFTron-ocrDocument")
+ @SneakyThrows
+ public InputStream ocrDocument(String dossierId, String fileId) {
+
+     byte[] fileBytes;
+     // IOUtils.toByteArray leaves its input open, so the storage stream is closed here as soon as it has been drained.
+     try (InputStream fileStream = fileStorageService.getOriginalFileAsStream(dossierId, fileId)) {
+         fileBytes = IOUtils.toByteArray(fileStream);
+     }
+
+     var imageServiceResponse = fileStorageService.getImageServiceResponse(dossierId, fileId);
+
+     var ocrBytes = ocr(fileBytes, fileId, imageServiceResponse);
+
+     return new ByteArrayInputStream(ocrBytes);
+ }
+
+
+ /**
+  * OCRs the image regions of a PDF and returns the rewritten document.
+  * <p>
+  * Invisible text is removed first. Every page that has image metadata is then OCRed in a single-page
+  * copy (see {@code ocrPages}) and that copy replaces the page in the document. A status message is
+  * published before the first page is processed and again after the document has been saved.
+  *
+  * @param file bytes of the PDF to process
+  * @param fileId id reported in the status messages
+  * @param imageServiceResponse image metadata that determines the pages and text zones to OCR
+  * @return the optimized, linearized PDF
+  * @throws RuntimeException wrapping any failure during processing
+  */
+ @SuppressFBWarnings("REC_CATCH_EXCEPTION")
+ private byte[] ocr(byte[] file, String fileId, ImageServiceResponse imageServiceResponse) {
+
+     // Declared outside the try block so the finally block can release per-page documents left over after a failure.
+     Map<Integer, PDFDoc> pdfDocMap = Collections.synchronizedMap(new HashMap<>());
+     PDFDoc pdfDoc = null;
+     try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
+         pdfDoc = new PDFDoc(file);
+         removeInvisibleText(pdfDoc);
+
+         Map<Integer, List<ImagePosition>> pages = new HashMap<>();
+
+         // TODO take logic to ignore small and combine images from image-service.
+         // TODO Then replace logic so ocr-service is independent from image-service.
+         for (var imageMetadata : imageServiceResponse.getData()) {
+             var position = imageMetadata.getPosition();
+             var geometry = imageMetadata.getGeometry();
+             var rectangle = new Rectangle(new Point(position.getX1(), position.getY1()), geometry.getWidth(), geometry.getHeight(), position.getPageNumber());
+             pages.computeIfAbsent(position.getPageNumber(), x -> new ArrayList<>()).add(new ImagePosition(rectangle, imageMetadata.isAlpha()));
+         }
+
+         int numberOfPagesToOCR = pages.size();
+
+         rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_QUEUE,
+                 objectMapper.writeValueAsString(OCRStatusUpdateResponse.builder().fileId(fileId).numberOfPagesToOCR(numberOfPagesToOCR).build()));
+
+         ocrPages(pdfDoc, fileId, pages, pdfDocMap);
+
+         // Swap each OCRed single-page document into the original. Entries are removed as soon as their
+         // document is closed, so the finally block only sees documents that were not merged.
+         for (var entries = pdfDocMap.entrySet().iterator(); entries.hasNext(); ) {
+             var entry = entries.next();
+             int page = entry.getKey();
+             PDFDoc ocrDoc = entry.getValue();
+
+             Page ocrPage = ocrDoc.getPageIterator(1).next();
+             pdfDoc.pageInsert(pdfDoc.getPageIterator(page), ocrPage);
+             pdfDoc.pageRemove(pdfDoc.getPageIterator(page + 1));
+
+             ocrDoc.close();
+             entries.remove();
+         }
+
+         Optimizer.optimize(pdfDoc);
+         pdfDoc.save(out, SDFDoc.SaveMode.LINEARIZED, null);
+
+         rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_QUEUE,
+                 objectMapper.writeValueAsString(OCRStatusUpdateResponse.builder()
+                         .fileId(fileId)
+                         .numberOfPagesToOCR(numberOfPagesToOCR)
+                         .numberOfOCRedPages(numberOfPagesToOCR)
+                         .ocrFinished(true)
+                         .build()));
+
+         return out.toByteArray();
+     } catch (Exception e) {
+         throw new RuntimeException(e);
+     } finally {
+         // The main document is closed exactly once, here; per-page documents only remain after a failure.
+         for (PDFDoc leftover : pdfDocMap.values()) {
+             closeQuietly(leftover);
+         }
+         closeQuietly(pdfDoc);
+     }
+ }
+
+
+ /** Closes a PDF document if there is one; a failure to close is logged at debug level and otherwise ignored. */
+ private void closeQuietly(PDFDoc document) {
+
+     if (document == null) {
+         return;
+     }
+     try {
+         document.close();
+     } catch (Exception e) {
+         log.debug("Failed to close document", e);
+     }
+ }
+
+
+ /**
+  * OCRs every page listed in {@code pages}, each in its own single-page document.
+  * <p>
+  * For each page the media box is set to the crop box, the image rectangles become OCR text zones, and
+  * the single-page document is registered in {@code pdfDocMap} before OCR runs. A page that fails is
+  * logged and skipped, but still counted in the progress message published after every page.
+  *
+  * @param pdfDoc document the pages are taken from
+  * @param fileId id reported in the status messages
+  * @param pages image positions per page number
+  * @param pdfDocMap receives the single-page document for each page number
+  */
+ @SneakyThrows
+ private void ocrPages(PDFDoc pdfDoc, String fileId, Map<Integer, List<ImagePosition>> pages, Map<Integer, PDFDoc> pdfDocMap) {
+
+     int numberOfPagesToOCR = pages.size();
+     int numberOfOCRedPages = 0;
+     for (var pageEntry : pages.entrySet()) {
+
+         var page = pageEntry.getKey();
+         try {
+             RectCollection rectCollection = new RectCollection();
+
+             Page pdfPage = pdfDoc.getPageIterator(page).next();
+
+             pdfPage.setMediaBox(pdfPage.getCropBox());
+
+             for (ImagePosition imagePosition : pageEntry.getValue()) {
+                 Rectangle rectangle = imagePosition.getRectangle();
+
+                 // Warning: the coordinate system is different in this call on macOS/Linux.
+                 double y = -rectangle.getTopLeft().getY() + pdfPage.getCropBox().getY2() - rectangle.getHeight();
+                 rectCollection.addRect(rectangle.getTopLeft().getX(), y, rectangle.getTopLeft().getX() + rectangle.getWidth(), y + rectangle.getHeight());
+             }
+
+             PDFDoc ocrDoc = new PDFDoc();
+             ocrDoc.pagePushBack(pdfPage);
+             pdfDocMap.put(page, ocrDoc);
+
+             OCROptions options = new OCROptions();
+             options.addTextZonesForPage(rectCollection, 1);
+             options.addLang(ENGLISH);
+             options.addDPI(settings.getOcrDPI());
+             OCRModule.processPDF(ocrDoc, options);
+
+             rectCollection.clear();
+
+         } catch (Exception e) {
+             // A trailing Throwable is logged as the exception and never fills a placeholder, so it gets none.
+             log.warn("Failed to process PDF page {}", page, e);
+         }
+
+         rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_QUEUE,
+                 objectMapper.writeValueAsString(OCRStatusUpdateResponse.builder()
+                         .fileId(fileId)
+                         .numberOfPagesToOCR(numberOfPagesToOCR)
+                         .numberOfOCRedPages(++numberOfOCRedPages)
+                         .build()));
+
+         // Routine progress: logged at info with the page number only, instead of at warn with the whole map entry.
+         log.info("Done page {}", page);
+
+     }
+ }
+
+
+ /**
+ * There are two ways a PDF can contain invisible text:
+ * 1. The gState is set to invisible; this is OCR text.
+ * 2. Filled path elements in front of the text.
+ */
+ @SneakyThrows
+ private void removeInvisibleText(PDFDoc pdfDoc) {
+ // One reader/writer pair and one visited set are shared by all pages, so a form object referenced from several pages is rewritten only once.
+ ElementWriter writer = new ElementWriter();
+ ElementReader reader = new ElementReader();
+ Set visited = new TreeSet<>(); // object numbers of the pages and form objects already processed
+
+ for (PageIterator iterator = pdfDoc.getPageIterator(); iterator.hasNext(); ) {
+ Page page = iterator.next();
+ removeOverlapText(page, reader, writer, visited);
+ }
+ }
+
+
+ @SneakyThrows
+ private void removeOverlapText(Page page, ElementReader reader, ElementWriter writer, Set visited) {
+ // Replays the page's elements through processElements and writes the filtered result back to the page (ElementWriter.e_replacement).
+ visited.add((int) page.getSDFObj().getObjNum());
+ reader.begin(page);
+ writer.begin(page, ElementWriter.e_replacement, false, true, page.getResourceDict());
+ processElements(reader, writer, visited, false); // false: these elements belong to the page itself, not to a form
+ writer.end();
+ reader.end();
+ }
+
+
+ @SneakyThrows
+ private void processElements(ElementReader reader, ElementWriter writer, Set visited, boolean isInForm) {
+ // Sends every element the reader yields to the handler for its type; element types without a handler are copied unchanged.
+ Set filledRectangles = new HashSet<>(); // bounding boxes recorded by processPath; a new set for every call, i.e. per page or form
+ for (Element element = reader.next(); element != null; element = reader.next())
+ // Brace-less loop: the switch statement below is the entire loop body.
+ switch (element.getType()) {
+ case Element.e_image:
+ case Element.e_inline_image:
+ processImage(element, writer, isInForm);
+ break;
+
+ case Element.e_text:
+ processText(element, writer, filledRectangles);
+ break;
+
+ case Element.e_path:
+ processPath(element, writer, filledRectangles);
+ break;
+
+ case Element.e_form:
+ processForm(reader, writer, element, visited);
+ break;
+
+ default:
+ writer.writeElement(element);
+ }
+ }
+
+
+ /**
+  * Copies an image element to the output, except for images inside a form while the
+  * {@code removeWatermark} setting is enabled; those are dropped.
+  *
+  * @param element the image or inline-image element
+  * @param writer writer receiving the kept elements
+  * @param isInForm whether the element was read from a form's content
+  */
+ @SneakyThrows
+ private void processImage(Element element, ElementWriter writer, boolean isInForm) {
+
+     boolean dropAsWatermark = isInForm && settings.isRemoveWatermark();
+     if (dropAsWatermark) {
+         return;
+     }
+     writer.writeElement(element);
+ }
+
+
+ @SneakyThrows
+ private void processText(Element element, ElementWriter writer, Set filledRectangles) {
+ // Copies a text element unless it counts as invisible: text render mode 3, or its bounding-box corner lies inside a recorded path box.
+ if (element.getBBox() == null) {
+ writer.writeElement(element); // no bounding box to test, so the text is kept
+ return;
+ }
+
+ double x = element.getBBox().getX1();
+ double y = element.getBBox().getY1();
+ boolean filledRectangleIntersection = filledRectangles.stream().anyMatch(r -> { // only the (x1, y1) corner of the text box is tested
+ try {
+ return r.contains(x, y);
+ } catch (PDFNetException e) {
+ throw new RuntimeException("Internal pdftron error during removal of overlap text", e);
+ }
+ });
+
+ var gState = element.getGState();
+
+ // See PDF Reference 5.3, text rendering modes: 3 = invisible (the original note adds that this OCR does not use that mode).
+ if (!filledRectangleIntersection && gState.getTextRenderMode() != 3) {
+ writer.writeElement(element);
+ }
+ }
+
+
+ @SneakyThrows
+ private void processPath(Element element, ElementWriter writer, Set filledRectangles) {
+ // Records the bounding box of a path whose point data has more than four entries, for the text check in processText; the path itself is always kept.
+ if (element.getPathData() != null && element.getPathData().getPoints().length > 4) { // NOTE(review): whether the path is actually filled is not checked here — confirm that is intended
+ filledRectangles.add(element.getBBox());
+ }
+ writer.writeElement(element);
+ }
+
+
+ @SneakyThrows
+ private void processForm(ElementReader reader, ElementWriter writer, Element element, Set visited) {
+ // Copies the form element and, the first time its form object is seen, also filters the form's own elements through processElements.
+ writer.writeElement(element);
+ Obj formObj = element.getXObject();
+
+ if (!visited.contains((int) formObj.getObjNum())) { // a form object already in the visited set is not processed again
+ visited.add((int) formObj.getObjNum());
+ ElementWriter new_writer = new ElementWriter(); // separate writer that targets the form object itself
+ reader.formBegin();
+ new_writer.begin(formObj);
+
+ reader.clearChangeList();
+ new_writer.setDefaultGState(reader);
+
+ processElements(reader, new_writer, visited, true); // true: with removeWatermark enabled, images inside the form are dropped
+ new_writer.end();
+ reader.end(); // NOTE(review): presumably closes the form scope opened by formBegin() — confirm against the PDFTron API
+ }
+ }
+
+}
diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/OcrMessageReceiver.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/OcrMessageReceiver.java
new file mode 100644
index 0000000..772ffb8
--- /dev/null
+++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/OcrMessageReceiver.java
@@ -0,0 +1,79 @@
+package com.iqser.red.service.ocr.v1.server.service;
+
+import org.springframework.amqp.AmqpRejectAndDontRequeueException;
+import org.springframework.amqp.rabbit.annotation.RabbitHandler;
+import org.springframework.amqp.rabbit.annotation.RabbitListener;
+import org.springframework.http.HttpStatus;
+import org.springframework.stereotype.Service;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.iqser.red.service.ocr.v1.server.client.FileStatusProcessingUpdateClient;
+import com.iqser.red.service.ocr.v1.server.configuration.MessagingConfiguration;
+import com.iqser.red.service.ocr.v1.api.model.DocumentRequest;
+
+import feign.FeignException;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+
+@Slf4j
+@Service
+@RequiredArgsConstructor
+public class OcrMessageReceiver {
+
+ private final ObjectMapper objectMapper;
+ private final FileStorageService fileStorageService;
+ private final FileStatusProcessingUpdateClient fileStatusProcessingUpdateClient;
+
+ private final OCRService ocrService;
+
+
+ /**
+  * Handles one OCR request from the OCR queue.
+  * <p>
+  * Sets the file status to "OCR processing", stores an UNTOUCHED copy of the original if none exists yet,
+  * runs OCR, overwrites the ORIGIN object with the result and finally reports success.
+  *
+  * @param in JSON representation of a {@link DocumentRequest}
+  * @throws JsonProcessingException if the message cannot be parsed
+  */
+ @RabbitHandler
+ @RabbitListener(queues = MessagingConfiguration.OCR_QUEUE, concurrency = "1")
+ public void receiveOcr(String in) throws JsonProcessingException {
+
+     DocumentRequest ocrRequestMessage = objectMapper.readValue(in, DocumentRequest.class);
+     String dossierId = ocrRequestMessage.getDossierId();
+     String fileId = ocrRequestMessage.getFileId();
+
+     long start = System.currentTimeMillis();
+     log.info("Start ocr for file with dossierId {} and fileId {}", dossierId, fileId);
+
+     setStatusOcrProcessing(dossierId, fileId);
+
+     boolean untouchedCopyMissing = !fileStorageService.untouchedFileExists(dossierId, fileId);
+     if (untouchedCopyMissing) {
+         byte[] originalFile = fileStorageService.getOriginalFile(dossierId, fileId);
+         fileStorageService.storeUntouchedFile(dossierId, fileId, originalFile);
+     }
+
+     var ocrResult = ocrService.ocrDocument(dossierId, fileId);
+
+     fileStorageService.storeOriginalFile(dossierId, fileId, ocrResult);
+
+     long end = System.currentTimeMillis();
+     log.info("Successfully processed ocr for file with dossierId {} and fileId {}, took {}", dossierId, fileId, end - start);
+
+     fileStatusProcessingUpdateClient.ocrSuccessful(dossierId, fileId);
+ }
+
+
+ /**
+  * Handles a message from the OCR dead-letter queue by reporting the OCR of that file as failed.
+  *
+  * @param in JSON representation of a {@link DocumentRequest}
+  * @throws JsonProcessingException if the message cannot be parsed
+  */
+ @RabbitHandler
+ @RabbitListener(queues = MessagingConfiguration.OCR_DLQ, concurrency = "1")
+ public void receiveOcrDQL(String in) throws JsonProcessingException {
+
+     DocumentRequest failedRequest = objectMapper.readValue(in, DocumentRequest.class);
+     log.info("OCR DQL received: {}", failedRequest);
+
+     String dossierId = failedRequest.getDossierId();
+     String fileId = failedRequest.getFileId();
+     fileStatusProcessingUpdateClient.ocrFailed(dossierId, fileId);
+ }
+
+
+ /**
+  * Reports the file as "OCR processing" to the file status client.
+  * <p>
+  * A 409 CONFLICT answer rejects the message without requeueing it. Any other Feign failure is
+  * logged and processing continues, as before, but no longer silently.
+  *
+  * @param dossierId id of the dossier the file belongs to
+  * @param fileId id of the file
+  * @throws AmqpRejectAndDontRequeueException if the status update is answered with 409 CONFLICT
+  */
+ private void setStatusOcrProcessing(String dossierId, String fileId) {
+
+     try {
+         fileStatusProcessingUpdateClient.ocrProcessing(dossierId, fileId);
+     } catch (FeignException e) {
+         if (e.status() == HttpStatus.CONFLICT.value()) {
+             // Keep the Feign exception as the cause so the rejected message can be traced back to the response.
+             throw new AmqpRejectAndDontRequeueException(e.getMessage(), e);
+         }
+         // Best effort: OCR still runs when the status update fails, but the failure is now visible in the log.
+         log.warn("Could not set OCR processing status for dossierId {} and fileId {} (HTTP status {})", dossierId, fileId, e.status(), e);
+     }
+ }
+
+}
diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/settings/OcrServiceSettings.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/settings/OcrServiceSettings.java
new file mode 100644
index 0000000..f1f60e3
--- /dev/null
+++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/settings/OcrServiceSettings.java
@@ -0,0 +1,14 @@
+package com.iqser.red.service.ocr.v1.server.settings;
+
+import org.springframework.boot.context.properties.ConfigurationProperties;
+
+import lombok.Data;
+
+@Data
+@ConfigurationProperties("ocr-service")
+public class OcrServiceSettings {
+ // Settings bound from configuration properties under the "ocr-service" prefix.
+ private int ocrDPI = 300; // handed to OCROptions.addDPI(...) in OCRService; 300 unless overridden
+ private boolean removeWatermark; // when true, OCRService drops images found inside forms; false unless configured
+
+}
diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/resources/META-INF/additional-spring-configuration-metadata.json b/ocr-service-v1/ocr-service-server-v1/src/main/resources/META-INF/additional-spring-configuration-metadata.json
new file mode 100644
index 0000000..899d259
--- /dev/null
+++ b/ocr-service-v1/ocr-service-server-v1/src/main/resources/META-INF/additional-spring-configuration-metadata.json
@@ -0,0 +1,9 @@
+{
+ "properties": [
+ {
+ "name": "persistence-service.url",
+ "type": "java.lang.String",
+ "description": "URL of the persistence service."
+ }
+ ]
+}
diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/resources/application-dev.yaml b/ocr-service-v1/ocr-service-server-v1/src/main/resources/application-dev.yaml
new file mode 100644
index 0000000..12a5a2d
--- /dev/null
+++ b/ocr-service-v1/ocr-service-server-v1/src/main/resources/application-dev.yaml
@@ -0,0 +1,10 @@
+server:
+ port: 8086
+
+persistence-service.url: "http://persistence-service-v1:8080"
+
+storage:
+ bucket-name: 'redaction'
+ endpoint: 'http://localhost:9000'
+ key: minioadmin
+ secret: minioadmin
diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/resources/application.yml b/ocr-service-v1/ocr-service-server-v1/src/main/resources/application.yml
new file mode 100644
index 0000000..a838588
--- /dev/null
+++ b/ocr-service-v1/ocr-service-server-v1/src/main/resources/application.yml
@@ -0,0 +1,49 @@
+info:
+ description: OCR Service V1 Server
+
+persistence-service.url: "http://persistence-service-v1:8080"
+
+server:
+ port: 8080
+
+spring:
+ main:
+ allow-circular-references: true # FIXME
+ profiles:
+ active: kubernetes
+ rabbitmq:
+ host: ${RABBITMQ_HOST:localhost}
+ port: ${RABBITMQ_PORT:5672}
+ username: ${RABBITMQ_USERNAME:user}
+ password: ${RABBITMQ_PASSWORD:rabbitmq}
+ listener:
+ simple:
+ acknowledge-mode: AUTO
+ concurrency: 2
+ retry:
+ enabled: true
+ max-attempts: 3
+ max-interval: 15000
+ prefetch: 1
+
+platform.multi-tenancy:
+ enabled: false
+
+
+management:
+ endpoint:
+ metrics.enabled: ${monitoring.enabled:false}
+ prometheus.enabled: ${monitoring.enabled:false}
+ health.enabled: true
+ endpoints.web.exposure.include: prometheus, health
+ metrics.export.prometheus.enabled: ${monitoring.enabled:false}
+
+
+storage:
+ signer-type: 'AWSS3V4SignerType'
+ bucket-name: 'redaction'
+ region: 'us-east-1'
+ endpoint: 'https://s3.amazonaws.com'
+ backend: 's3'
+
+pdftron.license: ${PDFTRON_LICENSE}
\ No newline at end of file
diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/resources/banner.txt b/ocr-service-v1/ocr-service-server-v1/src/main/resources/banner.txt
new file mode 100644
index 0000000..1e16e74
--- /dev/null
+++ b/ocr-service-v1/ocr-service-server-v1/src/main/resources/banner.txt
@@ -0,0 +1,22 @@
+------------------------------------------------------------------
+| |
+| OCR Service V1 Server |
+| |
+ ________________________________________________________________
+| |
+| ___ ________ ________ _________ _________ |
+| | | / \ / || || \ |
+| | | / ____ \ / _____|| _____||______ \ |
+| | | / / \ \| / | | \ | |
+| | || | | || \____ | |____ ______/ | |
+| | || | | || \ | | | | |
+| | || | ___| | \_____ || ____| | _ _/ |
+| | || | \ \ | \ || | | | \ \ |
+| | | \ \_ \ / ______/ || |_____ | | \ \ |
+| | | \ \ \ \ | /| || | \ \ |
+| |___| \____\ \___\|_________/ |_________||___| \___\ |
+| |
+| |
+| F r o m d a t a t o i n f o r m a t i o n |
+| |
+|________________________________________________________________|
\ No newline at end of file
diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/resources/bootstrap.yml b/ocr-service-v1/ocr-service-server-v1/src/main/resources/bootstrap.yml
new file mode 100644
index 0000000..d210e44
--- /dev/null
+++ b/ocr-service-v1/ocr-service-server-v1/src/main/resources/bootstrap.yml
@@ -0,0 +1,7 @@
+spring:
+ application:
+ name: ocr-service-v1
+
+management.endpoints:
+ web.base-path: /
+ enabled-by-default: false
\ No newline at end of file
diff --git a/ocr-service-v1/ocr-service-server-v1/src/test/java/com/iqser/red/service/ocr/v1/server/OcrServiceIntegrationTest.java b/ocr-service-v1/ocr-service-server-v1/src/test/java/com/iqser/red/service/ocr/v1/server/OcrServiceIntegrationTest.java
new file mode 100644
index 0000000..f19d07e
--- /dev/null
+++ b/ocr-service-v1/ocr-service-server-v1/src/test/java/com/iqser/red/service/ocr/v1/server/OcrServiceIntegrationTest.java
@@ -0,0 +1,112 @@
+package com.iqser.red.service.ocr.v1.server;
+
+import static com.iqser.red.service.ocr.v1.server.utils.OsUtils.getTemporaryDirectory;
+import static org.assertj.core.api.Assertions.assertThat;
+
+import java.io.File;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.IOUtils;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.springframework.amqp.rabbit.core.RabbitTemplate;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
+import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
+import org.springframework.boot.test.context.SpringBootTest;
+import org.springframework.boot.test.mock.mockito.MockBean;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+import org.springframework.context.annotation.Import;
+import org.springframework.context.annotation.Primary;
+import org.springframework.core.io.ClassPathResource;
+import org.springframework.test.context.junit.jupiter.SpringExtension;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.iqser.red.service.ocr.v1.server.utils.FileSystemBackedStorageService;
+import com.iqser.red.service.ocr.v1.server.service.OCRService;
+import com.iqser.red.service.ocr.v1.server.service.FileStorageService;
+import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
+import com.iqser.red.storage.commons.StorageAutoConfiguration;
+import com.iqser.red.storage.commons.service.StorageService;
+
+import lombok.SneakyThrows;
+
+@ExtendWith(SpringExtension.class)
+@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT //
+ , properties = {"pdftron.ocrmodule.path=/YourOCRModulePath"})
+@Import(OcrServiceIntegrationTest.TestConfiguration.class)
+public class OcrServiceIntegrationTest {
+
+ @Autowired
+ protected StorageService storageService;
+
+ @Autowired
+ protected FileStorageService fileStorageService;
+
+ @Autowired
+ protected ObjectMapper objectMapper;
+
+ @MockBean
+ protected RabbitTemplate rabbitTemplate;
+
+ @Autowired
+ private OCRService ocrService;
+
+
+ /**
+  * Manual end-to-end run: stores the "Watermark" PDF and its image info in the test storage, OCRs it
+  * and writes the result to the temporary directory for inspection. The test makes no assertions.
+  */
+ @Test
+ @Disabled // OCRModule is not available on build server. If you want to run the test set the property at the top.
+ @SneakyThrows
+ public void testOCR() {
+
+     String fileName = "Watermark";
+
+     ClassPathResource imageInfoResource = new ClassPathResource("files/" + fileName + ".IMAGE_INFO.json");
+     ClassPathResource pdfFileResource = new ClassPathResource("files/" + fileName + ".pdf");
+
+     var originId = FileStorageService.getStorageId("dossier", "file", FileType.ORIGIN);
+     storageService.storeObject(originId, pdfFileResource.getInputStream());
+
+     var imageId = FileStorageService.getStorageId("dossier", "file", FileType.IMAGE_INFO);
+     storageService.storeObject(imageId, imageInfoResource.getInputStream());
+
+     File target = new File(getTemporaryDirectory() + "/" + fileName + ".pdf");
+
+     // Both streams are closed once the copy is done; previously the file output stream was never closed.
+     try (var response = ocrService.ocrDocument("dossier", "file"); var out = FileUtils.openOutputStream(target)) {
+         IOUtils.copy(response, out);
+     }
+ }
+
+
+ @SneakyThrows
+ public void dummyTest() {
+ // NOTE(review): this method carries no @Test annotation, so JUnit does not run it as written — confirm whether that is intended.
+ // Build needs one test to not fail.
+ assertThat(1).isEqualTo(1);
+ }
+
+
+ @AfterEach
+ public void cleanupStorage() {
+
+ if (this.storageService instanceof FileSystemBackedStorageService) {
+ ((FileSystemBackedStorageService) this.storageService).clearStorage();
+ }
+ }
+
+
+ @Configuration
+ @EnableAutoConfiguration(exclude = {StorageAutoConfiguration.class, RabbitAutoConfiguration.class})
+ public static class TestConfiguration {
+ // Test wiring: storage and RabbitMQ auto-configuration are excluded and the StorageService is replaced by a local fake.
+ @Bean
+ @Primary
+ public StorageService inmemoryStorage() {
+ // Despite the bean name, the fake keeps its objects in temp files on disk rather than in memory.
+ return new FileSystemBackedStorageService();
+ }
+
+ }
+
+}
diff --git a/ocr-service-v1/ocr-service-server-v1/src/test/java/com/iqser/red/service/ocr/v1/server/utils/FileSystemBackedStorageService.java b/ocr-service-v1/ocr-service-server-v1/src/test/java/com/iqser/red/service/ocr/v1/server/utils/FileSystemBackedStorageService.java
new file mode 100644
index 0000000..6016b05
--- /dev/null
+++ b/ocr-service-v1/ocr-service-server-v1/src/test/java/com/iqser/red/service/ocr/v1/server/utils/FileSystemBackedStorageService.java
@@ -0,0 +1,125 @@
+package com.iqser.red.service.ocr.v1.server.utils;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+import org.apache.commons.io.IOUtils;
+import org.springframework.core.io.InputStreamResource;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.iqser.red.commons.jackson.ObjectMapperFactory;
+import com.iqser.red.storage.commons.exception.StorageObjectDoesNotExist;
+import com.iqser.red.storage.commons.service.StorageService;
+
+import lombok.SneakyThrows;
+
+public class FileSystemBackedStorageService implements StorageService {
+
+ private final Map dataMap = new HashMap<>();
+
+
+ public FileSystemBackedStorageService() {
+ // Nothing to set up: entries are added to dataMap as objects are stored.
+ }
+
+
+ /**
+  * Opens the temp file registered under the given id.
+  *
+  * @param objectId storage key
+  * @return resource wrapping a new file input stream; this method does not close it
+  * @throws StorageObjectDoesNotExist if nothing is registered under the id
+  */
+ @SneakyThrows
+ @Override
+ public InputStreamResource getObject(String objectId) {
+
+     var storedFile = dataMap.get(objectId);
+     if (storedFile != null) {
+         return new InputStreamResource(new FileInputStream(storedFile));
+     }
+     throw new StorageObjectDoesNotExist(new RuntimeException());
+ }
+
+
+ @Override
+ public void deleteObject(String objectId) {
+
+ dataMap.remove(objectId);
+ }
+
+
+ @Override
+ public boolean objectExists(String objectId) {
+
+ return dataMap.containsKey(objectId);
+ }
+
+
+ @Override
+ public void init() {
+ // Intentionally empty: this implementation does nothing on init.
+ }
+
+
+ @Override
+ @SneakyThrows
+ public void storeJSONObject(String objectId, T any) {
+ // Serialises the value as JSON into a new temp file and registers that file under objectId.
+ File tempFile = File.createTempFile("test", ".tmp");
+ getMapper().writeValue(new FileOutputStream(tempFile), any); // NOTE(review): the stream is not closed here; presumably Jackson closes it (AUTO_CLOSE_TARGET) — confirm
+ dataMap.put(objectId, tempFile); // a file already registered under the same id is replaced in the map but not deleted
+ }
+
+
+ /** Returns the mapper used for JSON objects; a new one is requested from {@code ObjectMapperFactory} on every call. */
+ private ObjectMapper getMapper() {
+
+     ObjectMapper mapper = ObjectMapperFactory.create();
+     return mapper;
+ }
+
+
+ @Override
+ @SneakyThrows
+ public T readJSONObject(String objectId, Class clazz) {
+ // Reads the temp file registered under objectId as JSON of the given class; fails if no file is registered or the file no longer exists.
+ if (dataMap.get(objectId) == null || !dataMap.get(objectId).exists()) {
+ throw new StorageObjectDoesNotExist("Stored object not found");
+ }
+ return getMapper().readValue(new FileInputStream(dataMap.get(objectId)), clazz); // NOTE(review): stream not closed here; presumably Jackson closes it (AUTO_CLOSE_SOURCE) — confirm
+ }
+
+
+ public List listPaths() {
+ // Returns a copy of the registered object ids (the keys of dataMap), not the temp-file paths.
+ return new ArrayList<>(dataMap.keySet());
+ }
+
+
+ public List listFilePaths() {
+ // Returns the absolute paths of the temp files that back the registered objects.
+ return dataMap.values().stream().map(File::getAbsolutePath).collect(Collectors.toList());
+ }
+
+
+ /**
+  * Copies the stream into a new temp file and registers that file under the given id.
+  * The stream is read to its end but not closed. A temp file previously registered under the
+  * same id is deleted, so overwriting an object does not leave an orphaned file behind.
+  *
+  * @param objectId storage key
+  * @param stream content to store
+  */
+ @Override
+ @SneakyThrows
+ public void storeObject(String objectId, InputStream stream) {
+
+     File tempFile = File.createTempFile("test", ".tmp");
+
+     try (var fileOutputStream = new FileOutputStream(tempFile)) {
+         IOUtils.copy(stream, fileOutputStream);
+     }
+
+     // put() hands back the file it replaces; clearStorage() could no longer reach it, so it is deleted now.
+     File replaced = dataMap.put(objectId, tempFile);
+     if (replaced != null) {
+         replaced.delete();
+     }
+ }
+
+
+ /** Deletes every registered temp file and then forgets all registrations. */
+ public void clearStorage() {
+
+     for (File storedFile : this.dataMap.values()) {
+         storedFile.delete();
+     }
+     this.dataMap.clear();
+ }
+
+}
diff --git a/ocr-service-v1/ocr-service-server-v1/src/test/java/com/iqser/red/service/ocr/v1/server/utils/OsUtils.java b/ocr-service-v1/ocr-service-server-v1/src/test/java/com/iqser/red/service/ocr/v1/server/utils/OsUtils.java
new file mode 100644
index 0000000..ccbff74
--- /dev/null
+++ b/ocr-service-v1/ocr-service-server-v1/src/test/java/com/iqser/red/service/ocr/v1/server/utils/OsUtils.java
@@ -0,0 +1,58 @@
+package com.iqser.red.service.ocr.v1.server.utils;
+
+import org.apache.commons.lang3.StringUtils;
+
+import lombok.SneakyThrows;
+
+public final class OsUtils {
+
+ private static final String SERVICE_NAME = "pdftron-redaction-service-v1";
+
+
+ private OsUtils() {
+ // Static helpers only; the constructor is private and throws, so no instance can be created.
+ throw new IllegalStateException("Utility class");
+ }
+
+
+ @SneakyThrows
+ public static String getTemporaryDirectory(String suffix, String fileId) {
+
+ return addBackSlashAtEnd(getTemporaryDirectory()) + addBackSlashAtEnd(SERVICE_NAME) + addBackSlashAtEnd(suffix) + addBackSlashAtEnd(fileId);
+ }
+
+
+ /** Tells whether the {@code os.name} system property contains "Windows", ignoring case. */
+ private static boolean isWindows() {
+
+     String osName = System.getProperty("os.name");
+     return StringUtils.containsIgnoreCase(osName, "Windows");
+ }
+
+
+ /**
+  * Makes a path segment end with a backslash and strips one leading slash or backslash.
+  * A segment that does not already end with a backslash first goes through
+  * {@code removeForwardSlashAtEnd} and then gets the backslash appended.
+  */
+ private static String addBackSlashAtEnd(String s) {
+
+     String withTrailingBackslash;
+     if (StringUtils.endsWithIgnoreCase(s, "\\")) {
+         withTrailingBackslash = s;
+     } else {
+         withTrailingBackslash = removeForwardSlashAtEnd(s) + "\\";
+     }
+     return removeSlashAtBegin(withTrailingBackslash);
+ }
+
+
+ /**
+  * Removes one trailing forward slash from the given string, if there is one.
+  *
+  * @param s path segment, may be {@code null}
+  * @return the segment without its trailing "/", or the segment unchanged if it does not end with "/"
+  */
+ private static String removeForwardSlashAtEnd(String s) {
+
+     // The two-argument substring(s, length - 1) used before returned only the trailing "/" itself;
+     // the range 0..length-1 keeps everything in front of it.
+     return StringUtils.endsWithIgnoreCase(s, "/") ? StringUtils.substring(s, 0, s.length() - 1) : s;
+ }
+
+
+ /** Removes one leading forward slash or backslash from the given string, if there is one. */
+ private static String removeSlashAtBegin(String s) {
+
+     boolean startsWithSlash = StringUtils.startsWithIgnoreCase(s, "/") || StringUtils.startsWithIgnoreCase(s, "\\");
+     if (startsWithSlash) {
+         return StringUtils.substring(s, 1);
+     }
+     return s;
+ }
+
+
+ public static String getTemporaryDirectory() {
+
+ String tmpdir = System.getProperty("java.io.tmpdir");
+ if (isWindows() && StringUtils.isNotBlank(tmpdir)) {
+ return tmpdir;
+ }
+ return "/tmp";
+ }
+
+}
diff --git a/ocr-service-v1/ocr-service-server-v1/src/test/resources/application.yml b/ocr-service-v1/ocr-service-server-v1/src/test/resources/application.yml
new file mode 100644
index 0000000..18b57b2
--- /dev/null
+++ b/ocr-service-v1/ocr-service-server-v1/src/test/resources/application.yml
@@ -0,0 +1,8 @@
+persistence-service.url: "http://persistence-service-v1:8080"
+
+spring:
+ main:
+ allow-circular-references: true # FIXME: temporary workaround - resolve the circular bean references, then remove this flag
+
+
+pdftron.license: demo:1650351709282:7bd235e003000000004ec28a6743e1163a085e2115de2536ab6e2cfe5a
\ No newline at end of file
diff --git a/ocr-service-v1/ocr-service-server-v1/src/test/resources/files/CropboxNotEqualToMediaBox.IMAGE_INFO.json b/ocr-service-v1/ocr-service-server-v1/src/test/resources/files/CropboxNotEqualToMediaBox.IMAGE_INFO.json
new file mode 100644
index 0000000..7c13b44
--- /dev/null
+++ b/ocr-service-v1/ocr-service-server-v1/src/test/resources/files/CropboxNotEqualToMediaBox.IMAGE_INFO.json
@@ -0,0 +1 @@
+{"dossierId": "55547c91-6b0e-4aa6-9009-2e7c4cd90f13", "fileId": "917b9d9c9f548f85fef3679db45ff46c", "targetFileExtension": "ORIGIN.pdf.gz", "responseFileExtension": "IMAGE_INFO.json.gz", "data": [{"classification": {"label": "signature", "probabilities": {"signature": 0.9927, "logo": 0.0038, "other": 0.0034, "formula": 0.0}}, "representation": "FFF2CF0F7C74FFC1070830FFF", "position": {"x1": -7, "x2": 603, "y1": 0, "y2": 852, "pageNumber": 1}, "geometry": {"width": 610, "height": 852}, "alpha": false, "filters": {"geometry": {"imageSize": {"quotient": 1.0096, "tooLarge": true, "tooSmall": false}, "imageFormat": {"quotient": 0.716, "tooTall": false, "tooWide": false}}, "probability": {"unconfident": false}, "allPassed": false}}], "dataCV": []}
\ No newline at end of file
diff --git a/ocr-service-v1/ocr-service-server-v1/src/test/resources/files/MediaBoxBiggerThanCropBox.IMAGE_INFO.json b/ocr-service-v1/ocr-service-server-v1/src/test/resources/files/MediaBoxBiggerThanCropBox.IMAGE_INFO.json
new file mode 100644
index 0000000..ac6c30f
--- /dev/null
+++ b/ocr-service-v1/ocr-service-server-v1/src/test/resources/files/MediaBoxBiggerThanCropBox.IMAGE_INFO.json
@@ -0,0 +1 @@
+{"dossierId": "55547c91-6b0e-4aa6-9009-2e7c4cd90f13", "fileId": "d7f1e0e37cba4e28ebdf894a79d3bd67", "targetFileExtension": "ORIGIN.pdf.gz", "responseFileExtension": "IMAGE_INFO.json.gz", "data": [{"classification": {"label": "signature", "probabilities": {"signature": 0.9872, "logo": 0.0064, "other": 0.0063, "formula": 0.0001}}, "representation": "FFFCF10608F6F89747BFFC301", "position": {"x1": -9, "x2": 584, "y1": 9, "y2": 849, "pageNumber": 1}, "geometry": {"width": 593, "height": 840}, "alpha": false, "filters": {"geometry": {"imageSize": {"quotient": 0.9992, "tooLarge": true, "tooSmall": false}, "imageFormat": {"quotient": 0.706, "tooTall": false, "tooWide": false}}, "probability": {"unconfident": false}, "allPassed": false}}], "dataCV": []}
\ No newline at end of file
diff --git a/ocr-service-v1/ocr-service-server-v1/src/test/resources/files/Watermark.IMAGE_INFO.json b/ocr-service-v1/ocr-service-server-v1/src/test/resources/files/Watermark.IMAGE_INFO.json
new file mode 100644
index 0000000..a4eb3b4
--- /dev/null
+++ b/ocr-service-v1/ocr-service-server-v1/src/test/resources/files/Watermark.IMAGE_INFO.json
@@ -0,0 +1 @@
+{"dossierId": "55547c91-6b0e-4aa6-9009-2e7c4cd90f13", "fileId": "32b19ec38896f5105c09041def470c90", "targetFileExtension": "ORIGIN.pdf.gz", "responseFileExtension": "IMAGE_INFO.json.gz", "data": [{"classification": {"label": "logo", "probabilities": {"logo": 0.9999, "signature": 0.0001, "formula": 0.0, "other": 0.0}}, "representation": "307EF8F6E9833CE9D7AF9EFFF", "position": {"x1": 26, "x2": 586, "y1": -2, "y2": 794, "pageNumber": 1}, "geometry": {"width": 560, "height": 796}, "alpha": false, "filters": {"geometry": {"imageSize": {"quotient": 0.959, "tooLarge": true, "tooSmall": false}, "imageFormat": {"quotient": 0.7035, "tooTall": false, "tooWide": false}}, "probability": {"unconfident": false}, "allPassed": false}}, {"classification": {"label": "logo", "probabilities": {"logo": 1.0, "formula": 0.0, "other": 0.0, "signature": 0.0}}, "representation": "FFF7FFD2000000018F3FFEFFF", "position": {"x1": 90, "x2": 210, "y1": 676, "y2": 720, "pageNumber": 1}, "geometry": {"width": 120, "height": 44}, "alpha": false, "filters": {"geometry": {"imageSize": {"quotient": 0.1044, "tooLarge": false, "tooSmall": false}, "imageFormat": {"quotient": 2.7273, "tooTall": false, "tooWide": false}}, "probability": {"unconfident": false}, "allPassed": true}}], "dataCV": []}
\ No newline at end of file
diff --git a/ocr-service-v1/pom.xml b/ocr-service-v1/pom.xml
new file mode 100644
index 0000000..b9e5d2e
--- /dev/null
+++ b/ocr-service-v1/pom.xml
@@ -0,0 +1,95 @@
+
+
+ 4.0.0
+
+
+ com.iqser.red
+ platform-dependency
+ 1.14.0
+
+
+
+ com.iqser.red.service
+ ocr-service-v1
+ 1.0-SNAPSHOT
+
+ pom
+
+
+ ocr-service-api-v1
+ ocr-service-server-v1
+
+
+
+
+
+ com.iqser.red
+ platform-commons-dependency
+ 1.20.0
+ import
+ pom
+
+
+
+
+
+
+
+ org.sonarsource.scanner.maven
+ sonar-maven-plugin
+ 3.9.0.2155
+
+
+ org.owasp
+ dependency-check-maven
+ 6.3.1
+
+ ALL
+
+
+
+ org.jacoco
+ jacoco-maven-plugin
+
+
+ prepare-agent
+
+ prepare-agent
+
+
+
+ report
+
+ report
+
+
+
+
+
+
+
+
+ org.jacoco
+ jacoco-maven-plugin
+ 0.8.8
+
+
+ prepare-agent
+
+ prepare-agent
+
+
+
+ report
+
+ report-aggregate
+
+ verify
+
+
+
+
+
+
diff --git a/pom.xml b/pom.xml
new file mode 100644
index 0000000..8dfcae3
--- /dev/null
+++ b/pom.xml
@@ -0,0 +1,19 @@
+
+
+ 4.0.0
+
+ com.iqser.red.service
+ ocr-service
+ 1.0-SNAPSHOT
+
+ pom
+
+
+ bamboo-specs
+ ocr-service-v1
+ ocr-service-image-v1
+
+
+