diff --git a/.dev/docker-compose.yaml b/.dev/docker-compose.yaml new file mode 100644 index 0000000..af378d3 --- /dev/null +++ b/.dev/docker-compose.yaml @@ -0,0 +1,19 @@ +version: '2' + + +services: + rabbitmq: + image: 'rabbitmq:3.9-alpine' + mem_limit: 500m + environment: + - RABBITMQ_DEFAULT_USER=user + - RABBITMQ_DEFAULT_PASS=rabbitmq + ports: + - 5672:5672 + - 15672:15672 + minio: + mem_limit: 500m + image: 'minio/minio:latest' + command: server /entity + ports: + - 9000:9000 diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..38b5906 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,13 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +## [Unreleased] + +### Fixed + +### Added + +### Changed + +### Removed \ No newline at end of file diff --git a/bamboo-specs/pom.xml b/bamboo-specs/pom.xml new file mode 100644 index 0000000..d35f676 --- /dev/null +++ b/bamboo-specs/pom.xml @@ -0,0 +1,37 @@ + + 4.0.0 + + + com.atlassian.bamboo + bamboo-specs-parent + 8.1.3 + + + + bamboo-specs + 1.0.0-SNAPSHOT + jar + + + + com.atlassian.bamboo + bamboo-specs-api + + + com.atlassian.bamboo + bamboo-specs + + + + + junit + junit + test + + + + + + + diff --git a/bamboo-specs/src/main/java/buildjob/PlanSpec.java b/bamboo-specs/src/main/java/buildjob/PlanSpec.java new file mode 100644 index 0000000..ec40b3a --- /dev/null +++ b/bamboo-specs/src/main/java/buildjob/PlanSpec.java @@ -0,0 +1,125 @@ +package buildjob; + +import static com.atlassian.bamboo.specs.builders.task.TestParserTask.createJUnitParserTask; + +import java.time.LocalTime; + +import com.atlassian.bamboo.specs.api.BambooSpec; +import com.atlassian.bamboo.specs.api.builders.BambooKey; +import com.atlassian.bamboo.specs.api.builders.Variable; +import com.atlassian.bamboo.specs.api.builders.docker.DockerConfiguration; +import com.atlassian.bamboo.specs.api.builders.permission.PermissionType; +import 
com.atlassian.bamboo.specs.api.builders.permission.Permissions; +import com.atlassian.bamboo.specs.api.builders.permission.PlanPermissions; +import com.atlassian.bamboo.specs.api.builders.plan.Job; +import com.atlassian.bamboo.specs.api.builders.plan.Plan; +import com.atlassian.bamboo.specs.api.builders.plan.PlanIdentifier; +import com.atlassian.bamboo.specs.api.builders.plan.Stage; +import com.atlassian.bamboo.specs.api.builders.plan.branches.BranchCleanup; +import com.atlassian.bamboo.specs.api.builders.plan.branches.PlanBranchManagement; +import com.atlassian.bamboo.specs.api.builders.project.Project; +import com.atlassian.bamboo.specs.builders.task.CheckoutItem; +import com.atlassian.bamboo.specs.builders.task.InjectVariablesTask; +import com.atlassian.bamboo.specs.builders.task.ScriptTask; +import com.atlassian.bamboo.specs.builders.task.VcsCheckoutTask; +import com.atlassian.bamboo.specs.builders.task.VcsTagTask; +import com.atlassian.bamboo.specs.builders.trigger.BitbucketServerTrigger; +import com.atlassian.bamboo.specs.builders.trigger.ScheduledTrigger; +import com.atlassian.bamboo.specs.model.task.InjectVariablesScope; +import com.atlassian.bamboo.specs.model.task.ScriptTaskProperties.Location; +import com.atlassian.bamboo.specs.util.BambooServer; + +/** + * Plan configuration for Bamboo. + * Learn more on: https://confluence.atlassian.com/display/BAMBOO/Bamboo+Specs + */ +@BambooSpec +public class PlanSpec { + + private static final String SERVICE_NAME = "ocr-service"; + + private static final String SERVICE_KEY = SERVICE_NAME.toUpperCase().replaceAll("-", ""); + + + /** + * Run main to publish plan on Bamboo + */ + public static void main(final String[] args) throws Exception { + //By default credentials are read from the '.credentials' file. 
+ BambooServer bambooServer = new BambooServer("http://localhost:8085"); + + Plan plan = new PlanSpec().createPlan(); + bambooServer.publish(plan); + PlanPermissions planPermission = new PlanSpec().createPlanPermission(plan.getIdentifier()); + bambooServer.publish(planPermission); + + Plan secPlan = new PlanSpec().createSecBuild(); + bambooServer.publish(secPlan); + PlanPermissions secPlanPermission = new PlanSpec().createPlanPermission(secPlan.getIdentifier()); + bambooServer.publish(secPlanPermission); + } + + + private PlanPermissions createPlanPermission(PlanIdentifier planIdentifier) { + + Permissions permission = new Permissions().userPermissions("atlbamboo", + PermissionType.EDIT, + PermissionType.VIEW, + PermissionType.ADMIN, + PermissionType.CLONE, + PermissionType.BUILD) + .groupPermissions("Development", PermissionType.EDIT, PermissionType.VIEW, PermissionType.CLONE, PermissionType.BUILD) + .groupPermissions("devplant", PermissionType.EDIT, PermissionType.VIEW, PermissionType.CLONE, PermissionType.BUILD) + .loggedInUserPermissions(PermissionType.VIEW) + .anonymousUserPermissionView(); + return new PlanPermissions(planIdentifier.getProjectKey(), planIdentifier.getPlanKey()).permissions(permission); + } + + + private Project project() { + + return new Project().name("RED").key(new BambooKey("RED")); + } + + + public Plan createPlan() { + + return new Plan(project(), SERVICE_NAME, new BambooKey(SERVICE_KEY)).description("Plan created from (enter repository url of your plan)") + .variables(new Variable("maven_add_param", "")) + .stages(new Stage("Default Stage").jobs(new Job("Default Job", new BambooKey("JOB1")).tasks(new ScriptTask().description("Clean") + .inlineBody("#!/bin/bash\n" + "set -e\n" + "rm -rf ./*"), + new VcsCheckoutTask().description("Checkout Default Repository").cleanCheckout(true).checkoutItems(new CheckoutItem().defaultRepository()), + new 
ScriptTask().description("Build").location(Location.FILE).fileFromPath("bamboo-specs/src/main/resources/scripts/build-java.sh").argument(SERVICE_NAME), + createJUnitParserTask().description("Resultparser") + .resultDirectories("**/test-reports/*.xml, **/target/surefire-reports/*.xml, **/target/failsafe-reports/*.xml") + .enabled(true), + new InjectVariablesTask().description("Inject git Tag").path("git.tag").namespace("g").scope(InjectVariablesScope.LOCAL), + new VcsTagTask().description("${bamboo.g.gitTag}").tagName("${bamboo.g.gitTag}").defaultRepository()) + .dockerConfiguration(new DockerConfiguration().image("nexus.iqser.com:5001/infra/maven:3.8.4-openjdk-17-slim") + .volume("/etc/maven/settings.xml", "/usr/share/maven/conf/settings.xml") + .volume("/var/run/docker.sock", "/var/run/docker.sock")))) + .linkedRepositories("RED / " + SERVICE_NAME) + .triggers(new BitbucketServerTrigger()) + .planBranchManagement(new PlanBranchManagement().createForVcsBranch() + .delete(new BranchCleanup().whenInactiveInRepositoryAfterDays(14)) + .notificationForCommitters()); + } + + + public Plan createSecBuild() { + + return new Plan(project(), SERVICE_NAME + "-Sec", new BambooKey(SERVICE_KEY + "SEC")).description("Security Analysis Plan") + .stages(new Stage("Default Stage").jobs(new Job("Default Job", new BambooKey("JOB1")).tasks(new ScriptTask().description("Clean") + .inlineBody("#!/bin/bash\n" + "set -e\n" + "rm -rf ./*"), + new VcsCheckoutTask().description("Checkout Default Repository").checkoutItems(new CheckoutItem().defaultRepository()), + new ScriptTask().description("Sonar").location(Location.FILE).fileFromPath("bamboo-specs/src/main/resources/scripts/sonar-java.sh").argument(SERVICE_NAME)) + .dockerConfiguration(new DockerConfiguration().image("nexus.iqser.com:5001/infra/maven:3.6.2-jdk-13-3.0.0") + .dockerRunArguments("--net=host") + .volume("/etc/maven/settings.xml", "/usr/share/maven/ref/settings.xml") + .volume("/var/run/docker.sock", "/var/run/docker.sock")))) 
+                .linkedRepositories("RED / " + SERVICE_NAME)
+                .triggers(new ScheduledTrigger().scheduleOnceDaily(LocalTime.of(23, 0)))
+                .planBranchManagement(new PlanBranchManagement().createForVcsBranchMatching("release.*").notificationForCommitters());
+    }
+
+}
diff --git a/bamboo-specs/src/main/resources/scripts/build-java.sh b/bamboo-specs/src/main/resources/scripts/build-java.sh
new file mode 100755
index 0000000..59daedc
--- /dev/null
+++ b/bamboo-specs/src/main/resources/scripts/build-java.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+set -e
+# Usage: build-java.sh <service-name> -- picks the version from branch/tag, then builds and deploys (dev builds only run a local install).
+SERVICE_NAME=$1
+
+if [[ "$bamboo_planRepository_branchName" == "master" ]]
+then
+    branchVersion=$(cat pom.xml | grep -Eo ".*" | sed -s 's|\(.*\)\..*\(-*.*\)|\1|')
+    latestVersion=$(semver $( git tag -l $branchVersion.* ) | tail -n1)
+    newVersion="$(semver $latestVersion -p -i minor)"
+elif [[ "$bamboo_planRepository_branchName" == release* ]]
+then
+    branchVersion=$(echo $bamboo_planRepository_branchName | sed -s 's|release\/\([0-9]\+\.[0-9]\+\)\.x|\1|')
+    latestVersion=$(semver $( git tag -l $branchVersion.* ) | tail -n1)
+    newVersion="$(semver $latestVersion -p -i patch)"
+elif [[ "${bamboo_version_tag}" != "dev" ]]
+then
+    newVersion="${bamboo_version_tag}"
+else
+    mvn -f ${bamboo_build_working_directory}/$SERVICE_NAME-v1/pom.xml \
+        --no-transfer-progress \
+        ${bamboo_maven_add_param} \
+        clean install \
+        -Djava.security.egd=file:/dev/./urandom
+    echo "gitTag=${bamboo_planRepository_1_branch}_${bamboo_buildNumber}" > git.tag
+    exit 0
+fi
+# Release path: record the version as the git tag, stamp both poms with it, then deploy.
+echo "gitTag=${newVersion}" > git.tag
+
+mvn --no-transfer-progress \
+    -f ${bamboo_build_working_directory}/$SERVICE_NAME-v1/pom.xml \
+    versions:set \
+    -DnewVersion=${newVersion}
+
+mvn --no-transfer-progress \
+    -f ${bamboo_build_working_directory}/$SERVICE_NAME-image-v1/pom.xml \
+    versions:set \
+    -DnewVersion=${newVersion}
+
+mvn -f ${bamboo_build_working_directory}/$SERVICE_NAME-v1/pom.xml \
+    --no-transfer-progress \
+    clean deploy \
+    ${bamboo_maven_add_param} \
+    -e \
+
-DdeployAtEnd=true \
+    -Dmaven.wagon.http.ssl.insecure=true \
+    -Dmaven.wagon.http.ssl.allowall=true \
+    -Dmaven.wagon.http.ssl.ignore.validity.dates=true \
+    -DaltDeploymentRepository=iqser_release::default::https://nexus.iqser.com/repository/red-platform-releases
+
+mvn --no-transfer-progress \
+    -f ${bamboo_build_working_directory}/$SERVICE_NAME-image-v1/pom.xml \
+    package
+
+mvn --no-transfer-progress \
+    -f ${bamboo_build_working_directory}/$SERVICE_NAME-image-v1/pom.xml \
+    docker:push
\ No newline at end of file
diff --git a/bamboo-specs/src/main/resources/scripts/sonar-java.sh b/bamboo-specs/src/main/resources/scripts/sonar-java.sh
new file mode 100755
index 0000000..b7ade4d
--- /dev/null
+++ b/bamboo-specs/src/main/resources/scripts/sonar-java.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+set -e
+# Usage: sonar-java.sh <service-name> -- builds the service, runs the OWASP dependency check, then a SonarQube scan.
+SERVICE_NAME=$1
+
+echo "build jar binaries"
+mvn -f ${bamboo_build_working_directory}/$SERVICE_NAME-v1/pom.xml \
+    --no-transfer-progress \
+    clean install \
+    -Djava.security.egd=file:/dev/./urandom
+
+echo "dependency-check:aggregate"
+mvn --no-transfer-progress \
+    -f ${bamboo_build_working_directory}/$SERVICE_NAME-v1/pom.xml \
+    org.owasp:dependency-check-maven:aggregate
+# No pull-request key set: analyse the branch; otherwise analyse the pull request.
+if [[ -z "${bamboo_repository_pr_key}" ]]
+then
+    echo "Sonar Scan for branch: ${bamboo_planRepository_1_branch}"
+    mvn --no-transfer-progress \
+        -f ${bamboo_build_working_directory}/$SERVICE_NAME-v1/pom.xml \
+        sonar:sonar \
+        -Dsonar.projectKey=RED_$SERVICE_NAME \
+        -Dsonar.host.url=https://sonarqube.iqser.com \
+        -Dsonar.login=${bamboo_sonarqube_api_token_secret} \
+        -Dsonar.branch.name=${bamboo_planRepository_1_branch} \
+        -Dsonar.dependencyCheck.jsonReportPath=target/dependency-check-report.json \
+        -Dsonar.dependencyCheck.xmlReportPath=target/dependency-check-report.xml \
+        -Dsonar.dependencyCheck.htmlReportPath=target/dependency-check-report.html
+else
+    echo "Sonar Scan for PR with key1: ${bamboo_repository_pr_key}"
+    mvn --no-transfer-progress \
+        -f
${bamboo_build_working_directory}/$SERVICE_NAME-v1/pom.xml \ + sonar:sonar \ + -Dsonar.projectKey=RED_$SERVICE_NAME \ + -Dsonar.host.url=https://sonarqube.iqser.com \ + -Dsonar.login=${bamboo_sonarqube_api_token_secret} \ + -Dsonar.pullrequest.key=${bamboo_repository_pr_key} \ + -Dsonar.pullrequest.branch=${bamboo_repository_pr_sourceBranch} \ + -Dsonar.pullrequest.base=${bamboo_repository_pr_targetBranch} \ + -Dsonar.dependencyCheck.jsonReportPath=target/dependency-check-report.json \ + -Dsonar.dependencyCheck.xmlReportPath=target/dependency-check-report.xml \ + -Dsonar.dependencyCheck.htmlReportPath=target/dependency-check-report.html +fi \ No newline at end of file diff --git a/bamboo-specs/src/test/java/buildjob/PlanSpecTest.java b/bamboo-specs/src/test/java/buildjob/PlanSpecTest.java new file mode 100644 index 0000000..6e1b615 --- /dev/null +++ b/bamboo-specs/src/test/java/buildjob/PlanSpecTest.java @@ -0,0 +1,22 @@ +package buildjob; + +import org.junit.Test; + +import com.atlassian.bamboo.specs.api.builders.plan.Plan; +import com.atlassian.bamboo.specs.api.exceptions.PropertiesValidationException; +import com.atlassian.bamboo.specs.api.util.EntityPropertiesBuilders; + +public class PlanSpecTest { + + @Test + public void checkYourPlanOffline() throws PropertiesValidationException { + + Plan plan = new PlanSpec().createPlan(); + EntityPropertiesBuilders.build(plan); + + Plan secPlan = new PlanSpec().createSecBuild(); + EntityPropertiesBuilders.build(secPlan); + + } + +} \ No newline at end of file diff --git a/ocr-service-image-v1/libs/pdftron/OCRModuleLinux.tar.gz b/ocr-service-image-v1/libs/pdftron/OCRModuleLinux.tar.gz new file mode 100644 index 0000000..1fee039 Binary files /dev/null and b/ocr-service-image-v1/libs/pdftron/OCRModuleLinux.tar.gz differ diff --git a/ocr-service-image-v1/pom.xml b/ocr-service-image-v1/pom.xml new file mode 100644 index 0000000..3d6d18d --- /dev/null +++ b/ocr-service-image-v1/pom.xml @@ -0,0 +1,118 @@ + + + + com.iqser.red + 
platform-docker-dependency + 1.2.0 + + + 4.0.0 + + ocr-service-image-v1 + com.iqser.red.service + 1.0-SNAPSHOT + pom + + + ocr-service-server-v1 + ${service.server}.jar + false + ${docker.image.prefix}/${service.server} + + + + + + org.apache.maven.plugins + maven-dependency-plugin + + + org.apache.maven.plugins + maven-resources-plugin + + + org.codehaus.mojo + exec-maven-plugin + + + io.fabric8 + docker-maven-plugin + + + + + + + org.apache.maven.plugins + maven-dependency-plugin + + + download-platform-jar + prepare-package + + copy + + + + + ${project.groupId} + ${service.server} + ${version} + jar + true + ${platform.jar} + + + ${docker.build.directory} + + + + + + io.fabric8 + docker-maven-plugin + + + + ${docker.image.name} + + ${docker.build.directory} + + ${platform.jar} + + + ${docker.image.version} + latest + + + + + + + + maven-resources-plugin + + + copy-resources + prepare-package + + copy-resources + + + ${basedir}/target/build/libs/ + + + libs + false + + + + + + + + + + diff --git a/ocr-service-image-v1/src/main/docker/Dockerfile b/ocr-service-image-v1/src/main/docker/Dockerfile new file mode 100644 index 0000000..9efc0e2 --- /dev/null +++ b/ocr-service-image-v1/src/main/docker/Dockerfile @@ -0,0 +1,14 @@ +FROM red/base-image:2.0.0 + +COPY "libs/pdftron/OCRModuleLinux.tar.gz" . 
+RUN tar xvzf OCRModuleLinux.tar.gz +RUN mkdir /OCRModule +RUN mv Lib/* /OCRModule/ + +ARG PLATFORM_JAR + +ENV PLATFORM_JAR ${PLATFORM_JAR} + +ENV USES_ELASTICSEARCH false + +COPY ["${PLATFORM_JAR}", "/"] diff --git a/ocr-service-v1/ocr-service-api-v1/pom.xml b/ocr-service-v1/ocr-service-api-v1/pom.xml new file mode 100644 index 0000000..783322f --- /dev/null +++ b/ocr-service-v1/ocr-service-api-v1/pom.xml @@ -0,0 +1,68 @@ + + + 4.0.0 + + + com.iqser.red.service + ocr-service-v1 + 1.0-SNAPSHOT + + + ocr-service-api-v1 + 1.0-SNAPSHOT + + + 1.269.0 + 3.155.0 + 1.9.9 + + + + + + + com.dslplatform + dsl-json-java8 + ${dsljson.version} + + + + + + io.github.openfeign + feign-core + true + + + + org.springframework + spring-web + + + + com.iqser.red.service + persistence-service-api-v1 + + + com.iqser.red.service + redaction-service-api-v1 + + + ${persistence-service.version} + + + com.iqser.red.service + redaction-service-api-v1 + + + com.iqser.red.service + persistence-service-api-v1 + + + ${redaction-service.version} + + + + diff --git a/ocr-service-v1/ocr-service-api-v1/src/main/java/com/iqser/red/service/ocr/v1/api/model/DocumentRequest.java b/ocr-service-v1/ocr-service-api-v1/src/main/java/com/iqser/red/service/ocr/v1/api/model/DocumentRequest.java new file mode 100644 index 0000000..421d7b8 --- /dev/null +++ b/ocr-service-v1/ocr-service-api-v1/src/main/java/com/iqser/red/service/ocr/v1/api/model/DocumentRequest.java @@ -0,0 +1,16 @@ +package com.iqser.red.service.ocr.v1.api.model; + +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NoArgsConstructor; +import lombok.experimental.SuperBuilder; + +@Data +@NoArgsConstructor +@AllArgsConstructor +public class DocumentRequest { + + protected String dossierId; + protected String fileId; + +} diff --git a/ocr-service-v1/ocr-service-api-v1/src/main/java/com/iqser/red/service/ocr/v1/api/model/OCRStatusUpdateResponse.java 
b/ocr-service-v1/ocr-service-api-v1/src/main/java/com/iqser/red/service/ocr/v1/api/model/OCRStatusUpdateResponse.java new file mode 100644 index 0000000..24aa315 --- /dev/null +++ b/ocr-service-v1/ocr-service-api-v1/src/main/java/com/iqser/red/service/ocr/v1/api/model/OCRStatusUpdateResponse.java @@ -0,0 +1,19 @@ +package com.iqser.red.service.ocr.v1.api.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@NoArgsConstructor +@AllArgsConstructor +@Builder +public class OCRStatusUpdateResponse { + + private String fileId; + private int numberOfPagesToOCR; + private int numberOfOCRedPages; + private boolean ocrFinished; + +} diff --git a/ocr-service-v1/ocr-service-server-v1/pom.xml b/ocr-service-v1/ocr-service-server-v1/pom.xml new file mode 100644 index 0000000..8085f1e --- /dev/null +++ b/ocr-service-v1/ocr-service-server-v1/pom.xml @@ -0,0 +1,145 @@ + + + 4.0.0 + + + com.iqser.red.service + ocr-service-v1 + 1.0-SNAPSHOT + + + ocr-service-server-v1 + 1.0-SNAPSHOT + + + + com.iqser.red.service + ocr-service-api-v1 + ${project.version} + + + + com.iqser.red.commons + storage-commons + + + com.iqser.red.commons + spring-commons + + + com.iqser.red.commons + metric-commons + + + org.springframework.cloud + spring-cloud-starter-openfeign + + + com.pdftron + PDFNet + 9.4.0 + + + org.springframework.boot + spring-boot-starter-amqp + 2.3.1.RELEASE + + + com.amazonaws + aws-java-sdk-kms + 1.12.158 + + + com.google.guava + guava + + + + + com.iqser.red.commons + test-commons + test + + + org.springframework.amqp + spring-rabbit-test + 2.3.1 + test + + + org.springframework.boot + spring-boot-starter-test + + + org.springframework.boot + spring-boot-starter-tomcat + + + + + org.apache.commons + commons-lang3 + 3.12.0 + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + + lombok.launch.AnnotationProcessorHider$AnnotationProcessor + 
com.dslplatform.json.processor.CompiledJsonAnnotationProcessor + + + + + + pl.project13.maven + git-commit-id-plugin + + + + revision + + + true + + true + + + + + + + + org.springframework.boot + spring-boot-maven-plugin + + + + repackage + + + true + + + + + + + + + + pdftron + PDFNet Maven + https://pdftron.com/maven/release + + + + + diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/Application.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/Application.java new file mode 100644 index 0000000..1150ef1 --- /dev/null +++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/Application.java @@ -0,0 +1,45 @@ +package com.iqser.red.service.ocr.v1.server; + +import org.springframework.boot.SpringApplication; +import org.springframework.boot.actuate.autoconfigure.security.servlet.ManagementWebSecurityAutoConfiguration; +import org.springframework.boot.autoconfigure.SpringBootApplication; +import org.springframework.boot.autoconfigure.security.servlet.SecurityAutoConfiguration; +import org.springframework.boot.context.properties.EnableConfigurationProperties; +import org.springframework.cloud.openfeign.EnableFeignClients; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Import; +import org.springframework.scheduling.annotation.EnableAsync; + +import com.iqser.red.commons.spring.DefaultWebMvcConfiguration; +import com.iqser.red.service.ocr.v1.server.client.FileStatusProcessingUpdateClient; +import com.iqser.red.service.ocr.v1.server.configuration.MessagingConfiguration; +import com.iqser.red.service.ocr.v1.server.settings.OcrServiceSettings; + +import io.micrometer.core.aop.TimedAspect; +import io.micrometer.core.instrument.MeterRegistry; + +@EnableAsync +@EnableConfigurationProperties(OcrServiceSettings.class) +@SpringBootApplication(exclude = {SecurityAutoConfiguration.class, 
ManagementWebSecurityAutoConfiguration.class}) +@Import({DefaultWebMvcConfiguration.class, MessagingConfiguration.class}) +@EnableFeignClients(basePackageClasses = FileStatusProcessingUpdateClient.class) +public class Application { + + /** + * Entry point to the service application. + * + * @param args Any command line parameter given upon startup. + */ + public static void main(String[] args) { + + SpringApplication.run(Application.class, args); + } + + + @Bean + public TimedAspect timedAspect(MeterRegistry registry) { + + return new TimedAspect(registry); + } + +} diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/client/FileStatusProcessingUpdateClient.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/client/FileStatusProcessingUpdateClient.java new file mode 100644 index 0000000..b0c51e8 --- /dev/null +++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/client/FileStatusProcessingUpdateClient.java @@ -0,0 +1,10 @@ +package com.iqser.red.service.ocr.v1.server.client; + +import org.springframework.cloud.openfeign.FeignClient; + +import com.iqser.red.service.persistence.service.v1.api.resources.FileStatusProcessingUpdateResource; + +@FeignClient(name = "FileStatusProcessingUpdateResource", url = "${persistence-service.url}") +public interface FileStatusProcessingUpdateClient extends FileStatusProcessingUpdateResource { + +} diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/configuration/MessagingConfiguration.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/configuration/MessagingConfiguration.java new file mode 100644 index 0000000..2c298dd --- /dev/null +++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/configuration/MessagingConfiguration.java @@ -0,0 +1,42 @@ +package 
com.iqser.red.service.ocr.v1.server.configuration; + +import org.springframework.amqp.core.Queue; +import org.springframework.amqp.core.QueueBuilder; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +import lombok.RequiredArgsConstructor; + +@Configuration +@RequiredArgsConstructor +public class MessagingConfiguration { + + public static final String OCR_QUEUE = "ocrQueue"; + public static final String OCR_DLQ = "ocrDLQ"; + + public static final String X_DEAD_LETTER_EXCHANGE = "x-dead-letter-exchange"; + public static final String X_DEAD_LETTER_ROUTING_KEY = "x-dead-letter-routing-key"; + public static final String X_MAX_PRIORITY = "x-max-priority"; + + public static final String OCR_STATUS_UPDATE_RESPONSE_QUEUE = "ocr_status_update_response_queue"; + + + @Bean + public Queue ocrQueue() { + + return QueueBuilder.durable(OCR_QUEUE) + .withArgument(X_DEAD_LETTER_EXCHANGE, "") + .withArgument(X_DEAD_LETTER_ROUTING_KEY, OCR_DLQ) + .withArgument(X_MAX_PRIORITY, 2) + .maxPriority(2) + .build(); + } + + + @Bean + public Queue ocrDeadLetterQueue() { + + return QueueBuilder.durable(OCR_DLQ).build(); + } + +} diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/initializer/PDFNetInitializer.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/initializer/PDFNetInitializer.java new file mode 100644 index 0000000..babf7b3 --- /dev/null +++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/initializer/PDFNetInitializer.java @@ -0,0 +1,35 @@ +package com.iqser.red.service.ocr.v1.server.initializer; + +import javax.annotation.PostConstruct; + +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Component; + +import com.pdftron.pdf.PDFNet; + +import lombok.RequiredArgsConstructor; +import lombok.SneakyThrows; + +@Component 
+@RequiredArgsConstructor +public class PDFNetInitializer { + + @Value("${pdftron.license:}") + private String pdftronLicense; + + @Value("${pdftron.ocrmodule.path:/tmp}") + private String ocrModulePath; + + + @SneakyThrows + @PostConstruct + // Do not change back to application runner, if it is application runner it takes messages from the queue before PDFNet is initialized, that leads to UnsatisfiedLinkError. + public void init() { + + PDFNet.setTempPath("/tmp/pdftron"); + PDFNet.addResourceSearchPath(ocrModulePath); + PDFNet.initialize(pdftronLicense); + + } + +} diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/ImagePosition.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/ImagePosition.java new file mode 100644 index 0000000..72fa107 --- /dev/null +++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/ImagePosition.java @@ -0,0 +1,15 @@ +package com.iqser.red.service.ocr.v1.server.model; + +import com.iqser.red.service.redaction.v1.model.Rectangle; + +import lombok.AllArgsConstructor; +import lombok.Data; + +@Data +@AllArgsConstructor +public class ImagePosition { + + private Rectangle rectangle; + private boolean hasTransparency; + +} diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/Classification.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/Classification.java new file mode 100644 index 0000000..15c018f --- /dev/null +++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/Classification.java @@ -0,0 +1,14 @@ +package com.iqser.red.service.ocr.v1.server.model.image; + +import java.util.HashMap; +import java.util.Map; + +import lombok.Data; + +@Data +public class Classification { + + private Map probabilities = new HashMap<>(); + private 
String label; + +} diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/FilterGeometry.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/FilterGeometry.java new file mode 100644 index 0000000..19d54b8 --- /dev/null +++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/FilterGeometry.java @@ -0,0 +1,11 @@ +package com.iqser.red.service.ocr.v1.server.model.image; + +import lombok.Data; + +@Data +public class FilterGeometry { + + private ImageSize imageSize; + private ImageFormat imageFormat; + +} diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/Filters.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/Filters.java new file mode 100644 index 0000000..31143c7 --- /dev/null +++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/Filters.java @@ -0,0 +1,12 @@ +package com.iqser.red.service.ocr.v1.server.model.image; + +import lombok.Data; + +@Data +public class Filters { + + private FilterGeometry geometry; + private Probability probability; + private boolean allPassed; + +} diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/Geometry.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/Geometry.java new file mode 100644 index 0000000..9e61053 --- /dev/null +++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/Geometry.java @@ -0,0 +1,11 @@ +package com.iqser.red.service.ocr.v1.server.model.image; + +import lombok.Data; + +@Data +public class Geometry { + + private float width; + private float height; + +} \ No newline at end of file diff --git 
a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/ImageFormat.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/ImageFormat.java new file mode 100644 index 0000000..94424a0 --- /dev/null +++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/ImageFormat.java @@ -0,0 +1,12 @@ +package com.iqser.red.service.ocr.v1.server.model.image; + +import lombok.Data; + +@Data +public class ImageFormat { + + private float quotient; + private boolean tooTall; + private boolean tooWide; + +} diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/ImageMetadata.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/ImageMetadata.java new file mode 100644 index 0000000..5a4c6c6 --- /dev/null +++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/ImageMetadata.java @@ -0,0 +1,14 @@ +package com.iqser.red.service.ocr.v1.server.model.image; + +import lombok.Data; + +@Data +public class ImageMetadata { + + private Classification classification; + private Position position; + private Geometry geometry; + private Filters filters; + private boolean alpha; + +} diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/ImageServiceResponse.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/ImageServiceResponse.java new file mode 100644 index 0000000..4bc072e --- /dev/null +++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/ImageServiceResponse.java @@ -0,0 +1,31 @@ +package com.iqser.red.service.ocr.v1.server.model.image; + +import java.util.ArrayList; +import java.util.List; + +import com.dslplatform.json.CompiledJson; 
+import com.dslplatform.json.JsonAttribute; +import com.fasterxml.jackson.annotation.JsonAlias; +import com.fasterxml.jackson.annotation.JsonProperty; + +import lombok.Data; + +@Data +@CompiledJson +public class ImageServiceResponse { + + private String dossierId; + private String fileId; + + @JsonProperty(value = "imageMetadata") + @JsonAlias("data") + @JsonAttribute(alternativeNames = {"imageMetadata"}) + private List data = new ArrayList<>(); + + + @JsonProperty(value = "imageMetadata") + @JsonAlias("data") + @JsonAttribute(alternativeNames = {"imageMetadata"}) + public void setData(List data) {this.data = data;} + +} diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/ImageSize.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/ImageSize.java new file mode 100644 index 0000000..ecc740f --- /dev/null +++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/ImageSize.java @@ -0,0 +1,12 @@ +package com.iqser.red.service.ocr.v1.server.model.image; + +import lombok.Data; + +@Data +public class ImageSize { + + private float quotient; + private boolean tooLarge; + private boolean tooSmall; + +} diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/Position.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/Position.java new file mode 100644 index 0000000..b4feb05 --- /dev/null +++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/Position.java @@ -0,0 +1,14 @@ +package com.iqser.red.service.ocr.v1.server.model.image; + +import lombok.Data; + +@Data +public class Position { + + private float x1; + private float x2; + private float y1; + private float y2; + private int pageNumber; + +} \ No newline at end of file diff --git 
a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/Probability.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/Probability.java new file mode 100644 index 0000000..30173a7 --- /dev/null +++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/model/image/Probability.java @@ -0,0 +1,10 @@ +package com.iqser.red.service.ocr.v1.server.model.image; + +import lombok.Data; + +@Data +public class Probability { + + private boolean unconfident; + +} diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/FileStorageService.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/FileStorageService.java new file mode 100644 index 0000000..fa8d6e3 --- /dev/null +++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/FileStorageService.java @@ -0,0 +1,69 @@ +package com.iqser.red.service.ocr.v1.server.service; + +import java.io.ByteArrayInputStream; +import java.io.InputStream; + +import org.apache.commons.io.IOUtils; +import org.springframework.stereotype.Service; + +import com.iqser.red.service.ocr.v1.server.model.image.ImageServiceResponse; +import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType; +import com.iqser.red.storage.commons.service.StorageService; + +import lombok.RequiredArgsConstructor; +import lombok.SneakyThrows; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +@Service +@RequiredArgsConstructor +public class FileStorageService { + + private final StorageService storageService; + + + public static String getStorageId(String dossierId, String fileId, FileType fileType) { + + return dossierId + "/" + fileId + "." 
+ fileType.name() + fileType.getExtension(); + } + + + @SneakyThrows + public byte[] getOriginalFile(String dossierId, String fileId) { + + return IOUtils.toByteArray(storageService.getObject(getStorageId(dossierId, fileId, FileType.ORIGIN)).getInputStream()); + } + + + @SneakyThrows + public InputStream getOriginalFileAsStream(String dossierId, String fileId) { + + return storageService.getObject(getStorageId(dossierId, fileId, FileType.ORIGIN)).getInputStream(); + } + + + public void storeOriginalFile(String dossierId, String fileId, InputStream stream) { + + storageService.storeObject(getStorageId(dossierId, fileId, FileType.ORIGIN), stream); + } + + + public boolean untouchedFileExists(String dossierId, String fileId) { + + return storageService.objectExists(getStorageId(dossierId, fileId, FileType.UNTOUCHED)); + } + + + public void storeUntouchedFile(String dossierId, String fileId, byte[] data) { + + storageService.storeObject(getStorageId(dossierId, fileId, FileType.UNTOUCHED), new ByteArrayInputStream(data)); + } + + + @SneakyThrows + public ImageServiceResponse getImageServiceResponse(String dossierId, String fileId) { + + return storageService.readJSONObject(getStorageId(dossierId, fileId, FileType.IMAGE_INFO), ImageServiceResponse.class); + } + +} diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/OCRService.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/OCRService.java new file mode 100644 index 0000000..1b00f0c --- /dev/null +++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/OCRService.java @@ -0,0 +1,325 @@ +package com.iqser.red.service.ocr.v1.server.service; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import 
java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; + +import org.apache.commons.io.IOUtils; +import org.springframework.amqp.rabbit.core.RabbitTemplate; +import org.springframework.stereotype.Service; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.iqser.red.service.ocr.v1.api.model.OCRStatusUpdateResponse; +import com.iqser.red.service.ocr.v1.server.configuration.MessagingConfiguration; +import com.iqser.red.service.ocr.v1.server.model.ImagePosition; +import com.iqser.red.service.ocr.v1.server.model.image.ImageServiceResponse; +import com.iqser.red.service.ocr.v1.server.settings.OcrServiceSettings; +import com.iqser.red.service.persistence.service.v1.api.utils.SuppressFBWarnings; +import com.iqser.red.service.redaction.v1.model.Point; +import com.iqser.red.service.redaction.v1.model.Rectangle; +import com.pdftron.common.PDFNetException; +import com.pdftron.pdf.Element; +import com.pdftron.pdf.ElementReader; +import com.pdftron.pdf.ElementWriter; +import com.pdftron.pdf.OCRModule; +import com.pdftron.pdf.OCROptions; +import com.pdftron.pdf.Optimizer; +import com.pdftron.pdf.PDFDoc; +import com.pdftron.pdf.Page; +import com.pdftron.pdf.PageIterator; +import com.pdftron.pdf.Rect; +import com.pdftron.pdf.RectCollection; +import com.pdftron.sdf.Obj; +import com.pdftron.sdf.SDFDoc; + +import io.micrometer.core.annotation.Timed; +import lombok.RequiredArgsConstructor; +import lombok.SneakyThrows; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +@Service +@RequiredArgsConstructor +public class OCRService { + + public static final String ENGLISH = "eng"; + + private final FileStorageService fileStorageService; + private final OcrServiceSettings settings; + + private final RabbitTemplate rabbitTemplate; + + private final ObjectMapper objectMapper; + + + @Timed("redactmanager_PDFTron-ocrDocument") + @SneakyThrows + public InputStream ocrDocument(String dossierId, String fileId) { + + var fileStream = 
fileStorageService.getOriginalFileAsStream(dossierId, fileId); + + var imageServiceResponse = fileStorageService.getImageServiceResponse(dossierId, fileId); + + var fileBytes = IOUtils.toByteArray(fileStream); + + var ocrBytes = ocr(fileBytes, fileId, imageServiceResponse); + + return new ByteArrayInputStream(ocrBytes); + + } + + + @SuppressFBWarnings("REC_CATCH_EXCEPTION") + private byte[] ocr(byte[] file, String fileId, ImageServiceResponse imageServiceResponse) { + + PDFDoc pdfDoc = null; + try (ByteArrayOutputStream out = new ByteArrayOutputStream()) { + pdfDoc = new PDFDoc(file); + removeInvisibleText(pdfDoc); + + Map> pages = new HashMap<>(); + + // TODO take logic to ignore small and combine images from image-service. + // TODO Then replace logic so ocr-service is independent from image-service. + imageServiceResponse.getData() + .forEach(imageMetadata -> pages.computeIfAbsent(imageMetadata.getPosition().getPageNumber(), x -> new ArrayList<>()) + .add(new ImagePosition(new Rectangle(new Point(imageMetadata.getPosition().getX1(), imageMetadata.getPosition().getY1()), + imageMetadata.getGeometry().getWidth(), + imageMetadata.getGeometry().getHeight(), + imageMetadata.getPosition().getPageNumber()), imageMetadata.isAlpha()))); + + Map pdfDocMap = Collections.synchronizedMap(new HashMap<>()); + + rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_QUEUE, + objectMapper.writeValueAsString(OCRStatusUpdateResponse.builder().fileId(fileId).numberOfPagesToOCR(pages.keySet().size()).build())); + + ocrPages(pdfDoc, fileId, pages, pdfDocMap); + + for (var entry : pdfDocMap.entrySet()) { + + var ocrDoc = entry.getValue(); + var page = entry.getKey(); + + Page ocrPage = ocrDoc.getPageIterator(1).next(); + pdfDoc.pageInsert(pdfDoc.getPageIterator(page), ocrPage); + pdfDoc.pageRemove(pdfDoc.getPageIterator(page + 1)); + + ocrDoc.close(); + } + + Optimizer.optimize(pdfDoc); + pdfDoc.save(out, SDFDoc.SaveMode.LINEARIZED, null); + pdfDoc.close(); + 
+ rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_QUEUE, + objectMapper.writeValueAsString(OCRStatusUpdateResponse.builder() + .fileId(fileId) + .numberOfPagesToOCR(pages.keySet().size()) + .numberOfOCRedPages(pages.keySet().size()) + .ocrFinished(true) + .build())); + + return out.toByteArray(); + } catch (Exception e) { + throw new RuntimeException(e); + } finally { + if (pdfDoc != null) { + try { + pdfDoc.close(); + } catch (Exception e) { + log.debug("Failed to close document", e); + } + } + } + } + + + @SneakyThrows + private void ocrPages(PDFDoc pdfDoc, String fileId, Map> pages, Map pdfDocMap) { + + int numberOfOCRedPages = 0; + for (var pageEntry : pages.entrySet()) { + + try { + RectCollection rectCollection = new RectCollection(); + + var page = pageEntry.getKey(); + + Page pdfPage = pdfDoc.getPageIterator(page).next(); + + pdfPage.setMediaBox(pdfPage.getCropBox()); + + for (ImagePosition imagePosition : pageEntry.getValue()) { + Rectangle rectangle = imagePosition.getRectangle(); + + // Warning coordinate system is different in this call macOs/Linux + double y = -rectangle.getTopLeft().getY() + pdfPage.getCropBox().getY2() - rectangle.getHeight(); + rectCollection.addRect(rectangle.getTopLeft().getX(), y, rectangle.getTopLeft().getX() + rectangle.getWidth(), y + rectangle.getHeight()); + } + + PDFDoc ocrDoc = new PDFDoc(); + ocrDoc.pagePushBack(pdfPage); + pdfDocMap.put(pageEntry.getKey(), ocrDoc); + + OCROptions options = new OCROptions(); + options.addTextZonesForPage(rectCollection, 1); + options.addLang(ENGLISH); + options.addDPI(settings.getOcrDPI()); + OCRModule.processPDF(ocrDoc, options); + + rectCollection.clear(); + + } catch (Exception e) { + log.warn("Failed to process PDF page {} - {}", pageEntry.getKey(), e); + } + + rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_QUEUE, + objectMapper.writeValueAsString(OCRStatusUpdateResponse.builder() + .fileId(fileId) + 
.numberOfPagesToOCR(pages.keySet().size()) + .numberOfOCRedPages(++numberOfOCRedPages) + .build())); + + log.warn("Done page {}", pageEntry); + + } + } + + + /** + * There are two ways a PDF can contain invisible text: + * 1. The gState is set to invisible; this is OCR text. + * 2. Filled path elements are in front of the text. + */ + @SneakyThrows + private void removeInvisibleText(PDFDoc pdfDoc) { + + ElementWriter writer = new ElementWriter(); + ElementReader reader = new ElementReader(); + Set visited = new TreeSet<>(); + + for (PageIterator iterator = pdfDoc.getPageIterator(); iterator.hasNext(); ) { + Page page = iterator.next(); + removeOverlapText(page, reader, writer, visited); + } + } + + + @SneakyThrows + private void removeOverlapText(Page page, ElementReader reader, ElementWriter writer, Set visited) { + + visited.add((int) page.getSDFObj().getObjNum()); + reader.begin(page); + writer.begin(page, ElementWriter.e_replacement, false, true, page.getResourceDict()); + processElements(reader, writer, visited, false); + writer.end(); + reader.end(); + } + + + @SneakyThrows + private void processElements(ElementReader reader, ElementWriter writer, Set visited, boolean isInForm) { + + Set filledRectangles = new HashSet<>(); + for (Element element = reader.next(); element != null; element = reader.next()) + + switch (element.getType()) { + case Element.e_image: + case Element.e_inline_image: + processImage(element, writer, isInForm); + break; + + case Element.e_text: + processText(element, writer, filledRectangles); + break; + + case Element.e_path: + processPath(element, writer, filledRectangles); + break; + + case Element.e_form: + processForm(reader, writer, element, visited); + break; + + default: + writer.writeElement(element); + } + } + + + @SneakyThrows + private void processImage(Element element, ElementWriter writer, boolean isInForm) { + + if (!isInForm || !settings.isRemoveWatermark()) { + writer.writeElement(element); + } + } + + + @SneakyThrows + 
private void processText(Element element, ElementWriter writer, Set filledRectangles) { + + if (element.getBBox() == null) { + writer.writeElement(element); + return; + } + + double x = element.getBBox().getX1(); + double y = element.getBBox().getY1(); + boolean filledRectangleIntersection = filledRectangles.stream().anyMatch(r -> { + try { + return r.contains(x, y); + } catch (PDFNetException e) { + throw new RuntimeException("Internal pdftron error during removal of overlap text", e); + } + }); + + var gState = element.getGState(); + + //See PDF Reference 5.3 Text rendering modes, 3 = Invisible, however this ocr does not use it. + if (!filledRectangleIntersection && gState.getTextRenderMode() != 3) { + writer.writeElement(element); + } + } + + + @SneakyThrows + private void processPath(Element element, ElementWriter writer, Set filledRectangles) { + + if (element.getPathData() != null && element.getPathData().getPoints().length > 4) { + filledRectangles.add(element.getBBox()); + } + writer.writeElement(element); + } + + + @SneakyThrows + private void processForm(ElementReader reader, ElementWriter writer, Element element, Set visited) { + + writer.writeElement(element); + Obj formObj = element.getXObject(); + + if (!visited.contains((int) formObj.getObjNum())) { + visited.add((int) formObj.getObjNum()); + ElementWriter new_writer = new ElementWriter(); + reader.formBegin(); + new_writer.begin(formObj); + + reader.clearChangeList(); + new_writer.setDefaultGState(reader); + + processElements(reader, new_writer, visited, true); + new_writer.end(); + reader.end(); + } + } + +} diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/OcrMessageReceiver.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/OcrMessageReceiver.java new file mode 100644 index 0000000..772ffb8 --- /dev/null +++ 
b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/OcrMessageReceiver.java @@ -0,0 +1,79 @@ +package com.iqser.red.service.ocr.v1.server.service; + +import org.springframework.amqp.AmqpRejectAndDontRequeueException; +import org.springframework.amqp.rabbit.annotation.RabbitHandler; +import org.springframework.amqp.rabbit.annotation.RabbitListener; +import org.springframework.http.HttpStatus; +import org.springframework.stereotype.Service; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.iqser.red.service.ocr.v1.server.client.FileStatusProcessingUpdateClient; +import com.iqser.red.service.ocr.v1.server.configuration.MessagingConfiguration; +import com.iqser.red.service.ocr.v1.api.model.DocumentRequest; + +import feign.FeignException; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +@Service +@RequiredArgsConstructor +public class OcrMessageReceiver { + + private final ObjectMapper objectMapper; + private final FileStorageService fileStorageService; + private final FileStatusProcessingUpdateClient fileStatusProcessingUpdateClient; + + private final OCRService ocrService; + + + @RabbitHandler + @RabbitListener(queues = MessagingConfiguration.OCR_QUEUE, concurrency = "1") + public void receiveOcr(String in) throws JsonProcessingException { + + DocumentRequest ocrRequestMessage = objectMapper.readValue(in, DocumentRequest.class); + + long start = System.currentTimeMillis(); + log.info("Start ocr for file with dossierId {} and fileId {}", ocrRequestMessage.getDossierId(), ocrRequestMessage.getFileId()); + + setStatusOcrProcessing(ocrRequestMessage.getDossierId(), ocrRequestMessage.getFileId()); + + if (!fileStorageService.untouchedFileExists(ocrRequestMessage.getDossierId(), ocrRequestMessage.getFileId())) { + byte[] originalFile = fileStorageService.getOriginalFile(ocrRequestMessage.getDossierId(), 
ocrRequestMessage.getFileId()); + fileStorageService.storeUntouchedFile(ocrRequestMessage.getDossierId(), ocrRequestMessage.getFileId(), originalFile); + } + + var ocrResult = ocrService.ocrDocument(ocrRequestMessage.getDossierId(), ocrRequestMessage.getFileId()); + + fileStorageService.storeOriginalFile(ocrRequestMessage.getDossierId(), ocrRequestMessage.getFileId(), ocrResult); + + long end = System.currentTimeMillis(); + log.info("Successfully processed ocr for file with dossierId {} and fileId {}, took {}", ocrRequestMessage.getDossierId(), ocrRequestMessage.getFileId(), end - start); + + fileStatusProcessingUpdateClient.ocrSuccessful(ocrRequestMessage.getDossierId(), ocrRequestMessage.getFileId()); + } + + + @RabbitHandler + @RabbitListener(queues = MessagingConfiguration.OCR_DLQ, concurrency = "1") + public void receiveOcrDQL(String in) throws JsonProcessingException { + + DocumentRequest ocrRequestMessage = objectMapper.readValue(in, DocumentRequest.class); + log.info("OCR DQL received: {}", ocrRequestMessage); + fileStatusProcessingUpdateClient.ocrFailed(ocrRequestMessage.getDossierId(), ocrRequestMessage.getFileId()); + } + + + private void setStatusOcrProcessing(String dossierId, String fileId) { + + try { + fileStatusProcessingUpdateClient.ocrProcessing(dossierId, fileId); + } catch (FeignException e) { + if (e.status() == HttpStatus.CONFLICT.value()) { + throw new AmqpRejectAndDontRequeueException(e.getMessage()); + } + } + } + +} diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/settings/OcrServiceSettings.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/settings/OcrServiceSettings.java new file mode 100644 index 0000000..f1f60e3 --- /dev/null +++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/settings/OcrServiceSettings.java @@ -0,0 +1,14 @@ +package com.iqser.red.service.ocr.v1.server.settings; + +import 
org.springframework.boot.context.properties.ConfigurationProperties; + +import lombok.Data; + +@Data +@ConfigurationProperties("ocr-service") +public class OcrServiceSettings { + + private int ocrDPI = 300; + private boolean removeWatermark; + +} diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/resources/META-INF/additional-spring-configuration-metadata.json b/ocr-service-v1/ocr-service-server-v1/src/main/resources/META-INF/additional-spring-configuration-metadata.json new file mode 100644 index 0000000..899d259 --- /dev/null +++ b/ocr-service-v1/ocr-service-server-v1/src/main/resources/META-INF/additional-spring-configuration-metadata.json @@ -0,0 +1,9 @@ +{ + "properties": [ + { + "name": "persistence-service.url", + "type": "java.lang.String", + "description": "URL of the persistence service." + } + ] +} diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/resources/application-dev.yaml b/ocr-service-v1/ocr-service-server-v1/src/main/resources/application-dev.yaml new file mode 100644 index 0000000..12a5a2d --- /dev/null +++ b/ocr-service-v1/ocr-service-server-v1/src/main/resources/application-dev.yaml @@ -0,0 +1,10 @@ +server: + port: 8086 + +persistence-service.url: "http://persistence-service-v1:8080" + +storage: + bucket-name: 'redaction' + endpoint: 'http://localhost:9000' + key: minioadmin + secret: minioadmin diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/resources/application.yml b/ocr-service-v1/ocr-service-server-v1/src/main/resources/application.yml new file mode 100644 index 0000000..a838588 --- /dev/null +++ b/ocr-service-v1/ocr-service-server-v1/src/main/resources/application.yml @@ -0,0 +1,49 @@ +info: + description: OCR Service V1 Server + +persistence-service.url: "http://persistence-service-v1:8080" + +server: + port: 8080 + +spring: + main: + allow-circular-references: true # FIXME + profiles: + active: kubernetes + rabbitmq: + host: ${RABBITMQ_HOST:localhost} + port: ${RABBITMQ_PORT:5672} + username: 
${RABBITMQ_USERNAME:user} + password: ${RABBITMQ_PASSWORD:rabbitmq} + listener: + simple: + acknowledge-mode: AUTO + concurrency: 2 + retry: + enabled: true + max-attempts: 3 + max-interval: 15000 + prefetch: 1 + +platform.multi-tenancy: + enabled: false + + +management: + endpoint: + metrics.enabled: ${monitoring.enabled:false} + prometheus.enabled: ${monitoring.enabled:false} + health.enabled: true + endpoints.web.exposure.include: prometheus, health + metrics.export.prometheus.enabled: ${monitoring.enabled:false} + + +storage: + signer-type: 'AWSS3V4SignerType' + bucket-name: 'redaction' + region: 'us-east-1' + endpoint: 'https://s3.amazonaws.com' + backend: 's3' + +pdftron.license: ${PDFTRON_LICENSE} \ No newline at end of file diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/resources/banner.txt b/ocr-service-v1/ocr-service-server-v1/src/main/resources/banner.txt new file mode 100644 index 0000000..1e16e74 --- /dev/null +++ b/ocr-service-v1/ocr-service-server-v1/src/main/resources/banner.txt @@ -0,0 +1,22 @@ +------------------------------------------------------------------ +| | +| OCR Service V1 Server | +| | + ________________________________________________________________ +| | +| ___ ________ ________ _________ _________ | +| | | / \ / || || \ | +| | | / ____ \ / _____|| _____||______ \ | +| | | / / \ \| / | | \ | | +| | || | | || \____ | |____ ______/ | | +| | || | | || \ | | | | | +| | || | ___| | \_____ || ____| | _ _/ | +| | || | \ \ | \ || | | | \ \ | +| | | \ \_ \ / ______/ || |_____ | | \ \ | +| | | \ \ \ \ | /| || | \ \ | +| |___| \____\ \___\|_________/ |_________||___| \___\ | +| | +| | +| F r o m d a t a t o i n f o r m a t i o n | +| | +|________________________________________________________________| \ No newline at end of file diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/resources/bootstrap.yml b/ocr-service-v1/ocr-service-server-v1/src/main/resources/bootstrap.yml new file mode 100644 index 0000000..d210e44 --- 
/dev/null +++ b/ocr-service-v1/ocr-service-server-v1/src/main/resources/bootstrap.yml @@ -0,0 +1,7 @@ +spring: + application: + name: ocr-service-v1 + +management.endpoints: + web.base-path: / + enabled-by-default: false \ No newline at end of file diff --git a/ocr-service-v1/ocr-service-server-v1/src/test/java/com/iqser/red/service/ocr/v1/server/OcrServiceIntegrationTest.java b/ocr-service-v1/ocr-service-server-v1/src/test/java/com/iqser/red/service/ocr/v1/server/OcrServiceIntegrationTest.java new file mode 100644 index 0000000..f19d07e --- /dev/null +++ b/ocr-service-v1/ocr-service-server-v1/src/test/java/com/iqser/red/service/ocr/v1/server/OcrServiceIntegrationTest.java @@ -0,0 +1,112 @@ +package com.iqser.red.service.ocr.v1.server; + +import static com.iqser.red.service.ocr.v1.server.utils.OsUtils.getTemporaryDirectory; +import static org.assertj.core.api.Assertions.assertThat; + +import java.io.File; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.springframework.amqp.rabbit.core.RabbitTemplate; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.autoconfigure.EnableAutoConfiguration; +import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.boot.test.mock.mockito.MockBean; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.context.annotation.Import; +import org.springframework.context.annotation.Primary; +import org.springframework.core.io.ClassPathResource; +import org.springframework.test.context.junit.jupiter.SpringExtension; + +import com.fasterxml.jackson.databind.ObjectMapper; +import 
com.iqser.red.service.ocr.v1.server.utils.FileSystemBackedStorageService; +import com.iqser.red.service.ocr.v1.server.service.OCRService; +import com.iqser.red.service.ocr.v1.server.service.FileStorageService; +import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType; +import com.iqser.red.storage.commons.StorageAutoConfiguration; +import com.iqser.red.storage.commons.service.StorageService; + +import lombok.SneakyThrows; + +@ExtendWith(SpringExtension.class) +@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT // + , properties = {"pdftron.ocrmodule.path=/YourOCRModulePath"}) +@Import(OcrServiceIntegrationTest.TestConfiguration.class) +public class OcrServiceIntegrationTest { + + @Autowired + protected StorageService storageService; + + @Autowired + protected FileStorageService fileStorageService; + + @Autowired + protected ObjectMapper objectMapper; + + @MockBean + protected RabbitTemplate rabbitTemplate; + + @Autowired + private OCRService ocrService; + + + @Test + @Disabled // OCRModule is not available on build server. If you want to run the test set the property at the top. 
+ @SneakyThrows + public void testOCR() { + + String fileName = "Watermark"; + + ClassPathResource imageInfoResource = new ClassPathResource("files/" + fileName + ".IMAGE_INFO.json"); + ClassPathResource pdfFileResource = new ClassPathResource("files/" + fileName + ".pdf"); + + var originId = FileStorageService.getStorageId("dossier", "file", FileType.ORIGIN); + storageService.storeObject(originId, pdfFileResource.getInputStream()); + + var imageId = FileStorageService.getStorageId("dossier", "file", FileType.IMAGE_INFO); + storageService.storeObject(imageId, imageInfoResource.getInputStream()); + + var response = ocrService.ocrDocument("dossier", "file"); + + var out = FileUtils.openOutputStream(new File(getTemporaryDirectory() + "/" + fileName + ".pdf")); + IOUtils.copy(response, out); + } + + + @SneakyThrows + public void dummyTest() { + + // Build needs one test to not fail. + assertThat(1).isEqualTo(1); + } + + + @AfterEach + public void cleanupStorage() { + + if (this.storageService instanceof FileSystemBackedStorageService) { + ((FileSystemBackedStorageService) this.storageService).clearStorage(); + } + } + + + @Configuration + @EnableAutoConfiguration(exclude = {StorageAutoConfiguration.class, RabbitAutoConfiguration.class}) + public static class TestConfiguration { + + @Bean + @Primary + public StorageService inmemoryStorage() { + + return new FileSystemBackedStorageService(); + } + + } + +} diff --git a/ocr-service-v1/ocr-service-server-v1/src/test/java/com/iqser/red/service/ocr/v1/server/utils/FileSystemBackedStorageService.java b/ocr-service-v1/ocr-service-server-v1/src/test/java/com/iqser/red/service/ocr/v1/server/utils/FileSystemBackedStorageService.java new file mode 100644 index 0000000..6016b05 --- /dev/null +++ b/ocr-service-v1/ocr-service-server-v1/src/test/java/com/iqser/red/service/ocr/v1/server/utils/FileSystemBackedStorageService.java @@ -0,0 +1,125 @@ +package com.iqser.red.service.ocr.v1.server.utils; + +import java.io.File; +import 
java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import org.apache.commons.io.IOUtils; +import org.springframework.core.io.InputStreamResource; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.iqser.red.commons.jackson.ObjectMapperFactory; +import com.iqser.red.storage.commons.exception.StorageObjectDoesNotExist; +import com.iqser.red.storage.commons.service.StorageService; + +import lombok.SneakyThrows; + +public class FileSystemBackedStorageService implements StorageService { + + private final Map dataMap = new HashMap<>(); + + + public FileSystemBackedStorageService() { + + } + + + @SneakyThrows + @Override + public InputStreamResource getObject(String objectId) { + + var res = dataMap.get(objectId); + if (res == null) { + throw new StorageObjectDoesNotExist(new RuntimeException()); + } + return new InputStreamResource(new FileInputStream(res)); + + } + + + @Override + public void deleteObject(String objectId) { + + dataMap.remove(objectId); + } + + + @Override + public boolean objectExists(String objectId) { + + return dataMap.containsKey(objectId); + } + + + @Override + public void init() { + + } + + + @Override + @SneakyThrows + public void storeJSONObject(String objectId, T any) { + + File tempFile = File.createTempFile("test", ".tmp"); + getMapper().writeValue(new FileOutputStream(tempFile), any); + dataMap.put(objectId, tempFile); + } + + + private ObjectMapper getMapper() { + + return ObjectMapperFactory.create(); + } + + + @Override + @SneakyThrows + public T readJSONObject(String objectId, Class clazz) { + + if (dataMap.get(objectId) == null || !dataMap.get(objectId).exists()) { + throw new StorageObjectDoesNotExist("Stored object not found"); + } + return getMapper().readValue(new FileInputStream(dataMap.get(objectId)), clazz); + } + + + public List 
listPaths() { + + return new ArrayList<>(dataMap.keySet()); + } + + + public List listFilePaths() { + + return dataMap.values().stream().map(File::getAbsolutePath).collect(Collectors.toList()); + } + + + @Override + @SneakyThrows + public void storeObject(String objectId, InputStream stream) { + + File tempFile = File.createTempFile("test", ".tmp"); + + try (var fileOutputStream = new FileOutputStream(tempFile)) { + IOUtils.copy(stream, fileOutputStream); + } + + dataMap.put(objectId, tempFile); + } + + + public void clearStorage() { + + this.dataMap.forEach((k, v) -> v.delete()); + this.dataMap.clear(); + } + +} diff --git a/ocr-service-v1/ocr-service-server-v1/src/test/java/com/iqser/red/service/ocr/v1/server/utils/OsUtils.java b/ocr-service-v1/ocr-service-server-v1/src/test/java/com/iqser/red/service/ocr/v1/server/utils/OsUtils.java new file mode 100644 index 0000000..ccbff74 --- /dev/null +++ b/ocr-service-v1/ocr-service-server-v1/src/test/java/com/iqser/red/service/ocr/v1/server/utils/OsUtils.java @@ -0,0 +1,58 @@ +package com.iqser.red.service.ocr.v1.server.utils; + +import org.apache.commons.lang3.StringUtils; + +import lombok.SneakyThrows; + +public final class OsUtils { + + private static final String SERVICE_NAME = "pdftron-redaction-service-v1"; + + + private OsUtils() { + + throw new IllegalStateException("Utility class"); + } + + + @SneakyThrows + public static String getTemporaryDirectory(String suffix, String fileId) { + + return addBackSlashAtEnd(getTemporaryDirectory()) + addBackSlashAtEnd(SERVICE_NAME) + addBackSlashAtEnd(suffix) + addBackSlashAtEnd(fileId); + } + + + private static boolean isWindows() { + + return StringUtils.containsIgnoreCase(System.getProperty("os.name"), "Windows"); + } + + + private static String addBackSlashAtEnd(String s) { + + return removeSlashAtBegin(StringUtils.endsWithIgnoreCase(s, "\\") ? 
s : removeForwardSlashAtEnd(s) + "\\"); + } + + + private static String removeForwardSlashAtEnd(String s) { + + return StringUtils.endsWithIgnoreCase(s, "/") ? StringUtils.substring(s, s.length() - 1) : s; + } + + + private static String removeSlashAtBegin(String s) { + + return StringUtils.startsWithIgnoreCase(s, "/") || StringUtils.startsWithIgnoreCase(s, "\\") ? StringUtils.substring(s, 1) : s; + } + + + public static String getTemporaryDirectory() { + + String tmpdir = System.getProperty("java.io.tmpdir"); + if (isWindows() && StringUtils.isNotBlank(tmpdir)) { + return tmpdir; + } + return "/tmp"; + } + +} diff --git a/ocr-service-v1/ocr-service-server-v1/src/test/resources/application.yml b/ocr-service-v1/ocr-service-server-v1/src/test/resources/application.yml new file mode 100644 index 0000000..18b57b2 --- /dev/null +++ b/ocr-service-v1/ocr-service-server-v1/src/test/resources/application.yml @@ -0,0 +1,8 @@ +persistence-service.url: "http://persistence-service-v1:8080" + +spring: + main: + allow-circular-references: true # FIXME + + +pdftron.license: demo:1650351709282:7bd235e003000000004ec28a6743e1163a085e2115de2536ab6e2cfe5a \ No newline at end of file diff --git a/ocr-service-v1/ocr-service-server-v1/src/test/resources/files/CropboxNotEqualToMediaBox.IMAGE_INFO.json b/ocr-service-v1/ocr-service-server-v1/src/test/resources/files/CropboxNotEqualToMediaBox.IMAGE_INFO.json new file mode 100644 index 0000000..7c13b44 --- /dev/null +++ b/ocr-service-v1/ocr-service-server-v1/src/test/resources/files/CropboxNotEqualToMediaBox.IMAGE_INFO.json @@ -0,0 +1 @@ +{"dossierId": "55547c91-6b0e-4aa6-9009-2e7c4cd90f13", "fileId": "917b9d9c9f548f85fef3679db45ff46c", "targetFileExtension": "ORIGIN.pdf.gz", "responseFileExtension": "IMAGE_INFO.json.gz", "data": [{"classification": {"label": "signature", "probabilities": {"signature": 0.9927, "logo": 0.0038, "other": 0.0034, "formula": 0.0}}, "representation": "FFF2CF0F7C74FFC1070830FFF", "position": {"x1": -7, "x2": 603, 
"y1": 0, "y2": 852, "pageNumber": 1}, "geometry": {"width": 610, "height": 852}, "alpha": false, "filters": {"geometry": {"imageSize": {"quotient": 1.0096, "tooLarge": true, "tooSmall": false}, "imageFormat": {"quotient": 0.716, "tooTall": false, "tooWide": false}}, "probability": {"unconfident": false}, "allPassed": false}}], "dataCV": []} \ No newline at end of file diff --git a/ocr-service-v1/ocr-service-server-v1/src/test/resources/files/MediaBoxBiggerThanCropBox.IMAGE_INFO.json b/ocr-service-v1/ocr-service-server-v1/src/test/resources/files/MediaBoxBiggerThanCropBox.IMAGE_INFO.json new file mode 100644 index 0000000..ac6c30f --- /dev/null +++ b/ocr-service-v1/ocr-service-server-v1/src/test/resources/files/MediaBoxBiggerThanCropBox.IMAGE_INFO.json @@ -0,0 +1 @@ +{"dossierId": "55547c91-6b0e-4aa6-9009-2e7c4cd90f13", "fileId": "d7f1e0e37cba4e28ebdf894a79d3bd67", "targetFileExtension": "ORIGIN.pdf.gz", "responseFileExtension": "IMAGE_INFO.json.gz", "data": [{"classification": {"label": "signature", "probabilities": {"signature": 0.9872, "logo": 0.0064, "other": 0.0063, "formula": 0.0001}}, "representation": "FFFCF10608F6F89747BFFC301", "position": {"x1": -9, "x2": 584, "y1": 9, "y2": 849, "pageNumber": 1}, "geometry": {"width": 593, "height": 840}, "alpha": false, "filters": {"geometry": {"imageSize": {"quotient": 0.9992, "tooLarge": true, "tooSmall": false}, "imageFormat": {"quotient": 0.706, "tooTall": false, "tooWide": false}}, "probability": {"unconfident": false}, "allPassed": false}}], "dataCV": []} \ No newline at end of file diff --git a/ocr-service-v1/ocr-service-server-v1/src/test/resources/files/Watermark.IMAGE_INFO.json b/ocr-service-v1/ocr-service-server-v1/src/test/resources/files/Watermark.IMAGE_INFO.json new file mode 100644 index 0000000..a4eb3b4 --- /dev/null +++ b/ocr-service-v1/ocr-service-server-v1/src/test/resources/files/Watermark.IMAGE_INFO.json @@ -0,0 +1 @@ +{"dossierId": "55547c91-6b0e-4aa6-9009-2e7c4cd90f13", "fileId": 
"32b19ec38896f5105c09041def470c90", "targetFileExtension": "ORIGIN.pdf.gz", "responseFileExtension": "IMAGE_INFO.json.gz", "data": [{"classification": {"label": "logo", "probabilities": {"logo": 0.9999, "signature": 0.0001, "formula": 0.0, "other": 0.0}}, "representation": "307EF8F6E9833CE9D7AF9EFFF", "position": {"x1": 26, "x2": 586, "y1": -2, "y2": 794, "pageNumber": 1}, "geometry": {"width": 560, "height": 796}, "alpha": false, "filters": {"geometry": {"imageSize": {"quotient": 0.959, "tooLarge": true, "tooSmall": false}, "imageFormat": {"quotient": 0.7035, "tooTall": false, "tooWide": false}}, "probability": {"unconfident": false}, "allPassed": false}}, {"classification": {"label": "logo", "probabilities": {"logo": 1.0, "formula": 0.0, "other": 0.0, "signature": 0.0}}, "representation": "FFF7FFD2000000018F3FFEFFF", "position": {"x1": 90, "x2": 210, "y1": 676, "y2": 720, "pageNumber": 1}, "geometry": {"width": 120, "height": 44}, "alpha": false, "filters": {"geometry": {"imageSize": {"quotient": 0.1044, "tooLarge": false, "tooSmall": false}, "imageFormat": {"quotient": 2.7273, "tooTall": false, "tooWide": false}}, "probability": {"unconfident": false}, "allPassed": true}}], "dataCV": []} \ No newline at end of file diff --git a/ocr-service-v1/pom.xml b/ocr-service-v1/pom.xml new file mode 100644 index 0000000..b9e5d2e --- /dev/null +++ b/ocr-service-v1/pom.xml @@ -0,0 +1,95 @@ + + + 4.0.0 + + + com.iqser.red + platform-dependency + 1.14.0 + + + + com.iqser.red.service + ocr-service-v1 + 1.0-SNAPSHOT + + pom + + + ocr-service-api-v1 + ocr-service-server-v1 + + + + + + com.iqser.red + platform-commons-dependency + 1.20.0 + import + pom + + + + + + + + org.sonarsource.scanner.maven + sonar-maven-plugin + 3.9.0.2155 + + + org.owasp + dependency-check-maven + 6.3.1 + + ALL + + + + org.jacoco + jacoco-maven-plugin + + + prepare-agent + + prepare-agent + + + + report + + report + + + + + + + + + org.jacoco + jacoco-maven-plugin + 0.8.8 + + + prepare-agent + + 
prepare-agent + + + + report + + report-aggregate + + verify + + + + + + diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..8dfcae3 --- /dev/null +++ b/pom.xml @@ -0,0 +1,19 @@ + + + 4.0.0 + + com.iqser.red.service + ocr-service + 1.0-SNAPSHOT + + pom + + + bamboo-specs + ocr-service-v1 + ocr-service-image-v1 + + +