RED-4556: Implemented ocr standalone service

This commit is contained in:
deiflaender 2022-12-05 12:15:55 +01:00
parent 5a3dcde3ad
commit 998d69ba48
47 changed files with 1990 additions and 0 deletions

19
.dev/docker-compose.yaml Normal file
View File

@ -0,0 +1,19 @@
version: '2'
services:
rabbitmq:
image: 'rabbitmq:3.9-alpine'
mem_limit: 500m
environment:
- RABBITMQ_DEFAULT_USER=user
- RABBITMQ_DEFAULT_PASS=rabbitmq
ports:
- 5672:5672
- 15672:15672
minio:
mem_limit: 500m
image: 'minio/minio:latest'
command: server /entity
ports:
- 9000:9000

13
CHANGELOG.md Normal file
View File

@ -0,0 +1,13 @@
# Changelog
All notable changes to this project will be documented in this file.
## [Unreleased]
### Fixed
### Added
### Changed
### Removed

37
bamboo-specs/pom.xml Normal file
View File

@ -0,0 +1,37 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.atlassian.bamboo</groupId>
<artifactId>bamboo-specs-parent</artifactId>
<version>8.1.3</version>
<relativePath/>
</parent>
<artifactId>bamboo-specs</artifactId>
<version>1.0.0-SNAPSHOT</version>
<packaging>jar</packaging>
<dependencies>
<dependency>
<groupId>com.atlassian.bamboo</groupId>
<artifactId>bamboo-specs-api</artifactId>
</dependency>
<dependency>
<groupId>com.atlassian.bamboo</groupId>
<artifactId>bamboo-specs</artifactId>
</dependency>
<!-- Test dependencies -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<!-- run 'mvn test' to perform offline validation of the plan -->
<!-- run 'mvn -Ppublish-specs' to upload the plan to your Bamboo server -->
</project>

View File

@ -0,0 +1,125 @@
package buildjob;
import static com.atlassian.bamboo.specs.builders.task.TestParserTask.createJUnitParserTask;
import java.time.LocalTime;
import com.atlassian.bamboo.specs.api.BambooSpec;
import com.atlassian.bamboo.specs.api.builders.BambooKey;
import com.atlassian.bamboo.specs.api.builders.Variable;
import com.atlassian.bamboo.specs.api.builders.docker.DockerConfiguration;
import com.atlassian.bamboo.specs.api.builders.permission.PermissionType;
import com.atlassian.bamboo.specs.api.builders.permission.Permissions;
import com.atlassian.bamboo.specs.api.builders.permission.PlanPermissions;
import com.atlassian.bamboo.specs.api.builders.plan.Job;
import com.atlassian.bamboo.specs.api.builders.plan.Plan;
import com.atlassian.bamboo.specs.api.builders.plan.PlanIdentifier;
import com.atlassian.bamboo.specs.api.builders.plan.Stage;
import com.atlassian.bamboo.specs.api.builders.plan.branches.BranchCleanup;
import com.atlassian.bamboo.specs.api.builders.plan.branches.PlanBranchManagement;
import com.atlassian.bamboo.specs.api.builders.project.Project;
import com.atlassian.bamboo.specs.builders.task.CheckoutItem;
import com.atlassian.bamboo.specs.builders.task.InjectVariablesTask;
import com.atlassian.bamboo.specs.builders.task.ScriptTask;
import com.atlassian.bamboo.specs.builders.task.VcsCheckoutTask;
import com.atlassian.bamboo.specs.builders.task.VcsTagTask;
import com.atlassian.bamboo.specs.builders.trigger.BitbucketServerTrigger;
import com.atlassian.bamboo.specs.builders.trigger.ScheduledTrigger;
import com.atlassian.bamboo.specs.model.task.InjectVariablesScope;
import com.atlassian.bamboo.specs.model.task.ScriptTaskProperties.Location;
import com.atlassian.bamboo.specs.util.BambooServer;
/**
* Plan configuration for Bamboo.
* Learn more on: <a href="https://confluence.atlassian.com/display/BAMBOO/Bamboo+Specs">https://confluence.atlassian.com/display/BAMBOO/Bamboo+Specs</a>
*/
@BambooSpec
public class PlanSpec {
private static final String SERVICE_NAME = "ocr-service";
private static final String SERVICE_KEY = SERVICE_NAME.toUpperCase().replaceAll("-", "");
/**
* Run main to publish plan on Bamboo
*/
public static void main(final String[] args) throws Exception {
//By default credentials are read from the '.credentials' file.
BambooServer bambooServer = new BambooServer("http://localhost:8085");
Plan plan = new PlanSpec().createPlan();
bambooServer.publish(plan);
PlanPermissions planPermission = new PlanSpec().createPlanPermission(plan.getIdentifier());
bambooServer.publish(planPermission);
Plan secPlan = new PlanSpec().createSecBuild();
bambooServer.publish(secPlan);
PlanPermissions secPlanPermission = new PlanSpec().createPlanPermission(secPlan.getIdentifier());
bambooServer.publish(secPlanPermission);
}
private PlanPermissions createPlanPermission(PlanIdentifier planIdentifier) {
Permissions permission = new Permissions().userPermissions("atlbamboo",
PermissionType.EDIT,
PermissionType.VIEW,
PermissionType.ADMIN,
PermissionType.CLONE,
PermissionType.BUILD)
.groupPermissions("Development", PermissionType.EDIT, PermissionType.VIEW, PermissionType.CLONE, PermissionType.BUILD)
.groupPermissions("devplant", PermissionType.EDIT, PermissionType.VIEW, PermissionType.CLONE, PermissionType.BUILD)
.loggedInUserPermissions(PermissionType.VIEW)
.anonymousUserPermissionView();
return new PlanPermissions(planIdentifier.getProjectKey(), planIdentifier.getPlanKey()).permissions(permission);
}
private Project project() {
return new Project().name("RED").key(new BambooKey("RED"));
}
public Plan createPlan() {
return new Plan(project(), SERVICE_NAME, new BambooKey(SERVICE_KEY)).description("Plan created from (enter repository url of your plan)")
.variables(new Variable("maven_add_param", ""))
.stages(new Stage("Default Stage").jobs(new Job("Default Job", new BambooKey("JOB1")).tasks(new ScriptTask().description("Clean")
.inlineBody("#!/bin/bash\n" + "set -e\n" + "rm -rf ./*"),
new VcsCheckoutTask().description("Checkout Default Repository").cleanCheckout(true).checkoutItems(new CheckoutItem().defaultRepository()),
new ScriptTask().description("Build").location(Location.FILE).fileFromPath("bamboo-specs/src/main/resources/scripts/build-java.sh").argument(SERVICE_NAME),
createJUnitParserTask().description("Resultparser")
.resultDirectories("**/test-reports/*.xml, **/target/surefire-reports/*.xml, **/target/failsafe-reports/*.xml")
.enabled(true),
new InjectVariablesTask().description("Inject git Tag").path("git.tag").namespace("g").scope(InjectVariablesScope.LOCAL),
new VcsTagTask().description("${bamboo.g.gitTag}").tagName("${bamboo.g.gitTag}").defaultRepository())
.dockerConfiguration(new DockerConfiguration().image("nexus.iqser.com:5001/infra/maven:3.8.4-openjdk-17-slim")
.volume("/etc/maven/settings.xml", "/usr/share/maven/conf/settings.xml")
.volume("/var/run/docker.sock", "/var/run/docker.sock"))))
.linkedRepositories("RED / " + SERVICE_NAME)
.triggers(new BitbucketServerTrigger())
.planBranchManagement(new PlanBranchManagement().createForVcsBranch()
.delete(new BranchCleanup().whenInactiveInRepositoryAfterDays(14))
.notificationForCommitters());
}
public Plan createSecBuild() {
return new Plan(project(), SERVICE_NAME + "-Sec", new BambooKey(SERVICE_KEY + "SEC")).description("Security Analysis Plan")
.stages(new Stage("Default Stage").jobs(new Job("Default Job", new BambooKey("JOB1")).tasks(new ScriptTask().description("Clean")
.inlineBody("#!/bin/bash\n" + "set -e\n" + "rm -rf ./*"),
new VcsCheckoutTask().description("Checkout Default Repository").checkoutItems(new CheckoutItem().defaultRepository()),
new ScriptTask().description("Sonar").location(Location.FILE).fileFromPath("bamboo-specs/src/main/resources/scripts/sonar-java.sh").argument(SERVICE_NAME))
.dockerConfiguration(new DockerConfiguration().image("nexus.iqser.com:5001/infra/maven:3.6.2-jdk-13-3.0.0")
.dockerRunArguments("--net=host")
.volume("/etc/maven/settings.xml", "/usr/share/maven/ref/settings.xml")
.volume("/var/run/docker.sock", "/var/run/docker.sock"))))
.linkedRepositories("RED / " + SERVICE_NAME)
.triggers(new ScheduledTrigger().scheduleOnceDaily(LocalTime.of(23, 00)))
.planBranchManagement(new PlanBranchManagement().createForVcsBranchMatching("release.*").notificationForCommitters());
}
}

View File

@ -0,0 +1,58 @@
#!/bin/bash
set -e
SERVICE_NAME=$1
if [[ "$bamboo_planRepository_branchName" == "master" ]]
then
branchVersion=$(cat pom.xml | grep -Eo "<version>.*" | sed -s 's|<version>\(.*\)\..*\(-*.*\)</version>|\1|')
latestVersion=$(semver $( git tag -l $branchVersion.* ) | tail -n1)
newVersion="$(semver $latestVersion -p -i minor)"
elif [[ "$bamboo_planRepository_branchName" == release* ]]
then
branchVersion=$(echo $bamboo_planRepository_branchName | sed -s 's|release\/\([0-9]\+\.[0-9]\+\)\.x|\1|')
latestVersion=$(semver $( git tag -l $branchVersion.* ) | tail -n1)
newVersion="$(semver $latestVersion -p -i patch)"
elif [[ "${bamboo_version_tag}" != "dev" ]]
then
newVersion="${bamboo_version_tag}"
else
mvn -f ${bamboo_build_working_directory}/$SERVICE_NAME-v1/pom.xml \
--no-transfer-progress \
${bamboo_maven_add_param} \
clean install \
-Djava.security.egd=file:/dev/./urandomelse
echo "gitTag=${bamboo_planRepository_1_branch}_${bamboo_buildNumber}" > git.tag
exit 0
fi
echo "gitTag=${newVersion}" > git.tag
mvn --no-transfer-progress \
-f ${bamboo_build_working_directory}/$SERVICE_NAME-v1/pom.xml \
versions:set \
-DnewVersion=${newVersion}
mvn --no-transfer-progress \
-f ${bamboo_build_working_directory}/$SERVICE_NAME-image-v1/pom.xml \
versions:set \
-DnewVersion=${newVersion}
mvn -f ${bamboo_build_working_directory}/$SERVICE_NAME-v1/pom.xml \
--no-transfer-progress \
clean deploy \
${bamboo_maven_add_param} \
-e \
-DdeployAtEnd=true \
-Dmaven.wagon.http.ssl.insecure=true \
-Dmaven.wagon.http.ssl.allowall=true \
-Dmaven.wagon.http.ssl.ignore.validity.dates=true \
-DaltDeploymentRepository=iqser_release::default::https://nexus.iqser.com/repository/red-platform-releases
mvn --no-transfer-progress \
-f ${bamboo_build_working_directory}/$SERVICE_NAME-image-v1/pom.xml \
package
mvn --no-transfer-progress \
-f ${bamboo_build_working_directory}/$SERVICE_NAME-image-v1/pom.xml \
docker:push

View File

@ -0,0 +1,44 @@
#!/bin/bash
set -e
SERVICE_NAME=$1
echo "build jar binaries"
mvn -f ${bamboo_build_working_directory}/$SERVICE_NAME-v1/pom.xml \
--no-transfer-progress \
clean install \
-Djava.security.egd=file:/dev/./urandomelse
echo "dependency-check:aggregate"
mvn --no-transfer-progress \
-f ${bamboo_build_working_directory}/$SERVICE_NAME-v1/pom.xml \
org.owasp:dependency-check-maven:aggregate
if [[ -z "${bamboo_repository_pr_key}" ]]
then
echo "Sonar Scan for branch: ${bamboo_planRepository_1_branch}"
mvn --no-transfer-progress \
-f ${bamboo_build_working_directory}/$SERVICE_NAME-v1/pom.xml \
sonar:sonar \
-Dsonar.projectKey=RED_$SERVICE_NAME \
-Dsonar.host.url=https://sonarqube.iqser.com \
-Dsonar.login=${bamboo_sonarqube_api_token_secret} \
-Dsonar.branch.name=${bamboo_planRepository_1_branch} \
-Dsonar.dependencyCheck.jsonReportPath=target/dependency-check-report.json \
-Dsonar.dependencyCheck.xmlReportPath=target/dependency-check-report.xml \
-Dsonar.dependencyCheck.htmlReportPath=target/dependency-check-report.html
else
echo "Sonar Scan for PR with key1: ${bamboo_repository_pr_key}"
mvn --no-transfer-progress \
-f ${bamboo_build_working_directory}/$SERVICE_NAME-v1/pom.xml \
sonar:sonar \
-Dsonar.projectKey=RED_$SERVICE_NAME \
-Dsonar.host.url=https://sonarqube.iqser.com \
-Dsonar.login=${bamboo_sonarqube_api_token_secret} \
-Dsonar.pullrequest.key=${bamboo_repository_pr_key} \
-Dsonar.pullrequest.branch=${bamboo_repository_pr_sourceBranch} \
-Dsonar.pullrequest.base=${bamboo_repository_pr_targetBranch} \
-Dsonar.dependencyCheck.jsonReportPath=target/dependency-check-report.json \
-Dsonar.dependencyCheck.xmlReportPath=target/dependency-check-report.xml \
-Dsonar.dependencyCheck.htmlReportPath=target/dependency-check-report.html
fi

View File

@ -0,0 +1,22 @@
package buildjob;
import org.junit.Test;
import com.atlassian.bamboo.specs.api.builders.plan.Plan;
import com.atlassian.bamboo.specs.api.exceptions.PropertiesValidationException;
import com.atlassian.bamboo.specs.api.util.EntityPropertiesBuilders;
public class PlanSpecTest {
@Test
public void checkYourPlanOffline() throws PropertiesValidationException {
Plan plan = new PlanSpec().createPlan();
EntityPropertiesBuilders.build(plan);
Plan secPlan = new PlanSpec().createSecBuild();
EntityPropertiesBuilders.build(secPlan);
}
}

View File

@ -0,0 +1,118 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<groupId>com.iqser.red</groupId>
<artifactId>platform-docker-dependency</artifactId>
<version>1.2.0</version>
<relativePath/>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>ocr-service-image-v1</artifactId>
<groupId>com.iqser.red.service</groupId>
<version>1.0-SNAPSHOT</version>
<packaging>pom</packaging>
<properties>
<service.server>ocr-service-server-v1</service.server>
<platform.jar>${service.server}.jar</platform.jar>
<docker.skip.push>false</docker.skip.push>
<docker.image.name>${docker.image.prefix}/${service.server}</docker.image.name>
</properties>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-resources-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
</plugin>
<plugin>
<groupId>io.fabric8</groupId>
<artifactId>docker-maven-plugin</artifactId>
</plugin>
</plugins>
<pluginManagement>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<executions>
<execution>
<id>download-platform-jar</id>
<phase>prepare-package</phase>
<goals>
<goal>copy</goal>
</goals>
<configuration>
<artifactItems>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>${service.server}</artifactId>
<version>${version}</version>
<type>jar</type>
<overWrite>true</overWrite>
<destFileName>${platform.jar}</destFileName>
</dependency>
</artifactItems>
<outputDirectory>${docker.build.directory}</outputDirectory>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>io.fabric8</groupId>
<artifactId>docker-maven-plugin</artifactId>
<configuration>
<images>
<image>
<name>${docker.image.name}</name>
<build>
<dockerFileDir>${docker.build.directory}</dockerFileDir>
<args>
<PLATFORM_JAR>${platform.jar}</PLATFORM_JAR>
</args>
<tags>
<tag>${docker.image.version}</tag>
<tag>latest</tag>
</tags>
</build>
</image>
</images>
</configuration>
</plugin>
<plugin>
<artifactId>maven-resources-plugin</artifactId>
<executions>
<execution>
<id>copy-resources</id>
<phase>prepare-package</phase>
<goals>
<goal>copy-resources</goal>
</goals>
<configuration>
<outputDirectory>${basedir}/target/build/libs/</outputDirectory>
<resources>
<resource>
<directory>libs</directory>
<filtering>false</filtering>
</resource>
</resources>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</pluginManagement>
</build>
</project>

View File

@ -0,0 +1,14 @@
FROM red/base-image:2.0.0
COPY "libs/pdftron/OCRModuleLinux.tar.gz" .
RUN tar xvzf OCRModuleLinux.tar.gz
RUN mkdir /OCRModule
RUN mv Lib/* /OCRModule/
ARG PLATFORM_JAR
ENV PLATFORM_JAR ${PLATFORM_JAR}
ENV USES_ELASTICSEARCH false
COPY ["${PLATFORM_JAR}", "/"]

View File

@ -0,0 +1,68 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.iqser.red.service</groupId>
<artifactId>ocr-service-v1</artifactId>
<version>1.0-SNAPSHOT</version>
</parent>
<artifactId>ocr-service-api-v1</artifactId>
<version>1.0-SNAPSHOT</version>
<properties>
<persistence-service.version>1.269.0</persistence-service.version>
<redaction-service.version>3.155.0</redaction-service.version>
<dsljson.version>1.9.9</dsljson.version>
</properties>
<dependencies>
<!-- https://mvnrepository.com/artifact/com.dslplatform/dsl-json-java8 -->
<dependency>
<groupId>com.dslplatform</groupId>
<artifactId>dsl-json-java8</artifactId>
<version>${dsljson.version}</version>
</dependency>
<dependency>
<!-- This dependency contains annotations that are used in specifying REST endpoints. -->
<!-- It is optional since not all users of this API might use Feign. -->
<groupId>io.github.openfeign</groupId>
<artifactId>feign-core</artifactId>
<optional>true</optional>
</dependency>
<!-- spring -->
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-web</artifactId>
</dependency>
<dependency>
<groupId>com.iqser.red.service</groupId>
<artifactId>persistence-service-api-v1</artifactId>
<exclusions>
<exclusion>
<groupId>com.iqser.red.service</groupId>
<artifactId>redaction-service-api-v1</artifactId>
</exclusion>
</exclusions>
<version>${persistence-service.version}</version>
</dependency>
<dependency>
<groupId>com.iqser.red.service</groupId>
<artifactId>redaction-service-api-v1</artifactId>
<exclusions>
<exclusion>
<groupId>com.iqser.red.service</groupId>
<artifactId>persistence-service-api-v1</artifactId>
</exclusion>
</exclusions>
<version>${redaction-service.version}</version>
</dependency>
</dependencies>
</project>

View File

@ -0,0 +1,16 @@
package com.iqser.red.service.ocr.v1.api.model;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.experimental.SuperBuilder;
@Data
@NoArgsConstructor
@AllArgsConstructor
public class DocumentRequest {
protected String dossierId;
protected String fileId;
}

View File

@ -0,0 +1,19 @@
package com.iqser.red.service.ocr.v1.api.model;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
@Data
@NoArgsConstructor
@AllArgsConstructor
@Builder
public class OCRStatusUpdateResponse {
private String fileId;
private int numberOfPagesToOCR;
private int numberOfOCRedPages;
private boolean ocrFinished;
}

View File

@ -0,0 +1,145 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.iqser.red.service</groupId>
<artifactId>ocr-service-v1</artifactId>
<version>1.0-SNAPSHOT</version>
</parent>
<artifactId>ocr-service-server-v1</artifactId>
<version>1.0-SNAPSHOT</version>
<dependencies>
<dependency>
<groupId>com.iqser.red.service</groupId>
<artifactId>ocr-service-api-v1</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.iqser.red.commons</groupId>
<artifactId>storage-commons</artifactId>
</dependency>
<dependency>
<groupId>com.iqser.red.commons</groupId>
<artifactId>spring-commons</artifactId>
</dependency>
<dependency>
<groupId>com.iqser.red.commons</groupId>
<artifactId>metric-commons</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.cloud</groupId>
<artifactId>spring-cloud-starter-openfeign</artifactId>
</dependency>
<dependency>
<groupId>com.pdftron</groupId>
<artifactId>PDFNet</artifactId>
<version>9.4.0</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-amqp</artifactId>
<version>2.3.1.RELEASE</version>
</dependency>
<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk-kms</artifactId>
<version>1.12.158</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</dependency>
<!-- Test -->
<dependency>
<groupId>com.iqser.red.commons</groupId>
<artifactId>test-commons</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.springframework.amqp</groupId>
<artifactId>spring-rabbit-test</artifactId>
<version>2.3.1</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<exclusions>
<exclusion>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-tomcat</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.12.0</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<annotationProcessors>
<annotationProcessor>lombok.launch.AnnotationProcessorHider$AnnotationProcessor</annotationProcessor>
<annotationProcessor>com.dslplatform.json.processor.CompiledJsonAnnotationProcessor</annotationProcessor>
</annotationProcessors>
</configuration>
</plugin>
<plugin>
<!-- generate git.properties for exposure in /info -->
<groupId>pl.project13.maven</groupId>
<artifactId>git-commit-id-plugin</artifactId>
<executions>
<execution>
<goals>
<goal>revision</goal>
</goals>
<configuration>
<generateGitPropertiesFile>true</generateGitPropertiesFile>
<gitDescribe>
<tags>true</tags>
</gitDescribe>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<!-- repackages the generated jar into a runnable fat-jar and makes it
executable -->
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<executions>
<execution>
<goals>
<goal>repackage</goal>
</goals>
<configuration>
<executable>true</executable>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
<repositories>
<repository>
<id>pdftron</id>
<name>PDFNet Maven</name>
<url>https://pdftron.com/maven/release</url>
</repository>
</repositories>
</project>

View File

@ -0,0 +1,45 @@
package com.iqser.red.service.ocr.v1.server;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.actuate.autoconfigure.security.servlet.ManagementWebSecurityAutoConfiguration;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.autoconfigure.security.servlet.SecurityAutoConfiguration;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.cloud.openfeign.EnableFeignClients;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Import;
import org.springframework.scheduling.annotation.EnableAsync;
import com.iqser.red.commons.spring.DefaultWebMvcConfiguration;
import com.iqser.red.service.ocr.v1.server.client.FileStatusProcessingUpdateClient;
import com.iqser.red.service.ocr.v1.server.configuration.MessagingConfiguration;
import com.iqser.red.service.ocr.v1.server.settings.OcrServiceSettings;
import io.micrometer.core.aop.TimedAspect;
import io.micrometer.core.instrument.MeterRegistry;
@EnableAsync
@EnableConfigurationProperties(OcrServiceSettings.class)
@SpringBootApplication(exclude = {SecurityAutoConfiguration.class, ManagementWebSecurityAutoConfiguration.class})
@Import({DefaultWebMvcConfiguration.class, MessagingConfiguration.class})
@EnableFeignClients(basePackageClasses = FileStatusProcessingUpdateClient.class)
public class Application {
/**
* Entry point to the service application.
*
* @param args Any command line parameter given upon startup.
*/
public static void main(String[] args) {
SpringApplication.run(Application.class, args);
}
@Bean
public TimedAspect timedAspect(MeterRegistry registry) {
return new TimedAspect(registry);
}
}

View File

@ -0,0 +1,10 @@
package com.iqser.red.service.ocr.v1.server.client;
import org.springframework.cloud.openfeign.FeignClient;
import com.iqser.red.service.persistence.service.v1.api.resources.FileStatusProcessingUpdateResource;
@FeignClient(name = "FileStatusProcessingUpdateResource", url = "${persistence-service.url}")
public interface FileStatusProcessingUpdateClient extends FileStatusProcessingUpdateResource {
}

View File

@ -0,0 +1,42 @@
package com.iqser.red.service.ocr.v1.server.configuration;
import org.springframework.amqp.core.Queue;
import org.springframework.amqp.core.QueueBuilder;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import lombok.RequiredArgsConstructor;
@Configuration
@RequiredArgsConstructor
public class MessagingConfiguration {
public static final String OCR_QUEUE = "ocrQueue";
public static final String OCR_DLQ = "ocrDLQ";
public static final String X_DEAD_LETTER_EXCHANGE = "x-dead-letter-exchange";
public static final String X_DEAD_LETTER_ROUTING_KEY = "x-dead-letter-routing-key";
public static final String X_MAX_PRIORITY = "x-max-priority";
public static final String OCR_STATUS_UPDATE_RESPONSE_QUEUE = "ocr_status_update_response_queue";
@Bean
public Queue ocrQueue() {
return QueueBuilder.durable(OCR_QUEUE)
.withArgument(X_DEAD_LETTER_EXCHANGE, "")
.withArgument(X_DEAD_LETTER_ROUTING_KEY, OCR_DLQ)
.withArgument(X_MAX_PRIORITY, 2)
.maxPriority(2)
.build();
}
@Bean
public Queue ocrDeadLetterQueue() {
return QueueBuilder.durable(OCR_DLQ).build();
}
}

View File

@ -0,0 +1,35 @@
package com.iqser.red.service.ocr.v1.server.initializer;
import javax.annotation.PostConstruct;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import com.pdftron.pdf.PDFNet;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
@Component
@RequiredArgsConstructor
public class PDFNetInitializer {
@Value("${pdftron.license:}")
private String pdftronLicense;
@Value("${pdftron.ocrmodule.path:/tmp}")
private String ocrModulePath;
@SneakyThrows
@PostConstruct
// Do not change back to application runner, if it is application runner it takes messages from the queue before PDFNet is initialized, that leads to UnsatisfiedLinkError.
public void init() {
PDFNet.setTempPath("/tmp/pdftron");
PDFNet.addResourceSearchPath(ocrModulePath);
PDFNet.initialize(pdftronLicense);
}
}

View File

@ -0,0 +1,15 @@
package com.iqser.red.service.ocr.v1.server.model;
import com.iqser.red.service.redaction.v1.model.Rectangle;
import lombok.AllArgsConstructor;
import lombok.Data;
@Data
@AllArgsConstructor
public class ImagePosition {
private Rectangle rectangle;
private boolean hasTransparency;
}

View File

@ -0,0 +1,14 @@
package com.iqser.red.service.ocr.v1.server.model.image;
import java.util.HashMap;
import java.util.Map;
import lombok.Data;
@Data
public class Classification {
private Map<String, Float> probabilities = new HashMap<>();
private String label;
}

View File

@ -0,0 +1,11 @@
package com.iqser.red.service.ocr.v1.server.model.image;
import lombok.Data;
@Data
public class FilterGeometry {
private ImageSize imageSize;
private ImageFormat imageFormat;
}

View File

@ -0,0 +1,12 @@
package com.iqser.red.service.ocr.v1.server.model.image;
import lombok.Data;
@Data
public class Filters {
private FilterGeometry geometry;
private Probability probability;
private boolean allPassed;
}

View File

@ -0,0 +1,11 @@
package com.iqser.red.service.ocr.v1.server.model.image;
import lombok.Data;
@Data
public class Geometry {
private float width;
private float height;
}

View File

@ -0,0 +1,12 @@
package com.iqser.red.service.ocr.v1.server.model.image;
import lombok.Data;
@Data
public class ImageFormat {
private float quotient;
private boolean tooTall;
private boolean tooWide;
}

View File

@ -0,0 +1,14 @@
package com.iqser.red.service.ocr.v1.server.model.image;
import lombok.Data;
@Data
public class ImageMetadata {
private Classification classification;
private Position position;
private Geometry geometry;
private Filters filters;
private boolean alpha;
}

View File

@ -0,0 +1,31 @@
package com.iqser.red.service.ocr.v1.server.model.image;
import java.util.ArrayList;
import java.util.List;
import com.dslplatform.json.CompiledJson;
import com.dslplatform.json.JsonAttribute;
import com.fasterxml.jackson.annotation.JsonAlias;
import com.fasterxml.jackson.annotation.JsonProperty;
import lombok.Data;
@Data
@CompiledJson
public class ImageServiceResponse {
private String dossierId;
private String fileId;
@JsonProperty(value = "imageMetadata")
@JsonAlias("data")
@JsonAttribute(alternativeNames = {"imageMetadata"})
private List<ImageMetadata> data = new ArrayList<>();
@JsonProperty(value = "imageMetadata")
@JsonAlias("data")
@JsonAttribute(alternativeNames = {"imageMetadata"})
public void setData(List<ImageMetadata> data) {this.data = data;}
}

View File

@ -0,0 +1,12 @@
package com.iqser.red.service.ocr.v1.server.model.image;
import lombok.Data;
@Data
public class ImageSize {
private float quotient;
private boolean tooLarge;
private boolean tooSmall;
}

View File

@ -0,0 +1,14 @@
package com.iqser.red.service.ocr.v1.server.model.image;
import lombok.Data;
@Data
public class Position {
private float x1;
private float x2;
private float y1;
private float y2;
private int pageNumber;
}

View File

@ -0,0 +1,10 @@
package com.iqser.red.service.ocr.v1.server.model.image;
import lombok.Data;
@Data
public class Probability {
private boolean unconfident;
}

View File

@ -0,0 +1,69 @@
package com.iqser.red.service.ocr.v1.server.service;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import org.apache.commons.io.IOUtils;
import org.springframework.stereotype.Service;
import com.iqser.red.service.ocr.v1.server.model.image.ImageServiceResponse;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.storage.commons.service.StorageService;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@Service
@RequiredArgsConstructor
public class FileStorageService {
private final StorageService storageService;
public static String getStorageId(String dossierId, String fileId, FileType fileType) {
return dossierId + "/" + fileId + "." + fileType.name() + fileType.getExtension();
}
@SneakyThrows
public byte[] getOriginalFile(String dossierId, String fileId) {
return IOUtils.toByteArray(storageService.getObject(getStorageId(dossierId, fileId, FileType.ORIGIN)).getInputStream());
}
@SneakyThrows
public InputStream getOriginalFileAsStream(String dossierId, String fileId) {
return storageService.getObject(getStorageId(dossierId, fileId, FileType.ORIGIN)).getInputStream();
}
public void storeOriginalFile(String dossierId, String fileId, InputStream stream) {
storageService.storeObject(getStorageId(dossierId, fileId, FileType.ORIGIN), stream);
}
public boolean untouchedFileExists(String dossierId, String fileId) {
return storageService.objectExists(getStorageId(dossierId, fileId, FileType.UNTOUCHED));
}
public void storeUntouchedFile(String dossierId, String fileId, byte[] data) {
storageService.storeObject(getStorageId(dossierId, fileId, FileType.UNTOUCHED), new ByteArrayInputStream(data));
}
@SneakyThrows
public ImageServiceResponse getImageServiceResponse(String dossierId, String fileId) {
return storageService.readJSONObject(getStorageId(dossierId, fileId, FileType.IMAGE_INFO), ImageServiceResponse.class);
}
}

View File

@ -0,0 +1,325 @@
package com.iqser.red.service.ocr.v1.server.service;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import org.apache.commons.io.IOUtils;
import org.springframework.amqp.rabbit.core.RabbitTemplate;
import org.springframework.stereotype.Service;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.ocr.v1.api.model.OCRStatusUpdateResponse;
import com.iqser.red.service.ocr.v1.server.configuration.MessagingConfiguration;
import com.iqser.red.service.ocr.v1.server.model.ImagePosition;
import com.iqser.red.service.ocr.v1.server.model.image.ImageServiceResponse;
import com.iqser.red.service.ocr.v1.server.settings.OcrServiceSettings;
import com.iqser.red.service.persistence.service.v1.api.utils.SuppressFBWarnings;
import com.iqser.red.service.redaction.v1.model.Point;
import com.iqser.red.service.redaction.v1.model.Rectangle;
import com.pdftron.common.PDFNetException;
import com.pdftron.pdf.Element;
import com.pdftron.pdf.ElementReader;
import com.pdftron.pdf.ElementWriter;
import com.pdftron.pdf.OCRModule;
import com.pdftron.pdf.OCROptions;
import com.pdftron.pdf.Optimizer;
import com.pdftron.pdf.PDFDoc;
import com.pdftron.pdf.Page;
import com.pdftron.pdf.PageIterator;
import com.pdftron.pdf.Rect;
import com.pdftron.pdf.RectCollection;
import com.pdftron.sdf.Obj;
import com.pdftron.sdf.SDFDoc;
import io.micrometer.core.annotation.Timed;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@Service
@RequiredArgsConstructor
public class OCRService {
public static final String ENGLISH = "eng";
private final FileStorageService fileStorageService;
private final OcrServiceSettings settings;
private final RabbitTemplate rabbitTemplate;
private final ObjectMapper objectMapper;
@Timed("redactmanager_PDFTron-ocrDocument")
@SneakyThrows
public InputStream ocrDocument(String dossierId, String fileId) {
var fileStream = fileStorageService.getOriginalFileAsStream(dossierId, fileId);
var imageServiceResponse = fileStorageService.getImageServiceResponse(dossierId, fileId);
var fileBytes = IOUtils.toByteArray(fileStream);
var ocrBytes = ocr(fileBytes, fileId, imageServiceResponse);
return new ByteArrayInputStream(ocrBytes);
}
@SuppressFBWarnings("REC_CATCH_EXCEPTION")
private byte[] ocr(byte[] file, String fileId, ImageServiceResponse imageServiceResponse) {
PDFDoc pdfDoc = null;
try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
pdfDoc = new PDFDoc(file);
removeInvisibleText(pdfDoc);
Map<Integer, List<ImagePosition>> pages = new HashMap<>();
// TODO take logic to ignore small and combine images from image-service.
// TODO Then replace logic so ocr-service is independent from image-service.
imageServiceResponse.getData()
.forEach(imageMetadata -> pages.computeIfAbsent(imageMetadata.getPosition().getPageNumber(), x -> new ArrayList<>())
.add(new ImagePosition(new Rectangle(new Point(imageMetadata.getPosition().getX1(), imageMetadata.getPosition().getY1()),
imageMetadata.getGeometry().getWidth(),
imageMetadata.getGeometry().getHeight(),
imageMetadata.getPosition().getPageNumber()), imageMetadata.isAlpha())));
Map<Integer, PDFDoc> pdfDocMap = Collections.synchronizedMap(new HashMap<>());
rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_QUEUE,
objectMapper.writeValueAsString(OCRStatusUpdateResponse.builder().fileId(fileId).numberOfPagesToOCR(pages.keySet().size()).build()));
ocrPages(pdfDoc, fileId, pages, pdfDocMap);
for (var entry : pdfDocMap.entrySet()) {
var ocrDoc = entry.getValue();
var page = entry.getKey();
Page ocrPage = ocrDoc.getPageIterator(1).next();
pdfDoc.pageInsert(pdfDoc.getPageIterator(page), ocrPage);
pdfDoc.pageRemove(pdfDoc.getPageIterator(page + 1));
ocrDoc.close();
}
Optimizer.optimize(pdfDoc);
pdfDoc.save(out, SDFDoc.SaveMode.LINEARIZED, null);
pdfDoc.close();
rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_QUEUE,
objectMapper.writeValueAsString(OCRStatusUpdateResponse.builder()
.fileId(fileId)
.numberOfPagesToOCR(pages.keySet().size())
.numberOfOCRedPages(pages.keySet().size())
.ocrFinished(true)
.build()));
return out.toByteArray();
} catch (Exception e) {
throw new RuntimeException(e);
} finally {
if (pdfDoc != null) {
try {
pdfDoc.close();
} catch (Exception e) {
log.debug("Failed to close document", e);
}
}
}
}
@SneakyThrows
private void ocrPages(PDFDoc pdfDoc, String fileId, Map<Integer, List<ImagePosition>> pages, Map<Integer, PDFDoc> pdfDocMap) {
int numberOfOCRedPages = 0;
for (var pageEntry : pages.entrySet()) {
try {
RectCollection rectCollection = new RectCollection();
var page = pageEntry.getKey();
Page pdfPage = pdfDoc.getPageIterator(page).next();
pdfPage.setMediaBox(pdfPage.getCropBox());
for (ImagePosition imagePosition : pageEntry.getValue()) {
Rectangle rectangle = imagePosition.getRectangle();
// Warning coordinate system is different in this call macOs/Linux
double y = -rectangle.getTopLeft().getY() + pdfPage.getCropBox().getY2() - rectangle.getHeight();
rectCollection.addRect(rectangle.getTopLeft().getX(), y, rectangle.getTopLeft().getX() + rectangle.getWidth(), y + rectangle.getHeight());
}
PDFDoc ocrDoc = new PDFDoc();
ocrDoc.pagePushBack(pdfPage);
pdfDocMap.put(pageEntry.getKey(), ocrDoc);
OCROptions options = new OCROptions();
options.addTextZonesForPage(rectCollection, 1);
options.addLang(ENGLISH);
options.addDPI(settings.getOcrDPI());
OCRModule.processPDF(ocrDoc, options);
rectCollection.clear();
} catch (Exception e) {
log.warn("Failed to process PDF page {} - {}", pageEntry.getKey(), e);
}
rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_QUEUE,
objectMapper.writeValueAsString(OCRStatusUpdateResponse.builder()
.fileId(fileId)
.numberOfPagesToOCR(pages.keySet().size())
.numberOfOCRedPages(++numberOfOCRedPages)
.build()));
log.warn("Done page {}", pageEntry);
}
}
/**
* There are 2 possibilities to have invisible Text in pdfs.
* 1. gState is set to invisible, this is ocr text.
* 2. Filled Path elements in front of the text.
*/
@SneakyThrows
private void removeInvisibleText(PDFDoc pdfDoc) {
ElementWriter writer = new ElementWriter();
ElementReader reader = new ElementReader();
Set<Integer> visited = new TreeSet<>();
for (PageIterator iterator = pdfDoc.getPageIterator(); iterator.hasNext(); ) {
Page page = iterator.next();
removeOverlapText(page, reader, writer, visited);
}
}
@SneakyThrows
private void removeOverlapText(Page page, ElementReader reader, ElementWriter writer, Set<Integer> visited) {
visited.add((int) page.getSDFObj().getObjNum());
reader.begin(page);
writer.begin(page, ElementWriter.e_replacement, false, true, page.getResourceDict());
processElements(reader, writer, visited, false);
writer.end();
reader.end();
}
@SneakyThrows
private void processElements(ElementReader reader, ElementWriter writer, Set<Integer> visited, boolean isInForm) {
Set<Rect> filledRectangles = new HashSet<>();
for (Element element = reader.next(); element != null; element = reader.next())
switch (element.getType()) {
case Element.e_image:
case Element.e_inline_image:
processImage(element, writer, isInForm);
break;
case Element.e_text:
processText(element, writer, filledRectangles);
break;
case Element.e_path:
processPath(element, writer, filledRectangles);
break;
case Element.e_form:
processForm(reader, writer, element, visited);
break;
default:
writer.writeElement(element);
}
}
@SneakyThrows
private void processImage(Element element, ElementWriter writer, boolean isInForm) {
if (!isInForm || !settings.isRemoveWatermark()) {
writer.writeElement(element);
}
}
@SneakyThrows
private void processText(Element element, ElementWriter writer, Set<Rect> filledRectangles) {
if (element.getBBox() == null) {
writer.writeElement(element);
return;
}
double x = element.getBBox().getX1();
double y = element.getBBox().getY1();
boolean filledRectangleIntersection = filledRectangles.stream().anyMatch(r -> {
try {
return r.contains(x, y);
} catch (PDFNetException e) {
throw new RuntimeException("Internal pdftron error during removal of overlap text", e);
}
});
var gState = element.getGState();
//See PDF Reference 5.3 Text rendering modes, 3 = Invisible, however this ocr does not use it.
if (!filledRectangleIntersection && gState.getTextRenderMode() != 3) {
writer.writeElement(element);
}
}
@SneakyThrows
private void processPath(Element element, ElementWriter writer, Set<Rect> filledRectangles) {
if (element.getPathData() != null && element.getPathData().getPoints().length > 4) {
filledRectangles.add(element.getBBox());
}
writer.writeElement(element);
}
@SneakyThrows
private void processForm(ElementReader reader, ElementWriter writer, Element element, Set<Integer> visited) {
writer.writeElement(element);
Obj formObj = element.getXObject();
if (!visited.contains((int) formObj.getObjNum())) {
visited.add((int) formObj.getObjNum());
ElementWriter new_writer = new ElementWriter();
reader.formBegin();
new_writer.begin(formObj);
reader.clearChangeList();
new_writer.setDefaultGState(reader);
processElements(reader, new_writer, visited, true);
new_writer.end();
reader.end();
}
}
}

View File

@ -0,0 +1,79 @@
package com.iqser.red.service.ocr.v1.server.service;
import org.springframework.amqp.AmqpRejectAndDontRequeueException;
import org.springframework.amqp.rabbit.annotation.RabbitHandler;
import org.springframework.amqp.rabbit.annotation.RabbitListener;
import org.springframework.http.HttpStatus;
import org.springframework.stereotype.Service;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.ocr.v1.server.client.FileStatusProcessingUpdateClient;
import com.iqser.red.service.ocr.v1.server.configuration.MessagingConfiguration;
import com.iqser.red.service.ocr.v1.api.model.DocumentRequest;
import feign.FeignException;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@Service
@RequiredArgsConstructor
public class OcrMessageReceiver {
private final ObjectMapper objectMapper;
private final FileStorageService fileStorageService;
private final FileStatusProcessingUpdateClient fileStatusProcessingUpdateClient;
private final OCRService ocrService;
@RabbitHandler
@RabbitListener(queues = MessagingConfiguration.OCR_QUEUE, concurrency = "1")
public void receiveOcr(String in) throws JsonProcessingException {
DocumentRequest ocrRequestMessage = objectMapper.readValue(in, DocumentRequest.class);
long start = System.currentTimeMillis();
log.info("Start ocr for file with dossierId {} and fileId {}", ocrRequestMessage.getDossierId(), ocrRequestMessage.getFileId());
setStatusOcrProcessing(ocrRequestMessage.getDossierId(), ocrRequestMessage.getFileId());
if (!fileStorageService.untouchedFileExists(ocrRequestMessage.getDossierId(), ocrRequestMessage.getFileId())) {
byte[] originalFile = fileStorageService.getOriginalFile(ocrRequestMessage.getDossierId(), ocrRequestMessage.getFileId());
fileStorageService.storeUntouchedFile(ocrRequestMessage.getDossierId(), ocrRequestMessage.getFileId(), originalFile);
}
var ocrResult = ocrService.ocrDocument(ocrRequestMessage.getDossierId(), ocrRequestMessage.getFileId());
fileStorageService.storeOriginalFile(ocrRequestMessage.getDossierId(), ocrRequestMessage.getFileId(), ocrResult);
long end = System.currentTimeMillis();
log.info("Successfully processed ocr for file with dossierId {} and fileId {}, took {}", ocrRequestMessage.getDossierId(), ocrRequestMessage.getFileId(), end - start);
fileStatusProcessingUpdateClient.ocrSuccessful(ocrRequestMessage.getDossierId(), ocrRequestMessage.getFileId());
}
@RabbitHandler
@RabbitListener(queues = MessagingConfiguration.OCR_DLQ, concurrency = "1")
public void receiveOcrDQL(String in) throws JsonProcessingException {
DocumentRequest ocrRequestMessage = objectMapper.readValue(in, DocumentRequest.class);
log.info("OCR DQL received: {}", ocrRequestMessage);
fileStatusProcessingUpdateClient.ocrFailed(ocrRequestMessage.getDossierId(), ocrRequestMessage.getFileId());
}
private void setStatusOcrProcessing(String dossierId, String fileId) {
try {
fileStatusProcessingUpdateClient.ocrProcessing(dossierId, fileId);
} catch (FeignException e) {
if (e.status() == HttpStatus.CONFLICT.value()) {
throw new AmqpRejectAndDontRequeueException(e.getMessage());
}
}
}
}

View File

@ -0,0 +1,14 @@
package com.iqser.red.service.ocr.v1.server.settings;
import org.springframework.boot.context.properties.ConfigurationProperties;
import lombok.Data;
@Data
@ConfigurationProperties("ocr-service")
public class OcrServiceSettings {
private int ocrDPI = 300;
private boolean removeWatermark;
}

View File

@ -0,0 +1,9 @@
{
"properties": [
{
"name": "persistence-service.url",
"type": "java.lang.String",
"description": "URL of the persistence service."
}
]
}

View File

@ -0,0 +1,10 @@
server:
port: 8086
persistence-service.url: "http://persistence-service-v1:8080"
storage:
bucket-name: 'redaction'
endpoint: 'http://localhost:9000'
key: minioadmin
secret: minioadmin

View File

@ -0,0 +1,49 @@
info:
description: OCR Service V1 Server
persistence-service.url: "http://persistence-service-v1:8080"
server:
port: 8080
spring:
main:
allow-circular-references: true # FIXME
profiles:
active: kubernetes
rabbitmq:
host: ${RABBITMQ_HOST:localhost}
port: ${RABBITMQ_PORT:5672}
username: ${RABBITMQ_USERNAME:user}
password: ${RABBITMQ_PASSWORD:rabbitmq}
listener:
simple:
acknowledge-mode: AUTO
concurrency: 2
retry:
enabled: true
max-attempts: 3
max-interval: 15000
prefetch: 1
platform.multi-tenancy:
enabled: false
management:
endpoint:
metrics.enabled: ${monitoring.enabled:false}
prometheus.enabled: ${monitoring.enabled:false}
health.enabled: true
endpoints.web.exposure.include: prometheus, health
metrics.export.prometheus.enabled: ${monitoring.enabled:false}
storage:
signer-type: 'AWSS3V4SignerType'
bucket-name: 'redaction'
region: 'us-east-1'
endpoint: 'https://s3.amazonaws.com'
backend: 's3'
pdftron.license: ${PDFTRON_LICENSE}

View File

@ -0,0 +1,22 @@
------------------------------------------------------------------
| |
| OCR Service V1 Server |
| |
________________________________________________________________
| |
| ___ ________ ________ _________ _________ |
| | | / \ / || || \ |
| | | / ____ \ / _____|| _____||______ \ |
| | | / / \ \| / | | \ | |
| | || | | || \____ | |____ ______/ | |
| | || | | || \ | | | | |
| | || | ___| | \_____ || ____| | _ _/ |
| | || | \ \ | \ || | | | \ \ |
| | | \ \_ \ / ______/ || |_____ | | \ \ |
| | | \ \ \ \ | /| || | \ \ |
| |___| \____\ \___\|_________/ |_________||___| \___\ |
| |
| |
| F r o m d a t a t o i n f o r m a t i o n |
| |
|________________________________________________________________|

View File

@ -0,0 +1,7 @@
spring:
application:
name: ocr-service-v1
management.endpoints:
web.base-path: /
enabled-by-default: false

View File

@ -0,0 +1,112 @@
package com.iqser.red.service.ocr.v1.server;
import static com.iqser.red.service.ocr.v1.server.utils.OsUtils.getTemporaryDirectory;
import static org.assertj.core.api.Assertions.assertThat;
import java.io.File;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.springframework.amqp.rabbit.core.RabbitTemplate;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.boot.test.mock.mockito.MockBean;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Import;
import org.springframework.context.annotation.Primary;
import org.springframework.core.io.ClassPathResource;
import org.springframework.test.context.junit.jupiter.SpringExtension;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.ocr.v1.server.utils.FileSystemBackedStorageService;
import com.iqser.red.service.ocr.v1.server.service.OCRService;
import com.iqser.red.service.ocr.v1.server.service.FileStorageService;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.storage.commons.StorageAutoConfiguration;
import com.iqser.red.storage.commons.service.StorageService;
import lombok.SneakyThrows;
@ExtendWith(SpringExtension.class)
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT //
, properties = {"pdftron.ocrmodule.path=/YourOCRModulePath"})
@Import(OcrServiceIntegrationTest.TestConfiguration.class)
public class OcrServiceIntegrationTest {
@Autowired
protected StorageService storageService;
@Autowired
protected FileStorageService fileStorageService;
@Autowired
protected ObjectMapper objectMapper;
@MockBean
protected RabbitTemplate rabbitTemplate;
@Autowired
private OCRService ocrService;
@Test
@Disabled // OCRModule is not available on build server. If you want to run the test set the property at the top.
@SneakyThrows
public void testOCR() {
String fileName = "Watermark";
ClassPathResource imageInfoResource = new ClassPathResource("files/" + fileName + ".IMAGE_INFO.json");
ClassPathResource pdfFileResource = new ClassPathResource("files/" + fileName + ".pdf");
var originId = FileStorageService.getStorageId("dossier", "file", FileType.ORIGIN);
storageService.storeObject(originId, pdfFileResource.getInputStream());
var imageId = FileStorageService.getStorageId("dossier", "file", FileType.IMAGE_INFO);
storageService.storeObject(imageId, imageInfoResource.getInputStream());
var response = ocrService.ocrDocument("dossier", "file");
var out = FileUtils.openOutputStream(new File(getTemporaryDirectory() + "/" + fileName + ".pdf"));
IOUtils.copy(response, out);
}
@SneakyThrows
public void dummyTest() {
// Build needs one text to not fail.
assertThat(1).isEqualTo(1);
}
@AfterEach
public void cleanupStorage() {
if (this.storageService instanceof FileSystemBackedStorageService) {
((FileSystemBackedStorageService) this.storageService).clearStorage();
}
}
@Configuration
@EnableAutoConfiguration(exclude = {StorageAutoConfiguration.class, RabbitAutoConfiguration.class})
public static class TestConfiguration {
@Bean
@Primary
public StorageService inmemoryStorage() {
return new FileSystemBackedStorageService();
}
}
}

View File

@ -0,0 +1,125 @@
package com.iqser.red.service.ocr.v1.server.utils;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.springframework.core.io.InputStreamResource;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.commons.jackson.ObjectMapperFactory;
import com.iqser.red.storage.commons.exception.StorageObjectDoesNotExist;
import com.iqser.red.storage.commons.service.StorageService;
import lombok.SneakyThrows;
public class FileSystemBackedStorageService implements StorageService {
private final Map<String, File> dataMap = new HashMap<>();
public FileSystemBackedStorageService() {
}
@SneakyThrows
@Override
public InputStreamResource getObject(String objectId) {
var res = dataMap.get(objectId);
if (res == null) {
throw new StorageObjectDoesNotExist(new RuntimeException());
}
return new InputStreamResource(new FileInputStream(res));
}
@Override
public void deleteObject(String objectId) {
dataMap.remove(objectId);
}
@Override
public boolean objectExists(String objectId) {
return dataMap.containsKey(objectId);
}
@Override
public void init() {
}
@Override
@SneakyThrows
public <T> void storeJSONObject(String objectId, T any) {
File tempFile = File.createTempFile("test", ".tmp");
getMapper().writeValue(new FileOutputStream(tempFile), any);
dataMap.put(objectId, tempFile);
}
private ObjectMapper getMapper() {
return ObjectMapperFactory.create();
}
@Override
@SneakyThrows
public <T> T readJSONObject(String objectId, Class<T> clazz) {
if (dataMap.get(objectId) == null || !dataMap.get(objectId).exists()) {
throw new StorageObjectDoesNotExist("Stored object not found");
}
return getMapper().readValue(new FileInputStream(dataMap.get(objectId)), clazz);
}
public List<String> listPaths() {
return new ArrayList<>(dataMap.keySet());
}
public List<String> listFilePaths() {
return dataMap.values().stream().map(File::getAbsolutePath).collect(Collectors.toList());
}
@Override
@SneakyThrows
public void storeObject(String objectId, InputStream stream) {
File tempFile = File.createTempFile("test", ".tmp");
try (var fileOutputStream = new FileOutputStream(tempFile)) {
IOUtils.copy(stream, fileOutputStream);
}
dataMap.put(objectId, tempFile);
}
public void clearStorage() {
this.dataMap.forEach((k, v) -> v.delete());
this.dataMap.clear();
}
}

View File

@ -0,0 +1,58 @@
package com.iqser.red.service.ocr.v1.server.utils;
import org.apache.commons.lang3.StringUtils;
import lombok.SneakyThrows;
public final class OsUtils {
private static final String SERVICE_NAME = "pdftron-redaction-service-v1";
private OsUtils() {
throw new IllegalStateException("Utility class");
}
@SneakyThrows
public static String getTemporaryDirectory(String suffix, String fileId) {
return addBackSlashAtEnd(getTemporaryDirectory()) + addBackSlashAtEnd(SERVICE_NAME) + addBackSlashAtEnd(suffix) + addBackSlashAtEnd(fileId);
}
private static boolean isWindows() {
return StringUtils.containsIgnoreCase(System.getProperty("os.name"), "Windows");
}
private static String addBackSlashAtEnd(String s) {
return removeSlashAtBegin(StringUtils.endsWithIgnoreCase(s, "\\") ? s : removeForwardSlashAtEnd(s) + "\\");
}
private static String removeForwardSlashAtEnd(String s) {
return StringUtils.endsWithIgnoreCase(s, "/") ? StringUtils.substring(s, s.length() - 1) : s;
}
private static String removeSlashAtBegin(String s) {
return StringUtils.startsWithIgnoreCase(s, "/") || StringUtils.startsWithIgnoreCase(s, "\\") ? StringUtils.substring(s, 1) : s;
}
public static String getTemporaryDirectory() {
String tmpdir = System.getProperty("java.io.tmpdir");
if (isWindows() && StringUtils.isNotBlank(tmpdir)) {
return tmpdir;
}
return "/tmp";
}
}

View File

@ -0,0 +1,8 @@
persistence-service.url: "http://persistence-service-v1:8080"
spring:
main:
allow-circular-references: true # FIXME
pdftron.license: demo:1650351709282:7bd235e003000000004ec28a6743e1163a085e2115de2536ab6e2cfe5a

View File

@ -0,0 +1 @@
{"dossierId": "55547c91-6b0e-4aa6-9009-2e7c4cd90f13", "fileId": "917b9d9c9f548f85fef3679db45ff46c", "targetFileExtension": "ORIGIN.pdf.gz", "responseFileExtension": "IMAGE_INFO.json.gz", "data": [{"classification": {"label": "signature", "probabilities": {"signature": 0.9927, "logo": 0.0038, "other": 0.0034, "formula": 0.0}}, "representation": "FFF2CF0F7C74FFC1070830FFF", "position": {"x1": -7, "x2": 603, "y1": 0, "y2": 852, "pageNumber": 1}, "geometry": {"width": 610, "height": 852}, "alpha": false, "filters": {"geometry": {"imageSize": {"quotient": 1.0096, "tooLarge": true, "tooSmall": false}, "imageFormat": {"quotient": 0.716, "tooTall": false, "tooWide": false}}, "probability": {"unconfident": false}, "allPassed": false}}], "dataCV": []}

View File

@ -0,0 +1 @@
{"dossierId": "55547c91-6b0e-4aa6-9009-2e7c4cd90f13", "fileId": "d7f1e0e37cba4e28ebdf894a79d3bd67", "targetFileExtension": "ORIGIN.pdf.gz", "responseFileExtension": "IMAGE_INFO.json.gz", "data": [{"classification": {"label": "signature", "probabilities": {"signature": 0.9872, "logo": 0.0064, "other": 0.0063, "formula": 0.0001}}, "representation": "FFFCF10608F6F89747BFFC301", "position": {"x1": -9, "x2": 584, "y1": 9, "y2": 849, "pageNumber": 1}, "geometry": {"width": 593, "height": 840}, "alpha": false, "filters": {"geometry": {"imageSize": {"quotient": 0.9992, "tooLarge": true, "tooSmall": false}, "imageFormat": {"quotient": 0.706, "tooTall": false, "tooWide": false}}, "probability": {"unconfident": false}, "allPassed": false}}], "dataCV": []}

View File

@ -0,0 +1 @@
{"dossierId": "55547c91-6b0e-4aa6-9009-2e7c4cd90f13", "fileId": "32b19ec38896f5105c09041def470c90", "targetFileExtension": "ORIGIN.pdf.gz", "responseFileExtension": "IMAGE_INFO.json.gz", "data": [{"classification": {"label": "logo", "probabilities": {"logo": 0.9999, "signature": 0.0001, "formula": 0.0, "other": 0.0}}, "representation": "307EF8F6E9833CE9D7AF9EFFF", "position": {"x1": 26, "x2": 586, "y1": -2, "y2": 794, "pageNumber": 1}, "geometry": {"width": 560, "height": 796}, "alpha": false, "filters": {"geometry": {"imageSize": {"quotient": 0.959, "tooLarge": true, "tooSmall": false}, "imageFormat": {"quotient": 0.7035, "tooTall": false, "tooWide": false}}, "probability": {"unconfident": false}, "allPassed": false}}, {"classification": {"label": "logo", "probabilities": {"logo": 1.0, "formula": 0.0, "other": 0.0, "signature": 0.0}}, "representation": "FFF7FFD2000000018F3FFEFFF", "position": {"x1": 90, "x2": 210, "y1": 676, "y2": 720, "pageNumber": 1}, "geometry": {"width": 120, "height": 44}, "alpha": false, "filters": {"geometry": {"imageSize": {"quotient": 0.1044, "tooLarge": false, "tooSmall": false}, "imageFormat": {"quotient": 2.7273, "tooTall": false, "tooWide": false}}, "probability": {"unconfident": false}, "allPassed": true}}], "dataCV": []}

95
ocr-service-v1/pom.xml Normal file
View File

@ -0,0 +1,95 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.iqser.red</groupId>
<artifactId>platform-dependency</artifactId>
<version>1.14.0</version>
<relativePath/>
</parent>
<groupId>com.iqser.red.service</groupId>
<artifactId>ocr-service-v1</artifactId>
<version>1.0-SNAPSHOT</version>
<packaging>pom</packaging>
<modules>
<module>ocr-service-api-v1</module>
<module>ocr-service-server-v1</module>
</modules>
<dependencyManagement>
<dependencies>
<dependency>
<groupId>com.iqser.red</groupId>
<artifactId>platform-commons-dependency</artifactId>
<version>1.20.0</version>
<scope>import</scope>
<type>pom</type>
</dependency>
</dependencies>
</dependencyManagement>
<build>
<pluginManagement>
<plugins>
<plugin>
<groupId>org.sonarsource.scanner.maven</groupId>
<artifactId>sonar-maven-plugin</artifactId>
<version>3.9.0.2155</version>
</plugin>
<plugin>
<groupId>org.owasp</groupId>
<artifactId>dependency-check-maven</artifactId>
<version>6.3.1</version>
<configuration>
<format>ALL</format>
</configuration>
</plugin>
<plugin>
<groupId>org.jacoco</groupId>
<artifactId>jacoco-maven-plugin</artifactId>
<executions>
<execution>
<id>prepare-agent</id>
<goals>
<goal>prepare-agent</goal>
</goals>
</execution>
<execution>
<id>report</id>
<goals>
<goal>report</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</pluginManagement>
<plugins>
<plugin>
<groupId>org.jacoco</groupId>
<artifactId>jacoco-maven-plugin</artifactId>
<version>0.8.8</version>
<executions>
<execution>
<id>prepare-agent</id>
<goals>
<goal>prepare-agent</goal>
</goals>
</execution>
<execution>
<id>report</id>
<goals>
<goal>report-aggregate</goal>
</goals>
<phase>verify</phase>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

19
pom.xml Normal file
View File

@ -0,0 +1,19 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.iqser.red.service</groupId>
<artifactId>ocr-service</artifactId>
<version>1.0-SNAPSHOT</version>
<packaging>pom</packaging>
<modules>
<module>bamboo-specs</module>
<module>ocr-service-v1</module>
<module>ocr-service-image-v1</module>
</modules>
</project>