Test gradle

This commit is contained in:
Kilian Schüttler 2023-08-01 12:09:00 +02:00
parent d9f7b3f516
commit af9b581b4f
223 changed files with 1156 additions and 52825 deletions

43
.gitignore vendored
View File

@ -9,6 +9,49 @@
**/tmp/
**/.apt_generated/
HELP.md
target/
!.mvn/wrapper/maven-wrapper.jar
!**/src/main/**/target/
!**/src/test/**/target/
### maven build ###
*.class
/out/
/build/
/target/
**/out/
**/build/
**/target/
### STS ###
.apt_generated
.classpath
.factorypath
.project
.settings
.springBeans
.sts4-cache
.gradle
### IntelliJ IDEA ###
.idea
*.iws
*.iml
*.ipr
### NetBeans ###
/nbproject/private/
/nbbuild/
/dist/
/nbdist/
/.nb-gradle/
build/
!**/src/main/**/build/
!**/src/test/**/build/
### VS Code ###
.vscode/
.factorypath
.springBeans

View File

@ -3,4 +3,4 @@ variables:
include:
- project: 'gitlab/gitlab'
ref: 'main'
file: 'ci-templates/maven_java.yml'
file: 'ci-templates/gradle_java.yml'

View File

@ -0,0 +1,7 @@
// Build script for the build-logic project that hosts the shared convention plugin
// (presumably buildSrc — the file path is not visible here; confirm).
plugins {
// Enables writing precompiled script plugins (*.gradle.kts) in this project.
`kotlin-dsl`
}
repositories {
// Repository used to resolve the Gradle plugins this build logic depends on.
gradlePluginPortal()
}

View File

@ -0,0 +1,75 @@
// Shared conventions for the Java modules of this build: Java 17, PMD, Checkstyle,
// JaCoCo coverage, JUnit Platform tests and publishing to the Nexus repository.
plugins {
    `java-library`
    `maven-publish`
    pmd
    checkstyle
    jacoco
}

group = "com.iqser.red"

java.sourceCompatibility = JavaVersion.VERSION_17
java.targetCompatibility = JavaVersion.VERSION_17

// The rule sets are configured on the tasks themselves. Inside a task block the name `pmd`
// resolves to the project-wide PmdExtension, so `pmd.ruleSetFiles = ...` would make both
// tasks overwrite one shared value instead of each getting its own file.
// `ruleSets` is cleared because Gradle's built-in default rule set would otherwise run in
// addition to the custom file and re-enable the rules that the file excludes.
tasks.pmdMain {
    ruleSets = listOf()
    ruleSetFiles = files("${rootDir}/config/pmd/pmd.xml")
}

tasks.pmdTest {
    ruleSets = listOf()
    ruleSetFiles = files("${rootDir}/config/pmd/test_pmd.xml")
}

tasks.test {
    useJUnitPlatform()
    reports {
        junitXml.outputLocation.set(layout.buildDirectory.dir("reports/junit"))
    }
    finalizedBy(tasks.jacocoTestReport) // report is always generated after tests run
}

tasks.jacocoTestReport {
    dependsOn(tasks.test) // tests are required to run before generating the report
    reports {
        xml.required.set(true)
        csv.required.set(false)
        html.outputLocation.set(layout.buildDirectory.dir("jacocoHtml"))
    }
}

// NOTE(review): `allprojects` also configures every subproject of whichever project applies
// this plugin. For a leaf module that is only the module itself — confirm this is intended.
allprojects {
    publishing {
        publications {
            // One Maven publication per project, named after the project.
            create<MavenPublication>(name) {
                from(components["java"])
            }
        }
        repositories {
            maven {
                url = uri("https://nexus.knecon.com/repository/red-platform-releases/")
                credentials {
                    // Read from Gradle properties; null when the property is not set.
                    username = providers.gradleProperty("mavenUser").orNull
                    password = providers.gradleProperty("mavenPassword").orNull
                }
            }
        }
    }
}

java {
    withJavadocJar()
}

repositories {
    mavenLocal()
    mavenCentral()
    maven {
        url = uri("https://nexus.knecon.com/repository/gindev/")
        credentials {
            username = providers.gradleProperty("mavenUser").orNull
            password = providers.gradleProperty("mavenPassword").orNull
        }
    }
}

View File

@ -0,0 +1,39 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE module PUBLIC "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
"http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
<!-- Checkstyle configuration. The root severity is "error", which applies to every check below
     because none of them overrides it. -->
<module name="Checker">
<property
name="severity"
value="error"/>
<!-- Checks that run on the parsed Java source: annotations, Javadoc, imports, modifiers,
     blocks and common coding pitfalls. SuppressWarningsHolder pairs with the
     SuppressWarningsFilter declared at the end of this file so that @SuppressWarnings
     annotations can silence individual checks. -->
<module name="TreeWalker">
<module name="SuppressWarningsHolder"/>
<module name="MissingDeprecated"/>
<module name="MissingOverride"/>
<module name="AnnotationLocation"/>
<module name="JavadocStyle"/>
<module name="NonEmptyAtclauseDescription"/>
<module name="IllegalImport"/>
<module name="RedundantImport"/>
<module name="RedundantModifier"/>
<module name="EmptyBlock"/>
<module name="DefaultComesLast"/>
<module name="EmptyStatement"/>
<module name="EqualsHashCode"/>
<module name="ExplicitInitialization"/>
<module name="IllegalInstantiation"/>
<module name="ModifiedControlVariable"/>
<module name="MultipleVariableDeclarations"/>
<module name="PackageDeclaration"/>
<module name="ParameterAssignment"/>
<module name="SimplifyBooleanExpression"/>
<module name="SimplifyBooleanReturn"/>
<module name="StringLiteralEquality"/>
<module name="OneStatementPerLine"/>
<module name="FinalClass"/>
<module name="ArrayTypeStyle"/>
<module name="UpperEll"/>
<module name="OuterTypeFilename"/>
</module>
<!-- File-level modules declared directly under Checker. -->
<module name="FileTabCharacter"/>
<module name="SuppressWarningsFilter"/>
</module>

21
config/pmd/pmd.xml Normal file
View File

@ -0,0 +1,21 @@
<?xml version="1.0"?>
<!-- PMD ruleset for production code (referenced by the pmdMain task of the Gradle conventions). -->
<ruleset name="Custom ruleset"
xmlns="http://pmd.sourceforge.net/ruleset/2.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://pmd.sourceforge.net/ruleset/2.0.0 http://pmd.sourceforge.net/ruleset_2_0_0.xsd">
<description>
Knecon ruleset checks the code for bad stuff
</description>
<!-- Enables the whole Java "errorprone" category except the rules excluded below. -->
<rule ref="category/java/errorprone.xml">
<exclude name="DataflowAnomalyAnalysis"/>
<exclude name="MissingSerialVersionUID"/>
<exclude name="NullAssignment"/>
<exclude name="AvoidLiteralsInIfCondition"/>
<exclude name="AvoidDuplicateLiterals"/>
<exclude name="AvoidFieldNameMatchingMethodName"/>
<exclude name="AssignmentInOperand"/>
</rule>
</ruleset>

24
config/pmd/test_pmd.xml Normal file
View File

@ -0,0 +1,24 @@
<?xml version="1.0"?>
<!-- PMD ruleset for test code (referenced by the pmdTest task of the Gradle conventions). -->
<ruleset name="Custom ruleset"
xmlns="http://pmd.sourceforge.net/ruleset/2.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://pmd.sourceforge.net/ruleset/2.0.0 http://pmd.sourceforge.net/ruleset_2_0_0.xsd">
<description>
Knecon test ruleset checks the code for bad stuff
</description>
<!-- Same "errorprone" category and exclusions as config/pmd/pmd.xml, plus two further
     exclusions: AvoidFieldNameMatchingTypeName and TestClassWithoutTestCases. -->
<rule ref="category/java/errorprone.xml">
<exclude name="DataflowAnomalyAnalysis"/>
<exclude name="MissingSerialVersionUID"/>
<exclude name="NullAssignment"/>
<exclude name="AvoidLiteralsInIfCondition"/>
<exclude name="AvoidDuplicateLiterals"/>
<exclude name="AvoidFieldNameMatchingMethodName"/>
<exclude name="AvoidFieldNameMatchingTypeName"/>
<exclude name="AssignmentInOperand"/>
<exclude name="TestClassWithoutTestCases"/>
</rule>
</ruleset>

1
gradle.properties.kts Normal file
View File

@ -0,0 +1 @@
version = 4.0-SNAPSHOT

20
pom.xml
View File

@ -1,20 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>redaction-service</artifactId>
<groupId>com.iqser.red.service</groupId>
<version>4.0-SNAPSHOT</version>
<packaging>pom</packaging>
<modules>
<module>redaction-service-v1</module>
<module>redaction-service-image-v1</module>
</modules>
</project>

View File

@ -1,98 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<groupId>com.knecon.fforesight</groupId>
<artifactId>platform-docker-dependency</artifactId>
<version>0.1.0</version>
<relativePath/>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>redaction-service-image-v1</artifactId>
<groupId>com.iqser.red.service</groupId>
<version>4.0-SNAPSHOT</version>
<packaging>pom</packaging>
<properties>
<service.server>redaction-service-server-v1</service.server>
<platform.jar>${service.server}.jar</platform.jar>
<docker.skip.push>false</docker.skip.push>
<docker.image.name>${docker.image.prefix}/${service.server}</docker.image.name>
</properties>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-resources-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
</plugin>
<plugin>
<groupId>io.fabric8</groupId>
<artifactId>docker-maven-plugin</artifactId>
</plugin>
</plugins>
<pluginManagement>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<executions>
<execution>
<id>download-platform-jar</id>
<phase>prepare-package</phase>
<goals>
<goal>copy</goal>
</goals>
<configuration>
<artifactItems>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>${service.server}</artifactId>
<version>${project.version}</version>
<type>jar</type>
<overWrite>true</overWrite>
<destFileName>${platform.jar}</destFileName>
</dependency>
</artifactItems>
<outputDirectory>${docker.build.directory}</outputDirectory>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>io.fabric8</groupId>
<artifactId>docker-maven-plugin</artifactId>
<configuration>
<images>
<image>
<name>${docker.image.name}</name>
<build>
<dockerFileDir>${docker.build.directory}</dockerFileDir>
<args>
<PLATFORM_JAR>${platform.jar}</PLATFORM_JAR>
</args>
<tags>
<tag>${docker.image.version}</tag>
<tag>latest</tag>
</tags>
</build>
</image>
</images>
</configuration>
</plugin>
</plugins>
</pluginManagement>
</build>
</project>

View File

@ -1,9 +0,0 @@
FROM red/redaction-service-base-v1:2.0.0
ARG PLATFORM_JAR
ENV PLATFORM_JAR ${PLATFORM_JAR}
ENV USES_ELASTICSEARCH false
COPY ["${PLATFORM_JAR}", "/"]

View File

@ -1,114 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>platform-dependency</artifactId>
<groupId>com.iqser.red</groupId>
<version>2.2.0</version>
<relativePath/>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>redaction-service-v1</artifactId>
<groupId>com.iqser.red.service</groupId>
<version>4.0-SNAPSHOT</version>
<packaging>pom</packaging>
<modules>
<module>redaction-service-api-v1</module>
<module>redaction-service-server-v1</module>
</modules>
<properties>
<pdfbox.version>2.0.24</pdfbox.version>
<lombok.version>1.18.26</lombok.version>
</properties>
<dependencyManagement>
<dependencies>
<dependency>
<groupId>com.iqser.red</groupId>
<artifactId>platform-commons-dependency</artifactId>
<version>2.5.0</version>
<scope>import</scope>
<type>pom</type>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>${pdfbox.version}</version>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox-tools</artifactId>
<version>${pdfbox.version}</version>
</dependency>
</dependencies>
</dependencyManagement>
<build>
<pluginManagement>
<plugins>
<plugin>
<groupId>org.sonarsource.scanner.maven</groupId>
<artifactId>sonar-maven-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.owasp</groupId>
<artifactId>dependency-check-maven</artifactId>
<configuration>
<format>ALL</format>
</configuration>
</plugin>
<plugin>
<groupId>org.jacoco</groupId>
<artifactId>jacoco-maven-plugin</artifactId>
<version>0.8.8</version>
<configuration>
<excludes>
<exclude>org/drools/**/*</exclude>
</excludes>
</configuration>
<executions>
<execution>
<id>prepare-agent</id>
<goals>
<goal>prepare-agent</goal>
</goals>
</execution>
<execution>
<id>report</id>
<goals>
<goal>report</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</pluginManagement>
<plugins>
<plugin>
<groupId>org.projectlombok</groupId>
<artifactId>lombok-maven-plugin</artifactId>
<version>1.18.20.0</version>
<executions>
<execution>
<id>delombok</id>
<phase>generate-sources</phase>
<goals>
<goal>delombok</goal>
</goals>
<configuration>
<addOutputDirectory>false</addOutputDirectory>
<sourceDirectory>src/main/java</sourceDirectory>
<outputDirectory>${delomboked.sources}</outputDirectory>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@ -0,0 +1,14 @@
// Build script of the API module: applies the shared Java conventions plus Lombok.
plugins {
id("com.iqser.red.service.java-conventions")
id("io.freefair.lombok") version "8.1.0"
}
description = "redaction-service-api-v1"
dependencies {
implementation("org.springframework:spring-web:6.0.6")
// NOTE(review): "RED-6725" looks like a branch build rather than a release version, and the
// server module pins the same artifact to 2.119.0 — confirm which version is intended.
implementation("com.iqser.red.service:persistence-service-internal-api-v1:RED-6725")
}

View File

@ -1,61 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<artifactId>redaction-service-v1</artifactId>
<groupId>com.iqser.red.service</groupId>
<version>4.0-SNAPSHOT</version>
</parent>
<artifactId>redaction-service-api-v1</artifactId>
<properties>
<persistence-service.version>2.93.0</persistence-service.version>
</properties>
<dependencies>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-web</artifactId>
<optional>true</optional>
</dependency>
<dependency>
<groupId>com.iqser.red.service</groupId>
<artifactId>persistence-service-internal-api-v1</artifactId>
<version>${persistence-service.version}</version>
<exclusions>
<exclusion>
<groupId>com.iqser.red.service</groupId>
<artifactId>redaction-service-api-v1</artifactId>
</exclusion>
<exclusion>
<groupId>com.iqser.red.service</groupId>
<artifactId>persistence-service-api-v1</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<annotationProcessorPaths>
<path>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>${lombok.version}</version>
</path>
</annotationProcessorPaths>
</configuration>
</plugin>
</plugins>
</build>
</project>

View File

@ -0,0 +1,90 @@
import org.springframework.boot.gradle.tasks.bundling.BootBuildImage
// Build script of the server module: a Spring Boot application built with the shared Java
// conventions, analysed with SonarQube and packaged as an OCI image via bootBuildImage.
plugins {
application
id("com.iqser.red.service.java-conventions")
id("org.springframework.boot") version "3.0.6"
id("io.spring.dependency-management") version "1.1.0"
id("org.sonarqube") version "4.2.1.3168"
id("io.freefair.lombok") version "8.1.0"
}
description = "redaction-service-server-v1"
// Versions shared by several dependency declarations below.
val layoutParserVersion = "0.18.0"
val jacksonVersion = "2.14.2"
val droolsVersion = "8.37.0.Final"
val pdfBoxVersion = "3.0.0-alpha2"
configurations {
all {
// Removes Spring Boot's default logging starter from every configuration.
exclude(group = "org.springframework.boot", module = "spring-boot-starter-logging")
}
}
dependencies {
// The API module is used without its own persistence-service-internal-api-v1 dependency;
// the version declared on the next line is used instead.
implementation(project(":redaction-service-api-v1")) { exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1") }
implementation("com.iqser.red.service:persistence-service-internal-api-v1:2.119.0") { exclude(group = "org.springframework.boot") }
implementation("com.knecon.fforesight:layoutparser-service-internal-api:${layoutParserVersion}")
implementation("com.iqser.red.commons:spring-commons:2.1.0")
implementation("com.iqser.red.commons:metric-commons:2.1.0")
implementation("com.iqser.red.commons:dictionary-merge-commons:1.3.0")
implementation("com.iqser.red.commons:storage-commons:2.1.0")
implementation("com.knecon.fforesight:tenant-commons:0.10.0")
implementation("com.fasterxml.jackson.module:jackson-module-afterburner:${jacksonVersion}")
implementation("com.fasterxml.jackson.datatype:jackson-datatype-jsr310:${jacksonVersion}")
implementation("org.ahocorasick:ahocorasick:0.6.3")
implementation("org.javassist:javassist:3.29.2-GA")
implementation("org.drools:drools-engine:${droolsVersion}")
implementation("org.drools:drools-mvel:${droolsVersion}")
implementation("org.kie:kie-spring:7.73.0.Final")
implementation("org.locationtech.jts:jts-core:1.19.0")
implementation("org.springframework.cloud:spring-cloud-starter-openfeign:4.0.1")
// NOTE(review): the Spring Boot starters below are pinned to 3.0.4 while the Spring Boot
// plugin above is 3.0.6 — confirm the mismatch is intentional.
implementation("org.springframework.boot:spring-boot-starter-amqp:3.0.4")
// NOTE(review): pdfbox is only on the test classpath here, whereas the removed Maven POM
// declared it without test scope — confirm no production code needs it.
testImplementation("org.apache.pdfbox:pdfbox:${pdfBoxVersion}")
testImplementation("org.apache.pdfbox:pdfbox-tools:${pdfBoxVersion}")
testImplementation("org.springframework.boot:spring-boot-starter-test:3.0.4")
testImplementation("com.knecon.fforesight:layoutparser-service-processor:${layoutParserVersion}") {
exclude(
group = "com.iqser.red.service",
module = "persistence-service-shared-api-v1"
)
}
}
tasks.test {
// Keeps Drools classes out of the JaCoCo coverage instrumentation.
configure<JacocoTaskExtension> {
excludes = listOf("org/drools/**/*")
}
}
tasks.named<BootBuildImage>("bootBuildImage") {
// Image is tagged with the project name and version under the Nexus Docker registry.
imageName.set("nexus.knecon.com:5001/red/${project.name}:${project.version}")
// Host networking is opt-in: it is only enabled when the Gradle property
// "buildbootDockerHostNetwork" is present (its value is not inspected).
if (project.hasProperty("buildbootDockerHostNetwork")) {
network.set("host")
}
docker {
if (project.hasProperty("buildbootDockerHostNetwork")) {
bindHostToBuilder.set(true)
}
verboseLogging.set(true)
// Registry credentials come from Gradle properties and are null when unset.
publishRegistry {
username.set(providers.gradleProperty("mavenUser").getOrNull())
password.set(providers.gradleProperty("mavenPassword").getOrNull())
email.set(providers.gradleProperty("mavenEmail").getOrNull())
url.set("https://nexus.knecon.com:5001/")
}
}
}

View File

@ -1,217 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://maven.apache.org/POM/4.0.0"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<artifactId>redaction-service-v1</artifactId>
<groupId>com.iqser.red.service</groupId>
<version>4.0-SNAPSHOT</version>
</parent>
<artifactId>redaction-service-server-v1</artifactId>
<properties>
<drools.version>8.37.0.Final</drools.version>
<kie.version>7.73.0.Final</kie.version>
<locationtech.version>1.19.0</locationtech.version>
<javaassist.version>3.29.2-GA</javaassist.version>
<ahocorasick.version>0.6.3</ahocorasick.version>
<jackson.version>2.14.2</jackson.version>
<tennat-commons.version>0.10.0</tennat-commons.version>
</properties>
<dependencies>
<dependency>
<groupId>com.knecon.fforesight</groupId>
<artifactId>tenant-commons</artifactId>
<version>${tennat-commons.version}</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-aop</artifactId>
</dependency>
<dependency>
<groupId>com.iqser.red.commons</groupId>
<artifactId>dictionary-merge-commons</artifactId>
<version>1.3.0</version>
</dependency>
<dependency>
<groupId>com.iqser.red.commons</groupId>
<artifactId>storage-commons</artifactId>
<version>2.1.0</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.module</groupId>
<artifactId>jackson-module-afterburner</artifactId>
<version>${jackson.version}</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.datatype</groupId>
<artifactId>jackson-datatype-jsr310</artifactId>
<version>${jackson.version}</version>
</dependency>
<dependency>
<groupId>org.ahocorasick</groupId>
<artifactId>ahocorasick</artifactId>
<version>${ahocorasick.version}</version>
</dependency>
<dependency>
<groupId>org.javassist</groupId>
<artifactId>javassist</artifactId>
<version>${javaassist.version}</version>
</dependency>
<dependency>
<groupId>com.iqser.red.service</groupId>
<artifactId>redaction-service-api-v1</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.drools</groupId>
<artifactId>drools-engine</artifactId>
<version>${drools.version}</version>
</dependency>
<dependency>
<groupId>org.drools</groupId>
<artifactId>drools-mvel</artifactId>
<version>${drools.version}</version>
</dependency>
<dependency>
<groupId>org.kie</groupId>
<artifactId>kie-spring</artifactId>
<version>${kie.version}</version>
</dependency>
<dependency>
<groupId>org.locationtech.jts</groupId>
<artifactId>jts-core</artifactId>
<version>${locationtech.version}</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</dependency>
<!-- commons -->
<dependency>
<groupId>com.iqser.red.commons</groupId>
<artifactId>spring-commons</artifactId>
</dependency>
<dependency>
<groupId>com.iqser.red.commons</groupId>
<artifactId>logging-commons</artifactId>
</dependency>
<dependency>
<groupId>com.iqser.red.commons</groupId>
<artifactId>metric-commons</artifactId>
</dependency>
<!-- other external -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox-tools</artifactId>
</dependency>
<!-- spring -->
<dependency>
<groupId>org.springframework.cloud</groupId>
<artifactId>spring-cloud-starter-openfeign</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-amqp</artifactId>
</dependency>
<!-- test dependencies -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.iqser.red.commons</groupId>
<artifactId>test-commons</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<annotationProcessorPaths>
<path>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>${lombok.version}</version>
</path>
</annotationProcessorPaths>
</configuration>
</plugin>
<plugin>
<!-- generate git.properties for exposure in /info -->
<groupId>pl.project13.maven</groupId>
<artifactId>git-commit-id-plugin</artifactId>
<executions>
<execution>
<goals>
<goal>revision</goal>
</goals>
<configuration>
<generateGitPropertiesFile>true</generateGitPropertiesFile>
<gitDescribe>
<tags>true</tags>
</gitDescribe>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<executions>
<execution>
<id>original-jar</id>
<goals>
<goal>jar</goal>
</goals>
<configuration>
<classifier>original</classifier>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<!-- repackages the generated jar into a runnable fat-jar and makes it
executable -->
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<executions>
<execution>
<goals>
<goal>repackage</goal>
</goals>
<configuration>
<executable>true</executable>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@ -0,0 +1,25 @@
package com.iqser.red.service.redaction.v1.server.document.data;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPage;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositionData;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructure;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentTextData;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.experimental.FieldDefaults;
/**
 * Holder for the four layout parser data structures from which the document graph is built:
 * pages, text data, position data and the document structure.
 *
 * <p>{@link FieldDefaults} makes every field {@code private final}. Lombok generates the
 * getters, {@code equals}/{@code hashCode}/{@code toString}, an all-args constructor (in field
 * declaration order) and a builder. The arrays are stored and returned as-is, without copying.
 */
@Data
@Builder
@AllArgsConstructor
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class DocumentData {
// Page entries of the document.
DocumentPage[] documentPages;
// Text data entries; presumably one per atomic text block (TODO confirm against the layout parser API).
DocumentTextData[] documentTextData;
// Position data entries; presumably parallel to documentTextData (TODO confirm).
DocumentPositionData[] documentPositionData;
// Structure of the document, whose root entry holds the child entries.
DocumentStructure documentStructure;
}

View File

@ -1,4 +1,4 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.data.mapper;
package com.iqser.red.service.redaction.v1.server.document.data.mapper;
import java.util.Arrays;
import java.util.HashSet;
@ -7,26 +7,26 @@ import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.data.AtomicPositionBlockData;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.data.AtomicTextBlockData;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.data.DocumentData;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.data.DocumentTreeData;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.data.PageData;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.DocumentTree;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Document;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Footer;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Header;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Headline;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Image;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Page;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Paragraph;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Section;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Table;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.TableCell;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.AtomicTextBlock;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlockCollector;
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Footer;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Header;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Image;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Page;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Paragraph;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Section;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.TableCell;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.AtomicTextBlock;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlockCollector;
import com.iqser.red.service.redaction.v1.server.document.data.DocumentData;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Headline;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Table;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPage;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositionData;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructure;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentTextData;
import lombok.experimental.UtilityClass;
@ -39,23 +39,23 @@ public class DocumentGraphMapper {
DocumentTree documentTree = new DocumentTree(document);
Context context = new Context(documentData, documentTree);
context.pages.addAll(Arrays.stream(documentData.getPages()).map(DocumentGraphMapper::buildPage).toList());
context.pageData.addAll(Arrays.stream(documentData.getDocumentPages()).map(DocumentGraphMapper::buildPage).toList());
context.documentTree.getRoot().getChildren().addAll(buildEntries(documentData.getDocumentTreeData().getRoot().getChildren(), context));
context.documentTree.getRoot().getChildren().addAll(buildEntries(documentData.getDocumentStructure().getRoot().getChildren(), context));
document.setDocumentTree(context.documentTree);
document.setPages(new HashSet<>(context.pages));
document.setNumberOfPages(documentData.getPages().length);
document.setPages(new HashSet<>(context.pageData));
document.setNumberOfPages(documentData.getDocumentPages().length);
document.setTextBlock(document.getTextBlock());
return document;
}
private List<DocumentTree.Entry> buildEntries(List<DocumentTreeData.EntryData> entries, Context context) {
private List<DocumentTree.Entry> buildEntries(List<DocumentStructure.EntryData> entries, Context context) {
List<DocumentTree.Entry> newEntries = new LinkedList<>();
for (DocumentTreeData.EntryData entryData : entries) {
for (DocumentStructure.EntryData entryData : entries) {
List<Page> pages = Arrays.stream(entryData.getPageNumbers()).map(pageNumber -> getPage(pageNumber, context)).toList();
@ -154,14 +154,14 @@ public class DocumentGraphMapper {
private AtomicTextBlock getAtomicTextBlock(Context context, SemanticNode parent, Long atomicTextBlockId) {
return AtomicTextBlock.fromAtomicTextBlockData(context.atomicTextBlockData.get(Math.toIntExact(atomicTextBlockId)),
context.atomicPositionBlockData.get(Math.toIntExact(atomicTextBlockId)),
return AtomicTextBlock.fromAtomicTextBlockData(context.documentTextData.get(Math.toIntExact(atomicTextBlockId)),
context.documentPositionData.get(Math.toIntExact(atomicTextBlockId)),
parent,
getPage(context.atomicTextBlockData.get(Math.toIntExact(atomicTextBlockId)).getPage(), context));
getPage(context.documentTextData.get(Math.toIntExact(atomicTextBlockId)).getPage(), context));
}
private Page buildPage(PageData p) {
private Page buildPage(DocumentPage p) {
return Page.builder().rotation(p.getRotation()).height(p.getHeight()).width(p.getWidth()).number(p.getNumber()).mainBody(new LinkedList<>()).build();
}
@ -169,7 +169,7 @@ public class DocumentGraphMapper {
private Page getPage(Long pageIndex, Context context) {
return context.pages.stream()
return context.pageData.stream()
.filter(page -> page.getNumber() == Math.toIntExact(pageIndex))
.findFirst()
.orElseThrow(() -> new NoSuchElementException(String.format("ClassificationPage with number %d not found", pageIndex)));
@ -179,17 +179,17 @@ public class DocumentGraphMapper {
static final class Context {
private final DocumentTree documentTree;
private final List<Page> pages;
private final List<AtomicTextBlockData> atomicTextBlockData;
private final List<AtomicPositionBlockData> atomicPositionBlockData;
private final List<Page> pageData;
private final List<DocumentTextData> documentTextData;
private final List<DocumentPositionData> documentPositionData;
Context(DocumentData documentData, DocumentTree documentTree) {
this.documentTree = documentTree;
this.pages = new LinkedList<>();
this.atomicTextBlockData = Arrays.stream(documentData.getAtomicTextBlocks()).toList();
this.atomicPositionBlockData = Arrays.stream(documentData.getAtomicPositionBlocks()).toList();
this.pageData = new LinkedList<>();
this.documentTextData = Arrays.stream(documentData.getDocumentTextData()).toList();
this.documentPositionData = Arrays.stream(documentData.getDocumentPositionData()).toList();
}

View File

@ -0,0 +1,52 @@
package com.iqser.red.service.redaction.v1.server.document.data.mapper;
import java.awt.geom.Rectangle2D;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Image;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.ImageType;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Table;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.TableCell;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructure;
import lombok.AccessLevel;
import lombok.experimental.FieldDefaults;
import lombok.experimental.UtilityClass;
/**
 * Static helpers that read string-encoded node properties from a property map and set the
 * parsed values on the builder of the matching document graph node.
 *
 * <p>The property keys are the constants declared in {@link DocumentStructure}.
 */
@UtilityClass
public class PropertiesMapper {

    /**
     * Sets image type, transparency flag, position and id on an image builder.
     *
     * @param properties string properties of the image node
     * @param builder    builder that receives the parsed values
     */
    public void parseImageProperties(Map<String, String> properties, Image.ImageBuilder builder) {

        String rawImageType = properties.get(DocumentStructure.ImageProperties.IMAGE_TYPE);
        builder.imageType(ImageType.fromString(rawImageType));

        String rawTransparent = properties.get(DocumentStructure.ImageProperties.TRANSPARENT);
        builder.transparent(Boolean.parseBoolean(rawTransparent));

        String rawPosition = properties.get(DocumentStructure.ImageProperties.POSITION);
        builder.position(parseRectangle2D(rawPosition));

        String imageId = properties.get(DocumentStructure.ImageProperties.ID);
        builder.id(imageId);
    }


    /**
     * Sets row, column, header flag and bounding box on a table cell builder.
     *
     * @param properties string properties of the table cell node
     * @param builder    builder that receives the parsed values
     * @throws NumberFormatException if the row or column value is not a valid integer
     */
    public void parseTableCellProperties(Map<String, String> properties, TableCell.TableCellBuilder builder) {

        String rawRow = properties.get(DocumentStructure.TableCellProperties.ROW);
        builder.row(Integer.parseInt(rawRow));

        String rawCol = properties.get(DocumentStructure.TableCellProperties.COL);
        builder.col(Integer.parseInt(rawCol));

        String rawHeader = properties.get(DocumentStructure.TableCellProperties.HEADER);
        builder.header(Boolean.parseBoolean(rawHeader));

        String rawBoundingBox = properties.get(DocumentStructure.TableCellProperties.B_BOX);
        builder.bBox(parseRectangle2D(rawBoundingBox));
    }


    /**
     * Sets the number of rows and columns on a table builder.
     *
     * @param properties string properties of the table node
     * @param builder    builder that receives the parsed values
     * @throws NumberFormatException if either count is not a valid integer
     */
    public void parseTableProperties(Map<String, String> properties, Table.TableBuilder builder) {

        String rawRowCount = properties.get(DocumentStructure.TableProperties.NUMBER_OF_ROWS);
        builder.numberOfRows(Integer.parseInt(rawRowCount));

        String rawColCount = properties.get(DocumentStructure.TableProperties.NUMBER_OF_COLS);
        builder.numberOfCols(Integer.parseInt(rawColCount));
    }


    /**
     * Parses a delimited string into a rectangle. Every token is parsed as a float; the first
     * four values are used as x, y, width and height.
     */
    private Rectangle2D parseRectangle2D(String bBox) {

        String[] tokens = bBox.split(DocumentStructure.RECTANGLE_DELIMITER);
        List<Float> coordinates = Arrays.stream(tokens).map(Float::parseFloat).toList();

        Float x = coordinates.get(0);
        Float y = coordinates.get(1);
        Float width = coordinates.get(2);
        Float height = coordinates.get(3);
        return new Rectangle2D.Float(x, y, width, height);
    }

}

View File

@ -1,4 +1,4 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph;
package com.iqser.red.service.redaction.v1.server.document.graph;
import static java.lang.String.format;
@ -6,7 +6,7 @@ import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
import lombok.EqualsAndHashCode;
import lombok.Setter;

View File

@ -1,4 +1,4 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph;
package com.iqser.red.service.redaction.v1.server.document.graph;
import java.util.LinkedList;
import java.util.List;

View File

@ -1,4 +1,4 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph;
package com.iqser.red.service.redaction.v1.server.document.graph;
import static java.lang.String.format;
@ -7,14 +7,14 @@ import java.util.LinkedList;
import java.util.List;
import java.util.stream.Stream;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Document;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.GenericSemanticNode;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.NodeType;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Table;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.TableCell;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlockCollector;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.GenericSemanticNode;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.NodeType;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.TableCell;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Table;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlockCollector;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;

View File

@ -1,4 +1,4 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity;
package com.iqser.red.service.redaction.v1.server.document.graph.entity;
public enum EntityType {
ENTITY,

View File

@ -1,4 +1,4 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity;
package com.iqser.red.service.redaction.v1.server.document.graph.entity;
import java.util.Collections;
import java.util.Objects;

View File

@ -1,4 +1,4 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity;
package com.iqser.red.service.redaction.v1.server.document.graph.entity;
import java.util.Collection;
import java.util.HashSet;

View File

@ -1,4 +1,4 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity;
package com.iqser.red.service.redaction.v1.server.document.graph.entity;
import java.awt.geom.Rectangle2D;
import java.util.Collection;
@ -11,9 +11,9 @@ import java.util.PriorityQueue;
import java.util.Set;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Page;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Page;
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
import lombok.AccessLevel;

View File

@ -1,9 +1,9 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity;
package com.iqser.red.service.redaction.v1.server.document.graph.entity;
import java.awt.geom.Rectangle2D;
import java.util.List;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Page;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Page;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;

View File

@ -1,4 +1,4 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity;
package com.iqser.red.service.redaction.v1.server.document.graph.entity;
import java.util.Objects;

View File

@ -1,4 +1,4 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes;
package com.iqser.red.service.redaction.v1.server.document.graph.nodes;
import java.awt.geom.Rectangle2D;
import java.util.Collections;
@ -10,10 +10,10 @@ import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.DocumentTree;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlockCollector;
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlockCollector;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;

View File

@ -1,12 +1,12 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes;
package com.iqser.red.service.redaction.v1.server.document.graph.nodes;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.DocumentTree;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;

View File

@ -0,0 +1,5 @@
package com.iqser.red.service.redaction.v1.server.document.graph.nodes;
/**
 * A {@link SemanticNode} sub-interface that declares no members of its own.
 */
public interface GenericSemanticNode extends SemanticNode {
}

View File

@ -1,12 +1,12 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes;
package com.iqser.red.service.redaction.v1.server.document.graph.nodes;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.DocumentTree;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;

View File

@ -1,13 +1,13 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes;
package com.iqser.red.service.redaction.v1.server.document.graph.nodes;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.DocumentTree;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.AtomicTextBlock;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.AtomicTextBlock;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;

View File

@ -1,4 +1,4 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes;
package com.iqser.red.service.redaction.v1.server.document.graph.nodes;
import java.awt.geom.Rectangle2D;
import java.util.Collections;
@ -9,12 +9,12 @@ import java.util.Map;
import java.util.PriorityQueue;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.DocumentTree;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.MatchedRule;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.MatchedRuleHolder;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlockCollector;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.MatchedRule;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.MatchedRuleHolder;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlockCollector;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;

View File

@ -1,4 +1,6 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes;
package com.iqser.red.service.redaction.v1.server.document.graph.nodes;
import java.util.Locale;
public enum ImageType {
LOGO,
@ -10,7 +12,7 @@ public enum ImageType {
public static ImageType fromString(String imageType) {
return switch (imageType.toLowerCase()) {
return switch (imageType.toLowerCase(Locale.ROOT)) {
case "logo" -> ImageType.LOGO;
case "formula" -> ImageType.FORMULA;
case "signature" -> ImageType.SIGNATURE;

View File

@ -1,4 +1,4 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes;
package com.iqser.red.service.redaction.v1.server.document.graph.nodes;
import java.util.Locale;

View File

@ -1,14 +1,13 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes;
package com.iqser.red.service.redaction.v1.server.document.graph.nodes;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.ClassificationPage;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlockCollector;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlockCollector;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
@ -48,18 +47,6 @@ public class Page {
Set<Image> images = new HashSet<>();
/**
 * Creates a {@link Page} carrying the dimensions, number and rotation of the given
 * classification page and an empty main body.
 *
 * @param classificationPage page to take the values from
 * @return the newly built page
 */
public static Page fromClassificationPage(ClassificationPage classificationPage) {

    var pageBuilder = Page.builder();
    // Height and width are narrowed to int by the cast, dropping any fractional part.
    pageBuilder.height((int) classificationPage.getPageHeight());
    pageBuilder.width((int) classificationPage.getPageWidth());
    pageBuilder.number(classificationPage.getPageNumber());
    pageBuilder.rotation(classificationPage.getRotation());
    // The main body starts out empty.
    pageBuilder.mainBody(new LinkedList<>());
    return pageBuilder.build();
}
public TextBlock getMainBodyTextBlock() {
return mainBody.stream().filter(SemanticNode::isLeaf).map(SemanticNode::getLeafTextBlock).collect(new TextBlockCollector());

View File

@ -1,12 +1,12 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes;
package com.iqser.red.service.redaction.v1.server.document.graph.nodes;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.DocumentTree;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;

View File

@ -1,13 +1,13 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes;
package com.iqser.red.service.redaction.v1.server.document.graph.nodes;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.DocumentTree;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlockCollector;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlockCollector;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;

View File

@ -1,4 +1,4 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes;
package com.iqser.red.service.redaction.v1.server.document.graph.nodes;
import java.util.Collections;
import java.util.LinkedList;

View File

@ -1,4 +1,4 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes;
package com.iqser.red.service.redaction.v1.server.document.graph.nodes;
import static java.lang.String.format;
@ -7,18 +7,19 @@ import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.DocumentTree;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.AtomicTextBlock;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RectangleTransformations;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility;
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.AtomicTextBlock;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.document.utils.RectangleTransformations;
import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility;
public interface SemanticNode {
@ -371,7 +372,7 @@ public interface SemanticNode {
*/
default boolean containsStringIgnoreCase(String string) {
// Superseded line of this hunk: both sides were lower-cased with the JVM's default locale.
return getTextBlock().getSearchText().toLowerCase().contains(string.toLowerCase());
// Replacement line of this hunk: Locale.ROOT makes the case folding independent of the default locale.
return getTextBlock().getSearchText().toLowerCase(Locale.ROOT).contains(string.toLowerCase(Locale.ROOT));
}

View File

@ -1,18 +1,19 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes;
package com.iqser.red.service.redaction.v1.server.document.graph.nodes;
import static java.lang.String.format;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.DocumentTree;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlockCollector;
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlockCollector;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
@ -66,7 +67,7 @@ public class Table implements SemanticNode {
*/
public boolean rowContainsStringsIgnoreCase(Integer row, List<String> strings) {

    // Superseded line of this hunk: the row text was lower-cased with the JVM's default locale.
    String rowText = streamRow(row).map(TableCell::getTextBlock).collect(new TextBlockCollector()).getSearchText().toLowerCase();
    // Replacement line of this hunk: locale-independent lower-casing of the row text.
    String rowText = streamRow(row).map(TableCell::getTextBlock).collect(new TextBlockCollector()).getSearchText().toLowerCase(Locale.ROOT);
    // The search strings must be folded with the same locale as rowText; with the default locale
    // (e.g. Turkish, where "I" lower-cases to a dotless i) otherwise equal text would not match.
    return strings.stream().map(searchString -> searchString.toLowerCase(Locale.ROOT)).allMatch(rowText::contains);
}

View File

@ -1,4 +1,4 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes;
package com.iqser.red.service.redaction.v1.server.document.graph.nodes;
import java.awt.geom.Rectangle2D;
import java.util.HashMap;
@ -7,10 +7,10 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.DocumentTree;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlockCollector;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlockCollector;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;

View File

@ -1,4 +1,4 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock;
package com.iqser.red.service.redaction.v1.server.document.graph.textblock;
import static java.lang.String.format;
@ -12,13 +12,12 @@ import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.data.AtomicPositionBlockData;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.data.AtomicTextBlockData;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.factory.SearchTextWithTextPositionDto;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Page;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RectangleTransformations;
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Page;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.document.utils.RectangleTransformations;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositionData;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentTextData;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
@ -58,28 +57,6 @@ public class AtomicTextBlock implements TextBlock {
return lineBreaks.size() + 1;
}
/**
 * Builds an {@link AtomicTextBlock} from the search text, line breaks, positions and index
 * mapping of the given DTO.
 *
 * @param searchTextWithTextPositionDto source of search text, line breaks, positions and index mapping
 * @param parent                        node set as the block's parent
 * @param stringOffset                  start of the block's boundary; the end is this offset plus the search text length
 * @param textBlockIdx                  id assigned to the block
 * @param numberOnPage                  value stored as the block's number on its page
 * @param page                          page the block is assigned to
 * @return the newly built text block
 */
public static AtomicTextBlock fromSearchTextWithTextPositionDto(SearchTextWithTextPositionDto searchTextWithTextPositionDto,
                                                                SemanticNode parent,
                                                                int stringOffset,
                                                                Long textBlockIdx,
                                                                Integer numberOnPage,
                                                                Page page) {

    var blockBuilder = AtomicTextBlock.builder();
    blockBuilder.id(textBlockIdx);
    blockBuilder.parent(parent);
    blockBuilder.searchText(searchTextWithTextPositionDto.getSearchText());
    blockBuilder.numberOnPage(numberOnPage);
    blockBuilder.page(page);
    blockBuilder.lineBreaks(searchTextWithTextPositionDto.getLineBreaks());
    blockBuilder.positions(searchTextWithTextPositionDto.getPositions());
    blockBuilder.stringIdxToPositionIdx(searchTextWithTextPositionDto.getStringCoordsToPositionCoords());

    // The boundary covers exactly the search text, shifted by the given offset.
    int boundaryEnd = stringOffset + searchTextWithTextPositionDto.getSearchText().length();
    blockBuilder.boundary(new Boundary(stringOffset, boundaryEnd));

    return blockBuilder.build();
}
public static AtomicTextBlock empty(Long textBlockIdx, int stringOffset, Page page, int numberOnPage, SemanticNode parent) {
return AtomicTextBlock.builder()
@ -96,8 +73,8 @@ public class AtomicTextBlock implements TextBlock {
}
public static AtomicTextBlock fromAtomicTextBlockData(AtomicTextBlockData atomicTextBlockData,
AtomicPositionBlockData atomicPositionBlockData,
public static AtomicTextBlock fromAtomicTextBlockData(DocumentTextData atomicTextBlockData,
DocumentPositionData atomicPositionBlockData,
SemanticNode parent,
Page page) {

View File

@ -1,4 +1,4 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock;
package com.iqser.red.service.redaction.v1.server.document.graph.textblock;
import static java.lang.String.format;
@ -10,8 +10,8 @@ import java.util.List;
import java.util.Map;
import java.util.stream.Stream;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Page;
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Page;
import lombok.AccessLevel;
import lombok.Data;

View File

@ -1,4 +1,4 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock;
package com.iqser.red.service.redaction.v1.server.document.graph.textblock;
import static java.lang.String.format;
@ -10,9 +10,9 @@ import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Page;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RectangleTransformations;
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Page;
import com.iqser.red.service.redaction.v1.server.document.utils.RectangleTransformations;
public interface TextBlock extends CharSequence {

View File

@ -1,4 +1,4 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock;
package com.iqser.red.service.redaction.v1.server.document.graph.textblock;
import java.util.Set;
import java.util.function.BiConsumer;

View File

@ -1,8 +1,6 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.services;
package com.iqser.red.service.redaction.v1.server.document.services;
import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.anyMatch;
import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.getExpandedEndByRegex;
import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.getExpandedStartByRegex;
import static com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility.anyMatch;
import static com.iqser.red.service.redaction.v1.server.redaction.utils.SeparatorUtils.boundaryIsSurroundedBySeparators;
import static java.util.stream.Collectors.toMap;
@ -22,20 +20,20 @@ import org.kie.api.runtime.KieSession;
import com.google.common.base.Functions;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.ConsecutiveBoundaryCollector;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.DocumentTree;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionPosition;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.NodeType;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Page;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Table;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.TableCell;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RectangleTransformations;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility;
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionPosition;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.NodeType;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Page;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Table;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.TableCell;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.document.utils.RectangleTransformations;
import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility;
import com.iqser.red.service.redaction.v1.server.document.graph.ConsecutiveBoundaryCollector;
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities;
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntitiesAdapter;
import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.SearchImplementation;
@ -450,14 +448,14 @@ public class EntityCreationService {
/**
 * Requests a new entity whose boundary starts at the value computed by
 * {@code RedactionSearchUtility.getExpandedStartByRegex} and ends where the given entity ends.
 * Type, entity type and deepest fully containing node are taken over from the given entity.
 *
 * @param entity       entity whose start is to be expanded
 * @param regexPattern pattern handed to the start expansion
 * @return the result of {@code byBoundary} for the expanded boundary
 */
public Optional<RedactionEntity> byPrefixExpansionRegex(RedactionEntity entity, String regexPattern) {
// Superseded line of this hunk: relied on a static import of getExpandedStartByRegex.
int expandedStart = getExpandedStartByRegex(entity, regexPattern);
// Replacement line of this hunk: same call, qualified with RedactionSearchUtility.
int expandedStart = RedactionSearchUtility.getExpandedStartByRegex(entity, regexPattern);
return byBoundary(new Boundary(expandedStart, entity.getBoundary().end()), entity.getType(), entity.getEntityType(), entity.getDeepestFullyContainingNode());
}
/**
 * Requests a new entity whose boundary starts where the given entity starts and ends at the
 * value computed by {@code RedactionSearchUtility.getExpandedEndByRegex}, after that value has
 * been passed through {@code truncateEndIfLineBreakIsBetween}.
 * Type, entity type and deepest fully containing node are taken over from the given entity.
 *
 * @param entity       entity whose end is to be expanded
 * @param regexPattern pattern handed to the end expansion
 * @return the result of {@code byBoundary} for the expanded boundary
 */
public Optional<RedactionEntity> bySuffixExpansionRegex(RedactionEntity entity, String regexPattern) {
// Superseded line of this hunk: relied on a static import of getExpandedEndByRegex.
int expandedEnd = getExpandedEndByRegex(entity, regexPattern);
// Replacement line of this hunk: same call, qualified with RedactionSearchUtility.
int expandedEnd = RedactionSearchUtility.getExpandedEndByRegex(entity, regexPattern);
// NOTE(review): presumably shortens the expansion so it does not run across a line break
// between the original end and the expanded end — confirm in truncateEndIfLineBreakIsBetween.
expandedEnd = truncateEndIfLineBreakIsBetween(entity.getBoundary().end(), expandedEnd, entity.getDeepestFullyContainingNode().getTextBlock());
return byBoundary(new Boundary(entity.getBoundary().start(), expandedEnd), entity.getType(), entity.getEntityType(), entity.getDeepestFullyContainingNode());
}

View File

@ -1,4 +1,4 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.services;
package com.iqser.red.service.redaction.v1.server.document.services;
import java.util.Arrays;
import java.util.List;
@ -6,8 +6,8 @@ import java.util.Objects;
import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
import lombok.RequiredArgsConstructor;

View File

@ -1,14 +1,14 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.services;
package com.iqser.red.service.redaction.v1.server.document.services;
import java.awt.geom.Rectangle2D;
import java.util.NoSuchElementException;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionPosition;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Image;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RectangleTransformations;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionPosition;
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Image;
import com.iqser.red.service.redaction.v1.server.document.utils.RectangleTransformations;
import lombok.RequiredArgsConstructor;

View File

@ -1,4 +1,4 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils;
package com.iqser.red.service.redaction.v1.server.document.utils;
import java.awt.geom.Rectangle2D;
import java.awt.geom.RectangularShape;
@ -12,39 +12,16 @@ import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.Collector;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Point;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Rectangle;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.AbstractPageBlock;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.AtomicTextBlock;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.AtomicTextBlock;
import lombok.AllArgsConstructor;
import lombok.NoArgsConstructor;
public class RectangleTransformations {
/**
 * Builds a PDFBox rectangle that spans the bounding box of the given rectangles.
 *
 * @param rectangles rectangles to enclose; passed unchanged to {@code rectangleBBox}
 * @return a new {@link PDRectangle} whose corners are the bounding-box corners narrowed to {@code float}
 */
public static PDRectangle toPDRectangleBBox(List<Rectangle> rectangles) {

    final Rectangle2D bounds = rectangleBBox(rectangles);

    final PDRectangle pdRectangle = new PDRectangle();
    // PDRectangle stores floats, so the double coordinates are narrowed here.
    pdRectangle.setLowerLeftX((float) bounds.getMinX());
    pdRectangle.setLowerLeftY((float) bounds.getMinY());
    pdRectangle.setUpperRightX((float) bounds.getMaxX());
    pdRectangle.setUpperRightY((float) bounds.getMaxY());
    return pdRectangle;
}
/**
 * Computes the bounding box of a list of page blocks.
 *
 * @param abstractPageBlocks blocks whose min-x/min-y/width/height describe their extent
 * @return the rectangle produced by {@code Rectangle2DBBoxCollector} over all block rectangles
 */
public static Rectangle2D abstractPageBlockBBox(List<AbstractPageBlock> abstractPageBlocks) {

    return abstractPageBlocks.stream()
            // one rectangle per block, anchored at its (minX, minY) corner
            .map(block -> new Rectangle2D.Double(block.getMinX(), block.getMinY(), block.getWidth(), block.getHeight()))
            .collect(new Rectangle2DBBoxCollector());
}
public static Rectangle2D atomicTextBlockBBox(List<AtomicTextBlock> atomicTextBlocks) {
@ -114,12 +91,6 @@ public class RectangleTransformations {
}
/**
 * Converts a PDFBox rectangle into an AWT {@link Rectangle2D}.
 *
 * @param cropBox the PDFBox rectangle to convert
 * @return a {@link Rectangle2D.Double} anchored at the lower-left corner of {@code cropBox} with the same width and height
 */
public static Rectangle2D toRectangle2D(PDRectangle cropBox) {

    final double x = cropBox.getLowerLeftX();
    final double y = cropBox.getLowerLeftY();
    final double width = cropBox.getWidth();
    final double height = cropBox.getHeight();
    return new Rectangle2D.Double(x, y, width, height);
}
private static class Rectangle2DBBoxCollector implements Collector<Rectangle2D, Rectangle2DBBoxCollector.BBox, Rectangle2D> {
@Override

View File

@ -1,18 +1,17 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils;
package com.iqser.red.service.redaction.v1.server.document.utils;
import static java.lang.String.format;
import java.awt.geom.Rectangle2D;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.IntStream;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.redaction.utils.Patterns;
import lombok.experimental.UtilityClass;

View File

@ -1,80 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter;
import java.awt.geom.Rectangle2D;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import org.springframework.stereotype.Service;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.image.ImageServiceResponse;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.ClassificationPage;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.image.ClassifiedImage;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.ImageType;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
/**
 * Reads the stored image-service response of a file and converts it into {@link ClassifiedImage}s grouped by page.
 */
@Service
@RequiredArgsConstructor
public class ImageServiceResponseAdapter {

    private final ObjectMapper objectMapper;
    private final RedactionStorageService redactionStorageService;


    /**
     * Loads the {@code IMAGE_INFO} object of the given file and maps every image entry to a {@link ClassifiedImage}.
     * <p>
     * An entry whose filters did not all pass is classified as {@link ImageType#OTHER}; otherwise its label is
     * upper-cased and resolved with {@link ImageType#valueOf(String)}, which throws
     * {@link IllegalArgumentException} for a label that is not an {@link ImageType} constant.
     *
     * @param dossierId dossier the file belongs to
     * @param fileId    file whose image information is read
     * @return classified images keyed by page number; entries from {@code data} precede those from {@code dataCV}
     */
    @SneakyThrows
    public Map<Integer, List<ClassifiedImage>> convertImages(String dossierId, String fileId) {

        var imageClassificationStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(dossierId, fileId, FileType.IMAGE_INFO));

        ImageServiceResponse imageServiceResponse = objectMapper.readValue(imageClassificationStream, ImageServiceResponse.class);

        Map<Integer, List<ClassifiedImage>> images = new HashMap<>();

        // Both lists are currently converted the same way. This may change later, because unclassified
        // images arguably should not be converted at all. "data" is processed before "dataCV".
        for (var metadataList : List.of(imageServiceResponse.getData(), imageServiceResponse.getDataCV())) {
            for (var imageMetadata : metadataList) {
                var classification = imageMetadata.getFilters().isAllPassed() ? ImageType.valueOf(imageMetadata.getClassification()
                        .getLabel()
                        .toUpperCase(Locale.ROOT)) : ImageType.OTHER;

                var position = imageMetadata.getPosition();
                var bounds = new Rectangle2D.Double(position.getX1(), position.getY1(), imageMetadata.getGeometry().getWidth(), imageMetadata.getGeometry().getHeight());

                images.computeIfAbsent(position.getPageNumber(), pageNumber -> new ArrayList<>())
                        .add(new ClassifiedImage(bounds, classification, imageMetadata.isAlpha(), position.getPageNumber()));
            }
        }

        return images;
    }


    /**
     * Re-labels images of type {@link ImageType#OTHER} as {@link ImageType#OCR} when the image position
     * contains the rectangle of at least one text block of the page.
     *
     * @param page page whose images are updated in place
     */
    public void findOcr(ClassificationPage page) {

        page.getImages().forEach(image -> {
            if (image.getImageType().equals(ImageType.OTHER)) {
                page.getTextBlocks().forEach(textblock -> {
                    if (image.getPosition().contains(textblock.getMinX(), textblock.getMinY(), textblock.getWidth(), textblock.getHeight())) {
                        image.setImageType(ImageType.OCR);
                    }
                });
            }
        });
    }

}

View File

@ -1,63 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.springframework.stereotype.Service;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.table.PdfTableCell;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.table.TableCells;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.table.TableServiceResponse;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
/**
 * Reads the stored table-service response of a file and converts it into {@link PdfTableCell}s grouped by page.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class TableServiceResponseAdapter {

    private final ObjectMapper objectMapper;
    private final RedactionStorageService redactionStorageService;


    /**
     * Loads the {@code TABLES} object of the given file and collects its table cells per page number.
     *
     * @param dossierId dossier the file belongs to
     * @param fileId    file whose table information is read
     * @return converted table cells keyed by the page number reported in each table's page info
     */
    @SneakyThrows
    public Map<Integer, List<PdfTableCell>> convertTables(String dossierId, String fileId) {

        var storedTables = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(dossierId, fileId, FileType.TABLES));
        TableServiceResponse response = objectMapper.readValue(storedTables, TableServiceResponse.class);

        Map<Integer, List<PdfTableCell>> cellsByPage = new HashMap<>();
        for (var tableData : response.getData()) {
            cellsByPage.computeIfAbsent(tableData.getPageInfo().getNumber(), pageNumber -> new ArrayList<>())
                    .addAll(convertTableCells(tableData.getTableCells()));
        }
        return cellsByPage;
    }


    /**
     * Copies the coordinates and size of every response cell into a new {@link PdfTableCell}.
     */
    private Collection<? extends PdfTableCell> convertTableCells(List<TableCells> tableCells) {

        List<PdfTableCell> converted = new ArrayList<>();
        for (TableCells cell : tableCells) {
            PdfTableCell pdfTableCell = PdfTableCell.builder()
                    .x0(cell.getX0())
                    .y0(cell.getY0())
                    .x1(cell.getX1())
                    .y1(cell.getY1())
                    .width(cell.getWidth())
                    .height(cell.getHeight())
                    .build();
            converted.add(pdfTableCell);
        }
        return converted;
    }

}

View File

@ -1,14 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.image;
import java.util.HashMap;
import java.util.Map;
import lombok.Data;
/**
 * Classification part of an {@link ImageMetadata} entry of the image-service response.
 */
@Data
public class Classification {
// NOTE(review): presumably one score per candidate label - confirm against the image-service contract.
private Map<String, Float> probabilities = new HashMap<>();
// ImageServiceResponseAdapter upper-cases this label and resolves it with ImageType.valueOf(...).
private String label;
}

View File

@ -1,11 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.image;
import lombok.Data;
/**
 * Geometry-related filter results of an image, referenced from {@link Filters#getGeometry()}.
 */
@Data
public class FilterGeometry {
// Size check result (quotient, tooLarge, tooSmall).
private ImageSize imageSize;
// Format check result (quotient, tooTall, tooWide).
private ImageFormat imageFormat;
}

View File

@ -1,12 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.image;
import lombok.Data;
/**
 * Filter results attached to an {@link ImageMetadata} entry of the image-service response.
 */
@Data
public class Filters {
private FilterGeometry geometry;
private Probability probability;
// When false, ImageServiceResponseAdapter ignores the classification label and uses ImageType.OTHER.
private boolean allPassed;
}

View File

@ -1,11 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.image;
import lombok.Data;
/**
 * Size of an image as reported in an {@link ImageMetadata} entry.
 * ImageServiceResponseAdapter uses width and height as the size of the image rectangle.
 */
@Data
public class Geometry {
private float width;
private float height;
}

View File

@ -1,12 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.image;
import lombok.Data;
/**
 * Format filter result of an image, part of {@link FilterGeometry}.
 */
@Data
public class ImageFormat {
// NOTE(review): presumably a width/height ratio that the two flags are derived from - confirm with the image service.
private float quotient;
private boolean tooTall;
private boolean tooWide;
}

View File

@ -1,14 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.image;
import lombok.Data;
/**
 * One image entry of an {@link ImageServiceResponse}.
 * ImageServiceResponseAdapter turns each entry into a ClassifiedImage.
 */
@Data
public class ImageMetadata {
// Label source for the image type; only used when filters.allPassed is true.
private Classification classification;
// Supplies page number and the x1/y1 origin of the image rectangle.
private Position position;
// Supplies width and height of the image rectangle.
private Geometry geometry;
private Filters filters;
// Passed on as the transparency flag of the ClassifiedImage.
private boolean alpha;
}

View File

@ -1,46 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.image;
import java.util.ArrayList;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonAlias;
import com.fasterxml.jackson.annotation.JsonProperty;
import lombok.Data;
/**
 * Response document of the image service as deserialized by Jackson.
 * Both list getters replace a {@code null} list with an empty one, so callers never receive {@code null}.
 */
@Data
public class ImageServiceResponse {
private String dossierId;
private String fileId;
// Mapped to the JSON property "imageMetadata"; "data" is accepted as an alias when reading.
@JsonProperty(value = "imageMetadata")
@JsonAlias("data")
private List<ImageMetadata> data = new ArrayList<>();
private List<ImageMetadata> dataCV = new ArrayList<>();
// Explicit setter carrying the same property name and alias as the field.
@JsonProperty(value = "imageMetadata")
@JsonAlias("data")
public void setData(List<ImageMetadata> data) {this.data = data;}
/**
 * @return the image entries; a {@code null} field is replaced by a new empty list first
 */
public List<ImageMetadata> getData() {
if (this.data == null) {
this.data = new ArrayList<>();
}
return data;
}
/**
 * @return the entries of {@code dataCV}; a {@code null} field is replaced by a new empty list first
 */
public List<ImageMetadata> getDataCV() {
if (this.dataCV == null) {
this.dataCV = new ArrayList<>();
}
return dataCV;
}
}

View File

@ -1,12 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.image;
import lombok.Data;
/**
 * Size filter result of an image, part of {@link FilterGeometry}.
 */
@Data
public class ImageSize {
// NOTE(review): presumably the image size relative to the page, from which the two flags are derived - confirm.
private float quotient;
private boolean tooLarge;
private boolean tooSmall;
}

View File

@ -1,14 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.image;
import lombok.Data;
/**
 * Position of an image as reported in an {@link ImageMetadata} entry.
 * ImageServiceResponseAdapter uses (x1, y1) as rectangle origin and pageNumber as grouping key;
 * x2 and y2 are not read by it.
 */
@Data
public class Position {
private float x1;
private float x2;
private float y1;
private float y2;
private int pageNumber;
}

View File

@ -1,10 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.image;
import lombok.Data;
/**
 * Probability filter result of an image, referenced from {@link Filters#getProbability()}.
 */
@Data
public class Probability {
// NOTE(review): presumably set when the classifier's confidence is below a threshold - confirm with the image service.
private boolean unconfident;
}

View File

@ -1,13 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.table;
import lombok.Data;
/**
 * Page description attached to a {@link TableData} entry of the table-service response.
 */
@Data
public class PageInfo {
// Used by TableServiceResponseAdapter as the key under which the table's cells are grouped.
private int number;
private int rotation;
private float width;
private float height;
}

View File

@ -1,21 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.table;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.RequiredArgsConstructor;
/**
 * Table cell produced by TableServiceResponseAdapter from a {@link TableCells} response entry;
 * all six values are copied one-to-one.
 */
@Data
@Builder
@AllArgsConstructor
// No field is final or @NonNull, so Lombok generates a no-argument constructor here.
@RequiredArgsConstructor
public class PdfTableCell {
private float x0;
private float y0;
private float x1;
private float y1;
private float width;
private float height;
}

View File

@ -1,15 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.table;
import lombok.Data;
/**
 * One table cell of a {@link TableData} entry as delivered by the table service.
 * NOTE(review): (x0, y0) and (x1, y1) look like opposite corners of the cell - confirm against the service contract.
 */
@Data
public class TableCells {
private float x0;
private float y0;
private float x1;
private float y1;
private float width;
private float height;
}

View File

@ -1,14 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.table;
import java.util.ArrayList;
import java.util.List;
import lombok.Data;
/**
 * One table of a {@link TableServiceResponse}: the page it is on and its cells.
 */
@Data
public class TableData {
private PageInfo pageInfo;
// Initialised to an empty list so a response without cells still yields a non-null list.
private List<TableCells> tableCells = new ArrayList<>();
}

View File

@ -1,19 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.table;
import java.util.ArrayList;
import java.util.List;
import lombok.Data;
/**
 * Response document of the table service as deserialized by TableServiceResponseAdapter.
 */
@Data
public class TableServiceResponse {
private String dossierId;
private String fileId;
private String operation;
private String targetFileExtension;
private String responseFileExtension;
// One entry per detected table; defaults to an empty list.
private List<TableData> data = new ArrayList<>();
}

View File

@ -1,88 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Rectangle;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text.TextPageBlock;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Base class of the blocks found on a page: an axis-aligned box (minX/maxX/minY/maxY) with a page number,
 * a classification, a column index and an orientation. All fields except {@code columnIndex} are excluded
 * from JSON serialization.
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
public abstract class AbstractPageBlock {
@JsonIgnore
protected float minX;
@JsonIgnore
protected float maxX;
@JsonIgnore
protected float minY;
@JsonIgnore
protected float maxY;
@JsonIgnore
protected PageBlockType classification;
@JsonIgnore
protected int page;
// The only field without @JsonIgnore.
int columnIndex;
@JsonIgnore
private Orientation orientation = Orientation.NONE;
/**
 * @return the text of this block
 */
public abstract String getText();
/**
 * @return true only for a TextPageBlock whose classification is set and is one of the headline types
 */
public boolean isHeadline() {
return this instanceof TextPageBlock && this.getClassification() != null && this.getClassification().isHeadline();
}
/**
 * Compares this block's box with the PDF coordinates of the given text block.
 * NOTE(review): the X test checks that this block spans the other, but the Y test checks the opposite
 * direction (this.minY &gt;= other, this.maxY &lt;= other). Confirm whether this is intended for the
 * coordinate system of getPdfMinY()/getPdfMaxY().
 */
public boolean containsBlock(TextPageBlock other) {
return this.minX <= other.getPdfMinX() && this.maxX >= other.getPdfMaxX() && this.minY >= other.getPdfMinY() && this.maxY <= other.getPdfMaxY();
}
/**
 * Compares this block's box with another block's box.
 * NOTE(review): as in containsBlock, the Y comparison points the opposite way to the X comparison and to
 * contains(Rectangle). This looks like a possible copy-paste slip; verify against callers before changing.
 */
public boolean contains(AbstractPageBlock other) {
return this.minX <= other.minX && this.maxX >= other.maxX && this.minY >= other.minY && this.maxY <= other.maxY;
}
/**
 * @return true if the rectangle is on the same page and lies within this block's box in both X and Y,
 * taking the rectangle's top-left point plus width/height as its extent
 */
public boolean contains(Rectangle other) {
return page == other.getPage() && this.minX <= other.getTopLeft().getX() && this.maxX >= other.getTopLeft().getX() + other.getWidth() && this.minY <= other.getTopLeft()
.getY() && this.maxY >= other.getTopLeft().getY() + other.getHeight();
}
/**
 * @return maxY - minY
 */
@JsonIgnore
public float getHeight() {
return maxY - minY;
}
/**
 * @return maxX - minX
 */
@JsonIgnore
public float getWidth() {
return maxX - minX;
}
/**
 * @return true if the Y ranges of both blocks overlap or touch
 */
public boolean intersectsY(AbstractPageBlock atc) {
return this.minY <= atc.getMaxY() && this.maxY >= atc.getMinY();
}
/**
 * @return true if the X ranges of both blocks overlap or touch
 */
public boolean intersectsX(AbstractPageBlock atc) {
return this.minX <= atc.getMaxX() && this.maxX >= atc.getMinX();
}
}

View File

@ -1,33 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model;
import java.util.ArrayList;
import java.util.List;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.section.SectionGrid;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text.StringFrequencyCounter;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text.UnclassifiedText;
import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.DictionaryVersion;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Mutable container for the classification result of a whole document: its pages, sections, headers,
 * footers and unclassified texts, plus document-wide frequency counters and version information.
 * All collections and counters start empty.
 */
@Data
@NoArgsConstructor
public class ClassificationDocument {
private List<ClassificationPage> pages = new ArrayList<>();
private List<ClassificationSection> sections = new ArrayList<>();
private List<ClassificationHeader> headers = new ArrayList<>();
private List<ClassificationFooter> footers = new ArrayList<>();
private List<UnclassifiedText> unclassifiedTexts = new ArrayList<>();
// Document-wide counters; ClassificationPage holds counters of the same names per page.
private FloatFrequencyCounter textHeightCounter = new FloatFrequencyCounter();
private FloatFrequencyCounter fontSizeCounter = new FloatFrequencyCounter();
private StringFrequencyCounter fontCounter = new StringFrequencyCounter();
private StringFrequencyCounter fontStyleCounter = new StringFrequencyCounter();
// NOTE(review): presumably true once headlines were detected in the document - confirm with the code that sets it.
private boolean headlines;
private SectionGrid sectionGrid = new SectionGrid();
private DictionaryVersion dictionaryVersion;
private long rulesVersion;
}

View File

@ -1,16 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model;
import java.util.List;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text.TextPageBlock;
import lombok.AllArgsConstructor;
import lombok.Data;
/**
 * Footer of a classified document, represented by the text blocks it consists of.
 */
@Data
@AllArgsConstructor
public class ClassificationFooter {
private List<TextPageBlock> textBlocks;
}

View File

@ -1,16 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model;
import java.util.List;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text.TextPageBlock;
import lombok.AllArgsConstructor;
import lombok.Data;
/**
 * Header of a classified document, represented by the text blocks it consists of.
 */
@Data
@AllArgsConstructor
public class ClassificationHeader {
private List<TextPageBlock> textBlocks;
}

View File

@ -1,38 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model;
import java.util.ArrayList;
import java.util.List;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Rectangle;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.image.ClassifiedImage;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text.StringFrequencyCounter;
import lombok.Data;
import lombok.NonNull;
import lombok.RequiredArgsConstructor;
/**
 * Mutable container for the classification result of a single page: its blocks, images, geometry
 * and per-page frequency counters.
 */
@Data
@RequiredArgsConstructor
public class ClassificationPage {
// The only @NonNull field, hence the single argument of the Lombok-generated constructor.
@NonNull
private List<AbstractPageBlock> textBlocks;
private List<ClassifiedImage> images = new ArrayList<>();
private Rectangle bodyTextFrame;
private boolean landscape;
private int rotation;
private int pageNumber;
private FloatFrequencyCounter textHeightCounter = new FloatFrequencyCounter();
private FloatFrequencyCounter fontSizeCounter = new FloatFrequencyCounter();
private StringFrequencyCounter fontCounter = new StringFrequencyCounter();
private StringFrequencyCounter fontStyleCounter = new StringFrequencyCounter();
private float pageWidth;
private float pageHeight;
}

View File

@ -1,32 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model;
import java.util.ArrayList;
import java.util.List;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.image.ClassifiedImage;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.table.TablePageBlock;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * A section of a classified document: its page blocks, its images and an optional headline.
 */
@Data
@NoArgsConstructor
public class ClassificationSection {

    private List<AbstractPageBlock> pageBlocks = new ArrayList<>();
    private List<ClassifiedImage> images = new ArrayList<>();
    private String headline;


    /**
     * Returns the table blocks of this section.
     *
     * @return a new mutable list with every page block that is a {@link TablePageBlock}, in block order
     */
    public List<TablePageBlock> getTables() {

        List<TablePageBlock> result = new ArrayList<>();
        for (AbstractPageBlock pageBlock : pageBlocks) {
            if (pageBlock instanceof TablePageBlock table) {
                result.add(table);
            }
        }
        return result;
    }

}

View File

@ -1,14 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model;
import java.awt.geom.Rectangle2D;
import lombok.AllArgsConstructor;
/**
 * Value holder for a column: its index, its {@link ColumnType} and its bounding box.
 * Fields are package-private and no accessors are generated; only the all-args constructor comes from Lombok.
 */
@AllArgsConstructor
public class Column {
int index;
ColumnType columnType;
Rectangle2D bBox;
}

View File

@ -1,6 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model;
/**
 * Kind of a {@link Column}.
 * NOTE(review): presumably records whether the column was found via ruling lines or via spacing - confirm with the code that creates columns.
 */
public enum ColumnType {
RULING,
DISTANCE
}

View File

@ -1,77 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import lombok.Getter;
/**
 * Counts how often each float value was added and answers simple questions about the distribution.
 */
public class FloatFrequencyCounter {

    @Getter
    Map<Float, Integer> countPerValue = new HashMap<>();


    /**
     * Increments the count of {@code value} by one, starting at one for an unseen value.
     */
    public void add(float value) {

        countPerValue.merge(value, 1, Integer::sum);
    }


    /**
     * Adds the counts of another counter map to this one, key by key.
     */
    public void addAll(Map<Float, Integer> otherCounter) {

        for (var other : otherCounter.entrySet()) {
            Float key = other.getKey();
            if (!countPerValue.containsKey(key)) {
                countPerValue.put(key, other.getValue());
            } else {
                countPerValue.put(key, countPerValue.get(key) + other.getValue());
            }
        }
    }


    /**
     * @return the value with the highest count, or {@code null} if nothing was counted; on equal counts the
     * entry visited last in map iteration order wins
     */
    public Float getMostPopular() {

        Map.Entry<Float, Integer> best = null;
        for (var candidate : countPerValue.entrySet()) {
            if (best == null || candidate.getValue() >= best.getValue()) {
                best = candidate;
            }
        }
        if (best == null) {
            return null;
        }
        return best.getKey();
    }


    /**
     * @return all counted values greater than the most popular one, sorted in descending order
     */
    public List<Float> getHighterThanMostPopular() {

        Float threshold = getMostPopular();
        return countPerValue.keySet().stream()
                .filter(candidate -> candidate > threshold)
                .sorted(Collections.reverseOrder())
                .collect(Collectors.toList());
    }


    /**
     * @return the greatest counted value, or {@code null} if nothing was counted
     */
    public Float getHighest() {

        Float max = null;
        for (Float candidate : countPerValue.keySet()) {
            if (max == null || candidate > max) {
                max = candidate;
            }
        }
        return max;
    }

}

View File

@ -1,8 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model;
/**
 * Orientation of an AbstractPageBlock; blocks start with {@link #NONE}.
 */
public enum Orientation {
NONE,
LEFT,
RIGHT
}

View File

@ -1,38 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model;
/**
 * Classification of a page block: six headline levels plus header, footer, title, paragraph variants,
 * other and table.
 */
public enum PageBlockType {
    H1,
    H2,
    H3,
    H4,
    H5,
    H6,
    HEADER,
    FOOTER,
    TITLE,
    PARAGRAPH,
    PARAGRAPH_BOLD,
    PARAGRAPH_ITALIC,
    PARAGRAPH_UNKNOWN,
    OTHER,
    TABLE;


    /**
     * Maps a headline level to its type.
     *
     * @param i headline level
     * @return {@code H1}..{@code H5} for 1..5, and {@code H6} for every other value
     */
    public static PageBlockType getHeadlineType(int i) {

        switch (i) {
            case 1:
                return H1;
            case 2:
                return H2;
            case 3:
                return H3;
            case 4:
                return H4;
            case 5:
                return H5;
            default:
                return H6;
        }
    }


    /**
     * @return true for {@code H1} through {@code H6}, false for every other type
     */
    public boolean isHeadline() {

        return switch (this) {
            case H1, H2, H3, H4, H5, H6 -> true;
            default -> false;
        };
    }
}

View File

@ -1,25 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.image;
import java.awt.geom.Rectangle2D;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.ImageType;
import lombok.Data;
import lombok.NonNull;
import lombok.RequiredArgsConstructor;
/**
 * An image on a page together with its classification.
 * The Lombok-generated constructor takes the four {@code @NonNull} fields in declaration order:
 * position, imageType, hasTransparency, page.
 */
@Data
@RequiredArgsConstructor
public class ClassifiedImage {
@NonNull
private Rectangle2D position;
@NonNull
private ImageType imageType;
// Not part of the generated constructor; starts as false.
private boolean isAppendedToSection;
// @NonNull cannot guard a primitive, but it is what makes Lombok include this field in the
// required-args constructor (ImageServiceResponseAdapter calls it with four arguments) - do not remove.
@NonNull
private boolean hasTransparency;
// Same as above: @NonNull is kept only to make this a constructor argument.
@NonNull
private int page;
}

View File

@ -1,79 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.table;
import java.awt.geom.Point2D;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text.TextPageBlock;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text.TextPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.NoArgsConstructor;
/**
 * A table cell: a {@link Rectangle} that additionally carries its text blocks, its header cells,
 * a header flag and a page number.
 */
@SuppressWarnings("serial")
@Data
@EqualsAndHashCode(callSuper = true)
@NoArgsConstructor
public class Cell extends Rectangle {

    private static final int MIN_SIZE = 1;

    private List<TextPageBlock> textBlocks = new ArrayList<>();
    private List<Cell> headerCells = new ArrayList<>();
    private boolean isHeaderCell;
    private int pageNumber;


    /**
     * Creates a cell spanning the rectangle between the two corner points.
     *
     * @param topLeft     corner used as origin of the rectangle
     * @param bottomRight opposite corner; width and height are its distance to {@code topLeft}
     */
    public Cell(Point2D topLeft, Point2D bottomRight) {

        super((float) topLeft.getY(), (float) topLeft.getX(), (float) (bottomRight.getX() - topLeft.getX()), (float) (bottomRight.getY() - topLeft.getY()));
    }


    /**
     * Appends a text block to this cell.
     */
    public void addTextBlock(TextPageBlock textBlock) {

        textBlocks.add(textBlock);
    }


    /**
     * Joins the words of all text blocks. Consecutive words are separated by a line break when their
     * direction-adjusted max-Y values differ by more than the text height of the later word, otherwise by a space.
     * Hyphen line breaks are then removed, remaining line breaks become spaces and double spaces are
     * replaced by single ones.
     */
    @Override
    public String toString() {

        StringBuilder text = new StringBuilder();
        TextPositionSequence lastWord = null;

        for (TextPageBlock block : textBlocks) {
            for (TextPositionSequence word : block.getSequences()) {
                if (lastWord != null) {
                    boolean onNewLine = Math.abs(lastWord.getMaxYDirAdj() - word.getMaxYDirAdj()) > word.getTextHeight();
                    text.append(onNewLine ? '\n' : ' ');
                }
                text.append(word.toString());
                lastWord = word;
            }
        }

        String withoutHyphenBreaks = TextNormalizationUtilities.removeHyphenLineBreaks(text.toString());
        return withoutHyphenBreaks.replaceAll("\n", " ").replaceAll(" {2}", " ");
    }


    /**
     * @return true if both height and width are at least {@code MIN_SIZE}
     */
    public boolean hasMinimumSize() {

        return this.getHeight() >= MIN_SIZE && this.getWidth() >= MIN_SIZE;
    }

}

View File

@ -1,22 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.table;
import lombok.RequiredArgsConstructor;
import lombok.Value;
/**
 * Immutable (row, column) position of a cell, ordered by row first and column second.
 */
@Value
@RequiredArgsConstructor
public class CellPosition implements Comparable<CellPosition> {

    int row;
    int col;


    /**
     * Orders positions by row, then by column.
     * <p>
     * Uses {@link Integer#compare(int, int)} instead of subtraction so that values far apart
     * (for example a large positive and a large negative index) cannot overflow and flip the sign.
     *
     * @param other position to compare with
     * @return a negative value, zero or a positive value if this position sorts before, equal to or after {@code other}
     */
    @Override
    public int compareTo(CellPosition other) {

        int byRow = Integer.compare(row, other.row);
        return byRow != 0 ? byRow : Integer.compare(col, other.col);
    }

}

View File

@ -1,15 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.table;
import java.util.List;
import lombok.Builder;
import lombok.Data;
/**
 * Pair of ruling lists, split into horizontal and vertical rulings.
 * Fields are package-private; accessors and the builder come from Lombok.
 */
@Data
@Builder
public class CleanRulings {
List<Ruling> horizontal;
List<Ruling> vertical;
}

View File

@ -1,218 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.table;
import java.awt.geom.Point2D;
import java.awt.geom.Rectangle2D;
import java.util.Comparator;
import java.util.List;
@SuppressWarnings("all")
public class Rectangle extends Rectangle2D.Float {
protected static final float VERTICAL_COMPARISON_THRESHOLD = 0.4f;
/**
* Ill-defined comparator, from when Rectangle was Comparable.
* <p>
* see https://github.com/tabulapdf/tabula-java/issues/116
*
* @deprecated with no replacement
*/
@Deprecated
public static final Comparator<Rectangle> ILL_DEFINED_ORDER = new Comparator<Rectangle>() {
@Override
public int compare(Rectangle o1, Rectangle o2) {
if (o1.equals(o2)) {
return 0;
}
if (o1.verticalOverlap(o2) > VERTICAL_COMPARISON_THRESHOLD) {
return o1.isLtrDominant() == -1 && o2.isLtrDominant() == -1 ? -java.lang.Double.compare(o1.getX(), o2.getX()) : java.lang.Double.compare(o1.getX(), o2.getX());
} else {
return java.lang.Float.compare(o1.getBottom(), o2.getBottom());
}
}
};
public Rectangle() {
super();
}
public Rectangle(float top, float left, float width, float height) {
super();
this.setRect(left, top, width, height);
}
/**
* @param rectangles
* @return minimum bounding box that contains all the rectangles
*/
public static Rectangle boundingBoxOf(List<? extends Rectangle> rectangles) {
float minx = java.lang.Float.MAX_VALUE;
float miny = java.lang.Float.MAX_VALUE;
float maxx = java.lang.Float.MIN_VALUE;
float maxy = java.lang.Float.MIN_VALUE;
for (Rectangle r : rectangles) {
minx = (float) Math.min(r.getMinX(), minx);
miny = (float) Math.min(r.getMinY(), miny);
maxx = (float) Math.max(r.getMaxX(), maxx);
maxy = (float) Math.max(r.getMaxY(), maxy);
}
return new Rectangle(miny, minx, maxx - minx, maxy - miny);
}
public int compareTo(Rectangle other) {
return ILL_DEFINED_ORDER.compare(this, other);
}
// I'm bad at Java and need this for fancy sorting in
// technology.tabula.TextChunk.
public int isLtrDominant() {
return 0;
}
public float getArea() {
return this.width * this.height;
}
public float verticalOverlap(Rectangle other) {
return Math.max(0, Math.min(this.getBottom(), other.getBottom()) - Math.max(this.getTop(), other.getTop()));
}
public boolean verticallyOverlaps(Rectangle other) {
return verticalOverlap(other) > 0;
}
public float horizontalOverlap(Rectangle other) {
return Math.max(0, Math.min(this.getRight(), other.getRight()) - Math.max(this.getLeft(), other.getLeft()));
}
public boolean horizontallyOverlaps(Rectangle other) {
return horizontalOverlap(other) > 0;
}
public float verticalOverlapRatio(Rectangle other) {
float rv = 0, delta = Math.min(this.getBottom() - this.getTop(), other.getBottom() - other.getTop());
if (other.getTop() <= this.getTop() && this.getTop() <= other.getBottom() && other.getBottom() <= this.getBottom()) {
rv = (other.getBottom() - this.getTop()) / delta;
} else if (this.getTop() <= other.getTop() && other.getTop() <= this.getBottom() && this.getBottom() <= other.getBottom()) {
rv = (this.getBottom() - other.getTop()) / delta;
} else if (this.getTop() <= other.getTop() && other.getTop() <= other.getBottom() && other.getBottom() <= this.getBottom()) {
rv = (other.getBottom() - other.getTop()) / delta;
} else if (other.getTop() <= this.getTop() && this.getTop() <= this.getBottom() && this.getBottom() <= other.getBottom()) {
rv = (this.getBottom() - this.getTop()) / delta;
}
return rv;
}
public float overlapRatio(Rectangle other) {
double intersectionWidth = Math.max(0, Math.min(this.getRight(), other.getRight()) - Math.max(this.getLeft(), other.getLeft()));
double intersectionHeight = Math.max(0, Math.min(this.getBottom(), other.getBottom()) - Math.max(this.getTop(), other.getTop()));
double intersectionArea = Math.max(0, intersectionWidth * intersectionHeight);
double unionArea = this.getArea() + other.getArea() - intersectionArea;
return (float) (intersectionArea / unionArea);
}
public Rectangle merge(Rectangle other) {
this.setRect(this.createUnion(other));
return this;
}
public float getTop() {
return (float) this.getMinY();
}
public void setTop(float top) {
float deltaHeight = top - this.y;
this.setRect(this.x, top, this.width, this.height - deltaHeight);
}
public float getRight() {
return (float) this.getMaxX();
}
public void setRight(float right) {
this.setRect(this.x, this.y, right - this.x, this.height);
}
public float getLeft() {
return (float) this.getMinX();
}
public void setLeft(float left) {
float deltaWidth = left - this.x;
this.setRect(left, this.y, this.width - deltaWidth, this.height);
}
public float getBottom() {
return (float) this.getMaxY();
}
public void setBottom(float bottom) {
this.setRect(this.x, this.y, this.width, bottom - this.y);
}
public Point2D[] getPoints() {
return new Point2D[]{new Point2D.Float(this.getLeft(), this.getTop()), new Point2D.Float(this.getRight(), this.getTop()), new Point2D.Float(this.getRight(),
this.getBottom()), new Point2D.Float(this.getLeft(), this.getBottom())};
}
/**
 * Extends the superclass representation with the bottom and right coordinates: the last
 * character of {@code super.toString()} is dropped and {@code ,bottom=…,right=…]} is appended.
 */
@Override
public String toString() {
    String base = super.toString();
    String withoutClosingChar = base.substring(0, base.length() - 1);
    String suffix = String.format(",bottom=%f,right=%f]", this.getBottom(), this.getRight());
    return withoutClosingChar + suffix;
}
}

View File

@ -1,437 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.table;
import java.awt.geom.Line2D;
import java.awt.geom.Point2D;
import java.awt.geom.Rectangle2D;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Formatter;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.utils.CohenSutherlandClipping;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.utils.Utils;
import lombok.extern.slf4j.Slf4j;
/**
 * A line segment ("ruling") used for table detection. Most operations only make sense for
 * horizontal or vertical rulings; for oblique rulings the position/start/end accessors throw
 * {@link UnsupportedOperationException}.
 *
 * <p>Note that "top"/"left" map to the first end point ({@code y1}/{@code x1}) and
 * "bottom"/"right" to the second end point ({@code y2}/{@code x2}); no min/max ordering is applied.
 */
@Slf4j
@SuppressWarnings("all")
public class Ruling extends Line2D.Float {

    /** Amount by which rulings are lengthened at both ends before perpendicular intersection tests. */
    private static final int PERPENDICULAR_PIXEL_EXPAND_AMOUNT = 2;


    public Ruling(Point2D p1, Point2D p2) {

        super(p1, p2);
    }


    /**
     * Keeps only the rulings that intersect {@code area} and clips each of them to it.
     *
     * @param rulings the rulings to crop
     * @param area    the clipping rectangle
     * @return a new list with the clipped rulings, in input order
     */
    public static List<Ruling> cropRulingsToArea(List<Ruling> rulings, Rectangle2D area) {

        List<Ruling> rv = new ArrayList<>();
        for (Ruling r : rulings) {
            if (r.intersects(area)) {
                rv.add(r.intersect(area));
            }
        }
        return rv;
    }


    // log(n) implementation of find_intersections
    // based on http://people.csail.mit.edu/indyk/6.838-old/handouts/lec2.pdf

    /**
     * Finds the intersection points between horizontal and vertical rulings with a sweep over
     * the x axis: a horizontal ruling becomes active at its (expanded) left end and inactive at
     * its (expanded) right end; every vertical ruling is tested against the active horizontals.
     *
     * <p>Active horizontals are keyed by their top coordinate only, so two horizontals with the
     * same top are treated as the same entry.
     *
     * @param horizontals the horizontal rulings
     * @param verticals   the vertical rulings
     * @return intersection points (ordered by y, then x) mapped to the expanded
     *         {horizontal, vertical} pair that produced them
     */
    public static Map<Point2D, Ruling[]> findIntersections(List<Ruling> horizontals, List<Ruling> verticals) {

        class SortObject {

            protected SOType type;
            protected float position;
            protected Ruling ruling;


            public SortObject(SOType type, float position, Ruling ruling) {

                this.type = type;
                this.position = position;
                this.ruling = ruling;
            }

        }

        List<SortObject> sos = new ArrayList<>();

        TreeMap<Ruling, Boolean> tree = new TreeMap<>(new Comparator<Ruling>() {
            @Override
            public int compare(Ruling o1, Ruling o2) {

                return java.lang.Double.compare(o1.getTop(), o2.getTop());
            }
        });

        TreeMap<Point2D, Ruling[]> rv = new TreeMap<>(new Comparator<Point2D>() {
            @Override
            public int compare(Point2D o1, Point2D o2) {

                if (o1.getY() > o2.getY()) {
                    return 1;
                }
                if (o1.getY() < o2.getY()) {
                    return -1;
                }
                if (o1.getX() > o2.getX()) {
                    return 1;
                }
                if (o1.getX() < o2.getX()) {
                    return -1;
                }
                return 0;
            }
        });

        for (Ruling h : horizontals) {
            sos.add(new SortObject(SOType.HLEFT, h.getLeft() - PERPENDICULAR_PIXEL_EXPAND_AMOUNT, h));
            sos.add(new SortObject(SOType.HRIGHT, h.getRight() + PERPENDICULAR_PIXEL_EXPAND_AMOUNT, h));
        }

        for (Ruling v : verticals) {
            sos.add(new SortObject(SOType.VERTICAL, v.getLeft(), v));
        }

        Collections.sort(sos, new Comparator<SortObject>() {
            @Override
            public int compare(SortObject a, SortObject b) {

                int rv;
                if (Utils.feq(a.position, b.position)) {
                    // At (nearly) the same position, a horizontal's left end sorts before a vertical
                    // and its right end after it, so the horizontal is active when the vertical is tested.
                    if (a.type == SOType.VERTICAL && b.type == SOType.HLEFT) {
                        rv = 1;
                    } else if (a.type == SOType.VERTICAL && b.type == SOType.HRIGHT) {
                        rv = -1;
                    } else if (a.type == SOType.HLEFT && b.type == SOType.VERTICAL) {
                        rv = -1;
                    } else if (a.type == SOType.HRIGHT && b.type == SOType.VERTICAL) {
                        rv = 1;
                    } else {
                        rv = java.lang.Double.compare(a.position, b.position);
                    }
                } else {
                    return java.lang.Double.compare(a.position, b.position);
                }
                return rv;
            }
        });

        for (SortObject so : sos) {
            switch (so.type) {
                case VERTICAL:
                    for (Map.Entry<Ruling, Boolean> h : tree.entrySet()) {
                        try {
                            Point2D i = h.getKey().intersectionPoint(so.ruling);
                            if (i == null) {
                                continue;
                            }
                            rv.put(i, new Ruling[]{h.getKey().expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT), so.ruling.expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT)});
                        } catch (UnsupportedOperationException e) {
                            log.info("Some line are oblique, ignoring...");
                            continue;
                        }
                    }
                    break;
                case HRIGHT:
                    tree.remove(so.ruling);
                    break;
                case HLEFT:
                    tree.put(so.ruling, true);
                    break;
            }
        }

        return rv;
    }


    /** @return true if this ruling has non-zero length and both end points share (approximately) the same x */
    public boolean vertical() {

        return this.length() > 0 && Utils.feq(this.x1, this.x2); //diff < ORIENTATION_CHECK_THRESHOLD;
    }


    /** @return true if this ruling has non-zero length and both end points share (approximately) the same y */
    public boolean horizontal() {

        return this.length() > 0 && Utils.feq(this.y1, this.y2); //diff < ORIENTATION_CHECK_THRESHOLD;
    }

    // attributes that make sense only for non-oblique lines
    // these are used to have a single collapse method (in page, currently)


    /** @return true if this ruling is neither vertical nor horizontal (this includes zero-length rulings) */
    public boolean oblique() {

        return !(this.vertical() || this.horizontal());
    }


    /**
     * @return the coordinate on the axis perpendicular to the ruling: left for vertical rulings, top otherwise
     * @throws UnsupportedOperationException if this ruling is oblique
     */
    public float getPosition() {

        if (this.oblique()) {
            throw new UnsupportedOperationException();
        }
        return this.vertical() ? this.getLeft() : this.getTop();
    }


    /**
     * @return the start coordinate along the ruling: top for vertical rulings, left otherwise
     * @throws UnsupportedOperationException if this ruling is oblique
     */
    public float getStart() {

        if (this.oblique()) {
            throw new UnsupportedOperationException();
        }
        return this.vertical() ? this.getTop() : this.getLeft();
    }


    /**
     * Sets the start coordinate along the ruling (top for vertical rulings, left otherwise).
     *
     * @throws UnsupportedOperationException if this ruling is oblique
     */
    public void setStart(float v) {

        if (this.oblique()) {
            throw new UnsupportedOperationException();
        }
        if (this.vertical()) {
            this.setTop(v);
        } else {
            this.setLeft(v);
        }
    }


    /**
     * @return the end coordinate along the ruling: bottom for vertical rulings, right otherwise
     * @throws UnsupportedOperationException if this ruling is oblique
     */
    public float getEnd() {

        if (this.oblique()) {
            throw new UnsupportedOperationException();
        }
        return this.vertical() ? this.getBottom() : this.getRight();
    }


    /**
     * Sets the end coordinate along the ruling (bottom for vertical rulings, right otherwise).
     *
     * @throws UnsupportedOperationException if this ruling is oblique
     */
    public void setEnd(float v) {

        if (this.oblique()) {
            throw new UnsupportedOperationException();
        }
        if (this.vertical()) {
            this.setBottom(v);
        } else {
            this.setRight(v);
        }
    }


    /**
     * Sets start and end along the ruling in one call.
     *
     * @throws UnsupportedOperationException if this ruling is oblique
     */
    public void setStartEnd(float start, float end) {

        if (this.oblique()) {
            throw new UnsupportedOperationException();
        }
        if (this.vertical()) {
            this.setTop(start);
            this.setBottom(end);
        } else {
            this.setLeft(start);
            this.setRight(end);
        }
    }


    /**
     * @return true when {@code this.vertical()} equals {@code other.horizontal()}; note that this
     *         is also true when this ruling is not vertical and the other is not horizontal
     */
    public boolean perpendicularTo(Ruling other) {

        return this.vertical() == other.horizontal();
    }


    /**
     * Tests whether the two rulings intersect, either directly or after being lengthened.
     *
     * @param another                        the ruling to test against
     * @param colinearOrParallelExpandAmount expansion applied to both rulings when they are not perpendicular
     * @return true if the (possibly expanded) rulings intersect
     */
    public boolean nearlyIntersects(Ruling another, int colinearOrParallelExpandAmount) {

        if (this.intersectsLine(another)) {
            return true;
        }

        boolean rv = false;

        if (this.perpendicularTo(another)) {
            rv = this.expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT).intersectsLine(another);
        } else {
            rv = this.expand(colinearOrParallelExpandAmount).intersectsLine(another.expand(colinearOrParallelExpandAmount));
        }

        return rv;
    }


    /** @return the Euclidean distance between the two end points */
    public double length() {

        return Math.sqrt(Math.pow(this.x1 - this.x2, 2) + Math.pow(this.y1 - this.y2, 2));
    }


    /**
     * Clips this ruling to {@code clip}.
     *
     * @return a new clipped ruling if the clipper reports success, otherwise this ruling unchanged
     */
    public Ruling intersect(Rectangle2D clip) {

        // "Float" here is the inherited Line2D.Float, not java.lang.Float.
        Float clipee = (Float) this.clone();
        boolean clipped = new CohenSutherlandClipping(clip).clip(clipee);

        if (clipped) {
            return new Ruling(clipee.getP1(), clipee.getP2());
        } else {
            return this;
        }
    }


    /**
     * Returns a copy of this ruling lengthened by {@code amount} at both ends. For oblique
     * rulings the copy is returned unmodified and a warning is logged.
     */
    public Ruling expand(float amount) {

        Ruling r = (Ruling) this.clone();
        try {
            r.setStart(this.getStart() - amount);
            r.setEnd(this.getEnd() + amount);
        } catch (UnsupportedOperationException e) {
            log.warn("Could not expand ruling!");
        }
        return r;
    }


    /**
     * Computes the intersection point of this ruling with {@code other} after expanding both.
     *
     * @return the point (x of the vertical ruling, y of the horizontal ruling), or {@code null}
     *         if the expanded rulings do not intersect or are not a horizontal/vertical pair
     */
    public Point2D intersectionPoint(Ruling other) {

        Ruling this_l = this.expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT);
        Ruling other_l = other.expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT);
        Ruling horizontal, vertical;

        if (!this_l.intersectsLine(other_l)) {
            return null;
        }

        if (this_l.horizontal() && other_l.vertical()) {
            horizontal = this_l;
            vertical = other_l;
        } else if (this_l.vertical() && other_l.horizontal()) {
            vertical = this_l;
            horizontal = other_l;
        } else {
            log.warn("lines must be orthogonal, vertical and horizontal");
            return null;
        }
        return new Point2D.Float(vertical.getLeft(), horizontal.getTop());
    }


    /** Two rulings are equal when both of their end points are equal. */
    @Override
    public boolean equals(Object other) {

        if (this == other) {
            return true;
        }

        if (!(other instanceof Ruling)) {
            return false;
        }

        Ruling o = (Ruling) other;
        return this.getP1().equals(o.getP1()) && this.getP2().equals(o.getP2());
    }


    /**
     * Hash derived from the same end points that {@link #equals(Object)} compares, so that equal
     * rulings produce equal hash codes. Because rulings are mutable, the hash changes when an
     * end point changes.
     */
    @Override
    public int hashCode() {

        return 31 * this.getP1().hashCode() + this.getP2().hashCode();
    }


    /** @return the y coordinate of the first end point */
    public float getTop() {

        return this.y1;
    }


    public void setTop(float v) {

        setLine(this.getLeft(), v, this.getRight(), this.getBottom());
    }


    /** @return the x coordinate of the first end point */
    public float getLeft() {

        return this.x1;
    }


    public void setLeft(float v) {

        setLine(v, this.getTop(), this.getRight(), this.getBottom());
    }


    /** @return the y coordinate of the second end point */
    public float getBottom() {

        return this.y2;
    }


    public void setBottom(float v) {

        setLine(this.getLeft(), this.getTop(), this.getRight(), v);
    }


    /** @return the x coordinate of the second end point */
    public float getRight() {

        return this.x2;
    }


    public void setRight(float v) {

        setLine(this.getLeft(), this.getTop(), v, this.getBottom());
    }


    /** @return right minus left; may be negative since end points are not ordered */
    public float getWidth() {

        return this.getRight() - this.getLeft();
    }


    /** @return bottom minus top; may be negative since end points are not ordered */
    public float getHeight() {

        return this.getBottom() - this.getTop();
    }


    /** @return the angle of the vector from the first to the second end point, in degrees within [0, 360) */
    public double getAngle() {

        double angle = Math.toDegrees(Math.atan2(this.getP2().getY() - this.getP1().getY(), this.getP2().getX() - this.getP1().getX()));

        if (angle < 0) {
            angle += 360;
        }
        return angle;
    }


    @Override
    public String toString() {

        StringBuilder sb = new StringBuilder();
        // try-with-resources closes the formatter even if formatting fails.
        try (Formatter formatter = new Formatter(sb)) {
            return formatter.format("%s[minX=%f minY=%f maxX=%f maxY=%f]", this.getClass().toString(), this.x1, this.y1, this.x2, this.y2).toString();
        }
    }


    /** Event types of the sweep in {@link #findIntersections(List, List)}. */
    private enum SOType {
        VERTICAL,
        HRIGHT,
        HLEFT
    }

}

View File

@ -1,342 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.table;
import java.awt.geom.Point2D;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.TreeMap;
import org.apache.commons.collections4.CollectionUtils;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.AbstractPageBlock;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.PageBlockType;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text.TextPageBlock;
import lombok.Getter;
import lombok.Setter;
import lombok.extern.slf4j.Slf4j;
/**
 * A page block representing a table. Cells are stored by their position in the unrotated grid;
 * {@link #getRows()} exposes them as rows, taking the table rotation (90/270) into account.
 */
@Slf4j
public class TablePageBlock extends AbstractPageBlock {

    private final TreeMap<CellPosition, Cell> cells = new TreeMap<>();

    private final int rotation;
    @Getter
    @Setter
    private String headline;
    private int unrotatedRowCount;
    private int unrotatedColCount;
    // Lazily computed by getRows().
    private List<List<Cell>> rows;


    /**
     * @param cells    the detected cells; NOTE: this list is modified (cells narrower or lower than 1.1 are removed)
     * @param area     the bounding area of the table
     * @param rotation the table rotation in degrees; 90 and 270 change how rows are assembled
     */
    public TablePageBlock(List<Cell> cells, Rectangle area, int rotation) {

        addCells(cells);
        minX = area.getLeft();
        minY = area.getBottom();
        maxX = area.getRight();
        maxY = area.getTop();
        classification = PageBlockType.TABLE;
        this.rotation = rotation;

    }


    /**
     * Returns the table rows. On first access the rows are computed, rows consisting of a single
     * cell without text blocks are dropped, and header cells are detected.
     */
    public List<List<Cell>> getRows() {

        if (rows == null) {
            rows = computeRows();

            // Ignore rows that do not contain any cells and values.
            List<List<Cell>> rowsToRemove = new ArrayList<>();
            for (List<Cell> row : rows) {
                if (row.size() == 1 && row.get(0).getTextBlocks().isEmpty()) {
                    rowsToRemove.add(row);
                }
            }
            rows.removeAll(rowsToRemove);

            computeHeaders();
        }

        return rows;

    }


    public int getRowCount() {

        return getRows().size();
    }


    /** @return the size of the longest row, or 0 if there are no rows */
    public int getColCount() {

        return getRows().stream().mapToInt(List::size).max().orElse(0);

    }


    /**
     * Detect header cells (either first row or first column):
     * Column is marked as header if cell text is bold and row cell text is not bold.
     * Defaults to row.
     */
    private void computeHeaders() {

        if (rows == null) {
            rows = computeRows();
        }
        // A bold cell is a header cell as long as every cell to the left/top is bold, too
        // we move from left to right and top to bottom
        for (int rowIndex = 0; rowIndex < rows.size(); rowIndex++) {
            List<Cell> rowCells = rows.get(rowIndex);
            if (rowCells.size() == 1) {
                continue;
            }

            for (int colIndex = 0; colIndex < rowCells.size(); colIndex++) {
                Cell cell = rowCells.get(colIndex);
                List<Cell> cellsToTheLeft = rowCells.subList(0, colIndex);
                Cell lastHeaderCell = null;
                for (Cell leftCell : cellsToTheLeft) {
                    if (leftCell.isHeaderCell()) {
                        lastHeaderCell = leftCell;
                    } else {
                        break;
                    }
                }
                if (lastHeaderCell != null) {
                    cell.getHeaderCells().add(lastHeaderCell);
                }
                List<Cell> cellsToTheTop = new ArrayList<>();
                for (int i = 0; i < rowIndex; i++) {
                    // Rows may have different lengths; skip rows that have no cell in this column.
                    List<Cell> upperRow = rows.get(i);
                    if (colIndex < upperRow.size()) {
                        cellsToTheTop.add(upperRow.get(colIndex));
                    } else {
                        log.debug("No cell {} in row {}, ignoring.", colIndex, i);
                    }
                }
                // NOTE(review): lastHeaderCell is not reset here, so when no header is found above,
                // the header found to the left is added a second time — confirm this is intended.
                for (Cell topCell : cellsToTheTop) {
                    if (topCell.isHeaderCell()) {
                        lastHeaderCell = topCell;
                    } else {
                        break;
                    }
                }
                if (lastHeaderCell != null) {
                    cell.getHeaderCells().add(lastHeaderCell);
                }
                // Constant-first comparison avoids an NPE when no word style has been determined.
                if (CollectionUtils.isNotEmpty(cell.getTextBlocks()) && "bold".equals(cell.getTextBlocks().get(0).getMostPopularWordStyle())) {
                    cell.setHeaderCell(true);
                }
            }
        }

    }


    /**
     * Assembles rows from the unrotated cell grid. For a rotation of 90 or 270 the unrotated
     * columns become rows (traversed in the direction matching the rotation); otherwise the grid
     * is read row by row. Missing grid positions are skipped.
     */
    private List<List<Cell>> computeRows() {

        List<List<Cell>> rows = new ArrayList<>();
        if (rotation == 90) {
            for (int i = 0; i < unrotatedColCount; i++) { // rows
                List<Cell> lastRow = new ArrayList<>();
                for (int j = unrotatedRowCount - 1; j >= 0; j--) { // cols
                    Cell cell = cells.get(new CellPosition(j, i));
                    if (cell != null) {
                        lastRow.add(cell);
                    }
                }
                rows.add(lastRow);
            }
        } else if (rotation == 270) {
            for (int i = unrotatedColCount - 1; i >= 0; i--) { // rows
                List<Cell> lastRow = new ArrayList<>();
                for (int j = 0; j < unrotatedRowCount; j++) { // cols
                    Cell cell = cells.get(new CellPosition(j, i));
                    if (cell != null) {
                        lastRow.add(cell);
                    }
                }
                rows.add(lastRow);
            }
        } else {
            for (int i = 0; i < unrotatedRowCount; i++) {
                List<Cell> lastRow = new ArrayList<>();
                for (int j = 0; j < unrotatedColCount; j++) {
                    Cell cell = cells.get(new CellPosition(i, j)); // JAVA_8 use getOrDefault()
                    if (cell != null) {
                        lastRow.add(cell);
                    }
                }
                rows.add(lastRow);
            }
        }

        return rows;

    }


    /** Stores {@code chunk} at the given unrotated grid position and grows the grid dimensions if needed. */
    private void add(Cell chunk, int row, int col) {

        unrotatedRowCount = Math.max(unrotatedRowCount, row + 1);
        unrotatedColCount = Math.max(unrotatedColCount, col + 1);

        CellPosition cp = new CellPosition(row, col);
        cells.put(cp, chunk);

    }


    /**
     * Removes cells narrower or lower than 1.1 from the given list (in place), derives the grid
     * structure from the remaining cells and registers every grid cell.
     */
    private void addCells(List<Cell> cells) {

        if (cells.isEmpty()) {
            return;
        }

        cells.removeIf(cell -> cell.getWidth() < 1.1 || cell.getHeight() < 1.1);

        List<List<Cell>> rowsOfCells = calculateStructure(cells);

        for (int i = 0; i < rowsOfCells.size(); i++) {
            for (int j = 0; j < rowsOfCells.get(i).size(); j++) {
                add(rowsOfCells.get(i).get(j), i, j);
            }
        }

    }


    /**
     * Calculates the structure of the table. For spanning rows and columns multiple cells with the same values will be inserted.
     *
     * @param cells The found cells
     * @return TablePageBlock Structure
     */
    private List<List<Cell>> calculateStructure(List<Cell> cells) {

        List<List<Cell>> matrix = new ArrayList<>();

        if (cells.isEmpty()) {
            return matrix;
        }

        // Collect the distinct edge coordinates of all cells that carry text or are larger than 3x3.
        Set<Float> uniqueX = new HashSet<>();
        Set<Float> uniqueY = new HashSet<>();
        cells.stream().filter(c -> !c.getTextBlocks().isEmpty() || c.getHeight() > 3 && c.getWidth() > 3).forEach(c -> {
            uniqueX.add(c.getLeft());
            uniqueX.add(c.getRight());
            uniqueY.add(c.getBottom());
            uniqueY.add(c.getTop());
        });

        var sortedUniqueX = uniqueX.stream().sorted().toList();
        var sortedUniqueY = uniqueY.stream().sorted().toList();

        // Build one grid cell between every pair of neighbouring x and y coordinates.
        Float prevY = null;
        for (Float y : sortedUniqueY) {

            List<Cell> row = new ArrayList<>();

            Float prevX = null;
            for (Float x : sortedUniqueX) {

                if (prevY != null && prevX != null) {
                    var cell = new Cell(new Point2D.Float(prevX, prevY), new Point2D.Float(x, y));

                    // Take over the text of the first detected cell that overlaps this grid cell sufficiently.
                    var intersectionCell = cells.stream().filter(c -> cell.intersects(c) && cell.overlapRatio(c) > 0.1f).findFirst();

                    intersectionCell.ifPresent(value -> cell.getTextBlocks().addAll(value.getTextBlocks()));
                    if (cell.hasMinimumSize()) {
                        row.add(cell);
                    }
                }
                prevX = x;
            }

            if (prevY != null && prevX != null && !row.isEmpty()) {
                matrix.add(row);
            }
            prevY = y;
        }

        // Rows were built in ascending y order; reverse them.
        Collections.reverse(matrix);

        return matrix;
    }


    /**
     * Renders the table as text: rows are separated by newlines, cells by commas, and every text
     * block is wrapped in double quotes with embedded double quotes escaped as {@code \"}.
     * Multiple text blocks within one cell are separated by newlines.
     */
    @Override
    public String getText() {

        StringBuilder sb = new StringBuilder();
        List<List<Cell>> rows = getRows();

        int i = 0;
        for (List<Cell> row : rows) {
            if (i != 0) {
                sb.append("\n");
            }
            if (!row.isEmpty()) {
                boolean firstColumn = true;
                for (Cell column : row) {
                    if (!firstColumn) {
                        sb.append(",");
                    }
                    if (column != null && column.getTextBlocks() != null) {
                        boolean first = true;
                        for (TextPageBlock textBlock : column.getTextBlocks()) {
                            if (!first) {
                                sb.append("\n");
                            }
                            // Literal replace: with replaceAll the backslash would be consumed as a
                            // replacement escape and the quote would be left unescaped.
                            sb.append('\"').append(textBlock.getText().replace("\"", "\\\"")).append('\"');
                            first = false;
                        }
                    }
                    firstColumn = false;
                }
            }
            i++;
        }

        return sb.toString();
    }


    /**
     * Renders the table as an HTML table. Cells of the first row are emitted as {@code <th>},
     * all others as {@code <td>}; newlines inside text blocks become {@code <br />}.
     * The cell text is not HTML-escaped.
     */
    public String getTextAsHtml() {

        StringBuilder sb = new StringBuilder();
        List<List<Cell>> rows = getRows();

        sb.append("<table border=\"1\">");
        int i = 0;
        for (List<Cell> row : rows) {
            sb.append("\n<tr>");
            if (!row.isEmpty()) {
                for (Cell column : row) {
                    sb.append(i == 0 ? "\n<th>" : "\n<td>");
                    if (column != null && column.getTextBlocks() != null) {
                        boolean first = true;
                        for (TextPageBlock textBlock : column.getTextBlocks()) {
                            if (!first) {
                                sb.append("<br />");
                            }
                            sb.append(textBlock.getText().replaceAll("\\n", "<br />"));
                            first = false;
                        }
                    }
                    sb.append(i == 0 ? "</th>" : "</td>");
                }
            }
            sb.append("</tr>");
            i++;
        }
        sb.append("</table>");

        return sb.toString();
    }

}

View File

@ -1,100 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text;
import org.apache.pdfbox.text.TextPosition;
import org.springframework.beans.BeanUtils;
import com.fasterxml.jackson.annotation.JsonIgnore;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.SneakyThrows;
/**
 * Lightweight, serializable snapshot of a PDFBox {@link TextPosition}. The direction-adjusted
 * geometry is kept in {@link #position} as {@code [x, y, width, height]}.
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class RedTextPosition {

    private String textMatrix;
    // [0] = XDirAdj, [1] = YDirAdj, [2] = WidthDirAdj, [3] = HeightDir
    private float[] position;

    @JsonIgnore
    private int rotation;

    @JsonIgnore
    private float pageHeight;

    @JsonIgnore
    private float pageWidth;

    private String unicode;

    @JsonIgnore
    private float dir;

    // not used in reanalysis
    @JsonIgnore
    private float widthOfSpace;

    // not used in reanalysis
    @JsonIgnore
    private float fontSizeInPt;

    // not used in reanalysis
    @JsonIgnore
    private String fontName;


    /**
     * Creates a {@code RedTextPosition} from a PDFBox text position. Properties are first copied
     * via {@code BeanUtils.copyProperties}; font name, font size, text matrix and the
     * direction-adjusted geometry are then set explicitly.
     *
     * @param textPosition the PDFBox text position to convert
     * @return the converted position
     */
    @SneakyThrows
    public static RedTextPosition fromTextPosition(TextPosition textPosition) {

        RedTextPosition converted = new RedTextPosition();
        BeanUtils.copyProperties(textPosition, converted);

        converted.setFontName(textPosition.getFont().getName());
        converted.setFontSizeInPt(textPosition.getFontSizeInPt());
        converted.setTextMatrix(textPosition.getTextMatrix().toString());
        converted.setPosition(new float[]{
                textPosition.getXDirAdj(),
                textPosition.getYDirAdj(),
                textPosition.getWidthDirAdj(),
                textPosition.getHeightDir()});
        return converted;
    }


    /** @return the direction-adjusted x coordinate ({@code position[0]}) */
    @JsonIgnore
    public float getXDirAdj() {

        return this.position[0];
    }


    /** @return the direction-adjusted y coordinate ({@code position[1]}) */
    @JsonIgnore
    public float getYDirAdj() {

        return this.position[1];
    }


    /** @return the direction-adjusted width ({@code position[2]}) */
    @JsonIgnore
    public float getWidthDirAdj() {

        return this.position[2];
    }


    /** @return the height ({@code position[3]}) */
    @JsonIgnore
    public float getHeightDir() {

        return this.position[3];
    }

}

View File

@ -1,49 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text;
import java.util.ArrayList;
import java.util.List;
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
import lombok.Getter;
/**
 * Collects text position sequences (words) and renders them as one normalized string.
 */
public class SearchableText {

    @Getter
    private final List<TextPositionSequence> sequences = new ArrayList<>();


    /** Appends a single sequence. */
    public void add(TextPositionSequence textPositionSequence) {

        this.sequences.add(textPositionSequence);
    }


    /** Appends all given sequences in order. */
    public void addAll(List<TextPositionSequence> textPositionSequences) {

        this.sequences.addAll(textPositionSequences);
    }


    @Override
    public String toString() {

        return buildString(this.sequences);
    }


    /**
     * Joins the sequences, each followed by a single space, and then applies the text
     * normalization steps in order: hyphen line breaks, line breaks, repeating whitespace.
     *
     * @param sequences the sequences to render
     * @return the normalized text
     */
    public static String buildString(List<TextPositionSequence> sequences) {

        StringBuilder joined = new StringBuilder();
        for (TextPositionSequence word : sequences) {
            joined.append(word).append(' ');
        }

        String withoutHyphenBreaks = TextNormalizationUtilities.removeHyphenLineBreaks(joined.toString());
        String withoutLineBreaks = TextNormalizationUtilities.removeLineBreaks(withoutHyphenBreaks);
        return TextNormalizationUtilities.removeRepeatingWhitespaces(withoutLineBreaks);
    }

}

View File

@ -1,17 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Plain data holder pairing a section number with that section's text.
 */
@NoArgsConstructor
@AllArgsConstructor
@Builder
@Data
public class SimplifiedSectionText {

    /** Number identifying the section. */
    private int sectionNumber;

    /** Text of the section. */
    private String text;

}

View File

@ -1,20 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text;
import java.util.ArrayList;
import java.util.List;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class SimplifiedText {
private int numberOfPages;
private List<SimplifiedSectionText> sectionTexts = new ArrayList<>();
}

View File

@ -1,47 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text;
import java.util.HashMap;
import java.util.Map;
import lombok.Getter;
/**
 * Counts how often each string value has been added.
 */
public class StringFrequencyCounter {

    @Getter
    private final Map<String, Integer> countPerValue = new HashMap<>();


    /** Increments the count of {@code value}, starting at 1 for a value not seen before. */
    public void add(String value) {

        if (countPerValue.containsKey(value)) {
            int incremented = countPerValue.get(value) + 1;
            countPerValue.put(value, incremented);
            return;
        }
        countPerValue.put(value, 1);
    }


    /** Adds every count of {@code otherCounter} to the matching count in this counter. */
    public void addAll(Map<String, Integer> otherCounter) {

        for (Map.Entry<String, Integer> other : otherCounter.entrySet()) {
            String key = other.getKey();
            if (!countPerValue.containsKey(key)) {
                countPerValue.put(key, other.getValue());
            } else {
                countPerValue.put(key, countPerValue.get(key) + other.getValue());
            }
        }
    }


    /**
     * @return the value with the highest count, or {@code null} if nothing has been counted;
     *         on ties the value encountered first during map iteration wins
     */
    public String getMostPopular() {

        boolean found = false;
        String winner = null;
        Integer winnerCount = null;
        for (Map.Entry<String, Integer> candidate : countPerValue.entrySet()) {
            if (!found || candidate.getValue() > winnerCount) {
                found = true;
                winner = candidate.getKey();
                winnerCount = candidate.getValue();
            }
        }
        return winner;
    }

}

View File

@ -1,47 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonValue;
import lombok.Getter;
/**
 * The four supported text directions, expressed as a rotation in degrees (and radians).
 * Serialized to and from JSON via the degree value.
 */
@Getter
public enum TextDirection {
    ZERO(0f),
    QUARTER_CIRCLE(90f),
    HALF_CIRCLE(180f),
    THREE_QUARTER_CIRCLE(270f);

    public static final String VALUE_STRING_SUFFIX = "°";

    @JsonValue
    private final float degrees;
    private final float radians;


    TextDirection(float degreeValue) {

        this.degrees = degreeValue;
        this.radians = (float) Math.toRadians(degreeValue);
    }


    /** @return the degree value followed by {@link #VALUE_STRING_SUFFIX} */
    @Override
    public String toString() {

        return String.valueOf(degrees) + VALUE_STRING_SUFFIX;
    }


    /**
     * Looks up the direction whose degree value equals {@code degrees} exactly.
     *
     * @param degrees the rotation in degrees
     * @return the matching direction
     * @throws IllegalArgumentException if no direction has that degree value
     */
    @JsonCreator(mode = JsonCreator.Mode.DELEGATING)
    public static TextDirection fromDegrees(float degrees) {

        TextDirection[] candidates = TextDirection.values();
        for (int i = 0; i < candidates.length; i++) {
            TextDirection candidate = candidates[i];
            if (candidate.degrees == degrees) {
                return candidate;
            }
        }

        throw new IllegalArgumentException(String.format("A value of %f is not supported by TextDirection", degrees));
    }
}

View File

@ -1,302 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text;
import java.util.ArrayList;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.AbstractPageBlock;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.PageBlockType;
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * A page block made of text position sequences (words), together with its bounding box and
 * the most common font characteristics of its words.
 */
@AllArgsConstructor
@Builder
@Data
@NoArgsConstructor
public class TextPageBlock extends AbstractPageBlock {

    @Builder.Default
    private List<TextPositionSequence> sequences = new ArrayList<>();

    @JsonIgnore
    private int rotation;

    @JsonIgnore
    private String mostPopularWordFont;

    @JsonIgnore
    private String mostPopularWordStyle;

    @JsonIgnore
    private float mostPopularWordFontSize;

    @JsonIgnore
    private float mostPopularWordHeight;

    @JsonIgnore
    private float mostPopularWordSpaceWidth;

    @JsonIgnore
    private float highestFontSize;

    @JsonIgnore
    private PageBlockType classification;


    /** @return the text direction of the first sequence; requires at least one sequence */
    @JsonIgnore
    public TextDirection getDir() {

        return sequences.get(0).getDir();
    }


    @JsonIgnore
    private float getPageHeight() {

        return sequences.get(0).getPageHeight();
    }


    @JsonIgnore
    private float getPageWidth() {

        return sequences.get(0).getPageWidth();
    }


    /**
     * Returns the minX value in pdf coordinate system.
     * Note: This needs to use Pdf Coordinate System where {0,0} rotated with the page rotation.
     * 0 -> LowerLeft
     * 90 -> UpperLeft
     * 180 -> UpperRight
     * 270 -> LowerRight
     *
     * @return the minX value in pdf coordinate system
     */
    @JsonIgnore
    public float getPdfMinX() {

        if (getDir().getDegrees() == 90) {
            return minY;
        } else if (getDir().getDegrees() == 180) {
            return getPageWidth() - maxX;

        } else if (getDir().getDegrees() == 270) {

            return getPageWidth() - maxY;
        } else {
            return minX;
        }
    }


    /**
     * Returns the maxX value in pdf coordinate system.
     * Note: This needs to use Pdf Coordinate System where {0,0} rotated with the page rotation.
     * 0 -> LowerLeft
     * 90 -> UpperLeft
     * 180 -> UpperRight
     * 270 -> LowerRight
     *
     * @return the maxX value in pdf coordinate system
     */
    @JsonIgnore
    public float getPdfMaxX() {

        if (getDir().getDegrees() == 90) {
            return maxY;
        } else if (getDir().getDegrees() == 180) {
            return getPageWidth() - minX;
        } else if (getDir().getDegrees() == 270) {
            return getPageWidth() - minY;

        } else {
            return maxX;
        }
    }


    /**
     * Returns the minY value in pdf coordinate system.
     * Note: This needs to use Pdf Coordinate System where {0,0} rotated with the page rotation.
     * 0 -> LowerLeft
     * 90 -> UpperLeft
     * 180 -> UpperRight
     * 270 -> LowerRight
     *
     * @return the minY value in pdf coordinate system
     */
    @JsonIgnore
    public float getPdfMinY() {

        if (getDir().getDegrees() == 90) {
            return minX;
        } else if (getDir().getDegrees() == 180) {
            return maxY;

        } else if (getDir().getDegrees() == 270) {
            return getPageHeight() - maxX;

        } else {
            return getPageHeight() - maxY;
        }
    }


    /**
     * Returns the maxY value in pdf coordinate system.
     * Note: This needs to use Pdf Coordinate System where {0,0} rotated with the page rotation.
     * 0 -> LowerLeft
     * 90 -> UpperLeft
     * 180 -> UpperRight
     * 270 -> LowerRight
     *
     * @return the maxY value in pdf coordinate system
     */
    @JsonIgnore
    public float getPdfMaxY() {

        if (getDir().getDegrees() == 90) {
            return maxX;
        } else if (getDir().getDegrees() == 180) {

            return minY;
        } else if (getDir().getDegrees() == 270) {
            return getPageHeight() - minX;
        } else {
            return getPageHeight() - minY;
        }
    }


    /**
     * Creates a block with the given bounds, sequences and rotation. The sequence list is stored
     * as passed (not copied).
     */
    public TextPageBlock(float minX, float maxX, float minY, float maxY, List<TextPositionSequence> sequences, int rotation) {

        this.minX = minX;
        this.maxX = maxX;
        this.minY = minY;
        this.maxY = maxY;
        this.sequences = sequences;
        this.rotation = rotation;
    }


    /**
     * Returns a copy of this block whose bounds are extended to include {@code r};
     * this block is not modified.
     */
    public TextPageBlock union(TextPositionSequence r) {

        TextPageBlock union = this.copy();
        union.add(r);
        return union;
    }


    /**
     * Returns a copy of this block extended by the bounds and sequences of {@code r};
     * this block is not modified.
     */
    public TextPageBlock union(TextPageBlock r) {

        TextPageBlock union = this.copy();
        union.add(r);
        return union;
    }


    /** Extends the bounds of this block to include {@code r} and appends the sequences of {@code r}. */
    public void add(TextPageBlock r) {

        if (r.getMinX() < minX) {
            minX = r.getMinX();
        }
        if (r.getMaxX() > maxX) {
            maxX = r.getMaxX();
        }
        if (r.getMinY() < minY) {
            minY = r.getMinY();
        }
        if (r.getMaxY() > maxY) {
            maxY = r.getMaxY();
        }
        sequences.addAll(r.getSequences());
    }


    /**
     * Extends the bounds of this block to include {@code r}.
     * NOTE(review): unlike {@link #add(TextPageBlock)}, the sequence itself is not appended to
     * {@code sequences} — confirm this is intended.
     */
    public void add(TextPositionSequence r) {

        if (r.getMinXDirAdj() < minX) {
            minX = r.getMinXDirAdj();
        }
        if (r.getMaxXDirAdj() > maxX) {
            maxX = r.getMaxXDirAdj();
        }
        if (r.getMinYDirAdj() < minY) {
            minY = r.getMinYDirAdj();
        }
        if (r.getMaxYDirAdj() > maxY) {
            maxY = r.getMaxYDirAdj();
        }
    }


    /**
     * Creates a copy with the same bounds and rotation and its own list of the same sequences,
     * so that modifying the copy (e.g. in {@link #union(TextPageBlock)}) cannot alter this block.
     * The font statistics and classification are not copied.
     */
    public TextPageBlock copy() {

        List<TextPositionSequence> sequencesCopy = sequences == null ? null : new ArrayList<>(sequences);
        return new TextPageBlock(minX, maxX, minY, maxY, sequencesCopy, rotation);
    }


    /** Sets the bounds from an origin and a size; see {@link #set(float, float, float, float)}. */
    public void resize(float x1, float y1, float width, float height) {

        set(x1, y1, x1 + width, y1 + height);
    }


    /** Sets the bounds from two opposite corners given in any order. */
    public void set(float x1, float y1, float x2, float y2) {

        this.minX = Math.min(x1, x2);
        this.maxX = Math.max(x1, x2);
        this.minY = Math.min(y1, y2);
        this.maxY = Math.max(y1, y2);
    }


    /**
     * Concatenates all sequences, inserting a single space between two sequences when neither
     * the text so far ends with a space nor the next sequence starts with one. Empty sequences
     * contribute nothing.
     */
    @Override
    public String toString() {

        StringBuilder builder = new StringBuilder();

        for (int i = 0; i < sequences.size(); i++) {
            String sequenceAsString = sequences.get(i).toString();
            // Fix for missing Whitespace. This is recognized in getSequences method. See PDFTextStripper Line 1730.
            // The length checks keep empty sequences from causing index errors.
            if (builder.length() > 0 && !sequenceAsString.isEmpty()
                    && builder.charAt(builder.length() - 1) != ' ' && sequenceAsString.charAt(0) != ' ') {
                builder.append(' ');
            }
            builder.append(sequenceAsString);
        }

        return builder.toString();
    }


    /**
     * Joins the words of this block: a newline is inserted when the lower border of a word
     * differs from that of the previous word by more than the word's text height, otherwise a
     * space. Hyphen line breaks are removed from the result.
     */
    @Override
    @JsonIgnore
    public String getText() {

        StringBuilder sb = new StringBuilder();

        TextPositionSequence previous = null;
        for (TextPositionSequence word : sequences) {
            if (previous != null) {
                if (Math.abs(previous.getMaxYDirAdj() - word.getMaxYDirAdj()) > word.getTextHeight()) {
                    sb.append('\n');
                } else {
                    sb.append(' ');
                }
            }
            sb.append(word.toString());
            previous = word;
        }

        return TextNormalizationUtilities.removeHyphenLineBreaks(sb.toString());

    }

}

View File

@ -1,301 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text;
import java.awt.geom.AffineTransform;
import java.awt.geom.Point2D;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.pdfbox.text.TextPosition;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Point;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Rectangle;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
@JsonIgnoreProperties({"empty"})
public class TextPositionSequence implements CharSequence {
public static final int HEIGHT_PADDING = 2;
private int page;
private List<RedTextPosition> textPositions = new ArrayList<>();
private TextDirection dir;
private int rotation;
private float pageHeight;
private float pageWidth;
public TextPositionSequence(int page) {
this.page = page;
}
public TextPositionSequence(List<TextPosition> textPositions, int page) {
this.textPositions = textPositions.stream().map(RedTextPosition::fromTextPosition).collect(Collectors.toList());
this.page = page;
this.dir = TextDirection.fromDegrees(textPositions.get(0).getDir());
this.rotation = textPositions.get(0).getRotation();
this.pageHeight = textPositions.get(0).getPageHeight();
this.pageWidth = textPositions.get(0).getPageWidth();
}
@Override
public int length() {
return textPositions.size();
}
@Override
public char charAt(int index) {
RedTextPosition textPosition = textPositionAt(index);
String text = textPosition.getUnicode();
return text.charAt(0);
}
public char charAt(int index, boolean caseInSensitive) {
RedTextPosition textPosition = textPositionAt(index);
String text = textPosition.getUnicode();
return caseInSensitive ? text.toLowerCase().charAt(0) : text.charAt(0);
}
@Override
public TextPositionSequence subSequence(int start, int end) {
var textPositionSequence = new TextPositionSequence();
textPositionSequence.textPositions = textPositions.subList(start, end);
textPositionSequence.page = page;
textPositionSequence.dir = dir;
textPositionSequence.rotation = rotation;
textPositionSequence.pageHeight = pageHeight;
textPositionSequence.pageWidth = pageWidth;
return textPositionSequence;
}
@Override
public String toString() {
StringBuilder builder = new StringBuilder(length());
for (int i = 0; i < length(); i++) {
builder.append(charAt(i));
}
return builder.toString();
}
public RedTextPosition textPositionAt(int index) {
return textPositions.get(index);
}
public void add(TextPositionSequence textPositionSequence, RedTextPosition textPosition) {
this.textPositions.add(textPosition);
this.page = textPositionSequence.getPage();
this.dir = textPositionSequence.getDir();
this.rotation = textPositionSequence.getRotation();
this.pageHeight = textPositionSequence.getPageHeight();
this.pageWidth = textPositionSequence.getPageWidth();
}
public void add(TextPosition textPosition) {
this.textPositions.add(RedTextPosition.fromTextPosition(textPosition));
this.dir = TextDirection.fromDegrees(textPositions.get(0).getDir());
this.rotation = textPositions.get(0).getRotation();
this.pageHeight = textPositions.get(0).getPageHeight();
this.pageWidth = textPositions.get(0).getPageWidth();
}
/**
* This value is adjusted so that 0,0 is upper left and it is adjusted based on the text direction.
* This method ignores the page rotation but takes the text rotation and adjusts the coordinates to awt.
*
* @return the text direction adjusted minX value
*/
@JsonIgnore
public float getMinXDirAdj() {
return textPositions.get(0).getXDirAdj();
}
/**
 * Returns the direction-adjusted right edge of the last text position, plus
 * {@code HEIGHT_PADDING}.
 * <p>
 * The value is adjusted so that 0,0 is the upper left corner and takes the text direction into
 * account. The page rotation is ignored; the text rotation is applied and the coordinates are
 * adjusted to awt.
 *
 * @return the text direction adjusted maxX value
 */
@JsonIgnore
public float getMaxXDirAdj() {
    RedTextPosition last = textPositions.get(textPositions.size() - 1);
    // NOTE(review): a *height* padding is added to an X coordinate here - confirm this is intended.
    return last.getXDirAdj() + last.getWidthDirAdj() + HEIGHT_PADDING;
}
/**
 * Returns the upper border of the word's bounding box: the direction-adjusted Y coordinate of
 * the first text position minus {@link #getTextHeight()}.
 * <p>
 * The value is adjusted so that 0,0 is the upper left corner and takes the text direction into
 * account. The page rotation is ignored; the text rotation is applied and the coordinates are
 * adjusted to awt.
 *
 * @return the text direction adjusted minY value
 */
@JsonIgnore
public float getMinYDirAdj() {
    RedTextPosition first = textPositions.get(0);
    return first.getYDirAdj() - getTextHeight();
}
/**
 * Returns the lower border of the word's bounding box: the direction-adjusted Y coordinate of
 * the first text position.
 * <p>
 * The value is adjusted so that 0,0 is the upper left corner and takes the text direction into
 * account. The page rotation is ignored; the text rotation is applied and the coordinates are
 * adjusted to awt.
 *
 * @return the text direction adjusted maxY value
 */
@JsonIgnore
public float getMaxYDirAdj() {
    RedTextPosition first = textPositions.get(0);
    return first.getYDirAdj();
}
/**
 * Returns the height of the first text position plus {@code HEIGHT_PADDING}.
 *
 * @return the padded text height
 */
@JsonIgnore
public float getTextHeight() {
    RedTextPosition first = textPositions.get(0);
    return first.getHeightDir() + HEIGHT_PADDING;
}
/**
 * Returns the vertical extent of the word, i.e. {@code getMaxYDirAdj() - getMinYDirAdj()}.
 *
 * @return the direction-adjusted height of the bounding box
 */
@JsonIgnore
public float getHeight() {
    float lowerBorder = getMaxYDirAdj();
    float upperBorder = getMinYDirAdj();
    return lowerBorder - upperBorder;
}
/**
 * Returns the horizontal extent of the word, i.e. {@code getMaxXDirAdj() - getMinXDirAdj()}.
 *
 * @return the direction-adjusted width of the bounding box
 */
@JsonIgnore
public float getWidth() {
    float rightBorder = getMaxXDirAdj();
    float leftBorder = getMinXDirAdj();
    return rightBorder - leftBorder;
}
/**
 * Returns the lower-cased font name of the first text position with the {@code ",bold"} and
 * {@code ",italic"} style suffixes removed.
 * <p>
 * Lower-casing uses {@link java.util.Locale#ROOT} so the result does not depend on the JVM
 * default locale (with a Turkish default locale, {@code "I"} would otherwise become a dotless
 * {@code "ı"} and the {@code ",italic"} suffix would no longer match).
 *
 * @return the normalized font name without style suffixes
 */
@JsonIgnore
public String getFont() {
    String lowercaseFontName = textPositions.get(0).getFontName().toLowerCase(java.util.Locale.ROOT);
    // Literal replacement is sufficient; no regular expression is involved.
    return lowercaseFontName.replace(",bold", "").replace(",italic", "");
}
/**
 * Derives the font style from the font name of the first text position.
 * <p>
 * The name is lower-cased with {@link java.util.Locale#ROOT} so that detection of
 * {@code "italic"} does not depend on the JVM default locale (a Turkish default locale would
 * lower-case {@code "I"} to a dotless {@code "ı"}).
 *
 * @return {@code "bold, italic"}, {@code "bold"}, {@code "italic"} or {@code "standard"}
 */
@JsonIgnore
public String getFontStyle() {
    String lowercaseFontName = textPositions.get(0).getFontName().toLowerCase(java.util.Locale.ROOT);
    boolean bold = lowercaseFontName.contains("bold");
    boolean italic = lowercaseFontName.contains("italic");

    if (bold && italic) {
        return "bold, italic";
    }
    if (bold) {
        return "bold";
    }
    if (italic) {
        return "italic";
    }
    return "standard";
}
/**
 * Returns the font size in points of the first text position.
 *
 * @return the font size in pt
 */
@JsonIgnore
public float getFontSize() {
    RedTextPosition first = textPositions.get(0);
    return first.getFontSizeInPt();
}
/**
 * Returns the width of a space character as reported by the first text position.
 *
 * @return the width of space of the first text position
 */
@JsonIgnore
public float getSpaceWidth() {
    RedTextPosition first = textPositions.get(0);
    return first.getWidthOfSpace();
}
/**
 * Computes the bounding box of the word in the Pdf Coordinate System, where {0,0} is rotated
 * with the page rotation.
 * 0 -> LowerLeft
 * 90 -> UpperLeft
 * 180 -> UpperRight
 * 270 -> LowerRight
 * <p>
 * The two corner points are taken from the first and last text position in direction-adjusted
 * coordinates and are then mapped through a transform that depends on the text direction.
 *
 * @return bounding box of the word in Pdf Coordinate System
 */
@JsonIgnore
@SneakyThrows
public Rectangle getRectangle() {
    log.debug("Page: '{}', Word: '{}', Rotation: '{}', textRotation {}", page, this, rotation, dir);

    float textHeight = getTextHeight();
    RedTextPosition first = textPositions.get(0);
    RedTextPosition last = textPositions.get(textPositions.size() - 1);

    Point2D lowerCorner = new Point2D.Double(first.getXDirAdj(), first.getYDirAdj() - HEIGHT_PADDING);
    Point2D upperCorner = new Point2D.Double(last.getXDirAdj() + last.getWidthDirAdj(), last.getYDirAdj() + textHeight + HEIGHT_PADDING);

    // Rotation anchor and vertical offset depend on the text direction.
    AffineTransform toPdfSpace = new AffineTransform();
    if (dir == TextDirection.ZERO || dir == TextDirection.HALF_CIRCLE) {
        toPdfSpace.rotate(dir.getRadians(), pageWidth / 2f, pageHeight / 2f);
        toPdfSpace.translate(0f, pageHeight + textHeight);
    } else if (dir == TextDirection.QUARTER_CIRCLE) {
        toPdfSpace.rotate(dir.getRadians(), pageWidth / 2f, pageWidth / 2f);
        toPdfSpace.translate(0f, pageWidth + textHeight);
    } else {
        toPdfSpace.rotate(dir.getRadians(), pageHeight / 2f, pageHeight / 2f);
        toPdfSpace.translate(0f, pageWidth + textHeight);
    }
    // Every direction finishes by flipping the Y axis.
    toPdfSpace.scale(1., -1.);

    lowerCorner = toPdfSpace.transform(lowerCorner, null);
    upperCorner = toPdfSpace.transform(upperCorner, null);

    Point origin = new Point((float) lowerCorner.getX(), (float) lowerCorner.getY());
    float width = (float) (upperCorner.getX() - lowerCorner.getX());
    float height = (float) (upperCorner.getY() - lowerCorner.getY());
    return new Rectangle(origin, width, height, page);
}
}

View File

@ -1,14 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text;
import java.util.List;
import lombok.AllArgsConstructor;
import lombok.Data;
/**
 * Simple holder for a list of {@link TextPageBlock}s.
 * <p>
 * Accessors, {@code equals}/{@code hashCode}/{@code toString} and the single-argument
 * constructor are generated by the Lombok annotations on the class.
 */
@Data
@AllArgsConstructor
public class UnclassifiedText {
// The text blocks held by this instance.
private List<TextPageBlock> textBlocks;
}

View File

@ -1,384 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.parsing;
import java.io.IOException;
import java.io.InputStream;
import java.util.Map;
import java.util.WeakHashMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.fontbox.ttf.TrueTypeFont;
import org.apache.fontbox.util.BoundingBox;
import org.apache.pdfbox.contentstream.PDFStreamEngine;
import org.apache.pdfbox.contentstream.operator.DrawObject;
import org.apache.pdfbox.contentstream.operator.state.Concatenate;
import org.apache.pdfbox.contentstream.operator.state.Restore;
import org.apache.pdfbox.contentstream.operator.state.Save;
import org.apache.pdfbox.contentstream.operator.state.SetGraphicsStateParameters;
import org.apache.pdfbox.contentstream.operator.state.SetMatrix;
import org.apache.pdfbox.contentstream.operator.text.BeginText;
import org.apache.pdfbox.contentstream.operator.text.EndText;
import org.apache.pdfbox.contentstream.operator.text.MoveText;
import org.apache.pdfbox.contentstream.operator.text.MoveTextSetLeading;
import org.apache.pdfbox.contentstream.operator.text.NextLine;
import org.apache.pdfbox.contentstream.operator.text.SetCharSpacing;
import org.apache.pdfbox.contentstream.operator.text.SetFontAndSize;
import org.apache.pdfbox.contentstream.operator.text.SetTextHorizontalScaling;
import org.apache.pdfbox.contentstream.operator.text.SetTextLeading;
import org.apache.pdfbox.contentstream.operator.text.SetTextRenderingMode;
import org.apache.pdfbox.contentstream.operator.text.SetTextRise;
import org.apache.pdfbox.contentstream.operator.text.SetWordSpacing;
import org.apache.pdfbox.contentstream.operator.text.ShowText;
import org.apache.pdfbox.contentstream.operator.text.ShowTextAdjusted;
import org.apache.pdfbox.contentstream.operator.text.ShowTextLine;
import org.apache.pdfbox.contentstream.operator.text.ShowTextLineAndSpace;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.font.PDCIDFont;
import org.apache.pdfbox.pdmodel.font.PDCIDFontType2;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDFontDescriptor;
import org.apache.pdfbox.pdmodel.font.PDSimpleFont;
import org.apache.pdfbox.pdmodel.font.PDTrueTypeFont;
import org.apache.pdfbox.pdmodel.font.PDType0Font;
import org.apache.pdfbox.pdmodel.font.PDType3Font;
import org.apache.pdfbox.pdmodel.font.encoding.GlyphList;
import org.apache.pdfbox.pdmodel.graphics.state.PDGraphicsState;
import org.apache.pdfbox.text.TextPosition;
import org.apache.pdfbox.util.Matrix;
import org.apache.pdfbox.util.Vector;
/**
* LEGACY text calculations which are known to be incorrect but are depended on by PDFTextStripper.
* <p>
* This class exists only so that we don't break the code of users who have their own subclasses of
* PDFTextStripper. It replaces the mostly empty implementation of showGlyph() in PDFStreamEngine
* with a heuristic implementation which is backwards compatible.
* <p>
* DO NOT USE THIS CODE UNLESS YOU ARE WORKING WITH PDFTextStripper.
* THIS CODE IS DELIBERATELY INCORRECT, USE PDFStreamEngine INSTEAD.
*/
@SuppressWarnings({"PMD", "checkstyle:all"})
class LegacyPDFStreamEngine extends PDFStreamEngine {
private static final Log LOG = LogFactory.getLog(LegacyPDFStreamEngine.class);
private int pageRotation;
private PDRectangle pageSize;
private Matrix translateMatrix;
private final GlyphList glyphList;
private final Map<COSDictionary, Float> fontHeightMap = new WeakHashMap<COSDictionary, Float>();
/**
* Constructor.
*/
LegacyPDFStreamEngine() throws IOException {
addOperator(new BeginText());
addOperator(new Concatenate());
addOperator(new DrawObject()); // special text version
addOperator(new EndText());
addOperator(new SetGraphicsStateParameters());
addOperator(new Save());
addOperator(new Restore());
addOperator(new NextLine());
addOperator(new SetCharSpacing());
addOperator(new MoveText());
addOperator(new MoveTextSetLeading());
addOperator(new SetFontAndSize());
addOperator(new ShowText());
addOperator(new ShowTextAdjusted());
addOperator(new SetTextLeading());
addOperator(new SetMatrix());
addOperator(new SetTextRenderingMode());
addOperator(new SetTextRise());
addOperator(new SetWordSpacing());
addOperator(new SetTextHorizontalScaling());
addOperator(new ShowTextLine());
addOperator(new ShowTextLineAndSpace());
// load additional glyph list for Unicode mapping
String path = "/org/apache/pdfbox/resources/glyphlist/additional.txt";
InputStream input = GlyphList.class.getResourceAsStream(path);
glyphList = new GlyphList(GlyphList.getAdobeGlyphList(), input);
}
/**
 * Records the page rotation and crop box, prepares the crop box translation and then lets the
 * superclass process the page's content stream.
 * <p>
 * When the crop box does not start at the origin, a translation matrix that moves its lower
 * left corner to {0,0} is stored; otherwise no translation is kept.
 *
 * @param page the page to process
 * @throws java.io.IOException if there is an error accessing the stream.
 */
@Override
public void processPage(PDPage page) throws IOException {
    pageRotation = page.getRotation();
    pageSize = page.getCropBox();

    float originX = pageSize.getLowerLeftX();
    float originY = pageSize.getLowerLeftY();
    boolean cropBoxAtOrigin = originX == 0 && originY == 0;

    // translation matrix for cropbox, only needed when the crop box is offset
    translateMatrix = cropBoxAtOrigin ? null : Matrix.getTranslateInstance(-originX, -originY);

    super.processPage(page);
}
/**
 * Called when a glyph is to be processed. The heuristic calculations here were originally
 * written by Ben Litchfield for PDFStreamEngine.
 * <p>
 * Computes the glyph's display width, height and space width, resolves its Unicode mapping via
 * the additional glyph list, applies the crop box translation if one is set and forwards the
 * result to {@link #processTextPosition(TextPosition)}. A mapping of exactly two chars is
 * emitted as two separate text positions sharing the same geometry.
 *
 * @param textRenderingMatrix the text rendering matrix of the glyph
 * @param font the font the glyph belongs to
 * @param code the character code of the glyph
 * @param unicode not read by this implementation; the mapping is re-derived from {@code font} and {@code code}
 * @param displacement the glyph displacement; only used for the (modified) width calculation
 * @throws IOException if font data cannot be read
 */
@Override
protected void showGlyph(Matrix textRenderingMatrix, PDFont font, int code, String unicode, Vector displacement) throws IOException {
//
// legacy calculations which were previously in PDFStreamEngine
//
// DO NOT USE THIS CODE UNLESS YOU ARE WORKING WITH PDFTextStripper.
// THIS CODE IS DELIBERATELY INCORRECT
//
PDGraphicsState state = getGraphicsState();
Matrix ctm = state.getCurrentTransformationMatrix();
float fontSize = state.getTextState().getFontSize();
float horizontalScaling = state.getTextState().getHorizontalScaling() / 100f;
Matrix textMatrix = getTextMatrix();
float displacementX = displacement.getX();
// the sorting algorithm is based on the width of the character. As the displacement
// for vertical characters doesn't provide any suitable value for it, we have to
// calculate our own
if (font.isVertical()) {
displacementX = font.getWidth(code) / 1000;
// there may be an additional scaling factor for true type fonts
TrueTypeFont ttf = null;
if (font instanceof PDTrueTypeFont) {
ttf = ((PDTrueTypeFont) font).getTrueTypeFont();
} else if (font instanceof PDType0Font) {
PDCIDFont cidFont = ((PDType0Font) font).getDescendantFont();
if (cidFont instanceof PDCIDFontType2) {
ttf = ((PDCIDFontType2) cidFont).getTrueTypeFont();
}
}
if (ttf != null && ttf.getUnitsPerEm() != 1000) {
displacementX *= 1000f / ttf.getUnitsPerEm();
}
}
//
// legacy calculations which were previously in PDFStreamEngine
//
// DO NOT USE THIS CODE UNLESS YOU ARE WORKING WITH PDFTextStripper.
// THIS CODE IS DELIBERATELY INCORRECT
//
// (modified) combined displacement, this is calculated *without* taking the character
// spacing and word spacing into account, due to legacy code in TextStripper
float tx = displacementX * fontSize * horizontalScaling;
float ty = displacement.getY() * fontSize;
// (modified) combined displacement matrix
Matrix td = Matrix.getTranslateInstance(tx, ty);
// (modified) text rendering matrix
Matrix nextTextRenderingMatrix = td.multiply(textMatrix).multiply(ctm); // text space -> device space
float nextX = nextTextRenderingMatrix.getTranslateX();
float nextY = nextTextRenderingMatrix.getTranslateY();
// (modified) width and height calculations
float dxDisplay = nextX - textRenderingMatrix.getTranslateX();
// font heights are cached per font dictionary so computeFontHeight runs once per font
Float fontHeight = fontHeightMap.get(font.getCOSObject());
if (fontHeight == null) {
fontHeight = computeFontHeight(font);
fontHeightMap.put(font.getCOSObject(), fontHeight);
}
float dyDisplay = fontHeight * textRenderingMatrix.getScalingFactorY();
//
// start of the original method
//
// Note on variable names. There are three different units being used in this code.
// Character sizes are given in glyph units, text locations are initially given in text
// units, and we want to save the data in display units. The variable names should end with
// Text or Disp to represent if the values are in text or disp units (no glyph units are
// saved).
float glyphSpaceToTextSpaceFactor = 1 / 1000f;
if (font instanceof PDType3Font) {
glyphSpaceToTextSpaceFactor = font.getFontMatrix().getScaleX();
}
float spaceWidthText = 0;
try {
// to avoid crash as described in PDFBOX-614, see what the space displacement should be
spaceWidthText = font.getSpaceWidth() * glyphSpaceToTextSpaceFactor;
} catch (Throwable exception) {
LOG.warn(exception, exception);
}
// fallback chain for the space width: average font width (scaled down), then a generic 1.0
if (spaceWidthText == 0) {
spaceWidthText = font.getAverageFontWidth() * glyphSpaceToTextSpaceFactor;
// the average space width appears to be higher than necessary so make it smaller
spaceWidthText *= .80f;
}
if (spaceWidthText == 0) {
spaceWidthText = 1.0f; // if could not find font, use a generic value
}
// the space width has to be transformed into display units
float spaceWidthDisplay = spaceWidthText * textRenderingMatrix.getScalingFactorX();
// use our additional glyph list for Unicode mapping
String unicodeMapping = font.toUnicode(code, glyphList);
// when there is no Unicode mapping available, Acrobat simply coerces the character code
// into Unicode, so we do the same. Subclasses of PDFStreamEngine don't necessarily want
// this, which is why we leave it until this point in PDFTextStreamEngine.
if (unicodeMapping == null) {
if (font instanceof PDSimpleFont) {
char c = (char) code;
unicodeMapping = new String(new char[]{c});
} else {
// Acrobat doesn't seem to coerce composite font's character codes, instead it
// skips them. See the "allah2.pdf" TestTextStripper file.
return;
}
}
// adjust for cropbox if needed
Matrix translatedTextRenderingMatrix;
if (translateMatrix == null) {
translatedTextRenderingMatrix = textRenderingMatrix;
} else {
translatedTextRenderingMatrix = Matrix.concatenate(translateMatrix, textRenderingMatrix);
nextX -= pageSize.getLowerLeftX();
nextY -= pageSize.getLowerLeftY();
}
// This is a hack for unicode letter with 2 chars e.g. RA see unicodeProblem.pdf
// NOTE(review): a surrogate pair (one supplementary code point) also has length 2 and
// would be split into two lone surrogates here - confirm whether that case can occur.
if (unicodeMapping.length() == 2) {
processTextPosition(new TextPosition(pageRotation,
pageSize.getWidth(),
pageSize.getHeight(),
translatedTextRenderingMatrix,
nextX,
nextY,
Math.abs(dyDisplay),
dxDisplay,
Math.abs(spaceWidthDisplay),
Character.toString(unicodeMapping.charAt(0)),
new int[]{code},
font,
fontSize,
(int) (fontSize * textMatrix.getScalingFactorX())));
processTextPosition(new TextPosition(pageRotation,
pageSize.getWidth(),
pageSize.getHeight(),
translatedTextRenderingMatrix,
nextX,
nextY,
Math.abs(dyDisplay),
dxDisplay,
Math.abs(spaceWidthDisplay),
Character.toString(unicodeMapping.charAt(1)),
new int[]{code},
font,
fontSize,
(int) (fontSize * textMatrix.getScalingFactorX())));
} else {
processTextPosition(new TextPosition(pageRotation,
pageSize.getWidth(),
pageSize.getHeight(),
translatedTextRenderingMatrix,
nextX,
nextY,
Math.abs(dyDisplay),
dxDisplay,
Math.abs(spaceWidthDisplay),
unicodeMapping,
new int[]{code},
font,
fontSize,
(int) (fontSize * textMatrix.getScalingFactorX())));
}
}
/**
 * Compute the font height. Override this if you want to use own calculations.
 * <p>
 * Starts from half the font bounding box height, prefers the descriptor's CapHeight when it is
 * non-zero and smaller (or when the bounding box yields zero), and falls back to half of
 * (ascent - descent) when CapHeight itself exceeds the ascent. The result is converted from
 * glyph space to text space.
 *
 * @param font the font.
 * @return the font height.
 * @throws IOException if there is an error while getting the font bounding box.
 */
protected float computeFontHeight(PDFont font) throws IOException {
    BoundingBox fontBox = font.getBoundingBox();
    float lowerLeftY = fontBox.getLowerLeftY();
    if (lowerLeftY < Short.MIN_VALUE) {
        // PDFBOX-2158 and PDFBOX-3130
        // files by Salmat eSolutions / ClibPDF Library
        fontBox.setLowerLeftY(-(lowerLeftY + 65536));
    }

    // 1/2 the bbox is used as the height todo: why?
    float glyphHeight = fontBox.getHeight() / 2;

    // sometimes the bbox has very high values, but CapHeight is OK
    PDFontDescriptor descriptor = font.getFontDescriptor();
    if (descriptor != null) {
        float capHeight = descriptor.getCapHeight();
        boolean capHeightPresent = Float.compare(capHeight, 0) != 0;
        if (capHeightPresent && (capHeight < glyphHeight || Float.compare(glyphHeight, 0) == 0)) {
            glyphHeight = capHeight;
        }

        // PDFBOX-3464, PDFBOX-4480, PDFBOX-4553:
        // sometimes even CapHeight has very high value, but Ascent and Descent are ok
        float ascent = descriptor.getAscent();
        float descent = descriptor.getDescent();
        float halfExtent = (ascent - descent) / 2;
        boolean capHeightSuspicious = capHeight > ascent && ascent > 0 && descent < 0;
        if (capHeightSuspicious && (halfExtent < glyphHeight || Float.compare(glyphHeight, 0) == 0)) {
            glyphHeight = halfExtent;
        }
    }

    // transformPoint from glyph space -> text space
    if (font instanceof PDType3Font) {
        return font.getFontMatrix().transformPoint(0, glyphHeight).y;
    }
    return glyphHeight / 1000;
}
/**
 * A method provided as an event interface to allow a subclass to perform some specific
 * functionality when text needs to be processed.
 * <p>
 * This implementation is intentionally empty. It is invoked by {@code showGlyph} once per
 * emitted {@link TextPosition}.
 *
 * @param text The text to be processed.
 */
protected void processTextPosition(TextPosition text) {
// subclasses can override to provide specific functionality
}
}

View File

@ -1,335 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.parsing;
import java.awt.geom.Point2D;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import org.apache.pdfbox.contentstream.operator.Operator;
import org.apache.pdfbox.contentstream.operator.OperatorName;
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColor;
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColorN;
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColorSpace;
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceCMYKColor;
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceGrayColor;
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceRGBColor;
import org.apache.pdfbox.contentstream.operator.color.SetStrokingColor;
import org.apache.pdfbox.contentstream.operator.color.SetStrokingColorN;
import org.apache.pdfbox.contentstream.operator.color.SetStrokingColorSpace;
import org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceCMYKColor;
import org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceGrayColor;
import org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceRGBColor;
import org.apache.pdfbox.contentstream.operator.state.SetFlatness;
import org.apache.pdfbox.contentstream.operator.state.SetLineCapStyle;
import org.apache.pdfbox.contentstream.operator.state.SetLineDashPattern;
import org.apache.pdfbox.contentstream.operator.state.SetLineJoinStyle;
import org.apache.pdfbox.contentstream.operator.state.SetLineMiterLimit;
import org.apache.pdfbox.contentstream.operator.state.SetLineWidth;
import org.apache.pdfbox.contentstream.operator.state.SetRenderingIntent;
import org.apache.pdfbox.contentstream.operator.text.SetFontAndSize;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSNumber;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.text.TextPosition;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.table.Ruling;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text.RedTextPosition;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text.TextPositionSequence;
import lombok.Getter;
import lombok.Setter;
import lombok.extern.slf4j.Slf4j;
@Slf4j
public class PDFLinesTextStripper extends PDFTextStripper {
@Getter
private final List<TextPositionSequence> textPositionSequences = new ArrayList<>();
@Getter
private final List<Ruling> rulings = new ArrayList<>();
private final List<Ruling> graphicsPath = new ArrayList<>();
@Setter
protected PDPage pdpage;
@Getter
private int minCharWidth;
@Getter
private int maxCharWidth;
@Getter
private int minCharHeight;
@Getter
private int maxCharHeight;
private float path_x;
private float path_y;
@Setter
private int pageNumber;
/**
 * Creates the stripper and registers the additional colour, line-style and font operators it
 * needs on top of those provided by the superclass.
 *
 * @throws IOException if the superclass constructor fails
 */
public PDFLinesTextStripper() throws IOException {
    super();

    addOperator(new SetStrokingColorSpace());
    addOperator(new SetNonStrokingColorSpace());
    addOperator(new SetLineDashPattern());
    addOperator(new SetStrokingDeviceGrayColor());
    addOperator(new SetNonStrokingDeviceGrayColor());
    addOperator(new SetFlatness());
    addOperator(new SetLineJoinStyle());
    addOperator(new SetLineCapStyle());
    addOperator(new SetStrokingDeviceCMYKColor());
    addOperator(new SetNonStrokingDeviceCMYKColor());
    addOperator(new SetLineMiterLimit());
    addOperator(new SetStrokingDeviceRGBColor());
    addOperator(new SetNonStrokingDeviceRGBColor());
    addOperator(new SetRenderingIntent());
    addOperator(new SetStrokingColor());
    addOperator(new SetNonStrokingColor());
    addOperator(new SetStrokingColorN());
    addOperator(new SetNonStrokingColorN());
    addOperator(new SetFontAndSize());
    addOperator(new SetLineWidth());
}
/**
 * Intercepts path construction and painting operators to collect ruling lines, then delegates
 * every operator to the superclass.
 * <p>
 * Move-to, line-to and rectangle operators add segments to the pending {@code graphicsPath};
 * fill and stroke operators hand the pending path to {@code addVisibleRulings} and clear it;
 * end-path discards it. Operators with an unexpected argument count are ignored here but are
 * still passed on to the superclass.
 *
 * @param operator the operator being processed
 * @param arguments the operands of the operator
 * @throws IOException if processing the operator fails
 */
@Override
protected void processOperator(Operator operator, List<COSBase> arguments) throws IOException {
String operation = operator.getName();
//move
switch (operation) {
case OperatorName.MOVE_TO:
if (arguments.size() == 2) {
Point2D.Float pos = transformPosition(floatValue(arguments.get(0)), floatValue(arguments.get(1)));
// remember the current point; it is the start of the next line segment
path_x = (float) pos.getX();
path_y = (float) pos.getY();
}
break;
//line
case OperatorName.LINE_TO:
if (arguments.size() == 2) {
Point2D.Float pos = transformPosition(floatValue(arguments.get(0)), floatValue(arguments.get(1)));
// The direction of vertical lines must always be from bottom to top for the table extraction algorithm.
if (pos.getY() > path_y) {
graphicsPath.add(new Ruling(new Point2D.Float(path_x, path_y), new Point2D.Float((float) pos.getX(), (float) pos.getY())));
} else {
// only the Y values are swapped here; the X values keep their original order
graphicsPath.add(new Ruling(new Point2D.Float(path_x, (float) pos.getY()), new Point2D.Float((float) pos.getX(), path_y)));
}
path_x = (float) pos.getX();
path_y = (float) pos.getY();
}
break;
//rectangle
case OperatorName.APPEND_RECT:
if (arguments.size() == 4) {
float x = floatValue(arguments.get(0));
float y = floatValue(arguments.get(1));
float width = floatValue(arguments.get(2));
float height = floatValue(arguments.get(3));
// p1 and p2 are the two opposite corners of the rectangle after transformation
Point2D p1 = transformPosition(x, y);
Point2D p2 = transformPosition(x + width, y + height);
// Horizontal lines
graphicsPath.add(new Ruling(new Point2D.Float((float) p1.getX(), (float) p1.getY()), new Point2D.Float((float) p2.getX(), (float) p1.getY())));
graphicsPath.add(new Ruling(new Point2D.Float((float) p1.getX(), (float) p2.getY()), new Point2D.Float((float) p2.getX(), (float) p2.getY())));
// Vertical lines, direction must always be from bottom to top for the table extraction algorithm.
if (p2.getY() > p1.getY()) {
graphicsPath.add(new Ruling(new Point2D.Float((float) p2.getX(), (float) p1.getY()), new Point2D.Float((float) p2.getX(), (float) p2.getY())));
} else {
graphicsPath.add(new Ruling(new Point2D.Float((float) p2.getX(), (float) p2.getY()), new Point2D.Float((float) p2.getX(), (float) p1.getY())));
}
if (p2.getY() > p1.getY()) {
graphicsPath.add(new Ruling(new Point2D.Float((float) p1.getX(), (float) p1.getY()), new Point2D.Float((float) p1.getX(), (float) p2.getY())));
} else {
graphicsPath.add(new Ruling(new Point2D.Float((float) p1.getX(), (float) p2.getY()), new Point2D.Float((float) p1.getX(), (float) p1.getY())));
}
}
break;
//fill
case OperatorName.FILL_NON_ZERO:
case OperatorName.LEGACY_FILL_NON_ZERO:
case OperatorName.FILL_EVEN_ODD:
addVisibleRulings(graphicsPath, false);
graphicsPath.clear();
break;
//stroke
case OperatorName.STROKE_PATH:
addVisibleRulings(graphicsPath, true);
graphicsPath.clear();
break;
//cancel path
case OperatorName.ENDPATH:
graphicsPath.clear();
break;
}
// always let the superclass handle the operator as well
super.processOperator(operator, arguments);
}
/**
 * Reads an operand as a float.
 *
 * @param value the operand to read
 * @return the float value if {@code value} is a {@link COSNumber}, otherwise {@code 0}
 */
private float floatValue(COSBase value) {
    return value instanceof COSNumber ? ((COSNumber) value).floatValue() : 0;
}
/**
 * Maps a point through the superclass's {@code transformedPoint(x, y)}.
 *
 * @param x the x coordinate of the point
 * @param y the y coordinate of the point
 * @return the transformed point
 */
private Point2D.Float transformPosition(float x, float y) {
return super.transformedPoint(x, y);
}
/**
 * Adds the given path segments to the collected rulings if the relevant paint colour is not a
 * pattern and its RGB value is {@code 0}.
 * <p>
 * For stroked paths the stroking colour is checked, for filled paths the non-stroking colour.
 * Colours that cannot be converted to RGB are skipped and logged at debug level.
 *
 * @param path the pending path segments
 * @param stroke {@code true} if the path is being stroked, {@code false} if it is being filled
 * @throws IOException if the colour cannot be evaluated
 */
private void addVisibleRulings(List<Ruling> path, boolean stroke) throws IOException {
    try {
        // Pick the colour that is actually used to paint this path.
        var paint = stroke ? getGraphicsState().getStrokingColor() : getGraphicsState().getNonStrokingColor();
        if (!paint.isPattern() && paint.toRGB() == 0) {
            rulings.addAll(path);
        }
    } catch (UnsupportedOperationException e) {
        log.debug("UnsupportedOperationException: " + getGraphicsState().getStrokingColor().getColorSpace().getName() + " or " + getGraphicsState().getNonStrokingColor()
                .getColorSpace()
                .getName() + " does not support toRGB");
    }
}
/**
 * Splits the given text positions into {@link TextPositionSequence}s (words) and records the
 * minimum and maximum character width and height seen so far.
 * <p>
 * The positions are sorted in place by their direction-adjusted X coordinate. A new sequence is
 * started on a horizontal gap (unrotated text only) or on whitespace. A run that starts exactly
 * where the previously collected sequence ends, on the same baseline, is appended to that
 * sequence instead of starting a new one.
 *
 * @param text the text to write, forwarded to the superclass
 * @param textPositions the glyph positions belonging to {@code text}; sorted in place
 * @throws IOException if the superclass fails to write the text
 */
@Override
public void writeString(String text, List<TextPosition> textPositions) throws IOException {
    int startIndex = 0;
    RedTextPosition previous = null;
    textPositions.sort(Comparator.comparing(TextPosition::getXDirAdj));

    for (int i = 0; i < textPositions.size(); i++) {
        TextPosition current = textPositions.get(i);

        // Last glyph collected so far; deliberately captured before this iteration adds sequences.
        if (!textPositionSequences.isEmpty()) {
            var lastPositions = textPositionSequences.get(textPositionSequences.size() - 1).getTextPositions();
            previous = lastPositions.get(lastPositions.size() - 1);
        }

        int charWidth = (int) current.getWidthDirAdj();
        minCharWidth = Math.min(minCharWidth, charWidth);
        maxCharWidth = Math.max(maxCharWidth, charWidth);

        int charHeight = (int) current.getHeightDir();
        minCharHeight = Math.min(minCharHeight, charHeight);
        // Compare the height (not the width) against the current maximum height.
        maxCharHeight = Math.max(maxCharHeight, charHeight);

        // Skip a leading whitespace glyph.
        if (i == 0 && isBlankGlyph(current)) {
            startIndex++;
            continue;
        }

        // Strange but sometimes this is happening, for example: Metolachlor2.pdf
        if (i > 0 && current.getXDirAdj() < textPositions.get(i - 1).getXDirAdj()) {
            List<TextPosition> sublist = textPositions.subList(startIndex, i);
            if (hasVisibleContent(sublist)) {
                textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
            }
            startIndex = i;
        }

        // Horizontal gap between neighbouring glyphs of unrotated text ends the current word.
        if (current.getRotation() == 0 && i > 0 && current.getX() > textPositions.get(i - 1).getEndX() + 1) {
            List<TextPosition> sublist = textPositions.subList(startIndex, i);
            if (hasVisibleContent(sublist)) {
                textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
            }
            startIndex = i;
        }

        // Whitespace inside the run (not the last glyph) ends the current word.
        if (i > 0 && isBlankGlyph(current) && i <= textPositions.size() - 2) {
            List<TextPosition> sublist = textPositions.subList(startIndex, i);
            if (hasVisibleContent(sublist)) {
                // Remove false sequence ends (whitespaces)
                mergeOrAddSequence(previous, sublist);
            }
            startIndex = i + 1;
        }
    }

    // Remaining glyphs after the last split, without a trailing whitespace glyph.
    List<TextPosition> sublist = textPositions.subList(startIndex, textPositions.size());
    if (!sublist.isEmpty() && isBlankGlyph(sublist.get(sublist.size() - 1))) {
        sublist = sublist.subList(0, sublist.size() - 1);
    }
    if (hasVisibleContent(sublist)) {
        mergeOrAddSequence(previous, sublist);
    }

    super.writeString(text);
}

/** Returns whether the glyph is a space, a non-breaking space or a tab. */
private static boolean isBlankGlyph(TextPosition position) {
    String unicode = position.getUnicode();
    return unicode.equals(" ") || unicode.equals("\u00A0") || unicode.equals("\t");
}

/** Returns whether the run is non-empty and not just a single whitespace glyph. */
private static boolean hasVisibleContent(List<TextPosition> run) {
    return !(run.isEmpty() || run.size() == 1 && isBlankGlyph(run.get(0)));
}

/** Appends the run to the last sequence if it directly continues {@code previous}, otherwise starts a new sequence. */
private void mergeOrAddSequence(RedTextPosition previous, List<TextPosition> run) {
    TextPosition first = run.get(0);
    boolean continuesPrevious = previous != null
            && first.getYDirAdj() == previous.getYDirAdj()
            && first.getXDirAdj() - (previous.getXDirAdj() + previous.getWidthDirAdj()) < 0.01;
    if (continuesPrevious) {
        TextPositionSequence lastSequence = textPositionSequences.get(textPositionSequences.size() - 1);
        for (TextPosition textPosition : run) {
            lastSequence.add(textPosition);
        }
    } else {
        textPositionSequences.add(new TextPositionSequence(run, pageNumber));
    }
}
/**
 * Resets all per-run state (character size statistics, collected sequences, rulings and the
 * pending graphics path) and then extracts the text via the superclass.
 *
 * @param doc the document to extract text from
 * @return the text returned by the superclass
 * @throws IOException if text extraction fails
 */
@Override
public String getText(PDDocument doc) throws IOException {
    // Collected results from a previous run
    textPositionSequences.clear();
    rulings.clear();

    // Pending path state
    graphicsPath.clear();
    path_x = 0.0f;
    path_y = 0.0f;

    // Character size statistics
    minCharWidth = Integer.MAX_VALUE;
    minCharHeight = Integer.MAX_VALUE;
    maxCharWidth = 0;
    maxCharHeight = 0;

    return super.getText(doc);
}
}

View File

@ -1,25 +0,0 @@
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.service;
import static java.util.stream.Collectors.toSet;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.AbstractPageBlock;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.ClassificationPage;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.FloatFrequencyCounter;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.Orientation;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.table.Ruling;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text.StringFrequencyCounter;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text.TextPageBlock;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text.TextPositionSequence;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.utils.RulingTextDirAdjustUtil;
/**
 * Strategy interface for turning the text position sequences and ruling lines of a page into a
 * {@link ClassificationPage}.
 */
public interface BlockificationService {
/**
 * Builds a {@link ClassificationPage} from the given text and ruling lines.
 * How the inputs are grouped is defined by the implementation.
 *
 * @param textPositions the text position sequences to process
 * @param horizontalRulingLines the horizontal ruling lines
 * @param verticalRulingLines the vertical ruling lines
 * @return the resulting page
 */
ClassificationPage blockify(List<TextPositionSequence> textPositions, List<Ruling> horizontalRulingLines, List<Ruling> verticalRulingLines);
}

Some files were not shown because too many files have changed in this diff Show More