Test gradle
This commit is contained in:
parent
d9f7b3f516
commit
af9b581b4f
43
.gitignore
vendored
43
.gitignore
vendored
@ -9,6 +9,49 @@
|
||||
**/tmp/
|
||||
**/.apt_generated/
|
||||
|
||||
HELP.md
|
||||
target/
|
||||
!.mvn/wrapper/maven-wrapper.jar
|
||||
!**/src/main/**/target/
|
||||
!**/src/test/**/target/
|
||||
|
||||
### maven build ###
|
||||
*.class
|
||||
/out/
|
||||
/build/
|
||||
/target/
|
||||
**/out/
|
||||
**/build/
|
||||
**/target/
|
||||
|
||||
### STS ###
|
||||
.apt_generated
|
||||
.classpath
|
||||
.factorypath
|
||||
.project
|
||||
.settings
|
||||
.springBeans
|
||||
.sts4-cache
|
||||
.gradle
|
||||
|
||||
### IntelliJ IDEA ###
|
||||
.idea
|
||||
*.iws
|
||||
*.iml
|
||||
*.ipr
|
||||
|
||||
### NetBeans ###
|
||||
/nbproject/private/
|
||||
/nbbuild/
|
||||
/dist/
|
||||
/nbdist/
|
||||
/.nb-gradle/
|
||||
build/
|
||||
!**/src/main/**/build/
|
||||
!**/src/test/**/build/
|
||||
|
||||
### VS Code ###
|
||||
.vscode/
|
||||
|
||||
.factorypath
|
||||
.springBeans
|
||||
|
||||
@ -3,4 +3,4 @@ variables:
|
||||
include:
|
||||
- project: 'gitlab/gitlab'
|
||||
ref: 'main'
|
||||
file: 'ci-templates/maven_java.yml'
|
||||
file: 'ci-templates/gradle_java.yml'
|
||||
|
||||
7
buildSrc/build.gradle.kts
Normal file
7
buildSrc/build.gradle.kts
Normal file
@ -0,0 +1,7 @@
|
||||
// Build script of the buildSrc project.
// The kotlin-dsl plugin lets buildSrc host precompiled *.gradle.kts script plugins
// that the modules of this build can apply by id.
plugins {
    `kotlin-dsl`
}

// Plugins referenced from buildSrc are resolved from the Gradle Plugin Portal.
repositories.gradlePluginPortal()
|
||||
@ -0,0 +1,75 @@
|
||||
// Shared Java conventions: Java 17 library build, static analysis (PMD, Checkstyle),
// JaCoCo coverage, and Maven publication to the company Nexus.
plugins {
    `java-library`
    `maven-publish`
    pmd
    checkstyle
    jacoco
}

group = "com.iqser.red"

java {
    sourceCompatibility = JavaVersion.VERSION_17
    targetCompatibility = JavaVersion.VERSION_17
    withJavadocJar()
}

// Configure the rule sets on the PMD *tasks* themselves. The previous code wrote to the
// project-wide `pmd` extension from inside both task blocks, so whichever block ran last
// (test_pmd.xml) silently won for main and test sources alike.
// `ruleSets` is cleared because Gradle otherwise also applies its built-in default rule
// set, which would re-enable the rules the custom rule set files exclude.
tasks.pmdMain {
    ruleSets = listOf()
    ruleSetFiles = files("${rootDir}/config/pmd/pmd.xml")
}

tasks.pmdTest {
    ruleSets = listOf()
    ruleSetFiles = files("${rootDir}/config/pmd/test_pmd.xml")
}

tasks.test {
    useJUnitPlatform()
    reports {
        junitXml.outputLocation.set(layout.buildDirectory.dir("reports/junit"))
    }
    finalizedBy(tasks.jacocoTestReport) // report is always generated after tests run
}

tasks.jacocoTestReport {
    dependsOn(tasks.test) // tests are required to run before generating the report
    reports {
        xml.required.set(true)
        csv.required.set(false)
        html.outputLocation.set(layout.buildDirectory.dir("jacocoHtml"))
    }
}

// Only the project that applies this plugin is configured. The former `allprojects { }`
// wrapper would also have reached into child projects, which are not guaranteed to have
// `maven-publish` applied. For a project without children the result is the same.
publishing {
    publications {
        // One publication per project, named after the project.
        create<MavenPublication>(project.name) {
            from(components["java"])
        }
    }
    repositories {
        maven {
            url = uri("https://nexus.knecon.com/repository/red-platform-releases/")
            credentials {
                // Credentials come from Gradle properties and stay null when not provided.
                username = providers.gradleProperty("mavenUser").orNull
                password = providers.gradleProperty("mavenPassword").orNull
            }
        }
    }
}

repositories {
    mavenLocal()
    mavenCentral()
    maven {
        url = uri("https://nexus.knecon.com/repository/gindev/")
        credentials {
            username = providers.gradleProperty("mavenUser").orNull
            password = providers.gradleProperty("mavenPassword").orNull
        }
    }
}
|
||||
39
config/checkstyle/checkstyle.xml
Normal file
39
config/checkstyle/checkstyle.xml
Normal file
@ -0,0 +1,39 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE module PUBLIC "-//Puppy Crawl//DTD Check Configuration 1.3//EN"
|
||||
"http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
|
||||
<module name="Checker">
|
||||
<property
|
||||
name="severity"
|
||||
value="error"/>
|
||||
<module name="TreeWalker">
|
||||
<module name="SuppressWarningsHolder"/>
|
||||
<module name="MissingDeprecated"/>
|
||||
<module name="MissingOverride"/>
|
||||
<module name="AnnotationLocation"/>
|
||||
<module name="JavadocStyle"/>
|
||||
<module name="NonEmptyAtclauseDescription"/>
|
||||
<module name="IllegalImport"/>
|
||||
<module name="RedundantImport"/>
|
||||
<module name="RedundantModifier"/>
|
||||
<module name="EmptyBlock"/>
|
||||
<module name="DefaultComesLast"/>
|
||||
<module name="EmptyStatement"/>
|
||||
<module name="EqualsHashCode"/>
|
||||
<module name="ExplicitInitialization"/>
|
||||
<module name="IllegalInstantiation"/>
|
||||
<module name="ModifiedControlVariable"/>
|
||||
<module name="MultipleVariableDeclarations"/>
|
||||
<module name="PackageDeclaration"/>
|
||||
<module name="ParameterAssignment"/>
|
||||
<module name="SimplifyBooleanExpression"/>
|
||||
<module name="SimplifyBooleanReturn"/>
|
||||
<module name="StringLiteralEquality"/>
|
||||
<module name="OneStatementPerLine"/>
|
||||
<module name="FinalClass"/>
|
||||
<module name="ArrayTypeStyle"/>
|
||||
<module name="UpperEll"/>
|
||||
<module name="OuterTypeFilename"/>
|
||||
</module>
|
||||
<module name="FileTabCharacter"/>
|
||||
<module name="SuppressWarningsFilter"/>
|
||||
</module>
|
||||
21
config/pmd/pmd.xml
Normal file
21
config/pmd/pmd.xml
Normal file
@ -0,0 +1,21 @@
|
||||
<?xml version="1.0"?>
|
||||
<ruleset name="Custom ruleset"
|
||||
xmlns="http://pmd.sourceforge.net/ruleset/2.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://pmd.sourceforge.net/ruleset/2.0.0 http://pmd.sourceforge.net/ruleset_2_0_0.xsd">
|
||||
|
||||
<description>
|
||||
Knecon ruleset checks the code for bad stuff
|
||||
</description>
|
||||
|
||||
<rule ref="category/java/errorprone.xml">
|
||||
<exclude name="DataflowAnomalyAnalysis"/>
|
||||
<exclude name="MissingSerialVersionUID"/>
|
||||
<exclude name="NullAssignment"/>
|
||||
<exclude name="AvoidLiteralsInIfCondition"/>
|
||||
<exclude name="AvoidDuplicateLiterals"/>
|
||||
<exclude name="AvoidFieldNameMatchingMethodName"/>
|
||||
<exclude name="AssignmentInOperand"/>
|
||||
</rule>
|
||||
|
||||
</ruleset>
|
||||
24
config/pmd/test_pmd.xml
Normal file
24
config/pmd/test_pmd.xml
Normal file
@ -0,0 +1,24 @@
|
||||
<?xml version="1.0"?>
|
||||
<ruleset name="Custom ruleset"
|
||||
xmlns="http://pmd.sourceforge.net/ruleset/2.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://pmd.sourceforge.net/ruleset/2.0.0 http://pmd.sourceforge.net/ruleset_2_0_0.xsd">
|
||||
|
||||
<description>
|
||||
Knecon test ruleset checks the code for bad stuff
|
||||
</description>
|
||||
|
||||
|
||||
<rule ref="category/java/errorprone.xml">
|
||||
<exclude name="DataflowAnomalyAnalysis"/>
|
||||
<exclude name="MissingSerialVersionUID"/>
|
||||
<exclude name="NullAssignment"/>
|
||||
<exclude name="AvoidLiteralsInIfCondition"/>
|
||||
<exclude name="AvoidDuplicateLiterals"/>
|
||||
<exclude name="AvoidFieldNameMatchingMethodName"/>
|
||||
<exclude name="AvoidFieldNameMatchingTypeName"/>
|
||||
<exclude name="AssignmentInOperand"/>
|
||||
<exclude name="TestClassWithoutTestCases"/>
|
||||
</rule>
|
||||
|
||||
</ruleset>
|
||||
1
gradle.properties.kts
Normal file
1
gradle.properties.kts
Normal file
@ -0,0 +1 @@
|
||||
version = 4.0-SNAPSHOT
|
||||
20
pom.xml
20
pom.xml
@ -1,20 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<artifactId>redaction-service</artifactId>
|
||||
<groupId>com.iqser.red.service</groupId>
|
||||
<version>4.0-SNAPSHOT</version>
|
||||
|
||||
|
||||
<packaging>pom</packaging>
|
||||
|
||||
<modules>
|
||||
<module>redaction-service-v1</module>
|
||||
<module>redaction-service-image-v1</module>
|
||||
</modules>
|
||||
|
||||
</project>
|
||||
@ -1,98 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<parent>
|
||||
<groupId>com.knecon.fforesight</groupId>
|
||||
<artifactId>platform-docker-dependency</artifactId>
|
||||
<version>0.1.0</version>
|
||||
<relativePath/>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<artifactId>redaction-service-image-v1</artifactId>
|
||||
<groupId>com.iqser.red.service</groupId>
|
||||
<version>4.0-SNAPSHOT</version>
|
||||
<packaging>pom</packaging>
|
||||
|
||||
|
||||
<properties>
|
||||
<service.server>redaction-service-server-v1</service.server>
|
||||
<platform.jar>${service.server}.jar</platform.jar>
|
||||
<docker.skip.push>false</docker.skip.push>
|
||||
<docker.image.name>${docker.image.prefix}/${service.server}</docker.image.name>
|
||||
</properties>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-dependency-plugin</artifactId>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-resources-plugin</artifactId>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.codehaus.mojo</groupId>
|
||||
<artifactId>exec-maven-plugin</artifactId>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>io.fabric8</groupId>
|
||||
<artifactId>docker-maven-plugin</artifactId>
|
||||
</plugin>
|
||||
</plugins>
|
||||
|
||||
<pluginManagement>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-dependency-plugin</artifactId>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>download-platform-jar</id>
|
||||
<phase>prepare-package</phase>
|
||||
<goals>
|
||||
<goal>copy</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<artifactItems>
|
||||
<dependency>
|
||||
<groupId>${project.groupId}</groupId>
|
||||
<artifactId>${service.server}</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<type>jar</type>
|
||||
<overWrite>true</overWrite>
|
||||
<destFileName>${platform.jar}</destFileName>
|
||||
</dependency>
|
||||
</artifactItems>
|
||||
<outputDirectory>${docker.build.directory}</outputDirectory>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>io.fabric8</groupId>
|
||||
<artifactId>docker-maven-plugin</artifactId>
|
||||
<configuration>
|
||||
<images>
|
||||
<image>
|
||||
<name>${docker.image.name}</name>
|
||||
<build>
|
||||
<dockerFileDir>${docker.build.directory}</dockerFileDir>
|
||||
<args>
|
||||
<PLATFORM_JAR>${platform.jar}</PLATFORM_JAR>
|
||||
</args>
|
||||
<tags>
|
||||
<tag>${docker.image.version}</tag>
|
||||
<tag>latest</tag>
|
||||
</tags>
|
||||
</build>
|
||||
</image>
|
||||
</images>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</pluginManagement>
|
||||
</build>
|
||||
</project>
|
||||
@ -1,9 +0,0 @@
|
||||
FROM red/redaction-service-base-v1:2.0.0
|
||||
|
||||
ARG PLATFORM_JAR
|
||||
|
||||
ENV PLATFORM_JAR ${PLATFORM_JAR}
|
||||
|
||||
ENV USES_ELASTICSEARCH false
|
||||
|
||||
COPY ["${PLATFORM_JAR}", "/"]
|
||||
@ -1,114 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<parent>
|
||||
<artifactId>platform-dependency</artifactId>
|
||||
<groupId>com.iqser.red</groupId>
|
||||
<version>2.2.0</version>
|
||||
<relativePath/>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<artifactId>redaction-service-v1</artifactId>
|
||||
<groupId>com.iqser.red.service</groupId>
|
||||
<version>4.0-SNAPSHOT</version>
|
||||
|
||||
<packaging>pom</packaging>
|
||||
|
||||
<modules>
|
||||
<module>redaction-service-api-v1</module>
|
||||
<module>redaction-service-server-v1</module>
|
||||
</modules>
|
||||
|
||||
<properties>
|
||||
<pdfbox.version>2.0.24</pdfbox.version>
|
||||
<lombok.version>1.18.26</lombok.version>
|
||||
</properties>
|
||||
|
||||
|
||||
<dependencyManagement>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>com.iqser.red</groupId>
|
||||
<artifactId>platform-commons-dependency</artifactId>
|
||||
<version>2.5.0</version>
|
||||
<scope>import</scope>
|
||||
<type>pom</type>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.pdfbox</groupId>
|
||||
<artifactId>pdfbox</artifactId>
|
||||
<version>${pdfbox.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.pdfbox</groupId>
|
||||
<artifactId>pdfbox-tools</artifactId>
|
||||
<version>${pdfbox.version}</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</dependencyManagement>
|
||||
|
||||
<build>
|
||||
<pluginManagement>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.sonarsource.scanner.maven</groupId>
|
||||
<artifactId>sonar-maven-plugin</artifactId>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.owasp</groupId>
|
||||
<artifactId>dependency-check-maven</artifactId>
|
||||
<configuration>
|
||||
<format>ALL</format>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.jacoco</groupId>
|
||||
<artifactId>jacoco-maven-plugin</artifactId>
|
||||
<version>0.8.8</version>
|
||||
<configuration>
|
||||
<excludes>
|
||||
<exclude>org/drools/**/*</exclude>
|
||||
</excludes>
|
||||
</configuration>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>prepare-agent</id>
|
||||
<goals>
|
||||
<goal>prepare-agent</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
<execution>
|
||||
<id>report</id>
|
||||
<goals>
|
||||
<goal>report</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</pluginManagement>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.projectlombok</groupId>
|
||||
<artifactId>lombok-maven-plugin</artifactId>
|
||||
<version>1.18.20.0</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>delombok</id>
|
||||
<phase>generate-sources</phase>
|
||||
<goals>
|
||||
<goal>delombok</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<addOutputDirectory>false</addOutputDirectory>
|
||||
<sourceDirectory>src/main/java</sourceDirectory>
|
||||
<outputDirectory>${delomboked.sources}</outputDirectory>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
|
||||
@ -0,0 +1,14 @@
|
||||
|
||||
// Build script of the API module: shared Java conventions plus Lombok support.
plugins {
    id("com.iqser.red.service.java-conventions")
    id("io.freefair.lombok") version "8.1.0"
}

description = "redaction-service-api-v1"

val springWebVersion = "6.0.6"

// NOTE(review): "RED-6725" looks like a branch build rather than a release version,
// and the server module pins the same artifact to 2.119.0 — confirm which is intended.
val persistenceServiceVersion = "RED-6725"

dependencies {
    implementation("org.springframework:spring-web:$springWebVersion")
    implementation("com.iqser.red.service:persistence-service-internal-api-v1:$persistenceServiceVersion")
}
|
||||
|
||||
|
||||
@ -1,61 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<parent>
|
||||
<artifactId>redaction-service-v1</artifactId>
|
||||
<groupId>com.iqser.red.service</groupId>
|
||||
<version>4.0-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<artifactId>redaction-service-api-v1</artifactId>
|
||||
|
||||
<properties>
|
||||
<persistence-service.version>2.93.0</persistence-service.version>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework</groupId>
|
||||
<artifactId>spring-web</artifactId>
|
||||
<optional>true</optional>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.iqser.red.service</groupId>
|
||||
<artifactId>persistence-service-internal-api-v1</artifactId>
|
||||
<version>${persistence-service.version}</version>
|
||||
<exclusions>
|
||||
|
||||
<exclusion>
|
||||
<groupId>com.iqser.red.service</groupId>
|
||||
<artifactId>redaction-service-api-v1</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>com.iqser.red.service</groupId>
|
||||
<artifactId>persistence-service-api-v1</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<configuration>
|
||||
<annotationProcessorPaths>
|
||||
<path>
|
||||
<groupId>org.projectlombok</groupId>
|
||||
<artifactId>lombok</artifactId>
|
||||
<version>${lombok.version}</version>
|
||||
</path>
|
||||
</annotationProcessorPaths>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
|
||||
@ -0,0 +1,90 @@
|
||||
import org.springframework.boot.gradle.tasks.bundling.BootBuildImage
|
||||
|
||||
// Build script of the server module: Spring Boot application built with the shared
// Java conventions, packaged as an OCI image via bootBuildImage.
plugins {
    application
    id("com.iqser.red.service.java-conventions")
    id("org.springframework.boot") version "3.0.6"
    id("io.spring.dependency-management") version "1.1.0"
    id("org.sonarqube") version "4.2.1.3168"
    id("io.freefair.lombok") version "8.1.0"
}

description = "redaction-service-server-v1"

val layoutParserVersion = "0.18.0"
val jacksonVersion = "2.14.2"
val droolsVersion = "8.37.0.Final"
val pdfBoxVersion = "3.0.0-alpha2"

// Remove Spring Boot's default logging starter from every configuration.
configurations.all {
    exclude(group = "org.springframework.boot", module = "spring-boot-starter-logging")
}

dependencies {
    // Internal APIs. The API module's own persistence dependency is excluded so that
    // the version declared right below is the one that ends up on the classpath.
    implementation(project(":redaction-service-api-v1")) {
        exclude(group = "com.iqser.red.service", module = "persistence-service-internal-api-v1")
    }
    implementation("com.iqser.red.service:persistence-service-internal-api-v1:2.119.0") {
        exclude(group = "org.springframework.boot")
    }
    implementation("com.knecon.fforesight:layoutparser-service-internal-api:${layoutParserVersion}")

    // Commons libraries
    implementation("com.iqser.red.commons:spring-commons:2.1.0")
    implementation("com.iqser.red.commons:metric-commons:2.1.0")
    implementation("com.iqser.red.commons:dictionary-merge-commons:1.3.0")
    implementation("com.iqser.red.commons:storage-commons:2.1.0")
    implementation("com.knecon.fforesight:tenant-commons:0.10.0")

    // Third-party libraries
    implementation("com.fasterxml.jackson.module:jackson-module-afterburner:${jacksonVersion}")
    implementation("com.fasterxml.jackson.datatype:jackson-datatype-jsr310:${jacksonVersion}")
    implementation("org.ahocorasick:ahocorasick:0.6.3")
    implementation("org.javassist:javassist:3.29.2-GA")

    // Rule engine
    implementation("org.drools:drools-engine:${droolsVersion}")
    implementation("org.drools:drools-mvel:${droolsVersion}")
    implementation("org.kie:kie-spring:7.73.0.Final")

    implementation("org.locationtech.jts:jts-core:1.19.0")

    // Spring
    // NOTE(review): the starters below are pinned to 3.0.4 while the Spring Boot plugin
    // is 3.0.6 — confirm whether the mismatch is intentional.
    implementation("org.springframework.cloud:spring-cloud-starter-openfeign:4.0.1")
    implementation("org.springframework.boot:spring-boot-starter-amqp:3.0.4")

    // Test-only dependencies
    testImplementation("org.apache.pdfbox:pdfbox:${pdfBoxVersion}")
    testImplementation("org.apache.pdfbox:pdfbox-tools:${pdfBoxVersion}")
    testImplementation("org.springframework.boot:spring-boot-starter-test:3.0.4")
    testImplementation("com.knecon.fforesight:layoutparser-service-processor:${layoutParserVersion}") {
        exclude(group = "com.iqser.red.service", module = "persistence-service-shared-api-v1")
    }
}

// Keep Drools classes out of the JaCoCo instrumentation for the test task.
tasks.test {
    configure<JacocoTaskExtension> {
        excludes = listOf("org/drools/**/*")
    }
}

tasks.named<BootBuildImage>("bootBuildImage") {
    // Opt-in switch: when the property is present, the build uses the host network.
    val useHostNetwork = project.hasProperty("buildbootDockerHostNetwork")

    imageName.set("nexus.knecon.com:5001/red/${project.name}:${project.version}")
    if (useHostNetwork) {
        network.set("host")
    }

    docker {
        if (useHostNetwork) {
            bindHostToBuilder.set(true)
        }
        verboseLogging.set(true)

        // Registry credentials come from Gradle properties and stay null when not provided.
        publishRegistry {
            username.set(providers.gradleProperty("mavenUser").orNull)
            password.set(providers.gradleProperty("mavenPassword").orNull)
            email.set(providers.gradleProperty("mavenEmail").orNull)
            url.set("https://nexus.knecon.com:5001/")
        }
    }
}
|
||||
@ -1,217 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<parent>
|
||||
<artifactId>redaction-service-v1</artifactId>
|
||||
<groupId>com.iqser.red.service</groupId>
|
||||
<version>4.0-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<artifactId>redaction-service-server-v1</artifactId>
|
||||
|
||||
<properties>
|
||||
<drools.version>8.37.0.Final</drools.version>
|
||||
<kie.version>7.73.0.Final</kie.version>
|
||||
<locationtech.version>1.19.0</locationtech.version>
|
||||
<javaassist.version>3.29.2-GA</javaassist.version>
|
||||
<ahocorasick.version>0.6.3</ahocorasick.version>
|
||||
<jackson.version>2.14.2</jackson.version>
|
||||
<tennat-commons.version>0.10.0</tennat-commons.version>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>com.knecon.fforesight</groupId>
|
||||
<artifactId>tenant-commons</artifactId>
|
||||
<version>${tennat-commons.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-aop</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.iqser.red.commons</groupId>
|
||||
<artifactId>dictionary-merge-commons</artifactId>
|
||||
<version>1.3.0</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.iqser.red.commons</groupId>
|
||||
<artifactId>storage-commons</artifactId>
|
||||
<version>2.1.0</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.module</groupId>
|
||||
<artifactId>jackson-module-afterburner</artifactId>
|
||||
<version>${jackson.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.datatype</groupId>
|
||||
<artifactId>jackson-datatype-jsr310</artifactId>
|
||||
<version>${jackson.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.ahocorasick</groupId>
|
||||
<artifactId>ahocorasick</artifactId>
|
||||
<version>${ahocorasick.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.javassist</groupId>
|
||||
<artifactId>javassist</artifactId>
|
||||
<version>${javaassist.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.iqser.red.service</groupId>
|
||||
<artifactId>redaction-service-api-v1</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.drools</groupId>
|
||||
<artifactId>drools-engine</artifactId>
|
||||
<version>${drools.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.drools</groupId>
|
||||
<artifactId>drools-mvel</artifactId>
|
||||
<version>${drools.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.kie</groupId>
|
||||
<artifactId>kie-spring</artifactId>
|
||||
<version>${kie.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.locationtech.jts</groupId>
|
||||
<artifactId>jts-core</artifactId>
|
||||
<version>${locationtech.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.google.guava</groupId>
|
||||
<artifactId>guava</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- commons -->
|
||||
<dependency>
|
||||
<groupId>com.iqser.red.commons</groupId>
|
||||
<artifactId>spring-commons</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.iqser.red.commons</groupId>
|
||||
<artifactId>logging-commons</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.iqser.red.commons</groupId>
|
||||
<artifactId>metric-commons</artifactId>
|
||||
</dependency>
|
||||
<!-- other external -->
|
||||
<dependency>
|
||||
<groupId>org.apache.pdfbox</groupId>
|
||||
<artifactId>pdfbox</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.pdfbox</groupId>
|
||||
<artifactId>pdfbox-tools</artifactId>
|
||||
</dependency>
|
||||
<!-- spring -->
|
||||
<dependency>
|
||||
<groupId>org.springframework.cloud</groupId>
|
||||
<artifactId>spring-cloud-starter-openfeign</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-amqp</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- test dependencies -->
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-test</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.iqser.red.commons</groupId>
|
||||
<artifactId>test-commons</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<configuration>
|
||||
<annotationProcessorPaths>
|
||||
<path>
|
||||
<groupId>org.projectlombok</groupId>
|
||||
<artifactId>lombok</artifactId>
|
||||
<version>${lombok.version}</version>
|
||||
</path>
|
||||
</annotationProcessorPaths>
|
||||
</configuration>
|
||||
</plugin>
|
||||
|
||||
<plugin>
|
||||
<!-- generate git.properties for exposure in /info -->
|
||||
<groupId>pl.project13.maven</groupId>
|
||||
<artifactId>git-commit-id-plugin</artifactId>
|
||||
<executions>
|
||||
<execution>
|
||||
<goals>
|
||||
<goal>revision</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<generateGitPropertiesFile>true</generateGitPropertiesFile>
|
||||
<gitDescribe>
|
||||
<tags>true</tags>
|
||||
</gitDescribe>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-jar-plugin</artifactId>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>original-jar</id>
|
||||
<goals>
|
||||
<goal>jar</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<classifier>original</classifier>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
|
||||
<plugin>
|
||||
<!-- repackages the generated jar into a runnable fat-jar and makes it
|
||||
executable -->
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-maven-plugin</artifactId>
|
||||
<executions>
|
||||
<execution>
|
||||
<goals>
|
||||
<goal>repackage</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<executable>true</executable>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
|
||||
@ -0,0 +1,25 @@
|
||||
package com.iqser.red.service.redaction.v1.server.document.data;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPage;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositionData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructure;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentTextData;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@AllArgsConstructor
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public class DocumentData {
|
||||
|
||||
DocumentPage[] documentPages;
|
||||
DocumentTextData[] documentTextData;
|
||||
DocumentPositionData[] documentPositionData;
|
||||
DocumentStructure documentStructure;
|
||||
|
||||
}
|
||||
@ -1,4 +1,4 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.data.mapper;
|
||||
package com.iqser.red.service.redaction.v1.server.document.data.mapper;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
@ -7,26 +7,26 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.NoSuchElementException;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.data.AtomicPositionBlockData;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.data.AtomicTextBlockData;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.data.DocumentData;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.data.DocumentTreeData;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.data.PageData;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Footer;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Header;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Headline;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Image;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Paragraph;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Section;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.SemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Table;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.TableCell;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.AtomicTextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlockCollector;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Footer;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Header;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Image;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Paragraph;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Section;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.TableCell;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.AtomicTextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlockCollector;
|
||||
import com.iqser.red.service.redaction.v1.server.document.data.DocumentData;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Headline;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Table;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPage;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositionData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructure;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentTextData;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
|
||||
@ -39,23 +39,23 @@ public class DocumentGraphMapper {
|
||||
DocumentTree documentTree = new DocumentTree(document);
|
||||
Context context = new Context(documentData, documentTree);
|
||||
|
||||
context.pages.addAll(Arrays.stream(documentData.getPages()).map(DocumentGraphMapper::buildPage).toList());
|
||||
context.pageData.addAll(Arrays.stream(documentData.getDocumentPages()).map(DocumentGraphMapper::buildPage).toList());
|
||||
|
||||
context.documentTree.getRoot().getChildren().addAll(buildEntries(documentData.getDocumentTreeData().getRoot().getChildren(), context));
|
||||
context.documentTree.getRoot().getChildren().addAll(buildEntries(documentData.getDocumentStructure().getRoot().getChildren(), context));
|
||||
|
||||
document.setDocumentTree(context.documentTree);
|
||||
document.setPages(new HashSet<>(context.pages));
|
||||
document.setNumberOfPages(documentData.getPages().length);
|
||||
document.setPages(new HashSet<>(context.pageData));
|
||||
document.setNumberOfPages(documentData.getDocumentPages().length);
|
||||
|
||||
document.setTextBlock(document.getTextBlock());
|
||||
return document;
|
||||
}
|
||||
|
||||
|
||||
private List<DocumentTree.Entry> buildEntries(List<DocumentTreeData.EntryData> entries, Context context) {
|
||||
private List<DocumentTree.Entry> buildEntries(List<DocumentStructure.EntryData> entries, Context context) {
|
||||
|
||||
List<DocumentTree.Entry> newEntries = new LinkedList<>();
|
||||
for (DocumentTreeData.EntryData entryData : entries) {
|
||||
for (DocumentStructure.EntryData entryData : entries) {
|
||||
|
||||
List<Page> pages = Arrays.stream(entryData.getPageNumbers()).map(pageNumber -> getPage(pageNumber, context)).toList();
|
||||
|
||||
@ -154,14 +154,14 @@ public class DocumentGraphMapper {
|
||||
|
||||
private AtomicTextBlock getAtomicTextBlock(Context context, SemanticNode parent, Long atomicTextBlockId) {
|
||||
|
||||
return AtomicTextBlock.fromAtomicTextBlockData(context.atomicTextBlockData.get(Math.toIntExact(atomicTextBlockId)),
|
||||
context.atomicPositionBlockData.get(Math.toIntExact(atomicTextBlockId)),
|
||||
return AtomicTextBlock.fromAtomicTextBlockData(context.documentTextData.get(Math.toIntExact(atomicTextBlockId)),
|
||||
context.documentPositionData.get(Math.toIntExact(atomicTextBlockId)),
|
||||
parent,
|
||||
getPage(context.atomicTextBlockData.get(Math.toIntExact(atomicTextBlockId)).getPage(), context));
|
||||
getPage(context.documentTextData.get(Math.toIntExact(atomicTextBlockId)).getPage(), context));
|
||||
}
|
||||
|
||||
|
||||
private Page buildPage(PageData p) {
|
||||
private Page buildPage(DocumentPage p) {
|
||||
|
||||
return Page.builder().rotation(p.getRotation()).height(p.getHeight()).width(p.getWidth()).number(p.getNumber()).mainBody(new LinkedList<>()).build();
|
||||
}
|
||||
@ -169,7 +169,7 @@ public class DocumentGraphMapper {
|
||||
|
||||
private Page getPage(Long pageIndex, Context context) {
|
||||
|
||||
return context.pages.stream()
|
||||
return context.pageData.stream()
|
||||
.filter(page -> page.getNumber() == Math.toIntExact(pageIndex))
|
||||
.findFirst()
|
||||
.orElseThrow(() -> new NoSuchElementException(String.format("ClassificationPage with number %d not found", pageIndex)));
|
||||
@ -179,17 +179,17 @@ public class DocumentGraphMapper {
|
||||
static final class Context {
|
||||
|
||||
private final DocumentTree documentTree;
|
||||
private final List<Page> pages;
|
||||
private final List<AtomicTextBlockData> atomicTextBlockData;
|
||||
private final List<AtomicPositionBlockData> atomicPositionBlockData;
|
||||
private final List<Page> pageData;
|
||||
private final List<DocumentTextData> documentTextData;
|
||||
private final List<DocumentPositionData> documentPositionData;
|
||||
|
||||
|
||||
Context(DocumentData documentData, DocumentTree documentTree) {
|
||||
|
||||
this.documentTree = documentTree;
|
||||
this.pages = new LinkedList<>();
|
||||
this.atomicTextBlockData = Arrays.stream(documentData.getAtomicTextBlocks()).toList();
|
||||
this.atomicPositionBlockData = Arrays.stream(documentData.getAtomicPositionBlocks()).toList();
|
||||
this.pageData = new LinkedList<>();
|
||||
this.documentTextData = Arrays.stream(documentData.getDocumentTextData()).toList();
|
||||
this.documentPositionData = Arrays.stream(documentData.getDocumentPositionData()).toList();
|
||||
|
||||
}
|
||||
|
||||
@ -0,0 +1,52 @@
|
||||
package com.iqser.red.service.redaction.v1.server.document.data.mapper;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Image;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.ImageType;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Table;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.TableCell;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructure;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
import lombok.experimental.UtilityClass;
|
||||
|
||||
@UtilityClass
|
||||
public class PropertiesMapper {
|
||||
|
||||
public void parseImageProperties(Map<String, String> properties, Image.ImageBuilder builder) {
|
||||
|
||||
builder.imageType(ImageType.fromString(properties.get(DocumentStructure.ImageProperties.IMAGE_TYPE)));
|
||||
builder.transparent(Boolean.parseBoolean(properties.get(DocumentStructure.ImageProperties.TRANSPARENT)));
|
||||
builder.position(parseRectangle2D(properties.get(DocumentStructure.ImageProperties.POSITION)));
|
||||
builder.id(properties.get(DocumentStructure.ImageProperties.ID));
|
||||
}
|
||||
|
||||
|
||||
public void parseTableCellProperties(Map<String, String> properties, TableCell.TableCellBuilder builder) {
|
||||
|
||||
builder.row(Integer.parseInt(properties.get(DocumentStructure.TableCellProperties.ROW)));
|
||||
builder.col(Integer.parseInt(properties.get(DocumentStructure.TableCellProperties.COL)));
|
||||
builder.header(Boolean.parseBoolean(properties.get(DocumentStructure.TableCellProperties.HEADER)));
|
||||
builder.bBox(parseRectangle2D(properties.get(DocumentStructure.TableCellProperties.B_BOX)));
|
||||
}
|
||||
|
||||
|
||||
public void parseTableProperties(Map<String, String> properties, Table.TableBuilder builder) {
|
||||
|
||||
builder.numberOfRows(Integer.parseInt(properties.get(DocumentStructure.TableProperties.NUMBER_OF_ROWS)));
|
||||
builder.numberOfCols(Integer.parseInt(properties.get(DocumentStructure.TableProperties.NUMBER_OF_COLS)));
|
||||
}
|
||||
|
||||
|
||||
private Rectangle2D parseRectangle2D(String bBox) {
|
||||
|
||||
List<Float> floats = Arrays.stream(bBox.split(DocumentStructure.RECTANGLE_DELIMITER)).map(Float::parseFloat).toList();
|
||||
return new Rectangle2D.Float(floats.get(0), floats.get(1), floats.get(2), floats.get(3));
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,4 +1,4 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph;
|
||||
package com.iqser.red.service.redaction.v1.server.document.graph;
|
||||
|
||||
import static java.lang.String.format;
|
||||
|
||||
@ -6,7 +6,7 @@ import java.util.Collection;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
|
||||
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.Setter;
|
||||
@ -1,4 +1,4 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph;
|
||||
package com.iqser.red.service.redaction.v1.server.document.graph;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
@ -1,4 +1,4 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph;
|
||||
package com.iqser.red.service.redaction.v1.server.document.graph;
|
||||
|
||||
import static java.lang.String.format;
|
||||
|
||||
@ -7,14 +7,14 @@ import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.GenericSemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.NodeType;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.SemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Table;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.TableCell;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlockCollector;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.GenericSemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.NodeType;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.TableCell;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Table;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlockCollector;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
@ -1,4 +1,4 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity;
|
||||
package com.iqser.red.service.redaction.v1.server.document.graph.entity;
|
||||
|
||||
public enum EntityType {
|
||||
ENTITY,
|
||||
@ -1,4 +1,4 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity;
|
||||
package com.iqser.red.service.redaction.v1.server.document.graph.entity;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.Objects;
|
||||
@ -1,4 +1,4 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity;
|
||||
package com.iqser.red.service.redaction.v1.server.document.graph.entity;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.HashSet;
|
||||
@ -1,4 +1,4 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity;
|
||||
package com.iqser.red.service.redaction.v1.server.document.graph.entity;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.Collection;
|
||||
@ -11,9 +11,9 @@ import java.util.PriorityQueue;
|
||||
import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.SemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
@ -1,9 +1,9 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity;
|
||||
package com.iqser.red.service.redaction.v1.server.document.graph.entity;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.List;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Page;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
@ -1,4 +1,4 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity;
|
||||
package com.iqser.red.service.redaction.v1.server.document.graph.entity;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes;
|
||||
package com.iqser.red.service.redaction.v1.server.document.graph.nodes;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.Collections;
|
||||
@ -10,10 +10,10 @@ import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlockCollector;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlockCollector;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
@ -1,12 +1,12 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes;
|
||||
package com.iqser.red.service.redaction.v1.server.document.graph.nodes;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
@ -0,0 +1,5 @@
|
||||
package com.iqser.red.service.redaction.v1.server.document.graph.nodes;
|
||||
|
||||
/**
 * Sub-interface of {@link SemanticNode} that declares no members of its own.
 */
public interface GenericSemanticNode extends SemanticNode {
|
||||
|
||||
}
|
||||
@ -1,12 +1,12 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes;
|
||||
package com.iqser.red.service.redaction.v1.server.document.graph.nodes;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
@ -1,13 +1,13 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes;
|
||||
package com.iqser.red.service.redaction.v1.server.document.graph.nodes;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.AtomicTextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.AtomicTextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
@ -1,4 +1,4 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes;
|
||||
package com.iqser.red.service.redaction.v1.server.document.graph.nodes;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.Collections;
|
||||
@ -9,12 +9,12 @@ import java.util.Map;
|
||||
import java.util.PriorityQueue;
|
||||
import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.MatchedRule;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.MatchedRuleHolder;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlockCollector;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.MatchedRule;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.MatchedRuleHolder;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlockCollector;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
@ -1,4 +1,6 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes;
|
||||
package com.iqser.red.service.redaction.v1.server.document.graph.nodes;
|
||||
|
||||
import java.util.Locale;
|
||||
|
||||
public enum ImageType {
|
||||
LOGO,
|
||||
@ -10,7 +12,7 @@ public enum ImageType {
|
||||
|
||||
public static ImageType fromString(String imageType) {
|
||||
|
||||
return switch (imageType.toLowerCase()) {
|
||||
return switch (imageType.toLowerCase(Locale.ROOT)) {
|
||||
case "logo" -> ImageType.LOGO;
|
||||
case "formula" -> ImageType.FORMULA;
|
||||
case "signature" -> ImageType.SIGNATURE;
|
||||
@ -1,4 +1,4 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes;
|
||||
package com.iqser.red.service.redaction.v1.server.document.graph.nodes;
|
||||
|
||||
import java.util.Locale;
|
||||
|
||||
@ -1,14 +1,13 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes;
|
||||
package com.iqser.red.service.redaction.v1.server.document.graph.nodes;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.ClassificationPage;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlockCollector;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlockCollector;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
@ -48,18 +47,6 @@ public class Page {
|
||||
Set<Image> images = new HashSet<>();
|
||||
|
||||
|
||||
public static Page fromClassificationPage(ClassificationPage classificationPage) {
|
||||
|
||||
return Page.builder()
|
||||
.height((int) classificationPage.getPageHeight())
|
||||
.width((int) classificationPage.getPageWidth())
|
||||
.number(classificationPage.getPageNumber())
|
||||
.rotation(classificationPage.getRotation())
|
||||
.mainBody(new LinkedList<>())
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
public TextBlock getMainBodyTextBlock() {
|
||||
|
||||
return mainBody.stream().filter(SemanticNode::isLeaf).map(SemanticNode::getLeafTextBlock).collect(new TextBlockCollector());
|
||||
@ -1,12 +1,12 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes;
|
||||
package com.iqser.red.service.redaction.v1.server.document.graph.nodes;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
@ -1,13 +1,13 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes;
|
||||
package com.iqser.red.service.redaction.v1.server.document.graph.nodes;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlockCollector;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlockCollector;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
@ -1,4 +1,4 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes;
|
||||
package com.iqser.red.service.redaction.v1.server.document.graph.nodes;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.LinkedList;
|
||||
@ -1,4 +1,4 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes;
|
||||
package com.iqser.red.service.redaction.v1.server.document.graph.nodes;
|
||||
|
||||
import static java.lang.String.format;
|
||||
|
||||
@ -7,18 +7,19 @@ import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.AtomicTextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RectangleTransformations;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.AtomicTextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.document.utils.RectangleTransformations;
|
||||
import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility;
|
||||
|
||||
public interface SemanticNode {
|
||||
|
||||
@ -371,7 +372,7 @@ public interface SemanticNode {
|
||||
*/
|
||||
default boolean containsStringIgnoreCase(String string) {
|
||||
|
||||
return getTextBlock().getSearchText().toLowerCase().contains(string.toLowerCase());
|
||||
return getTextBlock().getSearchText().toLowerCase(Locale.ROOT).contains(string.toLowerCase(Locale.ROOT));
|
||||
}
|
||||
|
||||
|
||||
@ -1,18 +1,19 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes;
|
||||
package com.iqser.red.service.redaction.v1.server.document.graph.nodes;
|
||||
|
||||
import static java.lang.String.format;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Set;
|
||||
import java.util.stream.IntStream;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlockCollector;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlockCollector;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
@ -66,7 +67,7 @@ public class Table implements SemanticNode {
|
||||
*/
|
||||
public boolean rowContainsStringsIgnoreCase(Integer row, List<String> strings) {
|
||||
|
||||
String rowText = streamRow(row).map(TableCell::getTextBlock).collect(new TextBlockCollector()).getSearchText().toLowerCase();
|
||||
String rowText = streamRow(row).map(TableCell::getTextBlock).collect(new TextBlockCollector()).getSearchText().toLowerCase(Locale.ROOT);
|
||||
return strings.stream().map(String::toLowerCase).allMatch(rowText::contains);
|
||||
}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes;
|
||||
package com.iqser.red.service.redaction.v1.server.document.graph.nodes;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.HashMap;
|
||||
@ -7,10 +7,10 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlockCollector;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlockCollector;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
@ -1,4 +1,4 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock;
|
||||
package com.iqser.red.service.redaction.v1.server.document.graph.textblock;
|
||||
|
||||
import static java.lang.String.format;
|
||||
|
||||
@ -12,13 +12,12 @@ import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.data.AtomicPositionBlockData;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.data.AtomicTextBlockData;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.factory.SearchTextWithTextPositionDto;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.SemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RectangleTransformations;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.document.utils.RectangleTransformations;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositionData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentTextData;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
@ -58,28 +57,6 @@ public class AtomicTextBlock implements TextBlock {
|
||||
return lineBreaks.size() + 1;
|
||||
}
|
||||
|
||||
|
||||
public static AtomicTextBlock fromSearchTextWithTextPositionDto(SearchTextWithTextPositionDto searchTextWithTextPositionDto,
|
||||
SemanticNode parent,
|
||||
int stringOffset,
|
||||
Long textBlockIdx,
|
||||
Integer numberOnPage,
|
||||
Page page) {
|
||||
|
||||
return AtomicTextBlock.builder()
|
||||
.id(textBlockIdx)
|
||||
.parent(parent)
|
||||
.searchText(searchTextWithTextPositionDto.getSearchText())
|
||||
.numberOnPage(numberOnPage)
|
||||
.page(page)
|
||||
.lineBreaks(searchTextWithTextPositionDto.getLineBreaks())
|
||||
.positions(searchTextWithTextPositionDto.getPositions())
|
||||
.stringIdxToPositionIdx(searchTextWithTextPositionDto.getStringCoordsToPositionCoords())
|
||||
.boundary(new Boundary(stringOffset, stringOffset + searchTextWithTextPositionDto.getSearchText().length()))
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
public static AtomicTextBlock empty(Long textBlockIdx, int stringOffset, Page page, int numberOnPage, SemanticNode parent) {
|
||||
|
||||
return AtomicTextBlock.builder()
|
||||
@ -96,8 +73,8 @@ public class AtomicTextBlock implements TextBlock {
|
||||
}
|
||||
|
||||
|
||||
public static AtomicTextBlock fromAtomicTextBlockData(AtomicTextBlockData atomicTextBlockData,
|
||||
AtomicPositionBlockData atomicPositionBlockData,
|
||||
public static AtomicTextBlock fromAtomicTextBlockData(DocumentTextData atomicTextBlockData,
|
||||
DocumentPositionData atomicPositionBlockData,
|
||||
SemanticNode parent,
|
||||
Page page) {
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock;
|
||||
package com.iqser.red.service.redaction.v1.server.document.graph.textblock;
|
||||
|
||||
import static java.lang.String.format;
|
||||
|
||||
@ -10,8 +10,8 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Page;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.Data;
|
||||
@ -1,4 +1,4 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock;
|
||||
package com.iqser.red.service.redaction.v1.server.document.graph.textblock;
|
||||
|
||||
import static java.lang.String.format;
|
||||
|
||||
@ -10,9 +10,9 @@ import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RectangleTransformations;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.document.utils.RectangleTransformations;
|
||||
|
||||
public interface TextBlock extends CharSequence {
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock;
|
||||
package com.iqser.red.service.redaction.v1.server.document.graph.textblock;
|
||||
|
||||
import java.util.Set;
|
||||
import java.util.function.BiConsumer;
|
||||
@ -1,8 +1,6 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.services;
|
||||
package com.iqser.red.service.redaction.v1.server.document.services;
|
||||
|
||||
import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.anyMatch;
|
||||
import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.getExpandedEndByRegex;
|
||||
import static com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility.getExpandedStartByRegex;
|
||||
import static com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility.anyMatch;
|
||||
import static com.iqser.red.service.redaction.v1.server.redaction.utils.SeparatorUtils.boundaryIsSurroundedBySeparators;
|
||||
import static java.util.stream.Collectors.toMap;
|
||||
|
||||
@ -22,20 +20,20 @@ import org.kie.api.runtime.KieSession;
|
||||
|
||||
import com.google.common.base.Functions;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.ConsecutiveBoundaryCollector;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.EntityType;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionPosition;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.NodeType;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.SemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Table;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.TableCell;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RectangleTransformations;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RedactionSearchUtility;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.DocumentTree;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.EntityType;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionPosition;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.NodeType;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Table;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.TableCell;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.document.utils.RectangleTransformations;
|
||||
import com.iqser.red.service.redaction.v1.server.document.utils.RedactionSearchUtility;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.ConsecutiveBoundaryCollector;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntitiesAdapter;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.SearchImplementation;
|
||||
@ -450,14 +448,14 @@ public class EntityCreationService {
|
||||
|
||||
public Optional<RedactionEntity> byPrefixExpansionRegex(RedactionEntity entity, String regexPattern) {
|
||||
|
||||
int expandedStart = getExpandedStartByRegex(entity, regexPattern);
|
||||
int expandedStart = RedactionSearchUtility.getExpandedStartByRegex(entity, regexPattern);
|
||||
return byBoundary(new Boundary(expandedStart, entity.getBoundary().end()), entity.getType(), entity.getEntityType(), entity.getDeepestFullyContainingNode());
|
||||
}
|
||||
|
||||
|
||||
public Optional<RedactionEntity> bySuffixExpansionRegex(RedactionEntity entity, String regexPattern) {
|
||||
|
||||
int expandedEnd = getExpandedEndByRegex(entity, regexPattern);
|
||||
int expandedEnd = RedactionSearchUtility.getExpandedEndByRegex(entity, regexPattern);
|
||||
expandedEnd = truncateEndIfLineBreakIsBetween(entity.getBoundary().end(), expandedEnd, entity.getDeepestFullyContainingNode().getTextBlock());
|
||||
return byBoundary(new Boundary(entity.getBoundary().start(), expandedEnd), entity.getType(), entity.getEntityType(), entity.getDeepestFullyContainingNode());
|
||||
}
|
||||
@ -1,4 +1,4 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.services;
|
||||
package com.iqser.red.service.redaction.v1.server.document.services;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
@ -6,8 +6,8 @@ import java.util.Objects;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
@ -1,14 +1,14 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.services;
|
||||
package com.iqser.red.service.redaction.v1.server.document.services;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.NoSuchElementException;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualResizeRedaction;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionPosition;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Image;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.SemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils.RectangleTransformations;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.SemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionPosition;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.nodes.Image;
|
||||
import com.iqser.red.service.redaction.v1.server.document.utils.RectangleTransformations;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils;
|
||||
package com.iqser.red.service.redaction.v1.server.document.utils;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.awt.geom.RectangularShape;
|
||||
@ -12,39 +12,16 @@ import java.util.function.Function;
|
||||
import java.util.function.Supplier;
|
||||
import java.util.stream.Collector;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Point;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Rectangle;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.AbstractPageBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.AtomicTextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.AtomicTextBlock;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
public class RectangleTransformations {
|
||||
|
||||
public static PDRectangle toPDRectangleBBox(List<Rectangle> rectangles) {
|
||||
|
||||
Rectangle2D rectangle2D = RectangleTransformations.rectangleBBox(rectangles);
|
||||
|
||||
PDRectangle annotationPosition = new PDRectangle();
|
||||
annotationPosition.setLowerLeftX((float) rectangle2D.getMinX());
|
||||
annotationPosition.setLowerLeftY((float) rectangle2D.getMinY());
|
||||
annotationPosition.setUpperRightX((float) rectangle2D.getMaxX());
|
||||
annotationPosition.setUpperRightY((float) rectangle2D.getMaxY());
|
||||
return annotationPosition;
|
||||
}
|
||||
|
||||
public static Rectangle2D abstractPageBlockBBox(List<AbstractPageBlock> abstractPageBlocks) {
|
||||
|
||||
return abstractPageBlocks.stream()
|
||||
.map(abstractPageBlock -> new Rectangle2D.Double(abstractPageBlock.getMinX(),
|
||||
abstractPageBlock.getMinY(),
|
||||
abstractPageBlock.getWidth(),
|
||||
abstractPageBlock.getHeight())).collect(new Rectangle2DBBoxCollector());
|
||||
}
|
||||
|
||||
|
||||
public static Rectangle2D atomicTextBlockBBox(List<AtomicTextBlock> atomicTextBlocks) {
|
||||
|
||||
@ -114,12 +91,6 @@ public class RectangleTransformations {
|
||||
}
|
||||
|
||||
|
||||
public static Rectangle2D toRectangle2D(PDRectangle cropBox) {
|
||||
|
||||
return new Rectangle2D.Double(cropBox.getLowerLeftX(), cropBox.getLowerLeftY(), cropBox.getWidth(), cropBox.getHeight());
|
||||
}
|
||||
|
||||
|
||||
private static class Rectangle2DBBoxCollector implements Collector<Rectangle2D, Rectangle2DBBoxCollector.BBox, Rectangle2D> {
|
||||
|
||||
@Override
|
||||
@ -1,18 +1,17 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.document.utils;
|
||||
package com.iqser.red.service.redaction.v1.server.document.utils;
|
||||
|
||||
import static java.lang.String.format;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.Boundary;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.entity.RedactionEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.document.graph.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.Patterns;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
@ -1,80 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.image.ImageServiceResponse;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.ClassificationPage;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.image.ClassifiedImage;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.ImageType;
|
||||
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class ImageServiceResponseAdapter {
|
||||
|
||||
private final ObjectMapper objectMapper;
|
||||
private final RedactionStorageService redactionStorageService;
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public Map<Integer, List<ClassifiedImage>> convertImages(String dossierId, String fileId) {
|
||||
|
||||
var imageClassificationStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(dossierId, fileId, FileType.IMAGE_INFO));
|
||||
|
||||
ImageServiceResponse imageServiceResponse = objectMapper.readValue(imageClassificationStream, ImageServiceResponse.class);
|
||||
|
||||
Map<Integer, List<ClassifiedImage>> images = new HashMap<>();
|
||||
imageServiceResponse.getData().forEach(imageMetadata -> {
|
||||
var classification = imageMetadata.getFilters().isAllPassed() ? ImageType.valueOf(imageMetadata.getClassification()
|
||||
.getLabel()
|
||||
.toUpperCase(Locale.ROOT)) : ImageType.OTHER;
|
||||
images.computeIfAbsent(imageMetadata.getPosition().getPageNumber(), x -> new ArrayList<>())
|
||||
.add(new ClassifiedImage(new Rectangle2D.Double(imageMetadata.getPosition().getX1(),
|
||||
imageMetadata.getPosition().getY1(),
|
||||
imageMetadata.getGeometry().getWidth(),
|
||||
imageMetadata.getGeometry().getHeight()), classification, imageMetadata.isAlpha(), imageMetadata.getPosition().getPageNumber()));
|
||||
});
|
||||
|
||||
// Currently This is a copy but, it will be changed later because i don' t think that we should unclassified images.
|
||||
imageServiceResponse.getDataCV().forEach(imageMetadata -> {
|
||||
var classification = imageMetadata.getFilters().isAllPassed() ? ImageType.valueOf(imageMetadata.getClassification()
|
||||
.getLabel()
|
||||
.toUpperCase(Locale.ROOT)) : ImageType.OTHER;
|
||||
images.computeIfAbsent(imageMetadata.getPosition().getPageNumber(), x -> new ArrayList<>())
|
||||
.add(new ClassifiedImage(new Rectangle2D.Double(imageMetadata.getPosition().getX1(),
|
||||
imageMetadata.getPosition().getY1(),
|
||||
imageMetadata.getGeometry().getWidth(),
|
||||
imageMetadata.getGeometry().getHeight()), classification, imageMetadata.isAlpha(), imageMetadata.getPosition().getPageNumber()));
|
||||
});
|
||||
|
||||
return images;
|
||||
}
|
||||
|
||||
|
||||
public void findOcr(ClassificationPage page) {
|
||||
|
||||
page.getImages().forEach(image -> {
|
||||
if (image.getImageType().equals(ImageType.OTHER)) {
|
||||
page.getTextBlocks().forEach(textblock -> {
|
||||
if (image.getPosition().contains(textblock.getMinX(), textblock.getMinY(), textblock.getWidth(), textblock.getHeight())) {
|
||||
image.setImageType(ImageType.OCR);
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
@ -1,63 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.table.PdfTableCell;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.table.TableCells;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.table.TableServiceResponse;
|
||||
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class TableServiceResponseAdapter {
|
||||
|
||||
private final ObjectMapper objectMapper;
|
||||
private final RedactionStorageService redactionStorageService;
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public Map<Integer, List<PdfTableCell>> convertTables(String dossierId, String fileId) {
|
||||
|
||||
var tableClassificationStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(dossierId, fileId, FileType.TABLES));
|
||||
|
||||
TableServiceResponse tableServiceResponse = objectMapper.readValue(tableClassificationStream, TableServiceResponse.class);
|
||||
|
||||
Map<Integer, List<PdfTableCell>> tableCells = new HashMap<>();
|
||||
tableServiceResponse.getData()
|
||||
.forEach(tableData -> tableCells.computeIfAbsent(tableData.getPageInfo().getNumber(), tableCell -> new ArrayList<>())
|
||||
.addAll(convertTableCells(tableData.getTableCells())));
|
||||
|
||||
return tableCells;
|
||||
}
|
||||
|
||||
|
||||
private Collection<? extends PdfTableCell> convertTableCells(List<TableCells> tableCells) {
|
||||
|
||||
List<PdfTableCell> pdfTableCells = new ArrayList<>();
|
||||
|
||||
tableCells.forEach(t -> pdfTableCells.add(PdfTableCell.builder()
|
||||
.y0(t.getY0())
|
||||
.x1(t.getX1())
|
||||
.y1(t.getY1())
|
||||
.x0(t.getX0())
|
||||
.width(t.getWidth())
|
||||
.height(t.getHeight())
|
||||
.build()));
|
||||
|
||||
return pdfTableCells;
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,14 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.image;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
 * Classification part of an image's metadata: a label plus a map of float values keyed by string.
 * ImageServiceResponseAdapter upper-cases {@code label} and resolves it to an ImageType.
 */
@Data
|
||||
public class Classification {
|
||||
|
||||
// Starts as an empty map; presumably one probability per candidate label - TODO confirm.
private Map<String, Float> probabilities = new HashMap<>();
|
||||
private String label;
|
||||
|
||||
}
|
||||
@ -1,11 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.image;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
 * Geometry-related filter results of an image, split into an {@link ImageSize} and an {@link ImageFormat} part.
 */
@Data
|
||||
public class FilterGeometry {
|
||||
|
||||
private ImageSize imageSize;
|
||||
private ImageFormat imageFormat;
|
||||
|
||||
}
|
||||
@ -1,12 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.image;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
 * Filter results attached to an image's metadata.
 * ImageServiceResponseAdapter only uses the classification label when {@code allPassed} is true;
 * otherwise the image is typed as OTHER.
 */
@Data
|
||||
public class Filters {
|
||||
|
||||
private FilterGeometry geometry;
|
||||
private Probability probability;
|
||||
private boolean allPassed;
|
||||
|
||||
}
|
||||
@ -1,11 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.image;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
 * Width and height of an image.
 * ImageServiceResponseAdapter uses both values as the size of the image rectangle it builds.
 */
@Data
|
||||
public class Geometry {
|
||||
|
||||
private float width;
|
||||
private float height;
|
||||
|
||||
}
|
||||
@ -1,12 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.image;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
 * Format-related filter result of an image: a quotient plus "too tall" / "too wide" flags.
 */
@Data
|
||||
public class ImageFormat {
|
||||
|
||||
// Presumably a ratio of the image's side lengths - TODO confirm against the image service.
private float quotient;
|
||||
private boolean tooTall;
|
||||
|
||||
private boolean tooWide;
|
||||
|
||||
}
|
||||
@ -1,14 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.image;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
 * One image entry of an {@link ImageServiceResponse}: classification, position, geometry, filter results and an alpha flag.
 * ImageServiceResponseAdapter turns each entry into a ClassifiedImage.
 */
@Data
|
||||
public class ImageMetadata {
|
||||
|
||||
private Classification classification;
|
||||
private Position position;
|
||||
private Geometry geometry;
|
||||
private Filters filters;
|
||||
// Passed through unchanged to the ClassifiedImage constructor by ImageServiceResponseAdapter.
private boolean alpha;
|
||||
|
||||
}
|
||||
@ -1,46 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.image;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonAlias;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
public class ImageServiceResponse {
|
||||
|
||||
private String dossierId;
|
||||
private String fileId;
|
||||
|
||||
@JsonProperty(value = "imageMetadata")
|
||||
@JsonAlias("data")
|
||||
private List<ImageMetadata> data = new ArrayList<>();
|
||||
|
||||
private List<ImageMetadata> dataCV = new ArrayList<>();
|
||||
|
||||
|
||||
@JsonProperty(value = "imageMetadata")
|
||||
@JsonAlias("data")
|
||||
public void setData(List<ImageMetadata> data) {this.data = data;}
|
||||
|
||||
|
||||
public List<ImageMetadata> getData() {
|
||||
|
||||
if (this.data == null) {
|
||||
this.data = new ArrayList<>();
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
|
||||
public List<ImageMetadata> getDataCV() {
|
||||
|
||||
if (this.dataCV == null) {
|
||||
this.dataCV = new ArrayList<>();
|
||||
}
|
||||
return dataCV;
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,12 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.image;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
 * Size-related filter result of an image: a quotient plus "too large" / "too small" flags.
 */
@Data
|
||||
public class ImageSize {
|
||||
|
||||
// Presumably the image size relative to some reference size - TODO confirm against the image service.
private float quotient;
|
||||
private boolean tooLarge;
|
||||
private boolean tooSmall;
|
||||
|
||||
}
|
||||
@ -1,14 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.image;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
 * Position of an image: two x values, two y values and a page number.
 * ImageServiceResponseAdapter uses ({@code x1}, {@code y1}) as the origin of the image rectangle
 * and {@code pageNumber} as the key under which the image is grouped.
 */
@Data
|
||||
public class Position {
|
||||
|
||||
private float x1;
|
||||
private float x2;
|
||||
private float y1;
|
||||
private float y2;
|
||||
private int pageNumber;
|
||||
|
||||
}
|
||||
@ -1,10 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.image;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
 * Probability-related filter result of an image, consisting of a single "unconfident" flag.
 */
@Data
|
||||
public class Probability {
|
||||
|
||||
private boolean unconfident;
|
||||
|
||||
}
|
||||
@ -1,13 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.table;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
 * Page information attached to a {@link TableData} entry: page number, rotation, width and height.
 * TableServiceResponseAdapter uses {@code number} as the key under which table cells are grouped.
 */
@Data
|
||||
public class PageInfo {
|
||||
|
||||
private int number;
|
||||
private int rotation;
|
||||
private float width;
|
||||
private float height;
|
||||
|
||||
}
|
||||
@ -1,21 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.table;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
/**
 * Table cell described by two corner coordinates plus width and height.
 * Instances are built by TableServiceResponseAdapter from {@link TableCells} with a field-by-field copy.
 * Field order is kept as-is because it determines the parameter order of the generated all-args constructor.
 */
@Data
|
||||
@Builder
|
||||
@AllArgsConstructor
|
||||
@RequiredArgsConstructor
|
||||
public class PdfTableCell {
|
||||
|
||||
private float x0;
|
||||
private float y0;
|
||||
private float x1;
|
||||
private float y1;
|
||||
private float width;
|
||||
private float height;
|
||||
|
||||
}
|
||||
@ -1,15 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.table;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
 * Table cell as contained in a {@link TableData} entry: two corner coordinates plus width and height.
 * TableServiceResponseAdapter copies these values one-to-one into a {@link PdfTableCell}.
 */
@Data
|
||||
public class TableCells {
|
||||
|
||||
private float x0;
|
||||
private float y0;
|
||||
private float x1;
|
||||
private float y1;
|
||||
private float width;
|
||||
private float height;
|
||||
|
||||
}
|
||||
@ -1,14 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.table;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
 * One entry of a {@link TableServiceResponse}: the page information and the table cells that belong to it.
 */
@Data
|
||||
public class TableData {
|
||||
|
||||
private PageInfo pageInfo;
|
||||
// Starts as an empty list, so a freshly constructed entry has no cells rather than null.
private List<TableCells> tableCells = new ArrayList<>();
|
||||
|
||||
}
|
||||
@ -1,19 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.table;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
 * Response of the table service for one file.
 * TableServiceResponseAdapter deserializes it from the stored TABLES object and reads {@code data}.
 */
@Data
|
||||
public class TableServiceResponse {
|
||||
|
||||
private String dossierId;
|
||||
private String fileId;
|
||||
private String operation;
|
||||
private String targetFileExtension;
|
||||
private String responseFileExtension;
|
||||
|
||||
// Starts as an empty list, so a freshly constructed response has no table data rather than null.
private List<TableData> data = new ArrayList<>();
|
||||
|
||||
}
|
||||
@ -1,88 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Rectangle;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text.TextPageBlock;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@AllArgsConstructor
|
||||
@NoArgsConstructor
|
||||
public abstract class AbstractPageBlock {
|
||||
|
||||
@JsonIgnore
|
||||
protected float minX;
|
||||
@JsonIgnore
|
||||
protected float maxX;
|
||||
@JsonIgnore
|
||||
protected float minY;
|
||||
@JsonIgnore
|
||||
protected float maxY;
|
||||
@JsonIgnore
|
||||
protected PageBlockType classification;
|
||||
@JsonIgnore
|
||||
protected int page;
|
||||
|
||||
int columnIndex;
|
||||
|
||||
@JsonIgnore
|
||||
private Orientation orientation = Orientation.NONE;
|
||||
|
||||
|
||||
public abstract String getText();
|
||||
|
||||
|
||||
public boolean isHeadline() {
|
||||
|
||||
return this instanceof TextPageBlock && this.getClassification() != null && this.getClassification().isHeadline();
|
||||
}
|
||||
|
||||
|
||||
public boolean containsBlock(TextPageBlock other) {
|
||||
|
||||
return this.minX <= other.getPdfMinX() && this.maxX >= other.getPdfMaxX() && this.minY >= other.getPdfMinY() && this.maxY <= other.getPdfMaxY();
|
||||
}
|
||||
|
||||
|
||||
public boolean contains(AbstractPageBlock other) {
|
||||
|
||||
return this.minX <= other.minX && this.maxX >= other.maxX && this.minY >= other.minY && this.maxY <= other.maxY;
|
||||
}
|
||||
|
||||
|
||||
public boolean contains(Rectangle other) {
|
||||
|
||||
return page == other.getPage() && this.minX <= other.getTopLeft().getX() && this.maxX >= other.getTopLeft().getX() + other.getWidth() && this.minY <= other.getTopLeft()
|
||||
.getY() && this.maxY >= other.getTopLeft().getY() + other.getHeight();
|
||||
}
|
||||
|
||||
|
||||
@JsonIgnore
|
||||
public float getHeight() {
|
||||
|
||||
return maxY - minY;
|
||||
}
|
||||
|
||||
|
||||
@JsonIgnore
|
||||
public float getWidth() {
|
||||
|
||||
return maxX - minX;
|
||||
}
|
||||
|
||||
|
||||
public boolean intersectsY(AbstractPageBlock atc) {
|
||||
|
||||
return this.minY <= atc.getMaxY() && this.maxY >= atc.getMinY();
|
||||
}
|
||||
|
||||
|
||||
public boolean intersectsX(AbstractPageBlock atc) {
|
||||
|
||||
return this.minX <= atc.getMaxX() && this.maxX >= atc.getMinX();
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,33 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.section.SectionGrid;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text.StringFrequencyCounter;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text.UnclassifiedText;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.DictionaryVersion;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@NoArgsConstructor
|
||||
public class ClassificationDocument {
|
||||
|
||||
private List<ClassificationPage> pages = new ArrayList<>();
|
||||
private List<ClassificationSection> sections = new ArrayList<>();
|
||||
private List<ClassificationHeader> headers = new ArrayList<>();
|
||||
private List<ClassificationFooter> footers = new ArrayList<>();
|
||||
private List<UnclassifiedText> unclassifiedTexts = new ArrayList<>();
|
||||
private FloatFrequencyCounter textHeightCounter = new FloatFrequencyCounter();
|
||||
private FloatFrequencyCounter fontSizeCounter = new FloatFrequencyCounter();
|
||||
private StringFrequencyCounter fontCounter = new StringFrequencyCounter();
|
||||
private StringFrequencyCounter fontStyleCounter = new StringFrequencyCounter();
|
||||
private boolean headlines;
|
||||
|
||||
private SectionGrid sectionGrid = new SectionGrid();
|
||||
private DictionaryVersion dictionaryVersion;
|
||||
private long rulesVersion;
|
||||
|
||||
}
|
||||
@ -1,16 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text.TextPageBlock;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
@AllArgsConstructor
|
||||
public class ClassificationFooter {
|
||||
|
||||
private List<TextPageBlock> textBlocks;
|
||||
|
||||
}
|
||||
@ -1,16 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text.TextPageBlock;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
@AllArgsConstructor
|
||||
public class ClassificationHeader {
|
||||
|
||||
private List<TextPageBlock> textBlocks;
|
||||
|
||||
}
|
||||
@ -1,38 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Rectangle;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.image.ClassifiedImage;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text.StringFrequencyCounter;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.NonNull;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
@Data
|
||||
@RequiredArgsConstructor
|
||||
public class ClassificationPage {
|
||||
|
||||
@NonNull
|
||||
private List<AbstractPageBlock> textBlocks;
|
||||
|
||||
private List<ClassifiedImage> images = new ArrayList<>();
|
||||
|
||||
private Rectangle bodyTextFrame;
|
||||
|
||||
private boolean landscape;
|
||||
private int rotation;
|
||||
|
||||
private int pageNumber;
|
||||
|
||||
private FloatFrequencyCounter textHeightCounter = new FloatFrequencyCounter();
|
||||
private FloatFrequencyCounter fontSizeCounter = new FloatFrequencyCounter();
|
||||
private StringFrequencyCounter fontCounter = new StringFrequencyCounter();
|
||||
private StringFrequencyCounter fontStyleCounter = new StringFrequencyCounter();
|
||||
|
||||
private float pageWidth;
|
||||
private float pageHeight;
|
||||
|
||||
}
|
||||
@ -1,32 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.image.ClassifiedImage;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.table.TablePageBlock;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@NoArgsConstructor
|
||||
public class ClassificationSection {
|
||||
|
||||
private List<AbstractPageBlock> pageBlocks = new ArrayList<>();
|
||||
private List<ClassifiedImage> images = new ArrayList<>();
|
||||
private String headline;
|
||||
|
||||
|
||||
public List<TablePageBlock> getTables() {
|
||||
|
||||
List<TablePageBlock> tables = new ArrayList<>();
|
||||
pageBlocks.forEach(block -> {
|
||||
if (block instanceof TablePageBlock) {
|
||||
tables.add((TablePageBlock) block);
|
||||
}
|
||||
});
|
||||
return tables;
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,14 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
|
||||
@AllArgsConstructor
|
||||
public class Column {
|
||||
|
||||
int index;
|
||||
ColumnType columnType;
|
||||
Rectangle2D bBox;
|
||||
|
||||
}
|
||||
@ -1,6 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model;
|
||||
|
||||
/**
 * Kinds of column.
 * NOTE(review): presumably RULING means "delimited by ruling lines" and DISTANCE "delimited by spacing" - confirm.
 */
public enum ColumnType {

    RULING, DISTANCE

}
|
||||
@ -1,77 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import lombok.Getter;
|
||||
|
||||
public class FloatFrequencyCounter {
|
||||
|
||||
@Getter
|
||||
Map<Float, Integer> countPerValue = new HashMap<>();
|
||||
|
||||
|
||||
public void add(float value) {
|
||||
|
||||
if (!countPerValue.containsKey(value)) {
|
||||
countPerValue.put(value, 1);
|
||||
} else {
|
||||
countPerValue.put(value, countPerValue.get(value) + 1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void addAll(Map<Float, Integer> otherCounter) {
|
||||
|
||||
for (Map.Entry<Float, Integer> entry : otherCounter.entrySet()) {
|
||||
if (countPerValue.containsKey(entry.getKey())) {
|
||||
countPerValue.put(entry.getKey(), countPerValue.get(entry.getKey()) + entry.getValue());
|
||||
} else {
|
||||
countPerValue.put(entry.getKey(), entry.getValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public Float getMostPopular() {
|
||||
|
||||
Map.Entry<Float, Integer> mostPopular = null;
|
||||
for (Map.Entry<Float, Integer> entry : countPerValue.entrySet()) {
|
||||
if (mostPopular == null || entry.getValue() >= mostPopular.getValue()) {
|
||||
mostPopular = entry;
|
||||
}
|
||||
}
|
||||
return mostPopular != null ? mostPopular.getKey() : null;
|
||||
}
|
||||
|
||||
|
||||
public List<Float> getHighterThanMostPopular() {
|
||||
|
||||
Float mostPopular = getMostPopular();
|
||||
List<Float> higher = new ArrayList<>();
|
||||
for (Float value : countPerValue.keySet()) {
|
||||
if (value > mostPopular) {
|
||||
higher.add(value);
|
||||
}
|
||||
}
|
||||
|
||||
return higher.stream().sorted(Collections.reverseOrder()).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
||||
public Float getHighest() {
|
||||
|
||||
Float highest = null;
|
||||
for (Float value : countPerValue.keySet()) {
|
||||
if (highest == null || value > highest) {
|
||||
highest = value;
|
||||
}
|
||||
}
|
||||
return highest;
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,8 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model;
|
||||
|
||||
/**
 * Orientation values: {@code NONE}, {@code LEFT} and {@code RIGHT}.
 */
public enum Orientation {

    NONE, LEFT, RIGHT

}
|
||||
@ -1,38 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model;
|
||||
|
||||
/**
 * Classification labels for page blocks: six headline levels plus header, footer, title,
 * paragraph variants, other and table.
 */
public enum PageBlockType {

    H1, H2, H3, H4, H5, H6,
    HEADER, FOOTER, TITLE,
    PARAGRAPH, PARAGRAPH_BOLD, PARAGRAPH_ITALIC, PARAGRAPH_UNKNOWN,
    OTHER, TABLE;


    /**
     * Maps a numeric headline level to the matching headline type.
     *
     * @param i the headline level
     * @return {@code H1}..{@code H5} for levels 1 to 5; {@code H6} for every other value
     */
    public static PageBlockType getHeadlineType(int i) {

        switch (i) {
            case 1:
                return H1;
            case 2:
                return H2;
            case 3:
                return H3;
            case 4:
                return H4;
            case 5:
                return H5;
            default:
                return H6;
        }
    }


    /**
     * @return {@code true} if this type is one of {@code H1} to {@code H6}
     */
    public boolean isHeadline() {

        switch (this) {
            case H1:
            case H2:
            case H3:
            case H4:
            case H5:
            case H6:
                return true;
            default:
                return false;
        }
    }
}
|
||||
@ -1,25 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.image;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.ImageType;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.NonNull;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
@Data
|
||||
@RequiredArgsConstructor
|
||||
public class ClassifiedImage {
|
||||
|
||||
@NonNull
|
||||
private Rectangle2D position;
|
||||
@NonNull
|
||||
private ImageType imageType;
|
||||
private boolean isAppendedToSection;
|
||||
@NonNull
|
||||
private boolean hasTransparency;
|
||||
@NonNull
|
||||
private int page;
|
||||
|
||||
}
|
||||
@ -1,79 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.table;
|
||||
|
||||
import java.awt.geom.Point2D;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text.TextPageBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text.TextPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@SuppressWarnings("serial")
|
||||
@Data
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@NoArgsConstructor
|
||||
public class Cell extends Rectangle {
|
||||
|
||||
private List<TextPageBlock> textBlocks = new ArrayList<>();
|
||||
|
||||
private List<Cell> headerCells = new ArrayList<>();
|
||||
|
||||
private boolean isHeaderCell;
|
||||
|
||||
private static final int MIN_SIZE = 1;
|
||||
|
||||
private int pageNumber;
|
||||
|
||||
|
||||
public Cell(Point2D topLeft, Point2D bottomRight) {
|
||||
|
||||
super((float) topLeft.getY(), (float) topLeft.getX(), (float) (bottomRight.getX() - topLeft.getX()), (float) (bottomRight.getY() - topLeft.getY()));
|
||||
}
|
||||
|
||||
|
||||
public void addTextBlock(TextPageBlock textBlock) {
|
||||
|
||||
textBlocks.add(textBlock);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
Iterator<TextPageBlock> itty = textBlocks.iterator();
|
||||
TextPositionSequence previous = null;
|
||||
while (itty.hasNext()) {
|
||||
|
||||
TextPageBlock textBlock = itty.next();
|
||||
|
||||
for (TextPositionSequence word : textBlock.getSequences()) {
|
||||
if (previous != null) {
|
||||
if (Math.abs(previous.getMaxYDirAdj() - word.getMaxYDirAdj()) > word.getTextHeight()) {
|
||||
sb.append('\n');
|
||||
} else {
|
||||
sb.append(' ');
|
||||
}
|
||||
}
|
||||
sb.append(word.toString());
|
||||
previous = word;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return TextNormalizationUtilities.removeHyphenLineBreaks(sb.toString()).replaceAll("\n", " ").replaceAll(" {2}", " ");
|
||||
}
|
||||
|
||||
|
||||
public boolean hasMinimumSize() {
|
||||
|
||||
return this.getHeight() >= MIN_SIZE && this.getWidth() >= MIN_SIZE;
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,22 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.table;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.Value;
|
||||
|
||||
@Value
|
||||
@RequiredArgsConstructor
|
||||
public class CellPosition implements Comparable<CellPosition> {
|
||||
|
||||
int row;
|
||||
|
||||
int col;
|
||||
|
||||
|
||||
@Override
|
||||
public int compareTo(CellPosition other) {
|
||||
|
||||
int rowDiff = row - other.row;
|
||||
return rowDiff != 0 ? rowDiff : col - other.col;
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,15 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.table;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
public class CleanRulings {
|
||||
|
||||
List<Ruling> horizontal;
|
||||
List<Ruling> vertical;
|
||||
|
||||
}
|
||||
@ -1,218 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.table;
|
||||
|
||||
import java.awt.geom.Point2D;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
|
||||
@SuppressWarnings("all")
|
||||
public class Rectangle extends Rectangle2D.Float {
|
||||
|
||||
protected static final float VERTICAL_COMPARISON_THRESHOLD = 0.4f;
|
||||
/**
|
||||
* Ill-defined comparator, from when Rectangle was Comparable.
|
||||
* <p>
|
||||
* see https://github.com/tabulapdf/tabula-java/issues/116
|
||||
*
|
||||
* @deprecated with no replacement
|
||||
*/
|
||||
@Deprecated
|
||||
public static final Comparator<Rectangle> ILL_DEFINED_ORDER = new Comparator<Rectangle>() {
|
||||
@Override
|
||||
public int compare(Rectangle o1, Rectangle o2) {
|
||||
|
||||
if (o1.equals(o2)) {
|
||||
return 0;
|
||||
}
|
||||
if (o1.verticalOverlap(o2) > VERTICAL_COMPARISON_THRESHOLD) {
|
||||
return o1.isLtrDominant() == -1 && o2.isLtrDominant() == -1 ? -java.lang.Double.compare(o1.getX(), o2.getX()) : java.lang.Double.compare(o1.getX(), o2.getX());
|
||||
} else {
|
||||
return java.lang.Float.compare(o1.getBottom(), o2.getBottom());
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
public Rectangle() {
|
||||
|
||||
super();
|
||||
}
|
||||
|
||||
|
||||
public Rectangle(float top, float left, float width, float height) {
|
||||
|
||||
super();
|
||||
this.setRect(left, top, width, height);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @param rectangles
|
||||
* @return minimum bounding box that contains all the rectangles
|
||||
*/
|
||||
public static Rectangle boundingBoxOf(List<? extends Rectangle> rectangles) {
|
||||
|
||||
float minx = java.lang.Float.MAX_VALUE;
|
||||
float miny = java.lang.Float.MAX_VALUE;
|
||||
float maxx = java.lang.Float.MIN_VALUE;
|
||||
float maxy = java.lang.Float.MIN_VALUE;
|
||||
|
||||
for (Rectangle r : rectangles) {
|
||||
minx = (float) Math.min(r.getMinX(), minx);
|
||||
miny = (float) Math.min(r.getMinY(), miny);
|
||||
maxx = (float) Math.max(r.getMaxX(), maxx);
|
||||
maxy = (float) Math.max(r.getMaxY(), maxy);
|
||||
}
|
||||
return new Rectangle(miny, minx, maxx - minx, maxy - miny);
|
||||
}
|
||||
|
||||
|
||||
public int compareTo(Rectangle other) {
|
||||
|
||||
return ILL_DEFINED_ORDER.compare(this, other);
|
||||
}
|
||||
|
||||
|
||||
// I'm bad at Java and need this for fancy sorting in
|
||||
// technology.tabula.TextChunk.
|
||||
public int isLtrDominant() {
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
public float getArea() {
|
||||
|
||||
return this.width * this.height;
|
||||
}
|
||||
|
||||
|
||||
public float verticalOverlap(Rectangle other) {
|
||||
|
||||
return Math.max(0, Math.min(this.getBottom(), other.getBottom()) - Math.max(this.getTop(), other.getTop()));
|
||||
}
|
||||
|
||||
|
||||
public boolean verticallyOverlaps(Rectangle other) {
|
||||
|
||||
return verticalOverlap(other) > 0;
|
||||
}
|
||||
|
||||
|
||||
public float horizontalOverlap(Rectangle other) {
|
||||
|
||||
return Math.max(0, Math.min(this.getRight(), other.getRight()) - Math.max(this.getLeft(), other.getLeft()));
|
||||
}
|
||||
|
||||
|
||||
public boolean horizontallyOverlaps(Rectangle other) {
|
||||
|
||||
return horizontalOverlap(other) > 0;
|
||||
}
|
||||
|
||||
|
||||
public float verticalOverlapRatio(Rectangle other) {
|
||||
|
||||
float rv = 0, delta = Math.min(this.getBottom() - this.getTop(), other.getBottom() - other.getTop());
|
||||
|
||||
if (other.getTop() <= this.getTop() && this.getTop() <= other.getBottom() && other.getBottom() <= this.getBottom()) {
|
||||
rv = (other.getBottom() - this.getTop()) / delta;
|
||||
} else if (this.getTop() <= other.getTop() && other.getTop() <= this.getBottom() && this.getBottom() <= other.getBottom()) {
|
||||
rv = (this.getBottom() - other.getTop()) / delta;
|
||||
} else if (this.getTop() <= other.getTop() && other.getTop() <= other.getBottom() && other.getBottom() <= this.getBottom()) {
|
||||
rv = (other.getBottom() - other.getTop()) / delta;
|
||||
} else if (other.getTop() <= this.getTop() && this.getTop() <= this.getBottom() && this.getBottom() <= other.getBottom()) {
|
||||
rv = (this.getBottom() - this.getTop()) / delta;
|
||||
}
|
||||
|
||||
return rv;
|
||||
|
||||
}
|
||||
|
||||
|
||||
public float overlapRatio(Rectangle other) {
|
||||
|
||||
double intersectionWidth = Math.max(0, Math.min(this.getRight(), other.getRight()) - Math.max(this.getLeft(), other.getLeft()));
|
||||
double intersectionHeight = Math.max(0, Math.min(this.getBottom(), other.getBottom()) - Math.max(this.getTop(), other.getTop()));
|
||||
double intersectionArea = Math.max(0, intersectionWidth * intersectionHeight);
|
||||
double unionArea = this.getArea() + other.getArea() - intersectionArea;
|
||||
|
||||
return (float) (intersectionArea / unionArea);
|
||||
}
|
||||
|
||||
|
||||
public Rectangle merge(Rectangle other) {
|
||||
|
||||
this.setRect(this.createUnion(other));
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
public float getTop() {
|
||||
|
||||
return (float) this.getMinY();
|
||||
}
|
||||
|
||||
|
||||
public void setTop(float top) {
|
||||
|
||||
float deltaHeight = top - this.y;
|
||||
this.setRect(this.x, top, this.width, this.height - deltaHeight);
|
||||
}
|
||||
|
||||
|
||||
public float getRight() {
|
||||
|
||||
return (float) this.getMaxX();
|
||||
}
|
||||
|
||||
|
||||
public void setRight(float right) {
|
||||
|
||||
this.setRect(this.x, this.y, right - this.x, this.height);
|
||||
}
|
||||
|
||||
|
||||
public float getLeft() {
|
||||
|
||||
return (float) this.getMinX();
|
||||
}
|
||||
|
||||
|
||||
public void setLeft(float left) {
|
||||
|
||||
float deltaWidth = left - this.x;
|
||||
this.setRect(left, this.y, this.width - deltaWidth, this.height);
|
||||
}
|
||||
|
||||
|
||||
public float getBottom() {
|
||||
|
||||
return (float) this.getMaxY();
|
||||
}
|
||||
|
||||
|
||||
public void setBottom(float bottom) {
|
||||
|
||||
this.setRect(this.x, this.y, this.width, bottom - this.y);
|
||||
}
|
||||
|
||||
|
||||
public Point2D[] getPoints() {
|
||||
|
||||
return new Point2D[]{new Point2D.Float(this.getLeft(), this.getTop()), new Point2D.Float(this.getRight(), this.getTop()), new Point2D.Float(this.getRight(),
|
||||
this.getBottom()), new Point2D.Float(this.getLeft(), this.getBottom())};
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
String s = super.toString();
|
||||
sb.append(s.substring(0, s.length() - 1));
|
||||
sb.append(String.format(",bottom=%f,right=%f]", this.getBottom(), this.getRight()));
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,437 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.table;
|
||||
|
||||
import java.awt.geom.Line2D;
|
||||
import java.awt.geom.Point2D;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.Formatter;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.utils.CohenSutherlandClipping;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.utils.Utils;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@SuppressWarnings("all")
|
||||
public class Ruling extends Line2D.Float {
|
||||
|
||||
private static int PERPENDICULAR_PIXEL_EXPAND_AMOUNT = 2;
|
||||
|
||||
|
||||
public Ruling(Point2D p1, Point2D p2) {
|
||||
|
||||
super(p1, p2);
|
||||
}
|
||||
|
||||
|
||||
public static List<Ruling> cropRulingsToArea(List<Ruling> rulings, Rectangle2D area) {
|
||||
|
||||
ArrayList<Ruling> rv = new ArrayList<>();
|
||||
for (Ruling r : rulings) {
|
||||
if (r.intersects(area)) {
|
||||
rv.add(r.intersect(area));
|
||||
}
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
|
||||
// log(n) implementation of find_intersections
|
||||
// based on http://people.csail.mit.edu/indyk/6.838-old/handouts/lec2.pdf
|
||||
public static Map<Point2D, Ruling[]> findIntersections(List<Ruling> horizontals, List<Ruling> verticals) {
|
||||
|
||||
class SortObject {
|
||||
|
||||
protected SOType type;
|
||||
protected float position;
|
||||
protected Ruling ruling;
|
||||
|
||||
|
||||
public SortObject(SOType type, float position, Ruling ruling) {
|
||||
|
||||
this.type = type;
|
||||
this.position = position;
|
||||
this.ruling = ruling;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
List<SortObject> sos = new ArrayList<>();
|
||||
|
||||
TreeMap<Ruling, Boolean> tree = new TreeMap<>(new Comparator<Ruling>() {
|
||||
@Override
|
||||
public int compare(Ruling o1, Ruling o2) {
|
||||
|
||||
return java.lang.Double.compare(o1.getTop(), o2.getTop());
|
||||
}
|
||||
});
|
||||
|
||||
TreeMap<Point2D, Ruling[]> rv = new TreeMap<>(new Comparator<Point2D>() {
|
||||
@Override
|
||||
public int compare(Point2D o1, Point2D o2) {
|
||||
|
||||
if (o1.getY() > o2.getY()) {
|
||||
return 1;
|
||||
}
|
||||
if (o1.getY() < o2.getY()) {
|
||||
return -1;
|
||||
}
|
||||
if (o1.getX() > o2.getX()) {
|
||||
return 1;
|
||||
}
|
||||
if (o1.getX() < o2.getX()) {
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
});
|
||||
|
||||
for (Ruling h : horizontals) {
|
||||
sos.add(new SortObject(SOType.HLEFT, h.getLeft() - PERPENDICULAR_PIXEL_EXPAND_AMOUNT, h));
|
||||
sos.add(new SortObject(SOType.HRIGHT, h.getRight() + PERPENDICULAR_PIXEL_EXPAND_AMOUNT, h));
|
||||
}
|
||||
|
||||
for (Ruling v : verticals) {
|
||||
sos.add(new SortObject(SOType.VERTICAL, v.getLeft(), v));
|
||||
}
|
||||
|
||||
Collections.sort(sos, new Comparator<SortObject>() {
|
||||
@Override
|
||||
public int compare(SortObject a, SortObject b) {
|
||||
|
||||
int rv;
|
||||
if (Utils.feq(a.position, b.position)) {
|
||||
if (a.type == SOType.VERTICAL && b.type == SOType.HLEFT) {
|
||||
rv = 1;
|
||||
} else if (a.type == SOType.VERTICAL && b.type == SOType.HRIGHT) {
|
||||
rv = -1;
|
||||
} else if (a.type == SOType.HLEFT && b.type == SOType.VERTICAL) {
|
||||
rv = -1;
|
||||
} else if (a.type == SOType.HRIGHT && b.type == SOType.VERTICAL) {
|
||||
rv = 1;
|
||||
} else {
|
||||
rv = java.lang.Double.compare(a.position, b.position);
|
||||
}
|
||||
} else {
|
||||
return java.lang.Double.compare(a.position, b.position);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
});
|
||||
|
||||
for (SortObject so : sos) {
|
||||
switch (so.type) {
|
||||
case VERTICAL:
|
||||
for (Map.Entry<Ruling, Boolean> h : tree.entrySet()) {
|
||||
try {
|
||||
Point2D i = h.getKey().intersectionPoint(so.ruling);
|
||||
if (i == null) {
|
||||
continue;
|
||||
}
|
||||
rv.put(i, new Ruling[]{h.getKey().expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT), so.ruling.expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT)});
|
||||
} catch (UnsupportedOperationException e) {
|
||||
log.info("Some line are oblique, ignoring...");
|
||||
continue;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case HRIGHT:
|
||||
tree.remove(so.ruling);
|
||||
break;
|
||||
case HLEFT:
|
||||
tree.put(so.ruling, true);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return rv;
|
||||
|
||||
}
|
||||
|
||||
|
||||
public boolean vertical() {
|
||||
|
||||
return this.length() > 0 && Utils.feq(this.x1, this.x2); //diff < ORIENTATION_CHECK_THRESHOLD;
|
||||
}
|
||||
|
||||
|
||||
public boolean horizontal() {
|
||||
|
||||
return this.length() > 0 && Utils.feq(this.y1, this.y2); //diff < ORIENTATION_CHECK_THRESHOLD;
|
||||
}
|
||||
|
||||
// attributes that make sense only for non-oblique lines
|
||||
// these are used to have a single collapse method (in page, currently)
|
||||
|
||||
|
||||
public boolean oblique() {
|
||||
|
||||
return !(this.vertical() || this.horizontal());
|
||||
}
|
||||
|
||||
|
||||
public float getPosition() {
|
||||
|
||||
if (this.oblique()) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
return this.vertical() ? this.getLeft() : this.getTop();
|
||||
}
|
||||
|
||||
|
||||
public float getStart() {
|
||||
|
||||
if (this.oblique()) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
return this.vertical() ? this.getTop() : this.getLeft();
|
||||
}
|
||||
|
||||
|
||||
public void setStart(float v) {
|
||||
|
||||
if (this.oblique()) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
if (this.vertical()) {
|
||||
this.setTop(v);
|
||||
} else {
|
||||
this.setLeft(v);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public float getEnd() {
|
||||
|
||||
if (this.oblique()) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
return this.vertical() ? this.getBottom() : this.getRight();
|
||||
}
|
||||
|
||||
|
||||
public void setEnd(float v) {
|
||||
|
||||
if (this.oblique()) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
if (this.vertical()) {
|
||||
this.setBottom(v);
|
||||
} else {
|
||||
this.setRight(v);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void setStartEnd(float start, float end) {
|
||||
|
||||
if (this.oblique()) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
if (this.vertical()) {
|
||||
this.setTop(start);
|
||||
this.setBottom(end);
|
||||
} else {
|
||||
this.setLeft(start);
|
||||
this.setRight(end);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public boolean perpendicularTo(Ruling other) {
|
||||
|
||||
return this.vertical() == other.horizontal();
|
||||
}
|
||||
|
||||
|
||||
public boolean nearlyIntersects(Ruling another, int colinearOrParallelExpandAmount) {
|
||||
|
||||
if (this.intersectsLine(another)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
boolean rv = false;
|
||||
|
||||
if (this.perpendicularTo(another)) {
|
||||
rv = this.expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT).intersectsLine(another);
|
||||
} else {
|
||||
rv = this.expand(colinearOrParallelExpandAmount).intersectsLine(another.expand(colinearOrParallelExpandAmount));
|
||||
}
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
|
||||
public double length() {
|
||||
|
||||
return Math.sqrt(Math.pow(this.x1 - this.x2, 2) + Math.pow(this.y1 - this.y2, 2));
|
||||
}
|
||||
|
||||
|
||||
public Ruling intersect(Rectangle2D clip) {
|
||||
|
||||
Float clipee = (Float) this.clone();
|
||||
boolean clipped = new CohenSutherlandClipping(clip).clip(clipee);
|
||||
|
||||
if (clipped) {
|
||||
return new Ruling(clipee.getP1(), clipee.getP2());
|
||||
} else {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public Ruling expand(float amount) {
|
||||
|
||||
Ruling r = (Ruling) this.clone();
|
||||
try {
|
||||
r.setStart(this.getStart() - amount);
|
||||
r.setEnd(this.getEnd() + amount);
|
||||
} catch (UnsupportedOperationException e) {
|
||||
log.warn("Could not expand ruling!");
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
|
||||
public Point2D intersectionPoint(Ruling other) {
|
||||
|
||||
Ruling this_l = this.expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT);
|
||||
Ruling other_l = other.expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT);
|
||||
Ruling horizontal, vertical;
|
||||
|
||||
if (!this_l.intersectsLine(other_l)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (this_l.horizontal() && other_l.vertical()) {
|
||||
horizontal = this_l;
|
||||
vertical = other_l;
|
||||
} else if (this_l.vertical() && other_l.horizontal()) {
|
||||
vertical = this_l;
|
||||
horizontal = other_l;
|
||||
} else {
|
||||
log.warn("lines must be orthogonal, vertical and horizontal");
|
||||
return null;
|
||||
}
|
||||
return new Point2D.Float(vertical.getLeft(), horizontal.getTop());
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
|
||||
if (this == other) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!(other instanceof Ruling)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
Ruling o = (Ruling) other;
|
||||
return this.getP1().equals(o.getP1()) && this.getP2().equals(o.getP2());
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
|
||||
return super.hashCode();
|
||||
}
|
||||
|
||||
|
||||
public float getTop() {
|
||||
|
||||
return this.y1;
|
||||
}
|
||||
|
||||
|
||||
public void setTop(float v) {
|
||||
|
||||
setLine(this.getLeft(), v, this.getRight(), this.getBottom());
|
||||
}
|
||||
|
||||
|
||||
public float getLeft() {
|
||||
|
||||
return this.x1;
|
||||
}
|
||||
|
||||
|
||||
public void setLeft(float v) {
|
||||
|
||||
setLine(v, this.getTop(), this.getRight(), this.getBottom());
|
||||
}
|
||||
|
||||
|
||||
public float getBottom() {
|
||||
|
||||
return this.y2;
|
||||
}
|
||||
|
||||
|
||||
public void setBottom(float v) {
|
||||
|
||||
setLine(this.getLeft(), this.getTop(), this.getRight(), v);
|
||||
}
|
||||
|
||||
|
||||
public float getRight() {
|
||||
|
||||
return this.x2;
|
||||
}
|
||||
|
||||
|
||||
public void setRight(float v) {
|
||||
|
||||
setLine(this.getLeft(), this.getTop(), v, this.getBottom());
|
||||
}
|
||||
|
||||
|
||||
public float getWidth() {
|
||||
|
||||
return this.getRight() - this.getLeft();
|
||||
}
|
||||
|
||||
|
||||
public float getHeight() {
|
||||
|
||||
return this.getBottom() - this.getTop();
|
||||
}
|
||||
|
||||
|
||||
public double getAngle() {
|
||||
|
||||
double angle = Math.toDegrees(Math.atan2(this.getP2().getY() - this.getP1().getY(), this.getP2().getX() - this.getP1().getX()));
|
||||
|
||||
if (angle < 0) {
|
||||
angle += 360;
|
||||
}
|
||||
return angle;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
Formatter formatter = new Formatter(sb);
|
||||
String rv = formatter.format("%s[minX=%f minY=%f maxX=%f maxY=%f]", this.getClass().toString(), this.x1, this.y1, this.x2, this.y2).toString();
|
||||
formatter.close();
|
||||
return rv;
|
||||
}
|
||||
|
||||
|
||||
private enum SOType {
|
||||
VERTICAL,
|
||||
HRIGHT,
|
||||
HLEFT
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,342 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.table;
|
||||
|
||||
import java.awt.geom.Point2D;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.AbstractPageBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.PageBlockType;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text.TextPageBlock;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
public class TablePageBlock extends AbstractPageBlock {
|
||||
|
||||
private final TreeMap<CellPosition, Cell> cells = new TreeMap<>();
|
||||
|
||||
private final int rotation;
|
||||
@Getter
|
||||
@Setter
|
||||
private String headline;
|
||||
private int unrotatedRowCount;
|
||||
private int unrotatedColCount;
|
||||
private List<List<Cell>> rows;
|
||||
|
||||
|
||||
/**
 * Creates a table block from the detected cells and the bounding area of the table.
 * <p>
 * The cells are arranged into a grid first (see {@code addCells}); afterwards the block bounds are taken from {@code area}
 * ({@code minY} from its bottom, {@code maxY} from its top) and the block is classified as {@link PageBlockType#TABLE}.
 *
 * @param cells    the cells found for this table; note that {@code addCells} filters this list in place
 * @param area     the rectangle that encloses the table
 * @param rotation the rotation value later used by {@code computeRows} (90 and 270 get special handling)
 */
public TablePageBlock(List<Cell> cells, Rectangle area, int rotation) {
    addCells(cells);

    this.classification = PageBlockType.TABLE;
    this.rotation = rotation;

    this.minX = area.getLeft();
    this.maxX = area.getRight();
    this.minY = area.getBottom();
    this.maxY = area.getTop();
}
|
||||
|
||||
|
||||
public List<List<Cell>> getRows() {
|
||||
|
||||
if (rows == null) {
|
||||
rows = computeRows();
|
||||
|
||||
// Ignore rows that does not contain any cells and values.
|
||||
List<List<Cell>> rowsToRemove = new ArrayList<>();
|
||||
for (List<Cell> row : rows) {
|
||||
if (row.size() == 1 && row.get(0).getTextBlocks().isEmpty()) {
|
||||
rowsToRemove.add(row);
|
||||
}
|
||||
}
|
||||
rows.removeAll(rowsToRemove);
|
||||
|
||||
computeHeaders();
|
||||
}
|
||||
|
||||
return rows;
|
||||
|
||||
}
|
||||
|
||||
|
||||
public int getRowCount() {
|
||||
|
||||
return getRows().size();
|
||||
}
|
||||
|
||||
|
||||
public int getColCount() {
|
||||
|
||||
return getRows().stream().mapToInt(List::size).max().orElse(0);
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Detect header cells (either first row or first column):
|
||||
* Column is marked as header if cell text is bold and row cell text is not bold.
|
||||
* Defaults to row.
|
||||
*/
|
||||
private void computeHeaders() {
|
||||
|
||||
if (rows == null) {
|
||||
rows = computeRows();
|
||||
}
|
||||
// A bold cell is a header cell as long as every cell to the left/top is bold, too
|
||||
// we move from left to right and top to bottom
|
||||
for (int rowIndex = 0; rowIndex < rows.size(); rowIndex++) {
|
||||
List<Cell> rowCells = rows.get(rowIndex);
|
||||
if (rowCells.size() == 1) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (int colIndex = 0; colIndex < rowCells.size(); colIndex++) {
|
||||
Cell cell = rowCells.get(colIndex);
|
||||
List<Cell> cellsToTheLeft = rowCells.subList(0, colIndex);
|
||||
Cell lastHeaderCell = null;
|
||||
for (Cell leftCell : cellsToTheLeft) {
|
||||
if (leftCell.isHeaderCell()) {
|
||||
lastHeaderCell = leftCell;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (lastHeaderCell != null) {
|
||||
cell.getHeaderCells().add(lastHeaderCell);
|
||||
}
|
||||
List<Cell> cellsToTheTop = new ArrayList<>();
|
||||
for (int i = 0; i < rowIndex; i++) {
|
||||
try {
|
||||
cellsToTheTop.add(rows.get(i).get(colIndex));
|
||||
} catch (IndexOutOfBoundsException e) {
|
||||
log.debug("No cell {} in row {}, ignoring.", colIndex, rowIndex);
|
||||
}
|
||||
}
|
||||
for (Cell topCell : cellsToTheTop) {
|
||||
if (topCell.isHeaderCell()) {
|
||||
lastHeaderCell = topCell;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (lastHeaderCell != null) {
|
||||
cell.getHeaderCells().add(lastHeaderCell);
|
||||
}
|
||||
if (CollectionUtils.isNotEmpty(cell.getTextBlocks()) && cell.getTextBlocks().get(0).getMostPopularWordStyle().equals("bold")) {
|
||||
cell.setHeaderCell(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
private List<List<Cell>> computeRows() {
|
||||
|
||||
List<List<Cell>> rows = new ArrayList<>();
|
||||
if (rotation == 90) {
|
||||
for (int i = 0; i < unrotatedColCount; i++) { // rows
|
||||
List<Cell> lastRow = new ArrayList<>();
|
||||
for (int j = unrotatedRowCount - 1; j >= 0; j--) { // cols
|
||||
Cell cell = cells.get(new CellPosition(j, i));
|
||||
if (cell != null) {
|
||||
lastRow.add(cell);
|
||||
}
|
||||
}
|
||||
rows.add(lastRow);
|
||||
}
|
||||
} else if (rotation == 270) {
|
||||
for (int i = unrotatedColCount - 1; i >= 0; i--) { // rows
|
||||
List<Cell> lastRow = new ArrayList<>();
|
||||
for (int j = 0; j < unrotatedRowCount; j++) { // cols
|
||||
Cell cell = cells.get(new CellPosition(j, i));
|
||||
if (cell != null) {
|
||||
lastRow.add(cell);
|
||||
}
|
||||
}
|
||||
rows.add(lastRow);
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < unrotatedRowCount; i++) {
|
||||
List<Cell> lastRow = new ArrayList<>();
|
||||
for (int j = 0; j < unrotatedColCount; j++) {
|
||||
Cell cell = cells.get(new CellPosition(i, j)); // JAVA_8 use getOrDefault()
|
||||
if (cell != null) {
|
||||
lastRow.add(cell);
|
||||
}
|
||||
}
|
||||
rows.add(lastRow);
|
||||
}
|
||||
}
|
||||
|
||||
return rows;
|
||||
|
||||
}
|
||||
|
||||
|
||||
private void add(Cell chunk, int row, int col) {
|
||||
|
||||
unrotatedRowCount = Math.max(unrotatedRowCount, row + 1);
|
||||
unrotatedColCount = Math.max(unrotatedColCount, col + 1);
|
||||
|
||||
CellPosition cp = new CellPosition(row, col);
|
||||
cells.put(cp, chunk);
|
||||
|
||||
}
|
||||
|
||||
|
||||
private void addCells(List<Cell> cells) {
|
||||
|
||||
if (cells.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
cells.removeIf(cell -> cell.getWidth() < 1.1 || cell.getHeight() < 1.1);
|
||||
|
||||
List<List<Cell>> rowsOfCells = calculateStructure(cells);
|
||||
|
||||
for (int i = 0; i < rowsOfCells.size(); i++) {
|
||||
for (int j = 0; j < rowsOfCells.get(i).size(); j++) {
|
||||
add(rowsOfCells.get(i).get(j), i, j);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Calculates the structure of the table. For spanning rows and columns multiple cells with the same values will be inserted.
|
||||
*
|
||||
* @param cells The found cells
|
||||
* @return TablePageBlock Structure
|
||||
*/
|
||||
private List<List<Cell>> calculateStructure(List<Cell> cells) {
|
||||
|
||||
List<List<Cell>> matrix = new ArrayList<>();
|
||||
|
||||
if (cells.isEmpty()) {
|
||||
return matrix;
|
||||
}
|
||||
|
||||
Set<Float> uniqueX = new HashSet<>();
|
||||
Set<Float> uniqueY = new HashSet<>();
|
||||
cells.stream().filter(c -> !c.getTextBlocks().isEmpty() || c.getHeight() > 3 && c.getWidth() > 3).forEach(c -> {
|
||||
uniqueX.add(c.getLeft());
|
||||
uniqueX.add(c.getRight());
|
||||
uniqueY.add(c.getBottom());
|
||||
uniqueY.add(c.getTop());
|
||||
});
|
||||
|
||||
var sortedUniqueX = uniqueX.stream().sorted().toList();
|
||||
var sortedUniqueY = uniqueY.stream().sorted().toList();
|
||||
|
||||
Float prevY = null;
|
||||
for (Float y : sortedUniqueY) {
|
||||
|
||||
List<Cell> row = new ArrayList<>();
|
||||
|
||||
Float prevX = null;
|
||||
for (Float x : sortedUniqueX) {
|
||||
|
||||
if (prevY != null && prevX != null) {
|
||||
var cell = new Cell(new Point2D.Float(prevX, prevY), new Point2D.Float(x, y));
|
||||
|
||||
var intersectionCell = cells.stream().filter(c -> cell.intersects(c) && cell.overlapRatio(c) > 0.1f).findFirst();
|
||||
intersectionCell.ifPresent(value -> cell.getTextBlocks().addAll(value.getTextBlocks()));
|
||||
if (cell.hasMinimumSize()) {
|
||||
row.add(cell);
|
||||
}
|
||||
}
|
||||
prevX = x;
|
||||
}
|
||||
|
||||
if (prevY != null && prevX != null && !row.isEmpty()) {
|
||||
matrix.add(row);
|
||||
}
|
||||
prevY = y;
|
||||
}
|
||||
|
||||
Collections.reverse(matrix);
|
||||
|
||||
return matrix;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String getText() {
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
List<List<Cell>> rows = getRows();
|
||||
|
||||
int i = 0;
|
||||
for (List<Cell> row : rows) {
|
||||
if (i != 0) {
|
||||
sb.append("\n");
|
||||
}
|
||||
if (!row.isEmpty()) {
|
||||
boolean firstColumn = true;
|
||||
for (Cell column : row) {
|
||||
if (!firstColumn) {
|
||||
sb.append(",");
|
||||
}
|
||||
if (column != null && column.getTextBlocks() != null) {
|
||||
boolean first = true;
|
||||
for (TextPageBlock textBlock : column.getTextBlocks()) {
|
||||
if (!first) {
|
||||
sb.append("\n");
|
||||
}
|
||||
sb.append('\"').append(textBlock.getText().replaceAll("\"", "\\\"")).append('\"');
|
||||
first = false;
|
||||
}
|
||||
}
|
||||
firstColumn = false;
|
||||
}
|
||||
}
|
||||
i++;
|
||||
}
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
|
||||
public String getTextAsHtml() {
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
List<List<Cell>> rows = getRows();
|
||||
|
||||
sb.append("<table border=\"1\">");
|
||||
int i = 0;
|
||||
for (List<Cell> row : rows) {
|
||||
sb.append("\n<tr>");
|
||||
if (!row.isEmpty()) {
|
||||
for (Cell column : row) {
|
||||
sb.append(i == 0 ? "\n<th>" : "\n<td>");
|
||||
if (column != null && column.getTextBlocks() != null) {
|
||||
boolean first = true;
|
||||
for (TextPageBlock textBlock : column.getTextBlocks()) {
|
||||
if (!first) {
|
||||
sb.append("<br />");
|
||||
}
|
||||
sb.append(textBlock.getText().replaceAll("\\n", "<br />"));
|
||||
first = false;
|
||||
}
|
||||
}
|
||||
sb.append(i == 0 ? "</th>" : "</td>");
|
||||
}
|
||||
}
|
||||
sb.append("</tr>");
|
||||
i++;
|
||||
}
|
||||
sb.append("</table>");
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,100 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text;
|
||||
|
||||
import org.apache.pdfbox.text.TextPosition;
|
||||
import org.springframework.beans.BeanUtils;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class RedTextPosition {
|
||||
|
||||
private String textMatrix;
|
||||
private float[] position;
|
||||
|
||||
@JsonIgnore
|
||||
private int rotation;
|
||||
|
||||
@JsonIgnore
|
||||
private float pageHeight;
|
||||
|
||||
@JsonIgnore
|
||||
private float pageWidth;
|
||||
|
||||
private String unicode;
|
||||
|
||||
@JsonIgnore
|
||||
private float dir;
|
||||
|
||||
// not used in reanalysis
|
||||
@JsonIgnore
|
||||
private float widthOfSpace;
|
||||
|
||||
// not used in reanalysis
|
||||
@JsonIgnore
|
||||
private float fontSizeInPt;
|
||||
|
||||
// not used in reanalysis
|
||||
@JsonIgnore
|
||||
private String fontName;
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public static RedTextPosition fromTextPosition(TextPosition textPosition) {
|
||||
|
||||
var pos = new RedTextPosition();
|
||||
BeanUtils.copyProperties(textPosition, pos);
|
||||
pos.setFontName(textPosition.getFont().getName());
|
||||
|
||||
pos.setFontSizeInPt(textPosition.getFontSizeInPt());
|
||||
|
||||
pos.setTextMatrix(textPosition.getTextMatrix().toString());
|
||||
|
||||
var position = new float[4];
|
||||
|
||||
position[0] = textPosition.getXDirAdj();
|
||||
position[1] = textPosition.getYDirAdj();
|
||||
position[2] = textPosition.getWidthDirAdj();
|
||||
position[3] = textPosition.getHeightDir();
|
||||
|
||||
pos.setPosition(position);
|
||||
return pos;
|
||||
}
|
||||
|
||||
|
||||
@JsonIgnore
|
||||
public float getXDirAdj() {
|
||||
|
||||
return position[0];
|
||||
}
|
||||
|
||||
|
||||
@JsonIgnore
|
||||
public float getYDirAdj() {
|
||||
|
||||
return position[1];
|
||||
}
|
||||
|
||||
|
||||
@JsonIgnore
|
||||
public float getWidthDirAdj() {
|
||||
|
||||
return position[2];
|
||||
}
|
||||
|
||||
|
||||
@JsonIgnore
|
||||
public float getHeightDir() {
|
||||
|
||||
return position[3];
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,49 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
||||
|
||||
import lombok.Getter;
|
||||
|
||||
public class SearchableText {
|
||||
|
||||
@Getter
|
||||
private final List<TextPositionSequence> sequences = new ArrayList<>();
|
||||
|
||||
|
||||
public void add(TextPositionSequence textPositionSequence) {
|
||||
|
||||
sequences.add(textPositionSequence);
|
||||
}
|
||||
|
||||
|
||||
public void addAll(List<TextPositionSequence> textPositionSequences) {
|
||||
|
||||
sequences.addAll(textPositionSequences);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
return buildString(sequences);
|
||||
}
|
||||
|
||||
|
||||
public static String buildString(List<TextPositionSequence> sequences) {
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (TextPositionSequence word : sequences) {
|
||||
sb.append(word);
|
||||
sb.append(' ');
|
||||
}
|
||||
String text = sb.toString();
|
||||
text = TextNormalizationUtilities.removeHyphenLineBreaks(text);
|
||||
text = TextNormalizationUtilities.removeLineBreaks(text);
|
||||
text = TextNormalizationUtilities.removeRepeatingWhitespaces(text);
|
||||
return text;
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,17 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class SimplifiedSectionText {
|
||||
|
||||
private int sectionNumber;
|
||||
private String text;
|
||||
|
||||
}
|
||||
@ -1,20 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class SimplifiedText {
|
||||
|
||||
private int numberOfPages;
|
||||
private List<SimplifiedSectionText> sectionTexts = new ArrayList<>();
|
||||
|
||||
}
|
||||
@ -1,47 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import lombok.Getter;
|
||||
|
||||
public class StringFrequencyCounter {
|
||||
|
||||
@Getter
|
||||
private final Map<String, Integer> countPerValue = new HashMap<>();
|
||||
|
||||
|
||||
public void add(String value) {
|
||||
|
||||
if (!countPerValue.containsKey(value)) {
|
||||
countPerValue.put(value, 1);
|
||||
} else {
|
||||
countPerValue.put(value, countPerValue.get(value) + 1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void addAll(Map<String, Integer> otherCounter) {
|
||||
|
||||
for (Map.Entry<String, Integer> entry : otherCounter.entrySet()) {
|
||||
if (countPerValue.containsKey(entry.getKey())) {
|
||||
countPerValue.put(entry.getKey(), countPerValue.get(entry.getKey()) + entry.getValue());
|
||||
} else {
|
||||
countPerValue.put(entry.getKey(), entry.getValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public String getMostPopular() {
|
||||
|
||||
Map.Entry<String, Integer> mostPopular = null;
|
||||
for (Map.Entry<String, Integer> entry : countPerValue.entrySet()) {
|
||||
if (mostPopular == null || entry.getValue() > mostPopular.getValue()) {
|
||||
mostPopular = entry;
|
||||
}
|
||||
}
|
||||
return mostPopular != null ? mostPopular.getKey() : null;
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,47 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonValue;
|
||||
|
||||
import lombok.Getter;
|
||||
|
||||
@Getter
|
||||
public enum TextDirection {
|
||||
ZERO(0f),
|
||||
QUARTER_CIRCLE(90f),
|
||||
HALF_CIRCLE(180f),
|
||||
THREE_QUARTER_CIRCLE(270f);
|
||||
|
||||
public static final String VALUE_STRING_SUFFIX = "°";
|
||||
|
||||
@JsonValue
|
||||
private final float degrees;
|
||||
private final float radians;
|
||||
|
||||
|
||||
TextDirection(float degreeValue) {
|
||||
|
||||
degrees = degreeValue;
|
||||
radians = (float) Math.toRadians(degreeValue);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
return degrees + VALUE_STRING_SUFFIX;
|
||||
}
|
||||
|
||||
|
||||
@JsonCreator(mode = JsonCreator.Mode.DELEGATING)
|
||||
public static TextDirection fromDegrees(float degrees) {
|
||||
|
||||
for (var dir : TextDirection.values()) {
|
||||
if (degrees == dir.degrees) {
|
||||
return dir;
|
||||
}
|
||||
}
|
||||
|
||||
throw new IllegalArgumentException(String.format("A value of %f is not supported by TextDirection", degrees));
|
||||
}
|
||||
}
|
||||
@ -1,302 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.AbstractPageBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.PageBlockType;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@AllArgsConstructor
|
||||
@Builder
|
||||
@Data
|
||||
@NoArgsConstructor
|
||||
public class TextPageBlock extends AbstractPageBlock {
|
||||
|
||||
@Builder.Default
|
||||
private List<TextPositionSequence> sequences = new ArrayList<>();
|
||||
|
||||
@JsonIgnore
|
||||
private int rotation;
|
||||
|
||||
@JsonIgnore
|
||||
private String mostPopularWordFont;
|
||||
|
||||
@JsonIgnore
|
||||
private String mostPopularWordStyle;
|
||||
|
||||
@JsonIgnore
|
||||
private float mostPopularWordFontSize;
|
||||
|
||||
@JsonIgnore
|
||||
private float mostPopularWordHeight;
|
||||
|
||||
@JsonIgnore
|
||||
private float mostPopularWordSpaceWidth;
|
||||
|
||||
@JsonIgnore
|
||||
private float highestFontSize;
|
||||
|
||||
@JsonIgnore
|
||||
private PageBlockType classification;
|
||||
|
||||
|
||||
@JsonIgnore
|
||||
public TextDirection getDir() {
|
||||
|
||||
return sequences.get(0).getDir();
|
||||
}
|
||||
|
||||
|
||||
@JsonIgnore
|
||||
private float getPageHeight() {
|
||||
|
||||
return sequences.get(0).getPageHeight();
|
||||
}
|
||||
|
||||
|
||||
@JsonIgnore
|
||||
private float getPageWidth() {
|
||||
|
||||
return sequences.get(0).getPageWidth();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the minX value in pdf coordinate system.
|
||||
* Note: This needs to use Pdf Coordinate System where {0,0} rotated with the page rotation.
|
||||
* 0 -> LowerLeft
|
||||
* 90 -> UpperLeft
|
||||
* 180 -> UpperRight
|
||||
* 270 -> LowerRight
|
||||
*
|
||||
* @return the minX value in pdf coordinate system
|
||||
*/
|
||||
@JsonIgnore
|
||||
public float getPdfMinX() {
|
||||
|
||||
if (getDir().getDegrees() == 90) {
|
||||
return minY;
|
||||
} else if (getDir().getDegrees() == 180) {
|
||||
return getPageWidth() - maxX;
|
||||
|
||||
} else if (getDir().getDegrees() == 270) {
|
||||
|
||||
return getPageWidth() - maxY;
|
||||
} else {
|
||||
return minX;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the maxX value in pdf coordinate system.
|
||||
* Note: This needs to use Pdf Coordinate System where {0,0} rotated with the page rotation.
|
||||
* 0 -> LowerLeft
|
||||
* 90 -> UpperLeft
|
||||
* 180 -> UpperRight
|
||||
* 270 -> LowerRight
|
||||
*
|
||||
* @return the maxX value in pdf coordinate system
|
||||
*/
|
||||
@JsonIgnore
|
||||
public float getPdfMaxX() {
|
||||
|
||||
if (getDir().getDegrees() == 90) {
|
||||
return maxY;
|
||||
} else if (getDir().getDegrees() == 180) {
|
||||
return getPageWidth() - minX;
|
||||
} else if (getDir().getDegrees() == 270) {
|
||||
return getPageWidth() - minY;
|
||||
|
||||
} else {
|
||||
return maxX;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the minY value in pdf coordinate system.
|
||||
* Note: This needs to use Pdf Coordinate System where {0,0} rotated with the page rotation.
|
||||
* 0 -> LowerLeft
|
||||
* 90 -> UpperLeft
|
||||
* 180 -> UpperRight
|
||||
* 270 -> LowerRight
|
||||
*
|
||||
* @return the minY value in pdf coordinate system
|
||||
*/
|
||||
@JsonIgnore
|
||||
public float getPdfMinY() {
|
||||
|
||||
if (getDir().getDegrees() == 90) {
|
||||
return minX;
|
||||
} else if (getDir().getDegrees() == 180) {
|
||||
return maxY;
|
||||
|
||||
} else if (getDir().getDegrees() == 270) {
|
||||
return getPageHeight() - maxX;
|
||||
|
||||
} else {
|
||||
return getPageHeight() - maxY;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the maxY value in pdf coordinate system.
|
||||
* Note: This needs to use Pdf Coordinate System where {0,0} rotated with the page rotation.
|
||||
* 0 -> LowerLeft
|
||||
* 90 -> UpperLeft
|
||||
* 180 -> UpperRight
|
||||
* 270 -> LowerRight
|
||||
*
|
||||
* @return the maxY value in pdf coordinate system
|
||||
*/
|
||||
@JsonIgnore
|
||||
public float getPdfMaxY() {
|
||||
|
||||
if (getDir().getDegrees() == 90) {
|
||||
return maxX;
|
||||
} else if (getDir().getDegrees() == 180) {
|
||||
|
||||
return minY;
|
||||
} else if (getDir().getDegrees() == 270) {
|
||||
return getPageHeight() - minX;
|
||||
} else {
|
||||
return getPageHeight() - minY;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * Creates a text block with the given bounds, sequences and rotation.
 *
 * @param minX      lower X bound
 * @param maxX      upper X bound
 * @param minY      lower Y bound
 * @param maxY      upper Y bound
 * @param sequences the sequences of the block; the list is stored as passed, not copied
 * @param rotation  the rotation value to store
 */
public TextPageBlock(float minX, float maxX, float minY, float maxY, List<TextPositionSequence> sequences, int rotation) {

    this.sequences = sequences;
    this.rotation = rotation;

    this.minX = minX;
    this.minY = minY;
    this.maxX = maxX;
    this.maxY = maxY;
}
|
||||
|
||||
|
||||
public TextPageBlock union(TextPositionSequence r) {
|
||||
|
||||
TextPageBlock union = this.copy();
|
||||
union.add(r);
|
||||
return union;
|
||||
}
|
||||
|
||||
|
||||
public TextPageBlock union(TextPageBlock r) {
|
||||
|
||||
TextPageBlock union = this.copy();
|
||||
union.add(r);
|
||||
return union;
|
||||
}
|
||||
|
||||
|
||||
public void add(TextPageBlock r) {
|
||||
|
||||
if (r.getMinX() < minX) {
|
||||
minX = r.getMinX();
|
||||
}
|
||||
if (r.getMaxX() > maxX) {
|
||||
maxX = r.getMaxX();
|
||||
}
|
||||
if (r.getMinY() < minY) {
|
||||
minY = r.getMinY();
|
||||
}
|
||||
if (r.getMaxY() > maxY) {
|
||||
maxY = r.getMaxY();
|
||||
}
|
||||
sequences.addAll(r.getSequences());
|
||||
}
|
||||
|
||||
|
||||
public void add(TextPositionSequence r) {
|
||||
|
||||
if (r.getMinXDirAdj() < minX) {
|
||||
minX = r.getMinXDirAdj();
|
||||
}
|
||||
if (r.getMaxXDirAdj() > maxX) {
|
||||
maxX = r.getMaxXDirAdj();
|
||||
}
|
||||
if (r.getMinYDirAdj() < minY) {
|
||||
minY = r.getMinYDirAdj();
|
||||
}
|
||||
if (r.getMaxYDirAdj() > maxY) {
|
||||
maxY = r.getMaxYDirAdj();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public TextPageBlock copy() {
|
||||
|
||||
return new TextPageBlock(minX, maxX, minY, maxY, sequences, rotation);
|
||||
}
|
||||
|
||||
|
||||
public void resize(float x1, float y1, float width, float height) {
|
||||
|
||||
set(x1, y1, x1 + width, y1 + height);
|
||||
}
|
||||
|
||||
|
||||
public void set(float x1, float y1, float x2, float y2) {
|
||||
|
||||
this.minX = Math.min(x1, x2);
|
||||
this.maxX = Math.max(x1, x2);
|
||||
this.minY = Math.min(y1, y2);
|
||||
this.maxY = Math.max(y1, y2);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
StringBuilder builder = new StringBuilder();
|
||||
|
||||
for (int i = 0; i < sequences.size(); i++) {
|
||||
String sequenceAsString = sequences.get(i).toString();
|
||||
// Fix for missing Whitespace. This is recognized in getSequences method. See PDFTextStripper Line 1730.
|
||||
if (i != 0 && sequences.get(i - 1).charAt(sequences.get(i - 1).length() - 1) != ' ' && sequenceAsString.charAt(0) != ' ') {
|
||||
builder.append(' ');
|
||||
}
|
||||
builder.append(sequenceAsString);
|
||||
}
|
||||
|
||||
return builder.toString();
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
@JsonIgnore
|
||||
public String getText() {
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
TextPositionSequence previous = null;
|
||||
for (TextPositionSequence word : sequences) {
|
||||
if (previous != null) {
|
||||
if (Math.abs(previous.getMaxYDirAdj() - word.getMaxYDirAdj()) > word.getTextHeight()) {
|
||||
sb.append('\n');
|
||||
} else {
|
||||
sb.append(' ');
|
||||
}
|
||||
}
|
||||
sb.append(word.toString());
|
||||
previous = word;
|
||||
}
|
||||
|
||||
return TextNormalizationUtilities.removeHyphenLineBreaks(sb.toString());
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,301 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text;
|
||||
|
||||
import java.awt.geom.AffineTransform;
|
||||
import java.awt.geom.Point2D;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.pdfbox.text.TextPosition;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Point;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Rectangle;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
@JsonIgnoreProperties({"empty"})
|
||||
public class TextPositionSequence implements CharSequence {
|
||||
|
||||
public static final int HEIGHT_PADDING = 2;
|
||||
private int page;
|
||||
private List<RedTextPosition> textPositions = new ArrayList<>();
|
||||
|
||||
private TextDirection dir;
|
||||
private int rotation;
|
||||
private float pageHeight;
|
||||
private float pageWidth;
|
||||
|
||||
|
||||
/**
 * Creates an empty sequence for the given page.
 *
 * @param page the page value to store
 */
public TextPositionSequence(int page) {
    this.page = page;
}
|
||||
|
||||
|
||||
public TextPositionSequence(List<TextPosition> textPositions, int page) {
|
||||
|
||||
this.textPositions = textPositions.stream().map(RedTextPosition::fromTextPosition).collect(Collectors.toList());
|
||||
this.page = page;
|
||||
this.dir = TextDirection.fromDegrees(textPositions.get(0).getDir());
|
||||
this.rotation = textPositions.get(0).getRotation();
|
||||
this.pageHeight = textPositions.get(0).getPageHeight();
|
||||
this.pageWidth = textPositions.get(0).getPageWidth();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int length() {
|
||||
|
||||
return textPositions.size();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public char charAt(int index) {
|
||||
|
||||
RedTextPosition textPosition = textPositionAt(index);
|
||||
String text = textPosition.getUnicode();
|
||||
return text.charAt(0);
|
||||
}
|
||||
|
||||
|
||||
public char charAt(int index, boolean caseInSensitive) {
|
||||
|
||||
RedTextPosition textPosition = textPositionAt(index);
|
||||
String text = textPosition.getUnicode();
|
||||
return caseInSensitive ? text.toLowerCase().charAt(0) : text.charAt(0);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public TextPositionSequence subSequence(int start, int end) {
|
||||
|
||||
var textPositionSequence = new TextPositionSequence();
|
||||
textPositionSequence.textPositions = textPositions.subList(start, end);
|
||||
textPositionSequence.page = page;
|
||||
textPositionSequence.dir = dir;
|
||||
textPositionSequence.rotation = rotation;
|
||||
textPositionSequence.pageHeight = pageHeight;
|
||||
textPositionSequence.pageWidth = pageWidth;
|
||||
|
||||
return textPositionSequence;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
StringBuilder builder = new StringBuilder(length());
|
||||
for (int i = 0; i < length(); i++) {
|
||||
builder.append(charAt(i));
|
||||
}
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
|
||||
public RedTextPosition textPositionAt(int index) {
|
||||
|
||||
return textPositions.get(index);
|
||||
}
|
||||
|
||||
|
||||
public void add(TextPositionSequence textPositionSequence, RedTextPosition textPosition) {
|
||||
|
||||
this.textPositions.add(textPosition);
|
||||
this.page = textPositionSequence.getPage();
|
||||
this.dir = textPositionSequence.getDir();
|
||||
this.rotation = textPositionSequence.getRotation();
|
||||
this.pageHeight = textPositionSequence.getPageHeight();
|
||||
this.pageWidth = textPositionSequence.getPageWidth();
|
||||
}
|
||||
|
||||
|
||||
public void add(TextPosition textPosition) {
|
||||
|
||||
this.textPositions.add(RedTextPosition.fromTextPosition(textPosition));
|
||||
|
||||
this.dir = TextDirection.fromDegrees(textPositions.get(0).getDir());
|
||||
this.rotation = textPositions.get(0).getRotation();
|
||||
this.pageHeight = textPositions.get(0).getPageHeight();
|
||||
this.pageWidth = textPositions.get(0).getPageWidth();
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This value is adjusted so that 0,0 is upper left and it is adjusted based on the text direction.
|
||||
* This method ignores the page rotation but takes the text rotation and adjusts the coordinates to awt.
|
||||
*
|
||||
* @return the text direction adjusted minX value
|
||||
*/
|
||||
@JsonIgnore
|
||||
public float getMinXDirAdj() {
|
||||
|
||||
return textPositions.get(0).getXDirAdj();
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This value is adjusted so that 0,0 is upper left and it is adjusted based on the text direction.
|
||||
* This method ignores the page rotation but takes the text rotation and adjusts the coordinates to awt.
|
||||
*
|
||||
* @return the text direction adjusted maxX value
|
||||
*/
|
||||
@JsonIgnore
|
||||
public float getMaxXDirAdj() {
|
||||
|
||||
return textPositions.get(textPositions.size() - 1).getXDirAdj() + textPositions.get(textPositions.size() - 1).getWidthDirAdj() + HEIGHT_PADDING;
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This value is adjusted so that 0,0 is upper left and it is adjusted based on the text direction.
|
||||
* This method ignores the page rotation but takes the text rotation and adjusts the coordinates to awt.
|
||||
*
|
||||
* @return the text direction adjusted minY value. The upper border of the bounding box of the word.
|
||||
*/
|
||||
@JsonIgnore
|
||||
public float getMinYDirAdj() {
|
||||
|
||||
return textPositions.get(0).getYDirAdj() - getTextHeight();
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This value is adjusted so that 0,0 is upper left and it is adjusted based on the text direction.
|
||||
* This method ignores the page rotation but takes the text rotation and adjusts the coordinates to awt.
|
||||
*
|
||||
* @return the text direction adjusted maxY value. The lower border of the bounding box of the word.
|
||||
*/
|
||||
@JsonIgnore
|
||||
public float getMaxYDirAdj() {
|
||||
|
||||
return textPositions.get(0).getYDirAdj();
|
||||
|
||||
}
|
||||
|
||||
|
||||
@JsonIgnore
|
||||
public float getTextHeight() {
|
||||
|
||||
return textPositions.get(0).getHeightDir() + HEIGHT_PADDING;
|
||||
}
|
||||
|
||||
|
||||
@JsonIgnore
|
||||
public float getHeight() {
|
||||
|
||||
return getMaxYDirAdj() - getMinYDirAdj();
|
||||
}
|
||||
|
||||
|
||||
@JsonIgnore
|
||||
public float getWidth() {
|
||||
|
||||
return getMaxXDirAdj() - getMinXDirAdj();
|
||||
}
|
||||
|
||||
|
||||
@JsonIgnore
|
||||
public String getFont() {
|
||||
|
||||
return textPositions.get(0).getFontName().toLowerCase().replaceAll(",bold", "").replaceAll(",italic", "");
|
||||
}
|
||||
|
||||
|
||||
@JsonIgnore
|
||||
public String getFontStyle() {
|
||||
|
||||
String lowercaseFontName = textPositions.get(0).getFontName().toLowerCase();
|
||||
|
||||
if (lowercaseFontName.contains("bold") && lowercaseFontName.contains("italic")) {
|
||||
return "bold, italic";
|
||||
} else if (lowercaseFontName.contains("bold")) {
|
||||
return "bold";
|
||||
} else if (lowercaseFontName.contains("italic")) {
|
||||
return "italic";
|
||||
} else {
|
||||
return "standard";
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@JsonIgnore
|
||||
public float getFontSize() {
|
||||
|
||||
return textPositions.get(0).getFontSizeInPt();
|
||||
}
|
||||
|
||||
|
||||
@JsonIgnore
|
||||
public float getSpaceWidth() {
|
||||
|
||||
return textPositions.get(0).getWidthOfSpace();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This returns the bounding box of the word in Pdf Coordinate System where {0,0} rotated with the page rotation.
|
||||
* 0 -> LowerLeft
|
||||
* 90 -> UpperLeft
|
||||
* 180 -> UpperRight
|
||||
* 270 -> LowerRight
|
||||
*
|
||||
* @return bounding box of the word in Pdf Coordinate System
|
||||
*/
|
||||
@JsonIgnore
|
||||
@SneakyThrows
|
||||
public Rectangle getRectangle() {
|
||||
|
||||
log.debug("Page: '{}', Word: '{}', Rotation: '{}', textRotation {}", page, this, rotation, dir);
|
||||
|
||||
float textHeight = getTextHeight();
|
||||
|
||||
RedTextPosition firstTextPos = textPositions.get(0);
|
||||
RedTextPosition lastTextPos = textPositions.get(textPositions.size() - 1);
|
||||
|
||||
Point2D bottomLeft = new Point2D.Double(firstTextPos.getXDirAdj(), firstTextPos.getYDirAdj() - HEIGHT_PADDING);
|
||||
Point2D topRight = new Point2D.Double(lastTextPos.getXDirAdj() + lastTextPos.getWidthDirAdj(), lastTextPos.getYDirAdj() + textHeight + HEIGHT_PADDING);
|
||||
|
||||
AffineTransform transform = new AffineTransform();
|
||||
if (dir == TextDirection.ZERO || dir == TextDirection.HALF_CIRCLE) {
|
||||
transform.rotate(dir.getRadians(), pageWidth / 2f, pageHeight / 2f);
|
||||
transform.translate(0f, pageHeight + textHeight);
|
||||
transform.scale(1., -1.);
|
||||
} else if (dir == TextDirection.QUARTER_CIRCLE) {
|
||||
transform.rotate(dir.getRadians(), pageWidth / 2f, pageWidth / 2f);
|
||||
transform.translate(0f, pageWidth + textHeight);
|
||||
transform.scale(1., -1.);
|
||||
} else {
|
||||
transform.rotate(dir.getRadians(), pageHeight / 2f, pageHeight / 2f);
|
||||
transform.translate(0f, pageWidth + textHeight);
|
||||
transform.scale(1., -1.);
|
||||
}
|
||||
|
||||
bottomLeft = transform.transform(bottomLeft, null);
|
||||
topRight = transform.transform(topRight, null);
|
||||
|
||||
return new Rectangle( //
|
||||
new Point((float) bottomLeft.getX(), (float) bottomLeft.getY()),
|
||||
(float) (topRight.getX() - bottomLeft.getX()),
|
||||
(float) (topRight.getY() - bottomLeft.getY()),
|
||||
page);
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,14 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
@AllArgsConstructor
|
||||
public class UnclassifiedText {
|
||||
|
||||
private List<TextPageBlock> textBlocks;
|
||||
|
||||
}
|
||||
@ -1,384 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.parsing;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.Map;
|
||||
import java.util.WeakHashMap;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.fontbox.ttf.TrueTypeFont;
|
||||
import org.apache.fontbox.util.BoundingBox;
|
||||
import org.apache.pdfbox.contentstream.PDFStreamEngine;
|
||||
import org.apache.pdfbox.contentstream.operator.DrawObject;
|
||||
import org.apache.pdfbox.contentstream.operator.state.Concatenate;
|
||||
import org.apache.pdfbox.contentstream.operator.state.Restore;
|
||||
import org.apache.pdfbox.contentstream.operator.state.Save;
|
||||
import org.apache.pdfbox.contentstream.operator.state.SetGraphicsStateParameters;
|
||||
import org.apache.pdfbox.contentstream.operator.state.SetMatrix;
|
||||
import org.apache.pdfbox.contentstream.operator.text.BeginText;
|
||||
import org.apache.pdfbox.contentstream.operator.text.EndText;
|
||||
import org.apache.pdfbox.contentstream.operator.text.MoveText;
|
||||
import org.apache.pdfbox.contentstream.operator.text.MoveTextSetLeading;
|
||||
import org.apache.pdfbox.contentstream.operator.text.NextLine;
|
||||
import org.apache.pdfbox.contentstream.operator.text.SetCharSpacing;
|
||||
import org.apache.pdfbox.contentstream.operator.text.SetFontAndSize;
|
||||
import org.apache.pdfbox.contentstream.operator.text.SetTextHorizontalScaling;
|
||||
import org.apache.pdfbox.contentstream.operator.text.SetTextLeading;
|
||||
import org.apache.pdfbox.contentstream.operator.text.SetTextRenderingMode;
|
||||
import org.apache.pdfbox.contentstream.operator.text.SetTextRise;
|
||||
import org.apache.pdfbox.contentstream.operator.text.SetWordSpacing;
|
||||
import org.apache.pdfbox.contentstream.operator.text.ShowText;
|
||||
import org.apache.pdfbox.contentstream.operator.text.ShowTextAdjusted;
|
||||
import org.apache.pdfbox.contentstream.operator.text.ShowTextLine;
|
||||
import org.apache.pdfbox.contentstream.operator.text.ShowTextLineAndSpace;
|
||||
import org.apache.pdfbox.cos.COSDictionary;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
||||
import org.apache.pdfbox.pdmodel.font.PDCIDFont;
|
||||
import org.apache.pdfbox.pdmodel.font.PDCIDFontType2;
|
||||
import org.apache.pdfbox.pdmodel.font.PDFont;
|
||||
import org.apache.pdfbox.pdmodel.font.PDFontDescriptor;
|
||||
import org.apache.pdfbox.pdmodel.font.PDSimpleFont;
|
||||
import org.apache.pdfbox.pdmodel.font.PDTrueTypeFont;
|
||||
import org.apache.pdfbox.pdmodel.font.PDType0Font;
|
||||
import org.apache.pdfbox.pdmodel.font.PDType3Font;
|
||||
import org.apache.pdfbox.pdmodel.font.encoding.GlyphList;
|
||||
import org.apache.pdfbox.pdmodel.graphics.state.PDGraphicsState;
|
||||
import org.apache.pdfbox.text.TextPosition;
|
||||
import org.apache.pdfbox.util.Matrix;
|
||||
import org.apache.pdfbox.util.Vector;
|
||||
|
||||
/**
|
||||
* LEGACY text calculations which are known to be incorrect but are depended on by PDFTextStripper.
|
||||
* <p>
|
||||
* This class exists only so that we don't break the code of users who have their own subclasses of
|
||||
* PDFTextStripper. It replaces the mostly empty implementation of showGlyph() in PDFStreamEngine
|
||||
* with a heuristic implementation which is backwards compatible.
|
||||
* <p>
|
||||
* DO NOT USE THIS CODE UNLESS YOU ARE WORKING WITH PDFTextStripper.
|
||||
* THIS CODE IS DELIBERATELY INCORRECT, USE PDFStreamEngine INSTEAD.
|
||||
*/
|
||||
@SuppressWarnings({"PMD", "checkstyle:all"})
|
||||
class LegacyPDFStreamEngine extends PDFStreamEngine {
|
||||
|
||||
private static final Log LOG = LogFactory.getLog(LegacyPDFStreamEngine.class);
|
||||
|
||||
private int pageRotation;
|
||||
private PDRectangle pageSize;
|
||||
private Matrix translateMatrix;
|
||||
private final GlyphList glyphList;
|
||||
private final Map<COSDictionary, Float> fontHeightMap = new WeakHashMap<COSDictionary, Float>();
|
||||
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*/
|
||||
LegacyPDFStreamEngine() throws IOException {
|
||||
|
||||
addOperator(new BeginText());
|
||||
addOperator(new Concatenate());
|
||||
addOperator(new DrawObject()); // special text version
|
||||
addOperator(new EndText());
|
||||
addOperator(new SetGraphicsStateParameters());
|
||||
addOperator(new Save());
|
||||
addOperator(new Restore());
|
||||
addOperator(new NextLine());
|
||||
addOperator(new SetCharSpacing());
|
||||
addOperator(new MoveText());
|
||||
addOperator(new MoveTextSetLeading());
|
||||
addOperator(new SetFontAndSize());
|
||||
addOperator(new ShowText());
|
||||
addOperator(new ShowTextAdjusted());
|
||||
addOperator(new SetTextLeading());
|
||||
addOperator(new SetMatrix());
|
||||
addOperator(new SetTextRenderingMode());
|
||||
addOperator(new SetTextRise());
|
||||
addOperator(new SetWordSpacing());
|
||||
addOperator(new SetTextHorizontalScaling());
|
||||
addOperator(new ShowTextLine());
|
||||
addOperator(new ShowTextLineAndSpace());
|
||||
|
||||
// load additional glyph list for Unicode mapping
|
||||
String path = "/org/apache/pdfbox/resources/glyphlist/additional.txt";
|
||||
InputStream input = GlyphList.class.getResourceAsStream(path);
|
||||
glyphList = new GlyphList(GlyphList.getAdobeGlyphList(), input);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This will initialize and process the contents of the stream.
|
||||
*
|
||||
* @param page the page to process
|
||||
* @throws java.io.IOException if there is an error accessing the stream.
|
||||
*/
|
||||
@Override
|
||||
public void processPage(PDPage page) throws IOException {
|
||||
|
||||
this.pageRotation = page.getRotation();
|
||||
this.pageSize = page.getCropBox();
|
||||
|
||||
if (pageSize.getLowerLeftX() == 0 && pageSize.getLowerLeftY() == 0) {
|
||||
translateMatrix = null;
|
||||
} else {
|
||||
// translation matrix for cropbox
|
||||
translateMatrix = Matrix.getTranslateInstance(-pageSize.getLowerLeftX(), -pageSize.getLowerLeftY());
|
||||
}
|
||||
super.processPage(page);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Called when a glyph is to be processed. The heuristic calculations here were originally
|
||||
* written by Ben Litchfield for PDFStreamEngine.
|
||||
*/
|
||||
@Override
|
||||
protected void showGlyph(Matrix textRenderingMatrix, PDFont font, int code, String unicode, Vector displacement) throws IOException {
|
||||
//
|
||||
// legacy calculations which were previously in PDFStreamEngine
|
||||
//
|
||||
// DO NOT USE THIS CODE UNLESS YOU ARE WORKING WITH PDFTextStripper.
|
||||
// THIS CODE IS DELIBERATELY INCORRECT
|
||||
//
|
||||
|
||||
PDGraphicsState state = getGraphicsState();
|
||||
Matrix ctm = state.getCurrentTransformationMatrix();
|
||||
float fontSize = state.getTextState().getFontSize();
|
||||
float horizontalScaling = state.getTextState().getHorizontalScaling() / 100f;
|
||||
Matrix textMatrix = getTextMatrix();
|
||||
|
||||
float displacementX = displacement.getX();
|
||||
// the sorting algorithm is based on the width of the character. As the displacement
|
||||
// for vertical characters doesn't provide any suitable value for it, we have to
|
||||
// calculate our own
|
||||
if (font.isVertical()) {
|
||||
displacementX = font.getWidth(code) / 1000;
|
||||
// there may be an additional scaling factor for true type fonts
|
||||
TrueTypeFont ttf = null;
|
||||
if (font instanceof PDTrueTypeFont) {
|
||||
ttf = ((PDTrueTypeFont) font).getTrueTypeFont();
|
||||
} else if (font instanceof PDType0Font) {
|
||||
PDCIDFont cidFont = ((PDType0Font) font).getDescendantFont();
|
||||
if (cidFont instanceof PDCIDFontType2) {
|
||||
ttf = ((PDCIDFontType2) cidFont).getTrueTypeFont();
|
||||
}
|
||||
}
|
||||
if (ttf != null && ttf.getUnitsPerEm() != 1000) {
|
||||
displacementX *= 1000f / ttf.getUnitsPerEm();
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// legacy calculations which were previously in PDFStreamEngine
|
||||
//
|
||||
// DO NOT USE THIS CODE UNLESS YOU ARE WORKING WITH PDFTextStripper.
|
||||
// THIS CODE IS DELIBERATELY INCORRECT
|
||||
//
|
||||
|
||||
// (modified) combined displacement, this is calculated *without* taking the character
|
||||
// spacing and word spacing into account, due to legacy code in TextStripper
|
||||
float tx = displacementX * fontSize * horizontalScaling;
|
||||
float ty = displacement.getY() * fontSize;
|
||||
|
||||
// (modified) combined displacement matrix
|
||||
Matrix td = Matrix.getTranslateInstance(tx, ty);
|
||||
|
||||
// (modified) text rendering matrix
|
||||
Matrix nextTextRenderingMatrix = td.multiply(textMatrix).multiply(ctm); // text space -> device space
|
||||
float nextX = nextTextRenderingMatrix.getTranslateX();
|
||||
float nextY = nextTextRenderingMatrix.getTranslateY();
|
||||
|
||||
// (modified) width and height calculations
|
||||
float dxDisplay = nextX - textRenderingMatrix.getTranslateX();
|
||||
Float fontHeight = fontHeightMap.get(font.getCOSObject());
|
||||
if (fontHeight == null) {
|
||||
fontHeight = computeFontHeight(font);
|
||||
fontHeightMap.put(font.getCOSObject(), fontHeight);
|
||||
}
|
||||
float dyDisplay = fontHeight * textRenderingMatrix.getScalingFactorY();
|
||||
|
||||
//
|
||||
// start of the original method
|
||||
//
|
||||
|
||||
// Note on variable names. There are three different units being used in this code.
|
||||
// Character sizes are given in glyph units, text locations are initially given in text
|
||||
// units, and we want to save the data in display units. The variable names should end with
|
||||
// Text or Disp to represent if the values are in text or disp units (no glyph units are
|
||||
// saved).
|
||||
|
||||
float glyphSpaceToTextSpaceFactor = 1 / 1000f;
|
||||
if (font instanceof PDType3Font) {
|
||||
glyphSpaceToTextSpaceFactor = font.getFontMatrix().getScaleX();
|
||||
}
|
||||
|
||||
float spaceWidthText = 0;
|
||||
try {
|
||||
// to avoid crash as described in PDFBOX-614, see what the space displacement should be
|
||||
spaceWidthText = font.getSpaceWidth() * glyphSpaceToTextSpaceFactor;
|
||||
} catch (Throwable exception) {
|
||||
LOG.warn(exception, exception);
|
||||
}
|
||||
|
||||
if (spaceWidthText == 0) {
|
||||
spaceWidthText = font.getAverageFontWidth() * glyphSpaceToTextSpaceFactor;
|
||||
// the average space width appears to be higher than necessary so make it smaller
|
||||
spaceWidthText *= .80f;
|
||||
}
|
||||
if (spaceWidthText == 0) {
|
||||
spaceWidthText = 1.0f; // if could not find font, use a generic value
|
||||
}
|
||||
|
||||
// the space width has to be transformed into display units
|
||||
float spaceWidthDisplay = spaceWidthText * textRenderingMatrix.getScalingFactorX();
|
||||
|
||||
// use our additional glyph list for Unicode mapping
|
||||
String unicodeMapping = font.toUnicode(code, glyphList);
|
||||
|
||||
// when there is no Unicode mapping available, Acrobat simply coerces the character code
|
||||
// into Unicode, so we do the same. Subclasses of PDFStreamEngine don't necessarily want
|
||||
// this, which is why we leave it until this point in PDFTextStreamEngine.
|
||||
if (unicodeMapping == null) {
|
||||
if (font instanceof PDSimpleFont) {
|
||||
char c = (char) code;
|
||||
unicodeMapping = new String(new char[]{c});
|
||||
} else {
|
||||
// Acrobat doesn't seem to coerce composite font's character codes, instead it
|
||||
// skips them. See the "allah2.pdf" TestTextStripper file.
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// adjust for cropbox if needed
|
||||
Matrix translatedTextRenderingMatrix;
|
||||
if (translateMatrix == null) {
|
||||
translatedTextRenderingMatrix = textRenderingMatrix;
|
||||
} else {
|
||||
translatedTextRenderingMatrix = Matrix.concatenate(translateMatrix, textRenderingMatrix);
|
||||
nextX -= pageSize.getLowerLeftX();
|
||||
nextY -= pageSize.getLowerLeftY();
|
||||
}
|
||||
|
||||
// This is a hack for unicode letter with 2 chars e.g. RA see unicodeProblem.pdf
|
||||
if (unicodeMapping.length() == 2) {
|
||||
processTextPosition(new TextPosition(pageRotation,
|
||||
pageSize.getWidth(),
|
||||
pageSize.getHeight(),
|
||||
translatedTextRenderingMatrix,
|
||||
nextX,
|
||||
nextY,
|
||||
Math.abs(dyDisplay),
|
||||
dxDisplay,
|
||||
Math.abs(spaceWidthDisplay),
|
||||
Character.toString(unicodeMapping.charAt(0)),
|
||||
new int[]{code},
|
||||
font,
|
||||
fontSize,
|
||||
(int) (fontSize * textMatrix.getScalingFactorX())));
|
||||
processTextPosition(new TextPosition(pageRotation,
|
||||
pageSize.getWidth(),
|
||||
pageSize.getHeight(),
|
||||
translatedTextRenderingMatrix,
|
||||
nextX,
|
||||
nextY,
|
||||
Math.abs(dyDisplay),
|
||||
dxDisplay,
|
||||
Math.abs(spaceWidthDisplay),
|
||||
Character.toString(unicodeMapping.charAt(1)),
|
||||
new int[]{code},
|
||||
font,
|
||||
fontSize,
|
||||
(int) (fontSize * textMatrix.getScalingFactorX())));
|
||||
} else {
|
||||
|
||||
processTextPosition(new TextPosition(pageRotation,
|
||||
pageSize.getWidth(),
|
||||
pageSize.getHeight(),
|
||||
translatedTextRenderingMatrix,
|
||||
nextX,
|
||||
nextY,
|
||||
Math.abs(dyDisplay),
|
||||
dxDisplay,
|
||||
Math.abs(spaceWidthDisplay),
|
||||
unicodeMapping,
|
||||
new int[]{code},
|
||||
font,
|
||||
fontSize,
|
||||
(int) (fontSize * textMatrix.getScalingFactorX())));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Compute the font height. Override this if you want to use own calculations.
|
||||
*
|
||||
* @param font the font.
|
||||
* @return the font height.
|
||||
* @throws IOException if there is an error while getting the font bounding box.
|
||||
*/
|
||||
protected float computeFontHeight(PDFont font) throws IOException {
|
||||
|
||||
BoundingBox bbox = font.getBoundingBox();
|
||||
if (bbox.getLowerLeftY() < Short.MIN_VALUE) {
|
||||
// PDFBOX-2158 and PDFBOX-3130
|
||||
// files by Salmat eSolutions / ClibPDF Library
|
||||
bbox.setLowerLeftY(-(bbox.getLowerLeftY() + 65536));
|
||||
}
|
||||
// 1/2 the bbox is used as the height todo: why?
|
||||
float glyphHeight = bbox.getHeight() / 2;
|
||||
|
||||
// sometimes the bbox has very high values, but CapHeight is OK
|
||||
PDFontDescriptor fontDescriptor = font.getFontDescriptor();
|
||||
if (fontDescriptor != null) {
|
||||
float capHeight = fontDescriptor.getCapHeight();
|
||||
if (Float.compare(capHeight, 0) != 0 && (capHeight < glyphHeight || Float.compare(glyphHeight, 0) == 0)) {
|
||||
glyphHeight = capHeight;
|
||||
}
|
||||
// PDFBOX-3464, PDFBOX-4480, PDFBOX-4553:
|
||||
// sometimes even CapHeight has very high value, but Ascent and Descent are ok
|
||||
float ascent = fontDescriptor.getAscent();
|
||||
float descent = fontDescriptor.getDescent();
|
||||
if (capHeight > ascent && ascent > 0 && descent < 0 && ((ascent - descent) / 2 < glyphHeight || Float.compare(glyphHeight, 0) == 0)) {
|
||||
glyphHeight = (ascent - descent) / 2;
|
||||
}
|
||||
}
|
||||
|
||||
// transformPoint from glyph space -> text space
|
||||
float height;
|
||||
if (font instanceof PDType3Font) {
|
||||
height = font.getFontMatrix().transformPoint(0, glyphHeight).y;
|
||||
} else {
|
||||
height = glyphHeight / 1000;
|
||||
}
|
||||
|
||||
return height;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* A method provided as an event interface to allow a subclass to perform some specific
|
||||
* functionality when text needs to be processed.
|
||||
*
|
||||
* @param text The text to be processed.
|
||||
*/
|
||||
protected void processTextPosition(TextPosition text) {
|
||||
// subclasses can override to provide specific functionality
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,335 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.parsing;
|
||||
|
||||
import java.awt.geom.Point2D;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.pdfbox.contentstream.operator.Operator;
|
||||
import org.apache.pdfbox.contentstream.operator.OperatorName;
|
||||
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColor;
|
||||
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColorN;
|
||||
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColorSpace;
|
||||
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceCMYKColor;
|
||||
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceGrayColor;
|
||||
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceRGBColor;
|
||||
import org.apache.pdfbox.contentstream.operator.color.SetStrokingColor;
|
||||
import org.apache.pdfbox.contentstream.operator.color.SetStrokingColorN;
|
||||
import org.apache.pdfbox.contentstream.operator.color.SetStrokingColorSpace;
|
||||
import org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceCMYKColor;
|
||||
import org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceGrayColor;
|
||||
import org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceRGBColor;
|
||||
import org.apache.pdfbox.contentstream.operator.state.SetFlatness;
|
||||
import org.apache.pdfbox.contentstream.operator.state.SetLineCapStyle;
|
||||
import org.apache.pdfbox.contentstream.operator.state.SetLineDashPattern;
|
||||
import org.apache.pdfbox.contentstream.operator.state.SetLineJoinStyle;
|
||||
import org.apache.pdfbox.contentstream.operator.state.SetLineMiterLimit;
|
||||
import org.apache.pdfbox.contentstream.operator.state.SetLineWidth;
|
||||
import org.apache.pdfbox.contentstream.operator.state.SetRenderingIntent;
|
||||
import org.apache.pdfbox.contentstream.operator.text.SetFontAndSize;
|
||||
import org.apache.pdfbox.cos.COSBase;
|
||||
import org.apache.pdfbox.cos.COSNumber;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.text.TextPosition;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.table.Ruling;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text.RedTextPosition;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text.TextPositionSequence;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
public class PDFLinesTextStripper extends PDFTextStripper {
|
||||
|
||||
@Getter
|
||||
private final List<TextPositionSequence> textPositionSequences = new ArrayList<>();
|
||||
@Getter
|
||||
private final List<Ruling> rulings = new ArrayList<>();
|
||||
private final List<Ruling> graphicsPath = new ArrayList<>();
|
||||
@Setter
|
||||
protected PDPage pdpage;
|
||||
@Getter
|
||||
private int minCharWidth;
|
||||
@Getter
|
||||
private int maxCharWidth;
|
||||
@Getter
|
||||
private int minCharHeight;
|
||||
@Getter
|
||||
private int maxCharHeight;
|
||||
|
||||
private float path_x;
|
||||
private float path_y;
|
||||
|
||||
@Setter
|
||||
private int pageNumber;
|
||||
|
||||
|
||||
/**
 * Creates the stripper and registers, in addition to the operators of the superclass, the color,
 * line-style and font operators listed below.
 *
 * @throws IOException if the superclass constructor fails
 */
public PDFLinesTextStripper() throws IOException {

    // The superclass constructor is invoked implicitly.
    addOperator(new SetStrokingColorSpace());
    addOperator(new SetNonStrokingColorSpace());
    addOperator(new SetLineDashPattern());
    addOperator(new SetStrokingDeviceGrayColor());
    addOperator(new SetNonStrokingDeviceGrayColor());
    addOperator(new SetFlatness());
    addOperator(new SetLineJoinStyle());
    addOperator(new SetLineCapStyle());
    addOperator(new SetStrokingDeviceCMYKColor());
    addOperator(new SetNonStrokingDeviceCMYKColor());
    addOperator(new SetLineMiterLimit());
    addOperator(new SetStrokingDeviceRGBColor());
    addOperator(new SetNonStrokingDeviceRGBColor());
    addOperator(new SetRenderingIntent());
    addOperator(new SetStrokingColor());
    addOperator(new SetNonStrokingColor());
    addOperator(new SetStrokingColorN());
    addOperator(new SetNonStrokingColorN());
    addOperator(new SetFontAndSize());
    addOperator(new SetLineWidth());
}
|
||||
|
||||
|
||||
@Override
|
||||
protected void processOperator(Operator operator, List<COSBase> arguments) throws IOException {
|
||||
|
||||
String operation = operator.getName();
|
||||
|
||||
//move
|
||||
switch (operation) {
|
||||
case OperatorName.MOVE_TO:
|
||||
if (arguments.size() == 2) {
|
||||
Point2D.Float pos = transformPosition(floatValue(arguments.get(0)), floatValue(arguments.get(1)));
|
||||
path_x = (float) pos.getX();
|
||||
path_y = (float) pos.getY();
|
||||
}
|
||||
break;
|
||||
|
||||
//line
|
||||
case OperatorName.LINE_TO:
|
||||
if (arguments.size() == 2) {
|
||||
Point2D.Float pos = transformPosition(floatValue(arguments.get(0)), floatValue(arguments.get(1)));
|
||||
|
||||
// The direction of vertical lines must always be from bottom to top for the table extraction algorithm.
|
||||
if (pos.getY() > path_y) {
|
||||
graphicsPath.add(new Ruling(new Point2D.Float(path_x, path_y), new Point2D.Float((float) pos.getX(), (float) pos.getY())));
|
||||
} else {
|
||||
graphicsPath.add(new Ruling(new Point2D.Float(path_x, (float) pos.getY()), new Point2D.Float((float) pos.getX(), path_y)));
|
||||
}
|
||||
|
||||
path_x = (float) pos.getX();
|
||||
path_y = (float) pos.getY();
|
||||
}
|
||||
break;
|
||||
|
||||
//rectangle
|
||||
case OperatorName.APPEND_RECT:
|
||||
|
||||
if (arguments.size() == 4) {
|
||||
float x = floatValue(arguments.get(0));
|
||||
float y = floatValue(arguments.get(1));
|
||||
float width = floatValue(arguments.get(2));
|
||||
float height = floatValue(arguments.get(3));
|
||||
|
||||
Point2D p1 = transformPosition(x, y);
|
||||
Point2D p2 = transformPosition(x + width, y + height);
|
||||
|
||||
// Horizontal lines
|
||||
graphicsPath.add(new Ruling(new Point2D.Float((float) p1.getX(), (float) p1.getY()), new Point2D.Float((float) p2.getX(), (float) p1.getY())));
|
||||
graphicsPath.add(new Ruling(new Point2D.Float((float) p1.getX(), (float) p2.getY()), new Point2D.Float((float) p2.getX(), (float) p2.getY())));
|
||||
|
||||
// Vertical lines, direction must always be from bottom to top for the table extraction algorithm.
|
||||
if (p2.getY() > p1.getY()) {
|
||||
graphicsPath.add(new Ruling(new Point2D.Float((float) p2.getX(), (float) p1.getY()), new Point2D.Float((float) p2.getX(), (float) p2.getY())));
|
||||
} else {
|
||||
graphicsPath.add(new Ruling(new Point2D.Float((float) p2.getX(), (float) p2.getY()), new Point2D.Float((float) p2.getX(), (float) p1.getY())));
|
||||
}
|
||||
if (p2.getY() > p1.getY()) {
|
||||
graphicsPath.add(new Ruling(new Point2D.Float((float) p1.getX(), (float) p1.getY()), new Point2D.Float((float) p1.getX(), (float) p2.getY())));
|
||||
} else {
|
||||
graphicsPath.add(new Ruling(new Point2D.Float((float) p1.getX(), (float) p2.getY()), new Point2D.Float((float) p1.getX(), (float) p1.getY())));
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
//fill
|
||||
case OperatorName.FILL_NON_ZERO:
|
||||
case OperatorName.LEGACY_FILL_NON_ZERO:
|
||||
case OperatorName.FILL_EVEN_ODD:
|
||||
addVisibleRulings(graphicsPath, false);
|
||||
graphicsPath.clear();
|
||||
break;
|
||||
|
||||
//stroke
|
||||
case OperatorName.STROKE_PATH:
|
||||
addVisibleRulings(graphicsPath, true);
|
||||
graphicsPath.clear();
|
||||
break;
|
||||
|
||||
//cancel path
|
||||
case OperatorName.ENDPATH:
|
||||
graphicsPath.clear();
|
||||
break;
|
||||
|
||||
}
|
||||
|
||||
super.processOperator(operator, arguments);
|
||||
}
|
||||
|
||||
|
||||
private float floatValue(COSBase value) {
|
||||
|
||||
if (value instanceof COSNumber) {
|
||||
return ((COSNumber) value).floatValue();
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private Point2D.Float transformPosition(float x, float y) {
|
||||
|
||||
return super.transformedPoint(x, y);
|
||||
}
|
||||
|
||||
|
||||
private void addVisibleRulings(List<Ruling> path, boolean stroke) throws IOException {
|
||||
|
||||
try {
|
||||
if (stroke && !getGraphicsState().getStrokingColor().isPattern() && getGraphicsState().getStrokingColor()
|
||||
.toRGB() == 0 || !stroke && !getGraphicsState().getNonStrokingColor().isPattern() && getGraphicsState().getNonStrokingColor().toRGB() == 0) {
|
||||
rulings.addAll(path);
|
||||
}
|
||||
} catch (UnsupportedOperationException e) {
|
||||
log.debug("UnsupportedOperationException: " + getGraphicsState().getStrokingColor().getColorSpace().getName() + " or " + getGraphicsState().getNonStrokingColor()
|
||||
.getColorSpace()
|
||||
.getName() + " does not support toRGB");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void writeString(String text, List<TextPosition> textPositions) throws IOException {
|
||||
|
||||
int startIndex = 0;
|
||||
RedTextPosition previous = null;
|
||||
|
||||
textPositions.sort(Comparator.comparing(TextPosition::getXDirAdj));
|
||||
|
||||
for (int i = 0; i <= textPositions.size() - 1; i++) {
|
||||
|
||||
if (!textPositionSequences.isEmpty()) {
|
||||
previous = textPositionSequences.get(textPositionSequences.size() - 1)
|
||||
.getTextPositions()
|
||||
.get(textPositionSequences.get(textPositionSequences.size() - 1).getTextPositions().size() - 1);
|
||||
}
|
||||
|
||||
int charWidth = (int) textPositions.get(i).getWidthDirAdj();
|
||||
if (charWidth < minCharWidth) {
|
||||
minCharWidth = charWidth;
|
||||
}
|
||||
if (charWidth > maxCharWidth) {
|
||||
maxCharWidth = charWidth;
|
||||
}
|
||||
|
||||
int charHeight = (int) textPositions.get(i).getHeightDir();
|
||||
if (charHeight < minCharHeight) {
|
||||
minCharHeight = charHeight;
|
||||
}
|
||||
if (charWidth > maxCharHeight) {
|
||||
maxCharHeight = charHeight;
|
||||
}
|
||||
|
||||
if (i == 0 && (textPositions.get(i).getUnicode().equals(" ") || textPositions.get(i).getUnicode().equals("\u00A0") || textPositions.get(i).getUnicode().equals("\t"))) {
|
||||
startIndex++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Strange but sometimes this is happening, for example: Metolachlor2.pdf
|
||||
if (i > 0 && textPositions.get(i).getXDirAdj() < textPositions.get(i - 1).getXDirAdj()) {
|
||||
List<TextPosition> sublist = textPositions.subList(startIndex, i);
|
||||
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0).getUnicode().equals(" ") || sublist.get(0).getUnicode().equals("\u00A0") || sublist.get(0)
|
||||
.getUnicode()
|
||||
.equals("\t")))) {
|
||||
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
|
||||
}
|
||||
startIndex = i;
|
||||
}
|
||||
|
||||
if (textPositions.get(i).getRotation() == 0 && i > 0 && textPositions.get(i).getX() > textPositions.get(i - 1).getEndX() + 1) {
|
||||
List<TextPosition> sublist = textPositions.subList(startIndex, i);
|
||||
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0).getUnicode().equals(" ") || sublist.get(0).getUnicode().equals("\u00A0") || sublist.get(0)
|
||||
.getUnicode()
|
||||
.equals("\t")))) {
|
||||
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
|
||||
}
|
||||
startIndex = i;
|
||||
}
|
||||
|
||||
if (i > 0 && (textPositions.get(i).getUnicode().equals(" ") || textPositions.get(i).getUnicode().equals("\u00A0") || textPositions.get(i)
|
||||
.getUnicode()
|
||||
.equals("\t")) && i <= textPositions.size() - 2) {
|
||||
List<TextPosition> sublist = textPositions.subList(startIndex, i);
|
||||
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0).getUnicode().equals(" ") || sublist.get(0).getUnicode().equals("\u00A0") || sublist.get(0)
|
||||
.getUnicode()
|
||||
.equals("\t")))) {
|
||||
|
||||
// Remove false sequence ends (whitespaces)
|
||||
if (previous != null && sublist.get(0).getYDirAdj() == previous.getYDirAdj() && sublist.get(0)
|
||||
.getXDirAdj() - (previous.getXDirAdj() + previous.getWidthDirAdj()) < 0.01) {
|
||||
for (TextPosition textPosition : sublist) {
|
||||
textPositionSequences.get(textPositionSequences.size() - 1).add(textPosition);
|
||||
}
|
||||
} else {
|
||||
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
|
||||
}
|
||||
}
|
||||
startIndex = i + 1;
|
||||
}
|
||||
}
|
||||
|
||||
List<TextPosition> sublist = textPositions.subList(startIndex, textPositions.size());
|
||||
if (!sublist.isEmpty() && (sublist.get(sublist.size() - 1).getUnicode().equals(" ") || sublist.get(sublist.size() - 1)
|
||||
.getUnicode()
|
||||
.equals("\u00A0") || sublist.get(sublist.size() - 1).getUnicode().equals("\t"))) {
|
||||
sublist = sublist.subList(0, sublist.size() - 1);
|
||||
}
|
||||
|
||||
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0).getUnicode().equals(" ") || sublist.get(0).getUnicode().equals("\u00A0") || sublist.get(0)
|
||||
.getUnicode()
|
||||
.equals("\t")))) {
|
||||
if (previous != null && sublist.get(0).getYDirAdj() == previous.getYDirAdj() && sublist.get(0)
|
||||
.getXDirAdj() - (previous.getXDirAdj() + previous.getWidthDirAdj()) < 0.01) {
|
||||
for (TextPosition t : sublist) {
|
||||
textPositionSequences.get(textPositionSequences.size() - 1).add(t);
|
||||
}
|
||||
} else {
|
||||
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
|
||||
}
|
||||
}
|
||||
super.writeString(text);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String getText(PDDocument doc) throws IOException {
|
||||
|
||||
minCharWidth = Integer.MAX_VALUE;
|
||||
maxCharWidth = 0;
|
||||
minCharHeight = Integer.MAX_VALUE;
|
||||
maxCharHeight = 0;
|
||||
textPositionSequences.clear();
|
||||
rulings.clear();
|
||||
graphicsPath.clear();
|
||||
path_x = 0.0f;
|
||||
path_y = 0.0f;
|
||||
|
||||
return super.getText(doc);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,25 +0,0 @@
|
||||
package com.iqser.red.service.redaction.v1.server.layoutparsing.classification.service;
|
||||
|
||||
import static java.util.stream.Collectors.toSet;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.AbstractPageBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.ClassificationPage;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.FloatFrequencyCounter;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.Orientation;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.table.Ruling;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text.StringFrequencyCounter;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text.TextPageBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.model.text.TextPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.utils.RulingTextDirAdjustUtil;
|
||||
|
||||
public interface BlockificationService {
|
||||
|
||||
ClassificationPage blockify(List<TextPositionSequence> textPositions, List<Ruling> horizontalRulingLines, List<Ruling> verticalRulingLines);
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user