RED-41: Move and adapt redaction-service
This commit is contained in:
parent
5c03fb876f
commit
67077bb73e
12
CHANGELOG.md
Normal file
12
CHANGELOG.md
Normal file
@ -0,0 +1,12 @@
|
||||
# Changelog
|
||||
All notable changes to this project will be documented in this file.
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Fixed
|
||||
|
||||
### Added
|
||||
|
||||
### Changed
|
||||
|
||||
### Removed
|
||||
37
bamboo-specs/pom.xml
Normal file
37
bamboo-specs/pom.xml
Normal file
@ -0,0 +1,37 @@
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<parent>
|
||||
<groupId>com.atlassian.bamboo</groupId>
|
||||
<artifactId>bamboo-specs-parent</artifactId>
|
||||
<version>7.0.4</version>
|
||||
<relativePath/>
|
||||
</parent>
|
||||
|
||||
<artifactId>bamboo-specs</artifactId>
|
||||
<version>1.0.0-SNAPSHOT</version>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>com.atlassian.bamboo</groupId>
|
||||
<artifactId>bamboo-specs-api</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.atlassian.bamboo</groupId>
|
||||
<artifactId>bamboo-specs</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Test dependencies -->
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
|
||||
<!-- run 'mvn test' to perform offline validation of the plan -->
|
||||
<!-- run 'mvn -Ppublish-specs' to upload the plan to your Bamboo server -->
|
||||
</project>
|
||||
137
bamboo-specs/src/main/java/buildjob/PlanSpec.java
Normal file
137
bamboo-specs/src/main/java/buildjob/PlanSpec.java
Normal file
@ -0,0 +1,137 @@
|
||||
package buildjob;
|
||||
|
||||
import static com.atlassian.bamboo.specs.builders.task.TestParserTask.createJUnitParserTask;
|
||||
|
||||
import com.atlassian.bamboo.specs.api.BambooSpec;
|
||||
import com.atlassian.bamboo.specs.api.builders.BambooKey;
|
||||
import com.atlassian.bamboo.specs.api.builders.docker.DockerConfiguration;
|
||||
import com.atlassian.bamboo.specs.api.builders.permission.PermissionType;
|
||||
import com.atlassian.bamboo.specs.api.builders.permission.Permissions;
|
||||
import com.atlassian.bamboo.specs.api.builders.permission.PlanPermissions;
|
||||
import com.atlassian.bamboo.specs.api.builders.plan.Job;
|
||||
import com.atlassian.bamboo.specs.api.builders.plan.Plan;
|
||||
import com.atlassian.bamboo.specs.api.builders.plan.PlanIdentifier;
|
||||
import com.atlassian.bamboo.specs.api.builders.plan.Stage;
|
||||
import com.atlassian.bamboo.specs.api.builders.plan.branches.BranchCleanup;
|
||||
import com.atlassian.bamboo.specs.api.builders.plan.branches.PlanBranchManagement;
|
||||
import com.atlassian.bamboo.specs.api.builders.project.Project;
|
||||
import com.atlassian.bamboo.specs.builders.task.CheckoutItem;
|
||||
import com.atlassian.bamboo.specs.builders.task.InjectVariablesTask;
|
||||
import com.atlassian.bamboo.specs.builders.task.ScriptTask;
|
||||
import com.atlassian.bamboo.specs.builders.task.VcsCheckoutTask;
|
||||
import com.atlassian.bamboo.specs.builders.task.VcsTagTask;
|
||||
import com.atlassian.bamboo.specs.builders.trigger.BitbucketServerTrigger;
|
||||
import com.atlassian.bamboo.specs.model.task.InjectVariablesScope;
|
||||
import com.atlassian.bamboo.specs.util.BambooServer;
|
||||
|
||||
/**
|
||||
* Plan configuration for Bamboo.
|
||||
* Learn more on: <a href="https://confluence.atlassian.com/display/BAMBOO/Bamboo+Specs">https://confluence.atlassian.com/display/BAMBOO/Bamboo+Specs</a>
|
||||
*/
|
||||
@BambooSpec
|
||||
public class PlanSpec {
|
||||
|
||||
private static final String SERVICE_NAME = "redaction-service";
|
||||
|
||||
private static final String SERVICE_KEY = SERVICE_NAME.toUpperCase().replaceAll("-", "");
|
||||
|
||||
/**
|
||||
* Run main to publish plan on Bamboo
|
||||
*/
|
||||
public static void main(final String[] args) throws Exception {
|
||||
//By default credentials are read from the '.credentials' file.
|
||||
BambooServer bambooServer = new BambooServer("http://localhost:8085");
|
||||
|
||||
Plan plan = new PlanSpec().createPlan();
|
||||
bambooServer.publish(plan);
|
||||
PlanPermissions planPermission = new PlanSpec().createPlanPermission(plan.getIdentifier());
|
||||
bambooServer.publish(planPermission);
|
||||
}
|
||||
|
||||
private PlanPermissions createPlanPermission(PlanIdentifier planIdentifier) {
|
||||
Permissions permission = new Permissions()
|
||||
.userPermissions("atlbamboo", PermissionType.EDIT, PermissionType.VIEW, PermissionType.ADMIN, PermissionType.CLONE, PermissionType.BUILD)
|
||||
.groupPermissions("gin4", PermissionType.EDIT, PermissionType.VIEW, PermissionType.CLONE, PermissionType.BUILD)
|
||||
.loggedInUserPermissions(PermissionType.VIEW)
|
||||
.anonymousUserPermissionView();
|
||||
return new PlanPermissions(planIdentifier.getProjectKey(), planIdentifier.getPlanKey()).permissions(permission);
|
||||
}
|
||||
|
||||
private Project project() {
|
||||
return new Project()
|
||||
.name("RED")
|
||||
.key(new BambooKey("RED"));
|
||||
}
|
||||
|
||||
public Plan createPlan() {
|
||||
return new Plan(
|
||||
project(),
|
||||
SERVICE_NAME, new BambooKey(SERVICE_KEY))
|
||||
.description("Plan created from (enter repository url of your plan)")
|
||||
.stages(new Stage("Default Stage")
|
||||
.jobs(new Job("Default Job",
|
||||
new BambooKey("JOB1"))
|
||||
.tasks(
|
||||
new ScriptTask()
|
||||
.description("Clean")
|
||||
.inlineBody("#!/bin/bash\n" +
|
||||
"set -e\n" +
|
||||
"rm -rf ./*"),
|
||||
new VcsCheckoutTask()
|
||||
.description("Checkout Default Repository")
|
||||
.checkoutItems(new CheckoutItem().defaultRepository()),
|
||||
new ScriptTask()
|
||||
.description("Clean")
|
||||
.inlineBody("#!/bin/bash\n" +
|
||||
"set -e\n" +
|
||||
"rm -rf ./*"),
|
||||
new VcsCheckoutTask()
|
||||
.description("Checkout Default Repository")
|
||||
.checkoutItems(new CheckoutItem().defaultRepository()),
|
||||
new ScriptTask()
|
||||
.description("Build")
|
||||
.inlineBody("#!/bin/bash\n" +
|
||||
"set -e\n" +
|
||||
|
||||
"if [[ \"${bamboo.version_tag}\" != \"dev\" ]]; then ${bamboo_capability_system_builder_mvn3_Maven_3}/bin/mvn --no-transfer-progress -f ${bamboo_build_working_directory}/" + SERVICE_NAME + "-v1/pom.xml versions:set -DnewVersion=${bamboo.version_tag}; fi\n" +
|
||||
"if [[ \"${bamboo.version_tag}\" != \"dev\" ]]; then ${bamboo_capability_system_builder_mvn3_Maven_3}/bin/mvn --no-transfer-progress -f ${bamboo_build_working_directory}/" + SERVICE_NAME + "-image-v1/pom.xml versions:set -DnewVersion=${bamboo.version_tag}; fi\n" +
|
||||
|
||||
"if [[ \"${bamboo.version_tag}\" = \"dev\" ]]; then ${bamboo_capability_system_builder_mvn3_Maven_3}/bin/mvn -f ${bamboo_build_working_directory}/" + SERVICE_NAME + "-v1/pom.xml --no-transfer-progress clean install -Djava.security.egd=file:/dev/./urandom; fi\n" +
|
||||
"if [[ \"${bamboo.version_tag}\" != \"dev\" ]]; then ${bamboo_capability_system_builder_mvn3_Maven_3}/bin/mvn -f ${bamboo_build_working_directory}/" + SERVICE_NAME + "-v1/pom.xml --no-transfer-progress clean deploy -e -DdeployAtEnd=true -Dmaven.wagon.http.ssl.insecure=true -Dmaven.wagon.http.ssl.allowall=true -Dmaven.wagon.http.ssl.ignore.validity.dates=true -DaltDeploymentRepository=iqser_release::default::https://nexus.iqser.com/repository/red-platform-releases; fi\n" +
|
||||
|
||||
"${bamboo_capability_system_builder_mvn3_Maven_3}/bin/mvn --no-transfer-progress -f ${bamboo_build_working_directory}/" + SERVICE_NAME + "-image-v1/pom.xml package\n" +
|
||||
"${bamboo_capability_system_builder_mvn3_Maven_3}/bin/mvn --no-transfer-progress -f ${bamboo_build_working_directory}/" + SERVICE_NAME + "-image-v1/pom.xml docker:push\n" +
|
||||
|
||||
"if [[ \"${bamboo.version_tag}\" = \"dev\" ]]; then echo \"gitTag=${bamboo.planRepository.1.branch}_${bamboo.buildNumber}\" > git.tag; fi\n" +
|
||||
"if [[ \"${bamboo.version_tag}\" != \"dev\" ]]; then echo \"gitTag=${bamboo.version_tag}\" > git.tag; fi\n"),
|
||||
createJUnitParserTask()
|
||||
.description("Resultparser")
|
||||
.resultDirectories("**/test-reports/*.xml, **/target/surefire-reports/*.xml, **/target/failsafe-reports/*.xml")
|
||||
.enabled(true),
|
||||
new InjectVariablesTask()
|
||||
.description("Inject git Tag")
|
||||
.path("git.tag")
|
||||
.namespace("g")
|
||||
.scope(InjectVariablesScope.LOCAL),
|
||||
new VcsTagTask()
|
||||
.description("${bamboo.g.gitTag}")
|
||||
.tagName("${bamboo.g.gitTag}")
|
||||
.defaultRepository())
|
||||
.dockerConfiguration(
|
||||
new DockerConfiguration()
|
||||
.image("nexus.iqser.com:5001/infra/maven:3.6.2-jdk-13-3.0.0")
|
||||
.volume("/etc/maven/settings.xml", "/usr/share/maven/ref/settings.xml")
|
||||
.volume("/var/run/docker.sock", "/var/run/docker.sock")
|
||||
)
|
||||
)
|
||||
)
|
||||
.linkedRepositories("RED / " + SERVICE_NAME)
|
||||
|
||||
.triggers(new BitbucketServerTrigger())
|
||||
.planBranchManagement(new PlanBranchManagement()
|
||||
.createForVcsBranch()
|
||||
.delete(new BranchCleanup()
|
||||
.whenInactiveInRepositoryAfterDays(14))
|
||||
.notificationForCommitters());
|
||||
}
|
||||
}
|
||||
17
bamboo-specs/src/test/java/buildjob/PlanSpecTest.java
Normal file
17
bamboo-specs/src/test/java/buildjob/PlanSpecTest.java
Normal file
@ -0,0 +1,17 @@
|
||||
package buildjob;
|
||||
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
import com.atlassian.bamboo.specs.api.builders.plan.Plan;
|
||||
import com.atlassian.bamboo.specs.api.exceptions.PropertiesValidationException;
|
||||
import com.atlassian.bamboo.specs.api.util.EntityPropertiesBuilders;
|
||||
|
||||
public class PlanSpecTest {
|
||||
@Test
|
||||
public void checkYourPlanOffline() throws PropertiesValidationException {
|
||||
Plan plan = new PlanSpec().createPlan();
|
||||
|
||||
EntityPropertiesBuilders.build(plan);
|
||||
}
|
||||
}
|
||||
21
pom.xml
Normal file
21
pom.xml
Normal file
@ -0,0 +1,21 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<artifactId>redaction-service</artifactId>
|
||||
<groupId>com.iqser.red.service</groupId>
|
||||
<version>1.0-SNAPSHOT</version>
|
||||
|
||||
|
||||
<packaging>pom</packaging>
|
||||
|
||||
<modules>
|
||||
<module>bamboo-specs</module>
|
||||
<module>redaction-service-v1</module>
|
||||
<module>redaction-service-image-v1</module>
|
||||
</modules>
|
||||
|
||||
</project>
|
||||
97
redaction-service-image-v1/pom.xml
Normal file
97
redaction-service-image-v1/pom.xml
Normal file
@ -0,0 +1,97 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<parent>
|
||||
<groupId>com.iqser.red</groupId>
|
||||
<artifactId>platform-docker-dependency</artifactId>
|
||||
<version>1.0.0</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<artifactId>redaction-service-image-v1</artifactId>
|
||||
<groupId>com.iqser.red.service</groupId>
|
||||
<version>1.0-SNAPSHOT</version>
|
||||
<packaging>pom</packaging>
|
||||
|
||||
|
||||
<properties>
|
||||
<service.server>redaction-service-server-v1</service.server>
|
||||
<platform.jar>${service.server}.jar</platform.jar>
|
||||
<docker.skip.push>false</docker.skip.push>
|
||||
<docker.image.name>${docker.image.prefix}/${service.server}</docker.image.name>
|
||||
</properties>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-dependency-plugin</artifactId>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-resources-plugin</artifactId>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.codehaus.mojo</groupId>
|
||||
<artifactId>exec-maven-plugin</artifactId>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>io.fabric8</groupId>
|
||||
<artifactId>docker-maven-plugin</artifactId>
|
||||
</plugin>
|
||||
</plugins>
|
||||
|
||||
<pluginManagement>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-dependency-plugin</artifactId>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>download-platform-jar</id>
|
||||
<phase>prepare-package</phase>
|
||||
<goals>
|
||||
<goal>copy</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<artifactItems>
|
||||
<dependency>
|
||||
<groupId>${project.groupId}</groupId>
|
||||
<artifactId>${service.server}</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<type>jar</type>
|
||||
<overWrite>true</overWrite>
|
||||
<destFileName>${platform.jar}</destFileName>
|
||||
</dependency>
|
||||
</artifactItems>
|
||||
<outputDirectory>${docker.build.directory}</outputDirectory>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>io.fabric8</groupId>
|
||||
<artifactId>docker-maven-plugin</artifactId>
|
||||
<configuration>
|
||||
<images>
|
||||
<image>
|
||||
<name>${docker.image.name}</name>
|
||||
<build>
|
||||
<dockerFileDir>${docker.build.directory}</dockerFileDir>
|
||||
<args>
|
||||
<PLATFORM_JAR>${platform.jar}</PLATFORM_JAR>
|
||||
</args>
|
||||
<tags>
|
||||
<tag>${docker.image.version}</tag>
|
||||
<tag>latest</tag>
|
||||
</tags>
|
||||
</build>
|
||||
</image>
|
||||
</images>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</pluginManagement>
|
||||
</build>
|
||||
</project>
|
||||
9
redaction-service-image-v1/src/main/docker/Dockerfile
Normal file
9
redaction-service-image-v1/src/main/docker/Dockerfile
Normal file
@ -0,0 +1,9 @@
|
||||
# Base image for the service container.
FROM gin5/platform-base:5.2.0

# File name of the service jar; passed in as a build argument by the Maven docker build.
ARG PLATFORM_JAR

# Expose the jar name to the running container as well (ARG values are build-time only).
ENV PLATFORM_JAR=${PLATFORM_JAR}

ENV USES_ELASTICSEARCH=false

# Copy the jar from the build context into the image root.
COPY ["${PLATFORM_JAR}", "/"]
|
||||
55
redaction-service-v1/pom.xml
Normal file
55
redaction-service-v1/pom.xml
Normal file
@ -0,0 +1,55 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<parent>
|
||||
<artifactId>platform-dependency</artifactId>
|
||||
<groupId>com.iqser.red</groupId>
|
||||
<version>1.0.1</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<artifactId>redaction-service-v1</artifactId>
|
||||
<groupId>com.iqser.red.service</groupId>
|
||||
<version>1.0-SNAPSHOT</version>
|
||||
|
||||
|
||||
<packaging>pom</packaging>
|
||||
|
||||
<modules>
|
||||
<module>redaction-service-api-v1</module>
|
||||
<module>redaction-service-server-v1</module>
|
||||
</modules>
|
||||
|
||||
<properties>
    <!-- PDFBox version for the managed pdfbox / pdfbox-tools dependencies below;
         aligned with the 2.0.20 that redaction-service-server-v1 resolves. -->
    <pdfbox.version>2.0.20</pdfbox.version>
</properties>
|
||||
|
||||
|
||||
<dependencyManagement>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>com.iqser.red</groupId>
|
||||
<artifactId>platform-commons-dependency</artifactId>
|
||||
<version>1.0.0</version>
|
||||
<scope>import</scope>
|
||||
<type>pom</type>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.pdfbox</groupId>
|
||||
<artifactId>pdfbox</artifactId>
|
||||
<version>${pdfbox.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.pdfbox</groupId>
|
||||
<artifactId>pdfbox-tools</artifactId>
|
||||
<version>${pdfbox.version}</version>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
|
||||
</dependencyManagement>
|
||||
|
||||
</project>
|
||||
21
redaction-service-v1/redaction-service-api-v1/pom.xml
Normal file
21
redaction-service-v1/redaction-service-api-v1/pom.xml
Normal file
@ -0,0 +1,21 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<parent>
|
||||
<artifactId>redaction-service-v1</artifactId>
|
||||
<groupId>com.iqser.red.service</groupId>
|
||||
<version>1.0-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<artifactId>redaction-service-api-v1</artifactId>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.springframework</groupId>
|
||||
<artifactId>spring-web</artifactId>
|
||||
<optional>true</optional>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
||||
@ -0,0 +1,16 @@
|
||||
package com.iqser.red.service.redaction.v1.model;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class RedactionRequest {
|
||||
|
||||
private byte[] document;
|
||||
private boolean flatRedaction;
|
||||
}
|
||||
@ -0,0 +1,16 @@
|
||||
package com.iqser.red.service.redaction.v1.model;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class RedactionResult {
|
||||
|
||||
private byte[] document;
|
||||
private int numberOfPages;
|
||||
}
|
||||
@ -0,0 +1,31 @@
|
||||
package com.iqser.red.service.redaction.v1.resources;
|
||||
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestBody;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionResult;
|
||||
|
||||
public interface RedactionResource {
|
||||
|
||||
String SERVICE_NAME = "redaction-service-v1";
|
||||
|
||||
@PostMapping(value = "/redact", produces = MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE)
|
||||
RedactionResult redact(@RequestBody RedactionRequest redactionRequest);
|
||||
|
||||
@PostMapping(value = "/debug/classifications", produces = MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE)
|
||||
RedactionResult classify(@RequestBody RedactionRequest redactionRequest);
|
||||
|
||||
@PostMapping(value = "/debug/sections", produces = MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE)
|
||||
RedactionResult sections(@RequestBody RedactionRequest redactionRequest);
|
||||
|
||||
@PostMapping(value = "/debug/htmlTables", produces = MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE)
|
||||
RedactionResult htmlTables(@RequestBody RedactionRequest redactionRequest);
|
||||
|
||||
@PostMapping(value = "/rules", produces = MediaType.APPLICATION_JSON_VALUE)
|
||||
String getRules();
|
||||
|
||||
@PostMapping(value = "/rules/update", consumes = MediaType.APPLICATION_JSON_VALUE)
|
||||
void updateRules(@RequestBody String rules);
|
||||
}
|
||||
159
redaction-service-v1/redaction-service-server-v1/pom.xml
Normal file
159
redaction-service-v1/redaction-service-server-v1/pom.xml
Normal file
@ -0,0 +1,159 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<parent>
|
||||
<artifactId>redaction-service-v1</artifactId>
|
||||
<groupId>com.iqser.red.service</groupId>
|
||||
<version>1.0-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<artifactId>redaction-service-server-v1</artifactId>
|
||||
|
||||
<properties>
|
||||
<pdfbox.version>2.0.20</pdfbox.version>
|
||||
</properties>
|
||||
|
||||
<dependencyManagement>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.apache.pdfbox</groupId>
|
||||
<artifactId>pdfbox</artifactId>
|
||||
<version>${pdfbox.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.pdfbox</groupId>
|
||||
<artifactId>pdfbox-tools</artifactId>
|
||||
<version>${pdfbox.version}</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</dependencyManagement>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.drools</groupId>
|
||||
<artifactId>drools-core</artifactId>
|
||||
<version>7.37.0.Final</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.kie</groupId>
|
||||
<artifactId>kie-spring</artifactId>
|
||||
<version>7.37.0.Final</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.locationtech.jts</groupId>
|
||||
<artifactId>jts-core</artifactId>
|
||||
<version>1.16.1</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.iqser.red.service</groupId>
|
||||
<artifactId>redaction-service-api-v1</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<!-- commons -->
|
||||
<dependency>
|
||||
<groupId>com.iqser.gin4.commons</groupId>
|
||||
<artifactId>spring-commons</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.iqser.gin4.commons</groupId>
|
||||
<artifactId>logging-commons</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.iqser.gin4.commons</groupId>
|
||||
<artifactId>metric-commons</artifactId>
|
||||
</dependency>
|
||||
<!-- other external -->
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
<artifactId>commons-lang3</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.pdfbox</groupId>
|
||||
<artifactId>pdfbox</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.pdfbox</groupId>
|
||||
<artifactId>pdfbox-tools</artifactId>
|
||||
</dependency>
|
||||
<!-- spring -->
|
||||
<dependency>
|
||||
<groupId>org.springframework.cloud</groupId>
|
||||
<artifactId>spring-cloud-starter-openfeign</artifactId>
|
||||
</dependency>
|
||||
<!-- ribbon must be included explicitly because there is a transitive dependency on it. -->
|
||||
<dependency>
|
||||
<groupId>org.springframework.cloud</groupId>
|
||||
<artifactId>spring-cloud-starter-netflix-ribbon</artifactId>
|
||||
</dependency>
|
||||
<!-- test dependencies -->
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-test</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.iqser.gin4.commons</groupId>
|
||||
<artifactId>test-commons</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<!-- generate git.properties for exposure in /info -->
|
||||
<groupId>pl.project13.maven</groupId>
|
||||
<artifactId>git-commit-id-plugin</artifactId>
|
||||
<executions>
|
||||
<execution>
|
||||
<goals>
|
||||
<goal>revision</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<generateGitPropertiesFile>true</generateGitPropertiesFile>
|
||||
<gitDescribe>
|
||||
<tags>true</tags>
|
||||
</gitDescribe>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-jar-plugin</artifactId>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>original-jar</id>
|
||||
<goals>
|
||||
<goal>jar</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<classifier>original</classifier>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
|
||||
<plugin>
|
||||
<!-- repackages the generated jar into a runnable fat-jar and makes it
|
||||
executable -->
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-maven-plugin</artifactId>
|
||||
<executions>
|
||||
<execution>
|
||||
<goals>
|
||||
<goal>repackage</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<executable>true</executable>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
</project>
|
||||
@ -0,0 +1,44 @@
|
||||
package com.iqser.red.service.redaction.v1.server;
|
||||
|
||||
import org.kie.api.KieServices;
|
||||
import org.kie.api.builder.KieBuilder;
|
||||
import org.kie.api.builder.KieFileSystem;
|
||||
import org.kie.api.builder.KieModule;
|
||||
import org.kie.api.runtime.KieContainer;
|
||||
import org.kie.internal.io.ResourceFactory;
|
||||
import org.springframework.boot.SpringApplication;
|
||||
import org.springframework.boot.actuate.autoconfigure.security.servlet.ManagementWebSecurityAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||
import org.springframework.boot.autoconfigure.security.servlet.SecurityAutoConfiguration;
|
||||
import org.springframework.boot.context.properties.EnableConfigurationProperties;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Import;
|
||||
|
||||
import com.iqser.gin4.commons.spring.DefaultWebMvcConfiguration;
|
||||
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
|
||||
|
||||
@Import({DefaultWebMvcConfiguration.class})
|
||||
@EnableConfigurationProperties(RedactionServiceSettings.class)
|
||||
@SpringBootApplication(exclude = { SecurityAutoConfiguration.class, ManagementWebSecurityAutoConfiguration.class })
|
||||
public class Application {
|
||||
|
||||
public static void main(String[] args) {
|
||||
SpringApplication.run(Application.class, args);
|
||||
}
|
||||
|
||||
private static final String drlFile = "drools/rules.drl";
|
||||
|
||||
@Bean
|
||||
public KieContainer kieContainer() {
|
||||
KieServices kieServices = KieServices.Factory.get();
|
||||
|
||||
KieFileSystem kieFileSystem = kieServices.newKieFileSystem();
|
||||
kieFileSystem.write(ResourceFactory.newClassPathResource(drlFile));
|
||||
KieBuilder kieBuilder = kieServices.newKieBuilder(kieFileSystem);
|
||||
kieBuilder.buildAll();
|
||||
KieModule kieModule = kieBuilder.getKieModule();
|
||||
|
||||
return kieServices.newKieContainer(kieModule.getReleaseId());
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,26 @@
|
||||
package com.iqser.red.service.redaction.v1.server.classification.model;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@NoArgsConstructor
|
||||
public class Document {
|
||||
|
||||
private List<Page> pages = new ArrayList<>();
|
||||
private List<Paragraph> paragraphs = new ArrayList<>();
|
||||
private Map<Integer, Set<Entity>> entities = new HashMap<>();
|
||||
private FloatFrequencyCounter textHeightCounter = new FloatFrequencyCounter();
|
||||
private FloatFrequencyCounter fontSizeCounter= new FloatFrequencyCounter();
|
||||
private StringFrequencyCounter fontCounter= new StringFrequencyCounter();
|
||||
private StringFrequencyCounter fontStyleCounter = new StringFrequencyCounter();
|
||||
private boolean headlines;
|
||||
}
|
||||
@ -0,0 +1,75 @@
|
||||
package com.iqser.red.service.redaction.v1.server.classification.model;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import lombok.Getter;
|
||||
|
||||
public class FloatFrequencyCounter
|
||||
{
|
||||
|
||||
@Getter
|
||||
Map<Float, Integer> countPerValue = new HashMap<>();
|
||||
|
||||
public void add(float value){
|
||||
if(!countPerValue.containsKey(value)){
|
||||
countPerValue.put(value, 1);
|
||||
} else {
|
||||
countPerValue.put(value, countPerValue.get(value) + 1);
|
||||
}
|
||||
}
|
||||
|
||||
public void addAll(Map<Float, Integer> otherCounter){
|
||||
for(Map.Entry<Float, Integer> entry: otherCounter.entrySet()){
|
||||
if(countPerValue.containsKey(entry.getKey())){
|
||||
countPerValue.put(entry.getKey(), countPerValue.get(entry.getKey())+ entry.getValue());
|
||||
} else {
|
||||
countPerValue.put(entry.getKey(), entry.getValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public Float getMostPopular(){
|
||||
Map.Entry<Float, Integer> mostPopular = null;
|
||||
for(Map.Entry<Float, Integer> entry: countPerValue.entrySet()){
|
||||
if(mostPopular == null){
|
||||
mostPopular = entry;
|
||||
} else if(entry.getValue() >= mostPopular.getValue()){
|
||||
mostPopular = entry;
|
||||
}
|
||||
}
|
||||
return mostPopular != null ? mostPopular.getKey() : null;
|
||||
}
|
||||
|
||||
|
||||
|
||||
public List<Float> getHighterThanMostPopular(){
|
||||
Float mostPopular = getMostPopular();
|
||||
List<Float> higher = new ArrayList<>();
|
||||
for(Float value: countPerValue.keySet()){
|
||||
if(value > mostPopular){
|
||||
higher.add(value);
|
||||
}
|
||||
}
|
||||
|
||||
return higher.stream().sorted(Collections.reverseOrder()).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
||||
public Float getHighest(){
|
||||
Float highest = null;
|
||||
for(Float value: countPerValue.keySet()){
|
||||
if (highest == null){
|
||||
highest = value;
|
||||
} else if(value > highest){
|
||||
highest = value;
|
||||
}
|
||||
}
|
||||
return highest;
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,35 @@
|
||||
package com.iqser.red.service.redaction.v1.server.classification.model;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.NonNull;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
@Data
|
||||
@RequiredArgsConstructor
|
||||
public class Page {
|
||||
|
||||
@NonNull
|
||||
private List<AbstractTextContainer> textBlocks;
|
||||
|
||||
private Rectangle bodyTextFrame;
|
||||
|
||||
private boolean landscape;
|
||||
private int rotation;
|
||||
|
||||
private int pageNumber;
|
||||
|
||||
private FloatFrequencyCounter textHeightCounter = new FloatFrequencyCounter();
|
||||
private FloatFrequencyCounter fontSizeCounter = new FloatFrequencyCounter();
|
||||
private StringFrequencyCounter fontCounter = new StringFrequencyCounter();
|
||||
private StringFrequencyCounter fontStyleCounter = new StringFrequencyCounter();
|
||||
|
||||
public boolean isRotated() {
|
||||
return rotation != 0;
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,41 @@
|
||||
package com.iqser.red.service.redaction.v1.server.classification.model;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
|
||||
@Data
|
||||
@NoArgsConstructor
|
||||
public class Paragraph {
|
||||
|
||||
private List<AbstractTextContainer> pageBlocks = new ArrayList<>();
|
||||
|
||||
public SearchableText getSearchableText(){
|
||||
SearchableText searchableText = new SearchableText();
|
||||
pageBlocks.forEach(block -> {
|
||||
if(block instanceof TextBlock){
|
||||
searchableText.addAll(((TextBlock) block).getSequences());
|
||||
}
|
||||
});
|
||||
return searchableText;
|
||||
}
|
||||
|
||||
|
||||
public List<Table> getTables(){
|
||||
List<Table> tables = new ArrayList<>();
|
||||
pageBlocks.forEach(block -> {
|
||||
if(block instanceof Table){
|
||||
tables.add((Table) block);
|
||||
}
|
||||
});
|
||||
return tables;
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,47 @@
|
||||
package com.iqser.red.service.redaction.v1.server.classification.model;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import lombok.Getter;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class StringFrequencyCounter {
|
||||
|
||||
@Getter
|
||||
Map<String, Integer> countPerValue = new HashMap<>();
|
||||
|
||||
public void add(String value){
|
||||
if(!countPerValue.containsKey(value)){
|
||||
countPerValue.put(value, 1);
|
||||
} else {
|
||||
countPerValue.put(value, countPerValue.get(value) + 1);
|
||||
}
|
||||
}
|
||||
|
||||
public void addAll(Map<String, Integer> otherCounter){
|
||||
for(Map.Entry<String, Integer> entry: otherCounter.entrySet()){
|
||||
if(countPerValue.containsKey(entry.getKey())){
|
||||
countPerValue.put(entry.getKey(), countPerValue.get(entry.getKey())+ entry.getValue());
|
||||
} else {
|
||||
countPerValue.put(entry.getKey(), entry.getValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public String getMostPopular(){
|
||||
Map.Entry<String, Integer> mostPopular = null;
|
||||
for(Map.Entry<String, Integer> entry: countPerValue.entrySet()){
|
||||
if(mostPopular == null){
|
||||
mostPopular = entry;
|
||||
} else if(entry.getValue() > mostPopular.getValue()){
|
||||
mostPopular = entry;
|
||||
}
|
||||
}
|
||||
return mostPopular != null ? mostPopular.getKey() : null;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
@ -0,0 +1,148 @@
|
||||
package com.iqser.red.service.redaction.v1.server.classification.model;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
|
||||
@AllArgsConstructor
|
||||
@Builder
|
||||
@Data
|
||||
public class TextBlock extends AbstractTextContainer {
|
||||
|
||||
@Builder.Default
|
||||
private List<TextPositionSequence> sequences = new ArrayList<>();
|
||||
private int rotation;
|
||||
|
||||
private String mostPopularWordFont;
|
||||
private String mostPopularWordStyle;
|
||||
private float mostPopularWordFontSize;
|
||||
private float mostPopularWordHeight;
|
||||
private float mostPopularWordSpaceWidth;
|
||||
|
||||
private float highestFontSize;
|
||||
|
||||
private String classification;
|
||||
|
||||
public TextBlock(float minX, float maxX, float minY, float maxY, List<TextPositionSequence> sequences, int rotation) {
|
||||
this.minX = minX;
|
||||
this.maxX = maxX;
|
||||
this.minY = minY;
|
||||
this.maxY = maxY;
|
||||
this.sequences = sequences;
|
||||
this.rotation = rotation;
|
||||
}
|
||||
|
||||
public TextBlock union(TextPositionSequence r) {
|
||||
TextBlock union = this.copy();
|
||||
union.add(r);
|
||||
return union;
|
||||
}
|
||||
|
||||
public TextBlock union(TextBlock r) {
|
||||
TextBlock union = this.copy();
|
||||
union.add(r);
|
||||
return union;
|
||||
}
|
||||
|
||||
public void add(TextBlock r) {
|
||||
if (r.getMinX() < minX) {
|
||||
minX = r.getMinX();
|
||||
}
|
||||
if (r.getMaxX() > maxX) {
|
||||
maxX = r.getMaxX();
|
||||
}
|
||||
if (r.getMinY() < minY) {
|
||||
minY = r.getMinY();
|
||||
}
|
||||
if (r.getMaxY() > maxY) {
|
||||
maxY = r.getMaxY();
|
||||
}
|
||||
sequences.addAll(r.getSequences());
|
||||
}
|
||||
|
||||
public void add(TextPositionSequence r) {
|
||||
if (r.getX1() < minX) {
|
||||
minX = r.getX1();
|
||||
}
|
||||
if (r.getX2() > maxX) {
|
||||
maxX = r.getX2();
|
||||
}
|
||||
if (r.getY1() < minY) {
|
||||
minY = r.getY1();
|
||||
}
|
||||
if (r.getY2() > maxY) {
|
||||
maxY = r.getY2();
|
||||
}
|
||||
}
|
||||
|
||||
public TextBlock copy() {
|
||||
return new TextBlock(minX, maxX, minY, maxY, sequences, rotation);
|
||||
}
|
||||
|
||||
public void resize(float x1, float y1, float width, float height) {
|
||||
set(x1, y1, x1 + width, y1 + height);
|
||||
}
|
||||
|
||||
public void set(float x1, float y1, float x2, float y2) {
|
||||
this.minX = Math.min(x1, x2);
|
||||
this.maxX = Math.max(x1, x2);
|
||||
this.minY = Math.min(y1, y2);
|
||||
this.maxY = Math.max(y1, y2);
|
||||
}
|
||||
|
||||
public float getHeight() {
|
||||
return maxY - minY;
|
||||
}
|
||||
|
||||
public float getWidth() {
|
||||
return maxX - minX;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
StringBuilder builder = new StringBuilder();
|
||||
|
||||
for (int i = 0; i < sequences.size(); i++) {
|
||||
String sequenceAsString = sequences.get(i).toString();
|
||||
// Fix for missing Whitespace. This is recognized in getSequences method. See PDFTextStripper Line 1730.
|
||||
if (i != 0 && sequences.get(i - 1).charAt(sequences.get(i - 1).length() - 1) != ' ' && sequenceAsString.charAt(0) != ' ') {
|
||||
builder.append(' ');
|
||||
}
|
||||
builder.append(sequenceAsString);
|
||||
}
|
||||
|
||||
return builder.toString();
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getText() {
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
TextPositionSequence previous = null;
|
||||
for (TextPositionSequence word : sequences) {
|
||||
if (previous != null) {
|
||||
if (Math.abs(previous.getY1() - word.getY1()) > word.getTextHeight()) {
|
||||
sb.append('\n');
|
||||
} else {
|
||||
sb.append(' ');
|
||||
}
|
||||
}
|
||||
sb.append(word.toString());
|
||||
previous = word;
|
||||
}
|
||||
|
||||
return TextNormalizationUtilities.removeHyphenLineBreaks(sb.toString());
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,215 @@
|
||||
package com.iqser.red.service.redaction.v1.server.classification.service;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.classification.utils.PositionUtils;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.FloatFrequencyCounter;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.StringFrequencyCounter;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
|
||||
@Service
|
||||
@SuppressWarnings("all")
|
||||
public class BlockificationService {
|
||||
|
||||
public Page blockify(List<TextPositionSequence> textPositions, List<Ruling> horizontalRulingLines, List<Ruling> verticalRulingLines) {
|
||||
|
||||
List<TextPositionSequence> chunkWords = new ArrayList<>();
|
||||
List<AbstractTextContainer> chunkBlockList1 = new ArrayList<>();
|
||||
|
||||
float minX = 1000, maxX = 0, minY = 1000, maxY = 0;
|
||||
TextPositionSequence prev = null;
|
||||
|
||||
|
||||
for (TextPositionSequence word : textPositions) {
|
||||
|
||||
boolean lineSeparation = minY - word.getY2() > word.getHeight() * 1.25;
|
||||
boolean startFromTop = word.getY1() > maxY + word.getHeight();
|
||||
|
||||
if (prev != null &&
|
||||
(lineSeparation
|
||||
|| startFromTop
|
||||
|| word.getRotation() == 0 && isSplittedByRuling(maxX, minY, word.getX1(), word.getY1(), verticalRulingLines)
|
||||
|| word.getRotation() == 0 && isSplittedByRuling(minX, minY, word.getX1(), word.getY2(), horizontalRulingLines)
|
||||
|| word.getRotation() == 90 && isSplittedByRuling(maxX, minY, word.getX1(), word.getY1(), horizontalRulingLines)
|
||||
|| word.getRotation() == 90 && isSplittedByRuling(minX, minY, word.getX1(), word.getY2(), verticalRulingLines)
|
||||
)) {
|
||||
|
||||
TextBlock cb1 = buildTextBlock(chunkWords);
|
||||
chunkBlockList1.add(cb1);
|
||||
chunkWords = new ArrayList<>();
|
||||
|
||||
minX = 1000;
|
||||
maxX = 0;
|
||||
minY = 1000;
|
||||
maxY = 0;
|
||||
prev = null;
|
||||
}
|
||||
|
||||
chunkWords.add(word);
|
||||
|
||||
prev = word;
|
||||
if (word.getX1() < minX) {
|
||||
minX = word.getX1();
|
||||
}
|
||||
if (word.getX2() > maxX) {
|
||||
maxX = word.getX2();
|
||||
}
|
||||
if (word.getY1() < minY) {
|
||||
minY = word.getY1();
|
||||
}
|
||||
if (word.getY2() > maxY) {
|
||||
maxY = word.getY2();
|
||||
}
|
||||
}
|
||||
|
||||
TextBlock cb1 = buildTextBlock(chunkWords);
|
||||
if (cb1 != null) {
|
||||
chunkBlockList1.add(cb1);
|
||||
}
|
||||
|
||||
return new Page(chunkBlockList1);
|
||||
}
|
||||
|
||||
|
||||
private TextBlock buildTextBlock(List<TextPositionSequence> wordBlockList) {
|
||||
|
||||
TextBlock textBlock = null;
|
||||
|
||||
FloatFrequencyCounter lineHeightFrequencyCounter = new FloatFrequencyCounter();
|
||||
FloatFrequencyCounter fontSizeFrequencyCounter = new FloatFrequencyCounter();
|
||||
FloatFrequencyCounter spaceFrequencyCounter = new FloatFrequencyCounter();
|
||||
StringFrequencyCounter fontFrequencyCounter = new StringFrequencyCounter();
|
||||
StringFrequencyCounter styleFrequencyCounter = new StringFrequencyCounter();
|
||||
|
||||
for (TextPositionSequence wordBlock : wordBlockList) {
|
||||
|
||||
lineHeightFrequencyCounter.add(wordBlock.getTextHeight());
|
||||
fontSizeFrequencyCounter.add(wordBlock.getFontSize());
|
||||
spaceFrequencyCounter.add(wordBlock.getSpaceWidth());
|
||||
fontFrequencyCounter.add(wordBlock.getFont());
|
||||
styleFrequencyCounter.add(wordBlock.getFontStyle());
|
||||
|
||||
if (textBlock == null) {
|
||||
textBlock = new TextBlock(wordBlock.getX1(), wordBlock.getX2(), wordBlock.getY1(), wordBlock.getY2(), wordBlockList, wordBlock.getRotation());
|
||||
} else {
|
||||
TextBlock spatialEntity = textBlock.union(wordBlock);
|
||||
textBlock.resize(spatialEntity.getMinX(), spatialEntity.getMinY(),
|
||||
spatialEntity.getWidth(), spatialEntity.getHeight());
|
||||
}
|
||||
}
|
||||
|
||||
if (textBlock != null) {
|
||||
textBlock.setMostPopularWordFont(fontFrequencyCounter.getMostPopular());
|
||||
textBlock.setMostPopularWordStyle(styleFrequencyCounter.getMostPopular());
|
||||
textBlock.setMostPopularWordFontSize(fontSizeFrequencyCounter.getMostPopular());
|
||||
textBlock.setMostPopularWordHeight(lineHeightFrequencyCounter.getMostPopular());
|
||||
textBlock.setMostPopularWordSpaceWidth(spaceFrequencyCounter.getMostPopular());
|
||||
textBlock.setHighestFontSize(fontSizeFrequencyCounter.getHighest());
|
||||
}
|
||||
|
||||
return textBlock;
|
||||
}
|
||||
|
||||
|
||||
private boolean isSplittedByRuling(float previousX2, float previousY1, float currentX1, float currentY1, List<Ruling> rulingLines) {
|
||||
for (Ruling ruling : rulingLines) {
|
||||
if (ruling.intersectsLine(previousX2, previousY1, currentX1, currentY1)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
public Rectangle calculateBodyTextFrame(List<Page> pages, FloatFrequencyCounter documentFontSizeCounter, boolean landscape) {
|
||||
|
||||
|
||||
float minX = 10000;
|
||||
float maxX = -100;
|
||||
float minY = 10000;
|
||||
float maxY = -100;
|
||||
|
||||
for (Page page : pages) {
|
||||
|
||||
if (page.getTextBlocks().isEmpty() || landscape != page.isLandscape()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (AbstractTextContainer container : page.getTextBlocks()) {
|
||||
|
||||
|
||||
if (container instanceof TextBlock) {
|
||||
TextBlock textBlock = (TextBlock) container;
|
||||
if (textBlock.getMostPopularWordFont() == null || textBlock.getMostPopularWordStyle() == null) {
|
||||
continue;
|
||||
}
|
||||
|
||||
float approxLineCount = PositionUtils.getApproxLineCount(textBlock);
|
||||
if (approxLineCount < 2.9f) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (documentFontSizeCounter.getMostPopular() != null) {
|
||||
if (textBlock.getMostPopularWordFontSize() >= documentFontSizeCounter.getMostPopular()) {
|
||||
|
||||
if (textBlock.getMinX() < minX) {
|
||||
minX = textBlock.getMinX();
|
||||
}
|
||||
if (textBlock.getMaxX() > maxX) {
|
||||
maxX = textBlock.getMaxX();
|
||||
}
|
||||
if (textBlock.getMinY() < minY) {
|
||||
minY = textBlock.getMinY();
|
||||
}
|
||||
if (textBlock.getMaxY() > maxY) {
|
||||
maxY = textBlock.getMaxY();
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (container instanceof Table) {
|
||||
Table table = (Table) container;
|
||||
for (List<Cell> row : table.getRows()) {
|
||||
for (Cell column : row) {
|
||||
|
||||
if (column == null || column.getTextBlocks() == null) {
|
||||
continue;
|
||||
}
|
||||
for (TextBlock textBlock : column.getTextBlocks()) {
|
||||
if (textBlock.getMinX() < minX) {
|
||||
minX = textBlock.getMinX();
|
||||
}
|
||||
if (textBlock.getMaxX() > maxX) {
|
||||
maxX = textBlock.getMaxX();
|
||||
}
|
||||
if (textBlock.getMinY() < minY) {
|
||||
minY = textBlock.getMinY();
|
||||
}
|
||||
if (textBlock.getMaxY() > maxY) {
|
||||
maxY = textBlock.getMaxY();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
return new Rectangle(minY, minX, maxX - minX, maxY - minY);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
@ -0,0 +1,91 @@
|
||||
package com.iqser.red.service.redaction.v1.server.classification.service;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.classification.utils.PositionUtils;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class ClassificationService {
|
||||
|
||||
private final BlockificationService blockificationService;
|
||||
|
||||
|
||||
public void classifyDocument(Document document) {
|
||||
|
||||
Rectangle bodyTextFrame = blockificationService.calculateBodyTextFrame(document.getPages(), document.getFontSizeCounter(), false);
|
||||
Rectangle landscapeBodyTextFrame = blockificationService.calculateBodyTextFrame(document.getPages(), document.getFontSizeCounter(), true);
|
||||
|
||||
List<Float> headlineFontSizes = document.getFontSizeCounter().getHighterThanMostPopular();
|
||||
|
||||
System.out.println(document.getFontSizeCounter().getCountPerValue());
|
||||
|
||||
for (Page page : document.getPages()) {
|
||||
Rectangle btf = page.isLandscape() ? landscapeBodyTextFrame : bodyTextFrame;
|
||||
page.setBodyTextFrame(btf);
|
||||
classifyPage(btf, page, document, headlineFontSizes);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void classifyPage(Rectangle bodyTextFrame, Page page, Document document, List<Float> headlineFontSizes) {
|
||||
for (AbstractTextContainer textBlock : page.getTextBlocks()) {
|
||||
if (textBlock instanceof TextBlock) {
|
||||
classifyBlock((TextBlock) textBlock, bodyTextFrame, page, document, headlineFontSizes);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void classifyBlock(TextBlock textBlock, Rectangle bodyTextFrame, Page page, Document document, List<Float> headlineFontSizes) {
|
||||
if (document.getFontSizeCounter().getMostPopular() == null) {
|
||||
// TODO Figure out why this happens.
|
||||
return;
|
||||
}
|
||||
if (PositionUtils.isOverBodyTextFrame(bodyTextFrame, textBlock, page.isRotated()) && (document.getFontSizeCounter().getMostPopular() == null || textBlock.getHighestFontSize() <= document.getFontSizeCounter().getMostPopular())) {
|
||||
textBlock.setClassification("Header");
|
||||
|
||||
} else if (PositionUtils.isUnderBodyTextFrame(bodyTextFrame, textBlock) && (document.getFontSizeCounter().getMostPopular() == null || textBlock.getHighestFontSize() <= document.getFontSizeCounter().getMostPopular())) {
|
||||
textBlock.setClassification("Footer");
|
||||
} else if (page.getPageNumber() == 1
|
||||
&& (!PositionUtils.isTouchingUnderBodyTextFrame(bodyTextFrame, textBlock)
|
||||
&& PositionUtils.getHeightDifferenceBetweenChunkWordAndDocumentWord(textBlock, document.getTextHeightCounter().getMostPopular()) > 2.5 && textBlock.getHighestFontSize() > document.getFontSizeCounter().getMostPopular() || page.getTextBlocks().size() == 1)) {
|
||||
if (!Pattern.matches("[0-9]+", textBlock.toString())) {
|
||||
textBlock.setClassification("Title");
|
||||
}
|
||||
}
|
||||
else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getMostPopularWordFontSize() > document.getFontSizeCounter().getMostPopular() && PositionUtils.getApproxLineCount(textBlock) < 4.9 && textBlock.getMostPopularWordStyle().equals("bold")) {
|
||||
|
||||
for (int i = 1; i <= headlineFontSizes.size(); i++) {
|
||||
if (textBlock.getMostPopularWordFontSize() == headlineFontSizes.get(i - 1)) {
|
||||
textBlock.setClassification("H " + i);
|
||||
document.setHeadlines(true);
|
||||
}
|
||||
}
|
||||
}else if (!textBlock.getText().startsWith("Table ") && !textBlock.getText().startsWith("Figure ") && PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getMostPopularWordFontSize() == document.getFontSizeCounter().getMostPopular() && textBlock.getMostPopularWordStyle().equals("bold") && !document.getFontStyleCounter().getMostPopular().equals("bold") && PositionUtils.getApproxLineCount(textBlock) < 2.9) {
|
||||
textBlock.setClassification("H " + (headlineFontSizes.size() + 1));
|
||||
document.setHeadlines(true);
|
||||
}
|
||||
else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getMostPopularWordFontSize() == document.getFontSizeCounter().getMostPopular() && textBlock.getMostPopularWordStyle().equals("bold") && !document.getFontStyleCounter().getMostPopular().equals("bold")) {
|
||||
textBlock.setClassification("TextBlock Bold");
|
||||
}
|
||||
else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getMostPopularWordFont().equals(document.getFontCounter().getMostPopular()) && textBlock.getMostPopularWordStyle().equals(document.getFontStyleCounter().getMostPopular()) && textBlock.getMostPopularWordFontSize() == document.getFontSizeCounter().getMostPopular()) {
|
||||
textBlock.setClassification("TextBlock");
|
||||
}
|
||||
else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getMostPopularWordFontSize() == document.getFontSizeCounter().getMostPopular() && textBlock.getMostPopularWordStyle().equals("italic") && !document.getFontStyleCounter().getMostPopular().equals("italic") && PositionUtils.getApproxLineCount(textBlock) < 2.9) {
|
||||
textBlock.setClassification("TextBlock Italic");
|
||||
} else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getSequences().size() > 3){
|
||||
textBlock.setClassification("TextBlock Unknown");
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,95 @@
|
||||
package com.iqser.red.service.redaction.v1.server.classification.utils;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
|
||||
@UtilityClass
|
||||
@SuppressWarnings("all")
|
||||
public class PositionUtils {
|
||||
|
||||
|
||||
public boolean isWithinBodyTextFrame(Rectangle btf, TextBlock textBlock) {
|
||||
|
||||
//TODO Currently this is not working for rotated pages.
|
||||
|
||||
if (btf == null || textBlock == null) {
|
||||
return false;
|
||||
}
|
||||
|
||||
double threshold = textBlock.getMostPopularWordHeight() * 3;
|
||||
|
||||
if (textBlock.getMinX() + threshold > btf.getX() &&
|
||||
textBlock.getMaxX() - threshold < btf.getX() + btf.getWidth() &&
|
||||
textBlock.getMinY() + threshold > btf.getY() &&
|
||||
textBlock.getMaxY() - threshold < btf.getY() + btf.getHeight()) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
public boolean isOverBodyTextFrame(Rectangle btf, TextBlock textBlock, boolean rotated) {
|
||||
|
||||
if (btf == null || textBlock == null) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (rotated && textBlock.getMinX() < btf.getX()) {
|
||||
// Its very strange, P{0,0} is on top left in this case, instead of lower left.
|
||||
return true;
|
||||
} else if (!rotated && textBlock.getMinY() > btf.getY() + btf.getHeight()) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
public boolean isUnderBodyTextFrame(Rectangle btf, TextBlock textBlock) {
|
||||
|
||||
//TODO Currently this is not working for rotated pages.
|
||||
|
||||
if (btf == null || textBlock == null) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (textBlock.getMaxY() < btf.getY()) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
public boolean isTouchingUnderBodyTextFrame(Rectangle btf, TextBlock textBlock) {
|
||||
|
||||
//TODO Currently this is not working for rotated pages.
|
||||
|
||||
if (btf == null || textBlock == null) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (textBlock.getMinY() < btf.getY()) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
public float getHeightDifferenceBetweenChunkWordAndDocumentWord(TextBlock textBlock, Float documentMostPopularWordHeight) {
|
||||
return textBlock.getMostPopularWordHeight() - documentMostPopularWordHeight;
|
||||
}
|
||||
|
||||
|
||||
public Float getApproxLineCount(TextBlock textBlock) {
|
||||
return textBlock.getHeight() / textBlock.getMostPopularWordHeight();
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,137 @@
|
||||
package com.iqser.red.service.redaction.v1.server.controller;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.springframework.web.bind.annotation.RequestBody;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionResult;
|
||||
import com.iqser.red.service.redaction.v1.resources.RedactionResource;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.exception.RedactionException;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.DroolsExecutionService;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.service.EntityRedactionService;
|
||||
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
import com.iqser.red.service.redaction.v1.server.visualization.service.AnnotationHighlightService;
|
||||
import com.iqser.red.service.redaction.v1.server.visualization.service.PdfFlattenService;
|
||||
import com.iqser.red.service.redaction.v1.server.visualization.service.PdfVisualisationService;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@RestController
|
||||
@RequiredArgsConstructor
|
||||
public class RedactionController implements RedactionResource {
|
||||
|
||||
private final PdfVisualisationService pdfVisualisationService;
|
||||
private final PdfSegmentationService pdfSegmentationService;
|
||||
private final AnnotationHighlightService annotationHighlightService;
|
||||
private final EntityRedactionService entityRedactionService;
|
||||
private final PdfFlattenService pdfFlattenService;
|
||||
private final DroolsExecutionService droolsExecutionService;
|
||||
|
||||
|
||||
public RedactionResult redact(@RequestBody RedactionRequest redactionRequest) {
|
||||
|
||||
try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(redactionRequest.getDocument()))) {
|
||||
pdDocument.setAllSecurityToBeRemoved(true);
|
||||
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
||||
entityRedactionService.processDocument(classifiedDoc);
|
||||
annotationHighlightService.highlight(pdDocument, classifiedDoc, redactionRequest.isFlatRedaction());
|
||||
|
||||
if (redactionRequest.isFlatRedaction()) {
|
||||
PDDocument flatDocument = pdfFlattenService.flattenPDF(pdDocument);
|
||||
return convert(flatDocument, classifiedDoc.getPages().size());
|
||||
}
|
||||
|
||||
return convert(pdDocument, classifiedDoc.getPages().size());
|
||||
|
||||
} catch (IOException e) {
|
||||
throw new RedactionException(e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public RedactionResult classify(@RequestBody RedactionRequest pdfSegmentationRequest) {
|
||||
|
||||
try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(pdfSegmentationRequest.getDocument()))) {
|
||||
pdDocument.setAllSecurityToBeRemoved(true);
|
||||
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
||||
pdfVisualisationService.visualizeClassifications(classifiedDoc, pdDocument);
|
||||
|
||||
return convert(pdDocument, classifiedDoc.getPages().size());
|
||||
|
||||
} catch (IOException e) {
|
||||
throw new RedactionException(e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public RedactionResult sections(@RequestBody RedactionRequest redactionRequest) {
|
||||
|
||||
try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(redactionRequest.getDocument()))) {
|
||||
pdDocument.setAllSecurityToBeRemoved(true);
|
||||
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
||||
pdfVisualisationService.visualizeParagraphs(classifiedDoc, pdDocument);
|
||||
|
||||
return convert(pdDocument, classifiedDoc.getPages().size());
|
||||
|
||||
} catch (IOException e) {
|
||||
throw new RedactionException(e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public RedactionResult htmlTables(@RequestBody RedactionRequest redactionRequest) {
|
||||
try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(redactionRequest.getDocument()))) {
|
||||
pdDocument.setAllSecurityToBeRemoved(true);
|
||||
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (Page page : classifiedDoc.getPages()) {
|
||||
for (AbstractTextContainer textContainer : page.getTextBlocks()) {
|
||||
if (textContainer instanceof Table) {
|
||||
Table table = (Table) textContainer;
|
||||
sb.append(table.getTextAsHtml()).append("<br />").append("<br />");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return RedactionResult.builder().document(sb.toString().getBytes()).build();
|
||||
|
||||
} catch (IOException e) {
|
||||
throw new RedactionException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public String getRules() {
|
||||
return droolsExecutionService.getRules();
|
||||
}
|
||||
|
||||
public void updateRules(@RequestBody String rules) {
|
||||
droolsExecutionService.updateRules(rules);
|
||||
}
|
||||
|
||||
private RedactionResult convert(PDDocument document, int numberOfPages) throws IOException {
|
||||
try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream()) {
|
||||
document.save(byteArrayOutputStream);
|
||||
return RedactionResult.builder()
|
||||
.document(byteArrayOutputStream.toByteArray())
|
||||
.numberOfPages(numberOfPages)
|
||||
.build();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,13 @@
|
||||
package com.iqser.red.service.redaction.v1.server.exception;
|
||||
|
||||
/**
 * Unchecked exception of the redaction service. Every instance carries the
 * same fixed message.
 */
public class RedactionException extends RuntimeException {

    /** Message shared by both constructors. */
    private static final String MESSAGE = "Could not parse document";


    /**
     * Creates the exception with the fixed message and the given cause.
     *
     * @param cause the underlying failure
     */
    public RedactionException(Throwable cause) {
        super(MESSAGE, cause);
    }


    /** Creates the exception with the fixed message and no cause. */
    public RedactionException() {
        super(MESSAGE);
    }

}
|
||||
@ -0,0 +1,256 @@
|
||||
package com.iqser.red.service.redaction.v1.server.parsing;
|
||||
|
||||
import java.awt.geom.Point2D;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.pdfbox.contentstream.operator.Operator;
|
||||
import org.apache.pdfbox.contentstream.operator.OperatorName;
|
||||
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColor;
|
||||
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColorN;
|
||||
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColorSpace;
|
||||
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceCMYKColor;
|
||||
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceGrayColor;
|
||||
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceRGBColor;
|
||||
import org.apache.pdfbox.contentstream.operator.color.SetStrokingColor;
|
||||
import org.apache.pdfbox.contentstream.operator.color.SetStrokingColorN;
|
||||
import org.apache.pdfbox.contentstream.operator.color.SetStrokingColorSpace;
|
||||
import org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceCMYKColor;
|
||||
import org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceGrayColor;
|
||||
import org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceRGBColor;
|
||||
import org.apache.pdfbox.contentstream.operator.state.SetFlatness;
|
||||
import org.apache.pdfbox.contentstream.operator.state.SetLineCapStyle;
|
||||
import org.apache.pdfbox.contentstream.operator.state.SetLineDashPattern;
|
||||
import org.apache.pdfbox.contentstream.operator.state.SetLineJoinStyle;
|
||||
import org.apache.pdfbox.contentstream.operator.state.SetLineMiterLimit;
|
||||
import org.apache.pdfbox.contentstream.operator.state.SetLineWidth;
|
||||
import org.apache.pdfbox.contentstream.operator.state.SetRenderingIntent;
|
||||
import org.apache.pdfbox.contentstream.operator.text.SetFontAndSize;
|
||||
import org.apache.pdfbox.cos.COSBase;
|
||||
import org.apache.pdfbox.cos.COSNumber;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.text.PDFTextStripper;
|
||||
import org.apache.pdfbox.text.TextPosition;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
public class PDFLinesTextStripper extends PDFTextStripper {
|
||||
|
||||
@Getter
|
||||
private float minCharWidth = Float.MAX_VALUE;
|
||||
|
||||
@Getter
|
||||
private float minCharHeight = Float.MAX_VALUE;
|
||||
|
||||
@Getter
|
||||
private final List<TextPositionSequence> textPositionSequences = new ArrayList<>();
|
||||
|
||||
@Getter
|
||||
private final List<Ruling> rulings = new ArrayList<>();
|
||||
|
||||
private final List<Ruling> graphicsPath = new ArrayList<>();
|
||||
|
||||
private float path_x;
|
||||
private float path_y;
|
||||
|
||||
@Setter
|
||||
private int pageNumber;
|
||||
|
||||
public PDFLinesTextStripper() throws IOException {
|
||||
super();
|
||||
this.addOperator(new SetStrokingColorSpace());
|
||||
this.addOperator(new SetNonStrokingColorSpace());
|
||||
this.addOperator(new SetLineDashPattern());
|
||||
this.addOperator(new SetStrokingDeviceGrayColor());
|
||||
this.addOperator(new SetNonStrokingDeviceGrayColor());
|
||||
this.addOperator(new SetFlatness());
|
||||
this.addOperator(new SetLineJoinStyle());
|
||||
this.addOperator(new SetLineCapStyle());
|
||||
this.addOperator(new SetStrokingDeviceCMYKColor());
|
||||
this.addOperator(new SetNonStrokingDeviceCMYKColor());
|
||||
this.addOperator(new SetLineMiterLimit());
|
||||
this.addOperator(new SetStrokingDeviceRGBColor());
|
||||
this.addOperator(new SetNonStrokingDeviceRGBColor());
|
||||
this.addOperator(new SetRenderingIntent());
|
||||
this.addOperator(new SetStrokingColor());
|
||||
this.addOperator(new SetNonStrokingColor());
|
||||
this.addOperator(new SetStrokingColorN());
|
||||
this.addOperator(new SetNonStrokingColorN());
|
||||
this.addOperator(new SetFontAndSize());
|
||||
this.addOperator(new SetLineWidth());
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void processOperator(Operator operator, List<COSBase> arguments)
|
||||
throws IOException {
|
||||
|
||||
String operation = operator.getName();
|
||||
|
||||
//move
|
||||
switch (operation) {
|
||||
case OperatorName.MOVE_TO:
|
||||
if (arguments.size() == 2) {
|
||||
Point2D.Float pos = transformPosition(floatValue(arguments.get(0)), floatValue(arguments.get(1)));
|
||||
path_x = (float) pos.getX();
|
||||
path_y = (float) pos.getY();
|
||||
}
|
||||
break;
|
||||
|
||||
//line
|
||||
case OperatorName.LINE_TO:
|
||||
if (arguments.size() == 2) {
|
||||
Point2D.Float pos = transformPosition(floatValue(arguments.get(0)), floatValue(arguments.get(1)));
|
||||
|
||||
// The direction of vertical lines must always be from bottom to top for the table extraction algorithm.
|
||||
if (pos.getY() > path_y) {
|
||||
graphicsPath.add(new Ruling(new Point2D.Float(path_x, path_y), new Point2D.Float((float) pos.getX(), (float) pos.getY())));
|
||||
} else {
|
||||
graphicsPath.add(new Ruling(new Point2D.Float(path_x, (float) pos.getY()), new Point2D.Float((float) pos.getX(), path_y)));
|
||||
}
|
||||
|
||||
path_x = (float) pos.getX();
|
||||
path_y = (float) pos.getY();
|
||||
}
|
||||
break;
|
||||
|
||||
//rectangle
|
||||
case OperatorName.APPEND_RECT:
|
||||
|
||||
if (arguments.size() == 4) {
|
||||
float x = floatValue(arguments.get(0));
|
||||
float y = floatValue(arguments.get(1));
|
||||
float width = floatValue(arguments.get(2));
|
||||
float height = floatValue(arguments.get(3));
|
||||
|
||||
Point2D p1 = transformPosition(x, y);
|
||||
Point2D p2 = transformPosition(x + width, y + height);
|
||||
|
||||
// Horizontal lines
|
||||
graphicsPath.add(new Ruling(new Point2D.Float((float) p1.getX(), (float) p1.getY()), new Point2D.Float((float) p2.getX(), (float) p1.getY())));
|
||||
graphicsPath.add(new Ruling(new Point2D.Float((float) p1.getX(), (float) p2.getY()), new Point2D.Float((float) p2.getX(), (float) p2.getY())));
|
||||
|
||||
// Vertical lines, direction must always be from bottom to top for the table extraction algorithm.
|
||||
if (p2.getY() > p1.getY()) {
|
||||
graphicsPath.add(new Ruling(new Point2D.Float((float) p2.getX(), (float) p1.getY()), new Point2D.Float((float) p2.getX(), (float) p2.getY())));
|
||||
} else {
|
||||
graphicsPath.add(new Ruling(new Point2D.Float((float) p2.getX(), (float) p2.getY()), new Point2D.Float((float) p2.getX(), (float) p1.getY())));
|
||||
}
|
||||
if (p2.getY() > p1.getY()) {
|
||||
graphicsPath.add(new Ruling(new Point2D.Float((float) p1.getX(), (float) p1.getY()), new Point2D.Float((float) p1.getX(), (float) p2.getY())));
|
||||
} else {
|
||||
graphicsPath.add(new Ruling(new Point2D.Float((float) p1.getX(), (float) p2.getY()), new Point2D.Float((float) p1.getX(), (float) p1.getY())));
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
//fill
|
||||
case OperatorName.FILL_NON_ZERO:
|
||||
case OperatorName.LEGACY_FILL_NON_ZERO:
|
||||
case OperatorName.FILL_EVEN_ODD:
|
||||
addVisibleRulings(graphicsPath, false);
|
||||
graphicsPath.clear();
|
||||
break;
|
||||
|
||||
//stroke
|
||||
case OperatorName.STROKE_PATH:
|
||||
addVisibleRulings(graphicsPath, true);
|
||||
graphicsPath.clear();
|
||||
break;
|
||||
|
||||
//cancel path
|
||||
case OperatorName.ENDPATH:
|
||||
graphicsPath.clear();
|
||||
break;
|
||||
}
|
||||
|
||||
super.processOperator(operator, arguments);
|
||||
}
|
||||
|
||||
private float floatValue(COSBase value) {
|
||||
if (value instanceof COSNumber) {
|
||||
return ((COSNumber) value).floatValue();
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
private Point2D.Float transformPosition(float x, float y) {
|
||||
return super.transformedPoint(x, y);
|
||||
}
|
||||
|
||||
private void addVisibleRulings(List<Ruling> path, boolean stroke) throws IOException {
|
||||
|
||||
try {
|
||||
if (stroke && !getGraphicsState().getStrokingColor().isPattern() && getGraphicsState().getStrokingColor().toRGB() == 0 || !stroke && !getGraphicsState().getNonStrokingColor().isPattern() && getGraphicsState().getNonStrokingColor().toRGB() == 0) {
|
||||
rulings.addAll(path);
|
||||
}
|
||||
} catch (UnsupportedOperationException e) {
|
||||
log.error("UnsupportedOperationException: " + getGraphicsState().getStrokingColor().getColorSpace().getName() + " or " + getGraphicsState().getNonStrokingColor().getColorSpace().getName() + " does not support toRGB");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeString(String text, List<TextPosition> textPositions) throws IOException {
|
||||
|
||||
int startIndex = 0;
|
||||
for (int i = 0; i <= textPositions.size() - 1; i++) {
|
||||
minCharWidth = Math.min(minCharWidth, textPositions.get(i).getWidthDirAdj());
|
||||
minCharHeight = Math.min(minCharHeight, textPositions.get(i).getHeightDir());
|
||||
|
||||
if (i == 0 && textPositions.get(i).getUnicode().equals(" ")) {
|
||||
startIndex++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Strange but sometimes this is happening, for example: Metolachlor2.pdf
|
||||
if (i > 0 && textPositions.get(i).getX() < textPositions.get(i - 1).getX()) {
|
||||
List<TextPosition> sublist = textPositions.subList(startIndex, i);
|
||||
if (!(sublist.isEmpty() || sublist.size() == 1 && sublist.get(0).getUnicode().equals(" "))) {
|
||||
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
|
||||
}
|
||||
startIndex = i;
|
||||
}
|
||||
|
||||
if (i > 0 && textPositions.get(i).getUnicode().equals(" ") && i <= textPositions.size() - 2) {
|
||||
List<TextPosition> sublist = textPositions.subList(startIndex, i);
|
||||
if (!(sublist.isEmpty() || sublist.size() == 1 && sublist.get(0).getUnicode().equals(" "))) {
|
||||
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
|
||||
}
|
||||
startIndex = i + 1;
|
||||
}
|
||||
}
|
||||
|
||||
List<TextPosition> sublist = textPositions.subList(startIndex, textPositions.size());
|
||||
if (!sublist.isEmpty() && sublist.get(sublist.size() - 1).getUnicode().equals(" ")) {
|
||||
sublist = sublist.subList(0, sublist.size() - 1);
|
||||
}
|
||||
if (!(sublist.isEmpty() || sublist.size() == 1 && sublist.get(0).getUnicode().equals(" "))) {
|
||||
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
|
||||
}
|
||||
super.writeString(text);
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getText(PDDocument doc) throws IOException {
|
||||
|
||||
minCharWidth = Float.MAX_VALUE;
|
||||
minCharHeight = Float.MAX_VALUE;
|
||||
textPositionSequences.clear();
|
||||
rulings.clear();
|
||||
graphicsPath.clear();
|
||||
path_x = 0.0f;
|
||||
path_y = 0.0f;
|
||||
|
||||
return super.getText(doc);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,22 @@
|
||||
package com.iqser.red.service.redaction.v1.server.parsing.model;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
|
||||
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
public class ParsedElements {
|
||||
|
||||
private List<TextPositionSequence> sequences;
|
||||
private List<Ruling> rulings;
|
||||
|
||||
private boolean landscape;
|
||||
private boolean rotated;
|
||||
|
||||
private float minCharWidth;
|
||||
private float minCharHeight;
|
||||
}
|
||||
@ -0,0 +1,140 @@
|
||||
package com.iqser.red.service.redaction.v1.server.parsing.model;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.pdfbox.text.TextPosition;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.Getter;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.Setter;
|
||||
|
||||
@Data
|
||||
@RequiredArgsConstructor
|
||||
public class TextPositionSequence implements CharSequence {
|
||||
|
||||
private List<TextPosition> textPositions = new ArrayList<>();
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
private float[] annotationColor;
|
||||
|
||||
private final int page;
|
||||
|
||||
public TextPositionSequence(List<TextPosition> textPositions, int page){
|
||||
this.textPositions = textPositions;
|
||||
this.page = page;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int length() {
|
||||
return textPositions.size();
|
||||
}
|
||||
|
||||
@Override
|
||||
public char charAt(int index) {
|
||||
TextPosition textPosition = textPositionAt(index);
|
||||
String text = textPosition.getUnicode();
|
||||
return text.charAt(0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public TextPositionSequence subSequence(int start, int end) {
|
||||
return new TextPositionSequence(textPositions.subList(start, end), page);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder builder = new StringBuilder(length());
|
||||
for (int i = 0; i < length(); i++) {
|
||||
builder.append(charAt(i));
|
||||
}
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
public TextPosition textPositionAt(int index) {
|
||||
return textPositions.get(index);
|
||||
}
|
||||
|
||||
public void add(TextPosition textPosition) {
|
||||
this.textPositions.add(textPosition);
|
||||
}
|
||||
|
||||
public float getX1() {
|
||||
if (textPositions.get(0).getRotation() == 90) {
|
||||
return textPositions.get(0).getYDirAdj() - getTextHeight();
|
||||
} else {
|
||||
return textPositions.get(0).getXDirAdj();
|
||||
}
|
||||
}
|
||||
|
||||
public float getX2() {
|
||||
if (textPositions.get(0).getRotation() == 90) {
|
||||
return textPositions.get(0).getYDirAdj();
|
||||
} else {
|
||||
return textPositions.get(textPositions.size() - 1).getXDirAdj() + textPositions.get(textPositions.size() - 1).getWidth() + 1;
|
||||
}
|
||||
}
|
||||
|
||||
public float getY1() {
|
||||
if (textPositions.get(0).getRotation() == 90) {
|
||||
return textPositions.get(0).getXDirAdj();
|
||||
} else {
|
||||
return textPositions.get(0).getPageHeight() - textPositions.get(0).getYDirAdj();
|
||||
}
|
||||
}
|
||||
|
||||
public float getY2() {
|
||||
if (textPositions.get(0).getRotation() == 90) {
|
||||
return textPositions.get(textPositions.size() - 1).getXDirAdj() + getTextHeight() -2 ;
|
||||
} else {
|
||||
return textPositions.get(0).getPageHeight() - textPositions.get(0).getYDirAdj() + getTextHeight();
|
||||
}
|
||||
}
|
||||
|
||||
public float getTextHeight() {
|
||||
return textPositions.get(0).getHeightDir() + 2;
|
||||
}
|
||||
|
||||
public float getHeight() {
|
||||
return getY2() - getY1();
|
||||
}
|
||||
|
||||
public float getWidth() {
|
||||
return getX2() - getX1();
|
||||
}
|
||||
|
||||
public String getFont() {
|
||||
return textPositions.get(0).getFont().toString().toLowerCase().replaceAll(",bold", "").replaceAll(",italic", "");
|
||||
}
|
||||
|
||||
public String getFontStyle() {
|
||||
|
||||
String lowercaseFontName = textPositions.get(0).getFont().toString().toLowerCase();
|
||||
|
||||
if (lowercaseFontName.contains("bold") && lowercaseFontName.contains("italic")) {
|
||||
return "bold, italic";
|
||||
} else if (lowercaseFontName.contains("bold")) {
|
||||
return "bold";
|
||||
} else if (lowercaseFontName.contains("italic")) {
|
||||
return "italic";
|
||||
} else {
|
||||
return "standard";
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public float getFontSize() {
|
||||
return textPositions.get(0).getFontSizeInPt();
|
||||
}
|
||||
|
||||
public float getSpaceWidth() {
|
||||
return textPositions.get(0).getWidthOfSpace();
|
||||
}
|
||||
|
||||
public int getRotation() {
|
||||
return textPositions.get(0).getRotation();
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,34 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
public class Entity {
|
||||
|
||||
private final String word;
|
||||
private final String type;
|
||||
private boolean redaction;
|
||||
private String redactionReason;
|
||||
private List<EntityPositionSequence> positionSequences = new ArrayList<>();
|
||||
private Integer start;
|
||||
private Integer end;
|
||||
|
||||
public Entity(String word, String type, boolean redaction, String redactionReason, List<EntityPositionSequence> positionSequences) {
|
||||
this.word = word;
|
||||
this.type = type;
|
||||
this.redaction = redaction;
|
||||
this.redactionReason = redactionReason;
|
||||
this.positionSequences = positionSequences;
|
||||
}
|
||||
|
||||
public Entity(String word, String type, Integer start, Integer end) {
|
||||
this.word = word;
|
||||
this.type = type;
|
||||
this.start = start;
|
||||
this.end = end;
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,20 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
|
||||
@Data
|
||||
@RequiredArgsConstructor
|
||||
public class EntityPositionSequence {
|
||||
|
||||
private List<TextPositionSequence> sequences = new ArrayList<>();
|
||||
private int pageNumber;
|
||||
private final UUID id;
|
||||
}
|
||||
@ -0,0 +1,162 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
||||
|
||||
@SuppressWarnings("all")
|
||||
public class SearchableText {
|
||||
|
||||
private List<TextPositionSequence> sequences = new ArrayList<>();
|
||||
|
||||
public void add(TextPositionSequence textPositionSequence) {
|
||||
sequences.add(textPositionSequence);
|
||||
}
|
||||
|
||||
public void addAll(List<TextPositionSequence> textPositionSequences) {
|
||||
sequences.addAll(textPositionSequences);
|
||||
}
|
||||
|
||||
|
||||
public List<EntityPositionSequence> getSequences(String searchString) {
|
||||
|
||||
char[] searchChars = searchString.replaceAll("\\n", " ").toCharArray();
|
||||
int counter = 0;
|
||||
|
||||
|
||||
List<TextPositionSequence> crossSequenceParts = new ArrayList<>();
|
||||
List<EntityPositionSequence> finalMatches = new ArrayList<>();
|
||||
for (int i = 0; i < sequences.size(); i++) {
|
||||
TextPositionSequence partMatch = new TextPositionSequence(sequences.get(i).getPage());
|
||||
for (int j = 0; j < sequences.get(i).length(); j++) {
|
||||
|
||||
if(i > 0 && j == 0 && sequences.get(i).charAt(0) == ' ' && sequences.get(i - 1).charAt(sequences.get(i - 1).length() - 1) == ' '
|
||||
|| j > 0 && sequences.get(i).charAt(j) == ' ' && sequences.get(i).charAt(j - 1) == ' '){
|
||||
if(j == sequences.get(i).length() -1 && counter != 0 && !partMatch.getTextPositions().isEmpty()){
|
||||
crossSequenceParts.add(partMatch);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if(j == 0 && sequences.get(i).charAt(j) != ' ' && i != 0 && sequences.get(i - 1).charAt(sequences.get(i - 1).length() -1) != ' ' && searchChars[counter] == ' '){
|
||||
counter++;
|
||||
}
|
||||
|
||||
if (sequences.get(i).charAt(j) == searchChars[counter] || counter != 0 && sequences.get(i).charAt(j) == '-') {
|
||||
|
||||
if(counter != 0 || i == 0 && j == 0 || j != 0 && isSeparator(sequences.get(i).charAt(j - 1)) || j == 0 && i != 0 && isSeparator(sequences.get(i - 1).charAt(sequences.get(i - 1).length() -1))
|
||||
|| j == 0 && i != 0 && sequences.get(i - 1).charAt(sequences.get(i - 1).length() -1) != ' ' && sequences.get(i).charAt(j) != ' ') {
|
||||
partMatch.add(sequences.get(i).textPositionAt(j));
|
||||
if (!(j == sequences.get(i).length() -1 && sequences.get(i).charAt(j) == '-' && searchChars[counter] != '-')) {
|
||||
counter++;
|
||||
}
|
||||
}
|
||||
|
||||
if (counter == searchString.length()) {
|
||||
crossSequenceParts.add(partMatch);
|
||||
|
||||
if(i == sequences.size() - 1 && j == sequences.get(i).length() -1
|
||||
|| j != sequences.get(i).length() -1 && isSeparator(sequences.get(i).charAt(j +1))
|
||||
|| j == sequences.get(i).length() -1 && isSeparator(sequences.get(i + 1).charAt(0))
|
||||
|| j == sequences.get(i).length() -1 && sequences.get(i).charAt(j) != ' ' && sequences.get(i + 1).charAt(0) != ' ') {
|
||||
finalMatches.addAll(buildEntityPositionSequence(crossSequenceParts));
|
||||
}
|
||||
|
||||
counter = 0;
|
||||
crossSequenceParts = new ArrayList<>();
|
||||
partMatch = new TextPositionSequence(sequences.get(i).getPage());
|
||||
}
|
||||
} else {
|
||||
counter = 0;
|
||||
if(!crossSequenceParts.isEmpty()){
|
||||
j--;
|
||||
}
|
||||
crossSequenceParts = new ArrayList<>();
|
||||
partMatch = new TextPositionSequence(sequences.get(i).getPage());
|
||||
}
|
||||
|
||||
if(j == sequences.get(i).length() -1 && counter != 0){
|
||||
crossSequenceParts.add(partMatch);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return finalMatches;
|
||||
}
|
||||
|
||||
|
||||
private List<EntityPositionSequence> buildEntityPositionSequence(List<TextPositionSequence> crossSequenceParts){
|
||||
|
||||
UUID id = UUID.randomUUID();
|
||||
List<EntityPositionSequence> result = new ArrayList<>();
|
||||
int currentPage = -1;
|
||||
EntityPositionSequence entityPositionSequence = new EntityPositionSequence(id);
|
||||
for (TextPositionSequence textPositionSequence :crossSequenceParts){
|
||||
if(currentPage == -1){
|
||||
currentPage = textPositionSequence.getPage();
|
||||
entityPositionSequence.setPageNumber(currentPage);
|
||||
entityPositionSequence.getSequences().add(textPositionSequence);
|
||||
} else if(currentPage == textPositionSequence.getPage()){
|
||||
entityPositionSequence.getSequences().add(textPositionSequence);
|
||||
} else {
|
||||
result.add(entityPositionSequence);
|
||||
entityPositionSequence = new EntityPositionSequence(id);
|
||||
entityPositionSequence.setPageNumber(textPositionSequence.getPage());
|
||||
}
|
||||
}
|
||||
result.add(entityPositionSequence);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
private boolean isSeparator(char c) {
|
||||
return Character.isWhitespace(c) || Pattern.matches("\\p{Punct}", String.valueOf(c)) || c == '\"' || c == '‘' || c == '’';
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
TextPositionSequence previous = null;
|
||||
for (TextPositionSequence word : sequences) {
|
||||
|
||||
if (previous != null) {
|
||||
if (Math.abs(previous.getY1() - word.getY1()) > word.getTextHeight()) {
|
||||
sb.append('\n');
|
||||
} else {
|
||||
sb.append(' ');
|
||||
}
|
||||
}
|
||||
sb.append(word.toString());
|
||||
previous = word;
|
||||
}
|
||||
|
||||
return TextNormalizationUtilities.removeHyphenLineBreaks(sb.toString()).replaceAll("\n", " ").replaceAll(" ", " ");
|
||||
}
|
||||
|
||||
public String getAsStringWithLinebreaks(){
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
TextPositionSequence previous = null;
|
||||
for (TextPositionSequence word : sequences) {
|
||||
|
||||
if (previous != null) {
|
||||
if (Math.abs(previous.getY1() - word.getY1()) > word.getTextHeight()) {
|
||||
sb.append('\n');
|
||||
} else {
|
||||
sb.append(' ');
|
||||
}
|
||||
}
|
||||
sb.append(word.toString());
|
||||
previous = word;
|
||||
}
|
||||
return sb.append("\n").toString();
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,139 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
public class Section {
|
||||
|
||||
private Set<Entity> entities;
|
||||
|
||||
// This still contains linebreaks etc.
|
||||
private String text;
|
||||
|
||||
//This does not contain linebreaks and must always be used for correct offsets.
|
||||
private String searchText;
|
||||
|
||||
public boolean contains(String type) {
|
||||
return entities.stream().anyMatch(entity -> entity.getType().equals(type));
|
||||
}
|
||||
|
||||
public void redact(String type, int ruleNumber, String reason){
|
||||
entities.forEach(entity -> {
|
||||
if(entity.getType().equals(type)){
|
||||
entity.setRedaction(true);
|
||||
entity.setRedactionReason("\nRule " + ruleNumber + " matched\n\n" +reason);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
public void redactNot(String type, int ruleNumber, String reason){
|
||||
entities.forEach(entity -> {
|
||||
if(entity.getType().equals(type)){
|
||||
entity.setRedaction(false);
|
||||
entity.setRedactionReason("\nRule " + ruleNumber + " matched\n\n" +reason);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
public void highlightAll(String type){
|
||||
entities.forEach(entity -> {
|
||||
if(entity.getType().equals(type)){
|
||||
entity.setRedaction(true);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
public void redactLineAfter(String start, String asType, int ruleNumber, String reason){
|
||||
|
||||
String value = StringUtils.substringBetween(text, start, "\n");
|
||||
|
||||
if(value != null){
|
||||
Set<Entity> found = findEntity(value.trim(), asType);
|
||||
entities.addAll(found);
|
||||
}
|
||||
|
||||
// TODO No need to iterate
|
||||
entities.forEach(entity -> {
|
||||
if(entity.getType().equals(asType)){
|
||||
entity.setRedaction(true);
|
||||
entity.setRedactionReason("\nRule " + ruleNumber + " matched\n\n" +reason);
|
||||
}
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
public void redactBetween(String start, String stop, String asType, int ruleNumber, String reason){
|
||||
|
||||
String value = StringUtils.substringBetween(searchText, start, stop);
|
||||
|
||||
if(value != null){
|
||||
Set<Entity> found = findEntity(value.trim(), asType);
|
||||
entities.addAll(found);
|
||||
}
|
||||
|
||||
// TODO No need to iterate
|
||||
entities.forEach(entity -> {
|
||||
if(entity.getType().equals(asType)){
|
||||
entity.setRedaction(true);
|
||||
entity.setRedactionReason("\nRule " + ruleNumber + " matched\n\n" +reason);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
private Set<Entity> findEntity(String value, String asType) {
|
||||
|
||||
Set<Entity> found = new HashSet<>();
|
||||
|
||||
int startIndex;
|
||||
int stopIndex = 0;
|
||||
do {
|
||||
startIndex = searchText.indexOf(value, stopIndex);
|
||||
stopIndex = startIndex + value.length();
|
||||
|
||||
if (startIndex > -1 &&
|
||||
(startIndex == 0 || Character.isWhitespace(searchText.charAt(startIndex - 1)) || isSeparator(searchText.charAt(startIndex - 1))) &&
|
||||
(stopIndex == searchText.length() || isSeparator(searchText.charAt(stopIndex)))) {
|
||||
found.add(new Entity(searchText.substring(startIndex, stopIndex), asType, startIndex, stopIndex));
|
||||
}
|
||||
} while (startIndex > -1);
|
||||
|
||||
|
||||
removeEntitiesContainedInLarger(found);
|
||||
|
||||
return found;
|
||||
}
|
||||
|
||||
|
||||
private boolean isSeparator(char c) {
|
||||
return Character.isWhitespace(c) || Pattern.matches("\\p{Punct}", String.valueOf(c)) || c == '\"' || c == '‘' || c == '’';
|
||||
}
|
||||
|
||||
public void removeEntitiesContainedInLarger(Set<Entity> entities) {
|
||||
List<Entity> wordsToRemove = new ArrayList<>();
|
||||
for (Entity word : entities) {
|
||||
for (Entity inner : entities) {
|
||||
if (inner.getWord().length() < word.getWord().length() && inner.getStart() >= word.getStart() && inner.getEnd() <= word.getEnd() && word != inner) {
|
||||
wordsToRemove.add(inner);
|
||||
}
|
||||
}
|
||||
}
|
||||
entities.removeAll(wordsToRemove);
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,58 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import javax.annotation.PostConstruct;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class DictionaryService {
|
||||
|
||||
public static final String VERTEBRATES_CODE = "VERTEBRATE";
|
||||
public static final String ADDRESS_CODE = "ADDRESS";
|
||||
public static final String NAME_CODE = "NAME";
|
||||
public static final String NO_REDACTION_INDICATOR = "NO_REDACTION_INDICATOR";
|
||||
|
||||
@Getter
|
||||
private Map<String, Set<String>> dictionary = new HashMap<>();
|
||||
|
||||
@Getter
|
||||
private long generation;
|
||||
|
||||
@PostConstruct
|
||||
public void init() {
|
||||
loadFromResourceFiles();
|
||||
}
|
||||
|
||||
|
||||
public void updateDictionary() {
|
||||
//TODO
|
||||
}
|
||||
|
||||
|
||||
public void loadFromResourceFiles() {
|
||||
dictionary.computeIfAbsent(NAME_CODE, v -> new HashSet<>()).addAll(ResourceLoader.load("dictionaries/names.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toList()));
|
||||
dictionary.computeIfAbsent(VERTEBRATES_CODE, v -> new HashSet<>()).addAll(ResourceLoader.load("dictionaries/vertebrates.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toList()));
|
||||
dictionary.computeIfAbsent(ADDRESS_CODE, v -> new HashSet<>()).addAll(ResourceLoader.load("dictionaries/addresses.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toList()));
|
||||
dictionary.computeIfAbsent(NO_REDACTION_INDICATOR, v -> new HashSet<>()).addAll(ResourceLoader.load("dictionaries/NoRedactionIndicator.txt").stream().map(this::cleanDictionaryEntry).collect(Collectors.toList()));
|
||||
}
|
||||
|
||||
|
||||
private String cleanDictionaryEntry(String entry) {
|
||||
return TextNormalizationUtilities.removeHyphenLineBreaks(entry).replaceAll("\\n", " ");
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,67 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.InputStream;
|
||||
|
||||
import javax.annotation.PostConstruct;
|
||||
|
||||
import org.kie.api.KieServices;
|
||||
import org.kie.api.builder.KieBuilder;
|
||||
import org.kie.api.builder.KieFileSystem;
|
||||
import org.kie.api.builder.KieModule;
|
||||
import org.kie.api.runtime.KieContainer;
|
||||
import org.kie.api.runtime.KieSession;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
|
||||
|
||||
@Service
|
||||
public class DroolsExecutionService {
|
||||
|
||||
@Autowired
|
||||
private KieContainer kieContainer;
|
||||
|
||||
private String currentDrlRules;
|
||||
|
||||
@PostConstruct
|
||||
public void init (){
|
||||
currentDrlRules = ResourceLoader.loadAsString("drools/rules.drl");
|
||||
}
|
||||
|
||||
public Section executeRules(Section section) {
|
||||
KieSession kieSession = kieContainer.newKieSession();
|
||||
kieSession.setGlobal("section", section);
|
||||
kieSession.insert(section);
|
||||
kieSession.fireAllRules();
|
||||
kieSession.dispose();
|
||||
return section;
|
||||
}
|
||||
|
||||
|
||||
public void updateRules(String drlAsString) {
|
||||
|
||||
try {
|
||||
KieServices kieServices = KieServices.Factory.get();
|
||||
InputStream input = new ByteArrayInputStream(drlAsString.getBytes("UTF-8"));
|
||||
KieFileSystem kieFileSystem = kieServices.newKieFileSystem();
|
||||
kieFileSystem.write("src/main/resources/drools/rules.drl",
|
||||
kieServices.getResources().newInputStreamResource(input));
|
||||
KieBuilder kieBuilder = kieServices.newKieBuilder(kieFileSystem);
|
||||
kieBuilder.buildAll();
|
||||
KieModule kieModule = kieBuilder.getKieModule();
|
||||
kieContainer.updateToVersion(kieModule.getReleaseId());
|
||||
currentDrlRules = drlAsString;
|
||||
} catch (Exception e){
|
||||
throw new RuntimeException("Could not update rules");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public String getRules(){
|
||||
return currentDrlRules;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
@ -0,0 +1,144 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.service;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class EntityRedactionService {
|
||||
|
||||
private final DictionaryService dictionaryService;
|
||||
private final DroolsExecutionService droolsExecutionService;
|
||||
|
||||
|
||||
public void processDocument(Document classifiedDoc) {
|
||||
|
||||
dictionaryService.updateDictionary();
|
||||
|
||||
Set<Entity> documentEntities = new HashSet<>();
|
||||
for (Paragraph paragraph : classifiedDoc.getParagraphs()) {
|
||||
|
||||
SearchableText searchableText = paragraph.getSearchableText();
|
||||
|
||||
List<Table> tables = paragraph.getTables();
|
||||
|
||||
List<SearchableText> searchableRows = new ArrayList<>();
|
||||
for (Table table : tables) {
|
||||
for (List<Cell> row : table.getRows()) {
|
||||
SearchableText searchableRow = new SearchableText();
|
||||
for (Cell column : row) {
|
||||
if (column == null || column.getTextBlocks() == null) {
|
||||
continue;
|
||||
}
|
||||
for (TextBlock textBlock : column.getTextBlocks()) {
|
||||
searchableRow.addAll(textBlock.getSequences());
|
||||
}
|
||||
}
|
||||
searchableRows.add(searchableRow);
|
||||
}
|
||||
}
|
||||
|
||||
Set<Entity> entities = findEntities(searchableText);
|
||||
Section analysedSection = droolsExecutionService.executeRules(Section
|
||||
.builder()
|
||||
.entities(entities)
|
||||
.text(searchableText.getAsStringWithLinebreaks())
|
||||
.searchText(searchableText.toString())
|
||||
.build());
|
||||
|
||||
for (Entity entity : analysedSection.getEntities()) {
|
||||
entity.setPositionSequences(searchableText.getSequences(entity.getWord()));
|
||||
}
|
||||
|
||||
documentEntities.addAll(analysedSection.getEntities());
|
||||
|
||||
for (SearchableText searchableRow : searchableRows) {
|
||||
Set<Entity> rowEntities = findEntities(searchableRow);
|
||||
|
||||
Section analysedRowSection = droolsExecutionService.executeRules(Section
|
||||
.builder()
|
||||
.entities(rowEntities)
|
||||
.text(searchableRow.getAsStringWithLinebreaks())
|
||||
.searchText(searchableRow.toString())
|
||||
.build());
|
||||
|
||||
for (Entity entity : analysedRowSection.getEntities()) {
|
||||
entity.setPositionSequences(searchableRow.getSequences(entity.getWord()));
|
||||
}
|
||||
documentEntities.addAll(analysedRowSection.getEntities());
|
||||
}
|
||||
}
|
||||
|
||||
documentEntities.forEach(entity -> {
|
||||
entity.getPositionSequences().forEach(sequence -> {
|
||||
classifiedDoc.getEntities().computeIfAbsent(sequence.getPageNumber(), (x) -> new HashSet<>()).add(
|
||||
new Entity(entity.getWord(), entity.getType(), entity.isRedaction(), entity.getRedactionReason(), List.of(sequence))
|
||||
);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
private Set<Entity> findEntities(SearchableText searchableText) {
|
||||
|
||||
String normalizedInputString = searchableText.toString();
|
||||
|
||||
Set<Entity> found = new HashSet<>();
|
||||
for (Map.Entry<String, Set<String>> entry : dictionaryService.getDictionary().entrySet()) {
|
||||
for (String value : entry.getValue()) {
|
||||
int startIndex;
|
||||
int stopIndex = 0;
|
||||
do {
|
||||
startIndex = normalizedInputString.indexOf(value, stopIndex);
|
||||
stopIndex = startIndex + value.length();
|
||||
|
||||
if (startIndex > -1 &&
|
||||
(startIndex == 0 || Character.isWhitespace(normalizedInputString.charAt(startIndex - 1)) || isSeparator(normalizedInputString.charAt(startIndex - 1))) &&
|
||||
(stopIndex == normalizedInputString.length() || isSeparator(normalizedInputString.charAt(stopIndex)))) {
|
||||
found.add(new Entity(normalizedInputString.substring(startIndex, stopIndex), entry.getKey(), startIndex, stopIndex));
|
||||
}
|
||||
} while (startIndex > -1);
|
||||
}
|
||||
}
|
||||
|
||||
removeEntitiesContainedInLarger(found);
|
||||
|
||||
return found;
|
||||
}
|
||||
|
||||
private boolean isSeparator(char c) {
|
||||
return Character.isWhitespace(c) || Pattern.matches("\\p{Punct}", String.valueOf(c)) || c == '\"' || c == '‘' || c == '’';
|
||||
}
|
||||
|
||||
|
||||
public void removeEntitiesContainedInLarger(Set<Entity> entities) {
|
||||
List<Entity> wordsToRemove = new ArrayList<>();
|
||||
for (Entity word : entities) {
|
||||
for (Entity inner : entities) {
|
||||
if (inner.getWord().length() < word.getWord().length() && inner.getStart() >= word.getStart() && inner.getEnd() <= word.getEnd() && word != inner) {
|
||||
wordsToRemove.add(inner);
|
||||
}
|
||||
}
|
||||
}
|
||||
entities.removeAll(wordsToRemove);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
@ -0,0 +1,57 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.utils;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.net.URL;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
|
||||
@UtilityClass
|
||||
public class ResourceLoader {
|
||||
|
||||
public Set<String> load(String classpathPath) {
|
||||
|
||||
URL resource = ResourceLoader.class.getClassLoader().getResource(classpathPath);
|
||||
if (resource == null) {
|
||||
throw new IllegalArgumentException("could not load classpath resource: " + classpathPath);
|
||||
}
|
||||
try (InputStream is = resource.openStream();
|
||||
InputStreamReader isr = new InputStreamReader(is, StandardCharsets.UTF_8);
|
||||
BufferedReader br = new BufferedReader(isr)) {
|
||||
return br.lines().collect(Collectors.toSet());
|
||||
} catch (IOException e) {
|
||||
throw new IllegalArgumentException("could not load classpath resource: " + classpathPath, e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public String loadAsString(String classpathPath) {
|
||||
URL resource = ResourceLoader.class.getClassLoader().getResource(classpathPath);
|
||||
if (resource == null) {
|
||||
throw new IllegalArgumentException("could not load classpath resource: " + classpathPath);
|
||||
}
|
||||
try (InputStream is = resource.openStream();
|
||||
InputStreamReader isr = new InputStreamReader(is, StandardCharsets.UTF_8);
|
||||
BufferedReader br = new BufferedReader(isr)) {
|
||||
StringBuffer sb = new StringBuffer();
|
||||
String str;
|
||||
while ((str = br.readLine()) != null) {
|
||||
sb.append(str).append("\n");
|
||||
}
|
||||
return sb.toString();
|
||||
} catch (IOException e) {
|
||||
throw new IllegalArgumentException("could not load classpath resource: " + classpathPath, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@ -0,0 +1,17 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.utils;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
|
||||
@UtilityClass
|
||||
public class TextNormalizationUtilities {
|
||||
|
||||
/**
|
||||
* Revert hyphenation due to line breaks.
|
||||
* @param text Text to be processed.
|
||||
* @return Text without line-break hyphenation.
|
||||
*/
|
||||
public static String removeHyphenLineBreaks(String text) {
|
||||
return text.replaceAll("\\s(\\S+)[\\-\\u00AD]\\R|\n\r(.+ )", "\n$1$2");
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,123 @@
|
||||
package com.iqser.red.service.redaction.v1.server.segmentation;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.service.BlockificationService;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.service.ClassificationService;
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.PDFLinesTextStripper;
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.model.ParsedElements;
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.service.RulingCleaningService;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.service.TableExtractionService;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@SuppressWarnings("PMD")
|
||||
public class PdfSegmentationService {
|
||||
|
||||
private final RulingCleaningService rulingCleaningService;
|
||||
private final TableExtractionService tableExtractionService;
|
||||
private final BlockificationService blockificationService;
|
||||
private final ClassificationService classificationService;
|
||||
private final SectionsBuilderService sectionsBuilderService;
|
||||
|
||||
public Document parseDocument(PDDocument pdDocument) throws IOException {
|
||||
|
||||
Document document = new Document();
|
||||
|
||||
List<Page> pages = new ArrayList<>();
|
||||
PDFLinesTextStripper stripper = new PDFLinesTextStripper();
|
||||
for (int pageNumber = 1; pageNumber <= pdDocument.getNumberOfPages(); pageNumber++) {
|
||||
PDPage pdPage = pdDocument.getPage(pageNumber - 1);
|
||||
stripper.setPageNumber(pageNumber);
|
||||
stripper.setStartPage(pageNumber);
|
||||
stripper.setEndPage(pageNumber);
|
||||
stripper.getText(pdDocument);
|
||||
|
||||
PDRectangle pdr = pdPage.getMediaBox();
|
||||
boolean isLandscape = pdr.getWidth() > pdr.getHeight();
|
||||
|
||||
int rotation = pdPage.getRotation();
|
||||
boolean isRotated = rotation != 0 && rotation != 360;
|
||||
|
||||
ParsedElements parsedElements = ParsedElements
|
||||
.builder()
|
||||
.rulings(stripper.getRulings())
|
||||
.sequences(stripper.getTextPositionSequences())
|
||||
.minCharWidth(Utils.round(stripper.getMinCharWidth(), 2))
|
||||
.minCharHeight(Utils.round(stripper.getMinCharHeight(), 2))
|
||||
.landscape(isLandscape)
|
||||
.rotated(isRotated)
|
||||
.build();
|
||||
|
||||
CleanRulings cleanRulings = rulingCleaningService.getCleanRulings(parsedElements.getRulings(), parsedElements.getMinCharWidth(), parsedElements.getMinCharHeight());
|
||||
|
||||
Page page = blockificationService.blockify(parsedElements.getSequences(), cleanRulings.getHorizontal(), cleanRulings.getVertical());
|
||||
page.setRotation(rotation);
|
||||
|
||||
tableExtractionService.extractTables(cleanRulings, page);
|
||||
|
||||
buildPageStatistics(page);
|
||||
|
||||
page.setLandscape(parsedElements.isLandscape() || parsedElements.isRotated());
|
||||
|
||||
page.setPageNumber(pageNumber);
|
||||
increaseDocumentStatistics(page, document);
|
||||
pages.add(page);
|
||||
}
|
||||
document.setPages(pages);
|
||||
|
||||
classificationService.classifyDocument(document);
|
||||
|
||||
sectionsBuilderService.buildSections(document);
|
||||
|
||||
return document;
|
||||
|
||||
}
|
||||
|
||||
private void increaseDocumentStatistics(Page page, Document document) {
|
||||
if (!page.isLandscape()) {
|
||||
document.getFontSizeCounter().addAll(page.getFontSizeCounter().getCountPerValue());
|
||||
}
|
||||
document.getFontCounter().addAll(page.getFontCounter().getCountPerValue());
|
||||
document.getTextHeightCounter().addAll(page.getTextHeightCounter().getCountPerValue());
|
||||
document.getFontStyleCounter().addAll(page.getFontStyleCounter().getCountPerValue());
|
||||
}
|
||||
|
||||
private void buildPageStatistics(Page page) {
|
||||
|
||||
// Collect all statistics for the page, except from blocks inside tables, as tables will always be added to BodyTextFrame.
|
||||
for (AbstractTextContainer textBlock : page.getTextBlocks()) {
|
||||
if (textBlock instanceof TextBlock) {
|
||||
if (((TextBlock) textBlock).getSequences() == null) {
|
||||
continue;
|
||||
}
|
||||
for (TextPositionSequence word : ((TextBlock) textBlock).getSequences()) {
|
||||
page.getTextHeightCounter().add(word.getTextHeight());
|
||||
page.getFontCounter().add(word.getFont());
|
||||
page.getFontSizeCounter().add(word.getFontSize());
|
||||
page.getFontStyleCounter().add(word.getFontStyle());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,115 @@
|
||||
package com.iqser.red.service.redaction.v1.server.segmentation;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
|
||||
@Service
|
||||
@SuppressWarnings("all")
|
||||
public class SectionsBuilderService {
|
||||
|
||||
|
||||
public void buildSections(Document document) {
|
||||
|
||||
List<AbstractTextContainer> chunkWords = new ArrayList<>();
|
||||
List<Paragraph> chunkBlockList1 = new ArrayList<>();
|
||||
|
||||
AbstractTextContainer prev = null;
|
||||
|
||||
for (Page page : document.getPages()) {
|
||||
for (AbstractTextContainer current : page.getTextBlocks()) {
|
||||
|
||||
if (current.getClassification() == null || current.getClassification().equals("Header") || current.getClassification().equals("Footer")) {
|
||||
continue;
|
||||
}
|
||||
|
||||
current.setPage(page.getPageNumber());
|
||||
|
||||
if (prev != null && current.getClassification().startsWith("H ") || !document.isHeadlines()) {
|
||||
|
||||
Paragraph cb1 = buildTextBlock(chunkWords);
|
||||
chunkBlockList1.add(cb1);
|
||||
chunkWords = new ArrayList<>();
|
||||
|
||||
}
|
||||
|
||||
chunkWords.add(current);
|
||||
|
||||
prev = current;
|
||||
}
|
||||
}
|
||||
|
||||
Paragraph cb1 = buildTextBlock(chunkWords);
|
||||
if (cb1 != null) {
|
||||
chunkBlockList1.add(cb1);
|
||||
}
|
||||
|
||||
document.setParagraphs(chunkBlockList1);
|
||||
}
|
||||
|
||||
|
||||
private Paragraph buildTextBlock(List<AbstractTextContainer> wordBlockList) {
|
||||
|
||||
Paragraph paragraph = new Paragraph();
|
||||
TextBlock textBlock = null;
|
||||
|
||||
int pageBefore = -1;
|
||||
boolean splitByTable = false;
|
||||
|
||||
Iterator<AbstractTextContainer> itty = wordBlockList.iterator();
|
||||
boolean alreadyAdded= false;
|
||||
while (itty.hasNext()) {
|
||||
AbstractTextContainer container = itty.next();
|
||||
|
||||
if (container instanceof Table) {
|
||||
splitByTable = true;
|
||||
|
||||
|
||||
if (textBlock != null && !alreadyAdded) {
|
||||
paragraph.getPageBlocks().add(textBlock);
|
||||
alreadyAdded =true;
|
||||
}
|
||||
paragraph.getPageBlocks().add(container);
|
||||
continue;
|
||||
}
|
||||
|
||||
TextBlock wordBlock = (TextBlock) container;
|
||||
|
||||
if (textBlock == null) {
|
||||
textBlock = new TextBlock(wordBlock.getMinX(), wordBlock.getMaxX(), wordBlock.getMinY(), wordBlock.getMaxY(), wordBlock.getSequences(), wordBlock.getRotation());
|
||||
textBlock.setPage(wordBlock.getPage());
|
||||
} else if (splitByTable) {
|
||||
textBlock = new TextBlock(wordBlock.getMinX(), wordBlock.getMaxX(), wordBlock.getMinY(), wordBlock.getMaxY(), wordBlock.getSequences(), wordBlock.getRotation());
|
||||
textBlock.setPage(wordBlock.getPage());
|
||||
alreadyAdded = false;
|
||||
} else if (pageBefore != -1 && wordBlock.getPage() != pageBefore) {
|
||||
textBlock.setPage(pageBefore);
|
||||
paragraph.getPageBlocks().add(textBlock);
|
||||
textBlock = new TextBlock(wordBlock.getMinX(), wordBlock.getMaxX(), wordBlock.getMinY(), wordBlock.getMaxY(), wordBlock.getSequences(), wordBlock.getRotation());
|
||||
textBlock.setPage(wordBlock.getPage());
|
||||
} else {
|
||||
TextBlock spatialEntity = textBlock.union(wordBlock);
|
||||
textBlock.resize(spatialEntity.getMinX(), spatialEntity.getMinY(),
|
||||
spatialEntity.getWidth(), spatialEntity.getHeight());
|
||||
}
|
||||
pageBefore = wordBlock.getPage();
|
||||
splitByTable = false;
|
||||
}
|
||||
|
||||
if (textBlock != null && !alreadyAdded) {
|
||||
paragraph.getPageBlocks().add(textBlock);
|
||||
}
|
||||
return paragraph;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
@ -0,0 +1,18 @@
|
||||
package com.iqser.red.service.redaction.v1.server.settings;
|
||||
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
@ConfigurationProperties("redaction-service")
|
||||
public class RedactionServiceSettings {
|
||||
|
||||
/**
|
||||
* Tenant used in single tenant mode.
|
||||
*/
|
||||
private String defaultTenant = "iqser-id";
|
||||
|
||||
private int flattenImageDpi = 100;
|
||||
|
||||
}
|
||||
@ -0,0 +1,25 @@
|
||||
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@AllArgsConstructor
|
||||
@NoArgsConstructor
|
||||
public abstract class AbstractTextContainer {
|
||||
|
||||
protected float minX;
|
||||
protected float maxX;
|
||||
protected float minY;
|
||||
protected float maxY;
|
||||
protected String classification;
|
||||
protected int page;
|
||||
|
||||
public abstract String getText();
|
||||
|
||||
public boolean contains(AbstractTextContainer other) {
|
||||
return this.minX <= other.minX && this.maxX >= other.maxX && this.minY >= other.minY && this.maxY <= other.maxY;
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,26 @@
|
||||
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
|
||||
|
||||
import java.awt.geom.Point2D;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
|
||||
@SuppressWarnings("serial")
|
||||
@Data
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
public class Cell extends Rectangle {
|
||||
|
||||
private List<TextBlock> textBlocks = new ArrayList<>();
|
||||
|
||||
public Cell(Point2D topLeft, Point2D bottomRight) {
|
||||
super((float) topLeft.getY(), (float) topLeft.getX(), (float) (bottomRight.getX() - topLeft.getX()), (float) (bottomRight.getY() - topLeft.getY()));
|
||||
}
|
||||
|
||||
public void addTextBlock(TextBlock textBlock) {
|
||||
textBlocks.add(textBlock);
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,14 @@
|
||||
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
public class CleanRulings {
|
||||
|
||||
List<Ruling> horizontal;
|
||||
List<Ruling> vertical;
|
||||
}
|
||||
@ -0,0 +1,177 @@
|
||||
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
|
||||
|
||||
import java.awt.geom.Point2D;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
|
||||
@SuppressWarnings("all")
|
||||
public class Rectangle extends Rectangle2D.Float {
|
||||
|
||||
/**
|
||||
* Ill-defined comparator, from when Rectangle was Comparable.
|
||||
*
|
||||
* see https://github.com/tabulapdf/tabula-java/issues/116
|
||||
* @deprecated with no replacement
|
||||
*/
|
||||
@Deprecated
|
||||
public static final Comparator<Rectangle> ILL_DEFINED_ORDER = new Comparator<Rectangle>() {
|
||||
@Override public int compare(Rectangle o1, Rectangle o2) {
|
||||
if (o1.equals(o2)) return 0;
|
||||
if (o1.verticalOverlap(o2) > VERTICAL_COMPARISON_THRESHOLD) {
|
||||
return o1.isLtrDominant() == -1 && o2.isLtrDominant() == -1
|
||||
? - java.lang.Double.compare(o1.getX(), o2.getX())
|
||||
: java.lang.Double.compare(o1.getX(), o2.getX());
|
||||
} else {
|
||||
return java.lang.Float.compare(o1.getBottom(), o2.getBottom());
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
protected static final float VERTICAL_COMPARISON_THRESHOLD = 0.4f;
|
||||
|
||||
public Rectangle() {
|
||||
super();
|
||||
}
|
||||
|
||||
public Rectangle(float top, float left, float width, float height) {
|
||||
super();
|
||||
this.setRect(left, top, width, height);
|
||||
}
|
||||
|
||||
public int compareTo(Rectangle other) {
|
||||
return ILL_DEFINED_ORDER.compare(this, other);
|
||||
}
|
||||
|
||||
// I'm bad at Java and need this for fancy sorting in
|
||||
// technology.tabula.TextChunk.
|
||||
public int isLtrDominant() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
public float getArea() {
|
||||
return this.width * this.height;
|
||||
}
|
||||
|
||||
public float verticalOverlap(Rectangle other) {
|
||||
return Math.max(0, Math.min(this.getBottom(), other.getBottom()) - Math.max(this.getTop(), other.getTop()));
|
||||
}
|
||||
|
||||
public boolean verticallyOverlaps(Rectangle other) {
|
||||
return verticalOverlap(other) > 0;
|
||||
}
|
||||
|
||||
public float horizontalOverlap(Rectangle other) {
|
||||
return Math.max(0, Math.min(this.getRight(), other.getRight()) - Math.max(this.getLeft(), other.getLeft()));
|
||||
}
|
||||
|
||||
public boolean horizontallyOverlaps(Rectangle other) {
|
||||
return horizontalOverlap(other) > 0;
|
||||
}
|
||||
|
||||
public float verticalOverlapRatio(Rectangle other) {
|
||||
float rv = 0, delta = Math.min(this.getBottom() - this.getTop(), other.getBottom() - other.getTop());
|
||||
|
||||
if (other.getTop() <= this.getTop() && this.getTop() <= other.getBottom()
|
||||
&& other.getBottom() <= this.getBottom()) {
|
||||
rv = (other.getBottom() - this.getTop()) / delta;
|
||||
} else if (this.getTop() <= other.getTop() && other.getTop() <= this.getBottom()
|
||||
&& this.getBottom() <= other.getBottom()) {
|
||||
rv = (this.getBottom() - other.getTop()) / delta;
|
||||
} else if (this.getTop() <= other.getTop() && other.getTop() <= other.getBottom()
|
||||
&& other.getBottom() <= this.getBottom()) {
|
||||
rv = (other.getBottom() - other.getTop()) / delta;
|
||||
} else if (other.getTop() <= this.getTop() && this.getTop() <= this.getBottom()
|
||||
&& this.getBottom() <= other.getBottom()) {
|
||||
rv = (this.getBottom() - this.getTop()) / delta;
|
||||
}
|
||||
|
||||
return rv;
|
||||
|
||||
}
|
||||
|
||||
public float overlapRatio(Rectangle other) {
|
||||
double intersectionWidth = Math.max(0,
|
||||
Math.min(this.getRight(), other.getRight()) - Math.max(this.getLeft(), other.getLeft()));
|
||||
double intersectionHeight = Math.max(0,
|
||||
Math.min(this.getBottom(), other.getBottom()) - Math.max(this.getTop(), other.getTop()));
|
||||
double intersectionArea = Math.max(0, intersectionWidth * intersectionHeight);
|
||||
double unionArea = this.getArea() + other.getArea() - intersectionArea;
|
||||
|
||||
return (float) (intersectionArea / unionArea);
|
||||
}
|
||||
|
||||
public Rectangle merge(Rectangle other) {
|
||||
this.setRect(this.createUnion(other));
|
||||
return this;
|
||||
}
|
||||
|
||||
public float getTop() {
|
||||
return (float) this.getMinY();
|
||||
}
|
||||
|
||||
public void setTop(float top) {
|
||||
float deltaHeight = top - this.y;
|
||||
this.setRect(this.x, top, this.width, this.height - deltaHeight);
|
||||
}
|
||||
|
||||
public float getRight() {
|
||||
return (float) this.getMaxX();
|
||||
}
|
||||
|
||||
public void setRight(float right) {
|
||||
this.setRect(this.x, this.y, right - this.x, this.height);
|
||||
}
|
||||
|
||||
public float getLeft() {
|
||||
return (float) this.getMinX();
|
||||
}
|
||||
|
||||
public void setLeft(float left) {
|
||||
float deltaWidth = left - this.x;
|
||||
this.setRect(left, this.y, this.width - deltaWidth, this.height);
|
||||
}
|
||||
|
||||
public float getBottom() {
|
||||
return (float) this.getMaxY();
|
||||
}
|
||||
|
||||
public void setBottom(float bottom) {
|
||||
this.setRect(this.x, this.y, this.width, bottom - this.y);
|
||||
}
|
||||
|
||||
public Point2D[] getPoints() {
|
||||
return new Point2D[] { new Point2D.Float(this.getLeft(), this.getTop()),
|
||||
new Point2D.Float(this.getRight(), this.getTop()), new Point2D.Float(this.getRight(), this.getBottom()),
|
||||
new Point2D.Float(this.getLeft(), this.getBottom()) };
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
String s = super.toString();
|
||||
sb.append(s.substring(0, s.length() - 1));
|
||||
sb.append(String.format(",bottom=%f,right=%f]", this.getBottom(), this.getRight()));
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* @param rectangles
|
||||
* @return minimum bounding box that contains all the rectangles
|
||||
*/
|
||||
public static Rectangle boundingBoxOf(List<? extends Rectangle> rectangles) {
|
||||
float minx = java.lang.Float.MAX_VALUE;
|
||||
float miny = java.lang.Float.MAX_VALUE;
|
||||
float maxx = java.lang.Float.MIN_VALUE;
|
||||
float maxy = java.lang.Float.MIN_VALUE;
|
||||
|
||||
for (Rectangle r : rectangles) {
|
||||
minx = (float) Math.min(r.getMinX(), minx);
|
||||
miny = (float) Math.min(r.getMinY(), miny);
|
||||
maxx = (float) Math.max(r.getMaxX(), maxx);
|
||||
maxy = (float) Math.max(r.getMaxY(), maxy);
|
||||
}
|
||||
return new Rectangle(miny, minx, maxx - minx, maxy - miny);
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,52 @@
|
||||
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.locationtech.jts.geom.Envelope;
|
||||
import org.locationtech.jts.index.strtree.STRtree;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
|
||||
|
||||
|
||||
@SuppressWarnings("all")
|
||||
public class RectangleSpatialIndex<T extends Rectangle> {
|
||||
|
||||
|
||||
private final STRtree si = new STRtree();
|
||||
private final List<T> rectangles = new ArrayList<>();
|
||||
|
||||
public void add(T te) {
|
||||
rectangles.add(te);
|
||||
si.insert(new Envelope(te.getLeft(), te.getRight(), te.getBottom(), te.getTop()), te);
|
||||
}
|
||||
|
||||
public List<T> contains(Rectangle r) {
|
||||
List<T> intersection = si.query(new Envelope(r.getLeft(), r.getRight(), r.getTop(), r.getBottom()));
|
||||
List<T> rv = new ArrayList<T>();
|
||||
|
||||
for (T ir: intersection) {
|
||||
if (r.contains(ir)) {
|
||||
rv.add(ir);
|
||||
}
|
||||
}
|
||||
|
||||
Utils.sort(rv, Rectangle.ILL_DEFINED_ORDER);
|
||||
return rv;
|
||||
}
|
||||
|
||||
public List<T> intersects(Rectangle r) {
|
||||
List rv = si.query(new Envelope(r.getLeft(), r.getRight(), r.getTop(), r.getBottom()));
|
||||
return rv;
|
||||
}
|
||||
|
||||
/**
|
||||
* Minimum bounding box of all the Rectangles contained on this RectangleSpatialIndex
|
||||
*
|
||||
* @return a Rectangle
|
||||
*/
|
||||
public Rectangle getBounds() {
|
||||
return Rectangle.boundingBoxOf(rectangles);
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,369 @@
|
||||
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
|
||||
|
||||
import java.awt.geom.Line2D;
|
||||
import java.awt.geom.Point2D;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.Formatter;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.CohenSutherlandClipping;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
|
||||
@Slf4j
|
||||
@SuppressWarnings("all")
|
||||
public class Ruling extends Line2D.Float {
|
||||
|
||||
private static int PERPENDICULAR_PIXEL_EXPAND_AMOUNT = 2;
|
||||
|
||||
private enum SOType {VERTICAL, HRIGHT, HLEFT}
|
||||
|
||||
|
||||
/**
 * Creates a ruling from {@code p1} to {@code p2}.
 *
 * @param p1 start point
 * @param p2 end point
 */
public Ruling(Point2D p1, Point2D p2) {
    super(p1, p2);
}
|
||||
|
||||
public boolean vertical() {
|
||||
return this.length() > 0 && Utils.feq(this.x1, this.x2); //diff < ORIENTATION_CHECK_THRESHOLD;
|
||||
}
|
||||
|
||||
public boolean horizontal() {
|
||||
return this.length() > 0 && Utils.feq(this.y1, this.y2); //diff < ORIENTATION_CHECK_THRESHOLD;
|
||||
}
|
||||
|
||||
public boolean oblique() {
|
||||
return !(this.vertical() || this.horizontal());
|
||||
}
|
||||
|
||||
// attributes that make sense only for non-oblique lines
|
||||
// these are used to have a single collapse method (in page, currently)
|
||||
|
||||
public float getPosition() {
|
||||
if (this.oblique()) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
return this.vertical() ? this.getLeft() : this.getTop();
|
||||
}
|
||||
|
||||
|
||||
public float getStart() {
|
||||
if (this.oblique()) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
return this.vertical() ? this.getTop() : this.getLeft();
|
||||
}
|
||||
|
||||
public void setStart(float v) {
|
||||
if (this.oblique()) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
if (this.vertical()) {
|
||||
this.setTop(v);
|
||||
} else {
|
||||
this.setLeft(v);
|
||||
}
|
||||
}
|
||||
|
||||
public float getEnd() {
|
||||
if (this.oblique()) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
return this.vertical() ? this.getBottom() : this.getRight();
|
||||
}
|
||||
|
||||
public void setEnd(float v) {
|
||||
if (this.oblique()) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
if (this.vertical()) {
|
||||
this.setBottom(v);
|
||||
} else {
|
||||
this.setRight(v);
|
||||
}
|
||||
}
|
||||
|
||||
public void setStartEnd(float start, float end) {
|
||||
if (this.oblique()) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
if (this.vertical()) {
|
||||
this.setTop(start);
|
||||
this.setBottom(end);
|
||||
} else {
|
||||
this.setLeft(start);
|
||||
this.setRight(end);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public boolean perpendicularTo(Ruling other) {
|
||||
return this.vertical() == other.horizontal();
|
||||
}
|
||||
|
||||
|
||||
public boolean nearlyIntersects(Ruling another, int colinearOrParallelExpandAmount) {
|
||||
if (this.intersectsLine(another)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
boolean rv = false;
|
||||
|
||||
if (this.perpendicularTo(another)) {
|
||||
rv = this.expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT).intersectsLine(another);
|
||||
} else {
|
||||
rv = this.expand(colinearOrParallelExpandAmount)
|
||||
.intersectsLine(another.expand(colinearOrParallelExpandAmount));
|
||||
}
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
public double length() {
|
||||
return Math.sqrt(Math.pow(this.x1 - this.x2, 2) + Math.pow(this.y1 - this.y2, 2));
|
||||
}
|
||||
|
||||
public Ruling intersect(Rectangle2D clip) {
|
||||
Float clipee = (Float) this.clone();
|
||||
boolean clipped = new CohenSutherlandClipping(clip).clip(clipee);
|
||||
|
||||
if (clipped) {
|
||||
return new Ruling(clipee.getP1(), clipee.getP2());
|
||||
} else {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
public Ruling expand(float amount) {
|
||||
Ruling r = (Ruling) this.clone();
|
||||
r.setStart(this.getStart() - amount);
|
||||
r.setEnd(this.getEnd() + amount);
|
||||
return r;
|
||||
}
|
||||
|
||||
public Point2D intersectionPoint(Ruling other) {
|
||||
Ruling this_l = this.expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT);
|
||||
Ruling other_l = other.expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT);
|
||||
Ruling horizontal, vertical;
|
||||
|
||||
if (!this_l.intersectsLine(other_l)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (this_l.horizontal() && other_l.vertical()) {
|
||||
horizontal = this_l;
|
||||
vertical = other_l;
|
||||
} else if (this_l.vertical() && other_l.horizontal()) {
|
||||
vertical = this_l;
|
||||
horizontal = other_l;
|
||||
} else {
|
||||
log.warn("lines must be orthogonal, vertical and horizontal");
|
||||
return null;
|
||||
}
|
||||
return new Point2D.Float(vertical.getLeft(), horizontal.getTop());
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if (this == other) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!(other instanceof Ruling)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
Ruling o = (Ruling) other;
|
||||
return this.getP1().equals(o.getP1()) && this.getP2().equals(o.getP2());
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return super.hashCode();
|
||||
}
|
||||
|
||||
public float getTop() {
|
||||
return this.y1;
|
||||
}
|
||||
|
||||
public void setTop(float v) {
|
||||
setLine(this.getLeft(), v, this.getRight(), this.getBottom());
|
||||
}
|
||||
|
||||
public float getLeft() {
|
||||
return this.x1;
|
||||
}
|
||||
|
||||
public void setLeft(float v) {
|
||||
setLine(v, this.getTop(), this.getRight(), this.getBottom());
|
||||
}
|
||||
|
||||
public float getBottom() {
|
||||
return this.y2;
|
||||
}
|
||||
|
||||
public void setBottom(float v) {
|
||||
setLine(this.getLeft(), this.getTop(), this.getRight(), v);
|
||||
}
|
||||
|
||||
public float getRight() {
|
||||
return this.x2;
|
||||
}
|
||||
|
||||
public void setRight(float v) {
|
||||
setLine(this.getLeft(), this.getTop(), v, this.getBottom());
|
||||
}
|
||||
|
||||
public float getWidth() {
|
||||
return this.getRight() - this.getLeft();
|
||||
}
|
||||
|
||||
public float getHeight() {
|
||||
return this.getBottom() - this.getTop();
|
||||
}
|
||||
|
||||
public double getAngle() {
|
||||
double angle = Math.toDegrees(Math.atan2(this.getP2().getY() - this.getP1().getY(),
|
||||
this.getP2().getX() - this.getP1().getX()));
|
||||
|
||||
if (angle < 0) {
|
||||
angle += 360;
|
||||
}
|
||||
return angle;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
Formatter formatter = new Formatter(sb);
|
||||
String rv = formatter.format("%s[minX=%f minY=%f maxX=%f maxY=%f]", this.getClass().toString(), this.x1, this.y1, this.x2, this.y2).toString();
|
||||
formatter.close();
|
||||
return rv;
|
||||
}
|
||||
|
||||
public static List<Ruling> cropRulingsToArea(List<Ruling> rulings, Rectangle2D area) {
|
||||
ArrayList<Ruling> rv = new ArrayList<>();
|
||||
for (Ruling r : rulings) {
|
||||
if (r.intersects(area)) {
|
||||
rv.add(r.intersect(area));
|
||||
}
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
// log(n) implementation of find_intersections
// based on http://people.csail.mit.edu/indyk/6.838-old/handouts/lec2.pdf
/**
 * Finds the crossing points between horizontal and vertical rulings with a left-to-right sweep.
 *
 * @param horizontals the horizontal rulings
 * @param verticals   the vertical rulings
 * @return a map ordered by Y, then X, from each crossing point to a two element array holding
 *         the expanded horizontal ruling at index 0 and the expanded vertical ruling at index 1
 */
public static Map<Point2D, Ruling[]> findIntersections(List<Ruling> horizontals, List<Ruling> verticals) {

    // One sweep event: its kind, the x position at which it fires and the ruling it belongs to.
    class SortObject {
        protected SOType type;
        protected float position;
        protected Ruling ruling;

        public SortObject(SOType type, float position, Ruling ruling) {
            this.type = type;
            this.position = position;
            this.ruling = ruling;
        }
    }

    List<SortObject> sos = new ArrayList<>();

    // Horizontal rulings currently crossed by the sweep position, keyed by their top coordinate.
    // NOTE(review): the comparator only looks at getTop(), so two horizontals with the same top
    // share one key - presumably callers pass collapsed rulings; verify.
    TreeMap<Ruling, Boolean> tree = new TreeMap<>(new Comparator<Ruling>() {
        @Override
        public int compare(Ruling o1, Ruling o2) {
            return java.lang.Double.compare(o1.getTop(), o2.getTop());
        }
    });

    // Result map, ordered by Y first and X second.
    TreeMap<Point2D, Ruling[]> rv = new TreeMap<>(new Comparator<Point2D>() {
        @Override
        public int compare(Point2D o1, Point2D o2) {
            if (o1.getY() > o2.getY()) {
                return 1;
            }
            if (o1.getY() < o2.getY()) {
                return -1;
            }
            if (o1.getX() > o2.getX()) {
                return 1;
            }
            if (o1.getX() < o2.getX()) {
                return -1;
            }
            return 0;
        }
    });

    // Every horizontal ruling yields an "enter" and a "leave" event, widened by the expand amount.
    for (Ruling h : horizontals) {
        sos.add(new SortObject(SOType.HLEFT, h.getLeft() - PERPENDICULAR_PIXEL_EXPAND_AMOUNT, h));
        sos.add(new SortObject(SOType.HRIGHT, h.getRight() + PERPENDICULAR_PIXEL_EXPAND_AMOUNT, h));
    }

    // Every vertical ruling yields a single event at its left coordinate.
    for (Ruling v : verticals) {
        sos.add(new SortObject(SOType.VERTICAL, v.getLeft(), v));
    }

    // Order events by position; at (nearly) equal positions HLEFT sorts before VERTICAL and
    // VERTICAL before HRIGHT, so a vertical touching an end of a horizontal still sees it.
    Collections.sort(sos, new Comparator<SortObject>() {
        @Override
        public int compare(SortObject a, SortObject b) {
            int rv;
            if (Utils.feq(a.position, b.position)) {
                if (a.type == SOType.VERTICAL && b.type == SOType.HLEFT) {
                    rv = 1;
                } else if (a.type == SOType.VERTICAL && b.type == SOType.HRIGHT) {
                    rv = -1;
                } else if (a.type == SOType.HLEFT && b.type == SOType.VERTICAL) {
                    rv = -1;
                } else if (a.type == SOType.HRIGHT && b.type == SOType.VERTICAL) {
                    rv = 1;
                } else {
                    rv = java.lang.Double.compare(a.position, b.position);
                }
            } else {
                return java.lang.Double.compare(a.position, b.position);
            }
            return rv;
        }
    });

    for (SortObject so : sos) {
        switch (so.type) {
            case VERTICAL:
                // Test the vertical ruling against every horizontal ruling currently in the tree.
                for (Map.Entry<Ruling, Boolean> h : tree.entrySet()) {
                    try {
                        Point2D i = h.getKey().intersectionPoint(so.ruling);
                        if (i == null) {
                            continue;
                        }
                        rv.put(i,
                                new Ruling[]{h.getKey().expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT),
                                        so.ruling.expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT)});
                    } catch(UnsupportedOperationException e){
                        // Raised for oblique rulings; such pairs are logged and skipped.
                        log.info("Some line are oblique, ignoring...");
                        continue;
                    }
                }
                break;
            case HRIGHT:
                // The sweep has passed the right end: the horizontal ruling is no longer active.
                tree.remove(so.ruling);
                break;
            case HLEFT:
                // The sweep has reached the left end: the horizontal ruling becomes active.
                tree.put(so.ruling, true);
                break;
        }
    }

    return rv;

}
|
||||
|
||||
|
||||
}
|
||||
@ -0,0 +1,305 @@
|
||||
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
|
||||
|
||||
import lombok.Getter;
|
||||
|
||||
@SuppressWarnings("all")
|
||||
public class Table extends AbstractTextContainer {
|
||||
|
||||
private final TreeMap<CellPosition, Cell> cells = new TreeMap<>();
|
||||
|
||||
private RectangleSpatialIndex<Cell> si = new RectangleSpatialIndex<>();
|
||||
|
||||
@Getter
|
||||
private int rowCount = 0;
|
||||
@Getter
|
||||
private int colCount = 0;
|
||||
|
||||
private int rotation = 0;
|
||||
|
||||
private List<List<Cell>> memoizedRows = null;
|
||||
|
||||
/**
 * Creates a table from the cells found inside an area of a page.
 *
 * <p>The cells are registered first (see {@code addCells}, which may remove entries from and
 * reorder the passed list); afterwards the bounding box is copied from {@code area}:
 * {@code minY} is taken from the area's bottom and {@code maxY} from its top.</p>
 *
 * @param cells    the cells belonging to the table
 * @param area     the area covered by the table
 * @param rotation the page rotation, used when the rows are computed
 */
public Table(List<Cell> cells, Rectangle area, int rotation) {

    addCells(cells);

    this.minX = area.getLeft();
    this.minY = area.getBottom();
    this.maxX = area.getRight();
    this.maxY = area.getTop();

    this.classification = "Table";
    this.rotation = rotation;

}
|
||||
|
||||
public List<List<Cell>> getRows() {
|
||||
|
||||
if (memoizedRows == null) {
|
||||
memoizedRows = computeRows();
|
||||
}
|
||||
|
||||
return memoizedRows;
|
||||
|
||||
}
|
||||
|
||||
private List<List<Cell>> computeRows() {
|
||||
|
||||
List<List<Cell>> rows = new ArrayList<>();
|
||||
if (rotation == 90) {
|
||||
for (int i = 0; i < colCount; i++) { // rows
|
||||
List<Cell> lastRow = new ArrayList<>();
|
||||
for (int j = rowCount - 1; j >= 0; j--) { // cols
|
||||
Cell cell = cells.get(new CellPosition(j, i));
|
||||
lastRow.add(cell);
|
||||
}
|
||||
rows.add(lastRow);
|
||||
}
|
||||
} else if (rotation == 270) {
|
||||
for (int i = colCount - 1; i >= 0; i--) { // rows
|
||||
List<Cell> lastRow = new ArrayList<>();
|
||||
for (int j = 0; j < rowCount; j++) { // cols
|
||||
Cell cell = cells.get(new CellPosition(i, j));
|
||||
lastRow.add(cell);
|
||||
}
|
||||
rows.add(lastRow);
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < rowCount; i++) {
|
||||
List<Cell> lastRow = new ArrayList<>();
|
||||
for (int j = 0; j < colCount; j++) {
|
||||
Cell cell = cells.get(new CellPosition(i, j)); // JAVA_8 use getOrDefault()
|
||||
lastRow.add(cell);
|
||||
}
|
||||
rows.add(lastRow);
|
||||
}
|
||||
}
|
||||
|
||||
return rows;
|
||||
|
||||
}
|
||||
|
||||
public void add(Cell chunk, int row, int col) {
|
||||
|
||||
rowCount = Math.max(rowCount, row + 1);
|
||||
colCount = Math.max(colCount, col + 1);
|
||||
|
||||
CellPosition cp = new CellPosition(row, col);
|
||||
cells.put(cp, chunk);
|
||||
|
||||
}
|
||||
|
||||
private void addCells(List<Cell> cells) {
|
||||
|
||||
if (cells.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
Iterator<Cell> itty = cells.iterator();
|
||||
|
||||
while (itty.hasNext()) {
|
||||
Cell cell = itty.next();
|
||||
if (cell.getWidth() > 1.1 && cell.getHeight() > 1.1) {
|
||||
si.add(cell);
|
||||
} else {
|
||||
itty.remove();
|
||||
}
|
||||
}
|
||||
|
||||
List<List<Cell>> rowsOfCells = rowsOfCells(cells);
|
||||
|
||||
Map<Integer, Cell> previousNonNullCellForColumnIndex = new HashMap<>();
|
||||
for (int i = 0; i < rowsOfCells.size(); i++) {
|
||||
List<Cell> row = rowsOfCells.get(i);
|
||||
Iterator<Cell> rowCells = row.iterator();
|
||||
int startColumn = 0;
|
||||
int jumpToColumn = 0;
|
||||
while (rowCells.hasNext()) {
|
||||
Cell cell = rowCells.next();
|
||||
if (i > 0) {
|
||||
List<List<Cell>> others = rowsOfCells(
|
||||
si.contains(
|
||||
new Rectangle(cell.getBottom(),
|
||||
si.getBounds().getLeft(),
|
||||
cell.getLeft() - si.getBounds().getLeft() + 1,
|
||||
si.getBounds().getBottom() - cell.getBottom()
|
||||
)
|
||||
));
|
||||
|
||||
for (List<Cell> r : others) {
|
||||
jumpToColumn = Math.max(jumpToColumn, r.size());
|
||||
}
|
||||
}
|
||||
|
||||
while (startColumn != jumpToColumn) {
|
||||
add(previousNonNullCellForColumnIndex.get(startColumn), i, startColumn);
|
||||
startColumn++;
|
||||
}
|
||||
|
||||
add(cell, i, startColumn);
|
||||
previousNonNullCellForColumnIndex.put(startColumn, cell);
|
||||
startColumn++;
|
||||
jumpToColumn = startColumn;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static List<List<Cell>> rowsOfCells(List<Cell> cells) {
|
||||
Cell c;
|
||||
float lastTop;
|
||||
List<List<Cell>> rv = new ArrayList<>();
|
||||
List<Cell> lastRow;
|
||||
|
||||
if (cells.isEmpty()) {
|
||||
return rv;
|
||||
}
|
||||
|
||||
Collections.sort(cells, new Comparator<Cell>() {
|
||||
@Override
|
||||
public int compare(Cell arg0, Cell arg1) {
|
||||
return Double.compare(arg0.getLeft(), arg1.getLeft());
|
||||
}
|
||||
});
|
||||
|
||||
Collections.sort(cells, Collections.reverseOrder(new Comparator<Cell>() {
|
||||
@Override
|
||||
public int compare(Cell arg0, Cell arg1) {
|
||||
return Float.compare(Utils.round(arg0.getBottom(), 2), Utils.round(arg1.getBottom(),2));
|
||||
}
|
||||
}));
|
||||
|
||||
Iterator<Cell> iter = cells.iterator();
|
||||
c = iter.next();
|
||||
lastTop = c.getBottom();
|
||||
lastRow = new ArrayList<>();
|
||||
lastRow.add(c);
|
||||
rv.add(lastRow);
|
||||
|
||||
while (iter.hasNext()) {
|
||||
c = iter.next();
|
||||
if (!Utils.feq(c.getBottom(), lastTop)) {
|
||||
lastRow = new ArrayList<>();
|
||||
rv.add(lastRow);
|
||||
}
|
||||
lastRow.add(c);
|
||||
lastTop = c.getBottom();
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getText() {
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
List<List<Cell>> rows = getRows();
|
||||
|
||||
int i = 0;
|
||||
for (List<Cell> row : rows) {
|
||||
if (i != 0) {
|
||||
sb.append("\n");
|
||||
}
|
||||
if (!row.isEmpty()) {
|
||||
boolean firstColumn = true;
|
||||
for (Cell column : row) {
|
||||
if (!firstColumn) {
|
||||
sb.append(",");
|
||||
}
|
||||
if (column != null && column.getTextBlocks() != null) {
|
||||
boolean first = true;
|
||||
for (TextBlock textBlock : column.getTextBlocks()) {
|
||||
if (!first) {
|
||||
sb.append("\n");
|
||||
}
|
||||
sb.append('\"').append(textBlock.getText().replaceAll("\"", "\\\"")).append('\"');
|
||||
first = false;
|
||||
}
|
||||
}
|
||||
firstColumn = false;
|
||||
}
|
||||
}
|
||||
i++;
|
||||
}
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
public String getTextAsHtml() {
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
List<List<Cell>> rows = getRows();
|
||||
|
||||
sb.append("<table border=\"1\">");
|
||||
int i = 0;
|
||||
for (List<Cell> row : rows) {
|
||||
sb.append("\n<tr>");
|
||||
if (!row.isEmpty()) {
|
||||
for (Cell column : row) {
|
||||
sb.append(i == 0 ? "\n<th>" : "\n<td>");
|
||||
if (column != null && column.getTextBlocks() != null) {
|
||||
boolean first = true;
|
||||
for (TextBlock textBlock : column.getTextBlocks()) {
|
||||
if (!first) {
|
||||
sb.append("<br />");
|
||||
}
|
||||
sb.append(textBlock.getText().replaceAll("\\n", "<br />"));
|
||||
first = false;
|
||||
}
|
||||
}
|
||||
sb.append(i == 0 ? "</th>" : "</td>");
|
||||
}
|
||||
}
|
||||
sb.append("</tr>");
|
||||
i++;
|
||||
}
|
||||
sb.append("</table>");
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/**
 * Key of a cell inside the table: a (row, column) pair ordered by row first, column second.
 */
class CellPosition implements Comparable<CellPosition> {

    final int row, col;

    CellPosition(int row, int col) {
        this.row = row;
        this.col = col;
    }

    @Override
    public int hashCode() {
        return row + 101 * col;
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        CellPosition other = (CellPosition) obj;
        return row == other.row && col == other.col;
    }

    /**
     * Orders positions by row, then by column.
     *
     * <p>Uses {@link Integer#compare(int, int)} instead of subtracting the indices: a
     * subtraction can overflow and report the wrong sign for values that are far apart.</p>
     */
    @Override
    public int compareTo(CellPosition other) {
        int byRow = Integer.compare(row, other.row);
        return byRow != 0 ? byRow : Integer.compare(col, other.col);
    }

}
|
||||
|
||||
}
|
||||
@ -0,0 +1,165 @@
|
||||
package com.iqser.red.service.redaction.v1.server.tableextraction.service;
|
||||
|
||||
import java.awt.geom.Line2D;
|
||||
import java.awt.geom.Point2D;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
|
||||
|
||||
@Service
|
||||
public class RulingCleaningService {
|
||||
|
||||
public CleanRulings getCleanRulings(List<Ruling> rulings, float minCharWidth, float minCharHeight){
|
||||
if (!rulings.isEmpty()) {
|
||||
snapPoints(rulings, minCharWidth , minCharHeight);
|
||||
}
|
||||
|
||||
List<Ruling> vrs = new ArrayList<>();
|
||||
for (Ruling vr : rulings) {
|
||||
if (vr.vertical()) {
|
||||
vrs.add(vr);
|
||||
}
|
||||
}
|
||||
List<Ruling> verticalRulingLines = collapseOrientedRulings(vrs);
|
||||
|
||||
List<Ruling> hrs = new ArrayList<>();
|
||||
for (Ruling hr : rulings) {
|
||||
if (hr.horizontal()) {
|
||||
hrs.add(hr);
|
||||
}
|
||||
}
|
||||
List<Ruling> horizontalRulingLines = collapseOrientedRulings(hrs);
|
||||
|
||||
return CleanRulings
|
||||
.builder()
|
||||
.vertical(verticalRulingLines)
|
||||
.horizontal(horizontalRulingLines)
|
||||
.build();
|
||||
}
|
||||
|
||||
public void snapPoints(List<? extends Line2D.Float> rulings, float xThreshold, float yThreshold) {
|
||||
|
||||
// collect points and keep a Line -> p1,p2 map
|
||||
Map<Line2D.Float, Point2D[]> linesToPoints = new HashMap<>();
|
||||
List<Point2D> points = new ArrayList<>();
|
||||
for (Line2D.Float r : rulings) {
|
||||
Point2D p1 = r.getP1();
|
||||
Point2D p2 = r.getP2();
|
||||
linesToPoints.put(r, new Point2D[]{p1, p2});
|
||||
points.add(p1);
|
||||
points.add(p2);
|
||||
}
|
||||
|
||||
// snap by X
|
||||
points.sort(Comparator.comparingDouble(Point2D::getX));
|
||||
|
||||
List<List<Point2D>> groupedPoints = new ArrayList<>();
|
||||
groupedPoints.add(new ArrayList<>(Collections.singletonList(points.get(0))));
|
||||
|
||||
for (Point2D p : points.subList(1, points.size() - 1)) {
|
||||
List<Point2D> last = groupedPoints.get(groupedPoints.size() - 1);
|
||||
if (Math.abs(p.getX() - last.get(0).getX()) < xThreshold) {
|
||||
groupedPoints.get(groupedPoints.size() - 1).add(p);
|
||||
} else {
|
||||
groupedPoints.add(new ArrayList<>(Collections.singletonList(p)));
|
||||
}
|
||||
}
|
||||
|
||||
for (List<Point2D> group : groupedPoints) {
|
||||
float avgLoc = 0;
|
||||
for (Point2D p : group) {
|
||||
avgLoc += p.getX();
|
||||
}
|
||||
avgLoc /= group.size();
|
||||
for (Point2D p : group) {
|
||||
p.setLocation(avgLoc, p.getY());
|
||||
}
|
||||
}
|
||||
// ---
|
||||
|
||||
// snap by Y
|
||||
points.sort(Comparator.comparingDouble(Point2D::getY));
|
||||
|
||||
groupedPoints = new ArrayList<>();
|
||||
groupedPoints.add(new ArrayList<>(Collections.singletonList(points.get(0))));
|
||||
|
||||
for (Point2D p : points.subList(1, points.size() - 1)) {
|
||||
List<Point2D> last = groupedPoints.get(groupedPoints.size() - 1);
|
||||
if (Math.abs(p.getY() - last.get(0).getY()) < yThreshold) {
|
||||
groupedPoints.get(groupedPoints.size() - 1).add(p);
|
||||
} else {
|
||||
groupedPoints.add(new ArrayList<>(Collections.singletonList(p)));
|
||||
}
|
||||
}
|
||||
|
||||
for (List<Point2D> group : groupedPoints) {
|
||||
float avgLoc = 0;
|
||||
for (Point2D p : group) {
|
||||
avgLoc += p.getY();
|
||||
}
|
||||
avgLoc /= group.size();
|
||||
for (Point2D p : group) {
|
||||
p.setLocation(p.getX(), avgLoc);
|
||||
}
|
||||
}
|
||||
// ---
|
||||
|
||||
// finally, modify lines
|
||||
for (Map.Entry<Line2D.Float, Point2D[]> ltp : linesToPoints.entrySet()) {
|
||||
Point2D[] p = ltp.getValue();
|
||||
ltp.getKey().setLine(p[0], p[1]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private List<Ruling> collapseOrientedRulings(List<Ruling> lines) {
|
||||
int COLINEAR_OR_PARALLEL_PIXEL_EXPAND_AMOUNT = 1;
|
||||
return collapseOrientedRulings(lines, COLINEAR_OR_PARALLEL_PIXEL_EXPAND_AMOUNT);
|
||||
}
|
||||
|
||||
|
||||
private List<Ruling> collapseOrientedRulings(List<Ruling> lines, int expandAmount) {
|
||||
ArrayList<Ruling> rv = new ArrayList<>();
|
||||
lines.sort((a, b) -> {
|
||||
final float diff = a.getPosition() - b.getPosition();
|
||||
return Float.compare(diff == 0 ? a.getStart() - b.getStart() : diff, 0f);
|
||||
});
|
||||
|
||||
for (Ruling next_line : lines) {
|
||||
Ruling last = rv.isEmpty() ? null : rv.get(rv.size() - 1);
|
||||
// if current line colinear with next, and are "close enough": expand current line
|
||||
if (last != null && Utils.feq(next_line.getPosition(), last.getPosition()) && last.nearlyIntersects(next_line, expandAmount)) {
|
||||
final float lastStart = last.getStart();
|
||||
final float lastEnd = last.getEnd();
|
||||
|
||||
final boolean lastFlipped = lastStart > lastEnd;
|
||||
final boolean nextFlipped = next_line.getStart() > next_line.getEnd();
|
||||
|
||||
boolean differentDirections = nextFlipped != lastFlipped;
|
||||
float nextS = differentDirections ? next_line.getEnd() : next_line.getStart();
|
||||
float nextE = differentDirections ? next_line.getStart() : next_line.getEnd();
|
||||
|
||||
final float newStart = lastFlipped ? Math.max(nextS, lastStart) : Math.min(nextS, lastStart);
|
||||
final float newEnd = lastFlipped ? Math.min(nextE, lastEnd) : Math.max(nextE, lastEnd);
|
||||
last.setStartEnd(newStart, newEnd);
|
||||
assert !last.oblique();
|
||||
}
|
||||
else if (next_line.length() == 0) {
|
||||
continue;
|
||||
}
|
||||
else {
|
||||
rv.add(next_line);
|
||||
}
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,330 @@
|
||||
package com.iqser.red.service.redaction.v1.server.tableextraction.service;
|
||||
|
||||
import java.awt.geom.Point2D;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
|
||||
|
||||
@Service
|
||||
@SuppressWarnings("all")
|
||||
public class TableExtractionService {
|
||||
|
||||
public void extractTables(CleanRulings cleanRulings, Page page){
|
||||
|
||||
List<Cell> cells = findCells(cleanRulings.getHorizontal(), cleanRulings.getVertical());
|
||||
|
||||
Iterator<AbstractTextContainer> itty = page.getTextBlocks().iterator();
|
||||
while (itty.hasNext()) {
|
||||
TextBlock textBlock = (TextBlock) itty.next();
|
||||
for (Cell cell : cells) {
|
||||
if (cell.intersects(textBlock.getMinX(), textBlock.getMinY(), textBlock.getWidth(), textBlock.getHeight())) {
|
||||
cell.addTextBlock(textBlock);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
List<Rectangle> spreadsheetAreas = findSpreadsheetsFromCells(cells)
|
||||
.stream()
|
||||
.filter(r -> r.getWidth() > 0f && r.getHeight() > 0f)
|
||||
.collect(Collectors.toList());
|
||||
|
||||
List<Table> tables = new ArrayList<>();
|
||||
for (Rectangle area : spreadsheetAreas) {
|
||||
|
||||
List<Cell> overlappingCells = new ArrayList<>();
|
||||
for (Cell c : cells) {
|
||||
if (c.intersects(area)) {
|
||||
overlappingCells.add(c);
|
||||
}
|
||||
}
|
||||
tables.add(new Table(overlappingCells, area, page.getRotation()));
|
||||
}
|
||||
|
||||
for (Table table : tables) {
|
||||
int position = -1;
|
||||
|
||||
itty = page.getTextBlocks().iterator();
|
||||
while (itty.hasNext()) {
|
||||
AbstractTextContainer textBlock = (AbstractTextContainer) itty.next();
|
||||
if (table.contains(textBlock)) {
|
||||
if (position == -1) {
|
||||
position = page.getTextBlocks().indexOf(textBlock);
|
||||
}
|
||||
itty.remove();
|
||||
}
|
||||
}
|
||||
if (position != -1) {
|
||||
page.getTextBlocks().add(position, table);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public List<Cell> findCells(List<Ruling> horizontalRulingLines, List<Ruling> verticalRulingLines) {
|
||||
List<Cell> cellsFound = new ArrayList<>();
|
||||
Map<Point2D, Ruling[]> intersectionPoints = Ruling.findIntersections(horizontalRulingLines, verticalRulingLines);
|
||||
List<Point2D> intersectionPointsList = new ArrayList<>(intersectionPoints.keySet());
|
||||
Collections.sort(intersectionPointsList, POINT_COMPARATOR);
|
||||
boolean doBreak;
|
||||
|
||||
for (int i = 0; i < intersectionPointsList.size(); i++) {
|
||||
Point2D topLeft = intersectionPointsList.get(i);
|
||||
Ruling[] hv = intersectionPoints.get(topLeft);
|
||||
doBreak = false;
|
||||
|
||||
// CrossingPointsDirectlyBelow( topLeft );
|
||||
List<Point2D> xPoints = new ArrayList<>();
|
||||
// CrossingPointsDirectlyToTheRight( topLeft );
|
||||
List<Point2D> yPoints = new ArrayList<>();
|
||||
|
||||
for (Point2D p : intersectionPointsList.subList(i, intersectionPointsList.size())) {
|
||||
if (p.getX() == topLeft.getX() && p.getY() > topLeft.getY()) {
|
||||
xPoints.add(p);
|
||||
}
|
||||
if (p.getY() == topLeft.getY() && p.getX() > topLeft.getX()) {
|
||||
yPoints.add(p);
|
||||
}
|
||||
}
|
||||
outer:
|
||||
for (Point2D xPoint : xPoints) {
|
||||
if (doBreak) {
|
||||
break;
|
||||
}
|
||||
|
||||
// is there a vertical edge b/w topLeft and xPoint?
|
||||
if (!hv[1].equals(intersectionPoints.get(xPoint)[1])) {
|
||||
continue;
|
||||
}
|
||||
for (Point2D yPoint : yPoints) {
|
||||
// is there an horizontal edge b/w topLeft and yPoint ?
|
||||
if (!hv[0].equals(intersectionPoints.get(yPoint)[0])) {
|
||||
continue;
|
||||
}
|
||||
Point2D btmRight = new Point2D.Float((float) yPoint.getX(), (float) xPoint.getY());
|
||||
if (intersectionPoints.containsKey(btmRight)
|
||||
&& intersectionPoints.get(btmRight)[0].equals(intersectionPoints.get(xPoint)[0])
|
||||
&& intersectionPoints.get(btmRight)[1].equals(intersectionPoints.get(yPoint)[1])) {
|
||||
cellsFound.add(new Cell(topLeft, btmRight));
|
||||
doBreak = true;
|
||||
break outer;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO create cells for vertical ruling lines with aligned endpoints at the top/bottom of a grid
|
||||
// that aren't connected with an horizontal ruler?
|
||||
// see: https://github.com/jazzido/tabula-extractor/issues/78#issuecomment-41481207
|
||||
|
||||
return cellsFound;
|
||||
}
|
||||
|
||||
|
||||
public List<Rectangle> findSpreadsheetsFromCells(List<? extends Rectangle> cells) {
|
||||
// via: http://stackoverflow.com/questions/13746284/merging-multiple-adjacent-rectangles-into-one-polygon
|
||||
List<Rectangle> rectangles = new ArrayList<>();
|
||||
Set<Point2D> pointSet = new HashSet<>();
|
||||
Map<Point2D, Point2D> edgesH = new HashMap<>();
|
||||
Map<Point2D, Point2D> edgesV = new HashMap<>();
|
||||
int i = 0;
|
||||
|
||||
cells = new ArrayList<>(new HashSet<>(cells));
|
||||
|
||||
Utils.sort(cells, Rectangle.ILL_DEFINED_ORDER);
|
||||
|
||||
for (Rectangle cell : cells) {
|
||||
for (Point2D pt : cell.getPoints()) {
|
||||
if (pointSet.contains(pt)) { // shared vertex, remove it
|
||||
pointSet.remove(pt);
|
||||
} else {
|
||||
pointSet.add(pt);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// X first sort
|
||||
List<Point2D> pointsSortX = new ArrayList<>(pointSet);
|
||||
Collections.sort(pointsSortX, X_FIRST_POINT_COMPARATOR);
|
||||
// Y first sort
|
||||
List<Point2D> pointsSortY = new ArrayList<>(pointSet);
|
||||
Collections.sort(pointsSortY, POINT_COMPARATOR);
|
||||
|
||||
while (i < pointSet.size()) {
|
||||
float currY = (float) pointsSortY.get(i).getY();
|
||||
while (i < pointSet.size() && Utils.feq(pointsSortY.get(i).getY(), currY)) {
|
||||
edgesH.put(pointsSortY.get(i), pointsSortY.get(i + 1));
|
||||
edgesH.put(pointsSortY.get(i + 1), pointsSortY.get(i));
|
||||
i += 2;
|
||||
}
|
||||
}
|
||||
|
||||
i = 0;
|
||||
while (i < pointSet.size()) {
|
||||
float currX = (float) pointsSortX.get(i).getX();
|
||||
while (i < pointSet.size() && Utils.feq(pointsSortX.get(i).getX(), currX)) {
|
||||
edgesV.put(pointsSortX.get(i), pointsSortX.get(i + 1));
|
||||
edgesV.put(pointsSortX.get(i + 1), pointsSortX.get(i));
|
||||
i += 2;
|
||||
}
|
||||
}
|
||||
|
||||
// Get all the polygons
|
||||
List<List<PolygonVertex>> polygons = new ArrayList<>();
|
||||
Point2D nextVertex;
|
||||
while (!edgesH.isEmpty()) {
|
||||
ArrayList<PolygonVertex> polygon = new ArrayList<>();
|
||||
Point2D first = edgesH.keySet().iterator().next();
|
||||
polygon.add(new PolygonVertex(first, Direction.HORIZONTAL));
|
||||
edgesH.remove(first);
|
||||
|
||||
while (true) {
|
||||
PolygonVertex curr = polygon.get(polygon.size() - 1);
|
||||
PolygonVertex lastAddedVertex;
|
||||
if (curr.direction == Direction.HORIZONTAL) {
|
||||
nextVertex = edgesV.get(curr.point);
|
||||
edgesV.remove(curr.point);
|
||||
lastAddedVertex = new PolygonVertex(nextVertex, Direction.VERTICAL);
|
||||
polygon.add(lastAddedVertex);
|
||||
} else {
|
||||
nextVertex = edgesH.get(curr.point);
|
||||
edgesH.remove(curr.point);
|
||||
lastAddedVertex = new PolygonVertex(nextVertex, Direction.HORIZONTAL);
|
||||
polygon.add(lastAddedVertex);
|
||||
}
|
||||
|
||||
if (lastAddedVertex.equals(polygon.get(0))) {
|
||||
// closed polygon
|
||||
polygon.remove(polygon.size() - 1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (PolygonVertex vertex : polygon) {
|
||||
edgesH.remove(vertex.point);
|
||||
edgesV.remove(vertex.point);
|
||||
}
|
||||
polygons.add(polygon);
|
||||
}
|
||||
|
||||
// calculate grid-aligned minimum area rectangles for each found polygon
|
||||
for (List<PolygonVertex> poly : polygons) {
|
||||
float top = java.lang.Float.MAX_VALUE;
|
||||
float left = java.lang.Float.MAX_VALUE;
|
||||
float bottom = java.lang.Float.MIN_VALUE;
|
||||
float right = java.lang.Float.MIN_VALUE;
|
||||
for (PolygonVertex pt : poly) {
|
||||
top = (float) Math.min(top, pt.point.getY());
|
||||
left = (float) Math.min(left, pt.point.getX());
|
||||
bottom = (float) Math.max(bottom, pt.point.getY());
|
||||
right = (float) Math.max(right, pt.point.getX());
|
||||
}
|
||||
rectangles.add(new Rectangle(top, left, right - left, bottom - top));
|
||||
}
|
||||
|
||||
return rectangles;
|
||||
}
|
||||
|
||||
|
||||
private static final Comparator<Point2D> X_FIRST_POINT_COMPARATOR = new Comparator<Point2D>() {
|
||||
@Override
|
||||
public int compare(Point2D arg0, Point2D arg1) {
|
||||
int rv = 0;
|
||||
float arg0X = Utils.round(arg0.getX(), 2);
|
||||
float arg0Y = Utils.round(arg0.getY(), 2);
|
||||
float arg1X = Utils.round(arg1.getX(), 2);
|
||||
float arg1Y = Utils.round(arg1.getY(), 2);
|
||||
|
||||
if (arg0X > arg1X) {
|
||||
rv = 1;
|
||||
} else if (arg0X < arg1X) {
|
||||
rv = -1;
|
||||
} else if (arg0Y > arg1Y) {
|
||||
rv = 1;
|
||||
} else if (arg0Y < arg1Y) {
|
||||
rv = -1;
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
private static final Comparator<Point2D> POINT_COMPARATOR = new Comparator<Point2D>() {
|
||||
@Override
|
||||
public int compare(Point2D arg0, Point2D arg1) {
|
||||
int rv = 0;
|
||||
float arg0X = Utils.round(arg0.getX(), 2);
|
||||
float arg0Y = Utils.round(arg0.getY(), 2);
|
||||
float arg1X = Utils.round(arg1.getX(), 2);
|
||||
float arg1Y = Utils.round(arg1.getY(), 2);
|
||||
|
||||
|
||||
if (arg0Y > arg1Y) {
|
||||
rv = 1;
|
||||
} else if (arg0Y < arg1Y) {
|
||||
rv = -1;
|
||||
} else if (arg0X > arg1X) {
|
||||
rv = 1;
|
||||
} else if (arg0X < arg1X) {
|
||||
rv = -1;
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/**
 * Tag stored on every {@link PolygonVertex}. {@code findSpreadsheetsFromCells} uses it to
 * alternate between the vertical and the horizontal edge map while walking a polygon outline.
 */
private enum Direction {
    HORIZONTAL,
    VERTICAL
}
|
||||
|
||||
static class PolygonVertex {
|
||||
Point2D point;
|
||||
Direction direction;
|
||||
|
||||
public PolygonVertex(Point2D point, Direction direction) {
|
||||
this.direction = direction;
|
||||
this.point = point;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if (this == other) {
|
||||
return true;
|
||||
}
|
||||
if (!(other instanceof PolygonVertex)) {
|
||||
return false;
|
||||
}
|
||||
return this.point.equals(((PolygonVertex) other).point);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return this.point.hashCode();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("%s[point=%s,direction=%s]", this.getClass().getName(), this.point.toString(), this.direction.toString());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,140 @@
|
||||
/*
|
||||
* CohenSutherland.java
|
||||
* --------------------
|
||||
* (c) 2007 by Intevation GmbH
|
||||
*
|
||||
* @author Sascha L. Teichmann (teichmann@intevation.de)
|
||||
* @author Ludwig Reiter (ludwig@intevation.de)
|
||||
*
|
||||
* This program is free software under the LGPL (>=v2.1)
|
||||
* Read the file LICENSE.txt coming with the sources for details.
|
||||
*/
|
||||
package com.iqser.red.service.redaction.v1.server.tableextraction.utils;
|
||||
|
||||
import java.awt.geom.Line2D;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
|
||||
/**
|
||||
* Implements the well known Cohen Sutherland line
|
||||
* clipping algorithm (line against clip rectangle).
|
||||
*/
|
||||
@SuppressWarnings("all")
|
||||
public final class CohenSutherlandClipping
|
||||
{
|
||||
private double xMin;
|
||||
private double yMin;
|
||||
private double xMax;
|
||||
private double yMax;
|
||||
|
||||
/**
|
||||
* Creates a Cohen Sutherland clipper with clip rect (0, 0, 0, 0).
|
||||
*/
|
||||
public CohenSutherlandClipping() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a Cohen Sutherland clipper with the given clip rectangle.
|
||||
* @param clip the clip rectangle to use
|
||||
*/
|
||||
public CohenSutherlandClipping(Rectangle2D clip) {
|
||||
setClip(clip);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the clip rectangle.
|
||||
* @param clip the clip rectangle
|
||||
*/
|
||||
public void setClip(Rectangle2D clip) {
|
||||
xMin = clip.getX();
|
||||
xMax = xMin + clip.getWidth();
|
||||
yMin = clip.getY();
|
||||
yMax = yMin + clip.getHeight();
|
||||
}
|
||||
|
||||
private static final int INSIDE = 0;
|
||||
private static final int LEFT = 1;
|
||||
private static final int RIGHT = 2;
|
||||
private static final int BOTTOM = 4;
|
||||
private static final int TOP = 8;
|
||||
|
||||
private final int regionCode(double x, double y) {
|
||||
int code = x < xMin
|
||||
? LEFT
|
||||
: x > xMax
|
||||
? RIGHT
|
||||
: INSIDE;
|
||||
if (y < yMin) code |= BOTTOM;
|
||||
else if (y > yMax) code |= TOP;
|
||||
return code;
|
||||
}
|
||||
|
||||
/**
|
||||
* Clips a given line against the clip rectangle.
|
||||
* The modification (if needed) is done in place.
|
||||
* @param line the line to clip
|
||||
* @return true if line is clipped, false if line is
|
||||
* totally outside the clip rect.
|
||||
*/
|
||||
public boolean clip(Line2D.Float line) {
|
||||
|
||||
double p1x = line.getX1();
|
||||
double p1y = line.getY1();
|
||||
double p2x = line.getX2();
|
||||
double p2y = line.getY2();
|
||||
|
||||
double qx = 0d;
|
||||
double qy = 0d;
|
||||
|
||||
boolean vertical = p1x == p2x;
|
||||
|
||||
double slope = vertical
|
||||
? 0d
|
||||
: (p2y-p1y)/(p2x-p1x);
|
||||
|
||||
int c1 = regionCode(p1x, p1y);
|
||||
int c2 = regionCode(p2x, p2y);
|
||||
|
||||
while (c1 != INSIDE || c2 != INSIDE) {
|
||||
|
||||
if ((c1 & c2) != INSIDE)
|
||||
return false;
|
||||
|
||||
int c = c1 == INSIDE ? c2 : c1;
|
||||
|
||||
if ((c & LEFT) != INSIDE) {
|
||||
qx = xMin;
|
||||
qy = (Utils.feq(qx, p1x) ? 0 : qx-p1x)*slope + p1y;
|
||||
}
|
||||
else if ((c & RIGHT) != INSIDE) {
|
||||
qx = xMax;
|
||||
qy = (Utils.feq(qx, p1x) ? 0 : qx-p1x)*slope + p1y;
|
||||
}
|
||||
else if ((c & BOTTOM) != INSIDE) {
|
||||
qy = yMin;
|
||||
qx = vertical
|
||||
? p1x
|
||||
: (Utils.feq(qy, p1y) ? 0 : qy-p1y)/slope + p1x;
|
||||
}
|
||||
else if ((c & TOP) != INSIDE) {
|
||||
qy = yMax;
|
||||
qx = vertical
|
||||
? p1x
|
||||
: (Utils.feq(qy, p1y) ? 0 : qy-p1y)/slope + p1x;
|
||||
}
|
||||
|
||||
if (c == c1) {
|
||||
p1x = qx;
|
||||
p1y = qy;
|
||||
c1 = regionCode(p1x, p1y);
|
||||
}
|
||||
else {
|
||||
p2x = qx;
|
||||
p2y = qy;
|
||||
c2 = regionCode(p2x, p2y);
|
||||
}
|
||||
}
|
||||
line.setLine(p1x, p1y, p2x, p2y);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
// end of file
|
||||
@ -0,0 +1,35 @@
|
||||
package com.iqser.red.service.redaction.v1.server.tableextraction.utils;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@SuppressWarnings("all")
|
||||
public class Utils {
|
||||
|
||||
private final static float EPSILON = 0.1f;
|
||||
|
||||
public static boolean feq(double f1, double f2) {
|
||||
return (Math.abs(f1 - f2) < EPSILON);
|
||||
}
|
||||
|
||||
public static float round(double d, int decimalPlace) {
|
||||
BigDecimal bd = BigDecimal.valueOf(d);
|
||||
bd = bd.setScale(decimalPlace, BigDecimal.ROUND_HALF_UP);
|
||||
return bd.floatValue();
|
||||
}
|
||||
|
||||
public static <T> void sort(List<T> list, Comparator<? super T> comparator) {
|
||||
try {
|
||||
Collections.sort(list, comparator);
|
||||
} catch (IllegalArgumentException e){
|
||||
//TODO Figure out why this happens.
|
||||
log.warn(e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,242 @@
|
||||
package com.iqser.red.service.redaction.v1.server.visualization.service;
|
||||
|
||||
import static com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService.ADDRESS_CODE;
|
||||
import static com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService.NAME_CODE;
|
||||
import static com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService.NO_REDACTION_INDICATOR;
|
||||
import static com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService.VERTEBRATES_CODE;
|
||||
|
||||
import java.awt.Color;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.PDPageContentStream;
|
||||
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
||||
import org.apache.pdfbox.pdmodel.font.PDType1Font;
|
||||
import org.apache.pdfbox.pdmodel.graphics.color.PDColor;
|
||||
import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB;
|
||||
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
|
||||
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationTextMarkup;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class AnnotationHighlightService {
|
||||
|
||||
|
||||
public void highlight(PDDocument document, Document classifiedDoc, boolean flatRedaction) throws IOException {
|
||||
|
||||
for (int page = 1; page <= document.getNumberOfPages(); page++) {
|
||||
|
||||
PDPage pdPage = document.getPage(page - 1);
|
||||
|
||||
if (!flatRedaction) {
|
||||
PDPageContentStream contentStream = new PDPageContentStream(document, pdPage, PDPageContentStream.AppendMode.APPEND, true);
|
||||
|
||||
for (Paragraph paragraph : classifiedDoc.getParagraphs()) {
|
||||
|
||||
for (int i = 0; i <= paragraph.getPageBlocks().size() - 1; i++) {
|
||||
|
||||
AbstractTextContainer textBlock = paragraph.getPageBlocks().get(i);
|
||||
|
||||
if (textBlock.getPage() != page) {
|
||||
continue;
|
||||
}
|
||||
if (textBlock instanceof TextBlock) {
|
||||
textBlock.setClassification((i + 1) + "/" + paragraph.getPageBlocks().size());
|
||||
visualizeTextBlock((TextBlock) textBlock, contentStream);
|
||||
} else if (textBlock instanceof Table) {
|
||||
textBlock.setClassification((i + 1) + "/" + paragraph.getPageBlocks().size());
|
||||
visualizeTable((Table) textBlock, contentStream);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
contentStream.close();
|
||||
}
|
||||
|
||||
if (classifiedDoc.getEntities().get(page) == null) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (Entity entity : classifiedDoc.getEntities().get(page)) {
|
||||
|
||||
for (EntityPositionSequence entityPositionSequence : entity.getPositionSequences()) {
|
||||
|
||||
if (flatRedaction && !isRedactionType(entity)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (TextPositionSequence textPositions : entityPositionSequence.getSequences()) {
|
||||
|
||||
float height = textPositions.getTextPositions().get(0).getHeightDir() + 2;
|
||||
|
||||
float posXInit;
|
||||
float posXEnd;
|
||||
float posYInit;
|
||||
float posYEnd;
|
||||
float[] quadPoints;
|
||||
|
||||
if (textPositions.getTextPositions().get(0).getRotation() == 90) {
|
||||
|
||||
posXEnd = textPositions.getTextPositions().get(0).getYDirAdj() + 2;
|
||||
posXInit = textPositions.getTextPositions().get(0).getYDirAdj() - height;
|
||||
posYInit = textPositions.getTextPositions().get(0).getXDirAdj();
|
||||
posYEnd = textPositions.getTextPositions().get(textPositions.getTextPositions().size() - 1).getXDirAdj() - height + 2;
|
||||
|
||||
quadPoints = new float[]{posXInit, posYInit, posXInit, posYEnd + height + 2, posXEnd, posYInit, posXEnd, posYEnd + height + 2};
|
||||
} else {
|
||||
|
||||
posXInit = textPositions.getTextPositions().get(0).getXDirAdj();
|
||||
posXEnd = textPositions.getTextPositions().get(textPositions.getTextPositions().size() - 1).getXDirAdj() + textPositions.getTextPositions().get(textPositions.getTextPositions().size() - 1).getWidth() + 1;
|
||||
posYInit = textPositions.getTextPositions().get(0).getPageHeight() - textPositions.getTextPositions().get(0).getYDirAdj();
|
||||
posYEnd = textPositions.getTextPositions().get(0).getPageHeight() - textPositions.getTextPositions().get(textPositions.getTextPositions().size() - 1).getYDirAdj();
|
||||
quadPoints = new float[]{posXInit, posYEnd + height + 2, posXEnd, posYEnd + height + 2, posXInit, posYInit - 2, posXEnd, posYEnd - 2};
|
||||
}
|
||||
|
||||
|
||||
List<PDAnnotation> annotations = pdPage.getAnnotations();
|
||||
PDAnnotationTextMarkup highlight = new PDAnnotationTextMarkup(PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT);
|
||||
highlight.constructAppearances();
|
||||
|
||||
PDRectangle position = new PDRectangle();
|
||||
position.setLowerLeftX(posXInit);
|
||||
position.setLowerLeftY(posYEnd);
|
||||
position.setUpperRightX(posXEnd);
|
||||
position.setUpperRightY(posYEnd + height);
|
||||
|
||||
highlight.setRectangle(position);
|
||||
if (!flatRedaction) {
|
||||
highlight.setAnnotationName(entityPositionSequence.getId().toString());
|
||||
highlight.setTitlePopup(entityPositionSequence.getId().toString());
|
||||
highlight.setContents(entity.getRedactionReason());
|
||||
}
|
||||
|
||||
// quadPoints is array of x,y coordinates in Z-like order (top-left, top-right, bottom-left,bottom-right)
|
||||
// of the area to be highlighted
|
||||
|
||||
highlight.setQuadPoints(quadPoints);
|
||||
|
||||
PDColor color;
|
||||
if (flatRedaction) {
|
||||
color = new PDColor(new float[]{0, 0, 0}, PDDeviceRGB.INSTANCE);
|
||||
} else {
|
||||
color = new PDColor(getColor(entity), PDDeviceRGB.INSTANCE);
|
||||
}
|
||||
|
||||
highlight.setColor(color);
|
||||
annotations.add(highlight);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private boolean isRedactionType(Entity entity) {
|
||||
if (!entity.isRedaction()) {
|
||||
return false;
|
||||
}
|
||||
if (entity.getType().equals(ADDRESS_CODE)) {
|
||||
return true;
|
||||
}
|
||||
if (entity.getType().equals(NAME_CODE)) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
private float[] getColor(Entity entity) {
|
||||
if (!entity.isRedaction()) {
|
||||
return new float[]{0.627f, 0.627f, 0.627f};
|
||||
}
|
||||
if (entity.getType().equals(VERTEBRATES_CODE)) {
|
||||
return new float[]{0, 1, 0};
|
||||
}
|
||||
if (entity.getType().equals(ADDRESS_CODE)) {
|
||||
return new float[]{0, 1, 1};
|
||||
}
|
||||
if (entity.getType().equals(NAME_CODE)) {
|
||||
return new float[]{1, 1, 0};
|
||||
}
|
||||
if (entity.getType().equals(NO_REDACTION_INDICATOR)) {
|
||||
return new float[]{1, 0.502f, 0};
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
private void visualizeTextBlock(TextBlock textBlock, PDPageContentStream contentStream) throws IOException {
|
||||
|
||||
contentStream.setStrokingColor(Color.LIGHT_GRAY);
|
||||
contentStream.setLineWidth(0.5f);
|
||||
|
||||
contentStream.addRect(textBlock.getMinX(), textBlock.getMinY(), textBlock.getWidth(), textBlock.getHeight());
|
||||
contentStream.stroke();
|
||||
|
||||
if (textBlock.getClassification() != null) {
|
||||
contentStream.beginText();
|
||||
|
||||
contentStream.setNonStrokingColor(Color.DARK_GRAY);
|
||||
contentStream.setFont(PDType1Font.TIMES_ROMAN, 8f);
|
||||
|
||||
contentStream.newLineAtOffset(textBlock.getMinX(), textBlock.getMaxY());
|
||||
|
||||
contentStream.showText(textBlock.getClassification());
|
||||
|
||||
contentStream.endText();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void visualizeTable(Table table, PDPageContentStream contentStream) throws IOException {
|
||||
for (List<Cell> row : table.getRows()) {
|
||||
for (Cell cell : row) {
|
||||
|
||||
if (cell != null) {
|
||||
contentStream.setLineWidth(0.5f);
|
||||
contentStream.setStrokingColor(Color.CYAN);
|
||||
contentStream.addRect((float) cell.getX(), (float) cell.getY(), (float) cell.getWidth(), (float) cell.getHeight());
|
||||
contentStream.stroke();
|
||||
|
||||
// contentStream.setStrokingColor(Color.GREEN);
|
||||
// for (TextBlock textBlock : cell.getTextBlocks()) {
|
||||
// contentStream.addRect(textBlock.getMinX(), textBlock.getMinY(), textBlock.getWidth(), textBlock.getHeight());
|
||||
// contentStream.stroke();
|
||||
// }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (table.getClassification() != null) {
|
||||
contentStream.beginText();
|
||||
|
||||
contentStream.setNonStrokingColor(Color.DARK_GRAY);
|
||||
contentStream.setFont(PDType1Font.TIMES_ROMAN, 8f);
|
||||
|
||||
contentStream.newLineAtOffset(table.getMinX(), table.getMinY());
|
||||
|
||||
contentStream.showText(table.getClassification());
|
||||
|
||||
contentStream.endText();
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,68 @@
|
||||
package com.iqser.red.service.redaction.v1.server.visualization.service;
|
||||
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.PDPageContentStream;
|
||||
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
||||
import org.apache.pdfbox.pdmodel.graphics.image.LosslessFactory;
|
||||
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
|
||||
import org.apache.pdfbox.rendering.ImageType;
|
||||
import org.apache.pdfbox.rendering.PDFRenderer;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class PdfFlattenService {
|
||||
|
||||
private final RedactionServiceSettings settings;
|
||||
|
||||
public PDDocument flattenPDF(PDDocument sourceDoc) throws IOException {
|
||||
|
||||
PDDocument destDoc = new PDDocument();
|
||||
|
||||
PDFRenderer pdfRenderer = new PDFRenderer(sourceDoc);
|
||||
|
||||
final int pageCount = sourceDoc.getDocumentCatalog().getPages().getCount();
|
||||
|
||||
log.info(pageCount + " page" + (pageCount == 1 ? "" : "s") + " to flatten.");
|
||||
|
||||
for (int i = 0; i < pageCount; i += 1) {
|
||||
|
||||
log.info("Flattening page " + (i + 1) + " of " + pageCount + "...");
|
||||
|
||||
BufferedImage img = pdfRenderer.renderImageWithDPI(i, settings.getFlattenImageDpi(), ImageType.RGB);
|
||||
|
||||
log.info("Image rendered in memory (" + img.getWidth() + "x" + img.getHeight() + " " + settings.getFlattenImageDpi() + "DPI). Adding to PDF...");
|
||||
|
||||
PDPage imagePage = new PDPage(new PDRectangle(img.getWidth(), img.getHeight()));
|
||||
destDoc.addPage(imagePage);
|
||||
|
||||
PDImageXObject imgObj = LosslessFactory.createFromImage(destDoc, img);
|
||||
|
||||
PDPageContentStream imagePageContentStream = new PDPageContentStream(destDoc, imagePage);
|
||||
imagePageContentStream.drawImage(imgObj, 0, 0);
|
||||
|
||||
log.info("Image added successfully.");
|
||||
|
||||
imagePageContentStream.close();
|
||||
|
||||
img.flush();
|
||||
}
|
||||
|
||||
log.info("New flattened PDF created in memory.");
|
||||
|
||||
sourceDoc.close();
|
||||
|
||||
return destDoc;
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,146 @@
|
||||
package com.iqser.red.service.redaction.v1.server.visualization.service;
|
||||
|
||||
import java.awt.Color;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.PDPageContentStream;
|
||||
import org.apache.pdfbox.pdmodel.font.PDType1Font;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class PdfVisualisationService {
|
||||
|
||||
|
||||
public void visualizeParagraphs(Document classifiedDoc, PDDocument document) throws IOException {
|
||||
|
||||
for (int page = 1; page <= document.getNumberOfPages(); page++) {
|
||||
|
||||
PDPage pdPage = document.getPage(page - 1);
|
||||
PDPageContentStream contentStream = new PDPageContentStream(document, pdPage, PDPageContentStream.AppendMode.APPEND, true);
|
||||
|
||||
for(Paragraph paragraph : classifiedDoc.getParagraphs()) {
|
||||
|
||||
for (int i = 0; i <= paragraph.getPageBlocks().size() - 1; i++) {
|
||||
|
||||
AbstractTextContainer textBlock = paragraph.getPageBlocks().get(i);
|
||||
|
||||
if (textBlock.getPage() != page) {
|
||||
continue;
|
||||
}
|
||||
if (textBlock instanceof TextBlock) {
|
||||
textBlock.setClassification((i+1) + "/" + paragraph.getPageBlocks().size());
|
||||
visualizeTextBlock((TextBlock) textBlock, contentStream);
|
||||
} else if (textBlock instanceof Table) {
|
||||
textBlock.setClassification((i+1) + "/" + paragraph.getPageBlocks().size());
|
||||
visualizeTable((Table) textBlock, contentStream);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
contentStream.close();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
public void visualizeClassifications(Document classifiedDoc, PDDocument document) throws IOException {
|
||||
|
||||
for (int page = 1; page <= document.getNumberOfPages(); page++) {
|
||||
|
||||
Page analyzedPage = classifiedDoc.getPages().get(page - 1);
|
||||
|
||||
PDPage pdPage = document.getPage(page - 1);
|
||||
PDPageContentStream contentStream = new PDPageContentStream(document, pdPage, PDPageContentStream.AppendMode.APPEND, true);
|
||||
|
||||
|
||||
for (AbstractTextContainer textBlock : analyzedPage.getTextBlocks()) {
|
||||
if (textBlock == null) {
|
||||
continue;
|
||||
}
|
||||
if (textBlock instanceof TextBlock) {
|
||||
visualizeTextBlock((TextBlock) textBlock, contentStream);
|
||||
} else if (textBlock instanceof Table) {
|
||||
visualizeTable((Table) textBlock, contentStream);
|
||||
}
|
||||
}
|
||||
|
||||
contentStream.setStrokingColor(Color.YELLOW);
|
||||
contentStream.addRect((float) analyzedPage.getBodyTextFrame().getX(), (float) analyzedPage.getBodyTextFrame().getY(), (float) analyzedPage.getBodyTextFrame().getWidth(), (float) analyzedPage.getBodyTextFrame().getHeight());
|
||||
contentStream.stroke();
|
||||
|
||||
contentStream.close();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void visualizeTextBlock(TextBlock textBlock, PDPageContentStream contentStream) throws IOException {
|
||||
|
||||
contentStream.setStrokingColor(Color.RED);
|
||||
|
||||
contentStream.addRect(textBlock.getMinX(), textBlock.getMinY(), textBlock.getWidth(), textBlock.getHeight());
|
||||
contentStream.stroke();
|
||||
|
||||
if (textBlock.getClassification() != null) {
|
||||
contentStream.beginText();
|
||||
|
||||
contentStream.setNonStrokingColor(Color.BLUE);
|
||||
contentStream.setFont(PDType1Font.TIMES_ROMAN, 12f);
|
||||
|
||||
contentStream.newLineAtOffset(textBlock.getMinX(), textBlock.getMaxY());
|
||||
|
||||
contentStream.showText(textBlock.getClassification());
|
||||
|
||||
contentStream.endText();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void visualizeTable(Table table, PDPageContentStream contentStream) throws IOException {
|
||||
for (List<Cell> row : table.getRows()) {
|
||||
for (Cell cell : row) {
|
||||
|
||||
if (cell != null) {
|
||||
contentStream.setStrokingColor(Color.BLUE);
|
||||
contentStream.addRect((float) cell.getX(), (float) cell.getY(), (float) cell.getWidth(), (float) cell.getHeight());
|
||||
contentStream.stroke();
|
||||
|
||||
contentStream.setStrokingColor(Color.GREEN);
|
||||
for (TextBlock textBlock : cell.getTextBlocks()) {
|
||||
contentStream.addRect(textBlock.getMinX(), textBlock.getMinY(), textBlock.getWidth(), textBlock.getHeight());
|
||||
contentStream.stroke();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (table.getClassification() != null) {
|
||||
contentStream.beginText();
|
||||
|
||||
contentStream.setNonStrokingColor(Color.BLUE);
|
||||
contentStream.setFont(PDType1Font.TIMES_ROMAN, 12f);
|
||||
|
||||
contentStream.newLineAtOffset(table.getMinX(), table.getMinY());
|
||||
|
||||
contentStream.showText(table.getClassification());
|
||||
|
||||
contentStream.endText();
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,22 @@
|
||||
info:
|
||||
description: Redaction Service Server V1
|
||||
|
||||
server:
|
||||
port: 8080
|
||||
|
||||
spring:
|
||||
profiles:
|
||||
active: kubernetes
|
||||
|
||||
platform.multi-tenancy:
|
||||
enabled: ${multitenancy.enabled:false}
|
||||
tenantFilter:
|
||||
urlPatterns: /redact
|
||||
urlPatternsToIgnore:
|
||||
|
||||
management:
|
||||
endpoint:
|
||||
metrics.enabled: ${monitoring.enabled:false}
|
||||
prometheus.enabled: ${monitoring.enabled:false}
|
||||
endpoints.web.exposure.include: prometheus, health
|
||||
metrics.export.prometheus.enabled: ${monitoring.enabled:false}
|
||||
@ -0,0 +1,3 @@
|
||||
===================================
|
||||
Redaction Service Server V1
|
||||
===================================
|
||||
@ -0,0 +1,11 @@
|
||||
spring:
|
||||
application:
|
||||
name: redaction-service-v1
|
||||
|
||||
|
||||
management:
|
||||
endpoints:
|
||||
web:
|
||||
base-path: /
|
||||
path-mapping:
|
||||
health: "health"
|
||||
@ -0,0 +1,3 @@
|
||||
In Vitro
|
||||
In vitro
|
||||
in vitro
|
||||
@ -0,0 +1,796 @@
|
||||
Aquatic BioSystems Inc, Fort Collins, Colorado, USA
|
||||
Aquatic BioSystems, Inc., Ft. Collins, Colorado, USA.
|
||||
Biological Research Laboratory (BRL), Füllinsdorf, Switzerland.
|
||||
Biological Serviced Section, Alderley Park, Macclesfield, Cheshire
|
||||
Harlan Laboratories Ltd., Itingen,
|
||||
Jealott’s Hill, International Research Station, Bracknell,
|
||||
Jealott’s Hill, International Research Station, Bracknell, RG42 6EY, United Kingdom
|
||||
Jealott’s Hill, International Research Station, Bracknell, RG42 6EY, United Kingdom.
|
||||
Obtained from P. Hohler, trout breeding station Zeiningen, CH-4314 Zeiningen, Switzerland
|
||||
P. Hohler, Forellenzucht Zeiningen, CH-4314 Zeiningen Switzerland
|
||||
P.Hohler trout breeding station Zeiningen, CH-4314 Zeiningen, Swit-zerland, and held in the test facility for more than 2 weeks
|
||||
RCC Biotechnology & Animal Breeding Division, Füllinsdorf,
|
||||
RCC Biotechnology & Animal Breeding Division, Füllinsdorf, Switzerland
|
||||
Sequani Limited, Ledbury, United Kingdom, BFI0274
|
||||
Springborn Laboratories Inc., 790 Main St., Wareham, Massachusetts, 02571-1075, USA.
|
||||
Syngenta, Jealott’s Hill, International Research Station, Bracknell, RG42 6EY, United Kingdom
|
||||
adama max rudong 2014 - huifeng
|
||||
animal metabolism, dietary exposure, product safety, research and development, ciba-geigy limited, basle, switzerland
|
||||
aquatic bio systems, inc., fort collins, colorado.
|
||||
aquatic bioassay laboratory, baton rouge, louisiana
|
||||
arysta lifescience north america, llc, cary, nc, usa
|
||||
arysta lifescience sas, noguères, france
|
||||
bayer crop-science
|
||||
bayer crop-science ag
|
||||
bc potter, rosedean, woodhurst, cambridgeshire, england
|
||||
biospheric inc., rockville, usa
|
||||
birds obtained from m & m quail farm, 4090 campbell road, gillsville, ga 30543 u.s.a
|
||||
brixham environmental laboratory, astrazeneca uk limited, brixham, uk
|
||||
brixham environmental laboratory, brixham, uk
|
||||
brixham environmental laboratory, brixham, united kingdom
|
||||
brood stock maintained at springborn laboratories
|
||||
buffalo creek quail farm, po box 579, ellerbe, nc
|
||||
bybrook bass hatchery, connecticut
|
||||
c.i.t, miserey, france
|
||||
celsius property b.v., amsterdam, netherlands
|
||||
central toxicology laboratory
|
||||
central toxicology laboratory (ctl), cheshire, united kingdom
|
||||
central toxicology laboratory (ctl), cheshire, united kingdom, hr2464
|
||||
central toxicology laboratory, alderley park, macclesfield, cheshire uk
|
||||
centre international de toxicologie (c.i.t.), miserey, 27005 evreux, france
|
||||
charles river
|
||||
charles river (uk) limited
|
||||
charles river (uk) limited, margate, kent, ct9 4lt, england.
|
||||
charles river aquaria, margate, uk
|
||||
charles river breeding laboratories, raleigh, nc, usa
|
||||
charles river deutschland gmbh, stolzenseeweg 32-36, d-88353 kisslegg / germany
|
||||
charles river france
|
||||
charles river laboratories edinburgh ltd, tranent, eh33 2ne
|
||||
charles river laboratories edinburgh ltd, tranent, eh33 2ne, uk
|
||||
charles river laboratories france, bp 0109, f-69592 l’arbresle
|
||||
charles river laboratories, edinburgh, united kingdom
|
||||
charles river laboratories, edinburgh, united kingdom, 38674
|
||||
charles river laboratories, portage, mi
|
||||
charles river laboratories, raleigh, nc, usa
|
||||
charles river uk limited, margate, kent.
|
||||
charles river, 76410, saint-aubin-les-elbeuf, france
|
||||
cheshire, united kingdom,
|
||||
china agricultural university, no.2, yuan ming yuan west road, haidian district, beijing, 100193, p.r. china
|
||||
ciba-geigy agricultural division, 410 swing road, p.o. box 18300, greensboro, north carolina 27419
|
||||
ciba-geigy basel, oekotoxikologie, basel, switzerland, 953609
|
||||
ciba-geigy corp. environmental health centre, farmington, ct, usa.
|
||||
ciba-geigy corp., greensboro, us
|
||||
ciba-geigy corp., vero beach, us
|
||||
ciba-geigy corporation agricultural division, environmental health centre (ehc), 400 farmington avenue, farmington, ct 06032
|
||||
ciba-geigy limited, animal production unit, basle, switzerland.
|
||||
ciba-geigy limited, animal production unit, stein, switzerland.
|
||||
ciba-geigy limited, animal production, 4332 stein, switzerland
|
||||
ciba-geigy limited, basle, switzerland, toxicology ii. laboratories, animal facilities of toxicology ii. laboratories of residue analysis unit, agricultural division ciba-geigy limited, basle.
|
||||
ciba-geigy limited, metabolism and ecology department, r&d plant protection agricultural division, basle, switzerland
|
||||
ciba-geigy limited, plant protection division, ch-4002 basle, switzerland
|
||||
ciba-geigy limited, research and development department, product safety, safety evaluation, basle, switzerland.
|
||||
ciba-geigy limited, tierfarm, 4334 sisseln, switzerland
|
||||
ciba-geigy ltd. ch-4002 basle, switzerland
|
||||
ciba-geigy ltd., basel, switzerland
|
||||
ciba-geigy ltd., basel, switzerland,
|
||||
ciba-geigy ltd., basle, ch
|
||||
ciba-geigy ltd., genetic toxicology, basel, switzerland
|
||||
ciba-geigy,greensboro, united states
|
||||
citoxlab france
|
||||
covance laboratories inc.9200 leesburg pike, vienna, virginia 22182
|
||||
covance laboratories limited, harrogate, uk
|
||||
covance laboratories ltd., north yorkshire, uk.
|
||||
covance laboratories, harrogate, united kingdom
|
||||
cultures maintained at wildlife international ltd. laboratories
|
||||
division of toxicology, institute of environmental toxicology
|
||||
eba inc.
|
||||
eba inc., snow camp, usa
|
||||
eg&g bionomics
|
||||
epl inc., research triangle
|
||||
eurofins agroscience services chem sas, vergèze, france
|
||||
experimental toxicology, ciba-geigy limited, 4332 stein, switzerland
|
||||
fine organics limited, seal sands, middlesbrough ts2 1ub, uk
|
||||
genetic toxicology, novartis crop protection ag, ch-4002 basel, switzerland
|
||||
granja perrone, são bernardo do campo - sp – brazil
|
||||
harlan (ad zeist, the netherlands).
|
||||
harlan france, zi le malcourlet, 03800 gannat / france
|
||||
harlan laboratories b.v. kreuzelweg 53 5961 nm horst / the netherlands
|
||||
harlan laboratories b.v. postbus 6174 5960 ad horst / the netherlands
|
||||
harlan laboratories b.v., kreuzelweg 53, 5961 nm horst / the netherlands, postbus 6174, 5960 ad horst / the netherlands
|
||||
harlan laboratories ltd., itingen, switzerland, d24665
|
||||
harlan sprague dawley, inc., madison, wi.
|
||||
harlan uk, shaw’s farm, blackthorn, bicester, oxon, ox6 0tp
|
||||
harlan winkelmann gmbh, d-33178 borchen, germany
|
||||
hazleton wisconsin
|
||||
hazleton wisconsin, inc.
|
||||
hazleton wisconsin, inc., 3301 kinsman boulevard, madison, wisconsin
|
||||
houghton springs fish farm, dorset, uk
|
||||
huntingdon research centre ltd, cambridgeshire, england
|
||||
huntingdon research centre ltd., huntingdon, united kingdom
|
||||
huntingdon research centre ltd., p.o. box 2, huntingdon, cambridgeshire, pe18 6es, england
|
||||
ibc manufacturing co., memphis, tn, usa
|
||||
j. cole, the county game farms, ashford, kent, england
|
||||
jealott’s hill international, bracknell, berkshire, united kingdom
|
||||
jiangsu huifeng agrochemicals co. ltd.
|
||||
kleintierfarm madoerin ag, ch-4414 fuellinsdorf
|
||||
m & m quail farm, 4090 campbell road, gillsville, ga 30543, u.s.a.
|
||||
maryland exotic birds of pasadena, maryland usa
|
||||
max (rudong) chemical co ltd
|
||||
morse laboratories llc, 1525 fulton avenue, sacramento, ca 95825 usa
|
||||
mount lassen trout farms, california
|
||||
mr j. coles, the country game farms, ashford, kent, england.
|
||||
mt. lassen trout farm, rt. 5, box 36, red bluff, california 98080
|
||||
nichols rabbitry inc. ; lumberton, tx
|
||||
nichols rabbitry inc; lumberton, tx., us
|
||||
notox b.v., hertogenbosch, netherlands
|
||||
novartis crop protection ag, basel, switzerland ciba-geigy ltd., basel, switzerland
|
||||
novartis crop protection ag, product portfolio management, environmental safety, ecotoxicology, ch-4002 basel, switzerland
|
||||
organics limited, middlesbrough, united kingdom
|
||||
osage catfish./box 222/missouri 65065/usa
|
||||
osage catfisheries inc., lake road 54-56, route 4, box 1500, osage beach, mo65065, usa
|
||||
p. hohler / ch-4341 zeiningen, switzerland
|
||||
p. hohler, trout breeding station zeiningen, switzerland
|
||||
park, nc, usa
|
||||
plant protection division ciba-geigy limited basle, switzerland. genetic toxicology cibageigy limited basle, switzerland
|
||||
product safety laboratories, east brunswick, new jersey 08816-3206, usa
|
||||
product safety labs, east brunswick, usa
|
||||
rcc - biological research laboratories, füllinsdorf, switzerland,
|
||||
rcc cytotest cell research gmbh, rossdorf, germany
|
||||
rcc ltd, environmental chemistry & pharmanalytics, ch-4452 itingen / switzerland
|
||||
rcc ltd, itingen, switzerland
|
||||
rcc ltd, laboratory animal services, wölferstrasse 4, 4414 füllinsdorf, switzerland
|
||||
rcc ltd., itingen, switzerland,
|
||||
rcc ltd., itingen, switzerland, b18966, t009636-06
|
||||
rcc ltd., laboratory animal services, ch-4414 füllinsdorf, switzerland
|
||||
rcc ltd., toxicology, wölferstrasse 4, ch-4414 füllinsdorf, switzerland
|
||||
rcc ltd., zelgliweg 1, 4452 itingen, switzerland
|
||||
rcc, cytotest cell research gmbh (rcc-ccr), in den leppsteinwiesen19, 64380 rossdorf, germany
|
||||
research department, pharmaceuticals division, ciba-geigy corporation, 556 morris avenue, summit, new jersey 07901
|
||||
ricerca, inc., ohio, usa
|
||||
rodent breeding unit, alderley park, macclesfield, uk
|
||||
sequani limited, bromyard road, ledbury, herefordshire, hr8 1lh, united kingdom
|
||||
sequani limited, ledbury, united kingdom
|
||||
sequani limited, ledbury, united kingdom,
|
||||
sipcamadvan, durham, nc, usa
|
||||
smithers viscient, 790 main street, wareham, ma 02571-1037 usa
|
||||
smithers viscient, 790 main street, wareham, ma, usa
|
||||
smithers viscient, 790 main street, wareham, massachusetts 02571 usa
|
||||
smithers viscient, 790 main street, wareham, massachusetts 02571-1037, usa
|
||||
source tierfarm sisseln, switzerland
|
||||
southwest bio-labs, inc.401 n. 17th street, suite 11, las cruces, nm 88005 usa.
|
||||
spring creek trout hatchery, lewistown, montana, usa
|
||||
springborn (europe) ag, horn, switzerland
|
||||
springborn laboratories inc., wareham, usa
|
||||
springborn laboratories, inc. 790 main street wareham, massachusetts 02571
|
||||
springborn laboratories, inc. environmental sciences division, 790 main street, wareham, 02571, usa massachusetts
|
||||
springborn laboratories, inc.,
|
||||
springborn laboratories, inc., health and environmental sciences, 790 main street, wareham, massachusetts, 02571-1075, usa
|
||||
springborn life sciences inc.,
|
||||
springborn smithers laboratories, wareham, usa
|
||||
stillmeadow inc. study number 9062-05,
|
||||
stillmeadow inc., sugar land, united states,
|
||||
stillmeadow inc., sugarland tx, usa
|
||||
stillmeadow inc., sugarland tx, usa, 8065-04 8321-03
|
||||
stillmeadow, inc, 12852 park one drive, sugar land, tx 77478, us
|
||||
stillmeadow, inc., 12852 park one drive, sugar land, tx 77478, usa
|
||||
syngenta - jealott’s hill, bracknell, united kingdom
|
||||
syngenta -jealott’s hill international research centre, uk
|
||||
syngenta central toxicology laboratory, alderley park, macclesfield, cheshire, uk
|
||||
syngenta crop protection, llc, greensboro, nc, usa
|
||||
syngenta crop protection, llc, greensboro, usa
|
||||
syngenta crop protection, monthey, switzerland
|
||||
syngenta ctl, alderley park, macclesfield, cheshire, sk10 4tj, uk
|
||||
syngenta – jealott’s hill international, bracknell, berkshire, united kingdom
|
||||
syngenta, jealott’s hill, international research station, bracknell, rg42 6ey, united kingdom
|
||||
texas animal specialties, humble, tx
|
||||
texas animal specialties, humble, tx, us
|
||||
toxigeneticsinc. decatur, il, us
|
||||
uk. charles river
|
||||
veterinary health research pty ltd, nsw, australia
|
||||
vischim srl, c/o lewis & harrison, llc, washington, dc, usa
|
||||
vischim srl, milano, italy
|
||||
wil research laboratories, llc, 1407 george road.ashland, oh, usa
|
||||
wil research laboratories, llc, ashland, oh, usa
|
||||
wil research laboratories, llc, ashland, oh, usa,
|
||||
wil research, 1407 george road, ashland, oh, 44805-8946, usa
|
||||
wil research, llc, 1407 george road, ashland, oh 44805-8946, usa
|
||||
wildlife international a division of eag inc. 8598 commerce drive easton, md 21601
|
||||
wildlife international ltd. cultures, 8651 brooks drive, easton, maryland 21601
|
||||
wildlife international ltd., 8598 commerce drive, easton, maryland 21601, usa
|
||||
wildlife international ltd., 8598 commerce drive, maryland 21601, usa
|
||||
wildlife international ltd., easton md, usa
|
||||
wildlife international ltd., easton, maryland 21601, usa
|
||||
wildlife international ltd., easton, usa
|
||||
wildlife international ltd., maryland, us
|
||||
wildlife international ltd., maryland, usa
|
||||
wildlife international, 8598 commerce drive, easton, md 21601 usa
|
||||
wildlife international, a division of eag inc., 8598 commerce drive, easton, md 21601 usa
|
||||
wise d.r. & wise r.e., monkfield, bourn, cambridgeshire, england
|
||||
zeneca agrochemicals, jealott’s hill, united kingdom
|
||||
zentralinstitut fur versuchstierzucht gmbh, hannover, germany
|
||||
Syngenta Ltd., Jealott’s Hill International Research Centre, Bracknell, Berkshire, RG42 6EY, UK.
|
||||
Sequani Limited, Bromyard Road, Ledbury, Herefordshire, HR8 1LH, UK.
|
||||
Harlan Cytotest Cell Research GmbH (Harlan CCR), In den Leppsteinswiesen 19, 64380 Rossdorf, Germany
|
||||
Harlan Laboratories Ltd, Itingen, Switzerland.
|
||||
Bioassay Labor fuer biologische Analytik GmbH INF 515, 69120 Heidelberg, Germany
|
||||
Syngenta Crop Protection Ltd.
|
||||
Syngenta, Jealott’s Hill, Bracknell, United Kingdom
|
||||
Charles River Laboratories, Preclinical Services, Tranent (PCS-EDI) Edinburgh, EH33 2NE, UK
|
||||
CXR Biosciences, 2, James Lindsay Place, Dundee Technopole, Dundee, DD1 5JJ, Scotland, UK
|
||||
CiToxLAB Hungary Ltd. H-8200 Veszprém, Szabadságpuszta Hungary
|
||||
Charles River, Tranent, Edinburgh, EH33 2NE, UK
|
||||
Charles River Laboratories Edinburgh Ltd., Tranent, Edinburgh, EH33 2NE, UK
|
||||
BASF SE; Ludwigshafen/Rhein; Germany Fed.Rep.
|
||||
Leatherhead Food Research (LFR), Molecular Sciences Department, Randalls Road, Leatherhead, Surrey, KT22 7RY, UK
|
||||
Syngenta, Jealott’s Hill, Bracknell, United Kingdom
|
||||
Department of Veterinary & Biomedical Sciences, 101 Life Sciences Building, Penn State University, University Park, PA 16802, USA
|
||||
CiToxLAB Hungary Ltd., H-8200 Veszprém, Szabadságpuszta, Hungary
|
||||
SafePharm Laboratories Ltd, Shardlow Business Park, Shardlow, Derbyshire, UK
|
||||
Harlan Laboratories Ltd., Zelgliweg 1, 4452 Itingen, Switzerland
|
||||
RCC, Cytotest Cell Research GmbH (RCC-CCR), In den Leppsteinswiesen 19, 64380 Rossdorf, Germany
|
||||
Harlan, Cytotest Cell Research GmbH (Harlan CCR), In den Leppsteinswiesen 19, 64380 Rossdorf, Germany
|
||||
Harlan Laboratories Ltd. Zelgliweg 1, CH-4452 Itingen / Switzerland
|
||||
Quotient Bioresearch (Rushden) Ltd., Pegasus Way, Crown Business Park, Rushden, Northamptonshire, NN10 6ER, UK
|
||||
Charles River Laboratories Edinburgh, Ltd., Elphinstone Research Centre, Tranent, East Lothian, EH33 2NE, United Kingdom
|
||||
CiToxLAB Hungary Ltd. H-8200 Veszprém, Szabadságpuszta, Hungary
|
||||
Harlan Cytotest Cell Research GmbH, In den Leppsteinswiesen 19, 64380 Rossdorf Germany
|
||||
Charles River, Tranent, Edinburgh, EH32 2NE, UK
|
||||
Charles River Laboratories Edinburgh Ltd, Tranent, Edinburgh, EH33 2NE, UK
|
||||
Harlan Cytotest Cell Research GmbH, (Harlan CCR), In den Leppsteinswiesen 19, 64380 Rossdorf, Germany
|
||||
Charles River UK Limited, Margate, Kent, UK
|
||||
RCC Ltd., Biotechnology & Animal Breeding Division, 4414 Fuellinsdorf, Switzerland
|
||||
Charles River (UK) Ltd., Margate, Kent, CT9 4LT, England
|
||||
Charles River Ltd., Margate, Kent, United Kingdom
|
||||
Charles River UK Ltd, Manston Road, Margate, Kent CT9 4LT, England, UK
|
||||
Syngenta Crop Protection, Toxicology, 4332 Stein, Switzerland
|
||||
Safepharm Laboratories Limited, Shardlow Business Park, Shardlow, Derbyshire, DE72 2GD, United Kingdom
|
||||
Sequani Ltd, Bromyard Road, Ledbury, Herefordshire, HR8 1LH, United Kingdom
|
||||
Central Toxicology Laboratory, Alderley Park, Macclesfield, Cheshire, SK10 4TJ, UK
|
||||
Charles River UK
|
||||
Department of Veterinary & Biomedical Sciences, Penn State University
|
||||
Syngenta Ltd. Jealott’s Hill International Research, Bracknell, Berks RG42 6EY
|
||||
Charles River Laboratories, Research Models and Services Germany GmbH; Sandhofer Weg 7, 97633 Sulzfeld, Germany
|
||||
Novartis Crop Protection AG, Toxicology, 4332 Stein, Switzerland
|
||||
BRL Biological Research Laboratories Ltd., Wölferstrasse 4, 4414 Füllinsdorf, Switzerland
|
||||
B&K Universal Ltd, Grimston, Aldbrough, Hull, HU11 4QE, East Yorkshire, UK
|
||||
B&K Universal Ltd, Grimston, Aldborough, Hull, UK
|
||||
Nunc GmbH & Co. KG, 65203 Wiesbaden, Germany
|
||||
Fluka, 89203 Neu-Ulm, Germany
|
||||
MERCK, 64293 Darmstadt, Germany
|
||||
Charles River Laboratories, Research Models and Services Germany GmbH; Sandhofer Weg 7, 97633 Sulzfeld, Germany
|
||||
Animal Production, Novartis Pharma AG, 4332 Stein, Switzerland
|
||||
RCC Ltd., Biotechnology & Animal Breeding Division, 4414 Fuellinsdorf, Switzerland.
|
||||
SYSTAT Software, Inc., 501, Canal Boulevard, Suite C, Richmond, CA 94804, USA
|
||||
Safepharm Laboratories Limited, Shardlow Business Park, Shardlow, Derbyshire, DE72 2GD, United Kingdom
|
||||
Charles River (UK) Limited, Margate, Kent, CT9 4LT, England
|
||||
CXR Biosciences, 2 James Lindsay Place, Dundee Technopole, Dundee, DD1 5JJ, Scotland, UK
|
||||
Granja Perrone, São Bernardo do Campo - SP – Brazil
|
||||
Harlan Sprague-Dawley, Inc. Houston/Texas
|
||||
P. Hohler, trout breeding station Zeiningen, 4314 Zeiningen, Switzerland
|
||||
Spring Creek trout hatchery, Lewistown, Montana, USA
|
||||
Springborn laboratories culture facility
|
||||
Springborn culture
|
||||
University of Texas
|
||||
Institute for Plant Physiology, University of Göttingen, 37073 Göttingen, Germany
|
||||
Bayer CropScience AG, 40789 Monheim, Germany
|
||||
Koppert B. V. Berkel en Rodenrijs, Nederland
|
||||
Bio-Test Labor GmbH, Sagerheide, Germany
|
||||
Ciba-Geigy
|
||||
Ciba-Geigy Ltd.
|
||||
Harlan Laboratories Ltd., Itingen, Switzerland, D24643
|
||||
Springborn Laboratories Inc., Wareham, USA
|
||||
Springborn Laboratories (Europe) AG
|
||||
Syngenta Eurofins - GAB, Niefern Öschelbronn, Germany
|
||||
Syngenta Eurofins Agroscience Services EcoChem GmbH, N-Osch., Germany
|
||||
Novartis Crop Protection AG, Basel, CH
|
||||
Springborn (Europe) AG, Horn, Switzerland
|
||||
Springborn Smithers Laboratories (Europe) AG, Horn, Switzerland
|
||||
Syngenta Crop Protection AG, Basel, Switzerland
|
||||
GAB Biotechnologie GmbH, Niefern, Germany
|
||||
BioChem Agrar, Gerichshain, Germany
|
||||
AgroChemex Ltd, Manningtree, United Kingdom
|
||||
Ciba-Geigy Ltd., Basel, Switzerland
|
||||
Ciba-Geigy Muenchwilen AG, Muenchwilen, Switzerland
|
||||
Novartis Crop Protection Münchwilen AG, Münchwilen, Switzerland
|
||||
Novartis Crop Protection AG, Basel, Switzerland
|
||||
Ciba-Geigy Muenchwilen AG, Muenchwilen, Switzerland
|
||||
Charles River Laboratories, Research Models and Services Germany GmbH; Sandhofer Weg 7, 97633 Sulzfeld, Germany
|
||||
Alderley Park
|
||||
Alderley Park Swiss
|
||||
Stillmeadow, Inc., 12852 Park One Drive, Sugar Land, TX 77478, USA
|
||||
Texas Animal Specialties, Humble, TX
|
||||
Nichols Rabbitry Inc. ; Lumberton, TX
|
||||
Charles River Laboratories., Wilmington, MA
|
||||
Charles River Laboratories Edinburgh Ltd., Elphinstone Research Centre, Tranent, East Lothian, EH33 2NE
|
||||
Syngenta Crop Protection, Monthey, Switzerland
|
||||
Syngenta Crop Protection, Münchwilen, Switzerland
|
||||
Fine Organics Limited, Middlesbrough, United Kingdom
|
||||
Fine Organics Limited, Seal Sands, Middlesbrough TS2 1UB, UK
|
||||
Syngenta Crop Protection, Inc., Greensboro, USA
|
||||
Syngenta Technology & Projects, Huddersfield, United Kingdom
|
||||
Syngenta Biosciences Pvt. Ltd., Ilhas Goa, India
|
||||
Syngenta - Process Hazards Section, Huddersfield, United Kingdom
|
||||
Syngenta Walloon Agricultural Research Centre, Gembloux, Belgium , 21764
|
||||
Syngenta Crop Protection, Münchwilen, Switzerland, 300052719
|
||||
Syngenta Crop Protection Münchwilen AG, Münchwilen, Switzerland, 109747
|
||||
Syngenta Crop Protection, Münchwilen, Switzerland, 300073294
|
||||
Syngenta - Jealott’s Hill, Bracknell, United Kingdom RCC Ltd., Itingen, Switzerland, B18977, T003446-06
|
||||
Syngenta - Jealott’s Hill, Bracknell, United Kingdom RCC Ltd., Itingen, Switzerland, B18966, T009636-06
|
||||
RCC Cytotest Cell Research GmbH, Rossdorf, Germany, RCC 107662
|
||||
Syngenta Syngenta - Jealott’s Hill, Bracknell, United Kingdom,
|
||||
RCC Cytotest Cell Research GmbH, Rossdorf, Germany
|
||||
WIL Research Laboratories, LLC, Ashland, OH, USA
|
||||
Charles River Laboratories, Edinburgh, United Kingdom, 36955
|
||||
Syngenta Crop Protection AG, Basel, Switzerland Stillmeadow Inc., Sugarland TX, USA
|
||||
Novartis Crop Protection Inc., Greensboro, USA
|
||||
Syngenta - Jealott’s Hill, Bracknell, United Kingdom
|
||||
Eurofins - ADME Bioanalyses, Vergeze, France
|
||||
BioChem GmbH, Cunnersdorf, Germany
|
||||
Syngenta Syngenta Crop Protection, LLC, Greensboro, NC, USA
|
||||
Syngenta Eurofins Agroscience Services Chem SAS, Vergèze, France
|
||||
Syngenta Innovative Environmental Services, Witterswil, Switzerland
|
||||
Ricerca Biosciences, LLC, Concord, OH, USA
|
||||
Dr Knoell Consult GmbH, Mannheim, Germany
|
||||
RCC Umweltchemie GmbH & Co. KG, Rossdorf, Germany
|
||||
JSC International Ltd., Harrogate, United Kingdom
|
||||
Wildlife International Ltd., Easton, Maryland 21601, USA
|
||||
Syngenta Crop Protection, LLC, Greensboro, NC, USA
|
||||
Novartis - Greensboro, Greensboro, USA
|
||||
Smithers Viscient, 790 Main Street, Wareham, MA, USA
|
||||
Syngenta Cambridge Environmental Assessments, United Kingdom
|
||||
Ciba-Geigy Basel, Oekotoxikologie, Basel, Switzerland
|
||||
RCC Ltd., Itingen, Switzerland
|
||||
IBACON GmbH, Rossdorf, Germany
|
||||
Envigo Research Limited, Shardlow, UK
|
||||
Syngenta Crop Protection Münchwilen AG, Münchwilen, Switzerland
|
||||
Ciba-Geigy Münchwilen AG, Münchwilen, Switzerland
|
||||
Huntingdon Research Centre Ltd., Huntingdon, United Kingdom
|
||||
Syngenta Technology & Projects, Huddersfield, United Kingdom
|
||||
Harlan Laboratories Ltd., Shardlow, Derbyshire, UK
|
||||
Dr. Specht & Partner Chem. Laboratorien GmbH, Hamburg, Germany
|
||||
Institut Fresenius, Taunusstein, Germany
|
||||
Syngenta - Jealott’s Hill International, Bracknell, Berkshire, United Kingdom
|
||||
Ciba-Geigy Corp., Greensboro, USA
|
||||
CIP Chemisches Institut Pforzheim GmbH, Pforzheim, Germany
|
||||
Charles River Laboratories Edinburgh Ltd, Tranent, EH33 2NE, UK
|
||||
Hazleton Laboratories, Madison, USA
|
||||
Eurofins BioPharma, Planegg, Germany, 150556
|
||||
Syngenta Environ. Health Center, Farmington, USA
|
||||
Centre International de Toxicologie C.I.T., Evreux, France
|
||||
Toxalim, Research Centre in Food Toxicology, F- 31027 Toulouse, France
|
||||
Harlan Laboratories Ltd., Shardlow, Derbyshire, UK
|
||||
CRS GmbH GmbH, In den Leppsteinswies en 19, 64380 Rossdorf Germany
|
||||
Environ. Health Center, Farmington, USA
|
||||
Ciba-Geigy Corp., Summit, USA
|
||||
Ciba-Geigy Basel, Genetische Toxikologie, Basel, Switzerland
|
||||
Ciba-Geigy Ltd., Stein, Switzerland
|
||||
Novartis Crop Protection AG, Stein, Switzerland
|
||||
Central Toxicology Laboratory (CTL), Cheshire, United Kingdom
|
||||
Sequani Limited, Bromyard Road, Ledbury, Herefordshire, HR8 1LH, United Kingdom
|
||||
Brixham Environmental Laboratory, Brixham, United Kingdom
|
||||
Springborn Smithers Laboratories, Horn, Switzerland
|
||||
Huntingdon Research Centre, Cambridgeshire, United Kingdom
|
||||
Mambo-Tox Ltd., Southampton, United Kingdom
|
||||
MITOX Consultants, Amsterdam, Netherlands
|
||||
Charles River Aquaria, Margate, UK
|
||||
Brixham Environmental Laboratory, Brixham, UK
|
||||
O.Keller, Mörschwil, CH
|
||||
Huntingdon Life Sciences Ltd., Huntingdon, UK
|
||||
BTL Bio-Test Labor GmbH, Sagerheide, Germany
|
||||
Mambo-Tox Ltd., Southampton, UK
|
||||
Mambo-Tox Ltd. 2 Venture Road, University Science Park, Southampton SO16 7NP, United Kingdom
|
||||
BioChem GmbH, Germany
|
||||
PK Nützlingszuchten, Welzheim, Germany
|
||||
BioChem agrar, Germany
|
||||
Sautter & Stepper, Ammerbuch, Germany
|
||||
Koppert, The Netherlands
|
||||
Kraut & Rubeen (Doris Haber), Zeilstraße 40, 64367 Mühltal-Frankenhausen, Germany
|
||||
Springborn Laboratories (Europe) AG, Seestrasse 21, CH-9326 Horn, Switzerland
|
||||
Biologische Bundesanstalt (BBA), Braunschweig, Germany
|
||||
Institut für Biologische Analytik und Consulting, IBACON GmbH, Arheilger Weg 17, 64380 Rossdorf, Germany
|
||||
Abandoned vineyard, Northern Italy
|
||||
Syngenta Limited, Cheshire, United Kingdom
|
||||
Agrochemex, Lawford, United Kingdom
|
||||
Staphyt, Inchy en Artois, France
|
||||
Dermal Technology Laboratory Ltd., Staffordshire, UK
|
||||
Ciba Agriculture, Whittlesford, United Kingdom
|
||||
Bayer Crop Science AG, Monheim, Germany
|
||||
tier3 solutions GmbH, Leichlingen, Germany
|
||||
Mambo-Tox. Ltd., Southampton, United Kingdom
|
||||
Syngenta Crop Protection AG, Stein, Switzerland
|
||||
Stillmeadow Inc, Sugar Land, TX 77478, US
|
||||
Texas Animal Specialities, Humble, TX, US
|
||||
CiToxLAB, 8200 Veszprem, Szabadsagpuszta, Hungary
|
||||
Syngenta Ltd, Jealott's Hill International Research Centre, Bracknell, Berkshire, RG42 6EY, United Kingdom
|
||||
Stillmeadow, Inc, 12852 Park One Drive, Sugar Land
|
||||
Syngenta Central Toxicology Laboratory, Alderley Park, Macclesfield, Cheshire, UK
|
||||
Syngenta Limited, Alderley Park, Macclesfield, Cheshire, SK10 4TJ
|
||||
Nichols Rabbitry Inc; Lumberton, TX., US
|
||||
AgroChemex International Ltd, Aldhams Farm Research Station, Lawford, Essex, UK
|
||||
Ciba Agriculture, Whittlesford, Cambridge, UK
|
||||
Ricerca Inc., Department of Residue Analysis, Painesville OH, USA
|
||||
Staphyt, 23 rue de Moeuvres, F-62860 Inchy en Artois, France
|
||||
Dermal Technology Laboratory Ltd., Med IC4, Keele University Science and Business Park, Keele, Staffordshire, ST5 5NL, United Kingdom
|
||||
Tier3 solutions GmbH, Kolberger Strasse 61-63 51381 Leverkusen, Germany
|
||||
RCC Ltd, Environmental Chemistry & Pharmanalytics, CH-4452 Itingen / Switzerland
|
||||
GAB Biotechnologie GmbH & IFU Umweltanalytik GmbH, Niefern-Öschelbronn, Germany
|
||||
Biochem agrar, Germany
|
||||
Bienenfarm Kern GmbH, Am Rehbacher Anger 10, 04249 Leipzig, Germany
|
||||
Joaquin Cordero, Paseo de Colón No. 19, 41370 Cazalla (Sevilla), Spain
|
||||
Mambo-Tox Ltd, Southampton, UK
|
||||
GAB Biotechnologie GmbH & IFU Umweltanalytik GmbH, Niefern-Öschelbronn, Germany
|
||||
Innovative Environmental Services (IES), Benkenstrasse 260, 4108 Witterswil, Switzerland
|
||||
BioChem agrar GmbH, Kupferstraße 6, 04827 Gerichshain, Germany
|
||||
RCC - Biological Research Laboratories, Füllinsdorf, Switzerland, 859442
|
||||
RCC Ltd., Toxicology, Wölferstrasse 4, CH-4414 Füllinsdorf, Switzerland
|
||||
RCC Ltd., Laboratory Animal Services, CH-4414 Füllinsdorf, Switzerland
|
||||
Charles River Laboratories France, BP 0109, F-69592 L’Arbresle
|
||||
Charles River Deutschland GmbH, Stolzenseeweg 32-36, D-88353 Kisslegg / Germany
|
||||
Syngenta CTL, Alderley Park, Macclesfield, Cheshire, SK10 4TJ, UK
|
||||
Harlan UK, Shaw’s Farm, Blackthorn, Bicester, Oxon, OX6 0TP
|
||||
Syngenta Central Toxicology Laboratory, UK
|
||||
RCC Ltd., Toxicology, Wölferstrasse 4, CH- 4414 Füllinsdorf, Switzerland
|
||||
RCC Ltd, Itingen, Switzerland
|
||||
P. Hohler, trout breeding station Zeiningen, Switzerland
|
||||
SAG, Institute for Plant Physiology, University of Göttingen, Germany
|
||||
GAB Biotechnologie GmbH, Niefern-Öschelbronn, Germany
|
||||
Beekeeper Mr. Berthold Nengel, Brückenstraße 12, 56348 Dahlheim, Germany
|
||||
Syngenta Crop Protection, Münchwilen, Switzerland, CHMU140561
|
||||
Syngenta Crop Protection, Münchwilen, Switzerland
|
||||
Sequani Limited, Ledbury, United Kingdom, BFI0516
|
||||
PTRL Europe, Ulm, Germany
|
||||
SGS Institut Fresenius GmbH, Taunusstein, Germany
|
||||
CEM Analytical Services Ltd (CEMAS) - Berkshire, UK
|
||||
PTRL Europe, Ulm, Germany
|
||||
Sequani Limited, Ledbury, United Kingdom
|
||||
SGS Institut Fresenius GmbH, Taunusstein, Germany
|
||||
CEM Analytical Services, UK
|
||||
Eurofins Agroscience Services Chem SAS, Vergèze, France
|
||||
Novartis Services AG, Basel, Switzerland
|
||||
BSL Bioservice Scientific, Planegg, Germany
|
||||
Envigo CRS GmbH, Rossdorf, Germany
|
||||
Envigo CRS GmbH, In den Leppsteinswiesen 19, 64380 Rossdorf, Germany
|
||||
BASF Ltd., Ludwigshafen, Germany
|
||||
ALS Laboratory Group, Edmonton, Alberta, Canada
|
||||
Syngenta Crop Protection, Inc., Greensboro, USA
|
||||
ADME - Bioanalyses, Vergeze, France
|
||||
Battelle UK Ltd., Ongar, United Kingdom
|
||||
SGS Institut Fresenius GmbH
|
||||
Novartis Agro GmbH, Frankfurt, Germany
|
||||
Supervision & Test Center Pesticide Safety Evaluation, China
|
||||
T. R. Wilbury Laboratories, Inc., Marblehead, MA, USA
|
||||
CEMAS, North Ascot, United Kingdom
|
||||
EAG Laboratories PTRL Europe GmbH, Germany
|
||||
Syngenta Crop Protection Inc., USA
|
||||
Syngenta Crop Protection Inc., 410 Swing Road, Greensboro, NC 27409, USA
|
||||
Huntingdon Research Centre Ltd., UK
|
||||
Huntingdon Research Centre Ltd., England
|
||||
T.R. Wilbury Laboratories, Inc., USA
|
||||
Wildlife International Ltd., USA
|
||||
RCC Ltd, Switzerland
|
||||
RCC Ltd. Environmental Chemistry & Pharmanalytics Division CH-4452 Itingen/Switzerland
|
||||
Harlan Laboratories Ltd., Switzerland
|
||||
CIBA-GEIGY Ltd., Switzerland
|
||||
Syngenta Crop Protection AG, Basel , Switzerland
|
||||
Syngenta Crop Protection LLC, Greensboro, USA
|
||||
PTRL Europe GmbH, Helmholtzstr. 22, Science Park, Ulm, Germany
|
||||
PTRL Europe GmbH, Germany
|
||||
CEM Analytical Services Ltd (CEMAS), Imperial House, Oaklands Business Centre, Oaklands Park, Wokingham, Berkshire, RG41 2FD UK
|
||||
SGS INSTITUT FRESENIUS GmbH
|
||||
Syngenta Ltd, Jealott’s Hill International Research Centre, Bracknell, Berkshire, RG42 6EY, UK
|
||||
Fraunhofer Institute for Molecular Biology and Applied Ecology, IME, Auf dem Aberg 1, 57392 Schmallenberg, Germany
|
||||
Eurofins Agroscience Services Chem SAS, 75B, Avenue du Pascalet, 30310 Vergèze, France
|
||||
Innovative Environmental Services (IES) Ltd, Benkenstrasse 260, 4108 Witterswil, Switzerland
|
||||
BSL Bioservice, Scientific Laboratories GmbH, Behringstrasse 6/8, 82152 Planegg, Germany
|
||||
RCC Ltd, Zelgliweg 1, CH-4452 Itingen, Switzerland
|
||||
RCC Ltd, Laboratory Animal Services, CH-4414 Fuellinsdorf
|
||||
Harlan Cytotest Cell Research GmbH, In den Leppsteinswiesen 19, 64380 Rossdorf, Germany
|
||||
Ciba-Geigy Limited, Basel, Switzerland
|
||||
BASF SE, Experimental Toxicology and Ecology, 67056 Ludwigshafen, Germany
|
||||
Ciba-Geigy Limited, Animal Production, 4332 Stein, Switzerland
|
||||
RCC Ltd. Biotechnology & Animal Breeding Division, 4414 Füllinsdorf, Switzerland
|
||||
Syngenta Ltd. Jealott’s Hill International Research Centre, Bracknell, Berks RG42 6EY
|
||||
WIL Research Laboratories, LLC, 1407 George Road, Ashland, Ohio 44805-8946, USA
|
||||
Charles River Laboratories Inc., Kingston, New York, USA
|
||||
Syngenta, Jealott’s Hill International Research Centre, Bracknell, United Kingdom
|
||||
Battelle UK Ltd
|
||||
D.R. & R.E. Wise, Monkfield, Bourn, Cambridgeshire, England
|
||||
Wildlife International. 8598 Commerce Drive, Easton, MD 21601 USA
|
||||
Maryland Exotic Birds of Pasadena, MD 21122
|
||||
Mr D. R. Wise, Monkfield, Bourn, Cambridgeshire, England
|
||||
Cambridge Environmental Assessments, Battlegate Road, Boxworth, Cambridgeshire, CB23 4NN, UK
|
||||
J. Coles, The County Game Farms, Ashford, Kent, England
|
||||
Osage Catfisheries, MO 65 065, USA
|
||||
Supervision and Test Center for Pesticide Safety Evaluation and Quality Control, 600 Shenliao Road, Tiexi District, Shengyang 110141, Liaoning Province, P.R. China
|
||||
Syngenta, Jealott’s Hill International Research Centre, Bracknell, Berkshire, RG42 6EY
|
||||
Harlan Laboratories Ltd., 4452 Itingen, Switzerland
|
||||
Ciba-Geigy Ltd., Product Safety, Ecotoxicology, CH-4002 Basel, Switzerland
|
||||
P. Hohler, CH-4314 Zeiningen
|
||||
Cambridge Environmental Assessments, Battlegate Road, Boxworth, Cambridgeshire, CB23 4NN/UK
|
||||
Wildlife International, A Division of EAG Inc. 8598 Commerce Drive Easton, MD 21601 USA
|
||||
Novartis Crop Protection AG, Kanton Aargau, Switzerland.
|
||||
RCC Ltd, CH-4452 Itingen, Switzerland
|
||||
CEMAS, North Ascot, Berkshire, UK
|
||||
Wilbury Laboratories Inc, 40 Doaks Lane, Marblehead, Massachusetts
|
||||
P. Cummins Oyster Company, Pasadena, Maryland
|
||||
Harlan Laboratories Ltd, Zelgliweg 1, 4452 Itingen/Switzerland
|
||||
PK Nützlingszuchten, D-73642 Welzheim, Germany
|
||||
Institut für Biologische Analytik und Consulting IBACON GmbH Arheilger Weg 17, 64380 Rossdorf, Germany
|
||||
ABC Laboratories Inc., Analytical Chemistry and Field Services, 7200 E. ABC Lane, Columbia, Missouri
|
||||
Ciba-Geigy Corporation, Farmington, CT, USA
|
||||
Syngenta Ltd. Jealott’s Hill, Bracknell, United Kingdom
|
||||
Eurofins Agroscience Services EcoChem GmbH, N- Osch., Germany
|
||||
Ciba-Geigy Limited, Animal Production Unit, Basle, Switzerland
|
||||
Ciba-Geigy Limited, Basle, Switzerland
|
||||
Charles River Laboratories, Raleigh, NC, USA
|
||||
Charles River (UK) Limited
|
||||
Harlan Sprague Dawley, Inc., Madison, WI
|
||||
CIBA-GEIGY Limited, Animal Production, 4332 Stein, Switzerland
|
||||
CIBA-GEIGY Limited, 4332 Stein, Switzerland
|
||||
Kleintierfarm Madoerin AG, CH-4414 Fuellinsdorf
|
||||
CIBA-GEIGY Limited, Tierfarm, 4334 Sisseln, Switzerland
|
||||
Animal production, CIBA-GEIGY Limited, 4332 Stain/Switzerland
|
||||
Environmental Health Centre (EHC), 400 Farmington Avenue, Farmington, CT 06032
|
||||
Charles River Laboratories, Kingston, NY
|
||||
Harlan (Ad Zeist, the Netherlands)
|
||||
Animal Production CIBA-GEIGY Limited 4332 Stein / Switzerland
|
||||
Tierfarm, Sisseln, Switzerland
|
||||
Zen-tralinstitut fur Versuchstier-zucht GmbH, Hannover, Germany
|
||||
Charles River Laboratories, Portage, MI
|
||||
CIBA-GEIGY Limited, Basel, Switzerland
|
||||
Novartis Crop Protection AG, CH-4002 Basel, Switzerland
|
||||
RCC Ltd., Biotechnology and animal breeding division, Fullinsdorf, Switzerland
|
||||
Tierfarm Sisseln, Switzerland
|
||||
Charles River Breeding Laboratories, Raleigh, NC, USA
|
||||
Ciba-Geigy Corporation, Plant Protection Division, Environmental Health Center, 400 Farmington Avenue, Farmington, Connecticut 06032, USA
|
||||
Charles River Breeding Laboratories, Inc., Raleigh, North Carolina USA
|
||||
Charles River, 76410, Saint-Aubin-les-Elbeuf, France
|
||||
Charles River Laboratories, Inc., Raleigh, NC, USA
|
||||
WIL Research Laboratories, LLC, 1407 George Road, Ashland, OH 44805-8946 USA
|
||||
RCC Ltd., Biotechnology & Animal Breeding Division, 4414 Fȕllinsdorf, Switzerland
|
||||
Alderley Park, Macclesfield, Cheshire UK
|
||||
Rodent Breeding Unit, Alderley Park, Macclesfield, UK
|
||||
Harlan Winkelmann GmbH, D-33178 Borchen, Germany
|
||||
WIL Research Laboratories, LLC, 1407 George Road.Ashland, OH 44805-8946 USA
|
||||
Centre d’Elevage Charles River
|
||||
CIBA-GEIGY Limited, Experimental Toxicology, 4332 Stein/Switzerland
|
||||
Centre International de Toxicologie (C.I.T.), Miserey, 27005 Evreux, France
|
||||
Centre Internationale de Toxicologie, Miserey, 27005 Evreux, France
|
||||
CIBA-GEIGY Limited, Basle, Switzerland
|
||||
Harlan Laboratories Ltd, Shardlow Business Park, Shardlow, Derbyshire, DE72 2GD, UK
|
||||
Envigo CRS GmbH GmbH, In den Leppsteinswiesen 19, 64380 Rossdorf Germany
|
||||
Ciba-Geigy Ltd., Genetic Toxicology, Basel, Switzerland
|
||||
Toxalim, Research Centre in Food Toxicology, F-31027 Toulouse, France
|
||||
Ciba-Geigy Corp, Plant Protection Division, Environmental Health Center, 400 Farmington Avenue, Farmington, Connecticut 06032, USA
|
||||
Charles River France
|
||||
Charles River US
|
||||
WIL Research, LLC, 1407 George Road, Ashland, OH 44805-8946, USA
|
||||
Novartis Crop Protection AG, Toxicology, 4332 Stein Switzerland
|
||||
Syngenta Crop Protection, Health Assessment 2 Stein, 4332 Stein, Switzerland
|
||||
RCC Ltd. Biotechnology and Animal Breeding Division, 4414 Füllinsdorf, Switzerland
|
||||
Genetic Toxicology, Novartis Crop Protection AG, CH-40002 Basel, Switzerland
|
||||
RCC - Cytotest Cell Research GmbH In den Leppsteinswiesen 19, D- 64380 Roβdorf, Germany
|
||||
RCC - Cytotest Cell Research GmbH, In den Leppsteinswiesen 19, D-64380 Rofldorf, Germany
|
||||
Ciba-Geigy Limited, Animal production, 4332 Stein, Switzerland
|
||||
RCC Ltd., Zelgliweg 1, 4452 Itingen, Switzerland
|
||||
RCC Ltd, Laboratory Animal Services, Wölferstrasse 4, 4414 Füllinsdorf, Switzerland
|
||||
RCC Ltd, Laboratory Animal Services, 4414 Füllinsdorf, Switzerland
|
||||
CIBA-GEIGY Limited, Basle, Switzerland
|
||||
RCC Cytotest Cell Research GmbH (RCC-CCR), In den Leppsteinswiesen 19, 64380 Rossdorf, Germany
|
||||
RCC Cytotest cell Research GmbH, In den Leppsteinwiesen 19, Rossdorf, Germany
|
||||
Centre International de Toxicologie (CIT)
|
||||
C iba-Geigy
|
||||
Ciba-Geigy, Greensboro, North Carolina
|
||||
Ciba-Geigy Corp., Greensboro, United States
|
||||
Ciba-Geigy Vero Beach Research Center, Florida, USA
|
||||
Ciba-Geigy Corporation, Environ. Health Center, Farmington, United States
|
||||
Ciba-Geigy GmbH, Frankfurt a.Main, Germany
|
||||
Ciba-Geigy Corp., Greensboro, United States
|
||||
Wise D.R. & Wise R.E., Monkfield, Bourn, Cambridgeshire, England
|
||||
Mr J. Coles, The Country Game Farms, Ashford, Kent, England
|
||||
Maryland Exotic Birds of Pasadena, Maryland USA
|
||||
J. Cole, The County Game Farms, Ashford, Kent, England
|
||||
BC Potter, Rosedean, Woodhurst, Cambridgeshire, England
|
||||
M & M Quail Farm, 4090 Campbell Road, Gillsville, GA 30543, U.S.A
|
||||
Wildlife International A Division of EAG Inc. 8598 Commerce Drive Easton, MD 21601 USA
|
||||
M & M Quail Farm, 4090 Campbell Road, Gillsville, GA 30543 U.S.A
|
||||
China Agricultural University, No.2, Yuan Ming Yuan West Road, Haidian District, Beijing, 100193, P.R. China
|
||||
Mt. Lassen Trout Farm, Rt. 5, Box 36, Red Bluff, California 98080
|
||||
Bybrook Bass Hatchery, Connecticut
|
||||
CIBA-GEIGY Ltd. CH-4002 Basle, Switzerland
|
||||
Wildlife International Ltd. Cultures, 8651 Brooks Drive, Easton, Maryland 21601
|
||||
Aquatic bioassay laboratory, Baton Rouge, Louisiana
|
||||
P. Hohler/ CH-4314 Zeiningen, Switzerland
|
||||
Houghton Springs Fish Farm, Dorset, UK
|
||||
Cultures maintained at Wildlife International Ltd. Laboratories
|
||||
Aquatic Bio Systems, Inc., Fort Collins, Colorado
|
||||
Smithers Viscient, 790 Main Street, Wareham, Massachusetts 02571- 1037 USA
|
||||
Smithers Viscient, 790 Main Street, Wareham, Massachusetts 02571 USA
|
||||
Springborn laboratories
|
||||
Syngenta Ltd. Jealott’s Hill International Research Centre Bracknell, Berkshire, RG42 6EY United Kingdom
|
||||
Wildlife International Ltd., Maryland, USA
|
||||
Wildlife International Ltd., Easton, USA
|
||||
Smithers Viscient, 790 Main Street, Wareham, Massachusetts 02571 USA
|
||||
Brixham Environmental Laboratory, AstraZeneca UK Limited, Brixham, UK
|
||||
Springborn Laboratories Inc., Massachusetts 02571, USA
|
||||
Smithers Viscient, 790 Main Street, Wareham, MA 02571-1037, USA
|
||||
Wildlife International Ltd, Easton, MD, USA
|
||||
Wildlife International A Division of EAG Inc. 8598 Commerce Drive Easton, MD 21601 USA
|
||||
Smithers Viscient, 790 Main Street, Wareham, MA 02571-1037 USA
|
||||
Ciba-Geigy Corporation, Post Office Box 18300, Greensboro, NC 27419, USA
|
||||
Chesapeake Cultures, Hayes, Virginia
|
||||
Smithers Viscient, 790 Main Street, Wareham, Massachusetts 02571-1037 USA
|
||||
University of Sheffield, UK
|
||||
Blue Frog Scientific Limited, Scott House, South St. Andrew Street, Edinburgh, EH2 2AZ, UK
|
||||
MBL Aquaculture, Sarasota, Florida
|
||||
Bayer AG (Pflanzenschutz Umweltforschung, Institut für Oekobiologie, D- 5090 Leverkusen)
|
||||
Pflanzenphysiologisches Institut University, Nikolausberger Weg 180, D-3400 Göttingen, Germany
|
||||
Envigo Research Limited Shardlow Business Park, Shardlow, Derbyshire, DE72 2GD, UK
|
||||
Smithers Viscient, 790 Main Street, Wareham, Massachusetts 02571- 1037 USA
|
||||
Wildlife International Ltd., Easton, Maryland, USA
|
||||
David Francis, W.J. Mead Apiarist Supplies, Fowlmere, Cambridgshire
|
||||
RCC AG, Itingen, Switzerland
|
||||
Blades Biological Ltd, United Kingdom
|
||||
ECT Oekotoxikologie GmbH, Germany
|
||||
BioChem agrar, Labor für biologische und chemische, Analytik GmbH, Kupferstraße 6, 04827 Gerichshain, Germany
|
||||
RCC Umweltchemie AG, P.O. Box, CH-4452 Itingen/BL, Switzerland
|
||||
RCC Ltd, Environmental Chemistry & Pharmanalytics Division, CH-4452 Itingen, Switzerland
|
||||
BioChem agrar Labor für biologische und chemische, Analytik GmbH, Kupferstraße 6 04827 Gerichshain, Germany
|
||||
BioChem agrar, Labor für biologische und chemische, Analytik GmbH, Kupferstraße 6, 04827 Gerichshain, Germany
|
||||
Pan-Agricultural Labs, Inc. 32380 Avenue 10 Madera, CA 93638 USA
|
||||
Syngenta AG. Basel. Switzerland
|
||||
Deutsche Sammlung von Mikroorganismen und Zellkulturen GmbH, Inhoffenstraße 7 B, 38124 Braunschweig, Germany
|
||||
CIBA-GEIGY Ltd., Product Safety, Ecotoxicology, CH-4002 Basel, Switzerland
|
||||
Springborn Smithers Laboratories 790 Main Street Wareham, MA 02571-1037
|
||||
Syngenta crop protection AG, Research Biological science, Disease control, Stein
|
||||
Syngenta Biosciences Pvt. Ltd., Ilhas Goa, India
|
||||
Syngenta Technology & Projects, Huddersfield, United Kingdom
|
||||
Stillmeadow. Inc.. 12852 Park One Drive. Sugar Land. TX 77478. USA
|
||||
Texas Animal Specialties. Humble. TX
|
||||
Nichols Rabbitry Inc. ; Lumberton. TX
|
||||
Charles River Laboratories.. Wilmington. MA
|
||||
Charles River Laboratories Edinburgh Ltd.. Elphinstone Research Centre. Tranent. East Lothian. EH33 2NE
|
||||
Charles River Laboratories, Edinburgh, United Kingdom
|
||||
tier3 solutions GmbH
|
||||
tier3 solutions GmbH, Kolberger Str. 61-63, 51381 Leverkusen, Germany
|
||||
Bayer CropScience AG
|
||||
Syngenta, Jealott’s Hill International Research Centre, UK
|
||||
Brixham Environmental Laboratory, Brixham, Devon, TQ5 8BA, UK
|
||||
MITOX Consultants Science Park 408, 1098XH Amsterdam, The Netherlands
|
||||
Eurofins Agrosciences Services EcoChem GmbH, Eutinger Str. 24, 75233 Niefern-Öschelbronn, Germany
|
||||
Mambo-Tox Ltd., 2 Venture Road, Chilworth Science Park, Southampton SO16 7NP, United Kingdom
|
||||
Biochem agrar GmbH, Gerichshain, Germany
|
||||
“W. Neudorff GmbH KG”, An der Mühle 3, D- 31860 Emmertal
|
||||
BioChem Agrar, Kupferstraβe 6, 04827 Gerichshain, Germany
|
||||
Bayer CropScience AG, Monheim
|
||||
BioChem agrar, Labor für biologische und chemische Analytik GmbH, Kupferstraße 6, 04827 Gerichshain, Germany
|
||||
RIFCON GmbH, Hirschberg, Germany
|
||||
Dr K Thomae GMBH, Chemisch-pharmazeutische Fabrik, D-7950 Biberach, Riss
|
||||
Centre International de Toxicologie (C.I.T), Miserey, 27005 Evreux, France
|
||||
Centre d’Elevage Lebeau, 78950 Gambais, France
|
||||
CIBA-GEIGY Limited, Toxicology Services, Short-term Toxicology, 4332 Stein, Switzerland
|
||||
Ciba-Geigy Ltd., CH-4002, Basel, Switzerland
|
||||
Osage Catfish, Box 222, Missouri, USA
|
||||
Mambo-Tox Ltd., 2 Venture Road, University Science Park, Southampton, SO16 7NP
|
||||
Biologische Bundesanstalt (BBA), Berlin-Dahlem
|
||||
“Bayer CropScience AG” Monheim
|
||||
Zeneca Agrochemicals, Jealott’s Hill, United Kingdom
|
||||
Eurofins Agroscience Services Chem GmbH, Hamburg, Germany
|
||||
Harlan Cytotest Cell Research GmbH (Harlan CCR), Germany
|
||||
Smithers Viscient (ESG) Ltd, Harrogate, UK
|
||||
Covance Laboratories Limited, Harrogate, UK
|
||||
Central Toxicology Laboratory, Alderley Park, Macclesfield, Cheshire, UK
|
||||
Biological Services Section, Alderley Park, Macclesfield, Cheshire, UK
|
||||
Charles River
|
||||
Harlan Cytotest Cell Research GmBH, Rossdorf, Germany
|
||||
Syngenta Crop Protection, Inc., Greensboro, NC 27419, USA
|
||||
Cambridge Environmental Assessments, Battlegate Road, Boxworth, Cambridgeshire
|
||||
Central Toxicology Laboratory, Syngenta
|
||||
Harlan Laboratories Ltd. Zelgliweg,445 Itingen/Switzerland
|
||||
Tecsolve UK Ltd., Glendale Park, North Ascot, Berkshire
|
||||
Harlan Laboratories Ltd, Zelgliweg 1, 4452 Itingen, Switzerland
|
||||
Harlan Laboratories
|
||||
Katz Biotech AG, Baruth, Germany
|
||||
Mambo-Tox Ltd., 2 Venture Road, Chilworth Science Park, Southampton, SO16 7NP
|
||||
BioChem agrar, 04827 Gerichshain, Germany
|
||||
W. Neudorff, 31860 Emmerthal, Germany
|
||||
W. Neudorff GmbH KG, An der Mühle 3, 31860 Emmerthal, Germany
|
||||
BioChem agrar Labor für biologische und chemische Analytik GmbH, Kupferstraße 6 04827 Gerichshain, Germany
|
||||
“Biologische Bundesanstalt (BBA)”, Berlin-Dahlem
|
||||
BioChem agrar, Labor für biologische und chemische Analytik GmbH, Kupferstraβe 6, 04827 Gerichshain, Germany
|
||||
Syngenta Crop Protection, Münchwilen, Switzerland
|
||||
Ciba-Geigy Ltd., Basle, Switzerland
|
||||
Ciba-Geigy Corporation , Greensboro, NC, USA
|
||||
Ciba-Geigy Corp., Greensboro, NC, USA
|
||||
Nauchi, Shiraimachi, Inba-Gun, Chiba, Japan
|
||||
Animal Metabolism, Ciba-Geigy Ltd., Basle, Switzerland
|
||||
Hazleton Wisconsin, Inc. Madison, Wisconsin USA
|
||||
CiToxLAB Hungary Ltd, Szabadsagpuszta, Hungary
|
||||
Hazleton Wisconsin, Inc. Madison, Wis- consin USA
|
||||
Stillmeadow Inc., Sugar Land TX, USA
|
||||
Ciba-Geigy Corporation, Summit, NJ, USA
|
||||
Ciba-Geigy Corp., Environmental Health Center, Farmington, CT, USA
|
||||
Ciba-Geigy Limited, Pharmaceutical Division, 4002 Basel / Switzerland
|
||||
Ciba-Geigy Limited, Experimental Pathology, 4002 Basel/ Switzerland
|
||||
Ciba-Geigy Limited, Experimental Pathol- ogy, 4002 Basel / Switzerland
|
||||
Hazleton Wisconsin, Madison, WI, USA
|
||||
Ciba-Geigy Toxicology Services, ShortTerm Toxicology, 4332 Stein/ Switzerland
|
||||
Ciba-Geigy Limited, Experimental Pathology, 4002 Basel / Switzerland
|
||||
Hazleton Biotechnologies Company, Kensington, Maryland, USA
|
||||
Ciba-Geigy Limited, Genetic Toxicology, 4002 Basel / Switzerland
|
||||
Hazleton Washington, Inc., Vienna, Virginia 22182, USA
|
||||
Ciba-Geigy Limited, 4002 Basel / Switzerland
|
||||
Hazleton Raltech, Inc., a Subsidiary of Hazleton Laboratories America, Inc., Madison, Wisconsin, USA
|
||||
Experimental Pathology Laboratories, Research Triangle Park
|
||||
Toxicology/Cell Biology, Novartis Crop Protection Inc., Basel, Switzerland
|
||||
Toxigenics, Inc., Decatur, IL 62526, USA
|
||||
Argus Research Laboratories, Inc., Perkasie, PA, USA
|
||||
Argus Research Laboratories Inc., Horsham, Pennsylvania 19044, USA
|
||||
Ciba-Geigy Ltd.,Stein, Switzerland
|
||||
Ciba-Geigy Ltd., Genetic Toxicology, Basle, Switzerland
|
||||
Novartis Crop Protection AG, Stein, CH
|
||||
Safepharm Laboratories Ltd., Shadlow, United Kingdom
|
||||
Sandoz Agro Ltd., Department of Toxicology CH-4132 Muttenz, Switzerland
|
||||
Hazleton Washington, Inc. Vienna, Virginia, USA
|
||||
CXR Biosciences. Laboratory
|
||||
Ciba-Geigy Corp., Greensboro NC, USA
|
||||
Ciba-Geigy Ltd., Basel, CH
|
||||
Novartis Agro S.A., Aigues-Vives, F
|
||||
Ciba-Geigy SA, Rueil-Malmaison, F
|
||||
Novartis Agro S.A., Aigues-Vives, France
|
||||
Osage Catfisheries Inc., Osage Beach, Missouri 65065, USA
|
||||
Aquatic Biosystems Corvalis
|
||||
EPA, Corvalis, OR
|
||||
Ward’s Natural Science, ON
|
||||
Chilliwack Hatchery
|
||||
Sun Valley Trout Farm, Abbotsford BC
|
||||
Chilliwack Hatchery, BC
|
||||
P. Hohler, CH-4314 Zeiningen, Switzerland
|
||||
University of Sheffield , UK
|
||||
Wildlife International Ltd., Maryland, US
|
||||
Ciba-Geigy Ltd., Basle, CH
|
||||
Stillmeadow Inc., Sugar Land, United States
|
||||
Hazleton Wisconsin, Inc
|
||||
ToxigeneticsINc. Decatur, IL, US
|
||||
EG&G Bionomics
|
||||
Biospheric Inc., Rockville, USA
|
||||
Bionomics Aquatic Tox. Lab., Wareham, USA
|
||||
Springborn Laboratories Inc.
|
||||
Syngenta – Jealott’s Hill International, Bracknell, Berkshire, United Kingdom
|
||||
Wildlife International Ltd., Easton MD, USA
|
||||
Springborn Smithers Laboratories, Wareham, USA
|
||||
Springborn Life Sciences Inc
|
||||
Eg&G Bionomics (Fl), Pensacola, USA
|
||||
Harlan Laboratories Ltd., Itingen, Switzerland
|
||||
Solvias AG, Basel, Switzerland
|
||||
T.R. Wilbury Laboratories Inc., Massachusetts, USA
|
||||
Ciba-Geigy Ltd., Basle, CH
|
||||
Stillmeadow Inc., Sugar Land, TX, USA
|
||||
Syngenta Crop Protection, Munchwilen, Switzerland
|
||||
RCC - Biological Research Laboratories, Füllinsdorf, Switzerland
|
||||
Covance Laboratories, Harrogate, United Kingdom
|
||||
Battelle UK Ltd, Chelmsford, Essex, UK
|
||||
Zeneca Agrochemicals, Jealott’s Hill Research Station, Bracknell, Berkshire, UK
|
||||
Xenobiotic Laboratories, Inc., Plainsboro, USA
|
||||
Fraunhofer Institute, Schmallenberg, Germany
|
||||
PTRL West, Hercules CA, USA
|
||||
Eurofins Agroscience Services GmbH, Niefern-Öschel., Germany
|
||||
ICI Agrochemicals, Bracknell, Berkshire, United Kingdom
|
||||
Chemex International plc, Cambridge, United Kingdom
|
||||
BASF, Limburgerhof, Germany
|
||||
RIFCON, Leichlingen, Germany
|
||||
Eurofins - GAB, Niefern Öschelbronn, Germany
|
||||
River Thames, Maidenhead, Berkshire, UK
|
||||
Beach N o . 24, Hayling Island, Hampshire, UK
|
||||
Jealott’s Hill International Research Centre, Bracknell, Berkshire, RG42 6EY, UK
|
||||
Zeneca Agrochemical s, Jealott’s Hill, United Kingdom
|
||||
Zeneca Agrochemicals, Jealott’s Hill, United Kingdom
|
||||
Jealott’s Hill Research Station. Syngenta Crop protection AG
|
||||
Bayer CropScience, Monheim, Germany
|
||||
Huntingdon Life Sciences Ltd., Huntingdon, United Kingdom
|
||||
Eurofins Agroscience Services EcoChem GmbH, N- Osch., Germany
|
||||
Eurofins Agroscience Services EcoChem GmbH, NOsch., Germany
|
||||
Tier3 solutions GmbH, Germany
|
||||
Syngenta Crop Protection AG
|
||||
Jealott’s Hill Research Centre. Syngenta Crop protection AG
|
||||
RCC Umweltchemie GmbH & Co KG
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,233 @@
|
||||
Vulpes vulpes
|
||||
african clawed frog
|
||||
agalychnis callidryas
|
||||
amphibian
|
||||
amphibians
|
||||
American bullfrog tadpole
|
||||
american toad
|
||||
anad platyrhynchos
|
||||
Anas platyrhynchos
|
||||
anuran
|
||||
anurans
|
||||
apodemus syl vaticus
|
||||
avian
|
||||
bird
|
||||
birds
|
||||
bluegill
|
||||
bluegill sunfish
|
||||
bobwhite
|
||||
bobwhite quail
|
||||
bullfrog
|
||||
Bufo americanus
|
||||
brachydanio rerio
|
||||
canary
|
||||
carassius carassius
|
||||
carp
|
||||
catfish
|
||||
cattle
|
||||
cattles
|
||||
channel catfish
|
||||
Chinook
|
||||
chicken
|
||||
Colinus virginianus
|
||||
colinus virginianus
|
||||
Common carp
|
||||
coturnix japonica
|
||||
Coturnix japonica
|
||||
cow
|
||||
cows
|
||||
Crucian carp
|
||||
cyprinodon variegatus
|
||||
cyprinus carpio
|
||||
dog
|
||||
dogs
|
||||
duck
|
||||
ducks
|
||||
fathead minnow
|
||||
fish
|
||||
fishes
|
||||
fox
|
||||
frog
|
||||
frogs
|
||||
fudulus heteroclitus
|
||||
fundulus heteroclitus
|
||||
galaxias maculatus
|
||||
galaxias truttaceus
|
||||
gasterosteus aculeatus
|
||||
goat
|
||||
goats
|
||||
guinea
|
||||
guinea pig
|
||||
guinea pigs
|
||||
Guppy
|
||||
hamster
|
||||
hamsters
|
||||
hen
|
||||
hens
|
||||
Hyla versicolor
|
||||
ictalurus melas
|
||||
ictalurus punctatus
|
||||
japanese quail
|
||||
japonica
|
||||
lebistes reticulatus
|
||||
leiostomus xanthurus
|
||||
leisostomus xanthurus
|
||||
lepomis macrochirus
|
||||
livestock
|
||||
livestocks
|
||||
mallard duck
|
||||
mammal
|
||||
mammals
|
||||
Mammalian
|
||||
mice
|
||||
midwestern anurans
|
||||
monkey
|
||||
mouse
|
||||
northern bobwhite
|
||||
o. mykiss
|
||||
Oncorhynchus mykiss
|
||||
Oncorhynchus
|
||||
O. mykiss
|
||||
oryzias melastigma
|
||||
oryzias melastigma larvae
|
||||
p. promelas
|
||||
pagrus major
|
||||
pig
|
||||
pigeon
|
||||
pigeons
|
||||
pimephales promelas
|
||||
Pseudacris triseriata
|
||||
poecilia reticulata
|
||||
poultry
|
||||
quail
|
||||
rabbit
|
||||
rabbits
|
||||
rainbow trout
|
||||
Rana limnocharis
|
||||
rana
|
||||
limnocharis
|
||||
rana pipiens
|
||||
rat
|
||||
rats
|
||||
reptile
|
||||
reptiles
|
||||
ricefish
|
||||
ruminant
|
||||
ruminants
|
||||
sheepshead minnow
|
||||
sheepshead minnows
|
||||
spea multiplicata
|
||||
Salmo gairdneri
|
||||
salmon
|
||||
spotted march frog
|
||||
tadpoles
|
||||
treefrog
|
||||
toad
|
||||
terrestrial vertrebrates
|
||||
Limnodynastes tasmaniensis
|
||||
trout
|
||||
Vulpes vulpes
|
||||
wistar
|
||||
xenopus laevis
|
||||
xenpous leavis
|
||||
zebra fish
|
||||
zebrafish
|
||||
Salmo gairdneri
|
||||
minnow
|
||||
minnows
|
||||
Pimephales promela
|
||||
Cyprinodon variegatus
|
||||
limnodynastes
|
||||
Rana catesbeiana
|
||||
R. catesbeiana
|
||||
coho salmon
|
||||
Oncorhynchus tshawytscha
|
||||
O. tshawytscha
|
||||
tshawytscha
|
||||
catesbeiana
|
||||
kisutch
|
||||
Pseudacris triseriata
|
||||
Pseudacris
|
||||
triseriata
|
||||
Wood pigeon
|
||||
Columba palumbus
|
||||
palumbus
|
||||
Columbidae
|
||||
shrew
|
||||
shrews
|
||||
bank vole
|
||||
common vole
|
||||
vole
|
||||
voles
|
||||
lagomorph
|
||||
Wood mouse
|
||||
Apodemus sylvaticus
|
||||
A. sylvaticus
|
||||
Apodemus flavicollis
|
||||
Apodemus
|
||||
mus musculus
|
||||
Microtus arvalis
|
||||
Microtus agrestis
|
||||
Microtus
|
||||
Arvicola terrestris
|
||||
Sorex araneus
|
||||
Myodes glareolus
|
||||
yellow-necked mouse
|
||||
house mouse
|
||||
Oryctolagus cuniculus
|
||||
marten
|
||||
martes
|
||||
white-toothed shrew
|
||||
greater white-toothed shrew
|
||||
Lepus europaeus
|
||||
brown hare
|
||||
European brown hare
|
||||
European rabbit
|
||||
O. cuniculus
|
||||
Crocidura russula
|
||||
Chinese Hamster
|
||||
Rat
|
||||
Rats
|
||||
Dog
|
||||
Chinese hamsters
|
||||
Chinese hamster
|
||||
Mouse
|
||||
Guinea pig
|
||||
Wistar rats
|
||||
Rabbit
|
||||
mammalian
|
||||
Japanese quail
|
||||
Microtus subterraneus
|
||||
Lepomis macrochirus
|
||||
P. promelas
|
||||
Cyprinus carpio
|
||||
Fish
|
||||
Ictalurus punctatus
|
||||
Carassius carassius
|
||||
Lepomis macrochirus
|
||||
Poecilia reticulata
|
||||
Lebistes reticulatus
|
||||
Lepomis macrochirus
|
||||
Leiostomus xanthurus
|
||||
Pimephales promelas
|
||||
Lepomis macrochirus
|
||||
Albino rat
|
||||
Hen
|
||||
Goat
|
||||
Livestock
|
||||
Guinea Pigs
|
||||
Hamster
|
||||
wood mouse
|
||||
Rabbits
|
||||
Mice
|
||||
Rainbow trout
|
||||
Canary
|
||||
Serinus canaria
|
||||
Guinea Pig
|
||||
Cow
|
||||
Pigs
|
||||
Poultry
|
||||
Guinea-pigs
|
||||
White rabbits
|
||||
Birds
|
||||
Wood mice
|
||||
@ -0,0 +1,66 @@
|
||||
package drools
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Section
|
||||
|
||||
global Section section
|
||||
|
||||
// Rule 0: fires when the section's entity collection is not empty and
// calls highlightAll for the VERTEBRATE and NO_REDACTION_INDICATOR types.
rule "0: Highlight Indicators"
    when
        eval(!section.getEntities().isEmpty());
    then
        section.highlightAll("VERTEBRATE");
        section.highlightAll("NO_REDACTION_INDICATOR");
end
|
||||
|
||||
|
||||
// Rule 1: fires when section.contains("VERTEBRATE") is true and requests
// redaction of the NAME and ADDRESS types, tagged with rule number 1.
rule "1: Redacted because Section contains Vertebrate"
    when
        eval(section.contains("VERTEBRATE"));
    then
        section.redact("NAME", 1, "Redacted because Section contains Vertebrate");
        section.redact("ADDRESS", 1, "Redacted because Section contains Vertebrate");
end
|
||||
|
||||
|
||||
// Rule 2: the complement of the VERTEBRATE check. Fires when
// section.contains("VERTEBRATE") is false and calls redactNot for the
// NAME and ADDRESS types, tagged with rule number 2.
rule "2: Not Redacted because Section contains no Vertebrate"
    when
        eval(!section.contains("VERTEBRATE"));
    then
        section.redactNot("NAME", 2, "Not Redacted because Section contains no Vertebrate");
        section.redactNot("ADDRESS", 2, "Not Redacted because Section contains no Vertebrate");
end
|
||||
|
||||
|
||||
// Rule 3: fires when the section contains both a VERTEBRATE and a
// NO_REDACTION_INDICATOR, and calls redactNot for NAME and ADDRESS,
// tagged with rule number 3.
// NOTE(review): every section matching this rule also matches the
// VERTEBRATE-only condition of rule 1, and no salience is declared here;
// presumably Section resolves redact vs. redactNot itself - confirm.
rule "3: Do not redact Names and Addresses if no redaction Indicator is contained"
    when
        eval(section.contains("VERTEBRATE") && section.contains("NO_REDACTION_INDICATOR"));
    then
        section.redactNot("NAME", 3, "Vertebrate was found, but also a no redaction indicator");
        section.redactNot("ADDRESS", 3, "Vertebrate was found, but also a no redaction indicator");
end
|
||||
|
||||
|
||||
// Rule 4: fires when the lower-cased section text contains "applicant".
// Each call below targets the text following (or between) the given form
// labels and marks it as type ADDRESS with rule number 4.
rule "4: Redact contact information, if applicant is found"
    when
        eval(section.getText().toLowerCase().contains("applicant"));
    then
        // Same reason text is attached to every redaction made by this rule.
        String reason = "Redacted because of Rule 4";
        section.redactLineAfter("Name:", "ADDRESS", 4, reason);
        section.redactBetween("Address:", "Contact", "ADDRESS", 4, reason);
        section.redactLineAfter("Contact point:", "ADDRESS", 4, reason);
        section.redactLineAfter("Phone:", "ADDRESS", 4, reason);
        section.redactLineAfter("Fax:", "ADDRESS", 4, reason);
        section.redactLineAfter("E-mail:", "ADDRESS", 4, reason);
end
|
||||
|
||||
|
||||
// Rule 5: fires when the section text contains the exact (case-sensitive)
// phrase "Producer of the plant protection product". Each call below
// targets the text following (or between) the given form labels and marks
// it as type ADDRESS with rule number 5.
//
// The reason argument used to be the placeholder "xxxx"; it now follows the
// "Redacted because of Rule N" wording used by rule 4 so the recorded reason
// is meaningful.
rule "5: Redact contact information, if 'Producer of the plant protection product' is found"
    when
        eval(section.getText().contains("Producer of the plant protection product"));
    then
        section.redactLineAfter("Name:", "ADDRESS", 5, "Redacted because of Rule 5");
        section.redactBetween("Address:", "Contact", "ADDRESS", 5, "Redacted because of Rule 5");
        section.redactBetween("Contact:", "Phone", "ADDRESS", 5, "Redacted because of Rule 5");
        section.redactLineAfter("Phone:", "ADDRESS", 5, "Redacted because of Rule 5");
        section.redactLineAfter("Fax:", "ADDRESS", 5, "Redacted because of Rule 5");
        section.redactLineAfter("E-mail:", "ADDRESS", 5, "Redacted because of Rule 5");
end
|
||||
@ -0,0 +1,14 @@
|
||||
package com.iqser.red.service.redaction.v1.server;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class DummyTest {
|
||||
|
||||
@Test
|
||||
public void dummy(){
|
||||
System.out.println("Hello World");
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,110 @@
|
||||
package com.iqser.red.service.redaction.v1.server;
|
||||
|
||||
|
||||
import static org.springframework.boot.test.context.SpringBootTest.WebEnvironment.DEFINED_PORT;
|
||||
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
import org.springframework.test.context.junit4.SpringRunner;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionResult;
|
||||
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
|
||||
|
||||
@Ignore
|
||||
@RunWith(SpringRunner.class)
|
||||
@SpringBootTest(webEnvironment = DEFINED_PORT)
|
||||
public class RedactionIntegrationTest {
|
||||
|
||||
@Autowired
|
||||
private RedactionController redactionController;
|
||||
|
||||
|
||||
|
||||
@Test
|
||||
public void redactionTest() throws IOException {
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf");
|
||||
|
||||
RedactionRequest request = RedactionRequest.builder().document(IOUtils.toByteArray(pdfFileResource.getInputStream())).build();
|
||||
request.setFlatRedaction(false);
|
||||
|
||||
RedactionResult result = redactionController.redact(request);
|
||||
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Redacted.pdf")) {
|
||||
fileOutputStream.write(result.getDocument());
|
||||
}
|
||||
long end = System.currentTimeMillis();
|
||||
|
||||
System.out.println("duration: " + (end - start));
|
||||
System.out.println("numberOfPages: " + result.getNumberOfPages());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void classificationTest() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/51 Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf");
|
||||
|
||||
RedactionRequest request = RedactionRequest.builder().document(IOUtils.toByteArray(pdfFileResource.getInputStream())).build();
|
||||
|
||||
RedactionResult result = redactionController.classify(request);
|
||||
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Classified.pdf")) {
|
||||
fileOutputStream.write(result.getDocument());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void sectionsTest() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/51 Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf");
|
||||
|
||||
RedactionRequest request = RedactionRequest.builder().document(IOUtils.toByteArray(pdfFileResource.getInputStream())).build();
|
||||
|
||||
RedactionResult result = redactionController.sections(request);
|
||||
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Sections.pdf")) {
|
||||
fileOutputStream.write(result.getDocument());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void htmlTablesTest() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/51 Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf");
|
||||
|
||||
RedactionRequest request = RedactionRequest.builder().document(IOUtils.toByteArray(pdfFileResource.getInputStream())).build();
|
||||
|
||||
RedactionResult result = redactionController.htmlTables(request);
|
||||
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Tables.html")) {
|
||||
fileOutputStream.write(result.getDocument());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void htmlTableRotationTest() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
|
||||
|
||||
RedactionRequest request = RedactionRequest.builder().document(IOUtils.toByteArray(pdfFileResource.getInputStream())).build();
|
||||
|
||||
RedactionResult result = redactionController.htmlTables(request);
|
||||
|
||||
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Tables.html")) {
|
||||
fileOutputStream.write(result.getDocument());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,14 @@
|
||||
|
||||
ribbon:
|
||||
ConnectTimeout: 600000
|
||||
ReadTimeout: 600000
|
||||
|
||||
|
||||
spring:
|
||||
main:
|
||||
allow-bean-definition-overriding: true
|
||||
|
||||
processing.kafkastreams: false
|
||||
|
||||
platform.multi-tenancy:
|
||||
enabled: false
|
||||
@ -0,0 +1,3 @@
|
||||
spring:
|
||||
application:
|
||||
name: pdf-redaction-service-v1
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because one or more lines are too long
Binary file not shown.
Binary file not shown.
File diff suppressed because one or more lines are too long
Binary file not shown.
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because one or more lines are too long
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user