RED-41: Move and adapt redaction-service

This commit is contained in:
deiflaender 2020-06-29 13:14:35 +02:00
parent 5c03fb876f
commit 67077bb73e
151 changed files with 1890083 additions and 0 deletions

12
CHANGELOG.md Normal file
View File

@ -0,0 +1,12 @@
# Changelog
All notable changes to this project will be documented in this file.
## [Unreleased]
### Fixed
### Added
### Changed
### Removed

37
bamboo-specs/pom.xml Normal file
View File

@ -0,0 +1,37 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<!-- Maven module holding the Bamboo plan specification (PlanSpec) for this repository. -->
<modelVersion>4.0.0</modelVersion>
<!-- Inherits from the Atlassian specs parent; the empty relativePath makes Maven
resolve the parent from the repository instead of looking at ../pom.xml. -->
<parent>
<groupId>com.atlassian.bamboo</groupId>
<artifactId>bamboo-specs-parent</artifactId>
<version>7.0.4</version>
<relativePath/>
</parent>
<artifactId>bamboo-specs</artifactId>
<version>1.0.0-SNAPSHOT</version>
<packaging>jar</packaging>
<!-- No versions are declared below; presumably they are managed by bamboo-specs-parent. -->
<dependencies>
<dependency>
<groupId>com.atlassian.bamboo</groupId>
<artifactId>bamboo-specs-api</artifactId>
</dependency>
<dependency>
<groupId>com.atlassian.bamboo</groupId>
<artifactId>bamboo-specs</artifactId>
</dependency>
<!-- Test dependencies -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<!-- run 'mvn test' to perform offline validation of the plan -->
<!-- run 'mvn -Ppublish-specs' to upload the plan to your Bamboo server -->
</project>

View File

@ -0,0 +1,137 @@
package buildjob;
import static com.atlassian.bamboo.specs.builders.task.TestParserTask.createJUnitParserTask;
import com.atlassian.bamboo.specs.api.BambooSpec;
import com.atlassian.bamboo.specs.api.builders.BambooKey;
import com.atlassian.bamboo.specs.api.builders.docker.DockerConfiguration;
import com.atlassian.bamboo.specs.api.builders.permission.PermissionType;
import com.atlassian.bamboo.specs.api.builders.permission.Permissions;
import com.atlassian.bamboo.specs.api.builders.permission.PlanPermissions;
import com.atlassian.bamboo.specs.api.builders.plan.Job;
import com.atlassian.bamboo.specs.api.builders.plan.Plan;
import com.atlassian.bamboo.specs.api.builders.plan.PlanIdentifier;
import com.atlassian.bamboo.specs.api.builders.plan.Stage;
import com.atlassian.bamboo.specs.api.builders.plan.branches.BranchCleanup;
import com.atlassian.bamboo.specs.api.builders.plan.branches.PlanBranchManagement;
import com.atlassian.bamboo.specs.api.builders.project.Project;
import com.atlassian.bamboo.specs.builders.task.CheckoutItem;
import com.atlassian.bamboo.specs.builders.task.InjectVariablesTask;
import com.atlassian.bamboo.specs.builders.task.ScriptTask;
import com.atlassian.bamboo.specs.builders.task.VcsCheckoutTask;
import com.atlassian.bamboo.specs.builders.task.VcsTagTask;
import com.atlassian.bamboo.specs.builders.trigger.BitbucketServerTrigger;
import com.atlassian.bamboo.specs.model.task.InjectVariablesScope;
import com.atlassian.bamboo.specs.util.BambooServer;
/**
 * Bamboo plan specification for the {@code redaction-service} build.
 * <p>
 * The plan has a single stage and job that cleans the working directory, checks out the
 * default repository, runs the Maven/Docker build script, parses the JUnit reports and
 * tags the repository with the computed git tag.
 * <p>
 * Learn more on: <a href="https://confluence.atlassian.com/display/BAMBOO/Bamboo+Specs">https://confluence.atlassian.com/display/BAMBOO/Bamboo+Specs</a>
 */
@BambooSpec
public class PlanSpec {

    private static final String SERVICE_NAME = "redaction-service";

    // Locale.ROOT keeps the derived key independent of the JVM's default locale
    // (e.g. a Turkish locale would upper-case 'i' to a dotted capital I).
    // replace() is used because "-" is a literal, not a regular expression.
    private static final String SERVICE_KEY = SERVICE_NAME.toUpperCase(java.util.Locale.ROOT).replace("-", "");

    /**
     * Run main to publish plan on Bamboo
     */
    public static void main(final String[] args) throws Exception {
        //By default credentials are read from the '.credentials' file.
        BambooServer bambooServer = new BambooServer("http://localhost:8085");

        PlanSpec planSpec = new PlanSpec();

        Plan plan = planSpec.createPlan();
        bambooServer.publish(plan);

        PlanPermissions planPermission = planSpec.createPlanPermission(plan.getIdentifier());
        bambooServer.publish(planPermission);
    }

    /**
     * Builds the permission set that is published together with the plan.
     *
     * @param planIdentifier identifier of the plan the permissions apply to
     * @return the permissions bound to the plan's project key and plan key
     */
    private PlanPermissions createPlanPermission(PlanIdentifier planIdentifier) {
        Permissions permission = new Permissions()
                .userPermissions("atlbamboo", PermissionType.EDIT, PermissionType.VIEW, PermissionType.ADMIN, PermissionType.CLONE, PermissionType.BUILD)
                .groupPermissions("gin4", PermissionType.EDIT, PermissionType.VIEW, PermissionType.CLONE, PermissionType.BUILD)
                .loggedInUserPermissions(PermissionType.VIEW)
                .anonymousUserPermissionView();
        return new PlanPermissions(planIdentifier.getProjectKey(), planIdentifier.getPlanKey()).permissions(permission);
    }

    /**
     * @return the Bamboo project ("RED") this plan belongs to
     */
    private Project project() {
        return new Project()
                .name("RED")
                .key(new BambooKey("RED"));
    }

    /**
     * Creates the build plan definition.
     * <p>
     * The working directory is cleaned and the repository checked out exactly once;
     * the previous definition repeated both tasks back-to-back, which only wiped and
     * re-fetched the same checkout.
     *
     * @return the fully configured plan
     */
    public Plan createPlan() {
        return new Plan(
                project(),
                SERVICE_NAME, new BambooKey(SERVICE_KEY))
                .description("Plan created from (enter repository url of your plan)")
                .stages(new Stage("Default Stage")
                        .jobs(new Job("Default Job",
                                new BambooKey("JOB1"))
                                .tasks(
                                        new ScriptTask()
                                                .description("Clean")
                                                .inlineBody("#!/bin/bash\n" +
                                                        "set -e\n" +
                                                        "rm -rf ./*"),
                                        new VcsCheckoutTask()
                                                .description("Checkout Default Repository")
                                                .checkoutItems(new CheckoutItem().defaultRepository()),
                                        // Release builds (version_tag != "dev") set the version and deploy;
                                        // dev builds only install. Both then package and push the image and
                                        // write the tag to git.tag for the InjectVariablesTask below.
                                        new ScriptTask()
                                                .description("Build")
                                                .inlineBody("#!/bin/bash\n" +
                                                        "set -e\n" +
                                                        "if [[ \"${bamboo.version_tag}\" != \"dev\" ]]; then ${bamboo_capability_system_builder_mvn3_Maven_3}/bin/mvn --no-transfer-progress -f ${bamboo_build_working_directory}/" + SERVICE_NAME + "-v1/pom.xml versions:set -DnewVersion=${bamboo.version_tag}; fi\n" +
                                                        "if [[ \"${bamboo.version_tag}\" != \"dev\" ]]; then ${bamboo_capability_system_builder_mvn3_Maven_3}/bin/mvn --no-transfer-progress -f ${bamboo_build_working_directory}/" + SERVICE_NAME + "-image-v1/pom.xml versions:set -DnewVersion=${bamboo.version_tag}; fi\n" +
                                                        "if [[ \"${bamboo.version_tag}\" = \"dev\" ]]; then ${bamboo_capability_system_builder_mvn3_Maven_3}/bin/mvn -f ${bamboo_build_working_directory}/" + SERVICE_NAME + "-v1/pom.xml --no-transfer-progress clean install -Djava.security.egd=file:/dev/./urandom; fi\n" +
                                                        "if [[ \"${bamboo.version_tag}\" != \"dev\" ]]; then ${bamboo_capability_system_builder_mvn3_Maven_3}/bin/mvn -f ${bamboo_build_working_directory}/" + SERVICE_NAME + "-v1/pom.xml --no-transfer-progress clean deploy -e -DdeployAtEnd=true -Dmaven.wagon.http.ssl.insecure=true -Dmaven.wagon.http.ssl.allowall=true -Dmaven.wagon.http.ssl.ignore.validity.dates=true -DaltDeploymentRepository=iqser_release::default::https://nexus.iqser.com/repository/red-platform-releases; fi\n" +
                                                        "${bamboo_capability_system_builder_mvn3_Maven_3}/bin/mvn --no-transfer-progress -f ${bamboo_build_working_directory}/" + SERVICE_NAME + "-image-v1/pom.xml package\n" +
                                                        "${bamboo_capability_system_builder_mvn3_Maven_3}/bin/mvn --no-transfer-progress -f ${bamboo_build_working_directory}/" + SERVICE_NAME + "-image-v1/pom.xml docker:push\n" +
                                                        "if [[ \"${bamboo.version_tag}\" = \"dev\" ]]; then echo \"gitTag=${bamboo.planRepository.1.branch}_${bamboo.buildNumber}\" > git.tag; fi\n" +
                                                        "if [[ \"${bamboo.version_tag}\" != \"dev\" ]]; then echo \"gitTag=${bamboo.version_tag}\" > git.tag; fi\n"),
                                        createJUnitParserTask()
                                                .description("Resultparser")
                                                .resultDirectories("**/test-reports/*.xml, **/target/surefire-reports/*.xml, **/target/failsafe-reports/*.xml")
                                                .enabled(true),
                                        // Exposes the gitTag written by the build script as ${bamboo.g.gitTag}.
                                        new InjectVariablesTask()
                                                .description("Inject git Tag")
                                                .path("git.tag")
                                                .namespace("g")
                                                .scope(InjectVariablesScope.LOCAL),
                                        new VcsTagTask()
                                                .description("${bamboo.g.gitTag}")
                                                .tagName("${bamboo.g.gitTag}")
                                                .defaultRepository())
                                .dockerConfiguration(
                                        new DockerConfiguration()
                                                .image("nexus.iqser.com:5001/infra/maven:3.6.2-jdk-13-3.0.0")
                                                .volume("/etc/maven/settings.xml", "/usr/share/maven/ref/settings.xml")
                                                .volume("/var/run/docker.sock", "/var/run/docker.sock")
                                )
                        )
                )
                .linkedRepositories("RED / " + SERVICE_NAME)
                .triggers(new BitbucketServerTrigger())
                .planBranchManagement(new PlanBranchManagement()
                        .createForVcsBranch()
                        .delete(new BranchCleanup()
                                .whenInactiveInRepositoryAfterDays(14))
                        .notificationForCommitters());
    }
}

View File

@ -0,0 +1,17 @@
package buildjob;
import org.junit.Test;
import com.atlassian.bamboo.specs.api.builders.plan.Plan;
import com.atlassian.bamboo.specs.api.exceptions.PropertiesValidationException;
import com.atlassian.bamboo.specs.api.util.EntityPropertiesBuilders;
/**
 * Offline check of the Bamboo plan definition; no Bamboo server is contacted.
 */
public class PlanSpecTest {

    /**
     * Creates the plan from {@link PlanSpec} and hands it to
     * {@link EntityPropertiesBuilders#build}, which is declared to signal problems
     * with a {@link PropertiesValidationException}.
     */
    @Test
    public void checkYourPlanOffline() throws PropertiesValidationException {
        PlanSpec spec = new PlanSpec();
        Plan offlinePlan = spec.createPlan();

        EntityPropertiesBuilders.build(offlinePlan);
    }
}

21
pom.xml Normal file
View File

@ -0,0 +1,21 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Aggregator POM: only lists the modules so they can be built together from the repository root. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>redaction-service</artifactId>
<groupId>com.iqser.red.service</groupId>
<version>1.0-SNAPSHOT</version>
<packaging>pom</packaging>
<!-- bamboo-specs: CI plan definition; redaction-service-v1: API and server code;
redaction-service-image-v1: Docker image build. -->
<modules>
<module>bamboo-specs</module>
<module>redaction-service-v1</module>
<module>redaction-service-image-v1</module>
</modules>
</project>

View File

@ -0,0 +1,97 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Builds the Docker image for the redaction service: the server jar is copied into the
Docker build directory and the image is then built by the docker-maven-plugin. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<groupId>com.iqser.red</groupId>
<artifactId>platform-docker-dependency</artifactId>
<version>1.0.0</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>redaction-service-image-v1</artifactId>
<groupId>com.iqser.red.service</groupId>
<version>1.0-SNAPSHOT</version>
<packaging>pom</packaging>
<!-- docker.image.prefix, docker.image.version and docker.build.directory are not defined
in this file; presumably they come from platform-docker-dependency (confirm). -->
<properties>
<service.server>redaction-service-server-v1</service.server>
<platform.jar>${service.server}.jar</platform.jar>
<docker.skip.push>false</docker.skip.push>
<docker.image.name>${docker.image.prefix}/${service.server}</docker.image.name>
</properties>
<build>
<!-- Activates the plugins; their configuration lives in pluginManagement below
or, where nothing is declared here, presumably in the parent POM. -->
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-resources-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
</plugin>
<plugin>
<groupId>io.fabric8</groupId>
<artifactId>docker-maven-plugin</artifactId>
</plugin>
</plugins>
<pluginManagement>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<executions>
<!-- Copies the server jar (same groupId and version as this module) into the
Docker build directory under the name ${platform.jar}. -->
<execution>
<id>download-platform-jar</id>
<phase>prepare-package</phase>
<goals>
<goal>copy</goal>
</goals>
<configuration>
<artifactItems>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>${service.server}</artifactId>
<version>${project.version}</version>
<type>jar</type>
<overWrite>true</overWrite>
<destFileName>${platform.jar}</destFileName>
</dependency>
</artifactItems>
<outputDirectory>${docker.build.directory}</outputDirectory>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>io.fabric8</groupId>
<artifactId>docker-maven-plugin</artifactId>
<configuration>
<images>
<image>
<name>${docker.image.name}</name>
<build>
<dockerFileDir>${docker.build.directory}</dockerFileDir>
<!-- Passed to the Dockerfile as build argument PLATFORM_JAR. -->
<args>
<PLATFORM_JAR>${platform.jar}</PLATFORM_JAR>
</args>
<!-- The image is tagged with the image version and additionally as "latest". -->
<tags>
<tag>${docker.image.version}</tag>
<tag>latest</tag>
</tags>
</build>
</image>
</images>
</configuration>
</plugin>
</plugins>
</pluginManagement>
</build>
</project>

View File

@ -0,0 +1,9 @@
# Image for the redaction service, based on the shared platform base image.
FROM gin5/platform-base:5.2.0

# File name of the service jar inside the build context, supplied as a build argument.
ARG PLATFORM_JAR

# Persist the jar name into the image environment (ARG values are build-time only).
# The key=value form replaces the legacy whitespace-separated ENV syntax.
# NOTE(review): both variables are presumably read by the base image's start script - confirm.
ENV PLATFORM_JAR=${PLATFORM_JAR}
ENV USES_ELASTICSEARCH=false

# Copy the service jar into the image root.
COPY ["${PLATFORM_JAR}", "/"]

View File

@ -0,0 +1,55 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Parent POM of the service code: aggregates the API and server modules and manages
the dependency versions they share. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>platform-dependency</artifactId>
<groupId>com.iqser.red</groupId>
<version>1.0.1</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>redaction-service-v1</artifactId>
<groupId>com.iqser.red.service</groupId>
<version>1.0-SNAPSHOT</version>
<packaging>pom</packaging>
<modules>
<module>redaction-service-api-v1</module>
<module>redaction-service-server-v1</module>
</modules>
<!-- NOTE(review): redaction-service-server-v1 overrides pdfbox.version with 2.0.20,
so this value only applies to modules that do not override it. -->
<properties>
<pdfbox.version>2.0.16</pdfbox.version>
</properties>
<dependencyManagement>
<dependencies>
<!-- BOM import: brings in the dependency versions managed by platform-commons-dependency. -->
<dependency>
<groupId>com.iqser.red</groupId>
<artifactId>platform-commons-dependency</artifactId>
<version>1.0.0</version>
<scope>import</scope>
<type>pom</type>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>${pdfbox.version}</version>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox-tools</artifactId>
<version>${pdfbox.version}</version>
</dependency>
</dependencies>
</dependencyManagement>
</project>

View File

@ -0,0 +1,21 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- API module: request/response models and the REST resource interface of the redaction service. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<artifactId>redaction-service-v1</artifactId>
<groupId>com.iqser.red.service</groupId>
<version>1.0-SNAPSHOT</version>
</parent>
<artifactId>redaction-service-api-v1</artifactId>
<dependencies>
<!-- Optional: needed to compile the mapping annotations, but not passed on
transitively to consumers of this artifact. -->
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-web</artifactId>
<optional>true</optional>
</dependency>
</dependencies>
</project>

View File

@ -0,0 +1,16 @@
package com.iqser.red.service.redaction.v1.model;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Request body accepted by the redaction endpoints.
 * Accessors, builder and constructors are generated by Lombok.
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class RedactionRequest {
// Raw bytes of the document to process (presumably a PDF - confirm against the server code).
private byte[] document;
// NOTE(review): meaning of "flat" redaction is not visible in this file - document once confirmed.
private boolean flatRedaction;
}

View File

@ -0,0 +1,16 @@
package com.iqser.red.service.redaction.v1.model;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Response body returned by the redaction endpoints.
 * Accessors, builder and constructors are generated by Lombok.
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class RedactionResult {
// Raw bytes of the resulting document.
private byte[] document;
// Page count reported with the result (presumably of the returned document - confirm).
private int numberOfPages;
}

View File

@ -0,0 +1,31 @@
package com.iqser.red.service.redaction.v1.resources;
import org.springframework.http.MediaType;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestBody;
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
import com.iqser.red.service.redaction.v1.model.RedactionResult;
/**
 * HTTP contract of the redaction service. All operations are mapped as POST.
 */
public interface RedactionResource {
// Logical name of the service.
String SERVICE_NAME = "redaction-service-v1";
/**
 * POST /redact - consumes and produces JSON.
 *
 * @param redactionRequest the document to process
 * @return the processing result
 */
@PostMapping(value = "/redact", produces = MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE)
RedactionResult redact(@RequestBody RedactionRequest redactionRequest);
/**
 * POST /debug/classifications - debug endpoint; consumes and produces JSON.
 */
@PostMapping(value = "/debug/classifications", produces = MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE)
RedactionResult classify(@RequestBody RedactionRequest redactionRequest);
/**
 * POST /debug/sections - debug endpoint; consumes and produces JSON.
 */
@PostMapping(value = "/debug/sections", produces = MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE)
RedactionResult sections(@RequestBody RedactionRequest redactionRequest);
/**
 * POST /debug/htmlTables - debug endpoint; consumes and produces JSON.
 */
@PostMapping(value = "/debug/htmlTables", produces = MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE)
RedactionResult htmlTables(@RequestBody RedactionRequest redactionRequest);
/**
 * POST /rules - returns the rules as a string.
 * NOTE(review): this is a read operation mapped as POST; confirm whether GET was intended.
 */
@PostMapping(value = "/rules", produces = MediaType.APPLICATION_JSON_VALUE)
String getRules();
/**
 * POST /rules/update - replaces the rules with the given body.
 *
 * @param rules the new rules
 */
@PostMapping(value = "/rules/update", consumes = MediaType.APPLICATION_JSON_VALUE)
void updateRules(@RequestBody String rules);
}

View File

@ -0,0 +1,159 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Server module: the Spring Boot application implementing the redaction service API. -->
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://maven.apache.org/POM/4.0.0"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<artifactId>redaction-service-v1</artifactId>
<groupId>com.iqser.red.service</groupId>
<version>1.0-SNAPSHOT</version>
</parent>
<artifactId>redaction-service-server-v1</artifactId>
<!-- Overrides the pdfbox.version defined in the parent POM (2.0.16). -->
<properties>
<pdfbox.version>2.0.20</pdfbox.version>
</properties>
<!-- NOTE(review): the parent already manages both pdfbox artifacts via ${pdfbox.version};
this block repeats those entries and may be redundant given the property override above. -->
<dependencyManagement>
<dependencies>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>${pdfbox.version}</version>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox-tools</artifactId>
<version>${pdfbox.version}</version>
</dependency>
</dependencies>
</dependencyManagement>
<dependencies>
<!-- rule engine; drools-core and kie-spring must stay on the same version -->
<dependency>
<groupId>org.drools</groupId>
<artifactId>drools-core</artifactId>
<version>7.37.0.Final</version>
</dependency>
<dependency>
<groupId>org.kie</groupId>
<artifactId>kie-spring</artifactId>
<version>7.37.0.Final</version>
</dependency>
<dependency>
<groupId>org.locationtech.jts</groupId>
<artifactId>jts-core</artifactId>
<version>1.16.1</version>
</dependency>
<!-- API module of this service -->
<dependency>
<groupId>com.iqser.red.service</groupId>
<artifactId>redaction-service-api-v1</artifactId>
<version>${project.version}</version>
</dependency>
<!-- commons -->
<dependency>
<groupId>com.iqser.gin4.commons</groupId>
<artifactId>spring-commons</artifactId>
</dependency>
<dependency>
<groupId>com.iqser.gin4.commons</groupId>
<artifactId>logging-commons</artifactId>
</dependency>
<dependency>
<groupId>com.iqser.gin4.commons</groupId>
<artifactId>metric-commons</artifactId>
</dependency>
<!-- other external -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox-tools</artifactId>
</dependency>
<!-- spring -->
<dependency>
<groupId>org.springframework.cloud</groupId>
<artifactId>spring-cloud-starter-openfeign</artifactId>
</dependency>
<!-- ribbon must be included explicitly because there is a transitive dependency on it -->
<dependency>
<groupId>org.springframework.cloud</groupId>
<artifactId>spring-cloud-starter-netflix-ribbon</artifactId>
</dependency>
<!-- test dependencies -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.iqser.gin4.commons</groupId>
<artifactId>test-commons</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<!-- generate git.properties for exposure in /info -->
<groupId>pl.project13.maven</groupId>
<artifactId>git-commit-id-plugin</artifactId>
<executions>
<execution>
<goals>
<goal>revision</goal>
</goals>
<configuration>
<generateGitPropertiesFile>true</generateGitPropertiesFile>
<gitDescribe>
<tags>true</tags>
</gitDescribe>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<!-- additionally attaches the plain (not repackaged) jar under the classifier "original" -->
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<executions>
<execution>
<id>original-jar</id>
<goals>
<goal>jar</goal>
</goals>
<configuration>
<classifier>original</classifier>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<!-- repackages the generated jar into a runnable fat jar and makes it
executable -->
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<executions>
<execution>
<goals>
<goal>repackage</goal>
</goals>
<configuration>
<executable>true</executable>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@ -0,0 +1,44 @@
package com.iqser.red.service.redaction.v1.server;
import org.kie.api.KieServices;
import org.kie.api.builder.KieBuilder;
import org.kie.api.builder.KieFileSystem;
import org.kie.api.builder.KieModule;
import org.kie.api.runtime.KieContainer;
import org.kie.internal.io.ResourceFactory;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.actuate.autoconfigure.security.servlet.ManagementWebSecurityAutoConfiguration;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.autoconfigure.security.servlet.SecurityAutoConfiguration;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Import;
import com.iqser.gin4.commons.spring.DefaultWebMvcConfiguration;
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
/**
 * Spring Boot entry point of the redaction service.
 * <p>
 * Security auto-configuration is excluded, and a {@link KieContainer} built from the
 * rule file on the classpath is exposed as a bean.
 */
@Import({DefaultWebMvcConfiguration.class})
@EnableConfigurationProperties(RedactionServiceSettings.class)
@SpringBootApplication(exclude = { SecurityAutoConfiguration.class, ManagementWebSecurityAutoConfiguration.class })
public class Application {

    /** Classpath location of the Drools rule file. */
    private static final String drlFile = "drools/rules.drl";

    public static void main(String[] args) {
        SpringApplication.run(Application.class, args);
    }

    /**
     * Writes the rule file into a fresh KIE file system, builds it and creates a
     * container for the resulting module.
     *
     * @return the container for the built rule module
     */
    @Bean
    public KieContainer kieContainer() {
        KieServices services = KieServices.Factory.get();

        KieFileSystem ruleFiles = services.newKieFileSystem();
        ruleFiles.write(ResourceFactory.newClassPathResource(drlFile));

        KieBuilder builder = services.newKieBuilder(ruleFiles);
        builder.buildAll();

        KieModule builtModule = builder.getKieModule();
        return services.newKieContainer(builtModule.getReleaseId());
    }
}

View File

@ -0,0 +1,26 @@
package com.iqser.red.service.redaction.v1.server.classification.model;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Aggregated model of a parsed document: its pages, paragraphs, found entities and
 * document-wide text statistics. Accessors are generated by Lombok.
 */
@Data
@NoArgsConstructor
public class Document {
private List<Page> pages = new ArrayList<>();
private List<Paragraph> paragraphs = new ArrayList<>();
// NOTE(review): the Integer key is presumably a page number - confirm against the code filling this map.
private Map<Integer, Set<Entity>> entities = new HashMap<>();
// Frequency statistics over text height, font size, font and font style.
private FloatFrequencyCounter textHeightCounter = new FloatFrequencyCounter();
private FloatFrequencyCounter fontSizeCounter= new FloatFrequencyCounter();
private StringFrequencyCounter fontCounter= new StringFrequencyCounter();
private StringFrequencyCounter fontStyleCounter = new StringFrequencyCounter();
private boolean headlines;
}

View File

@ -0,0 +1,75 @@
package com.iqser.red.service.redaction.v1.server.classification.model;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import lombok.Getter;
/**
 * Counts how often each {@code float} value has been recorded.
 */
public class FloatFrequencyCounter {

    /** Number of recorded occurrences per value. */
    @Getter
    Map<Float, Integer> countPerValue = new HashMap<>();

    /**
     * Records one more occurrence of {@code value}.
     */
    public void add(float value) {
        int occurrences = countPerValue.containsKey(value) ? countPerValue.get(value) + 1 : 1;
        countPerValue.put(value, occurrences);
    }

    /**
     * Adds all counts of another counter map to this counter.
     *
     * @param otherCounter counts per value to merge in
     */
    public void addAll(Map<Float, Integer> otherCounter) {
        for (Map.Entry<Float, Integer> other : otherCounter.entrySet()) {
            Float key = other.getKey();
            Integer increment = other.getValue();
            if (!countPerValue.containsKey(key)) {
                countPerValue.put(key, increment);
                continue;
            }
            countPerValue.put(key, countPerValue.get(key) + increment);
        }
    }

    /**
     * @return the value with the highest count, or {@code null} if nothing was recorded;
     *         on equal counts the entry visited later in map iteration order wins
     */
    public Float getMostPopular() {
        Map.Entry<Float, Integer> winner = null;
        for (Map.Entry<Float, Integer> candidate : countPerValue.entrySet()) {
            if (winner == null || candidate.getValue() >= winner.getValue()) {
                winner = candidate;
            }
        }
        if (winner == null) {
            return null;
        }
        return winner.getKey();
    }

    /**
     * @return all recorded values greater than the most popular one, largest first
     */
    public List<Float> getHighterThanMostPopular() {
        Float mostPopular = getMostPopular();
        List<Float> higher = countPerValue.keySet()
                .stream()
                .filter(value -> value > mostPopular)
                .collect(Collectors.toCollection(ArrayList::new));
        higher.sort(Collections.reverseOrder());
        return higher;
    }

    /**
     * @return the largest recorded value, or {@code null} if nothing was recorded
     */
    public Float getHighest() {
        Float highest = null;
        for (Float candidate : countPerValue.keySet()) {
            if (highest == null || candidate > highest) {
                highest = candidate;
            }
        }
        return highest;
    }
}

View File

@ -0,0 +1,35 @@
package com.iqser.red.service.redaction.v1.server.classification.model;
import java.util.List;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle;
import lombok.Data;
import lombok.NonNull;
import lombok.RequiredArgsConstructor;
/**
 * Model of a single page: its text blocks plus layout information and per-page text
 * statistics. Lombok generates the accessors and a constructor taking the text blocks.
 */
@Data
@RequiredArgsConstructor
public class Page {
// The only constructor argument; Lombok rejects null here.
@NonNull
private List<AbstractTextContainer> textBlocks;
private Rectangle bodyTextFrame;
private boolean landscape;
// NOTE(review): presumably the page rotation in degrees - confirm against the code that sets it.
private int rotation;
private int pageNumber;
// Frequency statistics over text height, font size, font and font style.
private FloatFrequencyCounter textHeightCounter = new FloatFrequencyCounter();
private FloatFrequencyCounter fontSizeCounter = new FloatFrequencyCounter();
private StringFrequencyCounter fontCounter = new StringFrequencyCounter();
private StringFrequencyCounter fontStyleCounter = new StringFrequencyCounter();
/**
 * @return true if the rotation is anything other than 0
 */
public boolean isRotated() {
return rotation != 0;
}
}

View File

@ -0,0 +1,41 @@
package com.iqser.red.service.redaction.v1.server.classification.model;
import java.util.ArrayList;
import java.util.List;
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * A paragraph made up of page blocks, which may be text blocks or tables.
 */
@Data
@NoArgsConstructor
public class Paragraph {

    private List<AbstractTextContainer> pageBlocks = new ArrayList<>();

    /**
     * Collects the sequences of all {@link TextBlock}s of this paragraph, in block order.
     * Blocks of other types are ignored.
     *
     * @return a new searchable text holding those sequences
     */
    public SearchableText getSearchableText() {
        SearchableText result = new SearchableText();
        for (AbstractTextContainer container : pageBlocks) {
            if (!(container instanceof TextBlock)) {
                continue;
            }
            TextBlock textBlock = (TextBlock) container;
            result.addAll(textBlock.getSequences());
        }
        return result;
    }

    /**
     * @return a new list with all {@link Table} blocks of this paragraph, in block order
     */
    public List<Table> getTables() {
        List<Table> found = new ArrayList<>();
        for (AbstractTextContainer container : pageBlocks) {
            if (container instanceof Table) {
                found.add((Table) container);
            }
        }
        return found;
    }
}

View File

@ -0,0 +1,47 @@
package com.iqser.red.service.redaction.v1.server.classification.model;
import java.util.HashMap;
import java.util.Map;
import lombok.Getter;
/**
 * Counts how often each string value has been recorded.
 */
public class StringFrequencyCounter {

    /** Number of recorded occurrences per value. */
    @Getter
    Map<String, Integer> countPerValue = new HashMap<>();

    /**
     * Records one more occurrence of {@code value}.
     */
    public void add(String value) {
        int occurrences = countPerValue.containsKey(value) ? countPerValue.get(value) + 1 : 1;
        countPerValue.put(value, occurrences);
    }

    /**
     * Adds all counts of another counter map to this counter.
     *
     * @param otherCounter counts per value to merge in
     */
    public void addAll(Map<String, Integer> otherCounter) {
        for (Map.Entry<String, Integer> other : otherCounter.entrySet()) {
            String key = other.getKey();
            Integer increment = other.getValue();
            if (!countPerValue.containsKey(key)) {
                countPerValue.put(key, increment);
                continue;
            }
            countPerValue.put(key, countPerValue.get(key) + increment);
        }
    }

    /**
     * @return the value with the highest count, or {@code null} if nothing was recorded;
     *         on equal counts the entry visited first in map iteration order is kept
     */
    public String getMostPopular() {
        Map.Entry<String, Integer> winner = null;
        for (Map.Entry<String, Integer> candidate : countPerValue.entrySet()) {
            if (winner == null || candidate.getValue() > winner.getValue()) {
                winner = candidate;
            }
        }
        if (winner == null) {
            return null;
        }
        return winner.getKey();
    }
}

View File

@ -0,0 +1,148 @@
package com.iqser.red.service.redaction.v1.server.classification.model;
import java.util.ArrayList;
import java.util.List;
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
/**
 * A block of text on a page: a bounding box (inherited {@code minX/maxX/minY/maxY})
 * together with the text sequences it contains and some font statistics.
 */
@AllArgsConstructor
@Builder
@Data
public class TextBlock extends AbstractTextContainer {

    @Builder.Default
    private List<TextPositionSequence> sequences = new ArrayList<>();

    private int rotation;

    private String mostPopularWordFont;

    private String mostPopularWordStyle;

    private float mostPopularWordFontSize;

    private float mostPopularWordHeight;

    private float mostPopularWordSpaceWidth;

    private float highestFontSize;

    private String classification;

    /**
     * Creates a block from explicit bounds. The given list is stored as-is (not copied).
     */
    public TextBlock(float minX, float maxX, float minY, float maxY, List<TextPositionSequence> sequences, int rotation) {
        this.minX = minX;
        this.maxX = maxX;
        this.minY = minY;
        this.maxY = maxY;
        this.sequences = sequences;
        this.rotation = rotation;
    }

    /**
     * @return a copy of this block whose bounds also enclose {@code r}; this block is not modified
     */
    public TextBlock union(TextPositionSequence r) {
        TextBlock union = this.copy();
        union.add(r);
        return union;
    }

    /**
     * @return a copy of this block whose bounds enclose {@code r} and whose sequences
     *         additionally contain those of {@code r}; this block is not modified
     */
    public TextBlock union(TextBlock r) {
        TextBlock union = this.copy();
        union.add(r);
        return union;
    }

    /**
     * Grows this block's bounds to enclose {@code r} and appends the sequences of {@code r}.
     */
    public void add(TextBlock r) {
        if (r.getMinX() < minX) {
            minX = r.getMinX();
        }
        if (r.getMaxX() > maxX) {
            maxX = r.getMaxX();
        }
        if (r.getMinY() < minY) {
            minY = r.getMinY();
        }
        if (r.getMaxY() > maxY) {
            maxY = r.getMaxY();
        }
        sequences.addAll(r.getSequences());
    }

    /**
     * Grows this block's bounds to enclose {@code r}.
     * NOTE(review): unlike {@link #add(TextBlock)} this does not append {@code r} to the
     * sequences - confirm that this is intended.
     */
    public void add(TextPositionSequence r) {
        if (r.getX1() < minX) {
            minX = r.getX1();
        }
        if (r.getX2() > maxX) {
            maxX = r.getX2();
        }
        if (r.getY1() < minY) {
            minY = r.getY1();
        }
        if (r.getY2() > maxY) {
            maxY = r.getY2();
        }
    }

    /**
     * Copies bounds, sequences and rotation into a new block; the remaining fields are
     * left at their defaults.
     * <p>
     * The sequence list itself is copied: sharing it would let {@link #union(TextBlock)}
     * append to this block's own list through the copy.
     */
    public TextBlock copy() {
        List<TextPositionSequence> sequencesCopy = sequences == null ? new ArrayList<>() : new ArrayList<>(sequences);
        return new TextBlock(minX, maxX, minY, maxY, sequencesCopy, rotation);
    }

    /**
     * Sets the bounds from an origin plus width and height.
     */
    public void resize(float x1, float y1, float width, float height) {
        set(x1, y1, x1 + width, y1 + height);
    }

    /**
     * Sets the bounds from two corner points given in any order.
     */
    public void set(float x1, float y1, float x2, float y2) {
        this.minX = Math.min(x1, x2);
        this.maxX = Math.max(x1, x2);
        this.minY = Math.min(y1, y2);
        this.maxY = Math.max(y1, y2);
    }

    public float getHeight() {
        return maxY - minY;
    }

    public float getWidth() {
        return maxX - minX;
    }

    /**
     * Concatenates all sequences, inserting a single space between two neighbours when
     * neither the end of the previous nor the start of the next one is a space.
     */
    @Override
    public String toString() {
        StringBuilder builder = new StringBuilder();

        for (int i = 0; i < sequences.size(); i++) {
            String sequenceAsString = sequences.get(i).toString();
            // Fix for missing whitespace. This is recognized in the getSequences method. See PDFTextStripper line 1730.
            if (i != 0 && sequences.get(i - 1).charAt(sequences.get(i - 1).length() - 1) != ' ' && sequenceAsString.charAt(0) != ' ') {
                builder.append(' ');
            }
            builder.append(sequenceAsString);
        }

        return builder.toString();
    }

    /**
     * Joins the sequences into text: a line break is inserted when the vertical distance
     * to the previous sequence exceeds the text height of the current one, otherwise a
     * space. Hyphenated line breaks are removed from the result.
     */
    @Override
    public String getText() {
        StringBuilder sb = new StringBuilder();

        TextPositionSequence previous = null;
        for (TextPositionSequence word : sequences) {
            if (previous != null) {
                if (Math.abs(previous.getY1() - word.getY1()) > word.getTextHeight()) {
                    sb.append('\n');
                } else {
                    sb.append(' ');
                }
            }
            sb.append(word.toString());
            previous = word;
        }

        return TextNormalizationUtilities.removeHyphenLineBreaks(sb.toString());
    }
}

View File

@ -0,0 +1,215 @@
package com.iqser.red.service.redaction.v1.server.classification.service;
import java.util.ArrayList;
import java.util.List;
import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.server.classification.utils.PositionUtils;
import com.iqser.red.service.redaction.v1.server.classification.model.FloatFrequencyCounter;
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
import com.iqser.red.service.redaction.v1.server.classification.model.StringFrequencyCounter;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
@Service
@SuppressWarnings("all")
public class BlockificationService {
/**
 * Groups the words of a page into text blocks.
 * <p>
 * Words are consumed in the given order. The current block is closed and a new one is
 * started when the next word is separated from the block by a line gap, starts again
 * further up the page, or is separated from the block by a ruling line.
 *
 * @param textPositions the words of the page, in reading order
 * @param horizontalRulingLines horizontal ruling lines of the page
 * @param verticalRulingLines vertical ruling lines of the page
 * @return a page holding the resulting text blocks
 */
public Page blockify(List<TextPositionSequence> textPositions, List<Ruling> horizontalRulingLines, List<Ruling> verticalRulingLines) {

    List<TextPositionSequence> chunkWords = new ArrayList<>();
    List<AbstractTextContainer> chunkBlockList1 = new ArrayList<>();

    // Bounding box of the words collected for the current block. The sentinels are the
    // extreme float values so the box is exact for any coordinate; fixed values such as
    // 1000 / 0 would clamp it on pages larger than 1000 units or with negative coordinates.
    float minX = Float.MAX_VALUE, maxX = -Float.MAX_VALUE, minY = Float.MAX_VALUE, maxY = -Float.MAX_VALUE;

    for (TextPositionSequence word : textPositions) {

        boolean lineSeparation = minY - word.getY2() > word.getHeight() * 1.25;
        boolean startFromTop = word.getY1() > maxY + word.getHeight();

        // A split is only possible once the current block holds at least one word.
        // For rotation 0 and 90 the roles of vertical and horizontal rulings are swapped.
        if (!chunkWords.isEmpty() &&
                (lineSeparation
                        || startFromTop
                        || (word.getRotation() == 0 && isSplittedByRuling(maxX, minY, word.getX1(), word.getY1(), verticalRulingLines))
                        || (word.getRotation() == 0 && isSplittedByRuling(minX, minY, word.getX1(), word.getY2(), horizontalRulingLines))
                        || (word.getRotation() == 90 && isSplittedByRuling(maxX, minY, word.getX1(), word.getY1(), horizontalRulingLines))
                        || (word.getRotation() == 90 && isSplittedByRuling(minX, minY, word.getX1(), word.getY2(), verticalRulingLines))
                )) {

            chunkBlockList1.add(buildTextBlock(chunkWords));

            // Start a fresh block with an empty bounding box.
            chunkWords = new ArrayList<>();
            minX = Float.MAX_VALUE;
            maxX = -Float.MAX_VALUE;
            minY = Float.MAX_VALUE;
            maxY = -Float.MAX_VALUE;
        }

        chunkWords.add(word);

        minX = Math.min(minX, word.getX1());
        maxX = Math.max(maxX, word.getX2());
        minY = Math.min(minY, word.getY1());
        maxY = Math.max(maxY, word.getY2());
    }

    // Flush the words left over after the last split; nothing is added when
    // buildTextBlock returns null.
    TextBlock cb1 = buildTextBlock(chunkWords);
    if (cb1 != null) {
        chunkBlockList1.add(cb1);
    }

    return new Page(chunkBlockList1);
}
/**
 * Creates a single {@link TextBlock} that spans all given words and records
 * the most frequent font, style, font size, text height and space width of
 * those words, plus the highest font size.
 *
 * @param wordBlockList the words forming the block
 * @return the block, or {@code null} when the list is empty
 */
private TextBlock buildTextBlock(List<TextPositionSequence> wordBlockList) {
    FloatFrequencyCounter heights = new FloatFrequencyCounter();
    FloatFrequencyCounter fontSizes = new FloatFrequencyCounter();
    FloatFrequencyCounter spaceWidths = new FloatFrequencyCounter();
    StringFrequencyCounter fonts = new StringFrequencyCounter();
    StringFrequencyCounter styles = new StringFrequencyCounter();

    TextBlock block = null;
    for (TextPositionSequence word : wordBlockList) {
        heights.add(word.getTextHeight());
        fontSizes.add(word.getFontSize());
        spaceWidths.add(word.getSpaceWidth());
        fonts.add(word.getFont());
        styles.add(word.getFontStyle());

        if (block != null) {
            // Grow the existing block so that it also covers this word.
            TextBlock merged = block.union(word);
            block.resize(merged.getMinX(), merged.getMinY(), merged.getWidth(), merged.getHeight());
            continue;
        }
        // The first word defines the initial extent of the block.
        block = new TextBlock(word.getX1(), word.getX2(), word.getY1(), word.getY2(), wordBlockList, word.getRotation());
    }

    if (block == null) {
        return null;
    }
    block.setMostPopularWordFont(fonts.getMostPopular());
    block.setMostPopularWordStyle(styles.getMostPopular());
    block.setMostPopularWordFontSize(fontSizes.getMostPopular());
    block.setMostPopularWordHeight(heights.getMostPopular());
    block.setMostPopularWordSpaceWidth(spaceWidths.getMostPopular());
    block.setHighestFontSize(fontSizes.getHighest());
    return block;
}
/**
 * Tells whether any of the given ruling lines intersects the straight line
 * from ({@code previousX2}, {@code previousY1}) to
 * ({@code currentX1}, {@code currentY1}).
 *
 * @return {@code true} if at least one ruling intersects that line
 */
private boolean isSplittedByRuling(float previousX2, float previousY1, float currentX1, float currentY1, List<Ruling> rulingLines) {
    return rulingLines.stream()
            .anyMatch(ruling -> ruling.intersectsLine(previousX2, previousY1, currentX1, currentY1));
}
/**
 * Computes the frame that encloses the body text of all pages with the
 * requested orientation.
 * <p>
 * A plain text block contributes when it has a most popular font and style,
 * spans roughly three lines or more (approximate line count of at least 2.9)
 * and its most popular font size is not below the document's most popular
 * font size. Every text block inside a table cell contributes unconditionally.
 *
 * @param pages                   the pages to inspect
 * @param documentFontSizeCounter font size statistics of the whole document
 * @param landscape               orientation of the pages to take into account
 * @return the enclosing rectangle, built as (minY, minX, width, height)
 */
public Rectangle calculateBodyTextFrame(List<Page> pages, FloatFrequencyCounter documentFontSizeCounter, boolean landscape) {
    // Layout: {minX, maxX, minY, maxY}
    float[] bounds = {10000, -100, 10000, -100};

    for (Page page : pages) {
        if (page.getTextBlocks().isEmpty() || landscape != page.isLandscape()) {
            continue;
        }
        for (AbstractTextContainer container : page.getTextBlocks()) {
            if (container instanceof TextBlock) {
                TextBlock paragraph = (TextBlock) container;
                if (paragraph.getMostPopularWordFont() == null || paragraph.getMostPopularWordStyle() == null) {
                    continue;
                }
                float approxLineCount = PositionUtils.getApproxLineCount(paragraph);
                if (approxLineCount < 2.9f) {
                    continue;
                }
                if (documentFontSizeCounter.getMostPopular() != null
                        && paragraph.getMostPopularWordFontSize() >= documentFontSizeCounter.getMostPopular()) {
                    growFrame(bounds, paragraph);
                }
            }
            if (container instanceof Table) {
                for (List<Cell> row : ((Table) container).getRows()) {
                    for (Cell cell : row) {
                        if (cell == null || cell.getTextBlocks() == null) {
                            continue;
                        }
                        for (TextBlock cellBlock : cell.getTextBlocks()) {
                            growFrame(bounds, cellBlock);
                        }
                    }
                }
            }
        }
    }
    return new Rectangle(bounds[2], bounds[0], bounds[1] - bounds[0], bounds[3] - bounds[2]);
}

/** Enlarges {@code bounds} ({minX, maxX, minY, maxY}) so that it also covers the given block. */
private static void growFrame(float[] bounds, TextBlock block) {
    if (block.getMinX() < bounds[0]) {
        bounds[0] = block.getMinX();
    }
    if (block.getMaxX() > bounds[1]) {
        bounds[1] = block.getMaxX();
    }
    if (block.getMinY() < bounds[2]) {
        bounds[2] = block.getMinY();
    }
    if (block.getMaxY() > bounds[3]) {
        bounds[3] = block.getMaxY();
    }
}
}

View File

@ -0,0 +1,91 @@
package com.iqser.red.service.redaction.v1.server.classification.service;
import java.util.List;
import java.util.regex.Pattern;
import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.server.classification.utils.PositionUtils;
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle;
import lombok.RequiredArgsConstructor;
/**
 * Labels the text blocks of a document (for example "Header", "Footer",
 * "Title", "H n", "TextBlock" and its variants) based on their position
 * relative to the body text frame and on how their font size and style compare
 * to the most frequent values of the whole document.
 */
@Service
@RequiredArgsConstructor
public class ClassificationService {

    /** Matches text that consists of digits only; compiled once instead of once per classified block. */
    private static final Pattern DIGITS_ONLY = Pattern.compile("[0-9]+");

    private final BlockificationService blockificationService;

    /**
     * Classifies every text block of every page of the document.
     * <p>
     * A body text frame is computed separately for portrait and landscape
     * pages; each page gets the frame matching its orientation assigned before
     * its blocks are classified.
     *
     * @param document the document to classify; its pages and blocks are modified in place
     */
    public void classifyDocument(Document document) {
        Rectangle bodyTextFrame = blockificationService.calculateBodyTextFrame(document.getPages(), document.getFontSizeCounter(), false);
        Rectangle landscapeBodyTextFrame = blockificationService.calculateBodyTextFrame(document.getPages(), document.getFontSizeCounter(), true);
        List<Float> headlineFontSizes = document.getFontSizeCounter().getHighterThanMostPopular();

        for (Page page : document.getPages()) {
            Rectangle btf = page.isLandscape() ? landscapeBodyTextFrame : bodyTextFrame;
            page.setBodyTextFrame(btf);
            classifyPage(btf, page, document, headlineFontSizes);
        }
    }

    /**
     * Classifies all plain text blocks of one page; other containers are skipped.
     *
     * @param bodyTextFrame     body text frame valid for this page
     * @param page              the page whose blocks are classified
     * @param document          the document the page belongs to
     * @param headlineFontSizes font sizes treated as headline sizes; index {@code i} maps to level "H i+1"
     */
    public void classifyPage(Rectangle bodyTextFrame, Page page, Document document, List<Float> headlineFontSizes) {
        for (AbstractTextContainer textBlock : page.getTextBlocks()) {
            if (textBlock instanceof TextBlock) {
                classifyBlock((TextBlock) textBlock, bodyTextFrame, page, document, headlineFontSizes);
            }
        }
    }

    /**
     * Assigns a classification to a single text block. The rules are evaluated
     * in order and the first matching rule wins; a block that matches no rule
     * keeps its current classification. Nothing is classified when the
     * document has no most popular font size.
     *
     * @param textBlock         the block to classify
     * @param bodyTextFrame     body text frame of the page
     * @param page              the page containing the block
     * @param document          the document; flagged as having headlines when a headline is found
     * @param headlineFontSizes font sizes treated as headline sizes
     */
    public void classifyBlock(TextBlock textBlock, Rectangle bodyTextFrame, Page page, Document document, List<Float> headlineFontSizes) {
        Float mostPopularFontSize = document.getFontSizeCounter().getMostPopular();
        if (mostPopularFontSize == null) {
            // TODO Figure out why this happens.
            return;
        }
        // Unboxed once: guarantees that every comparison below is numeric and
        // never an identity comparison of two boxed Float objects.
        final float documentFontSize = mostPopularFontSize;

        if (PositionUtils.isOverBodyTextFrame(bodyTextFrame, textBlock, page.isRotated())
                && textBlock.getHighestFontSize() <= documentFontSize) {
            textBlock.setClassification("Header");
        } else if (PositionUtils.isUnderBodyTextFrame(bodyTextFrame, textBlock)
                && textBlock.getHighestFontSize() <= documentFontSize) {
            textBlock.setClassification("Footer");
        } else if (page.getPageNumber() == 1
                && (!PositionUtils.isTouchingUnderBodyTextFrame(bodyTextFrame, textBlock)
                && PositionUtils.getHeightDifferenceBetweenChunkWordAndDocumentWord(textBlock, document.getTextHeightCounter().getMostPopular()) > 2.5
                && textBlock.getHighestFontSize() > documentFontSize
                || page.getTextBlocks().size() == 1)) {
            // A purely numeric candidate stays unclassified instead of becoming the title.
            if (!DIGITS_ONLY.matcher(textBlock.toString()).matches()) {
                textBlock.setClassification("Title");
            }
        } else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock)
                && textBlock.getMostPopularWordFontSize() > documentFontSize
                && PositionUtils.getApproxLineCount(textBlock) < 4.9
                && textBlock.getMostPopularWordStyle().equals("bold")) {
            // The position of the matching size within the list is the headline level.
            for (int i = 1; i <= headlineFontSizes.size(); i++) {
                if (textBlock.getMostPopularWordFontSize() == headlineFontSizes.get(i - 1).floatValue()) {
                    textBlock.setClassification("H " + i);
                    document.setHeadlines(true);
                }
            }
        } else if (!textBlock.getText().startsWith("Table ")
                && !textBlock.getText().startsWith("Figure ")
                && PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock)
                && textBlock.getMostPopularWordFontSize() == documentFontSize
                && textBlock.getMostPopularWordStyle().equals("bold")
                && !document.getFontStyleCounter().getMostPopular().equals("bold")
                && PositionUtils.getApproxLineCount(textBlock) < 2.9) {
            // Short bold block in body font size: one level below the size-based headlines.
            textBlock.setClassification("H " + (headlineFontSizes.size() + 1));
            document.setHeadlines(true);
        } else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock)
                && textBlock.getMostPopularWordFontSize() == documentFontSize
                && textBlock.getMostPopularWordStyle().equals("bold")
                && !document.getFontStyleCounter().getMostPopular().equals("bold")) {
            textBlock.setClassification("TextBlock Bold");
        } else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock)
                && textBlock.getMostPopularWordFont().equals(document.getFontCounter().getMostPopular())
                && textBlock.getMostPopularWordStyle().equals(document.getFontStyleCounter().getMostPopular())
                && textBlock.getMostPopularWordFontSize() == documentFontSize) {
            textBlock.setClassification("TextBlock");
        } else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock)
                && textBlock.getMostPopularWordFontSize() == documentFontSize
                && textBlock.getMostPopularWordStyle().equals("italic")
                && !document.getFontStyleCounter().getMostPopular().equals("italic")
                && PositionUtils.getApproxLineCount(textBlock) < 2.9) {
            textBlock.setClassification("TextBlock Italic");
        } else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock)
                && textBlock.getSequences().size() > 3) {
            textBlock.setClassification("TextBlock Unknown");
        }
    }

}

View File

@ -0,0 +1,95 @@
package com.iqser.red.service.redaction.v1.server.classification.utils;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle;
import lombok.experimental.UtilityClass;
/**
 * Geometric helpers relating a {@link TextBlock} to the body text frame of its page.
 */
@UtilityClass
@SuppressWarnings("all")
public class PositionUtils {

    /**
     * Checks whether the block lies inside the body text frame, allowing it to
     * stick out on every side by three times its most popular word height.
     *
     * @return {@code false} if either argument is {@code null}
     */
    public boolean isWithinBodyTextFrame(Rectangle btf, TextBlock textBlock) {
        //TODO Currently this is not working for rotated pages.
        if (btf == null || textBlock == null) {
            return false;
        }
        double tolerance = textBlock.getMostPopularWordHeight() * 3;
        return textBlock.getMinX() + tolerance > btf.getX()
                && textBlock.getMaxX() - tolerance < btf.getX() + btf.getWidth()
                && textBlock.getMinY() + tolerance > btf.getY()
                && textBlock.getMaxY() - tolerance < btf.getY() + btf.getHeight();
    }

    /**
     * Checks whether the block lies above the body text frame.
     *
     * @param rotated whether the page is rotated; the test then uses the X axis
     * @return {@code false} if either the frame or the block is {@code null}
     */
    public boolean isOverBodyTextFrame(Rectangle btf, TextBlock textBlock, boolean rotated) {
        if (btf == null || textBlock == null) {
            return false;
        }
        // It is very strange: on rotated pages P{0,0} is at the top left instead of the lower left.
        return rotated
                ? textBlock.getMinX() < btf.getX()
                : textBlock.getMinY() > btf.getY() + btf.getHeight();
    }

    /**
     * Checks whether the block lies completely below the body text frame
     * (its maximum Y is smaller than the frame's Y).
     *
     * @return {@code false} if either argument is {@code null}
     */
    public boolean isUnderBodyTextFrame(Rectangle btf, TextBlock textBlock) {
        //TODO Currently this is not working for rotated pages.
        if (btf == null || textBlock == null) {
            return false;
        }
        return textBlock.getMaxY() < btf.getY();
    }

    /**
     * Checks whether the block reaches below the body text frame
     * (its minimum Y is smaller than the frame's Y).
     *
     * @return {@code false} if either argument is {@code null}
     */
    public boolean isTouchingUnderBodyTextFrame(Rectangle btf, TextBlock textBlock) {
        //TODO Currently this is not working for rotated pages.
        if (btf == null || textBlock == null) {
            return false;
        }
        return textBlock.getMinY() < btf.getY();
    }

    /**
     * Returns the block's most popular word height minus the given document-wide word height.
     *
     * @param documentMostPopularWordHeight document-wide value; it is unboxed, so it must not be {@code null}
     */
    public float getHeightDifferenceBetweenChunkWordAndDocumentWord(TextBlock textBlock, Float documentMostPopularWordHeight) {
        return textBlock.getMostPopularWordHeight() - documentMostPopularWordHeight;
    }

    /**
     * Estimates the number of lines of a block as its height divided by its most popular word height.
     */
    public Float getApproxLineCount(TextBlock textBlock) {
        return textBlock.getHeight() / textBlock.getMostPopularWordHeight();
    }

}

View File

@ -0,0 +1,137 @@
package com.iqser.red.service.redaction.v1.server.controller;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RestController;
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
import com.iqser.red.service.redaction.v1.model.RedactionResult;
import com.iqser.red.service.redaction.v1.resources.RedactionResource;
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
import com.iqser.red.service.redaction.v1.server.exception.RedactionException;
import com.iqser.red.service.redaction.v1.server.redaction.service.DroolsExecutionService;
import com.iqser.red.service.redaction.v1.server.redaction.service.EntityRedactionService;
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
import com.iqser.red.service.redaction.v1.server.visualization.service.AnnotationHighlightService;
import com.iqser.red.service.redaction.v1.server.visualization.service.PdfFlattenService;
import com.iqser.red.service.redaction.v1.server.visualization.service.PdfVisualisationService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@RestController
@RequiredArgsConstructor
public class RedactionController implements RedactionResource {
private final PdfVisualisationService pdfVisualisationService;
private final PdfSegmentationService pdfSegmentationService;
private final AnnotationHighlightService annotationHighlightService;
private final EntityRedactionService entityRedactionService;
private final PdfFlattenService pdfFlattenService;
private final DroolsExecutionService droolsExecutionService;
/**
 * Parses the uploaded PDF, runs the entity redaction on it, highlights the
 * result in the document and returns the serialized PDF.
 * <p>
 * When a flat redaction is requested the highlighted document is flattened
 * first and the flattened document is returned instead.
 *
 * @param redactionRequest request carrying the PDF bytes and the flat redaction flag
 * @return the resulting PDF together with the number of classified pages
 * @throws RedactionException if the PDF cannot be read, processed or written
 */
public RedactionResult redact(@RequestBody RedactionRequest redactionRequest) {
    try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(redactionRequest.getDocument()))) {
        pdDocument.setAllSecurityToBeRemoved(true);

        Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
        entityRedactionService.processDocument(classifiedDoc);
        annotationHighlightService.highlight(pdDocument, classifiedDoc, redactionRequest.isFlatRedaction());
        int numberOfPages = classifiedDoc.getPages().size();

        if (redactionRequest.isFlatRedaction()) {
            // The flattened document is a resource of its own and has to be
            // released as well. Should the service hand back the very same
            // instance, the second close by the outer block is a no-op.
            try (PDDocument flatDocument = pdfFlattenService.flattenPDF(pdDocument)) {
                return convert(flatDocument, numberOfPages);
            }
        }
        return convert(pdDocument, numberOfPages);
    } catch (IOException e) {
        throw new RedactionException(e);
    }
}
/**
 * Parses the uploaded PDF and draws the block classifications into it.
 *
 * @param pdfSegmentationRequest request carrying the PDF bytes
 * @return the annotated PDF together with the number of classified pages
 * @throws RedactionException if the PDF cannot be read or written
 */
public RedactionResult classify(@RequestBody RedactionRequest pdfSegmentationRequest) {
    byte[] pdfBytes = pdfSegmentationRequest.getDocument();
    try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(pdfBytes))) {
        pdDocument.setAllSecurityToBeRemoved(true);
        Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
        pdfVisualisationService.visualizeClassifications(classifiedDoc, pdDocument);
        int pageCount = classifiedDoc.getPages().size();
        return convert(pdDocument, pageCount);
    } catch (IOException ioException) {
        throw new RedactionException(ioException);
    }
}
/**
 * Parses the uploaded PDF and draws the detected paragraphs into it.
 *
 * @param redactionRequest request carrying the PDF bytes
 * @return the annotated PDF together with the number of classified pages
 * @throws RedactionException if the PDF cannot be read or written
 */
public RedactionResult sections(@RequestBody RedactionRequest redactionRequest) {
    byte[] pdfBytes = redactionRequest.getDocument();
    try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(pdfBytes))) {
        pdDocument.setAllSecurityToBeRemoved(true);
        Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);
        pdfVisualisationService.visualizeParagraphs(classifiedDoc, pdDocument);
        int pageCount = classifiedDoc.getPages().size();
        return convert(pdDocument, pageCount);
    } catch (IOException ioException) {
        throw new RedactionException(ioException);
    }
}
/**
 * Parses the uploaded PDF and returns all detected tables rendered as HTML.
 * <p>
 * The HTML of each table is followed by two {@code <br />} elements. The
 * markup is encoded as UTF-8 so that the response does not depend on the
 * default charset of the JVM the service happens to run on.
 *
 * @param redactionRequest request carrying the PDF bytes
 * @return a result whose document is the UTF-8 encoded HTML; no page count is set
 * @throws RedactionException if the PDF cannot be read
 */
public RedactionResult htmlTables(@RequestBody RedactionRequest redactionRequest) {
    try (PDDocument pdDocument = PDDocument.load(new ByteArrayInputStream(redactionRequest.getDocument()))) {
        pdDocument.setAllSecurityToBeRemoved(true);

        Document classifiedDoc = pdfSegmentationService.parseDocument(pdDocument);

        StringBuilder sb = new StringBuilder();
        for (Page page : classifiedDoc.getPages()) {
            for (AbstractTextContainer textContainer : page.getTextBlocks()) {
                if (textContainer instanceof Table) {
                    Table table = (Table) textContainer;
                    sb.append(table.getTextAsHtml()).append("<br />").append("<br />");
                }
            }
        }
        return RedactionResult.builder().document(sb.toString().getBytes(StandardCharsets.UTF_8)).build();
    } catch (IOException e) {
        throw new RedactionException(e);
    }
}
/**
 * Returns the rules as reported by the {@link DroolsExecutionService}.
 *
 * @return the value of {@code droolsExecutionService.getRules()}
 */
public String getRules() {
return droolsExecutionService.getRules();
}
/**
 * Hands the rules from the request body to the {@link DroolsExecutionService}.
 *
 * @param rules the new rules, passed through unchanged
 */
public void updateRules(@RequestBody String rules) {
droolsExecutionService.updateRules(rules);
}
/**
 * Serializes the PDF document into a {@link RedactionResult}.
 *
 * @param document      the document to save
 * @param numberOfPages page count to report in the result
 * @return result holding the saved PDF bytes and the page count
 * @throws IOException if the document cannot be saved
 */
private RedactionResult convert(PDDocument document, int numberOfPages) throws IOException {
    try (ByteArrayOutputStream buffer = new ByteArrayOutputStream()) {
        document.save(buffer);
        byte[] pdfBytes = buffer.toByteArray();
        return RedactionResult.builder().document(pdfBytes).numberOfPages(numberOfPages).build();
    }
}
}

View File

@ -0,0 +1,13 @@
package com.iqser.red.service.redaction.v1.server.exception;
/**
 * Unchecked exception with the fixed message "Could not parse document".
 */
public class RedactionException extends RuntimeException {
/**
 * Creates the exception and keeps the original failure as its cause.
 *
 * @param cause the failure that triggered this exception
 */
public RedactionException(Throwable cause) {
super("Could not parse document", cause);
}
/**
 * Creates the exception without a cause.
 */
public RedactionException() {
super("Could not parse document");
}
}

View File

@ -0,0 +1,256 @@
package com.iqser.red.service.redaction.v1.server.parsing;
import java.awt.geom.Point2D;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.pdfbox.contentstream.operator.Operator;
import org.apache.pdfbox.contentstream.operator.OperatorName;
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColor;
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColorN;
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingColorSpace;
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceCMYKColor;
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceGrayColor;
import org.apache.pdfbox.contentstream.operator.color.SetNonStrokingDeviceRGBColor;
import org.apache.pdfbox.contentstream.operator.color.SetStrokingColor;
import org.apache.pdfbox.contentstream.operator.color.SetStrokingColorN;
import org.apache.pdfbox.contentstream.operator.color.SetStrokingColorSpace;
import org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceCMYKColor;
import org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceGrayColor;
import org.apache.pdfbox.contentstream.operator.color.SetStrokingDeviceRGBColor;
import org.apache.pdfbox.contentstream.operator.state.SetFlatness;
import org.apache.pdfbox.contentstream.operator.state.SetLineCapStyle;
import org.apache.pdfbox.contentstream.operator.state.SetLineDashPattern;
import org.apache.pdfbox.contentstream.operator.state.SetLineJoinStyle;
import org.apache.pdfbox.contentstream.operator.state.SetLineMiterLimit;
import org.apache.pdfbox.contentstream.operator.state.SetLineWidth;
import org.apache.pdfbox.contentstream.operator.state.SetRenderingIntent;
import org.apache.pdfbox.contentstream.operator.text.SetFontAndSize;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSNumber;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.TextPosition;
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
import lombok.Getter;
import lombok.Setter;
import lombok.extern.slf4j.Slf4j;
@Slf4j
public class PDFLinesTextStripper extends PDFTextStripper {
@Getter
private float minCharWidth = Float.MAX_VALUE;
@Getter
private float minCharHeight = Float.MAX_VALUE;
@Getter
private final List<TextPositionSequence> textPositionSequences = new ArrayList<>();
@Getter
private final List<Ruling> rulings = new ArrayList<>();
private final List<Ruling> graphicsPath = new ArrayList<>();
private float path_x;
private float path_y;
@Setter
private int pageNumber;
/**
 * Creates the stripper and registers the additional color, line-state and
 * font operators listed below with the underlying stream engine.
 *
 * @throws IOException if the parent text stripper cannot be initialized
 */
public PDFLinesTextStripper() throws IOException {
    addOperator(new SetStrokingColorSpace());
    addOperator(new SetNonStrokingColorSpace());
    addOperator(new SetLineDashPattern());
    addOperator(new SetStrokingDeviceGrayColor());
    addOperator(new SetNonStrokingDeviceGrayColor());
    addOperator(new SetFlatness());
    addOperator(new SetLineJoinStyle());
    addOperator(new SetLineCapStyle());
    addOperator(new SetStrokingDeviceCMYKColor());
    addOperator(new SetNonStrokingDeviceCMYKColor());
    addOperator(new SetLineMiterLimit());
    addOperator(new SetStrokingDeviceRGBColor());
    addOperator(new SetNonStrokingDeviceRGBColor());
    addOperator(new SetRenderingIntent());
    addOperator(new SetStrokingColor());
    addOperator(new SetNonStrokingColor());
    addOperator(new SetStrokingColorN());
    addOperator(new SetNonStrokingColorN());
    addOperator(new SetFontAndSize());
    addOperator(new SetLineWidth());
}
/**
 * Intercepts the path construction and painting operators to collect ruling
 * lines, then delegates every operator to the parent implementation.
 * <p>
 * Line and rectangle operators append segments to the pending graphics path.
 * Fill and stroke operators hand the pending path to
 * {@code addVisibleRulings} and clear it; the end-path operator just clears it.
 *
 * @param operator  the operator being processed
 * @param arguments the operands of the operator
 * @throws IOException if processing the operator fails
 */
@Override
protected void processOperator(Operator operator, List<COSBase> arguments)
throws IOException {
String operation = operator.getName();
switch (operation) {
// move: remember the transformed position as the start of the next segment
case OperatorName.MOVE_TO:
if (arguments.size() == 2) {
Point2D.Float pos = transformPosition(floatValue(arguments.get(0)), floatValue(arguments.get(1)));
path_x = (float) pos.getX();
path_y = (float) pos.getY();
}
break;
// line: add a segment from the current position to the transformed target
case OperatorName.LINE_TO:
if (arguments.size() == 2) {
Point2D.Float pos = transformPosition(floatValue(arguments.get(0)), floatValue(arguments.get(1)));
// The direction of vertical lines must always be from bottom to top for the table extraction algorithm.
if (pos.getY() > path_y) {
graphicsPath.add(new Ruling(new Point2D.Float(path_x, path_y), new Point2D.Float((float) pos.getX(), (float) pos.getY())));
} else {
// NOTE(review): only the Y coordinates are swapped here, the X coordinates keep their order.
// That is a pure reversal for vertical segments; a diagonal segment ends up mirrored - confirm this is intended.
graphicsPath.add(new Ruling(new Point2D.Float(path_x, (float) pos.getY()), new Point2D.Float((float) pos.getX(), path_y)));
}
path_x = (float) pos.getX();
path_y = (float) pos.getY();
}
break;
// rectangle: add its four edges as individual segments
case OperatorName.APPEND_RECT:
if (arguments.size() == 4) {
float x = floatValue(arguments.get(0));
float y = floatValue(arguments.get(1));
float width = floatValue(arguments.get(2));
float height = floatValue(arguments.get(3));
// p1 and p2 are the two opposite corners after transformation
Point2D p1 = transformPosition(x, y);
Point2D p2 = transformPosition(x + width, y + height);
// Horizontal lines
graphicsPath.add(new Ruling(new Point2D.Float((float) p1.getX(), (float) p1.getY()), new Point2D.Float((float) p2.getX(), (float) p1.getY())));
graphicsPath.add(new Ruling(new Point2D.Float((float) p1.getX(), (float) p2.getY()), new Point2D.Float((float) p2.getX(), (float) p2.getY())));
// Vertical lines, direction must always be from bottom to top for the table extraction algorithm.
// Edge at p2's X coordinate:
if (p2.getY() > p1.getY()) {
graphicsPath.add(new Ruling(new Point2D.Float((float) p2.getX(), (float) p1.getY()), new Point2D.Float((float) p2.getX(), (float) p2.getY())));
} else {
graphicsPath.add(new Ruling(new Point2D.Float((float) p2.getX(), (float) p2.getY()), new Point2D.Float((float) p2.getX(), (float) p1.getY())));
}
// Edge at p1's X coordinate:
if (p2.getY() > p1.getY()) {
graphicsPath.add(new Ruling(new Point2D.Float((float) p1.getX(), (float) p1.getY()), new Point2D.Float((float) p1.getX(), (float) p2.getY())));
} else {
graphicsPath.add(new Ruling(new Point2D.Float((float) p1.getX(), (float) p2.getY()), new Point2D.Float((float) p1.getX(), (float) p1.getY())));
}
}
break;
// fill: evaluate the pending path against the non-stroking color
case OperatorName.FILL_NON_ZERO:
case OperatorName.LEGACY_FILL_NON_ZERO:
case OperatorName.FILL_EVEN_ODD:
addVisibleRulings(graphicsPath, false);
graphicsPath.clear();
break;
// stroke: evaluate the pending path against the stroking color
case OperatorName.STROKE_PATH:
addVisibleRulings(graphicsPath, true);
graphicsPath.clear();
break;
// cancel path: discard the pending path without collecting anything
case OperatorName.ENDPATH:
graphicsPath.clear();
break;
}
super.processOperator(operator, arguments);
}
/**
 * Reads a numeric operand as float.
 *
 * @param value the operand
 * @return the float value of a {@link COSNumber}, or {@code 0} for anything else
 */
private float floatValue(COSBase value) {
    return value instanceof COSNumber ? ((COSNumber) value).floatValue() : 0;
}
/**
 * Maps a point by delegating to the parent's {@code transformedPoint(x, y)}.
 *
 * @param x the x coordinate as given in the operator arguments
 * @param y the y coordinate as given in the operator arguments
 * @return the transformed point
 */
private Point2D.Float transformPosition(float x, float y) {
return super.transformedPoint(x, y);
}
/**
 * Adds the segments of the given path to the collected rulings if the color
 * used for painting them is not a pattern and its RGB value is {@code 0}.
 * <p>
 * Color spaces that cannot be converted to RGB are logged and the path is
 * not collected.
 *
 * @param path   the segments of the path being painted
 * @param stroke {@code true} to check the stroking color, {@code false} for the non-stroking color
 * @throws IOException if the color conversion fails
 */
private void addVisibleRulings(List<Ruling> path, boolean stroke) throws IOException {
    try {
        boolean paintedBlack;
        if (stroke) {
            paintedBlack = !getGraphicsState().getStrokingColor().isPattern()
                    && getGraphicsState().getStrokingColor().toRGB() == 0;
        } else {
            paintedBlack = !getGraphicsState().getNonStrokingColor().isPattern()
                    && getGraphicsState().getNonStrokingColor().toRGB() == 0;
        }
        if (paintedBlack) {
            rulings.addAll(path);
        }
    } catch (UnsupportedOperationException e) {
        log.error("UnsupportedOperationException: " + getGraphicsState().getStrokingColor().getColorSpace().getName() + " or " + getGraphicsState().getNonStrokingColor().getColorSpace().getName() + " does not support toRGB");
    }
}
/**
 * Splits the text positions of one written string into word sequences and
 * records them, while tracking the smallest character width and height seen.
 * <p>
 * A new sequence starts after a space character and whenever the X coordinate
 * of a position is smaller than that of its predecessor. Empty sequences and
 * sequences consisting of a single space are dropped.
 * <p>
 * The recorded sequences are {@code subList} views of {@code textPositions},
 * not copies.
 *
 * @param text          the string being written
 * @param textPositions the positions of the characters of that string
 * @throws IOException if the parent implementation fails
 */
@Override
public void writeString(String text, List<TextPosition> textPositions) throws IOException {
// index of the first position belonging to the word currently being collected
int startIndex = 0;
for (int i = 0; i <= textPositions.size() - 1; i++) {
minCharWidth = Math.min(minCharWidth, textPositions.get(i).getWidthDirAdj());
minCharHeight = Math.min(minCharHeight, textPositions.get(i).getHeightDir());
// skip a leading space
if (i == 0 && textPositions.get(i).getUnicode().equals(" ")) {
startIndex++;
continue;
}
// Strange but sometimes this is happening, for example: Metolachlor2.pdf
// The X coordinate jumps backwards: close the current word before position i.
if (i > 0 && textPositions.get(i).getX() < textPositions.get(i - 1).getX()) {
List<TextPosition> sublist = textPositions.subList(startIndex, i);
if (!(sublist.isEmpty() || sublist.size() == 1 && sublist.get(0).getUnicode().equals(" "))) {
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
}
startIndex = i;
}
// A space that is not the last position ends the current word; the next word starts behind it.
if (i > 0 && textPositions.get(i).getUnicode().equals(" ") && i <= textPositions.size() - 2) {
List<TextPosition> sublist = textPositions.subList(startIndex, i);
if (!(sublist.isEmpty() || sublist.size() == 1 && sublist.get(0).getUnicode().equals(" "))) {
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
}
startIndex = i + 1;
}
}
// The remainder forms the last word; a trailing space is cut off first.
List<TextPosition> sublist = textPositions.subList(startIndex, textPositions.size());
if (!sublist.isEmpty() && sublist.get(sublist.size() - 1).getUnicode().equals(" ")) {
sublist = sublist.subList(0, sublist.size() - 1);
}
if (!(sublist.isEmpty() || sublist.size() == 1 && sublist.get(0).getUnicode().equals(" "))) {
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
}
super.writeString(text);
}
/**
 * Resets all state collected by a previous run (sequences, rulings, pending
 * path, current path position and minimum character dimensions) and then
 * extracts the text of the document via the parent implementation.
 *
 * @param doc the document to process
 * @return the text returned by the parent implementation
 * @throws IOException if the extraction fails
 */
@Override
public String getText(PDDocument doc) throws IOException {
    // collected results
    textPositionSequences.clear();
    rulings.clear();

    // path construction state
    graphicsPath.clear();
    path_x = 0.0f;
    path_y = 0.0f;

    // character size statistics
    minCharWidth = Float.MAX_VALUE;
    minCharHeight = Float.MAX_VALUE;

    return super.getText(doc);
}
}

View File

@ -0,0 +1,22 @@
package com.iqser.red.service.redaction.v1.server.parsing.model;
import java.util.List;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
import lombok.Builder;
import lombok.Data;
/**
 * Value holder bundling text sequences, ruling lines, two orientation flags
 * and the minimum character dimensions. Accessors and the builder are
 * generated by Lombok.
 */
@Data
@Builder
public class ParsedElements {
// the text sequences
private List<TextPositionSequence> sequences;
// the ruling lines
private List<Ruling> rulings;
// orientation flags; NOTE(review): presumably refer to the parsed page - confirm against the producer
private boolean landscape;
private boolean rotated;
// NOTE(review): presumably the minimum character width/height reported by PDFLinesTextStripper - confirm
private float minCharWidth;
private float minCharHeight;
}

View File

@ -0,0 +1,140 @@
package com.iqser.red.service.redaction.v1.server.parsing.model;
import java.util.ArrayList;
import java.util.List;
import org.apache.pdfbox.text.TextPosition;
import lombok.Data;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import lombok.Setter;
/**
 * A run of PDF text positions on one page, exposed as a {@link CharSequence}
 * together with its geometry and font information.
 * <p>
 * All geometry and font accessors read the first (and, for the far edge, the
 * last) text position and therefore require a non-empty sequence. A rotation
 * of 90 switches the axes the coordinates are taken from.
 */
@Data
@RequiredArgsConstructor
public class TextPositionSequence implements CharSequence {

    private List<TextPosition> textPositions = new ArrayList<>();

    @Getter
    @Setter
    private float[] annotationColor;

    private final int page;

    /**
     * Creates a sequence backed by the given list; the list is not copied.
     *
     * @param textPositions the positions forming the sequence
     * @param page          the page the sequence belongs to
     */
    public TextPositionSequence(List<TextPosition> textPositions, int page){
        this.textPositions = textPositions;
        this.page = page;
    }

    /** Number of text positions in this sequence. */
    @Override
    public int length() {
        return textPositions.size();
    }

    /** First character of the unicode text of the position at {@code index}. */
    @Override
    public char charAt(int index) {
        return textPositionAt(index).getUnicode().charAt(0);
    }

    /** Sequence over the sub list {@code [start, end)} of the positions, on the same page. */
    @Override
    public TextPositionSequence subSequence(int start, int end) {
        List<TextPosition> slice = textPositions.subList(start, end);
        return new TextPositionSequence(slice, page);
    }

    /** Concatenation of the first character of every text position. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder(length());
        for (TextPosition position : textPositions) {
            text.append(position.getUnicode().charAt(0));
        }
        return text.toString();
    }

    /** The text position at the given index. */
    public TextPosition textPositionAt(int index) {
        return textPositions.get(index);
    }

    /** Appends a text position to the backing list. */
    public void add(TextPosition textPosition) {
        this.textPositions.add(textPosition);
    }

    public float getX1() {
        return isRotated90()
                ? first().getYDirAdj() - getTextHeight()
                : first().getXDirAdj();
    }

    public float getX2() {
        if (isRotated90()) {
            return first().getYDirAdj();
        }
        return last().getXDirAdj() + last().getWidth() + 1;
    }

    public float getY1() {
        return isRotated90()
                ? first().getXDirAdj()
                : first().getPageHeight() - first().getYDirAdj();
    }

    public float getY2() {
        if (isRotated90()) {
            return last().getXDirAdj() + getTextHeight() -2 ;
        }
        return first().getPageHeight() - first().getYDirAdj() + getTextHeight();
    }

    /** Height of the first text position plus 2. */
    public float getTextHeight() {
        return first().getHeightDir() + 2;
    }

    public float getHeight() {
        return getY2() - getY1();
    }

    public float getWidth() {
        return getX2() - getX1();
    }

    /** Lower-cased font description of the first position with ",bold" and ",italic" removed. */
    public String getFont() {
        String fontName = first().getFont().toString().toLowerCase();
        return fontName.replace(",bold", "").replace(",italic", "");
    }

    /**
     * Font style derived from the lower-cased font description of the first position.
     *
     * @return "bold, italic", "bold", "italic" or "standard"
     */
    public String getFontStyle() {
        String lowercaseFontName = first().getFont().toString().toLowerCase();
        boolean bold = lowercaseFontName.contains("bold");
        boolean italic = lowercaseFontName.contains("italic");
        if (bold) {
            return italic ? "bold, italic" : "bold";
        }
        return italic ? "italic" : "standard";
    }

    public float getFontSize() {
        return first().getFontSizeInPt();
    }

    public float getSpaceWidth() {
        return first().getWidthOfSpace();
    }

    public int getRotation() {
        return first().getRotation();
    }

    /** First text position of the sequence. */
    private TextPosition first() {
        return textPositions.get(0);
    }

    /** Last text position of the sequence. */
    private TextPosition last() {
        return textPositions.get(textPositions.size() - 1);
    }

    /** Whether the first text position reports a rotation of 90. */
    private boolean isRotated90() {
        return first().getRotation() == 90;
    }

}

View File

@ -0,0 +1,34 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
import java.util.ArrayList;
import java.util.List;
import lombok.Data;
/**
 * An entity described by a word and a type. Depending on the constructor used
 * it carries either redaction details with position sequences or a
 * start/end pair. Accessors are generated by Lombok.
 */
@Data
public class Entity {
private final String word;
private final String type;
private boolean redaction;
private String redactionReason;
private List<EntityPositionSequence> positionSequences = new ArrayList<>();
// NOTE(review): presumably offsets of the word within a searched text - confirm against the callers
private Integer start;
private Integer end;
/**
 * Creates an entity with redaction details; {@code start} and {@code end} stay {@code null}.
 *
 * @param word              the word of the entity
 * @param type              the type of the entity
 * @param redaction         the redaction flag
 * @param redactionReason   the reason stored alongside the flag
 * @param positionSequences the position sequences of the entity; replaces the default empty list
 */
public Entity(String word, String type, boolean redaction, String redactionReason, List<EntityPositionSequence> positionSequences) {
this.word = word;
this.type = type;
this.redaction = redaction;
this.redactionReason = redactionReason;
this.positionSequences = positionSequences;
}
/**
 * Creates an entity with a start/end pair; the redaction flag stays {@code false},
 * the reason {@code null} and the position sequences an empty list.
 *
 * @param word  the word of the entity
 * @param type  the type of the entity
 * @param start the start value
 * @param end   the end value
 */
public Entity(String word, String type, Integer start, Integer end) {
this.word = word;
this.type = type;
this.start = start;
this.end = end;
}
}

View File

@ -0,0 +1,20 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
import lombok.Data;
import lombok.RequiredArgsConstructor;
/**
 * Holds the text position sequences of one match together with a page number and an id.
 * The id is the only constructor argument ({@code @RequiredArgsConstructor} covers the
 * single final field); sequences and page number are filled in afterwards via the
 * Lombok-generated accessors.
 */
@Data
@RequiredArgsConstructor
public class EntityPositionSequence {
// Sequences belonging to this part of the match; starts empty and is filled by the producer.
private List<TextPositionSequence> sequences = new ArrayList<>();
// Page number assigned by the producer; 0 until it is set.
private int pageNumber;
// Identifier supplied at construction time.
private final UUID id;
}

View File

@ -0,0 +1,162 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
import java.util.regex.Pattern;
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
@SuppressWarnings("all")
public class SearchableText {
private List<TextPositionSequence> sequences = new ArrayList<>();
/**
 * Appends a single text position sequence to the end of this searchable text.
 *
 * @param textPositionSequence sequence to append
 */
public void add(TextPositionSequence textPositionSequence) {
    this.sequences.add(textPositionSequence);
}
/**
 * Appends all given text position sequences, in their list order, to this searchable text.
 *
 * @param textPositionSequences sequences to append
 */
public void addAll(List<TextPositionSequence> textPositionSequences) {
    this.sequences.addAll(textPositionSequences);
}
/**
 * Locates all occurrences of {@code searchString} in the characters of the stored sequences
 * and returns their positions. Line breaks in the search string are treated as spaces, and a
 * match may span several consecutive sequences.
 * <p>
 * The scan walks every character of every sequence while {@code counter} tracks how many
 * characters of the search string have been matched so far. Parts of a match that lie in
 * different sequences are collected in {@code crossSequenceParts}; a completed match is
 * handed to {@code buildEntityPositionSequence} only if it ends at a word boundary.
 * <p>
 * NOTE(review): an empty {@code searchString} yields an empty {@code searchChars} array, so
 * {@code searchChars[counter]} throws as soon as a character is compared - callers should
 * not pass an empty string.
 *
 * @param searchString text to locate
 * @return position sequences of all accepted matches; empty if there are none
 */
public List<EntityPositionSequence> getSequences(String searchString) {
char[] searchChars = searchString.replaceAll("\\n", " ").toCharArray();
// Number of search characters matched so far; 0 means no match is in progress.
int counter = 0;
// Parts of the match in progress, one per sequence touched.
List<TextPositionSequence> crossSequenceParts = new ArrayList<>();
List<EntityPositionSequence> finalMatches = new ArrayList<>();
for (int i = 0; i < sequences.size(); i++) {
TextPositionSequence partMatch = new TextPositionSequence(sequences.get(i).getPage());
for (int j = 0; j < sequences.get(i).length(); j++) {
// Skip a space that directly follows another space (inside this sequence, or at its start
// when the previous sequence ended with a space).
if(i > 0 && j == 0 && sequences.get(i).charAt(0) == ' ' && sequences.get(i - 1).charAt(sequences.get(i - 1).length() - 1) == ' '
|| j > 0 && sequences.get(i).charAt(j) == ' ' && sequences.get(i).charAt(j - 1) == ' '){
// If the skipped space is the last character of the sequence, keep what was matched here so far.
if(j == sequences.get(i).length() -1 && counter != 0 && !partMatch.getTextPositions().isEmpty()){
crossSequenceParts.add(partMatch);
}
continue;
}
// Two adjacent sequences without any space between them: if the search string expects a
// space at this point, consume it without consuming a character of the text.
if(j == 0 && sequences.get(i).charAt(j) != ' ' && i != 0 && sequences.get(i - 1).charAt(sequences.get(i - 1).length() -1) != ' ' && searchChars[counter] == ' '){
counter++;
}
// The character continues the match, or a match is in progress and the character is a hyphen.
// NOTE(review): a hyphen inside a running match is accepted even when the search string
// expects a different character - presumably to tolerate hyphenation; confirm.
if (sequences.get(i).charAt(j) == searchChars[counter] || counter != 0 && sequences.get(i).charAt(j) == '-') {
// A new match may only begin at a word boundary; a running match (counter != 0) always continues.
if(counter != 0 || i == 0 && j == 0 || j != 0 && isSeparator(sequences.get(i).charAt(j - 1)) || j == 0 && i != 0 && isSeparator(sequences.get(i - 1).charAt(sequences.get(i - 1).length() -1))
|| j == 0 && i != 0 && sequences.get(i - 1).charAt(sequences.get(i - 1).length() -1) != ' ' && sequences.get(i).charAt(j) != ' ') {
partMatch.add(sequences.get(i).textPositionAt(j));
// A hyphen at the very end of a sequence that the search string does not expect is recorded
// as a position but does not consume a search character.
if (!(j == sequences.get(i).length() -1 && sequences.get(i).charAt(j) == '-' && searchChars[counter] != '-')) {
counter++;
}
}
// All search characters matched: accept the match only when it is followed by the end of
// the text, a separator, or a sequence boundary without spaces on either side.
if (counter == searchString.length()) {
crossSequenceParts.add(partMatch);
if(i == sequences.size() - 1 && j == sequences.get(i).length() -1
|| j != sequences.get(i).length() -1 && isSeparator(sequences.get(i).charAt(j +1))
|| j == sequences.get(i).length() -1 && isSeparator(sequences.get(i + 1).charAt(0))
|| j == sequences.get(i).length() -1 && sequences.get(i).charAt(j) != ' ' && sequences.get(i + 1).charAt(0) != ' ') {
finalMatches.addAll(buildEntityPositionSequence(crossSequenceParts));
}
// Start over for the next occurrence, whether or not this one was accepted.
counter = 0;
crossSequenceParts = new ArrayList<>();
partMatch = new TextPositionSequence(sequences.get(i).getPage());
}
} else {
// Mismatch: discard the match in progress.
counter = 0;
// If parts had already been collected, step back so the current character is examined
// again as a possible start of a new match.
if(!crossSequenceParts.isEmpty()){
j--;
}
crossSequenceParts = new ArrayList<>();
partMatch = new TextPositionSequence(sequences.get(i).getPage());
}
// End of this sequence reached while a match is still in progress: keep the part collected here.
if(j == sequences.get(i).length() -1 && counter != 0){
crossSequenceParts.add(partMatch);
}
}
}
return finalMatches;
}
/**
 * Groups the parts of one match by page.
 * <p>
 * All returned {@link EntityPositionSequence}s share one freshly generated id. Consecutive
 * parts on the same page end up in the same result element; whenever the page changes, a
 * new element is started and the part that triggered the change becomes its first entry.
 * (Previously that part was dropped and the page tracker was never advanced, so every
 * further part on a later page was lost and empty sequences were emitted.)
 *
 * @param crossSequenceParts parts of a single match, in reading order
 * @return one element per run of parts on the same page; for an empty input a single
 *         element without sequences is returned
 */
private List<EntityPositionSequence> buildEntityPositionSequence(List<TextPositionSequence> crossSequenceParts) {
    UUID id = UUID.randomUUID();
    List<EntityPositionSequence> result = new ArrayList<>();
    EntityPositionSequence current = new EntityPositionSequence(id);
    boolean firstPart = true;
    for (TextPositionSequence part : crossSequenceParts) {
        int page = part.getPage();
        if (firstPart) {
            current.setPageNumber(page);
            firstPart = false;
        } else if (page != current.getPageNumber()) {
            // Page break inside the match: close the current group and open one for the new page.
            result.add(current);
            current = new EntityPositionSequence(id);
            current.setPageNumber(page);
        }
        current.getSequences().add(part);
    }
    result.add(current);
    return result;
}
private boolean isSeparator(char c) {
return Character.isWhitespace(c) || Pattern.matches("\\p{Punct}", String.valueOf(c)) || c == '\"' || c == '' || c == '';
}
/**
 * Returns the text as a single line: sequences are joined, line-break hyphenation is
 * reverted, line breaks become spaces and runs of spaces are collapsed to one space.
 * <p>
 * The collapsing step replaces the former {@code replaceAll(" ", " ")}, which had no effect
 * as written. Runs of any length are collapsed because {@link #getSequences(String)} skips
 * every space that directly follows another space.
 *
 * @return normalised single-line text
 */
@Override
public String toString() {
    return TextNormalizationUtilities.removeHyphenLineBreaks(joinSequences())
            .replaceAll("\n", " ")
            .replaceAll(" {2,}", " ");
}

/**
 * Returns the joined sequences with line breaks preserved and a trailing line break appended.
 *
 * @return text including line breaks
 */
public String getAsStringWithLinebreaks() {
    return joinSequences() + "\n";
}

/**
 * Joins all sequences in order. Two neighbours are separated by a line break when their
 * {@code y1} values differ by more than the text height of the second one, otherwise by a
 * single space.
 */
private String joinSequences() {
    StringBuilder sb = new StringBuilder();
    TextPositionSequence previous = null;
    for (TextPositionSequence word : sequences) {
        if (previous != null) {
            boolean onNewLine = Math.abs(previous.getY1() - word.getY1()) > word.getTextHeight();
            sb.append(onNewLine ? '\n' : ' ');
        }
        sb.append(word.toString());
        previous = word;
    }
    return sb.toString();
}
}

View File

@ -0,0 +1,139 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import lombok.Builder;
import lombok.Data;
@Data
@Builder
public class Section {
private Set<Entity> entities;
// This still contains linebreaks etc.
private String text;
//This does not contain linebreaks and must always be used for correct offsets.
private String searchText;
/**
 * Checks whether this section holds at least one entity of the given type.
 *
 * @param type type code to look for
 * @return {@code true} if an entity with that type exists
 */
public boolean contains(String type) {
    for (Entity entity : entities) {
        if (entity.getType().equals(type)) {
            return true;
        }
    }
    return false;
}
/**
 * Marks every entity of the given type for redaction and records which rule decided it.
 *
 * @param type       type code of the entities to redact
 * @param ruleNumber number of the rule that matched
 * @param reason     explanation appended to the redaction reason
 */
public void redact(String type, int ruleNumber, String reason) {
    final String redactionReason = "\nRule " + ruleNumber + " matched\n\n" + reason;
    for (Entity entity : entities) {
        if (!entity.getType().equals(type)) {
            continue;
        }
        entity.setRedaction(true);
        entity.setRedactionReason(redactionReason);
    }
}
/**
 * Clears the redaction flag of every entity of the given type and records which rule decided it.
 *
 * @param type       type code of the entities that must not be redacted
 * @param ruleNumber number of the rule that matched
 * @param reason     explanation appended to the redaction reason
 */
public void redactNot(String type, int ruleNumber, String reason) {
    final String redactionReason = "\nRule " + ruleNumber + " matched\n\n" + reason;
    for (Entity entity : entities) {
        if (!entity.getType().equals(type)) {
            continue;
        }
        entity.setRedaction(false);
        entity.setRedactionReason(redactionReason);
    }
}
/**
 * Sets the redaction flag of every entity of the given type; the redaction reason is left untouched.
 *
 * @param type type code of the entities to flag
 */
public void highlightAll(String type) {
    for (Entity entity : entities) {
        if (entity.getType().equals(type)) {
            entity.setRedaction(true);
        }
    }
}
/**
 * Takes the rest of the line following the first occurrence of {@code start} in the text
 * with line breaks, registers its occurrences in the search text as entities of type
 * {@code asType}, and marks all entities of that type for redaction.
 *
 * @param start      marker after which the line content is taken
 * @param asType     type code assigned to the found text
 * @param ruleNumber number of the rule that matched
 * @param reason     explanation appended to the redaction reason
 */
public void redactLineAfter(String start, String asType, int ruleNumber, String reason) {
    String lineRemainder = StringUtils.substringBetween(text, start, "\n");
    if (lineRemainder != null) {
        entities.addAll(findEntity(lineRemainder.trim(), asType));
    }
    // TODO No need to iterate
    final String redactionReason = "\nRule " + ruleNumber + " matched\n\n" + reason;
    for (Entity entity : entities) {
        if (entity.getType().equals(asType)) {
            entity.setRedaction(true);
            entity.setRedactionReason(redactionReason);
        }
    }
}
/**
 * Takes the text between the first occurrence of {@code start} and the following
 * {@code stop} in the search text, registers its occurrences as entities of type
 * {@code asType}, and marks all entities of that type for redaction.
 *
 * @param start      marker opening the span
 * @param stop       marker closing the span
 * @param asType     type code assigned to the found text
 * @param ruleNumber number of the rule that matched
 * @param reason     explanation appended to the redaction reason
 */
public void redactBetween(String start, String stop, String asType, int ruleNumber, String reason) {
    String enclosed = StringUtils.substringBetween(searchText, start, stop);
    if (enclosed != null) {
        entities.addAll(findEntity(enclosed.trim(), asType));
    }
    // TODO No need to iterate
    final String redactionReason = "\nRule " + ruleNumber + " matched\n\n" + reason;
    for (Entity entity : entities) {
        if (entity.getType().equals(asType)) {
            entity.setRedaction(true);
            entity.setRedactionReason(redactionReason);
        }
    }
}
/**
 * Finds every occurrence of {@code value} in the search text that is delimited by
 * separators (or the text boundaries) on both sides.
 * <p>
 * An empty {@code value} yields no matches. Without this guard {@code indexOf} keeps
 * returning the same index for the empty string and the search never terminates, which
 * happens when a rule extracts a blank span.
 *
 * @param value  text to look for
 * @param asType type code assigned to the created entities
 * @return entities with their offsets in the search text; never {@code null}
 */
private Set<Entity> findEntity(String value, String asType) {
    Set<Entity> found = new HashSet<>();
    if (value.isEmpty()) {
        return found;
    }
    int startIndex = searchText.indexOf(value);
    while (startIndex > -1) {
        int stopIndex = startIndex + value.length();
        // isSeparator already covers whitespace, so no separate whitespace test is needed.
        boolean startsAtBoundary = startIndex == 0 || isSeparator(searchText.charAt(startIndex - 1));
        boolean endsAtBoundary = stopIndex == searchText.length() || isSeparator(searchText.charAt(stopIndex));
        if (startsAtBoundary && endsAtBoundary) {
            found.add(new Entity(searchText.substring(startIndex, stopIndex), asType, startIndex, stopIndex));
        }
        // Continue behind the current occurrence.
        startIndex = searchText.indexOf(value, stopIndex);
    }
    removeEntitiesContainedInLarger(found);
    return found;
}
private boolean isSeparator(char c) {
return Character.isWhitespace(c) || Pattern.matches("\\p{Punct}", String.valueOf(c)) || c == '\"' || c == '' || c == '';
}
/**
 * Removes every entity whose word is shorter than, and whose offset range lies completely
 * inside, that of another entity in the same set. The set is modified in place.
 *
 * @param entities entities carrying start and end offsets
 */
public void removeEntitiesContainedInLarger(Set<Entity> entities) {
    List<Entity> contained = new ArrayList<>();
    for (Entity outer : entities) {
        for (Entity candidate : entities) {
            if (candidate == outer) {
                continue;
            }
            boolean shorter = candidate.getWord().length() < outer.getWord().length();
            if (shorter && candidate.getStart() >= outer.getStart() && candidate.getEnd() <= outer.getEnd()) {
                contained.add(candidate);
            }
        }
    }
    // Removal happens only after all pairs were compared, so iteration is never disturbed.
    entities.removeAll(contained);
}
}

View File

@ -0,0 +1,58 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import javax.annotation.PostConstruct;
import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
/**
 * Holds the dictionaries used for entity detection, keyed by type code, and loads them
 * from classpath resource files on start-up.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class DictionaryService {

    public static final String VERTEBRATES_CODE = "VERTEBRATE";

    public static final String ADDRESS_CODE = "ADDRESS";

    public static final String NAME_CODE = "NAME";

    public static final String NO_REDACTION_INDICATOR = "NO_REDACTION_INDICATOR";

    /** Dictionary entries per type code. */
    @Getter
    private Map<String, Set<String>> dictionary = new HashMap<>();

    /** Generation counter of the dictionary; not modified anywhere in this class yet. */
    @Getter
    private long generation;

    /** Loads the bundled dictionaries once the bean has been constructed. */
    @PostConstruct
    public void init() {
        loadFromResourceFiles();
    }

    /** Placeholder for refreshing the dictionary; currently does nothing. */
    public void updateDictionary() {
        //TODO
    }

    /**
     * Adds the entries of the bundled dictionary files to the dictionary. Existing entries
     * are kept, so calling this repeatedly only adds what is missing.
     */
    public void loadFromResourceFiles() {
        loadEntries(NAME_CODE, "dictionaries/names.txt");
        loadEntries(VERTEBRATES_CODE, "dictionaries/vertebrates.txt");
        loadEntries(ADDRESS_CODE, "dictionaries/addresses.txt");
        loadEntries(NO_REDACTION_INDICATOR, "dictionaries/NoRedactionIndicator.txt");
    }

    /**
     * Loads one dictionary file into the set registered for {@code code}.
     * <p>
     * Entries that are empty after cleaning (blank lines in the file) are skipped: an empty
     * search term matches at every index and would stall any {@code indexOf}-based search
     * over the dictionary.
     */
    private void loadEntries(String code, String classpathPath) {
        Set<String> entries = dictionary.computeIfAbsent(code, key -> new HashSet<>());
        for (String rawEntry : ResourceLoader.load(classpathPath)) {
            String entry = cleanDictionaryEntry(rawEntry);
            if (!entry.isEmpty()) {
                entries.add(entry);
            }
        }
    }

    /** Normalises an entry the same way document text is normalised: de-hyphenate, then turn line breaks into spaces. */
    private String cleanDictionaryEntry(String entry) {
        return TextNormalizationUtilities.removeHyphenLineBreaks(entry).replaceAll("\\n", " ");
    }
}

View File

@ -0,0 +1,67 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import javax.annotation.PostConstruct;
import org.kie.api.KieServices;
import org.kie.api.builder.KieBuilder;
import org.kie.api.builder.KieFileSystem;
import org.kie.api.builder.KieModule;
import org.kie.api.runtime.KieContainer;
import org.kie.api.runtime.KieSession;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
/**
 * Runs the Drools rules against sections and allows replacing the rule set at runtime.
 */
@Service
public class DroolsExecutionService {

    @Autowired
    private KieContainer kieContainer;

    /** DRL source of the rules as last loaded or updated through this service. */
    private String currentDrlRules;

    /** Reads the bundled rule file so that {@link #getRules()} can return it. */
    @PostConstruct
    public void init() {
        currentDrlRules = ResourceLoader.loadAsString("drools/rules.drl");
    }

    /**
     * Fires all rules for the given section. The section is both registered as the global
     * {@code section} and inserted as a fact; rules modify it in place.
     *
     * @param section section to analyse
     * @return the same section instance after rule execution
     */
    public Section executeRules(Section section) {
        KieSession kieSession = kieContainer.newKieSession();
        try {
            kieSession.setGlobal("section", section);
            kieSession.insert(section);
            kieSession.fireAllRules();
        } finally {
            // Release the session even when a rule throws; otherwise it would leak.
            kieSession.dispose();
        }
        return section;
    }

    /**
     * Builds the given DRL text into a new module and switches the container to it.
     *
     * @param drlAsString complete DRL source of the new rule set
     * @throws RuntimeException if the rules cannot be built or applied; the original failure
     *                          is attached as the cause
     */
    public void updateRules(String drlAsString) {
        try {
            KieServices kieServices = KieServices.Factory.get();
            InputStream input = new ByteArrayInputStream(drlAsString.getBytes(java.nio.charset.StandardCharsets.UTF_8));
            KieFileSystem kieFileSystem = kieServices.newKieFileSystem();
            kieFileSystem.write("src/main/resources/drools/rules.drl",
                    kieServices.getResources().newInputStreamResource(input));
            KieBuilder kieBuilder = kieServices.newKieBuilder(kieFileSystem);
            kieBuilder.buildAll();
            KieModule kieModule = kieBuilder.getKieModule();
            kieContainer.updateToVersion(kieModule.getReleaseId());
            // Remember the new source only after the container switched successfully.
            currentDrlRules = drlAsString;
        } catch (Exception e) {
            throw new RuntimeException("Could not update rules", e);
        }
    }

    /**
     * @return DRL source of the rules as last loaded or updated through this service
     */
    public String getRules() {
        return currentDrlRules;
    }
}

View File

@ -0,0 +1,144 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
import lombok.RequiredArgsConstructor;
@Service
@RequiredArgsConstructor
public class EntityRedactionService {
private final DictionaryService dictionaryService;
private final DroolsExecutionService droolsExecutionService;
/**
 * Detects entities in every paragraph of the document and in every table row of those
 * paragraphs, lets the rules decide on their redaction, resolves their positions and
 * stores the result per page in the document's entity map.
 *
 * @param classifiedDoc document whose paragraphs have already been built; its entity map
 *                      is filled by this call
 */
public void processDocument(Document classifiedDoc) {
    dictionaryService.updateDictionary();
    Set<Entity> documentEntities = new HashSet<>();
    for (Paragraph paragraph : classifiedDoc.getParagraphs()) {
        SearchableText searchableText = paragraph.getSearchableText();
        List<SearchableText> searchableRows = collectSearchableRows(paragraph.getTables());
        documentEntities.addAll(analyse(searchableText));
        for (SearchableText searchableRow : searchableRows) {
            documentEntities.addAll(analyse(searchableRow));
        }
    }
    // Register one entity per position sequence under the page that sequence belongs to.
    documentEntities.forEach(entity -> {
        entity.getPositionSequences().forEach(sequence -> {
            classifiedDoc.getEntities().computeIfAbsent(sequence.getPageNumber(), (x) -> new HashSet<>()).add(
                    new Entity(entity.getWord(), entity.getType(), entity.isRedaction(), entity.getRedactionReason(), List.of(sequence))
            );
        });
    });
}

/** Builds one searchable text per table row from the text blocks of the row's cells; cells without text blocks are skipped. */
private List<SearchableText> collectSearchableRows(List<Table> tables) {
    List<SearchableText> searchableRows = new ArrayList<>();
    for (Table table : tables) {
        for (List<Cell> row : table.getRows()) {
            SearchableText searchableRow = new SearchableText();
            for (Cell column : row) {
                if (column == null || column.getTextBlocks() == null) {
                    continue;
                }
                for (TextBlock textBlock : column.getTextBlocks()) {
                    searchableRow.addAll(textBlock.getSequences());
                }
            }
            searchableRows.add(searchableRow);
        }
    }
    return searchableRows;
}

/** Finds the entities of one text, runs the rules on them and resolves the positions of every resulting entity. */
private Set<Entity> analyse(SearchableText searchableText) {
    Set<Entity> entities = findEntities(searchableText);
    Section analysedSection = droolsExecutionService.executeRules(Section
            .builder()
            .entities(entities)
            .text(searchableText.getAsStringWithLinebreaks())
            .searchText(searchableText.toString())
            .build());
    for (Entity entity : analysedSection.getEntities()) {
        entity.setPositionSequences(searchableText.getSequences(entity.getWord()));
    }
    return analysedSection.getEntities();
}
/**
 * Searches the normalised text for every dictionary entry and returns the occurrences that
 * are delimited by separators (or the text boundaries) on both sides. Matches that lie
 * completely inside a longer match are dropped.
 * <p>
 * Empty dictionary entries are skipped: {@code indexOf} keeps returning the same index for
 * the empty string, so the search loop would never terminate.
 *
 * @param searchableText text to search
 * @return entities typed by the dictionary code, with offsets into the normalised text
 */
private Set<Entity> findEntities(SearchableText searchableText) {
    String normalizedInputString = searchableText.toString();
    Set<Entity> found = new HashSet<>();
    for (Map.Entry<String, Set<String>> entry : dictionaryService.getDictionary().entrySet()) {
        for (String value : entry.getValue()) {
            if (value.isEmpty()) {
                continue;
            }
            int startIndex = normalizedInputString.indexOf(value);
            while (startIndex > -1) {
                int stopIndex = startIndex + value.length();
                // isSeparator already covers whitespace, so no separate whitespace test is needed.
                boolean startsAtBoundary = startIndex == 0 || isSeparator(normalizedInputString.charAt(startIndex - 1));
                boolean endsAtBoundary = stopIndex == normalizedInputString.length() || isSeparator(normalizedInputString.charAt(stopIndex));
                if (startsAtBoundary && endsAtBoundary) {
                    found.add(new Entity(normalizedInputString.substring(startIndex, stopIndex), entry.getKey(), startIndex, stopIndex));
                }
                // Continue behind the current occurrence.
                startIndex = normalizedInputString.indexOf(value, stopIndex);
            }
        }
    }
    removeEntitiesContainedInLarger(found);
    return found;
}
private boolean isSeparator(char c) {
return Character.isWhitespace(c) || Pattern.matches("\\p{Punct}", String.valueOf(c)) || c == '\"' || c == '' || c == '';
}
/**
 * Removes every entity whose word is shorter than, and whose offset range lies completely
 * inside, that of another entity in the same set. The set is modified in place.
 *
 * @param entities entities carrying start and end offsets
 */
public void removeEntitiesContainedInLarger(Set<Entity> entities) {
    List<Entity> contained = new ArrayList<>();
    for (Entity outer : entities) {
        for (Entity candidate : entities) {
            if (candidate == outer) {
                continue;
            }
            boolean shorter = candidate.getWord().length() < outer.getWord().length();
            if (shorter && candidate.getStart() >= outer.getStart() && candidate.getEnd() <= outer.getEnd()) {
                contained.add(candidate);
            }
        }
    }
    // Removal happens only after all pairs were compared, so iteration is never disturbed.
    entities.removeAll(contained);
}
}

View File

@ -0,0 +1,57 @@
package com.iqser.red.service.redaction.v1.server.redaction.utils;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Set;
import java.util.stream.Collectors;
import lombok.experimental.UtilityClass;
/**
 * Helpers for reading UTF-8 text resources from the classpath.
 */
@UtilityClass
public class ResourceLoader {

    /**
     * Reads a classpath resource and returns its distinct lines.
     *
     * @param classpathPath path of the resource relative to the classpath root
     * @return the lines of the resource without duplicates
     * @throws IllegalArgumentException if the resource does not exist or cannot be opened
     */
    public Set<String> load(String classpathPath) {
        try (BufferedReader br = openReader(classpathPath)) {
            return br.lines().collect(Collectors.toSet());
        } catch (IOException e) {
            throw new IllegalArgumentException("could not load classpath resource: " + classpathPath, e);
        }
    }

    /**
     * Reads a classpath resource into one string. Every line, including the last one, is
     * terminated with {@code "\n"} regardless of the line terminator used in the resource.
     *
     * @param classpathPath path of the resource relative to the classpath root
     * @return content of the resource
     * @throws IllegalArgumentException if the resource does not exist or cannot be read
     */
    public String loadAsString(String classpathPath) {
        try (BufferedReader br = openReader(classpathPath)) {
            // Purely local buffer, so the unsynchronised StringBuilder is sufficient.
            StringBuilder sb = new StringBuilder();
            String str;
            while ((str = br.readLine()) != null) {
                sb.append(str).append("\n");
            }
            return sb.toString();
        } catch (IOException e) {
            throw new IllegalArgumentException("could not load classpath resource: " + classpathPath, e);
        }
    }

    /** Opens a buffered UTF-8 reader on the resource; closing the reader closes the underlying stream. */
    private BufferedReader openReader(String classpathPath) throws IOException {
        URL resource = ResourceLoader.class.getClassLoader().getResource(classpathPath);
        if (resource == null) {
            throw new IllegalArgumentException("could not load classpath resource: " + classpathPath);
        }
        return new BufferedReader(new InputStreamReader(resource.openStream(), StandardCharsets.UTF_8));
    }
}

View File

@ -0,0 +1,17 @@
package com.iqser.red.service.redaction.v1.server.redaction.utils;
import lombok.experimental.UtilityClass;
/**
 * Text normalisation helpers shared by dictionary loading and document text processing.
 */
@UtilityClass
public class TextNormalizationUtilities {

    /**
     * Pattern for a word that ends in a hyphen or soft hyphen directly before a line break.
     * Compiled once because the method is called for every dictionary entry and every text.
     */
    private static final java.util.regex.Pattern HYPHEN_LINE_BREAK =
            java.util.regex.Pattern.compile("\\s(\\S+)[\\-\\u00AD]\\R|\n\r(.+ )");

    /**
     * Revert hyphenation due to line breaks.
     *
     * @param text Text to be processed.
     * @return Text without line-break hyphenation.
     */
    public static String removeHyphenLineBreaks(String text) {
        return HYPHEN_LINE_BREAK.matcher(text).replaceAll("\n$1$2");
    }
}

View File

@ -0,0 +1,123 @@
package com.iqser.red.service.redaction.v1.server.segmentation;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.classification.service.BlockificationService;
import com.iqser.red.service.redaction.v1.server.classification.service.ClassificationService;
import com.iqser.red.service.redaction.v1.server.parsing.PDFLinesTextStripper;
import com.iqser.red.service.redaction.v1.server.parsing.model.ParsedElements;
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings;
import com.iqser.red.service.redaction.v1.server.tableextraction.service.RulingCleaningService;
import com.iqser.red.service.redaction.v1.server.tableextraction.service.TableExtractionService;
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
/**
 * Turns a PDF into the internal {@link Document} model: text and rulings are extracted page
 * by page, blocks and tables are built, statistics are gathered and finally the document is
 * classified and split into sections.
 */
@Slf4j
@Service
@RequiredArgsConstructor
@SuppressWarnings("PMD")
public class PdfSegmentationService {

    private final RulingCleaningService rulingCleaningService;
    private final TableExtractionService tableExtractionService;
    private final BlockificationService blockificationService;
    private final ClassificationService classificationService;
    private final SectionsBuilderService sectionsBuilderService;

    /**
     * Parses all pages of the PDF and returns the classified document with its sections.
     *
     * @param pdDocument opened PDF document
     * @return the parsed document
     * @throws IOException if text extraction fails
     */
    public Document parseDocument(PDDocument pdDocument) throws IOException {
        Document document = new Document();
        List<Page> pages = new ArrayList<>();
        PDFLinesTextStripper stripper = new PDFLinesTextStripper();

        for (int pageIndex = 0; pageIndex < pdDocument.getNumberOfPages(); pageIndex++) {
            int pageNumber = pageIndex + 1;
            PDPage pdPage = pdDocument.getPage(pageIndex);

            // Restrict the stripper to the current page and run the extraction.
            stripper.setPageNumber(pageNumber);
            stripper.setStartPage(pageNumber);
            stripper.setEndPage(pageNumber);
            stripper.getText(pdDocument);

            PDRectangle mediaBox = pdPage.getMediaBox();
            boolean widerThanHigh = mediaBox.getWidth() > mediaBox.getHeight();
            int rotation = pdPage.getRotation();
            boolean rotated = !(rotation == 0 || rotation == 360);

            ParsedElements parsedElements = ParsedElements
                    .builder()
                    .rulings(stripper.getRulings())
                    .sequences(stripper.getTextPositionSequences())
                    .minCharWidth(Utils.round(stripper.getMinCharWidth(), 2))
                    .minCharHeight(Utils.round(stripper.getMinCharHeight(), 2))
                    .landscape(widerThanHigh)
                    .rotated(rotated)
                    .build();

            CleanRulings cleanRulings = rulingCleaningService.getCleanRulings(
                    parsedElements.getRulings(), parsedElements.getMinCharWidth(), parsedElements.getMinCharHeight());

            Page page = blockificationService.blockify(
                    parsedElements.getSequences(), cleanRulings.getHorizontal(), cleanRulings.getVertical());
            page.setRotation(rotation);

            tableExtractionService.extractTables(cleanRulings, page);
            buildPageStatistics(page);

            // A rotated page is treated like a landscape page from here on.
            page.setLandscape(parsedElements.isLandscape() || parsedElements.isRotated());
            page.setPageNumber(pageNumber);

            increaseDocumentStatistics(page, document);
            pages.add(page);
        }

        document.setPages(pages);
        classificationService.classifyDocument(document);
        sectionsBuilderService.buildSections(document);
        return document;
    }

    /** Adds the page's counters to the document's counters; font sizes of landscape pages are left out. */
    private void increaseDocumentStatistics(Page page, Document document) {
        boolean portrait = !page.isLandscape();
        if (portrait) {
            document.getFontSizeCounter().addAll(page.getFontSizeCounter().getCountPerValue());
        }
        document.getFontCounter().addAll(page.getFontCounter().getCountPerValue());
        document.getTextHeightCounter().addAll(page.getTextHeightCounter().getCountPerValue());
        document.getFontStyleCounter().addAll(page.getFontStyleCounter().getCountPerValue());
    }

    /**
     * Counts text height, font, font size and font style of every word on the page.
     * Only plain text blocks contribute; other containers such as tables are ignored.
     */
    private void buildPageStatistics(Page page) {
        for (AbstractTextContainer container : page.getTextBlocks()) {
            if (!(container instanceof TextBlock)) {
                continue;
            }
            TextBlock textBlock = (TextBlock) container;
            if (textBlock.getSequences() == null) {
                continue;
            }
            for (TextPositionSequence word : textBlock.getSequences()) {
                page.getTextHeightCounter().add(word.getTextHeight());
                page.getFontCounter().add(word.getFont());
                page.getFontSizeCounter().add(word.getFontSize());
                page.getFontStyleCounter().add(word.getFontStyle());
            }
        }
    }
}

View File

@ -0,0 +1,115 @@
package com.iqser.red.service.redaction.v1.server.segmentation;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
@Service
@SuppressWarnings("all")
public class SectionsBuilderService {
/**
 * Splits the text blocks of all pages into paragraphs and stores them on the document.
 * <p>
 * Blocks without classification and blocks classified as "Header" or "Footer" are ignored.
 * A new paragraph starts at every block whose classification begins with {@code "H "}; if
 * the document has no headlines, every block becomes its own paragraph.
 * <p>
 * Pending blocks are only flushed when there are any. Previously, because {@code &&} binds
 * tighter than {@code ||}, a document without headlines got an empty paragraph in front of
 * its first block.
 *
 * @param document document whose pages have been classified; receives the paragraphs
 */
public void buildSections(Document document) {
    List<Paragraph> paragraphs = new ArrayList<>();
    List<AbstractTextContainer> chunkWords = new ArrayList<>();
    for (Page page : document.getPages()) {
        for (AbstractTextContainer current : page.getTextBlocks()) {
            String classification = current.getClassification();
            if (classification == null || classification.equals("Header") || classification.equals("Footer")) {
                continue;
            }
            current.setPage(page.getPageNumber());
            boolean startsNewParagraph = classification.startsWith("H ") || !document.isHeadlines();
            if (startsNewParagraph && !chunkWords.isEmpty()) {
                paragraphs.add(buildTextBlock(chunkWords));
                chunkWords = new ArrayList<>();
            }
            chunkWords.add(current);
        }
    }
    // The trailing paragraph is always added, as before, even if no block was collected at all.
    paragraphs.add(buildTextBlock(chunkWords));
    document.setParagraphs(paragraphs);
}
/**
 * Builds one paragraph from the given containers. Tables are added to the paragraph as
 * they are; plain text blocks are combined into a running text block that is closed and
 * added whenever a table or a page change interrupts it.
 *
 * @param wordBlockList containers of the paragraph in reading order; may be empty
 * @return the paragraph, never {@code null}; it has no page blocks for an empty input
 */
private Paragraph buildTextBlock(List<AbstractTextContainer> wordBlockList) {
Paragraph paragraph = new Paragraph();
// Running text block that is currently being extended; null until the first text block is seen.
TextBlock textBlock = null;
// Page of the previously processed text block, -1 before the first one.
int pageBefore = -1;
// True while the previously processed container was a table.
boolean splitByTable = false;
Iterator<AbstractTextContainer> itty = wordBlockList.iterator();
// True once the running text block has been added to the paragraph (prevents adding it twice).
boolean alreadyAdded= false;
while (itty.hasNext()) {
AbstractTextContainer container = itty.next();
if (container instanceof Table) {
splitByTable = true;
// Close the running text block before the table, unless that already happened.
if (textBlock != null && !alreadyAdded) {
paragraph.getPageBlocks().add(textBlock);
alreadyAdded =true;
}
paragraph.getPageBlocks().add(container);
continue;
}
TextBlock wordBlock = (TextBlock) container;
if (textBlock == null) {
// First text block of the paragraph: start the running block from its geometry, sequences and rotation.
textBlock = new TextBlock(wordBlock.getMinX(), wordBlock.getMaxX(), wordBlock.getMinY(), wordBlock.getMaxY(), wordBlock.getSequences(), wordBlock.getRotation());
textBlock.setPage(wordBlock.getPage());
} else if (splitByTable) {
// First text block after one or more tables: start a fresh running block that still has to be added.
textBlock = new TextBlock(wordBlock.getMinX(), wordBlock.getMaxX(), wordBlock.getMinY(), wordBlock.getMaxY(), wordBlock.getSequences(), wordBlock.getRotation());
textBlock.setPage(wordBlock.getPage());
alreadyAdded = false;
} else if (pageBefore != -1 && wordBlock.getPage() != pageBefore) {
// Page change: close the running block on the previous page and start a new one.
textBlock.setPage(pageBefore);
paragraph.getPageBlocks().add(textBlock);
textBlock = new TextBlock(wordBlock.getMinX(), wordBlock.getMaxX(), wordBlock.getMinY(), wordBlock.getMaxY(), wordBlock.getSequences(), wordBlock.getRotation());
textBlock.setPage(wordBlock.getPage());
} else {
// Same page, no table in between: grow the running block to the union of both blocks.
// NOTE(review): only the geometry is updated here; whether the sequences of wordBlock are
// carried over depends on union()/resize(), which are not visible here - confirm.
TextBlock spatialEntity = textBlock.union(wordBlock);
textBlock.resize(spatialEntity.getMinX(), spatialEntity.getMinY(),
spatialEntity.getWidth(), spatialEntity.getHeight());
}
pageBefore = wordBlock.getPage();
splitByTable = false;
}
// Add the last running text block unless it was already added before a trailing table.
if (textBlock != null && !alreadyAdded) {
paragraph.getPageBlocks().add(textBlock);
}
return paragraph;
}
}

View File

@ -0,0 +1,18 @@
package com.iqser.red.service.redaction.v1.server.settings;
import org.springframework.boot.context.properties.ConfigurationProperties;
import lombok.Data;
/**
 * Configuration properties of the service, bound from the {@code redaction-service} prefix.
 */
@Data
@ConfigurationProperties("redaction-service")
public class RedactionServiceSettings {
/**
* Tenant used in single tenant mode.
*/
private String defaultTenant = "iqser-id";
// Defaults to 100. NOTE(review): presumably the DPI used when flattening pages to images;
// its usage is not visible here - confirm.
private int flattenImageDpi = 100;
}

View File

@ -0,0 +1,25 @@
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Base class of page elements that carry text: a bounding box, a classification and the
 * page the element belongs to.
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
public abstract class AbstractTextContainer {

    protected float minX;
    protected float maxX;
    protected float minY;
    protected float maxY;
    protected String classification;
    protected int page;

    /**
     * @return the text of this container
     */
    public abstract String getText();

    /**
     * Compares the bounding box of this container with that of {@code other}.
     * <p>
     * NOTE(review): horizontally this checks that {@code other} lies within this container,
     * but vertically the comparisons point the other way (this container must lie within
     * {@code other}). That looks inverted for a containment test - confirm whether it is
     * intended before relying on it.
     *
     * @param other container to compare with
     * @return {@code true} if both the horizontal and the vertical condition hold
     */
    public boolean contains(AbstractTextContainer other) {
        boolean horizontalConditionHolds = this.minX <= other.minX && this.maxX >= other.maxX;
        if (!horizontalConditionHolds) {
            return false;
        }
        return this.minY >= other.minY && this.maxY <= other.maxY;
    }
}

View File

@ -0,0 +1,26 @@
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
import java.awt.geom.Point2D;
import java.util.ArrayList;
import java.util.List;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import lombok.Data;
import lombok.EqualsAndHashCode;
/**
 * A table cell: a rectangle spanned by two corner points that collects the text blocks
 * placed inside it.
 */
@SuppressWarnings("serial")
@Data
@EqualsAndHashCode(callSuper = true)
public class Cell extends Rectangle {

    /** Text blocks assigned to this cell, in insertion order. */
    private List<TextBlock> textBlocks = new ArrayList<>();

    /**
     * Creates the cell from its top-left and bottom-right corner.
     *
     * @param topLeft     corner providing top and left
     * @param bottomRight corner providing the extent in both directions
     */
    public Cell(Point2D topLeft, Point2D bottomRight) {
        super((float) topLeft.getY(),
                (float) topLeft.getX(),
                span(topLeft.getX(), bottomRight.getX()),
                span(topLeft.getY(), bottomRight.getY()));
    }

    /** Distance from {@code from} to {@code to}, narrowed to float after the subtraction. */
    private static float span(double from, double to) {
        return (float) (to - from);
    }

    /**
     * Appends a text block to this cell.
     *
     * @param textBlock block to add
     */
    public void addTextBlock(TextBlock textBlock) {
        this.textBlocks.add(textBlock);
    }
}

View File

@ -0,0 +1,14 @@
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
import java.util.List;
import lombok.Builder;
import lombok.Data;
/**
 * Result of ruling cleaning: the rulings of a page split into horizontal and vertical ones.
 */
@Data
@Builder
public class CleanRulings {
// Horizontal rulings. The field is package-private; accessors come from @Data.
List<Ruling> horizontal;
// Vertical rulings. The field is package-private; accessors come from @Data.
List<Ruling> vertical;
}

View File

@ -0,0 +1,177 @@
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
import java.awt.geom.Point2D;
import java.awt.geom.Rectangle2D;
import java.util.Comparator;
import java.util.List;
@SuppressWarnings("all")
public class Rectangle extends Rectangle2D.Float {
/**
 * Ill-defined comparator, from when Rectangle was Comparable.
 * <p>
 * Equal rectangles compare as 0. Rectangles whose vertical overlap exceeds
 * {@code VERTICAL_COMPARISON_THRESHOLD} are ordered by their x coordinate (reversed when
 * both report {@code isLtrDominant() == -1}); all others are ordered by their bottom edge.
 *
 * see https://github.com/tabulapdf/tabula-java/issues/116
 * @deprecated with no replacement
 */
@Deprecated
public static final Comparator<Rectangle> ILL_DEFINED_ORDER = new Comparator<Rectangle>() {
    @Override
    public int compare(Rectangle o1, Rectangle o2) {
        if (o1.equals(o2)) {
            return 0;
        }
        boolean sameLine = o1.verticalOverlap(o2) > VERTICAL_COMPARISON_THRESHOLD;
        if (!sameLine) {
            return java.lang.Float.compare(o1.getBottom(), o2.getBottom());
        }
        boolean bothRightToLeft = o1.isLtrDominant() == -1 && o2.isLtrDominant() == -1;
        int horizontalOrder = java.lang.Double.compare(o1.getX(), o2.getX());
        return bothRightToLeft ? -horizontalOrder : horizontalOrder;
    }
};
// Vertical overlap above which ILL_DEFINED_ORDER orders two rectangles by x instead of by bottom edge.
protected static final float VERTICAL_COMPARISON_THRESHOLD = 0.4f;
public Rectangle() {
super();
}
/**
 * Creates a rectangle from its top-left corner and size. Note the argument order:
 * {@code top} (y) comes before {@code left} (x).
 *
 * @param top    y coordinate of the upper edge
 * @param left   x coordinate of the left edge
 * @param width  width of the rectangle
 * @param height height of the rectangle
 */
public Rectangle(float top, float left, float width, float height) {
    // The superclass constructor is invoked implicitly; setRect expects x before y.
    setRect(left, top, width, height);
}
/**
 * Compares this rectangle with another one using {@code ILL_DEFINED_ORDER}.
 *
 * @param other rectangle to compare with
 * @return negative, zero or positive as defined by {@code ILL_DEFINED_ORDER}
 */
public int compareTo(Rectangle other) {
    final int order = ILL_DEFINED_ORDER.compare(this, other);
    return order;
}
// Text-direction indicator read by ILL_DEFINED_ORDER, which reverses the horizontal order
// when both rectangles return -1. This base implementation always returns 0.
// The original author's note says it exists for sorting in technology.tabula.TextChunk.
public int isLtrDominant() {
return 0;
}
/**
 * @return the area of this rectangle, i.e. width times height
 */
public float getArea() {
    final float area = width * height;
    return area;
}
/**
 * Computes the length of the vertical range shared by this rectangle and {@code other}.
 *
 * @param other rectangle to compare with
 * @return shared vertical extent, or 0 if the rectangles do not overlap vertically
 */
public float verticalOverlap(Rectangle other) {
    float sharedBottom = Math.min(this.getBottom(), other.getBottom());
    float sharedTop = Math.max(this.getTop(), other.getTop());
    return Math.max(0, sharedBottom - sharedTop);
}
/**
 * @param other rectangle to compare with
 * @return {@code true} if the vertical overlap with {@code other} is greater than zero
 */
public boolean verticallyOverlaps(Rectangle other) {
    final float overlap = verticalOverlap(other);
    return overlap > 0;
}
/**
 * Computes the length of the horizontal range shared by this rectangle and {@code other}.
 *
 * @param other rectangle to compare with
 * @return shared horizontal extent, or 0 if the rectangles do not overlap horizontally
 */
public float horizontalOverlap(Rectangle other) {
    float sharedRight = Math.min(this.getRight(), other.getRight());
    float sharedLeft = Math.max(this.getLeft(), other.getLeft());
    return Math.max(0, sharedRight - sharedLeft);
}
/**
 * @param other rectangle to compare with
 * @return {@code true} if the horizontal overlap with {@code other} is greater than zero
 */
public boolean horizontallyOverlaps(Rectangle other) {
    final float overlap = horizontalOverlap(other);
    return overlap > 0;
}
/**
 * Relates the vertical overlap of the two rectangles to the height of the smaller one.
 * Four arrangements are distinguished (other starts above, this starts above, other inside
 * this, this inside other); the first one that applies determines the result.
 *
 * @param other rectangle to compare with
 * @return overlap divided by the smaller height, or 0 if none of the arrangements applies
 */
public float verticalOverlapRatio(Rectangle other) {
    float thisTop = this.getTop();
    float thisBottom = this.getBottom();
    float otherTop = other.getTop();
    float otherBottom = other.getBottom();
    float delta = Math.min(thisBottom - thisTop, otherBottom - otherTop);

    if (otherTop <= thisTop && thisTop <= otherBottom && otherBottom <= thisBottom) {
        return (otherBottom - thisTop) / delta;
    }
    if (thisTop <= otherTop && otherTop <= thisBottom && thisBottom <= otherBottom) {
        return (thisBottom - otherTop) / delta;
    }
    if (thisTop <= otherTop && otherTop <= otherBottom && otherBottom <= thisBottom) {
        return (otherBottom - otherTop) / delta;
    }
    if (otherTop <= thisTop && thisTop <= thisBottom && thisBottom <= otherBottom) {
        return (thisBottom - thisTop) / delta;
    }
    return 0;
}
public float overlapRatio(Rectangle other) {
double intersectionWidth = Math.max(0,
Math.min(this.getRight(), other.getRight()) - Math.max(this.getLeft(), other.getLeft()));
double intersectionHeight = Math.max(0,
Math.min(this.getBottom(), other.getBottom()) - Math.max(this.getTop(), other.getTop()));
double intersectionArea = Math.max(0, intersectionWidth * intersectionHeight);
double unionArea = this.getArea() + other.getArea() - intersectionArea;
return (float) (intersectionArea / unionArea);
}
public Rectangle merge(Rectangle other) {
this.setRect(this.createUnion(other));
return this;
}
public float getTop() {
return (float) this.getMinY();
}
public void setTop(float top) {
float deltaHeight = top - this.y;
this.setRect(this.x, top, this.width, this.height - deltaHeight);
}
public float getRight() {
return (float) this.getMaxX();
}
public void setRight(float right) {
this.setRect(this.x, this.y, right - this.x, this.height);
}
public float getLeft() {
return (float) this.getMinX();
}
public void setLeft(float left) {
float deltaWidth = left - this.x;
this.setRect(left, this.y, this.width - deltaWidth, this.height);
}
public float getBottom() {
return (float) this.getMaxY();
}
public void setBottom(float bottom) {
this.setRect(this.x, this.y, this.width, bottom - this.y);
}
public Point2D[] getPoints() {
return new Point2D[] { new Point2D.Float(this.getLeft(), this.getTop()),
new Point2D.Float(this.getRight(), this.getTop()), new Point2D.Float(this.getRight(), this.getBottom()),
new Point2D.Float(this.getLeft(), this.getBottom()) };
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
String s = super.toString();
sb.append(s.substring(0, s.length() - 1));
sb.append(String.format(",bottom=%f,right=%f]", this.getBottom(), this.getRight()));
return sb.toString();
}
/**
* @param rectangles
* @return minimum bounding box that contains all the rectangles
*/
public static Rectangle boundingBoxOf(List<? extends Rectangle> rectangles) {
float minx = java.lang.Float.MAX_VALUE;
float miny = java.lang.Float.MAX_VALUE;
float maxx = java.lang.Float.MIN_VALUE;
float maxy = java.lang.Float.MIN_VALUE;
for (Rectangle r : rectangles) {
minx = (float) Math.min(r.getMinX(), minx);
miny = (float) Math.min(r.getMinY(), miny);
maxx = (float) Math.max(r.getMaxX(), maxx);
maxy = (float) Math.max(r.getMaxY(), maxy);
}
return new Rectangle(miny, minx, maxx - minx, maxy - miny);
}
}

View File

@ -0,0 +1,52 @@
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
import java.util.ArrayList;
import java.util.List;
import org.locationtech.jts.geom.Envelope;
import org.locationtech.jts.index.strtree.STRtree;
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
/**
 * Spatial lookup structure for {@link Rectangle}s backed by a JTS {@link STRtree}.
 * Every added rectangle is also kept in a plain list so that the overall
 * bounding box can be computed.
 *
 * @param <T> concrete rectangle type stored in the index
 */
@SuppressWarnings("all")
public class RectangleSpatialIndex<T extends Rectangle> {

    private final STRtree tree = new STRtree();
    private final List<T> members = new ArrayList<>();

    /**
     * Registers a rectangle in the index.
     *
     * @param te rectangle to add
     */
    public void add(T te) {
        members.add(te);
        Envelope extent = new Envelope(te.getLeft(), te.getRight(), te.getBottom(), te.getTop());
        tree.insert(extent, te);
    }

    /**
     * Finds the indexed rectangles that are contained in {@code r}.
     *
     * @param r search area
     * @return contained rectangles, sorted with {@link Rectangle#ILL_DEFINED_ORDER}
     */
    public List<T> contains(Rectangle r) {
        List<T> candidates = tree.query(searchEnvelope(r));
        List<T> enclosed = new ArrayList<T>();
        for (T candidate : candidates) {
            if (!r.contains(candidate)) {
                continue;
            }
            enclosed.add(candidate);
        }
        Utils.sort(enclosed, Rectangle.ILL_DEFINED_ORDER);
        return enclosed;
    }

    /**
     * Returns whatever the tree query yields for the envelope of {@code r};
     * no further filtering or sorting is applied.
     *
     * @param r search area
     * @return query result of the underlying tree
     */
    public List<T> intersects(Rectangle r) {
        List hits = tree.query(searchEnvelope(r));
        return hits;
    }

    /**
     * Minimum bounding box of all the Rectangles contained on this RectangleSpatialIndex
     *
     * @return a Rectangle
     */
    public Rectangle getBounds() {
        return Rectangle.boundingBoxOf(members);
    }

    /** Builds the query envelope (left, right, top, bottom) for a search rectangle. */
    private static Envelope searchEnvelope(Rectangle r) {
        return new Envelope(r.getLeft(), r.getRight(), r.getTop(), r.getBottom());
    }
}

View File

@ -0,0 +1,369 @@
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
import java.awt.geom.Line2D;
import java.awt.geom.Point2D;
import java.awt.geom.Rectangle2D;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Formatter;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.CohenSutherlandClipping;
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
import lombok.extern.slf4j.Slf4j;
/**
 * A ruling line (straight segment) as drawn on a page, with helpers for the
 * axis-parallel case used by table detection.
 *
 * <p>The top/left accessors return the first endpoint (x1, y1) and the
 * bottom/right accessors the second endpoint (x2, y2); no min/max normalisation
 * is performed.
 *
 * <p>NOTE(review): {@link #equals(Object)} compares endpoints while
 * {@link #hashCode()} delegates to the superclass. {@code Line2D} does not appear
 * to override {@code hashCode}, so equal rulings may hash differently; avoid
 * using rulings as keys of hash-based collections until this is confirmed.
 */
@Slf4j
@SuppressWarnings("all")
public class Ruling extends Line2D.Float {

    /** Amount by which rulings are lengthened before testing perpendicular intersections. */
    private static final int PERPENDICULAR_PIXEL_EXPAND_AMOUNT = 2;

    /** Event types of the sweep in {@link #findIntersections(List, List)}. */
    private enum SOType {VERTICAL, HRIGHT, HLEFT}

    /**
     * @param p1 first endpoint
     * @param p2 second endpoint
     */
    public Ruling(Point2D p1, Point2D p2) {
        super(p1, p2);
    }

    /** @return {@code true} for a non-degenerate ruling whose x coordinates are equal per {@code Utils.feq} */
    public boolean vertical() {
        return this.length() > 0 && Utils.feq(this.x1, this.x2); //diff < ORIENTATION_CHECK_THRESHOLD;
    }

    /** @return {@code true} for a non-degenerate ruling whose y coordinates are equal per {@code Utils.feq} */
    public boolean horizontal() {
        return this.length() > 0 && Utils.feq(this.y1, this.y2); //diff < ORIENTATION_CHECK_THRESHOLD;
    }

    /** @return {@code true} when the ruling is neither vertical nor horizontal (this includes zero-length rulings) */
    public boolean oblique() {
        return !(this.vertical() || this.horizontal());
    }

    // attributes that make sense only for non-oblique lines
    // these are used to have a single collapse method (in page, currently)

    /**
     * @return the left coordinate of a vertical ruling, the top coordinate of a horizontal one
     * @throws UnsupportedOperationException if the ruling is oblique
     */
    public float getPosition() {
        if (this.oblique()) {
            throw new UnsupportedOperationException();
        }
        return this.vertical() ? this.getLeft() : this.getTop();
    }

    /**
     * @return the top coordinate of a vertical ruling, the left coordinate of a horizontal one
     * @throws UnsupportedOperationException if the ruling is oblique
     */
    public float getStart() {
        if (this.oblique()) {
            throw new UnsupportedOperationException();
        }
        return this.vertical() ? this.getTop() : this.getLeft();
    }

    /**
     * Sets the top (vertical ruling) or left (horizontal ruling) coordinate.
     *
     * @throws UnsupportedOperationException if the ruling is oblique
     */
    public void setStart(float v) {
        if (this.oblique()) {
            throw new UnsupportedOperationException();
        }
        if (this.vertical()) {
            this.setTop(v);
        } else {
            this.setLeft(v);
        }
    }

    /**
     * @return the bottom coordinate of a vertical ruling, the right coordinate of a horizontal one
     * @throws UnsupportedOperationException if the ruling is oblique
     */
    public float getEnd() {
        if (this.oblique()) {
            throw new UnsupportedOperationException();
        }
        return this.vertical() ? this.getBottom() : this.getRight();
    }

    /**
     * Sets the bottom (vertical ruling) or right (horizontal ruling) coordinate.
     *
     * @throws UnsupportedOperationException if the ruling is oblique
     */
    public void setEnd(float v) {
        if (this.oblique()) {
            throw new UnsupportedOperationException();
        }
        if (this.vertical()) {
            this.setBottom(v);
        } else {
            this.setRight(v);
        }
    }

    /**
     * Sets start and end in one call; see {@link #setStart(float)} and {@link #setEnd(float)}.
     *
     * @throws UnsupportedOperationException if the ruling is oblique
     */
    public void setStartEnd(float start, float end) {
        if (this.oblique()) {
            throw new UnsupportedOperationException();
        }
        if (this.vertical()) {
            this.setTop(start);
            this.setBottom(end);
        } else {
            this.setLeft(start);
            this.setRight(end);
        }
    }

    /**
     * @return {@code true} when this ruling's verticality equals the other's
     *         horizontality. NOTE(review): this is also true for two oblique rulings.
     */
    public boolean perpendicularTo(Ruling other) {
        return this.vertical() == other.horizontal();
    }

    /**
     * Tests whether the rulings intersect, either directly or after being lengthened.
     *
     * @param another                        ruling to test against
     * @param colinearOrParallelExpandAmount lengthening applied to both rulings when
     *                                       they are not perpendicular
     * @return {@code true} when the (possibly lengthened) rulings intersect
     */
    public boolean nearlyIntersects(Ruling another, int colinearOrParallelExpandAmount) {
        if (this.intersectsLine(another)) {
            return true;
        }
        boolean rv = false;
        if (this.perpendicularTo(another)) {
            rv = this.expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT).intersectsLine(another);
        } else {
            rv = this.expand(colinearOrParallelExpandAmount)
                    .intersectsLine(another.expand(colinearOrParallelExpandAmount));
        }
        return rv;
    }

    /** @return Euclidean distance between the two endpoints */
    public double length() {
        return Math.sqrt(Math.pow(this.x1 - this.x2, 2) + Math.pow(this.y1 - this.y2, 2));
    }

    /**
     * Clips this ruling against a rectangle.
     *
     * @param clip clip rectangle
     * @return a new clipped ruling when the clipper reports success, otherwise this ruling
     */
    public Ruling intersect(Rectangle2D clip) {
        // Float resolves to Line2D.Float here (inherited member type)
        Float clipee = (Float) this.clone();
        boolean clipped = new CohenSutherlandClipping(clip).clip(clipee);
        if (clipped) {
            return new Ruling(clipee.getP1(), clipee.getP2());
        } else {
            return this;
        }
    }

    /**
     * Returns a copy whose start is decreased and whose end is increased by {@code amount}.
     *
     * @throws UnsupportedOperationException if the ruling is oblique
     */
    public Ruling expand(float amount) {
        Ruling r = (Ruling) this.clone();
        r.setStart(this.getStart() - amount);
        r.setEnd(this.getEnd() + amount);
        return r;
    }

    /**
     * Intersection point of a horizontal and a vertical ruling, both lengthened by
     * {@link #PERPENDICULAR_PIXEL_EXPAND_AMOUNT} first.
     *
     * @param other the other ruling
     * @return the point (x of the vertical, y of the horizontal), or {@code null}
     *         when the rulings do not intersect or are not one horizontal and one vertical
     * @throws UnsupportedOperationException if either ruling is oblique
     */
    public Point2D intersectionPoint(Ruling other) {
        Ruling this_l = this.expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT);
        Ruling other_l = other.expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT);
        Ruling horizontal, vertical;
        if (!this_l.intersectsLine(other_l)) {
            return null;
        }
        if (this_l.horizontal() && other_l.vertical()) {
            horizontal = this_l;
            vertical = other_l;
        } else if (this_l.vertical() && other_l.horizontal()) {
            vertical = this_l;
            horizontal = other_l;
        } else {
            log.warn("lines must be orthogonal, vertical and horizontal");
            return null;
        }
        return new Point2D.Float(vertical.getLeft(), horizontal.getTop());
    }

    /** Two rulings are equal when both endpoints are equal. */
    @Override
    public boolean equals(Object other) {
        if (this == other) {
            return true;
        }
        if (!(other instanceof Ruling)) {
            return false;
        }
        Ruling o = (Ruling) other;
        return this.getP1().equals(o.getP1()) && this.getP2().equals(o.getP2());
    }

    /** Delegates to the superclass; see the class-level review note. */
    @Override
    public int hashCode() {
        return super.hashCode();
    }

    /** @return y1 */
    public float getTop() {
        return this.y1;
    }

    /** Sets y1. */
    public void setTop(float v) {
        setLine(this.getLeft(), v, this.getRight(), this.getBottom());
    }

    /** @return x1 */
    public float getLeft() {
        return this.x1;
    }

    /** Sets x1. */
    public void setLeft(float v) {
        setLine(v, this.getTop(), this.getRight(), this.getBottom());
    }

    /** @return y2 */
    public float getBottom() {
        return this.y2;
    }

    /** Sets y2. */
    public void setBottom(float v) {
        setLine(this.getLeft(), this.getTop(), this.getRight(), v);
    }

    /** @return x2 */
    public float getRight() {
        return this.x2;
    }

    /** Sets x2. */
    public void setRight(float v) {
        setLine(this.getLeft(), this.getTop(), v, this.getBottom());
    }

    /** @return x2 - x1 (may be negative) */
    public float getWidth() {
        return this.getRight() - this.getLeft();
    }

    /** @return y2 - y1 (may be negative) */
    public float getHeight() {
        return this.getBottom() - this.getTop();
    }

    /** @return angle of the vector from the first to the second endpoint in degrees, in [0, 360) */
    public double getAngle() {
        double angle = Math.toDegrees(Math.atan2(this.getP2().getY() - this.getP1().getY(),
                this.getP2().getX() - this.getP1().getX()));
        if (angle < 0) {
            angle += 360;
        }
        return angle;
    }

    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        Formatter formatter = new Formatter(sb);
        String rv = formatter.format("%s[minX=%f minY=%f maxX=%f maxY=%f]", this.getClass().toString(), this.x1, this.y1, this.x2, this.y2).toString();
        formatter.close();
        return rv;
    }

    /**
     * Clips every ruling that intersects {@code area} to that area.
     *
     * @param rulings rulings to crop
     * @param area    crop area
     * @return the clipped rulings; rulings that do not intersect the area are dropped
     */
    public static List<Ruling> cropRulingsToArea(List<Ruling> rulings, Rectangle2D area) {
        ArrayList<Ruling> rv = new ArrayList<>();
        for (Ruling r : rulings) {
            if (r.intersects(area)) {
                rv.add(r.intersect(area));
            }
        }
        return rv;
    }

    // log(n) implementation of find_intersections
    // based on http://people.csail.mit.edu/indyk/6.838-old/handouts/lec2.pdf
    /**
     * Finds the intersection points of horizontal and vertical rulings with a
     * left-to-right sweep.
     *
     * @param horizontals horizontal rulings
     * @param verticals   vertical rulings
     * @return map from intersection point (ordered by y, then x) to the pair
     *         {expanded horizontal ruling, expanded vertical ruling}
     */
    public static Map<Point2D, Ruling[]> findIntersections(List<Ruling> horizontals, List<Ruling> verticals) {

        class SortObject {
            protected SOType type;
            protected float position;
            protected Ruling ruling;

            public SortObject(SOType type, float position, Ruling ruling) {
                this.type = type;
                this.position = position;
                this.ruling = ruling;
            }
        }

        List<SortObject> sos = new ArrayList<>();

        // Horizontals currently crossed by the sweep line. The order must distinguish
        // distinct rulings that share the same y: keyed by top alone, a second
        // horizontal at the same height would be treated as a duplicate key and its
        // intersections would be lost.
        TreeMap<Ruling, Boolean> tree = new TreeMap<>(new Comparator<Ruling>() {
            @Override
            public int compare(Ruling o1, Ruling o2) {
                int byTop = java.lang.Double.compare(o1.getTop(), o2.getTop());
                if (byTop != 0) {
                    return byTop;
                }
                int byLeft = java.lang.Double.compare(o1.getLeft(), o2.getLeft());
                if (byLeft != 0) {
                    return byLeft;
                }
                return java.lang.Double.compare(o1.getRight(), o2.getRight());
            }
        });

        TreeMap<Point2D, Ruling[]> rv = new TreeMap<>(new Comparator<Point2D>() {
            @Override
            public int compare(Point2D o1, Point2D o2) {
                if (o1.getY() > o2.getY()) {
                    return 1;
                }
                if (o1.getY() < o2.getY()) {
                    return -1;
                }
                if (o1.getX() > o2.getX()) {
                    return 1;
                }
                if (o1.getX() < o2.getX()) {
                    return -1;
                }
                return 0;
            }
        });

        // each horizontal contributes an "enter" and a "leave" event, widened by the expand amount
        for (Ruling h : horizontals) {
            sos.add(new SortObject(SOType.HLEFT, h.getLeft() - PERPENDICULAR_PIXEL_EXPAND_AMOUNT, h));
            sos.add(new SortObject(SOType.HRIGHT, h.getRight() + PERPENDICULAR_PIXEL_EXPAND_AMOUNT, h));
        }

        for (Ruling v : verticals) {
            sos.add(new SortObject(SOType.VERTICAL, v.getLeft(), v));
        }

        // at equal positions: HLEFT before VERTICAL before HRIGHT
        Collections.sort(sos, new Comparator<SortObject>() {
            @Override
            public int compare(SortObject a, SortObject b) {
                int rv;
                if (Utils.feq(a.position, b.position)) {
                    if (a.type == SOType.VERTICAL && b.type == SOType.HLEFT) {
                        rv = 1;
                    } else if (a.type == SOType.VERTICAL && b.type == SOType.HRIGHT) {
                        rv = -1;
                    } else if (a.type == SOType.HLEFT && b.type == SOType.VERTICAL) {
                        rv = -1;
                    } else if (a.type == SOType.HRIGHT && b.type == SOType.VERTICAL) {
                        rv = 1;
                    } else {
                        rv = java.lang.Double.compare(a.position, b.position);
                    }
                } else {
                    return java.lang.Double.compare(a.position, b.position);
                }
                return rv;
            }
        });

        for (SortObject so : sos) {
            switch (so.type) {
                case VERTICAL:
                    for (Map.Entry<Ruling, Boolean> h : tree.entrySet()) {
                        try {
                            Point2D i = h.getKey().intersectionPoint(so.ruling);
                            if (i == null) {
                                continue;
                            }
                            rv.put(i,
                                    new Ruling[]{h.getKey().expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT),
                                            so.ruling.expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT)});
                        } catch (UnsupportedOperationException e) {
                            // expand()/getStart() reject oblique rulings; such pairs are skipped
                            log.info("Some line are oblique, ignoring...");
                            continue;
                        }
                    }
                    break;
                case HRIGHT:
                    tree.remove(so.ruling);
                    break;
                case HLEFT:
                    tree.put(so.ruling, true);
                    break;
            }
        }

        return rv;
    }
}

View File

@ -0,0 +1,305 @@
package com.iqser.red.service.redaction.v1.server.tableextraction.model;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
import lombok.Getter;
/**
 * A table made of {@link Cell}s arranged in a row/column grid, placed on a page
 * as a text container classified as "Table".
 */
@SuppressWarnings("all")
public class Table extends AbstractTextContainer {

    /** Cells keyed by grid position, ordered row-major. */
    private final TreeMap<CellPosition, Cell> cells = new TreeMap<>();

    /** Spatial index of all cells accepted by {@link #addCells(List)}. */
    private RectangleSpatialIndex<Cell> si = new RectangleSpatialIndex<>();

    @Getter
    private int rowCount = 0;

    @Getter
    private int colCount = 0;

    /** Rotation passed to the constructor; 90 and 270 change the row layout of {@link #getRows()}. */
    private int rotation = 0;

    /** Cached result of {@link #computeRows()}. */
    private List<List<Cell>> memoizedRows = null;

    /**
     * @param cells    cells of the table; the list is filtered and sorted in place
     * @param area     area covered by the table
     * @param rotation rotation in degrees (90 and 270 are handled specially)
     */
    public Table(List<Cell> cells, Rectangle area, int rotation) {
        addCells(cells);
        minX = area.getLeft();
        // NOTE(review): Rectangle.getBottom() is the maximum Y and getTop() the minimum Y;
        // presumably intentional for the page coordinate system - confirm.
        minY = area.getBottom();
        maxX = area.getRight();
        maxY = area.getTop();
        classification = "Table";
        this.rotation = rotation;
    }

    /**
     * @return the rows of the table (computed once and cached); entries may be
     *         {@code null} where no cell exists at a grid position
     */
    public List<List<Cell>> getRows() {
        if (memoizedRows == null) {
            memoizedRows = computeRows();
        }
        return memoizedRows;
    }

    /** Builds the row lists from the cell grid, taking the rotation into account. */
    private List<List<Cell>> computeRows() {
        List<List<Cell>> rows = new ArrayList<>();
        if (rotation == 90) {
            for (int i = 0; i < colCount; i++) { // rows
                List<Cell> lastRow = new ArrayList<>();
                for (int j = rowCount - 1; j >= 0; j--) { // cols
                    Cell cell = cells.get(new CellPosition(j, i));
                    lastRow.add(cell);
                }
                rows.add(lastRow);
            }
        } else if (rotation == 270) {
            for (int i = colCount - 1; i >= 0; i--) { // rows
                List<Cell> lastRow = new ArrayList<>();
                for (int j = 0; j < rowCount; j++) { // cols
                    // i iterates the stored columns and j the stored rows, so the
                    // lookup is (row=j, col=i) exactly as in the 90 degree branch
                    Cell cell = cells.get(new CellPosition(j, i));
                    lastRow.add(cell);
                }
                rows.add(lastRow);
            }
        } else {
            for (int i = 0; i < rowCount; i++) {
                List<Cell> lastRow = new ArrayList<>();
                for (int j = 0; j < colCount; j++) {
                    Cell cell = cells.get(new CellPosition(i, j)); // JAVA_8 use getOrDefault()
                    lastRow.add(cell);
                }
                rows.add(lastRow);
            }
        }
        return rows;
    }

    /**
     * Places a cell at the given grid position and grows the row/column counts as needed.
     *
     * @param chunk cell to place (may be {@code null})
     * @param row   zero-based row index
     * @param col   zero-based column index
     */
    public void add(Cell chunk, int row, int col) {
        rowCount = Math.max(rowCount, row + 1);
        colCount = Math.max(colCount, col + 1);
        CellPosition cp = new CellPosition(row, col);
        cells.put(cp, chunk);
    }

    /**
     * Drops cells that are not larger than 1.1 in both dimensions (removing them
     * from the given list), indexes the rest and assigns each a grid position.
     */
    private void addCells(List<Cell> cells) {
        if (cells.isEmpty()) {
            return;
        }
        Iterator<Cell> itty = cells.iterator();
        while (itty.hasNext()) {
            Cell cell = itty.next();
            if (cell.getWidth() > 1.1 && cell.getHeight() > 1.1) {
                si.add(cell);
            } else {
                itty.remove();
            }
        }
        List<List<Cell>> rowsOfCells = rowsOfCells(cells);
        // the index is no longer modified below, so its bounds are computed once
        Rectangle bounds = si.getBounds();
        Map<Integer, Cell> previousNonNullCellForColumnIndex = new HashMap<>();
        for (int i = 0; i < rowsOfCells.size(); i++) {
            List<Cell> row = rowsOfCells.get(i);
            Iterator<Cell> rowCells = row.iterator();
            int startColumn = 0;
            int jumpToColumn = 0;
            while (rowCells.hasNext()) {
                Cell cell = rowCells.next();
                if (i > 0) {
                    // cells contained in the region spanning from the table's left edge to this
                    // cell's left edge and from this cell's bottom to the table's bottom decide
                    // how many columns precede this cell
                    List<List<Cell>> others = rowsOfCells(
                            si.contains(
                                    new Rectangle(cell.getBottom(),
                                            bounds.getLeft(),
                                            cell.getLeft() - bounds.getLeft() + 1,
                                            bounds.getBottom() - cell.getBottom()
                                    )
                            ));
                    for (List<Cell> r : others) {
                        jumpToColumn = Math.max(jumpToColumn, r.size());
                    }
                }
                // fill skipped columns with the last cell seen in that column (may be null)
                while (startColumn != jumpToColumn) {
                    add(previousNonNullCellForColumnIndex.get(startColumn), i, startColumn);
                    startColumn++;
                }
                add(cell, i, startColumn);
                previousNonNullCellForColumnIndex.put(startColumn, cell);
                startColumn++;
                jumpToColumn = startColumn;
            }
        }
    }

    /**
     * Groups cells into rows by their bottom coordinate. The given list is sorted
     * in place: by bottom (rounded to 2 decimals) descending, ties by left ascending.
     */
    private static List<List<Cell>> rowsOfCells(List<Cell> cells) {
        Cell c;
        float lastTop;
        List<List<Cell>> rv = new ArrayList<>();
        List<Cell> lastRow;
        if (cells.isEmpty()) {
            return rv;
        }
        Collections.sort(cells, new Comparator<Cell>() {
            @Override
            public int compare(Cell arg0, Cell arg1) {
                return Double.compare(arg0.getLeft(), arg1.getLeft());
            }
        });
        // Collections.sort is stable, so the left-to-right order survives within a row
        Collections.sort(cells, Collections.reverseOrder(new Comparator<Cell>() {
            @Override
            public int compare(Cell arg0, Cell arg1) {
                return Float.compare(Utils.round(arg0.getBottom(), 2), Utils.round(arg1.getBottom(), 2));
            }
        }));
        Iterator<Cell> iter = cells.iterator();
        c = iter.next();
        lastTop = c.getBottom();
        lastRow = new ArrayList<>();
        lastRow.add(c);
        rv.add(lastRow);
        while (iter.hasNext()) {
            c = iter.next();
            // a different bottom coordinate starts a new row
            if (!Utils.feq(c.getBottom(), lastTop)) {
                lastRow = new ArrayList<>();
                rv.add(lastRow);
            }
            lastRow.add(c);
            lastTop = c.getBottom();
        }
        return rv;
    }

    /**
     * Renders the table as comma separated text, one line per row. Every text
     * block is wrapped in double quotes; double quotes inside the text are
     * escaped with a backslash. Multiple text blocks of a cell are separated by
     * a newline.
     */
    @Override
    public String getText() {
        StringBuilder sb = new StringBuilder();
        List<List<Cell>> rows = getRows();
        int i = 0;
        for (List<Cell> row : rows) {
            if (i != 0) {
                sb.append("\n");
            }
            if (!row.isEmpty()) {
                boolean firstColumn = true;
                for (Cell column : row) {
                    if (!firstColumn) {
                        sb.append(",");
                    }
                    if (column != null && column.getTextBlocks() != null) {
                        boolean first = true;
                        for (TextBlock textBlock : column.getTextBlocks()) {
                            if (!first) {
                                sb.append("\n");
                            }
                            // literal replace: with replaceAll the backslash in the replacement
                            // would only escape the quote, leaving the text unchanged
                            sb.append('\"').append(textBlock.getText().replace("\"", "\\\"")).append('\"');
                            first = false;
                        }
                    }
                    firstColumn = false;
                }
            }
            i++;
        }
        return sb.toString();
    }

    /**
     * Renders the table as an HTML {@code <table>}; the first row uses header cells.
     *
     * <p>NOTE(review): cell text is inserted without HTML escaping - confirm the
     * output is never rendered from untrusted documents.
     */
    public String getTextAsHtml() {
        StringBuilder sb = new StringBuilder();
        List<List<Cell>> rows = getRows();
        sb.append("<table border=\"1\">");
        int i = 0;
        for (List<Cell> row : rows) {
            sb.append("\n<tr>");
            if (!row.isEmpty()) {
                for (Cell column : row) {
                    sb.append(i == 0 ? "\n<th>" : "\n<td>");
                    if (column != null && column.getTextBlocks() != null) {
                        boolean first = true;
                        for (TextBlock textBlock : column.getTextBlocks()) {
                            if (!first) {
                                sb.append("<br />");
                            }
                            sb.append(textBlock.getText().replaceAll("\\n", "<br />"));
                            first = false;
                        }
                    }
                    sb.append(i == 0 ? "</th>" : "</td>");
                }
            }
            sb.append("</tr>");
            i++;
        }
        sb.append("</table>");
        return sb.toString();
    }

    /** Grid coordinate of a cell, ordered by row and then by column. */
    static class CellPosition implements Comparable<CellPosition> {

        CellPosition(int row, int col) {
            this.row = row;
            this.col = col;
        }

        final int row, col;

        @Override
        public int hashCode() {
            return row + 101 * col;
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (obj == null) {
                return false;
            }
            if (getClass() != obj.getClass()) {
                return false;
            }
            CellPosition other = (CellPosition) obj;
            return row == other.row && col == other.col;
        }

        @Override
        public int compareTo(CellPosition other) {
            // Integer.compare avoids the overflow of a plain subtraction
            int byRow = Integer.compare(row, other.row);
            return byRow != 0 ? byRow : Integer.compare(col, other.col);
        }
    }
}

View File

@ -0,0 +1,165 @@
package com.iqser.red.service.redaction.v1.server.tableextraction.service;
import java.awt.geom.Line2D;
import java.awt.geom.Point2D;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
/**
 * Cleans raw ruling lines: snaps nearly aligned endpoints onto common
 * coordinates and merges colinear rulings that touch or nearly touch.
 */
@Service
public class RulingCleaningService {

    /**
     * Snaps the given rulings (modifying them in place) and returns the collapsed
     * vertical and horizontal rulings; oblique rulings are dropped.
     *
     * @param rulings       rulings to clean
     * @param minCharWidth  snapping threshold along X
     * @param minCharHeight snapping threshold along Y
     * @return the cleaned vertical and horizontal rulings
     */
    public CleanRulings getCleanRulings(List<Ruling> rulings, float minCharWidth, float minCharHeight){
        if (!rulings.isEmpty()) {
            snapPoints(rulings, minCharWidth , minCharHeight);
        }

        List<Ruling> vrs = new ArrayList<>();
        for (Ruling vr : rulings) {
            if (vr.vertical()) {
                vrs.add(vr);
            }
        }
        List<Ruling> verticalRulingLines = collapseOrientedRulings(vrs);

        List<Ruling> hrs = new ArrayList<>();
        for (Ruling hr : rulings) {
            if (hr.horizontal()) {
                hrs.add(hr);
            }
        }
        List<Ruling> horizontalRulingLines = collapseOrientedRulings(hrs);

        return CleanRulings
                .builder()
                .vertical(verticalRulingLines)
                .horizontal(horizontalRulingLines)
                .build();
    }

    /**
     * Moves endpoints that lie within a threshold of each other onto their common
     * average coordinate, first along X and then along Y, and writes the result
     * back into the given lines.
     *
     * @param rulings    lines to snap; modified in place
     * @param xThreshold maximum X distance to the first point of a group
     * @param yThreshold maximum Y distance to the first point of a group
     */
    public void snapPoints(List<? extends Line2D.Float> rulings, float xThreshold, float yThreshold) {
        if (rulings.isEmpty()) {
            return;
        }

        // Endpoint copies per line, kept in list order. A positional list is used
        // instead of a hash map keyed by the lines so that equal or mutated lines
        // can never be mixed up.
        List<Point2D[]> endpoints = new ArrayList<>(rulings.size());
        List<Point2D> points = new ArrayList<>(rulings.size() * 2);
        for (Line2D.Float r : rulings) {
            Point2D p1 = r.getP1();
            Point2D p2 = r.getP2();
            endpoints.add(new Point2D[]{p1, p2});
            points.add(p1);
            points.add(p2);
        }

        snapCoordinate(points, xThreshold, true);
        snapCoordinate(points, yThreshold, false);

        // finally, modify lines
        int index = 0;
        for (Line2D.Float r : rulings) {
            Point2D[] p = endpoints.get(index++);
            r.setLine(p[0], p[1]);
        }
    }

    /**
     * Sorts the points along one axis, groups consecutive points whose coordinate
     * is closer than {@code threshold} to the first point of the current group and
     * moves every point of a group to the group's average coordinate.
     */
    private static void snapCoordinate(List<Point2D> points, float threshold, boolean alongX) {
        Comparator<Point2D> order;
        if (alongX) {
            order = Comparator.comparingDouble(Point2D::getX);
        } else {
            order = Comparator.comparingDouble(Point2D::getY);
        }
        points.sort(order);

        List<List<Point2D>> groupedPoints = new ArrayList<>();
        groupedPoints.add(new ArrayList<>(Collections.singletonList(points.get(0))));
        // subList's upper bound is exclusive: size() is required to include the last point
        for (Point2D p : points.subList(1, points.size())) {
            List<Point2D> last = groupedPoints.get(groupedPoints.size() - 1);
            double distance = alongX
                    ? Math.abs(p.getX() - last.get(0).getX())
                    : Math.abs(p.getY() - last.get(0).getY());
            if (distance < threshold) {
                last.add(p);
            } else {
                groupedPoints.add(new ArrayList<>(Collections.singletonList(p)));
            }
        }

        for (List<Point2D> group : groupedPoints) {
            float avgLoc = 0;
            for (Point2D p : group) {
                avgLoc += alongX ? p.getX() : p.getY();
            }
            avgLoc /= group.size();
            for (Point2D p : group) {
                if (alongX) {
                    p.setLocation(avgLoc, p.getY());
                } else {
                    p.setLocation(p.getX(), avgLoc);
                }
            }
        }
    }

    /** Collapses rulings of one orientation using an expand amount of 1. */
    private List<Ruling> collapseOrientedRulings(List<Ruling> lines) {
        int COLINEAR_OR_PARALLEL_PIXEL_EXPAND_AMOUNT = 1;
        return collapseOrientedRulings(lines, COLINEAR_OR_PARALLEL_PIXEL_EXPAND_AMOUNT);
    }

    /**
     * Merges rulings that share the same position and nearly intersect.
     *
     * @param lines        rulings of a single orientation; sorted in place by position, then start
     * @param expandAmount lengthening used for the "nearly intersects" test
     * @return the merged rulings; zero-length rulings are dropped
     */
    private List<Ruling> collapseOrientedRulings(List<Ruling> lines, int expandAmount) {
        ArrayList<Ruling> rv = new ArrayList<>();
        lines.sort((a, b) -> {
            final float diff = a.getPosition() - b.getPosition();
            return Float.compare(diff == 0 ? a.getStart() - b.getStart() : diff, 0f);
        });

        for (Ruling next_line : lines) {
            Ruling last = rv.isEmpty() ? null : rv.get(rv.size() - 1);
            // if current line colinear with next, and are "close enough": expand current line
            if (last != null && Utils.feq(next_line.getPosition(), last.getPosition()) && last.nearlyIntersects(next_line, expandAmount)) {
                final float lastStart = last.getStart();
                final float lastEnd = last.getEnd();

                // a ruling is "flipped" when it runs from the larger to the smaller coordinate
                final boolean lastFlipped = lastStart > lastEnd;
                final boolean nextFlipped = next_line.getStart() > next_line.getEnd();

                // bring the next ruling into the same direction as the last one before merging
                boolean differentDirections = nextFlipped != lastFlipped;
                float nextS = differentDirections ? next_line.getEnd() : next_line.getStart();
                float nextE = differentDirections ? next_line.getStart() : next_line.getEnd();

                final float newStart = lastFlipped ? Math.max(nextS, lastStart) : Math.min(nextS, lastStart);
                final float newEnd = lastFlipped ? Math.min(nextE, lastEnd) : Math.max(nextE, lastEnd);
                last.setStartEnd(newStart, newEnd);
                assert !last.oblique();
            }
            else if (next_line.length() == 0) {
                continue;
            }
            else {
                rv.add(next_line);
            }
        }
        return rv;
    }
}

View File

@ -0,0 +1,330 @@
package com.iqser.red.service.redaction.v1.server.tableextraction.service;
import java.awt.geom.Point2D;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
import com.iqser.red.service.redaction.v1.server.tableextraction.utils.Utils;
@Service
@SuppressWarnings("all")
public class TableExtractionService {
public void extractTables(CleanRulings cleanRulings, Page page){
List<Cell> cells = findCells(cleanRulings.getHorizontal(), cleanRulings.getVertical());
Iterator<AbstractTextContainer> itty = page.getTextBlocks().iterator();
while (itty.hasNext()) {
TextBlock textBlock = (TextBlock) itty.next();
for (Cell cell : cells) {
if (cell.intersects(textBlock.getMinX(), textBlock.getMinY(), textBlock.getWidth(), textBlock.getHeight())) {
cell.addTextBlock(textBlock);
break;
}
}
}
List<Rectangle> spreadsheetAreas = findSpreadsheetsFromCells(cells)
.stream()
.filter(r -> r.getWidth() > 0f && r.getHeight() > 0f)
.collect(Collectors.toList());
List<Table> tables = new ArrayList<>();
for (Rectangle area : spreadsheetAreas) {
List<Cell> overlappingCells = new ArrayList<>();
for (Cell c : cells) {
if (c.intersects(area)) {
overlappingCells.add(c);
}
}
tables.add(new Table(overlappingCells, area, page.getRotation()));
}
for (Table table : tables) {
int position = -1;
itty = page.getTextBlocks().iterator();
while (itty.hasNext()) {
AbstractTextContainer textBlock = (AbstractTextContainer) itty.next();
if (table.contains(textBlock)) {
if (position == -1) {
position = page.getTextBlocks().indexOf(textBlock);
}
itty.remove();
}
}
if (position != -1) {
page.getTextBlocks().add(position, table);
}
}
}
public List<Cell> findCells(List<Ruling> horizontalRulingLines, List<Ruling> verticalRulingLines) {
List<Cell> cellsFound = new ArrayList<>();
Map<Point2D, Ruling[]> intersectionPoints = Ruling.findIntersections(horizontalRulingLines, verticalRulingLines);
List<Point2D> intersectionPointsList = new ArrayList<>(intersectionPoints.keySet());
Collections.sort(intersectionPointsList, POINT_COMPARATOR);
boolean doBreak;
for (int i = 0; i < intersectionPointsList.size(); i++) {
Point2D topLeft = intersectionPointsList.get(i);
Ruling[] hv = intersectionPoints.get(topLeft);
doBreak = false;
// CrossingPointsDirectlyBelow( topLeft );
List<Point2D> xPoints = new ArrayList<>();
// CrossingPointsDirectlyToTheRight( topLeft );
List<Point2D> yPoints = new ArrayList<>();
for (Point2D p : intersectionPointsList.subList(i, intersectionPointsList.size())) {
if (p.getX() == topLeft.getX() && p.getY() > topLeft.getY()) {
xPoints.add(p);
}
if (p.getY() == topLeft.getY() && p.getX() > topLeft.getX()) {
yPoints.add(p);
}
}
outer:
for (Point2D xPoint : xPoints) {
if (doBreak) {
break;
}
// is there a vertical edge b/w topLeft and xPoint?
if (!hv[1].equals(intersectionPoints.get(xPoint)[1])) {
continue;
}
for (Point2D yPoint : yPoints) {
// is there an horizontal edge b/w topLeft and yPoint ?
if (!hv[0].equals(intersectionPoints.get(yPoint)[0])) {
continue;
}
Point2D btmRight = new Point2D.Float((float) yPoint.getX(), (float) xPoint.getY());
if (intersectionPoints.containsKey(btmRight)
&& intersectionPoints.get(btmRight)[0].equals(intersectionPoints.get(xPoint)[0])
&& intersectionPoints.get(btmRight)[1].equals(intersectionPoints.get(yPoint)[1])) {
cellsFound.add(new Cell(topLeft, btmRight));
doBreak = true;
break outer;
}
}
}
}
// TODO create cells for vertical ruling lines with aligned endpoints at the top/bottom of a grid
// that aren't connected with an horizontal ruler?
// see: https://github.com/jazzido/tabula-extractor/issues/78#issuecomment-41481207
return cellsFound;
}
public List<Rectangle> findSpreadsheetsFromCells(List<? extends Rectangle> cells) {
// via: http://stackoverflow.com/questions/13746284/merging-multiple-adjacent-rectangles-into-one-polygon
List<Rectangle> rectangles = new ArrayList<>();
Set<Point2D> pointSet = new HashSet<>();
Map<Point2D, Point2D> edgesH = new HashMap<>();
Map<Point2D, Point2D> edgesV = new HashMap<>();
int i = 0;
cells = new ArrayList<>(new HashSet<>(cells));
Utils.sort(cells, Rectangle.ILL_DEFINED_ORDER);
for (Rectangle cell : cells) {
for (Point2D pt : cell.getPoints()) {
if (pointSet.contains(pt)) { // shared vertex, remove it
pointSet.remove(pt);
} else {
pointSet.add(pt);
}
}
}
// X first sort
List<Point2D> pointsSortX = new ArrayList<>(pointSet);
Collections.sort(pointsSortX, X_FIRST_POINT_COMPARATOR);
// Y first sort
List<Point2D> pointsSortY = new ArrayList<>(pointSet);
Collections.sort(pointsSortY, POINT_COMPARATOR);
while (i < pointSet.size()) {
float currY = (float) pointsSortY.get(i).getY();
while (i < pointSet.size() && Utils.feq(pointsSortY.get(i).getY(), currY)) {
edgesH.put(pointsSortY.get(i), pointsSortY.get(i + 1));
edgesH.put(pointsSortY.get(i + 1), pointsSortY.get(i));
i += 2;
}
}
i = 0;
while (i < pointSet.size()) {
float currX = (float) pointsSortX.get(i).getX();
while (i < pointSet.size() && Utils.feq(pointsSortX.get(i).getX(), currX)) {
edgesV.put(pointsSortX.get(i), pointsSortX.get(i + 1));
edgesV.put(pointsSortX.get(i + 1), pointsSortX.get(i));
i += 2;
}
}
// Get all the polygons
List<List<PolygonVertex>> polygons = new ArrayList<>();
Point2D nextVertex;
while (!edgesH.isEmpty()) {
ArrayList<PolygonVertex> polygon = new ArrayList<>();
Point2D first = edgesH.keySet().iterator().next();
polygon.add(new PolygonVertex(first, Direction.HORIZONTAL));
edgesH.remove(first);
while (true) {
PolygonVertex curr = polygon.get(polygon.size() - 1);
PolygonVertex lastAddedVertex;
if (curr.direction == Direction.HORIZONTAL) {
nextVertex = edgesV.get(curr.point);
edgesV.remove(curr.point);
lastAddedVertex = new PolygonVertex(nextVertex, Direction.VERTICAL);
polygon.add(lastAddedVertex);
} else {
nextVertex = edgesH.get(curr.point);
edgesH.remove(curr.point);
lastAddedVertex = new PolygonVertex(nextVertex, Direction.HORIZONTAL);
polygon.add(lastAddedVertex);
}
if (lastAddedVertex.equals(polygon.get(0))) {
// closed polygon
polygon.remove(polygon.size() - 1);
break;
}
}
for (PolygonVertex vertex : polygon) {
edgesH.remove(vertex.point);
edgesV.remove(vertex.point);
}
polygons.add(polygon);
}
// calculate grid-aligned minimum area rectangles for each found polygon
for (List<PolygonVertex> poly : polygons) {
float top = java.lang.Float.MAX_VALUE;
float left = java.lang.Float.MAX_VALUE;
float bottom = java.lang.Float.MIN_VALUE;
float right = java.lang.Float.MIN_VALUE;
for (PolygonVertex pt : poly) {
top = (float) Math.min(top, pt.point.getY());
left = (float) Math.min(left, pt.point.getX());
bottom = (float) Math.max(bottom, pt.point.getY());
right = (float) Math.max(right, pt.point.getX());
}
rectangles.add(new Rectangle(top, left, right - left, bottom - top));
}
return rectangles;
}
private static final Comparator<Point2D> X_FIRST_POINT_COMPARATOR = new Comparator<Point2D>() {
@Override
public int compare(Point2D arg0, Point2D arg1) {
int rv = 0;
float arg0X = Utils.round(arg0.getX(), 2);
float arg0Y = Utils.round(arg0.getY(), 2);
float arg1X = Utils.round(arg1.getX(), 2);
float arg1Y = Utils.round(arg1.getY(), 2);
if (arg0X > arg1X) {
rv = 1;
} else if (arg0X < arg1X) {
rv = -1;
} else if (arg0Y > arg1Y) {
rv = 1;
} else if (arg0Y < arg1Y) {
rv = -1;
}
return rv;
}
};
/**
 * Orders points by Y coordinate first and by X coordinate second.
 * All coordinates are rounded to two decimal places via {@link Utils#round(double, int)}
 * before they are compared, so points whose rounded coordinates match compare as equal.
 */
private static final Comparator<Point2D> POINT_COMPARATOR = new Comparator<Point2D>() {
    @Override
    public int compare(Point2D arg0, Point2D arg1) {
        // Round every coordinate up front, exactly once, before any comparison is made.
        final float leftX = Utils.round(arg0.getX(), 2);
        final float leftY = Utils.round(arg0.getY(), 2);
        final float rightX = Utils.round(arg1.getX(), 2);
        final float rightY = Utils.round(arg1.getY(), 2);

        // Primary key: Y.
        if (leftY > rightY) {
            return 1;
        }
        if (leftY < rightY) {
            return -1;
        }
        // Secondary key: X.
        if (leftX > rightX) {
            return 1;
        }
        if (leftX < rightX) {
            return -1;
        }
        return 0;
    }
};
/**
 * Orientation tag carried by every {@link PolygonVertex}.
 * NOTE(review): while tracing a polygon, a vertex tagged HORIZONTAL is continued through the
 * vertical edge map and the next vertex is tagged VERTICAL (and vice versa), so the tag appears
 * to name the orientation of the edge that led to the vertex - confirm.
 */
private enum Direction {
HORIZONTAL,
VERTICAL
}
/**
 * A polygon corner: a point plus the {@link Direction} tag assigned when the vertex was created.
 * Equality and hashing consider the point only; the direction is ignored.
 */
static class PolygonVertex {

    Point2D point;
    Direction direction;

    /**
     * @param point     location of the vertex
     * @param direction direction tag of the vertex
     */
    public PolygonVertex(Point2D point, Direction direction) {
        this.point = point;
        this.direction = direction;
    }

    /** Two vertices are equal when their points are equal, regardless of direction. */
    @Override
    public boolean equals(Object other) {
        if (other == this) {
            return true;
        }
        if (other instanceof PolygonVertex) {
            PolygonVertex that = (PolygonVertex) other;
            return point.equals(that.point);
        }
        return false;
    }

    /** Hash of the point only, matching {@link #equals(Object)}. */
    @Override
    public int hashCode() {
        return point.hashCode();
    }

    @Override
    public String toString() {
        final String className = getClass().getName();
        final String pointText = point.toString();
        final String directionText = direction.toString();
        return className + "[point=" + pointText + ",direction=" + directionText + "]";
    }
}
}

View File

@ -0,0 +1,140 @@
/*
* CohenSutherland.java
* --------------------
* (c) 2007 by Intevation GmbH
*
* @author Sascha L. Teichmann (teichmann@intevation.de)
* @author Ludwig Reiter (ludwig@intevation.de)
*
* This program is free software under the LGPL (>=v2.1)
* Read the file LICENSE.txt coming with the sources for details.
*/
package com.iqser.red.service.redaction.v1.server.tableextraction.utils;
import java.awt.geom.Line2D;
import java.awt.geom.Rectangle2D;
/**
 * Cohen-Sutherland line clipper: clips a line segment against an
 * axis-aligned clip rectangle.
 */
@SuppressWarnings("all")
public final class CohenSutherlandClipping {

    // Region code bits; a point inside the clip rectangle has none of them set.
    private static final int INSIDE = 0;
    private static final int LEFT = 1;
    private static final int RIGHT = 2;
    private static final int BOTTOM = 4;
    private static final int TOP = 8;

    private double xMin;
    private double yMin;
    private double xMax;
    private double yMax;

    /**
     * Creates a clipper whose clip rectangle is (0, 0, 0, 0).
     */
    public CohenSutherlandClipping() {
    }

    /**
     * Creates a clipper for the given clip rectangle.
     * @param clip the clip rectangle to use
     */
    public CohenSutherlandClipping(Rectangle2D clip) {
        setClip(clip);
    }

    /**
     * Replaces the clip rectangle.
     * @param clip the clip rectangle
     */
    public void setClip(Rectangle2D clip) {
        xMin = clip.getX();
        yMin = clip.getY();
        xMax = xMin + clip.getWidth();
        yMax = yMin + clip.getHeight();
    }

    /**
     * Computes the region code of a point relative to the clip rectangle.
     * @return a combination of LEFT/RIGHT and BOTTOM/TOP bits, or INSIDE
     */
    private int regionCode(double x, double y) {
        int code = INSIDE;
        if (x < xMin) {
            code |= LEFT;
        } else if (x > xMax) {
            code |= RIGHT;
        }
        if (y < yMin) {
            code |= BOTTOM;
        } else if (y > yMax) {
            code |= TOP;
        }
        return code;
    }

    /**
     * Y coordinate of the line through (refX, refY) with the given slope at the given x.
     * An x that Utils.feq treats as equal to refX contributes no offset.
     */
    private static double yAt(double x, double refX, double refY, double slope) {
        return (Utils.feq(x, refX) ? 0 : x - refX) * slope + refY;
    }

    /**
     * X coordinate of the line through (refX, refY) with the given slope at the given y.
     * Vertical lines keep refX; a y that Utils.feq treats as equal to refY contributes no offset.
     */
    private static double xAt(double y, double refX, double refY, double slope, boolean vertical) {
        return vertical
                ? refX
                : (Utils.feq(y, refY) ? 0 : y - refY) / slope + refX;
    }

    /**
     * Clips the given line against the clip rectangle.
     * The line is modified in place when clipping succeeds.
     * @param line the line to clip
     * @return true if the line is clipped, false if the line lies
     *         entirely outside the clip rectangle (line left unchanged)
     */
    public boolean clip(Line2D.Float line) {
        double startX = line.getX1();
        double startY = line.getY1();
        double endX = line.getX2();
        double endY = line.getY2();

        final boolean vertical = startX == endX;
        final double slope = vertical ? 0d : (endY - startY) / (endX - startX);

        int startCode = regionCode(startX, startY);
        int endCode = regionCode(endX, endY);

        // Loop until both end points are inside the clip rectangle.
        while (startCode != INSIDE || endCode != INSIDE) {

            // Both end points share an outside region: nothing of the line is visible.
            if ((startCode & endCode) != INSIDE) {
                return false;
            }

            // Work on the start point while it is outside, otherwise on the end point.
            final boolean moveStart = startCode != INSIDE;
            final int outside = moveStart ? startCode : endCode;

            double cutX = 0d;
            double cutY = 0d;

            // The current start point always serves as the reference point on the line.
            if ((outside & LEFT) != INSIDE) {
                cutX = xMin;
                cutY = yAt(cutX, startX, startY, slope);
            } else if ((outside & RIGHT) != INSIDE) {
                cutX = xMax;
                cutY = yAt(cutX, startX, startY, slope);
            } else if ((outside & BOTTOM) != INSIDE) {
                cutY = yMin;
                cutX = xAt(cutY, startX, startY, slope, vertical);
            } else if ((outside & TOP) != INSIDE) {
                cutY = yMax;
                cutX = xAt(cutY, startX, startY, slope, vertical);
            }

            if (moveStart) {
                startX = cutX;
                startY = cutY;
                startCode = regionCode(startX, startY);
            } else {
                endX = cutX;
                endY = cutY;
                endCode = regionCode(endX, endY);
            }
        }

        line.setLine(startX, startY, endX, endY);
        return true;
    }
}
// end of file

View File

@ -0,0 +1,35 @@
package com.iqser.red.service.redaction.v1.server.tableextraction.utils;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import lombok.extern.slf4j.Slf4j;
/**
 * Small numeric and collection helpers used by the table extraction code.
 */
@Slf4j
@SuppressWarnings("all")
public class Utils {

    /** Maximum absolute difference at which {@link #feq(double, double)} treats two values as equal. */
    private final static float EPSILON = 0.1f;

    /**
     * Fuzzy equality check.
     *
     * @param f1 first value
     * @param f2 second value
     * @return true if the absolute difference of both values is smaller than {@code EPSILON}
     */
    public static boolean feq(double f1, double f2) {
        return (Math.abs(f1 - f2) < EPSILON);
    }

    /**
     * Rounds a value half-up to the given number of decimal places.
     *
     * @param d            the value to round
     * @param decimalPlace number of digits to keep after the decimal point
     * @return the rounded value, narrowed to {@code float}
     * @throws NumberFormatException if {@code d} is NaN or infinite (raised by {@link BigDecimal#valueOf(double)})
     */
    public static float round(double d, int decimalPlace) {
        // RoundingMode replaces the deprecated BigDecimal.ROUND_HALF_UP int constant; same rounding behaviour.
        return BigDecimal.valueOf(d)
                .setScale(decimalPlace, RoundingMode.HALF_UP)
                .floatValue();
    }

    /**
     * Sorts the list in place with the given comparator on a best-effort basis.
     * An {@link IllegalArgumentException} raised by the sort is logged, not propagated;
     * in that case the order of the list is not guaranteed.
     *
     * @param list       the list to sort in place
     * @param comparator the ordering to apply
     */
    public static <T> void sort(List<T> list, Comparator<? super T> comparator) {
        try {
            Collections.sort(list, comparator);
        } catch (IllegalArgumentException e) {
            //TODO Figure out why this happens.
            // Pass the exception itself so the stack trace is kept for that investigation.
            log.warn("Sorting failed, list order is not guaranteed: {}", e.getMessage(), e);
        }
    }
}

View File

@ -0,0 +1,242 @@
package com.iqser.red.service.redaction.v1.server.visualization.service;
import static com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService.ADDRESS_CODE;
import static com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService.NAME_CODE;
import static com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService.NO_REDACTION_INDICATOR;
import static com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService.VERTEBRATES_CODE;
import java.awt.Color;
import java.io.IOException;
import java.util.List;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.pdmodel.graphics.color.PDColor;
import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationTextMarkup;
import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
/**
 * Adds highlight annotations for the entities of a classified document to a PDF.
 * Unless a flat redaction is requested, it also draws the outlines of the
 * classified text blocks and tables onto each page.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class AnnotationHighlightService {

    /**
     * Annotates every page of the PDF with highlights for the entities found on it.
     *
     * @param document      the PDF to annotate; it is modified in place
     * @param classifiedDoc the classification result holding paragraphs and entities per page
     * @param flatRedaction if true, only entities accepted by {@link #isRedactionType(Entity)} are
     *                      highlighted, in black, without annotation metadata and without layout
     *                      outlines; if false, all entities are highlighted in a type specific color
     * @throws IOException if writing to the PDF fails
     */
    public void highlight(PDDocument document, Document classifiedDoc, boolean flatRedaction) throws IOException {
        for (int page = 1; page <= document.getNumberOfPages(); page++) {
            PDPage pdPage = document.getPage(page - 1);

            if (!flatRedaction) {
                visualizeLayout(document, pdPage, classifiedDoc, page);
            }

            if (classifiedDoc.getEntities().get(page) == null) {
                continue;
            }

            for (Entity entity : classifiedDoc.getEntities().get(page)) {
                // The check depends on the entity only, so it is done once per entity.
                if (flatRedaction && !isRedactionType(entity)) {
                    continue;
                }
                for (EntityPositionSequence entityPositionSequence : entity.getPositionSequences()) {
                    for (TextPositionSequence textPositions : entityPositionSequence.getSequences()) {
                        // A sequence without text positions has no geometry to highlight.
                        if (textPositions.getTextPositions().size() == 0) {
                            continue;
                        }

                        float height = textPositions.getTextPositions().get(0).getHeightDir() + 2;

                        float posXInit;
                        float posXEnd;
                        float posYInit;
                        float posYEnd;
                        float[] quadPoints;

                        if (textPositions.getTextPositions().get(0).getRotation() == 90) {
                            posXEnd = textPositions.getTextPositions().get(0).getYDirAdj() + 2;
                            posXInit = textPositions.getTextPositions().get(0).getYDirAdj() - height;
                            posYInit = textPositions.getTextPositions().get(0).getXDirAdj();
                            posYEnd = textPositions.getTextPositions().get(textPositions.getTextPositions().size() - 1).getXDirAdj() - height + 2;
                            quadPoints = new float[]{posXInit, posYInit, posXInit, posYEnd + height + 2, posXEnd, posYInit, posXEnd, posYEnd + height + 2};
                        } else {
                            posXInit = textPositions.getTextPositions().get(0).getXDirAdj();
                            posXEnd = textPositions.getTextPositions().get(textPositions.getTextPositions().size() - 1).getXDirAdj() + textPositions.getTextPositions().get(textPositions.getTextPositions().size() - 1).getWidth() + 1;
                            posYInit = textPositions.getTextPositions().get(0).getPageHeight() - textPositions.getTextPositions().get(0).getYDirAdj();
                            posYEnd = textPositions.getTextPositions().get(0).getPageHeight() - textPositions.getTextPositions().get(textPositions.getTextPositions().size() - 1).getYDirAdj();
                            quadPoints = new float[]{posXInit, posYEnd + height + 2, posXEnd, posYEnd + height + 2, posXInit, posYInit - 2, posXEnd, posYEnd - 2};
                        }

                        List<PDAnnotation> annotations = pdPage.getAnnotations();
                        PDAnnotationTextMarkup highlight = new PDAnnotationTextMarkup(PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT);
                        // NOTE(review): appearances are constructed before rectangle, quad points and
                        // color are set - confirm that this order is intended.
                        highlight.constructAppearances();

                        PDRectangle position = new PDRectangle();
                        position.setLowerLeftX(posXInit);
                        position.setLowerLeftY(posYEnd);
                        position.setUpperRightX(posXEnd);
                        position.setUpperRightY(posYEnd + height);
                        highlight.setRectangle(position);

                        if (!flatRedaction) {
                            highlight.setAnnotationName(entityPositionSequence.getId().toString());
                            highlight.setTitlePopup(entityPositionSequence.getId().toString());
                            highlight.setContents(entity.getRedactionReason());
                        }

                        // quadPoints is array of x,y coordinates in Z-like order (top-left, top-right, bottom-left,bottom-right)
                        // of the area to be highlighted
                        highlight.setQuadPoints(quadPoints);

                        PDColor color;
                        if (flatRedaction) {
                            color = new PDColor(new float[]{0, 0, 0}, PDDeviceRGB.INSTANCE);
                        } else {
                            color = new PDColor(getColor(entity), PDDeviceRGB.INSTANCE);
                        }
                        highlight.setColor(color);
                        annotations.add(highlight);
                    }
                }
            }
        }
    }

    /**
     * Draws the outlines of all text blocks and tables that belong to the given page.
     * The content stream is closed even if drawing fails.
     *
     * @param document      the PDF that owns the page
     * @param pdPage        the page to draw on
     * @param classifiedDoc the classification result providing the paragraphs
     * @param page          the one-based page number used to select the blocks
     * @throws IOException if writing to the page fails
     */
    private void visualizeLayout(PDDocument document, PDPage pdPage, Document classifiedDoc, int page) throws IOException {
        try (PDPageContentStream contentStream = new PDPageContentStream(document, pdPage, PDPageContentStream.AppendMode.APPEND, true)) {
            for (Paragraph paragraph : classifiedDoc.getParagraphs()) {
                for (int i = 0; i < paragraph.getPageBlocks().size(); i++) {
                    AbstractTextContainer textBlock = paragraph.getPageBlocks().get(i);
                    if (textBlock.getPage() != page) {
                        continue;
                    }
                    // Label of the form "<position in paragraph>/<blocks in paragraph>".
                    String label = (i + 1) + "/" + paragraph.getPageBlocks().size();
                    if (textBlock instanceof TextBlock) {
                        textBlock.setClassification(label);
                        visualizeTextBlock((TextBlock) textBlock, contentStream);
                    } else if (textBlock instanceof Table) {
                        textBlock.setClassification(label);
                        visualizeTable((Table) textBlock, contentStream);
                    }
                }
            }
        }
    }

    /**
     * @return true if the entity is marked as redaction and its type is the address or name code
     */
    private boolean isRedactionType(Entity entity) {
        if (!entity.isRedaction()) {
            return false;
        }
        // Constant-first comparison tolerates entities without a type.
        return ADDRESS_CODE.equals(entity.getType()) || NAME_CODE.equals(entity.getType());
    }

    /**
     * Picks the RGB highlight color for an entity.
     *
     * @return the color components; never null. Redaction entities with a type that has no
     *         dedicated color get a magenta fallback so that building the PDColor cannot fail.
     */
    private float[] getColor(Entity entity) {
        if (!entity.isRedaction()) {
            return new float[]{0.627f, 0.627f, 0.627f};
        }
        if (VERTEBRATES_CODE.equals(entity.getType())) {
            return new float[]{0, 1, 0};
        }
        if (ADDRESS_CODE.equals(entity.getType())) {
            return new float[]{0, 1, 1};
        }
        if (NAME_CODE.equals(entity.getType())) {
            return new float[]{1, 1, 0};
        }
        if (NO_REDACTION_INDICATOR.equals(entity.getType())) {
            return new float[]{1, 0.502f, 0};
        }
        log.warn("No highlight color defined for entity type {}, using fallback color", entity.getType());
        return new float[]{1, 0, 1};
    }

    /**
     * Strokes the bounding box of a text block and, if present, prints its classification label.
     */
    private void visualizeTextBlock(TextBlock textBlock, PDPageContentStream contentStream) throws IOException {
        contentStream.setStrokingColor(Color.LIGHT_GRAY);
        contentStream.setLineWidth(0.5f);
        contentStream.addRect(textBlock.getMinX(), textBlock.getMinY(), textBlock.getWidth(), textBlock.getHeight());
        contentStream.stroke();

        if (textBlock.getClassification() != null) {
            contentStream.beginText();
            contentStream.setNonStrokingColor(Color.DARK_GRAY);
            contentStream.setFont(PDType1Font.TIMES_ROMAN, 8f);
            contentStream.newLineAtOffset(textBlock.getMinX(), textBlock.getMaxY());
            contentStream.showText(textBlock.getClassification());
            contentStream.endText();
        }
    }

    /**
     * Strokes every non-null cell of a table and, if present, prints the table's classification label.
     */
    private void visualizeTable(Table table, PDPageContentStream contentStream) throws IOException {
        for (List<Cell> row : table.getRows()) {
            for (Cell cell : row) {
                if (cell != null) {
                    contentStream.setLineWidth(0.5f);
                    contentStream.setStrokingColor(Color.CYAN);
                    contentStream.addRect((float) cell.getX(), (float) cell.getY(), (float) cell.getWidth(), (float) cell.getHeight());
                    contentStream.stroke();
                }
            }
        }

        if (table.getClassification() != null) {
            contentStream.beginText();
            contentStream.setNonStrokingColor(Color.DARK_GRAY);
            contentStream.setFont(PDType1Font.TIMES_ROMAN, 8f);
            contentStream.newLineAtOffset(table.getMinX(), table.getMinY());
            contentStream.showText(table.getClassification());
            contentStream.endText();
        }
    }
}

View File

@ -0,0 +1,68 @@
package com.iqser.red.service.redaction.v1.server.visualization.service;
import java.awt.image.BufferedImage;
import java.io.IOException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.graphics.image.LosslessFactory;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
/**
 * Flattens a PDF by rendering every page to an image and assembling the
 * images into a new document.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class PdfFlattenService {

    private final RedactionServiceSettings settings;

    /**
     * Renders each page of the source document with the configured DPI and builds a new
     * document with one image page per source page.
     *
     * @param sourceDoc the document to flatten; it is closed once flattening has succeeded
     * @return the new, flattened document; the caller is responsible for closing it
     * @throws IOException if rendering or writing fails; the partially built result is closed
     *                     in that case, the source document is left open
     */
    public PDDocument flattenPDF(PDDocument sourceDoc) throws IOException {
        PDDocument destDoc = new PDDocument();
        try {
            PDFRenderer pdfRenderer = new PDFRenderer(sourceDoc);

            final int pageCount = sourceDoc.getDocumentCatalog().getPages().getCount();
            log.info("{} page{} to flatten.", pageCount, pageCount == 1 ? "" : "s");

            for (int i = 0; i < pageCount; i += 1) {
                log.info("Flattening page {} of {}...", i + 1, pageCount);
                BufferedImage img = pdfRenderer.renderImageWithDPI(i, settings.getFlattenImageDpi(), ImageType.RGB);
                log.info("Image rendered in memory ({}x{} {}DPI). Adding to PDF...", img.getWidth(), img.getHeight(), settings.getFlattenImageDpi());

                // The new page gets the pixel dimensions of the rendered image.
                PDPage imagePage = new PDPage(new PDRectangle(img.getWidth(), img.getHeight()));
                destDoc.addPage(imagePage);
                PDImageXObject imgObj = LosslessFactory.createFromImage(destDoc, img);

                // try-with-resources closes the stream even if drawing fails.
                try (PDPageContentStream imagePageContentStream = new PDPageContentStream(destDoc, imagePage)) {
                    imagePageContentStream.drawImage(imgObj, 0, 0);
                    log.info("Image added successfully.");
                }
                img.flush();
            }

            log.info("New flattened PDF created in memory.");
            sourceDoc.close();
            return destDoc;
        } catch (IOException | RuntimeException e) {
            // Nobody else holds a reference to the partial result, so release it here.
            try {
                destDoc.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }
}

View File

@ -0,0 +1,146 @@
package com.iqser.red.service.redaction.v1.server.visualization.service;
import java.awt.Color;
import java.io.IOException;
import java.util.List;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
/**
 * Draws debugging overlays (text block, table and cell outlines plus classification labels)
 * onto the pages of a PDF.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class PdfVisualisationService {

    /**
     * Outlines all paragraph blocks on their pages and labels each block with
     * "&lt;position in paragraph&gt;/&lt;blocks in paragraph&gt;".
     *
     * @param classifiedDoc the classification result providing the paragraphs
     * @param document      the PDF to draw on; it is modified in place
     * @throws IOException if writing to the PDF fails
     */
    public void visualizeParagraphs(Document classifiedDoc, PDDocument document) throws IOException {
        for (int page = 1; page <= document.getNumberOfPages(); page++) {
            PDPage pdPage = document.getPage(page - 1);

            // try-with-resources closes the stream even if drawing fails.
            try (PDPageContentStream contentStream = new PDPageContentStream(document, pdPage, PDPageContentStream.AppendMode.APPEND, true)) {
                for (Paragraph paragraph : classifiedDoc.getParagraphs()) {
                    for (int i = 0; i < paragraph.getPageBlocks().size(); i++) {
                        AbstractTextContainer textBlock = paragraph.getPageBlocks().get(i);
                        if (textBlock.getPage() != page) {
                            continue;
                        }
                        String label = (i + 1) + "/" + paragraph.getPageBlocks().size();
                        if (textBlock instanceof TextBlock) {
                            textBlock.setClassification(label);
                            visualizeTextBlock((TextBlock) textBlock, contentStream);
                        } else if (textBlock instanceof Table) {
                            textBlock.setClassification(label);
                            visualizeTable((Table) textBlock, contentStream);
                        }
                    }
                }
            }
        }
    }

    /**
     * Outlines the text blocks and tables of every analyzed page together with the
     * page's body text frame.
     *
     * @param classifiedDoc the classification result providing the analyzed pages
     * @param document      the PDF to draw on; it is modified in place
     * @throws IOException if writing to the PDF fails
     */
    public void visualizeClassifications(Document classifiedDoc, PDDocument document) throws IOException {
        for (int page = 1; page <= document.getNumberOfPages(); page++) {
            Page analyzedPage = classifiedDoc.getPages().get(page - 1);
            PDPage pdPage = document.getPage(page - 1);

            // try-with-resources closes the stream even if drawing fails.
            try (PDPageContentStream contentStream = new PDPageContentStream(document, pdPage, PDPageContentStream.AppendMode.APPEND, true)) {
                for (AbstractTextContainer textBlock : analyzedPage.getTextBlocks()) {
                    if (textBlock == null) {
                        continue;
                    }
                    if (textBlock instanceof TextBlock) {
                        visualizeTextBlock((TextBlock) textBlock, contentStream);
                    } else if (textBlock instanceof Table) {
                        visualizeTable((Table) textBlock, contentStream);
                    }
                }

                // Body text frame of the page.
                contentStream.setStrokingColor(Color.YELLOW);
                contentStream.addRect((float) analyzedPage.getBodyTextFrame().getX(), (float) analyzedPage.getBodyTextFrame().getY(), (float) analyzedPage.getBodyTextFrame().getWidth(), (float) analyzedPage.getBodyTextFrame().getHeight());
                contentStream.stroke();
            }
        }
    }

    /**
     * Strokes the bounding box of a text block and, if present, prints its classification label.
     */
    private void visualizeTextBlock(TextBlock textBlock, PDPageContentStream contentStream) throws IOException {
        contentStream.setStrokingColor(Color.RED);
        contentStream.addRect(textBlock.getMinX(), textBlock.getMinY(), textBlock.getWidth(), textBlock.getHeight());
        contentStream.stroke();

        if (textBlock.getClassification() != null) {
            contentStream.beginText();
            contentStream.setNonStrokingColor(Color.BLUE);
            contentStream.setFont(PDType1Font.TIMES_ROMAN, 12f);
            contentStream.newLineAtOffset(textBlock.getMinX(), textBlock.getMaxY());
            contentStream.showText(textBlock.getClassification());
            contentStream.endText();
        }
    }

    /**
     * Strokes every non-null cell of a table and the text blocks inside it and, if present,
     * prints the table's classification label.
     */
    private void visualizeTable(Table table, PDPageContentStream contentStream) throws IOException {
        for (List<Cell> row : table.getRows()) {
            for (Cell cell : row) {
                if (cell != null) {
                    contentStream.setStrokingColor(Color.BLUE);
                    contentStream.addRect((float) cell.getX(), (float) cell.getY(), (float) cell.getWidth(), (float) cell.getHeight());
                    contentStream.stroke();

                    contentStream.setStrokingColor(Color.GREEN);
                    for (TextBlock textBlock : cell.getTextBlocks()) {
                        contentStream.addRect(textBlock.getMinX(), textBlock.getMinY(), textBlock.getWidth(), textBlock.getHeight());
                        contentStream.stroke();
                    }
                }
            }
        }

        if (table.getClassification() != null) {
            contentStream.beginText();
            contentStream.setNonStrokingColor(Color.BLUE);
            contentStream.setFont(PDType1Font.TIMES_ROMAN, 12f);
            contentStream.newLineAtOffset(table.getMinX(), table.getMinY());
            contentStream.showText(table.getClassification());
            contentStream.endText();
        }
    }
}

View File

@ -0,0 +1,22 @@
info:
description: Redaction Service Server V1
server:
port: 8080
spring:
profiles:
active: kubernetes
platform.multi-tenancy:
enabled: ${multitenancy.enabled:false}
tenantFilter:
urlPatterns: /redact
urlPatternsToIgnore:
management:
endpoint:
metrics.enabled: ${monitoring.enabled:false}
prometheus.enabled: ${monitoring.enabled:false}
endpoints.web.exposure.include: prometheus, health
metrics.export.prometheus.enabled: ${monitoring.enabled:false}

View File

@ -0,0 +1,3 @@
===================================
Redaction Service Server V1
===================================

View File

@ -0,0 +1,11 @@
spring:
application:
name: redaction-service-v1
management:
endpoints:
web:
base-path: /
path-mapping:
health: "health"

View File

@ -0,0 +1,796 @@
Aquatic BioSystems Inc, Fort Collins, Colorado, USA
Aquatic BioSystems, Inc., Ft. Collins, Colorado, USA.
Biological Research Laboratory (BRL), Füllinsdorf, Switzerland.
Biological Serviced Section, Alderley Park, Macclesfield, Cheshire
Harlan Laboratories Ltd., Itingen,
Jealotts Hill, International Research Station, Bracknell,
Jealotts Hill, International Research Station, Bracknell, RG42 6EY, United Kingdom
Jealotts Hill, International Research Station, Bracknell, RG42 6EY, United Kingdom.
Obtained from P. Hohler, trout breeding station Zeiningen, CH-4314 Zeiningen, Switzerland
P. Hohler, Forellenzucht Zeiningen, CH-4314 Zeiningen Switzerland
P.Hohler trout breeding station Zeiningen, CH-4314 Zeiningen, Swit-zerland, and held in the test facility for more than 2 weeks
RCC Biotechnology & Animal Breeding Division, Füllinsdorf,
RCC Biotechnology & Animal Breeding Division, Füllinsdorf, Switzerland
Sequani Limited, Ledbury, United Kingdom, BFI0274
Springborn Laboratories Inc., 790 Main St., Wareham, Massachusetts, 02571-1075, USA.
Syngenta, Jealotts Hill, International Research Station, Bracknell, RG42 6EY, United Kingdom
adama max rudong 2014 - huifeng
animal metabolism, dietary exposure, product safety, research and development, ciba-geigy limited, basle, switzerland
aquatic bio systems, inc., fort collins, colorado.
aquatic bioassay laboratory, baton rouge, louisiana
arysta lifescience north america, llc, cary, nc, usa
arysta lifescience sas, noguères, france
bayer crop-science
bayer crop-science ag
bc potter, rosedean, woodhurst, cambridgeshire, england
biospheric inc., rockville, usa
birds obtained from m & m quail farm, 4090 campbell road, gillsville, ga 30543 u.s.a
brixham environmental laboratory, astrazeneca uk limited, brixham, uk
brixham environmental laboratory, brixham, uk
brixham environmental laboratory, brixham, united kingdom
brood stock maintained at springborn laboratories
buffalo creek quail farm, po box 579, ellerbe, nc
bybrook bass hatchery, connecticut
c.i.t, miserey, france
celsius property b.v., amsterdam, netherlands
central toxicology laboratory
central toxicology laboratory (ctl), cheshire, united kingdom
central toxicology laboratory (ctl), cheshire, united kingdom, hr2464
central toxicology laboratory, alderley park, macclesfield, cheshire uk
centre international de toxicologie (c.i.t.), miserey, 27005 evreux, france
charles river
charles river (uk) limited
charles river (uk) limited, margate, kent, ct9 4lt, england.
charles river aquaria, margate, uk
charles river breeding laboratories, raleigh, nc, usa
charles river deutschland gmbh, stolzenseeweg 32-36, d-88353 kisslegg / germany
charles river france
charles river laboratories edinburgh ltd, tranent, eh33 2ne
charles river laboratories edinburgh ltd, tranent, eh33 2ne, uk
charles river laboratories france, bp 0109, f-69592 larbresle
charles river laboratories, edinburgh, united kingdom
charles river laboratories, edinburgh, united kingdom, 38674
charles river laboratories, portage, mi
charles river laboratories, raleigh, nc, usa
charles river uk limited, margate, kent.
charles river, 76410, saint-aubin-les-elbeuf, france
cheshire, united kingdom,
china agricultural university, no.2, yuan ming yuan west road, haidian district, beijing, 100193, p.r. china
ciba-geigy agricultural division, 410 swing road, p.o. box 18300, greensboro, north carolina 27419
ciba-geigy basel, oekotoxikologie, basel, switzerland, 953609
ciba-geigy corp. environmental health centre, farmington, ct, usa.
ciba-geigy corp., greensboro, us
ciba-geigy corp., vero beach, us
ciba-geigy corporation agricultural division, environmental health centre (ehc), 400 farmington avenue, farmington, ct 06032
ciba-geigy limited, animal production unit, basle, switzerland.
ciba-geigy limited, animal production unit, stein, switzerland.
ciba-geigy limited, animal production, 4332 stein, switzerland
ciba-geigy limited, basle, switzerland, toxicology ii. laboratories, animal facilities of toxicology ii. laboratories of residue analysis unit, agricultural division ciba-geigy limited, basle.
ciba-geigy limited, metabolism and ecology department, r&d plant protection agricultural division, basle, switzerland
ciba-geigy limited, plant protection division, ch-4002 basle, switzerland
ciba-geigy limited, research and development department, product safety, safety evaluation, basle, switzerland.
ciba-geigy limited, tierfarm, 4334 sisseln, switzerland
ciba-geigy ltd. ch-4002 basle, switzerland
ciba-geigy ltd., basel, switzerland
ciba-geigy ltd., basel, switzerland,
ciba-geigy ltd., basle, ch
ciba-geigy ltd., genetic toxicology, basel, switzerland
ciba-geigy,greensboro, united states
citoxlab france
covance laboratories inc.9200 leesburg pike, vienna, virginia 22182
covance laboratories limited, harrogate, uk
covance laboratories ltd., north yorkshire, uk.
covance laboratories, harrogate, united kingdom
cultures maintained at wildlife international ltd. laboratories
division of toxicology, institute of environmental toxicology
eba inc.
eba inc., snow camp, usa
eg&g bionomics
epl inc., research triangle
eurofins agroscience services chem sas, vergèze, france
experimental toxicology, ciba-geigy limited, 4332 stein, switzerland
fine organics limited, seal sands, middlesbrough ts2 1ub, uk
genetic toxicology, novartis crop protection ag, ch-4002 basel, switzerland
granja perrone, são bernardo do campo - sp brazil
harlan (ad zeist, the netherlands).
harlan france, zi le malcourlet, 03800 gannat / france
harlan laboratories b.v. kreuzelweg 53 5961 nm horst / the netherlands
harlan laboratories b.v. postbus 6174 5960 ad horst / the netherlands
harlan laboratories b.v., kreuzelweg 53, 5961 nm horst / the netherlands, postbus 6174, 5960 ad horst / the netherlands
harlan laboratories ltd., itingen, switzerland, d24665
harlan sprague dawley, inc., madison, wi.
harlan uk, shaws farm, blackthorn, bicester, oxon, ox6 0tp
harlan winkelmann gmbh, d-33178 borchen, germany
hazleton wisconsin
hazleton wisconsin, inc.
hazleton wisconsin, inc., 3301 kinsman boulevard, madison, wisconsin
houghton springs fish farm, dorset, uk
huntingdon research centre ltd, cambridgeshire, england
huntingdon research centre ltd., huntingdon, united kingdom
huntingdon research centre ltd., p.o. box 2, huntingdon, cambridgeshire, pe18 6es, england
ibc manufacturing co., memphis, tn, usa
j. cole, the county game farms, ashford, kent, england
jealotts hill international, bracknell, berkshire, united kingdom
jiangsu huifeng agrochemicals co. ltd.
kleintierfarm madoerin ag, ch-4414 fuellinsdorf
m & m quail farm, 4090 campbell road, gillsville, ga 30543, u.s.a.
maryland exotic birds of pasadena, maryland usa
max (rudong) chemical co ltd
morse laboratories llc, 1525 fulton avenue, sacramento, ca 95825 usa
mount lassen trout farms, california
mr j. coles, the country game farms, ashford, kent, england.
mt. lassen trout farm, rt. 5, box 36, red bluff, california 98080
nichols rabbitry inc. ; lumberton, tx
nichols rabbitry inc; lumberton, tx., us
notox b.v., hertogenbosch, netherlands
novartis crop protection ag, basel, switzerland ciba-geigy ltd., basel, switzerland
novartis crop protection ag, product portfolio management, environmental safety, ecotoxicology, ch-4002 basel, switzerland
organics limited, middlesbrough, united kingdom
osage catfish./box 222/missouri 65065/usa
osage catfisheries inc., lake road 54-56, route 4, box 1500, osage beach, mo65065, usa
p. hohler / ch-4341 zeiningen, switzerland
p. hohler, trout breeding station zeiningen, switzerland
park, nc, usa
plant protection division ciba-geigy limited basle, switzerland. genetic toxicology cibageigy limited basle, switzerland
product safety laboratories, east brunswick, new jersey 08816-3206, usa
product safety labs, east brunswick, usa
rcc - biological research laboratories, füllinsdorf, switzerland,
rcc cytotest cell research gmbh, rossdorf, germany
rcc ltd, environmental chemistry & pharmanalytics, ch-4452 itingen / switzerland
rcc ltd, itingen, switzerland
rcc ltd, laboratory animal services, wölferstrasse 4, 4414 füllinsdorf, switzerland
rcc ltd., itingen, switzerland,
rcc ltd., itingen, switzerland, b18966, t009636-06
rcc ltd., laboratory animal services, ch-4414 füllinsdorf, switzerland
rcc ltd., toxicology, wölferstrasse 4, ch-4414 füllinsdorf, switzerland
rcc ltd., zelgliweg 1, 4452 itingen, switzerland
rcc, cytotest cell research gmbh (rcc-ccr), in den leppsteinwiesen19, 64380 rossdorf, germany
research department, pharmaceuticals division, ciba-geigy corporation, 556 morris avenue, summit, new jersey 07901
ricerca, inc., ohio, usa
rodent breeding unit, alderley park, macclesfield, uk
sequani limited, bromyard road, ledbury, herefordshire, hr8 1lh, united kingdom
sequani limited, ledbury, united kingdom
sequani limited, ledbury, united kingdom,
sipcamadvan, durham, nc, usa
smithers viscient, 790 main street, wareham, ma 02571-1037 usa
smithers viscient, 790 main street, wareham, ma, usa
smithers viscient, 790 main street, wareham, massachusetts 02571 usa
smithers viscient, 790 main street, wareham, massachusetts 02571-1037, usa
source tierfarm sisseln, switzerland
southwest bio-labs, inc.401 n. 17th street, suite 11, las cruces, nm 88005 usa.
spring creek trout hatchery, lewistown, montana, usa
springborn (europe) ag, horn, switzerland
springborn laboratories inc., wareham, usa
springborn laboratories, inc. 790 main street wareham, massachusetts 02571
springborn laboratories, inc. environmental sciences division, 790 main street, wareham, 02571, usa massachusetts
springborn laboratories, inc.,
springborn laboratories, inc., health and environmental sciences, 790 main street, wareham, massachusetts, 02571-1075, usa
springborn life sciences inc.,
springborn smithers laboratories, wareham, usa
stillmeadow inc. study number 9062-05,
stillmeadow inc., sugar land, united states,
stillmeadow inc., sugarland tx, usa
stillmeadow inc., sugarland tx, usa, 8065-04 8321-03
stillmeadow, inc, 12852 park one drive, sugar land, tx 77478, us
stillmeadow, inc., 12852 park one drive, sugar land, tx 77478, usa
syngenta - jealotts hill, bracknell, united kingdom
syngenta -jealotts hill international research centre, uk
syngenta central toxicology laboratory, alderley park, macclesfield, cheshire, uk
syngenta crop protection, llc, greensboro, nc, usa
syngenta crop protection, llc, greensboro, usa
syngenta crop protection, monthey, switzerland
syngenta ctl, alderley park, macclesfield, cheshire, sk10 4tj, uk
syngenta jealotts hill international, bracknell, berkshire, united kingdom
syngenta, jealotts hill, international research station, bracknell, rg42 6ey, united kingdom
texas animal specialties, humble, tx
texas animal specialties, humble, tx, us
toxigeneticsinc. decatur, il, us
uk. charles river
veterinary health research pty ltd, nsw, australia
vischim srl, c/o lewis & harrison, llc, washington, dc, usa
vischim srl, milano, italy
wil research laboratories, llc, 1407 george road.ashland, oh, usa
wil research laboratories, llc, ashland, oh, usa
wil research laboratories, llc, ashland, oh, usa,
wil research, 1407 george road, ashland, oh, 44805-8946, usa
wil research, llc, 1407 george road, ashland, oh 44805-8946, usa
wildlife international a division of eag inc. 8598 commerce drive easton, md 21601
wildlife international ltd. cultures, 8651 brooks drive, easton, maryland 21601
wildlife international ltd., 8598 commerce drive, easton, maryland 21601, usa
wildlife international ltd., 8598 commerce drive, maryland 21601, usa
wildlife international ltd., easton md, usa
wildlife international ltd., easton, maryland 21601, usa
wildlife international ltd., easton, usa
wildlife international ltd., maryland, us
wildlife international ltd., maryland, usa
wildlife international, 8598 commerce drive, easton, md 21601 usa
wildlife international, a division of eag inc., 8598 commerce drive, easton, md 21601 usa
wise d.r. & wise r.e., monkfield, bourn, cambridgeshire, england
zeneca agrochemicals, jealotts hill, united kingdom
zentralinstitut fur versuchstierzucht gmbh, hannover, germany",
Syngenta Ltd., Jealotts Hill International Research Centre, Bracknell, Berkshire, RG42 6EY, UK.
Sequani Limited, Bromyard Road, Ledbury, Herefordshire, HR8 1LH, UK.
Harlan Cytotest Cell Research GmbH (Harlan CCR), In den Leppsteinswiesen 19, 64380 Rossdorf, Germany
Harlan Laboratories Ltd, Itingen, Switzerland.
Bioassay Labor fuer biologische Analytik GmbH INF 515, 69120 Heidelberg, Germany
Syngenta Crop Protection Ltd.
Syngenta, Jealotts Hill, Bracknell, United Kingdom
Charles River Laboratories, Preclinical Services, Tranent (PCS-EDI) Edinburgh, EH33 2NE, UK
CXR Biosciences, 2, James Lindsay Place, Dundee Technopole, Dundee, DD1 5JJ, Scotland, UK
CiToxLAB Hungary Ltd. H-8200 Veszprém, Szabadságpuszta Hungary
Charles River, Tranent, Edinburgh, EH33 2NE, UK
Charles River Laboratories Edinburgh Ltd., Tranent, Edinburgh, EH33 2NE, UK
BASF SE; Ludwigshafen/Rhein; Germany Fed.Rep.
Leatherhead Food Research (LFR), Molecular Sciences Department, Randalls Road, Leatherhead, Surrey, KT22 7RY, UK
Syngenta, Jealotts Hill, Bracknell, United Kingdom
Department of Veterinary & Biomedical Sciences, 101 Life Sciences Building, Penn State University, University Park, PA 16802, USA
CiToxLAB Hungary Ltd., H-8200 Veszprém, Szabadságpuszta, Hungary
SafePharm Laboratories Ltd, Shardlow Business Park, Shardlow, Derbyshire, UK
Harlan Laboratories Ltd., Zelgliweg 1, 4452 Itingen, Switzerland
RCC, Cytotest Cell Research GmbH (RCC-CCR), In den Leppsteinswiesen 19, 64380 Rossdorf, Germany
Harlan, Cytotest Cell Research GmbH (Harlan CCR), In den Leppsteinswiesen 19, 64380 Rossdorf, Germany
Harlan Laboratories Ltd. Zelgliweg 1, CH-4452 Itingen / Switzerland
Quotient Bioresearch (Rushden) Ltd., Pegasus Way, Crown Business Park, Rushden, Northamptonshire, NN10 6ER, UK
Charles River Laboratories Edinburgh, Ltd., Elphinstone Research Centre, Tranent, East Lothian, EH33 2NE, United Kingdom
CiToxLAB Hungary Ltd. H-8200 Veszprém, Szabadságpuszta, Hungary
Harlan Cytotest Cell Research GmbH, In den Leppsteinswiesen 19, 64380 Rossdorf Germany
Charles River, Tranent, Edinburgh, EH32 2NE, UK
Charles River Laboratories Edinburgh Ltd, Tranent, Edinburgh, EH33 2NE, UK
Harlan Cytotest Cell Research GmbH, (Harlan CCR), In den Leppsteinswiesen 19, 64380 Rossdorf, Germany
Charles River UK Limited, Margate, Kent, UK
RCC Ltd., Biotechnology & Animal Breeding Division, 4414 Fuellinsdorf, Switzerland
Charles River (UK) Ltd., Margate, Kent, CT9 4LT, England
Charles River Ltd., Margate, Kent, United Kingdom
Charles River UK Ltd, Manston Road, Margate, Kent CT9 4LT, England, UK
Syngenta Crop Protection, Toxicology, 4332 Stein, Switzerland
Safepharm Laboratories Limited, Shardlow Business Park, Shardlow, Derbyshire, DE72 2GD, United Kingdom
Sequani Ltd, Bromyard Road, Ledbury, Herefordshire, HR8 1LH, United Kingdom
Central Toxicology Laboratory, Alderley Park, Macclesfield, Cheshire, SK10 4TJ, UK
Charles River UK
Department of Veterinary & Biomedical Sciences, Penn State University
Syngenta Ltd. Jealotts Hill International Research, Bracknell, Berks RG42 6EY
Charles River Laboratories, Research Models and Services Germany GmbH; Sandhofer Weg 7, 97633 Sulzfeld, Germany
Novartis Crop Protection AG, Toxicology, 4332 Stein, Switzerland
BRL Biological Research Laboratories Ltd., Wölferstrasse 4, 4414 Füllinsdorf, Switzerland
B&K Universal Ltd, Grimston, Aldbrough, Hull, HU11 4QE, East Yorkshire, UK
B&K Universal Ltd, Grimston, Aldborough, Hull, UK
Nunc GmbH & Co. KG, 65203 Wiesbaden, Germany
Fluka, 89203 Neu-Ulm, Germany
MERCK, 64293 Darmstadt, Germany
Charles River Laboratories, Research Models and Services Germany GmbH; Sandhofer Weg 7, 97633 Sulzfeld, Germany
Animal Production, Novartis Pharma AG, 4332 Stein, Switzerland
RCC Ltd., Biotechnology & Animal Breeding Division, 4414 Fuellinsdorf, Switzerland.
SYSTAT Software, Inc., 501, Canal Boulevard, Suite C, Richmond, CA 94804, USA
Safepharm Laboratories Limited, Shardlow Business Park, Shardlow, Derbyshire, DE72 2GD, United Kingdom
Charles River (UK) Limited, Margate, Kent, CT9 4LT, England
CXR Biosciences, 2 James Lindsay Place, Dundee Technopole, Dundee, DD1 5JJ, Scotland, UK
Granja Perrone, São Bernardo do Campo - SP Brazil
Harlan Sprague-Dawley, Inc. Houston/Texas
P. Hohler, trout breeding station Zeiningen, 4314 Zeiningen, Switzerland
Spring Creek trout hatchery, Lewistown, Montana, USA
Springborn laboratories culture facility
Springborn culture
University of Texas
Institute for Plant Physiology, University of Göttingen, 37073 Göttingen, Germany
Bayer CropScience AG, 40789 Monheim, Germany
Koppert B. V. Berkel en Rodenrijs, Nederland
Bio-Test Labor GmbH, Sagerheide, Germany
Ciba-Geigy
Ciba-Geigy Ltd.
Harlan Laboratories Ltd., Itingen, Switzerland, D24643
Springborn Laboratories Inc., Wareham, USA
Springborn Laboratories (Europe) AG
Syngenta Eurofins - GAB, Niefern Öschelbronn, Germany
Syngenta Eurofins Agroscience Services EcoChem GmbH, N-Osch., Germany
Novartis Crop Protection AG, Basel, CH
Springborn (Europe) AG, Horn, Switzerland
Springborn Smithers Laboratories (Europe) AG, Horn, Switzerland
Syngenta Crop Protection AG, Basel, Switzerland
GAB Biotechnologie GmbH, Niefern, Germany
BioChem Agrar, Gerichshain, Germany
AgroChemex Ltd, Manningtree, United Kingdom
Ciba-Geigy Ltd., Basel, Switzerland
Ciba-Geigy Muenchwilen AG, Muenchwilen, Switzerland
Novartis Crop Protection Münchwilen AG, Münchwilen, Switzerland
Novartis Crop Protection AG, Basel, Switzerland
Ciba-Geigy Muenchwilen AG, Muenchwilen, Switzerland
Charles River Laboratories, Research Models and Services Germany GmbH; Sandhofer Weg 7, 97633 Sulzfeld, Germany
Alderley Park
Alderley Park Swiss
Stillmeadow, Inc., 12852 Park One Drive, Sugar Land, TX 77478, USA
Texas Animal Specialties, Humble, TX
Nichols Rabbitry Inc. ; Lumberton, TX
Charles River Laboratories., Wilmington, MA
Charles River Laboratories Edinburgh Ltd., Elphinstone Research Centre, Tranent, East Lothian, EH33 2NE
Syngenta Crop Protection, Monthey, Switzerland
Syngenta Crop Protection, Münchwilen, Switzerland
Fine Organics Limited, Middlesbrough, United Kingdom
Fine Organics Limited, Seal Sands, Middlesbrough TS2 1UB, UK
Syngenta Crop Protection, Inc., Greensboro, USA
Syngenta Technology & Projects, Huddersfield, United Kingdom
Syngenta Biosciences Pvt. Ltd., Ilhas Goa, India
Syngenta - Process Hazards Section, Huddersfield, United Kingdom
Syngenta Walloon Agricultural Research Centre, Gembloux, Belgium , 21764
Syngenta Crop Protection, Münchwilen, Switzerland, 300052719
Syngenta Crop Protection Münchwilen AG, Münchwilen, Switzerland, 109747
Syngenta Crop Protection, Münchwilen, Switzerland, 300073294
Syngenta - Jealotts Hill, Bracknell, United Kingdom RCC Ltd., Itingen, Switzerland, B18977, T003446-06
Syngenta - Jealotts Hill, Bracknell, United Kingdom RCC Ltd., Itingen, Switzerland, B18966, T009636-06
RCC Cytotest Cell Research GmbH, Rossdorf, Germany, RCC 107662
Syngenta Syngenta - Jealotts Hill, Bracknell, United Kingdom,
RCC Cytotest Cell Research GmbH, Rossdorf, Germany
WIL Research Laboratories, LLC, Ashland, OH, USA
Charles River Laboratories, Edinburgh, United Kingdom, 36955
Syngenta Crop Protection AG, Basel, Switzerland Stillmeadow Inc., Sugarland TX, USA
Novartis Crop Protection Inc., Greensboro, USA
Syngenta - Jealotts Hill, Bracknell, United Kingdom
Eurofins - ADME Bioanalyses, Vergeze, France
BioChem GmbH, Cunnersdorf, Germany
Syngenta Syngenta Crop Protection, LLC, Greensboro, NC, USA
Syngenta Eurofins Agroscience Services Chem SAS, Vergèze, France
Syngenta Innovative Environmental Services, Witterswil, Switzerland
Ricerca Biosciences, LLC, Concord, OH, USA
Dr Knoell Consult GmbH, Mannheim, Germany
RCC Umweltchemie GmbH & Co. KG, Rossdorf, Germany
JSC International Ltd., Harrogate, United Kingdom
Wildlife International Ltd., Easton, Maryland 21601, USA
Syngenta Crop Protection, LLC, Greensboro, NC, USA
Novartis - Greensboro, Greensboro, USA
Smithers Viscient, 790 Main Street, Wareham, MA, USA
Syngenta Cambridge Environmental Assessments, United Kingdom
Ciba-Geigy Basel, Oekotoxikologie, Basel, Switzerland
RCC Ltd., Itingen, Switzerland
IBACON GmbH, Rossdorf, Germany
Envigo Research Limited, Shardlow, UK
Syngenta Crop Protection Münchwilen AG, Münchwilen, Switzerland
Ciba-Geigy Münchwilen AG, Münchwilen, Switzerland
Huntingdon Research Centre Ltd., Huntingdon, United Kingdom
Syngenta Technology & Projects, Huddersfield, United Kingdom
Harlan Laboratories Ltd., Shardlow, Derbyshire, UK
Dr. Specht & Partner Chem. Laboratorien GmbH, Hamburg, Germany
Institut Fresenius, Taunusstein, Germany
Syngenta - Jealotts Hill International, Bracknell, Berkshire, United Kingdom
Ciba-Geigy Corp., Greensboro, USA
CIP Chemisches Institut Pforzheim GmbH, Pforzheim, Germany
Charles River Laboratories Edinburgh Ltd, Tranent, EH33 2NE, UK
Hazleton Laboratories, Madison, USA
Eurofins BioPharma, Planegg, Germany, 150556
Syngenta Environ. Health Center, Farmington, USA
Centre International de Toxicologie C.I.T., Evreux, France
Toxalim, Research Centre in Food Toxicology, F- 31027 Toulouse, France
Harlan Laboratories Ltd., Shardlow, Derbyshire, UK
CRS GmbH GmbH, In den Leppsteinswies en 19, 64380 Rossdorf Germany
Environ. Health Center, Farmington, USA
Ciba-Geigy Corp., Summit, USA
Ciba-Geigy Basel, Genetische Toxikologie, Basel, Switzerland
Ciba-Geigy Ltd., Stein, Switzerland
Novartis Crop Protection AG, Stein, Switzerland
Central Toxicology Laboratory (CTL), Cheshire, United Kingdom
Sequani Limited, Bromyard Road, Ledbury, Herefordshire, HR8 1LH, United Kingdom
Brixham Environmental Laboratory, Brixham, United Kingdom
Springborn Smithers Laboratories, Horn, Switzerland
Huntingdon Research Centre, Cambridgeshire, United Kingdom
Mambo-Tox Ltd., Southampton, United Kingdom
MITOX Consultants, Amsterdam, Netherlands
Charles River Aquaria, Margate, UK
Brixham Environmental Laboratory, Brixham, UK
O.Keller, Mörschwil, CH
Huntingdon Life Sciences Ltd., Huntingdon, UK
BTL Bio-Test Labor GmbH, Sagerheide, Germany
Mambo-Tox Ltd., Southampton, UK
Mambo-Tox Ltd. 2 Venture Road, University Science Park, Southampton SO16 7NP, United Kingdom
BioChem GmbH, Germany
PK Nützlingszuchten, Welzheim, Germany
BioChem agrar, Germany
Sautter & Stepper, Ammerbuch, Germany
Koppert, The Netherlands
Kraut & Rubeen (Doris Haber), Zeilstraße 40, 64367 Mühltal-Frankenhausen, Germany
Springborn Laboratories (Europe) AG, Seestrasse 21, CH-9326 Horn, Switzerland
Biologische Bundesanstalt (BBA), Braunschweig, Germany
Institut für Biologische Analytik und Consulting, IBACON GmbH, Arheilger Weg 17, 64380 Rossdorf, Germany
Abandoned vineyard, Northern Italy
Syngenta Limited, Cheshire, United Kingdom
Agrochemex, Lawford, United Kingdom
Staphyt, Inchy en Artois, France
Dermal Technology Laboratory Ltd., Staffordshire, UK
Ciba Agriculture, Whittlesford, United Kingdom
Bayer Crop Science AG, Monheim, Germany
tier3 solutions GmbH, Leichlingen, Germany
Mambo-Tox. Ltd., Southampton, United Kingdom
Syngenta Crop Protection AG, Stein, Switzerland
Stillmeadow Inc, Sugar Land, TX 77478, US
Texas Animal Specialities, Humble, TX, US
CiToxLAB, 8200 Veszprem, Szabadsagpuszta, Hungary
Syngenta Ltd, Jealott's Hill International Research Centre, Bracknell, Berkshire, RG42 6EY, United Kingdom
Stillmeadow, Inc, 12852 Park One Drive, Sugar Land
Syngenta Central Toxicology Laboratory, Alderley Park, Macclesfield, Cheshire, UK
Syngenta Limited, Alderley Park, Macclesfield, Cheshire, SK10 4TJ
Nichols Rabbitry Inc; Lumberton, TX., US
AgroChemex International Ltd, Aldhams Farm Research Station, Lawford, Essex, UK
Ciba Agriculture, Whittlesford, Cambridge, UK
Ricerca Inc., Department of Residue Analysis, Painesville OH, USA
Staphyt, 23 rue de Moeuvres, F-62860 Inchy en Artois, France
Dermal Technology Laboratory Ltd., Med IC4, Keele University Science and Business Park, Keele, Staffordshire, ST5 5NL, United Kingdom
Tier3 solutions GmbH, Kolberger Strasse 61-63 51381 Leverkusen, Germany
RCC Ltd, Environmental Chemistry & Pharmanalytics, CH-4452 Itingen / Switzerland
GAB Biotechnologie GmbH & IFU Umweltanalytik GmbH, Niefern-Öschelbronn, Germany
Biochem agrar, Germany
Bienenfarm Kern GmbH, Am Rehbacher Anger 10, 04249 Leipzig, Germany
Joaquin Cordero, Paseo de Colón No. 19, 41370 Cazalla (Sevilla), Spain
Mambo-Tox Ltd, Southampton, UK
GAB Biotechnologie GmbH & IFU Umweltanalytik GmbH, Niefern-Öschelbronn, Germany
Innovative Environmental Services (IES), Benkenstrasse 260, 4108 Witterswil, Switzerland
BioChem agrar GmbH, Kupferstraße 6, 04827 Gerichshain, Germany
RCC - Biological Research Laboratories, Füllinsdorf, Switzerland, 859442
RCC Ltd., Toxicology, Wölferstrasse 4, CH-4414 Füllinsdorf, Switzerland
RCC Ltd., Laboratory Animal Services, CH-4414 Füllinsdorf, Switzerland
Charles River Laboratories France, BP 0109, F-69592 LArbresle
Charles River Deutschland GmbH, Stolzenseeweg 32-36, D-88353 Kisslegg / Germany
Syngenta CTL, Alderley Park, Macclesfield, Cheshire, SK10 4TJ, UK
Harlan UK, Shaws Farm, Blackthorn, Bicester, Oxon, OX6 0TP
Syngenta Central Toxicology Laboratory, UK
RCC Ltd., Toxicology, Wölferstrasse 4, CH- 4414 Füllinsdorf, Switzerland
RCC Ltd, Itingen, Switzerland
P. Hohler, trout breeding station Zeiningen, Switzerland
SAG, Institute for Plant Physiology, University of Göttingen, Germany
GAB Biotechnologie GmbH, Niefern-Öschelbronn, Germany
Beekeeper Mr. Berthold Nengel, Brückenstraße 12, 56348 Dahlheim, Germany
Syngenta Crop Protection, Münchwilen, Switzerland, CHMU140561
Syngenta Crop Protection, Münchwilen, Switzerland
Sequani Limited, Ledbury, United Kingdom, BFI0516
PTRL Europe, Ulm, Germany
SGS Institut Fresenius GmbH, Taunusstein, Germany
CEM Analytical Services Ltd (CEMAS) - Berkshire, UK
PTRL Europe, Ulm, Germany
Sequani Limited, Ledbury, United Kingdom
SGS Institut Fresenius GmbH, Taunusstein, Germany
CEM Analytical Services, UK
Eurofins Agroscience Services Chem SAS, Vergà ̈ze, France
Novartis Services AG, Basel, Switzerland
BSL Bioservice Scientific, Planegg, Germany
Envigo CRS GmbH, Rossdorf, Germany
Envigo CRS GmbH, In den Leppsteinswiesen 19, 64380 Rossdorf, Germany
BASF Ltd., Ludwigshafen, Germany
ALS Laboratory Group, Edmonton, Alberta, Canada
Syngenta Crop Protection, Inc., Greensboro, USA
ADME - Bioanalyses, Vergeze, France
Battelle UK Ltd., Ongar, United Kingdom
SGS Institut Fresenius GmbH
Novartis Agro GmbH, Frankfurt, Germany
Supervision & Test Center Pesticide Safety Evaluation, China
T. R. Wilbury Laboratories, Inc., Marblehead, MA, USA
CEMAS, North Ascot, United Kingdom
EAG Laboratories PTRL Europe GmbH, Germany
Syngenta Crop Protection Inc., USA
Syngenta Crop Protection Inc., 410 Swing Road, Greensboro, NC 27409, USA
Huntingdon Research Centre Ltd., UK
Huntingdon Research Centre Ltd., England
T.R. Wilbury Laboratories, Inc., USA
Wildlife International Ltd., USA
RCC Ltd, Switzerland
RCC Ltd. Environmental Chemistry & Pharmanalytics Division CH-4452 Itingen/Switzerland
Harlan Laboratories Ltd., Switzerland
CIBA-GEIGY Ltd., Switzerland
Syngenta Crop Protection AG, Basel , Switzerland
Syngenta Crop Protection LLC, Greensboro, USA
PTRL Europe GmbH, Helmholtzstr. 22, Science Park, Ulm, Germany
PTRL Europe GmbH, Germany
CEM Analytical Services Ltd (CEMAS), Imperial House, Oaklands Business Centre, Oaklands Park, Wokingham, Berkshire, RG41 2FD UK
SGS INSTITUT FRESENIUS GmbH
Syngenta Ltd, Jealotts Hill International Research Centre, Bracknell, Berkshire, RG42 6EY, UK
Fraunhofer Institute for Molecular Biology and Applied Ecology, IME, Auf dem Aberg 1, 57392 Schmallenberg, Germany
Eurofins Agroscience Services Chem SAS, 75B, Avenue du Pascalet, 30310 Vergèze, France
Innovative Environmental Services (IES) Ltd, Benkenstrasse 260, 4108 Witterswil, Switzerland
BSL Bioservice, Scientific Laboratories GmbH, Behringstrasse 6/8, 82152 Planegg, Germany
RCC Ltd, Zelgliweg 1, CH-4452 Itingen, Switzerland
RCC Ltd, Laboratory Animal Services, CH-4414 Fuellinsdorf
Harlan Cytotest Cell Research GmbH, In den Leppsteinswiesen 19, 64380 Rossdorf, Germany
Ciba-Geigy Limited, Basel, Switzerland
BASF SE, Experimental Toxicology and Ecology, 67056 Ludwigshafen, Germany
Ciba-Geigy Limited, Animal Production, 4332 Stein, Switzerland
RCC Ltd. Biotechnology & Animal Breeding Division, 4414 Füllinsdorf, Switzerland
Syngenta Ltd. Jealotts Hill International Research Centre, Bracknell, Berks RG42 6EY
WIL Research Laboratories, LLC, 1407 George Road, Ashland, Ohio 44805-8946, USA
Charles River Laboratories Inc., Kingston, New York, USA
Syngenta, Jealotts Hill International Research Centre, Bracknell, United Kingdom
Battelle UK Ltd
D.R. & R.E. Wise, Monkfield, Bourn, Cambridgeshire, England
Wildlife International. 8598 Commerce Drive, Easton, MD 21601 USA
Maryland Exotic Birds of Pasadena, MD 21122
Mr D. R. Wise, Monkfield, Bourn, Cambridgeshire, England
Cambridge Environmental Assessments, Battlegate Road, Boxworth, Cambridgeshire, CB23 4NN, UK
J. Coles, The County Game Farms, Ashford, Kent, England
Osage Catfisheries, MO 65 065, USA
Supervision and Test Center for Pesticide Safety Evaluation and Quality Control, 600 Shenliao Road, Tiexi District, Shengyang 110141, Liaoning Province, P.R. China
Syngenta, Jealotts Hill International Research Centre, Bracknell, Berkshire, RG42 6EY
Harlan Laboratories Ltd., 4452 Itingen, Switzerland
Ciba-Geigy Ltd., Product Safety, Ecotoxicology, CH-4002 Basel, Switzerland
P. Hohler, CH-4314 Zeiningen
Cambridge Environmental Assessments, Battlegate Road, Boxworth, Cambridgeshire, CB23 4NN/UK
Wildlife International, A Division of EAG Inc. 8598 Commerce Drive Easton, MD 21601 USA
Novartis Crop Protection AG, Kanton Aargau, Switzerland.
RCC Ltd, CH-4452 Itingen, Switzerland
CEMAS, North Ascot, Berkshire, UK
Wilbury Laboratories Inc, 40 Doaks Lane, Marblehead, Massachusetts
P. Cummins Oyster Company, Pasadena, Maryland
Harlan Laboratories Ltd, Zelgliweg 1, 4452 Itingen/Switzerland
PK Nützlingszuchten, D-73642 Welzheim, Germany
Institut für Biologische Analytik und Consulting IBACON GmbH Arheilger Weg 17, 64380 Rossdorf, Germany
ABC Laboratories Inc., Analytical Chemistry and Field Services, 7200 E. ABC Lane, Columbia, Missouri
Ciba-Geigy Corporation, Farmington, CT, USA
Syngenta Ltd. Jealotts Hill, Bracknell, United Kingdom
Eurofins Agroscience Services EcoChem GmbH, N- Osch., Germany
Ciba-Geigy Limited, Animal Production Unit, Basle, Switzerland
Ciba-Geigy Limited, Basle, Switzerland
Charles River Laboratories, Raleigh, NC, USA
Charles River (UK) Limited
Harlan Sprague Dawley, Inc., Madison, WI
CIBA-GEIGY Limited, Animal Production, 4332 Stein, Switzerland
CIBA-GEIGY Limited, 4332 Stein, Switzerland
Kleintierfarm Madoerin AG, CH-4414 Fuellinsdorf
CIBA-GEIGY Limited, Tierfarm, 4334 Sisseln, Switzerland
Animal production, CIBA-GEIGY Limited, 4332 Stain/Switzerland
Environmental Health Centre (EHC), 400 Farmington Avenue, Farmington, CT 06032
Charles River Laboratories, Kingston, NY
Harlan (Ad Zeist, the Netherlands)
Animal Production CIBA-GEIGY Limited 4332 Stein / Switzerland
Tierfarm, Sisseln, Switzerland
Zen-tralinstitut fur Versuchstier-zucht GmbH, Hannover, Germany
Charles River Laboratories, Portage, MI
CIBA-GEIGY Limited, Basel, Switzerland
Novartis Crop Protection AG, CH-4002 Basel, Switzerland
RCC Ltd., Biotechnology and animal breeding division, Fullinsdorf, Switzerland
Tierfarm Sisseln, Switzerland
Charles River Breeding Laboratories, Raleigh, NC, USA
Ciba-Geigy Corporation, Plant Protection Division, Environmental Health Center, 400 Farmington Avenue, Farmington, Connecticut 06032, USA
Charles River Breeding Laboratories, Inc., Raleigh, North Carolina USA
Charles River, 76410, Saint-Aubin-les-Elbeuf, France
Charles River Laboratories, Inc., Raleigh, NC, USA
WIL Research Laboratories, LLC, 1407 George Road, Ashland, OH 44805-8946 USA
RCC Ltd., Biotechnology & Animal Breeding Division, 4414 Fȕllinsdorf, Switzerland
Alderley Park, Macclesfield, Cheshire UK
Rodent Breeding Unit, Alderley Park, Macclesfield, UK
Harlan Winkelmann GmbH, D-33178 Borchen, Germany
WIL Research Laboratories, LLC, 1407 George Road.Ashland, OH 44805-8946 USA
Centre dElevage Charles River
CIBA-GEIGY Limited, Experimental Toxicology, 4332 Stein/Switzerland
Centre International de Toxicologie (C.I.T.), Miserey, 27005 Evreux, France
Centre Internationale de Toxicologie, Miserey, 27005 Evreux, France
CIBA-GEIGY Limited, Basle, Switzerland
Harlan Laboratories Ltd, Shardlow Business Park, Shardlow, Derbyshire, DE72 2GD, UK
Envigo CRS GmbH GmbH, In den Leppsteinswiesen 19, 64380 Rossdorf Germany
Ciba-Geigy Ltd., Genetic Toxicology, Basel, Switzerland
Toxalim, Research Centre in Food Toxicology, F-31027 Toulouse, France
Ciba-Geigy Corp, Plant Protection Division, Environmental Health Center, 400 Farmington Avenue, Farmington, Connecticut 06032, USA
Charles River France
Charles River US
WIL Research, LLC, 1407 George Road, Ashland, OH 44805-8946, USA
Novartis Crop Protection AG, Toxicology, 4332 Stein Switzerland
Syngenta Crop Protection, Health Assessment 2 Stein, 4332 Stein, Switzerland
RCC Ltd. Biotechnology and Animal Breeding Division, 4414 Füllinsdorf, Switzerland
Genetic Toxicology, Novartis Crop Protection AG, CH-40002 Basel, Switzerland
RCC - Cytotest Cell Research GmbH In den Leppsteinswiesen 19, D- 64380 Roβdorf, Germany
RCC - Cytotest Cell Research GmbH, In den Leppsteinswiesen 19, D-64380 Rofldorf, Germany
Ciba-Geigy Limited, Animal production, 4332 Stein, Switzerland
RCC Ltd., Zelgliweg 1, 4452 Itingen, Switzerland
RCC Ltd, Laboratory Animal Services, Wölferstrasse 4, 4414 Füllinsdorf, Switzerland
RCC Ltd, Laboratory Animal Services, 4414 Füllinsdorf, Switzerland
CIBA-GEIGY Limited, Basle, Switzerland
RCC Cytotest Cell Research GmbH (RCC-CCR), In den Leppsteinswiesen 19, 64380 Rossdorf, Germany
RCC Cytotest cell Research GmbH, In den Leppsteinwiesen 19, Rossdorf, Germany
Centre International de Toxicologie (CIT)
C iba-Geigy
Ciba-Geigy, Greensboro, North Carolina
Ciba-Geigy Corp., Greensboro, United States
Ciba-Geigy Vero Beach Research Center, Florida, USA
Ciba-Geigy Corporation, Environ. Health Center, Farmington, United States
Ciba-Geigy GmbH, Frankfurt a.Main, Germany
Ciba-Geigy Corp., Greensboro, United States
Wise D.R. & Wise R.E., Monkfield, Bourn, Cambridgeshire, England
Mr J. Coles, The Country Game Farms, Ashford, Kent, England
Maryland Exotic Birds of Pasadena, Maryland USA
J. Cole, The County Game Farms, Ashford, Kent, England
BC Potter, Rosedean, Woodhurst, Cambridgeshire, England
M & M Quail Farm, 4090 Campbell Road, Gillsville, GA 30543, U.S.A
Wildlife International A Division of EAG Inc. 8598 Commerce Drive Easton, MD 21601 USA
M & M Quail Farm, 4090 Campbell Road, Gillsville, GA 30543 U.S.A
China Agricultural University, No.2, Yuan Ming Yuan West Road, Haidian District, Beijing, 100193, P.R. China
Mt. Lassen Trout Farm, Rt. 5, Box 36, Red Bluff, California 98080
Bybrook Bass Hatchery, Connecticut
CIBA-GEIGY Ltd. CH-4002 Basle, Switzerland
Wildlife International Ltd. Cultures, 8651 Brooks Drive, Easton, Maryland 21601
Aquatic bioassay laboratory, Baton Rouge, Louisiana
P. Hohler/ CH-4314 Zeiningen, Switzerland
Houghton Springs Fish Farm, Dorset, UK
Cultures maintained at Wildlife International Ltd. Laboratories
Aquatic Bio Systems, Inc., Fort Collins, Colorado
Smithers Viscient, 790 Main Street, Wareham, Massachusetts 02571- 1037 USA
Smithers Viscient, 790 Main Street, Wareham, Massachusetts 02571 USA
Springborn laboratories
Syngenta Ltd. Jealotts Hill International Research Centre Bracknell, Berkshire, RG42 6EY United Kingdom
Wildlife International Ltd., Maryland, USA
Wildlife International Ltd., Easton, USA
Smithers Viscient, 790 Main Street, Wareham, Massachusetts 02571 USA
Brixham Environmental Laboratory, AstraZeneca UK Limited, Brixham, UK
Springborn Laboratories Inc., Massachusetts 02571, USA
Smithers Viscient, 790 Main Street, Wareham, MA 02571-1037, USA
Wildlife International Ltd, Easton, MD, USA
Wildlife International A Division of EAG Inc. 8598 Commerce Drive Easton, MD 21601 USA
Smithers Viscient, 790 Main Street, Wareham, MA 02571-1037 USA
Ciba-Geigy Corporation, Post Office Box 18300, Greensboro, NC 27419, USA
Chesapeake Cultures, Hayes, Virginia
Smithers Viscient, 790 Main Street, Wareham, Massachusetts 02571-1037 USA
University of Sheffield, UK
Blue Frog Scientific Limited, Scott House, South St. Andrew Street, Edinburgh, EH2 2AZ, UK
MBL Aquaculture, Sarasota, Florida
Bayer AG (Pflanzenschutz Umweltforschung, Institut für Oekobiologie, D- 5090 Leverkusen)
Pflanzenphysiologisches Institut University, Nikolausberger Weg 180, D-3400 Göttingen, Germany
Envigo Research Limited Shardlow Business Park, Shardlow, Derbyshire, DE72 2GD, UK
Smithers Viscient, 790 Main Street, Wareham, Massachusetts 02571- 1037 USA
Wildlife International Ltd., Easton, Maryland, USA
David Francis, W.J. Mead Apiarist Supplies, Fowlmere, Cambridgshire
RCC AG, Itingen, Switzerland
Blades Biological Ltd, United Kingdom
ECT Oekotoxikologie GmbH, Germany
BioChem agrar, Labor für biologische und chemische, Analytik GmbH, Kupferstraße 6, 04827 Gerichshain, Germany
RCC Umweltchemie AG, P.O. Box, CH-4452 Itingen/BL, Switzerland
RCC Ltd, Environmental Chemistry & Pharmanalytics Division, CH-4452 Itingen, Switzerland
BioChem agrar Labor für biologische und chemische, Analytik GmbH, Kupferstraße 6 04827 Gerichshain, Germany
BioChem agrar, Labor für biologische und chemische, Analytik GmbH, Kupferstraße 6, 04827 Gerichshain, Germany
Pan-Agricultural Labs, Inc. 32380 Avenue 10 Madera, CA 93638 USA
Syngenta AG. Basel. Switzerland
Deutsche Sammlung von Mikroorganismen und Zellkulturen GmbH, Inhoffenstraße 7 B, 38124 Braunschweig, Germany
CIBA-GEIGY Ltd., Product Safety, Ecotoxicology, CH-4002 Basel, Switzerland
Springborn Smithers Laboratories 790 Main Street Wareham, MA 02571-1037
Syngenta crop protection AG, Research Biological science, Disease control, Stein
Syngenta Biosciences Pvt. Ltd., Ilhas Goa, India
Syngenta Technology & Projects, Huddersfield, United Kingdom
Stillmeadow. Inc.. 12852 Park One Drive. Sugar Land. TX 77478. USA
Texas Animal Specialties. Humble. TX
Nichols Rabbitry Inc. ; Lumberton. TX
Charles River Laboratories.. Wilmington. MA
Charles River Laboratories Edinburgh Ltd.. Elphinstone Research Centre. Tranent. East Lothian. EH33 2NE
Charles River Laboratories, Edinburgh, United Kingdom
tier3 solutions GmbH
tier3 solutions GmbH, Kolberger Str. 61-63, 51381 Leverkusen, Germany
Bayer CropScience AG
Syngenta, Jealotts Hill International Research Centre, UK
Brixham Environmental Laboratory, Brixham, Devon, TQ5 8BA, UK
MITOX Consultants Science Park 408, 1098XH Amsterdam, The Netherlands
Eurofins Agrosciences Services EcoChem GmbH, Eutinger Str. 24, 75233 Niefern-Öschelbronn, Germany
Mambo-Tox Ltd., 2 Venture Road, Chilworth Science Park, Southampton SO16 7NP, United Kingdom
Biochem agrar GmbH, Gerichshain, Germany
“W. Neudorff GmbH KG”, An der Mühle 3, D- 31860 Emmertal
BioChem Agrar, Kupferstraβe 6, 04827 Gerichshain, Germany
Bayer CropScience AG, Monheim
BioChem agrar, Labor für biologische und chemische Analytik GmbH, Kupferstraße 6, 04827 Gerichshain, Germany
RIFCON GmbH, Hirschberg, Germany
Dr K Thomae GMBH, Chemisch-pharmazeutische Fabrik, D-7950 Biberach, Riss
Centre International de Toxicologie (C.I.T), Miserey, 27005 Evreux, France
Centre dElevage Lebeau, 78950 Gambais, France
CIBA-GEIGY Limited, Toxicology Services, Short-term Toxicology, 4332 Stein, Switzerland
Ciba-Geigy Ltd., CH-4002, Basel, Switzerland
Osage Catfish, Box 222, Missouri, USA
Mambo-Tox Ltd., 2 Venture Road, University Science Park, Southampton, SO16 7NP
Biologische Bundesanstalt (BBA), Berlin-Dahlem
“Bayer CropScience AG” Monheim
Zeneca Agrochemicals, Jealotts Hill, United Kingdom
Eurofins Agroscience Services Chem GmbH, Hamburg, Germany
Harlan Cytotest Cell Research GmbH (Harlan CCR), Germany
Smithers Viscient (ESG) Ltd, Harrogate, UK
Covance Laboratories Limited, Harrogate, UK
Central Toxicology Laboratory, Alderley Park, Macclesfield, Cheshire, UK
Biological Services Section, Alderley Park, Macclesfield, Cheshire, UK
Charles River
Harlan Cytotest Cell Research GmBH, Rossdorf, Germany
Syngenta Crop Protection, Inc., Greensboro, NC 27419, USA
Cambridge Environmental Assessments, Battlegate Road, Boxworth, Cambridgeshire
Central Toxicology Laboratory, Syngenta
Harlan Laboratories Ltd. Zelgliweg,445 Itingen/Switzerland
Tecsolve UK Ltd., Glendale Park, North Ascot, Berkshire
Harlan Laboratories Ltd, Zelgliweg 1, 4452 Itingen, Switzerland
Harlan Laboratories
Katz Biotech AG, Baruth, Germany
Mambo-Tox Ltd., 2 Venture Road, Chilworth Science Park, Southampton, SO16 7NP
BioChem agrar, 04827 Gerichshain, Germany
W. Neudorff, 31860 Emmerthal, Germany
W. Neudorff GmbH KG, An der Mühle 3, 31860 Emmerthal, Germany
BioChem agrar Labor für biologische und chemische Analytik GmbH, Kupferstraße 6 04827 Gerichshain, Germany
“Biologische Bundesanstalt (BBA)”, Berlin-Dahlem
BioChem agrar, Labor für biologische und chemische Analytik GmbH, Kupferstraβe 6, 04827 Gerichshain, Germany
Syngenta Crop Protection, Münchwilen, Switzerland
Ciba-Geigy Ltd., Basle, Switzerland
Ciba-Geigy Corporation , Greensboro, NC, USA
Ciba-Geigy Corp., Greensboro, NC, USA
Nauchi, Shiraimachi, Inba-Gun, Chiba, Japan
Animal Metabolism, Ciba-Geigy Ltd., Basle, Switzerland
Hazleton Wisconsin, Inc. Madison, Wisconsin USA
CiToxLAB Hungary Ltd, Szabadsagpuszta, Hungary
Hazleton Wisconsin, Inc. Madison, Wis- consin USA
Stillmeadow Inc., Sugar Land TX, USA
Ciba-Geigy Corporation, Summit, NJ, USA
Ciba-Geigy Corp., Environmental Health Center, Farmington, CT, USA
Ciba-Geigy Limited, Pharmaceutical Division, 4002 Basel / Switzerland
Ciba-Geigy Limited, Experimental Pathology, 4002 Basel/ Switzerland
Ciba-Geigy Limited, Experimental Pathol- ogy, 4002 Basel / Switzerland
Hazleton Wisconsin, Madison, WI, USA
Ciba-Geigy Toxicology Services, ShortTerm Toxicology, 4332 Stein/ Switzerland
Ciba-Geigy Limited, Experimental Pathology, 4002 Basel / Switzerland
Hazleton Biotechnologies Company, Kensington, Maryland, USA
Ciba-Geigy Limited, Genetic Toxicology, 4002 Basel / Switzerland
Hazleton Washington, Inc., Vienna, Virginia 22182, USA
Ciba-Geigy Limited, 4002 Basel / Switzerland
Hazleton Raltech, Inc., a Subsidiary of Hazleton Laboratories America, Inc., Madison, Wisconsin, USA
Experimental Pathology Laboratories, Research Triangle Park
Toxicology/Cell Biology, Novartis Crop Protection Inc., Basel, Switzerland
Toxigenics, Inc., Decatur, IL 62526, USA
Argus Research Laboratories, Inc., Perkasie, PA, USA
Argus Research Laboratories Inc., Horsham, Pennsylvania 19044, USA
Ciba-Geigy Ltd.,Stein, Switzerland
Ciba-Geigy Ltd., Genetic Toxicology, Basle, Switzerland
Novartis Crop Protection AG, Stein, CH
Safepharm Laboratories Ltd., Shadlow, United Kingdom
Sandoz Agro Ltd., Department of Toxicology CH-4132 Muttenz, Switzerland
Hazleton Washington, Inc. Vienna, Virginia, USA
CXR Biosciences. Laboratory
Ciba-Geigy Corp., Greensboro NC, USA
Ciba-Geigy Ltd., Basel, CH
Novartis Agro S.A., Aigues-Vives, F
Ciba-Geigy SA, Rueil-Malmaison, F
Novartis Agro S.A., Aigues-Vives, France
Osage Catfisheries Inc., Osage Beach, Missouri 65065, USA
Aquatic Biosystems Corvalis
EPA, Corvalis, OR
Wards Natural Science, ON
Chilliwack Hatchery
Sun Valley Trout Farm, Abbotsford BC
Chilliwack Hatchery, BC
P. Hohler, CH-4314 Zeiningen, Switzerland
University of Sheffield , UK
Wildlife International Ltd., Maryland, US
Ciba-Geigy Ltd., Basle, CH
Stillmeadow Inc., Sugar Land, United States
Hazleton Wisconsin, Inc
ToxigeneticsINc. Decatur, IL, US
EG&G Bionomics
Biospheric Inc., Rockville, USA
Bionomics Aquatic Tox. Lab., Wareham, USA
Springborn Laboratories Inc.
Syngenta Jealotts Hill International, Bracknell, Berkshire, United Kingdom
Wildlife International Ltd., Easton MD, USA
Springborn Smithers Laboratories, Wareham, USA
Springborn Life Sciences Inc
Eg&G Bionomics (Fl), Pensacola, USA
Harlan Laboratories Ltd., Itingen, Switzerland
Solvias AG, Basel, Switzerland
T.R. Wilbury Laboratories Inc., Massachusetts, USA
Ciba-Geigy Ltd., Basle, CH
Stillmeadow Inc., Sugar Land, TX, USA
Syngenta Crop Protection, Munchwilen, Switzerland
RCC - Biological Research Laboratories, Füllinsdorf, Switzerland
Covance Laboratories, Harrogate, United Kingdom
Battelle UK Ltd, Chelmsford, Essex, UK
Zeneca Agrochemicals, Jealotts Hill Research Station, Bracknell, Berkshire, UK
Xenobiotic Laboratories, Inc., Plainsboro, USA
Fraunhofer Institute, Schmallenberg, Germany
PTRL West, Hercules CA, USA
Eurofins Agroscience Services GmbH, Niefern-Öschel., Germany
ICI Agrochemicals, Bracknell, Berkshire, United Kingdom
Chemex International plc, Cambridge, United Kingdom
BASF, Limburgerhof, Germany
RIFCON, Leichlingen, Germany
Eurofins - GAB, Niefern Öschelbronn, Germany
River Thames, Maidenhead, Berkshire, UK
Beach N o . 24, Hayling Island, Hampshire, UK
Jealotts Hill International Research Centre, Bracknell, Berkshire, RG42 6EY, UK
Zeneca Agrochemical s, Jealotts Hill, United Kingdom
Zeneca Agrochemicals, Jealotts Hill, United Kingdom
Jealotts Hill Research Station. Syngenta Crop protection AG
Bayer CropScience, Monheim, Germany
Huntingdon Life Sciences Ltd., Huntingdon, United Kingdom
Eurofins Agroscience Services EcoChem GmbH, N- Osch., Germany
Eurofins Agroscience Services EcoChem GmbH, NOsch., Germany
Tier3 solutions GmbH, Germany
Syngenta Crop Protection AG
Jealotts Hill Research Centre. Syngenta Crop protection AG
RCC Umweltchemie GmbH & Co KG

View File

@ -0,0 +1,233 @@
Vulpes vulpes
african clawed frog
agalychnis callidryas
amphibian
amphibians
American bullfrog tadpole
american toad
anad platyrhynchos
Anas platyrhynchos
anuran
anurans
apodemus syl vaticus
avian
bird
birds
bluegill
bluegill sunfish
bobwhite
bobwhite quail
bullfrog
Bufo americanus
brachydanio rerio
canary
carassius carassius
carp
catfish
cattle
cattles
channel catfish
Chinook
chicken
Colinus virginianus
colinus virginianus
Common carp
coturnix japonica
Coturnix japonica
cow
cows
Crucian carp
cyprinodon variegatus
cyprinus carpio
dog
dogs
duck
ducks
fathead minnow
fish
fishes
fox
frog
frogs
fudulus heteroclitus
fundulus heteroclitus
galaxias maculatus
galaxias truttaceus
gasterosteus aculeatus
goat
goats
guinea
guinea pig
guinea pigs
Guppy
hamster
hamsters
hen
hens
Hyla versicolor
ictalurus melas
ictalurus punctatus
japanese quail
japonica
lebistes reticulatus
leiostomus xanthurus
leisostomus xanthurus
lepomis macrochirus
livestock
livestocks
mallard duck
mammal
mammals
Mammalian
mice
midwestern anurans
monkey
mouse
northern bobwhite
o. mykiss
Oncorhynchus mykiss
Oncorhynchus
O. mykiss
oryzias melastigma
oryzias melastigma larvae
p. promelas
pagrus major
pig
pigeon
pigeons
pimephales promelas
Pseudacris triseriata
poecilia reticulata
poultry
quail
rabbit
rabbits
rainbow trout
Rana limnocharis
rana
limnocharis
rana pipiens
rat
rats
reptile
reptiles
ricefish
ruminant
ruminants
sheepshead minnow
sheepshead minnows
spea multiplicata
Salmo gairdneri
salmon
spotted march frog
tadpoles
treefrog
toad
terrestrial vertrebrates
Limnodynastes tasmaniensis
trout
Vulpes vulpes
wistar
xenopus laevis
xenpous leavis
zebra fish
zebrafish
Salmo gairdneri
minnow
minnows
Pimephales promela
Cyprinodon variegatus
limnodynastes
Rana catesbeiana
R. catesbeiana
coho salmon
Oncorhynchus tshawytscha
O. tshawytscha
tshawytscha
catesbeiana
kisutch
Pseudacris triseriata
Pseudacris
triseriata
Wood pigeon
Columba palumbus
palumbus
Columbidae
shrew
shrews
bank vole
common vole
vole
voles
lagomorph
Wood mouse
Apodemus sylvaticus
A. sylvaticus
Apodemus flavicollis
Apodemus
mus musculus
Microtus arvalis
Microtus agrestis
Microtus
Arvicola terrestris
Sorex araneus
Myodes glareolus
yellow-necked mouse
house mouse
Oryctolagus cuniculus
marten
martes
white-toothed shrew
greater white-toothed shrew
Lepus europaeus
brown hare
European brown hare
European rabbit
O. cuniculus
Crocidura russula
Chinese Hamster
Rat
Rats
Dog
Chinese hamsters
Chinese hamster
Mouse
Guinea pig
Wistar rats
Rabbit
mammalian
Japanese quail
Microtus subterraneus
Lepomis macrochirus
P. promelas
Cyprinus carpio
Fish
Ictalurus punctatus
Carassius carassius
Lepomis macrochirus
Poecilia reticulata
Lebistes reticulatus
Lepomis macrochirus
Leiostomus xanthurus
Pimephales promelas
Lepomis macrochirus
Albino rat
Hen
Goat
Livestock
Guinea Pigs
Hamster
wood mouse
Rabbits
Mice
Rainbow trout
Canary
Serinus canaria
Guinea Pig
Cow
Pigs
Poultry
Guinea-pigs
White rabbits
Birds
Wood mice

View File

@ -0,0 +1,66 @@
package drools
import com.iqser.red.service.redaction.v1.server.redaction.model.Section
global Section section
// Rule 0: fires when the section's entity collection is not empty and calls
// highlightAll for the VERTEBRATE and NO_REDACTION_INDICATOR types.
rule "0: Highlight Indicators"
    when
        eval(!section.getEntities().isEmpty());
    then
        section.highlightAll("VERTEBRATE");
        section.highlightAll("NO_REDACTION_INDICATOR");
end
// Rule 1: fires when the section contains a VERTEBRATE entry and requests
// redaction of NAME and ADDRESS, passing 1 and the reason text to the section.
rule "1: Redacted because Section contains Vertebrate"
    when
        eval(section.contains("VERTEBRATE"));
    then
        section.redact("NAME", 1, "Redacted because Section contains Vertebrate");
        section.redact("ADDRESS", 1, "Redacted because Section contains Vertebrate");
end
// Rule 2: fires when the section contains no VERTEBRATE entry and calls
// redactNot for NAME and ADDRESS, passing 2 and the reason text to the section.
rule "2: Not Redacted because Section contains no Vertebrate"
    when
        eval(!section.contains("VERTEBRATE"));
    then
        section.redactNot("NAME", 2, "Not Redacted because Section contains no Vertebrate");
        section.redactNot("ADDRESS", 2, "Not Redacted because Section contains no Vertebrate");
end
// Rule 3: fires when the section contains both a VERTEBRATE entry and a
// NO_REDACTION_INDICATOR entry and calls redactNot for NAME and ADDRESS.
// NOTE(review): rule 1 matches the same sections; no salience is declared, so
// the final outcome presumably depends on firing order and on how Section
// resolves a redact call followed by a redactNot call - confirm.
rule "3: Do not redact Names and Addresses if no redaction Indicator is contained"
    when
        eval(section.contains("VERTEBRATE") && section.contains("NO_REDACTION_INDICATOR"));
    then
        section.redactNot("NAME", 3, "Vertebrate was found, but also a no redaction indicator");
        section.redactNot("ADDRESS", 3, "Vertebrate was found, but also a no redaction indicator");
end
// Rule 4: fires when the section text contains the word "applicant" in any
// letter case and requests ADDRESS redaction of the contact fields that follow
// the labels listed below.
rule "4: Redact contact information, if applicant is found"
    when
        // Locale.ROOT keeps the case folding independent of the JVM default
        // locale; with a Turkish default locale "APPLICANT" would lower-case to
        // a dotless i and the match would silently fail.
        eval(section.getText().toLowerCase(java.util.Locale.ROOT).contains("applicant"));
    then
        section.redactLineAfter("Name:", "ADDRESS", 4, "Redacted because of Rule 4");
        section.redactBetween("Address:", "Contact", "ADDRESS", 4, "Redacted because of Rule 4");
        section.redactLineAfter("Contact point:", "ADDRESS", 4, "Redacted because of Rule 4");
        section.redactLineAfter("Phone:", "ADDRESS", 4, "Redacted because of Rule 4");
        section.redactLineAfter("Fax:", "ADDRESS", 4, "Redacted because of Rule 4");
        section.redactLineAfter("E-mail:", "ADDRESS", 4, "Redacted because of Rule 4");
end
// Rule 5: fires when the section text contains the exact (case-sensitive)
// phrase "Producer of the plant protection product" and requests ADDRESS
// redaction of the contact fields that follow the labels listed below.
// The reason text previously was the placeholder "xxxx"; it now names the rule,
// in the same form rule 4 uses.
rule "5: Redact contact information, if 'Producer of the plant protection product' is found"
    when
        eval(section.getText().contains("Producer of the plant protection product"));
    then
        section.redactLineAfter("Name:", "ADDRESS", 5, "Redacted because of Rule 5");
        section.redactBetween("Address:", "Contact", "ADDRESS", 5, "Redacted because of Rule 5");
        section.redactBetween("Contact:", "Phone", "ADDRESS", 5, "Redacted because of Rule 5");
        section.redactLineAfter("Phone:", "ADDRESS", 5, "Redacted because of Rule 5");
        section.redactLineAfter("Fax:", "ADDRESS", 5, "Redacted because of Rule 5");
        section.redactLineAfter("E-mail:", "ADDRESS", 5, "Redacted because of Rule 5");
end

View File

@ -0,0 +1,14 @@
package com.iqser.red.service.redaction.v1.server;
import org.junit.Test;
/**
 * Minimal test class whose single test prints a greeting to standard output
 * and makes no assertions.
 */
public class DummyTest {

    /** Text written to standard output by {@link #dummy()}. */
    private static final String GREETING = "Hello World";

    /** Prints the greeting; passes as long as no exception is thrown. */
    @Test
    public void dummy() {
        System.out.println(GREETING);
    }
}

View File

@ -0,0 +1,110 @@
package com.iqser.red.service.redaction.v1.server;
import static org.springframework.boot.test.context.SpringBootTest.WebEnvironment.DEFINED_PORT;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import org.apache.commons.io.IOUtils;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.core.io.ClassPathResource;
import org.springframework.test.context.junit4.SpringRunner;
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
import com.iqser.red.service.redaction.v1.model.RedactionResult;
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
/**
 * Integration tests that send sample PDFs from the test classpath through the
 * {@link RedactionController} and write the returned document to {@code /tmp}.
 *
 * <p>The class is annotated with {@link Ignore}, so none of these tests run in a
 * normal build. The tests make no assertions on the result; they only fail when
 * an exception is thrown.
 */
@Ignore
@RunWith(SpringRunner.class)
@SpringBootTest(webEnvironment = DEFINED_PORT)
public class RedactionIntegrationTest {

    private static final String METOLACHLOR_VOLUME_1 =
            "files/Metolachlor/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf";
    private static final String METOLACHLOR_VOLUME_2 =
            "files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf";
    private static final String FLUDIOXONIL_VOLUME_2 =
            "files/Fludioxonil/51 Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf";

    @Autowired
    private RedactionController redactionController;

    /**
     * Redacts the Metolachlor volume 1 PDF with flat redaction disabled, writes the
     * result to {@code /tmp/Redacted.pdf} and prints the elapsed time in
     * milliseconds and the reported page count.
     */
    @Test
    public void redactionTest() throws IOException {

        long start = System.currentTimeMillis();

        RedactionRequest request = loadRequest(METOLACHLOR_VOLUME_1);
        request.setFlatRedaction(false);

        RedactionResult result = redactionController.redact(request);
        writeDocument("/tmp/Redacted.pdf", result);

        long end = System.currentTimeMillis();

        System.out.println("duration: " + (end - start));
        System.out.println("numberOfPages: " + result.getNumberOfPages());
    }

    /** Calls {@code classify} for the Fludioxonil PDF and writes the result to {@code /tmp/Classified.pdf}. */
    @Test
    public void classificationTest() throws IOException {

        RedactionResult result = redactionController.classify(loadRequest(FLUDIOXONIL_VOLUME_2));
        writeDocument("/tmp/Classified.pdf", result);
    }

    /** Calls {@code sections} for the Fludioxonil PDF and writes the result to {@code /tmp/Sections.pdf}. */
    @Test
    public void sectionsTest() throws IOException {

        RedactionResult result = redactionController.sections(loadRequest(FLUDIOXONIL_VOLUME_2));
        writeDocument("/tmp/Sections.pdf", result);
    }

    /** Calls {@code htmlTables} for the Fludioxonil PDF and writes the result to {@code /tmp/Tables.html}. */
    @Test
    public void htmlTablesTest() throws IOException {

        RedactionResult result = redactionController.htmlTables(loadRequest(FLUDIOXONIL_VOLUME_2));
        writeDocument("/tmp/Tables.html", result);
    }

    /**
     * Calls {@code htmlTables} for the Metolachlor volume 2 PDF and writes the result
     * to {@code /tmp/TablesRotation.html}.
     */
    @Test
    public void htmlTableRotationTest() throws IOException {

        RedactionResult result = redactionController.htmlTables(loadRequest(METOLACHLOR_VOLUME_2));
        // Own output file: this test used to write /tmp/Tables.html as well, so it and
        // htmlTablesTest overwrote each other's output.
        writeDocument("/tmp/TablesRotation.html", result);
    }

    /**
     * Builds a request whose document is the content of the given classpath resource.
     *
     * @param classpathLocation location of the PDF on the classpath
     * @return a request carrying the resource bytes as its document
     * @throws IOException if the resource cannot be opened or read
     */
    private static RedactionRequest loadRequest(String classpathLocation) throws IOException {

        // IOUtils.toByteArray does not close the stream it reads, so close it here.
        try (InputStream pdfStream = new ClassPathResource(classpathLocation).getInputStream()) {
            return RedactionRequest.builder().document(IOUtils.toByteArray(pdfStream)).build();
        }
    }

    /**
     * Writes the document of the given result to the given file, replacing existing content.
     *
     * @param path   file to write
     * @param result result whose document is written
     * @throws IOException if the file cannot be opened or written
     */
    private static void writeDocument(String path, RedactionResult result) throws IOException {

        try (FileOutputStream fileOutputStream = new FileOutputStream(path)) {
            fileOutputStream.write(result.getDocument());
        }
    }
}

View File

@ -0,0 +1,14 @@
# Ribbon client timeouts; both are set to 600000 (presumably milliseconds, i.e. 10 minutes - confirm).
ribbon:
ConnectTimeout: 600000
ReadTimeout: 600000
# Turns on Spring's allow-bean-definition-overriding switch.
spring:
main:
allow-bean-definition-overriding: true
# Kafka Streams processing flag is set to false.
processing.kafkastreams: false
# Multi-tenancy flag is set to false.
platform.multi-tenancy:
enabled: false

View File

@ -0,0 +1,3 @@
# Sets the Spring application name of this service.
spring:
application:
name: pdf-redaction-service-v1

Some files were not shown because too many files have changed in this diff Show More