RED-5232: Code reformatting
This commit is contained in:
parent
69540bcd5e
commit
76fda2b573
@ -1,4 +1,5 @@
|
||||
# Changelog
|
||||
|
||||
All notable changes to this project will be documented in this file.
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
@ -1,37 +1,37 @@
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<parent>
|
||||
<groupId>com.atlassian.bamboo</groupId>
|
||||
<artifactId>bamboo-specs-parent</artifactId>
|
||||
<version>8.1.3</version>
|
||||
<relativePath/>
|
||||
</parent>
|
||||
<parent>
|
||||
<groupId>com.atlassian.bamboo</groupId>
|
||||
<artifactId>bamboo-specs-parent</artifactId>
|
||||
<version>8.1.3</version>
|
||||
<relativePath/>
|
||||
</parent>
|
||||
|
||||
<artifactId>bamboo-specs</artifactId>
|
||||
<version>1.0.0-SNAPSHOT</version>
|
||||
<packaging>jar</packaging>
|
||||
<artifactId>bamboo-specs</artifactId>
|
||||
<version>1.0.0-SNAPSHOT</version>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>com.atlassian.bamboo</groupId>
|
||||
<artifactId>bamboo-specs-api</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.atlassian.bamboo</groupId>
|
||||
<artifactId>bamboo-specs</artifactId>
|
||||
</dependency>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>com.atlassian.bamboo</groupId>
|
||||
<artifactId>bamboo-specs-api</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.atlassian.bamboo</groupId>
|
||||
<artifactId>bamboo-specs</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Test dependencies -->
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
<!-- Test dependencies -->
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
|
||||
<!-- run 'mvn test' to perform offline validation of the plan -->
|
||||
<!-- run 'mvn -Ppublish-specs' to upload the plan to your Bamboo server -->
|
||||
<!-- run 'mvn test' to perform offline validation of the plan -->
|
||||
<!-- run 'mvn -Ppublish-specs' to upload the plan to your Bamboo server -->
|
||||
</project>
|
||||
|
||||
@ -70,7 +70,12 @@ public class PlanSpec {
|
||||
|
||||
private PlanPermissions createPlanPermission(PlanIdentifier planIdentifier) {
|
||||
|
||||
Permissions permission = new Permissions().userPermissions("atlbamboo", PermissionType.EDIT, PermissionType.VIEW, PermissionType.ADMIN, PermissionType.CLONE, PermissionType.BUILD)
|
||||
Permissions permission = new Permissions().userPermissions("atlbamboo",
|
||||
PermissionType.EDIT,
|
||||
PermissionType.VIEW,
|
||||
PermissionType.ADMIN,
|
||||
PermissionType.CLONE,
|
||||
PermissionType.BUILD)
|
||||
.groupPermissions("development", PermissionType.EDIT, PermissionType.VIEW, PermissionType.CLONE, PermissionType.BUILD)
|
||||
.groupPermissions("devplant", PermissionType.EDIT, PermissionType.VIEW, PermissionType.CLONE, PermissionType.BUILD)
|
||||
.loggedInUserPermissions(PermissionType.VIEW)
|
||||
@ -89,33 +94,16 @@ public class PlanSpec {
|
||||
|
||||
return new Plan(project(), SERVICE_NAME, new BambooKey(SERVICE_KEY)).description("Plan created from (enter repository url of your plan)")
|
||||
.variables(new Variable("maven_add_param", ""))
|
||||
.stages(new Stage("Default Stage").jobs(new Job("Default Job",
|
||||
new BambooKey("JOB1")).tasks(
|
||||
new CleanWorkingDirectoryTask()
|
||||
.description("Clean working directory.")
|
||||
.enabled(true),
|
||||
new VcsCheckoutTask().description("Checkout Default Repository")
|
||||
.cleanCheckout(true)
|
||||
.checkoutItems(
|
||||
new CheckoutItem()
|
||||
.defaultRepository()),
|
||||
new ScriptTask().description("Build")
|
||||
.location(Location.FILE)
|
||||
.fileFromPath("bamboo-specs/src/main/resources/scripts/build-java.sh")
|
||||
.argument(SERVICE_NAME), createJUnitParserTask()
|
||||
.description("Resultparser")
|
||||
.stages(new Stage("Default Stage").jobs(new Job("Default Job", new BambooKey("JOB1")).tasks(new CleanWorkingDirectoryTask().description("Clean working directory.")
|
||||
.enabled(true),
|
||||
new VcsCheckoutTask().description("Checkout Default Repository").cleanCheckout(true).checkoutItems(new CheckoutItem().defaultRepository()),
|
||||
new ScriptTask().description("Build").location(Location.FILE).fileFromPath("bamboo-specs/src/main/resources/scripts/build-java.sh").argument(SERVICE_NAME),
|
||||
createJUnitParserTask().description("Resultparser")
|
||||
.resultDirectories("**/test-reports/*.xml, **/target/surefire-reports/*.xml, **/target/failsafe-reports/*.xml")
|
||||
.enabled(true),
|
||||
new InjectVariablesTask().description("Inject git Tag")
|
||||
.path("git.tag")
|
||||
.namespace("g")
|
||||
.scope(InjectVariablesScope.LOCAL),
|
||||
new VcsTagTask().description("${bamboo.g.gitTag}")
|
||||
.tagName("${bamboo.g.gitTag}")
|
||||
.defaultRepository())
|
||||
.dockerConfiguration(
|
||||
new DockerConfiguration()
|
||||
.image("nexus.iqser.com:5001/infra/maven:3.8.4-openjdk-17-slim")
|
||||
.enabled(true),
|
||||
new InjectVariablesTask().description("Inject git Tag").path("git.tag").namespace("g").scope(InjectVariablesScope.LOCAL),
|
||||
new VcsTagTask().description("${bamboo.g.gitTag}").tagName("${bamboo.g.gitTag}").defaultRepository())
|
||||
.dockerConfiguration(new DockerConfiguration().image("nexus.iqser.com:5001/infra/maven:3.8.4-openjdk-17-slim")
|
||||
.volume("/etc/maven/settings.xml", "/usr/share/maven/ref/settings.xml")
|
||||
.volume("/var/run/docker.sock", "/var/run/docker.sock"))))
|
||||
.linkedRepositories("RED / " + SERVICE_NAME)
|
||||
@ -130,29 +118,19 @@ public class PlanSpec {
|
||||
|
||||
return new Plan(project(), SERVICE_NAME + "-Night", new BambooKey(SERVICE_KEY + "NIGHT")).description("Long running nightly Plan for tests")
|
||||
.variables(new Variable("maven_add_param", "-Dtest-groups=rules-test"))
|
||||
.stages(new Stage("Default Stage").jobs(
|
||||
new Job("Default Job", new BambooKey("JOB1")).tasks(
|
||||
new CleanWorkingDirectoryTask()
|
||||
.description("Clean working directory.")
|
||||
.enabled(true),
|
||||
new VcsCheckoutTask()
|
||||
.description("Checkout Default Repository")
|
||||
.cleanCheckout(true)
|
||||
.checkoutItems(
|
||||
new CheckoutItem()
|
||||
.defaultRepository()),
|
||||
new ScriptTask()
|
||||
.description("Build")
|
||||
.stages(new Stage("Default Stage").jobs(new Job("Default Job", new BambooKey("JOB1")).tasks(new CleanWorkingDirectoryTask().description("Clean working directory.")
|
||||
.enabled(true),
|
||||
new VcsCheckoutTask().description("Checkout Default Repository").cleanCheckout(true).checkoutItems(new CheckoutItem().defaultRepository()),
|
||||
new ScriptTask().description("Build")
|
||||
.location(Location.FILE)
|
||||
.fileFromPath("bamboo-specs/src/main/resources/scripts/build-java.sh")
|
||||
.argument(SERVICE_NAME + " verify"), createJUnitParserTask().description("Resultparser")
|
||||
.argument(SERVICE_NAME + " verify"),
|
||||
createJUnitParserTask().description("Resultparser")
|
||||
.resultDirectories("**/test-reports/*.xml, **/target/surefire-reports/*.xml, **/target/failsafe-reports/*.xml")
|
||||
.enabled(true))
|
||||
.dockerConfiguration(
|
||||
new DockerConfiguration()
|
||||
.image("nexus.iqser.com:5001/infra/maven:3.8.4-openjdk-17-slim")
|
||||
.volume("/etc/maven/settings.xml", "/usr/share/maven/ref/settings.xml")
|
||||
.volume("/var/run/docker.sock", "/var/run/docker.sock"))))
|
||||
.dockerConfiguration(new DockerConfiguration().image("nexus.iqser.com:5001/infra/maven:3.8.4-openjdk-17-slim")
|
||||
.volume("/etc/maven/settings.xml", "/usr/share/maven/ref/settings.xml")
|
||||
.volume("/var/run/docker.sock", "/var/run/docker.sock"))))
|
||||
.linkedRepositories("RED / " + SERVICE_NAME)
|
||||
.triggers(new ScheduledTrigger().scheduleOnceDaily(LocalTime.of(23, 00)))
|
||||
.planBranchManagement(new PlanBranchManagement().delete(new BranchCleanup().whenInactiveInRepositoryAfterDays(14)).notificationForCommitters());
|
||||
@ -163,12 +141,9 @@ public class PlanSpec {
|
||||
|
||||
return new Plan(project(), SERVICE_NAME + "-Sec", new BambooKey(SERVICE_KEY + "SEC")).description("Security Analysis Plan")
|
||||
.stages(new Stage("Default Stage").jobs(new Job("Default Job", new BambooKey("JOB1")).tasks(new ScriptTask().description("Clean")
|
||||
.inlineBody("#!/bin/bash\n" + "set -e\n" + "rm -rf ./*"), new VcsCheckoutTask().description("Checkout Default Repository")
|
||||
.cleanCheckout(true)
|
||||
.checkoutItems(new CheckoutItem().defaultRepository()), new ScriptTask().description("Sonar")
|
||||
.location(Location.FILE)
|
||||
.fileFromPath("bamboo-specs/src/main/resources/scripts/sonar-java.sh")
|
||||
.argument(SERVICE_NAME))
|
||||
.inlineBody("#!/bin/bash\n" + "set -e\n" + "rm -rf ./*"),
|
||||
new VcsCheckoutTask().description("Checkout Default Repository").cleanCheckout(true).checkoutItems(new CheckoutItem().defaultRepository()),
|
||||
new ScriptTask().description("Sonar").location(Location.FILE).fileFromPath("bamboo-specs/src/main/resources/scripts/sonar-java.sh").argument(SERVICE_NAME))
|
||||
.dockerConfiguration(new DockerConfiguration().image("nexus.iqser.com:5001/infra/maven:3.8.4-openjdk-17-slim")
|
||||
.dockerRunArguments("--net=host")
|
||||
.volume("/etc/maven/settings.xml", "/usr/share/maven/conf/settings.xml")
|
||||
|
||||
@ -1,6 +1,5 @@
|
||||
package buildjob;
|
||||
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
import com.atlassian.bamboo.specs.api.builders.plan.Plan;
|
||||
@ -8,6 +7,7 @@ import com.atlassian.bamboo.specs.api.exceptions.PropertiesValidationException;
|
||||
import com.atlassian.bamboo.specs.api.util.EntityPropertiesBuilders;
|
||||
|
||||
public class PlanSpecTest {
|
||||
|
||||
@Test
|
||||
public void checkYourPlanOffline() throws PropertiesValidationException {
|
||||
|
||||
@ -20,4 +20,5 @@ public class PlanSpecTest {
|
||||
Plan secPlan = new PlanSpec().createSecBuild();
|
||||
EntityPropertiesBuilders.build(secPlan);
|
||||
}
|
||||
|
||||
}
|
||||
@ -6,7 +6,7 @@
|
||||
<groupId>com.iqser.red</groupId>
|
||||
<artifactId>platform-docker-dependency</artifactId>
|
||||
<version>1.2.0</version>
|
||||
<relativePath />
|
||||
<relativePath/>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
|
||||
@ -6,7 +6,7 @@
|
||||
<artifactId>platform-dependency</artifactId>
|
||||
<groupId>com.iqser.red</groupId>
|
||||
<version>1.10.0</version>
|
||||
<relativePath />
|
||||
<relativePath/>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
@ -55,7 +55,7 @@
|
||||
<plugin>
|
||||
<groupId>org.sonarsource.scanner.maven</groupId>
|
||||
<artifactId>sonar-maven-plugin</artifactId>
|
||||
</plugin>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.owasp</groupId>
|
||||
<artifactId>dependency-check-maven</artifactId>
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
package com.iqser.red.service.redaction.v1.model;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
package com.iqser.red.service.redaction.v1.model;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
@ -31,6 +32,5 @@ public class AnalyzeResult {
|
||||
|
||||
private ManualRedactions manualRedactions;
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -2,6 +2,14 @@ package com.iqser.red.service.redaction.v1.model;
|
||||
|
||||
public enum ArgumentType {
|
||||
|
||||
INTEGER, BOOLEAN, STRING, FILE_ATTRIBUTE, REGEX, TYPE, RULE_NUMBER, LEGAL_BASIS, REFERENCE_TYPE
|
||||
INTEGER,
|
||||
BOOLEAN,
|
||||
STRING,
|
||||
FILE_ATTRIBUTE,
|
||||
REGEX,
|
||||
TYPE,
|
||||
RULE_NUMBER,
|
||||
LEGAL_BASIS,
|
||||
REFERENCE_TYPE
|
||||
|
||||
}
|
||||
|
||||
@ -16,4 +16,5 @@ public class Change {
|
||||
private int analysisNumber;
|
||||
private ChangeType type;
|
||||
private OffsetDateTime dateTime;
|
||||
|
||||
}
|
||||
|
||||
@ -1,5 +1,7 @@
|
||||
package com.iqser.red.service.redaction.v1.model;
|
||||
|
||||
public enum ChangeType {
|
||||
ADDED, REMOVED, CHANGED
|
||||
ADDED,
|
||||
REMOVED,
|
||||
CHANGED
|
||||
}
|
||||
|
||||
@ -1,5 +1,7 @@
|
||||
package com.iqser.red.service.redaction.v1.model;
|
||||
|
||||
public enum Engine {
|
||||
DICTIONARY, NER, RULE
|
||||
DICTIONARY,
|
||||
NER,
|
||||
RULE
|
||||
}
|
||||
|
||||
@ -18,4 +18,5 @@ public class ImportedRedaction {
|
||||
|
||||
@Builder.Default
|
||||
private List<Rectangle> positions = new ArrayList<>();
|
||||
|
||||
}
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
package com.iqser.red.service.redaction.v1.model;
|
||||
|
||||
import com.dslplatform.json.CompiledJson;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
@ -19,4 +20,5 @@ public class ImportedRedactions {
|
||||
|
||||
@Builder.Default
|
||||
private Map<Integer, List<ImportedRedaction>> importedRedactions = new HashMap<>();
|
||||
|
||||
}
|
||||
|
||||
@ -2,6 +2,7 @@ package com.iqser.red.service.redaction.v1.model;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.AnnotationStatus;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.BaseAnnotation;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
@ -24,7 +25,9 @@ public class ManualChange {
|
||||
private String userId;
|
||||
private Map<String, String> propertyChanges = new HashMap<>();
|
||||
|
||||
|
||||
public static ManualChange from(BaseAnnotation baseAnnotation) {
|
||||
|
||||
ManualChange manualChange = new ManualChange();
|
||||
manualChange.annotationStatus = baseAnnotation.getStatus();
|
||||
manualChange.processedDate = baseAnnotation.getProcessedDate();
|
||||
@ -33,16 +36,22 @@ public class ManualChange {
|
||||
return manualChange;
|
||||
}
|
||||
|
||||
|
||||
public boolean isProcessed() {
|
||||
|
||||
return processedDate != null;
|
||||
}
|
||||
|
||||
|
||||
public ManualChange withManualRedactionType(ManualRedactionType manualRedactionType) {
|
||||
|
||||
this.manualRedactionType = manualRedactionType;
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
public ManualChange withChange(String property, String value) {
|
||||
|
||||
this.propertyChanges.put(property, value);
|
||||
return this;
|
||||
}
|
||||
|
||||
@ -2,6 +2,9 @@ package com.iqser.red.service.redaction.v1.model;
|
||||
|
||||
public enum MessageType {
|
||||
|
||||
ANALYSE, REANALYSE, STRUCTURE_ANALYSE, SURROUNDING_TEXT
|
||||
ANALYSE,
|
||||
REANALYSE,
|
||||
STRUCTURE_ANALYSE,
|
||||
SURROUNDING_TEXT
|
||||
|
||||
}
|
||||
|
||||
@ -12,4 +12,5 @@ import lombok.NoArgsConstructor;
|
||||
public class ReanalyzeResult {
|
||||
|
||||
private RedactionLog redactionLog;
|
||||
|
||||
}
|
||||
|
||||
@ -14,4 +14,5 @@ public class Rectangle {
|
||||
private float height;
|
||||
|
||||
private int page;
|
||||
|
||||
}
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
package com.iqser.red.service.redaction.v1.model;
|
||||
|
||||
import com.dslplatform.json.CompiledJson;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
@ -8,14 +9,12 @@ import lombok.NoArgsConstructor;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
@Data
|
||||
@CompiledJson
|
||||
@AllArgsConstructor
|
||||
@NoArgsConstructor
|
||||
public class RedactionLog {
|
||||
|
||||
|
||||
/**
|
||||
* Version 0 Redaction Logs have manual redactions merged inside them
|
||||
* Version 1 Redaction Logs only contain system ( rule/dictionary ) redactions. Manual Redactions are merged in at runtime.
|
||||
@ -35,5 +34,4 @@ public class RedactionLog {
|
||||
private long rulesVersion = -1;
|
||||
private long legalBasisVersion = -1;
|
||||
|
||||
|
||||
}
|
||||
|
||||
@ -1,11 +1,11 @@
|
||||
package com.iqser.red.service.redaction.v1.model;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.AnnotationStatus;
|
||||
|
||||
import lombok.*;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@ -35,7 +35,6 @@ public class RedactionLogEntry {
|
||||
private List<Rectangle> positions = new ArrayList<>();
|
||||
private int sectionNumber;
|
||||
|
||||
|
||||
private String textBefore;
|
||||
private String textAfter;
|
||||
|
||||
@ -70,21 +69,27 @@ public class RedactionLogEntry {
|
||||
@Builder.Default
|
||||
private Set<String> importedRedactionIntersections = new HashSet<>();
|
||||
|
||||
|
||||
public boolean lastChangeIsRemoved() {
|
||||
|
||||
return last(changes).map(c -> c.getType() == ChangeType.REMOVED).orElse(false);
|
||||
}
|
||||
|
||||
|
||||
public boolean isLocalManualRedaction() {
|
||||
return manualChanges.stream().anyMatch(mc -> mc.getManualRedactionType() == ManualRedactionType.ADD_LOCALLY &&
|
||||
mc.getAnnotationStatus() == AnnotationStatus.APPROVED);
|
||||
|
||||
return manualChanges.stream().anyMatch(mc -> mc.getManualRedactionType() == ManualRedactionType.ADD_LOCALLY && mc.getAnnotationStatus() == AnnotationStatus.APPROVED);
|
||||
}
|
||||
|
||||
|
||||
public boolean isManuallyRemoved() {
|
||||
return manualChanges.stream().anyMatch(mc -> mc.getManualRedactionType() == ManualRedactionType.REMOVE_LOCALLY &&
|
||||
mc.getAnnotationStatus() == AnnotationStatus.APPROVED);
|
||||
|
||||
return manualChanges.stream().anyMatch(mc -> mc.getManualRedactionType() == ManualRedactionType.REMOVE_LOCALLY && mc.getAnnotationStatus() == AnnotationStatus.APPROVED);
|
||||
}
|
||||
|
||||
|
||||
private <T> Optional<T> last(List<T> list) {
|
||||
|
||||
return list.isEmpty() ? Optional.empty() : Optional.of(list.get(list.size() - 1));
|
||||
}
|
||||
|
||||
|
||||
@ -3,6 +3,7 @@ package com.iqser.red.service.redaction.v1.model;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.configuration.Colors;
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.Type;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
@ -29,4 +30,5 @@ public class RedactionRequest {
|
||||
private List<Type> types;
|
||||
|
||||
private boolean includeFalsePositives;
|
||||
|
||||
}
|
||||
|
||||
@ -15,13 +15,20 @@ public class SectionArea {
|
||||
private int page;
|
||||
private String header;
|
||||
|
||||
|
||||
public boolean contains(Rectangle other) {
|
||||
return page == other.getPage() && this.topLeft.getX() <= other.getTopLeft().getX() && this.topLeft.getX() + this.getWidth() >= other.getTopLeft().getX() + other.getWidth() && this.getTopLeft().getY() <= other.getTopLeft().getY() && this.getTopLeft().getY() + this.getHeight() >= other.getTopLeft().getY() + other.getHeight();
|
||||
|
||||
return page == other.getPage() && this.topLeft.getX() <= other.getTopLeft().getX() && this.topLeft.getX() + this.getWidth() >= other.getTopLeft()
|
||||
.getX() + other.getWidth() && this.getTopLeft().getY() <= other.getTopLeft().getY() && this.getTopLeft().getY() + this.getHeight() >= other.getTopLeft()
|
||||
.getY() + other.getHeight();
|
||||
}
|
||||
|
||||
|
||||
// TODO we should only use one rectangle class.
|
||||
public boolean contains(com.iqser.red.service.persistence.service.v1.api.model.annotations.Rectangle other) {
|
||||
return page == other.getPage() && this.topLeft.getX() <= other.getTopLeftX() && this.topLeft.getX() + this.getWidth() >= other.getTopLeftX() + other.getWidth() && this.getTopLeft().getY() <= other.getTopLeftY() && this.getTopLeft().getY() + this.getHeight() >= other.getTopLeftY() + other.getHeight();
|
||||
|
||||
return page == other.getPage() && this.topLeft.getX() <= other.getTopLeftX() && this.topLeft.getX() + this.getWidth() >= other.getTopLeftX() + other.getWidth() && this.getTopLeft()
|
||||
.getY() <= other.getTopLeftY() && this.getTopLeft().getY() + this.getHeight() >= other.getTopLeftY() + other.getHeight();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
package com.iqser.red.service.redaction.v1.model;
|
||||
|
||||
import com.dslplatform.json.CompiledJson;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
@ -28,4 +29,5 @@ public class SectionGrid {
|
||||
private List<SectionArea> sectionAreas;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -4,6 +4,7 @@ import com.iqser.red.service.persistence.service.v1.api.model.annotations.Manual
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionLog;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
|
||||
import com.iqser.red.service.redaction.v1.model.RedactionResult;
|
||||
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.web.bind.annotation.PathVariable;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
@ -32,8 +33,6 @@ public interface RedactionResource {
|
||||
|
||||
|
||||
@PostMapping(value = "/manual/surrounding-text/{dossierId}/{fileId}", consumes = MediaType.APPLICATION_JSON_VALUE, produces = MediaType.APPLICATION_JSON_VALUE)
|
||||
ManualRedactions addSurroundingText(@PathVariable("dossierId") String dossierId,
|
||||
@PathVariable("fileId") String fileId,
|
||||
@RequestBody ManualRedactions manualRedactions);
|
||||
ManualRedactions addSurroundingText(@PathVariable("dossierId") String dossierId, @PathVariable("fileId") String fileId, @RequestBody ManualRedactions manualRedactions);
|
||||
|
||||
}
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
package com.iqser.red.service.redaction.v1.resources;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.RuleBuilderModel;
|
||||
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
|
||||
|
||||
@ -3,6 +3,7 @@ package com.iqser.red.service.redaction.v1.server;
|
||||
import com.iqser.red.commons.spring.DefaultWebMvcConfiguration;
|
||||
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
|
||||
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
|
||||
|
||||
import org.springframework.boot.SpringApplication;
|
||||
import org.springframework.boot.actuate.autoconfigure.security.servlet.ManagementWebSecurityAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||
@ -22,14 +23,16 @@ import io.micrometer.core.instrument.MeterRegistry;
|
||||
public class Application {
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
System.setProperty("org.apache.pdfbox.rendering.UsePureJavaCMYKConversion", "true");
|
||||
SpringApplication.run(Application.class, args);
|
||||
}
|
||||
|
||||
|
||||
@Bean
|
||||
public TimedAspect timedAspect(MeterRegistry registry) {
|
||||
|
||||
return new TimedAspect(registry);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
@ -14,7 +14,9 @@ public class FloatFrequencyCounter {
|
||||
@Getter
|
||||
Map<Float, Integer> countPerValue = new HashMap<>();
|
||||
|
||||
|
||||
public void add(float value) {
|
||||
|
||||
if (!countPerValue.containsKey(value)) {
|
||||
countPerValue.put(value, 1);
|
||||
} else {
|
||||
@ -22,7 +24,9 @@ public class FloatFrequencyCounter {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void addAll(Map<Float, Integer> otherCounter) {
|
||||
|
||||
for (Map.Entry<Float, Integer> entry : otherCounter.entrySet()) {
|
||||
if (countPerValue.containsKey(entry.getKey())) {
|
||||
countPerValue.put(entry.getKey(), countPerValue.get(entry.getKey()) + entry.getValue());
|
||||
@ -32,7 +36,9 @@ public class FloatFrequencyCounter {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public Float getMostPopular() {
|
||||
|
||||
Map.Entry<Float, Integer> mostPopular = null;
|
||||
for (Map.Entry<Float, Integer> entry : countPerValue.entrySet()) {
|
||||
if (mostPopular == null || entry.getValue() >= mostPopular.getValue()) {
|
||||
@ -44,6 +50,7 @@ public class FloatFrequencyCounter {
|
||||
|
||||
|
||||
public List<Float> getHighterThanMostPopular() {
|
||||
|
||||
Float mostPopular = getMostPopular();
|
||||
List<Float> higher = new ArrayList<>();
|
||||
for (Float value : countPerValue.keySet()) {
|
||||
@ -57,6 +64,7 @@ public class FloatFrequencyCounter {
|
||||
|
||||
|
||||
public Float getHighest() {
|
||||
|
||||
Float highest = null;
|
||||
for (Float value : countPerValue.keySet()) {
|
||||
if (highest == null || value > highest) {
|
||||
|
||||
@ -3,6 +3,7 @@ package com.iqser.red.service.redaction.v1.server.classification.model;
|
||||
import com.dslplatform.json.JsonAttribute;
|
||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
|
||||
@ -14,6 +15,7 @@ public class Footer {
|
||||
|
||||
private List<TextBlock> textBlocks;
|
||||
|
||||
|
||||
@JsonIgnore
|
||||
@JsonAttribute(ignore = true)
|
||||
public SearchableText getSearchableText() {
|
||||
|
||||
@ -3,6 +3,7 @@ package com.iqser.red.service.redaction.v1.server.classification.model;
|
||||
import com.dslplatform.json.JsonAttribute;
|
||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
|
||||
@ -14,6 +15,7 @@ public class Header {
|
||||
|
||||
private List<TextBlock> textBlocks;
|
||||
|
||||
|
||||
@JsonIgnore
|
||||
@JsonAttribute(ignore = true)
|
||||
public SearchableText getSearchableText() {
|
||||
|
||||
@ -2,5 +2,7 @@ package com.iqser.red.service.redaction.v1.server.classification.model;
|
||||
|
||||
public enum Orientation {
|
||||
|
||||
NONE, LEFT, RIGHT
|
||||
NONE,
|
||||
LEFT,
|
||||
RIGHT
|
||||
}
|
||||
|
||||
@ -3,6 +3,7 @@ package com.iqser.red.service.redaction.v1.server.classification.model;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.NonNull;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
@ -4,6 +4,7 @@ import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
|
||||
@ -7,6 +7,7 @@ import com.iqser.red.service.redaction.v1.model.SectionArea;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.CellValue;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
@ -40,10 +41,12 @@ public class SectionText {
|
||||
|
||||
|
||||
public void setTabularData(Map<String, CellValue> tabularData) {
|
||||
|
||||
tabularData.remove(null);
|
||||
this.tabularData = tabularData;
|
||||
}
|
||||
|
||||
|
||||
@JsonIgnore
|
||||
@JsonAttribute(ignore = true)
|
||||
public SearchableText getSearchableText() {
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
package com.iqser.red.service.redaction.v1.server.classification.model;
|
||||
|
||||
import com.dslplatform.json.CompiledJson;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@ -6,6 +6,7 @@ import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
@ -45,12 +46,12 @@ public class TextBlock extends AbstractTextContainer {
|
||||
@JsonIgnore
|
||||
private float highestFontSize;
|
||||
|
||||
|
||||
@JsonIgnore
|
||||
private String classification;
|
||||
|
||||
|
||||
public TextBlock(float minX, float maxX, float minY, float maxY, List<TextPositionSequence> sequences, int rotation) {
|
||||
|
||||
this.minX = minX;
|
||||
this.maxX = maxX;
|
||||
this.minY = minY;
|
||||
@ -59,19 +60,25 @@ public class TextBlock extends AbstractTextContainer {
|
||||
this.rotation = rotation;
|
||||
}
|
||||
|
||||
|
||||
public TextBlock union(TextPositionSequence r) {
|
||||
|
||||
TextBlock union = this.copy();
|
||||
union.add(r);
|
||||
return union;
|
||||
}
|
||||
|
||||
|
||||
public TextBlock union(TextBlock r) {
|
||||
|
||||
TextBlock union = this.copy();
|
||||
union.add(r);
|
||||
return union;
|
||||
}
|
||||
|
||||
|
||||
public void add(TextBlock r) {
|
||||
|
||||
if (r.getMinX() < minX) {
|
||||
minX = r.getMinX();
|
||||
}
|
||||
@ -87,7 +94,9 @@ public class TextBlock extends AbstractTextContainer {
|
||||
sequences.addAll(r.getSequences());
|
||||
}
|
||||
|
||||
|
||||
public void add(TextPositionSequence r) {
|
||||
|
||||
if (r.getX1() < minX) {
|
||||
minX = r.getX1();
|
||||
}
|
||||
@ -102,15 +111,21 @@ public class TextBlock extends AbstractTextContainer {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public TextBlock copy() {
|
||||
|
||||
return new TextBlock(minX, maxX, minY, maxY, sequences, rotation);
|
||||
}
|
||||
|
||||
|
||||
public void resize(float x1, float y1, float width, float height) {
|
||||
|
||||
set(x1, y1, x1 + width, y1 + height);
|
||||
}
|
||||
|
||||
|
||||
public void set(float x1, float y1, float x2, float y2) {
|
||||
|
||||
this.minX = Math.min(x1, x2);
|
||||
this.maxX = Math.max(x1, x2);
|
||||
this.minY = Math.min(y1, y2);
|
||||
@ -136,6 +151,7 @@ public class TextBlock extends AbstractTextContainer {
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
@JsonIgnore
|
||||
@JsonAttribute(ignore = true)
|
||||
|
||||
@ -3,6 +3,7 @@ package com.iqser.red.service.redaction.v1.server.classification.model;
|
||||
import com.dslplatform.json.JsonAttribute;
|
||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
|
||||
@ -14,6 +15,7 @@ public class UnclassifiedText {
|
||||
|
||||
private List<TextBlock> textBlocks;
|
||||
|
||||
|
||||
@JsonIgnore
|
||||
@JsonAttribute(ignore = true)
|
||||
public SearchableText getSearchableText() {
|
||||
|
||||
@ -29,8 +29,7 @@ public class BlockificationService {
|
||||
static final float THRESHOLD = 1f;
|
||||
|
||||
|
||||
public Page blockify(List<TextPositionSequence> textPositions, List<Ruling> horizontalRulingLines,
|
||||
List<Ruling> verticalRulingLines) {
|
||||
public Page blockify(List<TextPositionSequence> textPositions, List<Ruling> horizontalRulingLines, List<Ruling> verticalRulingLines) {
|
||||
|
||||
sortRotatedSequences(textPositions);
|
||||
|
||||
@ -48,12 +47,17 @@ public class BlockificationService {
|
||||
boolean startFromTop = word.getY1() > maxY + word.getHeight();
|
||||
boolean splitByX = prev != null && maxX + 50 < word.getX1() && prev.getY1() == word.getY1();
|
||||
boolean newLineAfterSplit = prev != null && word.getY1() != prev.getY1() && wasSplitted && splitX1 != word.getX1();
|
||||
boolean splittedByRuling =
|
||||
isSplittedByRuling(maxX, minY, word.getX1(), word.getY1(), verticalRulingLines) ||
|
||||
isSplittedByRuling(minX, minY, word.getX1(), word.getY2(), horizontalRulingLines)
|
||||
boolean splittedByRuling = isSplittedByRuling(maxX, minY, word.getX1(), word.getY1(), verticalRulingLines) || isSplittedByRuling(minX,
|
||||
minY,
|
||||
word.getX1(),
|
||||
word.getY2(),
|
||||
horizontalRulingLines)
|
||||
|
||||
|| isSplittedByRuling(maxX, minY, word.getX1(), word.getY1(), horizontalRulingLines)
|
||||
|| isSplittedByRuling(minX, minY, word.getX1(), word.getY2(), verticalRulingLines);
|
||||
|| isSplittedByRuling(maxX, minY, word.getX1(), word.getY1(), horizontalRulingLines) || isSplittedByRuling(minX,
|
||||
minY,
|
||||
word.getX1(),
|
||||
word.getY2(),
|
||||
verticalRulingLines);
|
||||
|
||||
if (prev != null && (lineSeparation || startFromTop || splitByX || newLineAfterSplit || splittedByRuling)) {
|
||||
|
||||
@ -115,8 +119,7 @@ public class BlockificationService {
|
||||
TextBlock block = (TextBlock) itty.next();
|
||||
|
||||
if (previousLeft != null && block.getOrientation().equals(Orientation.LEFT)) {
|
||||
if (previousLeft.getMinY() > block.getMinY() && block.getMaxY() + block.getMostPopularWordHeight() > previousLeft
|
||||
.getMinY()) {
|
||||
if (previousLeft.getMinY() > block.getMinY() && block.getMaxY() + block.getMostPopularWordHeight() > previousLeft.getMinY()) {
|
||||
previousLeft.add(block);
|
||||
itty.remove();
|
||||
continue;
|
||||
@ -124,8 +127,7 @@ public class BlockificationService {
|
||||
}
|
||||
|
||||
if (previousRight != null && block.getOrientation().equals(Orientation.RIGHT)) {
|
||||
if (previousRight.getMinY() > block.getMinY() && block.getMaxY() + block.getMostPopularWordHeight() > previousRight
|
||||
.getMinY()) {
|
||||
if (previousRight.getMinY() > block.getMinY() && block.getMaxY() + block.getMostPopularWordHeight() > previousRight.getMinY()) {
|
||||
previousRight.add(block);
|
||||
itty.remove();
|
||||
continue;
|
||||
@ -144,10 +146,8 @@ public class BlockificationService {
|
||||
while (itty.hasNext()) {
|
||||
TextBlock block = (TextBlock) itty.next();
|
||||
|
||||
if (previous != null && previous.getOrientation().equals(Orientation.LEFT) && block.getOrientation()
|
||||
.equals(Orientation.LEFT) && equalsWithThreshold(block.getMaxY(), previous.getMaxY()) || previous != null && previous
|
||||
.getOrientation()
|
||||
.equals(Orientation.LEFT) && block.getOrientation()
|
||||
if (previous != null && previous.getOrientation().equals(Orientation.LEFT) && block.getOrientation().equals(Orientation.LEFT) && equalsWithThreshold(block.getMaxY(),
|
||||
previous.getMaxY()) || previous != null && previous.getOrientation().equals(Orientation.LEFT) && block.getOrientation()
|
||||
.equals(Orientation.RIGHT) && equalsWithThreshold(block.getMaxY(), previous.getMaxY())) {
|
||||
previous.add(block);
|
||||
itty.remove();
|
||||
@ -186,12 +186,10 @@ public class BlockificationService {
|
||||
styleFrequencyCounter.add(wordBlock.getFontStyle());
|
||||
|
||||
if (textBlock == null) {
|
||||
textBlock = new TextBlock(wordBlock.getX1(), wordBlock.getX2(), wordBlock.getY1(), wordBlock.getY2(), wordBlockList, wordBlock
|
||||
.getRotation());
|
||||
textBlock = new TextBlock(wordBlock.getX1(), wordBlock.getX2(), wordBlock.getY1(), wordBlock.getY2(), wordBlockList, wordBlock.getRotation());
|
||||
} else {
|
||||
TextBlock spatialEntity = textBlock.union(wordBlock);
|
||||
textBlock.resize(spatialEntity.getMinX(), spatialEntity.getMinY(), spatialEntity.getWidth(), spatialEntity
|
||||
.getHeight());
|
||||
textBlock.resize(spatialEntity.getMinX(), spatialEntity.getMinY(), spatialEntity.getWidth(), spatialEntity.getHeight());
|
||||
}
|
||||
}
|
||||
|
||||
@ -204,19 +202,14 @@ public class BlockificationService {
|
||||
textBlock.setHighestFontSize(fontSizeFrequencyCounter.getHighest());
|
||||
}
|
||||
|
||||
if (textBlock != null && textBlock.getSequences() != null && textBlock.getSequences()
|
||||
.stream()
|
||||
.map(t -> round(t.getY1(), 3))
|
||||
.collect(toSet())
|
||||
.size() == 1) {
|
||||
if (textBlock != null && textBlock.getSequences() != null && textBlock.getSequences().stream().map(t -> round(t.getY1(), 3)).collect(toSet()).size() == 1) {
|
||||
textBlock.getSequences().sort(Comparator.comparing(TextPositionSequence::getX1));
|
||||
}
|
||||
return textBlock;
|
||||
}
|
||||
|
||||
|
||||
private boolean isSplittedByRuling(float previousX2, float previousY1, float currentX1, float currentY1,
|
||||
List<Ruling> rulingLines) {
|
||||
private boolean isSplittedByRuling(float previousX2, float previousY1, float currentX1, float currentY1, List<Ruling> rulingLines) {
|
||||
|
||||
for (Ruling ruling : rulingLines) {
|
||||
if (ruling.intersectsLine(previousX2, previousY1, currentX1, currentY1)) {
|
||||
@ -227,8 +220,7 @@ public class BlockificationService {
|
||||
}
|
||||
|
||||
|
||||
public Rectangle calculateBodyTextFrame(List<Page> pages, FloatFrequencyCounter documentFontSizeCounter,
|
||||
boolean landscape) {
|
||||
public Rectangle calculateBodyTextFrame(List<Page> pages, FloatFrequencyCounter documentFontSizeCounter, boolean landscape) {
|
||||
|
||||
float minX = 10000;
|
||||
float maxX = -100;
|
||||
|
||||
@ -6,8 +6,10 @@ import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.utils.PositionUtils;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.util.List;
|
||||
@ -48,35 +50,34 @@ public class ClassificationService {
|
||||
}
|
||||
|
||||
|
||||
public void classifyBlock(TextBlock textBlock, Rectangle bodyTextFrame, Page page, Document document,
|
||||
List<Float> headlineFontSizes) {
|
||||
public void classifyBlock(TextBlock textBlock, Rectangle bodyTextFrame, Page page, Document document, List<Float> headlineFontSizes) {
|
||||
|
||||
if (document.getFontSizeCounter().getMostPopular() == null) {
|
||||
textBlock.setClassification("Other");
|
||||
return;
|
||||
}
|
||||
if (PositionUtils.isOverBodyTextFrame(bodyTextFrame, textBlock, page.isRotated()) && (document.getFontSizeCounter()
|
||||
.getMostPopular() == null || textBlock.getHighestFontSize() <= document.getFontSizeCounter()
|
||||
.getMostPopular())) {
|
||||
.getMostPopular() == null || textBlock.getHighestFontSize() <= document.getFontSizeCounter().getMostPopular())) {
|
||||
textBlock.setClassification("Header");
|
||||
|
||||
} else if (PositionUtils.isUnderBodyTextFrame(bodyTextFrame, textBlock) && (document.getFontSizeCounter()
|
||||
.getMostPopular() == null || textBlock.getHighestFontSize() <= document.getFontSizeCounter()
|
||||
.getMostPopular())) {
|
||||
.getMostPopular() == null || textBlock.getHighestFontSize() <= document.getFontSizeCounter().getMostPopular())) {
|
||||
textBlock.setClassification("Footer");
|
||||
} else if (page.getPageNumber() == 1 && (!PositionUtils.isTouchingUnderBodyTextFrame(bodyTextFrame, textBlock) && PositionUtils
|
||||
.getHeightDifferenceBetweenChunkWordAndDocumentWord(textBlock, document.getTextHeightCounter()
|
||||
.getMostPopular()) > 2.5 && textBlock.getHighestFontSize() > document.getFontSizeCounter()
|
||||
.getMostPopular() || page.getTextBlocks().size() == 1)) {
|
||||
} else if (page.getPageNumber() == 1 && (!PositionUtils.isTouchingUnderBodyTextFrame(bodyTextFrame,
|
||||
textBlock) && PositionUtils.getHeightDifferenceBetweenChunkWordAndDocumentWord(textBlock,
|
||||
document.getTextHeightCounter().getMostPopular()) > 2.5 && textBlock.getHighestFontSize() > document.getFontSizeCounter().getMostPopular() || page.getTextBlocks()
|
||||
.size() == 1)) {
|
||||
if (!Pattern.matches("[0-9]+", textBlock.toString())) {
|
||||
textBlock.setClassification("Title");
|
||||
}
|
||||
} else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getMostPopularWordFontSize() > document
|
||||
.getFontSizeCounter()
|
||||
.getMostPopular() && PositionUtils.getApproxLineCount(textBlock) < 4.9 && (textBlock.getMostPopularWordStyle()
|
||||
.equals("bold") || !document.getFontStyleCounter().getCountPerValue().containsKey("bold") && textBlock.getMostPopularWordFontSize() > document
|
||||
.getFontSizeCounter()
|
||||
.getMostPopular() + 1) && textBlock.getSequences().get(0).getTextPositions().get(0).getFontSizeInPt() >= textBlock.getMostPopularWordFontSize()) {
|
||||
} else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getMostPopularWordFontSize() > document.getFontSizeCounter()
|
||||
.getMostPopular() && PositionUtils.getApproxLineCount(textBlock) < 4.9 && (textBlock.getMostPopularWordStyle().equals("bold") || !document.getFontStyleCounter()
|
||||
.getCountPerValue()
|
||||
.containsKey("bold") && textBlock.getMostPopularWordFontSize() > document.getFontSizeCounter().getMostPopular() + 1) && textBlock.getSequences()
|
||||
.get(0)
|
||||
.getTextPositions()
|
||||
.get(0)
|
||||
.getFontSizeInPt() >= textBlock.getMostPopularWordFontSize()) {
|
||||
|
||||
for (int i = 1; i <= headlineFontSizes.size(); i++) {
|
||||
if (textBlock.getMostPopularWordFontSize() == headlineFontSizes.get(i - 1)) {
|
||||
@ -84,28 +85,25 @@ public class ClassificationService {
|
||||
document.setHeadlines(true);
|
||||
}
|
||||
}
|
||||
} else if (!textBlock.getText().startsWith("Table ") && !textBlock.getText()
|
||||
.startsWith("Figure ") && PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getMostPopularWordStyle()
|
||||
.equals("bold") && !document.getFontStyleCounter()
|
||||
} else if (!textBlock.getText().startsWith("Table ") && !textBlock.getText().startsWith("Figure ") && PositionUtils.isWithinBodyTextFrame(bodyTextFrame,
|
||||
textBlock) && textBlock.getMostPopularWordStyle().equals("bold") && !document.getFontStyleCounter()
|
||||
.getMostPopular()
|
||||
.equals("bold") && PositionUtils.getApproxLineCount(textBlock) < 2.9 && textBlock.getSequences().get(0).getTextPositions().get(0).getFontSizeInPt() >= textBlock.getMostPopularWordFontSize()) {
|
||||
.equals("bold") && PositionUtils.getApproxLineCount(textBlock) < 2.9 && textBlock.getSequences()
|
||||
.get(0)
|
||||
.getTextPositions()
|
||||
.get(0)
|
||||
.getFontSizeInPt() >= textBlock.getMostPopularWordFontSize()) {
|
||||
textBlock.setClassification("H " + (headlineFontSizes.size() + 1));
|
||||
document.setHeadlines(true);
|
||||
} else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getMostPopularWordFontSize() == document
|
||||
.getFontSizeCounter()
|
||||
.getMostPopular() && textBlock.getMostPopularWordStyle()
|
||||
.equals("bold") && !document.getFontStyleCounter().getMostPopular().equals("bold")) {
|
||||
} else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getMostPopularWordFontSize() == document.getFontSizeCounter()
|
||||
.getMostPopular() && textBlock.getMostPopularWordStyle().equals("bold") && !document.getFontStyleCounter().getMostPopular().equals("bold")) {
|
||||
textBlock.setClassification("TextBlock Bold");
|
||||
} else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getMostPopularWordFont()
|
||||
.equals(document.getFontCounter().getMostPopular()) && textBlock.getMostPopularWordStyle()
|
||||
.equals(document.getFontStyleCounter()
|
||||
.getMostPopular()) && textBlock.getMostPopularWordFontSize() == document.getFontSizeCounter()
|
||||
.getMostPopular()) {
|
||||
.equals(document.getFontStyleCounter().getMostPopular()) && textBlock.getMostPopularWordFontSize() == document.getFontSizeCounter().getMostPopular()) {
|
||||
textBlock.setClassification("TextBlock");
|
||||
} else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getMostPopularWordFontSize() == document
|
||||
.getFontSizeCounter()
|
||||
.getMostPopular() && textBlock.getMostPopularWordStyle()
|
||||
.equals("italic") && !document.getFontStyleCounter()
|
||||
} else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getMostPopularWordFontSize() == document.getFontSizeCounter()
|
||||
.getMostPopular() && textBlock.getMostPopularWordStyle().equals("italic") && !document.getFontStyleCounter()
|
||||
.getMostPopular()
|
||||
.equals("italic") && PositionUtils.getApproxLineCount(textBlock) < 2.9) {
|
||||
textBlock.setClassification("TextBlock Italic");
|
||||
|
||||
@ -2,13 +2,13 @@ package com.iqser.red.service.redaction.v1.server.classification.utils;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
|
||||
@UtilityClass
|
||||
@SuppressWarnings("all")
|
||||
public class PositionUtils {
|
||||
|
||||
|
||||
public boolean isWithinBodyTextFrame(Rectangle btf, TextBlock textBlock) {
|
||||
|
||||
//TODO Currently this is not working for rotated pages.
|
||||
@ -19,10 +19,7 @@ public class PositionUtils {
|
||||
|
||||
double threshold = textBlock.getMostPopularWordHeight() * 3;
|
||||
|
||||
if (textBlock.getMinX() + threshold > btf.getX() &&
|
||||
textBlock.getMaxX() - threshold < btf.getX() + btf.getWidth() &&
|
||||
textBlock.getMinY() + threshold > btf.getY() &&
|
||||
textBlock.getMaxY() - threshold < btf.getY() + btf.getHeight()) {
|
||||
if (textBlock.getMinX() + threshold > btf.getX() && textBlock.getMaxX() - threshold < btf.getX() + btf.getWidth() && textBlock.getMinY() + threshold > btf.getY() && textBlock.getMaxY() - threshold < btf.getY() + btf.getHeight()) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
@ -84,11 +81,14 @@ public class PositionUtils {
|
||||
|
||||
|
||||
/**
 * Returns how much the block's most popular word height exceeds the given document-wide
 * most popular word height (block value minus document value; negative when the block's
 * words are smaller).
 * <p>
 * The {@code Float} argument is unboxed by the subtraction, so passing {@code null} throws
 * a {@link NullPointerException}.
 * NOTE(review): whether callers can pass {@code null} here is not visible - confirm.
 *
 * @param textBlock                     block whose most popular word height is used
 * @param documentMostPopularWordHeight most popular word height of the whole document
 * @return {@code textBlock.getMostPopularWordHeight() - documentMostPopularWordHeight}
 */
public float getHeightDifferenceBetweenChunkWordAndDocumentWord(TextBlock textBlock, Float documentMostPopularWordHeight) {
|
||||
|
||||
return textBlock.getMostPopularWordHeight() - documentMostPopularWordHeight;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Estimates the number of text lines in a block as the block height divided by its most
 * popular word height.
 * <p>
 * The result is fractional, not rounded.
 * NOTE(review): presumably both getters return {@code float}; in that case a word height
 * of 0 yields Infinity or NaN instead of an exception - confirm.
 *
 * @param textBlock block to measure
 * @return {@code textBlock.getHeight() / textBlock.getMostPopularWordHeight()}
 */
public Float getApproxLineCount(TextBlock textBlock) {
|
||||
|
||||
return textBlock.getHeight() / textBlock.getMostPopularWordHeight();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -6,4 +6,5 @@ import com.iqser.red.service.persistence.service.v1.api.resources.DictionaryReso
|
||||
|
||||
/**
 * Feign client for {@code DictionaryResource}.
 * <p>
 * Declares no methods of its own; the whole API comes from the extended
 * {@code DictionaryResource} interface. The target URL is taken from the
 * {@code persistence-service.url} property.
 */
@FeignClient(name = "DictionaryResource", url = "${persistence-service.url}")
|
||||
public interface DictionaryClient extends DictionaryResource {
|
||||
|
||||
}
|
||||
@ -1,10 +1,10 @@
|
||||
package com.iqser.red.service.redaction.v1.server.client;
|
||||
|
||||
|
||||
import org.springframework.cloud.openfeign.FeignClient;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.resources.FileStatusProcessingUpdateResource;
|
||||
|
||||
/**
 * Feign client for {@code FileStatusProcessingUpdateResource}.
 * <p>
 * Declares no methods of its own; the whole API comes from the extended
 * {@code FileStatusProcessingUpdateResource} interface. The target URL is taken from the
 * {@code persistence-service.url} property.
 */
@FeignClient(name = "FileStatusProcessingUpdateResource", url = "${persistence-service.url}")
|
||||
public interface FileStatusProcessingUpdateClient extends FileStatusProcessingUpdateResource {
|
||||
|
||||
}
|
||||
|
||||
@ -6,4 +6,5 @@ import com.iqser.red.service.persistence.service.v1.api.resources.LegalBasisMapp
|
||||
|
||||
/**
 * Feign client for {@code LegalBasisMappingResource}.
 * <p>
 * Declares no methods of its own; the whole API comes from the extended
 * {@code LegalBasisMappingResource} interface. The target URL is taken from the
 * {@code persistence-service.url} property.
 */
@FeignClient(name = "LegalBasisMappingResource", url = "${persistence-service.url}")
|
||||
public interface LegalBasisClient extends LegalBasisMappingResource {
|
||||
|
||||
}
|
||||
|
||||
@ -32,8 +32,7 @@ public class MockMultipartFile implements MultipartFile {
|
||||
}
|
||||
|
||||
|
||||
public MockMultipartFile(String name, @Nullable String originalFilename, @Nullable String contentType,
|
||||
@Nullable byte[] content) {
|
||||
public MockMultipartFile(String name, @Nullable String originalFilename, @Nullable String contentType, @Nullable byte[] content) {
|
||||
|
||||
Assert.hasLength(name, "Name must not be empty");
|
||||
this.name = name;
|
||||
@ -43,8 +42,7 @@ public class MockMultipartFile implements MultipartFile {
|
||||
}
|
||||
|
||||
|
||||
public MockMultipartFile(String name, @Nullable String originalFilename, @Nullable String contentType,
|
||||
InputStream contentStream) throws IOException {
|
||||
public MockMultipartFile(String name, @Nullable String originalFilename, @Nullable String contentType, InputStream contentStream) throws IOException {
|
||||
|
||||
this(name, originalFilename, contentType, FileCopyUtils.copyToByteArray(contentStream));
|
||||
}
|
||||
|
||||
@ -6,4 +6,5 @@ import com.iqser.red.service.persistence.service.v1.api.resources.RulesResource;
|
||||
|
||||
/**
 * Feign client for {@code RulesResource}.
 * <p>
 * Declares no methods of its own; the whole API comes from the extended
 * {@code RulesResource} interface. The target URL is taken from the
 * {@code persistence-service.url} property.
 */
@FeignClient(name = "RulesResource", url = "${persistence-service.url}")
|
||||
public interface RulesClient extends RulesResource {
|
||||
|
||||
}
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
package com.iqser.red.service.redaction.v1.server.client.model;
|
||||
|
||||
import com.dslplatform.json.CompiledJson;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
@ -13,9 +14,9 @@ import lombok.NoArgsConstructor;
|
||||
@NoArgsConstructor
|
||||
public class EntityRecogintionEntity {
|
||||
|
||||
private String value;
|
||||
private int startOffset;
|
||||
private int endOffset;
|
||||
private String type;
|
||||
private String value;
|
||||
private int startOffset;
|
||||
private int endOffset;
|
||||
private String type;
|
||||
|
||||
}
|
||||
|
||||
@ -13,6 +13,6 @@ import lombok.NoArgsConstructor;
|
||||
@NoArgsConstructor
|
||||
public class EntityRecognitionRequest {
|
||||
|
||||
private List<EntityRecognitionSection> data;
|
||||
private List<EntityRecognitionSection> data;
|
||||
|
||||
}
|
||||
|
||||
@ -17,4 +17,5 @@ public class EntityRecognitionResult {
|
||||
|
||||
@Builder.Default
|
||||
private Map<Integer, List<EntityRecogintionEntity>> entities = new HashMap<>();
|
||||
|
||||
}
|
||||
|
||||
@ -13,4 +13,5 @@ public class EntityRecognitionSection {
|
||||
|
||||
private int sectionNumber;
|
||||
private String text;
|
||||
|
||||
}
|
||||
|
||||
@ -3,7 +3,9 @@ package com.iqser.red.service.redaction.v1.server.controller;
|
||||
import com.iqser.red.commons.spring.ErrorMessage;
|
||||
import com.iqser.red.service.redaction.v1.server.exception.NotFoundException;
|
||||
import com.iqser.red.service.redaction.v1.server.exception.RulesValidationException;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import org.springframework.http.HttpStatus;
|
||||
import org.springframework.web.bind.annotation.ExceptionHandler;
|
||||
import org.springframework.web.bind.annotation.ResponseBody;
|
||||
@ -18,10 +20,12 @@ public class ControllerAdvice {
|
||||
|
||||
/* error handling */
|
||||
|
||||
|
||||
@ResponseBody
|
||||
@ResponseStatus(value = HttpStatus.INTERNAL_SERVER_ERROR)
|
||||
@ExceptionHandler(value = NullPointerException.class)
|
||||
public ErrorMessage handleContentNotFoundException(NullPointerException e) {
|
||||
|
||||
if (e != null) {
|
||||
log.error(e.getMessage(), e);
|
||||
return new ErrorMessage(OffsetDateTime.now(), e.getMessage());
|
||||
@ -30,17 +34,21 @@ public class ControllerAdvice {
|
||||
return new ErrorMessage(OffsetDateTime.now(), "Nullpointer exception");
|
||||
}
|
||||
|
||||
|
||||
/**
 * Handles {@code RulesValidationException} by answering with HTTP 400 (BAD_REQUEST).
 * <p>
 * The response body is an {@code ErrorMessage} carrying the current timestamp and the
 * exception's message. The exception is not logged here.
 *
 * @param e the rules validation failure being handled
 * @return the error payload written to the response body
 */
@ResponseBody
|
||||
@ResponseStatus(value = HttpStatus.BAD_REQUEST)
|
||||
@ExceptionHandler(value = RulesValidationException.class)
|
||||
public ErrorMessage handleRulesValidationException(RulesValidationException e) {
|
||||
|
||||
return new ErrorMessage(OffsetDateTime.now(), e.getMessage());
|
||||
}
|
||||
|
||||
|
||||
/**
 * Handles {@code NotFoundException} by answering with HTTP 404 (NOT_FOUND).
 * <p>
 * The response body is an {@code ErrorMessage} carrying the current timestamp and the
 * exception's message. The exception is not logged here.
 *
 * @param e the not-found failure being handled
 * @return the error payload written to the response body
 */
@ResponseBody
|
||||
@ResponseStatus(value = HttpStatus.NOT_FOUND)
|
||||
@ExceptionHandler(value = NotFoundException.class)
|
||||
public ErrorMessage handleFileNotFoundException(NotFoundException e) {
|
||||
|
||||
return new ErrorMessage(OffsetDateTime.now(), e.getMessage());
|
||||
}
|
||||
|
||||
|
||||
@ -46,11 +46,15 @@ public class RedactionController implements RedactionResource {
|
||||
@Override
|
||||
public RedactionResult classify(@RequestBody RedactionRequest redactionRequest) {
|
||||
|
||||
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
|
||||
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(),
|
||||
redactionRequest.getFileId(),
|
||||
FileType.ORIGIN));
|
||||
try {
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(redactionRequest.getDossierId(), redactionRequest.getFileId(), storedObjectStream, null);
|
||||
|
||||
storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
|
||||
storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(),
|
||||
redactionRequest.getFileId(),
|
||||
FileType.ORIGIN));
|
||||
try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) {
|
||||
pdDocument.setAllSecurityToBeRemoved(true);
|
||||
|
||||
@ -72,11 +76,15 @@ public class RedactionController implements RedactionResource {
|
||||
@Override
|
||||
public RedactionResult sections(@RequestBody RedactionRequest redactionRequest) {
|
||||
|
||||
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
|
||||
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(),
|
||||
redactionRequest.getFileId(),
|
||||
FileType.ORIGIN));
|
||||
try {
|
||||
Document classifiedDoc = pdfSegmentationService.parseDocument(redactionRequest.getDossierId(), redactionRequest.getFileId(), storedObjectStream, null);
|
||||
|
||||
storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
|
||||
storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(),
|
||||
redactionRequest.getFileId(),
|
||||
FileType.ORIGIN));
|
||||
try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) {
|
||||
pdDocument.setAllSecurityToBeRemoved(true);
|
||||
|
||||
@ -100,7 +108,9 @@ public class RedactionController implements RedactionResource {
|
||||
Document classifiedDoc;
|
||||
|
||||
try {
|
||||
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
|
||||
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(),
|
||||
redactionRequest.getFileId(),
|
||||
FileType.ORIGIN));
|
||||
classifiedDoc = pdfSegmentationService.parseDocument(redactionRequest.getDossierId(), redactionRequest.getFileId(), storedObjectStream, null);
|
||||
} catch (Exception e) {
|
||||
throw new RedactionException(e);
|
||||
|
||||
@ -1,10 +1,11 @@
|
||||
package com.iqser.red.service.redaction.v1.server.controller;
|
||||
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.RuleBuilderModel;
|
||||
import com.iqser.red.service.redaction.v1.resources.RuleBuilderResource;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.rulebuilder.RuleBuilderModelService;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
|
||||
@RestController
|
||||
@ -13,8 +14,10 @@ public class RuleBuilderController implements RuleBuilderResource {
|
||||
|
||||
private final RuleBuilderModelService ruleBuilderModelService;
|
||||
|
||||
|
||||
/**
 * Returns the rule builder model by delegating directly to
 * {@code ruleBuilderModelService.getRuleBuilderModel()}; no processing is done here.
 *
 * @return the model supplied by the service
 */
@Override
|
||||
public RuleBuilderModel getRuleBuilderModel() {
|
||||
|
||||
return ruleBuilderModelService.getRuleBuilderModel();
|
||||
}
|
||||
|
||||
|
||||
@ -3,6 +3,7 @@ package com.iqser.red.service.redaction.v1.server.exception;
|
||||
public class NotFoundException extends RuntimeException {
|
||||
|
||||
/**
 * Creates the exception with the given detail message and no cause.
 *
 * @param message detail message passed to {@link RuntimeException}
 */
public NotFoundException(String message) {
|
||||
|
||||
super(message);
|
||||
}
|
||||
|
||||
|
||||
@ -3,10 +3,13 @@ package com.iqser.red.service.redaction.v1.server.exception;
|
||||
public class RedactionException extends RuntimeException {
|
||||
|
||||
/**
 * Creates the exception with the fixed message "Could not parse document" and the given
 * cause, so the original failure stays available via {@link #getCause()}.
 *
 * @param cause the underlying failure
 */
public RedactionException(Throwable cause) {
|
||||
|
||||
super("Could not parse document", cause);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Creates the exception with the fixed message "Could not parse document" and no cause.
 */
public RedactionException() {
|
||||
|
||||
super("Could not parse document");
|
||||
}
|
||||
|
||||
|
||||
@ -3,6 +3,7 @@ package com.iqser.red.service.redaction.v1.server.exception;
|
||||
public class RulesValidationException extends RuntimeException {
|
||||
|
||||
/**
 * Creates the exception with the given detail message and cause.
 *
 * @param message detail message passed to {@link RuntimeException}
 * @param t       the underlying failure, kept as the cause
 */
public RulesValidationException(String message, Throwable t) {
|
||||
|
||||
super(message, t);
|
||||
}
|
||||
|
||||
|
||||
@ -69,17 +69,17 @@ import org.apache.pdfbox.pdmodel.font.PDFontDescriptor;
|
||||
|
||||
/**
|
||||
* LEGACY text calculations which are known to be incorrect but are depended on by PDFTextStripper.
|
||||
*
|
||||
* <p>
|
||||
* This class exists only so that we don't break the code of users who have their own subclasses of
|
||||
* PDFTextStripper. It replaces the mostly empty implementation of showGlyph() in PDFStreamEngine
|
||||
* with a heuristic implementation which is backwards compatible.
|
||||
*
|
||||
* <p>
|
||||
* DO NOT USE THIS CODE UNLESS YOU ARE WORKING WITH PDFTextStripper.
|
||||
* THIS CODE IS DELIBERATELY INCORRECT, USE PDFStreamEngine INSTEAD.
|
||||
*/
|
||||
@SuppressWarnings({"PMD", "checkstyle:all"})
|
||||
class LegacyPDFStreamEngine extends PDFStreamEngine
|
||||
{
|
||||
class LegacyPDFStreamEngine extends PDFStreamEngine {
|
||||
|
||||
private static final Log LOG = LogFactory.getLog(LegacyPDFStreamEngine.class);
|
||||
|
||||
private int pageRotation;
|
||||
@ -88,11 +88,12 @@ class LegacyPDFStreamEngine extends PDFStreamEngine
|
||||
private final GlyphList glyphList;
|
||||
private final Map<COSDictionary, Float> fontHeightMap = new WeakHashMap<COSDictionary, Float>();
|
||||
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*/
|
||||
LegacyPDFStreamEngine() throws IOException
|
||||
{
|
||||
LegacyPDFStreamEngine() throws IOException {
|
||||
|
||||
addOperator(new BeginText());
|
||||
addOperator(new Concatenate());
|
||||
addOperator(new DrawObject()); // special text version
|
||||
@ -122,6 +123,7 @@ class LegacyPDFStreamEngine extends PDFStreamEngine
|
||||
glyphList = new GlyphList(GlyphList.getAdobeGlyphList(), input);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This will initialize and process the contents of the stream.
|
||||
*
|
||||
@ -129,33 +131,27 @@ class LegacyPDFStreamEngine extends PDFStreamEngine
|
||||
* @throws java.io.IOException if there is an error accessing the stream.
|
||||
*/
|
||||
@Override
|
||||
public void processPage(PDPage page) throws IOException
|
||||
{
|
||||
public void processPage(PDPage page) throws IOException {
|
||||
|
||||
this.pageRotation = page.getRotation();
|
||||
this.pageSize = page.getCropBox();
|
||||
|
||||
if (pageSize.getLowerLeftX() == 0 && pageSize.getLowerLeftY() == 0)
|
||||
{
|
||||
if (pageSize.getLowerLeftX() == 0 && pageSize.getLowerLeftY() == 0) {
|
||||
translateMatrix = null;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
// translation matrix for cropbox
|
||||
translateMatrix = Matrix.getTranslateInstance(-pageSize.getLowerLeftX(), -pageSize.getLowerLeftY());
|
||||
}
|
||||
super.processPage(page);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Called when a glyph is to be processed. The heuristic calculations here were originally
|
||||
* written by Ben Litchfield for PDFStreamEngine.
|
||||
*/
|
||||
@Override
|
||||
protected void showGlyph(Matrix textRenderingMatrix, PDFont font, int code,
|
||||
String unicode,
|
||||
Vector displacement)
|
||||
throws IOException
|
||||
{
|
||||
protected void showGlyph(Matrix textRenderingMatrix, PDFont font, int code, String unicode, Vector displacement) throws IOException {
|
||||
//
|
||||
// legacy calculations which were previously in PDFStreamEngine
|
||||
//
|
||||
@ -173,25 +169,19 @@ class LegacyPDFStreamEngine extends PDFStreamEngine
|
||||
// the sorting algorithm is based on the width of the character. As the displacement
|
||||
// for vertical characters doesn't provide any suitable value for it, we have to
|
||||
// calculate our own
|
||||
if (font.isVertical())
|
||||
{
|
||||
if (font.isVertical()) {
|
||||
displacementX = font.getWidth(code) / 1000;
|
||||
// there may be an additional scaling factor for true type fonts
|
||||
TrueTypeFont ttf = null;
|
||||
if (font instanceof PDTrueTypeFont)
|
||||
{
|
||||
ttf = ((PDTrueTypeFont)font).getTrueTypeFont();
|
||||
}
|
||||
else if (font instanceof PDType0Font)
|
||||
{
|
||||
PDCIDFont cidFont = ((PDType0Font)font).getDescendantFont();
|
||||
if (cidFont instanceof PDCIDFontType2)
|
||||
{
|
||||
ttf = ((PDCIDFontType2)cidFont).getTrueTypeFont();
|
||||
if (font instanceof PDTrueTypeFont) {
|
||||
ttf = ((PDTrueTypeFont) font).getTrueTypeFont();
|
||||
} else if (font instanceof PDType0Font) {
|
||||
PDCIDFont cidFont = ((PDType0Font) font).getDescendantFont();
|
||||
if (cidFont instanceof PDCIDFontType2) {
|
||||
ttf = ((PDCIDFontType2) cidFont).getTrueTypeFont();
|
||||
}
|
||||
}
|
||||
if (ttf != null && ttf.getUnitsPerEm() != 1000)
|
||||
{
|
||||
if (ttf != null && ttf.getUnitsPerEm() != 1000) {
|
||||
displacementX *= 1000f / ttf.getUnitsPerEm();
|
||||
}
|
||||
}
|
||||
@ -219,8 +209,7 @@ class LegacyPDFStreamEngine extends PDFStreamEngine
|
||||
// (modified) width and height calculations
|
||||
float dxDisplay = nextX - textRenderingMatrix.getTranslateX();
|
||||
Float fontHeight = fontHeightMap.get(font.getCOSObject());
|
||||
if (fontHeight == null)
|
||||
{
|
||||
if (fontHeight == null) {
|
||||
fontHeight = computeFontHeight(font);
|
||||
fontHeightMap.put(font.getCOSObject(), fontHeight);
|
||||
}
|
||||
@ -237,30 +226,24 @@ class LegacyPDFStreamEngine extends PDFStreamEngine
|
||||
// saved).
|
||||
|
||||
float glyphSpaceToTextSpaceFactor = 1 / 1000f;
|
||||
if (font instanceof PDType3Font)
|
||||
{
|
||||
if (font instanceof PDType3Font) {
|
||||
glyphSpaceToTextSpaceFactor = font.getFontMatrix().getScaleX();
|
||||
}
|
||||
|
||||
float spaceWidthText = 0;
|
||||
try
|
||||
{
|
||||
try {
|
||||
// to avoid crash as described in PDFBOX-614, see what the space displacement should be
|
||||
spaceWidthText = font.getSpaceWidth() * glyphSpaceToTextSpaceFactor;
|
||||
}
|
||||
catch (Throwable exception)
|
||||
{
|
||||
} catch (Throwable exception) {
|
||||
LOG.warn(exception, exception);
|
||||
}
|
||||
|
||||
if (spaceWidthText == 0)
|
||||
{
|
||||
if (spaceWidthText == 0) {
|
||||
spaceWidthText = font.getAverageFontWidth() * glyphSpaceToTextSpaceFactor;
|
||||
// the average space width appears to be higher than necessary so make it smaller
|
||||
spaceWidthText *= .80f;
|
||||
}
|
||||
if (spaceWidthText == 0)
|
||||
{
|
||||
if (spaceWidthText == 0) {
|
||||
spaceWidthText = 1.0f; // if could not find font, use a generic value
|
||||
}
|
||||
|
||||
@ -273,15 +256,11 @@ class LegacyPDFStreamEngine extends PDFStreamEngine
|
||||
// when there is no Unicode mapping available, Acrobat simply coerces the character code
|
||||
// into Unicode, so we do the same. Subclasses of PDFStreamEngine don't necessarily want
|
||||
// this, which is why we leave it until this point in PDFTextStreamEngine.
|
||||
if (unicodeMapping == null)
|
||||
{
|
||||
if (font instanceof PDSimpleFont)
|
||||
{
|
||||
if (unicodeMapping == null) {
|
||||
if (font instanceof PDSimpleFont) {
|
||||
char c = (char) code;
|
||||
unicodeMapping = new String(new char[] { c });
|
||||
}
|
||||
else
|
||||
{
|
||||
unicodeMapping = new String(new char[]{c});
|
||||
} else {
|
||||
// Acrobat doesn't seem to coerce composite font's character codes, instead it
|
||||
// skips them. See the "allah2.pdf" TestTextStripper file.
|
||||
return;
|
||||
@ -290,100 +269,118 @@ class LegacyPDFStreamEngine extends PDFStreamEngine
|
||||
|
||||
// adjust for cropbox if needed
|
||||
Matrix translatedTextRenderingMatrix;
|
||||
if (translateMatrix == null)
|
||||
{
|
||||
if (translateMatrix == null) {
|
||||
translatedTextRenderingMatrix = textRenderingMatrix;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
translatedTextRenderingMatrix = Matrix.concatenate(translateMatrix, textRenderingMatrix);
|
||||
nextX -= pageSize.getLowerLeftX();
|
||||
nextY -= pageSize.getLowerLeftY();
|
||||
}
|
||||
|
||||
// This is a hack for Unicode letters that map to 2 chars, e.g. RA; see unicodeProblem.pdf
|
||||
if(unicodeMapping.length() == 2){
|
||||
processTextPosition(new TextPosition(pageRotation, pageSize.getWidth(),
|
||||
pageSize.getHeight(), translatedTextRenderingMatrix, nextX, nextY,
|
||||
Math.abs(dyDisplay), dxDisplay,
|
||||
Math.abs(spaceWidthDisplay), Character.toString(unicodeMapping.charAt(0)), new int[] { code }, font,
|
||||
if (unicodeMapping.length() == 2) {
|
||||
processTextPosition(new TextPosition(pageRotation,
|
||||
pageSize.getWidth(),
|
||||
pageSize.getHeight(),
|
||||
translatedTextRenderingMatrix,
|
||||
nextX,
|
||||
nextY,
|
||||
Math.abs(dyDisplay),
|
||||
dxDisplay,
|
||||
Math.abs(spaceWidthDisplay),
|
||||
Character.toString(unicodeMapping.charAt(0)),
|
||||
new int[]{code},
|
||||
font,
|
||||
fontSize,
|
||||
(int)(fontSize * textMatrix.getScalingFactorX())));
|
||||
processTextPosition(new TextPosition(pageRotation, pageSize.getWidth(),
|
||||
pageSize.getHeight(), translatedTextRenderingMatrix, nextX, nextY,
|
||||
Math.abs(dyDisplay), dxDisplay,
|
||||
Math.abs(spaceWidthDisplay), Character.toString(unicodeMapping.charAt(1)), new int[] { code }, font,
|
||||
(int) (fontSize * textMatrix.getScalingFactorX())));
|
||||
processTextPosition(new TextPosition(pageRotation,
|
||||
pageSize.getWidth(),
|
||||
pageSize.getHeight(),
|
||||
translatedTextRenderingMatrix,
|
||||
nextX,
|
||||
nextY,
|
||||
Math.abs(dyDisplay),
|
||||
dxDisplay,
|
||||
Math.abs(spaceWidthDisplay),
|
||||
Character.toString(unicodeMapping.charAt(1)),
|
||||
new int[]{code},
|
||||
font,
|
||||
fontSize,
|
||||
(int)(fontSize * textMatrix.getScalingFactorX())));
|
||||
(int) (fontSize * textMatrix.getScalingFactorX())));
|
||||
} else {
|
||||
|
||||
processTextPosition(new TextPosition(pageRotation, pageSize.getWidth(), pageSize.getHeight(), translatedTextRenderingMatrix, nextX, nextY, Math.abs(dyDisplay), dxDisplay, Math.abs(spaceWidthDisplay), unicodeMapping, new int[]{code}, font, fontSize, (int) (fontSize * textMatrix.getScalingFactorX())));
|
||||
processTextPosition(new TextPosition(pageRotation,
|
||||
pageSize.getWidth(),
|
||||
pageSize.getHeight(),
|
||||
translatedTextRenderingMatrix,
|
||||
nextX,
|
||||
nextY,
|
||||
Math.abs(dyDisplay),
|
||||
dxDisplay,
|
||||
Math.abs(spaceWidthDisplay),
|
||||
unicodeMapping,
|
||||
new int[]{code},
|
||||
font,
|
||||
fontSize,
|
||||
(int) (fontSize * textMatrix.getScalingFactorX())));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Computes the font height. Override this if you want to use your own calculations.
|
||||
*
|
||||
*
|
||||
* @param font the font.
|
||||
* @return the font height.
|
||||
*
|
||||
* @throws IOException if there is an error while getting the font bounding box.
|
||||
*/
|
||||
protected float computeFontHeight(PDFont font) throws IOException
|
||||
{
|
||||
protected float computeFontHeight(PDFont font) throws IOException {
|
||||
|
||||
BoundingBox bbox = font.getBoundingBox();
|
||||
if (bbox.getLowerLeftY() < Short.MIN_VALUE)
|
||||
{
|
||||
if (bbox.getLowerLeftY() < Short.MIN_VALUE) {
|
||||
// PDFBOX-2158 and PDFBOX-3130
|
||||
// files by Salmat eSolutions / ClibPDF Library
|
||||
bbox.setLowerLeftY(- (bbox.getLowerLeftY() + 65536));
|
||||
bbox.setLowerLeftY(-(bbox.getLowerLeftY() + 65536));
|
||||
}
|
||||
// Half of the bbox height is used as the glyph height. TODO: why?
|
||||
float glyphHeight = bbox.getHeight() / 2;
|
||||
|
||||
// sometimes the bbox has very high values, but CapHeight is OK
|
||||
PDFontDescriptor fontDescriptor = font.getFontDescriptor();
|
||||
if (fontDescriptor != null)
|
||||
{
|
||||
if (fontDescriptor != null) {
|
||||
float capHeight = fontDescriptor.getCapHeight();
|
||||
if (Float.compare(capHeight, 0) != 0 &&
|
||||
(capHeight < glyphHeight || Float.compare(glyphHeight, 0) == 0))
|
||||
{
|
||||
if (Float.compare(capHeight, 0) != 0 && (capHeight < glyphHeight || Float.compare(glyphHeight, 0) == 0)) {
|
||||
glyphHeight = capHeight;
|
||||
}
|
||||
// PDFBOX-3464, PDFBOX-4480, PDFBOX-4553:
|
||||
// sometimes even CapHeight has a very high value, but Ascent and Descent are ok
|
||||
float ascent = fontDescriptor.getAscent();
|
||||
float descent = fontDescriptor.getDescent();
|
||||
if (capHeight > ascent && ascent > 0 && descent < 0 &&
|
||||
((ascent - descent) / 2 < glyphHeight || Float.compare(glyphHeight, 0) == 0))
|
||||
{
|
||||
if (capHeight > ascent && ascent > 0 && descent < 0 && ((ascent - descent) / 2 < glyphHeight || Float.compare(glyphHeight, 0) == 0)) {
|
||||
glyphHeight = (ascent - descent) / 2;
|
||||
}
|
||||
}
|
||||
|
||||
// transformPoint from glyph space -> text space
|
||||
float height;
|
||||
if (font instanceof PDType3Font)
|
||||
{
|
||||
if (font instanceof PDType3Font) {
|
||||
height = font.getFontMatrix().transformPoint(0, glyphHeight).y;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
height = glyphHeight / 1000;
|
||||
}
|
||||
|
||||
return height;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* A method provided as an event interface to allow a subclass to perform some specific
|
||||
* functionality when text needs to be processed.
|
||||
*
|
||||
* @param text The text to be processed.
|
||||
*/
|
||||
protected void processTextPosition(TextPosition text)
|
||||
{
|
||||
protected void processTextPosition(TextPosition text) {
|
||||
// subclasses can override to provide specific functionality
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -1,8 +1,10 @@
|
||||
package com.iqser.red.service.redaction.v1.server.parsing;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
import org.apache.pdfbox.text.PDFTextStripperByArea;
|
||||
import org.apache.pdfbox.text.TextPosition;
|
||||
|
||||
@ -18,19 +20,19 @@ public class PDFAreaTextStripper extends PDFTextStripperByArea {
|
||||
@Setter
|
||||
private int pageNumber;
|
||||
|
||||
|
||||
public PDFAreaTextStripper() throws IOException {
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void writeString(String text, List<TextPosition> textPositions) throws IOException {
|
||||
|
||||
int startIndex = 0;
|
||||
for (int i = 0; i <= textPositions.size() - 1; i++) {
|
||||
|
||||
if (i == 0 && (textPositions.get(i).getUnicode().equals(" ") || textPositions.get(i)
|
||||
.getUnicode()
|
||||
.equals("\u00A0"))) {
|
||||
if (i == 0 && (textPositions.get(i).getUnicode().equals(" ") || textPositions.get(i).getUnicode().equals("\u00A0"))) {
|
||||
startIndex++;
|
||||
continue;
|
||||
}
|
||||
@ -38,32 +40,23 @@ public class PDFAreaTextStripper extends PDFTextStripperByArea {
|
||||
// Strange, but this sometimes happens, for example in Metolachlor2.pdf
|
||||
if (i > 0 && textPositions.get(i).getX() < textPositions.get(i - 1).getX()) {
|
||||
List<TextPosition> sublist = textPositions.subList(startIndex, i);
|
||||
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0)
|
||||
.getUnicode()
|
||||
.equals(" ") || sublist.get(0).getUnicode().equals("\u00A0")))) {
|
||||
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0).getUnicode().equals(" ") || sublist.get(0).getUnicode().equals("\u00A0")))) {
|
||||
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
|
||||
}
|
||||
startIndex = i;
|
||||
}
|
||||
|
||||
|
||||
if (textPositions.get(i).getRotation() == 0 && i > 0 && textPositions.get(i).getX() > textPositions.get(i - 1).getEndX() + 1) {
|
||||
List<TextPosition> sublist = textPositions.subList(startIndex, i);
|
||||
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0)
|
||||
.getUnicode()
|
||||
.equals(" ") || sublist.get(0).getUnicode().equals("\u00A0")))) {
|
||||
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0).getUnicode().equals(" ") || sublist.get(0).getUnicode().equals("\u00A0")))) {
|
||||
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
|
||||
}
|
||||
startIndex = i;
|
||||
}
|
||||
|
||||
if (i > 0 && (textPositions.get(i).getUnicode().equals(" ") || textPositions.get(i)
|
||||
.getUnicode()
|
||||
.equals("\u00A0")) && i <= textPositions.size() - 2) {
|
||||
if (i > 0 && (textPositions.get(i).getUnicode().equals(" ") || textPositions.get(i).getUnicode().equals("\u00A0")) && i <= textPositions.size() - 2) {
|
||||
List<TextPosition> sublist = textPositions.subList(startIndex, i);
|
||||
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0)
|
||||
.getUnicode()
|
||||
.equals(" ") || sublist.get(0).getUnicode().equals("\u00A0")))) {
|
||||
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0).getUnicode().equals(" ") || sublist.get(0).getUnicode().equals("\u00A0")))) {
|
||||
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
|
||||
}
|
||||
startIndex = i + 1;
|
||||
@ -71,14 +64,10 @@ public class PDFAreaTextStripper extends PDFTextStripperByArea {
|
||||
}
|
||||
|
||||
List<TextPosition> sublist = textPositions.subList(startIndex, textPositions.size());
|
||||
if (!sublist.isEmpty() && (sublist.get(sublist.size() - 1)
|
||||
.getUnicode()
|
||||
.equals(" ") || sublist.get(sublist.size() - 1).getUnicode().equals("\u00A0"))) {
|
||||
if (!sublist.isEmpty() && (sublist.get(sublist.size() - 1).getUnicode().equals(" ") || sublist.get(sublist.size() - 1).getUnicode().equals("\u00A0"))) {
|
||||
sublist = sublist.subList(0, sublist.size() - 1);
|
||||
}
|
||||
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0).getUnicode().equals(" ") || sublist.get(0)
|
||||
.getUnicode()
|
||||
.equals("\u00A0")))) {
|
||||
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0).getUnicode().equals(" ") || sublist.get(0).getUnicode().equals("\u00A0")))) {
|
||||
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
|
||||
}
|
||||
super.writeString(text);
|
||||
@ -86,6 +75,7 @@ public class PDFAreaTextStripper extends PDFTextStripperByArea {
|
||||
|
||||
|
||||
public void clearPositions() {
|
||||
|
||||
textPositionSequences = new ArrayList<>();
|
||||
}
|
||||
|
||||
|
||||
@ -3,9 +3,11 @@ package com.iqser.red.service.redaction.v1.server.parsing;
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.model.RedTextPosition;
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import org.apache.pdfbox.contentstream.operator.Operator;
|
||||
import org.apache.pdfbox.contentstream.operator.OperatorName;
|
||||
import org.apache.pdfbox.contentstream.operator.color.*;
|
||||
@ -155,7 +157,6 @@ public class PDFLinesTextStripper extends PDFTextStripper {
|
||||
graphicsPath.clear();
|
||||
break;
|
||||
|
||||
|
||||
}
|
||||
|
||||
super.processOperator(operator, arguments);
|
||||
@ -182,14 +183,11 @@ public class PDFLinesTextStripper extends PDFTextStripper {
|
||||
|
||||
try {
|
||||
if (stroke && !getGraphicsState().getStrokingColor().isPattern() && getGraphicsState().getStrokingColor()
|
||||
.toRGB() == 0 || !stroke && !getGraphicsState().getNonStrokingColor()
|
||||
.isPattern() && getGraphicsState().getNonStrokingColor().toRGB() == 0) {
|
||||
.toRGB() == 0 || !stroke && !getGraphicsState().getNonStrokingColor().isPattern() && getGraphicsState().getNonStrokingColor().toRGB() == 0) {
|
||||
rulings.addAll(path);
|
||||
}
|
||||
} catch (UnsupportedOperationException e) {
|
||||
log.debug("UnsupportedOperationException: " + getGraphicsState().getStrokingColor()
|
||||
.getColorSpace()
|
||||
.getName() + " or " + getGraphicsState().getNonStrokingColor()
|
||||
log.debug("UnsupportedOperationException: " + getGraphicsState().getStrokingColor().getColorSpace().getName() + " or " + getGraphicsState().getNonStrokingColor()
|
||||
.getColorSpace()
|
||||
.getName() + " does not support toRGB");
|
||||
}
|
||||
@ -226,9 +224,7 @@ public class PDFLinesTextStripper extends PDFTextStripper {
|
||||
maxCharHeight = charHeight;
|
||||
}
|
||||
|
||||
if (i == 0 && (textPositions.get(i).getUnicode().equals(" ") || textPositions.get(i)
|
||||
.getUnicode()
|
||||
.equals("\u00A0") || textPositions.get(i).getUnicode().equals("\t"))) {
|
||||
if (i == 0 && (textPositions.get(i).getUnicode().equals(" ") || textPositions.get(i).getUnicode().equals("\u00A0") || textPositions.get(i).getUnicode().equals("\t"))) {
|
||||
startIndex++;
|
||||
continue;
|
||||
}
|
||||
@ -236,9 +232,7 @@ public class PDFLinesTextStripper extends PDFTextStripper {
|
||||
// Strange, but this sometimes happens, for example in Metolachlor2.pdf
|
||||
if (i > 0 && textPositions.get(i).getXDirAdj() < textPositions.get(i - 1).getXDirAdj()) {
|
||||
List<TextPosition> sublist = textPositions.subList(startIndex, i);
|
||||
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0)
|
||||
.getUnicode()
|
||||
.equals(" ") || sublist.get(0).getUnicode().equals("\u00A0") || sublist.get(0)
|
||||
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0).getUnicode().equals(" ") || sublist.get(0).getUnicode().equals("\u00A0") || sublist.get(0)
|
||||
.getUnicode()
|
||||
.equals("\t")))) {
|
||||
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
|
||||
@ -246,12 +240,9 @@ public class PDFLinesTextStripper extends PDFTextStripper {
|
||||
startIndex = i;
|
||||
}
|
||||
|
||||
if (textPositions.get(i).getRotation() == 0 && i > 0 && textPositions.get(i)
|
||||
.getX() > textPositions.get(i - 1).getEndX() + 1) {
|
||||
if (textPositions.get(i).getRotation() == 0 && i > 0 && textPositions.get(i).getX() > textPositions.get(i - 1).getEndX() + 1) {
|
||||
List<TextPosition> sublist = textPositions.subList(startIndex, i);
|
||||
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0)
|
||||
.getUnicode()
|
||||
.equals(" ") || sublist.get(0).getUnicode().equals("\u00A0") || sublist.get(0)
|
||||
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0).getUnicode().equals(" ") || sublist.get(0).getUnicode().equals("\u00A0") || sublist.get(0)
|
||||
.getUnicode()
|
||||
.equals("\t")))) {
|
||||
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
|
||||
@ -259,15 +250,11 @@ public class PDFLinesTextStripper extends PDFTextStripper {
|
||||
startIndex = i;
|
||||
}
|
||||
|
||||
if (i > 0 && (textPositions.get(i).getUnicode().equals(" ") || textPositions.get(i)
|
||||
.getUnicode()
|
||||
.equals("\u00A0") || textPositions.get(i)
|
||||
if (i > 0 && (textPositions.get(i).getUnicode().equals(" ") || textPositions.get(i).getUnicode().equals("\u00A0") || textPositions.get(i)
|
||||
.getUnicode()
|
||||
.equals("\t")) && i <= textPositions.size() - 2) {
|
||||
List<TextPosition> sublist = textPositions.subList(startIndex, i);
|
||||
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0)
|
||||
.getUnicode()
|
||||
.equals(" ") || sublist.get(0).getUnicode().equals("\u00A0") || sublist.get(0)
|
||||
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0).getUnicode().equals(" ") || sublist.get(0).getUnicode().equals("\u00A0") || sublist.get(0)
|
||||
.getUnicode()
|
||||
.equals("\t")))) {
|
||||
|
||||
@ -286,17 +273,15 @@ public class PDFLinesTextStripper extends PDFTextStripper {
|
||||
}
|
||||
|
||||
List<TextPosition> sublist = textPositions.subList(startIndex, textPositions.size());
|
||||
if (!sublist.isEmpty() && (sublist.get(sublist.size() - 1)
|
||||
.getUnicode()
|
||||
.equals(" ") || sublist.get(sublist.size() - 1)
|
||||
if (!sublist.isEmpty() && (sublist.get(sublist.size() - 1).getUnicode().equals(" ") || sublist.get(sublist.size() - 1)
|
||||
.getUnicode()
|
||||
.equals("\u00A0") || sublist.get(sublist.size() - 1).getUnicode().equals("\t"))) {
|
||||
sublist = sublist.subList(0, sublist.size() - 1);
|
||||
}
|
||||
|
||||
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0).getUnicode().equals(" ") || sublist.get(0)
|
||||
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0).getUnicode().equals(" ") || sublist.get(0).getUnicode().equals("\u00A0") || sublist.get(0)
|
||||
.getUnicode()
|
||||
.equals("\u00A0") || sublist.get(0).getUnicode().equals("\t")))) {
|
||||
.equals("\t")))) {
|
||||
if (previous != null && sublist.get(0).getYDirAdj() == previous.getYDirAdj() && sublist.get(0)
|
||||
.getXDirAdj() - (previous.getXDirAdj() + previous.getWidthDirAdj()) < 0.01) {
|
||||
for (TextPosition t : sublist) {
|
||||
|
||||
@ -1368,7 +1368,9 @@ public class PDFTextStripper extends LegacyPDFStreamEngine {
|
||||
* @return start position of the last line
|
||||
* @throws IOException if something went wrong
|
||||
*/
|
||||
private PositionWrapper handleLineSeparation(PositionWrapper current, PositionWrapper lastPosition, PositionWrapper lastLineStartPosition,
|
||||
private PositionWrapper handleLineSeparation(PositionWrapper current,
|
||||
PositionWrapper lastPosition,
|
||||
PositionWrapper lastLineStartPosition,
|
||||
float maxHeightForLine) throws IOException {
|
||||
|
||||
current.setLineStart();
|
||||
|
||||
@ -55,6 +55,7 @@ public class RedTextPosition {
|
||||
|
||||
@SneakyThrows
|
||||
public static RedTextPosition fromTextPosition(TextPosition textPosition) {
|
||||
|
||||
var pos = new RedTextPosition();
|
||||
BeanUtils.copyProperties(textPosition, pos);
|
||||
pos.setFontName(textPosition.getFont().getName());
|
||||
@ -76,24 +77,30 @@ public class RedTextPosition {
|
||||
|
||||
|
||||
@JsonIgnore
|
||||
public float getXDirAdj(){
|
||||
public float getXDirAdj() {
|
||||
|
||||
return position[0];
|
||||
}
|
||||
|
||||
|
||||
@JsonIgnore
|
||||
public float getYDirAdj(){
|
||||
public float getYDirAdj() {
|
||||
|
||||
return position[1];
|
||||
}
|
||||
|
||||
|
||||
@JsonIgnore
|
||||
public float getWidthDirAdj(){
|
||||
public float getWidthDirAdj() {
|
||||
|
||||
return position[2];
|
||||
}
|
||||
|
||||
|
||||
@JsonIgnore
|
||||
public float getHeightDir(){
|
||||
public float getHeightDir() {
|
||||
|
||||
return position[3];
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
@ -20,21 +20,28 @@ public enum TextDirection {
|
||||
private final float degrees;
|
||||
private final float radians;
|
||||
|
||||
|
||||
TextDirection(float degreeValue) {
|
||||
|
||||
degrees = degreeValue;
|
||||
radians = (float) Math.toRadians(degreeValue);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
return degrees + VALUE_STRING_SUFFIX;
|
||||
}
|
||||
|
||||
|
||||
@com.dslplatform.json.JsonValue
|
||||
public float jsonValue() {
|
||||
|
||||
return getDegrees();
|
||||
}
|
||||
|
||||
|
||||
@JsonCreator(mode = JsonCreator.Mode.DELEGATING)
|
||||
public static TextDirection fromDegrees(float degrees) {
|
||||
|
||||
@ -47,6 +54,7 @@ public enum TextDirection {
|
||||
throw new IllegalArgumentException(String.format("A value of %f is not supported by TextDirection", degrees));
|
||||
}
|
||||
|
||||
|
||||
public static TextDirection fromString(String degreesAsString) {
|
||||
|
||||
Objects.requireNonNull(degreesAsString, "Cannot construct a text direction from a null value");
|
||||
|
||||
@ -304,7 +304,10 @@ public class TextPositionSequence implements CharSequence {
|
||||
topRight = transform.transform(topRight, null);
|
||||
|
||||
return new Rectangle( //
|
||||
new Point((float) bottomLeft.getX(), (float) bottomLeft.getY()), (float) (topRight.getX() - bottomLeft.getX()), (float) (topRight.getY() - bottomLeft.getY()), page);
|
||||
new Point((float) bottomLeft.getX(), (float) bottomLeft.getY()),
|
||||
(float) (topRight.getX() - bottomLeft.getX()),
|
||||
(float) (topRight.getY() - bottomLeft.getY()),
|
||||
page);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -17,9 +17,11 @@ public class MessageReceiver {
|
||||
|
||||
private final RedactionMessageReceiver redactionMessageReceiver;
|
||||
|
||||
|
||||
@RabbitHandler
|
||||
@RabbitListener(queues = REDACTION_QUEUE)
|
||||
public void receiveAnalyzeRequest(Message message) {
|
||||
|
||||
redactionMessageReceiver.receiveAnalyzeRequest(message, false);
|
||||
}
|
||||
|
||||
|
||||
@ -17,16 +17,19 @@ public class MessagingConfiguration {
|
||||
|
||||
public static final String REDACTION_PRIORITY_QUEUE = "redactionPriorityQueue";
|
||||
|
||||
|
||||
@Bean
|
||||
@ConditionalOnProperty(prefix = "redaction-service", name = "priorityMode", havingValue = "false")
|
||||
public MessageReceiver messageReceiver(RedactionMessageReceiver redactionMessageReceiver){
|
||||
public MessageReceiver messageReceiver(RedactionMessageReceiver redactionMessageReceiver) {
|
||||
|
||||
return new MessageReceiver(redactionMessageReceiver);
|
||||
}
|
||||
|
||||
|
||||
@Bean
|
||||
@ConditionalOnProperty(prefix = "redaction-service", name = "priorityMode", havingValue = "true")
|
||||
public PriorityMessageReceiver priorityMessageReceiver(RedactionMessageReceiver redactionMessageReceiver){
|
||||
public PriorityMessageReceiver priorityMessageReceiver(RedactionMessageReceiver redactionMessageReceiver) {
|
||||
|
||||
return new PriorityMessageReceiver(redactionMessageReceiver);
|
||||
}
|
||||
|
||||
@ -34,11 +37,7 @@ public class MessagingConfiguration {
|
||||
@Bean
|
||||
public Queue redactionQueue() {
|
||||
|
||||
return QueueBuilder.durable(REDACTION_QUEUE)
|
||||
.withArgument("x-dead-letter-exchange", "")
|
||||
.withArgument("x-dead-letter-routing-key", REDACTION_DQL)
|
||||
.maxPriority(2)
|
||||
.build();
|
||||
return QueueBuilder.durable(REDACTION_QUEUE).withArgument("x-dead-letter-exchange", "").withArgument("x-dead-letter-routing-key", REDACTION_DQL).maxPriority(2).build();
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -38,13 +38,16 @@ public class RedactionMessageReceiver {
|
||||
var analyzeRequest = objectMapper.readValue(message.getBody(), AnalyzeRequest.class);
|
||||
|
||||
// This prevents endless retries on OOM errors.
|
||||
if(message.getMessageProperties().isRedelivered()){
|
||||
throw new AmqpRejectAndDontRequeueException(String.format("Error during last processing of request with dossierId: %s and fileId: %s, do not retry.", analyzeRequest.getDossierId(), analyzeRequest.getFileId()));
|
||||
if (message.getMessageProperties().isRedelivered()) {
|
||||
throw new AmqpRejectAndDontRequeueException(String.format("Error during last processing of request with dossierId: %s and fileId: %s, do not retry.",
|
||||
analyzeRequest.getDossierId(),
|
||||
analyzeRequest.getFileId()));
|
||||
}
|
||||
|
||||
receiveAnalyzeRequest(analyzeRequest, priority);
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public void receiveAnalyzeRequest(AnalyzeRequest analyzeRequest, boolean priority) {
|
||||
|
||||
@ -70,8 +73,13 @@ public class RedactionMessageReceiver {
|
||||
break;
|
||||
|
||||
case SURROUNDING_TEXT:
|
||||
result = manualRedactionSurroundingTextService.addSurroundingText(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), analyzeRequest.getManualRedactions());
|
||||
log.info("Successfully added surrounding text for manual redaction in dossierId {} and fileId {} took: {}", analyzeRequest.getDossierId(), analyzeRequest.getFileId(), result.getDuration());
|
||||
result = manualRedactionSurroundingTextService.addSurroundingText(analyzeRequest.getDossierId(),
|
||||
analyzeRequest.getFileId(),
|
||||
analyzeRequest.getManualRedactions());
|
||||
log.info("Successfully added surrounding text for manual redaction in dossierId {} and fileId {} took: {}",
|
||||
analyzeRequest.getDossierId(),
|
||||
analyzeRequest.getFileId(),
|
||||
result.getDuration());
|
||||
break;
|
||||
|
||||
default:
|
||||
|
||||
@ -3,6 +3,7 @@ package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
@ -20,6 +21,7 @@ public class CellValue {
|
||||
|
||||
private int rowSpanStart;
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
@ -45,9 +47,7 @@ public class CellValue {
|
||||
}
|
||||
}
|
||||
|
||||
return TextNormalizationUtilities.removeHyphenLineBreaks(sb.toString())
|
||||
.replaceAll("\n", " ")
|
||||
.replaceAll(" {2}", " ");
|
||||
return TextNormalizationUtilities.removeHyphenLineBreaks(sb.toString()).replaceAll("\n", " ").replaceAll(" {2}", " ");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -1,8 +1,8 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.DictionaryEntry;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.SearchImplementation;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
|
||||
@ -11,7 +11,6 @@ import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
|
||||
@Data
|
||||
@AllArgsConstructor
|
||||
public class DictionaryModel implements Serializable {
|
||||
@ -33,6 +32,7 @@ public class DictionaryModel implements Serializable {
|
||||
|
||||
private final Set<String> localEntries = new HashSet<>();
|
||||
|
||||
|
||||
public DictionaryModel(String type,
|
||||
int rank,
|
||||
float[] color,
|
||||
@ -42,6 +42,7 @@ public class DictionaryModel implements Serializable {
|
||||
Set<DictionaryEntry> falsePositives,
|
||||
Set<DictionaryEntry> falseRecommendations,
|
||||
boolean isDossierDictionary) {
|
||||
|
||||
this.type = type;
|
||||
this.rank = rank;
|
||||
this.color = color;
|
||||
@ -53,39 +54,58 @@ public class DictionaryModel implements Serializable {
|
||||
this.falsePositives = falsePositives;
|
||||
this.falseRecommendations = falseRecommendations;
|
||||
|
||||
this.entriesSearch = new SearchImplementation(this.entries.stream().filter(e -> !e.isDeleted()).map(DictionaryEntry::getValue).collect(Collectors.toList()), caseInsensitive);
|
||||
this.falsePositiveSearch = new SearchImplementation(this.falsePositives.stream().filter(e -> !e.isDeleted()).map(DictionaryEntry::getValue).collect(Collectors.toList()), caseInsensitive);
|
||||
this.falseRecommendationsSearch = new SearchImplementation(this.falseRecommendations.stream().filter(e -> !e.isDeleted()).map(DictionaryEntry::getValue).collect(Collectors.toList()), caseInsensitive);
|
||||
this.entriesSearch = new SearchImplementation(this.entries.stream().filter(e -> !e.isDeleted()).map(DictionaryEntry::getValue).collect(Collectors.toList()),
|
||||
caseInsensitive);
|
||||
this.falsePositiveSearch = new SearchImplementation(this.falsePositives.stream().filter(e -> !e.isDeleted()).map(DictionaryEntry::getValue).collect(Collectors.toList()),
|
||||
caseInsensitive);
|
||||
this.falseRecommendationsSearch = new SearchImplementation(this.falseRecommendations.stream()
|
||||
.filter(e -> !e.isDeleted())
|
||||
.map(DictionaryEntry::getValue)
|
||||
.collect(Collectors.toList()), caseInsensitive);
|
||||
|
||||
}
|
||||
|
||||
|
||||
public SearchImplementation getLocalSearch() {
|
||||
|
||||
if (this.localSearch == null) {
|
||||
this.localSearch = new SearchImplementation(this.localEntries, caseInsensitive);
|
||||
}
|
||||
return this.localSearch;
|
||||
}
|
||||
|
||||
|
||||
public SearchImplementation getEntriesSearch() {
|
||||
|
||||
if (entriesSearch == null) {
|
||||
this.entriesSearch = new SearchImplementation(this.entries.stream().filter(e -> !e.isDeleted()).map(DictionaryEntry::getValue).collect(Collectors.toList()), caseInsensitive);
|
||||
this.entriesSearch = new SearchImplementation(this.entries.stream().filter(e -> !e.isDeleted()).map(DictionaryEntry::getValue).collect(Collectors.toList()),
|
||||
caseInsensitive);
|
||||
}
|
||||
return entriesSearch;
|
||||
}
|
||||
|
||||
|
||||
public SearchImplementation getFalsePositiveSearch() {
|
||||
|
||||
if (falsePositiveSearch == null) {
|
||||
this.falsePositiveSearch = new SearchImplementation(this.falsePositives.stream().filter(e -> !e.isDeleted()).map(DictionaryEntry::getValue).collect(Collectors.toList()), caseInsensitive);
|
||||
this.falsePositiveSearch = new SearchImplementation(this.falsePositives.stream()
|
||||
.filter(e -> !e.isDeleted())
|
||||
.map(DictionaryEntry::getValue)
|
||||
.collect(Collectors.toList()), caseInsensitive);
|
||||
}
|
||||
return falsePositiveSearch;
|
||||
}
|
||||
|
||||
|
||||
public SearchImplementation getFalseRecommendationsSearch() {
|
||||
|
||||
if (falseRecommendationsSearch == null) {
|
||||
this.falseRecommendationsSearch = new SearchImplementation(this.falseRecommendations.stream().filter(e -> !e.isDeleted()).map(DictionaryEntry::getValue).collect(Collectors.toList()), caseInsensitive);
|
||||
this.falseRecommendationsSearch = new SearchImplementation(this.falseRecommendations.stream()
|
||||
.filter(e -> !e.isDeleted())
|
||||
.map(DictionaryEntry::getValue)
|
||||
.collect(Collectors.toList()), caseInsensitive);
|
||||
}
|
||||
return falseRecommendationsSearch;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
@ -19,5 +19,4 @@ public class DictionaryRepresentation {
|
||||
private float[] notRedactedColor;
|
||||
private Map<String, DictionaryModel> localAccessMap = new HashMap<>();
|
||||
|
||||
|
||||
}
|
||||
|
||||
@ -13,4 +13,5 @@ public class DictionaryVersion {
|
||||
|
||||
long dossierTemplateVersion;
|
||||
long dossierVersion;
|
||||
|
||||
}
|
||||
|
||||
@ -2,6 +2,7 @@ package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.model.Engine;
|
||||
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
|
||||
|
||||
import lombok.*;
|
||||
|
||||
import java.util.ArrayList;
|
||||
@ -57,10 +58,24 @@ public class Entity implements ReasonHolder {
|
||||
private boolean skipRemoveEntitiesContainedInLarger;
|
||||
|
||||
|
||||
public Entity(String word, String type, boolean redaction, String redactionReason,
|
||||
List<EntityPositionSequence> positionSequences, String headline, int matchedRule, int sectionNumber,
|
||||
String legalBasis, boolean isDictionaryEntry, String textBefore, String textAfter, Integer start,
|
||||
Integer end, boolean isDossierDictionaryEntry, Set<Engine> engines, Set<Entity> references, EntityType entityType) {
|
||||
public Entity(String word,
|
||||
String type,
|
||||
boolean redaction,
|
||||
String redactionReason,
|
||||
List<EntityPositionSequence> positionSequences,
|
||||
String headline,
|
||||
int matchedRule,
|
||||
int sectionNumber,
|
||||
String legalBasis,
|
||||
boolean isDictionaryEntry,
|
||||
String textBefore,
|
||||
String textAfter,
|
||||
Integer start,
|
||||
Integer end,
|
||||
boolean isDossierDictionaryEntry,
|
||||
Set<Engine> engines,
|
||||
Set<Entity> references,
|
||||
EntityType entityType) {
|
||||
|
||||
this.word = word;
|
||||
this.type = type;
|
||||
@ -83,8 +98,16 @@ public class Entity implements ReasonHolder {
|
||||
}
|
||||
|
||||
|
||||
public Entity(String word, String type, Integer start, Integer end, String headline, int sectionNumber,
|
||||
boolean isDictionaryEntry, boolean isDossierDictionaryEntry, Engine engine, EntityType entityType) {
|
||||
public Entity(String word,
|
||||
String type,
|
||||
Integer start,
|
||||
Integer end,
|
||||
String headline,
|
||||
int sectionNumber,
|
||||
boolean isDictionaryEntry,
|
||||
boolean isDossierDictionaryEntry,
|
||||
Engine engine,
|
||||
EntityType entityType) {
|
||||
|
||||
this.word = word;
|
||||
this.type = type;
|
||||
@ -98,5 +121,4 @@ public class Entity implements ReasonHolder {
|
||||
this.entityType = entityType;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
@ -1,5 +1,8 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||
|
||||
public enum EntityType {
|
||||
ENTITY, RECOMMENDATION, FALSE_POSITIVE, FALSE_RECOMMENDATION
|
||||
ENTITY,
|
||||
RECOMMENDATION,
|
||||
FALSE_POSITIVE,
|
||||
FALSE_RECOMMENDATION
|
||||
}
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||
|
||||
import com.dslplatform.json.CompiledJson;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
|
||||
@ -1,5 +1,9 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model;
|
||||
|
||||
public enum ImageType {
|
||||
LOGO, FORMULA, SIGNATURE, OTHER, OCR
|
||||
LOGO,
|
||||
FORMULA,
|
||||
SIGNATURE,
|
||||
OTHER,
|
||||
OCR
|
||||
}
|
||||
|
||||
@ -19,5 +19,4 @@ public class PageEntities {
|
||||
@Builder.Default
|
||||
private Map<Integer, Set<Image>> imagesPerPage = new HashMap<>();
|
||||
|
||||
|
||||
}
|
||||
|
||||
@ -4,11 +4,13 @@ public interface ReasonHolder {
|
||||
|
||||
String getRedactionReason();
|
||||
|
||||
|
||||
void setRedactionReason(String reason);
|
||||
|
||||
|
||||
boolean isRedaction();
|
||||
|
||||
|
||||
void setRedaction(boolean value);
|
||||
|
||||
|
||||
}
|
||||
|
||||
@ -23,26 +23,30 @@ public class RedRectangle2D {
|
||||
private double width;
|
||||
private double height;
|
||||
|
||||
|
||||
@JsonIgnore
|
||||
@JsonAttribute(ignore = true)
|
||||
public boolean isEmpty() {
|
||||
|
||||
return width <= 0.0f || height <= 0.0f;
|
||||
}
|
||||
|
||||
|
||||
public boolean contains(double x, double y, double w, double h) {
|
||||
|
||||
if (isEmpty() || w <= 0 || h <= 0) {
|
||||
return false;
|
||||
}
|
||||
double x0 = getX();
|
||||
double y0 = getY();
|
||||
return round(x) >= round(x0) &&
|
||||
round(y) >= round(y0) &&
|
||||
(x + w) - (x0 + getWidth()) <= THRESHOLD &&
|
||||
(y + h) - (y0 + getHeight()) <= THRESHOLD;
|
||||
return round(x) >= round(x0) && round(y) >= round(y0) && (x + w) - (x0 + getWidth()) <= THRESHOLD && (y + h) - (y0 + getHeight()) <= THRESHOLD;
|
||||
}
|
||||
|
||||
|
||||
private double round(double value) {
|
||||
|
||||
double d = Math.pow(10, 2);
|
||||
return Math.round(value * d) / d;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -17,7 +17,6 @@ import lombok.Getter;
|
||||
|
||||
public class SearchableText {
|
||||
|
||||
|
||||
@JsonIgnore
|
||||
@JsonAttribute(ignore = true)
|
||||
private transient String stringRepresentation;
|
||||
@ -45,8 +44,7 @@ public class SearchableText {
|
||||
|
||||
|
||||
@SuppressWarnings("checkstyle:ModifiedControlVariable")
|
||||
public List<EntityPositionSequence> getSequences(String searchString, boolean caseInsensitive,
|
||||
List<TextPositionSequence> sequencesSubList) {
|
||||
public List<EntityPositionSequence> getSequences(String searchString, boolean caseInsensitive, List<TextPositionSequence> sequencesSubList) {
|
||||
|
||||
String normalizedSearchString;
|
||||
if (caseInsensitive) {
|
||||
@ -78,36 +76,27 @@ public class SearchableText {
|
||||
for (int j = 0; j < searchSpace.get(i).length(); j++) {
|
||||
|
||||
if (i > 0 && j == 0 && searchSpace.get(i).charAt(0, caseInsensitive) == ' ' && searchSpace.get(i - 1)
|
||||
.charAt(searchSpace.get(i - 1)
|
||||
.length() - 1, caseInsensitive) == ' ' || j > 0 && searchSpace.get(i)
|
||||
.charAt(j, caseInsensitive) == ' ' && searchSpace.get(i)
|
||||
.charAt(searchSpace.get(i - 1).length() - 1, caseInsensitive) == ' ' || j > 0 && searchSpace.get(i).charAt(j, caseInsensitive) == ' ' && searchSpace.get(i)
|
||||
.charAt(j - 1, caseInsensitive) == ' ') {
|
||||
if (j == searchSpace.get(i).length() - 1 && counter != 0 && !partMatch.getTextPositions()
|
||||
.isEmpty()) {
|
||||
if (j == searchSpace.get(i).length() - 1 && counter != 0 && !partMatch.getTextPositions().isEmpty()) {
|
||||
crossSequenceParts.add(partMatch);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (j == 0 && searchSpace.get(i).charAt(j, caseInsensitive) != ' ' && i != 0 && searchSpace.get(i - 1)
|
||||
.charAt(searchSpace.get(i - 1)
|
||||
.length() - 1, caseInsensitive) != ' ' && searchChars[counter] == ' ') {
|
||||
.charAt(searchSpace.get(i - 1).length() - 1, caseInsensitive) != ' ' && searchChars[counter] == ' ') {
|
||||
counter++;
|
||||
}
|
||||
|
||||
if (searchSpace.get(i).charAt(j, caseInsensitive) == searchChars[counter] || counter != 0 && searchSpace
|
||||
.get(i)
|
||||
.charAt(j, caseInsensitive) == '-') {
|
||||
if (searchSpace.get(i).charAt(j, caseInsensitive) == searchChars[counter] || counter != 0 && searchSpace.get(i).charAt(j, caseInsensitive) == '-') {
|
||||
|
||||
if (counter != 0 || i == 0 && j == 0 || j != 0 && SeparatorUtils.isSeparator(searchSpace.get(i)
|
||||
.charAt(j - 1, caseInsensitive)) || j == 0 && i != 0 && SeparatorUtils.isSeparator(searchSpace.get(i - 1)
|
||||
.charAt(searchSpace.get(i - 1)
|
||||
.length() - 1, caseInsensitive)) || j == 0 && i != 0 && searchSpace.get(i - 1)
|
||||
.charAt(searchSpace.get(i - 1).length() - 1, caseInsensitive) != ' ' && searchSpace.get(i)
|
||||
.charAt(j, caseInsensitive) != ' ') {
|
||||
partMatch.add(searchSpace.get(i),searchSpace.get(i).textPositionAt(j));
|
||||
if (!(j == searchSpace.get(i).length() - 1 && searchSpace.get(i)
|
||||
.charAt(j, caseInsensitive) == '-' && searchChars[counter] != '-')) {
|
||||
.charAt(searchSpace.get(i - 1).length() - 1, caseInsensitive)) || j == 0 && i != 0 && searchSpace.get(i - 1)
|
||||
.charAt(searchSpace.get(i - 1).length() - 1, caseInsensitive) != ' ' && searchSpace.get(i).charAt(j, caseInsensitive) != ' ') {
|
||||
partMatch.add(searchSpace.get(i), searchSpace.get(i).textPositionAt(j));
|
||||
if (!(j == searchSpace.get(i).length() - 1 && searchSpace.get(i).charAt(j, caseInsensitive) == '-' && searchChars[counter] != '-')) {
|
||||
counter++;
|
||||
}
|
||||
}
|
||||
@ -115,14 +104,10 @@ public class SearchableText {
|
||||
if (counter == searchString.length()) {
|
||||
crossSequenceParts.add(partMatch);
|
||||
|
||||
if (i == searchSpace.size() - 1 && j == searchSpace.get(i)
|
||||
.length() - 1 || j != searchSpace.get(i).length() - 1 && SeparatorUtils.isSeparator(searchSpace.get(i)
|
||||
.charAt(j + 1, caseInsensitive)) || j == searchSpace.get(i)
|
||||
.length() - 1 && SeparatorUtils.isSeparator(searchSpace.get(i + 1)
|
||||
.charAt(0, caseInsensitive)) || j == searchSpace.get(i)
|
||||
.length() - 1 && searchSpace.get(i)
|
||||
.charAt(j, caseInsensitive) != ' ' && searchSpace.get(i + 1)
|
||||
.charAt(0, caseInsensitive) != ' ') {
|
||||
if (i == searchSpace.size() - 1 && j == searchSpace.get(i).length() - 1 || j != searchSpace.get(i)
|
||||
.length() - 1 && SeparatorUtils.isSeparator(searchSpace.get(i).charAt(j + 1, caseInsensitive)) || j == searchSpace.get(i)
|
||||
.length() - 1 && SeparatorUtils.isSeparator(searchSpace.get(i + 1).charAt(0, caseInsensitive)) || j == searchSpace.get(i)
|
||||
.length() - 1 && searchSpace.get(i).charAt(j, caseInsensitive) != ' ' && searchSpace.get(i + 1).charAt(0, caseInsensitive) != ' ') {
|
||||
finalMatches.addAll(buildEntityPositionSequence(crossSequenceParts, normalizedSearchString));
|
||||
}
|
||||
|
||||
@ -149,8 +134,7 @@ public class SearchableText {
|
||||
}
|
||||
|
||||
|
||||
private List<EntityPositionSequence> buildEntityPositionSequence(List<TextPositionSequence> crossSequenceParts,
|
||||
String searchString) {
|
||||
private List<EntityPositionSequence> buildEntityPositionSequence(List<TextPositionSequence> crossSequenceParts, String searchString) {
|
||||
|
||||
List<EntityPositionSequence> result = new ArrayList<>();
|
||||
String asString = buildString(crossSequenceParts);
|
||||
@ -188,10 +172,13 @@ public class SearchableText {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
return buildString(sequences);
|
||||
}
|
||||
|
||||
|
||||
public String asString() {
|
||||
|
||||
if (stringRepresentation == null) {
|
||||
stringRepresentation = buildString(sequences);
|
||||
}
|
||||
@ -217,9 +204,7 @@ public class SearchableText {
|
||||
previous = word;
|
||||
}
|
||||
|
||||
return TextNormalizationUtilities.removeHyphenLineBreaks(sb.toString())
|
||||
.replaceAll("\n", " ")
|
||||
.replaceAll(" {2}", " ");
|
||||
return TextNormalizationUtilities.removeHyphenLineBreaks(sb.toString()).replaceAll("\n", " ").replaceAll(" {2}", " ");
|
||||
}
|
||||
|
||||
|
||||
@ -230,7 +215,12 @@ public class SearchableText {
|
||||
|
||||
|
||||
public String getAsStringWithLinebreaksSorted(List<TextPositionSequence> sequences) {
|
||||
var sorted = sequences.stream().sorted(Comparator.comparing(a -> a.getTextPositions().get(0).getXDirAdj())).sorted(Comparator.comparing(a -> a.getTextPositions().get(0).getYDirAdj())).sorted(Comparator.comparing(a -> a.getPage())).collect(Collectors.toList());
|
||||
|
||||
var sorted = sequences.stream()
|
||||
.sorted(Comparator.comparing(a -> a.getTextPositions().get(0).getXDirAdj()))
|
||||
.sorted(Comparator.comparing(a -> a.getTextPositions().get(0).getYDirAdj()))
|
||||
.sorted(Comparator.comparing(a -> a.getPage()))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
@ -251,8 +241,6 @@ public class SearchableText {
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
public String getAsStringWithLinebreaks() {
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
@ -85,11 +85,12 @@ public class Section {
|
||||
@ThenAction
|
||||
public void addAiEntities(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.TYPE) String asType) {
|
||||
|
||||
Set<Entity> entitiesOfType = nerEntities.stream()
|
||||
.filter(nerEntity -> nerEntity.getType().equals(type))
|
||||
.collect(Collectors.toSet());
|
||||
Set<Entity> entitiesOfType = nerEntities.stream().filter(nerEntity -> nerEntity.getType().equals(type)).collect(Collectors.toSet());
|
||||
List<String> values = entitiesOfType.stream().map(Entity::getWord).collect(Collectors.toList());
|
||||
Set<Entity> found = EntitySearchUtils.findEntities(searchText, new SearchImplementation(values, dictionary.isCaseInsensitiveDictionary(asType)), dictionary.getType(asType), new FindEntityDetails(asType, headline, sectionNumber, false, false, Engine.NER, EntityType.RECOMMENDATION));
|
||||
Set<Entity> found = EntitySearchUtils.findEntities(searchText,
|
||||
new SearchImplementation(values, dictionary.isCaseInsensitiveDictionary(asType)),
|
||||
dictionary.getType(asType),
|
||||
new FindEntityDetails(asType, headline, sectionNumber, false, false, Engine.NER, EntityType.RECOMMENDATION));
|
||||
EntitySearchUtils.clearAndFindPositions(found, searchableText, dictionary, manualRedactions);
|
||||
found = found.stream().filter(f -> !f.isFalsePositive()).collect(Collectors.toSet());
|
||||
|
||||
@ -127,9 +128,7 @@ public class Section {
|
||||
|
||||
Set<String> combineSet = Set.of(combineTypes.split(","));
|
||||
|
||||
List<Entity> sorted = nerEntities.stream()
|
||||
.sorted(Comparator.comparing(Entity::getStart))
|
||||
.collect(Collectors.toList());
|
||||
List<Entity> sorted = nerEntities.stream().sorted(Comparator.comparing(Entity::getStart)).collect(Collectors.toList());
|
||||
Set<Entity> found = new HashSet<>();
|
||||
int start = -1;
|
||||
int lastEnd = -1;
|
||||
@ -189,48 +188,39 @@ public class Section {
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
@WhenCondition
|
||||
public boolean fileAttributeByIdEquals(@Argument(ArgumentType.FILE_ATTRIBUTE) String id,
|
||||
@Argument(ArgumentType.STRING) String value) {
|
||||
public boolean fileAttributeByIdEquals(@Argument(ArgumentType.FILE_ATTRIBUTE) String id, @Argument(ArgumentType.STRING) String value) {
|
||||
|
||||
return fileAttributes != null && fileAttributes.stream()
|
||||
.anyMatch(attribute -> id.equals(attribute.getId()) && value.equals(attribute.getValue()));
|
||||
return fileAttributes != null && fileAttributes.stream().anyMatch(attribute -> id.equals(attribute.getId()) && value.equals(attribute.getValue()));
|
||||
}
|
||||
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
@WhenCondition
|
||||
public boolean fileAttributeByPlaceholderEquals(@Argument(ArgumentType.FILE_ATTRIBUTE) String placeholder,
|
||||
@Argument(ArgumentType.STRING) String value) {
|
||||
public boolean fileAttributeByPlaceholderEquals(@Argument(ArgumentType.FILE_ATTRIBUTE) String placeholder, @Argument(ArgumentType.STRING) String value) {
|
||||
|
||||
return fileAttributes != null && fileAttributes.stream()
|
||||
.anyMatch(attribute -> placeholder.equals(attribute.getPlaceholder()) && value.equals(attribute.getValue()));
|
||||
return fileAttributes != null && fileAttributes.stream().anyMatch(attribute -> placeholder.equals(attribute.getPlaceholder()) && value.equals(attribute.getValue()));
|
||||
}
|
||||
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
@WhenCondition
|
||||
public boolean fileAttributeByLabelEquals(@Argument(ArgumentType.FILE_ATTRIBUTE) String label,
|
||||
@Argument(ArgumentType.STRING) String value) {
|
||||
public boolean fileAttributeByLabelEquals(@Argument(ArgumentType.FILE_ATTRIBUTE) String label, @Argument(ArgumentType.STRING) String value) {
|
||||
|
||||
return fileAttributes != null && fileAttributes.stream()
|
||||
.anyMatch(attribute -> label.equals(attribute.getLabel()) && value.equals(attribute.getValue()));
|
||||
return fileAttributes != null && fileAttributes.stream().anyMatch(attribute -> label.equals(attribute.getLabel()) && value.equals(attribute.getValue()));
|
||||
}
|
||||
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
@WhenCondition
|
||||
public boolean fileAttributeByIdEqualsIgnoreCase(@Argument(ArgumentType.FILE_ATTRIBUTE) String id,
|
||||
@Argument(ArgumentType.STRING) String value) {
|
||||
public boolean fileAttributeByIdEqualsIgnoreCase(@Argument(ArgumentType.FILE_ATTRIBUTE) String id, @Argument(ArgumentType.STRING) String value) {
|
||||
|
||||
return fileAttributes != null && fileAttributes.stream()
|
||||
.anyMatch(attribute -> id.equals(attribute.getId()) && value.equalsIgnoreCase(attribute.getValue()));
|
||||
return fileAttributes != null && fileAttributes.stream().anyMatch(attribute -> id.equals(attribute.getId()) && value.equalsIgnoreCase(attribute.getValue()));
|
||||
}
|
||||
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
@WhenCondition
|
||||
public boolean fileAttributeByPlaceholderEqualsIgnoreCase(@Argument(ArgumentType.FILE_ATTRIBUTE) String placeholder,
|
||||
@Argument(ArgumentType.STRING) String value) {
|
||||
public boolean fileAttributeByPlaceholderEqualsIgnoreCase(@Argument(ArgumentType.FILE_ATTRIBUTE) String placeholder, @Argument(ArgumentType.STRING) String value) {
|
||||
|
||||
return fileAttributes != null && fileAttributes.stream()
|
||||
.anyMatch(attribute -> placeholder.equals(attribute.getPlaceholder()) && value.equalsIgnoreCase(attribute.getValue()));
|
||||
@ -239,11 +229,9 @@ public class Section {
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
@WhenCondition
|
||||
public boolean fileAttributeByLabelEqualsIgnoreCase(@Argument(ArgumentType.FILE_ATTRIBUTE) String label,
|
||||
@Argument(ArgumentType.STRING) String value) {
|
||||
public boolean fileAttributeByLabelEqualsIgnoreCase(@Argument(ArgumentType.FILE_ATTRIBUTE) String label, @Argument(ArgumentType.STRING) String value) {
|
||||
|
||||
return fileAttributes != null && fileAttributes.stream()
|
||||
.anyMatch(attribute -> label.equals(attribute.getLabel()) && value.equalsIgnoreCase(attribute.getValue()));
|
||||
return fileAttributes != null && fileAttributes.stream().anyMatch(attribute -> label.equals(attribute.getLabel()) && value.equalsIgnoreCase(attribute.getValue()));
|
||||
}
|
||||
|
||||
|
||||
@ -290,8 +278,7 @@ public class Section {
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
@WhenCondition
|
||||
public boolean containsRegEx(@Argument(ArgumentType.STRING) String regEx,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive) {
|
||||
public boolean containsRegEx(@Argument(ArgumentType.STRING) String regEx, @Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive) {
|
||||
|
||||
var compiledPattern = Patterns.getCompiledPattern(regEx, patternCaseInsensitive);
|
||||
|
||||
@ -303,14 +290,11 @@ public class Section {
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
@WhenCondition
|
||||
public boolean rowEquals(@Argument(ArgumentType.STRING) String headerName,
|
||||
@Argument(ArgumentType.STRING) String value) {
|
||||
public boolean rowEquals(@Argument(ArgumentType.STRING) String headerName, @Argument(ArgumentType.STRING) String value) {
|
||||
|
||||
String cleanHeaderName = headerName.replaceAll("\n", "").replaceAll(" ", "").replaceAll("-", "");
|
||||
|
||||
return tabularData != null && tabularData.containsKey(cleanHeaderName) && tabularData.get(cleanHeaderName)
|
||||
.toString()
|
||||
.equals(value);
|
||||
return tabularData != null && tabularData.containsKey(cleanHeaderName) && tabularData.get(cleanHeaderName).toString().equals(value);
|
||||
}
|
||||
|
||||
|
||||
@ -366,7 +350,15 @@ public class Section {
|
||||
continue;
|
||||
}
|
||||
|
||||
var expandedEntities = findEntities(match + entity.getWord(), type, false, entity.isRedaction(), entity.getMatchedRule(), entity.getRedactionReason(), entity.getLegalBasis(), Engine.RULE, false);
|
||||
var expandedEntities = findEntities(match + entity.getWord(),
|
||||
type,
|
||||
false,
|
||||
entity.isRedaction(),
|
||||
entity.getMatchedRule(),
|
||||
entity.getRedactionReason(),
|
||||
entity.getLegalBasis(),
|
||||
Engine.RULE,
|
||||
false);
|
||||
expanded.addAll(EntitySearchUtils.findNonOverlappingMatchEntities(entities, expandedEntities));
|
||||
}
|
||||
}
|
||||
@ -429,7 +421,15 @@ public class Section {
|
||||
continue;
|
||||
}
|
||||
|
||||
var expandedEntities = findEntities(entity.getWord() + match, type, false, entity.isRedaction(), entity.getMatchedRule(), entity.getRedactionReason(), entity.getLegalBasis(), Engine.RULE, false);
|
||||
var expandedEntities = findEntities(entity.getWord() + match,
|
||||
type,
|
||||
false,
|
||||
entity.isRedaction(),
|
||||
entity.getMatchedRule(),
|
||||
entity.getRedactionReason(),
|
||||
entity.getLegalBasis(),
|
||||
Engine.RULE,
|
||||
false);
|
||||
expanded.addAll(EntitySearchUtils.findNonOverlappingMatchEntities(entities, expandedEntities));
|
||||
}
|
||||
}
|
||||
@ -453,9 +453,7 @@ public class Section {
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void redactNotImage(@Argument(ArgumentType.TYPE) String type,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.STRING) String reason) {
|
||||
public void redactNotImage(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.STRING) String reason) {
|
||||
|
||||
redactImage(type, ruleNumber, reason, null, false);
|
||||
}
|
||||
@ -463,7 +461,8 @@ public class Section {
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void redact(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
public void redact(@Argument(ArgumentType.TYPE) String type,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.STRING) String reason,
|
||||
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
|
||||
@ -473,8 +472,7 @@ public class Section {
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void redactNot(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.STRING) String reason) {
|
||||
public void redactNot(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.STRING) String reason) {
|
||||
|
||||
redact(type, ruleNumber, reason, null, false);
|
||||
}
|
||||
@ -482,7 +480,8 @@ public class Section {
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void redactLineAfter(@Argument(ArgumentType.STRING) String start, @Argument(ArgumentType.TYPE) String asType,
|
||||
public void redactLineAfter(@Argument(ArgumentType.STRING) String start,
|
||||
@Argument(ArgumentType.TYPE) String asType,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
|
||||
@Argument(ArgumentType.STRING) String reason,
|
||||
@ -509,7 +508,8 @@ public class Section {
|
||||
@SuppressWarnings("unused")
|
||||
public void redactByRegEx(@Argument(ArgumentType.REGEX) String pattern,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
|
||||
@Argument(ArgumentType.INTEGER) int group, @Argument(ArgumentType.TYPE) String asType,
|
||||
@Argument(ArgumentType.INTEGER) int group,
|
||||
@Argument(ArgumentType.TYPE) String asType,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.STRING) String reason,
|
||||
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
@ -522,7 +522,8 @@ public class Section {
|
||||
@SuppressWarnings("unused")
|
||||
public void redactNotByRegEx(@Argument(ArgumentType.REGEX) String pattern,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
|
||||
@Argument(ArgumentType.INTEGER) int group, @Argument(ArgumentType.TYPE) String asType,
|
||||
@Argument(ArgumentType.INTEGER) int group,
|
||||
@Argument(ArgumentType.TYPE) String asType,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.STRING) String reason) {
|
||||
|
||||
@ -533,7 +534,8 @@ public class Section {
|
||||
@Deprecated
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void redactBetween(@Argument(ArgumentType.STRING) String start, @Argument(ArgumentType.STRING) String stop,
|
||||
public void redactBetween(@Argument(ArgumentType.STRING) String start,
|
||||
@Argument(ArgumentType.STRING) String stop,
|
||||
@Argument(ArgumentType.TYPE) String asType,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
|
||||
@ -546,7 +548,8 @@ public class Section {
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void redactBetween(@Argument(ArgumentType.STRING) String start, @Argument(ArgumentType.STRING) String stop,
|
||||
public void redactBetween(@Argument(ArgumentType.STRING) String start,
|
||||
@Argument(ArgumentType.STRING) String stop,
|
||||
@Argument(ArgumentType.TYPE) String asType,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
|
||||
@ -560,7 +563,8 @@ public class Section {
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void redactBetween(@Argument(ArgumentType.STRING) String start, @Argument(ArgumentType.STRING) String stop,
|
||||
public void redactBetween(@Argument(ArgumentType.STRING) String start,
|
||||
@Argument(ArgumentType.STRING) String stop,
|
||||
@Argument(ArgumentType.TYPE) String asType,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
|
||||
@ -570,13 +574,26 @@ public class Section {
|
||||
@Argument(ArgumentType.BOOLEAN) boolean skipRemoveEntitiesContainedInLarger,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean sortedResult) {
|
||||
|
||||
redactBetween(start, stop, false, false, asType, ruleNumber, redactEverywhere, excludeHeadLine, reason, legalBasis, true, skipRemoveEntitiesContainedInLarger, sortedResult);
|
||||
redactBetween(start,
|
||||
stop,
|
||||
false,
|
||||
false,
|
||||
asType,
|
||||
ruleNumber,
|
||||
redactEverywhere,
|
||||
excludeHeadLine,
|
||||
reason,
|
||||
legalBasis,
|
||||
true,
|
||||
skipRemoveEntitiesContainedInLarger,
|
||||
sortedResult);
|
||||
}
|
||||
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void redactBetween(@Argument(ArgumentType.STRING) String start, @Argument(ArgumentType.STRING) String stop,
|
||||
public void redactBetween(@Argument(ArgumentType.STRING) String start,
|
||||
@Argument(ArgumentType.STRING) String stop,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean includeStart,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean includeStop,
|
||||
@Argument(ArgumentType.TYPE) String asType,
|
||||
@ -588,7 +605,19 @@ public class Section {
|
||||
@Argument(ArgumentType.BOOLEAN) boolean skipRemoveEntitiesContainedInLarger,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean sortedResult) {
|
||||
|
||||
redactBetween(start, stop, includeStart, includeStop, asType, ruleNumber, redactEverywhere, excludeHeadLine, reason, legalBasis, true, skipRemoveEntitiesContainedInLarger, sortedResult);
|
||||
redactBetween(start,
|
||||
stop,
|
||||
includeStart,
|
||||
includeStop,
|
||||
asType,
|
||||
ruleNumber,
|
||||
redactEverywhere,
|
||||
excludeHeadLine,
|
||||
reason,
|
||||
legalBasis,
|
||||
true,
|
||||
skipRemoveEntitiesContainedInLarger,
|
||||
sortedResult);
|
||||
}
|
||||
|
||||
|
||||
@ -619,7 +648,19 @@ public class Section {
|
||||
String stopValue = getFirstRexExMatch(searchTextAfter, stopPattern, stopPatternCaseInsensitive, stopGroup);
|
||||
|
||||
if (startValue != null && stopValue != null) {
|
||||
redactBetween(startValue, stopValue, includeStart, includeStop, type, ruleNumber, false, false, reason, legalBasis, true, skipRemoveEntitiesContainedInLarger, sortedResult);
|
||||
redactBetween(startValue,
|
||||
stopValue,
|
||||
includeStart,
|
||||
includeStop,
|
||||
type,
|
||||
ruleNumber,
|
||||
false,
|
||||
false,
|
||||
reason,
|
||||
legalBasis,
|
||||
true,
|
||||
skipRemoveEntitiesContainedInLarger,
|
||||
sortedResult);
|
||||
}
|
||||
}
|
||||
|
||||
@ -628,7 +669,8 @@ public class Section {
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void redactNotBetween(@Argument(ArgumentType.STRING) String start,
|
||||
@Argument(ArgumentType.STRING) String stop, @Argument(ArgumentType.TYPE) String asType,
|
||||
@Argument(ArgumentType.STRING) String stop,
|
||||
@Argument(ArgumentType.TYPE) String asType,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
|
||||
@Argument(ArgumentType.STRING) String reason) {
|
||||
@ -640,7 +682,8 @@ public class Section {
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void redactNotBetween(@Argument(ArgumentType.STRING) String start,
|
||||
@Argument(ArgumentType.STRING) String stop, @Argument(ArgumentType.TYPE) String asType,
|
||||
@Argument(ArgumentType.STRING) String stop,
|
||||
@Argument(ArgumentType.TYPE) String asType,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean excludeHeadLine,
|
||||
@ -680,7 +723,8 @@ public class Section {
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void redactCell(@Argument(ArgumentType.STRING) String cellHeader,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.TYPE) String type,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.TYPE) String type,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean addAsRecommendations,
|
||||
@Argument(ArgumentType.STRING) String reason,
|
||||
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
@ -755,9 +799,7 @@ public class Section {
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.STRING) String reason) {
|
||||
|
||||
Set<Entity> references = entities.stream()
|
||||
.filter(entity -> entity.getType().equals(referenceType))
|
||||
.collect(Collectors.toSet());
|
||||
Set<Entity> references = entities.stream().filter(entity -> entity.getType().equals(referenceType)).collect(Collectors.toSet());
|
||||
|
||||
entities.forEach(entity -> {
|
||||
if (entity.getType().equals(type)) {
|
||||
@ -791,7 +833,8 @@ public class Section {
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void addRedaction(@Argument(ArgumentType.STRING) String value, @Argument(ArgumentType.TYPE) String asType,
|
||||
public void addRedaction(@Argument(ArgumentType.STRING) String value,
|
||||
@Argument(ArgumentType.TYPE) String asType,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.STRING) String reason,
|
||||
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
|
||||
@ -813,8 +856,7 @@ public class Section {
|
||||
@SuppressWarnings("unused")
|
||||
public void ignoreRecommendations(@Argument(ArgumentType.TYPE) String type) {
|
||||
|
||||
entities.removeIf(entity -> entity.getType().equals(type) && entity.getEntityType()
|
||||
.equals(EntityType.RECOMMENDATION));
|
||||
entities.removeIf(entity -> entity.getType().equals(type) && entity.getEntityType().equals(EntityType.RECOMMENDATION));
|
||||
}
|
||||
|
||||
|
||||
@ -873,8 +915,7 @@ public class Section {
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void addHintAnnotation(@Argument(ArgumentType.STRING) String value,
|
||||
@Argument(ArgumentType.TYPE) String asType) {
|
||||
public void addHintAnnotation(@Argument(ArgumentType.STRING) String value, @Argument(ArgumentType.TYPE) String asType) {
|
||||
|
||||
Set<Entity> found = findEntities(value.trim(), asType, true, false, 0, null, null, Engine.RULE, false);
|
||||
EntitySearchUtils.addEntitiesIgnoreRank(entities, found);
|
||||
@ -883,8 +924,7 @@ public class Section {
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void recommendLineAfter(@Argument(ArgumentType.STRING) String start,
|
||||
@Argument(ArgumentType.TYPE) String asType) {
|
||||
public void recommendLineAfter(@Argument(ArgumentType.STRING) String start, @Argument(ArgumentType.TYPE) String asType) {
|
||||
|
||||
String[] values = StringUtils.substringsBetween(text, start, "\n");
|
||||
|
||||
@ -909,9 +949,7 @@ public class Section {
|
||||
|
||||
@ThenAction
|
||||
@SuppressWarnings("unused")
|
||||
public void highlightCell(@Argument(ArgumentType.STRING) String cellHeader,
|
||||
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
|
||||
@Argument(ArgumentType.TYPE) String type) {
|
||||
public void highlightCell(@Argument(ArgumentType.STRING) String cellHeader, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.TYPE) String type) {
|
||||
|
||||
annotateCell(cellHeader, ruleNumber, type, false, false, null, null);
|
||||
}
|
||||
@ -957,7 +995,7 @@ public class Section {
|
||||
|
||||
|
||||
@ThenAction
|
||||
public void redactWordPartByRegEx(@Argument(ArgumentType.REGEX)String pattern,
|
||||
public void redactWordPartByRegEx(@Argument(ArgumentType.REGEX) String pattern,
|
||||
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
|
||||
@Argument(ArgumentType.INTEGER) int group,
|
||||
@Argument(ArgumentType.INTEGER) int redactGroup,
|
||||
@ -1051,8 +1089,14 @@ public class Section {
|
||||
}
|
||||
|
||||
|
||||
private void redactAndRecommendByRegEx(String pattern, boolean patternCaseInsensitive, int group, String asType,
|
||||
int ruleNumber, String reason, String legalBasis, boolean redaction) {
|
||||
private void redactAndRecommendByRegEx(String pattern,
|
||||
boolean patternCaseInsensitive,
|
||||
int group,
|
||||
String asType,
|
||||
int ruleNumber,
|
||||
String reason,
|
||||
String legalBasis,
|
||||
boolean redaction) {
|
||||
|
||||
Pattern compiledPattern = Patterns.getCompiledPattern(pattern, patternCaseInsensitive);
|
||||
Matcher matcher = compiledPattern.matcher(searchText);
|
||||
@ -1067,12 +1111,21 @@ public class Section {
|
||||
}
|
||||
|
||||
|
||||
private Set<Entity> findEntities(String value, String asType, boolean caseInsensitive, boolean redacted,
|
||||
int ruleNumber, String reason, String legalBasis, Engine engine,
|
||||
private Set<Entity> findEntities(String value,
|
||||
String asType,
|
||||
boolean caseInsensitive,
|
||||
boolean redacted,
|
||||
int ruleNumber,
|
||||
String reason,
|
||||
String legalBasis,
|
||||
Engine engine,
|
||||
boolean asRecommendation) {
|
||||
|
||||
String text = caseInsensitive ? searchText.toLowerCase() : searchText;
|
||||
Set<Entity> found = EntitySearchUtils.findEntities(text, new SearchImplementation(value, caseInsensitive), dictionary.getType(asType), new FindEntityDetails(asType, headline, sectionNumber, false, false, engine, asRecommendation ? EntityType.RECOMMENDATION : EntityType.ENTITY));
|
||||
Set<Entity> found = EntitySearchUtils.findEntities(text,
|
||||
new SearchImplementation(value, caseInsensitive),
|
||||
dictionary.getType(asType),
|
||||
new FindEntityDetails(asType, headline, sectionNumber, false, false, engine, asRecommendation ? EntityType.RECOMMENDATION : EntityType.ENTITY));
|
||||
found.forEach(entity -> {
|
||||
if (redacted) {
|
||||
entity.setRedaction(true);
|
||||
@ -1113,8 +1166,7 @@ public class Section {
|
||||
}
|
||||
|
||||
|
||||
private void annotateCell(String cellHeader, int ruleNumber, String type, boolean redact,
|
||||
boolean addAsRecommendations, String reason, String legalBasis) {
|
||||
private void annotateCell(String cellHeader, int ruleNumber, String type, boolean redact, boolean addAsRecommendations, String reason, String legalBasis) {
|
||||
|
||||
String cleanHeaderName = cellHeader.replaceAll("\n", "").replaceAll(" ", "").replaceAll("-", "");
|
||||
|
||||
@ -1124,7 +1176,16 @@ public class Section {
|
||||
} else {
|
||||
String word = value.toString();
|
||||
|
||||
Entity entity = new Entity(word, type, value.getRowSpanStart(), value.getRowSpanStart() + word.length(), headline, sectionNumber, false, false, Engine.RULE, EntityType.ENTITY);
|
||||
Entity entity = new Entity(word,
|
||||
type,
|
||||
value.getRowSpanStart(),
|
||||
value.getRowSpanStart() + word.length(),
|
||||
headline,
|
||||
sectionNumber,
|
||||
false,
|
||||
false,
|
||||
Engine.RULE,
|
||||
EntityType.ENTITY);
|
||||
entity.setRedaction(redact);
|
||||
entity.setMatchedRule(ruleNumber);
|
||||
entity.setRedactionReason(reason);
|
||||
@ -1139,7 +1200,10 @@ public class Section {
|
||||
singleEntitySet.add(entity);
|
||||
|
||||
EntitySearchUtils.clearAndFindPositions(singleEntitySet, searchableText, dictionary, manualRedactions);
|
||||
EntitySearchUtils.removeFalsePositives(singleEntitySet, searchText, dictionary.getType(type), new FindEntityDetails(type, headline, sectionNumber, false, false, Engine.RULE, EntityType.ENTITY));
|
||||
EntitySearchUtils.removeFalsePositives(singleEntitySet,
|
||||
searchText,
|
||||
dictionary.getType(type),
|
||||
new FindEntityDetails(type, headline, sectionNumber, false, false, Engine.RULE, EntityType.ENTITY));
|
||||
|
||||
if (!singleEntitySet.isEmpty()) {
|
||||
EntitySearchUtils.addEntitiesWithHigherRank(entities, singleEntitySet.iterator().next(), dictionary);
|
||||
@ -1165,8 +1229,7 @@ public class Section {
|
||||
}
|
||||
|
||||
|
||||
private void redactLineAfter(String start, String asType, int ruleNumber, boolean redactEverywhere, String reason,
|
||||
String legalBasis, boolean redaction) {
|
||||
private void redactLineAfter(String start, String asType, int ruleNumber, boolean redactEverywhere, String reason, String legalBasis, boolean redaction) {
|
||||
|
||||
String[] values = StringUtils.substringsBetween(text, start, "\n");
|
||||
|
||||
@ -1185,8 +1248,7 @@ public class Section {
|
||||
}
|
||||
|
||||
|
||||
public void redactLineAfterAcrossColumns(String start, String asType, int ruleNumber, boolean redactEverywhere,
|
||||
String reason, String legalBasis) {
|
||||
public void redactLineAfterAcrossColumns(String start, String asType, int ruleNumber, boolean redactEverywhere, String reason, String legalBasis) {
|
||||
|
||||
String[] values = StringUtils.substringsBetween(searchableText.getAsStringWithLinebreaksSorted(), start, "\n");
|
||||
|
||||
@ -1205,9 +1267,7 @@ public class Section {
|
||||
}
|
||||
|
||||
|
||||
|
||||
private void redactByRegEx(String pattern, boolean patternCaseInsensitive, int group, String asType, int ruleNumber,
|
||||
String reason, String legalBasis, boolean redaction) {
|
||||
private void redactByRegEx(String pattern, boolean patternCaseInsensitive, int group, String asType, int ruleNumber, String reason, String legalBasis, boolean redaction) {
|
||||
|
||||
Pattern compiledPattern = Patterns.getCompiledPattern(pattern, patternCaseInsensitive);
|
||||
|
||||
@ -1239,9 +1299,18 @@ public class Section {
|
||||
}
|
||||
|
||||
|
||||
private void redactBetween(String start, String stop, boolean includeStart, boolean includeStop, String asType,
|
||||
int ruleNumber, boolean redactEverywhere, boolean excludeHeadLine, String reason,
|
||||
String legalBasis, boolean redaction, boolean skipRemoveEntitiesContainedInLarger,
|
||||
private void redactBetween(String start,
|
||||
String stop,
|
||||
boolean includeStart,
|
||||
boolean includeStop,
|
||||
String asType,
|
||||
int ruleNumber,
|
||||
boolean redactEverywhere,
|
||||
boolean excludeHeadLine,
|
||||
String reason,
|
||||
String legalBasis,
|
||||
boolean redaction,
|
||||
boolean skipRemoveEntitiesContainedInLarger,
|
||||
boolean sortedResult) {
|
||||
|
||||
String[] values = new String[1];
|
||||
@ -1297,8 +1366,7 @@ public class Section {
|
||||
}
|
||||
|
||||
|
||||
private void redactLinesBetween(String start, String stop, String asType, int ruleNumber, boolean redactEverywhere,
|
||||
String reason, String legalBasis, boolean redaction) {
|
||||
private void redactLinesBetween(String start, String stop, String asType, int ruleNumber, boolean redactEverywhere, String reason, String legalBasis, boolean redaction) {
|
||||
|
||||
String[] values = StringUtils.substringsBetween(text, start, stop);
|
||||
|
||||
|
||||
@ -4,6 +4,7 @@ import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import com.dslplatform.json.CompiledJson;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model.image;
|
||||
|
||||
import com.dslplatform.json.CompiledJson;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
@ -9,4 +10,5 @@ public class FilterGeometry {
|
||||
|
||||
private ImageSize imageSize;
|
||||
private ImageFormat imageFormat;
|
||||
|
||||
}
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model.image;
|
||||
|
||||
import com.dslplatform.json.CompiledJson;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
@ -10,4 +11,5 @@ public class Filters {
|
||||
private FilterGeometry geometry;
|
||||
private Probability probability;
|
||||
private boolean allPassed;
|
||||
|
||||
}
|
||||
|
||||
@ -1,11 +1,14 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model.image;
|
||||
|
||||
import com.dslplatform.json.CompiledJson;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
@CompiledJson
|
||||
public class Geometry {
|
||||
|
||||
private float width;
|
||||
private float height;
|
||||
|
||||
}
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model.image;
|
||||
|
||||
import com.dslplatform.json.CompiledJson;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model.image;
|
||||
|
||||
import com.dslplatform.json.CompiledJson;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
@ -12,4 +13,5 @@ public class ImageMetadata {
|
||||
private Geometry geometry;
|
||||
private Filters filters;
|
||||
private boolean alpha;
|
||||
|
||||
}
|
||||
|
||||
@ -4,6 +4,7 @@ import com.dslplatform.json.CompiledJson;
|
||||
import com.dslplatform.json.JsonAttribute;
|
||||
import com.fasterxml.jackson.annotation.JsonAlias;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
import java.util.ArrayList;
|
||||
@ -23,6 +24,7 @@ public class ImageServiceResponse {
|
||||
|
||||
private List<ImageMetadata> dataCV = new ArrayList<>();
|
||||
|
||||
|
||||
@JsonProperty(value = "imageMetadata")
|
||||
@JsonAlias("data")
|
||||
@JsonAttribute(alternativeNames = {"imageMetadata"})
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model.image;
|
||||
|
||||
import com.dslplatform.json.CompiledJson;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
|
||||
@ -1,14 +1,17 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model.image;
|
||||
|
||||
import com.dslplatform.json.CompiledJson;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
@CompiledJson
|
||||
public class Position {
|
||||
|
||||
private float x1;
|
||||
private float x2;
|
||||
private float y1;
|
||||
private float y2;
|
||||
private int pageNumber;
|
||||
|
||||
}
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
package com.iqser.red.service.redaction.v1.server.redaction.model.image;
|
||||
|
||||
import com.dslplatform.json.CompiledJson;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
|
||||
@ -10,6 +10,7 @@ import lombok.Data;
|
||||
@Data
|
||||
@CompiledJson
|
||||
public class TableServiceResponse {
|
||||
|
||||
private String dossierId;
|
||||
private String fileId;
|
||||
private String operation;
|
||||
|
||||
@ -4,6 +4,7 @@ import com.iqser.red.service.redaction.v1.model.Argument;
|
||||
import com.iqser.red.service.redaction.v1.model.RuleBuilderModel;
|
||||
import com.iqser.red.service.redaction.v1.model.RuleElement;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.lang.reflect.Method;
|
||||
@ -21,16 +22,18 @@ public class RuleBuilderModelService {
|
||||
|
||||
RuleBuilderModel ruleBuilderModel = new RuleBuilderModel();
|
||||
|
||||
|
||||
ruleBuilderModel.setWhenClauses(whenConditions.stream().map(c -> new RuleElement(c.getName(), toArguments(c))).collect(Collectors.toList()));
|
||||
ruleBuilderModel.setThenConditions(thenActions.stream().map(c -> new RuleElement(c.getName(), toArguments(c))).collect(Collectors.toList()));
|
||||
|
||||
return ruleBuilderModel;
|
||||
}
|
||||
|
||||
|
||||
private List<Argument> toArguments(Method c) {
|
||||
|
||||
return Arrays.stream(c.getParameters())
|
||||
.map(parameter -> new Argument(parameter.getName(), parameter.getAnnotation(Section.Argument.class).value()))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -90,7 +90,9 @@ public class AnalyzeService {
|
||||
Document classifiedDoc;
|
||||
|
||||
try {
|
||||
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.ORIGIN));
|
||||
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(analyzeRequest.getDossierId(),
|
||||
analyzeRequest.getFileId(),
|
||||
FileType.ORIGIN));
|
||||
|
||||
Map<Integer, List<PdfImage>> pdfImages = null;
|
||||
if (redactionServiceSettings.isEnableImageClassification()) {
|
||||
@ -112,10 +114,10 @@ public class AnalyzeService {
|
||||
// enhance section grid with headline data
|
||||
sectionTexts.forEach(sectionText -> classifiedDoc.getSectionGrid()
|
||||
.getSections()
|
||||
.add(new SectionGrid.SectionGridSection(sectionText.getSectionNumber(), sectionText.getHeadline(), sectionText.getSectionAreas()
|
||||
.stream()
|
||||
.map(SectionArea::getPage)
|
||||
.collect(Collectors.toSet()), sectionText.getSectionAreas())));
|
||||
.add(new SectionGrid.SectionGridSection(sectionText.getSectionNumber(),
|
||||
sectionText.getHeadline(),
|
||||
sectionText.getSectionAreas().stream().map(SectionArea::getPage).collect(Collectors.toSet()),
|
||||
sectionText.getSectionAreas())));
|
||||
|
||||
log.info("Store text, simplified text and section grid for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.TEXT, text);
|
||||
@ -146,10 +148,15 @@ public class AnalyzeService {
|
||||
return analyze(analyzeRequest);
|
||||
}
|
||||
|
||||
DictionaryIncrement dictionaryIncrement = dictionaryService.getDictionaryIncrements(analyzeRequest.getDossierTemplateId(), new DictionaryVersion(redactionLog.getDictionaryVersion(), redactionLog.getDossierDictionaryVersion()), analyzeRequest.getDossierId());
|
||||
DictionaryIncrement dictionaryIncrement = dictionaryService.getDictionaryIncrements(analyzeRequest.getDossierTemplateId(),
|
||||
new DictionaryVersion(redactionLog.getDictionaryVersion(), redactionLog.getDossierDictionaryVersion()),
|
||||
analyzeRequest.getDossierId());
|
||||
|
||||
Set<Integer> sectionsToReanalyse = !analyzeRequest.getSectionsToReanalyse()
|
||||
.isEmpty() ? analyzeRequest.getSectionsToReanalyse() : findSectionsToReanalyse(dictionaryIncrement, redactionLog, text, analyzeRequest);
|
||||
Set<Integer> sectionsToReanalyse = !analyzeRequest.getSectionsToReanalyse().isEmpty() ? analyzeRequest.getSectionsToReanalyse() : findSectionsToReanalyse(
|
||||
dictionaryIncrement,
|
||||
redactionLog,
|
||||
text,
|
||||
analyzeRequest);
|
||||
|
||||
if (sectionsToReanalyse.isEmpty()) {
|
||||
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement.getDictionaryVersion(), true);
|
||||
@ -175,11 +182,13 @@ public class AnalyzeService {
|
||||
|
||||
var newRedactionLogEntries = redactionLogCreatorService.createRedactionLog(pageEntities, text.getNumberOfPages(), analyzeRequest.getDossierTemplateId());
|
||||
|
||||
var importedRedactionFilteredEntries = importedRedactionService.processImportedRedactions(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId(), analyzeRequest.getFileId(), newRedactionLogEntries, false);
|
||||
var importedRedactionFilteredEntries = importedRedactionService.processImportedRedactions(analyzeRequest.getDossierTemplateId(),
|
||||
analyzeRequest.getDossierId(),
|
||||
analyzeRequest.getFileId(),
|
||||
newRedactionLogEntries,
|
||||
false);
|
||||
|
||||
redactionLog.getRedactionLogEntry()
|
||||
.removeIf(entry -> sectionsToReanalyse.contains(entry.getSectionNumber()) && !entry.getType()
|
||||
.equals(IMPORTED_REDACTION_TYPE));
|
||||
redactionLog.getRedactionLogEntry().removeIf(entry -> sectionsToReanalyse.contains(entry.getSectionNumber()) && !entry.getType().equals(IMPORTED_REDACTION_TYPE));
|
||||
redactionLog.getRedactionLogEntry().addAll(importedRedactionFilteredEntries);
|
||||
|
||||
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement.getDictionaryVersion(), true);
|
||||
@ -208,11 +217,20 @@ public class AnalyzeService {
|
||||
List<RedactionLogEntry> redactionLogEntries = redactionLogCreatorService.createRedactionLog(pageEntities, text.getNumberOfPages(), analyzeRequest.getDossierTemplateId());
|
||||
|
||||
var legalBasis = legalBasisClient.getLegalBasisMapping(analyzeRequest.getDossierTemplateId());
|
||||
var redactionLog = new RedactionLog(redactionServiceSettings.getAnalysisVersion(), analyzeRequest.getAnalysisNumber(), redactionLogEntries, convert(legalBasis), dictionary.getVersion()
|
||||
.getDossierTemplateVersion(), dictionary.getVersion()
|
||||
.getDossierVersion(), rulesVersion, legalBasisClient.getVersion(analyzeRequest.getDossierTemplateId()));
|
||||
var redactionLog = new RedactionLog(redactionServiceSettings.getAnalysisVersion(),
|
||||
analyzeRequest.getAnalysisNumber(),
|
||||
redactionLogEntries,
|
||||
convert(legalBasis),
|
||||
dictionary.getVersion().getDossierTemplateVersion(),
|
||||
dictionary.getVersion().getDossierVersion(),
|
||||
rulesVersion,
|
||||
legalBasisClient.getVersion(analyzeRequest.getDossierTemplateId()));
|
||||
|
||||
var importedRedactionFilteredEntries = importedRedactionService.processImportedRedactions(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId(), analyzeRequest.getFileId(), redactionLog.getRedactionLogEntry(), true);
|
||||
var importedRedactionFilteredEntries = importedRedactionService.processImportedRedactions(analyzeRequest.getDossierTemplateId(),
|
||||
analyzeRequest.getDossierId(),
|
||||
analyzeRequest.getFileId(),
|
||||
redactionLog.getRedactionLogEntry(),
|
||||
true);
|
||||
redactionLog.setRedactionLogEntry(importedRedactionFilteredEntries);
|
||||
|
||||
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionary.getVersion(), false);
|
||||
@ -220,8 +238,7 @@ public class AnalyzeService {
|
||||
|
||||
|
||||
@Timed("redactmanager_findSectionsToReanalyse")
|
||||
private Set<Integer> findSectionsToReanalyse(DictionaryIncrement dictionaryIncrement, RedactionLog redactionLog,
|
||||
Text text, AnalyzeRequest analyzeRequest) {
|
||||
private Set<Integer> findSectionsToReanalyse(DictionaryIncrement dictionaryIncrement, RedactionLog redactionLog, Text text, AnalyzeRequest analyzeRequest) {
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
Set<String> relevantManuallyModifiedAnnotationIds = getRelevantManuallyModifiedAnnotationIds(analyzeRequest.getManualRedactions());
|
||||
@ -237,9 +254,8 @@ public class AnalyzeService {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
var dictionaryIncrementsSearch = new SearchImplementation(dictionaryIncrement.getValues().stream()
|
||||
.map(DictionaryIncrementValue::getValue).collect(Collectors.toList()), true);
|
||||
var dictionaryIncrementsSearch = new SearchImplementation(dictionaryIncrement.getValues().stream().map(DictionaryIncrementValue::getValue).collect(Collectors.toList()),
|
||||
true);
|
||||
|
||||
for (SectionText sectionText : text.getSectionTexts()) {
|
||||
|
||||
@ -255,8 +271,11 @@ public class AnalyzeService {
|
||||
}
|
||||
|
||||
|
||||
private AnalyzeResult finalizeAnalysis(@RequestBody AnalyzeRequest analyzeRequest, long startTime,
|
||||
RedactionLog redactionLog, Text text, DictionaryVersion dictionaryVersion,
|
||||
private AnalyzeResult finalizeAnalysis(@RequestBody AnalyzeRequest analyzeRequest,
|
||||
long startTime,
|
||||
RedactionLog redactionLog,
|
||||
Text text,
|
||||
DictionaryVersion dictionaryVersion,
|
||||
boolean isReanalysis) {
|
||||
|
||||
redactionLog.setDictionaryVersion(dictionaryVersion.getDossierTemplateVersion());
|
||||
@ -264,7 +283,10 @@ public class AnalyzeService {
|
||||
|
||||
excludeExcludedPages(redactionLog, analyzeRequest.getExcludedPages());
|
||||
|
||||
var redactionLogChange = redactionChangeLogService.computeChanges(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), redactionLog, analyzeRequest.getAnalysisNumber());
|
||||
var redactionLogChange = redactionChangeLogService.computeChanges(analyzeRequest.getDossierId(),
|
||||
analyzeRequest.getFileId(),
|
||||
redactionLog,
|
||||
analyzeRequest.getAnalysisNumber());
|
||||
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.REDACTION_LOG, redactionLogChange.getRedactionLog());
|
||||
|
||||
long duration = System.currentTimeMillis() - startTime;
|
||||
@ -293,31 +315,27 @@ public class AnalyzeService {
|
||||
return new HashSet<>();
|
||||
}
|
||||
|
||||
return Stream.concat(manualRedactions.getResizeRedactions()
|
||||
.stream()
|
||||
.map(ManualResizeRedaction::getAnnotationId), Stream.concat(manualRedactions.getLegalBasisChanges()
|
||||
.stream()
|
||||
.map(ManualLegalBasisChange::getAnnotationId), Stream.concat(manualRedactions.getImageRecategorization()
|
||||
.stream()
|
||||
.map(ManualImageRecategorization::getAnnotationId), Stream.concat(manualRedactions.getIdsToRemove()
|
||||
.stream()
|
||||
.map(IdRemoval::getAnnotationId), manualRedactions.getForceRedactions()
|
||||
.stream()
|
||||
.map(ManualForceRedaction::getAnnotationId))))).collect(Collectors.toSet());
|
||||
return Stream.concat(manualRedactions.getResizeRedactions().stream().map(ManualResizeRedaction::getAnnotationId),
|
||||
Stream.concat(manualRedactions.getLegalBasisChanges().stream().map(ManualLegalBasisChange::getAnnotationId),
|
||||
Stream.concat(manualRedactions.getImageRecategorization().stream().map(ManualImageRecategorization::getAnnotationId),
|
||||
Stream.concat(manualRedactions.getIdsToRemove().stream().map(IdRemoval::getAnnotationId),
|
||||
manualRedactions.getForceRedactions().stream().map(ManualForceRedaction::getAnnotationId))))).collect(Collectors.toSet());
|
||||
}
|
||||
|
||||
|
||||
public List<RedactionLogLegalBasis> convert(List<LegalBasis> legalBasis) {
|
||||
|
||||
return legalBasis.stream().map(l -> new RedactionLogLegalBasis(l.getName(), l.getDescription(), l.getReason())).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
||||
public Image convert(RedactionLogEntry entry) {
|
||||
|
||||
Rectangle position = entry.getPositions().get(0);
|
||||
|
||||
return Image.builder()
|
||||
.type(entry.getType())
|
||||
.position(new RedRectangle2D(position.getTopLeft().getX(), position.getTopLeft()
|
||||
.getY(), position.getWidth(), position.getHeight()))
|
||||
.position(new RedRectangle2D(position.getTopLeft().getX(), position.getTopLeft().getY(), position.getWidth(), position.getHeight()))
|
||||
.sectionNumber(entry.getSectionNumber())
|
||||
.section(entry.getSection())
|
||||
.page(position.getPage())
|
||||
|
||||
@ -5,10 +5,12 @@ import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.ty
|
||||
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.model.*;
|
||||
|
||||
import feign.FeignException;
|
||||
import io.micrometer.core.annotation.Timed;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.apache.commons.lang3.SerializationUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
@ -46,16 +48,12 @@ public class DictionaryService {
|
||||
updateDictionaryEntry(dossierTemplateId, dossierDictionaryVersion, getVersion(dossierDictionary), dossierId);
|
||||
}
|
||||
|
||||
return DictionaryVersion.builder()
|
||||
.dossierTemplateVersion(dossierTemplateDictionaryVersion)
|
||||
.dossierVersion(dossierDictionaryVersion)
|
||||
.build();
|
||||
return DictionaryVersion.builder().dossierTemplateVersion(dossierTemplateDictionaryVersion).dossierVersion(dossierDictionaryVersion).build();
|
||||
}
|
||||
|
||||
|
||||
@Timed("redactmanager_getDictionaryIncrements")
|
||||
public DictionaryIncrement getDictionaryIncrements(String dossierTemplateId, DictionaryVersion fromVersion,
|
||||
String dossierId) {
|
||||
public DictionaryIncrement getDictionaryIncrements(String dossierTemplateId, DictionaryVersion fromVersion, String dossierId) {
|
||||
|
||||
DictionaryVersion version = updateDictionary(dossierTemplateId, dossierId);
|
||||
|
||||
@ -109,7 +107,8 @@ public class DictionaryService {
|
||||
try {
|
||||
DictionaryRepresentation dictionaryRepresentation = new DictionaryRepresentation();
|
||||
|
||||
var typeResponse = dossierId == null ? dictionaryClient.getAllTypesForDossierTemplate(dossierTemplateId, false) : dictionaryClient.getAllTypesForDossier(dossierId, false);
|
||||
var typeResponse = dossierId == null ? dictionaryClient.getAllTypesForDossierTemplate(dossierTemplateId, false) : dictionaryClient.getAllTypesForDossier(dossierId,
|
||||
false);
|
||||
if (CollectionUtils.isNotEmpty(typeResponse)) {
|
||||
|
||||
List<DictionaryModel> dictionary = typeResponse.stream().map(t -> {
|
||||
@ -117,16 +116,10 @@ public class DictionaryService {
|
||||
Optional<DictionaryModel> oldModel;
|
||||
if (dossierId == null) {
|
||||
var representation = dictionariesByDossierTemplate.get(dossierTemplateId);
|
||||
oldModel = representation != null ? representation.getDictionary()
|
||||
.stream()
|
||||
.filter(f -> f.getType().equals(t.getType()))
|
||||
.findAny() : Optional.empty();
|
||||
oldModel = representation != null ? representation.getDictionary().stream().filter(f -> f.getType().equals(t.getType())).findAny() : Optional.empty();
|
||||
} else {
|
||||
var representation = dictionariesByDossier.get(dossierId);
|
||||
oldModel = representation != null ? representation.getDictionary()
|
||||
.stream()
|
||||
.filter(f -> f.getType().equals(t.getType()))
|
||||
.findAny() : Optional.empty();
|
||||
oldModel = representation != null ? representation.getDictionary().stream().filter(f -> f.getType().equals(t.getType())).findAny() : Optional.empty();
|
||||
}
|
||||
|
||||
Set<DictionaryEntry> entries = new HashSet<>();
|
||||
@ -135,18 +128,9 @@ public class DictionaryService {
|
||||
|
||||
DictionaryEntries newEntries = getEntries(t.getId(), currentVersion);
|
||||
|
||||
var newValues = newEntries.getEntries()
|
||||
.stream()
|
||||
.map(DictionaryEntry::getValue)
|
||||
.collect(Collectors.toSet());
|
||||
var newFalsePositivesValues = newEntries.getFalsePositives()
|
||||
.stream()
|
||||
.map(DictionaryEntry::getValue)
|
||||
.collect(Collectors.toSet());
|
||||
var newFalseRecommendationsValues = newEntries.getFalseRecommendations()
|
||||
.stream()
|
||||
.map(DictionaryEntry::getValue)
|
||||
.collect(Collectors.toSet());
|
||||
var newValues = newEntries.getEntries().stream().map(DictionaryEntry::getValue).collect(Collectors.toSet());
|
||||
var newFalsePositivesValues = newEntries.getFalsePositives().stream().map(DictionaryEntry::getValue).collect(Collectors.toSet());
|
||||
var newFalseRecommendationsValues = newEntries.getFalseRecommendations().stream().map(DictionaryEntry::getValue).collect(Collectors.toSet());
|
||||
|
||||
oldModel.ifPresent(oldDictionaryModel -> {
|
||||
|
||||
@ -170,7 +154,15 @@ public class DictionaryService {
|
||||
falsePositives.addAll(newEntries.getFalsePositives());
|
||||
falseRecommendations.addAll(newEntries.getFalseRecommendations());
|
||||
|
||||
return new DictionaryModel(t.getType(), t.getRank(), convertColor(t.getHexColor()), t.isCaseInsensitive(), t.isHint(), entries, falsePositives, falseRecommendations, dossierId != null);
|
||||
return new DictionaryModel(t.getType(),
|
||||
t.getRank(),
|
||||
convertColor(t.getHexColor()),
|
||||
t.isCaseInsensitive(),
|
||||
t.isHint(),
|
||||
entries,
|
||||
falsePositives,
|
||||
falseRecommendations,
|
||||
dossierId != null);
|
||||
}).sorted(Comparator.comparingInt(DictionaryModel::getRank).reversed()).collect(Collectors.toList());
|
||||
|
||||
dictionary.forEach(dm -> dictionaryRepresentation.getLocalAccessMap().put(dm.getType(), dm));
|
||||
@ -211,7 +203,11 @@ public class DictionaryService {
|
||||
falsePositives.forEach(entry -> entry.setValue(entry.getValue().toLowerCase(Locale.ROOT)));
|
||||
falseRecommendations.forEach(entry -> entry.setValue(entry.getValue().toLowerCase(Locale.ROOT)));
|
||||
}
|
||||
log.info("Dictionary update returned {} entries {} falsePositives and {} falseRecommendations for type {}", entries.size(), falsePositives.size(), falseRecommendations.size(), type.getType());
|
||||
log.info("Dictionary update returned {} entries {} falsePositives and {} falseRecommendations for type {}",
|
||||
entries.size(),
|
||||
falsePositives.size(),
|
||||
falseRecommendations.size(),
|
||||
type.getType());
|
||||
return new DictionaryEntries(entries, falsePositives, falseRecommendations);
|
||||
}
|
||||
|
||||
@ -263,12 +259,8 @@ public class DictionaryService {
|
||||
dossierDictionaryVersion = dossierRepresentation.getDictionaryVersion();
|
||||
}
|
||||
|
||||
return new Dictionary(copy.stream()
|
||||
.sorted(Comparator.comparingInt(DictionaryModel::getRank).reversed())
|
||||
.collect(Collectors.toList()), DictionaryVersion.builder()
|
||||
.dossierTemplateVersion(dossierTemplateRepresentation.getDictionaryVersion())
|
||||
.dossierVersion(dossierDictionaryVersion)
|
||||
.build());
|
||||
return new Dictionary(copy.stream().sorted(Comparator.comparingInt(DictionaryModel::getRank).reversed()).collect(Collectors.toList()),
|
||||
DictionaryVersion.builder().dossierTemplateVersion(dossierTemplateRepresentation.getDictionaryVersion()).dossierVersion(dossierDictionaryVersion).build());
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -6,6 +6,7 @@ import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
|
||||
|
||||
import io.micrometer.core.annotation.Timed;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.kie.api.KieServices;
|
||||
import org.kie.api.builder.KieBuilder;
|
||||
@ -106,8 +107,7 @@ public class DroolsExecutionService {
|
||||
|
||||
KieFileSystem kieFileSystem = kieServices.newKieFileSystem();
|
||||
InputStream input = new ByteArrayInputStream(rules.getBytes(StandardCharsets.UTF_8));
|
||||
kieFileSystem.write("src/main/resources/drools/rules" + dossierTemplateId + ".drl", kieServices.getResources()
|
||||
.newInputStreamResource(input));
|
||||
kieFileSystem.write("src/main/resources/drools/rules" + dossierTemplateId + ".drl", kieServices.getResources().newInputStreamResource(input));
|
||||
KieBuilder kieBuilder = kieServices.newKieBuilder(kieFileSystem);
|
||||
kieBuilder.buildAll();
|
||||
return kieBuilder.getKieModule();
|
||||
|
||||
@ -14,9 +14,11 @@ import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUti
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.FindEntityDetails;
|
||||
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
|
||||
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
|
||||
|
||||
import io.micrometer.core.annotation.Timed;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.kie.api.runtime.KieContainer;
|
||||
import org.springframework.stereotype.Service;
|
||||
@ -35,8 +37,7 @@ public class EntityRedactionService {
|
||||
private final SurroundingWordsService surroundingWordsService;
|
||||
|
||||
|
||||
public PageEntities findEntities(Dictionary dictionary, List<SectionText> sectionTexts, KieContainer kieContainer,
|
||||
AnalyzeRequest analyzeRequest, NerEntities nerEntities) {
|
||||
public PageEntities findEntities(Dictionary dictionary, List<SectionText> sectionTexts, KieContainer kieContainer, AnalyzeRequest analyzeRequest, NerEntities nerEntities) {
|
||||
|
||||
Map<Integer, Set<Image>> imagesPerPage = new HashMap<>();
|
||||
Set<Entity> entities = findEntities(sectionTexts, dictionary, kieContainer, analyzeRequest, false, null, imagesPerPage, nerEntities);
|
||||
@ -55,19 +56,29 @@ public class EntityRedactionService {
|
||||
}
|
||||
|
||||
|
||||
public Set<Entity> findEntities(List<SectionText> reanalysisSections, Dictionary dictionary,
|
||||
KieContainer kieContainer, AnalyzeRequest analyzeRequest, boolean local,
|
||||
public Set<Entity> findEntities(List<SectionText> reanalysisSections,
|
||||
Dictionary dictionary,
|
||||
KieContainer kieContainer,
|
||||
AnalyzeRequest analyzeRequest,
|
||||
boolean local,
|
||||
Map<Integer, Set<Entity>> hintsPerSectionNumber,
|
||||
Map<Integer, Set<Image>> imagesPerPage, NerEntities nerEntities) {
|
||||
Map<Integer, Set<Image>> imagesPerPage,
|
||||
NerEntities nerEntities) {
|
||||
|
||||
List<SectionSearchableTextPair> sectionSearchableTextPairs = new ArrayList<>();
|
||||
for (SectionText reanalysisSection : reanalysisSections) {
|
||||
|
||||
Entities entities = findEntities(reanalysisSection.getSearchableText(), reanalysisSection.getHeadline(), reanalysisSection.getSectionNumber(), dictionary, local, nerEntities, reanalysisSection.getCellStarts(), analyzeRequest.getManualRedactions());
|
||||
Entities entities = findEntities(reanalysisSection.getSearchableText(),
|
||||
reanalysisSection.getHeadline(),
|
||||
reanalysisSection.getSectionNumber(),
|
||||
dictionary,
|
||||
local,
|
||||
nerEntities,
|
||||
reanalysisSection.getCellStarts(),
|
||||
analyzeRequest.getManualRedactions());
|
||||
|
||||
if (reanalysisSection.getCellStarts() != null && !reanalysisSection.getCellStarts().isEmpty()) {
|
||||
surroundingWordsService.addSurroundingText(entities.getEntities(), reanalysisSection.getSearchableText(), dictionary, reanalysisSection
|
||||
.getCellStarts());
|
||||
surroundingWordsService.addSurroundingText(entities.getEntities(), reanalysisSection.getSearchableText(), dictionary, reanalysisSection.getCellStarts());
|
||||
} else {
|
||||
surroundingWordsService.addSurroundingText(entities.getEntities(), reanalysisSection.getSearchableText(), dictionary);
|
||||
}
|
||||
@ -87,21 +98,16 @@ public class EntityRedactionService {
|
||||
.filter(idr -> idr.getStatus() == AnnotationStatus.APPROVED && !idr.isRemoveFromDictionary())
|
||||
.filter(idr -> idr.getRequestDate() != null)
|
||||
.filter(idr -> approvedForceRedactions.stream()
|
||||
.noneMatch(forceRedact -> forceRedact.getAnnotationId()
|
||||
.equals(idr.getAnnotationId()) && forceRedact.getRequestDate()
|
||||
.noneMatch(forceRedact -> forceRedact.getAnnotationId().equals(idr.getAnnotationId()) && forceRedact.getRequestDate()
|
||||
.isAfter(idr.getRequestDate())))
|
||||
.map(IdRemoval::getAnnotationId)
|
||||
.collect(Collectors.toSet());
|
||||
|
||||
if (reanalysisSection.getImages() != null && !reanalysisSection.getImages()
|
||||
.isEmpty() && analyzeRequest.getManualRedactions().getImageRecategorization() != null) {
|
||||
if (reanalysisSection.getImages() != null && !reanalysisSection.getImages().isEmpty() && analyzeRequest.getManualRedactions().getImageRecategorization() != null) {
|
||||
for (Image image : reanalysisSection.getImages()) {
|
||||
String imageId = IdBuilder.buildId(image.getPosition(), image.getPage());
|
||||
for (ManualImageRecategorization imageRecategorization : analyzeRequest.getManualRedactions()
|
||||
.getImageRecategorization()) {
|
||||
if (imageRecategorization.getStatus()
|
||||
.equals(AnnotationStatus.APPROVED) && imageRecategorization.getAnnotationId()
|
||||
.equals(imageId)) {
|
||||
for (ManualImageRecategorization imageRecategorization : analyzeRequest.getManualRedactions().getImageRecategorization()) {
|
||||
if (imageRecategorization.getStatus().equals(AnnotationStatus.APPROVED) && imageRecategorization.getAnnotationId().equals(imageId)) {
|
||||
image.setType(imageRecategorization.getType());
|
||||
}
|
||||
}
|
||||
@ -124,8 +130,7 @@ public class EntityRedactionService {
|
||||
.isLocal(false)
|
||||
.dictionaryTypes(dictionary.getTypes())
|
||||
.entities(hintsPerSectionNumber != null && hintsPerSectionNumber.containsKey(reanalysisSection.getSectionNumber()) ? Stream.concat(entities.getEntities()
|
||||
.stream(), hintsPerSectionNumber.get(reanalysisSection.getSectionNumber()).stream())
|
||||
.collect(Collectors.toSet()) : entities.getEntities())
|
||||
.stream(), hintsPerSectionNumber.get(reanalysisSection.getSectionNumber()).stream()).collect(Collectors.toSet()) : entities.getEntities())
|
||||
.nerEntities(entities.getNerEntities())
|
||||
.text(reanalysisSection.getSearchableText().getAsStringWithLinebreaks())
|
||||
.searchText(reanalysisSection.getSearchableText().toString())
|
||||
@ -147,11 +152,16 @@ public class EntityRedactionService {
|
||||
Section analysedSection = droolsExecutionService.executeRules(kieContainer, sectionSearchableTextPair.getSection());
|
||||
EntitySearchUtils.removeEntitiesContainedInLarger(analysedSection.getEntities());
|
||||
|
||||
var entriesWithoutSurroundingText = analysedSection.getEntities().stream().filter(e -> e.getTextAfter() == null && e.getTextBefore() == null).collect(Collectors.toSet());
|
||||
var entriesWithoutSurroundingText = analysedSection.getEntities()
|
||||
.stream()
|
||||
.filter(e -> e.getTextAfter() == null && e.getTextBefore() == null)
|
||||
.collect(Collectors.toSet());
|
||||
|
||||
if (sectionSearchableTextPair.getCellStarts() != null && !sectionSearchableTextPair.getCellStarts()
|
||||
.isEmpty()) {
|
||||
surroundingWordsService.addSurroundingText(entriesWithoutSurroundingText, sectionSearchableTextPair.getSearchableText(), dictionary, sectionSearchableTextPair.getCellStarts());
|
||||
if (sectionSearchableTextPair.getCellStarts() != null && !sectionSearchableTextPair.getCellStarts().isEmpty()) {
|
||||
surroundingWordsService.addSurroundingText(entriesWithoutSurroundingText,
|
||||
sectionSearchableTextPair.getSearchableText(),
|
||||
dictionary,
|
||||
sectionSearchableTextPair.getCellStarts());
|
||||
} else {
|
||||
surroundingWordsService.addSurroundingText(entriesWithoutSurroundingText, sectionSearchableTextPair.getSearchableText(), dictionary);
|
||||
}
|
||||
@ -177,13 +187,29 @@ public class EntityRedactionService {
|
||||
for (Entity entity : entities) {
|
||||
Map<Integer, List<EntityPositionSequence>> sequenceOnPage = new HashMap<>();
|
||||
for (EntityPositionSequence entityPositionSequence : entity.getPositionSequences()) {
|
||||
sequenceOnPage.computeIfAbsent(entityPositionSequence.getPageNumber(), (x) -> new ArrayList<>())
|
||||
.add(entityPositionSequence);
|
||||
sequenceOnPage.computeIfAbsent(entityPositionSequence.getPageNumber(), (x) -> new ArrayList<>()).add(entityPositionSequence);
|
||||
}
|
||||
|
||||
for (Map.Entry<Integer, List<EntityPositionSequence>> entry : sequenceOnPage.entrySet()) {
|
||||
entitiesPerPage.computeIfAbsent(entry.getKey(), (x) -> new ArrayList<>())
|
||||
.add(new Entity(entity.getWord(), entity.getType(), entity.isRedaction(), entity.getRedactionReason(), entry.getValue(), entity.getHeadline(), entity.getMatchedRule(), entity.getSectionNumber(), entity.getLegalBasis(), entity.isDictionaryEntry(), entity.getTextBefore(), entity.getTextAfter(), entity.getStart(), entity.getEnd(), entity.isDossierDictionaryEntry(), entity.getEngines(), entity.getReferences(), entity.getEntityType()));
|
||||
.add(new Entity(entity.getWord(),
|
||||
entity.getType(),
|
||||
entity.isRedaction(),
|
||||
entity.getRedactionReason(),
|
||||
entry.getValue(),
|
||||
entity.getHeadline(),
|
||||
entity.getMatchedRule(),
|
||||
entity.getSectionNumber(),
|
||||
entity.getLegalBasis(),
|
||||
entity.isDictionaryEntry(),
|
||||
entity.getTextBefore(),
|
||||
entity.getTextAfter(),
|
||||
entity.getStart(),
|
||||
entity.getEnd(),
|
||||
entity.isDossierDictionaryEntry(),
|
||||
entity.getEngines(),
|
||||
entity.getReferences(),
|
||||
entity.getEntityType()));
|
||||
}
|
||||
}
|
||||
return entitiesPerPage;
|
||||
@ -220,9 +246,14 @@ public class EntityRedactionService {
|
||||
|
||||
|
||||
@Timed("redactmanager_findEntities")
|
||||
private Entities findEntities(SearchableText searchableText, String headline, int sectionNumber,
|
||||
Dictionary dictionary, boolean local, NerEntities nerEntities,
|
||||
List<Integer> cellStarts, ManualRedactions manualRedactions) {
|
||||
private Entities findEntities(SearchableText searchableText,
|
||||
String headline,
|
||||
int sectionNumber,
|
||||
Dictionary dictionary,
|
||||
boolean local,
|
||||
NerEntities nerEntities,
|
||||
List<Integer> cellStarts,
|
||||
ManualRedactions manualRedactions) {
|
||||
|
||||
Set<Entity> found = new HashSet<>();
|
||||
String searchableString = searchableText.asString();
|
||||
@ -235,7 +266,16 @@ public class EntityRedactionService {
|
||||
for (DictionaryModel model : dictionary.getDictionaryModels()) {
|
||||
|
||||
var searchImplementation = local ? model.getLocalSearch() : model.getEntriesSearch();
|
||||
var entities = EntitySearchUtils.findEntities(model.isCaseInsensitive() ? lowercaseInputString : searchableString, searchImplementation, model, new FindEntityDetails(model.getType(), headline, sectionNumber, !local, model.isDossierDictionary(), local ? Engine.RULE : Engine.DICTIONARY, local ? EntityType.RECOMMENDATION : EntityType.ENTITY));
|
||||
var entities = EntitySearchUtils.findEntities(model.isCaseInsensitive() ? lowercaseInputString : searchableString,
|
||||
searchImplementation,
|
||||
model,
|
||||
new FindEntityDetails(model.getType(),
|
||||
headline,
|
||||
sectionNumber,
|
||||
!local,
|
||||
model.isDossierDictionary(),
|
||||
local ? Engine.RULE : Engine.DICTIONARY,
|
||||
local ? EntityType.RECOMMENDATION : EntityType.ENTITY));
|
||||
|
||||
EntitySearchUtils.addOrAddEngine(found, entities);
|
||||
}
|
||||
@ -250,15 +290,23 @@ public class EntityRedactionService {
|
||||
}
|
||||
|
||||
|
||||
private Set<Entity> getNerValues(int sectionNumber, NerEntities nerEntities, List<Integer> cellStarts,
|
||||
String headline) {
|
||||
private Set<Entity> getNerValues(int sectionNumber, NerEntities nerEntities, List<Integer> cellStarts, String headline) {
|
||||
|
||||
Set<Entity> entities = new HashSet<>();
|
||||
|
||||
if (redactionServiceSettings.isNerServiceEnabled() && nerEntities.getData().containsKey(sectionNumber)) {
|
||||
nerEntities.getData().get(sectionNumber).forEach(res -> {
|
||||
if (cellStarts == null || cellStarts.isEmpty()) {
|
||||
entities.add(new Entity(res.getValue(), res.getType(), res.getStartOffset(), res.getEndOffset(), headline, sectionNumber, false, false, Engine.NER, EntityType.RECOMMENDATION));
|
||||
entities.add(new Entity(res.getValue(),
|
||||
res.getType(),
|
||||
res.getStartOffset(),
|
||||
res.getEndOffset(),
|
||||
headline,
|
||||
sectionNumber,
|
||||
false,
|
||||
false,
|
||||
Engine.NER,
|
||||
EntityType.RECOMMENDATION));
|
||||
} else {
|
||||
boolean intersectsCellStart = false;
|
||||
for (Integer cellStart : cellStarts) {
|
||||
@ -268,7 +316,16 @@ public class EntityRedactionService {
|
||||
}
|
||||
}
|
||||
if (!intersectsCellStart) {
|
||||
entities.add(new Entity(res.getValue(), res.getType(), res.getStartOffset(), res.getEndOffset(), headline, sectionNumber, false, false, Engine.NER, EntityType.RECOMMENDATION));
|
||||
entities.add(new Entity(res.getValue(),
|
||||
res.getType(),
|
||||
res.getStartOffset(),
|
||||
res.getEndOffset(),
|
||||
headline,
|
||||
sectionNumber,
|
||||
false,
|
||||
false,
|
||||
Engine.NER,
|
||||
EntityType.RECOMMENDATION));
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
@ -26,7 +26,9 @@ public class ImportedRedactionService {
|
||||
|
||||
|
||||
@Timed("redactmanager_processImportedRedactions")
|
||||
public List<RedactionLogEntry> processImportedRedactions(String dossierTemplateId, String dossierId, String fileId,
|
||||
public List<RedactionLogEntry> processImportedRedactions(String dossierTemplateId,
|
||||
String dossierId,
|
||||
String fileId,
|
||||
List<RedactionLogEntry> redactionLogEntries,
|
||||
boolean addImportedRedactions) {
|
||||
|
||||
@ -97,13 +99,13 @@ public class ImportedRedactionService {
|
||||
|
||||
boolean rectOverlap(Rectangle a, Rectangle b) {
|
||||
|
||||
boolean xOverlap = valueInRange(a.getTopLeft().getX(), b.getTopLeft().getX(), b.getTopLeft()
|
||||
.getX() + b.getWidth()) || valueInRange(b.getTopLeft().getX(), a.getTopLeft().getX(), a.getTopLeft()
|
||||
.getX() + a.getWidth());
|
||||
boolean xOverlap = valueInRange(a.getTopLeft().getX(), b.getTopLeft().getX(), b.getTopLeft().getX() + b.getWidth()) || valueInRange(b.getTopLeft().getX(),
|
||||
a.getTopLeft().getX(),
|
||||
a.getTopLeft().getX() + a.getWidth());
|
||||
|
||||
boolean yOverlap = valueInRange(a.getTopLeft().getY(), b.getTopLeft().getY(), b.getTopLeft()
|
||||
.getY() + b.getHeight()) || valueInRange(b.getTopLeft().getY(), a.getTopLeft().getY(), a.getTopLeft()
|
||||
.getY() + a.getHeight());
|
||||
boolean yOverlap = valueInRange(a.getTopLeft().getY(), b.getTopLeft().getY(), b.getTopLeft().getY() + b.getHeight()) || valueInRange(b.getTopLeft().getY(),
|
||||
a.getTopLeft().getY(),
|
||||
a.getTopLeft().getY() + a.getHeight());
|
||||
|
||||
return xOverlap && yOverlap;
|
||||
}
|
||||
|
||||
@ -36,6 +36,7 @@ public class ManualRedactionSurroundingTextService {
|
||||
private final RedactionStorageService redactionStorageService;
|
||||
private final SurroundingWordsService surroundingWordsService;
|
||||
|
||||
|
||||
@Timed("redactmanager_SurroundingTextAnalysis")
|
||||
public AnalyzeResult addSurroundingText(String dossierId, String fileId, ManualRedactions manualRedactions) {
|
||||
|
||||
@ -67,19 +68,15 @@ public class ManualRedactionSurroundingTextService {
|
||||
|
||||
manualRedactions.getEntriesToAdd().addAll(processedAddRedactions);
|
||||
|
||||
return AnalyzeResult.builder()
|
||||
.dossierId(dossierId)
|
||||
.fileId(fileId)
|
||||
.manualRedactions(manualRedactions)
|
||||
.duration(System.currentTimeMillis() - startTime)
|
||||
.build();
|
||||
return AnalyzeResult.builder().dossierId(dossierId).fileId(fileId).manualRedactions(manualRedactions).duration(System.currentTimeMillis() - startTime).build();
|
||||
}
|
||||
|
||||
|
||||
private Pair<String, String> findSurroundingText(SectionText sectionText, String value,
|
||||
List<Rectangle> toFindPositions) {
|
||||
private Pair<String, String> findSurroundingText(SectionText sectionText, String value, List<Rectangle> toFindPositions) {
|
||||
|
||||
Set<Entity> entities = EntitySearchUtils.find(sectionText.getText(), new SearchImplementation(value, false), new FindEntityDetails("dummy", sectionText.getHeadline(), sectionText.getSectionNumber(), false, false, Engine.DICTIONARY, EntityType.ENTITY));
|
||||
Set<Entity> entities = EntitySearchUtils.find(sectionText.getText(),
|
||||
new SearchImplementation(value, false),
|
||||
new FindEntityDetails("dummy", sectionText.getHeadline(), sectionText.getSectionNumber(), false, false, Engine.DICTIONARY, EntityType.ENTITY));
|
||||
Set<Entity> entitiesWithPositions = EntitySearchUtils.clearAndFindPositions(entities, sectionText.getSearchableText(), null, null);
|
||||
|
||||
Entity correctEntity = getEntityOnCorrectPosition(entitiesWithPositions, toFindPositions);
|
||||
@ -128,11 +125,9 @@ public class ManualRedactionSurroundingTextService {
|
||||
}
|
||||
|
||||
|
||||
public boolean intersects(Rectangle manualPosition,
|
||||
com.iqser.red.service.redaction.v1.model.Rectangle textPositionRectangle) {
|
||||
public boolean intersects(Rectangle manualPosition, com.iqser.red.service.redaction.v1.model.Rectangle textPositionRectangle) {
|
||||
|
||||
return textPositionRectangle.getTopLeft()
|
||||
.getX() + textPositionRectangle.getWidth() > manualPosition.getTopLeftX() && textPositionRectangle.getTopLeft()
|
||||
return textPositionRectangle.getTopLeft().getX() + textPositionRectangle.getWidth() > manualPosition.getTopLeftX() && textPositionRectangle.getTopLeft()
|
||||
.getY() + textPositionRectangle.getHeight() > manualPosition.getTopLeftY() && textPositionRectangle.getTopLeft()
|
||||
.getX() < manualPosition.getTopLeftX() + manualPosition.getWidth() && textPositionRectangle.getTopLeft()
|
||||
.getY() < manualPosition.getTopLeftY() + manualPosition.getHeight();
|
||||
|
||||
@ -6,6 +6,7 @@ import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService
|
||||
import io.micrometer.core.annotation.Timed;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.time.OffsetDateTime;
|
||||
@ -39,8 +40,10 @@ public class RedactionChangeLogService {
|
||||
.filter(entry -> !entry.lastChangeIsRemoved())
|
||||
.collect(Collectors.toList());
|
||||
|
||||
Set<RedactionLogEntry> added = currentRedactionLog.getRedactionLogEntry().stream()
|
||||
.filter(entry -> entry.getChanges().isEmpty() || !entry.lastChangeIsRemoved()).collect(Collectors.toSet());
|
||||
Set<RedactionLogEntry> added = currentRedactionLog.getRedactionLogEntry()
|
||||
.stream()
|
||||
.filter(entry -> entry.getChanges().isEmpty() || !entry.lastChangeIsRemoved())
|
||||
.collect(Collectors.toSet());
|
||||
|
||||
notRemovedPreviousEntries.forEach(added::remove);
|
||||
|
||||
|
||||
@ -33,8 +33,7 @@ public class RedactionLogCreatorService {
|
||||
|
||||
|
||||
@Timed("redactmanager_createRedactionLog")
|
||||
public List<RedactionLogEntry> createRedactionLog(PageEntities pageEntities, int numberOfPages,
|
||||
String dossierTemplateId) {
|
||||
public List<RedactionLogEntry> createRedactionLog(PageEntities pageEntities, int numberOfPages, String dossierTemplateId) {
|
||||
|
||||
List<RedactionLogEntry> entries = new ArrayList<>();
|
||||
|
||||
@ -52,8 +51,7 @@ public class RedactionLogCreatorService {
|
||||
}
|
||||
|
||||
|
||||
public List<RedactionLogEntry> addImageEntries(Map<Integer, Set<Image>> images, int pageNumber,
|
||||
String dossierTemplateId) {
|
||||
public List<RedactionLogEntry> addImageEntries(Map<Integer, Set<Image>> images, int pageNumber, String dossierTemplateId) {
|
||||
|
||||
List<RedactionLogEntry> redactionLogEntities = new ArrayList<>();
|
||||
|
||||
@ -73,9 +71,10 @@ public class RedactionLogCreatorService {
|
||||
.isHint(dictionaryService.isHint(image.getType(), dossierTemplateId))
|
||||
.isDictionaryEntry(false)
|
||||
.isRecommendation(false)
|
||||
.positions(List.of(new Rectangle(new Point((float) image.getPosition()
|
||||
.getX(), (float) image.getPosition().getY()), (float) image.getPosition()
|
||||
.getWidth(), (float) image.getPosition().getHeight(), pageNumber)))
|
||||
.positions(List.of(new Rectangle(new Point((float) image.getPosition().getX(), (float) image.getPosition().getY()),
|
||||
(float) image.getPosition().getWidth(),
|
||||
(float) image.getPosition().getHeight(),
|
||||
pageNumber)))
|
||||
.sectionNumber(image.getSectionNumber())
|
||||
.section(image.getSection())
|
||||
.imageHasTransparency(image.isHasTransparency())
|
||||
@ -141,7 +140,7 @@ public class RedactionLogCreatorService {
|
||||
TextPositionSequence combinedSequence = new TextPositionSequence();
|
||||
for (int i = 0; i < textPositionSequences.size(); i++) {
|
||||
|
||||
if(combinedSequence.getTextPositions().isEmpty()){
|
||||
if (combinedSequence.getTextPositions().isEmpty()) {
|
||||
combinedSequence = textPositionSequences.get(i);
|
||||
continue;
|
||||
}
|
||||
@ -164,7 +163,7 @@ public class RedactionLogCreatorService {
|
||||
|
||||
}
|
||||
|
||||
if(!combinedSequence.getTextPositions().isEmpty()) {
|
||||
if (!combinedSequence.getTextPositions().isEmpty()) {
|
||||
rectangles.add(combinedSequence.getRectangle());
|
||||
}
|
||||
|
||||
@ -207,8 +206,7 @@ public class RedactionLogCreatorService {
|
||||
.redacted(entity.isRedaction())
|
||||
.isHint(isHint(entity.getType(), dossierTemplateId))
|
||||
.isRecommendation(entity.getEntityType().equals(EntityType.RECOMMENDATION))
|
||||
.isFalsePositive(entity.getEntityType().equals(EntityType.FALSE_POSITIVE) || entity.getEntityType()
|
||||
.equals(EntityType.FALSE_RECOMMENDATION))
|
||||
.isFalsePositive(entity.getEntityType().equals(EntityType.FALSE_POSITIVE) || entity.getEntityType().equals(EntityType.FALSE_RECOMMENDATION))
|
||||
.section(entity.getHeadline())
|
||||
.sectionNumber(entity.getSectionNumber())
|
||||
.matchedRule(entity.getMatchedRule())
|
||||
|
||||
@ -77,16 +77,21 @@ public class RedactionLogMergeService {
|
||||
// enhance section grid with headline data
|
||||
for (var sectionText : text.getSectionTexts()) {
|
||||
sectionGrid.getSections()
|
||||
.add(new SectionGrid.SectionGridSection(sectionText.getSectionNumber(), sectionText.getHeadline(), sectionText.getSectionAreas()
|
||||
.stream()
|
||||
.map(SectionArea::getPage)
|
||||
.collect(Collectors.toSet()), sectionText.getSectionAreas()));
|
||||
.add(new SectionGrid.SectionGridSection(sectionText.getSectionNumber(),
|
||||
sectionText.getHeadline(),
|
||||
sectionText.getSectionAreas().stream().map(SectionArea::getPage).collect(Collectors.toSet()),
|
||||
sectionText.getSectionAreas()));
|
||||
}
|
||||
redactionStorageService.storeObject(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.SECTION_GRID, sectionGrid);
|
||||
}
|
||||
|
||||
log.debug("Loaded redaction log with computationalVersion: {}", redactionLog.getAnalysisVersion());
|
||||
var merged = mergeRedactionLogData(redactionLog, sectionGrid, redactionRequest.getManualRedactions(), redactionRequest.getExcludedPages(), redactionRequest.getTypes(), redactionRequest.getColors());
|
||||
var merged = mergeRedactionLogData(redactionLog,
|
||||
sectionGrid,
|
||||
redactionRequest.getManualRedactions(),
|
||||
redactionRequest.getExcludedPages(),
|
||||
redactionRequest.getTypes(),
|
||||
redactionRequest.getColors());
|
||||
|
||||
merged.getRedactionLogEntry().removeIf(e -> e.isFalsePositive() && !redactionRequest.isIncludeFalsePositives());
|
||||
|
||||
@ -94,15 +99,23 @@ public class RedactionLogMergeService {
|
||||
}
|
||||
|
||||
|
||||
private RedactionLog mergeRedactionLogData(RedactionLog redactionLog, SectionGrid sectionGrid,
|
||||
ManualRedactions manualRedactions, Set<Integer> excludedPages,
|
||||
List<Type> types, Colors colors) {
|
||||
private RedactionLog mergeRedactionLogData(RedactionLog redactionLog,
|
||||
SectionGrid sectionGrid,
|
||||
ManualRedactions manualRedactions,
|
||||
Set<Integer> excludedPages,
|
||||
List<Type> types,
|
||||
Colors colors) {
|
||||
|
||||
var skippedImportedRedactions = new HashSet<>();
|
||||
log.info("Merging Redaction log with manual redactions");
|
||||
if (manualRedactions != null) {
|
||||
|
||||
var manualRedactionLogEntries = addManualAddEntries(sectionGrid, manualRedactions.getEntriesToAdd(), manualRedactions.getComments(), colors, types, redactionLog.getAnalysisNumber());
|
||||
var manualRedactionLogEntries = addManualAddEntries(sectionGrid,
|
||||
manualRedactions.getEntriesToAdd(),
|
||||
manualRedactions.getComments(),
|
||||
colors,
|
||||
types,
|
||||
redactionLog.getAnalysisNumber());
|
||||
|
||||
redactionLog.getRedactionLogEntry().addAll(manualRedactionLogEntries);
|
||||
|
||||
@ -110,9 +123,7 @@ public class RedactionLogMergeService {
|
||||
|
||||
for (RedactionLogEntry entry : redactionLog.getRedactionLogEntry()) {
|
||||
|
||||
processRedactionLogEntry(manualRedactionWrappers.stream()
|
||||
.filter(mr -> entry.getId().equals(mr.getId()))
|
||||
.collect(Collectors.toList()), entry, types, colors);
|
||||
processRedactionLogEntry(manualRedactionWrappers.stream().filter(mr -> entry.getId().equals(mr.getId())).collect(Collectors.toList()), entry, types, colors);
|
||||
|
||||
if (entry.isImported() && !entry.isRedacted()) {
|
||||
skippedImportedRedactions.add(entry.getId());
|
||||
@ -136,9 +147,8 @@ public class RedactionLogMergeService {
|
||||
|
||||
if (entry.getImportedRedactionIntersections() != null) {
|
||||
entry.getImportedRedactionIntersections().removeAll(skippedImportedRedactions);
|
||||
if (!entry.getImportedRedactionIntersections()
|
||||
.isEmpty() && (!entry.isImage() || entry.isImage() && !(entry.getType()
|
||||
.equals("image") || entry.getType().equals("ocr")))) {
|
||||
if (!entry.getImportedRedactionIntersections().isEmpty() && (!entry.isImage() || entry.isImage() && !(entry.getType().equals("image") || entry.getType()
|
||||
.equals("ocr")))) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@ -193,8 +203,7 @@ public class RedactionLogMergeService {
|
||||
}
|
||||
|
||||
|
||||
private void processRedactionLogEntry(List<ManualRedactionWrapper> manualRedactionWrappers,
|
||||
RedactionLogEntry redactionLogEntry, List<Type> types, Colors colors) {
|
||||
private void processRedactionLogEntry(List<ManualRedactionWrapper> manualRedactionWrappers, RedactionLogEntry redactionLogEntry, List<Type> types, Colors colors) {
|
||||
|
||||
manualRedactionWrappers.forEach(mrw -> {
|
||||
|
||||
@ -219,7 +228,7 @@ public class RedactionLogMergeService {
|
||||
redactionLogEntry.setColor(getColor(redactionLogEntry.getType(), colors, false, redactionLogEntry.isRedacted(), false, types));
|
||||
}
|
||||
|
||||
if(manualOverrideReason != null) {
|
||||
if (manualOverrideReason != null) {
|
||||
redactionLogEntry.setReason(manualOverrideReason);
|
||||
}
|
||||
|
||||
@ -231,8 +240,7 @@ public class RedactionLogMergeService {
|
||||
|
||||
if (mrw.getItem() instanceof IdRemoval) {
|
||||
var manualRemoval = (IdRemoval) mrw.getItem();
|
||||
if (manualRemoval.getStatus()
|
||||
.equals(AnnotationStatus.APPROVED) && manualRemoval.isRemoveFromDictionary()) {
|
||||
if (manualRemoval.getStatus().equals(AnnotationStatus.APPROVED) && manualRemoval.isRemoveFromDictionary()) {
|
||||
log.debug("Skipping merge for dictionary-modifying entry");
|
||||
} else {
|
||||
String manualOverrideReason = null;
|
||||
@ -246,7 +254,7 @@ public class RedactionLogMergeService {
|
||||
redactionLogEntry.setColor(getColor(redactionLogEntry.getType(), colors, true, redactionLogEntry.isRedacted(), false, types));
|
||||
}
|
||||
|
||||
if(manualOverrideReason != null) {
|
||||
if (manualOverrideReason != null) {
|
||||
redactionLogEntry.setReason(manualOverrideReason);
|
||||
}
|
||||
|
||||
@ -276,7 +284,7 @@ public class RedactionLogMergeService {
|
||||
redactionLogEntry.setLegalBasis(manualForceRedact.getLegalBasis());
|
||||
}
|
||||
|
||||
if(manualOverrideReason != null) {
|
||||
if (manualOverrideReason != null) {
|
||||
redactionLogEntry.setReason(manualOverrideReason);
|
||||
}
|
||||
|
||||
@ -305,12 +313,11 @@ public class RedactionLogMergeService {
|
||||
redactionLogEntry.setColor(getColor(redactionLogEntry.getType(), colors, true, redactionLogEntry.isRedacted(), false, types));
|
||||
}
|
||||
|
||||
if(manualOverrideReason != null) {
|
||||
if (manualOverrideReason != null) {
|
||||
redactionLogEntry.setReason(manualOverrideReason);
|
||||
}
|
||||
|
||||
var manualChange = ManualChange.from(manualLegalBasisChange)
|
||||
.withManualRedactionType(ManualRedactionType.LEGAL_BASIS_CHANGE);
|
||||
var manualChange = ManualChange.from(manualLegalBasisChange).withManualRedactionType(ManualRedactionType.LEGAL_BASIS_CHANGE);
|
||||
manualChange.withChange("legalBasis", manualLegalBasisChange.getLegalBasis());
|
||||
if (manualLegalBasisChange.getSection() != null) {
|
||||
manualChange.withChange("section", manualLegalBasisChange.getSection());
|
||||
@ -349,9 +356,7 @@ public class RedactionLogMergeService {
|
||||
|
||||
redactionLogEntry.setReason(manualOverrideReason);
|
||||
redactionLogEntry.getManualChanges()
|
||||
.add(ManualChange.from(manualResizeRedact)
|
||||
.withManualRedactionType(ManualRedactionType.RESIZE)
|
||||
.withChange("value", manualResizeRedact.getValue()));
|
||||
.add(ManualChange.from(manualResizeRedact).withManualRedactionType(ManualRedactionType.RESIZE).withChange("value", manualResizeRedact.getValue()));
|
||||
}
|
||||
|
||||
});
|
||||
@ -372,9 +377,12 @@ public class RedactionLogMergeService {
|
||||
}
|
||||
|
||||
|
||||
public List<RedactionLogEntry> addManualAddEntries(SectionGrid sectionGrid, Set<ManualRedactionEntry> manualAdds,
|
||||
Map<String, List<Comment>> comments, Colors colors,
|
||||
List<Type> types, int analysisNumber) {
|
||||
public List<RedactionLogEntry> addManualAddEntries(SectionGrid sectionGrid,
|
||||
Set<ManualRedactionEntry> manualAdds,
|
||||
Map<String, List<Comment>> comments,
|
||||
Colors colors,
|
||||
List<Type> types,
|
||||
int analysisNumber) {
|
||||
|
||||
List<RedactionLogEntry> redactionLogEntries = new ArrayList<>();
|
||||
|
||||
@ -405,8 +413,7 @@ public class RedactionLogMergeService {
|
||||
}
|
||||
|
||||
|
||||
private List<Rectangle> convertPositions(
|
||||
List<com.iqser.red.service.persistence.service.v1.api.model.annotations.Rectangle> positions) {
|
||||
private List<Rectangle> convertPositions(List<com.iqser.red.service.persistence.service.v1.api.model.annotations.Rectangle> positions) {
|
||||
|
||||
return positions.stream()
|
||||
.map(pos -> new Rectangle(new Point(pos.getTopLeftX(), pos.getTopLeftY()), pos.getWidth(), pos.getHeight(), pos.getPage()))
|
||||
@ -421,13 +428,11 @@ public class RedactionLogMergeService {
|
||||
}
|
||||
|
||||
|
||||
private RedactionLogEntry createRedactionLogEntry(ManualRedactionEntry manualRedactionEntry, String id,
|
||||
Colors colors, List<Type> types, int analysisNumber) {
|
||||
private RedactionLogEntry createRedactionLogEntry(ManualRedactionEntry manualRedactionEntry, String id, Colors colors, List<Type> types, int analysisNumber) {
|
||||
|
||||
var addToDictionary = manualRedactionEntry.isAddToDictionary() || manualRedactionEntry.isAddToDossierDictionary();
|
||||
|
||||
var change = ManualChange.from(manualRedactionEntry)
|
||||
.withManualRedactionType(addToDictionary ? ManualRedactionType.ADD_TO_DICTIONARY : ManualRedactionType.ADD_LOCALLY);
|
||||
var change = ManualChange.from(manualRedactionEntry).withManualRedactionType(addToDictionary ? ManualRedactionType.ADD_TO_DICTIONARY : ManualRedactionType.ADD_LOCALLY);
|
||||
List<ManualChange> changeList = new ArrayList<>();
|
||||
changeList.add(change);
|
||||
|
||||
@ -452,8 +457,7 @@ public class RedactionLogMergeService {
|
||||
}
|
||||
|
||||
|
||||
private float[] getColor(String type, Colors colors, boolean requested, boolean isRedaction, boolean skipped,
|
||||
List<Type> types) {
|
||||
private float[] getColor(String type, Colors colors, boolean requested, boolean isRedaction, boolean skipped, List<Type> types) {
|
||||
|
||||
if (requested) {
|
||||
return convertColor(colors.getRequestRemoveColor());
|
||||
@ -484,8 +488,7 @@ public class RedactionLogMergeService {
|
||||
return convertColor(foundAndNotDeletedType.get().getHexColor());
|
||||
}
|
||||
Optional<Type> firstDeletedType = matchingTypes.stream().findFirst();
|
||||
return firstDeletedType.map(value -> convertColor(value.getHexColor()))
|
||||
.orElseGet(() -> convertColor(DELETED_TYPE_COLOR));
|
||||
return firstDeletedType.map(value -> convertColor(value.getHexColor())).orElseGet(() -> convertColor(DELETED_TYPE_COLOR));
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -9,7 +9,9 @@ import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.util.ArrayList;
|
||||
@ -19,7 +21,6 @@ import java.util.List;
|
||||
@RequiredArgsConstructor
|
||||
public class SectionGridCreatorService {
|
||||
|
||||
|
||||
public void createSectionGrid(Document classifiedDoc, int numberOfPages) {
|
||||
|
||||
for (int page = 1; page <= numberOfPages; page++) {
|
||||
@ -45,8 +46,12 @@ public class SectionGridCreatorService {
|
||||
classifiedDoc.getSectionGrid()
|
||||
.getRectanglesPerPage()
|
||||
.computeIfAbsent(page, (x) -> new ArrayList<>())
|
||||
.add(new SectionRectangle(new Point(textBlock.getMinX(), textBlock.getMinY()), textBlock.getWidth(), textBlock
|
||||
.getHeight(), i + 1, paragraph.getPageBlocks().size(),null));
|
||||
.add(new SectionRectangle(new Point(textBlock.getMinX(), textBlock.getMinY()),
|
||||
textBlock.getWidth(),
|
||||
textBlock.getHeight(),
|
||||
i + 1,
|
||||
paragraph.getPageBlocks().size(),
|
||||
null));
|
||||
|
||||
} else if (textBlock instanceof Table) {
|
||||
|
||||
@ -54,8 +59,7 @@ public class SectionGridCreatorService {
|
||||
for (List<Cell> row : ((Table) textBlock).getRows()) {
|
||||
for (Cell cell : row) {
|
||||
if (cell != null) {
|
||||
cellRectangles.add(new CellRectangle(new Point((float) cell.getX(), (float) cell.getY()), (float) cell
|
||||
.getWidth(), (float) cell.getHeight()));
|
||||
cellRectangles.add(new CellRectangle(new Point((float) cell.getX(), (float) cell.getY()), (float) cell.getWidth(), (float) cell.getHeight()));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -63,8 +67,12 @@ public class SectionGridCreatorService {
|
||||
classifiedDoc.getSectionGrid()
|
||||
.getRectanglesPerPage()
|
||||
.computeIfAbsent(page, (x) -> new ArrayList<>())
|
||||
.add(new SectionRectangle(new Point(textBlock.getMinX(), textBlock.getMinY()), textBlock.getWidth(), textBlock
|
||||
.getHeight(), i + 1, paragraph.getPageBlocks().size(), cellRectangles));
|
||||
.add(new SectionRectangle(new Point(textBlock.getMinX(), textBlock.getMinY()),
|
||||
textBlock.getWidth(),
|
||||
textBlock.getHeight(),
|
||||
i + 1,
|
||||
paragraph.getPageBlocks().size(),
|
||||
cellRectangles));
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@ -139,8 +139,11 @@ public class SectionTextBuilderService {
|
||||
|
||||
private SectionArea getSectionArea(Cell cell) {
|
||||
|
||||
return new SectionArea(new Point((float) cell.getX(), (float) cell.getY()), (float) cell.getWidth(), (float) cell.getHeight(),
|
||||
cell.getTextBlocks().get(0).getSequences().get(0).getPage(), null);
|
||||
return new SectionArea(new Point((float) cell.getX(), (float) cell.getY()),
|
||||
(float) cell.getWidth(),
|
||||
(float) cell.getHeight(),
|
||||
cell.getTextBlocks().get(0).getSequences().get(0).getPage(),
|
||||
null);
|
||||
}
|
||||
|
||||
|
||||
@ -170,8 +173,11 @@ public class SectionTextBuilderService {
|
||||
|
||||
SectionText sectionText = new SectionText();
|
||||
for (TextBlock paragraphTextBlock : paragraphTextBlocks) {
|
||||
SectionArea sectionArea = new SectionArea(new Point(paragraphTextBlock.getMinX(), paragraphTextBlock.getMinY()), paragraphTextBlock.getWidth(),
|
||||
paragraphTextBlock.getHeight(), paragraphTextBlock.getPage(), null);
|
||||
SectionArea sectionArea = new SectionArea(new Point(paragraphTextBlock.getMinX(), paragraphTextBlock.getMinY()),
|
||||
paragraphTextBlock.getWidth(),
|
||||
paragraphTextBlock.getHeight(),
|
||||
paragraphTextBlock.getPage(),
|
||||
null);
|
||||
sectionText.getSectionAreas().add(sectionArea);
|
||||
}
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user