RED-5232: Code reformatting

This commit is contained in:
Ali Oezyetimoglu 2022-10-17 14:58:26 +02:00
parent 69540bcd5e
commit 76fda2b573
287 changed files with 3820403 additions and 1510 deletions

View File

@ -1,4 +1,5 @@
# Changelog
All notable changes to this project will be documented in this file.
## [Unreleased]

View File

@ -70,7 +70,12 @@ public class PlanSpec {
private PlanPermissions createPlanPermission(PlanIdentifier planIdentifier) {
Permissions permission = new Permissions().userPermissions("atlbamboo", PermissionType.EDIT, PermissionType.VIEW, PermissionType.ADMIN, PermissionType.CLONE, PermissionType.BUILD)
Permissions permission = new Permissions().userPermissions("atlbamboo",
PermissionType.EDIT,
PermissionType.VIEW,
PermissionType.ADMIN,
PermissionType.CLONE,
PermissionType.BUILD)
.groupPermissions("development", PermissionType.EDIT, PermissionType.VIEW, PermissionType.CLONE, PermissionType.BUILD)
.groupPermissions("devplant", PermissionType.EDIT, PermissionType.VIEW, PermissionType.CLONE, PermissionType.BUILD)
.loggedInUserPermissions(PermissionType.VIEW)
@ -89,33 +94,16 @@ public class PlanSpec {
return new Plan(project(), SERVICE_NAME, new BambooKey(SERVICE_KEY)).description("Plan created from (enter repository url of your plan)")
.variables(new Variable("maven_add_param", ""))
.stages(new Stage("Default Stage").jobs(new Job("Default Job",
new BambooKey("JOB1")).tasks(
new CleanWorkingDirectoryTask()
.description("Clean working directory.")
.stages(new Stage("Default Stage").jobs(new Job("Default Job", new BambooKey("JOB1")).tasks(new CleanWorkingDirectoryTask().description("Clean working directory.")
.enabled(true),
new VcsCheckoutTask().description("Checkout Default Repository")
.cleanCheckout(true)
.checkoutItems(
new CheckoutItem()
.defaultRepository()),
new ScriptTask().description("Build")
.location(Location.FILE)
.fileFromPath("bamboo-specs/src/main/resources/scripts/build-java.sh")
.argument(SERVICE_NAME), createJUnitParserTask()
.description("Resultparser")
new VcsCheckoutTask().description("Checkout Default Repository").cleanCheckout(true).checkoutItems(new CheckoutItem().defaultRepository()),
new ScriptTask().description("Build").location(Location.FILE).fileFromPath("bamboo-specs/src/main/resources/scripts/build-java.sh").argument(SERVICE_NAME),
createJUnitParserTask().description("Resultparser")
.resultDirectories("**/test-reports/*.xml, **/target/surefire-reports/*.xml, **/target/failsafe-reports/*.xml")
.enabled(true),
new InjectVariablesTask().description("Inject git Tag")
.path("git.tag")
.namespace("g")
.scope(InjectVariablesScope.LOCAL),
new VcsTagTask().description("${bamboo.g.gitTag}")
.tagName("${bamboo.g.gitTag}")
.defaultRepository())
.dockerConfiguration(
new DockerConfiguration()
.image("nexus.iqser.com:5001/infra/maven:3.8.4-openjdk-17-slim")
new InjectVariablesTask().description("Inject git Tag").path("git.tag").namespace("g").scope(InjectVariablesScope.LOCAL),
new VcsTagTask().description("${bamboo.g.gitTag}").tagName("${bamboo.g.gitTag}").defaultRepository())
.dockerConfiguration(new DockerConfiguration().image("nexus.iqser.com:5001/infra/maven:3.8.4-openjdk-17-slim")
.volume("/etc/maven/settings.xml", "/usr/share/maven/ref/settings.xml")
.volume("/var/run/docker.sock", "/var/run/docker.sock"))))
.linkedRepositories("RED / " + SERVICE_NAME)
@ -130,27 +118,17 @@ public class PlanSpec {
return new Plan(project(), SERVICE_NAME + "-Night", new BambooKey(SERVICE_KEY + "NIGHT")).description("Long running nightly Plan for tests")
.variables(new Variable("maven_add_param", "-Dtest-groups=rules-test"))
.stages(new Stage("Default Stage").jobs(
new Job("Default Job", new BambooKey("JOB1")).tasks(
new CleanWorkingDirectoryTask()
.description("Clean working directory.")
.stages(new Stage("Default Stage").jobs(new Job("Default Job", new BambooKey("JOB1")).tasks(new CleanWorkingDirectoryTask().description("Clean working directory.")
.enabled(true),
new VcsCheckoutTask()
.description("Checkout Default Repository")
.cleanCheckout(true)
.checkoutItems(
new CheckoutItem()
.defaultRepository()),
new ScriptTask()
.description("Build")
new VcsCheckoutTask().description("Checkout Default Repository").cleanCheckout(true).checkoutItems(new CheckoutItem().defaultRepository()),
new ScriptTask().description("Build")
.location(Location.FILE)
.fileFromPath("bamboo-specs/src/main/resources/scripts/build-java.sh")
.argument(SERVICE_NAME + " verify"), createJUnitParserTask().description("Resultparser")
.argument(SERVICE_NAME + " verify"),
createJUnitParserTask().description("Resultparser")
.resultDirectories("**/test-reports/*.xml, **/target/surefire-reports/*.xml, **/target/failsafe-reports/*.xml")
.enabled(true))
.dockerConfiguration(
new DockerConfiguration()
.image("nexus.iqser.com:5001/infra/maven:3.8.4-openjdk-17-slim")
.dockerConfiguration(new DockerConfiguration().image("nexus.iqser.com:5001/infra/maven:3.8.4-openjdk-17-slim")
.volume("/etc/maven/settings.xml", "/usr/share/maven/ref/settings.xml")
.volume("/var/run/docker.sock", "/var/run/docker.sock"))))
.linkedRepositories("RED / " + SERVICE_NAME)
@ -163,12 +141,9 @@ public class PlanSpec {
return new Plan(project(), SERVICE_NAME + "-Sec", new BambooKey(SERVICE_KEY + "SEC")).description("Security Analysis Plan")
.stages(new Stage("Default Stage").jobs(new Job("Default Job", new BambooKey("JOB1")).tasks(new ScriptTask().description("Clean")
.inlineBody("#!/bin/bash\n" + "set -e\n" + "rm -rf ./*"), new VcsCheckoutTask().description("Checkout Default Repository")
.cleanCheckout(true)
.checkoutItems(new CheckoutItem().defaultRepository()), new ScriptTask().description("Sonar")
.location(Location.FILE)
.fileFromPath("bamboo-specs/src/main/resources/scripts/sonar-java.sh")
.argument(SERVICE_NAME))
.inlineBody("#!/bin/bash\n" + "set -e\n" + "rm -rf ./*"),
new VcsCheckoutTask().description("Checkout Default Repository").cleanCheckout(true).checkoutItems(new CheckoutItem().defaultRepository()),
new ScriptTask().description("Sonar").location(Location.FILE).fileFromPath("bamboo-specs/src/main/resources/scripts/sonar-java.sh").argument(SERVICE_NAME))
.dockerConfiguration(new DockerConfiguration().image("nexus.iqser.com:5001/infra/maven:3.8.4-openjdk-17-slim")
.dockerRunArguments("--net=host")
.volume("/etc/maven/settings.xml", "/usr/share/maven/conf/settings.xml")

View File

@ -1,6 +1,5 @@
package buildjob;
import org.junit.Test;
import com.atlassian.bamboo.specs.api.builders.plan.Plan;
@ -8,6 +7,7 @@ import com.atlassian.bamboo.specs.api.exceptions.PropertiesValidationException;
import com.atlassian.bamboo.specs.api.util.EntityPropertiesBuilders;
public class PlanSpecTest {
@Test
public void checkYourPlanOffline() throws PropertiesValidationException {
@ -20,4 +20,5 @@ public class PlanSpecTest {
Plan secPlan = new PlanSpec().createSecBuild();
EntityPropertiesBuilders.build(secPlan);
}
}

View File

@ -1,6 +1,7 @@
package com.iqser.red.service.redaction.v1.model;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;

View File

@ -1,6 +1,7 @@
package com.iqser.red.service.redaction.v1.model;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
@ -31,6 +32,5 @@ public class AnalyzeResult {
private ManualRedactions manualRedactions;
}

View File

@ -2,6 +2,14 @@ package com.iqser.red.service.redaction.v1.model;
public enum ArgumentType {
INTEGER, BOOLEAN, STRING, FILE_ATTRIBUTE, REGEX, TYPE, RULE_NUMBER, LEGAL_BASIS, REFERENCE_TYPE
INTEGER,
BOOLEAN,
STRING,
FILE_ATTRIBUTE,
REGEX,
TYPE,
RULE_NUMBER,
LEGAL_BASIS,
REFERENCE_TYPE
}

View File

@ -16,4 +16,5 @@ public class Change {
private int analysisNumber;
private ChangeType type;
private OffsetDateTime dateTime;
}

View File

@ -1,5 +1,7 @@
package com.iqser.red.service.redaction.v1.model;
public enum ChangeType {
ADDED, REMOVED, CHANGED
ADDED,
REMOVED,
CHANGED
}

View File

@ -1,5 +1,7 @@
package com.iqser.red.service.redaction.v1.model;
public enum Engine {
DICTIONARY, NER, RULE
DICTIONARY,
NER,
RULE
}

View File

@ -18,4 +18,5 @@ public class ImportedRedaction {
@Builder.Default
private List<Rectangle> positions = new ArrayList<>();
}

View File

@ -1,6 +1,7 @@
package com.iqser.red.service.redaction.v1.model;
import com.dslplatform.json.CompiledJson;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
@ -19,4 +20,5 @@ public class ImportedRedactions {
@Builder.Default
private Map<Integer, List<ImportedRedaction>> importedRedactions = new HashMap<>();
}

View File

@ -2,6 +2,7 @@ package com.iqser.red.service.redaction.v1.model;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.AnnotationStatus;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.entitymapped.BaseAnnotation;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
@ -24,7 +25,9 @@ public class ManualChange {
private String userId;
private Map<String, String> propertyChanges = new HashMap<>();
public static ManualChange from(BaseAnnotation baseAnnotation) {
ManualChange manualChange = new ManualChange();
manualChange.annotationStatus = baseAnnotation.getStatus();
manualChange.processedDate = baseAnnotation.getProcessedDate();
@ -33,16 +36,22 @@ public class ManualChange {
return manualChange;
}
/**
 * Reports whether a processed date has been recorded on this change.
 *
 * @return {@code true} if {@code processedDate} is non-null, otherwise {@code false}
 */
public boolean isProcessed() {

    final boolean hasProcessedDate = processedDate != null;
    return hasProcessedDate;
}
/**
 * Fluent setter for the manual redaction type.
 *
 * @param manualRedactionType value assigned to this instance's {@code manualRedactionType} field
 * @return this same instance, so further calls can be chained
 */
public ManualChange withManualRedactionType(ManualRedactionType manualRedactionType) {

    final ManualChange self = this;
    self.manualRedactionType = manualRedactionType;
    return self;
}
/**
 * Records one property change on this instance and returns it for chaining.
 *
 * @param property key under which the change is stored in {@code propertyChanges}
 * @param value    value stored for that key; an existing entry for the same key is replaced
 * @return this same instance
 */
public ManualChange withChange(String property, String value) {

    propertyChanges.put(property, value);
    final ManualChange self = this;
    return self;
}

View File

@ -2,6 +2,9 @@ package com.iqser.red.service.redaction.v1.model;
public enum MessageType {
ANALYSE, REANALYSE, STRUCTURE_ANALYSE, SURROUNDING_TEXT
ANALYSE,
REANALYSE,
STRUCTURE_ANALYSE,
SURROUNDING_TEXT
}

View File

@ -12,4 +12,5 @@ import lombok.NoArgsConstructor;
public class ReanalyzeResult {
private RedactionLog redactionLog;
}

View File

@ -14,4 +14,5 @@ public class Rectangle {
private float height;
private int page;
}

View File

@ -1,6 +1,7 @@
package com.iqser.red.service.redaction.v1.model;
import com.dslplatform.json.CompiledJson;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
@ -8,14 +9,12 @@ import lombok.NoArgsConstructor;
import java.util.ArrayList;
import java.util.List;
@Data
@CompiledJson
@AllArgsConstructor
@NoArgsConstructor
public class RedactionLog {
/**
* Version 0 Redaction Logs have manual redactions merged inside them
* Version 1 Redaction Logs only contain system ( rule/dictionary ) redactions. Manual Redactions are merged in at runtime.
@ -35,5 +34,4 @@ public class RedactionLog {
private long rulesVersion = -1;
private long legalBasisVersion = -1;
}

View File

@ -1,11 +1,11 @@
package com.iqser.red.service.redaction.v1.model;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.AnnotationStatus;
import lombok.*;
import java.util.*;
@Data
@Builder
@NoArgsConstructor
@ -35,7 +35,6 @@ public class RedactionLogEntry {
private List<Rectangle> positions = new ArrayList<>();
private int sectionNumber;
private String textBefore;
private String textAfter;
@ -70,21 +69,27 @@ public class RedactionLogEntry {
@Builder.Default
private Set<String> importedRedactionIntersections = new HashSet<>();
/**
 * Checks whether the most recent entry of {@code changes} has type {@link ChangeType#REMOVED}.
 *
 * @return {@code true} only if a last change exists and its type is {@code REMOVED};
 *         {@code false} when there is no last change or it has any other type
 */
public boolean lastChangeIsRemoved() {

    // Keep the last change only when it is a removal; presence of a value is the answer.
    return last(changes)
            .filter(latest -> latest.getType() == ChangeType.REMOVED)
            .isPresent();
}
public boolean isLocalManualRedaction() {
return manualChanges.stream().anyMatch(mc -> mc.getManualRedactionType() == ManualRedactionType.ADD_LOCALLY &&
mc.getAnnotationStatus() == AnnotationStatus.APPROVED);
return manualChanges.stream().anyMatch(mc -> mc.getManualRedactionType() == ManualRedactionType.ADD_LOCALLY && mc.getAnnotationStatus() == AnnotationStatus.APPROVED);
}
public boolean isManuallyRemoved() {
return manualChanges.stream().anyMatch(mc -> mc.getManualRedactionType() == ManualRedactionType.REMOVE_LOCALLY &&
mc.getAnnotationStatus() == AnnotationStatus.APPROVED);
return manualChanges.stream().anyMatch(mc -> mc.getManualRedactionType() == ManualRedactionType.REMOVE_LOCALLY && mc.getAnnotationStatus() == AnnotationStatus.APPROVED);
}
/**
 * Returns the final element of the given list, if there is one.
 * <p>
 * A {@code null} list, an empty list, and a list whose final element is {@code null} all
 * yield {@link Optional#empty()} instead of raising a {@link NullPointerException}.
 *
 * @param list the list to inspect; may be {@code null} or empty
 * @param <T>  element type of the list
 * @return an {@code Optional} holding the last element, or an empty {@code Optional} when
 *         there is no non-null last element
 */
private <T> Optional<T> last(List<T> list) {

    if (list == null || list.isEmpty()) {
        return Optional.empty();
    }
    // ofNullable rather than of: a null tail element must not blow up the caller.
    return Optional.ofNullable(list.get(list.size() - 1));
}

View File

@ -3,6 +3,7 @@ package com.iqser.red.service.redaction.v1.model;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.configuration.Colors;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.Type;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
@ -29,4 +30,5 @@ public class RedactionRequest {
private List<Type> types;
private boolean includeFalsePositives;
}

View File

@ -15,13 +15,20 @@ public class SectionArea {
private int page;
private String header;
public boolean contains(Rectangle other) {
return page == other.getPage() && this.topLeft.getX() <= other.getTopLeft().getX() && this.topLeft.getX() + this.getWidth() >= other.getTopLeft().getX() + other.getWidth() && this.getTopLeft().getY() <= other.getTopLeft().getY() && this.getTopLeft().getY() + this.getHeight() >= other.getTopLeft().getY() + other.getHeight();
return page == other.getPage() && this.topLeft.getX() <= other.getTopLeft().getX() && this.topLeft.getX() + this.getWidth() >= other.getTopLeft()
.getX() + other.getWidth() && this.getTopLeft().getY() <= other.getTopLeft().getY() && this.getTopLeft().getY() + this.getHeight() >= other.getTopLeft()
.getY() + other.getHeight();
}
// TODO we should only use one rectangle class.
public boolean contains(com.iqser.red.service.persistence.service.v1.api.model.annotations.Rectangle other) {
return page == other.getPage() && this.topLeft.getX() <= other.getTopLeftX() && this.topLeft.getX() + this.getWidth() >= other.getTopLeftX() + other.getWidth() && this.getTopLeft().getY() <= other.getTopLeftY() && this.getTopLeft().getY() + this.getHeight() >= other.getTopLeftY() + other.getHeight();
return page == other.getPage() && this.topLeft.getX() <= other.getTopLeftX() && this.topLeft.getX() + this.getWidth() >= other.getTopLeftX() + other.getWidth() && this.getTopLeft()
.getY() <= other.getTopLeftY() && this.getTopLeft().getY() + this.getHeight() >= other.getTopLeftY() + other.getHeight();
}
}

View File

@ -1,6 +1,7 @@
package com.iqser.red.service.redaction.v1.model;
import com.dslplatform.json.CompiledJson;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
@ -28,4 +29,5 @@ public class SectionGrid {
private List<SectionArea> sectionAreas;
}
}

View File

@ -4,6 +4,7 @@ import com.iqser.red.service.persistence.service.v1.api.model.annotations.Manual
import com.iqser.red.service.redaction.v1.model.RedactionLog;
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
import com.iqser.red.service.redaction.v1.model.RedactionResult;
import org.springframework.http.MediaType;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.PostMapping;
@ -32,8 +33,6 @@ public interface RedactionResource {
@PostMapping(value = "/manual/surrounding-text/{dossierId}/{fileId}", consumes = MediaType.APPLICATION_JSON_VALUE, produces = MediaType.APPLICATION_JSON_VALUE)
ManualRedactions addSurroundingText(@PathVariable("dossierId") String dossierId,
@PathVariable("fileId") String fileId,
@RequestBody ManualRedactions manualRedactions);
ManualRedactions addSurroundingText(@PathVariable("dossierId") String dossierId, @PathVariable("fileId") String fileId, @RequestBody ManualRedactions manualRedactions);
}

View File

@ -1,6 +1,7 @@
package com.iqser.red.service.redaction.v1.resources;
import com.iqser.red.service.redaction.v1.model.RuleBuilderModel;
import org.springframework.http.MediaType;
import org.springframework.web.bind.annotation.PostMapping;

View File

@ -3,6 +3,7 @@ package com.iqser.red.service.redaction.v1.server;
import com.iqser.red.commons.spring.DefaultWebMvcConfiguration;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.actuate.autoconfigure.security.servlet.ManagementWebSecurityAutoConfiguration;
import org.springframework.boot.autoconfigure.SpringBootApplication;
@ -22,14 +23,16 @@ import io.micrometer.core.instrument.MeterRegistry;
public class Application {
/**
 * Application entry point: sets the PDFBox pure-Java CMYK conversion system property to
 * {@code "true"} and then launches the Spring application.
 *
 * @param args command-line arguments, passed through to {@code SpringApplication.run}
 */
public static void main(String[] args) {

    final String cmykConversionProperty = "org.apache.pdfbox.rendering.UsePureJavaCMYKConversion";
    // The property is set before the application context is started.
    System.setProperty(cmykConversionProperty, "true");

    SpringApplication.run(Application.class, args);
}
/**
 * Bean factory method exposing a {@code TimedAspect} built from the supplied registry.
 *
 * @param registry meter registry handed to the {@code TimedAspect} constructor
 * @return the newly created aspect instance
 */
@Bean
public TimedAspect timedAspect(MeterRegistry registry) {

    final TimedAspect aspect = new TimedAspect(registry);
    return aspect;
}
}

View File

@ -14,7 +14,9 @@ public class FloatFrequencyCounter {
@Getter
Map<Float, Integer> countPerValue = new HashMap<>();
public void add(float value) {
if (!countPerValue.containsKey(value)) {
countPerValue.put(value, 1);
} else {
@ -22,7 +24,9 @@ public class FloatFrequencyCounter {
}
}
public void addAll(Map<Float, Integer> otherCounter) {
for (Map.Entry<Float, Integer> entry : otherCounter.entrySet()) {
if (countPerValue.containsKey(entry.getKey())) {
countPerValue.put(entry.getKey(), countPerValue.get(entry.getKey()) + entry.getValue());
@ -32,7 +36,9 @@ public class FloatFrequencyCounter {
}
}
public Float getMostPopular() {
Map.Entry<Float, Integer> mostPopular = null;
for (Map.Entry<Float, Integer> entry : countPerValue.entrySet()) {
if (mostPopular == null || entry.getValue() >= mostPopular.getValue()) {
@ -44,6 +50,7 @@ public class FloatFrequencyCounter {
public List<Float> getHighterThanMostPopular() {
Float mostPopular = getMostPopular();
List<Float> higher = new ArrayList<>();
for (Float value : countPerValue.keySet()) {
@ -57,6 +64,7 @@ public class FloatFrequencyCounter {
public Float getHighest() {
Float highest = null;
for (Float value : countPerValue.keySet()) {
if (highest == null || value > highest) {

View File

@ -3,6 +3,7 @@ package com.iqser.red.service.redaction.v1.server.classification.model;
import com.dslplatform.json.JsonAttribute;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
import lombok.AllArgsConstructor;
import lombok.Data;
@ -14,6 +15,7 @@ public class Footer {
private List<TextBlock> textBlocks;
@JsonIgnore
@JsonAttribute(ignore = true)
public SearchableText getSearchableText() {

View File

@ -3,6 +3,7 @@ package com.iqser.red.service.redaction.v1.server.classification.model;
import com.dslplatform.json.JsonAttribute;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
import lombok.AllArgsConstructor;
import lombok.Data;
@ -14,6 +15,7 @@ public class Header {
private List<TextBlock> textBlocks;
@JsonIgnore
@JsonAttribute(ignore = true)
public SearchableText getSearchableText() {

View File

@ -2,5 +2,7 @@ package com.iqser.red.service.redaction.v1.server.classification.model;
public enum Orientation {
NONE, LEFT, RIGHT
NONE,
LEFT,
RIGHT
}

View File

@ -3,6 +3,7 @@ package com.iqser.red.service.redaction.v1.server.classification.model;
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle;
import lombok.Data;
import lombok.NonNull;
import lombok.RequiredArgsConstructor;

View File

@ -4,6 +4,7 @@ import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
import lombok.Data;
import lombok.NoArgsConstructor;

View File

@ -7,6 +7,7 @@ import com.iqser.red.service.redaction.v1.model.SectionArea;
import com.iqser.red.service.redaction.v1.server.redaction.model.CellValue;
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
@ -40,10 +41,12 @@ public class SectionText {
/**
 * Stores the given tabular data on this object after dropping any entry keyed by {@code null}.
 *
 * @param tabularData map to store; it is modified in place (its {@code null}-key entry is
 *                    removed) and then kept by reference, not copied
 */
public void setTabularData(Map<String, CellValue> tabularData) {
// Strip the null-key entry from the caller's map before the reference is stored.
tabularData.remove(null);
this.tabularData = tabularData;
}
@JsonIgnore
@JsonAttribute(ignore = true)
public SearchableText getSearchableText() {

View File

@ -1,6 +1,7 @@
package com.iqser.red.service.redaction.v1.server.classification.model;
import com.dslplatform.json.CompiledJson;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

View File

@ -6,6 +6,7 @@ import com.fasterxml.jackson.annotation.JsonIgnore;
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
@ -45,12 +46,12 @@ public class TextBlock extends AbstractTextContainer {
@JsonIgnore
private float highestFontSize;
@JsonIgnore
private String classification;
public TextBlock(float minX, float maxX, float minY, float maxY, List<TextPositionSequence> sequences, int rotation) {
this.minX = minX;
this.maxX = maxX;
this.minY = minY;
@ -59,19 +60,25 @@ public class TextBlock extends AbstractTextContainer {
this.rotation = rotation;
}
/**
 * Builds a block from {@link #copy()} of this one and adds the given sequence to it.
 *
 * @param r text position sequence added to the copy
 * @return the copy after {@code r} has been added
 */
public TextBlock union(TextPositionSequence r) {

    final TextBlock merged = copy();
    merged.add(r);
    return merged;
}
/**
 * Builds a block from {@link #copy()} of this one and adds the given block to it.
 *
 * @param r text block added to the copy
 * @return the copy after {@code r} has been added
 */
public TextBlock union(TextBlock r) {

    final TextBlock merged = copy();
    merged.add(r);
    return merged;
}
public void add(TextBlock r) {
if (r.getMinX() < minX) {
minX = r.getMinX();
}
@ -87,7 +94,9 @@ public class TextBlock extends AbstractTextContainer {
sequences.addAll(r.getSequences());
}
public void add(TextPositionSequence r) {
if (r.getX1() < minX) {
minX = r.getX1();
}
@ -102,15 +111,21 @@ public class TextBlock extends AbstractTextContainer {
}
}
/**
 * Creates a new {@code TextBlock} from this block's current bounds, sequences and rotation.
 * <p>
 * NOTE(review): the {@code sequences} list reference itself is passed to the constructor,
 * not a copy of the list. If the constructor stores that reference as-is (not visible here),
 * the new block and this block share one list, so additions made through either are seen by
 * both. Confirm whether that aliasing is intended.
 *
 * @return the newly constructed block
 */
public TextBlock copy() {
return new TextBlock(minX, maxX, minY, maxY, sequences, rotation);
}
/**
 * Repositions this block from an origin point plus extent by delegating to
 * {@code set(x1, y1, x2, y2)} with the opposite corner computed from width and height.
 *
 * @param x1     x coordinate of the first corner
 * @param y1     y coordinate of the first corner
 * @param width  extent added to {@code x1} to obtain the second x coordinate
 * @param height extent added to {@code y1} to obtain the second y coordinate
 */
public void resize(float x1, float y1, float width, float height) {

    final float x2 = x1 + width;
    final float y2 = y1 + height;
    set(x1, y1, x2, y2);
}
public void set(float x1, float y1, float x2, float y2) {
this.minX = Math.min(x1, x2);
this.maxX = Math.max(x1, x2);
this.minY = Math.min(y1, y2);
@ -136,6 +151,7 @@ public class TextBlock extends AbstractTextContainer {
}
@Override
@JsonIgnore
@JsonAttribute(ignore = true)

View File

@ -3,6 +3,7 @@ package com.iqser.red.service.redaction.v1.server.classification.model;
import com.dslplatform.json.JsonAttribute;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
import lombok.AllArgsConstructor;
import lombok.Data;
@ -14,6 +15,7 @@ public class UnclassifiedText {
private List<TextBlock> textBlocks;
@JsonIgnore
@JsonAttribute(ignore = true)
public SearchableText getSearchableText() {

View File

@ -29,8 +29,7 @@ public class BlockificationService {
static final float THRESHOLD = 1f;
public Page blockify(List<TextPositionSequence> textPositions, List<Ruling> horizontalRulingLines,
List<Ruling> verticalRulingLines) {
public Page blockify(List<TextPositionSequence> textPositions, List<Ruling> horizontalRulingLines, List<Ruling> verticalRulingLines) {
sortRotatedSequences(textPositions);
@ -48,12 +47,17 @@ public class BlockificationService {
boolean startFromTop = word.getY1() > maxY + word.getHeight();
boolean splitByX = prev != null && maxX + 50 < word.getX1() && prev.getY1() == word.getY1();
boolean newLineAfterSplit = prev != null && word.getY1() != prev.getY1() && wasSplitted && splitX1 != word.getX1();
boolean splittedByRuling =
isSplittedByRuling(maxX, minY, word.getX1(), word.getY1(), verticalRulingLines) ||
isSplittedByRuling(minX, minY, word.getX1(), word.getY2(), horizontalRulingLines)
boolean splittedByRuling = isSplittedByRuling(maxX, minY, word.getX1(), word.getY1(), verticalRulingLines) || isSplittedByRuling(minX,
minY,
word.getX1(),
word.getY2(),
horizontalRulingLines)
|| isSplittedByRuling(maxX, minY, word.getX1(), word.getY1(), horizontalRulingLines)
|| isSplittedByRuling(minX, minY, word.getX1(), word.getY2(), verticalRulingLines);
|| isSplittedByRuling(maxX, minY, word.getX1(), word.getY1(), horizontalRulingLines) || isSplittedByRuling(minX,
minY,
word.getX1(),
word.getY2(),
verticalRulingLines);
if (prev != null && (lineSeparation || startFromTop || splitByX || newLineAfterSplit || splittedByRuling)) {
@ -115,8 +119,7 @@ public class BlockificationService {
TextBlock block = (TextBlock) itty.next();
if (previousLeft != null && block.getOrientation().equals(Orientation.LEFT)) {
if (previousLeft.getMinY() > block.getMinY() && block.getMaxY() + block.getMostPopularWordHeight() > previousLeft
.getMinY()) {
if (previousLeft.getMinY() > block.getMinY() && block.getMaxY() + block.getMostPopularWordHeight() > previousLeft.getMinY()) {
previousLeft.add(block);
itty.remove();
continue;
@ -124,8 +127,7 @@ public class BlockificationService {
}
if (previousRight != null && block.getOrientation().equals(Orientation.RIGHT)) {
if (previousRight.getMinY() > block.getMinY() && block.getMaxY() + block.getMostPopularWordHeight() > previousRight
.getMinY()) {
if (previousRight.getMinY() > block.getMinY() && block.getMaxY() + block.getMostPopularWordHeight() > previousRight.getMinY()) {
previousRight.add(block);
itty.remove();
continue;
@ -144,10 +146,8 @@ public class BlockificationService {
while (itty.hasNext()) {
TextBlock block = (TextBlock) itty.next();
if (previous != null && previous.getOrientation().equals(Orientation.LEFT) && block.getOrientation()
.equals(Orientation.LEFT) && equalsWithThreshold(block.getMaxY(), previous.getMaxY()) || previous != null && previous
.getOrientation()
.equals(Orientation.LEFT) && block.getOrientation()
if (previous != null && previous.getOrientation().equals(Orientation.LEFT) && block.getOrientation().equals(Orientation.LEFT) && equalsWithThreshold(block.getMaxY(),
previous.getMaxY()) || previous != null && previous.getOrientation().equals(Orientation.LEFT) && block.getOrientation()
.equals(Orientation.RIGHT) && equalsWithThreshold(block.getMaxY(), previous.getMaxY())) {
previous.add(block);
itty.remove();
@ -186,12 +186,10 @@ public class BlockificationService {
styleFrequencyCounter.add(wordBlock.getFontStyle());
if (textBlock == null) {
textBlock = new TextBlock(wordBlock.getX1(), wordBlock.getX2(), wordBlock.getY1(), wordBlock.getY2(), wordBlockList, wordBlock
.getRotation());
textBlock = new TextBlock(wordBlock.getX1(), wordBlock.getX2(), wordBlock.getY1(), wordBlock.getY2(), wordBlockList, wordBlock.getRotation());
} else {
TextBlock spatialEntity = textBlock.union(wordBlock);
textBlock.resize(spatialEntity.getMinX(), spatialEntity.getMinY(), spatialEntity.getWidth(), spatialEntity
.getHeight());
textBlock.resize(spatialEntity.getMinX(), spatialEntity.getMinY(), spatialEntity.getWidth(), spatialEntity.getHeight());
}
}
@ -204,19 +202,14 @@ public class BlockificationService {
textBlock.setHighestFontSize(fontSizeFrequencyCounter.getHighest());
}
if (textBlock != null && textBlock.getSequences() != null && textBlock.getSequences()
.stream()
.map(t -> round(t.getY1(), 3))
.collect(toSet())
.size() == 1) {
if (textBlock != null && textBlock.getSequences() != null && textBlock.getSequences().stream().map(t -> round(t.getY1(), 3)).collect(toSet()).size() == 1) {
textBlock.getSequences().sort(Comparator.comparing(TextPositionSequence::getX1));
}
return textBlock;
}
private boolean isSplittedByRuling(float previousX2, float previousY1, float currentX1, float currentY1,
List<Ruling> rulingLines) {
private boolean isSplittedByRuling(float previousX2, float previousY1, float currentX1, float currentY1, List<Ruling> rulingLines) {
for (Ruling ruling : rulingLines) {
if (ruling.intersectsLine(previousX2, previousY1, currentX1, currentY1)) {
@ -227,8 +220,7 @@ public class BlockificationService {
}
public Rectangle calculateBodyTextFrame(List<Page> pages, FloatFrequencyCounter documentFontSizeCounter,
boolean landscape) {
public Rectangle calculateBodyTextFrame(List<Page> pages, FloatFrequencyCounter documentFontSizeCounter, boolean landscape) {
float minX = 10000;
float maxX = -100;

View File

@ -6,8 +6,10 @@ import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.classification.utils.PositionUtils;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import java.util.List;
@ -48,35 +50,34 @@ public class ClassificationService {
}
public void classifyBlock(TextBlock textBlock, Rectangle bodyTextFrame, Page page, Document document,
List<Float> headlineFontSizes) {
public void classifyBlock(TextBlock textBlock, Rectangle bodyTextFrame, Page page, Document document, List<Float> headlineFontSizes) {
if (document.getFontSizeCounter().getMostPopular() == null) {
textBlock.setClassification("Other");
return;
}
if (PositionUtils.isOverBodyTextFrame(bodyTextFrame, textBlock, page.isRotated()) && (document.getFontSizeCounter()
.getMostPopular() == null || textBlock.getHighestFontSize() <= document.getFontSizeCounter()
.getMostPopular())) {
.getMostPopular() == null || textBlock.getHighestFontSize() <= document.getFontSizeCounter().getMostPopular())) {
textBlock.setClassification("Header");
} else if (PositionUtils.isUnderBodyTextFrame(bodyTextFrame, textBlock) && (document.getFontSizeCounter()
.getMostPopular() == null || textBlock.getHighestFontSize() <= document.getFontSizeCounter()
.getMostPopular())) {
.getMostPopular() == null || textBlock.getHighestFontSize() <= document.getFontSizeCounter().getMostPopular())) {
textBlock.setClassification("Footer");
} else if (page.getPageNumber() == 1 && (!PositionUtils.isTouchingUnderBodyTextFrame(bodyTextFrame, textBlock) && PositionUtils
.getHeightDifferenceBetweenChunkWordAndDocumentWord(textBlock, document.getTextHeightCounter()
.getMostPopular()) > 2.5 && textBlock.getHighestFontSize() > document.getFontSizeCounter()
.getMostPopular() || page.getTextBlocks().size() == 1)) {
} else if (page.getPageNumber() == 1 && (!PositionUtils.isTouchingUnderBodyTextFrame(bodyTextFrame,
textBlock) && PositionUtils.getHeightDifferenceBetweenChunkWordAndDocumentWord(textBlock,
document.getTextHeightCounter().getMostPopular()) > 2.5 && textBlock.getHighestFontSize() > document.getFontSizeCounter().getMostPopular() || page.getTextBlocks()
.size() == 1)) {
if (!Pattern.matches("[0-9]+", textBlock.toString())) {
textBlock.setClassification("Title");
}
} else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getMostPopularWordFontSize() > document
.getFontSizeCounter()
.getMostPopular() && PositionUtils.getApproxLineCount(textBlock) < 4.9 && (textBlock.getMostPopularWordStyle()
.equals("bold") || !document.getFontStyleCounter().getCountPerValue().containsKey("bold") && textBlock.getMostPopularWordFontSize() > document
.getFontSizeCounter()
.getMostPopular() + 1) && textBlock.getSequences().get(0).getTextPositions().get(0).getFontSizeInPt() >= textBlock.getMostPopularWordFontSize()) {
} else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getMostPopularWordFontSize() > document.getFontSizeCounter()
.getMostPopular() && PositionUtils.getApproxLineCount(textBlock) < 4.9 && (textBlock.getMostPopularWordStyle().equals("bold") || !document.getFontStyleCounter()
.getCountPerValue()
.containsKey("bold") && textBlock.getMostPopularWordFontSize() > document.getFontSizeCounter().getMostPopular() + 1) && textBlock.getSequences()
.get(0)
.getTextPositions()
.get(0)
.getFontSizeInPt() >= textBlock.getMostPopularWordFontSize()) {
for (int i = 1; i <= headlineFontSizes.size(); i++) {
if (textBlock.getMostPopularWordFontSize() == headlineFontSizes.get(i - 1)) {
@ -84,28 +85,25 @@ public class ClassificationService {
document.setHeadlines(true);
}
}
} else if (!textBlock.getText().startsWith("Table ") && !textBlock.getText()
.startsWith("Figure ") && PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getMostPopularWordStyle()
.equals("bold") && !document.getFontStyleCounter()
} else if (!textBlock.getText().startsWith("Table ") && !textBlock.getText().startsWith("Figure ") && PositionUtils.isWithinBodyTextFrame(bodyTextFrame,
textBlock) && textBlock.getMostPopularWordStyle().equals("bold") && !document.getFontStyleCounter()
.getMostPopular()
.equals("bold") && PositionUtils.getApproxLineCount(textBlock) < 2.9 && textBlock.getSequences().get(0).getTextPositions().get(0).getFontSizeInPt() >= textBlock.getMostPopularWordFontSize()) {
.equals("bold") && PositionUtils.getApproxLineCount(textBlock) < 2.9 && textBlock.getSequences()
.get(0)
.getTextPositions()
.get(0)
.getFontSizeInPt() >= textBlock.getMostPopularWordFontSize()) {
textBlock.setClassification("H " + (headlineFontSizes.size() + 1));
document.setHeadlines(true);
} else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getMostPopularWordFontSize() == document
.getFontSizeCounter()
.getMostPopular() && textBlock.getMostPopularWordStyle()
.equals("bold") && !document.getFontStyleCounter().getMostPopular().equals("bold")) {
} else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getMostPopularWordFontSize() == document.getFontSizeCounter()
.getMostPopular() && textBlock.getMostPopularWordStyle().equals("bold") && !document.getFontStyleCounter().getMostPopular().equals("bold")) {
textBlock.setClassification("TextBlock Bold");
} else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getMostPopularWordFont()
.equals(document.getFontCounter().getMostPopular()) && textBlock.getMostPopularWordStyle()
.equals(document.getFontStyleCounter()
.getMostPopular()) && textBlock.getMostPopularWordFontSize() == document.getFontSizeCounter()
.getMostPopular()) {
.equals(document.getFontStyleCounter().getMostPopular()) && textBlock.getMostPopularWordFontSize() == document.getFontSizeCounter().getMostPopular()) {
textBlock.setClassification("TextBlock");
} else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getMostPopularWordFontSize() == document
.getFontSizeCounter()
.getMostPopular() && textBlock.getMostPopularWordStyle()
.equals("italic") && !document.getFontStyleCounter()
} else if (PositionUtils.isWithinBodyTextFrame(bodyTextFrame, textBlock) && textBlock.getMostPopularWordFontSize() == document.getFontSizeCounter()
.getMostPopular() && textBlock.getMostPopularWordStyle().equals("italic") && !document.getFontStyleCounter()
.getMostPopular()
.equals("italic") && PositionUtils.getApproxLineCount(textBlock) < 2.9) {
textBlock.setClassification("TextBlock Italic");

View File

@ -2,13 +2,13 @@ package com.iqser.red.service.redaction.v1.server.classification.utils;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle;
import lombok.experimental.UtilityClass;
@UtilityClass
@SuppressWarnings("all")
public class PositionUtils {
public boolean isWithinBodyTextFrame(Rectangle btf, TextBlock textBlock) {
//TODO Currently this is not working for rotated pages.
@ -19,10 +19,7 @@ public class PositionUtils {
double threshold = textBlock.getMostPopularWordHeight() * 3;
if (textBlock.getMinX() + threshold > btf.getX() &&
textBlock.getMaxX() - threshold < btf.getX() + btf.getWidth() &&
textBlock.getMinY() + threshold > btf.getY() &&
textBlock.getMaxY() - threshold < btf.getY() + btf.getHeight()) {
if (textBlock.getMinX() + threshold > btf.getX() && textBlock.getMaxX() - threshold < btf.getX() + btf.getWidth() && textBlock.getMinY() + threshold > btf.getY() && textBlock.getMaxY() - threshold < btf.getY() + btf.getHeight()) {
return true;
} else {
return false;
@ -84,11 +81,14 @@ public class PositionUtils {
/**
 * Computes how much taller (positive) or shorter (negative) the block's most popular word height is
 * compared with the supplied document-wide value.
 *
 * @param textBlock                     block whose most popular word height is read
 * @param documentMostPopularWordHeight reference height to subtract; it is unboxed, so it must not be {@code null}
 * @return block word height minus {@code documentMostPopularWordHeight}
 */
public float getHeightDifferenceBetweenChunkWordAndDocumentWord(TextBlock textBlock, Float documentMostPopularWordHeight) {

    float chunkWordHeight = textBlock.getMostPopularWordHeight();
    return chunkWordHeight - documentMostPopularWordHeight;
}
/**
 * Estimates the number of text lines in a block as its height divided by its most popular word height.
 * <p>
 * NOTE(review): no guard against a word height of 0 — with float operands the division would yield
 * Infinity/NaN rather than throw; confirm callers tolerate that.
 *
 * @param textBlock block whose height and most popular word height are read
 * @return the (fractional) quotient of block height and word height
 */
public Float getApproxLineCount(TextBlock textBlock) {

    float blockHeight = textBlock.getHeight();
    float wordHeight = textBlock.getMostPopularWordHeight();
    return blockHeight / wordHeight;
}
}

View File

@ -6,4 +6,5 @@ import com.iqser.red.service.persistence.service.v1.api.resources.DictionaryReso
/**
 * Feign client bound to the {@link DictionaryResource} API.
 * <p>
 * The base URL comes from the {@code persistence-service.url} property. The interface declares no
 * methods of its own; every operation is inherited from {@link DictionaryResource}.
 */
@FeignClient(name = "DictionaryResource", url = "${persistence-service.url}")
public interface DictionaryClient extends DictionaryResource {
}

View File

@ -1,10 +1,10 @@
package com.iqser.red.service.redaction.v1.server.client;
import org.springframework.cloud.openfeign.FeignClient;
import com.iqser.red.service.persistence.service.v1.api.resources.FileStatusProcessingUpdateResource;
/**
 * Feign client bound to the {@link FileStatusProcessingUpdateResource} API.
 * <p>
 * The base URL comes from the {@code persistence-service.url} property. The interface declares no
 * methods of its own; every operation is inherited from {@link FileStatusProcessingUpdateResource}.
 */
@FeignClient(name = "FileStatusProcessingUpdateResource", url = "${persistence-service.url}")
public interface FileStatusProcessingUpdateClient extends FileStatusProcessingUpdateResource {
}

View File

@ -6,4 +6,5 @@ import com.iqser.red.service.persistence.service.v1.api.resources.LegalBasisMapp
/**
 * Feign client bound to the {@link LegalBasisMappingResource} API.
 * <p>
 * The base URL comes from the {@code persistence-service.url} property. The interface declares no
 * methods of its own; every operation is inherited from {@link LegalBasisMappingResource}.
 */
@FeignClient(name = "LegalBasisMappingResource", url = "${persistence-service.url}")
public interface LegalBasisClient extends LegalBasisMappingResource {
}

View File

@ -32,8 +32,7 @@ public class MockMultipartFile implements MultipartFile {
}
public MockMultipartFile(String name, @Nullable String originalFilename, @Nullable String contentType,
@Nullable byte[] content) {
public MockMultipartFile(String name, @Nullable String originalFilename, @Nullable String contentType, @Nullable byte[] content) {
Assert.hasLength(name, "Name must not be empty");
this.name = name;
@ -43,8 +42,7 @@ public class MockMultipartFile implements MultipartFile {
}
public MockMultipartFile(String name, @Nullable String originalFilename, @Nullable String contentType,
InputStream contentStream) throws IOException {
public MockMultipartFile(String name, @Nullable String originalFilename, @Nullable String contentType, InputStream contentStream) throws IOException {
this(name, originalFilename, contentType, FileCopyUtils.copyToByteArray(contentStream));
}

View File

@ -6,4 +6,5 @@ import com.iqser.red.service.persistence.service.v1.api.resources.RulesResource;
/**
 * Feign client bound to the {@link RulesResource} API.
 * <p>
 * The base URL comes from the {@code persistence-service.url} property. The interface declares no
 * methods of its own; every operation is inherited from {@link RulesResource}.
 */
@FeignClient(name = "RulesResource", url = "${persistence-service.url}")
public interface RulesClient extends RulesResource {
}

View File

@ -1,6 +1,7 @@
package com.iqser.red.service.redaction.v1.server.client.model;
import com.dslplatform.json.CompiledJson;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;

View File

@ -17,4 +17,5 @@ public class EntityRecognitionResult {
@Builder.Default
private Map<Integer, List<EntityRecogintionEntity>> entities = new HashMap<>();
}

View File

@ -13,4 +13,5 @@ public class EntityRecognitionSection {
private int sectionNumber;
private String text;
}

View File

@ -3,7 +3,9 @@ package com.iqser.red.service.redaction.v1.server.controller;
import com.iqser.red.commons.spring.ErrorMessage;
import com.iqser.red.service.redaction.v1.server.exception.NotFoundException;
import com.iqser.red.service.redaction.v1.server.exception.RulesValidationException;
import lombok.extern.slf4j.Slf4j;
import org.springframework.http.HttpStatus;
import org.springframework.web.bind.annotation.ExceptionHandler;
import org.springframework.web.bind.annotation.ResponseBody;
@ -18,10 +20,12 @@ public class ControllerAdvice {
/* error handling */
@ResponseBody
@ResponseStatus(value = HttpStatus.INTERNAL_SERVER_ERROR)
@ExceptionHandler(value = NullPointerException.class)
public ErrorMessage handleContentNotFoundException(NullPointerException e) {
if (e != null) {
log.error(e.getMessage(), e);
return new ErrorMessage(OffsetDateTime.now(), e.getMessage());
@ -30,17 +34,21 @@ public class ControllerAdvice {
return new ErrorMessage(OffsetDateTime.now(), "Nullpointer exception");
}
/**
 * Translates a {@link RulesValidationException} into an HTTP 400 (Bad Request) response body.
 *
 * @param e the validation failure raised while handling the request
 * @return an error payload carrying the current timestamp and the exception's message
 */
@ResponseBody
@ResponseStatus(HttpStatus.BAD_REQUEST)
@ExceptionHandler(RulesValidationException.class)
public ErrorMessage handleRulesValidationException(RulesValidationException e) {

    final OffsetDateTime timestamp = OffsetDateTime.now();
    final String message = e.getMessage();
    return new ErrorMessage(timestamp, message);
}
/**
 * Translates a {@link NotFoundException} into an HTTP 404 (Not Found) response body.
 *
 * @param e the exception signalling that a requested item could not be found
 * @return an error payload carrying the current timestamp and the exception's message
 */
@ResponseBody
@ResponseStatus(HttpStatus.NOT_FOUND)
@ExceptionHandler(NotFoundException.class)
public ErrorMessage handleFileNotFoundException(NotFoundException e) {

    final OffsetDateTime timestamp = OffsetDateTime.now();
    final String message = e.getMessage();
    return new ErrorMessage(timestamp, message);
}

View File

@ -46,11 +46,15 @@ public class RedactionController implements RedactionResource {
@Override
public RedactionResult classify(@RequestBody RedactionRequest redactionRequest) {
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(),
redactionRequest.getFileId(),
FileType.ORIGIN));
try {
Document classifiedDoc = pdfSegmentationService.parseDocument(redactionRequest.getDossierId(), redactionRequest.getFileId(), storedObjectStream, null);
storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(),
redactionRequest.getFileId(),
FileType.ORIGIN));
try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) {
pdDocument.setAllSecurityToBeRemoved(true);
@ -72,11 +76,15 @@ public class RedactionController implements RedactionResource {
@Override
public RedactionResult sections(@RequestBody RedactionRequest redactionRequest) {
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(),
redactionRequest.getFileId(),
FileType.ORIGIN));
try {
Document classifiedDoc = pdfSegmentationService.parseDocument(redactionRequest.getDossierId(), redactionRequest.getFileId(), storedObjectStream, null);
storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(),
redactionRequest.getFileId(),
FileType.ORIGIN));
try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) {
pdDocument.setAllSecurityToBeRemoved(true);
@ -100,7 +108,9 @@ public class RedactionController implements RedactionResource {
Document classifiedDoc;
try {
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(),
redactionRequest.getFileId(),
FileType.ORIGIN));
classifiedDoc = pdfSegmentationService.parseDocument(redactionRequest.getDossierId(), redactionRequest.getFileId(), storedObjectStream, null);
} catch (Exception e) {
throw new RedactionException(e);

View File

@ -1,10 +1,11 @@
package com.iqser.red.service.redaction.v1.server.controller;
import com.iqser.red.service.redaction.v1.model.RuleBuilderModel;
import com.iqser.red.service.redaction.v1.resources.RuleBuilderResource;
import com.iqser.red.service.redaction.v1.server.redaction.rulebuilder.RuleBuilderModelService;
import lombok.RequiredArgsConstructor;
import org.springframework.web.bind.annotation.RestController;
@RestController
@ -13,8 +14,10 @@ public class RuleBuilderController implements RuleBuilderResource {
private final RuleBuilderModelService ruleBuilderModelService;
/**
 * Returns the rule builder model by delegating to {@code ruleBuilderModelService}.
 *
 * @return the model produced by the service, passed through unchanged
 */
@Override
public RuleBuilderModel getRuleBuilderModel() {

    final RuleBuilderModel model = ruleBuilderModelService.getRuleBuilderModel();
    return model;
}

View File

@ -3,6 +3,7 @@ package com.iqser.red.service.redaction.v1.server.exception;
public class NotFoundException extends RuntimeException {
/**
 * Creates the exception with the given detail message.
 *
 * @param message detail message, passed unchanged to {@link RuntimeException#RuntimeException(String)}
 */
public NotFoundException(String message) {
super(message);
}

View File

@ -3,10 +3,13 @@ package com.iqser.red.service.redaction.v1.server.exception;
public class RedactionException extends RuntimeException {
/**
 * Creates the exception with the fixed message "Could not parse document" and the given cause.
 *
 * @param cause the underlying failure, preserved as this exception's cause
 */
public RedactionException(Throwable cause) {
super("Could not parse document", cause);
}
/**
 * Creates the exception with the fixed message "Could not parse document" and no cause.
 */
public RedactionException() {
super("Could not parse document");
}

View File

@ -3,6 +3,7 @@ package com.iqser.red.service.redaction.v1.server.exception;
public class RulesValidationException extends RuntimeException {
/**
 * Creates the exception with a detail message and the underlying cause.
 *
 * @param message detail message describing the validation failure
 * @param t       the underlying failure, preserved as this exception's cause
 */
public RulesValidationException(String message, Throwable t) {
super(message, t);
}

View File

@ -69,17 +69,17 @@ import org.apache.pdfbox.pdmodel.font.PDFontDescriptor;
/**
* LEGACY text calculations which are known to be incorrect but are depended on by PDFTextStripper.
*
* <p>
* This class exists only so that we don't break the code of users who have their own subclasses of
* PDFTextStripper. It replaces the mostly empty implementation of showGlyph() in PDFStreamEngine
* with a heuristic implementation which is backwards compatible.
*
* <p>
* DO NOT USE THIS CODE UNLESS YOU ARE WORKING WITH PDFTextStripper.
* THIS CODE IS DELIBERATELY INCORRECT, USE PDFStreamEngine INSTEAD.
*/
@SuppressWarnings({"PMD", "checkstyle:all"})
class LegacyPDFStreamEngine extends PDFStreamEngine
{
class LegacyPDFStreamEngine extends PDFStreamEngine {
private static final Log LOG = LogFactory.getLog(LegacyPDFStreamEngine.class);
private int pageRotation;
@ -88,11 +88,12 @@ class LegacyPDFStreamEngine extends PDFStreamEngine
private final GlyphList glyphList;
private final Map<COSDictionary, Float> fontHeightMap = new WeakHashMap<COSDictionary, Float>();
/**
* Constructor.
*/
LegacyPDFStreamEngine() throws IOException
{
LegacyPDFStreamEngine() throws IOException {
addOperator(new BeginText());
addOperator(new Concatenate());
addOperator(new DrawObject()); // special text version
@ -122,6 +123,7 @@ class LegacyPDFStreamEngine extends PDFStreamEngine
glyphList = new GlyphList(GlyphList.getAdobeGlyphList(), input);
}
/**
* This will initialize and process the contents of the stream.
*
@ -129,33 +131,27 @@ class LegacyPDFStreamEngine extends PDFStreamEngine
* @throws java.io.IOException if there is an error accessing the stream.
*/
@Override
public void processPage(PDPage page) throws IOException
{
public void processPage(PDPage page) throws IOException {
this.pageRotation = page.getRotation();
this.pageSize = page.getCropBox();
if (pageSize.getLowerLeftX() == 0 && pageSize.getLowerLeftY() == 0)
{
if (pageSize.getLowerLeftX() == 0 && pageSize.getLowerLeftY() == 0) {
translateMatrix = null;
}
else
{
} else {
// translation matrix for cropbox
translateMatrix = Matrix.getTranslateInstance(-pageSize.getLowerLeftX(), -pageSize.getLowerLeftY());
}
super.processPage(page);
}
/**
* Called when a glyph is to be processed. The heuristic calculations here were originally
* written by Ben Litchfield for PDFStreamEngine.
*/
@Override
protected void showGlyph(Matrix textRenderingMatrix, PDFont font, int code,
String unicode,
Vector displacement)
throws IOException
{
protected void showGlyph(Matrix textRenderingMatrix, PDFont font, int code, String unicode, Vector displacement) throws IOException {
//
// legacy calculations which were previously in PDFStreamEngine
//
@ -173,25 +169,19 @@ class LegacyPDFStreamEngine extends PDFStreamEngine
// the sorting algorithm is based on the width of the character. As the displacement
// for vertical characters doesn't provide any suitable value for it, we have to
// calculate our own
if (font.isVertical())
{
if (font.isVertical()) {
displacementX = font.getWidth(code) / 1000;
// there may be an additional scaling factor for true type fonts
TrueTypeFont ttf = null;
if (font instanceof PDTrueTypeFont)
{
if (font instanceof PDTrueTypeFont) {
ttf = ((PDTrueTypeFont) font).getTrueTypeFont();
}
else if (font instanceof PDType0Font)
{
} else if (font instanceof PDType0Font) {
PDCIDFont cidFont = ((PDType0Font) font).getDescendantFont();
if (cidFont instanceof PDCIDFontType2)
{
if (cidFont instanceof PDCIDFontType2) {
ttf = ((PDCIDFontType2) cidFont).getTrueTypeFont();
}
}
if (ttf != null && ttf.getUnitsPerEm() != 1000)
{
if (ttf != null && ttf.getUnitsPerEm() != 1000) {
displacementX *= 1000f / ttf.getUnitsPerEm();
}
}
@ -219,8 +209,7 @@ class LegacyPDFStreamEngine extends PDFStreamEngine
// (modified) width and height calculations
float dxDisplay = nextX - textRenderingMatrix.getTranslateX();
Float fontHeight = fontHeightMap.get(font.getCOSObject());
if (fontHeight == null)
{
if (fontHeight == null) {
fontHeight = computeFontHeight(font);
fontHeightMap.put(font.getCOSObject(), fontHeight);
}
@ -237,30 +226,24 @@ class LegacyPDFStreamEngine extends PDFStreamEngine
// saved).
float glyphSpaceToTextSpaceFactor = 1 / 1000f;
if (font instanceof PDType3Font)
{
if (font instanceof PDType3Font) {
glyphSpaceToTextSpaceFactor = font.getFontMatrix().getScaleX();
}
float spaceWidthText = 0;
try
{
try {
// to avoid crash as described in PDFBOX-614, see what the space displacement should be
spaceWidthText = font.getSpaceWidth() * glyphSpaceToTextSpaceFactor;
}
catch (Throwable exception)
{
} catch (Throwable exception) {
LOG.warn(exception, exception);
}
if (spaceWidthText == 0)
{
if (spaceWidthText == 0) {
spaceWidthText = font.getAverageFontWidth() * glyphSpaceToTextSpaceFactor;
// the average space width appears to be higher than necessary so make it smaller
spaceWidthText *= .80f;
}
if (spaceWidthText == 0)
{
if (spaceWidthText == 0) {
spaceWidthText = 1.0f; // if could not find font, use a generic value
}
@ -273,15 +256,11 @@ class LegacyPDFStreamEngine extends PDFStreamEngine
// when there is no Unicode mapping available, Acrobat simply coerces the character code
// into Unicode, so we do the same. Subclasses of PDFStreamEngine don't necessarily want
// this, which is why we leave it until this point in PDFTextStreamEngine.
if (unicodeMapping == null)
{
if (font instanceof PDSimpleFont)
{
if (unicodeMapping == null) {
if (font instanceof PDSimpleFont) {
char c = (char) code;
unicodeMapping = new String(new char[]{c});
}
else
{
} else {
// Acrobat doesn't seem to coerce composite font's character codes, instead it
// skips them. See the "allah2.pdf" TestTextStripper file.
return;
@ -290,12 +269,9 @@ class LegacyPDFStreamEngine extends PDFStreamEngine
// adjust for cropbox if needed
Matrix translatedTextRenderingMatrix;
if (translateMatrix == null)
{
if (translateMatrix == null) {
translatedTextRenderingMatrix = textRenderingMatrix;
}
else
{
} else {
translatedTextRenderingMatrix = Matrix.concatenate(translateMatrix, textRenderingMatrix);
nextX -= pageSize.getLowerLeftX();
nextY -= pageSize.getLowerLeftY();
@ -303,37 +279,65 @@ class LegacyPDFStreamEngine extends PDFStreamEngine
// This is a hack for unicode letter with 2 chars e.g. RA see unicodeProblem.pdf
if (unicodeMapping.length() == 2) {
processTextPosition(new TextPosition(pageRotation, pageSize.getWidth(),
pageSize.getHeight(), translatedTextRenderingMatrix, nextX, nextY,
Math.abs(dyDisplay), dxDisplay,
Math.abs(spaceWidthDisplay), Character.toString(unicodeMapping.charAt(0)), new int[] { code }, font,
processTextPosition(new TextPosition(pageRotation,
pageSize.getWidth(),
pageSize.getHeight(),
translatedTextRenderingMatrix,
nextX,
nextY,
Math.abs(dyDisplay),
dxDisplay,
Math.abs(spaceWidthDisplay),
Character.toString(unicodeMapping.charAt(0)),
new int[]{code},
font,
fontSize,
(int) (fontSize * textMatrix.getScalingFactorX())));
processTextPosition(new TextPosition(pageRotation, pageSize.getWidth(),
pageSize.getHeight(), translatedTextRenderingMatrix, nextX, nextY,
Math.abs(dyDisplay), dxDisplay,
Math.abs(spaceWidthDisplay), Character.toString(unicodeMapping.charAt(1)), new int[] { code }, font,
processTextPosition(new TextPosition(pageRotation,
pageSize.getWidth(),
pageSize.getHeight(),
translatedTextRenderingMatrix,
nextX,
nextY,
Math.abs(dyDisplay),
dxDisplay,
Math.abs(spaceWidthDisplay),
Character.toString(unicodeMapping.charAt(1)),
new int[]{code},
font,
fontSize,
(int) (fontSize * textMatrix.getScalingFactorX())));
} else {
processTextPosition(new TextPosition(pageRotation, pageSize.getWidth(), pageSize.getHeight(), translatedTextRenderingMatrix, nextX, nextY, Math.abs(dyDisplay), dxDisplay, Math.abs(spaceWidthDisplay), unicodeMapping, new int[]{code}, font, fontSize, (int) (fontSize * textMatrix.getScalingFactorX())));
processTextPosition(new TextPosition(pageRotation,
pageSize.getWidth(),
pageSize.getHeight(),
translatedTextRenderingMatrix,
nextX,
nextY,
Math.abs(dyDisplay),
dxDisplay,
Math.abs(spaceWidthDisplay),
unicodeMapping,
new int[]{code},
font,
fontSize,
(int) (fontSize * textMatrix.getScalingFactorX())));
}
}
/**
* Compute the font height. Override this if you want to use own calculations.
*
* @param font the font.
* @return the font height.
*
* @throws IOException if there is an error while getting the font bounding box.
*/
protected float computeFontHeight(PDFont font) throws IOException
{
protected float computeFontHeight(PDFont font) throws IOException {
BoundingBox bbox = font.getBoundingBox();
if (bbox.getLowerLeftY() < Short.MIN_VALUE)
{
if (bbox.getLowerLeftY() < Short.MIN_VALUE) {
// PDFBOX-2158 and PDFBOX-3130
// files by Salmat eSolutions / ClibPDF Library
bbox.setLowerLeftY(-(bbox.getLowerLeftY() + 65536));
@ -343,47 +347,40 @@ class LegacyPDFStreamEngine extends PDFStreamEngine
// sometimes the bbox has very high values, but CapHeight is OK
PDFontDescriptor fontDescriptor = font.getFontDescriptor();
if (fontDescriptor != null)
{
if (fontDescriptor != null) {
float capHeight = fontDescriptor.getCapHeight();
if (Float.compare(capHeight, 0) != 0 &&
(capHeight < glyphHeight || Float.compare(glyphHeight, 0) == 0))
{
if (Float.compare(capHeight, 0) != 0 && (capHeight < glyphHeight || Float.compare(glyphHeight, 0) == 0)) {
glyphHeight = capHeight;
}
// PDFBOX-3464, PDFBOX-4480, PDFBOX-4553:
// sometimes even CapHeight has very high value, but Ascent and Descent are ok
float ascent = fontDescriptor.getAscent();
float descent = fontDescriptor.getDescent();
if (capHeight > ascent && ascent > 0 && descent < 0 &&
((ascent - descent) / 2 < glyphHeight || Float.compare(glyphHeight, 0) == 0))
{
if (capHeight > ascent && ascent > 0 && descent < 0 && ((ascent - descent) / 2 < glyphHeight || Float.compare(glyphHeight, 0) == 0)) {
glyphHeight = (ascent - descent) / 2;
}
}
// transformPoint from glyph space -> text space
float height;
if (font instanceof PDType3Font)
{
if (font instanceof PDType3Font) {
height = font.getFontMatrix().transformPoint(0, glyphHeight).y;
}
else
{
} else {
height = glyphHeight / 1000;
}
return height;
}
/**
* A method provided as an event interface to allow a subclass to perform some specific
* functionality when text needs to be processed.
*
* @param text The text to be processed.
*/
protected void processTextPosition(TextPosition text)
{
protected void processTextPosition(TextPosition text) {
// subclasses can override to provide specific functionality
}
}

View File

@ -1,8 +1,10 @@
package com.iqser.red.service.redaction.v1.server.parsing;
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
import lombok.Getter;
import lombok.Setter;
import org.apache.pdfbox.text.PDFTextStripperByArea;
import org.apache.pdfbox.text.TextPosition;
@ -18,19 +20,19 @@ public class PDFAreaTextStripper extends PDFTextStripperByArea {
@Setter
private int pageNumber;
/**
 * Creates a stripper without any initialization of its own.
 * <p>
 * NOTE(review): the declared {@link IOException} presumably originates from the implicitly invoked
 * {@code PDFTextStripperByArea} constructor — confirm against the PDFBox version in use.
 *
 * @throws IOException if the superclass constructor fails
 */
public PDFAreaTextStripper() throws IOException {
}
@Override
public void writeString(String text, List<TextPosition> textPositions) throws IOException {
int startIndex = 0;
for (int i = 0; i <= textPositions.size() - 1; i++) {
if (i == 0 && (textPositions.get(i).getUnicode().equals(" ") || textPositions.get(i)
.getUnicode()
.equals("\u00A0"))) {
if (i == 0 && (textPositions.get(i).getUnicode().equals(" ") || textPositions.get(i).getUnicode().equals("\u00A0"))) {
startIndex++;
continue;
}
@ -38,32 +40,23 @@ public class PDFAreaTextStripper extends PDFTextStripperByArea {
// Strange but sometimes this is happening, for example: Metolachlor2.pdf
if (i > 0 && textPositions.get(i).getX() < textPositions.get(i - 1).getX()) {
List<TextPosition> sublist = textPositions.subList(startIndex, i);
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0)
.getUnicode()
.equals(" ") || sublist.get(0).getUnicode().equals("\u00A0")))) {
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0).getUnicode().equals(" ") || sublist.get(0).getUnicode().equals("\u00A0")))) {
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
}
startIndex = i;
}
if (textPositions.get(i).getRotation() == 0 && i > 0 && textPositions.get(i).getX() > textPositions.get(i - 1).getEndX() + 1) {
List<TextPosition> sublist = textPositions.subList(startIndex, i);
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0)
.getUnicode()
.equals(" ") || sublist.get(0).getUnicode().equals("\u00A0")))) {
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0).getUnicode().equals(" ") || sublist.get(0).getUnicode().equals("\u00A0")))) {
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
}
startIndex = i;
}
if (i > 0 && (textPositions.get(i).getUnicode().equals(" ") || textPositions.get(i)
.getUnicode()
.equals("\u00A0")) && i <= textPositions.size() - 2) {
if (i > 0 && (textPositions.get(i).getUnicode().equals(" ") || textPositions.get(i).getUnicode().equals("\u00A0")) && i <= textPositions.size() - 2) {
List<TextPosition> sublist = textPositions.subList(startIndex, i);
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0)
.getUnicode()
.equals(" ") || sublist.get(0).getUnicode().equals("\u00A0")))) {
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0).getUnicode().equals(" ") || sublist.get(0).getUnicode().equals("\u00A0")))) {
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
}
startIndex = i + 1;
@ -71,14 +64,10 @@ public class PDFAreaTextStripper extends PDFTextStripperByArea {
}
List<TextPosition> sublist = textPositions.subList(startIndex, textPositions.size());
if (!sublist.isEmpty() && (sublist.get(sublist.size() - 1)
.getUnicode()
.equals(" ") || sublist.get(sublist.size() - 1).getUnicode().equals("\u00A0"))) {
if (!sublist.isEmpty() && (sublist.get(sublist.size() - 1).getUnicode().equals(" ") || sublist.get(sublist.size() - 1).getUnicode().equals("\u00A0"))) {
sublist = sublist.subList(0, sublist.size() - 1);
}
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0).getUnicode().equals(" ") || sublist.get(0)
.getUnicode()
.equals("\u00A0")))) {
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0).getUnicode().equals(" ") || sublist.get(0).getUnicode().equals("\u00A0")))) {
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
}
super.writeString(text);
@ -86,6 +75,7 @@ public class PDFAreaTextStripper extends PDFTextStripperByArea {
/**
 * Discards the collected text position sequences by pointing {@code textPositionSequences} at a
 * fresh, empty list. The previous list instance is replaced rather than cleared, so any reference
 * to it obtained earlier keeps its contents.
 */
public void clearPositions() {
textPositionSequences = new ArrayList<>();
}

View File

@ -3,9 +3,11 @@ package com.iqser.red.service.redaction.v1.server.parsing;
import com.iqser.red.service.redaction.v1.server.parsing.model.RedTextPosition;
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
import lombok.Getter;
import lombok.Setter;
import lombok.extern.slf4j.Slf4j;
import org.apache.pdfbox.contentstream.operator.Operator;
import org.apache.pdfbox.contentstream.operator.OperatorName;
import org.apache.pdfbox.contentstream.operator.color.*;
@ -155,7 +157,6 @@ public class PDFLinesTextStripper extends PDFTextStripper {
graphicsPath.clear();
break;
}
super.processOperator(operator, arguments);
@ -182,14 +183,11 @@ public class PDFLinesTextStripper extends PDFTextStripper {
try {
if (stroke && !getGraphicsState().getStrokingColor().isPattern() && getGraphicsState().getStrokingColor()
.toRGB() == 0 || !stroke && !getGraphicsState().getNonStrokingColor()
.isPattern() && getGraphicsState().getNonStrokingColor().toRGB() == 0) {
.toRGB() == 0 || !stroke && !getGraphicsState().getNonStrokingColor().isPattern() && getGraphicsState().getNonStrokingColor().toRGB() == 0) {
rulings.addAll(path);
}
} catch (UnsupportedOperationException e) {
log.debug("UnsupportedOperationException: " + getGraphicsState().getStrokingColor()
.getColorSpace()
.getName() + " or " + getGraphicsState().getNonStrokingColor()
log.debug("UnsupportedOperationException: " + getGraphicsState().getStrokingColor().getColorSpace().getName() + " or " + getGraphicsState().getNonStrokingColor()
.getColorSpace()
.getName() + " does not support toRGB");
}
@ -226,9 +224,7 @@ public class PDFLinesTextStripper extends PDFTextStripper {
maxCharHeight = charHeight;
}
if (i == 0 && (textPositions.get(i).getUnicode().equals(" ") || textPositions.get(i)
.getUnicode()
.equals("\u00A0") || textPositions.get(i).getUnicode().equals("\t"))) {
if (i == 0 && (textPositions.get(i).getUnicode().equals(" ") || textPositions.get(i).getUnicode().equals("\u00A0") || textPositions.get(i).getUnicode().equals("\t"))) {
startIndex++;
continue;
}
@ -236,9 +232,7 @@ public class PDFLinesTextStripper extends PDFTextStripper {
// Strange but sometimes this is happening, for example: Metolachlor2.pdf
if (i > 0 && textPositions.get(i).getXDirAdj() < textPositions.get(i - 1).getXDirAdj()) {
List<TextPosition> sublist = textPositions.subList(startIndex, i);
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0)
.getUnicode()
.equals(" ") || sublist.get(0).getUnicode().equals("\u00A0") || sublist.get(0)
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0).getUnicode().equals(" ") || sublist.get(0).getUnicode().equals("\u00A0") || sublist.get(0)
.getUnicode()
.equals("\t")))) {
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
@ -246,12 +240,9 @@ public class PDFLinesTextStripper extends PDFTextStripper {
startIndex = i;
}
if (textPositions.get(i).getRotation() == 0 && i > 0 && textPositions.get(i)
.getX() > textPositions.get(i - 1).getEndX() + 1) {
if (textPositions.get(i).getRotation() == 0 && i > 0 && textPositions.get(i).getX() > textPositions.get(i - 1).getEndX() + 1) {
List<TextPosition> sublist = textPositions.subList(startIndex, i);
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0)
.getUnicode()
.equals(" ") || sublist.get(0).getUnicode().equals("\u00A0") || sublist.get(0)
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0).getUnicode().equals(" ") || sublist.get(0).getUnicode().equals("\u00A0") || sublist.get(0)
.getUnicode()
.equals("\t")))) {
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
@ -259,15 +250,11 @@ public class PDFLinesTextStripper extends PDFTextStripper {
startIndex = i;
}
if (i > 0 && (textPositions.get(i).getUnicode().equals(" ") || textPositions.get(i)
.getUnicode()
.equals("\u00A0") || textPositions.get(i)
if (i > 0 && (textPositions.get(i).getUnicode().equals(" ") || textPositions.get(i).getUnicode().equals("\u00A0") || textPositions.get(i)
.getUnicode()
.equals("\t")) && i <= textPositions.size() - 2) {
List<TextPosition> sublist = textPositions.subList(startIndex, i);
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0)
.getUnicode()
.equals(" ") || sublist.get(0).getUnicode().equals("\u00A0") || sublist.get(0)
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0).getUnicode().equals(" ") || sublist.get(0).getUnicode().equals("\u00A0") || sublist.get(0)
.getUnicode()
.equals("\t")))) {
@ -286,17 +273,15 @@ public class PDFLinesTextStripper extends PDFTextStripper {
}
List<TextPosition> sublist = textPositions.subList(startIndex, textPositions.size());
if (!sublist.isEmpty() && (sublist.get(sublist.size() - 1)
.getUnicode()
.equals(" ") || sublist.get(sublist.size() - 1)
if (!sublist.isEmpty() && (sublist.get(sublist.size() - 1).getUnicode().equals(" ") || sublist.get(sublist.size() - 1)
.getUnicode()
.equals("\u00A0") || sublist.get(sublist.size() - 1).getUnicode().equals("\t"))) {
sublist = sublist.subList(0, sublist.size() - 1);
}
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0).getUnicode().equals(" ") || sublist.get(0)
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0).getUnicode().equals(" ") || sublist.get(0).getUnicode().equals("\u00A0") || sublist.get(0)
.getUnicode()
.equals("\u00A0") || sublist.get(0).getUnicode().equals("\t")))) {
.equals("\t")))) {
if (previous != null && sublist.get(0).getYDirAdj() == previous.getYDirAdj() && sublist.get(0)
.getXDirAdj() - (previous.getXDirAdj() + previous.getWidthDirAdj()) < 0.01) {
for (TextPosition t : sublist) {

View File

@ -1368,7 +1368,9 @@ public class PDFTextStripper extends LegacyPDFStreamEngine {
* @return start position of the last line
* @throws IOException if something went wrong
*/
private PositionWrapper handleLineSeparation(PositionWrapper current, PositionWrapper lastPosition, PositionWrapper lastLineStartPosition,
private PositionWrapper handleLineSeparation(PositionWrapper current,
PositionWrapper lastPosition,
PositionWrapper lastLineStartPosition,
float maxHeightForLine) throws IOException {
current.setLineStart();

View File

@ -55,6 +55,7 @@ public class RedTextPosition {
@SneakyThrows
public static RedTextPosition fromTextPosition(TextPosition textPosition) {
var pos = new RedTextPosition();
BeanUtils.copyProperties(textPosition, pos);
pos.setFontName(textPosition.getFont().getName());
@ -77,23 +78,29 @@ public class RedTextPosition {
/**
 * Returns element {@code 0} of the {@code position} array.
 * Annotated with {@code @JsonIgnore}, so it is presumably left out of the JSON form.
 *
 * @return the value stored at {@code position[0]}
 */
@JsonIgnore
public float getXDirAdj() {

    final float xDirAdj = position[0];
    return xDirAdj;
}
/**
 * Returns element {@code 1} of the {@code position} array.
 * Annotated with {@code @JsonIgnore}, so it is presumably left out of the JSON form.
 *
 * @return the value stored at {@code position[1]}
 */
@JsonIgnore
public float getYDirAdj() {

    final float yDirAdj = position[1];
    return yDirAdj;
}
/**
 * Returns element {@code 2} of the {@code position} array.
 * Annotated with {@code @JsonIgnore}, so it is presumably left out of the JSON form.
 *
 * @return the value stored at {@code position[2]}
 */
@JsonIgnore
public float getWidthDirAdj() {

    final float widthDirAdj = position[2];
    return widthDirAdj;
}
/**
 * Returns element {@code 3} of the {@code position} array.
 * Annotated with {@code @JsonIgnore}, so it is presumably left out of the JSON form.
 *
 * @return the value stored at {@code position[3]}
 */
@JsonIgnore
public float getHeightDir() {

    final float heightDir = position[3];
    return heightDir;
}
}

View File

@ -20,21 +20,28 @@ public enum TextDirection {
private final float degrees;
private final float radians;
TextDirection(float degreeValue) {
degrees = degreeValue;
radians = (float) Math.toRadians(degreeValue);
}
/**
 * Renders this direction as its degree value followed by {@code VALUE_STRING_SUFFIX}.
 *
 * @return the degrees formatted as text with the suffix appended
 */
@Override
public String toString() {

    return String.valueOf(degrees) + VALUE_STRING_SUFFIX;
}
/**
 * Value used by DSL-JSON (via {@code @JsonValue}) to represent this constant:
 * the angle in degrees as returned by {@code getDegrees()}.
 *
 * @return the degree value of this direction
 */
@com.dslplatform.json.JsonValue
public float jsonValue() {

    final float degreeValue = getDegrees();
    return degreeValue;
}
@JsonCreator(mode = JsonCreator.Mode.DELEGATING)
public static TextDirection fromDegrees(float degrees) {
@ -47,6 +54,7 @@ public enum TextDirection {
throw new IllegalArgumentException(String.format("A value of %f is not supported by TextDirection", degrees));
}
public static TextDirection fromString(String degreesAsString) {
Objects.requireNonNull(degreesAsString, "Cannot construct a text direction from a null value");

View File

@ -304,7 +304,10 @@ public class TextPositionSequence implements CharSequence {
topRight = transform.transform(topRight, null);
return new Rectangle( //
new Point((float) bottomLeft.getX(), (float) bottomLeft.getY()), (float) (topRight.getX() - bottomLeft.getX()), (float) (topRight.getY() - bottomLeft.getY()), page);
new Point((float) bottomLeft.getX(), (float) bottomLeft.getY()),
(float) (topRight.getX() - bottomLeft.getX()),
(float) (topRight.getY() - bottomLeft.getY()),
page);
}
}

View File

@ -17,9 +17,11 @@ public class MessageReceiver {
private final RedactionMessageReceiver redactionMessageReceiver;
/**
 * Listener for {@code REDACTION_QUEUE}: hands every incoming message to the
 * shared {@code RedactionMessageReceiver} with the second argument fixed to {@code false}
 * (presumably the "priority" flag; confirm against the receiver's signature).
 *
 * @param message the raw message taken from the queue
 */
@RabbitHandler
@RabbitListener(queues = REDACTION_QUEUE)
public void receiveAnalyzeRequest(Message message) {

    final boolean priority = false;
    redactionMessageReceiver.receiveAnalyzeRequest(message, priority);
}

View File

@ -17,9 +17,11 @@ public class MessagingConfiguration {
public static final String REDACTION_PRIORITY_QUEUE = "redactionPriorityQueue";
/**
 * Registers the {@code MessageReceiver} bean. It is only created when the property
 * {@code redaction-service.priorityMode} has the value {@code "false"}.
 *
 * @param redactionMessageReceiver the receiver the new bean wraps
 * @return a {@code MessageReceiver} built around {@code redactionMessageReceiver}
 */
@Bean
@ConditionalOnProperty(prefix = "redaction-service", name = "priorityMode", havingValue = "false")
public MessageReceiver messageReceiver(RedactionMessageReceiver redactionMessageReceiver) {

    final MessageReceiver receiver = new MessageReceiver(redactionMessageReceiver);
    return receiver;
}
@ -27,6 +29,7 @@ public class MessagingConfiguration {
/**
 * Registers the {@code PriorityMessageReceiver} bean. It is only created when the property
 * {@code redaction-service.priorityMode} has the value {@code "true"}.
 *
 * @param redactionMessageReceiver the receiver the new bean wraps
 * @return a {@code PriorityMessageReceiver} built around {@code redactionMessageReceiver}
 */
@Bean
@ConditionalOnProperty(prefix = "redaction-service", name = "priorityMode", havingValue = "true")
public PriorityMessageReceiver priorityMessageReceiver(RedactionMessageReceiver redactionMessageReceiver) {

    final PriorityMessageReceiver receiver = new PriorityMessageReceiver(redactionMessageReceiver);
    return receiver;
}
@ -34,11 +37,7 @@ public class MessagingConfiguration {
@Bean
public Queue redactionQueue() {
return QueueBuilder.durable(REDACTION_QUEUE)
.withArgument("x-dead-letter-exchange", "")
.withArgument("x-dead-letter-routing-key", REDACTION_DQL)
.maxPriority(2)
.build();
return QueueBuilder.durable(REDACTION_QUEUE).withArgument("x-dead-letter-exchange", "").withArgument("x-dead-letter-routing-key", REDACTION_DQL).maxPriority(2).build();
}

View File

@ -39,12 +39,15 @@ public class RedactionMessageReceiver {
// Reject redelivered messages so that a request which already failed once (e.g. with an OOM error) is not retried endlessly.
if (message.getMessageProperties().isRedelivered()) {
throw new AmqpRejectAndDontRequeueException(String.format("Error during last processing of request with dossierId: %s and fileId: %s, do not retry.", analyzeRequest.getDossierId(), analyzeRequest.getFileId()));
throw new AmqpRejectAndDontRequeueException(String.format("Error during last processing of request with dossierId: %s and fileId: %s, do not retry.",
analyzeRequest.getDossierId(),
analyzeRequest.getFileId()));
}
receiveAnalyzeRequest(analyzeRequest, priority);
}
@SneakyThrows
public void receiveAnalyzeRequest(AnalyzeRequest analyzeRequest, boolean priority) {
@ -70,8 +73,13 @@ public class RedactionMessageReceiver {
break;
case SURROUNDING_TEXT:
result = manualRedactionSurroundingTextService.addSurroundingText(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), analyzeRequest.getManualRedactions());
log.info("Successfully added surrounding text for manual redaction in dossierId {} and fileId {} took: {}", analyzeRequest.getDossierId(), analyzeRequest.getFileId(), result.getDuration());
result = manualRedactionSurroundingTextService.addSurroundingText(analyzeRequest.getDossierId(),
analyzeRequest.getFileId(),
analyzeRequest.getManualRedactions());
log.info("Successfully added surrounding text for manual redaction in dossierId {} and fileId {} took: {}",
analyzeRequest.getDossierId(),
analyzeRequest.getFileId(),
result.getDuration());
break;
default:

View File

@ -3,6 +3,7 @@ package com.iqser.red.service.redaction.v1.server.redaction.model;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
@ -20,6 +21,7 @@ public class CellValue {
private int rowSpanStart;
@Override
public String toString() {
@ -45,9 +47,7 @@ public class CellValue {
}
}
return TextNormalizationUtilities.removeHyphenLineBreaks(sb.toString())
.replaceAll("\n", " ")
.replaceAll(" {2}", " ");
return TextNormalizationUtilities.removeHyphenLineBreaks(sb.toString()).replaceAll("\n", " ").replaceAll(" {2}", " ");
}
}

View File

@ -1,8 +1,8 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.type.DictionaryEntry;
import com.iqser.red.service.redaction.v1.server.redaction.utils.SearchImplementation;
import lombok.AllArgsConstructor;
import lombok.Data;
@ -11,7 +11,6 @@ import java.util.HashSet;
import java.util.Set;
import java.util.stream.Collectors;
@Data
@AllArgsConstructor
public class DictionaryModel implements Serializable {
@ -33,6 +32,7 @@ public class DictionaryModel implements Serializable {
private final Set<String> localEntries = new HashSet<>();
public DictionaryModel(String type,
int rank,
float[] color,
@ -42,6 +42,7 @@ public class DictionaryModel implements Serializable {
Set<DictionaryEntry> falsePositives,
Set<DictionaryEntry> falseRecommendations,
boolean isDossierDictionary) {
this.type = type;
this.rank = rank;
this.color = color;
@ -53,39 +54,58 @@ public class DictionaryModel implements Serializable {
this.falsePositives = falsePositives;
this.falseRecommendations = falseRecommendations;
this.entriesSearch = new SearchImplementation(this.entries.stream().filter(e -> !e.isDeleted()).map(DictionaryEntry::getValue).collect(Collectors.toList()), caseInsensitive);
this.falsePositiveSearch = new SearchImplementation(this.falsePositives.stream().filter(e -> !e.isDeleted()).map(DictionaryEntry::getValue).collect(Collectors.toList()), caseInsensitive);
this.falseRecommendationsSearch = new SearchImplementation(this.falseRecommendations.stream().filter(e -> !e.isDeleted()).map(DictionaryEntry::getValue).collect(Collectors.toList()), caseInsensitive);
this.entriesSearch = new SearchImplementation(this.entries.stream().filter(e -> !e.isDeleted()).map(DictionaryEntry::getValue).collect(Collectors.toList()),
caseInsensitive);
this.falsePositiveSearch = new SearchImplementation(this.falsePositives.stream().filter(e -> !e.isDeleted()).map(DictionaryEntry::getValue).collect(Collectors.toList()),
caseInsensitive);
this.falseRecommendationsSearch = new SearchImplementation(this.falseRecommendations.stream()
.filter(e -> !e.isDeleted())
.map(DictionaryEntry::getValue)
.collect(Collectors.toList()), caseInsensitive);
}
/**
 * Returns the search over {@code localEntries}, building it on first use.
 * The instance is created with this model's {@code caseInsensitive} setting and
 * then kept in {@code localSearch} for subsequent calls.
 *
 * @return the cached or newly created {@code SearchImplementation}
 */
public SearchImplementation getLocalSearch() {

    if (localSearch != null) {
        return localSearch;
    }
    localSearch = new SearchImplementation(localEntries, caseInsensitive);
    return localSearch;
}
public SearchImplementation getEntriesSearch() {
if (entriesSearch == null) {
this.entriesSearch = new SearchImplementation(this.entries.stream().filter(e -> !e.isDeleted()).map(DictionaryEntry::getValue).collect(Collectors.toList()), caseInsensitive);
this.entriesSearch = new SearchImplementation(this.entries.stream().filter(e -> !e.isDeleted()).map(DictionaryEntry::getValue).collect(Collectors.toList()),
caseInsensitive);
}
return entriesSearch;
}
public SearchImplementation getFalsePositiveSearch() {
if (falsePositiveSearch == null) {
this.falsePositiveSearch = new SearchImplementation(this.falsePositives.stream().filter(e -> !e.isDeleted()).map(DictionaryEntry::getValue).collect(Collectors.toList()), caseInsensitive);
this.falsePositiveSearch = new SearchImplementation(this.falsePositives.stream()
.filter(e -> !e.isDeleted())
.map(DictionaryEntry::getValue)
.collect(Collectors.toList()), caseInsensitive);
}
return falsePositiveSearch;
}
public SearchImplementation getFalseRecommendationsSearch() {
if (falseRecommendationsSearch == null) {
this.falseRecommendationsSearch = new SearchImplementation(this.falseRecommendations.stream().filter(e -> !e.isDeleted()).map(DictionaryEntry::getValue).collect(Collectors.toList()), caseInsensitive);
this.falseRecommendationsSearch = new SearchImplementation(this.falseRecommendations.stream()
.filter(e -> !e.isDeleted())
.map(DictionaryEntry::getValue)
.collect(Collectors.toList()), caseInsensitive);
}
return falseRecommendationsSearch;
}
}

View File

@ -19,5 +19,4 @@ public class DictionaryRepresentation {
private float[] notRedactedColor;
private Map<String, DictionaryModel> localAccessMap = new HashMap<>();
}

View File

@ -13,4 +13,5 @@ public class DictionaryVersion {
long dossierTemplateVersion;
long dossierVersion;
}

View File

@ -2,6 +2,7 @@ package com.iqser.red.service.redaction.v1.server.redaction.model;
import com.iqser.red.service.redaction.v1.model.Engine;
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
import lombok.*;
import java.util.ArrayList;
@ -57,10 +58,24 @@ public class Entity implements ReasonHolder {
private boolean skipRemoveEntitiesContainedInLarger;
public Entity(String word, String type, boolean redaction, String redactionReason,
List<EntityPositionSequence> positionSequences, String headline, int matchedRule, int sectionNumber,
String legalBasis, boolean isDictionaryEntry, String textBefore, String textAfter, Integer start,
Integer end, boolean isDossierDictionaryEntry, Set<Engine> engines, Set<Entity> references, EntityType entityType) {
public Entity(String word,
String type,
boolean redaction,
String redactionReason,
List<EntityPositionSequence> positionSequences,
String headline,
int matchedRule,
int sectionNumber,
String legalBasis,
boolean isDictionaryEntry,
String textBefore,
String textAfter,
Integer start,
Integer end,
boolean isDossierDictionaryEntry,
Set<Engine> engines,
Set<Entity> references,
EntityType entityType) {
this.word = word;
this.type = type;
@ -83,8 +98,16 @@ public class Entity implements ReasonHolder {
}
public Entity(String word, String type, Integer start, Integer end, String headline, int sectionNumber,
boolean isDictionaryEntry, boolean isDossierDictionaryEntry, Engine engine, EntityType entityType) {
public Entity(String word,
String type,
Integer start,
Integer end,
String headline,
int sectionNumber,
boolean isDictionaryEntry,
boolean isDossierDictionaryEntry,
Engine engine,
EntityType entityType) {
this.word = word;
this.type = type;
@ -98,5 +121,4 @@ public class Entity implements ReasonHolder {
this.entityType = entityType;
}
}

View File

@ -1,5 +1,8 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
public enum EntityType {
ENTITY, RECOMMENDATION, FALSE_POSITIVE, FALSE_RECOMMENDATION
ENTITY,
RECOMMENDATION,
FALSE_POSITIVE,
FALSE_RECOMMENDATION
}

View File

@ -1,6 +1,7 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
import com.dslplatform.json.CompiledJson;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;

View File

@ -1,5 +1,9 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
public enum ImageType {
LOGO, FORMULA, SIGNATURE, OTHER, OCR
LOGO,
FORMULA,
SIGNATURE,
OTHER,
OCR
}

View File

@ -19,5 +19,4 @@ public class PageEntities {
@Builder.Default
private Map<Integer, Set<Image>> imagesPerPage = new HashMap<>();
}

View File

@ -4,11 +4,13 @@ public interface ReasonHolder {
/** @return the redaction reason currently held by this object */
String getRedactionReason();
/** @param reason the redaction reason to store on this object */
void setRedactionReason(String reason);
/** @return the current value of the redaction flag */
boolean isRedaction();
/** @param value the new value of the redaction flag */
void setRedaction(boolean value);
}

View File

@ -23,26 +23,30 @@ public class RedRectangle2D {
private double width;
private double height;
/**
 * Tells whether this rectangle has no area, i.e. its width or its height is
 * less than or equal to zero. Ignored by both JSON annotations present on it.
 *
 * @return {@code true} if {@code width <= 0} or {@code height <= 0}
 */
@JsonIgnore
@JsonAttribute(ignore = true)
public boolean isEmpty() {

    if (width <= 0.0f) {
        return true;
    }
    return height <= 0.0f;
}
public boolean contains(double x, double y, double w, double h) {
if (isEmpty() || w <= 0 || h <= 0) {
return false;
}
double x0 = getX();
double y0 = getY();
return round(x) >= round(x0) &&
round(y) >= round(y0) &&
(x + w) - (x0 + getWidth()) <= THRESHOLD &&
(y + h) - (y0 + getHeight()) <= THRESHOLD;
return round(x) >= round(x0) && round(y) >= round(y0) && (x + w) - (x0 + getWidth()) <= THRESHOLD && (y + h) - (y0 + getHeight()) <= THRESHOLD;
}
/**
 * Rounds a value to two decimal places: scales it by 10^2, rounds to the
 * nearest whole number with {@link Math#round(double)} and scales it back.
 *
 * @param value the number to round
 * @return {@code value} rounded to two decimals
 */
private double round(double value) {

    final double factor = Math.pow(10, 2);
    final long scaled = Math.round(value * factor);
    return scaled / factor;
}
}

View File

@ -17,7 +17,6 @@ import lombok.Getter;
public class SearchableText {
@JsonIgnore
@JsonAttribute(ignore = true)
private transient String stringRepresentation;
@ -45,8 +44,7 @@ public class SearchableText {
@SuppressWarnings("checkstyle:ModifiedControlVariable")
public List<EntityPositionSequence> getSequences(String searchString, boolean caseInsensitive,
List<TextPositionSequence> sequencesSubList) {
public List<EntityPositionSequence> getSequences(String searchString, boolean caseInsensitive, List<TextPositionSequence> sequencesSubList) {
String normalizedSearchString;
if (caseInsensitive) {
@ -78,36 +76,27 @@ public class SearchableText {
for (int j = 0; j < searchSpace.get(i).length(); j++) {
if (i > 0 && j == 0 && searchSpace.get(i).charAt(0, caseInsensitive) == ' ' && searchSpace.get(i - 1)
.charAt(searchSpace.get(i - 1)
.length() - 1, caseInsensitive) == ' ' || j > 0 && searchSpace.get(i)
.charAt(j, caseInsensitive) == ' ' && searchSpace.get(i)
.charAt(searchSpace.get(i - 1).length() - 1, caseInsensitive) == ' ' || j > 0 && searchSpace.get(i).charAt(j, caseInsensitive) == ' ' && searchSpace.get(i)
.charAt(j - 1, caseInsensitive) == ' ') {
if (j == searchSpace.get(i).length() - 1 && counter != 0 && !partMatch.getTextPositions()
.isEmpty()) {
if (j == searchSpace.get(i).length() - 1 && counter != 0 && !partMatch.getTextPositions().isEmpty()) {
crossSequenceParts.add(partMatch);
}
continue;
}
if (j == 0 && searchSpace.get(i).charAt(j, caseInsensitive) != ' ' && i != 0 && searchSpace.get(i - 1)
.charAt(searchSpace.get(i - 1)
.length() - 1, caseInsensitive) != ' ' && searchChars[counter] == ' ') {
.charAt(searchSpace.get(i - 1).length() - 1, caseInsensitive) != ' ' && searchChars[counter] == ' ') {
counter++;
}
if (searchSpace.get(i).charAt(j, caseInsensitive) == searchChars[counter] || counter != 0 && searchSpace
.get(i)
.charAt(j, caseInsensitive) == '-') {
if (searchSpace.get(i).charAt(j, caseInsensitive) == searchChars[counter] || counter != 0 && searchSpace.get(i).charAt(j, caseInsensitive) == '-') {
if (counter != 0 || i == 0 && j == 0 || j != 0 && SeparatorUtils.isSeparator(searchSpace.get(i)
.charAt(j - 1, caseInsensitive)) || j == 0 && i != 0 && SeparatorUtils.isSeparator(searchSpace.get(i - 1)
.charAt(searchSpace.get(i - 1)
.length() - 1, caseInsensitive)) || j == 0 && i != 0 && searchSpace.get(i - 1)
.charAt(searchSpace.get(i - 1).length() - 1, caseInsensitive) != ' ' && searchSpace.get(i)
.charAt(j, caseInsensitive) != ' ') {
.charAt(searchSpace.get(i - 1).length() - 1, caseInsensitive)) || j == 0 && i != 0 && searchSpace.get(i - 1)
.charAt(searchSpace.get(i - 1).length() - 1, caseInsensitive) != ' ' && searchSpace.get(i).charAt(j, caseInsensitive) != ' ') {
partMatch.add(searchSpace.get(i), searchSpace.get(i).textPositionAt(j));
if (!(j == searchSpace.get(i).length() - 1 && searchSpace.get(i)
.charAt(j, caseInsensitive) == '-' && searchChars[counter] != '-')) {
if (!(j == searchSpace.get(i).length() - 1 && searchSpace.get(i).charAt(j, caseInsensitive) == '-' && searchChars[counter] != '-')) {
counter++;
}
}
@ -115,14 +104,10 @@ public class SearchableText {
if (counter == searchString.length()) {
crossSequenceParts.add(partMatch);
if (i == searchSpace.size() - 1 && j == searchSpace.get(i)
.length() - 1 || j != searchSpace.get(i).length() - 1 && SeparatorUtils.isSeparator(searchSpace.get(i)
.charAt(j + 1, caseInsensitive)) || j == searchSpace.get(i)
.length() - 1 && SeparatorUtils.isSeparator(searchSpace.get(i + 1)
.charAt(0, caseInsensitive)) || j == searchSpace.get(i)
.length() - 1 && searchSpace.get(i)
.charAt(j, caseInsensitive) != ' ' && searchSpace.get(i + 1)
.charAt(0, caseInsensitive) != ' ') {
if (i == searchSpace.size() - 1 && j == searchSpace.get(i).length() - 1 || j != searchSpace.get(i)
.length() - 1 && SeparatorUtils.isSeparator(searchSpace.get(i).charAt(j + 1, caseInsensitive)) || j == searchSpace.get(i)
.length() - 1 && SeparatorUtils.isSeparator(searchSpace.get(i + 1).charAt(0, caseInsensitive)) || j == searchSpace.get(i)
.length() - 1 && searchSpace.get(i).charAt(j, caseInsensitive) != ' ' && searchSpace.get(i + 1).charAt(0, caseInsensitive) != ' ') {
finalMatches.addAll(buildEntityPositionSequence(crossSequenceParts, normalizedSearchString));
}
@ -149,8 +134,7 @@ public class SearchableText {
}
private List<EntityPositionSequence> buildEntityPositionSequence(List<TextPositionSequence> crossSequenceParts,
String searchString) {
private List<EntityPositionSequence> buildEntityPositionSequence(List<TextPositionSequence> crossSequenceParts, String searchString) {
List<EntityPositionSequence> result = new ArrayList<>();
String asString = buildString(crossSequenceParts);
@ -188,10 +172,13 @@ public class SearchableText {
/**
 * Builds the text of this object from its {@code sequences} via {@code buildString}.
 * The string is rebuilt on every call.
 *
 * @return the string built from {@code sequences}
 */
@Override
public String toString() {

    final String text = buildString(sequences);
    return text;
}
public String asString() {
if (stringRepresentation == null) {
stringRepresentation = buildString(sequences);
}
@ -217,9 +204,7 @@ public class SearchableText {
previous = word;
}
return TextNormalizationUtilities.removeHyphenLineBreaks(sb.toString())
.replaceAll("\n", " ")
.replaceAll(" {2}", " ");
return TextNormalizationUtilities.removeHyphenLineBreaks(sb.toString()).replaceAll("\n", " ").replaceAll(" {2}", " ");
}
@ -230,7 +215,12 @@ public class SearchableText {
public String getAsStringWithLinebreaksSorted(List<TextPositionSequence> sequences) {
var sorted = sequences.stream().sorted(Comparator.comparing(a -> a.getTextPositions().get(0).getXDirAdj())).sorted(Comparator.comparing(a -> a.getTextPositions().get(0).getYDirAdj())).sorted(Comparator.comparing(a -> a.getPage())).collect(Collectors.toList());
var sorted = sequences.stream()
.sorted(Comparator.comparing(a -> a.getTextPositions().get(0).getXDirAdj()))
.sorted(Comparator.comparing(a -> a.getTextPositions().get(0).getYDirAdj()))
.sorted(Comparator.comparing(a -> a.getPage()))
.collect(Collectors.toList());
StringBuilder sb = new StringBuilder();
@ -251,8 +241,6 @@ public class SearchableText {
}
public String getAsStringWithLinebreaks() {
StringBuilder sb = new StringBuilder();

View File

@ -85,11 +85,12 @@ public class Section {
@ThenAction
public void addAiEntities(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.TYPE) String asType) {
Set<Entity> entitiesOfType = nerEntities.stream()
.filter(nerEntity -> nerEntity.getType().equals(type))
.collect(Collectors.toSet());
Set<Entity> entitiesOfType = nerEntities.stream().filter(nerEntity -> nerEntity.getType().equals(type)).collect(Collectors.toSet());
List<String> values = entitiesOfType.stream().map(Entity::getWord).collect(Collectors.toList());
Set<Entity> found = EntitySearchUtils.findEntities(searchText, new SearchImplementation(values, dictionary.isCaseInsensitiveDictionary(asType)), dictionary.getType(asType), new FindEntityDetails(asType, headline, sectionNumber, false, false, Engine.NER, EntityType.RECOMMENDATION));
Set<Entity> found = EntitySearchUtils.findEntities(searchText,
new SearchImplementation(values, dictionary.isCaseInsensitiveDictionary(asType)),
dictionary.getType(asType),
new FindEntityDetails(asType, headline, sectionNumber, false, false, Engine.NER, EntityType.RECOMMENDATION));
EntitySearchUtils.clearAndFindPositions(found, searchableText, dictionary, manualRedactions);
found = found.stream().filter(f -> !f.isFalsePositive()).collect(Collectors.toSet());
@ -127,9 +128,7 @@ public class Section {
Set<String> combineSet = Set.of(combineTypes.split(","));
List<Entity> sorted = nerEntities.stream()
.sorted(Comparator.comparing(Entity::getStart))
.collect(Collectors.toList());
List<Entity> sorted = nerEntities.stream().sorted(Comparator.comparing(Entity::getStart)).collect(Collectors.toList());
Set<Entity> found = new HashSet<>();
int start = -1;
int lastEnd = -1;
@ -189,48 +188,39 @@ public class Section {
@SuppressWarnings("unused")
@WhenCondition
public boolean fileAttributeByIdEquals(@Argument(ArgumentType.FILE_ATTRIBUTE) String id,
@Argument(ArgumentType.STRING) String value) {
public boolean fileAttributeByIdEquals(@Argument(ArgumentType.FILE_ATTRIBUTE) String id, @Argument(ArgumentType.STRING) String value) {
return fileAttributes != null && fileAttributes.stream()
.anyMatch(attribute -> id.equals(attribute.getId()) && value.equals(attribute.getValue()));
return fileAttributes != null && fileAttributes.stream().anyMatch(attribute -> id.equals(attribute.getId()) && value.equals(attribute.getValue()));
}
@SuppressWarnings("unused")
@WhenCondition
public boolean fileAttributeByPlaceholderEquals(@Argument(ArgumentType.FILE_ATTRIBUTE) String placeholder,
@Argument(ArgumentType.STRING) String value) {
public boolean fileAttributeByPlaceholderEquals(@Argument(ArgumentType.FILE_ATTRIBUTE) String placeholder, @Argument(ArgumentType.STRING) String value) {
return fileAttributes != null && fileAttributes.stream()
.anyMatch(attribute -> placeholder.equals(attribute.getPlaceholder()) && value.equals(attribute.getValue()));
return fileAttributes != null && fileAttributes.stream().anyMatch(attribute -> placeholder.equals(attribute.getPlaceholder()) && value.equals(attribute.getValue()));
}
@SuppressWarnings("unused")
@WhenCondition
public boolean fileAttributeByLabelEquals(@Argument(ArgumentType.FILE_ATTRIBUTE) String label,
@Argument(ArgumentType.STRING) String value) {
public boolean fileAttributeByLabelEquals(@Argument(ArgumentType.FILE_ATTRIBUTE) String label, @Argument(ArgumentType.STRING) String value) {
return fileAttributes != null && fileAttributes.stream()
.anyMatch(attribute -> label.equals(attribute.getLabel()) && value.equals(attribute.getValue()));
return fileAttributes != null && fileAttributes.stream().anyMatch(attribute -> label.equals(attribute.getLabel()) && value.equals(attribute.getValue()));
}
@SuppressWarnings("unused")
@WhenCondition
public boolean fileAttributeByIdEqualsIgnoreCase(@Argument(ArgumentType.FILE_ATTRIBUTE) String id,
@Argument(ArgumentType.STRING) String value) {
public boolean fileAttributeByIdEqualsIgnoreCase(@Argument(ArgumentType.FILE_ATTRIBUTE) String id, @Argument(ArgumentType.STRING) String value) {
return fileAttributes != null && fileAttributes.stream()
.anyMatch(attribute -> id.equals(attribute.getId()) && value.equalsIgnoreCase(attribute.getValue()));
return fileAttributes != null && fileAttributes.stream().anyMatch(attribute -> id.equals(attribute.getId()) && value.equalsIgnoreCase(attribute.getValue()));
}
@SuppressWarnings("unused")
@WhenCondition
public boolean fileAttributeByPlaceholderEqualsIgnoreCase(@Argument(ArgumentType.FILE_ATTRIBUTE) String placeholder,
@Argument(ArgumentType.STRING) String value) {
public boolean fileAttributeByPlaceholderEqualsIgnoreCase(@Argument(ArgumentType.FILE_ATTRIBUTE) String placeholder, @Argument(ArgumentType.STRING) String value) {
return fileAttributes != null && fileAttributes.stream()
.anyMatch(attribute -> placeholder.equals(attribute.getPlaceholder()) && value.equalsIgnoreCase(attribute.getValue()));
@ -239,11 +229,9 @@ public class Section {
@SuppressWarnings("unused")
@WhenCondition
public boolean fileAttributeByLabelEqualsIgnoreCase(@Argument(ArgumentType.FILE_ATTRIBUTE) String label,
@Argument(ArgumentType.STRING) String value) {
public boolean fileAttributeByLabelEqualsIgnoreCase(@Argument(ArgumentType.FILE_ATTRIBUTE) String label, @Argument(ArgumentType.STRING) String value) {
return fileAttributes != null && fileAttributes.stream()
.anyMatch(attribute -> label.equals(attribute.getLabel()) && value.equalsIgnoreCase(attribute.getValue()));
return fileAttributes != null && fileAttributes.stream().anyMatch(attribute -> label.equals(attribute.getLabel()) && value.equalsIgnoreCase(attribute.getValue()));
}
@ -290,8 +278,7 @@ public class Section {
@SuppressWarnings("unused")
@WhenCondition
public boolean containsRegEx(@Argument(ArgumentType.STRING) String regEx,
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive) {
public boolean containsRegEx(@Argument(ArgumentType.STRING) String regEx, @Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive) {
var compiledPattern = Patterns.getCompiledPattern(regEx, patternCaseInsensitive);
@ -303,14 +290,11 @@ public class Section {
@SuppressWarnings("unused")
@WhenCondition
public boolean rowEquals(@Argument(ArgumentType.STRING) String headerName,
@Argument(ArgumentType.STRING) String value) {
public boolean rowEquals(@Argument(ArgumentType.STRING) String headerName, @Argument(ArgumentType.STRING) String value) {
String cleanHeaderName = headerName.replaceAll("\n", "").replaceAll(" ", "").replaceAll("-", "");
return tabularData != null && tabularData.containsKey(cleanHeaderName) && tabularData.get(cleanHeaderName)
.toString()
.equals(value);
return tabularData != null && tabularData.containsKey(cleanHeaderName) && tabularData.get(cleanHeaderName).toString().equals(value);
}
@ -366,7 +350,15 @@ public class Section {
continue;
}
var expandedEntities = findEntities(match + entity.getWord(), type, false, entity.isRedaction(), entity.getMatchedRule(), entity.getRedactionReason(), entity.getLegalBasis(), Engine.RULE, false);
var expandedEntities = findEntities(match + entity.getWord(),
type,
false,
entity.isRedaction(),
entity.getMatchedRule(),
entity.getRedactionReason(),
entity.getLegalBasis(),
Engine.RULE,
false);
expanded.addAll(EntitySearchUtils.findNonOverlappingMatchEntities(entities, expandedEntities));
}
}
@ -429,7 +421,15 @@ public class Section {
continue;
}
var expandedEntities = findEntities(entity.getWord() + match, type, false, entity.isRedaction(), entity.getMatchedRule(), entity.getRedactionReason(), entity.getLegalBasis(), Engine.RULE, false);
var expandedEntities = findEntities(entity.getWord() + match,
type,
false,
entity.isRedaction(),
entity.getMatchedRule(),
entity.getRedactionReason(),
entity.getLegalBasis(),
Engine.RULE,
false);
expanded.addAll(EntitySearchUtils.findNonOverlappingMatchEntities(entities, expandedEntities));
}
}
@ -453,9 +453,7 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void redactNotImage(@Argument(ArgumentType.TYPE) String type,
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.STRING) String reason) {
public void redactNotImage(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.STRING) String reason) {
redactImage(type, ruleNumber, reason, null, false);
}
@ -463,7 +461,8 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void redact(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
public void redact(@Argument(ArgumentType.TYPE) String type,
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.STRING) String reason,
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
@ -473,8 +472,7 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void redactNot(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.STRING) String reason) {
public void redactNot(@Argument(ArgumentType.TYPE) String type, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.STRING) String reason) {
redact(type, ruleNumber, reason, null, false);
}
@ -482,7 +480,8 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void redactLineAfter(@Argument(ArgumentType.STRING) String start, @Argument(ArgumentType.TYPE) String asType,
public void redactLineAfter(@Argument(ArgumentType.STRING) String start,
@Argument(ArgumentType.TYPE) String asType,
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
@Argument(ArgumentType.STRING) String reason,
@ -509,7 +508,8 @@ public class Section {
@SuppressWarnings("unused")
public void redactByRegEx(@Argument(ArgumentType.REGEX) String pattern,
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
@Argument(ArgumentType.INTEGER) int group, @Argument(ArgumentType.TYPE) String asType,
@Argument(ArgumentType.INTEGER) int group,
@Argument(ArgumentType.TYPE) String asType,
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.STRING) String reason,
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
@ -522,7 +522,8 @@ public class Section {
@SuppressWarnings("unused")
public void redactNotByRegEx(@Argument(ArgumentType.REGEX) String pattern,
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
@Argument(ArgumentType.INTEGER) int group, @Argument(ArgumentType.TYPE) String asType,
@Argument(ArgumentType.INTEGER) int group,
@Argument(ArgumentType.TYPE) String asType,
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.STRING) String reason) {
@ -533,7 +534,8 @@ public class Section {
@Deprecated
@ThenAction
@SuppressWarnings("unused")
public void redactBetween(@Argument(ArgumentType.STRING) String start, @Argument(ArgumentType.STRING) String stop,
public void redactBetween(@Argument(ArgumentType.STRING) String start,
@Argument(ArgumentType.STRING) String stop,
@Argument(ArgumentType.TYPE) String asType,
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
@ -546,7 +548,8 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void redactBetween(@Argument(ArgumentType.STRING) String start, @Argument(ArgumentType.STRING) String stop,
public void redactBetween(@Argument(ArgumentType.STRING) String start,
@Argument(ArgumentType.STRING) String stop,
@Argument(ArgumentType.TYPE) String asType,
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
@ -560,7 +563,8 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void redactBetween(@Argument(ArgumentType.STRING) String start, @Argument(ArgumentType.STRING) String stop,
public void redactBetween(@Argument(ArgumentType.STRING) String start,
@Argument(ArgumentType.STRING) String stop,
@Argument(ArgumentType.TYPE) String asType,
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
@ -570,13 +574,26 @@ public class Section {
@Argument(ArgumentType.BOOLEAN) boolean skipRemoveEntitiesContainedInLarger,
@Argument(ArgumentType.BOOLEAN) boolean sortedResult) {
redactBetween(start, stop, false, false, asType, ruleNumber, redactEverywhere, excludeHeadLine, reason, legalBasis, true, skipRemoveEntitiesContainedInLarger, sortedResult);
redactBetween(start,
stop,
false,
false,
asType,
ruleNumber,
redactEverywhere,
excludeHeadLine,
reason,
legalBasis,
true,
skipRemoveEntitiesContainedInLarger,
sortedResult);
}
@ThenAction
@SuppressWarnings("unused")
public void redactBetween(@Argument(ArgumentType.STRING) String start, @Argument(ArgumentType.STRING) String stop,
public void redactBetween(@Argument(ArgumentType.STRING) String start,
@Argument(ArgumentType.STRING) String stop,
@Argument(ArgumentType.BOOLEAN) boolean includeStart,
@Argument(ArgumentType.BOOLEAN) boolean includeStop,
@Argument(ArgumentType.TYPE) String asType,
@ -588,7 +605,19 @@ public class Section {
@Argument(ArgumentType.BOOLEAN) boolean skipRemoveEntitiesContainedInLarger,
@Argument(ArgumentType.BOOLEAN) boolean sortedResult) {
redactBetween(start, stop, includeStart, includeStop, asType, ruleNumber, redactEverywhere, excludeHeadLine, reason, legalBasis, true, skipRemoveEntitiesContainedInLarger, sortedResult);
redactBetween(start,
stop,
includeStart,
includeStop,
asType,
ruleNumber,
redactEverywhere,
excludeHeadLine,
reason,
legalBasis,
true,
skipRemoveEntitiesContainedInLarger,
sortedResult);
}
@ -619,7 +648,19 @@ public class Section {
String stopValue = getFirstRexExMatch(searchTextAfter, stopPattern, stopPatternCaseInsensitive, stopGroup);
if (startValue != null && stopValue != null) {
redactBetween(startValue, stopValue, includeStart, includeStop, type, ruleNumber, false, false, reason, legalBasis, true, skipRemoveEntitiesContainedInLarger, sortedResult);
redactBetween(startValue,
stopValue,
includeStart,
includeStop,
type,
ruleNumber,
false,
false,
reason,
legalBasis,
true,
skipRemoveEntitiesContainedInLarger,
sortedResult);
}
}
@ -628,7 +669,8 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void redactNotBetween(@Argument(ArgumentType.STRING) String start,
@Argument(ArgumentType.STRING) String stop, @Argument(ArgumentType.TYPE) String asType,
@Argument(ArgumentType.STRING) String stop,
@Argument(ArgumentType.TYPE) String asType,
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
@Argument(ArgumentType.STRING) String reason) {
@ -640,7 +682,8 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void redactNotBetween(@Argument(ArgumentType.STRING) String start,
@Argument(ArgumentType.STRING) String stop, @Argument(ArgumentType.TYPE) String asType,
@Argument(ArgumentType.STRING) String stop,
@Argument(ArgumentType.TYPE) String asType,
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
@Argument(ArgumentType.BOOLEAN) boolean excludeHeadLine,
@ -680,7 +723,8 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void redactCell(@Argument(ArgumentType.STRING) String cellHeader,
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.TYPE) String type,
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.TYPE) String type,
@Argument(ArgumentType.BOOLEAN) boolean addAsRecommendations,
@Argument(ArgumentType.STRING) String reason,
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
@ -755,9 +799,7 @@ public class Section {
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.STRING) String reason) {
Set<Entity> references = entities.stream()
.filter(entity -> entity.getType().equals(referenceType))
.collect(Collectors.toSet());
Set<Entity> references = entities.stream().filter(entity -> entity.getType().equals(referenceType)).collect(Collectors.toSet());
entities.forEach(entity -> {
if (entity.getType().equals(type)) {
@ -791,7 +833,8 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void addRedaction(@Argument(ArgumentType.STRING) String value, @Argument(ArgumentType.TYPE) String asType,
public void addRedaction(@Argument(ArgumentType.STRING) String value,
@Argument(ArgumentType.TYPE) String asType,
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.STRING) String reason,
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
@ -813,8 +856,7 @@ public class Section {
@SuppressWarnings("unused")
public void ignoreRecommendations(@Argument(ArgumentType.TYPE) String type) {
entities.removeIf(entity -> entity.getType().equals(type) && entity.getEntityType()
.equals(EntityType.RECOMMENDATION));
entities.removeIf(entity -> entity.getType().equals(type) && entity.getEntityType().equals(EntityType.RECOMMENDATION));
}
@ -873,8 +915,7 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void addHintAnnotation(@Argument(ArgumentType.STRING) String value,
@Argument(ArgumentType.TYPE) String asType) {
public void addHintAnnotation(@Argument(ArgumentType.STRING) String value, @Argument(ArgumentType.TYPE) String asType) {
Set<Entity> found = findEntities(value.trim(), asType, true, false, 0, null, null, Engine.RULE, false);
EntitySearchUtils.addEntitiesIgnoreRank(entities, found);
@ -883,8 +924,7 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void recommendLineAfter(@Argument(ArgumentType.STRING) String start,
@Argument(ArgumentType.TYPE) String asType) {
public void recommendLineAfter(@Argument(ArgumentType.STRING) String start, @Argument(ArgumentType.TYPE) String asType) {
String[] values = StringUtils.substringsBetween(text, start, "\n");
@ -909,9 +949,7 @@ public class Section {
@ThenAction
@SuppressWarnings("unused")
public void highlightCell(@Argument(ArgumentType.STRING) String cellHeader,
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.TYPE) String type) {
public void highlightCell(@Argument(ArgumentType.STRING) String cellHeader, @Argument(ArgumentType.RULE_NUMBER) int ruleNumber, @Argument(ArgumentType.TYPE) String type) {
annotateCell(cellHeader, ruleNumber, type, false, false, null, null);
}
@ -1051,8 +1089,14 @@ public class Section {
}
private void redactAndRecommendByRegEx(String pattern, boolean patternCaseInsensitive, int group, String asType,
int ruleNumber, String reason, String legalBasis, boolean redaction) {
private void redactAndRecommendByRegEx(String pattern,
boolean patternCaseInsensitive,
int group,
String asType,
int ruleNumber,
String reason,
String legalBasis,
boolean redaction) {
Pattern compiledPattern = Patterns.getCompiledPattern(pattern, patternCaseInsensitive);
Matcher matcher = compiledPattern.matcher(searchText);
@ -1067,12 +1111,21 @@ public class Section {
}
private Set<Entity> findEntities(String value, String asType, boolean caseInsensitive, boolean redacted,
int ruleNumber, String reason, String legalBasis, Engine engine,
private Set<Entity> findEntities(String value,
String asType,
boolean caseInsensitive,
boolean redacted,
int ruleNumber,
String reason,
String legalBasis,
Engine engine,
boolean asRecommendation) {
String text = caseInsensitive ? searchText.toLowerCase() : searchText;
Set<Entity> found = EntitySearchUtils.findEntities(text, new SearchImplementation(value, caseInsensitive), dictionary.getType(asType), new FindEntityDetails(asType, headline, sectionNumber, false, false, engine, asRecommendation ? EntityType.RECOMMENDATION : EntityType.ENTITY));
Set<Entity> found = EntitySearchUtils.findEntities(text,
new SearchImplementation(value, caseInsensitive),
dictionary.getType(asType),
new FindEntityDetails(asType, headline, sectionNumber, false, false, engine, asRecommendation ? EntityType.RECOMMENDATION : EntityType.ENTITY));
found.forEach(entity -> {
if (redacted) {
entity.setRedaction(true);
@ -1113,8 +1166,7 @@ public class Section {
}
private void annotateCell(String cellHeader, int ruleNumber, String type, boolean redact,
boolean addAsRecommendations, String reason, String legalBasis) {
private void annotateCell(String cellHeader, int ruleNumber, String type, boolean redact, boolean addAsRecommendations, String reason, String legalBasis) {
String cleanHeaderName = cellHeader.replaceAll("\n", "").replaceAll(" ", "").replaceAll("-", "");
@ -1124,7 +1176,16 @@ public class Section {
} else {
String word = value.toString();
Entity entity = new Entity(word, type, value.getRowSpanStart(), value.getRowSpanStart() + word.length(), headline, sectionNumber, false, false, Engine.RULE, EntityType.ENTITY);
Entity entity = new Entity(word,
type,
value.getRowSpanStart(),
value.getRowSpanStart() + word.length(),
headline,
sectionNumber,
false,
false,
Engine.RULE,
EntityType.ENTITY);
entity.setRedaction(redact);
entity.setMatchedRule(ruleNumber);
entity.setRedactionReason(reason);
@ -1139,7 +1200,10 @@ public class Section {
singleEntitySet.add(entity);
EntitySearchUtils.clearAndFindPositions(singleEntitySet, searchableText, dictionary, manualRedactions);
EntitySearchUtils.removeFalsePositives(singleEntitySet, searchText, dictionary.getType(type), new FindEntityDetails(type, headline, sectionNumber, false, false, Engine.RULE, EntityType.ENTITY));
EntitySearchUtils.removeFalsePositives(singleEntitySet,
searchText,
dictionary.getType(type),
new FindEntityDetails(type, headline, sectionNumber, false, false, Engine.RULE, EntityType.ENTITY));
if (!singleEntitySet.isEmpty()) {
EntitySearchUtils.addEntitiesWithHigherRank(entities, singleEntitySet.iterator().next(), dictionary);
@ -1165,8 +1229,7 @@ public class Section {
}
private void redactLineAfter(String start, String asType, int ruleNumber, boolean redactEverywhere, String reason,
String legalBasis, boolean redaction) {
private void redactLineAfter(String start, String asType, int ruleNumber, boolean redactEverywhere, String reason, String legalBasis, boolean redaction) {
String[] values = StringUtils.substringsBetween(text, start, "\n");
@ -1185,8 +1248,7 @@ public class Section {
}
public void redactLineAfterAcrossColumns(String start, String asType, int ruleNumber, boolean redactEverywhere,
String reason, String legalBasis) {
public void redactLineAfterAcrossColumns(String start, String asType, int ruleNumber, boolean redactEverywhere, String reason, String legalBasis) {
String[] values = StringUtils.substringsBetween(searchableText.getAsStringWithLinebreaksSorted(), start, "\n");
@ -1205,9 +1267,7 @@ public class Section {
}
private void redactByRegEx(String pattern, boolean patternCaseInsensitive, int group, String asType, int ruleNumber,
String reason, String legalBasis, boolean redaction) {
private void redactByRegEx(String pattern, boolean patternCaseInsensitive, int group, String asType, int ruleNumber, String reason, String legalBasis, boolean redaction) {
Pattern compiledPattern = Patterns.getCompiledPattern(pattern, patternCaseInsensitive);
@ -1239,9 +1299,18 @@ public class Section {
}
private void redactBetween(String start, String stop, boolean includeStart, boolean includeStop, String asType,
int ruleNumber, boolean redactEverywhere, boolean excludeHeadLine, String reason,
String legalBasis, boolean redaction, boolean skipRemoveEntitiesContainedInLarger,
private void redactBetween(String start,
String stop,
boolean includeStart,
boolean includeStop,
String asType,
int ruleNumber,
boolean redactEverywhere,
boolean excludeHeadLine,
String reason,
String legalBasis,
boolean redaction,
boolean skipRemoveEntitiesContainedInLarger,
boolean sortedResult) {
String[] values = new String[1];
@ -1297,8 +1366,7 @@ public class Section {
}
private void redactLinesBetween(String start, String stop, String asType, int ruleNumber, boolean redactEverywhere,
String reason, String legalBasis, boolean redaction) {
private void redactLinesBetween(String start, String stop, String asType, int ruleNumber, boolean redactEverywhere, String reason, String legalBasis, boolean redaction) {
String[] values = StringUtils.substringsBetween(text, start, stop);

View File

@ -4,6 +4,7 @@ import java.util.HashMap;
import java.util.Map;
import com.dslplatform.json.CompiledJson;
import lombok.Data;
@Data

View File

@ -1,6 +1,7 @@
package com.iqser.red.service.redaction.v1.server.redaction.model.image;
import com.dslplatform.json.CompiledJson;
import lombok.Data;
@Data
@ -9,4 +10,5 @@ public class FilterGeometry {
private ImageSize imageSize;
private ImageFormat imageFormat;
}

View File

@ -1,6 +1,7 @@
package com.iqser.red.service.redaction.v1.server.redaction.model.image;
import com.dslplatform.json.CompiledJson;
import lombok.Data;
@Data
@ -10,4 +11,5 @@ public class Filters {
private FilterGeometry geometry;
private Probability probability;
private boolean allPassed;
}

View File

@ -1,11 +1,14 @@
package com.iqser.red.service.redaction.v1.server.redaction.model.image;
import com.dslplatform.json.CompiledJson;
import lombok.Data;
/**
 * Data holder for a width/height pair in the image model package.
 *
 * <p>Lombok's {@code @Data} generates the accessors, {@code equals}/{@code hashCode} and
 * {@code toString}; {@code @CompiledJson} registers the class with dsl-json so a converter
 * is generated at compile time.
 *
 * <p>NOTE(review): the unit of the two values (pixels, points, ...) is not visible here —
 * confirm against the producer of this JSON.
 */
@Data
@CompiledJson
public class Geometry {
// Horizontal extent.
private float width;
// Vertical extent.
private float height;
}

View File

@ -1,6 +1,7 @@
package com.iqser.red.service.redaction.v1.server.redaction.model.image;
import com.dslplatform.json.CompiledJson;
import lombok.Data;
@Data

View File

@ -1,6 +1,7 @@
package com.iqser.red.service.redaction.v1.server.redaction.model.image;
import com.dslplatform.json.CompiledJson;
import lombok.Data;
@Data
@ -12,4 +13,5 @@ public class ImageMetadata {
private Geometry geometry;
private Filters filters;
private boolean alpha;
}

View File

@ -4,6 +4,7 @@ import com.dslplatform.json.CompiledJson;
import com.dslplatform.json.JsonAttribute;
import com.fasterxml.jackson.annotation.JsonAlias;
import com.fasterxml.jackson.annotation.JsonProperty;
import lombok.Data;
import java.util.ArrayList;
@ -23,6 +24,7 @@ public class ImageServiceResponse {
private List<ImageMetadata> dataCV = new ArrayList<>();
@JsonProperty(value = "imageMetadata")
@JsonAlias("data")
@JsonAttribute(alternativeNames = {"imageMetadata"})

View File

@ -1,6 +1,7 @@
package com.iqser.red.service.redaction.v1.server.redaction.model.image;
import com.dslplatform.json.CompiledJson;
import lombok.Data;
@Data

View File

@ -1,14 +1,17 @@
package com.iqser.red.service.redaction.v1.server.redaction.model.image;
import com.dslplatform.json.CompiledJson;
import lombok.Data;
/**
 * Data holder for two x values, two y values and a page number in the image model package.
 *
 * <p>Lombok's {@code @Data} generates the accessors, {@code equals}/{@code hashCode} and
 * {@code toString}; {@code @CompiledJson} registers the class with dsl-json so a converter
 * is generated at compile time.
 *
 * <p>NOTE(review): presumably (x1, y1) and (x2, y2) are opposite corners of a bounding box —
 * the coordinate origin and unit are not visible here; confirm against the producer.
 */
@Data
@CompiledJson
public class Position {
// First and second x coordinate.
private float x1;
private float x2;
// First and second y coordinate.
private float y1;
private float y2;
// Page the coordinates refer to. NOTE(review): 0- or 1-based is not visible here — confirm.
private int pageNumber;
}

View File

@ -1,6 +1,7 @@
package com.iqser.red.service.redaction.v1.server.redaction.model.image;
import com.dslplatform.json.CompiledJson;
import lombok.Data;
@Data

View File

@ -10,6 +10,7 @@ import lombok.Data;
@Data
@CompiledJson
public class TableServiceResponse {
private String dossierId;
private String fileId;
private String operation;

View File

@ -4,6 +4,7 @@ import com.iqser.red.service.redaction.v1.model.Argument;
import com.iqser.red.service.redaction.v1.model.RuleBuilderModel;
import com.iqser.red.service.redaction.v1.model.RuleElement;
import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
import org.springframework.stereotype.Service;
import java.lang.reflect.Method;
@ -21,16 +22,18 @@ public class RuleBuilderModelService {
RuleBuilderModel ruleBuilderModel = new RuleBuilderModel();
ruleBuilderModel.setWhenClauses(whenConditions.stream().map(c -> new RuleElement(c.getName(), toArguments(c))).collect(Collectors.toList()));
ruleBuilderModel.setThenConditions(thenActions.stream().map(c -> new RuleElement(c.getName(), toArguments(c))).collect(Collectors.toList()));
return ruleBuilderModel;
}
/**
 * Describes the parameters of a rule method as {@link Argument} entries.
 *
 * <p>Each entry pairs the reflective parameter name with the value of the parameter's
 * {@code Section.Argument} annotation. The reflective name is only the source-level name
 * when the class was compiled with {@code -parameters}; otherwise the JVM synthesizes it.
 *
 * @param c the method whose parameters are described
 * @return one entry per parameter, in declaration order
 * @throws NullPointerException if a parameter carries no {@code Section.Argument} annotation,
 *                              because the annotation is dereferenced without a check
 */
private List<Argument> toArguments(Method c) {

    return Arrays.stream(c.getParameters()).map(parameter -> {
        Section.Argument annotation = parameter.getAnnotation(Section.Argument.class);
        return new Argument(parameter.getName(), annotation.value());
    }).collect(Collectors.toList());
}
}

View File

@ -90,7 +90,9 @@ public class AnalyzeService {
Document classifiedDoc;
try {
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.ORIGIN));
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(analyzeRequest.getDossierId(),
analyzeRequest.getFileId(),
FileType.ORIGIN));
Map<Integer, List<PdfImage>> pdfImages = null;
if (redactionServiceSettings.isEnableImageClassification()) {
@ -112,10 +114,10 @@ public class AnalyzeService {
// enhance section grid with headline data
sectionTexts.forEach(sectionText -> classifiedDoc.getSectionGrid()
.getSections()
.add(new SectionGrid.SectionGridSection(sectionText.getSectionNumber(), sectionText.getHeadline(), sectionText.getSectionAreas()
.stream()
.map(SectionArea::getPage)
.collect(Collectors.toSet()), sectionText.getSectionAreas())));
.add(new SectionGrid.SectionGridSection(sectionText.getSectionNumber(),
sectionText.getHeadline(),
sectionText.getSectionAreas().stream().map(SectionArea::getPage).collect(Collectors.toSet()),
sectionText.getSectionAreas())));
log.info("Store text, simplified text and section grid for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.TEXT, text);
@ -146,10 +148,15 @@ public class AnalyzeService {
return analyze(analyzeRequest);
}
DictionaryIncrement dictionaryIncrement = dictionaryService.getDictionaryIncrements(analyzeRequest.getDossierTemplateId(), new DictionaryVersion(redactionLog.getDictionaryVersion(), redactionLog.getDossierDictionaryVersion()), analyzeRequest.getDossierId());
DictionaryIncrement dictionaryIncrement = dictionaryService.getDictionaryIncrements(analyzeRequest.getDossierTemplateId(),
new DictionaryVersion(redactionLog.getDictionaryVersion(), redactionLog.getDossierDictionaryVersion()),
analyzeRequest.getDossierId());
Set<Integer> sectionsToReanalyse = !analyzeRequest.getSectionsToReanalyse()
.isEmpty() ? analyzeRequest.getSectionsToReanalyse() : findSectionsToReanalyse(dictionaryIncrement, redactionLog, text, analyzeRequest);
Set<Integer> sectionsToReanalyse = !analyzeRequest.getSectionsToReanalyse().isEmpty() ? analyzeRequest.getSectionsToReanalyse() : findSectionsToReanalyse(
dictionaryIncrement,
redactionLog,
text,
analyzeRequest);
if (sectionsToReanalyse.isEmpty()) {
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement.getDictionaryVersion(), true);
@ -175,11 +182,13 @@ public class AnalyzeService {
var newRedactionLogEntries = redactionLogCreatorService.createRedactionLog(pageEntities, text.getNumberOfPages(), analyzeRequest.getDossierTemplateId());
var importedRedactionFilteredEntries = importedRedactionService.processImportedRedactions(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId(), analyzeRequest.getFileId(), newRedactionLogEntries, false);
var importedRedactionFilteredEntries = importedRedactionService.processImportedRedactions(analyzeRequest.getDossierTemplateId(),
analyzeRequest.getDossierId(),
analyzeRequest.getFileId(),
newRedactionLogEntries,
false);
redactionLog.getRedactionLogEntry()
.removeIf(entry -> sectionsToReanalyse.contains(entry.getSectionNumber()) && !entry.getType()
.equals(IMPORTED_REDACTION_TYPE));
redactionLog.getRedactionLogEntry().removeIf(entry -> sectionsToReanalyse.contains(entry.getSectionNumber()) && !entry.getType().equals(IMPORTED_REDACTION_TYPE));
redactionLog.getRedactionLogEntry().addAll(importedRedactionFilteredEntries);
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement.getDictionaryVersion(), true);
@ -208,11 +217,20 @@ public class AnalyzeService {
List<RedactionLogEntry> redactionLogEntries = redactionLogCreatorService.createRedactionLog(pageEntities, text.getNumberOfPages(), analyzeRequest.getDossierTemplateId());
var legalBasis = legalBasisClient.getLegalBasisMapping(analyzeRequest.getDossierTemplateId());
var redactionLog = new RedactionLog(redactionServiceSettings.getAnalysisVersion(), analyzeRequest.getAnalysisNumber(), redactionLogEntries, convert(legalBasis), dictionary.getVersion()
.getDossierTemplateVersion(), dictionary.getVersion()
.getDossierVersion(), rulesVersion, legalBasisClient.getVersion(analyzeRequest.getDossierTemplateId()));
var redactionLog = new RedactionLog(redactionServiceSettings.getAnalysisVersion(),
analyzeRequest.getAnalysisNumber(),
redactionLogEntries,
convert(legalBasis),
dictionary.getVersion().getDossierTemplateVersion(),
dictionary.getVersion().getDossierVersion(),
rulesVersion,
legalBasisClient.getVersion(analyzeRequest.getDossierTemplateId()));
var importedRedactionFilteredEntries = importedRedactionService.processImportedRedactions(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId(), analyzeRequest.getFileId(), redactionLog.getRedactionLogEntry(), true);
var importedRedactionFilteredEntries = importedRedactionService.processImportedRedactions(analyzeRequest.getDossierTemplateId(),
analyzeRequest.getDossierId(),
analyzeRequest.getFileId(),
redactionLog.getRedactionLogEntry(),
true);
redactionLog.setRedactionLogEntry(importedRedactionFilteredEntries);
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionary.getVersion(), false);
@ -220,8 +238,7 @@ public class AnalyzeService {
@Timed("redactmanager_findSectionsToReanalyse")
private Set<Integer> findSectionsToReanalyse(DictionaryIncrement dictionaryIncrement, RedactionLog redactionLog,
Text text, AnalyzeRequest analyzeRequest) {
private Set<Integer> findSectionsToReanalyse(DictionaryIncrement dictionaryIncrement, RedactionLog redactionLog, Text text, AnalyzeRequest analyzeRequest) {
long start = System.currentTimeMillis();
Set<String> relevantManuallyModifiedAnnotationIds = getRelevantManuallyModifiedAnnotationIds(analyzeRequest.getManualRedactions());
@ -237,9 +254,8 @@ public class AnalyzeService {
}
}
var dictionaryIncrementsSearch = new SearchImplementation(dictionaryIncrement.getValues().stream()
.map(DictionaryIncrementValue::getValue).collect(Collectors.toList()), true);
var dictionaryIncrementsSearch = new SearchImplementation(dictionaryIncrement.getValues().stream().map(DictionaryIncrementValue::getValue).collect(Collectors.toList()),
true);
for (SectionText sectionText : text.getSectionTexts()) {
@ -255,8 +271,11 @@ public class AnalyzeService {
}
private AnalyzeResult finalizeAnalysis(@RequestBody AnalyzeRequest analyzeRequest, long startTime,
RedactionLog redactionLog, Text text, DictionaryVersion dictionaryVersion,
private AnalyzeResult finalizeAnalysis(@RequestBody AnalyzeRequest analyzeRequest,
long startTime,
RedactionLog redactionLog,
Text text,
DictionaryVersion dictionaryVersion,
boolean isReanalysis) {
redactionLog.setDictionaryVersion(dictionaryVersion.getDossierTemplateVersion());
@ -264,7 +283,10 @@ public class AnalyzeService {
excludeExcludedPages(redactionLog, analyzeRequest.getExcludedPages());
var redactionLogChange = redactionChangeLogService.computeChanges(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), redactionLog, analyzeRequest.getAnalysisNumber());
var redactionLogChange = redactionChangeLogService.computeChanges(analyzeRequest.getDossierId(),
analyzeRequest.getFileId(),
redactionLog,
analyzeRequest.getAnalysisNumber());
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.REDACTION_LOG, redactionLogChange.getRedactionLog());
long duration = System.currentTimeMillis() - startTime;
@ -293,31 +315,27 @@ public class AnalyzeService {
return new HashSet<>();
}
return Stream.concat(manualRedactions.getResizeRedactions()
.stream()
.map(ManualResizeRedaction::getAnnotationId), Stream.concat(manualRedactions.getLegalBasisChanges()
.stream()
.map(ManualLegalBasisChange::getAnnotationId), Stream.concat(manualRedactions.getImageRecategorization()
.stream()
.map(ManualImageRecategorization::getAnnotationId), Stream.concat(manualRedactions.getIdsToRemove()
.stream()
.map(IdRemoval::getAnnotationId), manualRedactions.getForceRedactions()
.stream()
.map(ManualForceRedaction::getAnnotationId))))).collect(Collectors.toSet());
return Stream.concat(manualRedactions.getResizeRedactions().stream().map(ManualResizeRedaction::getAnnotationId),
Stream.concat(manualRedactions.getLegalBasisChanges().stream().map(ManualLegalBasisChange::getAnnotationId),
Stream.concat(manualRedactions.getImageRecategorization().stream().map(ManualImageRecategorization::getAnnotationId),
Stream.concat(manualRedactions.getIdsToRemove().stream().map(IdRemoval::getAnnotationId),
manualRedactions.getForceRedactions().stream().map(ManualForceRedaction::getAnnotationId))))).collect(Collectors.toSet());
}
/**
 * Converts legal-basis records into their redaction-log representation.
 *
 * <p>Each result is built from the source record's name, description and reason; the order
 * of the input list is kept.
 *
 * @param legalBasis the records to convert; must not be {@code null}
 * @return a new list with one {@link RedactionLogLegalBasis} per input record
 */
public List<RedactionLogLegalBasis> convert(List<LegalBasis> legalBasis) {

    Stream<RedactionLogLegalBasis> converted = legalBasis.stream()
            .map(basis -> new RedactionLogLegalBasis(basis.getName(), basis.getDescription(), basis.getReason()));
    return converted.collect(Collectors.toList());
}
public Image convert(RedactionLogEntry entry) {
Rectangle position = entry.getPositions().get(0);
return Image.builder()
.type(entry.getType())
.position(new RedRectangle2D(position.getTopLeft().getX(), position.getTopLeft()
.getY(), position.getWidth(), position.getHeight()))
.position(new RedRectangle2D(position.getTopLeft().getX(), position.getTopLeft().getY(), position.getWidth(), position.getHeight()))
.sectionNumber(entry.getSectionNumber())
.section(entry.getSection())
.page(position.getPage())

View File

@ -5,10 +5,12 @@ import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.ty
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
import com.iqser.red.service.redaction.v1.server.redaction.model.*;
import feign.FeignException;
import io.micrometer.core.annotation.Timed;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.SerializationUtils;
import org.springframework.stereotype.Service;
@ -46,16 +48,12 @@ public class DictionaryService {
updateDictionaryEntry(dossierTemplateId, dossierDictionaryVersion, getVersion(dossierDictionary), dossierId);
}
return DictionaryVersion.builder()
.dossierTemplateVersion(dossierTemplateDictionaryVersion)
.dossierVersion(dossierDictionaryVersion)
.build();
return DictionaryVersion.builder().dossierTemplateVersion(dossierTemplateDictionaryVersion).dossierVersion(dossierDictionaryVersion).build();
}
@Timed("redactmanager_getDictionaryIncrements")
public DictionaryIncrement getDictionaryIncrements(String dossierTemplateId, DictionaryVersion fromVersion,
String dossierId) {
public DictionaryIncrement getDictionaryIncrements(String dossierTemplateId, DictionaryVersion fromVersion, String dossierId) {
DictionaryVersion version = updateDictionary(dossierTemplateId, dossierId);
@ -109,7 +107,8 @@ public class DictionaryService {
try {
DictionaryRepresentation dictionaryRepresentation = new DictionaryRepresentation();
var typeResponse = dossierId == null ? dictionaryClient.getAllTypesForDossierTemplate(dossierTemplateId, false) : dictionaryClient.getAllTypesForDossier(dossierId, false);
var typeResponse = dossierId == null ? dictionaryClient.getAllTypesForDossierTemplate(dossierTemplateId, false) : dictionaryClient.getAllTypesForDossier(dossierId,
false);
if (CollectionUtils.isNotEmpty(typeResponse)) {
List<DictionaryModel> dictionary = typeResponse.stream().map(t -> {
@ -117,16 +116,10 @@ public class DictionaryService {
Optional<DictionaryModel> oldModel;
if (dossierId == null) {
var representation = dictionariesByDossierTemplate.get(dossierTemplateId);
oldModel = representation != null ? representation.getDictionary()
.stream()
.filter(f -> f.getType().equals(t.getType()))
.findAny() : Optional.empty();
oldModel = representation != null ? representation.getDictionary().stream().filter(f -> f.getType().equals(t.getType())).findAny() : Optional.empty();
} else {
var representation = dictionariesByDossier.get(dossierId);
oldModel = representation != null ? representation.getDictionary()
.stream()
.filter(f -> f.getType().equals(t.getType()))
.findAny() : Optional.empty();
oldModel = representation != null ? representation.getDictionary().stream().filter(f -> f.getType().equals(t.getType())).findAny() : Optional.empty();
}
Set<DictionaryEntry> entries = new HashSet<>();
@ -135,18 +128,9 @@ public class DictionaryService {
DictionaryEntries newEntries = getEntries(t.getId(), currentVersion);
var newValues = newEntries.getEntries()
.stream()
.map(DictionaryEntry::getValue)
.collect(Collectors.toSet());
var newFalsePositivesValues = newEntries.getFalsePositives()
.stream()
.map(DictionaryEntry::getValue)
.collect(Collectors.toSet());
var newFalseRecommendationsValues = newEntries.getFalseRecommendations()
.stream()
.map(DictionaryEntry::getValue)
.collect(Collectors.toSet());
var newValues = newEntries.getEntries().stream().map(DictionaryEntry::getValue).collect(Collectors.toSet());
var newFalsePositivesValues = newEntries.getFalsePositives().stream().map(DictionaryEntry::getValue).collect(Collectors.toSet());
var newFalseRecommendationsValues = newEntries.getFalseRecommendations().stream().map(DictionaryEntry::getValue).collect(Collectors.toSet());
oldModel.ifPresent(oldDictionaryModel -> {
@ -170,7 +154,15 @@ public class DictionaryService {
falsePositives.addAll(newEntries.getFalsePositives());
falseRecommendations.addAll(newEntries.getFalseRecommendations());
return new DictionaryModel(t.getType(), t.getRank(), convertColor(t.getHexColor()), t.isCaseInsensitive(), t.isHint(), entries, falsePositives, falseRecommendations, dossierId != null);
return new DictionaryModel(t.getType(),
t.getRank(),
convertColor(t.getHexColor()),
t.isCaseInsensitive(),
t.isHint(),
entries,
falsePositives,
falseRecommendations,
dossierId != null);
}).sorted(Comparator.comparingInt(DictionaryModel::getRank).reversed()).collect(Collectors.toList());
dictionary.forEach(dm -> dictionaryRepresentation.getLocalAccessMap().put(dm.getType(), dm));
@ -211,7 +203,11 @@ public class DictionaryService {
falsePositives.forEach(entry -> entry.setValue(entry.getValue().toLowerCase(Locale.ROOT)));
falseRecommendations.forEach(entry -> entry.setValue(entry.getValue().toLowerCase(Locale.ROOT)));
}
log.info("Dictionary update returned {} entries {} falsePositives and {} falseRecommendations for type {}", entries.size(), falsePositives.size(), falseRecommendations.size(), type.getType());
log.info("Dictionary update returned {} entries {} falsePositives and {} falseRecommendations for type {}",
entries.size(),
falsePositives.size(),
falseRecommendations.size(),
type.getType());
return new DictionaryEntries(entries, falsePositives, falseRecommendations);
}
@ -263,12 +259,8 @@ public class DictionaryService {
dossierDictionaryVersion = dossierRepresentation.getDictionaryVersion();
}
return new Dictionary(copy.stream()
.sorted(Comparator.comparingInt(DictionaryModel::getRank).reversed())
.collect(Collectors.toList()), DictionaryVersion.builder()
.dossierTemplateVersion(dossierTemplateRepresentation.getDictionaryVersion())
.dossierVersion(dossierDictionaryVersion)
.build());
return new Dictionary(copy.stream().sorted(Comparator.comparingInt(DictionaryModel::getRank).reversed()).collect(Collectors.toList()),
DictionaryVersion.builder().dossierTemplateVersion(dossierTemplateRepresentation.getDictionaryVersion()).dossierVersion(dossierDictionaryVersion).build());
}

View File

@ -6,6 +6,7 @@ import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
import io.micrometer.core.annotation.Timed;
import lombok.RequiredArgsConstructor;
import org.apache.commons.lang3.StringUtils;
import org.kie.api.KieServices;
import org.kie.api.builder.KieBuilder;
@ -106,8 +107,7 @@ public class DroolsExecutionService {
KieFileSystem kieFileSystem = kieServices.newKieFileSystem();
InputStream input = new ByteArrayInputStream(rules.getBytes(StandardCharsets.UTF_8));
kieFileSystem.write("src/main/resources/drools/rules" + dossierTemplateId + ".drl", kieServices.getResources()
.newInputStreamResource(input));
kieFileSystem.write("src/main/resources/drools/rules" + dossierTemplateId + ".drl", kieServices.getResources().newInputStreamResource(input));
KieBuilder kieBuilder = kieServices.newKieBuilder(kieFileSystem);
kieBuilder.buildAll();
return kieBuilder.getKieModule();

View File

@ -14,9 +14,11 @@ import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUti
import com.iqser.red.service.redaction.v1.server.redaction.utils.FindEntityDetails;
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
import io.micrometer.core.annotation.Timed;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.kie.api.runtime.KieContainer;
import org.springframework.stereotype.Service;
@ -35,8 +37,7 @@ public class EntityRedactionService {
private final SurroundingWordsService surroundingWordsService;
public PageEntities findEntities(Dictionary dictionary, List<SectionText> sectionTexts, KieContainer kieContainer,
AnalyzeRequest analyzeRequest, NerEntities nerEntities) {
public PageEntities findEntities(Dictionary dictionary, List<SectionText> sectionTexts, KieContainer kieContainer, AnalyzeRequest analyzeRequest, NerEntities nerEntities) {
Map<Integer, Set<Image>> imagesPerPage = new HashMap<>();
Set<Entity> entities = findEntities(sectionTexts, dictionary, kieContainer, analyzeRequest, false, null, imagesPerPage, nerEntities);
@ -55,19 +56,29 @@ public class EntityRedactionService {
}
public Set<Entity> findEntities(List<SectionText> reanalysisSections, Dictionary dictionary,
KieContainer kieContainer, AnalyzeRequest analyzeRequest, boolean local,
public Set<Entity> findEntities(List<SectionText> reanalysisSections,
Dictionary dictionary,
KieContainer kieContainer,
AnalyzeRequest analyzeRequest,
boolean local,
Map<Integer, Set<Entity>> hintsPerSectionNumber,
Map<Integer, Set<Image>> imagesPerPage, NerEntities nerEntities) {
Map<Integer, Set<Image>> imagesPerPage,
NerEntities nerEntities) {
List<SectionSearchableTextPair> sectionSearchableTextPairs = new ArrayList<>();
for (SectionText reanalysisSection : reanalysisSections) {
Entities entities = findEntities(reanalysisSection.getSearchableText(), reanalysisSection.getHeadline(), reanalysisSection.getSectionNumber(), dictionary, local, nerEntities, reanalysisSection.getCellStarts(), analyzeRequest.getManualRedactions());
Entities entities = findEntities(reanalysisSection.getSearchableText(),
reanalysisSection.getHeadline(),
reanalysisSection.getSectionNumber(),
dictionary,
local,
nerEntities,
reanalysisSection.getCellStarts(),
analyzeRequest.getManualRedactions());
if (reanalysisSection.getCellStarts() != null && !reanalysisSection.getCellStarts().isEmpty()) {
surroundingWordsService.addSurroundingText(entities.getEntities(), reanalysisSection.getSearchableText(), dictionary, reanalysisSection
.getCellStarts());
surroundingWordsService.addSurroundingText(entities.getEntities(), reanalysisSection.getSearchableText(), dictionary, reanalysisSection.getCellStarts());
} else {
surroundingWordsService.addSurroundingText(entities.getEntities(), reanalysisSection.getSearchableText(), dictionary);
}
@ -87,21 +98,16 @@ public class EntityRedactionService {
.filter(idr -> idr.getStatus() == AnnotationStatus.APPROVED && !idr.isRemoveFromDictionary())
.filter(idr -> idr.getRequestDate() != null)
.filter(idr -> approvedForceRedactions.stream()
.noneMatch(forceRedact -> forceRedact.getAnnotationId()
.equals(idr.getAnnotationId()) && forceRedact.getRequestDate()
.noneMatch(forceRedact -> forceRedact.getAnnotationId().equals(idr.getAnnotationId()) && forceRedact.getRequestDate()
.isAfter(idr.getRequestDate())))
.map(IdRemoval::getAnnotationId)
.collect(Collectors.toSet());
if (reanalysisSection.getImages() != null && !reanalysisSection.getImages()
.isEmpty() && analyzeRequest.getManualRedactions().getImageRecategorization() != null) {
if (reanalysisSection.getImages() != null && !reanalysisSection.getImages().isEmpty() && analyzeRequest.getManualRedactions().getImageRecategorization() != null) {
for (Image image : reanalysisSection.getImages()) {
String imageId = IdBuilder.buildId(image.getPosition(), image.getPage());
for (ManualImageRecategorization imageRecategorization : analyzeRequest.getManualRedactions()
.getImageRecategorization()) {
if (imageRecategorization.getStatus()
.equals(AnnotationStatus.APPROVED) && imageRecategorization.getAnnotationId()
.equals(imageId)) {
for (ManualImageRecategorization imageRecategorization : analyzeRequest.getManualRedactions().getImageRecategorization()) {
if (imageRecategorization.getStatus().equals(AnnotationStatus.APPROVED) && imageRecategorization.getAnnotationId().equals(imageId)) {
image.setType(imageRecategorization.getType());
}
}
@ -124,8 +130,7 @@ public class EntityRedactionService {
.isLocal(false)
.dictionaryTypes(dictionary.getTypes())
.entities(hintsPerSectionNumber != null && hintsPerSectionNumber.containsKey(reanalysisSection.getSectionNumber()) ? Stream.concat(entities.getEntities()
.stream(), hintsPerSectionNumber.get(reanalysisSection.getSectionNumber()).stream())
.collect(Collectors.toSet()) : entities.getEntities())
.stream(), hintsPerSectionNumber.get(reanalysisSection.getSectionNumber()).stream()).collect(Collectors.toSet()) : entities.getEntities())
.nerEntities(entities.getNerEntities())
.text(reanalysisSection.getSearchableText().getAsStringWithLinebreaks())
.searchText(reanalysisSection.getSearchableText().toString())
@ -147,11 +152,16 @@ public class EntityRedactionService {
Section analysedSection = droolsExecutionService.executeRules(kieContainer, sectionSearchableTextPair.getSection());
EntitySearchUtils.removeEntitiesContainedInLarger(analysedSection.getEntities());
var entriesWithoutSurroundingText = analysedSection.getEntities().stream().filter(e -> e.getTextAfter() == null && e.getTextBefore() == null).collect(Collectors.toSet());
var entriesWithoutSurroundingText = analysedSection.getEntities()
.stream()
.filter(e -> e.getTextAfter() == null && e.getTextBefore() == null)
.collect(Collectors.toSet());
if (sectionSearchableTextPair.getCellStarts() != null && !sectionSearchableTextPair.getCellStarts()
.isEmpty()) {
surroundingWordsService.addSurroundingText(entriesWithoutSurroundingText, sectionSearchableTextPair.getSearchableText(), dictionary, sectionSearchableTextPair.getCellStarts());
if (sectionSearchableTextPair.getCellStarts() != null && !sectionSearchableTextPair.getCellStarts().isEmpty()) {
surroundingWordsService.addSurroundingText(entriesWithoutSurroundingText,
sectionSearchableTextPair.getSearchableText(),
dictionary,
sectionSearchableTextPair.getCellStarts());
} else {
surroundingWordsService.addSurroundingText(entriesWithoutSurroundingText, sectionSearchableTextPair.getSearchableText(), dictionary);
}
@ -177,13 +187,29 @@ public class EntityRedactionService {
for (Entity entity : entities) {
Map<Integer, List<EntityPositionSequence>> sequenceOnPage = new HashMap<>();
for (EntityPositionSequence entityPositionSequence : entity.getPositionSequences()) {
sequenceOnPage.computeIfAbsent(entityPositionSequence.getPageNumber(), (x) -> new ArrayList<>())
.add(entityPositionSequence);
sequenceOnPage.computeIfAbsent(entityPositionSequence.getPageNumber(), (x) -> new ArrayList<>()).add(entityPositionSequence);
}
for (Map.Entry<Integer, List<EntityPositionSequence>> entry : sequenceOnPage.entrySet()) {
entitiesPerPage.computeIfAbsent(entry.getKey(), (x) -> new ArrayList<>())
.add(new Entity(entity.getWord(), entity.getType(), entity.isRedaction(), entity.getRedactionReason(), entry.getValue(), entity.getHeadline(), entity.getMatchedRule(), entity.getSectionNumber(), entity.getLegalBasis(), entity.isDictionaryEntry(), entity.getTextBefore(), entity.getTextAfter(), entity.getStart(), entity.getEnd(), entity.isDossierDictionaryEntry(), entity.getEngines(), entity.getReferences(), entity.getEntityType()));
.add(new Entity(entity.getWord(),
entity.getType(),
entity.isRedaction(),
entity.getRedactionReason(),
entry.getValue(),
entity.getHeadline(),
entity.getMatchedRule(),
entity.getSectionNumber(),
entity.getLegalBasis(),
entity.isDictionaryEntry(),
entity.getTextBefore(),
entity.getTextAfter(),
entity.getStart(),
entity.getEnd(),
entity.isDossierDictionaryEntry(),
entity.getEngines(),
entity.getReferences(),
entity.getEntityType()));
}
}
return entitiesPerPage;
@ -220,9 +246,14 @@ public class EntityRedactionService {
@Timed("redactmanager_findEntities")
private Entities findEntities(SearchableText searchableText, String headline, int sectionNumber,
Dictionary dictionary, boolean local, NerEntities nerEntities,
List<Integer> cellStarts, ManualRedactions manualRedactions) {
private Entities findEntities(SearchableText searchableText,
String headline,
int sectionNumber,
Dictionary dictionary,
boolean local,
NerEntities nerEntities,
List<Integer> cellStarts,
ManualRedactions manualRedactions) {
Set<Entity> found = new HashSet<>();
String searchableString = searchableText.asString();
@ -235,7 +266,16 @@ public class EntityRedactionService {
for (DictionaryModel model : dictionary.getDictionaryModels()) {
var searchImplementation = local ? model.getLocalSearch() : model.getEntriesSearch();
var entities = EntitySearchUtils.findEntities(model.isCaseInsensitive() ? lowercaseInputString : searchableString, searchImplementation, model, new FindEntityDetails(model.getType(), headline, sectionNumber, !local, model.isDossierDictionary(), local ? Engine.RULE : Engine.DICTIONARY, local ? EntityType.RECOMMENDATION : EntityType.ENTITY));
var entities = EntitySearchUtils.findEntities(model.isCaseInsensitive() ? lowercaseInputString : searchableString,
searchImplementation,
model,
new FindEntityDetails(model.getType(),
headline,
sectionNumber,
!local,
model.isDossierDictionary(),
local ? Engine.RULE : Engine.DICTIONARY,
local ? EntityType.RECOMMENDATION : EntityType.ENTITY));
EntitySearchUtils.addOrAddEngine(found, entities);
}
@ -250,15 +290,23 @@ public class EntityRedactionService {
}
private Set<Entity> getNerValues(int sectionNumber, NerEntities nerEntities, List<Integer> cellStarts,
String headline) {
private Set<Entity> getNerValues(int sectionNumber, NerEntities nerEntities, List<Integer> cellStarts, String headline) {
Set<Entity> entities = new HashSet<>();
if (redactionServiceSettings.isNerServiceEnabled() && nerEntities.getData().containsKey(sectionNumber)) {
nerEntities.getData().get(sectionNumber).forEach(res -> {
if (cellStarts == null || cellStarts.isEmpty()) {
entities.add(new Entity(res.getValue(), res.getType(), res.getStartOffset(), res.getEndOffset(), headline, sectionNumber, false, false, Engine.NER, EntityType.RECOMMENDATION));
entities.add(new Entity(res.getValue(),
res.getType(),
res.getStartOffset(),
res.getEndOffset(),
headline,
sectionNumber,
false,
false,
Engine.NER,
EntityType.RECOMMENDATION));
} else {
boolean intersectsCellStart = false;
for (Integer cellStart : cellStarts) {
@ -268,7 +316,16 @@ public class EntityRedactionService {
}
}
if (!intersectsCellStart) {
entities.add(new Entity(res.getValue(), res.getType(), res.getStartOffset(), res.getEndOffset(), headline, sectionNumber, false, false, Engine.NER, EntityType.RECOMMENDATION));
entities.add(new Entity(res.getValue(),
res.getType(),
res.getStartOffset(),
res.getEndOffset(),
headline,
sectionNumber,
false,
false,
Engine.NER,
EntityType.RECOMMENDATION));
}
}
});

View File

@ -26,7 +26,9 @@ public class ImportedRedactionService {
@Timed("redactmanager_processImportedRedactions")
public List<RedactionLogEntry> processImportedRedactions(String dossierTemplateId, String dossierId, String fileId,
public List<RedactionLogEntry> processImportedRedactions(String dossierTemplateId,
String dossierId,
String fileId,
List<RedactionLogEntry> redactionLogEntries,
boolean addImportedRedactions) {
@ -97,13 +99,13 @@ public class ImportedRedactionService {
/**
 * Reports whether rectangles {@code a} and {@code b} overlap on both axes.
 *
 * <p>Each axis is tested symmetrically: the top-left coordinate of one rectangle is checked
 * against the span {@code [topLeft, topLeft + extent]} of the other (width for x, height for y),
 * and then the same check is made with the roles swapped. The result is {@code true} only when
 * the x test and the y test both succeed.
 *
 * <p>NOTE(review): whether touching edges count as overlapping depends on {@code valueInRange},
 * which is not visible here - confirm its boundary handling.
 *
 * <p>This span comes from a reformatting diff, so each declaration appears twice: first with the
 * previous line wrapping, then re-wrapped. Both copies are the same expression.
 *
 * @param a first rectangle
 * @param b second rectangle
 * @return {@code true} if the horizontal and the vertical test both report an overlap
 */
boolean rectOverlap(Rectangle a, Rectangle b) {
// Horizontal test: a's left coordinate within b's horizontal span, or b's within a's.
boolean xOverlap = valueInRange(a.getTopLeft().getX(), b.getTopLeft().getX(), b.getTopLeft()
.getX() + b.getWidth()) || valueInRange(b.getTopLeft().getX(), a.getTopLeft().getX(), a.getTopLeft()
.getX() + a.getWidth());
// Same horizontal test, re-wrapped by the formatter.
boolean xOverlap = valueInRange(a.getTopLeft().getX(), b.getTopLeft().getX(), b.getTopLeft().getX() + b.getWidth()) || valueInRange(b.getTopLeft().getX(),
a.getTopLeft().getX(),
a.getTopLeft().getX() + a.getWidth());
// Vertical test: a's top coordinate within b's vertical span, or b's within a's.
boolean yOverlap = valueInRange(a.getTopLeft().getY(), b.getTopLeft().getY(), b.getTopLeft()
.getY() + b.getHeight()) || valueInRange(b.getTopLeft().getY(), a.getTopLeft().getY(), a.getTopLeft()
.getY() + a.getHeight());
// Same vertical test, re-wrapped by the formatter.
boolean yOverlap = valueInRange(a.getTopLeft().getY(), b.getTopLeft().getY(), b.getTopLeft().getY() + b.getHeight()) || valueInRange(b.getTopLeft().getY(),
a.getTopLeft().getY(),
a.getTopLeft().getY() + a.getHeight());
// The rectangles overlap only if they overlap on both axes.
return xOverlap && yOverlap;
}

View File

@ -36,6 +36,7 @@ public class ManualRedactionSurroundingTextService {
private final RedactionStorageService redactionStorageService;
private final SurroundingWordsService surroundingWordsService;
@Timed("redactmanager_SurroundingTextAnalysis")
public AnalyzeResult addSurroundingText(String dossierId, String fileId, ManualRedactions manualRedactions) {
@ -67,19 +68,15 @@ public class ManualRedactionSurroundingTextService {
manualRedactions.getEntriesToAdd().addAll(processedAddRedactions);
return AnalyzeResult.builder()
.dossierId(dossierId)
.fileId(fileId)
.manualRedactions(manualRedactions)
.duration(System.currentTimeMillis() - startTime)
.build();
return AnalyzeResult.builder().dossierId(dossierId).fileId(fileId).manualRedactions(manualRedactions).duration(System.currentTimeMillis() - startTime).build();
}
private Pair<String, String> findSurroundingText(SectionText sectionText, String value,
List<Rectangle> toFindPositions) {
private Pair<String, String> findSurroundingText(SectionText sectionText, String value, List<Rectangle> toFindPositions) {
Set<Entity> entities = EntitySearchUtils.find(sectionText.getText(), new SearchImplementation(value, false), new FindEntityDetails("dummy", sectionText.getHeadline(), sectionText.getSectionNumber(), false, false, Engine.DICTIONARY, EntityType.ENTITY));
Set<Entity> entities = EntitySearchUtils.find(sectionText.getText(),
new SearchImplementation(value, false),
new FindEntityDetails("dummy", sectionText.getHeadline(), sectionText.getSectionNumber(), false, false, Engine.DICTIONARY, EntityType.ENTITY));
Set<Entity> entitiesWithPositions = EntitySearchUtils.clearAndFindPositions(entities, sectionText.getSearchableText(), null, null);
Entity correctEntity = getEntityOnCorrectPosition(entitiesWithPositions, toFindPositions);
@ -128,11 +125,9 @@ public class ManualRedactionSurroundingTextService {
}
public boolean intersects(Rectangle manualPosition,
com.iqser.red.service.redaction.v1.model.Rectangle textPositionRectangle) {
public boolean intersects(Rectangle manualPosition, com.iqser.red.service.redaction.v1.model.Rectangle textPositionRectangle) {
return textPositionRectangle.getTopLeft()
.getX() + textPositionRectangle.getWidth() > manualPosition.getTopLeftX() && textPositionRectangle.getTopLeft()
return textPositionRectangle.getTopLeft().getX() + textPositionRectangle.getWidth() > manualPosition.getTopLeftX() && textPositionRectangle.getTopLeft()
.getY() + textPositionRectangle.getHeight() > manualPosition.getTopLeftY() && textPositionRectangle.getTopLeft()
.getX() < manualPosition.getTopLeftX() + manualPosition.getWidth() && textPositionRectangle.getTopLeft()
.getY() < manualPosition.getTopLeftY() + manualPosition.getHeight();

View File

@ -6,6 +6,7 @@ import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService
import io.micrometer.core.annotation.Timed;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import java.time.OffsetDateTime;
@ -39,8 +40,10 @@ public class RedactionChangeLogService {
.filter(entry -> !entry.lastChangeIsRemoved())
.collect(Collectors.toList());
Set<RedactionLogEntry> added = currentRedactionLog.getRedactionLogEntry().stream()
.filter(entry -> entry.getChanges().isEmpty() || !entry.lastChangeIsRemoved()).collect(Collectors.toSet());
Set<RedactionLogEntry> added = currentRedactionLog.getRedactionLogEntry()
.stream()
.filter(entry -> entry.getChanges().isEmpty() || !entry.lastChangeIsRemoved())
.collect(Collectors.toSet());
notRemovedPreviousEntries.forEach(added::remove);

View File

@ -33,8 +33,7 @@ public class RedactionLogCreatorService {
@Timed("redactmanager_createRedactionLog")
public List<RedactionLogEntry> createRedactionLog(PageEntities pageEntities, int numberOfPages,
String dossierTemplateId) {
public List<RedactionLogEntry> createRedactionLog(PageEntities pageEntities, int numberOfPages, String dossierTemplateId) {
List<RedactionLogEntry> entries = new ArrayList<>();
@ -52,8 +51,7 @@ public class RedactionLogCreatorService {
}
public List<RedactionLogEntry> addImageEntries(Map<Integer, Set<Image>> images, int pageNumber,
String dossierTemplateId) {
public List<RedactionLogEntry> addImageEntries(Map<Integer, Set<Image>> images, int pageNumber, String dossierTemplateId) {
List<RedactionLogEntry> redactionLogEntities = new ArrayList<>();
@ -73,9 +71,10 @@ public class RedactionLogCreatorService {
.isHint(dictionaryService.isHint(image.getType(), dossierTemplateId))
.isDictionaryEntry(false)
.isRecommendation(false)
.positions(List.of(new Rectangle(new Point((float) image.getPosition()
.getX(), (float) image.getPosition().getY()), (float) image.getPosition()
.getWidth(), (float) image.getPosition().getHeight(), pageNumber)))
.positions(List.of(new Rectangle(new Point((float) image.getPosition().getX(), (float) image.getPosition().getY()),
(float) image.getPosition().getWidth(),
(float) image.getPosition().getHeight(),
pageNumber)))
.sectionNumber(image.getSectionNumber())
.section(image.getSection())
.imageHasTransparency(image.isHasTransparency())
@ -207,8 +206,7 @@ public class RedactionLogCreatorService {
.redacted(entity.isRedaction())
.isHint(isHint(entity.getType(), dossierTemplateId))
.isRecommendation(entity.getEntityType().equals(EntityType.RECOMMENDATION))
.isFalsePositive(entity.getEntityType().equals(EntityType.FALSE_POSITIVE) || entity.getEntityType()
.equals(EntityType.FALSE_RECOMMENDATION))
.isFalsePositive(entity.getEntityType().equals(EntityType.FALSE_POSITIVE) || entity.getEntityType().equals(EntityType.FALSE_RECOMMENDATION))
.section(entity.getHeadline())
.sectionNumber(entity.getSectionNumber())
.matchedRule(entity.getMatchedRule())

View File

@ -77,16 +77,21 @@ public class RedactionLogMergeService {
// enhance section grid with headline data
for (var sectionText : text.getSectionTexts()) {
sectionGrid.getSections()
.add(new SectionGrid.SectionGridSection(sectionText.getSectionNumber(), sectionText.getHeadline(), sectionText.getSectionAreas()
.stream()
.map(SectionArea::getPage)
.collect(Collectors.toSet()), sectionText.getSectionAreas()));
.add(new SectionGrid.SectionGridSection(sectionText.getSectionNumber(),
sectionText.getHeadline(),
sectionText.getSectionAreas().stream().map(SectionArea::getPage).collect(Collectors.toSet()),
sectionText.getSectionAreas()));
}
redactionStorageService.storeObject(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.SECTION_GRID, sectionGrid);
}
log.debug("Loaded redaction log with computationalVersion: {}", redactionLog.getAnalysisVersion());
var merged = mergeRedactionLogData(redactionLog, sectionGrid, redactionRequest.getManualRedactions(), redactionRequest.getExcludedPages(), redactionRequest.getTypes(), redactionRequest.getColors());
var merged = mergeRedactionLogData(redactionLog,
sectionGrid,
redactionRequest.getManualRedactions(),
redactionRequest.getExcludedPages(),
redactionRequest.getTypes(),
redactionRequest.getColors());
merged.getRedactionLogEntry().removeIf(e -> e.isFalsePositive() && !redactionRequest.isIncludeFalsePositives());
@ -94,15 +99,23 @@ public class RedactionLogMergeService {
}
private RedactionLog mergeRedactionLogData(RedactionLog redactionLog, SectionGrid sectionGrid,
ManualRedactions manualRedactions, Set<Integer> excludedPages,
List<Type> types, Colors colors) {
private RedactionLog mergeRedactionLogData(RedactionLog redactionLog,
SectionGrid sectionGrid,
ManualRedactions manualRedactions,
Set<Integer> excludedPages,
List<Type> types,
Colors colors) {
var skippedImportedRedactions = new HashSet<>();
log.info("Merging Redaction log with manual redactions");
if (manualRedactions != null) {
var manualRedactionLogEntries = addManualAddEntries(sectionGrid, manualRedactions.getEntriesToAdd(), manualRedactions.getComments(), colors, types, redactionLog.getAnalysisNumber());
var manualRedactionLogEntries = addManualAddEntries(sectionGrid,
manualRedactions.getEntriesToAdd(),
manualRedactions.getComments(),
colors,
types,
redactionLog.getAnalysisNumber());
redactionLog.getRedactionLogEntry().addAll(manualRedactionLogEntries);
@ -110,9 +123,7 @@ public class RedactionLogMergeService {
for (RedactionLogEntry entry : redactionLog.getRedactionLogEntry()) {
processRedactionLogEntry(manualRedactionWrappers.stream()
.filter(mr -> entry.getId().equals(mr.getId()))
.collect(Collectors.toList()), entry, types, colors);
processRedactionLogEntry(manualRedactionWrappers.stream().filter(mr -> entry.getId().equals(mr.getId())).collect(Collectors.toList()), entry, types, colors);
if (entry.isImported() && !entry.isRedacted()) {
skippedImportedRedactions.add(entry.getId());
@ -136,9 +147,8 @@ public class RedactionLogMergeService {
if (entry.getImportedRedactionIntersections() != null) {
entry.getImportedRedactionIntersections().removeAll(skippedImportedRedactions);
if (!entry.getImportedRedactionIntersections()
.isEmpty() && (!entry.isImage() || entry.isImage() && !(entry.getType()
.equals("image") || entry.getType().equals("ocr")))) {
if (!entry.getImportedRedactionIntersections().isEmpty() && (!entry.isImage() || entry.isImage() && !(entry.getType().equals("image") || entry.getType()
.equals("ocr")))) {
return true;
}
}
@ -193,8 +203,7 @@ public class RedactionLogMergeService {
}
private void processRedactionLogEntry(List<ManualRedactionWrapper> manualRedactionWrappers,
RedactionLogEntry redactionLogEntry, List<Type> types, Colors colors) {
private void processRedactionLogEntry(List<ManualRedactionWrapper> manualRedactionWrappers, RedactionLogEntry redactionLogEntry, List<Type> types, Colors colors) {
manualRedactionWrappers.forEach(mrw -> {
@ -231,8 +240,7 @@ public class RedactionLogMergeService {
if (mrw.getItem() instanceof IdRemoval) {
var manualRemoval = (IdRemoval) mrw.getItem();
if (manualRemoval.getStatus()
.equals(AnnotationStatus.APPROVED) && manualRemoval.isRemoveFromDictionary()) {
if (manualRemoval.getStatus().equals(AnnotationStatus.APPROVED) && manualRemoval.isRemoveFromDictionary()) {
log.debug("Skipping merge for dictionary-modifying entry");
} else {
String manualOverrideReason = null;
@ -309,8 +317,7 @@ public class RedactionLogMergeService {
redactionLogEntry.setReason(manualOverrideReason);
}
var manualChange = ManualChange.from(manualLegalBasisChange)
.withManualRedactionType(ManualRedactionType.LEGAL_BASIS_CHANGE);
var manualChange = ManualChange.from(manualLegalBasisChange).withManualRedactionType(ManualRedactionType.LEGAL_BASIS_CHANGE);
manualChange.withChange("legalBasis", manualLegalBasisChange.getLegalBasis());
if (manualLegalBasisChange.getSection() != null) {
manualChange.withChange("section", manualLegalBasisChange.getSection());
@ -349,9 +356,7 @@ public class RedactionLogMergeService {
redactionLogEntry.setReason(manualOverrideReason);
redactionLogEntry.getManualChanges()
.add(ManualChange.from(manualResizeRedact)
.withManualRedactionType(ManualRedactionType.RESIZE)
.withChange("value", manualResizeRedact.getValue()));
.add(ManualChange.from(manualResizeRedact).withManualRedactionType(ManualRedactionType.RESIZE).withChange("value", manualResizeRedact.getValue()));
}
});
@ -372,9 +377,12 @@ public class RedactionLogMergeService {
}
public List<RedactionLogEntry> addManualAddEntries(SectionGrid sectionGrid, Set<ManualRedactionEntry> manualAdds,
Map<String, List<Comment>> comments, Colors colors,
List<Type> types, int analysisNumber) {
public List<RedactionLogEntry> addManualAddEntries(SectionGrid sectionGrid,
Set<ManualRedactionEntry> manualAdds,
Map<String, List<Comment>> comments,
Colors colors,
List<Type> types,
int analysisNumber) {
List<RedactionLogEntry> redactionLogEntries = new ArrayList<>();
@ -405,8 +413,7 @@ public class RedactionLogMergeService {
}
private List<Rectangle> convertPositions(
List<com.iqser.red.service.persistence.service.v1.api.model.annotations.Rectangle> positions) {
private List<Rectangle> convertPositions(List<com.iqser.red.service.persistence.service.v1.api.model.annotations.Rectangle> positions) {
return positions.stream()
.map(pos -> new Rectangle(new Point(pos.getTopLeftX(), pos.getTopLeftY()), pos.getWidth(), pos.getHeight(), pos.getPage()))
@ -421,13 +428,11 @@ public class RedactionLogMergeService {
}
private RedactionLogEntry createRedactionLogEntry(ManualRedactionEntry manualRedactionEntry, String id,
Colors colors, List<Type> types, int analysisNumber) {
private RedactionLogEntry createRedactionLogEntry(ManualRedactionEntry manualRedactionEntry, String id, Colors colors, List<Type> types, int analysisNumber) {
var addToDictionary = manualRedactionEntry.isAddToDictionary() || manualRedactionEntry.isAddToDossierDictionary();
var change = ManualChange.from(manualRedactionEntry)
.withManualRedactionType(addToDictionary ? ManualRedactionType.ADD_TO_DICTIONARY : ManualRedactionType.ADD_LOCALLY);
var change = ManualChange.from(manualRedactionEntry).withManualRedactionType(addToDictionary ? ManualRedactionType.ADD_TO_DICTIONARY : ManualRedactionType.ADD_LOCALLY);
List<ManualChange> changeList = new ArrayList<>();
changeList.add(change);
@ -452,8 +457,7 @@ public class RedactionLogMergeService {
}
private float[] getColor(String type, Colors colors, boolean requested, boolean isRedaction, boolean skipped,
List<Type> types) {
private float[] getColor(String type, Colors colors, boolean requested, boolean isRedaction, boolean skipped, List<Type> types) {
if (requested) {
return convertColor(colors.getRequestRemoveColor());
@ -484,8 +488,7 @@ public class RedactionLogMergeService {
return convertColor(foundAndNotDeletedType.get().getHexColor());
}
Optional<Type> firstDeletedType = matchingTypes.stream().findFirst();
return firstDeletedType.map(value -> convertColor(value.getHexColor()))
.orElseGet(() -> convertColor(DELETED_TYPE_COLOR));
return firstDeletedType.map(value -> convertColor(value.getHexColor())).orElseGet(() -> convertColor(DELETED_TYPE_COLOR));
}

View File

@ -9,7 +9,9 @@ import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
import lombok.RequiredArgsConstructor;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
@ -19,7 +21,6 @@ import java.util.List;
@RequiredArgsConstructor
public class SectionGridCreatorService {
public void createSectionGrid(Document classifiedDoc, int numberOfPages) {
for (int page = 1; page <= numberOfPages; page++) {
@ -45,8 +46,12 @@ public class SectionGridCreatorService {
classifiedDoc.getSectionGrid()
.getRectanglesPerPage()
.computeIfAbsent(page, (x) -> new ArrayList<>())
.add(new SectionRectangle(new Point(textBlock.getMinX(), textBlock.getMinY()), textBlock.getWidth(), textBlock
.getHeight(), i + 1, paragraph.getPageBlocks().size(),null));
.add(new SectionRectangle(new Point(textBlock.getMinX(), textBlock.getMinY()),
textBlock.getWidth(),
textBlock.getHeight(),
i + 1,
paragraph.getPageBlocks().size(),
null));
} else if (textBlock instanceof Table) {
@ -54,8 +59,7 @@ public class SectionGridCreatorService {
for (List<Cell> row : ((Table) textBlock).getRows()) {
for (Cell cell : row) {
if (cell != null) {
cellRectangles.add(new CellRectangle(new Point((float) cell.getX(), (float) cell.getY()), (float) cell
.getWidth(), (float) cell.getHeight()));
cellRectangles.add(new CellRectangle(new Point((float) cell.getX(), (float) cell.getY()), (float) cell.getWidth(), (float) cell.getHeight()));
}
}
}
@ -63,8 +67,12 @@ public class SectionGridCreatorService {
classifiedDoc.getSectionGrid()
.getRectanglesPerPage()
.computeIfAbsent(page, (x) -> new ArrayList<>())
.add(new SectionRectangle(new Point(textBlock.getMinX(), textBlock.getMinY()), textBlock.getWidth(), textBlock
.getHeight(), i + 1, paragraph.getPageBlocks().size(), cellRectangles));
.add(new SectionRectangle(new Point(textBlock.getMinX(), textBlock.getMinY()),
textBlock.getWidth(),
textBlock.getHeight(),
i + 1,
paragraph.getPageBlocks().size(),
cellRectangles));
}
}

View File

@ -139,8 +139,11 @@ public class SectionTextBuilderService {
/**
 * Builds the {@code SectionArea} for a single table cell.
 *
 * <p>The area's origin is the cell's x/y and its extent is the cell's width/height, each cast to
 * {@code float}. The page is taken from the first sequence of the cell's first text block.
 * The last constructor argument is passed as {@code null}.
 *
 * <p>NOTE(review): the {@code get(0)} calls presumably index into lists and would fail for a cell
 * with no text blocks or a first block with no sequences - confirm callers only pass populated
 * cells.
 *
 * <p>This span comes from a reformatting diff, so the {@code return} statement appears twice:
 * first with the previous line wrapping, then with one argument per line. Both copies pass the
 * same arguments.
 *
 * @param cell the table cell to describe
 * @return a new {@code SectionArea} built from the cell's position, size and page
 */
private SectionArea getSectionArea(Cell cell) {
return new SectionArea(new Point((float) cell.getX(), (float) cell.getY()), (float) cell.getWidth(), (float) cell.getHeight(),
cell.getTextBlocks().get(0).getSequences().get(0).getPage(), null);
// Same statement, re-wrapped by the formatter.
return new SectionArea(new Point((float) cell.getX(), (float) cell.getY()),
(float) cell.getWidth(),
(float) cell.getHeight(),
cell.getTextBlocks().get(0).getSequences().get(0).getPage(),
null);
}
@ -170,8 +173,11 @@ public class SectionTextBuilderService {
SectionText sectionText = new SectionText();
for (TextBlock paragraphTextBlock : paragraphTextBlocks) {
SectionArea sectionArea = new SectionArea(new Point(paragraphTextBlock.getMinX(), paragraphTextBlock.getMinY()), paragraphTextBlock.getWidth(),
paragraphTextBlock.getHeight(), paragraphTextBlock.getPage(), null);
SectionArea sectionArea = new SectionArea(new Point(paragraphTextBlock.getMinX(), paragraphTextBlock.getMinY()),
paragraphTextBlock.getWidth(),
paragraphTextBlock.getHeight(),
paragraphTextBlock.getPage(),
null);
sectionText.getSectionAreas().add(sectionArea);
}

View File

@ -2,14 +2,15 @@ package com.iqser.red.service.redaction.v1.server.redaction.service;
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.model.SectionGrid;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
@Slf4j
@Service
public class SectionTextService {
public void handleSectionText(SectionGrid sectionGrid, RedactionLogEntry redactionLogEntry) {
if (redactionLogEntry.getSection() != null) {
@ -36,4 +37,5 @@ public class SectionTextService {
}
}
}
}

View File

@ -8,6 +8,7 @@ import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettin
import io.micrometer.core.annotation.Timed;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import java.util.List;
@ -43,8 +44,7 @@ public class SurroundingWordsService {
@Timed("redactmanager_addSurroundingText_tables")
public void addSurroundingText(Set<Entity> entities, SearchableText searchableText, Dictionary dictionary,
List<Integer> cellstarts) {
public void addSurroundingText(Set<Entity> entities, SearchableText searchableText, Dictionary dictionary, List<Integer> cellstarts) {
if (entities.isEmpty()) {
return;
@ -88,25 +88,21 @@ public class SurroundingWordsService {
private void findSurroundingWords(Entity entity, String text, int entityStartOffset, int entityEndOffset) {
int offsetBefore = entityStartOffset - redactionServiceSettings.getSurroundingWordsOffsetWindow() < 0 ? 0 : entityStartOffset - redactionServiceSettings
.getSurroundingWordsOffsetWindow();
int offsetBefore = entityStartOffset - redactionServiceSettings.getSurroundingWordsOffsetWindow() < 0 ? 0 : entityStartOffset - redactionServiceSettings.getSurroundingWordsOffsetWindow();
String textBefore = text.substring(offsetBefore, entityStartOffset);
if (!textBefore.isBlank()) {
String[] wordsBefore = textBefore.split(" ");
int numberOfWordsBefore = wordsBefore.length > redactionServiceSettings.getNumberOfSurroundingWords() ? redactionServiceSettings
.getNumberOfSurroundingWords() : wordsBefore.length;
int numberOfWordsBefore = wordsBefore.length > redactionServiceSettings.getNumberOfSurroundingWords() ? redactionServiceSettings.getNumberOfSurroundingWords() : wordsBefore.length;
if (wordsBefore.length > 0) {
entity.setTextBefore(concatWordsBefore(wordsBefore, numberOfWordsBefore, textBefore.endsWith(" ")));
}
}
int endOffset = entityEndOffset + redactionServiceSettings.getSurroundingWordsOffsetWindow() > text.length() ? text
.length() : entityEndOffset + redactionServiceSettings.getSurroundingWordsOffsetWindow();
int endOffset = entityEndOffset + redactionServiceSettings.getSurroundingWordsOffsetWindow() > text.length() ? text.length() : entityEndOffset + redactionServiceSettings.getSurroundingWordsOffsetWindow();
String textAfter = text.substring(entityEndOffset, endOffset);
if (!textAfter.isBlank()) {
String[] wordsAfter = textAfter.split(" ");
int numberOfWordsAfter = wordsAfter.length > redactionServiceSettings.getNumberOfSurroundingWords() ? redactionServiceSettings
.getNumberOfSurroundingWords() : wordsAfter.length;
int numberOfWordsAfter = wordsAfter.length > redactionServiceSettings.getNumberOfSurroundingWords() ? redactionServiceSettings.getNumberOfSurroundingWords() : wordsAfter.length;
if (wordsAfter.length > 0) {
entity.setTextAfter(concatWordsAfter(wordsAfter, numberOfWordsAfter, textAfter.startsWith(" ")));
}

View File

@ -4,6 +4,7 @@ import com.iqser.red.service.persistence.service.v1.api.model.annotations.Annota
import com.iqser.red.service.persistence.service.v1.api.model.annotations.ManualRedactions;
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
import com.iqser.red.service.redaction.v1.server.redaction.model.*;
import lombok.experimental.UtilityClass;
import lombok.extern.slf4j.Slf4j;
@ -15,18 +16,20 @@ import java.util.stream.Collectors;
@SuppressWarnings("PMD")
public class EntitySearchUtils {
/**
 * Reports whether the given search implementation finds at least one match in the section text.
 * <p>
 * This is a thin delegate to {@code searchImplementation.atLeastOneMatches(sectionText)}.
 *
 * @param sectionText          the text of the section to scan
 * @param searchImplementation the search to run against the text
 * @return the result of {@code atLeastOneMatches} for the section text
 */
public boolean sectionContainsAny(String sectionText, SearchImplementation searchImplementation) {

    final boolean anyMatch = searchImplementation.atLeastOneMatches(sectionText);
    return anyMatch;
}
/**
 * Removes from {@code found} every entity that ends up flagged as a false positive.
 * <p>
 * The input is searched with the dictionary model's false-positive search, the hits are handed to
 * {@code markFalsePositives} together with {@code found}, and finally all entities for which
 * {@code isFalsePositive()} returns true are dropped from {@code found} in place.
 * <p>
 * NOTE(review): {@code details.withEntityType(...)} assigns the entity type on the passed instance and
 * returns that same instance, so {@code details} carries {@code EntityType.FALSE_POSITIVE} after this call.
 *
 * @param found       the entities found so far; modified in place
 * @param inputString the text that was searched
 * @param type        the dictionary model supplying the false-positive search
 * @param details     the details used for the entities created by the false-positive search
 */
public void removeFalsePositives(Set<Entity> found, String inputString, DictionaryModel type, FindEntityDetails details) {

    // Same evaluation order as a single nested call: search first, then the details.
    SearchImplementation falsePositiveSearch = type.getFalsePositiveSearch();
    FindEntityDetails falsePositiveDetails = details.withEntityType(EntityType.FALSE_POSITIVE);

    Set<Entity> falsePositiveHits = find(inputString, falsePositiveSearch, falsePositiveDetails);
    markFalsePositives(found, falsePositiveHits);

    found.removeIf(Entity::isFalsePositive);
}
public Set<Entity> findEntities(String inputString, SearchImplementation searchImplementation, DictionaryModel type, FindEntityDetails details) {
Set<Entity> found = find(inputString, searchImplementation, details);
@ -42,7 +45,9 @@ public class EntitySearchUtils {
return found;
}
public Set<Entity> find(String inputString, SearchImplementation searchImplementation, FindEntityDetails findEntityDetails) {
Set<Entity> entities = new HashSet<>();
searchImplementation.getMatches(inputString).forEach(match -> validateAndAddEntity(entities, findEntityDetails, inputString, match.getStartIndex(), match.getEndIndex()));
@ -50,13 +55,21 @@ public class EntitySearchUtils {
return entities;
}
/**
 * Adds an {@link Entity} for the match {@code [startIndex, stopIndex)} only if the match is delimited on
 * both sides.
 * <p>
 * A side counts as delimited when the match touches the start or end of {@code inputString}, or when the
 * adjacent character (the one before {@code startIndex}, the one at {@code stopIndex}) is accepted by
 * {@code SeparatorUtils.isSeparator}. Matches that fail this check are silently ignored.
 * <p>
 * The entity's word is the matched substring. Type, headline, section number, dictionary flags, engine and
 * entity type are copied from {@code findEntityDetails}.
 *
 * @param entities          the set receiving the new entity
 * @param findEntityDetails supplies the metadata copied into the entity
 * @param inputString       the text the match was found in
 * @param startIndex        inclusive start offset of the match
 * @param stopIndex         exclusive end offset of the match
 */
private void validateAndAddEntity(Set<Entity> entities, FindEntityDetails findEntityDetails, String inputString, int startIndex, int stopIndex) {
// Pre-reformat version of the condition and the add call (kept as shown in the diff).
if ((startIndex == 0 || SeparatorUtils.isSeparator(inputString.charAt(startIndex - 1)))
&& (stopIndex == inputString.length() || SeparatorUtils.isSeparator(inputString.charAt(stopIndex)))) {
entities.add(new Entity(inputString.substring(startIndex, stopIndex), findEntityDetails.getType(), startIndex, stopIndex,
findEntityDetails.getHeadline(), findEntityDetails.getSectionNumber(), findEntityDetails.isDictionaryEntry(),
findEntityDetails.isDossierDictionary(), findEntityDetails.getEngine(), findEntityDetails.getEntityType()));
// Post-reformat version of the same condition and add call, with identical arguments.
if ((startIndex == 0 || SeparatorUtils.isSeparator(inputString.charAt(startIndex - 1))) && (stopIndex == inputString.length() || SeparatorUtils.isSeparator(inputString.charAt(
stopIndex)))) {
entities.add(new Entity(inputString.substring(startIndex, stopIndex),
findEntityDetails.getType(),
startIndex,
stopIndex,
findEntityDetails.getHeadline(),
findEntityDetails.getSectionNumber(),
findEntityDetails.isDictionaryEntry(),
findEntityDetails.isDossierDictionary(),
findEntityDetails.getEngine(),
findEntityDetails.getEntityType()));
}
}
@ -73,8 +86,9 @@ public class EntitySearchUtils {
List<Entity> orderedEntities = entitiesByWord.get(word).stream().sorted(Comparator.comparing(Entity::getStart)).collect(Collectors.toList());
Entity firstEntity = orderedEntities.get(0);
List<EntityPositionSequence> positionSequences = text.getSequences(firstEntity.getWord()
.trim(), dictionary == null || dictionary.isCaseInsensitiveDictionary(firstEntity.getType()), firstEntity.getTargetSequences());
List<EntityPositionSequence> positionSequences = text.getSequences(firstEntity.getWord().trim(),
dictionary == null || dictionary.isCaseInsensitiveDictionary(firstEntity.getType()),
firstEntity.getTargetSequences());
Map<String, List<EntityPositionSequence>> multipartSequenceParts = new HashMap<>();
Iterator<EntityPositionSequence> itty = positionSequences.iterator();
@ -182,8 +196,7 @@ public class EntitySearchUtils {
.length() && inner.getStart() >= word.getStart() && inner.getEnd() <= word.getEnd() && word != inner && word.getSectionNumber() == inner.getSectionNumber()) {
if (word.getEntityType().equals(EntityType.RECOMMENDATION) && inner.getEntityType().equals(EntityType.ENTITY)) {
wordsToRemove.add(word);
} else if (!(inner.getEntityType() == EntityType.FALSE_RECOMMENDATION && word.getEntityType() == EntityType.ENTITY ||
inner.getEntityType() == EntityType.ENTITY && word.getEntityType() == EntityType.FALSE_RECOMMENDATION)) {
} else if (!(inner.getEntityType() == EntityType.FALSE_RECOMMENDATION && word.getEntityType() == EntityType.ENTITY || inner.getEntityType() == EntityType.ENTITY && word.getEntityType() == EntityType.FALSE_RECOMMENDATION)) {
if (inner.isResized()) {
wordsToRemove.add(word);
} else {
@ -193,7 +206,9 @@ public class EntitySearchUtils {
}
}
}
wordsToRemove.stream().filter(e -> !e.getEntityType().equals(EntityType.FALSE_POSITIVE) && !e.getEntityType().equals(EntityType.FALSE_RECOMMENDATION)).forEach(entities::remove);
wordsToRemove.stream()
.filter(e -> !e.getEntityType().equals(EntityType.FALSE_POSITIVE) && !e.getEntityType().equals(EntityType.FALSE_RECOMMENDATION))
.forEach(entities::remove);
}
@ -245,8 +260,8 @@ public class EntitySearchUtils {
existing.setLegalBasis(found.getLegalBasis());
existing.setMatchedRule(found.getMatchedRule());
existing.setRedactionReason(found.getRedactionReason());
if (existing.getEntityType().equals(EntityType.RECOMMENDATION) && found.getEntityType().equals(EntityType.ENTITY)
|| existing.getEntityType().equals(EntityType.ENTITY) && found.getEntityType().equals(EntityType.RECOMMENDATION)) {
if (existing.getEntityType().equals(EntityType.RECOMMENDATION) && found.getEntityType().equals(EntityType.ENTITY) || existing.getEntityType()
.equals(EntityType.ENTITY) && found.getEntityType().equals(EntityType.RECOMMENDATION)) {
existing.setEntityType(EntityType.ENTITY);
if (found.isRedaction()) {
existing.setRedaction(true);
@ -318,6 +333,7 @@ public class EntitySearchUtils {
}
}
/*
* returns true if the found entity overlaps with an existing entity in a way
* that neither entity is a subset of the other

View File

@ -2,6 +2,7 @@ package com.iqser.red.service.redaction.v1.server.redaction.utils;
import com.iqser.red.service.redaction.v1.model.Engine;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityType;
import lombok.AllArgsConstructor;
import lombok.Data;
@ -17,8 +18,11 @@ public class FindEntityDetails {
private Engine engine;
private EntityType entityType;
/**
 * Sets the entity type on this instance and returns the instance for call chaining.
 * <p>
 * NOTE(review): despite the {@code with...} name, no copy is made; the receiver itself is modified, so
 * every holder of this object sees the new entity type.
 *
 * @param entityType the entity type to store
 * @return this instance
 */
public FindEntityDetails withEntityType(EntityType entityType) {
this.entityType = entityType;
return this;
}
}

Some files were not shown because too many files have changed in this diff Show More