Merge branch 'master' of ssh://git.iqser.com:2222/red/redaction-service

This commit is contained in:
cschabert 2021-09-22 16:11:54 +02:00
commit d9b78643fb
95 changed files with 2798 additions and 2247 deletions

View File

@ -5,7 +5,7 @@
<parent>
<groupId>com.atlassian.bamboo</groupId>
<artifactId>bamboo-specs-parent</artifactId>
<version>7.1.2</version>
<version>7.2.2</version>
<relativePath/>
</parent>

View File

@ -13,6 +13,6 @@ RUN apt-get update \
wget cabextract xfonts-utils fonts-liberation \
&& rm -rf /var/lib/apt/lists/*
RUN curl http://ftp.br.debian.org/debian/pool/contrib/m/msttcorefonts/ttf-mscorefonts-installer_3.7_all.deb -o /tmp/ttf-mscorefonts-installer_3.7_all.deb \
&& dpkg -i /tmp/ttf-mscorefonts-installer_3.7_all.deb \
&& rm /tmp/ttf-mscorefonts-installer_3.7_all.deb \
RUN curl http://ftp.br.debian.org/debian/pool/contrib/m/msttcorefonts/ttf-mscorefonts-installer_3.8_all.deb -o /tmp/ttf-mscorefonts-installer_3.8_all.deb \
&& dpkg -i /tmp/ttf-mscorefonts-installer_3.8_all.deb \
&& rm /tmp/ttf-mscorefonts-installer_3.8_all.deb \

View File

@ -5,7 +5,7 @@
<parent>
<artifactId>platform-dependency</artifactId>
<groupId>com.iqser.red</groupId>
<version>1.1.2</version>
<version>1.1.3</version>
</parent>
<modelVersion>4.0.0</modelVersion>
@ -32,7 +32,7 @@
<dependency>
<groupId>com.iqser.red</groupId>
<artifactId>platform-commons-dependency</artifactId>
<version>1.3.1</version>
<version>1.3.6</version>
<scope>import</scope>
<type>pom</type>
</dependency>

View File

@ -19,14 +19,8 @@
</dependency>
<dependency>
<groupId>com.iqser.red.service</groupId>
<artifactId>configuration-service-api-v1</artifactId>
<version>2.7.0</version>
<exclusions>
<exclusion>
<groupId>com.iqser.red.service</groupId>
<artifactId>file-management-service-api-v1</artifactId>
</exclusion>
</exclusions>
<artifactId>persistence-service-api-v1</artifactId>
<version>0.4.0</version>
</dependency>
</dependencies>
</project>

View File

@ -6,6 +6,10 @@ import lombok.Data;
import lombok.NoArgsConstructor;
import java.time.OffsetDateTime;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
@Data
@Builder
@ -13,12 +17,20 @@ import java.time.OffsetDateTime;
@AllArgsConstructor
public class AnalyzeRequest {
private String projectId;
private String dossierId;
private String fileId;
private String ruleSetId;
private String dossierTemplateId;
private boolean reanalyseOnlyIfPossible;
private ManualRedactions manualRedactions;
private OffsetDateTime lastProcessed;
@Builder.Default
private Set<Integer> excludedPages = new HashSet<>();
@Builder.Default
private Set<Integer> sectionsToReanalyse = new HashSet<>();
@Builder.Default
private List<FileAttribute> fileAttributes = new ArrayList<>();
}

View File

@ -11,20 +11,20 @@ import lombok.NoArgsConstructor;
@AllArgsConstructor
public class AnalyzeResult {
private String projectId;
private String dossierId;
private String fileId;
private long duration;
private int numberOfPages;
private boolean hasHints;
private boolean hasRequests;
private boolean hasRedactions;
private boolean hasImages;
private boolean hasUpdates;
private long dictionaryVersion;
private long dossierDictionaryVersion;
private long rulesVersion;
private long legalBasisVersion;
private boolean wasReanalyzed;
private int analysisVersion;
}

View File

@ -11,6 +11,7 @@ import lombok.NoArgsConstructor;
@AllArgsConstructor
public class AnnotateRequest {
private String projectId;
private String dossierId;
private String dossierTemplateId;
private String fileId;
}

View File

@ -0,0 +1,15 @@
package com.iqser.red.service.redaction.v1.model;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * One named parameter of a rule-builder condition, paired with the
 * {@link ArgumentType} that tells a client which kind of value to collect.
 * Instantiated via Lombok's generated no-args or all-args constructor.
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
public class Argument {
// Identifier / display name of the argument.
private String name;
// Kind of value this argument accepts (see ArgumentType).
private ArgumentType type;
}

View File

@ -0,0 +1,7 @@
package com.iqser.red.service.redaction.v1.model;
/**
 * Kinds of values a rule-builder {@code Argument} may accept; lets a client
 * render the matching input control for each argument.
 * NOTE(review): do not reorder constants — ordinal-based persistence or
 * serialization elsewhere would break; confirm before changing.
 */
public enum ArgumentType {
INTEGER, BOOLEAN, STRING, FILE_ATTRIBUTE, REGEX, TYPE, RULE_NUMBER, LEGAL_BASIS, REFERENCE_TYPE
}

View File

@ -0,0 +1,18 @@
package com.iqser.red.service.redaction.v1.model;
import java.time.OffsetDateTime;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * A single recorded modification: what kind of change ({@link ChangeType})
 * and when it occurred.
 */
@Data
@Builder
@AllArgsConstructor
@NoArgsConstructor
public class Change {
// Kind of change (ADDED / REMOVED / CHANGED).
private ChangeType type;
// When the change happened; OffsetDateTime keeps the timezone offset.
private OffsetDateTime dateTime;
}

View File

@ -1,5 +1,5 @@
package com.iqser.red.service.redaction.v1.model;
public enum ChangeType {
ADDED, REMOVED
ADDED, REMOVED, CHANGED
}

View File

@ -0,0 +1,5 @@
package com.iqser.red.service.redaction.v1.model;
/**
 * Source engine that produced a redaction finding: dictionary lookup,
 * named-entity recognition (NER), or a rule evaluation.
 */
public enum Engine {
DICTIONARY, NER, RULE
}

View File

@ -0,0 +1,19 @@
package com.iqser.red.service.redaction.v1.model;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * A key/value attribute attached to a file, carried with analyze requests
 * (see {@code AnalyzeRequest.fileAttributes}).
 */
@Data
@Builder
@AllArgsConstructor
@NoArgsConstructor
public class FileAttribute {
// Unique identifier of the attribute.
private String id;
// Human-readable label shown for this attribute.
private String label;
// Placeholder text; presumably shown when no value is set — TODO confirm against UI usage.
private String placeholder;
// The attribute's actual value.
private String value;
}

View File

@ -5,6 +5,8 @@ import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import java.time.OffsetDateTime;
@Data
@Builder
@AllArgsConstructor
@ -16,4 +18,8 @@ public class IdRemoval {
private Status status;
private boolean removeFromDictionary;
}
private OffsetDateTime requestDate;
private OffsetDateTime processedDate;
private OffsetDateTime softDeletedTime;
}

View File

@ -5,6 +5,8 @@ import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import java.time.OffsetDateTime;
@Data
@Builder
@AllArgsConstructor
@ -16,4 +18,8 @@ public class ManualForceRedact {
private Status status;
private String legalBasis;
}
private OffsetDateTime requestDate;
private OffsetDateTime processedDate;
private OffsetDateTime softDeletedTime;
}

View File

@ -0,0 +1,25 @@
package com.iqser.red.service.redaction.v1.model;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import java.time.OffsetDateTime;
/**
 * A user's manual request to change the category of an image finding.
 * Mirrors the shape of {@link ManualLegalBasisChange}: identity, requesting
 * user, processing status, the new value, and lifecycle timestamps.
 */
@Data
@Builder
@AllArgsConstructor
@NoArgsConstructor
public class ManualImageRecategorization {
// Identifier of the targeted entry.
private String id;
// User who requested the recategorization.
private String user;
// Processing status of this manual request.
private Status status;
// The new category/type to assign to the image.
private String type;
// When the user made the request.
private OffsetDateTime requestDate;
// When the request was processed; presumably null until processed — TODO confirm.
private OffsetDateTime processedDate;
// When the request was soft-deleted; presumably null if still active — TODO confirm.
private OffsetDateTime softDeletedTime;
}

View File

@ -0,0 +1,25 @@
package com.iqser.red.service.redaction.v1.model;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import java.time.OffsetDateTime;
/**
 * A user's manual request to change the legal basis of a redaction entry.
 * Mirrors the shape of {@link ManualImageRecategorization}: identity,
 * requesting user, processing status, the new value, and lifecycle timestamps.
 */
@Data
@Builder
@AllArgsConstructor
@NoArgsConstructor
public class ManualLegalBasisChange {
// Identifier of the targeted entry.
private String id;
// User who requested the change.
private String user;
// Processing status of this manual request.
private Status status;
// The new legal basis to assign.
private String legalBasis;
// When the user made the request.
private OffsetDateTime requestDate;
// When the request was processed; presumably null until processed — TODO confirm.
private OffsetDateTime processedDate;
// When the request was soft-deleted; presumably null if still active — TODO confirm.
private OffsetDateTime softDeletedTime;
}

View File

@ -5,6 +5,7 @@ import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import java.time.OffsetDateTime;
import java.util.ArrayList;
import java.util.List;
@ -22,11 +23,12 @@ public class ManualRedactionEntry {
private String legalBasis;
private List<Rectangle> positions = new ArrayList<>();
private Status status;
private boolean addToDictionary;
private String section;
private int sectionNumber;
private boolean addToDossierDictionary;
private OffsetDateTime requestDate;
private OffsetDateTime processedDate;
private OffsetDateTime softDeletedTime;
}

View File

@ -1,5 +1,5 @@
package com.iqser.red.service.redaction.v1.model;
public enum ManualRedactionType {
ADD, REMOVE, FORCE_REDACT
ADD, REMOVE, FORCE_REDACT, RECATEGORIZE, LEGAL_BASIS_CHANGE
}

View File

@ -26,6 +26,12 @@ public class ManualRedactions {
@Builder.Default
private Set<ManualRedactionEntry> entriesToAdd = new HashSet<>();
@Builder.Default
private Set<ManualImageRecategorization> imageRecategorizations = new HashSet<>();
@Builder.Default
private Set<ManualLegalBasisChange> manualLegalBasisChanges = new HashSet<>();
@Builder.Default
private Map<String, List<Comment>> comments = new HashMap<>();

View File

@ -1,47 +0,0 @@
package com.iqser.red.service.redaction.v1.model;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import java.util.ArrayList;
import java.util.List;
/**
 * One entry of a redaction change log: the redacted value, why it was
 * matched, where it sits on the page, and its manual-edit state.
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class RedactionChangeLogEntry {
// Identifier of the entry.
private String id;
// Classified type of the finding.
private String type;
// The matched text value.
private String value;
// Reason the engine flagged this value.
private String reason;
// Number of the rule that matched.
private int matchedRule;
// Legal basis assigned to the redaction.
private String legalBasis;
// Whether the value is actually redacted.
private boolean redacted;
// Marked as a hint rather than a definite redaction.
private boolean isHint;
// Marked as a recommendation.
private boolean isRecommendation;
// Section name the finding belongs to.
private String section;
// Highlight color as an RGB(A) component array — exact length not shown here; confirm with renderer.
private float[] color;
@Builder.Default
private List<Rectangle> positions = new ArrayList<>();
// Numeric index of the section.
private int sectionNumber;
// True when the entry was created/edited manually by a user.
private boolean manual;
// Processing status of the entry.
private Status status;
// Which manual action produced this entry, if any.
private ManualRedactionType manualRedactionType;
// True when the value originates from the dictionary.
private boolean isDictionaryEntry;
// Surrounding text before the match.
private String textBefore;
// Surrounding text after the match.
private String textAfter;
@Builder.Default
private List<Comment> comments = new ArrayList<>();
// Whether this entry was ADDED/REMOVED/CHANGED relative to the previous log.
private ChangeType changeType;
// True when the value originates from the dossier-specific dictionary.
private boolean isDossierDictionaryEntry;
}

View File

@ -1,27 +1,29 @@
package com.iqser.red.service.redaction.v1.model;
import com.iqser.red.service.configuration.v1.api.model.LegalBasisMapping;
import lombok.AllArgsConstructor;
import lombok.Data;
import java.util.List;
import com.iqser.red.service.persistence.service.v1.api.model.data.configuration.LegalBasis;
@Data
@AllArgsConstructor
public class RedactionLog {
private List<RedactionLogEntry> redactionLogEntry;
private List<LegalBasisMapping> legalBasis;
private String ruleSetId;
/**
* Version 0 Redaction Logs have manual redactions merged inside them
* Version 1 Redaction Logs only contain system ( rule/dictionary ) redactions. Manual Redactions are merged in at runtime.
*/
private long analysisVersion;
private List<RedactionLogEntry> redactionLogEntry;
private List<LegalBasis> legalBasis;
private long dictionaryVersion = -1;
private long rulesVersion = -1;
private long dossierDictionaryVersion = -1;
private long rulesVersion = -1;
private long legalBasisVersion = -1;
}

View File

@ -0,0 +1,17 @@
package com.iqser.red.service.redaction.v1.model;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Pairs a {@link RedactionLog} with a flag saying whether it differs from a
 * previous state, so callers can skip work when nothing changed.
 */
@Data
@Builder
@AllArgsConstructor
@NoArgsConstructor
public class RedactionLogChanges {
// The (possibly updated) redaction log.
private RedactionLog redactionLog;
// True when the log contains changes relative to the baseline it was compared against.
private boolean hasChanges;
}

View File

@ -7,13 +7,15 @@ import lombok.EqualsAndHashCode;
import lombok.NoArgsConstructor;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
@EqualsAndHashCode(of = "id")
@EqualsAndHashCode
public class RedactionLogEntry {
private String id;
@ -34,6 +36,7 @@ public class RedactionLogEntry {
private boolean manual;
private Status status;
private ManualRedactionType manualRedactionType;
private String manualRedactionUserId;
private boolean isDictionaryEntry;
private String textBefore;
@ -46,7 +49,23 @@ public class RedactionLogEntry {
private int endOffset;
private boolean isImage;
private boolean imageHasTransparency;
private boolean isDossierDictionaryEntry;
private boolean excluded;
private String recategorizationType;
private String legalBasisChangeValue;
@EqualsAndHashCode.Exclude
@Builder.Default
private List<Change> changes = new ArrayList<>();
private Set<Engine> engines= new HashSet<>();
private Set<String> reference = new HashSet<>();
}

View File

@ -1,5 +1,8 @@
package com.iqser.red.service.redaction.v1.model;
import java.util.HashSet;
import java.util.Set;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
@ -11,8 +14,10 @@ import lombok.NoArgsConstructor;
@AllArgsConstructor
public class RedactionRequest {
private String projectId;
private String dossierId;
private String fileId;
private String ruleSetId;
private String dossierTemplateId;
private ManualRedactions manualRedactions;
@Builder.Default
private Set<Integer> excludedPages = new HashSet<>();
}

View File

@ -0,0 +1,14 @@
package com.iqser.red.service.redaction.v1.model;
import lombok.Data;
import java.util.ArrayList;
import java.util.List;
/**
 * Model served by {@code RuleBuilderResource}: the available WHEN clauses and
 * THEN conditions a client can compose into rules. Lists are initialized so
 * consumers never see null.
 */
@Data
public class RuleBuilderModel {
// Conditions available for a rule's WHEN part.
private List<RuleElement> whenClauses = new ArrayList<>();
// Actions/conditions available for a rule's THEN part.
private List<RuleElement> thenConditions = new ArrayList<>();
}

View File

@ -8,15 +8,11 @@ import java.util.ArrayList;
import java.util.List;
@Data
@AllArgsConstructor
@NoArgsConstructor
public class RedactionChangeLog {
@AllArgsConstructor
public class RuleElement {
private List<RedactionChangeLogEntry> redactionLogEntry = new ArrayList<>();
private long dictionaryVersion = -1;
private long rulesVersion = -1;
private String ruleSetId;
private String conditionName;
private List<Argument> arguments = new ArrayList<>();
}

View File

@ -30,4 +30,5 @@ public class SectionRectangle {
private int numberOfParts;
private List<CellRectangle> tableCells;
}

View File

@ -0,0 +1,18 @@
package com.iqser.red.service.redaction.v1.model;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Request to run structure analysis for a single file, addressed by its
 * dossier and file identifiers.
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class StructureAnalyzeRequest {
// Dossier containing the file.
private String dossierId;
// File to analyze.
private String fileId;
}

View File

@ -8,12 +8,6 @@ import org.springframework.web.bind.annotation.RequestBody;
public interface RedactionResource {
String SERVICE_NAME = "redaction-service-v1";
String RULE_SET_PARAMETER_NAME = "ruleSetId";
String RULE_SET_PATH_VARIABLE = "/{" + RULE_SET_PARAMETER_NAME + "}";
@PostMapping(value = "/annotate", produces = MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE)
AnnotateResponse annotate(@RequestBody AnnotateRequest annotateRequest);
@ -26,10 +20,10 @@ public interface RedactionResource {
@PostMapping(value = "/debug/htmlTables", produces = MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE)
RedactionResult htmlTables(@RequestBody RedactionRequest redactionRequest);
@PostMapping(value = "/rules/update" + RULE_SET_PATH_VARIABLE, consumes = MediaType.APPLICATION_JSON_VALUE)
void updateRules(@PathVariable(RULE_SET_PARAMETER_NAME) String ruleSetId);
@PostMapping(value = "/rules/test", consumes = MediaType.APPLICATION_JSON_VALUE)
void testRules(@RequestBody String rules);
@PostMapping(value = "/redaction-log/preview", consumes = MediaType.APPLICATION_JSON_VALUE)
RedactionLog getRedactionLog(@RequestBody RedactionRequest redactionRequest);
}

View File

@ -0,0 +1,12 @@
package com.iqser.red.service.redaction.v1.resources;
import com.iqser.red.service.redaction.v1.model.RuleBuilderModel;
import org.springframework.http.MediaType;
import org.springframework.web.bind.annotation.PostMapping;
/**
 * REST contract exposing the rule-builder model to clients.
 */
public interface RuleBuilderResource {
/**
 * Returns the available WHEN clauses and THEN conditions for building rules.
 * NOTE(review): mapped as POST although it takes no body — confirm whether
 * GET was intended before clients depend on it.
 */
@PostMapping(value = "/rule-builder-model", produces = MediaType.APPLICATION_JSON_VALUE)
RuleBuilderModel getRuleBuilderModel();
}

View File

@ -21,21 +21,6 @@
<artifactId>redaction-service-api-v1</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.iqser.red.service</groupId>
<artifactId>file-management-service-api-v1</artifactId>
<version>2.7.4</version>
<exclusions>
<exclusion>
<groupId>com.iqser.red.service</groupId>
<artifactId>redaction-service-api-v1</artifactId>
</exclusion>
<exclusion>
<groupId>com.iqser.red.service</groupId>
<artifactId>configuration-service-api-v1</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.drools</groupId>
<artifactId>drools-core</artifactId>

View File

@ -1,19 +1,14 @@
package com.iqser.red.service.redaction.v1.server.classification.model;
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
import java.util.ArrayList;
import java.util.List;
import com.iqser.red.service.redaction.v1.model.SectionGrid;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryVersion;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
import lombok.Data;
import lombok.NoArgsConstructor;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
@Data
@NoArgsConstructor
public class Document {
@ -23,20 +18,14 @@ public class Document {
private List<Header> headers = new ArrayList<>();
private List<Footer> footers = new ArrayList<>();
private List<UnclassifiedText> unclassifiedTexts = new ArrayList<>();
private Map<Integer, List<Entity>> entities = new HashMap<>();
private FloatFrequencyCounter textHeightCounter = new FloatFrequencyCounter();
private FloatFrequencyCounter fontSizeCounter = new FloatFrequencyCounter();
private StringFrequencyCounter fontCounter = new StringFrequencyCounter();
private StringFrequencyCounter fontStyleCounter = new StringFrequencyCounter();
private boolean headlines;
private List<RedactionLogEntry> redactionLogEntities = new ArrayList<>();
private SectionGrid sectionGrid = new SectionGrid();
private DictionaryVersion dictionaryVersion;
private long rulesVersion;
private List<SectionText> sectionText = new ArrayList<>();
private Map<Integer, Set<Image>> images = new HashMap<>();
}

View File

@ -0,0 +1,6 @@
package com.iqser.red.service.redaction.v1.server.classification.model;
/**
 * Column orientation assigned to a text block when a page splits into
 * left/right columns; set by BlockificationService during blockification.
 */
public enum Orientation {
NONE, LEFT, RIGHT
}

View File

@ -32,6 +32,7 @@ public class TextBlock extends AbstractTextContainer {
private String classification;
public TextBlock(float minX, float maxX, float minY, float maxY, List<TextPositionSequence> sequences, int rotation) {
this.minX = minX;
this.maxX = maxX;

View File

@ -1,6 +1,7 @@
package com.iqser.red.service.redaction.v1.server.classification.service;
import com.iqser.red.service.redaction.v1.server.classification.model.FloatFrequencyCounter;
import com.iqser.red.service.redaction.v1.server.classification.model.Orientation;
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
import com.iqser.red.service.redaction.v1.server.classification.model.StringFrequencyCounter;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
@ -11,16 +12,21 @@ import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Rectangle;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Ruling;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
@Service
@SuppressWarnings("all")
public class BlockificationService {
public Page blockify(List<TextPositionSequence> textPositions, List<Ruling> horizontalRulingLines, List<Ruling> verticalRulingLines) {
static final float THRESHOLD = 1f;
public Page blockify(List<TextPositionSequence> textPositions, List<Ruling> horizontalRulingLines,
List<Ruling> verticalRulingLines) {
List<TextPositionSequence> chunkWords = new ArrayList<>();
List<AbstractTextContainer> chunkBlockList1 = new ArrayList<>();
@ -28,21 +34,46 @@ public class BlockificationService {
float minX = 1000, maxX = 0, minY = 1000, maxY = 0;
TextPositionSequence prev = null;
boolean wasSplitted = false;
Float splitX1 = null;
for (TextPositionSequence word : textPositions) {
boolean lineSeparation = minY - word.getY2() > word.getHeight() * 1.25;
boolean startFromTop = word.getY1() > maxY + word.getHeight();
boolean splitByX = prev != null && maxX + 50 < word.getX1() && prev.getY1() == word.getY1();
boolean newLineAfterSplit = prev != null && word.getY1() != prev.getY1() && wasSplitted && splitX1 != word.getX1();
boolean splittedByRuling = word.getRotation() == 0 && isSplittedByRuling(maxX, minY, word.getX1(), word.getY1(), verticalRulingLines) || word
.getRotation() == 0 && isSplittedByRuling(minX, minY, word.getX1(), word.getY2(), horizontalRulingLines) || word
.getRotation() == 90 && isSplittedByRuling(maxX, minY, word.getX1(), word.getY1(), horizontalRulingLines) || word
.getRotation() == 90 && isSplittedByRuling(minX, minY, word.getX1(), word.getY2(), verticalRulingLines);
if (prev != null && (lineSeparation || startFromTop || word.getRotation() == 0 && isSplittedByRuling(maxX, minY, word
.getX1(), word.getY1(), verticalRulingLines) || word.getRotation() == 0 && isSplittedByRuling(minX, minY, word
.getX1(), word.getY2(), horizontalRulingLines) || word.getRotation() == 90 && isSplittedByRuling(maxX, minY, word
.getX1(), word.getY1(), horizontalRulingLines) || word.getRotation() == 90 && isSplittedByRuling(minX, minY, word
.getX1(), word.getY2(), verticalRulingLines))) {
if (prev != null && (lineSeparation || startFromTop || splitByX || newLineAfterSplit || splittedByRuling)) {
Orientation prevOrientation = null;
if(!chunkBlockList1.isEmpty()) {
prevOrientation = chunkBlockList1.get(chunkBlockList1.size() - 1).getOrientation();
}
TextBlock cb1 = buildTextBlock(chunkWords);
chunkBlockList1.add(cb1);
chunkWords = new ArrayList<>();
if (splitByX && !splittedByRuling) {
wasSplitted = true;
cb1.setOrientation(Orientation.LEFT);
splitX1 = word.getX1();
} else
if (newLineAfterSplit && !splittedByRuling) {
wasSplitted = false;
cb1.setOrientation(Orientation.RIGHT);
splitX1 = null;
} else
if(prevOrientation != null && prevOrientation.equals(Orientation.RIGHT) && (lineSeparation || !startFromTop || !splitByX || !newLineAfterSplit || !splittedByRuling)){
cb1.setOrientation(Orientation.LEFT);
}
minX = 1000;
maxX = 0;
minY = 1000;
@ -72,9 +103,62 @@ public class BlockificationService {
chunkBlockList1.add(cb1);
}
Iterator<AbstractTextContainer> itty = chunkBlockList1.iterator();
TextBlock previousLeft = null;
TextBlock previousRight = null;
while (itty.hasNext()) {
TextBlock block = (TextBlock) itty.next();
if(previousLeft != null && block.getOrientation().equals(Orientation.LEFT)){
if (previousLeft.getMinY() > block.getMinY() && block.getMaxY() + block.getMostPopularWordHeight() > previousLeft.getMinY()){
previousLeft.add(block);
itty.remove();
continue;
}
}
if(previousRight != null && block.getOrientation().equals(Orientation.RIGHT)){
if (previousRight.getMinY() > block.getMinY() && block.getMaxY() + block.getMostPopularWordHeight() > previousRight.getMinY()){
previousRight.add(block);
itty.remove();
continue;
}
}
if (block.getOrientation().equals(Orientation.LEFT)) {
previousLeft = block;
} else if (block.getOrientation().equals(Orientation.RIGHT)) {
previousRight = block;
}
}
itty = chunkBlockList1.iterator();
TextBlock previous = null;
while (itty.hasNext()) {
TextBlock block = (TextBlock) itty.next();
if(previous != null && previous.getOrientation().equals(Orientation.LEFT) && block.getOrientation().equals(Orientation.LEFT) && equalsWithThreshold(block.getMaxY(), previous
.getMaxY())||
previous != null && previous.getOrientation().equals(Orientation.LEFT) && block.getOrientation().equals(Orientation.RIGHT) && equalsWithThreshold(block.getMaxY(), previous
.getMaxY())){
previous.add(block);
itty.remove();
continue;
}
previous = block;
}
return new Page(chunkBlockList1);
}
private boolean equalsWithThreshold(float f1, float f2){
return Math.abs(f1 - f2) < THRESHOLD;
}
private TextBlock buildTextBlock(List<TextPositionSequence> wordBlockList) {
@ -117,7 +201,8 @@ public class BlockificationService {
}
private boolean isSplittedByRuling(float previousX2, float previousY1, float currentX1, float currentY1, List<Ruling> rulingLines) {
private boolean isSplittedByRuling(float previousX2, float previousY1, float currentX1, float currentY1,
List<Ruling> rulingLines) {
for (Ruling ruling : rulingLines) {
if (ruling.intersectsLine(previousX2, previousY1, currentX1, currentY1)) {
@ -128,7 +213,8 @@ public class BlockificationService {
}
public Rectangle calculateBodyTextFrame(List<Page> pages, FloatFrequencyCounter documentFontSizeCounter, boolean landscape) {
public Rectangle calculateBodyTextFrame(List<Page> pages, FloatFrequencyCounter documentFontSizeCounter,
boolean landscape) {
float minX = 10000;
float maxX = -100;

View File

@ -1,8 +1,9 @@
package com.iqser.red.service.redaction.v1.server.client;
import com.iqser.red.service.configuration.v1.api.resource.DictionaryResource;
import org.springframework.cloud.openfeign.FeignClient;
@FeignClient(name = "DictionaryResource", url = "${configuration-service.url}")
import com.iqser.red.service.persistence.service.v1.api.resources.DictionaryResource;
@FeignClient(name = "DictionaryResource", url = "${persistence-service.url}")
public interface DictionaryClient extends DictionaryResource {
}

View File

@ -0,0 +1,19 @@
package com.iqser.red.service.redaction.v1.server.client;
import org.springframework.cloud.openfeign.FeignClient;
import org.springframework.http.MediaType;
import org.springframework.web.bind.annotation.PostMapping;
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionRequest;
import com.iqser.red.service.redaction.v1.server.client.model.NerEntities;
/**
 * Feign client for the external entity-recognition (NER) service, resolved
 * via the {@code entity-recognition-service.url} property.
 * (Unused imports java.util.List, java.util.Map and EntityRecogintionEntity
 * removed — none were referenced in this interface.)
 */
@FeignClient(name = "EntityRecognitionClient", url = "${entity-recognition-service.url}")
public interface EntityRecognitionClient {
/**
 * Posts section texts to the NER service's /find_authors endpoint and
 * returns the detected entities wrapped in {@link NerEntities}.
 */
@PostMapping(value = "/find_authors", produces = MediaType.APPLICATION_JSON_VALUE, consumes = MediaType.APPLICATION_JSON_VALUE)
NerEntities findAuthors(EntityRecognitionRequest entityRecognitionRequest);
}

View File

@ -1,9 +1,10 @@
package com.iqser.red.service.redaction.v1.server.client;
import com.iqser.red.service.file.management.v1.api.resources.FileStatusProcessingUpdateResource;
import org.springframework.cloud.openfeign.FeignClient;
@FeignClient(name = "FileStatusProcessingUpdateResource", url = "${file-management-service.url}")
import com.iqser.red.service.persistence.service.v1.api.resources.FileStatusProcessingUpdateResource;
@FeignClient(name = "FileStatusProcessingUpdateResource", url = "${persistence-service.url}")
public interface FileStatusProcessingUpdateClient extends FileStatusProcessingUpdateResource {
}

View File

@ -1,8 +1,9 @@
package com.iqser.red.service.redaction.v1.server.client;
import com.iqser.red.service.configuration.v1.api.resource.LegalBasisMappingResource;
import org.springframework.cloud.openfeign.FeignClient;
@FeignClient(name = "LegalBasisMappingResource", url = "${configuration-service.url}")
import com.iqser.red.service.persistence.service.v1.api.resources.LegalBasisMappingResource;
@FeignClient(name = "LegalBasisMappingResource", url = "${persistence-service.url}")
public interface LegalBasisClient extends LegalBasisMappingResource {
}

View File

@ -1,8 +1,9 @@
package com.iqser.red.service.redaction.v1.server.client;
import com.iqser.red.service.configuration.v1.api.resource.RulesResource;
import org.springframework.cloud.openfeign.FeignClient;
@FeignClient(name = "RulesResource", url = "${configuration-service.url}")
import com.iqser.red.service.persistence.service.v1.api.resources.RulesResource;
@FeignClient(name = "RulesResource", url = "${persistence-service.url}")
public interface RulesClient extends RulesResource {
}

View File

@ -0,0 +1,19 @@
package com.iqser.red.service.redaction.v1.server.client.model;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * One entity returned by the entity-recognition service.
 * NOTE(review): the class name misspells "Recognition"; renaming would touch
 * every usage (NerEntities, EntityRecognitionResult, ...), so it is only
 * flagged here.
 */
@Data
@Builder
@AllArgsConstructor
@NoArgsConstructor
public class EntityRecogintionEntity {
// The recognized text value.
private String value;
// Start offset of the match; presumably a character offset within the section text — TODO confirm.
private int startOffset;
// End offset of the match; inclusive/exclusive convention not visible here — TODO confirm.
private int endOffset;
// Entity type label assigned by the recognizer.
private String type;
}

View File

@ -0,0 +1,18 @@
package com.iqser.red.service.redaction.v1.server.client.model;
import java.util.List;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Request payload for the entity-recognition service: the section texts to
 * analyze (see {@link EntityRecognitionSection}).
 */
@Data
@Builder
@AllArgsConstructor
@NoArgsConstructor
public class EntityRecognitionRequest {
// Sections to run recognition over.
private List<EntityRecognitionSection> data;
}

View File

@ -0,0 +1,20 @@
package com.iqser.red.service.redaction.v1.server.client.model;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Recognition output grouped per section: map key is presumably the section
 * number (matching EntityRecognitionSection.sectionNumber) — TODO confirm
 * against the service contract.
 */
@Data
@Builder
@AllArgsConstructor
@NoArgsConstructor
public class EntityRecognitionResult {
@Builder.Default
private Map<Integer, List<EntityRecogintionEntity>> entities = new HashMap<>();
}

View File

@ -0,0 +1,16 @@
package com.iqser.red.service.redaction.v1.server.client.model;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * A single section of document text sent to the entity-recognition service,
 * tagged with its section number so results can be mapped back.
 */
@Data
@Builder
@AllArgsConstructor
@NoArgsConstructor
public class EntityRecognitionSection {
// Index of the section within the document.
private int sectionNumber;
// Plain text content of the section.
private String text;
}

View File

@ -0,0 +1,21 @@
package com.iqser.red.service.redaction.v1.server.client.model;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Response of EntityRecognitionClient#findAuthors: recognized entities
 * grouped per map key — presumably the section number — TODO confirm against
 * the service contract.
 */
@Data
@Builder
@AllArgsConstructor
@NoArgsConstructor
public class NerEntities {
@Builder.Default
private Map<Integer, List<EntityRecogintionEntity>> result = new HashMap<>();
}

View File

@ -1,10 +1,7 @@
package com.iqser.red.service.redaction.v1.server.controller;
import com.iqser.red.service.file.management.v1.api.model.FileType;
import com.iqser.red.service.redaction.v1.model.AnnotateRequest;
import com.iqser.red.service.redaction.v1.model.AnnotateResponse;
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
import com.iqser.red.service.redaction.v1.model.RedactionResult;
import com.iqser.red.service.persistence.service.v1.api.model.FileType;
import com.iqser.red.service.redaction.v1.model.*;
import com.iqser.red.service.redaction.v1.resources.RedactionResource;
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
@ -12,6 +9,7 @@ import com.iqser.red.service.redaction.v1.server.exception.RedactionException;
import com.iqser.red.service.redaction.v1.server.redaction.service.AnnotationService;
import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService;
import com.iqser.red.service.redaction.v1.server.redaction.service.DroolsExecutionService;
import com.iqser.red.service.redaction.v1.server.redaction.service.RedactionLogMergeService;
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
@ -21,7 +19,6 @@ import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RestController;
@ -39,18 +36,18 @@ public class RedactionController implements RedactionResource {
private final AnnotationService annotationService;
private final PdfSegmentationService pdfSegmentationService;
private final RedactionStorageService redactionStorageService;
private final RedactionLogMergeService redactionLogMergeService;
public AnnotateResponse annotate(@RequestBody AnnotateRequest annotateRequest) {
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(annotateRequest.getProjectId(), annotateRequest.getFileId(), FileType.ORIGIN));
var redactionLog = redactionStorageService.getRedactionLog(annotateRequest.getProjectId(), annotateRequest.getFileId());
var sectionsGrid = redactionStorageService.getSectionGrid(annotateRequest.getProjectId(), annotateRequest.getFileId());
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(annotateRequest.getDossierId(), annotateRequest.getFileId(), FileType.ORIGIN));
var redactionLog = redactionStorageService.getRedactionLog(annotateRequest.getDossierId(), annotateRequest.getFileId());
var sectionsGrid = redactionStorageService.getSectionGrid(annotateRequest.getDossierId(), annotateRequest.getFileId());
try (PDDocument pdDocument = PDDocument.load(storedObjectStream, MemoryUsageSetting.setupTempFileOnly())) {
pdDocument.setAllSecurityToBeRemoved(true);
dictionaryService.updateDictionary(redactionLog.getRuleSetId(), annotateRequest.getProjectId());
dictionaryService.updateDictionary(annotateRequest.getDossierTemplateId(), annotateRequest.getDossierId());
annotationService.annotate(pdDocument, redactionLog, sectionsGrid);
try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream()) {
@ -66,11 +63,11 @@ public class RedactionController implements RedactionResource {
@Override
public RedactionResult classify(@RequestBody RedactionRequest redactionRequest) {
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getProjectId(), redactionRequest.getFileId(), FileType.ORIGIN));
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
try {
Document classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream);
storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getProjectId(), redactionRequest.getFileId(), FileType.ORIGIN));
storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) {
pdDocument.setAllSecurityToBeRemoved(true);
@ -91,11 +88,11 @@ public class RedactionController implements RedactionResource {
@Override
public RedactionResult sections(@RequestBody RedactionRequest redactionRequest) {
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getProjectId(), redactionRequest.getFileId(), FileType.ORIGIN));
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
try {
Document classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream);
storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getProjectId(), redactionRequest.getFileId(), FileType.ORIGIN));
storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
try (PDDocument pdDocument = PDDocument.load(storedObjectStream)) {
pdDocument.setAllSecurityToBeRemoved(true);
@ -120,7 +117,7 @@ public class RedactionController implements RedactionResource {
Document classifiedDoc;
try {
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getProjectId(), redactionRequest.getFileId(), FileType.ORIGIN));
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(redactionRequest.getDossierId(), redactionRequest.getFileId(), FileType.ORIGIN));
classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream, true);
} catch (Exception e) {
throw new RedactionException(e);
@ -141,20 +138,28 @@ public class RedactionController implements RedactionResource {
}
@Override
public void updateRules(@PathVariable(RULE_SET_PARAMETER_NAME) String ruleSetId) {
droolsExecutionService.updateRules(ruleSetId);
}
@Override
public void testRules(@RequestBody String rules) {
droolsExecutionService.testRules(rules);
}
@Override
public RedactionLog getRedactionLog(RedactionRequest redactionRequest) {
log.info("Requested preview for: {}", redactionRequest);
dictionaryService.updateDictionary(redactionRequest.getDossierTemplateId(), redactionRequest.getDossierId());
var redactionLog = redactionStorageService.getRedactionLog(redactionRequest.getDossierId(), redactionRequest.getFileId());
log.info("Loaded redaction log with computationalVersion: {}", redactionLog.getAnalysisVersion());
if (redactionLog.getAnalysisVersion() == 0) {
// old redaction logs are returned directly
return redactionLog;
} else {
return redactionLogMergeService.mergeRedactionLogData(redactionLog, redactionRequest.getDossierTemplateId(), redactionRequest.getManualRedactions(), redactionRequest.getExcludedPages());
}
}
private RedactionResult convert(PDDocument document, int numberOfPages) throws IOException {

View File

@ -0,0 +1,21 @@
package com.iqser.red.service.redaction.v1.server.controller;

import com.iqser.red.service.redaction.v1.model.RuleBuilderModel;
import com.iqser.red.service.redaction.v1.resources.RuleBuilderResource;
import com.iqser.red.service.redaction.v1.server.redaction.rulebuilder.RuleBuilderModelService;
import org.springframework.web.bind.annotation.RestController;

/**
 * REST endpoint exposing the rule-builder model defined by
 * {@link RuleBuilderResource}. All work is delegated to
 * {@link RuleBuilderModelService}; this class only wires the HTTP surface.
 */
@RestController
public class RuleBuilderController implements RuleBuilderResource {

    private final RuleBuilderModelService modelService;

    /**
     * Constructor injection (equivalent to Lombok's
     * {@code @RequiredArgsConstructor}); Spring resolves the dependency by type.
     */
    public RuleBuilderController(RuleBuilderModelService modelService) {
        this.modelService = modelService;
    }

    @Override
    public RuleBuilderModel getRuleBuilderModel() {
        return modelService.getRuleBuilderModel();
    }
}

View File

@ -46,6 +46,17 @@ public class PDFAreaTextStripper extends PDFTextStripperByArea {
startIndex = i;
}
if (textPositions.get(i).getRotation() == 0 && i > 0 && textPositions.get(i).getX() > textPositions.get(i - 1).getEndX() + 1) {
List<TextPosition> sublist = textPositions.subList(startIndex, i);
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0)
.getUnicode()
.equals(" ") || sublist.get(0).getUnicode().equals("\u00A0")))) {
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
}
startIndex = i;
}
if (i > 0 && (textPositions.get(i).getUnicode().equals(" ") || textPositions.get(i)
.getUnicode()
.equals("\u00A0")) && i <= textPositions.size() - 2) {

View File

@ -189,18 +189,16 @@ public class PDFLinesTextStripper extends PDFTextStripper {
COSName objectName = (COSName) arguments.get(0);
PDXObject xobject = getResources().getXObject(objectName);
if (xobject instanceof PDImageXObject) {
PDImageXObject pdfImage = (PDImageXObject) xobject;
PDImageXObject image = (PDImageXObject)xobject;
Matrix ctmNew = getGraphicsState().getCurrentTransformationMatrix();
Rectangle2D imageBounds = calculateImagePosition(pdfImage);
Rectangle2D rect = new Rectangle2D.Float((float) imageBounds.getX(), (float) imageBounds.getY(), (float) imageBounds
.getWidth(), (float) imageBounds.getHeight());
Rectangle2D rect = new Rectangle2D.Float(ctmNew.getTranslateX(), ctmNew.getTranslateY(), ctmNew.getScaleX(), ctmNew.getScaleY());
// Memory Hack - sofReference kills me
FieldUtils.writeField(pdfImage, "cachedImageSubsampling", -1, true);
FieldUtils.writeField(image, "cachedImageSubsampling", -1, true);
if (rect.getHeight() > 2 && rect.getWidth() > 2) {
this.images.add(new PdfImage(pdfImage.getImage(), rect, pageNumber));
this.images.add(new PdfImage(image.getImage(), rect, pageNumber, image.getImage().getColorModel().hasAlpha()));
}
}
} catch (Exception e) {
@ -209,21 +207,6 @@ public class PDFLinesTextStripper extends PDFTextStripper {
}
private Rectangle2D calculateImagePosition(PDImageXObject pdfImage) throws IOException {
Matrix ctm = getGraphicsState().getCurrentTransformationMatrix();
Rectangle2D imageBounds = pdfImage.getImage().getRaster().getBounds();
AffineTransform imageTransform = new AffineTransform(ctm.createAffineTransform());
imageTransform.scale(1.0 / pdfImage.getWidth(), -1.0 / pdfImage.getHeight());
imageTransform.translate(0, -pdfImage.getHeight());
AffineTransform pageTransform = new AffineTransform();
pageTransform.concatenate(imageTransform);
return pageTransform.createTransformedShape(imageBounds).getBounds2D();
}
private float floatValue(COSBase value) {
@ -300,6 +283,18 @@ public class PDFLinesTextStripper extends PDFTextStripper {
startIndex = i;
}
if (textPositions.get(i).getRotation() == 0 && i > 0 && textPositions.get(i).getX() > textPositions.get(i - 1).getEndX() + 1) {
List<TextPosition> sublist = textPositions.subList(startIndex, i);
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0)
.getUnicode()
.equals(" ") || sublist.get(0).getUnicode().equals("\u00A0")))) {
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
}
startIndex = i;
}
if (i > 0 && (textPositions.get(i).getUnicode().equals(" ") || textPositions.get(i)
.getUnicode()
.equals("\u00A0")) && i <= textPositions.size() - 2) {

View File

@ -21,7 +21,7 @@ public class MessagingConfiguration {
return QueueBuilder.durable(REDACTION_QUEUE)
.withArgument("x-dead-letter-exchange", "")
.withArgument("x-dead-letter-routing-key", REDACTION_QUEUE)
.withArgument("x-dead-letter-routing-key", REDACTION_DQL)
.maxPriority(2)
.build();
}

View File

@ -4,10 +4,14 @@ import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
import com.iqser.red.service.redaction.v1.model.AnalyzeResult;
import com.iqser.red.service.redaction.v1.model.StructureAnalyzeRequest;
import com.iqser.red.service.redaction.v1.server.client.FileStatusProcessingUpdateClient;
import com.iqser.red.service.redaction.v1.server.redaction.service.ReanalyzeService;
import com.iqser.red.service.redaction.v1.server.redaction.service.AnalyzeService;
import com.iqser.red.service.redaction.v1.server.redaction.service.NerAnalyserService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.amqp.rabbit.annotation.RabbitHandler;
import org.springframework.amqp.rabbit.annotation.RabbitListener;
import org.springframework.stereotype.Service;
@ -21,8 +25,10 @@ import static com.iqser.red.service.redaction.v1.server.queue.MessagingConfigura
public class RedactionMessageReceiver {
private final ObjectMapper objectMapper;
private final ReanalyzeService reanalyzeService;
private final AnalyzeService analyzeService;
private final FileStatusProcessingUpdateClient fileStatusProcessingUpdateClient;
private final NerAnalyserService nerAnalyserService;
@RabbitHandler
@RabbitListener(queues = REDACTION_QUEUE)
@ -32,15 +38,25 @@ public class RedactionMessageReceiver {
log.info("Processing analyze request: {}", analyzeRequest);
AnalyzeResult result;
if (analyzeRequest.isReanalyseOnlyIfPossible()) {
result = reanalyzeService.reanalyze(analyzeRequest);
result = analyzeService.reanalyze(analyzeRequest);
log.info("Successfully reanalyzed dossier {} file {} took: {}", analyzeRequest.getDossierId(), analyzeRequest
.getFileId(), result.getDuration());
} else {
result = reanalyzeService.analyze(analyzeRequest);
}
log.info("Successfully analyzed {}", analyzeRequest);
// TODO Seperate stucture analysis by other queue
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(analyzeRequest.getDossierId(), analyzeRequest.getFileId()));
fileStatusProcessingUpdateClient.analysisSuccessful(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), result);
// TODO NerEntities should be computed and stored in entity-recognition-service, should be triggered by a seperate queue after structure analysis
nerAnalyserService.computeNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
result = analyzeService.analyze(analyzeRequest);
log.info("Successfully analyzed dossier {} file {} took: {}", analyzeRequest.getDossierId(), analyzeRequest.getFileId(), result
.getDuration());
}
fileStatusProcessingUpdateClient.analysisSuccessful(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), result);
}
@RabbitHandler
@RabbitListener(queues = REDACTION_DQL)
public void receiveAnalyzeRequestDQL(String in) throws JsonProcessingException {
@ -48,7 +64,7 @@ public class RedactionMessageReceiver {
var analyzeRequest = objectMapper.readValue(in, AnalyzeRequest.class);
log.info("Failed to process analyze request: {}", analyzeRequest);
fileStatusProcessingUpdateClient.analysisFailed(analyzeRequest.getProjectId(), analyzeRequest.getFileId());
fileStatusProcessingUpdateClient.analysisFailed(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
}
}

View File

@ -1,7 +1,6 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
import com.iqser.red.service.configuration.v1.api.model.DictionaryEntry;
import lombok.AllArgsConstructor;
import lombok.Data;
@ -9,6 +8,8 @@ import java.io.Serializable;
import java.util.Set;
import java.util.stream.Collectors;
import com.iqser.red.service.persistence.service.v1.api.model.data.configuration.DictionaryEntry;
@Data
@AllArgsConstructor
public class DictionaryModel implements Serializable {

View File

@ -10,7 +10,7 @@ import java.util.Map;
@Data
public class DictionaryRepresentation {
private String ruleSetId;
private String dossierTemplateId;
private long dictionaryVersion = -1;
private List<DictionaryModel> dictionary = new ArrayList<>();
private float[] defaultColor;

View File

@ -11,6 +11,6 @@ import lombok.NoArgsConstructor;
@AllArgsConstructor
public class DictionaryVersion {
long rulesetVersion;
long dossierTemplateVersion;
long dossierVersion;
}

View File

@ -1,16 +1,19 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
import com.iqser.red.service.redaction.v1.model.Engine;
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
import lombok.Data;
import lombok.EqualsAndHashCode;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
@Data
@EqualsAndHashCode(onlyExplicitlyIncluded = true)
public class Entity {
public class Entity implements ReasonHolder {
private final String word;
private final String type;
@ -39,8 +42,15 @@ public class Entity {
private boolean isDossierDictionaryEntry;
private Set<Engine> engines = new HashSet<>();
public Entity(String word, String type, boolean redaction, String redactionReason, List<EntityPositionSequence> positionSequences, String headline, int matchedRule, int sectionNumber, String legalBasis, boolean isDictionaryEntry, String textBefore, String textAfter, Integer start, Integer end, boolean isDossierDictionaryEntry) {
private Set<Entity> references = new HashSet<>();
public Entity(String word, String type, boolean redaction, String redactionReason,
List<EntityPositionSequence> positionSequences, String headline, int matchedRule, int sectionNumber,
String legalBasis, boolean isDictionaryEntry, String textBefore, String textAfter, Integer start,
Integer end, boolean isDossierDictionaryEntry, Set<Engine> engines, Set<Entity> references) {
this.word = word;
this.type = type;
@ -57,10 +67,13 @@ public class Entity {
this.start = start;
this.end = end;
this.isDossierDictionaryEntry = isDossierDictionaryEntry;
this.engines = engines;
this.references = references;
}
public Entity(String word, String type, Integer start, Integer end, String headline, int sectionNumber, boolean isDictionaryEntry, boolean isDossierDictionaryEntry) {
public Entity(String word, String type, Integer start, Integer end, String headline, int sectionNumber,
boolean isDictionaryEntry, boolean isDossierDictionaryEntry, Engine engine) {
this.word = word;
this.type = type;
@ -70,6 +83,8 @@ public class Entity {
this.sectionNumber = sectionNumber;
this.isDictionaryEntry = isDictionaryEntry;
this.isDossierDictionaryEntry = isDossierDictionaryEntry;
this.engines.add(engine);
}
}

View File

@ -9,7 +9,7 @@ import lombok.NoArgsConstructor;
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class Image {
public class Image implements ReasonHolder {
private String type;
private RedRectangle2D position;
@ -20,5 +20,6 @@ public class Image {
private int sectionNumber;
private String section;
private int page;
private boolean hasTransparency;
}

View File

@ -0,0 +1,23 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;

/**
 * Container for per-page analysis results: the entities and the images
 * detected on each page. Map keys are page numbers (presumably the same
 * page numbering used by {@code Entity}/{@code PdfImage} — TODO confirm
 * against the producer).
 */
@Data
@Builder
@AllArgsConstructor
public class PageEntities {

    // @Builder.Default is only honored when the class carries @Builder;
    // without it Lombok ignores the annotation and the builder would leave
    // these maps null. @Builder added so the declared defaults take effect.

    /** Entities per page; defaults to an empty, mutable map. */
    @Builder.Default
    private Map<Integer, List<Entity>> entitiesPerPage = new HashMap<>();

    /** Images per page; defaults to an empty, mutable map. */
    @Builder.Default
    private Map<Integer, Set<Image>> imagesPerPage = new HashMap<>();
}

View File

@ -18,14 +18,16 @@ public class PdfImage {
private RedRectangle2D position;
private ImageType imageType;
private boolean isAppendedToParagraph;
private boolean hasTransparency;
@NonNull
private int page;
public PdfImage(BufferedImage image, Rectangle2D position, int page) {
public PdfImage(BufferedImage image, Rectangle2D position, int page, boolean hasTransparency) {
this.image = image;
this.position = new RedRectangle2D(position.getX(), position.getY(), position.getWidth(), position.getHeight());
this.page = page;
this.hasTransparency = hasTransparency;
}
}

View File

@ -0,0 +1,14 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;

/**
 * Common contract for model objects that can be marked for redaction and
 * carry a human-readable reason for that decision (implemented by e.g.
 * {@code Entity} and {@code Image}), allowing rule code to treat both
 * uniformly.
 */
public interface ReasonHolder {

    /** Returns the reason recorded for the redaction decision, if any. */
    String getRedactionReason();

    /** Records the reason for the redaction decision. */
    void setRedactionReason(String reason);

    /** Returns whether this element is currently marked for redaction. */
    boolean isRedaction();

    /** Marks or unmarks this element for redaction. */
    void setRedaction(boolean value);
}

View File

@ -1,5 +1,8 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
import com.iqser.red.service.redaction.v1.model.ArgumentType;
import com.iqser.red.service.redaction.v1.model.Engine;
import com.iqser.red.service.redaction.v1.model.FileAttribute;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
import com.iqser.red.service.redaction.v1.server.redaction.utils.Patterns;
@ -8,11 +11,11 @@ import lombok.Data;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
@ -52,8 +55,49 @@ public class Section {
@Builder.Default
private Set<Image> images = new HashSet<>();
@Builder.Default
private List<FileAttribute> fileAttributes = new ArrayList<>();
public boolean rowEquals(String headerName, String value) {
@WhenCondition
public boolean fileAttributeByIdEquals(@Argument(ArgumentType.FILE_ATTRIBUTE) String id,
@Argument(ArgumentType.STRING) String value) {
return fileAttributes != null && fileAttributes.stream().filter(attribute -> id.equals(attribute.getId()) && value.equals(attribute.getValue())).findFirst().isPresent();
}
@WhenCondition
public boolean fileAttributeByPlaceholderEquals(@Argument(ArgumentType.FILE_ATTRIBUTE) String placeholder,
@Argument(ArgumentType.STRING) String value) {
return fileAttributes != null && fileAttributes.stream().filter(attribute -> placeholder.equals(attribute.getPlaceholder()) && value.equals(attribute.getValue())).findFirst().isPresent();
}
@WhenCondition
public boolean fileAttributeByLabelEquals(@Argument(ArgumentType.FILE_ATTRIBUTE) String label,
@Argument(ArgumentType.STRING) String value) {
return fileAttributes != null && fileAttributes.stream().filter(attribute -> label.equals(attribute.getLabel()) && value.equals(attribute.getValue())).findFirst().isPresent();
}
@WhenCondition
public boolean fileAttributeByIdEqualsIgnoreCase(@Argument(ArgumentType.FILE_ATTRIBUTE) String id,
@Argument(ArgumentType.STRING) String value) {
return fileAttributes != null && fileAttributes.stream().filter(attribute -> id.equals(attribute.getId()) && value.equalsIgnoreCase(attribute.getValue())).findFirst().isPresent();
}
@WhenCondition
public boolean fileAttributeByPlaceholderEqualsIgnoreCase(@Argument(ArgumentType.FILE_ATTRIBUTE) String placeholder,
@Argument(ArgumentType.STRING) String value) {
return fileAttributes != null && fileAttributes.stream().filter(attribute -> placeholder.equals(attribute.getPlaceholder()) && value.equalsIgnoreCase(attribute.getValue())).findFirst().isPresent();
}
@WhenCondition
public boolean fileAttributeByLabelEqualsIgnoreCase(@Argument(ArgumentType.FILE_ATTRIBUTE) String label,
@Argument(ArgumentType.STRING) String value) {
return fileAttributes != null && fileAttributes.stream().filter(attribute -> label.equals(attribute.getLabel()) && value.equalsIgnoreCase(attribute.getValue())).findFirst().isPresent();
}
@WhenCondition
public boolean rowEquals(@Argument(ArgumentType.STRING) String headerName,
@Argument(ArgumentType.STRING) String value) {
String cleanHeaderName = headerName.replaceAll("\n", "").replaceAll(" ", "").replaceAll("-", "");
@ -62,33 +106,36 @@ public class Section {
.equals(value);
}
public boolean hasTableHeader(String headerName) {
@WhenCondition
public boolean hasTableHeader(@Argument(ArgumentType.STRING) String headerName) {
String cleanHeaderName = headerName.replaceAll("\n", "").replaceAll(" ", "").replaceAll("-", "");
return tabularData != null && tabularData.containsKey(cleanHeaderName);
}
public boolean matchesType(String type) {
@WhenCondition
public boolean matchesType(@Argument(ArgumentType.TYPE) String type) {
return entities.stream().anyMatch(entity -> entity.getType().equals(type));
}
public boolean matchesImageType(String type) {
@WhenCondition
public boolean matchesImageType(@Argument(ArgumentType.TYPE) String type) {
return images.stream().anyMatch(image -> image.getType().equals(type));
}
public boolean headlineContainsWord(String word) {
@WhenCondition
public boolean headlineContainsWord(@Argument(ArgumentType.STRING) String word) {
return StringUtils.containsIgnoreCase(headline, word);
}
public void expandByRegEx(String type, String pattern, boolean patternCaseInsensitive, int group) {
@ThenAction
public void expandByRegEx(@Argument(ArgumentType.TYPE) String type,
@Argument(ArgumentType.REGEX) String pattern,
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
@Argument(ArgumentType.INTEGER) int group) {
Pattern compiledPattern = Patterns.getCompiledPattern(pattern, patternCaseInsensitive);
@ -115,8 +162,11 @@ public class Section {
EntitySearchUtils.removeEntitiesContainedInLarger(entities);
}
public void redactImage(String type, int ruleNumber, String reason, String legalBasis) {
@ThenAction
public void redactImage(@Argument(ArgumentType.TYPE) String type,
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.STRING) String reason,
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
images.forEach(image -> {
if (image.getType().equals(type)) {
@ -128,8 +178,11 @@ public class Section {
});
}
public void redact(String type, int ruleNumber, String reason, String legalBasis) {
@ThenAction
public void redact(@Argument(ArgumentType.TYPE) String type,
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.STRING) String reason,
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
boolean hasRecommendationDictionary = dictionaryTypes.contains(RECOMMENDATION_PREFIX + type);
@ -144,8 +197,10 @@ public class Section {
});
}
public void redactNotImage(String type, int ruleNumber, String reason) {
@ThenAction
public void redactNotImage(@Argument(ArgumentType.TYPE) String type,
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.STRING) String reason) {
images.forEach(image -> {
if (image.getType().equals(type)) {
@ -156,8 +211,10 @@ public class Section {
});
}
public void redactNot(String type, int ruleNumber, String reason) {
@ThenAction
public void redactNot(@Argument(ArgumentType.TYPE) String type,
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.STRING) String reason) {
boolean hasRecommendationDictionary = dictionaryTypes.contains(RECOMMENDATION_PREFIX + type);
@ -172,8 +229,35 @@ public class Section {
}
public void expandToHintAnnotationByRegEx(String type, String pattern, boolean patternCaseInsensitive, int group,
String asType) {
@ThenAction
public void redactNotAndReference(@Argument(ArgumentType.TYPE) String type,
@Argument(ArgumentType.REFERENCE_TYPE) String referenceType,
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.STRING) String reason) {
boolean hasRecommendationDictionary = dictionaryTypes.contains(RECOMMENDATION_PREFIX + type);
Set<Entity> references = entities.stream().filter(entity -> entity.getType().equals(referenceType)).collect(Collectors.toSet());
entities.forEach(entity -> {
if (entity.getType().equals(type) || hasRecommendationDictionary && entity.getType()
.equals(RECOMMENDATION_PREFIX + type)) {
entity.setRedaction(false);
entity.setMatchedRule(ruleNumber);
entity.setRedactionReason(reason);
entity.setReferences(references);
}
});
}
@ThenAction
public void expandToHintAnnotationByRegEx(@Argument(ArgumentType.TYPE) String type,
@Argument(ArgumentType.STRING) String pattern,
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
@Argument(ArgumentType.INTEGER) int group,
@Argument(ArgumentType.TYPE) String asType) {
Pattern compiledPattern = Patterns.getCompiledPattern(pattern, patternCaseInsensitive);
@ -198,8 +282,11 @@ public class Section {
EntitySearchUtils.removeEntitiesContainedInLarger(entities);
}
public void addHintAnnotationByRegEx(String pattern, boolean patternCaseInsensitive, int group, String asType) {
@ThenAction
public void addHintAnnotationByRegEx(@Argument(ArgumentType.REGEX) String pattern,
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
@Argument(ArgumentType.INTEGER) int group,
@Argument(ArgumentType.TYPE) String asType) {
Pattern compiledPattern = Patterns.getCompiledPattern(pattern, patternCaseInsensitive);
@ -214,8 +301,12 @@ public class Section {
}
}
public void redactIfPrecededBy(String prefix, String type, int ruleNumber, String reason, String legalBasis) {
@ThenAction
public void redactIfPrecededBy(@Argument(ArgumentType.STRING) String prefix,
@Argument(ArgumentType.TYPE) String type,
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.STRING) String reason,
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
entities.forEach(entity -> {
if (entity.getType().equals(type) && searchText.indexOf(prefix + entity.getWord()) != 1) {
@ -227,23 +318,32 @@ public class Section {
});
}
public void addHintAnnotation(String value, String asType) {
@ThenAction
public void addHintAnnotation(@Argument(ArgumentType.STRING) String value,
@Argument(ArgumentType.TYPE) String asType) {
Set<Entity> found = findEntities(value.trim(), asType, true, false, 0, null, null);
EntitySearchUtils.addEntitiesIgnoreRank(entities, found);
}
public void addRedaction(String value, String asType, int ruleNumber, String reason, String legalBasis) {
@ThenAction
public void addRedaction(@Argument(ArgumentType.STRING) String value,
@Argument(ArgumentType.TYPE) String asType,
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.STRING) String reason,
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
Set<Entity> found = findEntities(value.trim(), asType, true, true, ruleNumber, reason, legalBasis);
EntitySearchUtils.addEntitiesIgnoreRank(entities, found);
}
public void redactLineAfter(String start, String asType, int ruleNumber, boolean redactEverywhere, String reason,
String legalBasis) {
@ThenAction
public void redactLineAfter(@Argument(ArgumentType.STRING) String start,
@Argument(ArgumentType.TYPE) String asType,
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
@Argument(ArgumentType.STRING) String reason,
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
String[] values = StringUtils.substringsBetween(text, start, "\n");
@ -261,8 +361,9 @@ public class Section {
}
}
public void recommendLineAfter(String start, String asType) {
@ThenAction
public void recommendLineAfter(@Argument(ArgumentType.STRING) String start,
@Argument(ArgumentType.TYPE) String asType) {
String[] values = StringUtils.substringsBetween(text, start, "\n");
@ -285,9 +386,14 @@ public class Section {
}
}
public void redactByRegEx(String pattern, boolean patternCaseInsensitive, int group, String asType, int ruleNumber,
String reason, String legalBasis) {
@ThenAction
public void redactByRegEx(@Argument(ArgumentType.REGEX) String pattern,
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
@Argument(ArgumentType.INTEGER) int group,
@Argument(ArgumentType.TYPE) String asType,
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.STRING) String reason,
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
Pattern compiledPattern = Patterns.getCompiledPattern(pattern, patternCaseInsensitive);
@ -302,8 +408,11 @@ public class Section {
}
}
public void addRecommendationByRegEx(String pattern, boolean patternCaseInsensitive, int group, String asType) {
@ThenAction
public void addRecommendationByRegEx(@Argument(ArgumentType.REGEX) String pattern,
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
@Argument(ArgumentType.INTEGER) int group,
@Argument(ArgumentType.TYPE) String asType) {
Pattern compiledPattern = Patterns.getCompiledPattern(pattern, patternCaseInsensitive);
@ -317,9 +426,14 @@ public class Section {
}
}
public void redactAndRecommendByRegEx(String pattern, boolean patternCaseInsensitive, int group, String asType,
int ruleNumber, String reason, String legalBasis) {
@ThenAction
public void redactAndRecommendByRegEx(@Argument(ArgumentType.REGEX) String pattern,
@Argument(ArgumentType.BOOLEAN) boolean patternCaseInsensitive,
@Argument(ArgumentType.INTEGER) int group,
@Argument(ArgumentType.TYPE) String asType,
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.STRING) String reason,
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
Pattern compiledPattern = Patterns.getCompiledPattern(pattern, patternCaseInsensitive);
@ -334,9 +448,14 @@ public class Section {
}
}
public void redactBetween(String start, String stop, String asType, int ruleNumber, boolean redactEverywhere,
String reason, String legalBasis) {
@ThenAction
public void redactBetween(@Argument(ArgumentType.STRING) String start,
@Argument(ArgumentType.STRING) String stop,
@Argument(ArgumentType.TYPE) String asType,
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
@Argument(ArgumentType.STRING) String reason,
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
String[] values = StringUtils.substringsBetween(searchText, start, stop);
@ -355,9 +474,14 @@ public class Section {
}
}
public void redactLinesBetween(String start, String stop, String asType, int ruleNumber, boolean redactEverywhere,
String reason, String legalBasis) {
@ThenAction
public void redactLinesBetween(@Argument(ArgumentType.STRING) String start,
@Argument(ArgumentType.STRING) String stop,
@Argument(ArgumentType.TYPE) String asType,
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.BOOLEAN) boolean redactEverywhere,
@Argument(ArgumentType.STRING) String reason,
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
String[] values = StringUtils.substringsBetween(text, start, stop);
@ -384,34 +508,48 @@ public class Section {
}
}
public void highlightCell(String cellHeader, int ruleNumber, String type) {
@ThenAction
public void highlightCell(@Argument(ArgumentType.STRING) String cellHeader,
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.TYPE) String type) {
annotateCell(cellHeader, ruleNumber, type, false, false, null, null);
}
public void redactCell(String cellHeader, int ruleNumber, String type, boolean addAsRecommendations, String reason,
String legalBasis) {
@ThenAction
public void redactCell(@Argument(ArgumentType.STRING) String cellHeader,
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.TYPE) String type,
@Argument(ArgumentType.BOOLEAN) boolean addAsRecommendations,
@Argument(ArgumentType.STRING) String reason,
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
annotateCell(cellHeader, ruleNumber, type, true, addAsRecommendations, reason, legalBasis);
}
public void redactNotCell(String cellHeader, int ruleNumber, String type, boolean addAsRecommendations,
String reason) {
@ThenAction
public void redactNotCell(@Argument(ArgumentType.STRING) String cellHeader,
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.TYPE) String type,
@Argument(ArgumentType.BOOLEAN) boolean addAsRecommendations,
@Argument(ArgumentType.STRING) String reason) {
annotateCell(cellHeader, ruleNumber, type, false, addAsRecommendations, reason, null);
}
private Set<Entity> findEntities(String value, String asType, boolean caseInsensitive, boolean redacted,
int ruleNumber, String reason, String legalBasis) {
private Set<Entity> findEntities(@Argument(ArgumentType.STRING) String value,
@Argument(ArgumentType.TYPE) String asType,
@Argument(ArgumentType.BOOLEAN) boolean caseInsensitive,
@Argument(ArgumentType.BOOLEAN) boolean redacted,
@Argument(ArgumentType.RULE_NUMBER) int ruleNumber,
@Argument(ArgumentType.STRING) String reason,
@Argument(ArgumentType.LEGAL_BASIS) String legalBasis) {
String text = caseInsensitive ? searchText.toLowerCase() : searchText;
String searchValue = caseInsensitive ? value.toLowerCase() : value;
Set<Entity> found = EntitySearchUtils.find(text, Set.of(searchValue), asType, headline, sectionNumber, true, false);
Set<Entity> found = EntitySearchUtils.find(text, Set.of(searchValue), asType, headline, sectionNumber, false, false, Engine.RULE);
found.forEach(entity -> {
if (redacted) {
@ -437,7 +575,7 @@ public class Section {
} else {
String word = value.toString();
Entity entity = new Entity(word, type, value.getRowSpanStart(), value.getRowSpanStart() + word.length(), headline, sectionNumber, false, false);
Entity entity = new Entity(word, type, value.getRowSpanStart(), value.getRowSpanStart() + word.length(), headline, sectionNumber, false, false, Engine.RULE);
entity.setRedaction(redact);
entity.setMatchedRule(ruleNumber);
entity.setRedactionReason(reason);
@ -475,6 +613,25 @@ public class Section {
}
}
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.METHOD)
public @interface WhenCondition {
}
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.METHOD)
public @interface ThenAction {
}
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.PARAMETER)
public @interface Argument {
ArgumentType value() default ArgumentType.STRING;
}
}

View File

@ -0,0 +1,36 @@
package com.iqser.red.service.redaction.v1.server.redaction.rulebuilder;
import com.iqser.red.service.redaction.v1.model.Argument;
import com.iqser.red.service.redaction.v1.model.RuleBuilderModel;
import com.iqser.red.service.redaction.v1.model.RuleElement;
import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
import org.springframework.stereotype.Service;
import java.lang.reflect.Method;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
@Service
public class RuleBuilderModelService {
public RuleBuilderModel getRuleBuilderModel() {
var whenConditions = Arrays.stream(Section.class.getDeclaredMethods()).filter(m -> m.isAnnotationPresent(Section.WhenCondition.class)).collect(Collectors.toList());
var thenActions = Arrays.stream(Section.class.getDeclaredMethods()).filter(m -> m.isAnnotationPresent(Section.ThenAction.class)).collect(Collectors.toList());
RuleBuilderModel ruleBuilderModel = new RuleBuilderModel();
ruleBuilderModel.setWhenClauses(whenConditions.stream().map(c -> new RuleElement(c.getName(), toArguments(c))).collect(Collectors.toList()));
ruleBuilderModel.setThenConditions(thenActions.stream().map(c -> new RuleElement(c.getName(), toArguments(c))).collect(Collectors.toList()));
return ruleBuilderModel;
}
private List<Argument> toArguments(Method c) {
return Arrays.stream(c.getParameters())
.map(parameter -> new Argument(parameter.getName(), parameter.getAnnotation(Section.Argument.class).value()))
.collect(Collectors.toList());
}
}

View File

@ -1,49 +0,0 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import com.iqser.red.service.redaction.v1.model.AnalyzeResult;
import com.iqser.red.service.redaction.v1.model.RedactionChangeLog;
import com.iqser.red.service.redaction.v1.model.RedactionLog;
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
import org.springframework.stereotype.Service;
@Service
public class AnalyzeResponseService {
public AnalyzeResult createAnalyzeResponse(String projectId, String fileId, long duration, int pageCount, RedactionLog redactionLog, RedactionChangeLog redactionChangeLog) {
boolean hasHints = redactionLog.getRedactionLogEntry().stream().anyMatch(RedactionLogEntry::isHint);
boolean hasRequests = redactionLog.getRedactionLogEntry()
.stream()
.anyMatch(entry -> entry.isManual() && entry.getStatus()
.equals(com.iqser.red.service.redaction.v1.model.Status.REQUESTED));
boolean hasRedactions = redactionLog.getRedactionLogEntry()
.stream()
.anyMatch(entry -> entry.isRedacted() && !entry.isManual() || entry.isManual() && entry.getStatus()
.equals(com.iqser.red.service.redaction.v1.model.Status.APPROVED));
boolean hasImages = redactionLog.getRedactionLogEntry()
.stream()
.anyMatch(entry -> entry.isHint() && entry.getType().equals("image"));
boolean hasUpdates = redactionChangeLog != null && redactionChangeLog.getRedactionLogEntry() != null && !redactionChangeLog
.getRedactionLogEntry()
.isEmpty() && redactionChangeLog.getRedactionLogEntry().stream().anyMatch(entry -> !entry.getType().equals("false_positive"));
return AnalyzeResult.builder()
.projectId(projectId)
.fileId(fileId)
.duration(duration)
.numberOfPages(pageCount)
.hasHints(hasHints)
.hasRedactions(hasRedactions)
.hasRequests(hasRequests)
.hasImages(hasImages)
.hasUpdates(hasUpdates)
.rulesVersion(redactionLog.getRulesVersion())
.dictionaryVersion(redactionLog.getDictionaryVersion())
.legalBasisVersion(redactionLog.getLegalBasisVersion())
.dossierDictionaryVersion(redactionLog.getDossierDictionaryVersion())
.build();
}
}

View File

@ -0,0 +1,283 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.kie.api.runtime.KieContainer;
import org.springframework.stereotype.Service;
import org.springframework.web.bind.annotation.RequestBody;
import com.iqser.red.service.persistence.service.v1.api.model.FileType;
import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
import com.iqser.red.service.redaction.v1.model.AnalyzeResult;
import com.iqser.red.service.redaction.v1.model.IdRemoval;
import com.iqser.red.service.redaction.v1.model.ManualForceRedact;
import com.iqser.red.service.redaction.v1.model.ManualImageRecategorization;
import com.iqser.red.service.redaction.v1.model.ManualLegalBasisChange;
import com.iqser.red.service.redaction.v1.model.ManualRedactions;
import com.iqser.red.service.redaction.v1.model.Rectangle;
import com.iqser.red.service.redaction.v1.model.RedactionLog;
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.model.StructureAnalyzeRequest;
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
import com.iqser.red.service.redaction.v1.server.classification.model.Text;
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
import com.iqser.red.service.redaction.v1.server.exception.RedactionException;
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrement;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryVersion;
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
import com.iqser.red.service.redaction.v1.server.redaction.model.PageEntities;
import com.iqser.red.service.redaction.v1.server.redaction.model.RedRectangle2D;
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@Service
@RequiredArgsConstructor
public class AnalyzeService {
private final DictionaryService dictionaryService;
private final DroolsExecutionService droolsExecutionService;
private final EntityRedactionService entityRedactionService;
private final RedactionLogCreatorService redactionLogCreatorService;
private final RedactionStorageService redactionStorageService;
private final PdfSegmentationService pdfSegmentationService;
private final RedactionChangeLogService redactionChangeLogService;
private final LegalBasisClient legalBasisClient;
private final RedactionServiceSettings redactionServiceSettings;
private final SectionTextBuilderService sectionTextBuilderService;
private final SectionGridCreatorService sectionGridCreatorService;
private final NerAnalyserService nerAnalyserService;
public void analyzeDocumentStructure(StructureAnalyzeRequest analyzeRequest) {
long startTime = System.currentTimeMillis();
var pageCount = 0;
Document classifiedDoc;
try {
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(analyzeRequest
.getDossierId(), analyzeRequest.getFileId(), FileType.ORIGIN));
classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream);
pageCount = classifiedDoc.getPages().size();
} catch (Exception e) {
throw new RedactionException(e);
}
List<SectionText> sectionTexts = sectionTextBuilderService.buildSectionText(classifiedDoc);
sectionGridCreatorService.createSectionGrid(classifiedDoc, pageCount);
Text text = new Text(pageCount, sectionTexts);
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.TEXT, text);
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.SECTION_GRID, classifiedDoc
.getSectionGrid());
log.info("Document structure analysis successful, took: {}", System.currentTimeMillis() - startTime);
}
public AnalyzeResult analyze(AnalyzeRequest analyzeRequest) {
long startTime = System.currentTimeMillis();
var text = redactionStorageService.getText(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
var nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
if(redactionServiceSettings.isEnableEntityRecognition() && nerEntities == null){
nerAnalyserService.computeNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
}
dictionaryService.updateDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
KieContainer kieContainer = droolsExecutionService.updateRules(analyzeRequest.getDossierTemplateId());
long rulesVersion = droolsExecutionService.getRulesVersion(analyzeRequest.getDossierTemplateId());
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest
.getDossierId());
PageEntities pageEntities = entityRedactionService.findEntities(dictionary, text.getSectionTexts(), kieContainer, analyzeRequest, nerEntities);
dictionaryService.updateExternalDictionary(dictionary, analyzeRequest.getDossierTemplateId());
List<RedactionLogEntry> redactionLogEntries = redactionLogCreatorService.createRedactionLog(pageEntities, text.getNumberOfPages(), analyzeRequest
.getDossierTemplateId());
var legalBasis = legalBasisClient.getLegalBasisMapping(analyzeRequest.getDossierTemplateId());
var redactionLog = new RedactionLog(redactionServiceSettings.getAnalysisVersion(), redactionLogEntries, legalBasis, dictionary
.getVersion()
.getDossierTemplateVersion(), dictionary.getVersion()
.getDossierVersion(), rulesVersion, legalBasisClient.getVersion(analyzeRequest.getDossierTemplateId()));
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionary.getVersion(), false);
}
@SneakyThrows
public AnalyzeResult reanalyze(@RequestBody AnalyzeRequest analyzeRequest) {
long startTime = System.currentTimeMillis();
var redactionLog = redactionStorageService.getRedactionLog(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
var text = redactionStorageService.getText(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
// not yet ready for reanalysis
if (redactionLog == null || text == null || text.getNumberOfPages() == 0) {
return analyze(analyzeRequest);
}
DictionaryIncrement dictionaryIncrement = dictionaryService.getDictionaryIncrements(analyzeRequest.getDossierTemplateId(), new DictionaryVersion(redactionLog
.getDictionaryVersion(), redactionLog.getDossierDictionaryVersion()), analyzeRequest.getDossierId());
Set<Integer> sectionsToReanalyse = !analyzeRequest.getSectionsToReanalyse()
.isEmpty() ? analyzeRequest.getSectionsToReanalyse() : findSectionsToReanalyse(dictionaryIncrement, redactionLog, text, analyzeRequest);
if (sectionsToReanalyse.isEmpty()) {
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement.getDictionaryVersion(), true);
}
var nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
if(redactionServiceSettings.isEnableEntityRecognition() && nerEntities == null){
nerAnalyserService.computeNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
}
List<SectionText> reanalysisSections = text.getSectionTexts()
.stream()
.filter(sectionText -> sectionsToReanalyse.contains(sectionText.getSectionNumber()))
.collect(Collectors.toList());
KieContainer kieContainer = droolsExecutionService.updateRules(analyzeRequest.getDossierTemplateId());
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest
.getDossierId());
PageEntities pageEntities = entityRedactionService.findEntities(dictionary, reanalysisSections, kieContainer, analyzeRequest, nerEntities);
var newRedactionLogEntries = redactionLogCreatorService.createRedactionLog(pageEntities, text.getNumberOfPages(), analyzeRequest
.getDossierTemplateId());
redactionLog.getRedactionLogEntry().removeIf(entry -> sectionsToReanalyse.contains(entry.getSectionNumber()));
redactionLog.getRedactionLogEntry().addAll(newRedactionLogEntries);
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement.getDictionaryVersion(), true);
}
private Set<Integer> findSectionsToReanalyse(DictionaryIncrement dictionaryIncrement, RedactionLog redactionLog,
Text text, AnalyzeRequest analyzeRequest) {
long start = System.currentTimeMillis();
Set<String> relevantManuallyModifiedAnnotationIds = getRelevantManuallyModifiedAnnotationIds(analyzeRequest.getManualRedactions());
Set<Integer> sectionsToReanalyse = new HashSet<>();
Map<Integer, Set<Image>> imageEntries = new HashMap<>();
for (RedactionLogEntry entry : redactionLog.getRedactionLogEntry()) {
if (entry.isManual() || relevantManuallyModifiedAnnotationIds.contains(entry.getId())) {
sectionsToReanalyse.add(entry.getSectionNumber());
}
if (entry.isImage() || entry.getType().equals("image")) {
imageEntries.computeIfAbsent(entry.getSectionNumber(), x -> new HashSet<>()).add(convert(entry));
}
}
for (SectionText sectionText : text.getSectionTexts()) {
if (EntitySearchUtils.sectionContainsAny(sectionText.getText(), dictionaryIncrement.getValues())) {
sectionsToReanalyse.add(sectionText.getSectionNumber());
}
}
log.info("Should reanalyze {} sections for request: {}, took: {}", sectionsToReanalyse.size(), analyzeRequest, System.currentTimeMillis() - start);
return sectionsToReanalyse;
}
private AnalyzeResult finalizeAnalysis(@RequestBody AnalyzeRequest analyzeRequest, long startTime,
RedactionLog redactionLog, Text text, DictionaryVersion dictionaryVersion,
boolean isReanalysis) {
redactionLog.setDictionaryVersion(dictionaryVersion.getDossierTemplateVersion());
redactionLog.setDossierDictionaryVersion(dictionaryVersion.getDossierVersion());
excludeExcludedPages(redactionLog, analyzeRequest.getExcludedPages());
var redactionLogChange = redactionChangeLogService.computeChanges(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), redactionLog);
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.REDACTION_LOG, redactionLogChange
.getRedactionLog());
long duration = System.currentTimeMillis() - startTime;
return AnalyzeResult.builder()
.dossierId(analyzeRequest.getDossierId())
.fileId(analyzeRequest.getFileId())
.duration(duration)
.numberOfPages(text.getNumberOfPages())
.hasUpdates(redactionLogChange.isHasChanges())
.analysisVersion(redactionServiceSettings.getAnalysisVersion())
.rulesVersion(redactionLog.getRulesVersion())
.dictionaryVersion(redactionLog.getDictionaryVersion())
.legalBasisVersion(redactionLog.getLegalBasisVersion())
.dossierDictionaryVersion(redactionLog.getDossierDictionaryVersion())
.wasReanalyzed(isReanalysis)
.build();
}
private Set<String> getRelevantManuallyModifiedAnnotationIds(ManualRedactions manualRedactions) {
if (manualRedactions == null) {
return new HashSet<>();
}
return Stream.concat(manualRedactions.getManualLegalBasisChanges()
.stream()
.map(ManualLegalBasisChange::getId), Stream.concat(manualRedactions.getImageRecategorizations()
.stream()
.map(ManualImageRecategorization::getId), Stream.concat(manualRedactions.getIdsToRemove()
.stream()
.map(IdRemoval::getId), manualRedactions.getForceRedacts().stream().map(ManualForceRedact::getId))))
.collect(Collectors.toSet());
}
public Image convert(RedactionLogEntry entry) {
Rectangle position = entry.getPositions().get(0);
return Image.builder()
.type(entry.getType())
.position(new RedRectangle2D(position.getTopLeft().getX(), position.getTopLeft()
.getY(), position.getWidth(), position.getHeight()))
.sectionNumber(entry.getSectionNumber())
.section(entry.getSection())
.page(position.getPage())
.hasTransparency(entry.isImageHasTransparency())
.build();
}
private void excludeExcludedPages(RedactionLog redactionLog, Set<Integer> excludedPages) {
if (excludedPages != null && !excludedPages.isEmpty()) {
redactionLog.getRedactionLogEntry().forEach(entry -> entry.getPositions().forEach(pos -> {
if (excludedPages.contains(pos.getPage())) {
entry.setExcluded(true);
}
}));
}
}
}

View File

@ -1,19 +1,10 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import static com.iqser.red.service.configuration.v1.api.resource.DictionaryResource.GLOBAL_DOSSIER;
import com.iqser.red.service.configuration.v1.api.model.Colors;
import com.iqser.red.service.configuration.v1.api.model.DictionaryEntry;
import com.iqser.red.service.configuration.v1.api.model.TypeResponse;
import com.iqser.red.service.configuration.v1.api.model.TypeResult;
import com.iqser.red.service.persistence.service.v1.api.model.data.configuration.Colors;
import com.iqser.red.service.persistence.service.v1.api.model.data.configuration.DictionaryEntry;
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrement;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrementValue;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryRepresentation;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryVersion;
import com.iqser.red.service.redaction.v1.server.redaction.model.*;
import feign.FeignException;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
@ -25,6 +16,7 @@ import java.awt.Color;
import java.util.*;
import java.util.stream.Collectors;
@Slf4j
@Service
@RequiredArgsConstructor
@ -32,37 +24,38 @@ public class DictionaryService {
private final DictionaryClient dictionaryClient;
private final Map<String, DictionaryRepresentation> dictionariesByRuleSets = new HashMap<>();
private final Map<String, DictionaryRepresentation> dictionariesByDossierTemplate = new HashMap<>();
private final Map<String, DictionaryRepresentation> dictionariesByDossier = new HashMap<>();
public DictionaryVersion updateDictionary(String ruleSetId, String dossierId) {
public DictionaryVersion updateDictionary(String dossierTemplateId, String dossierId) {
long rulesetDictionaryVersion = dictionaryClient.getVersion(ruleSetId, GLOBAL_DOSSIER);
var rulesetDictionary = dictionariesByRuleSets.get(ruleSetId);
if (rulesetDictionary == null || rulesetDictionaryVersion > rulesetDictionary.getDictionaryVersion()) {
updateDictionaryEntry(ruleSetId, rulesetDictionaryVersion, GLOBAL_DOSSIER);
log.info("Updating dictionary data for dossierTemplate {} and dossier {}", dossierTemplateId, dossierId);
long dossierTemplateDictionaryVersion = dictionaryClient.getVersion(dossierTemplateId);
var dossierTemplateDictionary = dictionariesByDossierTemplate.get(dossierTemplateId);
if (dossierTemplateDictionary == null || dossierTemplateDictionaryVersion > dossierTemplateDictionary.getDictionaryVersion()) {
updateDictionaryEntry(dossierTemplateId, dossierTemplateDictionaryVersion, null);
}
long dossierDictionaryVersion = dictionaryClient.getVersion(ruleSetId, dossierId);
long dossierDictionaryVersion = dictionaryClient.getVersionForDossier(dossierId);
var dossierDictionary = dictionariesByDossier.get(dossierId);
if (dossierDictionary == null || dossierDictionaryVersion > dossierDictionary.getDictionaryVersion()) {
updateDictionaryEntry(ruleSetId, dossierDictionaryVersion, dossierId);
updateDictionaryEntry(dossierTemplateId, dossierDictionaryVersion, dossierId);
}
return DictionaryVersion.builder().rulesetVersion(rulesetDictionaryVersion).dossierVersion(dossierDictionaryVersion).build();
return DictionaryVersion.builder().dossierTemplateVersion(dossierTemplateDictionaryVersion).dossierVersion(dossierDictionaryVersion).build();
}
public DictionaryIncrement getDictionaryIncrements(String ruleSetId, DictionaryVersion fromVersion, String dossierId) {
public DictionaryIncrement getDictionaryIncrements(String dossierTemplateId, DictionaryVersion fromVersion, String dossierId) {
DictionaryVersion version = updateDictionary(ruleSetId, dossierId);
DictionaryVersion version = updateDictionary(dossierTemplateId, dossierId);
Set<DictionaryIncrementValue> newValues = new HashSet<>();
List<DictionaryModel> dictionaryModels = dictionariesByRuleSets.get(ruleSetId).getDictionary();
List<DictionaryModel> dictionaryModels = dictionariesByDossierTemplate.get(dossierTemplateId).getDictionary();
dictionaryModels.forEach(dictionaryModel -> {
dictionaryModel.getEntries().forEach(dictionaryEntry -> {
if (dictionaryEntry.getVersion() > fromVersion.getRulesetVersion()) {
if (dictionaryEntry.getVersion() > fromVersion.getDossierTemplateVersion()) {
newValues.add(new DictionaryIncrementValue(dictionaryEntry.getValue(), dictionaryModel.isCaseInsensitive()));
}
});
@ -83,35 +76,35 @@ public class DictionaryService {
}
private void updateDictionaryEntry(String ruleSetId, long version, String dossierId) {
private void updateDictionaryEntry(String dossierTemplateId, long version, String dossierId) {
try {
DictionaryRepresentation dictionaryRepresentation = new DictionaryRepresentation();
TypeResponse typeResponse = dictionaryClient.getAllTypes(ruleSetId, dossierId);
if (typeResponse != null && CollectionUtils.isNotEmpty(typeResponse.getTypes())) {
var typeResponse = dossierId == null ? dictionaryClient.getAllTypesForDossierTemplate(dossierTemplateId) : dictionaryClient.getAllTypesForDossier(dossierId);
if (typeResponse != null && CollectionUtils.isNotEmpty(typeResponse)) {
List<DictionaryModel> dictionary = typeResponse.getTypes()
List<DictionaryModel> dictionary = typeResponse
.stream()
.map(t -> new DictionaryModel(t.getType(), t.getRank(), convertColor(t.getHexColor()), t.isCaseInsensitive(), t
.isHint(), t.isRecommendation(), convertEntries(t, dossierId), new HashSet<>(),dossierId.equals(GLOBAL_DOSSIER) ? false : true))
.isHint(), t.isRecommendation(), convertEntries(t.getId()), new HashSet<>(), dossierId != null))
.sorted(Comparator.comparingInt(DictionaryModel::getRank).reversed())
.collect(Collectors.toList());
dictionary.forEach(dm -> dictionaryRepresentation.getLocalAccessMap().put(dm.getType(), dm));
Colors colors = dictionaryClient.getColors(ruleSetId);
Colors colors = dictionaryClient.getColors(dossierTemplateId);
dictionaryRepresentation.setDefaultColor(convertColor(colors.getDefaultColor()));
dictionaryRepresentation.setRequestAddColor(convertColor(colors.getRequestAdd()));
dictionaryRepresentation.setRequestRemoveColor(convertColor(colors.getRequestRemove()));
dictionaryRepresentation.setNotRedactedColor(convertColor(colors.getNotRedacted()));
dictionaryRepresentation.setRuleSetId(ruleSetId);
dictionaryRepresentation.setDossierTemplateId(dossierTemplateId);
dictionaryRepresentation.setDictionaryVersion(version);
dictionaryRepresentation.setDictionary(dictionary);
if(dossierId.equals(GLOBAL_DOSSIER)) {
dictionariesByRuleSets.put(ruleSetId, dictionaryRepresentation);
if(dossierId == null) {
dictionariesByDossierTemplate.put(dossierTemplateId, dictionaryRepresentation);
} else {
dictionariesByDossier.put(dossierId, dictionaryRepresentation);
}
@ -123,26 +116,28 @@ public class DictionaryService {
}
public void updateExternalDictionary(Dictionary dictionary, String ruleSetId) {
public void updateExternalDictionary(Dictionary dictionary, String dossierTemplateId) {
dictionary.getDictionaryModels().forEach(dm -> {
if (dm.isRecommendation() && !dm.getLocalEntries().isEmpty()) {
dictionaryClient.addEntries(dm.getType(), ruleSetId, new ArrayList<>(dm.getLocalEntries()), false, GLOBAL_DOSSIER);
long externalVersion = dictionaryClient.getVersion(ruleSetId, GLOBAL_DOSSIER);
if (externalVersion == dictionary.getVersion().getRulesetVersion() + 1) {
dictionary.getVersion().setRulesetVersion(externalVersion);
dictionaryClient.addEntries(dm.getType(), new ArrayList<>(dm.getLocalEntries()), false);
long externalVersion = dictionaryClient.getVersion(dossierTemplateId);
if (externalVersion == dictionary.getVersion().getDossierTemplateVersion() + 1) {
dictionary.getVersion().setDossierTemplateVersion(externalVersion);
}
}
});
}
private Set<DictionaryEntry> convertEntries(TypeResult t, String dossierId) {
private Set<DictionaryEntry> convertEntries(String typeId) {
Set<DictionaryEntry> entries = new HashSet<>(dictionaryClient.getDictionaryForType(t.getType(), t.getRuleSetId(), dossierId)
var type = dictionaryClient.getDictionaryForType(typeId);
Set<DictionaryEntry> entries = new HashSet<>(type
.getEntries());
if (t.isCaseInsensitive()) {
if (type.isCaseInsensitive()) {
entries.forEach(entry -> entry.setValue(entry.getValue().toLowerCase(Locale.ROOT)));
}
return entries;
@ -156,9 +151,9 @@ public class DictionaryService {
}
public boolean isCaseInsensitiveDictionary(String type, String ruleSetId) {
public boolean isCaseInsensitiveDictionary(String type, String dossierTemplateId) {
DictionaryModel dictionaryModel = dictionariesByRuleSets.get(ruleSetId).getLocalAccessMap().get(type);
DictionaryModel dictionaryModel = dictionariesByDossierTemplate.get(dossierTemplateId).getLocalAccessMap().get(type);
if (dictionaryModel != null) {
return dictionaryModel.isCaseInsensitive();
}
@ -166,19 +161,19 @@ public class DictionaryService {
}
public float[] getColor(String type, String ruleSetId) {
public float[] getColor(String type, String dossierTemplateId) {
DictionaryModel model = dictionariesByRuleSets.get(ruleSetId).getLocalAccessMap().get(type);
DictionaryModel model = dictionariesByDossierTemplate.get(dossierTemplateId).getLocalAccessMap().get(type);
if (model != null) {
return model.getColor();
}
return dictionariesByRuleSets.get(ruleSetId).getDefaultColor();
return dictionariesByDossierTemplate.get(dossierTemplateId).getDefaultColor();
}
public boolean isHint(String type, String ruleSetId) {
public boolean isHint(String type, String dossierTemplateId) {
DictionaryModel model = dictionariesByRuleSets.get(ruleSetId).getLocalAccessMap().get(type);
DictionaryModel model = dictionariesByDossierTemplate.get(dossierTemplateId).getLocalAccessMap().get(type);
if (model != null) {
return model.isHint();
}
@ -186,9 +181,9 @@ public class DictionaryService {
}
public boolean isRecommendation(String type, String ruleSetId) {
public boolean isRecommendation(String type, String dossierTemplateId) {
DictionaryModel model = dictionariesByRuleSets.get(ruleSetId).getLocalAccessMap().get(type);
DictionaryModel model = dictionariesByDossierTemplate.get(dossierTemplateId).getLocalAccessMap().get(type);
if (model != null) {
return model.isRecommendation();
}
@ -196,12 +191,12 @@ public class DictionaryService {
}
public Dictionary getDeepCopyDictionary(String ruleSetId, String dossierId) {
public Dictionary getDeepCopyDictionary(String dossierTemplateId, String dossierId) {
List<DictionaryModel> copy = new ArrayList<>();
var rulesetRepresentation = dictionariesByRuleSets.get(ruleSetId);
rulesetRepresentation.getDictionary().forEach(dm -> {
var dossierTemplateRepresentation = dictionariesByDossierTemplate.get(dossierTemplateId);
dossierTemplateRepresentation.getDictionary().forEach(dm -> {
copy.add(SerializationUtils.clone(dm));
});
@ -215,25 +210,25 @@ public class DictionaryService {
dossierDictionaryVersion = dossierRepresentation.getDictionaryVersion();
}
return new Dictionary(copy, DictionaryVersion.builder().rulesetVersion(rulesetRepresentation.getDictionaryVersion()).dossierVersion(dossierDictionaryVersion).build());
return new Dictionary(copy.stream().sorted(Comparator.comparingInt(DictionaryModel::getRank).reversed()).collect(Collectors.toList()), DictionaryVersion.builder().dossierTemplateVersion(dossierTemplateRepresentation.getDictionaryVersion()).dossierVersion(dossierDictionaryVersion).build());
}
public float[] getRequestRemoveColor(String ruleSetId) {
public float[] getRequestRemoveColor(String dossierTemplateId) {
return dictionariesByRuleSets.get(ruleSetId).getRequestAddColor();
return dictionariesByDossierTemplate.get(dossierTemplateId).getRequestRemoveColor();
}
public float[] getNotRedactedColor(String ruleSetId) {
public float[] getNotRedactedColor(String dossierTemplateId) {
return dictionariesByRuleSets.get(ruleSetId).getNotRedactedColor();
return dictionariesByDossierTemplate.get(dossierTemplateId).getNotRedactedColor();
}
public float[] getRequestAddColor(String ruleSetId) {
public float[] getRequestAddColor(String dossierTemplateId) {
return dictionariesByRuleSets.get(ruleSetId).getRequestAddColor();
return dictionariesByDossierTemplate.get(dossierTemplateId).getRequestAddColor();
}
}

View File

@ -1,6 +1,5 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import com.iqser.red.service.configuration.v1.api.model.RulesResponse;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.service.redaction.v1.server.exception.RulesValidationException;
import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
@ -28,14 +27,14 @@ public class DroolsExecutionService {
private final Map<String, KieContainer> kieContainers = new HashMap<>();
private final Map<String, Long> rulesVersionPerRuleSetId = new HashMap<>();
private final Map<String, Long> rulesVersionPerDossierTemplateId = new HashMap<>();
public KieContainer getKieContainer(String ruleSetId) {
public KieContainer getKieContainer(String dossierTemplateId) {
KieContainer container = kieContainers.get(ruleSetId);
KieContainer container = kieContainers.get(dossierTemplateId);
if (container == null) {
return createOrUpdateKieContainer(ruleSetId);
return createOrUpdateKieContainer(dossierTemplateId);
} else {
return container;
}
@ -55,43 +54,43 @@ public class DroolsExecutionService {
}
public KieContainer updateRules(String ruleSetId) {
public KieContainer updateRules(String dossierTemplateId) {
long version = rulesClient.getVersion(ruleSetId);
Long rulesVersion = rulesVersionPerRuleSetId.get(ruleSetId);
long version = rulesClient.getVersion(dossierTemplateId);
Long rulesVersion = rulesVersionPerDossierTemplateId.get(dossierTemplateId);
if (rulesVersion == null) {
rulesVersion = -1L;
}
if (version > rulesVersion.longValue()) {
rulesVersionPerRuleSetId.put(ruleSetId, version);
return createOrUpdateKieContainer(ruleSetId);
if (version > rulesVersion) {
rulesVersionPerDossierTemplateId.put(dossierTemplateId, version);
return createOrUpdateKieContainer(dossierTemplateId);
}
return getKieContainer(ruleSetId);
return getKieContainer(dossierTemplateId);
}
private KieContainer createOrUpdateKieContainer(String ruleSetId) {
private KieContainer createOrUpdateKieContainer(String dossierTemplateId) {
try {
RulesResponse rules = rulesClient.getRules(ruleSetId);
if (rules == null || StringUtils.isEmpty(rules.getRules())) {
var rules = rulesClient.getRules(dossierTemplateId);
if (rules == null || StringUtils.isEmpty(rules.getValue())) {
throw new RuntimeException("Rules cannot be empty.");
}
KieServices kieServices = KieServices.Factory.get();
KieModule kieModule = getKieModule(ruleSetId, rules.getRules(), kieServices);
KieModule kieModule = getKieModule(dossierTemplateId, rules.getValue(), kieServices);
var container = kieContainers.get(ruleSetId);
var container = kieContainers.get(dossierTemplateId);
if (container != null) {
container.updateToVersion(kieModule.getReleaseId());
return container;
}
container = kieServices.newKieContainer(kieModule.getReleaseId());
kieContainers.put(ruleSetId, container);
kieContainers.put(dossierTemplateId, container);
return container;
} catch (Exception e) {
throw new RulesValidationException("Could not update rules: " + e.getMessage(), e);
@ -100,11 +99,11 @@ public class DroolsExecutionService {
}
private KieModule getKieModule(String ruleSetId, String rules, KieServices kieServices) {
private KieModule getKieModule(String dossierTemplateId, String rules, KieServices kieServices) {
KieFileSystem kieFileSystem = kieServices.newKieFileSystem();
InputStream input = new ByteArrayInputStream(rules.getBytes(StandardCharsets.UTF_8));
kieFileSystem.write("src/main/resources/drools/rules" + ruleSetId + ".drl", kieServices.getResources()
kieFileSystem.write("src/main/resources/drools/rules" + dossierTemplateId + ".drl", kieServices.getResources()
.newInputStreamResource(input));
KieBuilder kieBuilder = kieServices.newKieBuilder(kieFileSystem);
kieBuilder.buildAll();
@ -122,13 +121,13 @@ public class DroolsExecutionService {
}
public long getRulesVersion(String ruleSetId) {
public long getRulesVersion(String dossierTemplateId) {
Long rulesVersion = rulesVersionPerRuleSetId.get(ruleSetId);
Long rulesVersion = rulesVersionPerDossierTemplateId.get(dossierTemplateId);
if (rulesVersion == null) {
return -1;
}
return rulesVersion.longValue();
return rulesVersion;
}
}

View File

@ -1,64 +1,143 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry;
import com.iqser.red.service.redaction.v1.model.ManualRedactions;
import com.iqser.red.service.redaction.v1.model.Point;
import com.iqser.red.service.redaction.v1.model.Rectangle;
import com.iqser.red.service.redaction.v1.model.SectionArea;
import com.iqser.red.service.redaction.v1.server.classification.model.*;
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
import com.iqser.red.service.redaction.v1.server.redaction.model.*;
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections4.CollectionUtils;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.lang3.StringUtils;
import org.kie.api.runtime.KieContainer;
import org.springframework.stereotype.Service;
import java.util.*;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
import com.iqser.red.service.redaction.v1.model.Engine;
import com.iqser.red.service.redaction.v1.model.ManualImageRecategorization;
import com.iqser.red.service.redaction.v1.model.Status;
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
import com.iqser.red.service.redaction.v1.server.client.model.NerEntities;
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryModel;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
import com.iqser.red.service.redaction.v1.server.redaction.model.PageEntities;
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
import com.iqser.red.service.redaction.v1.server.redaction.model.SectionSearchableTextPair;
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@Service
@RequiredArgsConstructor
public class EntityRedactionService {
private final DictionaryService dictionaryService;
private final RedactionServiceSettings redactionServiceSettings;
private final DroolsExecutionService droolsExecutionService;
private final SurroundingWordsService surroundingWordsService;
public void processDocument(Document classifiedDoc, String ruleSetId, ManualRedactions manualRedactions, String dossierId) {
public PageEntities findEntities(Dictionary dictionary, List<SectionText> sectionTexts, KieContainer kieContainer,
AnalyzeRequest analyzeRequest, NerEntities nerEntities) {
dictionaryService.updateDictionary(ruleSetId, dossierId);
KieContainer container = droolsExecutionService.updateRules(ruleSetId);
long rulesVersion = droolsExecutionService.getRulesVersion(ruleSetId);
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(ruleSetId, dossierId);
Set<Entity> documentEntities = new HashSet<>(findEntities(classifiedDoc, container, manualRedactions, dictionary, false, null));
Map<Integer, Set<Image>> imagesPerPage = new HashMap<>();
Set<Entity> entities = findEntities(sectionTexts, dictionary, kieContainer, analyzeRequest, false, null, imagesPerPage, nerEntities);
if (dictionary.hasLocalEntries()) {
Map<Integer, Set<Entity>> hintsPerSectionNumber = new HashMap<>();
documentEntities.stream().forEach(entity -> {
if (dictionary.isHint(entity.getType()) && entity.isDictionaryEntry()) {
hintsPerSectionNumber.computeIfAbsent(entity.getSectionNumber(), (x) -> new HashSet<>())
.add(entity);
}
});
Set<Entity> foundByLocal = findEntities(classifiedDoc, container, manualRedactions, dictionary, true, hintsPerSectionNumber);
EntitySearchUtils.addEntitiesWithHigherRank(documentEntities, foundByLocal, dictionary);
EntitySearchUtils.removeEntitiesContainedInLarger(documentEntities);
Map<Integer, Set<Entity>> hintsPerSectionNumber = getHintsPerSection(entities, dictionary);
Set<Entity> foundByLocal = findEntities(sectionTexts, dictionary, kieContainer, analyzeRequest, true, hintsPerSectionNumber, imagesPerPage, nerEntities);
EntitySearchUtils.addEntitiesWithHigherRank(entities, foundByLocal, dictionary);
EntitySearchUtils.removeEntitiesContainedInLarger(entities);
}
for (Entity entity : documentEntities) {
Map<Integer, List<Entity>> entitiesPerPage = convertToEnititesPerPage(entities);
return new PageEntities(entitiesPerPage, imagesPerPage);
}
public Set<Entity> findEntities(List<SectionText> reanalysisSections, Dictionary dictionary,
KieContainer kieContainer, AnalyzeRequest analyzeRequest, boolean local,
Map<Integer, Set<Entity>> hintsPerSectionNumber,
Map<Integer, Set<Image>> imagesPerPage, NerEntities nerEntities) {
List<SectionSearchableTextPair> sectionSearchableTextPairs = new ArrayList<>();
for (SectionText reanalysisSection : reanalysisSections) {
Set<Entity> entities = findEntities(reanalysisSection.getSearchableText(), reanalysisSection.getHeadline(), reanalysisSection
.getSectionNumber(), dictionary, local, nerEntities, reanalysisSection.getCellStarts());
if (reanalysisSection.getCellStarts() != null && !reanalysisSection.getCellStarts().isEmpty()) {
surroundingWordsService.addSurroundingText(entities, reanalysisSection.getSearchableText(), dictionary, reanalysisSection
.getCellStarts());
} else {
surroundingWordsService.addSurroundingText(entities, reanalysisSection.getSearchableText(), dictionary);
}
if (!local && reanalysisSection.getImages() != null && !reanalysisSection.getImages()
.isEmpty() && analyzeRequest.getManualRedactions() != null && analyzeRequest.getManualRedactions()
.getImageRecategorizations() != null) {
for (Image image : reanalysisSection.getImages()) {
String imageId = IdBuilder.buildId(image.getPosition(), image.getPage());
for (ManualImageRecategorization imageRecategorization : analyzeRequest.getManualRedactions()
.getImageRecategorizations()) {
if (imageRecategorization.getStatus().equals(Status.APPROVED) && imageRecategorization.getId()
.equals(imageId)) {
image.setType(imageRecategorization.getType());
}
}
}
}
sectionSearchableTextPairs.add(new SectionSearchableTextPair(Section.builder()
.isLocal(false)
.dictionaryTypes(dictionary.getTypes())
.entities(hintsPerSectionNumber != null && hintsPerSectionNumber.containsKey(reanalysisSection.getSectionNumber()) ? Stream
.concat(entities.stream(), hintsPerSectionNumber.get(reanalysisSection.getSectionNumber())
.stream())
.collect(Collectors.toSet()) : entities)
.text(reanalysisSection.getSearchableText().getAsStringWithLinebreaks())
.searchText(reanalysisSection.getSearchableText().toString())
.headline(reanalysisSection.getHeadline())
.sectionNumber(reanalysisSection.getSectionNumber())
.tabularData(reanalysisSection.getTabularData())
.searchableText(reanalysisSection.getSearchableText())
.dictionary(dictionary)
.images(reanalysisSection.getImages())
.fileAttributes(analyzeRequest.getFileAttributes())
.build(), reanalysisSection.getSearchableText()));
}
Set<Entity> entities = new HashSet<>();
sectionSearchableTextPairs.forEach(sectionSearchableTextPair -> {
Section analysedSection = droolsExecutionService.executeRules(kieContainer, sectionSearchableTextPair.getSection());
EntitySearchUtils.removeEntitiesContainedInLarger(analysedSection.getEntities());
entities.addAll(analysedSection.getEntities());
if (!local) {
for (Image image : analysedSection.getImages()) {
imagesPerPage.computeIfAbsent(image.getPage(), (a) -> new HashSet<>()).add(image);
}
addLocalValuesToDictionary(analysedSection, dictionary);
}
});
return entities;
}
private Map<Integer, List<Entity>> convertToEnititesPerPage(Set<Entity> entities) {
Map<Integer, List<Entity>> entitiesPerPage = new HashMap<>();
for (Entity entity : entities) {
Map<Integer, List<EntityPositionSequence>> sequenceOnPage = new HashMap<>();
for (EntityPositionSequence entityPositionSequence : entity.getPositionSequences()) {
sequenceOnPage.computeIfAbsent(entityPositionSequence.getPageNumber(), (x) -> new ArrayList<>())
@ -66,301 +145,59 @@ public class EntityRedactionService {
}
for (Map.Entry<Integer, List<EntityPositionSequence>> entry : sequenceOnPage.entrySet()) {
classifiedDoc.getEntities()
.computeIfAbsent(entry.getKey(), (x) -> new ArrayList<>())
entitiesPerPage.computeIfAbsent(entry.getKey(), (x) -> new ArrayList<>())
.add(new Entity(entity.getWord(), entity.getType(), entity.isRedaction(), entity.getRedactionReason(), entry
.getValue(), entity.getHeadline(), entity.getMatchedRule(), entity.getSectionNumber(), entity
.getLegalBasis(), entity.isDictionaryEntry(), entity.getTextBefore(), entity.getTextAfter(), entity
.getStart(), entity.getEnd(), entity.isDossierDictionaryEntry()));
.getStart(), entity.getEnd(), entity.isDossierDictionaryEntry(), entity.getEngines(), entity.getReferences()));
}
}
dictionaryService.updateExternalDictionary(dictionary, ruleSetId);
classifiedDoc.setDictionaryVersion(dictionary.getVersion());
classifiedDoc.setRulesVersion(rulesVersion);
return entitiesPerPage;
}
private Set<Entity> findEntities(Document classifiedDoc, KieContainer kieContainer,
ManualRedactions manualRedactions, Dictionary dictionary, boolean local,
Map<Integer, Set<Entity>> hintsPerSectionNumber) {
private Map<Integer, Set<Entity>> getHintsPerSection(Set<Entity> entities, Dictionary dictionary) {
Set<Entity> documentEntities = new HashSet<>();
AtomicInteger sectionNumber = new AtomicInteger(1);
List<SectionSearchableTextPair> sectionSearchableTextPairs = new ArrayList<>();
for (Paragraph paragraph : classifiedDoc.getParagraphs()) {
List<Table> tables = paragraph.getTables();
for (Table table : tables) {
if (table.getColCount() == 2) {
sectionSearchableTextPairs.addAll(processTableAsOneText(classifiedDoc, table, manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber));
} else {
sectionSearchableTextPairs.addAll(processTablePerRow(classifiedDoc, table, manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber));
}
sectionNumber.incrementAndGet();
Map<Integer, Set<Entity>> hintsPerSectionNumber = new HashMap<>();
entities.stream().forEach(entity -> {
if (dictionary.isHint(entity.getType()) && entity.isDictionaryEntry()) {
hintsPerSectionNumber.computeIfAbsent(entity.getSectionNumber(), (x) -> new HashSet<>()).add(entity);
}
sectionSearchableTextPairs.add(processText(classifiedDoc, paragraph.getSearchableText(), paragraph.getTextBlocks(), paragraph
.getHeadline(), manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber, paragraph
.getImages()));
sectionNumber.incrementAndGet();
}
for (Header header : classifiedDoc.getHeaders()) {
sectionSearchableTextPairs.add(processText(classifiedDoc, header.getSearchableText(), header.getTextBlocks(), "Header", manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber, new ArrayList<>()));
sectionNumber.incrementAndGet();
}
for (Footer footer : classifiedDoc.getFooters()) {
sectionSearchableTextPairs.add(processText(classifiedDoc, footer.getSearchableText(), footer.getTextBlocks(), "Footer", manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber, new ArrayList<>()));
sectionNumber.incrementAndGet();
}
for (UnclassifiedText unclassifiedText : classifiedDoc.getUnclassifiedTexts()) {
sectionSearchableTextPairs.add(processText(classifiedDoc, unclassifiedText.getSearchableText(), unclassifiedText
.getTextBlocks(), "", manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber, new ArrayList<>()));
sectionNumber.incrementAndGet();
}
sectionSearchableTextPairs.forEach(sectionSearchableTextPair -> {
Section analysedRowSection = droolsExecutionService.executeRules(kieContainer, sectionSearchableTextPair.getSection());
documentEntities.addAll(analysedRowSection.getEntities());
for (Image image : analysedRowSection.getImages()) {
classifiedDoc.getImages().computeIfAbsent(image.getPage(), (a) -> new HashSet<>()).add(image);
}
analysedRowSection.getLocalDictionaryAdds().keySet().forEach(key -> {
if (dictionary.isRecommendation(key)) {
analysedRowSection.getLocalDictionaryAdds().get(key).forEach(value -> {
if (!dictionary.containsValue(key, value)) {
dictionary.getLocalAccessMap().get(key).getLocalEntries().add(value);
}
});
} else {
analysedRowSection.getLocalDictionaryAdds().get(key).forEach(value -> {
if (dictionary.getLocalAccessMap().get(key) == null) {
log.warn("Dictionary {} is null", key);
}
if (dictionary.getLocalAccessMap().get(key).getLocalEntries() == null) {
log.warn("Dictionary {} localEntries is null", key);
}
dictionary.getLocalAccessMap().get(key).getLocalEntries().add(value);
});
}
});
});
return documentEntities;
return hintsPerSectionNumber;
}
private List<SectionSearchableTextPair> processTablePerRow(Document classifiedDoc, Table table,
ManualRedactions manualRedactions,
AtomicInteger sectionNumber, Dictionary dictionary,
boolean local,
Map<Integer, Set<Entity>> hintsPerSectionNumber) {
private void addLocalValuesToDictionary(Section analysedSection, Dictionary dictionary) {
List<SectionSearchableTextPair> sectionSearchableTextPairs = new ArrayList<>();
analysedSection.getLocalDictionaryAdds().keySet().forEach(key -> {
if (dictionary.isRecommendation(key)) {
analysedSection.getLocalDictionaryAdds().get(key).forEach(value -> {
if (!dictionary.containsValue(key, value)) {
dictionary.getLocalAccessMap().get(key).getLocalEntries().add(value);
}
});
} else {
analysedSection.getLocalDictionaryAdds().get(key).forEach(value -> {
for (List<Cell> row : table.getRows()) {
SearchableText searchableRow = new SearchableText();
Map<String, CellValue> tabularData = new HashMap<>();
int start = 0;
List<Integer> cellStarts = new ArrayList<>();
SectionText sectionText = new SectionText();
for (Cell cell : row) {
if (dictionary.getLocalAccessMap().get(key) == null) {
log.warn("Dictionary {} is null", key);
}
if (CollectionUtils.isEmpty(cell.getTextBlocks())) {
continue;
}
SectionArea sectionArea = new SectionArea(new Point((float) cell.getX(), (float) cell.getY()), (float) cell
.getWidth(), (float) cell.getHeight(), cell.getTextBlocks()
.get(0)
.getSequences()
.get(0)
.getPage());
sectionText.getSectionAreas().add(sectionArea);
sectionText.getTextBlocks().addAll(cell.getTextBlocks());
addSectionToManualRedactions(cell.getTextBlocks(), manualRedactions, table.getHeadline(), sectionNumber.intValue());
int cellStart = start;
if (!cell.isHeaderCell()) {
cell.getHeaderCells().forEach(headerCell -> {
StringBuilder headerBuilder = new StringBuilder();
headerCell.getTextBlocks().forEach(textBlock -> headerBuilder.append(textBlock.getText()));
String headerName = headerBuilder.toString()
.replaceAll("\n", "")
.replaceAll(" ", "")
.replaceAll("-", "");
sectionArea.setHeader(headerName);
tabularData.put(headerName, new CellValue(cell.getTextBlocks(), cellStart));
});
}
for (TextBlock textBlock : cell.getTextBlocks()) {
// TODO avoid cell overlap merging.
searchableRow.addAll(textBlock.getSequences());
}
cellStarts.add(cellStart);
start = start + cell.toString().trim().length() + 1;
if (dictionary.getLocalAccessMap().get(key).getLocalEntries() == null) {
log.warn("Dictionary {} localEntries is null", key);
}
dictionary.getLocalAccessMap().get(key).getLocalEntries().add(value);
});
}
Set<Entity> rowEntities = findEntities(searchableRow, table.getHeadline(), sectionNumber.intValue(), dictionary, local);
surroundingWordsService.addSurroundingText(rowEntities, searchableRow, dictionary, cellStarts);
sectionSearchableTextPairs.add(new SectionSearchableTextPair(Section.builder()
.isLocal(local)
.dictionaryTypes(dictionary.getTypes())
.entities(hintsPerSectionNumber != null && hintsPerSectionNumber.containsKey(sectionNumber.intValue()) ? Stream
.concat(rowEntities.stream(), hintsPerSectionNumber.get(sectionNumber.intValue()).stream())
.collect(Collectors.toSet()) : rowEntities)
.text(searchableRow.getAsStringWithLinebreaks())
.searchText(searchableRow.toString())
.headline(table.getHeadline())
.sectionNumber(sectionNumber.intValue())
.tabularData(tabularData)
.searchableText(searchableRow)
.dictionary(dictionary)
.build(), searchableRow));
if (!local) {
sectionText.setText(searchableRow.toString());
sectionText.setHeadline(table.getHeadline());
sectionText.setSectionNumber(sectionNumber.intValue());
sectionText.setTable(true);
sectionText.setTabularData(tabularData);
sectionText.setCellStarts(cellStarts);
classifiedDoc.getSectionText().add(sectionText);
}
sectionNumber.incrementAndGet();
}
return sectionSearchableTextPairs;
});
}
private List<SectionSearchableTextPair> processTableAsOneText(Document classifiedDoc, Table table,
ManualRedactions manualRedactions,
AtomicInteger sectionNumber, Dictionary dictionary,
boolean local,
Map<Integer, Set<Entity>> hintsPerSectionNumber) {
List<SectionSearchableTextPair> sectionSearchableTextPairs = new ArrayList<>();
SearchableText entireTableText = new SearchableText();
SectionText sectionText = new SectionText();
for (List<Cell> row : table.getRows()) {
for (Cell cell : row) {
if (CollectionUtils.isEmpty(cell.getTextBlocks())) {
continue;
}
if (!local) {
SectionArea sectionArea = new SectionArea(new Point((float) cell.getX(), (float) cell.getY()), (float) cell
.getWidth(), (float) cell.getHeight(), cell.getTextBlocks()
.get(0)
.getSequences()
.get(0)
.getPage());
sectionText.getTextBlocks().addAll(cell.getTextBlocks());
sectionText.getSectionAreas().add(sectionArea);
}
for (TextBlock textBlock : cell.getTextBlocks()) {
entireTableText.addAll(textBlock.getSequences());
}
addSectionToManualRedactions(cell.getTextBlocks(), manualRedactions, table.getHeadline(), sectionNumber.intValue());
}
}
Set<Entity> rowEntities = findEntities(entireTableText, table.getHeadline(), sectionNumber.intValue(), dictionary, local);
surroundingWordsService.addSurroundingText(rowEntities, entireTableText, dictionary);
sectionSearchableTextPairs.add(new SectionSearchableTextPair(Section.builder()
.isLocal(local)
.dictionaryTypes(dictionary.getTypes())
.entities(hintsPerSectionNumber != null && hintsPerSectionNumber.containsKey(sectionNumber.intValue()) ? Stream
.concat(rowEntities.stream(), hintsPerSectionNumber.get(sectionNumber.intValue()).stream())
.collect(Collectors.toSet()) : rowEntities)
.text(entireTableText.getAsStringWithLinebreaks())
.searchText(entireTableText.toString())
.headline(table.getHeadline())
.sectionNumber(sectionNumber.intValue())
.searchableText(entireTableText)
.dictionary(dictionary)
.build(), entireTableText));
if (!local) {
sectionText.setText(entireTableText.toString());
sectionText.setHeadline(table.getHeadline());
sectionText.setSectionNumber(sectionNumber.intValue());
sectionText.setTable(true);
classifiedDoc.getSectionText().add(sectionText);
}
return sectionSearchableTextPairs;
}
private SectionSearchableTextPair processText(Document classifiedDoc, SearchableText searchableText,
List<TextBlock> paragraphTextBlocks, String headline,
ManualRedactions manualRedactions, AtomicInteger sectionNumber,
Dictionary dictionary, boolean local,
Map<Integer, Set<Entity>> hintsPerSectionNumber,
List<PdfImage> images) {
if (!local) {
SectionText sectionText = new SectionText();
for (TextBlock paragraphTextBlock : paragraphTextBlocks) {
SectionArea sectionArea = new SectionArea(new Point(paragraphTextBlock.getMinX(), paragraphTextBlock.getMinY()), paragraphTextBlock
.getWidth(), paragraphTextBlock.getHeight(), paragraphTextBlock.getPage());
sectionText.getSectionAreas().add(sectionArea);
}
sectionText.setText(searchableText.toString());
sectionText.setHeadline(headline);
sectionText.setSectionNumber(sectionNumber.intValue());
sectionText.setTable(false);
sectionText.setImages(images.stream()
.map(image -> convert(image, sectionNumber.intValue(), headline))
.collect(Collectors.toSet()));
sectionText.setTextBlocks(paragraphTextBlocks);
classifiedDoc.getSectionText().add(sectionText);
}
addSectionToManualRedactions(paragraphTextBlocks, manualRedactions, headline, sectionNumber.intValue());
Set<Entity> entities = findEntities(searchableText, headline, sectionNumber.intValue(), dictionary, local);
surroundingWordsService.addSurroundingText(entities, searchableText, dictionary);
return new SectionSearchableTextPair(Section.builder()
.isLocal(local)
.dictionaryTypes(dictionary.getTypes())
.entities(hintsPerSectionNumber != null && hintsPerSectionNumber.containsKey(sectionNumber.intValue()) ? Stream
.concat(entities.stream(), hintsPerSectionNumber.get(sectionNumber.intValue()).stream())
.collect(Collectors.toSet()) : entities)
.text(searchableText.getAsStringWithLinebreaks())
.searchText(searchableText.toString())
.headline(headline)
.sectionNumber(sectionNumber.intValue())
.searchableText(searchableText)
.dictionary(dictionary)
.images(images.stream()
.map(image -> convert(image, sectionNumber.intValue(), headline))
.collect(Collectors.toSet()))
.build(), searchableText);
}
public Set<Entity> findEntities(SearchableText searchableText, String headline, int sectionNumber,
Dictionary dictionary, boolean local) {
private Set<Entity> findEntities(SearchableText searchableText, String headline, int sectionNumber,
Dictionary dictionary, boolean local, NerEntities nerEntities,
List<Integer> cellstarts) {
Set<Entity> found = new HashSet<>();
String searchableString = searchableText.toString();
@ -371,47 +208,52 @@ public class EntityRedactionService {
String lowercaseInputString = searchableString.toLowerCase();
for (DictionaryModel model : dictionary.getDictionaryModels()) {
if (model.isCaseInsensitive()) {
found.addAll(EntitySearchUtils.find(lowercaseInputString, model.getValues(local), model.getType(), headline, sectionNumber, local, model.isDossierDictionary()));
EntitySearchUtils.addOrAddEngine(found, EntitySearchUtils.find(lowercaseInputString, model.getValues(local), model
.getType(), headline, sectionNumber, !local, model.isDossierDictionary(), Engine.DICTIONARY));
} else {
found.addAll(EntitySearchUtils.find(searchableString, model.getValues(local), model.getType(), headline, sectionNumber, local, model.isDossierDictionary()));
EntitySearchUtils.addOrAddEngine(found, EntitySearchUtils.find(searchableString, model.getValues(local), model
.getType(), headline, sectionNumber, !local, model.isDossierDictionary(), Engine.DICTIONARY));
}
}
if (!local) {
Map<String, Set<String>> nerValuesPerType = getNerValues(sectionNumber, nerEntities, cellstarts);
nerValuesPerType.entrySet().forEach(entry -> {
EntitySearchUtils.addOrAddEngine(found, EntitySearchUtils.find(searchableString, entry.getValue(), entry
.getKey(), headline, sectionNumber, false, false, Engine.NER));
});
}
return EntitySearchUtils.clearAndFindPositions(found, searchableText, dictionary);
}
private void addSectionToManualRedactions(List<TextBlock> textBlocks, ManualRedactions manualRedactions,
String section, int sectionNumber) {
private Map<String, Set<String>> getNerValues(int sectionNumber, NerEntities nerEntities,
List<Integer> cellstarts) {
if (manualRedactions == null || manualRedactions.getEntriesToAdd().isEmpty()) {
return;
}
Map<String, Set<String>> nerValuesPerType = new HashMap<>();
for (TextBlock textBlock : textBlocks) {
for (ManualRedactionEntry manualRedactionEntry : manualRedactions.getEntriesToAdd()) {
for (Rectangle rectangle : manualRedactionEntry.getPositions()) {
if (textBlock.contains(rectangle)) {
manualRedactionEntry.setSection(section);
manualRedactionEntry.setSectionNumber(sectionNumber);
if (redactionServiceSettings.isEnableEntityRecognition() && nerEntities.getResult()
.containsKey(sectionNumber)) {
nerEntities.getResult().get(sectionNumber).forEach(res -> {
if (cellstarts == null || cellstarts.isEmpty()) {
nerValuesPerType.computeIfAbsent(res.getType(), (a) -> new HashSet<>())
.add(new String(Base64.decodeBase64(res.getValue().getBytes())));
} else {
boolean intersectsCellStart = false;
for (Integer cellStart : cellstarts) {
if (res.getStartOffset() < cellStart && cellStart < res.getEndOffset()) {
intersectsCellStart = true;
}
}
if (!intersectsCellStart) {
nerValuesPerType.computeIfAbsent(res.getType(), (a) -> new HashSet<>())
.add(new String(Base64.decodeBase64(res.getValue().getBytes())));
}
}
}
});
}
}
private Image convert(PdfImage pdfImage, int sectionNumber, String headline) {
return Image.builder()
.type(pdfImage.getImageType().equals(ImageType.OTHER) ? "image" : pdfImage.getImageType()
.name()
.toLowerCase(Locale.ROOT))
.position(pdfImage.getPosition())
.sectionNumber(sectionNumber)
.section(headline)
.page(pdfImage.getPage())
.build();
return nerValuesPerType;
}
}

View File

@ -0,0 +1,51 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import java.util.stream.Collectors;
import org.apache.commons.codec.binary.Base64;
import org.springframework.stereotype.Service;
import com.iqser.red.service.persistence.service.v1.api.model.FileType;
import com.iqser.red.service.redaction.v1.server.client.EntityRecognitionClient;
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionRequest;
import com.iqser.red.service.redaction.v1.server.client.model.EntityRecognitionSection;
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@Service
@RequiredArgsConstructor
public class NerAnalyserService {
private final RedactionStorageService redactionStorageService;
private final EntityRecognitionClient entityRecognitionClient;
private final RedactionServiceSettings redactionServiceSettings;
public void computeNerEntities(String dossierId, String fileId) {
if (redactionServiceSettings.isEnableEntityRecognition()) {
var text = redactionStorageService.getText(dossierId, fileId);
long start = System.currentTimeMillis();
var nerRequest = EntityRecognitionRequest.builder()
.data(text.getSectionTexts()
.stream()
.map(sectionText -> new EntityRecognitionSection(sectionText.getSectionNumber(), new String(Base64
.encodeBase64(sectionText
.getText().getBytes()))))
.collect(Collectors.toList()))
.build();
var nerResponse = entityRecognitionClient.findAuthors(nerRequest);
log.info("Computing NER entities took: {} ms for dossierId {} and fileId {}", System.currentTimeMillis() - start, dossierId, fileId);
redactionStorageService.storeObject(dossierId, fileId, FileType.NER_ENTITIES, nerResponse);
}
}
}

View File

@ -1,296 +0,0 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import com.iqser.red.service.file.management.v1.api.model.FileType;
import com.iqser.red.service.redaction.v1.model.*;
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
import com.iqser.red.service.redaction.v1.server.classification.model.Text;
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
import com.iqser.red.service.redaction.v1.server.exception.RedactionException;
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
import com.iqser.red.service.redaction.v1.server.redaction.model.*;
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import org.kie.api.runtime.KieContainer;
import org.springframework.stereotype.Service;
import org.springframework.web.bind.annotation.RequestBody;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@Slf4j
@Service
@RequiredArgsConstructor
/**
 * Performs full analysis and incremental reanalysis of a document's redactions.
 * Reanalysis only re-runs the rule engine on sections affected by manual
 * redactions or dictionary changes, instead of reprocessing the whole PDF.
 */
public class ReanalyzeService {
// Collaborators, injected by Lombok's @RequiredArgsConstructor.
private final DictionaryService dictionaryService;
private final DroolsExecutionService droolsExecutionService;
private final SurroundingWordsService surroundingWordsService;
private final EntityRedactionService entityRedactionService;
private final RedactionLogCreatorService redactionLogCreatorService;
private final RedactionStorageService redactionStorageService;
private final PdfSegmentationService pdfSegmentationService;
private final RedactionChangeLogService redactionChangeLogService;
private final AnalyzeResponseService analyzeResponseService;
private final LegalBasisClient legalBasisClient;
/**
 * Full (from-scratch) analysis: parses the original PDF, runs entity redaction
 * and redaction-log creation over the whole document, then stores the redaction
 * log, the extracted text and the section grid.
 *
 * @param analyzeRequest project/file ids, rule set id and any manual redactions
 * @return the analyze response built from the resulting redaction log
 * @throws RedactionException if loading or parsing the original document fails
 */
public AnalyzeResult analyze(AnalyzeRequest analyzeRequest) {
long startTime = System.currentTimeMillis();
var pageCount = 0;
Document classifiedDoc;
try {
// Fetch the ORIGIN file from storage and segment it into the classified document model.
var storedObjectStream = redactionStorageService.getStoredObject(RedactionStorageService.StorageIdUtils.getStorageId(analyzeRequest
.getProjectId(), analyzeRequest.getFileId(), FileType.ORIGIN));
classifiedDoc = pdfSegmentationService.parseDocument(storedObjectStream);
pageCount = classifiedDoc.getPages().size();
} catch (Exception e) {
// Any parsing/storage failure is surfaced uniformly as a RedactionException.
throw new RedactionException(e);
}
log.info("Document structure analysis successful, starting redaction analysis...");
// Find and redact entities in the classified document (mutates classifiedDoc).
entityRedactionService.processDocument(classifiedDoc, analyzeRequest.getRuleSetId(), analyzeRequest.getManualRedactions(), analyzeRequest
.getProjectId());
redactionLogCreatorService.createRedactionLog(classifiedDoc, pageCount, analyzeRequest.getManualRedactions(), analyzeRequest
.getRuleSetId());
log.info("Redaction analysis successful...");
var legalBasis = legalBasisClient.getLegalBasisMapping(analyzeRequest.getRuleSetId());
var redactionLog = new RedactionLog(classifiedDoc.getRedactionLogEntities(),legalBasis,
analyzeRequest.getRuleSetId(),
classifiedDoc.getDictionaryVersion().getRulesetVersion(),
classifiedDoc.getRulesVersion(),
classifiedDoc.getDictionaryVersion().getDossierVersion(),
legalBasisClient.getVersion(analyzeRequest.getRuleSetId()));
log.info("Analyzed with rules {} and dictionary {} for ruleSet: {}", classifiedDoc.getRulesVersion(), classifiedDoc
.getDictionaryVersion(), analyzeRequest.getRuleSetId());
// first create changelog - this only happens when we migrate files analyzed via the old process and we don't want to lose changeLog data
var changeLog = redactionChangeLogService.createAndStoreChangeLog(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), redactionLog);
// store redactionLog
redactionStorageService.storeObject(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), FileType.REDACTION_LOG, redactionLog);
// Persist extracted text and section grid alongside the redaction log.
redactionStorageService.storeObject(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), FileType.TEXT, new Text(pageCount, classifiedDoc
.getSectionText()));
redactionStorageService.storeObject(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), FileType.SECTION_GRID, classifiedDoc
.getSectionGrid());
long duration = System.currentTimeMillis() - startTime;
return analyzeResponseService.createAnalyzeResponse(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), duration, pageCount, redactionLog, changeLog);
}
/**
 * Incremental reanalysis: re-runs rules only on sections touched by manual
 * redactions or by dictionary increments since the stored redaction log's
 * dictionary version. Falls back to {@link #analyze(AnalyzeRequest)} when no
 * previous log/text exists yet.
 *
 * @param analyzeRequest project/file ids, rule set id and manual redactions
 * @return the analyze response for the (partially) re-analysed document
 */
@SneakyThrows
public AnalyzeResult reanalyze(@RequestBody AnalyzeRequest analyzeRequest) {
long startTime = System.currentTimeMillis();
var redactionLog = redactionStorageService.getRedactionLog(analyzeRequest.getProjectId(), analyzeRequest.getFileId());
var text = redactionStorageService.getText(analyzeRequest.getProjectId(), analyzeRequest.getFileId());
// not yet ready for reanalysis
if (redactionLog == null || text == null || text.getNumberOfPages() == 0) {
return analyze(analyzeRequest);
}
// Dictionary entries added/changed since the version the stored log was analysed with.
DictionaryIncrement dictionaryIncrement = dictionaryService.getDictionaryIncrements(analyzeRequest.getRuleSetId(), new DictionaryVersion(redactionLog
.getDictionaryVersion(), redactionLog.getDossierDictionaryVersion()), analyzeRequest.getProjectId());
Set<String> manualForceAndRemoveIds = getForceAndRemoveIds(analyzeRequest.getManualRedactions());
Map<String, List<Comment>> comments = null;
Set<ManualRedactionEntry> manualAdds = null;
if (analyzeRequest.getManualRedactions() != null) {
// TODO comments will be removed from redactionLog, so we ignore this first.
comments = analyzeRequest.getManualRedactions().getComments();
manualAdds = analyzeRequest.getManualRedactions().getEntriesToAdd();
}
// Collect the section numbers that need re-running, plus existing image entries per section.
Set<Integer> sectionsToReanalyse = new HashSet<>();
Map<Integer, Set<Image>> imageEntries = new HashMap<>();
for (RedactionLogEntry entry : redactionLog.getRedactionLogEntry()) {
if (entry.isManual() || manualForceAndRemoveIds.contains(entry.getId())) {
sectionsToReanalyse.add(entry.getSectionNumber());
}
if (entry.isImage() || entry.getType().equals("image")) {
imageEntries.computeIfAbsent(entry.getSectionNumber(), x -> new HashSet<>()).add(convert(entry));
}
}
for (SectionText sectionText : text.getSectionTexts()) {
// Sections containing any new dictionary value must be re-analysed.
if (EntitySearchUtils.sectionContainsAny(sectionText.getText(), dictionaryIncrement.getValues())) {
sectionsToReanalyse.add(sectionText.getSectionNumber());
}
if (manualAdds != null) {
// Locate each manual add's rectangles in the section layout and tag the
// add with its section headline/number (mutates the manualAdd entries).
for (SectionArea sectionArea : sectionText.getSectionAreas()) {
for (ManualRedactionEntry manualAdd : manualAdds) {
for (Rectangle manualPosition : manualAdd.getPositions()) {
if (sectionArea.contains(manualPosition)) {
manualAdd.setSection(sectionText.getHeadline());
manualAdd.setSectionNumber(sectionText.getSectionNumber());
}
}
}
}
}
}
log.info("Should reanalyze {} sections for request: {}", sectionsToReanalyse.size(), analyzeRequest);
// Nothing to re-run: just bump versions and re-store the log.
if (sectionsToReanalyse.isEmpty() && (manualAdds == null || manualAdds.isEmpty())) {
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement);
}
List<SectionText> reanalysisSections = new ArrayList<>();
for (SectionText sectionText : text.getSectionTexts()) {
if (sectionsToReanalyse.contains(sectionText.getSectionNumber())) {
reanalysisSections.add(sectionText);
}
}
//--
// Re-run entity search + Drools rules for the affected sections only.
KieContainer kieContainer = droolsExecutionService.updateRules(analyzeRequest.getRuleSetId());
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getRuleSetId(), analyzeRequest.getProjectId());
List<SectionSearchableTextPair> sectionSearchableTextPairs = new ArrayList<>();
for (SectionText reanalysisSection : reanalysisSections) {
Set<Entity> entities = entityRedactionService.findEntities(reanalysisSection.getSearchableText(), reanalysisSection
.getHeadline(), reanalysisSection.getSectionNumber(), dictionary, false);
// Table sections (with cell starts) use the cell-aware overload for surrounding text.
if (reanalysisSection.getCellStarts() != null) {
surroundingWordsService.addSurroundingText(entities, reanalysisSection.getSearchableText(), dictionary, reanalysisSection
.getCellStarts());
} else {
surroundingWordsService.addSurroundingText(entities, reanalysisSection.getSearchableText(), dictionary);
}
sectionSearchableTextPairs.add(new SectionSearchableTextPair(Section.builder()
.isLocal(false)
.dictionaryTypes(dictionary.getTypes())
.entities(entities)
.text(reanalysisSection.getSearchableText().getAsStringWithLinebreaks())
.searchText(reanalysisSection.getSearchableText().toString())
.headline(reanalysisSection.getHeadline())
.sectionNumber(reanalysisSection.getSectionNumber())
.tabularData(reanalysisSection.getTabularData())
.searchableText(reanalysisSection.getSearchableText())
.dictionary(dictionary)
.images(reanalysisSection.getImages())
.build(), reanalysisSection.getSearchableText()));
}
// Execute the rules per section; accumulate entities and images across sections.
Set<Entity> entities = new HashSet<>();
Map<Integer, Set<Image>> imagesPerPage = new HashMap<>();
sectionSearchableTextPairs.forEach(sectionSearchableTextPair -> {
Section analysedRowSection = droolsExecutionService.executeRules(kieContainer, sectionSearchableTextPair.getSection());
entities.addAll(analysedRowSection.getEntities());
// Drop entities whose span is fully contained in a larger one (runs on every iteration).
EntitySearchUtils.removeEntitiesContainedInLarger(entities);
for (Image image : analysedRowSection.getImages()) {
imagesPerPage.computeIfAbsent(image.getPage(), (a) -> new HashSet<>()).add(image);
}
});
// Split each entity's position sequences by page, producing one per-page Entity copy.
Map<Integer, List<Entity>> entitiesPerPage = new HashMap<>();
for (Entity entity : entities) {
Map<Integer, List<EntityPositionSequence>> sequenceOnPage = new HashMap<>();
for (EntityPositionSequence entityPositionSequence : entity.getPositionSequences()) {
sequenceOnPage.computeIfAbsent(entityPositionSequence.getPageNumber(), (x) -> new ArrayList<>())
.add(entityPositionSequence);
}
for (Map.Entry<Integer, List<EntityPositionSequence>> entry : sequenceOnPage.entrySet()) {
entitiesPerPage.computeIfAbsent(entry.getKey(), (x) -> new ArrayList<>())
.add(new Entity(entity.getWord(), entity.getType(), entity.isRedaction(), entity.getRedactionReason(), entry
.getValue(), entity.getHeadline(), entity.getMatchedRule(), entity.getSectionNumber(), entity
.getLegalBasis(), entity.isDictionaryEntry(), entity.getTextBefore(), entity.getTextAfter(), entity
.getStart(), entity.getEnd(), entity.isDossierDictionaryEntry()));
}
}
// Build fresh redaction-log entries page by page (entities, images, manual adds).
List<RedactionLogEntry> newRedactionLogEntries = new ArrayList<>();
for (int page = 1; page <= text.getNumberOfPages(); page++) {
if (entitiesPerPage.get(page) != null) {
newRedactionLogEntries.addAll(redactionLogCreatorService.addEntries(entitiesPerPage, analyzeRequest.getManualRedactions(), page, analyzeRequest
.getRuleSetId()));
}
if (imagesPerPage.get(page) != null) {
newRedactionLogEntries.addAll(redactionLogCreatorService.addImageEntries(imagesPerPage, analyzeRequest.getManualRedactions(), page, analyzeRequest
.getRuleSetId()));
}
newRedactionLogEntries.addAll(redactionLogCreatorService.addManualAddEntries(manualAdds, comments, page, analyzeRequest
.getRuleSetId()));
}
// Replace the stale entries of re-analysed sections with the freshly computed ones.
redactionLog.getRedactionLogEntry()
.removeIf(entry -> sectionsToReanalyse.contains(entry.getSectionNumber()));
redactionLog.getRedactionLogEntry().addAll(newRedactionLogEntries);
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement);
}
/**
 * Stamps the log with the new dictionary versions, stores changelog and
 * redaction log, and builds the analyze response.
 * NOTE(review): the @RequestBody annotation on this private method has no
 * effect outside Spring MVC binding — presumably copy-paste from reanalyze().
 */
private AnalyzeResult finalizeAnalysis(@RequestBody AnalyzeRequest analyzeRequest, long startTime,
RedactionLog redactionLog, Text text,
DictionaryIncrement dictionaryIncrement) {
redactionLog.setDictionaryVersion(dictionaryIncrement.getDictionaryVersion().getRulesetVersion());
redactionLog.setDossierDictionaryVersion(dictionaryIncrement.getDictionaryVersion().getDossierVersion());
var changeLog = redactionChangeLogService.createAndStoreChangeLog(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), redactionLog);
redactionStorageService.storeObject(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), FileType.REDACTION_LOG, redactionLog);
long duration = System.currentTimeMillis() - startTime;
return analyzeResponseService.createAnalyzeResponse(analyzeRequest.getProjectId(), analyzeRequest.getFileId(), duration, text
.getNumberOfPages(), redactionLog, changeLog);
}
/**
 * Collects the ids of all manual "remove" and "force redact" requests.
 * Returns an empty set when no manual redactions are present.
 */
private Set<String> getForceAndRemoveIds(ManualRedactions manualRedactions) {
if (manualRedactions == null) {
return new HashSet<>();
}
return Stream.concat(manualRedactions.getIdsToRemove()
.stream()
.map(IdRemoval::getId), manualRedactions.getForceRedacts().stream().map(ManualForceRedact::getId))
.collect(Collectors.toSet());
}
/**
 * Converts an image-type redaction-log entry back into an Image model.
 * Uses only the entry's first position rectangle — assumes image entries
 * carry exactly one position (TODO confirm).
 */
public Image convert(RedactionLogEntry entry) {
Rectangle position = entry.getPositions().get(0);
return Image.builder()
.type(entry.getType())
.position(new RedRectangle2D(position.getTopLeft().getX(), position.getTopLeft()
.getY(), position.getWidth(), position.getHeight()))
.sectionNumber(entry.getSectionNumber())
.section(entry.getSection())
.page(position.getPage())
.build();
}
}

View File

@ -1,19 +1,25 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import com.iqser.red.service.file.management.v1.api.model.FileType;
import com.iqser.red.service.redaction.v1.model.ChangeType;
import com.iqser.red.service.redaction.v1.model.RedactionChangeLog;
import com.iqser.red.service.redaction.v1.model.RedactionChangeLogEntry;
import com.iqser.red.service.redaction.v1.model.RedactionLog;
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import java.time.OffsetDateTime;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import com.iqser.red.service.redaction.v1.model.Change;
import com.iqser.red.service.redaction.v1.model.ChangeType;
import com.iqser.red.service.redaction.v1.model.RedactionLog;
import com.iqser.red.service.redaction.v1.model.RedactionLogChanges;
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@Service
@ -22,72 +28,80 @@ public class RedactionChangeLogService {
private final RedactionStorageService redactionStorageService;
public RedactionChangeLog createAndStoreChangeLog(String projectId, String fileId, RedactionLog currentRedactionLog) {
try {
RedactionLog previousRedactionLog = redactionStorageService.getRedactionLog(projectId, fileId);
var changeLog = createChangeLog(currentRedactionLog, previousRedactionLog);
redactionStorageService.storeObject(projectId, fileId, FileType.REDACTION_CHANGELOG, changeLog);
return changeLog;
} catch (Exception e) {
log.debug("Previous redaction log not available");
return null;
}
public RedactionLogChanges computeChanges(String dossierId, String fileId, RedactionLog currentRedactionLog) {
}
private RedactionChangeLog createChangeLog(RedactionLog currentRedactionLog, RedactionLog previousRedactionLog) {
long start = System.currentTimeMillis();
RedactionLog previousRedactionLog = redactionStorageService.getRedactionLog(dossierId, fileId);
if (previousRedactionLog == null) {
return null;
currentRedactionLog.getRedactionLogEntry().forEach(entry -> {
entry.getChanges().add(new Change(ChangeType.ADDED, OffsetDateTime.now()));
});
return new RedactionLogChanges(currentRedactionLog, false);
}
List<RedactionLogEntry> added = new ArrayList<>(currentRedactionLog.getRedactionLogEntry());
added.removeAll(previousRedactionLog.getRedactionLogEntry());
List<RedactionLogEntry> notRemovedPreviousEntries = previousRedactionLog.getRedactionLogEntry()
.stream()
.filter(entry -> !entry.getChanges()
.get(entry.getChanges().size() - 1)
.getType()
.equals(ChangeType.REMOVED))
.collect(Collectors.toList());
List<RedactionLogEntry> removed = new ArrayList<>(previousRedactionLog.getRedactionLogEntry());
Set<RedactionLogEntry> added = new HashSet<>(currentRedactionLog.getRedactionLogEntry());
added.removeAll(notRemovedPreviousEntries);
Set<RedactionLogEntry> removed = new HashSet<>(notRemovedPreviousEntries);
removed.removeAll(currentRedactionLog.getRedactionLogEntry());
List<RedactionChangeLogEntry> changeLogEntries = added.stream()
.map(entry -> convert(entry, ChangeType.ADDED))
.collect(Collectors.toList());
changeLogEntries.addAll(removed.stream()
.map(entry -> convert(entry, ChangeType.REMOVED))
.collect(Collectors.toList()));
Map<String, RedactionLogEntry> addedIds = new HashMap<>();
added.forEach(entry -> {
addedIds.put(entry.getId(), entry);
});
return new RedactionChangeLog(changeLogEntries, currentRedactionLog.getDictionaryVersion(), currentRedactionLog.getRulesVersion(), currentRedactionLog
.getRuleSetId());
}
Set<String> removedIds = new HashSet<>();
removed.forEach(entry -> {
removedIds.add(entry.getId());
});
List<RedactionLogEntry> newRedactionLogEntries = previousRedactionLog.getRedactionLogEntry();
private RedactionChangeLogEntry convert(RedactionLogEntry entry, ChangeType changeType) {
List<RedactionLogEntry> toRemove = new ArrayList<>();
newRedactionLogEntries.forEach(entry -> {
if (removedIds.contains(entry.getId()) && addedIds.containsKey(entry.getId())) {
List<Change> changes = entry.getChanges();
changes.add(new Change(ChangeType.CHANGED, OffsetDateTime.now()));
var newEntry = addedIds.get(entry.getId());
newEntry.setChanges(changes);
addedIds.put(entry.getId(), newEntry);
toRemove.add(entry);
} else if (removedIds.contains(entry.getId())) {
entry.getChanges().add(new Change(ChangeType.REMOVED, OffsetDateTime.now()));
} else if (addedIds.containsKey(entry.getId())) {
List<Change> changes = entry.getChanges();
changes.add(new Change(ChangeType.ADDED, OffsetDateTime.now()));
var newEntry = addedIds.get(entry.getId());
newEntry.setChanges(changes);
addedIds.put(entry.getId(), newEntry);
toRemove.add(entry);
}
});
return RedactionChangeLogEntry.builder()
.id(entry.getId())
.type(entry.getType())
.value(entry.getValue())
.reason(entry.getReason())
.matchedRule(entry.getMatchedRule())
.legalBasis(entry.getLegalBasis())
.redacted(entry.isRedacted())
.isHint(entry.isHint())
.isRecommendation(entry.isRecommendation())
.section(entry.getSection())
.color(entry.getColor())
.positions(entry.getPositions())
.sectionNumber(entry.getSectionNumber())
.manual(entry.isManual())
.status(entry.getStatus())
.manualRedactionType(entry.getManualRedactionType())
.isDictionaryEntry(entry.isDictionaryEntry())
.textBefore(entry.getTextBefore())
.textAfter(entry.getTextAfter())
.comments(entry.getComments())
.changeType(changeType)
.isDossierDictionaryEntry(entry.isDossierDictionaryEntry())
.build();
newRedactionLogEntries.removeAll(toRemove);
addedIds.forEach((k, v) -> {
if(v.getChanges().isEmpty()) {
v.getChanges().add(new Change(ChangeType.ADDED, OffsetDateTime.now()));
}
newRedactionLogEntries.add(v);
});
currentRedactionLog.setRedactionLogEntry(newRedactionLogEntries);
log.info("Change computation took: {}", System.currentTimeMillis() - start);
return new RedactionLogChanges(currentRedactionLog, !addedIds.isEmpty() || !removedIds.isEmpty());
}
}

View File

@ -1,22 +1,5 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import com.iqser.red.service.redaction.v1.model.*;
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.parsing.model.RedTextPosition;
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
import lombok.RequiredArgsConstructor;
import org.apache.commons.collections4.CollectionUtils;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
@ -24,6 +7,22 @@ import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.collections4.CollectionUtils;
import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.model.Point;
import com.iqser.red.service.redaction.v1.model.Rectangle;
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.server.parsing.model.RedTextPosition;
import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
import com.iqser.red.service.redaction.v1.server.redaction.model.PageEntities;
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
import lombok.RequiredArgsConstructor;
@Service
@RequiredArgsConstructor
public class RedactionLogCreatorService {
@ -31,35 +30,27 @@ public class RedactionLogCreatorService {
private final DictionaryService dictionaryService;
public void createRedactionLog(Document classifiedDoc, int numberOfPages, ManualRedactions manualRedactions,
String ruleSetId) {
public List<RedactionLogEntry> createRedactionLog(PageEntities pageEntities, int numberOfPages,
String dossierTemplateId) {
Set<Integer> manualRedactionPages = getManualRedactionPages(manualRedactions);
List<RedactionLogEntry> entries = new ArrayList<>();
for (int page = 1; page <= numberOfPages; page++) {
addSectionGrid(classifiedDoc, page);
if (classifiedDoc.getEntities().get(page) != null) {
classifiedDoc.getRedactionLogEntities()
.addAll(addEntries(classifiedDoc.getEntities(), manualRedactions, page, ruleSetId));
if (pageEntities.getEntitiesPerPage().get(page) != null) {
entries.addAll(addEntries(pageEntities.getEntitiesPerPage(), page, dossierTemplateId));
}
if (manualRedactionPages.contains(page)) {
classifiedDoc.getRedactionLogEntities()
.addAll(addManualAddEntries(manualRedactions.getEntriesToAdd(), manualRedactions.getComments(), page, ruleSetId));
}
if (classifiedDoc.getImages().get(page) != null && !classifiedDoc.getImages().get(page).isEmpty()) {
classifiedDoc.getRedactionLogEntities()
.addAll(addImageEntries(classifiedDoc.getImages(), manualRedactions, page, ruleSetId));
if (pageEntities.getImagesPerPage().get(page) != null) {
entries.addAll(addImageEntries(pageEntities.getImagesPerPage(), page, dossierTemplateId));
}
}
return entries;
}
public List<RedactionLogEntry> addImageEntries(Map<Integer, Set<Image>> images, ManualRedactions manualRedactions,
int pageNumber, String ruleSetId) {
public List<RedactionLogEntry> addImageEntries(Map<Integer, Set<Image>> images, int pageNumber,
String dossierTemplateId) {
List<RedactionLogEntry> redactionLogEntities = new ArrayList<>();
@ -69,14 +60,14 @@ public class RedactionLogCreatorService {
RedactionLogEntry redactionLogEntry = RedactionLogEntry.builder()
.id(id)
.color(getColorForImage(image, ruleSetId, false))
.color(getColor(image.getType(), dossierTemplateId, image.isRedaction()))
.isImage(true)
.type(image.getType())
.redacted(image.isRedaction())
.reason(image.getRedactionReason())
.legalBasis(image.getLegalBasis())
.matchedRule(image.getMatchedRule())
.isHint(dictionaryService.isHint(image.getType(), ruleSetId))
.isHint(dictionaryService.isHint(image.getType(), dossierTemplateId))
.manual(false)
.isDictionaryEntry(false)
.isRecommendation(false)
@ -85,62 +76,9 @@ public class RedactionLogCreatorService {
.getWidth(), (float) image.getPosition().getHeight(), pageNumber)))
.sectionNumber(image.getSectionNumber())
.section(image.getSection())
.imageHasTransparency(image.isHasTransparency())
.build();
if (manualRedactions != null && !manualRedactions.getIdsToRemove().isEmpty()) {
for (IdRemoval manualRemoval : manualRedactions.getIdsToRemove()) {
if (manualRemoval.getId().equals(id)) {
String manualOverrideReason = null;
if (manualRemoval.getStatus().equals(Status.APPROVED)) {
image.setRedaction(false);
redactionLogEntry.setRedacted(false);
redactionLogEntry.setStatus(Status.APPROVED);
manualOverrideReason = image.getRedactionReason() + ", removed by manual override";
redactionLogEntry.setColor(getColorForImage(image, ruleSetId, false));
} else if (manualRemoval.getStatus().equals(Status.REQUESTED)) {
manualOverrideReason = image.getRedactionReason() + ", requested to remove";
redactionLogEntry.setStatus(Status.REQUESTED);
redactionLogEntry.setColor(getColorForImage(image, ruleSetId, true));
} else {
redactionLogEntry.setStatus(Status.DECLINED);
}
image.setRedactionReason(manualOverrideReason != null ? manualOverrideReason : image.getRedactionReason());
redactionLogEntry.setReason(manualOverrideReason);
redactionLogEntry.setManual(true);
redactionLogEntry.setManualRedactionType(ManualRedactionType.REMOVE);
}
}
}
if (manualRedactions != null && !manualRedactions.getForceRedacts().isEmpty()) {
for (ManualForceRedact manualForceRedact : manualRedactions.getForceRedacts()) {
if (manualForceRedact.getId().equals(id)) {
String manualOverrideReason = null;
if (manualForceRedact.getStatus().equals(Status.APPROVED)) {
image.setRedaction(true);
redactionLogEntry.setRedacted(true);
redactionLogEntry.setStatus(Status.APPROVED);
redactionLogEntry.setColor(getColorForImage(image, ruleSetId, false));
manualOverrideReason = image.getRedactionReason() + ", forced by manual override";
redactionLogEntry.setLegalBasis(manualForceRedact.getLegalBasis());
} else if (manualForceRedact.getStatus().equals(Status.REQUESTED)) {
manualOverrideReason = image.getRedactionReason() + ", requested to force redact";
redactionLogEntry.setStatus(Status.REQUESTED);
redactionLogEntry.setColor(getColorForImage(image, ruleSetId, true));
redactionLogEntry.setLegalBasis(manualForceRedact.getLegalBasis());
} else {
redactionLogEntry.setStatus(Status.DECLINED);
}
image.setRedactionReason(manualOverrideReason != null ? manualOverrideReason : image.getRedactionReason());
redactionLogEntry.setReason(manualOverrideReason);
redactionLogEntry.setManual(true);
redactionLogEntry.setManualRedactionType(ManualRedactionType.FORCE_REDACT);
}
}
}
redactionLogEntities.add(redactionLogEntry);
}
@ -148,25 +86,7 @@ public class RedactionLogCreatorService {
}
private Set<Integer> getManualRedactionPages(ManualRedactions manualRedactions) {
Set<Integer> manualRedactionPages = new HashSet<>();
if (manualRedactions == null) {
return manualRedactionPages;
}
manualRedactions.getEntriesToAdd().forEach(entry -> {
entry.getPositions().forEach(pos -> {
manualRedactionPages.add(pos.getPage());
});
});
return manualRedactionPages;
}
public List<RedactionLogEntry> addEntries(Map<Integer, List<Entity>> entities, ManualRedactions manualRedactions,
int page, String ruleSetId) {
public List<RedactionLogEntry> addEntries(Map<Integer, List<Entity>> entities, int page, String dossierTemplateId) {
List<RedactionLogEntry> redactionLogEntities = new ArrayList<>();
@ -176,11 +96,9 @@ public class RedactionLogCreatorService {
entityLoop:
for (Entity entity : entities.get(page)) {
List<Comment> comments = null;
for (EntityPositionSequence entityPositionSequence : entity.getPositionSequences()) {
RedactionLogEntry redactionLogEntry = createRedactionLogEntry(entity, ruleSetId);
RedactionLogEntry redactionLogEntry = createRedactionLogEntry(entity, dossierTemplateId);
if (processedIds.contains(entityPositionSequence.getId())) {
// TODO refactor this outer loop jump as soon as we have the time.
@ -189,60 +107,7 @@ public class RedactionLogCreatorService {
processedIds.add(entityPositionSequence.getId());
}
if (manualRedactions != null && !manualRedactions.getIdsToRemove().isEmpty()) {
for (IdRemoval manualRemoval : manualRedactions.getIdsToRemove()) {
if (manualRemoval.getId().equals(entityPositionSequence.getId())) {
comments = manualRedactions.getComments().get(manualRemoval.getId());
String manualOverrideReason = null;
if (manualRemoval.getStatus().equals(Status.APPROVED)) {
entity.setRedaction(false);
redactionLogEntry.setRedacted(false);
redactionLogEntry.setStatus(Status.APPROVED);
manualOverrideReason = entity.getRedactionReason() + ", removed by manual override";
redactionLogEntry.setColor(getColor(entity, ruleSetId, false));
} else if (manualRemoval.getStatus().equals(Status.REQUESTED)) {
manualOverrideReason = entity.getRedactionReason() + ", requested to remove";
redactionLogEntry.setStatus(Status.REQUESTED);
redactionLogEntry.setColor(getColor(entity, ruleSetId, true));
} else {
redactionLogEntry.setStatus(Status.DECLINED);
}
entity.setRedactionReason(manualOverrideReason != null ? manualOverrideReason : entity.getRedactionReason());
redactionLogEntry.setReason(manualOverrideReason);
redactionLogEntry.setManual(true);
redactionLogEntry.setManualRedactionType(ManualRedactionType.REMOVE);
}
}
}
if (manualRedactions != null && !manualRedactions.getForceRedacts().isEmpty()) {
for (ManualForceRedact manualForceRedact : manualRedactions.getForceRedacts()) {
if (manualForceRedact.getId().equals(entityPositionSequence.getId())) {
String manualOverrideReason = null;
if (manualForceRedact.getStatus().equals(Status.APPROVED)) {
entity.setRedaction(true);
redactionLogEntry.setRedacted(true);
redactionLogEntry.setStatus(Status.APPROVED);
redactionLogEntry.setColor(getColor(entity, ruleSetId, false));
manualOverrideReason = entity.getRedactionReason() + ", forced by manual override";
redactionLogEntry.setLegalBasis(manualForceRedact.getLegalBasis());
} else if (manualForceRedact.getStatus().equals(Status.REQUESTED)) {
manualOverrideReason = entity.getRedactionReason() + ", requested to force redact";
redactionLogEntry.setStatus(Status.REQUESTED);
redactionLogEntry.setColor(getColor(entity, ruleSetId, true));
redactionLogEntry.setLegalBasis(manualForceRedact.getLegalBasis());
} else {
redactionLogEntry.setStatus(Status.DECLINED);
}
entity.setRedactionReason(manualOverrideReason != null ? manualOverrideReason : entity.getRedactionReason());
redactionLogEntry.setReason(manualOverrideReason);
redactionLogEntry.setManual(true);
redactionLogEntry.setManualRedactionType(ManualRedactionType.FORCE_REDACT);
}
}
}
redactionLogEntry.setId(entityPositionSequence.getId());
if (CollectionUtils.isNotEmpty(entityPositionSequence.getSequences())) {
List<Rectangle> rectanglesPerLine = getRectanglesPerLine(entityPositionSequence.getSequences()
@ -250,17 +115,10 @@ public class RedactionLogCreatorService {
.flatMap(seq -> seq.getTextPositions().stream())
.collect(Collectors.toList()), page);
if (manualRedactions != null) {
comments = manualRedactions.getComments().get(entityPositionSequence.getId());
}
redactionLogEntry.setComments(comments);
redactionLogEntry.getPositions().addAll(rectanglesPerLine);
}
redactionLogEntry.setId(entityPositionSequence.getId());
// FIXME ids should never be null. Figure out why this happens.
if (redactionLogEntry.getId() != null) {
redactionLogEntities.add(redactionLogEntry);
@ -276,20 +134,22 @@ public class RedactionLogCreatorService {
List<Rectangle> rectangles = new ArrayList<>();
if (textPositions.size() == 1) {
rectangles.add( TextPositionSequence.fromData(textPositions, page).getRectangle());
rectangles.add(TextPositionSequence.fromData(textPositions, page).getRectangle());
} else {
float y = textPositions.get(0).getYDirAdj();
int startIndex = 0;
for (int i = 1; i < textPositions.size(); i++) {
float yDirAdj = textPositions.get(i).getYDirAdj();
if (yDirAdj != y) {
rectangles.add( TextPositionSequence.fromData(textPositions.subList(startIndex, i), page).getRectangle());
rectangles.add(TextPositionSequence.fromData(textPositions.subList(startIndex, i), page)
.getRectangle());
y = yDirAdj;
startIndex = i;
}
}
if (startIndex != textPositions.size()) {
rectangles.add( TextPositionSequence.fromData(textPositions.subList(startIndex, textPositions.size()), page).getRectangle());
rectangles.add(TextPositionSequence.fromData(textPositions.subList(startIndex, textPositions.size()), page)
.getRectangle());
}
}
@ -297,80 +157,20 @@ public class RedactionLogCreatorService {
}
public List<RedactionLogEntry> addManualAddEntries(Set<ManualRedactionEntry> manualAdds,
Map<String, List<Comment>> comments, int page,
String ruleSetId) {
private RedactionLogEntry createRedactionLogEntry(Entity entity, String dossierTemplateId) {
List<RedactionLogEntry> redactionLogEntities = new ArrayList<>();
if (manualAdds == null) {
return redactionLogEntities;
}
for (ManualRedactionEntry manualRedactionEntry : manualAdds) {
String id = manualRedactionEntry.getId();
RedactionLogEntry redactionLogEntry = createRedactionLogEntry(manualRedactionEntry, id, ruleSetId);
List<Rectangle> rectanglesOnPage = new ArrayList<>();
for (Rectangle rectangle : manualRedactionEntry.getPositions()) {
if (page == rectangle.getPage()) {
rectanglesOnPage.add(rectangle);
redactionLogEntry.getPositions().add(rectangle);
}
}
redactionLogEntry.setComments(comments.get(id));
if (!rectanglesOnPage.isEmpty() && !approvedAndShouldBeInDictionary(manualRedactionEntry)) {
redactionLogEntities.add(redactionLogEntry);
}
}
return redactionLogEntities;
}
private boolean approvedAndShouldBeInDictionary(ManualRedactionEntry manualRedactionEntry) {
return manualRedactionEntry.getStatus().equals(Status.APPROVED) && manualRedactionEntry.isAddToDictionary();
}
private RedactionLogEntry createRedactionLogEntry(ManualRedactionEntry manualRedactionEntry, String id,
String ruleSetId) {
Set<String> referenceIds = new HashSet<>();
entity.getReferences().forEach(ref -> ref.getPositionSequences().forEach(pos -> referenceIds.add(pos.getId())));
return RedactionLogEntry.builder()
.id(id)
.color(getColorForManualAdd(manualRedactionEntry.getType(), ruleSetId, manualRedactionEntry.getStatus()))
.reason(manualRedactionEntry.getReason())
.legalBasis(manualRedactionEntry.getLegalBasis())
.value(manualRedactionEntry.getValue())
.type(manualRedactionEntry.getType())
.redacted(true)
.isHint(false)
.section(manualRedactionEntry.getSection())
.sectionNumber(manualRedactionEntry.getSectionNumber())
.manual(true)
.status(manualRedactionEntry.getStatus())
.manualRedactionType(ManualRedactionType.ADD)
.isDictionaryEntry(false)
.isDossierDictionaryEntry(manualRedactionEntry.isAddToDossierDictionary())
.build();
}
private RedactionLogEntry createRedactionLogEntry(Entity entity, String ruleSetId) {
return RedactionLogEntry.builder()
.color(getColor(entity, ruleSetId, false))
.color(getColor(entity.getType(), dossierTemplateId, entity.isRedaction()))
.reason(entity.getRedactionReason())
.legalBasis(entity.getLegalBasis())
.value(entity.getWord())
.type(entity.getType())
.redacted(entity.isRedaction())
.isHint(isHint(entity, ruleSetId))
.isRecommendation(isRecommendation(entity, ruleSetId))
.isHint(isHint(entity.getType(), dossierTemplateId))
.isRecommendation(isRecommendation(entity.getType(), dossierTemplateId))
.section(entity.getHeadline())
.sectionNumber(entity.getSectionNumber())
.matchedRule(entity.getMatchedRule())
@ -380,104 +180,30 @@ public class RedactionLogCreatorService {
.startOffset(entity.getStart())
.endOffset(entity.getEnd())
.isDossierDictionaryEntry(entity.isDossierDictionaryEntry())
.engines(entity.getEngines())
.reference(referenceIds)
.build();
}
private float[] getColor(Entity entity, String ruleSetId, boolean requestedToRemove) {
private float[] getColor(String type, String dossierTemplateId, boolean isRedaction) {
if (requestedToRemove) {
return dictionaryService.getRequestRemoveColor(ruleSetId);
if (!isRedaction && !isHint(type, dossierTemplateId)) {
return dictionaryService.getNotRedactedColor(dossierTemplateId);
}
if (!entity.isRedaction() && !isHint(entity, ruleSetId)) {
return dictionaryService.getNotRedactedColor(ruleSetId);
}
return dictionaryService.getColor(entity.getType(), ruleSetId);
return dictionaryService.getColor(type, dossierTemplateId);
}
private float[] getColorForManualAdd(String type, String ruleSetId, Status status) {
private boolean isHint(String type, String dossierTemplateId) {
if (status.equals(Status.REQUESTED)) {
return dictionaryService.getRequestAddColor(ruleSetId);
} else if (status.equals(Status.DECLINED)) {
return dictionaryService.getNotRedactedColor(ruleSetId);
}
return getColor(type, ruleSetId);
return dictionaryService.isHint(type, dossierTemplateId);
}
private float[] getColor(String type, String ruleSetId) {
private boolean isRecommendation(String type, String dossierTemplateId) {
return dictionaryService.getColor(type, ruleSetId);
}
private float[] getColorForImage(Image image, String ruleSetId, boolean requestedToRemove) {
if (requestedToRemove) {
return dictionaryService.getRequestRemoveColor(ruleSetId);
}
if (!image.isRedaction() && !dictionaryService.isHint(image.getType(), ruleSetId)) {
return dictionaryService.getNotRedactedColor(ruleSetId);
}
return dictionaryService.getColor(image.getType(), ruleSetId);
}
private boolean isHint(Entity entity, String ruleSetId) {
return dictionaryService.isHint(entity.getType(), ruleSetId);
}
private boolean isRecommendation(Entity entity, String ruleSetId) {
return dictionaryService.isRecommendation(entity.getType(), ruleSetId);
}
private void addSectionGrid(Document classifiedDoc, int page) {
for (Paragraph paragraph : classifiedDoc.getParagraphs()) {
for (int i = 0; i <= paragraph.getPageBlocks().size() - 1; i++) {
AbstractTextContainer textBlock = paragraph.getPageBlocks().get(i);
if (textBlock.getPage() != page) {
continue;
}
if (textBlock instanceof TextBlock) {
classifiedDoc.getSectionGrid()
.getRectanglesPerPage()
.computeIfAbsent(page, (x) -> new ArrayList<>())
.add(new SectionRectangle(new Point(textBlock.getMinX(), textBlock.getMinY()), textBlock.getWidth(), textBlock
.getHeight(), i + 1, paragraph.getPageBlocks().size()));
} else if (textBlock instanceof Table) {
List<CellRectangle> cellRectangles = new ArrayList<>();
for (List<Cell> row : ((Table) textBlock).getRows()) {
for (Cell cell : row) {
if (cell != null) {
cellRectangles.add(new CellRectangle(new Point((float) cell.getX(), (float) cell.getY()), (float) cell
.getWidth(), (float) cell.getHeight()));
}
}
}
classifiedDoc.getSectionGrid()
.getRectanglesPerPage()
.computeIfAbsent(page, (x) -> new ArrayList<>())
.add(new SectionRectangle(new Point(textBlock.getMinX(), textBlock.getMinY()), textBlock.getWidth(), textBlock
.getHeight(), i + 1, paragraph.getPageBlocks().size(), cellRectangles));
}
}
}
return dictionaryService.isRecommendation(type, dossierTemplateId);
}
}

View File

@ -0,0 +1,328 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import java.time.OffsetDateTime;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.model.Comment;
import com.iqser.red.service.redaction.v1.model.IdRemoval;
import com.iqser.red.service.redaction.v1.model.ManualForceRedact;
import com.iqser.red.service.redaction.v1.model.ManualImageRecategorization;
import com.iqser.red.service.redaction.v1.model.ManualLegalBasisChange;
import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry;
import com.iqser.red.service.redaction.v1.model.ManualRedactionType;
import com.iqser.red.service.redaction.v1.model.ManualRedactions;
import com.iqser.red.service.redaction.v1.model.RedactionLog;
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.model.Status;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@Service
@RequiredArgsConstructor
public class RedactionLogMergeService {

    // Supplies per-dossier-template colors and hint/recommendation lookups.
    private final DictionaryService dictionaryService;

    /**
     * Merges manual redaction decisions into an automatically generated redaction log.
     *
     * Manual "add" entries become brand-new log entries; all other manual operations
     * (image recategorizations, removals, force-redacts, legal-basis changes) mutate
     * the existing entry with the matching id, applied in request-date order so the
     * most recent decision wins. Entries with a position on an excluded page are
     * flagged as excluded.
     *
     * @param manualRedactions may be null, in which case the log is returned unchanged
     * @param excludedPages    may be null or empty; only checked when non-empty
     * @return the same {@code redactionLog} instance, mutated in place
     */
    public RedactionLog mergeRedactionLogData(RedactionLog redactionLog, String dossierTemplateId,
            ManualRedactions manualRedactions, Set<Integer> excludedPages) {
        log.info("Merging Redaction log with manual redactions {}", manualRedactions);
        if (manualRedactions != null) {
            // Manual "add" operations produce additional, standalone log entries.
            var manualRedactionLogEntries = addManualAddEntries(manualRedactions.getEntriesToAdd(), manualRedactions.getComments(), dossierTemplateId);
            redactionLog.getRedactionLogEntry().addAll(manualRedactionLogEntries);
            // Every other manual operation targets an existing entry by id.
            var manualRedactionWrappers = createManualRedactionWrappers(manualRedactions);
            for (RedactionLogEntry entry : redactionLog.getRedactionLogEntry()) {
                // Only the wrappers whose id matches this entry are applied to it.
                processRedactionLogEntry(manualRedactionWrappers.stream()
                        .filter(mr -> entry.getId().equals(mr.getId()))
                        .collect(Collectors.toList()), dossierTemplateId, entry);
                entry.setComments(manualRedactions.getComments().get(entry.getId()));
                if (excludedPages != null && !excludedPages.isEmpty()) {
                    // One position on an excluded page marks the whole entry excluded.
                    entry.getPositions().forEach(pos -> {
                        if (excludedPages.contains(pos.getPage())) {
                            entry.setExcluded(true);
                        }
                    });
                }
            }
        }
        return redactionLog;
    }

    /**
     * Collects all non-soft-deleted manual operations into a single list of
     * wrappers sorted by request date (ascending), so that when several
     * operations target the same entry, later requests overwrite earlier ones.
     */
    private List<ManualRedactionWrapper> createManualRedactionWrappers(ManualRedactions manualRedactions) {
        List<ManualRedactionWrapper> manualRedactionWrappers = new ArrayList<>();
        manualRedactions.getImageRecategorizations().forEach(item -> {
            if (item.getSoftDeletedTime() == null) {
                manualRedactionWrappers.add(new ManualRedactionWrapper(item.getId(), item.getRequestDate(), item));
            }
        });
        manualRedactions.getIdsToRemove().forEach(item -> {
            if (item.getSoftDeletedTime() == null) {
                manualRedactionWrappers.add(new ManualRedactionWrapper(item.getId(), item.getRequestDate(), item));
            }
        });
        manualRedactions.getForceRedacts().forEach(item -> {
            if (item.getSoftDeletedTime() == null) {
                manualRedactionWrappers.add(new ManualRedactionWrapper(item.getId(), item.getRequestDate(), item));
            }
        });
        manualRedactions.getManualLegalBasisChanges().forEach(item -> {
            if (item.getSoftDeletedTime() == null) {
                manualRedactionWrappers.add(new ManualRedactionWrapper(item.getId(), item.getRequestDate(), item));
            }
        });
        // Sorts by ManualRedactionWrapper#compareTo, i.e. by request date.
        Collections.sort(manualRedactionWrappers);
        return manualRedactionWrappers;
    }

    /**
     * Applies a list of (date-ordered) manual operations to a single log entry.
     * Each operation type has three outcomes: APPROVED mutates the entry,
     * REQUESTED marks it pending (request color), anything else is DECLINED.
     * Later wrappers in the list overwrite the status/reason set by earlier ones.
     */
    private void processRedactionLogEntry(List<ManualRedactionWrapper> manualRedactionWrappers,
            String dossierTemplateId, RedactionLogEntry redactionLogEntry) {
        manualRedactionWrappers.forEach(mrw -> {
            if (mrw.getItem() instanceof ManualImageRecategorization) {
                var imageRecategorization = (ManualImageRecategorization) mrw.getItem();
                String manualOverrideReason = null;
                if (imageRecategorization.getStatus().equals(Status.APPROVED)) {
                    // Approved: the entry takes over the new image type immediately.
                    redactionLogEntry.setStatus(Status.APPROVED);
                    redactionLogEntry.setType(imageRecategorization.getType());
                    manualOverrideReason = mergeReasonIfNecessary(redactionLogEntry.getReason(), ", recategorized by manual override");
                } else if (imageRecategorization.getStatus().equals(Status.REQUESTED)) {
                    // Requested: keep the old type, record the pending target type.
                    manualOverrideReason = mergeReasonIfNecessary(redactionLogEntry.getReason(), ", requested to recategorize");
                    redactionLogEntry.setStatus(Status.REQUESTED);
                    redactionLogEntry.setColor(getColor(redactionLogEntry.getType(), dossierTemplateId, false, redactionLogEntry
                            .isRedacted(), false));
                    redactionLogEntry.setRecategorizationType(imageRecategorization.getType());
                } else {
                    redactionLogEntry.setStatus(Status.DECLINED);
                }
                redactionLogEntry.setManualRedactionUserId(imageRecategorization.getUser());
                redactionLogEntry.setReason(manualOverrideReason);
                redactionLogEntry.setManual(true);
                redactionLogEntry.setManualRedactionType(ManualRedactionType.RECATEGORIZE);
            }
            if (mrw.getItem() instanceof IdRemoval) {
                var manualRemoval = (IdRemoval) mrw.getItem();
                String manualOverrideReason = null;
                if (manualRemoval.getStatus().equals(Status.APPROVED)) {
                    // Approved removal: entry stays in the log but is no longer redacted
                    // (skipped=true forces the not-redacted color).
                    redactionLogEntry.setRedacted(false);
                    redactionLogEntry.setStatus(Status.APPROVED);
                    manualOverrideReason = mergeReasonIfNecessary(redactionLogEntry.getReason(), ", removed by manual override");
                    redactionLogEntry.setColor(getColor(redactionLogEntry.getType(), dossierTemplateId, false, redactionLogEntry
                            .isRedacted(), true));
                } else if (manualRemoval.getStatus().equals(Status.REQUESTED)) {
                    manualOverrideReason = mergeReasonIfNecessary(redactionLogEntry.getReason(), ", requested to remove");
                    redactionLogEntry.setStatus(Status.REQUESTED);
                    redactionLogEntry.setColor(getColor(redactionLogEntry.getType(), dossierTemplateId, true, redactionLogEntry
                            .isRedacted(), false));
                } else {
                    redactionLogEntry.setStatus(Status.DECLINED);
                }
                redactionLogEntry.setReason(manualOverrideReason);
                redactionLogEntry.setManual(true);
                redactionLogEntry.setManualRedactionUserId(manualRemoval.getUser());
                redactionLogEntry.setManualRedactionType(ManualRedactionType.REMOVE);
                redactionLogEntry.setDictionaryEntry(manualRemoval.isRemoveFromDictionary());
                // NOTE(review): the dossier-dictionary flag is also driven by
                // isRemoveFromDictionary(); if IdRemoval has a separate
                // isRemoveFromDossierDictionary(), this looks like a copy-paste slip — confirm.
                redactionLogEntry.setDossierDictionaryEntry(manualRemoval.isRemoveFromDictionary());
            }
            if (mrw.getItem() instanceof ManualForceRedact) {
                var manualForceRedact = (ManualForceRedact) mrw.getItem();
                String manualOverrideReason = null;
                if (manualForceRedact.getStatus().equals(Status.APPROVED)) {
                    // Approved force-redact: entry becomes redacted with its type's color.
                    redactionLogEntry.setRedacted(true);
                    redactionLogEntry.setStatus(Status.APPROVED);
                    redactionLogEntry.setColor(getColor(redactionLogEntry.getType(), dossierTemplateId, false, redactionLogEntry
                            .isRedacted(), false));
                    manualOverrideReason = mergeReasonIfNecessary(redactionLogEntry.getReason(), ", forced by manual override");
                    redactionLogEntry.setLegalBasis(manualForceRedact.getLegalBasis());
                } else if (manualForceRedact.getStatus().equals(Status.REQUESTED)) {
                    manualOverrideReason = mergeReasonIfNecessary(redactionLogEntry.getReason(), ", requested to force redact");
                    redactionLogEntry.setStatus(Status.REQUESTED);
                    redactionLogEntry.setColor(getColor(redactionLogEntry.getType(), dossierTemplateId, true, redactionLogEntry
                            .isRedacted(), false));
                    redactionLogEntry.setLegalBasis(manualForceRedact.getLegalBasis());
                } else {
                    redactionLogEntry.setStatus(Status.DECLINED);
                }
                redactionLogEntry.setManualRedactionUserId(manualForceRedact.getUser());
                redactionLogEntry.setReason(manualOverrideReason);
                redactionLogEntry.setManual(true);
                redactionLogEntry.setManualRedactionType(ManualRedactionType.FORCE_REDACT);
            }
            if (mrw.getItem() instanceof ManualLegalBasisChange) {
                var manualLegalBasisChange = (ManualLegalBasisChange) mrw.getItem();
                String manualOverrideReason = null;
                if (manualLegalBasisChange.getStatus().equals(Status.APPROVED)) {
                    redactionLogEntry.setStatus(Status.APPROVED);
                    manualOverrideReason = mergeReasonIfNecessary(redactionLogEntry.getReason(), ", legal basis was manually changed");
                    redactionLogEntry.setLegalBasis(manualLegalBasisChange.getLegalBasis());
                } else if (manualLegalBasisChange.getStatus().equals(Status.REQUESTED)) {
                    // Requested: legal basis itself is untouched; the pending value is
                    // stored separately until approval.
                    manualOverrideReason = mergeReasonIfNecessary(redactionLogEntry.getReason(), ", legal basis change requested");
                    redactionLogEntry.setStatus(Status.REQUESTED);
                    redactionLogEntry.setColor(getColor(redactionLogEntry.getType(), dossierTemplateId, true, redactionLogEntry
                            .isRedacted(), false));
                    redactionLogEntry.setLegalBasisChangeValue(manualLegalBasisChange.getLegalBasis());
                } else {
                    redactionLogEntry.setStatus(Status.DECLINED);
                }
                redactionLogEntry.setManualRedactionUserId(manualLegalBasisChange.getUser());
                redactionLogEntry.setReason(manualOverrideReason);
                redactionLogEntry.setManual(true);
                redactionLogEntry.setManualRedactionType(ManualRedactionType.LEGAL_BASIS_CHANGE);
            }
        });
    }

    /**
     * Appends {@code addition} to {@code currentReason} unless it is already contained.
     * NOTE(review): when the current reason is null this returns "" and silently drops
     * the addition — confirm that is intended (a declined operation also nulls the reason).
     */
    private String mergeReasonIfNecessary(String currentReason, String addition) {
        if (currentReason != null) {
            if (!currentReason.contains(addition)) {
                return currentReason + addition;
            }
            return currentReason;
        } else {
            return "";
        }
    }

    /**
     * Converts manual "add" operations into redaction log entries, carrying over
     * their positions and comments. Approved adds that are destined for a
     * dictionary are skipped here (they will surface as dictionary matches instead).
     */
    public List<RedactionLogEntry> addManualAddEntries(Set<ManualRedactionEntry> manualAdds,
            Map<String, List<Comment>> comments, String dossierTemplateId) {
        List<RedactionLogEntry> redactionLogEntries = new ArrayList<>();
        for (ManualRedactionEntry manualRedactionEntry : manualAdds) {
            if (!approvedAndShouldBeInDictionary(manualRedactionEntry)) {
                RedactionLogEntry redactionLogEntry = createRedactionLogEntry(manualRedactionEntry, manualRedactionEntry
                        .getId(), dossierTemplateId);
                redactionLogEntry.setPositions(manualRedactionEntry.getPositions());
                redactionLogEntry.setComments(comments.get(manualRedactionEntry.getId()));
                redactionLogEntries.add(redactionLogEntry);
            }
        }
        return redactionLogEntries;
    }

    // True for approved adds that target either the global or the dossier dictionary.
    private boolean approvedAndShouldBeInDictionary(ManualRedactionEntry manualRedactionEntry) {
        return manualRedactionEntry.getStatus()
                .equals(Status.APPROVED) && (manualRedactionEntry.isAddToDictionary() || manualRedactionEntry.isAddToDossierDictionary());
    }

    /**
     * Builds a log entry for a manual "add": always redacted, never a hint,
     * without section information (section -1 / null).
     */
    private RedactionLogEntry createRedactionLogEntry(ManualRedactionEntry manualRedactionEntry, String id,
            String dossierTemplateId) {
        return RedactionLogEntry.builder()
                .id(id)
                .color(getColorForManualAdd(manualRedactionEntry.getType(), dossierTemplateId, manualRedactionEntry.getStatus()))
                .reason(manualRedactionEntry.getReason())
                .isDictionaryEntry(manualRedactionEntry.isAddToDictionary())
                .isDossierDictionaryEntry(manualRedactionEntry.isAddToDossierDictionary())
                .legalBasis(manualRedactionEntry.getLegalBasis())
                .value(manualRedactionEntry.getValue())
                .type(manualRedactionEntry.getType())
                .redacted(true)
                .isHint(false)
                .section(null)
                .sectionNumber(-1)
                .manual(true)
                .status(manualRedactionEntry.getStatus())
                .manualRedactionType(ManualRedactionType.ADD)
                .manualRedactionUserId(manualRedactionEntry.getUser())
                .build();
    }

    /**
     * Resolves the display color for an entry.
     * requested  -> pending-request color;
     * skipped, or neither redacted nor a hint -> not-redacted color
     * (note: && binds tighter than ||, so "skipped" alone is sufficient);
     * otherwise -> the type's dictionary color.
     */
    private float[] getColor(String type, String dossierTemplateId, boolean requested, boolean isRedaction,
            boolean skipped) {
        if (requested) {
            return dictionaryService.getRequestRemoveColor(dossierTemplateId);
        }
        if (skipped || !isRedaction && !dictionaryService.isHint(type, dossierTemplateId)) {
            return dictionaryService.getNotRedactedColor(dossierTemplateId);
        }
        return dictionaryService.getColor(type, dossierTemplateId);
    }

    // Manual adds use a dedicated request-add color while pending.
    private float[] getColorForManualAdd(String type, String dossierTemplateId, Status status) {
        if (status.equals(Status.REQUESTED)) {
            return dictionaryService.getRequestAddColor(dossierTemplateId);
        } else if (status.equals(Status.DECLINED)) {
            return dictionaryService.getNotRedactedColor(dossierTemplateId);
        }
        return getColor(type, dossierTemplateId);
    }

    private float[] getColor(String type, String dossierTemplateId) {
        return dictionaryService.getColor(type, dossierTemplateId);
    }

    /**
     * Pairs a manual operation with its target entry id and request date.
     * Natural ordering is by request date so lists sort oldest-first.
     */
    @Data
    @AllArgsConstructor
    private static class ManualRedactionWrapper implements Comparable<ManualRedactionWrapper> {

        private String id;
        private OffsetDateTime date;
        private Object item;

        @Override
        public int compareTo(ManualRedactionWrapper o) {
            return this.date.compareTo(o.date);
        }
    }
}

View File

@ -0,0 +1,76 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import java.util.ArrayList;
import java.util.List;
import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.model.CellRectangle;
import com.iqser.red.service.redaction.v1.model.Point;
import com.iqser.red.service.redaction.v1.model.SectionRectangle;
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
import lombok.RequiredArgsConstructor;
@Service
@RequiredArgsConstructor
public class SectionGridCreatorService {

    /**
     * Fills the document's section grid with one {@code SectionRectangle} per
     * paragraph page-block, page by page. Table blocks additionally carry the
     * rectangles of their individual cells.
     *
     * @param classifiedDoc document whose section grid is populated in place
     * @param numberOfPages total page count; pages are 1-based
     */
    public void createSectionGrid(Document classifiedDoc, int numberOfPages) {
        for (int pageNumber = 1; pageNumber <= numberOfPages; pageNumber++) {
            collectRectanglesForPage(classifiedDoc, pageNumber);
        }
    }

    // Adds a rectangle for every paragraph block that lies on the given page.
    private void collectRectanglesForPage(Document classifiedDoc, int pageNumber) {
        for (Paragraph paragraph : classifiedDoc.getParagraphs()) {
            int blockCount = paragraph.getPageBlocks().size();
            for (int blockIndex = 0; blockIndex < blockCount; blockIndex++) {
                AbstractTextContainer container = paragraph.getPageBlocks().get(blockIndex);
                if (container.getPage() != pageNumber) {
                    continue;
                }
                SectionRectangle sectionRectangle;
                // Keep the TextBlock check first: a container that is neither
                // TextBlock nor Table contributes nothing to the grid.
                if (container instanceof TextBlock) {
                    sectionRectangle = toSectionRectangle(container, blockIndex, blockCount);
                } else if (container instanceof Table) {
                    sectionRectangle = toTableSectionRectangle((Table) container, blockIndex, blockCount);
                } else {
                    continue;
                }
                classifiedDoc.getSectionGrid()
                        .getRectanglesPerPage()
                        .computeIfAbsent(pageNumber, key -> new ArrayList<>())
                        .add(sectionRectangle);
            }
        }
    }

    // Rectangle for a plain text block; position index is 1-based within the paragraph.
    private SectionRectangle toSectionRectangle(AbstractTextContainer container, int blockIndex, int blockCount) {
        Point topLeft = new Point(container.getMinX(), container.getMinY());
        return new SectionRectangle(topLeft, container.getWidth(), container.getHeight(), blockIndex + 1, blockCount);
    }

    // Rectangle for a table block, including one CellRectangle per non-null cell.
    private SectionRectangle toTableSectionRectangle(Table table, int blockIndex, int blockCount) {
        List<CellRectangle> cellRectangles = new ArrayList<>();
        for (List<Cell> row : table.getRows()) {
            for (Cell cell : row) {
                if (cell == null) {
                    continue;
                }
                Point cellOrigin = new Point((float) cell.getX(), (float) cell.getY());
                cellRectangles.add(new CellRectangle(cellOrigin, (float) cell.getWidth(), (float) cell.getHeight()));
            }
        }
        Point topLeft = new Point(table.getMinX(), table.getMinY());
        return new SectionRectangle(topLeft, table.getWidth(), table.getHeight(), blockIndex + 1, blockCount, cellRectangles);
    }
}

View File

@ -0,0 +1,220 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import org.apache.commons.collections4.CollectionUtils;
import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.model.Point;
import com.iqser.red.service.redaction.v1.model.SectionArea;
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.classification.model.Footer;
import com.iqser.red.service.redaction.v1.server.classification.model.Header;
import com.iqser.red.service.redaction.v1.server.classification.model.Paragraph;
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.classification.model.UnclassifiedText;
import com.iqser.red.service.redaction.v1.server.redaction.model.CellValue;
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
import com.iqser.red.service.redaction.v1.server.redaction.model.ImageType;
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@Service
@RequiredArgsConstructor
public class SectionTextBuilderService {

    /**
     * Splits a classified document into searchable {@code SectionText} units:
     * tables (as a whole or per row), paragraph text, headers, footers and
     * unclassified text, numbering the sections consecutively starting at 1.
     */
    public List<SectionText> buildSectionText(Document classifiedDoc) {
        List<SectionText> sectionTexts = new ArrayList<>();
        AtomicInteger sectionNumber = new AtomicInteger(1);
        for (Paragraph paragraph : classifiedDoc.getParagraphs()) {
            List<Table> tables = paragraph.getTables();
            for (Table table : tables) {
                // NOTE(review): two-column tables are kept as a single section
                // (presumably key/value layouts) — rationale not visible here, confirm.
                if (table.getColCount() == 2) {
                    sectionTexts.add(processTableAsOneText(table, sectionNumber));
                } else {
                    // Per-row processing increments sectionNumber once per row itself.
                    sectionTexts.addAll(processTablePerRow(table, sectionNumber));
                }
                sectionNumber.incrementAndGet();
            }
            sectionTexts.add(processText(paragraph.getSearchableText(), paragraph.getTextBlocks(), paragraph.getHeadline(), sectionNumber, paragraph
                    .getImages()));
            sectionNumber.incrementAndGet();
        }
        for (Header header : classifiedDoc.getHeaders()) {
            sectionTexts.add(processText(header.getSearchableText(), header.getTextBlocks(), "Header", sectionNumber, new ArrayList<>()));
            sectionNumber.incrementAndGet();
        }
        for (Footer footer : classifiedDoc.getFooters()) {
            sectionTexts.add(processText(footer.getSearchableText(), footer.getTextBlocks(), "Footer", sectionNumber, new ArrayList<>()));
            sectionNumber.incrementAndGet();
        }
        for (UnclassifiedText unclassifiedText : classifiedDoc.getUnclassifiedTexts()) {
            sectionTexts.add(processText(unclassifiedText.getSearchableText(), unclassifiedText.getTextBlocks(), "", sectionNumber, new ArrayList<>()));
            sectionNumber.incrementAndGet();
        }
        return sectionTexts;
    }

    /**
     * Builds one SectionText per table row. For data (non-header) cells the cell
     * content is additionally indexed under its normalized column-header name in
     * {@code tabularData}. {@code cellStarts} records each cell's character
     * offset within the row text.
     */
    private List<SectionText> processTablePerRow(Table table, AtomicInteger sectionNumber) {
        List<SectionText> sectionTexts = new ArrayList<>();
        for (List<Cell> row : table.getRows()) {
            SearchableText searchableRow = new SearchableText();
            Map<String, CellValue> tabularData = new HashMap<>();
            int start = 0;
            List<Integer> cellStarts = new ArrayList<>();
            SectionText sectionText = new SectionText();
            for (Cell cell : row) {
                if (CollectionUtils.isEmpty(cell.getTextBlocks())) {
                    continue;
                }
                // Page is taken from the cell's first text block's first sequence.
                SectionArea sectionArea = new SectionArea(new Point((float) cell.getX(), (float) cell.getY()), (float) cell
                        .getWidth(), (float) cell.getHeight(), cell.getTextBlocks()
                        .get(0)
                        .getSequences()
                        .get(0)
                        .getPage());
                sectionText.getSectionAreas().add(sectionArea);
                sectionText.getTextBlocks().addAll(cell.getTextBlocks());
                // Effectively-final snapshot of the running offset, captured by the lambda below.
                int cellStart = start;
                if (!cell.isHeaderCell()) {
                    cell.getHeaderCells().forEach(headerCell -> {
                        StringBuilder headerBuilder = new StringBuilder();
                        headerCell.getTextBlocks().forEach(textBlock -> headerBuilder.append(textBlock.getText()));
                        // Normalized header key: strips newlines, spaces and hyphens.
                        String headerName = headerBuilder.toString()
                                .replaceAll("\n", "")
                                .replaceAll(" ", "")
                                .replaceAll("-", "");
                        sectionArea.setHeader(headerName);
                        tabularData.put(headerName, new CellValue(cell.getTextBlocks(), cellStart));
                    });
                }
                for (TextBlock textBlock : cell.getTextBlocks()) {
                    // TODO avoid cell overlap merging.
                    searchableRow.addAll(textBlock.getSequences());
                }
                cellStarts.add(cellStart);
                // +1 assumes a single separator character between cells — TODO confirm
                // this matches how SearchableText joins sequences.
                start = start + cell.toString().trim().length() + 1;
            }
            sectionText.setText(searchableRow.toString());
            sectionText.setHeadline(table.getHeadline());
            sectionText.setSectionNumber(sectionNumber.intValue());
            sectionText.setTable(true);
            sectionText.setTabularData(tabularData);
            sectionText.setCellStarts(cellStarts);
            sectionTexts.add(sectionText);
            sectionNumber.incrementAndGet();
        }
        return sectionTexts;
    }

    /**
     * Flattens an entire table into a single SectionText. No tabularData is
     * built here; only cell areas, text blocks and per-cell start offsets.
     */
    private SectionText processTableAsOneText(Table table, AtomicInteger sectionNumber) {
        SearchableText entireTableText = new SearchableText();
        SectionText sectionText = new SectionText();
        int start = 0;
        List<Integer> cellStarts = new ArrayList<>();
        for (List<Cell> row : table.getRows()) {
            for (Cell cell : row) {
                if (CollectionUtils.isEmpty(cell.getTextBlocks())) {
                    continue;
                }
                SectionArea sectionArea = new SectionArea(new Point((float) cell.getX(), (float) cell.getY()), (float) cell
                        .getWidth(), (float) cell.getHeight(), cell.getTextBlocks()
                        .get(0)
                        .getSequences()
                        .get(0)
                        .getPage());
                sectionText.getTextBlocks().addAll(cell.getTextBlocks());
                sectionText.getSectionAreas().add(sectionArea);
                for (TextBlock textBlock : cell.getTextBlocks()) {
                    entireTableText.addAll(textBlock.getSequences());
                }
                cellStarts.add(start);
                start = start + cell.toString().trim().length() + 1;
            }
        }
        sectionText.setCellStarts(cellStarts);
        sectionText.setText(entireTableText.toString());
        sectionText.setHeadline(table.getHeadline());
        sectionText.setSectionNumber(sectionNumber.intValue());
        sectionText.setTable(true);
        return sectionText;
    }

    /**
     * Builds a non-table SectionText from free-flowing text blocks, attaching
     * one SectionArea per block and converting any PDF images to Image entries.
     */
    private SectionText processText(SearchableText searchableText, List<TextBlock> paragraphTextBlocks, String headline,
            AtomicInteger sectionNumber, List<PdfImage> images) {
        SectionText sectionText = new SectionText();
        for (TextBlock paragraphTextBlock : paragraphTextBlocks) {
            SectionArea sectionArea = new SectionArea(new Point(paragraphTextBlock.getMinX(), paragraphTextBlock.getMinY()), paragraphTextBlock
                    .getWidth(), paragraphTextBlock.getHeight(), paragraphTextBlock.getPage());
            sectionText.getSectionAreas().add(sectionArea);
        }
        sectionText.setText(searchableText.toString());
        sectionText.setHeadline(headline);
        sectionText.setSectionNumber(sectionNumber.intValue());
        sectionText.setTable(false);
        sectionText.setImages(images.stream()
                .map(image -> convertImage(image, sectionNumber.intValue(), headline))
                .collect(Collectors.toSet()));
        sectionText.setTextBlocks(paragraphTextBlocks);
        return sectionText;
    }

    /**
     * Maps a PdfImage to the redaction Image model. ImageType.OTHER is exposed
     * as the generic type "image"; all other types use their lower-cased enum name.
     */
    private Image convertImage(PdfImage pdfImage, int sectionNumber, String headline) {
        return Image.builder()
                .type(pdfImage.getImageType().equals(ImageType.OTHER) ? "image" : pdfImage.getImageType()
                        .name()
                        .toLowerCase(Locale.ROOT))
                .position(pdfImage.getPosition())
                .sectionNumber(sectionNumber)
                .section(headline)
                .page(pdfImage.getPage())
                .hasTransparency(pdfImage.isHasTransparency())
                .build();
    }
}

View File

@ -1,10 +1,12 @@
package com.iqser.red.service.redaction.v1.server.redaction.utils;
import com.iqser.red.service.redaction.v1.model.Engine;
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrementValue;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.model.SearchableText;
import lombok.experimental.UtilityClass;
import lombok.extern.slf4j.Slf4j;
@ -17,7 +19,6 @@ import java.util.stream.Collectors;
@SuppressWarnings("PMD")
public class EntitySearchUtils {
public boolean sectionContainsAny(String sectionText, Set<DictionaryIncrementValue> values) {
String inputString = sectionText.toLowerCase(Locale.ROOT);
@ -38,9 +39,7 @@ public class EntitySearchUtils {
if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(inputString.charAt(startIndex - 1)) || isSeparator(inputString
.charAt(startIndex - 1))) && (stopIndex == inputString.length() || isSeparator(inputString.charAt(stopIndex)))) {
if (value.isCaseinsensitive() || !value.isCaseinsensitive() && sectionText.substring(startIndex, stopIndex).equals(value.getValue())) {
return true;
}
return true;
}
} while (startIndex > -1);
}
@ -49,7 +48,7 @@ public class EntitySearchUtils {
public Set<Entity> find(String inputString, Set<String> values, String type, String headline, int sectionNumber,
boolean local, boolean isDossierDictionary) {
boolean isDictionaryEntry, boolean isDossierDictionary, Engine engine) {
Set<Entity> found = new HashSet<>();
@ -69,7 +68,7 @@ public class EntitySearchUtils {
if (startIndex > -1 && (startIndex == 0 || Character.isWhitespace(inputString.charAt(startIndex - 1)) || isSeparator(inputString
.charAt(startIndex - 1))) && (stopIndex == inputString.length() || isSeparator(inputString.charAt(stopIndex)))) {
found.add(new Entity(inputString.substring(startIndex, stopIndex), type, startIndex, stopIndex, headline, sectionNumber, !local, isDossierDictionary));
found.add(new Entity(inputString.substring(startIndex, stopIndex), type, startIndex, stopIndex, headline, sectionNumber, isDictionaryEntry, isDossierDictionary, engine));
}
} while (startIndex > -1);
}
@ -98,8 +97,8 @@ public class EntitySearchUtils {
.sorted(Comparator.comparing(Entity::getStart))
.collect(Collectors.toList());
Entity firstEntity = orderedEntities.get(0);
List<EntityPositionSequence> positionSequences = text.getSequences(firstEntity.getWord().trim(), dictionary.isCaseInsensitiveDictionary(firstEntity
.getType()), firstEntity.getTargetSequences());
List<EntityPositionSequence> positionSequences = text.getSequences(firstEntity.getWord()
.trim(), dictionary.isCaseInsensitiveDictionary(firstEntity.getType()), firstEntity.getTargetSequences());
for (int i = 0; i <= orderedEntities.size() - 1; i++) {
try {
@ -133,6 +132,7 @@ public class EntitySearchUtils {
public void addEntitiesWithHigherRank(Set<Entity> entities, Set<Entity> found, Dictionary dictionary) {
found.forEach(f -> addEntitiesWithHigherRank(entities, f, dictionary));
}
@ -143,14 +143,33 @@ public class EntitySearchUtils {
Entity existing = entities.stream().filter(entity -> entity.equals(found)).findFirst().get();
if (dictionary.getDictionaryRank(existing.getType()) <= dictionary.getDictionaryRank(found.getType())) {
entities.remove(found);
entities.add(found);
} else {
existing.getEngines().addAll(found.getEngines());
}
} else {
entities.add(found);
}
entities.add(found);
}
public void addEntitiesIgnoreRank(Set<Entity> entities, Set<Entity> found) {
// HashSet keeps old value but we want the new.
entities.removeAll(found);
entities.addAll(found);
}
public void addOrAddEngine(Set<Entity> existing, Set<Entity> toBeAdded){
for(Entity toAdd: toBeAdded){
if (existing.contains(toAdd)) {
Entity existingEntity = existing.stream().filter(entity -> entity.equals(toAdd)).findFirst().get();
existingEntity.getEngines().addAll(toAdd.getEngines());
} else {
existing.add(toAdd);
}
}
}
}

View File

@ -12,7 +12,7 @@ public class TextNormalizationUtilities {
* @return Text without line-break hyphenation.
*/
public static String removeHyphenLineBreaks(String text) {
return text.replaceAll("([^\\s\\d\\-]{2,})[\\-\\u00AD]\\R|\n\r(.+ )", "$1$2");
return text.replaceAll("([^\\s\\d\\-]{2,})[\\-\\u00AD]\\R", "$1");
}
}

View File

@ -0,0 +1,165 @@
package com.iqser.red.service.redaction.v1.server.segmentation;
import java.awt.Graphics;
import java.awt.geom.Rectangle2D;
import java.awt.image.BufferedImage;
import java.util.ArrayList;
import java.util.List;
import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@Service
@RequiredArgsConstructor
public class ImageMergeService {

    /**
     * Merges images that were split into consecutive fragments during PDF import and then
     * removes images whose bounding box lies entirely inside another image's bounding box.
     *
     * @param images   the raw images extracted from a single PDF page, in extraction order
     * @param rotation the page rotation in degrees; only the value 90 changes the merge geometry
     * @return a new list containing merged images, without images nested inside other images
     */
    public List<PdfImage> mergeImages(List<PdfImage> images, int rotation) {
        List<PdfImage> mergedList = processImages(images, rotation);
        // Collect every image that is fully contained in another image's position rectangle;
        // those duplicates are dropped afterwards.
        List<PdfImage> imagesInImage = new ArrayList<>();
        for (PdfImage image : mergedList) {
            for (PdfImage inner : mergedList) {
                if (image != inner && image.getPosition().contains(inner.getPosition().getX(), inner.getPosition().getY(), inner.getPosition().getWidth(), inner.getPosition().getHeight())) {
                    imagesInImage.add(inner);
                }
            }
        }
        mergedList.removeAll(imagesInImage);
        return mergedList;
    }

    /**
     * Merges images that were separated during PDF import and returns a new list of PdfImages.
     * Walks the candidate flags produced by {@link #getCandidatesList(List, int)}: a run of
     * {@code true} flags is folded into one merged image; {@code false} flags keep the
     * originals. A list with fewer than two images is returned unchanged.
     */
    private List<PdfImage> processImages(List<PdfImage> imageList, int rotation) {
        if (imageList.size() > 1) {
            List<PdfImage> mergedList = new ArrayList<>();
            int countElementsInList = 0;
            boolean beginImage = true;
            // One flag per adjacent pair: true = candidate for merging, false = no merging.
            List<Boolean> candidatesList = getCandidatesList(imageList, rotation);
            // Loop through the flags; wherever a pair is a merge candidate, merge the images
            // and add/extend the entry in mergedList.
            for (int i = 0; i < candidatesList.size(); i++) {
                if (candidatesList.get(i)) {
                    if (beginImage) {
                        // Start of a fragmented image: merge the first two parts from imageList.
                        PdfImage mergedImage = mergeTwoImages(imageList.get(i), imageList.get(i + 1), rotation);
                        // Only record the result when the merge succeeded (null on failure).
                        if (mergedImage != null) {
                            mergedList.add(mergedImage);
                            countElementsInList++;
                        }
                    } else {
                        // Middle of a fragmented image: extend the current piece of mergedList
                        // with the next fragment from imageList.
                        PdfImage mergedImage = mergeTwoImages(mergedList.get(countElementsInList - 1), imageList.get(i + 1), rotation);
                        // Only replace the entry when the merge succeeded.
                        if (mergedImage != null) {
                            mergedList.set(countElementsInList - 1, mergedImage);
                        }
                    }
                    beginImage = false;
                } else {
                    // If the last candidate flag is false, both images i and i+1 must be added.
                    if (i == candidatesList.size() - 1) {
                        if (countElementsInList > 0 && mergedList.get(countElementsInList - 1) == imageList.get(i)) {
                            mergedList.add(imageList.get(i + 1));
                        } else {
                            mergedList.add(imageList.get(i));
                            mergedList.add(imageList.get(i + 1));
                        }
                    } else {
                        // First image is not split: add image i to the result list.
                        if (beginImage) {
                            mergedList.add(imageList.get(i));
                            countElementsInList++;
                        } else {
                            // Image i is the end of a merged image: add the begin of the next one.
                            mergedList.add(imageList.get(i + 1));
                            countElementsInList++;
                            beginImage = false;
                        }
                    }
                }
            }
            return mergedList;
        } else {
            return imageList;
        }
    }

    /**
     * Stacks {@code image2} below {@code image1} on a fresh RGB canvas and returns the
     * combined {@link PdfImage}, or {@code null} when drawing fails.
     */
    private PdfImage mergeTwoImages(PdfImage image1, PdfImage image2, int rotation) {
        // NOTE(review): the getPosition() dimensions appear unreliable here and would crop
        // parts of the image, so the canvas size is taken from the BufferedImage values below.
        double width = image1.getPosition().getWidth();
        double width2 = image2.getPosition().getWidth();
        double height1 = image1.getPosition().getHeight();
        double height2 = image2.getPosition().getHeight();
        // The dimensions stored on the BufferedImage itself are the ones that work.
        double img1height = image1.getImage().getHeight();
        double img1width = image1.getImage().getWidth();
        double img2height = image2.getImage().getHeight();
        BufferedImage mergedImage = new BufferedImage((int) img1width, (int) (img1height + img2height), BufferedImage.TYPE_INT_RGB);
        Graphics mergedImageGraphics = mergedImage.getGraphics();
        try {
            mergedImageGraphics.drawImage(image1.getImage(), 0, 0, null);
            mergedImageGraphics.drawImage(image2.getImage(), 0, (int) (img1height), null);
            // Build position for the merged image from image1's x, image2's y and the
            // combined extent; for 90-degree rotated pages width/height roles swap.
            Rectangle2D pos = new Rectangle2D.Float();
            pos.setRect(image1.getPosition().getX(), image2.getPosition().getY(), rotation == 90 ? width + width2 : width, rotation == 90 ? height1 : height1 + height2);
            PdfImage newPdfImage = new PdfImage(mergedImage, pos, image1.getPage(), image1.isHasTransparency() || image2.isHasTransparency());
            // Release the pixel resources of the source images and the merged canvas.
            image1.getImage().flush();
            image2.getImage().flush();
            mergedImage.flush();
            return newPdfImage;
        } catch (Exception e) {
            // Failed to merge image; caller treats null as "keep fragments separate".
            log.error("Failed to merge image", e);
            return null;
        } finally {
            // Graphics must always be disposed — previously leaked on the exception path.
            mergedImageGraphics.dispose();
        }
    }

    /**
     * Builds one boolean per adjacent image pair: {@code true} when the pair is a
     * candidate for merging, {@code false} otherwise.
     */
    private List<Boolean> getCandidatesList(List<PdfImage> imageList, int rotation) {
        List<Boolean> candidatesList = new ArrayList<>();
        for (int i = 1; i < imageList.size(); i++) {
            candidatesList.add(isCandidateForMerging(imageList.get(i - 1), imageList.get(i), rotation));
        }
        return candidatesList;
    }

    /**
     * Decides whether two images are fragments of the same picture, based on their
     * coordinates, width and height. For rotation == 90 the x/y and width/height axes
     * are swapped before comparing.
     */
    private boolean isCandidateForMerging(PdfImage image1, PdfImage image2, int rotation) {
        double x1 = rotation == 90 ? image1.getPosition().getY() : image1.getPosition().getX();
        double y1 = rotation == 90 ? image1.getPosition().getX() : image1.getPosition().getY();
        double width1 = rotation == 90 ? image1.getPosition().getHeight() : image1.getPosition().getWidth();
        double x2 = rotation == 90 ? image2.getPosition().getY() : image2.getPosition().getX();
        double y2 = rotation == 90 ? image2.getPosition().getX() : image2.getPosition().getY();
        double width2 = rotation == 90 ? image2.getPosition().getHeight() : image2.getPosition().getWidth();
        double height2 = rotation == 90 ? image2.getPosition().getWidth() : image2.getPosition().getHeight();
        // Same picture when the x-coordinates and widths match and the second fragment's
        // height equals the y-distance between the fragments (i.e. it sits flush against
        // the first one); very tall, narrow slivers (width2 <= height2/6) are excluded.
        return x1 == x2 && width1 == width2 && Math.ceil(height2) == Math.ceil(rotation == 90 ? y2 - y1 : y1 - y2) && width2 > (height2 / 6);
    }
}

View File

@ -1,6 +1,19 @@
package com.iqser.red.service.redaction.v1.server.segmentation;
import com.iqser.red.service.redaction.v1.model.Rectangle;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.io.IOUtils;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
@ -15,24 +28,9 @@ import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractT
import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings;
import com.iqser.red.service.redaction.v1.server.tableextraction.service.RulingCleaningService;
import com.iqser.red.service.redaction.v1.server.tableextraction.service.TableExtractionService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.IOUtils;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.springframework.stereotype.Service;
import java.awt.Graphics;
import java.awt.geom.Rectangle2D;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
@Slf4j
@Service
@ -47,13 +45,17 @@ public class PdfSegmentationService {
private final ClassificationService classificationService;
private final SectionsBuilderService sectionsBuilderService;
private final ImageClassificationService imageClassificationService;
private final ImageMergeService imageMergeService;
public Document parseDocument(InputStream documentInputStream) throws IOException {
return parseDocument(documentInputStream, false);
}
public Document parseDocument(InputStream documentInputStream, boolean ignoreImages) throws IOException {
PDDocument pdDocument = null;
try {
//create tempFile
@ -64,7 +66,6 @@ public class PdfSegmentationService {
Document document = new Document();
List<Page> pages = new ArrayList<>();
pdDocument = reinitializePDDocument(tempFile, null);
long pageCount = pdDocument.getNumberOfPages();
@ -101,32 +102,19 @@ public class PdfSegmentationService {
page.setRotation(rotation);
page.setLandscape(isLandscape || isRotated);
page.setPageNumber(pageNumber);
List<PdfImage> mergedList = processImages(stripper.getImages());
List<PdfImage> imagesInImage = new ArrayList<>();
for(PdfImage image: mergedList){
for (PdfImage inner: mergedList){
if(image != inner && image.getPosition().contains(inner.getPosition().getX(), inner.getPosition().getY(), inner.getPosition().getWidth(), inner.getPosition().getHeight())){
imagesInImage.add(inner);
}
}
}
mergedList.removeAll(imagesInImage);
List<PdfImage> mergedList = imageMergeService.mergeImages(stripper.getImages(), rotation);
page.setImages(mergedList);
tableExtractionService.extractTables(cleanRulings, page);
buildPageStatistics(page);
increaseDocumentStatistics(page, document);
if (!ignoreImages) {
imageClassificationService.classifyImages(page);
}
pages.add(page);
}
document.setPages(pages);
@ -149,7 +137,9 @@ public class PdfSegmentationService {
}
}
private PDDocument reinitializePDDocument(File tempFile, PDDocument pdDocument) throws IOException {
if (pdDocument != null) {
pdDocument.close();
}
@ -164,130 +154,6 @@ public class PdfSegmentationService {
return newPDDocument;
}
/**
 * Merges images that were separated during PDF import and returns a new list of PdfImages.
 * Walks the per-pair candidate flags from getCandidatesList: a run of true flags is folded
 * into a single merged image; false flags keep the original fragments. Lists with fewer
 * than two images are returned unchanged.
 */
private List<PdfImage> processImages(List<PdfImage> imageList) {
    if (imageList.size() > 1) {
        List<PdfImage> mergedList = new ArrayList<>();
        int countElementsInList = 0;
        boolean beginImage = true;
        // One flag per adjacent pair: true = candidate for merging, false = no merging.
        List<Boolean> candidatesList = getCandidatesList(imageList);
        // Loop through the flags; wherever a pair is a merge candidate, merge the images
        // and add/extend the corresponding entry in mergedList.
        for (int i = 0; i < candidatesList.size(); i++) {
            if (candidatesList.get(i)) {
                if (beginImage) {
                    // Start of a fragmented image: merge the first two parts from imageList.
                    PdfImage mergedImage = mergeTwoImages(imageList.get(i), imageList.get(i + 1));
                    // Only record the result when the merge succeeded (null on failure).
                    if (mergedImage != null) {
                        mergedList.add(mergedImage);
                        countElementsInList++;
                    }
                } else {
                    // Middle of a fragmented image: extend the current piece of mergedList
                    // with the next fragment from imageList.
                    PdfImage mergedImage = mergeTwoImages(mergedList.get(countElementsInList - 1), imageList.get(i + 1));
                    // Only replace the entry when the merge succeeded.
                    if (mergedImage != null) {
                        mergedList.set(countElementsInList - 1, mergedImage);
                    }
                }
                beginImage = false;
            } else {
                // If the last candidate flag is false, then both images i and i+1 must be added.
                if (i == candidatesList.size() - 1) {
                    if (countElementsInList > 0 && mergedList.get(countElementsInList - 1) == imageList.get(i)) {
                        mergedList.add(imageList.get(i + 1));
                    } else {
                        mergedList.add(imageList.get(i));
                        mergedList.add(imageList.get(i + 1));
                    }
                } else {
                    // First image is not split: add image i to the result list.
                    if (beginImage) {
                        mergedList.add(imageList.get(i));
                        countElementsInList++;
                    } else {
                        // Image i is the end of a merged image: add the begin of the next one.
                        mergedList.add(imageList.get(i + 1));
                        countElementsInList++;
                        beginImage = false;
                    }
                }
            }
        }
        return mergedList;
    } else {
        return imageList;
    }
}
/**
 * Stacks image2 directly below image1 on a fresh RGB canvas and returns the combined
 * PdfImage positioned at image1's x and image2's y with the summed position height.
 * Returns null when drawing fails.
 */
private PdfImage mergeTwoImages(PdfImage image1, PdfImage image2) {
    // NOTE(review): the getPosition() dimensions appear unreliable here and would crop
    // parts of the image, so the canvas size is taken from the BufferedImage values below.
    double width = image1.getPosition().getWidth();
    double height1 = image1.getPosition().getHeight();
    double height2 = image2.getPosition().getHeight();
    // The dimensions stored on the BufferedImage itself are the ones that work.
    double img1height = image1.getImage().getHeight();
    double img1width = image1.getImage().getWidth();
    double img2height = image2.getImage().getHeight();
    BufferedImage mergedImage = new BufferedImage((int) img1width, (int) (img1height + img2height), BufferedImage.TYPE_INT_RGB);
    Graphics mergedImageGraphics = mergedImage.getGraphics();
    try {
        mergedImageGraphics.drawImage(image1.getImage(), 0, 0, null);
        mergedImageGraphics.drawImage(image2.getImage(), 0, (int) (img1height), null);
        // Build the position for the merged image from image1's values plus both heights.
        Rectangle2D pos = new Rectangle2D.Float();
        pos.setRect(image1.getPosition().getX(), image2.getPosition().getY(), width, height1 + height2);
        PdfImage newPdfImage = new PdfImage(mergedImage, pos, image1.getPage());
        // Release the pixel resources of the source images and the merged canvas.
        image1.getImage().flush();
        image2.getImage().flush();
        mergedImage.flush();
        return newPdfImage;
    } catch (Exception e) {
        // Failed to merge image; caller treats null as "keep fragments separate".
        log.error("Failed to merge image", e);
        return null;
    } finally {
        // Graphics must always be disposed — previously leaked on the exception path.
        mergedImageGraphics.dispose();
    }
}
/**
 * Builds one boolean per adjacent image pair: true when the pair is a candidate
 * for merging, false otherwise. The result has size() - 1 entries.
 */
private List<Boolean> getCandidatesList(List<PdfImage> imageList) {
    List<Boolean> candidatesList = new ArrayList<>();
    // Compare each image against its predecessor; start at index 1 so every
    // flag describes the pair (index - 1, index).
    for (int index = 1; index < imageList.size(); index++) {
        candidatesList.add(isCandidateForMerging(imageList.get(index - 1), imageList.get(index)));
    }
    return candidatesList;
}
/**
 * Decides whether two images are fragments of the same picture, based on their
 * coordinates, width and height: same left edge and width, and the second fragment's
 * (ceiled) height equal to the y-distance between the fragments.
 */
private boolean isCandidateForMerging(PdfImage image1, PdfImage image2) {
    var firstPosition = image1.getPosition();
    var secondPosition = image2.getPosition();
    // Fragments of one picture share the same x-coordinate and width.
    boolean sameColumn = firstPosition.getX() == secondPosition.getX()
            && firstPosition.getWidth() == secondPosition.getWidth();
    // The second fragment sits flush against the first when its height equals
    // the vertical distance between the two positions (compared after ceiling).
    boolean stackedFlush = Math.ceil(secondPosition.getHeight()) == Math.ceil(firstPosition.getY() - secondPosition.getY());
    // Exclude very tall, narrow slivers: width must exceed a sixth of the height.
    boolean notASliver = secondPosition.getWidth() > (secondPosition.getHeight() / 6);
    return sameColumn && stackedFlush && notASliver;
}
private void increaseDocumentStatistics(Page page, Document document) {
@ -319,5 +185,4 @@ public class PdfSegmentationService {
}
}

View File

@ -15,4 +15,8 @@ public class RedactionServiceSettings {
private float maxImageCropboxRatio = 0.9f;
private int analysisVersion = 1;
private boolean enableEntityRecognition = true;
}

View File

@ -1,10 +1,11 @@
package com.iqser.red.service.redaction.v1.server.storage;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.file.management.v1.api.model.FileType;
import com.iqser.red.service.persistence.service.v1.api.model.FileType;
import com.iqser.red.service.redaction.v1.model.RedactionLog;
import com.iqser.red.service.redaction.v1.model.SectionGrid;
import com.iqser.red.service.redaction.v1.server.classification.model.Text;
import com.iqser.red.service.redaction.v1.server.client.model.NerEntities;
import com.iqser.red.storage.commons.exception.StorageObjectDoesNotExist;
import com.iqser.red.storage.commons.service.StorageService;
import lombok.Getter;
@ -32,16 +33,16 @@ public class RedactionStorageService {
@SneakyThrows
public void storeObject(String projectId, String fileId, FileType fileType, Object any) {
storageService.storeObject(StorageIdUtils.getStorageId(projectId, fileId, fileType), objectMapper.writeValueAsBytes(any));
public void storeObject(String dossierId, String fileId, FileType fileType, Object any) {
storageService.storeObject(StorageIdUtils.getStorageId(dossierId, fileId, fileType), objectMapper.writeValueAsBytes(any));
}
public RedactionLog getRedactionLog(String projectId, String fileId) {
public RedactionLog getRedactionLog(String dossierId, String fileId) {
InputStreamResource inputStreamResource;
try {
inputStreamResource = storageService.getObject(StorageIdUtils.getStorageId(projectId, fileId, FileType.REDACTION_LOG));
inputStreamResource = storageService.getObject(StorageIdUtils.getStorageId(dossierId, fileId, FileType.REDACTION_LOG));
} catch (StorageObjectDoesNotExist e) {
log.debug("Text not available.");
return null;
@ -55,11 +56,11 @@ public class RedactionStorageService {
}
public Text getText(String projectId, String fileId) {
public Text getText(String dossierId, String fileId) {
InputStreamResource inputStreamResource;
try {
inputStreamResource = storageService.getObject(StorageIdUtils.getStorageId(projectId, fileId, FileType.TEXT));
inputStreamResource = storageService.getObject(StorageIdUtils.getStorageId(dossierId, fileId, FileType.TEXT));
} catch (StorageObjectDoesNotExist e) {
log.debug("Text not available.");
return null;
@ -73,9 +74,28 @@ public class RedactionStorageService {
}
public SectionGrid getSectionGrid(String projectId, String fileId) {
public NerEntities getNerEntities(String dossierId, String fileId) {
var sectionGrid = storageService.getObject(StorageIdUtils.getStorageId(projectId, fileId, FileType.SECTION_GRID));
InputStreamResource inputStreamResource;
try {
inputStreamResource = storageService.getObject(StorageIdUtils.getStorageId(dossierId, fileId, FileType.NER_ENTITIES));
} catch (StorageObjectDoesNotExist e) {
log.debug("NER Entities not available.");
return null;
}
try {
return objectMapper.readValue(inputStreamResource.getInputStream(), NerEntities.class);
} catch (IOException e) {
throw new RuntimeException("Could not convert NerEntities", e);
}
}
public SectionGrid getSectionGrid(String dossierId, String fileId) {
var sectionGrid = storageService.getObject(StorageIdUtils.getStorageId(dossierId, fileId, FileType.SECTION_GRID));
try {
return objectMapper.readValue(sectionGrid.getInputStream(), SectionGrid.class);
} catch (IOException e) {
@ -95,8 +115,8 @@ public class RedactionStorageService {
public static class StorageIdUtils {
public static String getStorageId(String projectId, String fileId, FileType fileType) {
return projectId + "/" + fileId + "." + fileType.name() + fileType.getExtension();
public static String getStorageId(String dossierId, String fileId, FileType fileType) {
return dossierId + "/" + fileId + "." + fileType.name() + fileType.getExtension();
}
}

View File

@ -2,6 +2,8 @@ package com.iqser.red.service.redaction.v1.server.tableextraction.model;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.iqser.red.service.redaction.v1.model.Rectangle;
import com.iqser.red.service.redaction.v1.server.classification.model.Orientation;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
@ -18,6 +20,8 @@ public abstract class AbstractTextContainer {
protected String classification;
protected int page;
private Orientation orientation = Orientation.NONE;
public abstract String getText();
public boolean contains(AbstractTextContainer other) {

View File

@ -246,8 +246,12 @@ public class Ruling extends Line2D.Float {
public Ruling expand(float amount) {
Ruling r = (Ruling) this.clone();
r.setStart(this.getStart() - amount);
r.setEnd(this.getEnd() + amount);
try {
r.setStart(this.getStart() - amount);
r.setEnd(this.getEnd() + amount);
} catch (UnsupportedOperationException e){
log.warn("Could not expand ruling!");
}
return r;
}

View File

@ -102,7 +102,7 @@ public class PdfVisualisationService {
contentStream.newLineAtOffset(textBlock.getMinX(), textBlock.getMaxY());
contentStream.showText(textBlock.getClassification());
contentStream.showText(textBlock.getClassification() + textBlock.getOrientation());
contentStream.endText();
}

View File

@ -1,9 +1,9 @@
info:
description: Redaction Service Server V1
configuration-service.url: "http://configuration-service-v1:8080"
file-management-service.url: "http://file-management-service-v1:8080"
persistence-service.url: "http://persistence-service-v1:8080"
image-service.url: "http://image-service-v1:8080"
entity-recognition-service.url: "http://entity-recognition-service-v1:8080"
server:
port: 8080

View File

@ -2,9 +2,11 @@ package com.iqser.red.service.redaction.v1.server;
import com.amazonaws.services.s3.AmazonS3;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.configuration.v1.api.model.*;
import com.iqser.red.service.configuration.v1.api.resource.DictionaryResource;
import com.iqser.red.service.file.management.v1.api.model.FileType;
import com.iqser.red.service.persistence.service.v1.api.model.FileType;
import com.iqser.red.service.persistence.service.v1.api.model.JSONPrimitive;
import com.iqser.red.service.persistence.service.v1.api.model.data.configuration.Colors;
import com.iqser.red.service.persistence.service.v1.api.model.data.configuration.DictionaryEntry;
import com.iqser.red.service.persistence.service.v1.api.model.data.configuration.Type;
import com.iqser.red.service.redaction.v1.model.*;
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
@ -13,12 +15,14 @@ import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
import com.iqser.red.service.redaction.v1.server.memory.MemoryStats;
import com.iqser.red.service.redaction.v1.server.redaction.service.ReanalyzeService;
import com.iqser.red.service.redaction.v1.server.redaction.service.AnalyzeService;
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
import com.iqser.red.service.redaction.v1.server.redaction.utils.TextNormalizationUtilities;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import com.iqser.red.storage.commons.service.StorageService;
import lombok.SneakyThrows;
import org.apache.commons.io.IOUtils;
import org.junit.After;
import org.junit.Before;
@ -84,12 +88,11 @@ public class RedactionIntegrationTest {
private static final String PII = "PII";
@Autowired
private RedactionController redactionController;
@Autowired
private ReanalyzeService reanalyzeService;
private AnalyzeService analyzeService;
@Autowired
private ObjectMapper objectMapper;
@ -127,9 +130,10 @@ public class RedactionIntegrationTest {
private final Map<String, Integer> rankTypeMap = new HashMap<>();
private final Colors colors = new Colors();
private final Map<String, Long> reanlysisVersions = new HashMap<>();
private final Set<String> deleted = new HashSet<>();
private final static String TEST_RULESET_ID = "123";
private final static String TEST_PROJECT_ID = "123";
private final static String TEST_DOSSIER_TEMPLATE_ID = "123";
private final static String TEST_DOSSIER_ID = "123";
private final static String TEST_FILE_ID = "123";
@Configuration
@ -152,18 +156,20 @@ public class RedactionIntegrationTest {
return kieServices.newKieContainer(kieModule.getReleaseId());
}
@Bean
@Primary
public StorageService inmemoryStorage() {
return new FileSystemBackedStorageService();
}
}
@After
public void cleanupStorage() {
if (this.storageService instanceof FileSystemBackedStorageService) {
((FileSystemBackedStorageService) this.storageService).clearStorage();
}
@ -173,51 +179,64 @@ public class RedactionIntegrationTest {
@Before
public void stubClients() {
when(rulesClient.getVersion(TEST_RULESET_ID)).thenReturn(0L);
when(rulesClient.getRules(TEST_RULESET_ID)).thenReturn(new RulesResponse(RULES));
when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(JSONPrimitive.of(RULES));
loadDictionaryForTest();
loadTypeForTest();
when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(0L);
when(dictionaryClient.getAllTypes(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(TypeResponse.builder()
.types(getTypeResponse())
.build());
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
when(dictionaryClient.getAllTypesForDossierTemplate(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(getTypeResponse());
when(dictionaryClient.getVersion(TEST_RULESET_ID, TEST_PROJECT_ID)).thenReturn(0L);
when(dictionaryClient.getAllTypes(TEST_RULESET_ID, TEST_PROJECT_ID)).thenReturn(TypeResponse.builder()
.types(List.of(TypeResult.builder()
.type(DOSSIER_REDACTIONS)
.ruleSetId(TEST_RULESET_ID)
.hexColor( "#ffe187")
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID)).thenReturn(List.of(Type.builder()
.id(DOSSIER_REDACTIONS +":"+ TEST_DOSSIER_TEMPLATE_ID)
.type(DOSSIER_REDACTIONS)
.dossierTemplateId(TEST_DOSSIER_ID)
.hexColor("#ffe187")
.isHint(hintTypeMap.get(DOSSIER_REDACTIONS))
.isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS))
.isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS))
.rank(rankTypeMap.get(DOSSIER_REDACTIONS))
.build()))
.build());
.build()));
when(dictionaryClient.getDictionaryForType(VERTEBRATE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(VERTEBRATE, false));
when(dictionaryClient.getDictionaryForType(ADDRESS, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(ADDRESS, false));
when(dictionaryClient.getDictionaryForType(AUTHOR, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(AUTHOR, false));
when(dictionaryClient.getDictionaryForType(SPONSOR, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(SPONSOR, false));
when(dictionaryClient.getDictionaryForType(NO_REDACTION_INDICATOR, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(NO_REDACTION_INDICATOR, false));
when(dictionaryClient.getDictionaryForType(REDACTION_INDICATOR, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(REDACTION_INDICATOR, false));
when(dictionaryClient.getDictionaryForType(HINT_ONLY, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(HINT_ONLY, false));
when(dictionaryClient.getDictionaryForType(MUST_REDACT, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(MUST_REDACT, false));
when(dictionaryClient.getDictionaryForType(PUBLISHED_INFORMATION, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(PUBLISHED_INFORMATION, false));
when(dictionaryClient.getDictionaryForType(TEST_METHOD, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(TEST_METHOD, false));
when(dictionaryClient.getDictionaryForType(PII, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(PII, false));
when(dictionaryClient.getDictionaryForType(RECOMMENDATION_AUTHOR, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(RECOMMENDATION_AUTHOR, false));
when(dictionaryClient.getDictionaryForType(RECOMMENDATION_ADDRESS, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(RECOMMENDATION_ADDRESS, false));
when(dictionaryClient.getDictionaryForType(FALSE_POSITIVE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(FALSE_POSITIVE, false));
when(dictionaryClient.getDictionaryForType(PURITY, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(PURITY, false));
when(dictionaryClient.getDictionaryForType(IMAGE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(IMAGE, false));
when(dictionaryClient.getDictionaryForType(OCR, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(OCR, false));
when(dictionaryClient.getDictionaryForType(LOGO, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(LOGO, false));
when(dictionaryClient.getDictionaryForType(SIGNATURE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(SIGNATURE, false));
when(dictionaryClient.getDictionaryForType(FORMULA, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(FORMULA, false));
when(dictionaryClient.getDictionaryForType(DOSSIER_REDACTIONS, TEST_RULESET_ID, TEST_PROJECT_ID)).thenReturn(getDictionaryResponse(DOSSIER_REDACTIONS, true));
when(dictionaryClient.getColors(TEST_RULESET_ID)).thenReturn(colors);
when(dictionaryClient.getDictionaryForType(VERTEBRATE + ":" + TEST_DOSSIER_TEMPLATE_ID))
.thenReturn(getDictionaryResponse(VERTEBRATE, false));
when(dictionaryClient.getDictionaryForType(ADDRESS+ ":" + TEST_DOSSIER_TEMPLATE_ID))
.thenReturn(getDictionaryResponse(ADDRESS, false));
when(dictionaryClient.getDictionaryForType(AUTHOR+ ":" + TEST_DOSSIER_TEMPLATE_ID))
.thenReturn(getDictionaryResponse(AUTHOR, false));
when(dictionaryClient.getDictionaryForType(SPONSOR+ ":" + TEST_DOSSIER_TEMPLATE_ID))
.thenReturn(getDictionaryResponse(SPONSOR, false));
when(dictionaryClient.getDictionaryForType(NO_REDACTION_INDICATOR+ ":" + TEST_DOSSIER_TEMPLATE_ID))
.thenReturn(getDictionaryResponse(NO_REDACTION_INDICATOR, false));
when(dictionaryClient.getDictionaryForType(REDACTION_INDICATOR+ ":" + TEST_DOSSIER_TEMPLATE_ID))
.thenReturn(getDictionaryResponse(REDACTION_INDICATOR, false));
when(dictionaryClient.getDictionaryForType(HINT_ONLY+ ":" + TEST_DOSSIER_TEMPLATE_ID))
.thenReturn(getDictionaryResponse(HINT_ONLY, false));
when(dictionaryClient.getDictionaryForType(MUST_REDACT+ ":" + TEST_DOSSIER_TEMPLATE_ID))
.thenReturn(getDictionaryResponse(MUST_REDACT, false));
when(dictionaryClient.getDictionaryForType(PUBLISHED_INFORMATION+ ":" + TEST_DOSSIER_TEMPLATE_ID))
.thenReturn(getDictionaryResponse(PUBLISHED_INFORMATION, false));
when(dictionaryClient.getDictionaryForType(TEST_METHOD+ ":" + TEST_DOSSIER_TEMPLATE_ID))
.thenReturn(getDictionaryResponse(TEST_METHOD, false));
when(dictionaryClient.getDictionaryForType(PII+ ":" + TEST_DOSSIER_TEMPLATE_ID)).thenReturn(getDictionaryResponse(PII, false));
when(dictionaryClient.getDictionaryForType(RECOMMENDATION_AUTHOR+ ":" + TEST_DOSSIER_TEMPLATE_ID))
.thenReturn(getDictionaryResponse(RECOMMENDATION_AUTHOR, false));
when(dictionaryClient.getDictionaryForType(RECOMMENDATION_ADDRESS+ ":" + TEST_DOSSIER_TEMPLATE_ID))
.thenReturn(getDictionaryResponse(RECOMMENDATION_ADDRESS, false));
when(dictionaryClient.getDictionaryForType(FALSE_POSITIVE+ ":" + TEST_DOSSIER_TEMPLATE_ID))
.thenReturn(getDictionaryResponse(FALSE_POSITIVE, false));
when(dictionaryClient.getDictionaryForType(PURITY+ ":" + TEST_DOSSIER_TEMPLATE_ID))
.thenReturn(getDictionaryResponse(PURITY, false));
when(dictionaryClient.getDictionaryForType(IMAGE+ ":" + TEST_DOSSIER_TEMPLATE_ID)).thenReturn(getDictionaryResponse(IMAGE, false));
when(dictionaryClient.getDictionaryForType(OCR+ ":" + TEST_DOSSIER_TEMPLATE_ID)).thenReturn(getDictionaryResponse(OCR, false));
when(dictionaryClient.getDictionaryForType(LOGO+ ":" + TEST_DOSSIER_TEMPLATE_ID)).thenReturn(getDictionaryResponse(LOGO, false));
when(dictionaryClient.getDictionaryForType(SIGNATURE+ ":" + TEST_DOSSIER_TEMPLATE_ID))
.thenReturn(getDictionaryResponse(SIGNATURE, false));
when(dictionaryClient.getDictionaryForType(FORMULA+ ":" + TEST_DOSSIER_TEMPLATE_ID))
.thenReturn(getDictionaryResponse(FORMULA, false));
when(dictionaryClient.getDictionaryForType(DOSSIER_REDACTIONS+ ":" + TEST_DOSSIER_TEMPLATE_ID)).thenReturn(getDictionaryResponse(DOSSIER_REDACTIONS, true));
when(dictionaryClient.getColors(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(colors);
}
@ -455,13 +474,14 @@ public class RedactionIntegrationTest {
}
private List<TypeResult> getTypeResponse() {
private List<Type> getTypeResponse() {
return typeColorMap.entrySet()
.stream()
.map(typeColor -> TypeResult.builder()
.map(typeColor -> Type.builder()
.id(typeColor.getKey() + ":" + TEST_DOSSIER_TEMPLATE_ID)
.type(typeColor.getKey())
.ruleSetId(TEST_RULESET_ID)
.dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
.hexColor(typeColor.getValue())
.isHint(hintTypeMap.get(typeColor.getKey()))
.isCaseInsensitive(caseInSensitiveMap.get(typeColor.getKey()))
@ -473,11 +493,13 @@ public class RedactionIntegrationTest {
}
private DictionaryResponse getDictionaryResponse(String type, boolean isDossierDictionary) {
private Type getDictionaryResponse(String type, boolean isDossierDictionary) {
return DictionaryResponse.builder()
return Type.builder()
.id(type + ":" +TEST_DOSSIER_TEMPLATE_ID)
.hexColor(typeColorMap.get(type))
.entries(isDossierDictionary ? toDictionaryEntry(dossierDictionary.get(type)) : toDictionaryEntry(dictionary.get(type)))
.entries(isDossierDictionary ? toDictionaryEntry(dossierDictionary.get(type)) : toDictionaryEntry(dictionary
.get(type)))
.isHint(hintTypeMap.get(type))
.isCaseInsensitive(caseInSensitiveMap.get(type))
.isRecommendation(recommendationTypeMap.get(type))
@ -490,7 +512,11 @@ public class RedactionIntegrationTest {
List<DictionaryEntry> dictionaryEntries = new ArrayList<>();
entries.forEach(entry -> {
dictionaryEntries.add(new DictionaryEntry(entry, reanlysisVersions.containsKey(entry) ? reanlysisVersions.get(entry) : 0L, false));
dictionaryEntries.add(DictionaryEntry.builder()
.value(entry)
.version(reanlysisVersions.containsKey(entry) ? reanlysisVersions.get(entry) : 0L)
.deleted(deleted
.contains(entry) ? true : false).build());
});
return dictionaryEntries;
}
@ -498,9 +524,11 @@ public class RedactionIntegrationTest {
@Test
public void test270Rotated() {
AnalyzeRequest request = prepareStorage("files/Minimal Examples/270Rotated.pdf");
MemoryStats.printMemoryStats();
AnalyzeResult result = reanalyzeService.analyze(request);
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
AnalyzeResult result = analyzeService.analyze(request);
assertThat(result).isNotNull();
}
@ -508,12 +536,15 @@ public class RedactionIntegrationTest {
@Test
@Ignore
public void testLargeScannedFileOOM() {
AnalyzeRequest request = prepareStorage("scanned/VV-377031.pdf");
MemoryStats.printMemoryStats();
AnalyzeResult result = reanalyzeService.analyze(request);
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
AnalyzeResult result = analyzeService.analyze(request);
assertThat(result).isNotNull();
}
@Test
public void testMergedImages() throws IOException {
@ -521,11 +552,12 @@ public class RedactionIntegrationTest {
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/merge_images.pdf");
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
AnalyzeResult result = reanalyzeService.analyze(request);
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
AnalyzeResult result = analyzeService.analyze(request);
Map<String, List<RedactionLogEntry>> duplicates = new HashMap<>();
var redactionLog = redactionStorageService.getRedactionLog(TEST_PROJECT_ID, TEST_FILE_ID);
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
redactionLog.getRedactionLogEntry().forEach(entry -> {
duplicates.computeIfAbsent(entry.getId(), v -> new ArrayList<>()).add(entry);
@ -536,10 +568,10 @@ public class RedactionIntegrationTest {
});
dictionary.get(AUTHOR).add("Drinking water");
when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(1L);
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(1L);
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
.projectId(TEST_PROJECT_ID)
.dossierId(TEST_DOSSIER_ID)
.fileId(TEST_FILE_ID)
.build());
@ -547,19 +579,18 @@ public class RedactionIntegrationTest {
fileOutputStream.write(annotateResponse.getDocument());
}
long rstart = System.currentTimeMillis();
reanalyzeService.reanalyze(request);
analyzeService.reanalyze(request);
long rend = System.currentTimeMillis();
System.out.println("reanalysis analysis duration: " + (rend - rstart));
long end = System.currentTimeMillis();
System.out.println("duration: " + (end - start));
}
@Test
@Ignore
public void noExceptionShouldBeThrownForAnyFiles() throws IOException {
@ -577,11 +608,15 @@ public class RedactionIntegrationTest {
AnalyzeRequest request = prepareStorage(new FileInputStream((path)));
System.out.println("Redacting file : " + path.getName());
AnalyzeResult result = reanalyzeService.analyze(request);
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
long fstart = System.currentTimeMillis();
AnalyzeResult result = analyzeService.analyze(request);
System.out.println("analysis analysis duration: " + (System.currentTimeMillis() - fstart));
Map<String, List<RedactionLogEntry>> duplicates = new HashMap<>();
var redactionLog = redactionStorageService.getRedactionLog(TEST_PROJECT_ID, TEST_FILE_ID);
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
redactionLog.getRedactionLogEntry().forEach(entry -> {
duplicates.computeIfAbsent(entry.getId(), v -> new ArrayList<>()).add(entry);
@ -592,10 +627,10 @@ public class RedactionIntegrationTest {
});
dictionary.get(AUTHOR).add("Drinking water");
when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(1L);
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(1L);
long rstart = System.currentTimeMillis();
reanalyzeService.reanalyze(request);
analyzeService.reanalyze(request);
long rend = System.currentTimeMillis();
System.out.println("reanalysis analysis duration: " + (rend - rstart));
@ -631,13 +666,22 @@ public class RedactionIntegrationTest {
public void redactionTest() throws IOException {
long start = System.currentTimeMillis();
ClassPathResource pdfFileResource = new ClassPathResource("files/new/Single Study - Oral (Gavage) Mouse.pdf");
ClassPathResource pdfFileResource = new ClassPathResource("files/S11.pdf");
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
request.setExcludedPages(Set.of(1));
AnalyzeResult result = reanalyzeService.analyze(request);
request.setFileAttributes(List.of(FileAttribute.builder()
.id("fileAttributeId")
.label("Vertebrate Study")
.placeholder("{fileattributes.vertebrateStudy}")
.value("true")
.build()));
var redactionLog = redactionStorageService.getRedactionLog(TEST_PROJECT_ID, TEST_FILE_ID);
var text = redactionStorageService.getText(TEST_PROJECT_ID, TEST_FILE_ID);
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
AnalyzeResult result = analyzeService.analyze(request);
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
var text = redactionStorageService.getText(TEST_DOSSIER_ID, TEST_FILE_ID);
redactionLog.getRedactionLogEntry().forEach(entry -> {
if (entry.isImage()) {
@ -650,7 +694,7 @@ public class RedactionIntegrationTest {
System.out.println("first analysis duration: " + (end - start));
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Test.json")) {
fileOutputStream.write(objectMapper.writeValueAsBytes(redactionStorageService.getText(TEST_PROJECT_ID, TEST_FILE_ID)));
fileOutputStream.write(objectMapper.writeValueAsBytes(redactionStorageService.getText(TEST_DOSSIER_ID, TEST_FILE_ID)));
}
int correctFound = 0;
@ -680,21 +724,43 @@ public class RedactionIntegrationTest {
dictionary.get(AUTHOR).add("physical");
reanlysisVersions.put("physical", 2L);
dictionary.get(VERTEBRATE).add("s-metolachlor");
reanlysisVersions.put("s-metolachlor", 3L);
deleted.add("David Chubb");
deleted.add("mouse");
when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(3L);
dictionary.get(FALSE_POSITIVE).add("David Chubb");
reanlysisVersions.put("David Chubb", 3L);
when(dictionaryClient.getDictionaryForType(VERTEBRATE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(VERTEBRATE, false));
reanlysisVersions.put("mouse", 3L);
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(3L);
when(dictionaryClient.getDictionaryForType(VERTEBRATE))
.thenReturn(getDictionaryResponse(VERTEBRATE, false));
when(dictionaryClient.getDictionaryForType(FALSE_POSITIVE))
.thenReturn(getDictionaryResponse(FALSE_POSITIVE, false));
start = System.currentTimeMillis();
AnalyzeResult reanalyzeResult = reanalyzeService.reanalyze(request);
ManualRedactions manualRedactions = new ManualRedactions();
manualRedactions.setImageRecategorizations(Set.of(ManualImageRecategorization.builder()
.id("37eee3e9d589a5cc529bfec38c3ba479")
.status(Status.APPROVED)
.type("signature")
.build()));
request.setManualRedactions(manualRedactions);
AnalyzeResult reanalyzeResult = analyzeService.reanalyze(request);
redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
end = System.currentTimeMillis();
System.out.println("reanalysis analysis duration: " + (end - start));
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
.projectId(TEST_PROJECT_ID)
.dossierId(TEST_DOSSIER_ID)
.fileId(TEST_FILE_ID)
.build());
@ -702,6 +768,20 @@ public class RedactionIntegrationTest {
fileOutputStream.write(annotateResponse.getDocument());
}
deleted.remove("mouse");
reanlysisVersions.put("mouse", 4L);
when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(4L);
when(dictionaryClient.getDictionaryForType(VERTEBRATE))
.thenReturn(getDictionaryResponse(VERTEBRATE, false));
analyzeService.reanalyze(request);
redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
System.out.println("hi");
}
@ -712,10 +792,11 @@ public class RedactionIntegrationTest {
long start = System.currentTimeMillis();
AnalyzeRequest request = prepareStorage("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
AnalyzeResult result = reanalyzeService.analyze(request);
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
AnalyzeResult result = analyzeService.analyze(request);
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
.projectId(TEST_PROJECT_ID)
.dossierId(TEST_DOSSIER_ID)
.fileId(TEST_FILE_ID)
.build());
@ -771,26 +852,31 @@ public class RedactionIntegrationTest {
// manualRedactions.getEntriesToAdd().add(manualRedactionEntry);
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
request.setManualRedactions(manualRedactions);
AnalyzeResult result = reanalyzeService.analyze(request);
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
AnalyzeResult result = analyzeService.analyze(request);
manualRedactions.getEntriesToAdd().add(manualRedactionEntry);
manualRedactions.setIdsToRemove(Set.of(IdRemoval.builder()
.id("5b940b2cb401ed9f5be6fc24f6e77bcf")
.status(Status.APPROVED)
.build()));
manualRedactions.setManualLegalBasisChanges(Set.of(ManualLegalBasisChange.builder()
.id("675eba69b0c2917de55462c817adaa05")
.legalBasis("Manual Legal Basis Change")
.status(Status.APPROVED)
.build()));
reanalyzeService.reanalyze(request);
analyzeService.reanalyze(request);
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
.projectId(TEST_PROJECT_ID)
.dossierId(TEST_DOSSIER_ID)
.fileId(TEST_FILE_ID)
.build());
try (FileOutputStream fileOutputStream = new FileOutputStream("/tmp/Annotated.pdf")) {
fileOutputStream.write(annotateResponse.getDocument());
}
@ -805,15 +891,14 @@ public class RedactionIntegrationTest {
public void classificationTest() throws IOException {
System.out.println("classificationTest");
ClassPathResource pdfFileResource = new ClassPathResource("files/Trinexapac/93 Trinexapac-ethyl_RAR_03_Volume_3CA_B-1_2017-03-31.pdf");
ClassPathResource pdfFileResource = new ClassPathResource("files/S11.pdf");
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
RedactionRequest redactionRequest = RedactionRequest.builder()
.projectId(request.getProjectId())
.dossierId(request.getDossierId())
.fileId(request.getFileId())
.ruleSetId(request.getRuleSetId())
.dossierTemplateId(request.getDossierTemplateId())
.build();
RedactionResult result = redactionController.classify(redactionRequest);
@ -828,14 +913,14 @@ public class RedactionIntegrationTest {
public void sectionsTest() throws IOException {
System.out.println("sectionsTest");
ClassPathResource pdfFileResource = new ClassPathResource("files/Fludioxonil/51 " + "Fludioxonil_RAR_02_Volume_2_2018-02-21.pdf");
ClassPathResource pdfFileResource = new ClassPathResource("files/S11.pdf");
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
RedactionRequest redactionRequest = RedactionRequest.builder()
.projectId(request.getProjectId())
.dossierId(request.getDossierId())
.fileId(request.getFileId())
.ruleSetId(request.getRuleSetId())
.dossierTemplateId(request.getDossierTemplateId())
.build();
RedactionResult result = redactionController.sections(redactionRequest);
@ -850,14 +935,14 @@ public class RedactionIntegrationTest {
public void htmlTablesTest() throws IOException {
System.out.println("htmlTablesTest");
ClassPathResource pdfFileResource = new ClassPathResource("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
ClassPathResource pdfFileResource = new ClassPathResource("files/S11.pdf");
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
RedactionRequest redactionRequest = RedactionRequest.builder()
.projectId(request.getProjectId())
.dossierId(request.getDossierId())
.fileId(request.getFileId())
.ruleSetId(request.getRuleSetId())
.dossierTemplateId(request.getDossierTemplateId())
.build();
RedactionResult result = redactionController.htmlTables(redactionRequest);
@ -877,9 +962,9 @@ public class RedactionIntegrationTest {
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
RedactionRequest redactionRequest = RedactionRequest.builder()
.projectId(request.getProjectId())
.dossierId(request.getDossierId())
.fileId(request.getFileId())
.ruleSetId(request.getRuleSetId())
.dossierTemplateId(request.getDossierTemplateId())
.build();
RedactionResult result = redactionController.htmlTables(redactionRequest);
@ -897,9 +982,10 @@ public class RedactionIntegrationTest {
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
AnalyzeResult result = reanalyzeService.analyze(request);
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
AnalyzeResult result = analyzeService.analyze(request);
var redactionLog = redactionStorageService.getRedactionLog(TEST_PROJECT_ID, TEST_FILE_ID);
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
redactionLog.getRedactionLogEntry().forEach(entry -> {
if (!entry.isHint()) {
@ -908,8 +994,10 @@ public class RedactionIntegrationTest {
});
}
@SneakyThrows
private AnalyzeRequest prepareStorage(String file) {
ClassPathResource pdfFileResource = new ClassPathResource(file);
return prepareStorage(pdfFileResource.getInputStream());
@ -920,15 +1008,15 @@ public class RedactionIntegrationTest {
private AnalyzeRequest prepareStorage(InputStream stream) {
AnalyzeRequest request = AnalyzeRequest.builder()
.ruleSetId(TEST_RULESET_ID)
.projectId(TEST_PROJECT_ID)
.dossierTemplateId(TEST_DOSSIER_TEMPLATE_ID)
.dossierId(TEST_DOSSIER_ID)
.fileId(TEST_FILE_ID)
.lastProcessed(OffsetDateTime.now())
.build();
var bytes = IOUtils.toByteArray(stream);
storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_PROJECT_ID, TEST_FILE_ID, FileType.ORIGIN), bytes);
storageService.storeObject(RedactionStorageService.StorageIdUtils.getStorageId(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ORIGIN), bytes);
return request;
@ -941,13 +1029,13 @@ public class RedactionIntegrationTest {
long start = System.currentTimeMillis();
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/sponsor_companies.pdf");
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
AnalyzeResult result = reanalyzeService.analyze(request);
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
AnalyzeResult result = analyzeService.analyze(request);
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
.projectId(TEST_PROJECT_ID)
.dossierId(TEST_DOSSIER_ID)
.fileId(TEST_FILE_ID)
.build());

View File

@ -0,0 +1,18 @@
package com.iqser.red.service.redaction.v1.server.redaction.rulebuilder;
import com.iqser.red.service.redaction.v1.model.RuleBuilderModel;
import org.junit.Test;
import static org.assertj.core.api.AssertionsForClassTypes.assertThat;
public class RuleBuilderModelServiceTest {
@Test
public void testRuleBuilderModelProvider() {
RuleBuilderModel model = new RuleBuilderModelService().getRuleBuilderModel();
assertThat(model.getWhenClauses().size()).isGreaterThan(1);
assertThat(model.getThenConditions().size()).isGreaterThan(1);
}
}

View File

@ -1,511 +0,0 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import com.amazonaws.services.s3.AmazonS3;
import com.iqser.red.service.configuration.v1.api.model.*;
import com.iqser.red.service.configuration.v1.api.resource.DictionaryResource;
import com.iqser.red.service.redaction.v1.server.Application;
import com.iqser.red.service.redaction.v1.server.FileSystemBackedStorageService;
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
import com.iqser.red.service.redaction.v1.server.redaction.utils.ResourceLoader;
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
import com.iqser.red.storage.commons.service.StorageService;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.kie.api.KieServices;
import org.kie.api.builder.KieBuilder;
import org.kie.api.builder.KieFileSystem;
import org.kie.api.builder.KieModule;
import org.kie.api.runtime.KieContainer;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.boot.test.mock.mockito.MockBean;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Import;
import org.springframework.context.annotation.Primary;
import org.springframework.core.io.ClassPathResource;
import org.springframework.test.context.junit4.SpringRunner;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.concurrent.atomic.AtomicLong;
import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.Mockito.when;
@RunWith(SpringRunner.class)
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
@Import(EntityRedactionServiceTest.RedactionIntegrationTestConfiguration.class)
public class EntityRedactionServiceTest {
private static final String DEFAULT_RULES = loadFromClassPath("drools/rules.drl");
private static final String AUTHOR_CODE = "author";
private static final String ADDRESS_CODE = "address";
private static final String SPONSOR_CODE = "sponsor";
private static final AtomicLong DICTIONARY_VERSION = new AtomicLong();
private static final AtomicLong RULES_VERSION = new AtomicLong();
@MockBean
private DictionaryClient dictionaryClient;
@MockBean
private RulesClient rulesClient;
@Autowired
private EntityRedactionService entityRedactionService;
@Autowired
private PdfSegmentationService pdfSegmentationService;
@Autowired
private DroolsExecutionService droolsExecutionService;
@MockBean
private AmazonS3 amazonS3;
@MockBean
private LegalBasisClient legalBasisClient;
private final static String TEST_RULESET_ID = "123";
@Configuration
@EnableAutoConfiguration(exclude = {RabbitAutoConfiguration.class})
public static class RedactionIntegrationTestConfiguration {
@Bean
public KieContainer kieContainer() {
KieServices kieServices = KieServices.Factory.get();
KieFileSystem kieFileSystem = kieServices.newKieFileSystem();
InputStream input = new ByteArrayInputStream(DEFAULT_RULES.getBytes(StandardCharsets.UTF_8));
kieFileSystem.write("src/test/resources/drools/rules.drl", kieServices.getResources()
.newInputStreamResource(input));
KieBuilder kieBuilder = kieServices.newKieBuilder(kieFileSystem);
kieBuilder.buildAll();
KieModule kieModule = kieBuilder.getKieModule();
return kieServices.newKieContainer(kieModule.getReleaseId());
}
@Bean
@Primary
public StorageService inmemoryStorage() {
return new FileSystemBackedStorageService();
}
}
@Test
public void testNestedEntitiesRemoval() {
Set<Entity> entities = new HashSet<>();
Entity nested = new Entity("nested", "fake type", 10, 16, "fake headline", 0, false, false);
Entity nesting = new Entity("nesting nested", "fake type", 2, 16, "fake headline", 0, false, false);
entities.add(nested);
entities.add(nesting);
EntitySearchUtils.removeEntitiesContainedInLarger(entities);
assertThat(entities.size()).isEqualTo(1);
assertThat(entities).contains(nesting);
}
@Test
public void testTableRedaction() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Single Table.pdf");
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(Arrays.asList("Casey, H.W.", "OLoughlin, C.K.", "Salamon, C.M.", "Smith, S.H.")))
.build();
when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
DictionaryResponse addressResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(Collections.singletonList("Toxigenics, Inc., Decatur, IL 62526, USA")))
.build();
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
.entries(Collections.emptyList())
.build();
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId");
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
assertThat(classifiedDoc.getEntities().get(1)).hasSize(7);// 3 author cells, 1 address, 1 Y and 2 N entities
}
@Test
public void testNestedRedaction() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/nested_redaction.pdf");
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(Arrays.asList("Casey, H.W.", "OLoughlin, C.K.", "Salamon, C.M.", "Smith, S.H.")))
.build();
when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
DictionaryResponse addressResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(Collections.singletonList("Toxigenics, Inc., Decatur, IL 62526, USA")))
.build();
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
.entries(Collections.emptyList())
.build();
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId");
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
assertThat(classifiedDoc.getEntities().get(1)).hasSize(7);// 3 author cells, 1 address, 1 Y and 2 N entities
}
@Test
public void testTrueNegativesInTable() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/Cyprodinil/40 Cyprodinil - EU AIR3 - LCA Section 1" +
" Supplement - Identity of the active substance - Reference list.pdf");
when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_author.txt"))))
.build();
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
DictionaryResponse addressResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_address.txt"))))
.build();
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
.entries(Collections.emptyList())
.build();
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId");
assertThat(classifiedDoc.getEntities()
.entrySet()
.stream()
.noneMatch(entry -> entry.getValue().stream().anyMatch(e -> e.getMatchedRule() == 9))).isTrue();
pdfFileResource = new ClassPathResource("files/Compounds/27 A8637C - EU AIR3 - MCP Section 1 - Identity of " +
"the plant protection product.pdf");
classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId");
assertThat(classifiedDoc.getEntities()
.entrySet()
.stream()
.noneMatch(entry -> entry.getValue().stream().anyMatch(e -> e.getMatchedRule() == 9))).isTrue();
}
@Test
public void testFalsePositiveInWrongCell() throws IOException {
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Row With Ambiguous Redaction.pdf");
when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_author.txt"))))
.build();
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
DictionaryResponse addressResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_address.txt"))))
.build();
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_sponsor.txt"))))
.build();
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId");
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
assertThat(classifiedDoc.getEntities().get(1).stream()
.filter(entity -> entity.getMatchedRule() == 9)
.count()).isEqualTo(10);
}
@Test
public void testApplicantInTableRedaction() throws IOException {
String tableRules = "package drools\n" +
"\n" +
"import com.iqser.red.service.redaction.v1.server.redaction.model.Section\n" +
"\n" +
"global Section section\n" +
"rule \"6: Redact contact information if applicant is found\"\n" +
" when\n" +
" eval(section.headlineContainsWord(\"applicant\") || section.getText().contains(\"Applicant\"));\n" +
" then\n" +
" section.redactLineAfter(\"Name:\", \"address\", 6,true, \"Applicant information was found\", \"Reg" +
" (EC) No 1107/2009 Art. 63 (2g)\");\n" +
" section.redactBetween(\"Address:\", \"Contact\", \"address\", 6,true, \"Applicant information was found\", \"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
" section.redactLineAfter(\"Contact point:\", \"address\", 6,true, \"Applicant information was found\", \"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
" section.redactLineAfter(\"Phone:\", \"address\", 6,true, \"Applicant information was found\", " +
"\"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
" section.redactLineAfter(\"Fax:\", \"address\", 6,true, \"Applicant information was found\", \"Reg " +
"(EC) No 1107/2009 Art. 63 (2g)\");\n" +
" section.redactLineAfter(\"Tel.:\", \"address\", 6,true, \"Applicant information was found\", \"Reg" +
" (EC) No 1107/2009 Art. 63 (2g)\");\n" +
" section.redactLineAfter(\"Tel:\", \"address\", 6,true, \"Applicant information was found\", \"Reg " +
"(EC) No 1107/2009 Art. 63 (2g)\");\n" +
" section.redactLineAfter(\"E-mail:\", \"address\", 6,true, \"Applicant information was found\", " +
"\"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
" section.redactLineAfter(\"Email:\", \"address\", 6,true, \"Applicant information was found\", " +
"\"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
" section.redactLineAfter(\"Contact:\", \"address\", 6,true, \"Applicant information was found\", " +
"\"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
" section.redactLineAfter(\"Telephone number:\", \"address\", 6,true, \"Applicant information was found\", \"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
" section.redactLineAfter(\"Fax number:\", \"address\", 6,true, \"Applicant information was found\"," +
" \"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
" section.redactLineAfter(\"Telephone:\", \"address\", 6,true, \"Applicant information was found\", " +
"\"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
" section.redactBetween(\"No:\", \"Fax\", \"address\", 6,true, \"Applicant information was found\", " +
"\"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
" section.redactBetween(\"Contact:\", \"Tel.:\", \"address\", 6,true, \"Applicant information was found\", \"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n" +
" end";
when(rulesClient.getVersion(TEST_RULESET_ID)).thenReturn(RULES_VERSION.incrementAndGet());
when(rulesClient.getRules(TEST_RULESET_ID)).thenReturn(new RulesResponse(tableRules));
droolsExecutionService.updateRules(TEST_RULESET_ID);
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Applicant Producer Table.pdf");
when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
DictionaryResponse dictionaryResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_author.txt"))))
.build();
when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
DictionaryResponse addressResponse = DictionaryResponse.builder()
.entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_address.txt"))))
.build();
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
DictionaryResponse sponsorResponse = DictionaryResponse.builder()
.entries(Collections.emptyList())
.build();
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
entityRedactionService.processDocument(classifiedDoc, TEST_RULESET_ID, null, "dossierId");
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
assertThat(classifiedDoc.getEntities().get(1).stream()
.filter(entity -> entity.getMatchedRule() == 6)
.count()).isEqualTo(13);
}
@Test
public void testSponsorInCell() throws IOException {
    // Single Drools rule (id 11): redact the token preceded by "batches produced at"
    // as a sponsor company.
    String sponsorRule = "package drools\n\n"
            + "import com.iqser.red.service.redaction.v1.server.redaction.model.Section\n\n"
            + "global Section section\n"
            + "rule \"11: Redact sponsor company\"\n"
            + " when\n"
            + " Section(searchText.toLowerCase().contains(\"batches produced at\"))\n"
            + " then\n"
            + " section.redactIfPrecededBy(\"batches produced at\", \"sponsor\", 11, "
            + "\"Redacted because it represents a sponsor company\", \"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n"
            + " end";

    // Publish the rule under a fresh version so the Drools service reloads it.
    when(rulesClient.getVersion(TEST_RULESET_ID)).thenReturn(RULES_VERSION.incrementAndGet());
    when(rulesClient.getRules(TEST_RULESET_ID)).thenReturn(new RulesResponse(sponsorRule));
    droolsExecutionService.updateRules(TEST_RULESET_ID);

    // Only the sponsor dictionary carries entries; author/address stay deliberately empty.
    when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
    DictionaryResponse emptyAddresses = DictionaryResponse.builder()
            .entries(Collections.emptyList())
            .build();
    when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(emptyAddresses);
    DictionaryResponse emptyAuthors = DictionaryResponse.builder()
            .entries(Collections.emptyList())
            .build();
    when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(emptyAuthors);
    DictionaryResponse sponsors = DictionaryResponse.builder()
            .entries(toDictionaryEntry(new ArrayList<>(ResourceLoader.load("dictionaries/CBI_sponsor.txt"))))
            .build();
    when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsors);

    // Parse the fixture PDF and run the redaction pipeline over it.
    ClassPathResource pdf = new ClassPathResource("files/Minimal Examples/batches_new_line.pdf");
    Document document = pdfSegmentationService.parseDocument(pdf.getInputStream());
    entityRedactionService.processDocument(document, TEST_RULESET_ID, null, "dossierId");

    // Exactly one entity on the single page must have matched rule 11.
    assertThat(document.getEntities()).hasSize(1); // one page
    long rule11Count = document.getEntities().get(1).stream()
            .filter(entity -> entity.getMatchedRule() == 11)
            .count();
    assertThat(rule11Count).isEqualTo(1);
}
@Test
public void headerPropagation() throws IOException {
    // --- Fixture 1: two-page document; table-header context must propagate to page 2. ---
    when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
    DictionaryResponse authors = DictionaryResponse.builder()
            .entries(toDictionaryEntry(Arrays.asList("Bissig R.", "Thanei P.")))
            .build();
    when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(authors);
    DictionaryResponse addresses = DictionaryResponse.builder()
            .entries(toDictionaryEntry(Collections.singletonList("Novartis Crop Protection AG, Basel, Switzerland")))
            .build();
    when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addresses);
    DictionaryResponse noSponsors = DictionaryResponse.builder()
            .entries(Collections.emptyList())
            .build();
    when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(noSponsors);

    ClassPathResource pdf = new ClassPathResource("files/Minimal Examples/Header Propagation.pdf");
    Document document = pdfSegmentationService.parseDocument(pdf.getInputStream());
    entityRedactionService.processDocument(document, TEST_RULESET_ID, null, "dossierId");

    assertThat(document.getEntities()).hasSize(2); // two pages
    assertThat(document.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 9).count()).isEqualTo(8);
    assertThat(document.getEntities().get(2).stream().filter(entity -> entity.getMatchedRule() == 9).count()).isEqualTo(5); // 2 names, 1 address, 2 Y

    // --- Fixture 2: single page; sponsor stub from above is intentionally reused. ---
    when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
    authors = DictionaryResponse.builder()
            .entries(toDictionaryEntry(Arrays.asList("Tribolet, R.", "Muir, G.", "Kühne-Thu, H.", "Close, C.")))
            .build();
    when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(authors);
    addresses = DictionaryResponse.builder()
            .entries(toDictionaryEntry(Collections.singletonList("Novartis Crop Protection AG, Basel, Switzerland")))
            .build();
    when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addresses);

    pdf = new ClassPathResource("files/Minimal Examples/Header Propagation2.pdf");
    document = pdfSegmentationService.parseDocument(pdf.getInputStream());
    entityRedactionService.processDocument(document, TEST_RULESET_ID, null, "dossierId");

    assertThat(document.getEntities()).hasSize(1); // one page
    assertThat(document.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 9).count()).isEqualTo(3);
    assertThat(document.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 8).count()).isEqualTo(9);
}
@Test
@Ignore
public void testNGuideline() throws IOException {
    // Dictionaries: a single author and address entry, no sponsors.
    when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
    DictionaryResponse authors = DictionaryResponse.builder()
            .entries(toDictionaryEntry(Collections.singletonList("Aldershof S.")))
            .build();
    when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(authors);
    DictionaryResponse addresses = DictionaryResponse.builder()
            .entries(toDictionaryEntry(Collections.singletonList("Novartis Crop Protection AG, Basel, Switzerland")))
            .build();
    when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addresses);
    DictionaryResponse noSponsors = DictionaryResponse.builder()
            .entries(Collections.emptyList())
            .build();
    when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(noSponsors);

    // Run the pipeline over the empty-tabular-data fixture.
    ClassPathResource pdf = new ClassPathResource("files/Minimal Examples/Empty Tabular Data.pdf");
    Document document = pdfSegmentationService.parseDocument(pdf.getInputStream());
    entityRedactionService.processDocument(document, TEST_RULESET_ID, null, "dossierId");

    assertThat(document.getEntities()).hasSize(1); // one page
    long rule8Count = document.getEntities().get(1).stream()
            .filter(entity -> entity.getMatchedRule() == 8)
            .count();
    assertThat(rule8Count).isEqualTo(6);
}
@Before
public void stubRedaction() {
    // Baseline rule set shared by most tests: rule 8 un-redacts non-vertebrate rows,
    // rule 9 redacts author/address data of vertebrate rows.
    String vertebrateRules = "package drools\n\n"
            + "import com.iqser.red.service.redaction.v1.server.redaction.model.Section\n\n"
            + "global Section section\n"
            + "rule \"8: Not redacted because Vertebrate Study = N\"\n"
            + " when\n"
            + " Section(rowEquals(\"Vertebrate study Y/N\", \"N\") || rowEquals(\"Vertebrate study Y/N\", \"No\"))\n"
            + " then\n"
            + " section.redactNotCell(\"Author(s)\", 8, \"name\", false, \"Not redacted because row is not a vertebrate study\");\n"
            + " section.redactNot(\"address\", 8, \"Not redacted because row is not a vertebrate study\");\n"
            + " section.highlightCell(\"Vertebrate study Y/N\", 8, \"hint_only\");\n"
            + " end\n"
            + "rule \"9: Redact Authors and Addresses in Reference Table, if it is a Vertebrate study\"\n"
            + " when\n"
            + " Section(rowEquals(\"Vertebrate study Y/N\", \"Y\") || rowEquals(\"Vertebrate study Y/N\", \"Yes\"))\n"
            + " then\n"
            + " section.redactCell(\"Author(s)\", 9, \"name\", false, \"Redacted because row is a vertebrate study\", \"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n"
            + " section.redact(\"address\", 9, \"Redacted because row is a vertebrate study\", \"Reg (EC) No 1107/2009 Art. 63 (2g)\");\n"
            + " section.highlightCell(\"Vertebrate study Y/N\", 9, \"must_redact\");\n"
            + " end";
    when(rulesClient.getVersion(TEST_RULESET_ID)).thenReturn(RULES_VERSION.incrementAndGet());
    when(rulesClient.getRules(TEST_RULESET_ID)).thenReturn(new RulesResponse(vertebrateRules));

    // Register the three entity types, each with its own highlight color.
    TypeResponse knownTypes = TypeResponse.builder()
            .types(Arrays.asList(
                    TypeResult.builder().ruleSetId(TEST_RULESET_ID).type(AUTHOR_CODE).hexColor("#ffff00").build(),
                    TypeResult.builder().ruleSetId(TEST_RULESET_ID).type(ADDRESS_CODE).hexColor("#ff00ff").build(),
                    TypeResult.builder().ruleSetId(TEST_RULESET_ID).type(SPONSOR_CODE).hexColor("#00ffff").build()))
            .build();
    when(dictionaryClient.getVersion(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(DICTIONARY_VERSION.incrementAndGet());
    when(dictionaryClient.getAllTypes(TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(knownTypes);

    // Default (entry-less) dictionary responses so tests that do not stub them avoid NPEs.
    when(dictionaryClient.getDictionaryForType(AUTHOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER))
            .thenReturn(DictionaryResponse.builder().build());
    when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER))
            .thenReturn(DictionaryResponse.builder().build());
    when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_RULESET_ID, DictionaryResource.GLOBAL_DOSSIER))
            .thenReturn(DictionaryResponse.builder().build());

    // Stub the color configuration served to the redaction pipeline.
    Colors colors = new Colors();
    colors.setDefaultColor("#acfc00");
    colors.setNotRedacted("#cccccc");
    colors.setRequestAdd("#04b093");
    colors.setRequestRemove("#04b093");
    when(dictionaryClient.getColors(TEST_RULESET_ID)).thenReturn(colors);
}
/**
 * Reads a UTF-8 text resource from the classpath and returns its content with
 * every line terminated by a single '\n'.
 *
 * @param path classpath-relative location of the resource
 * @return the complete resource content
 * @throws IllegalArgumentException if the resource cannot be found or read
 */
private static String loadFromClassPath(String path) {
    URL resource = ResourceLoader.class.getClassLoader().getResource(path);
    if (resource == null) {
        // Fix: previously this message hard-coded "drools/rules.drl" regardless of
        // the requested path, making the failure misleading for other resources.
        throw new IllegalArgumentException("could not load classpath resource: " + path);
    }
    try (BufferedReader br = new BufferedReader(new InputStreamReader(resource.openStream(), StandardCharsets.UTF_8))) {
        StringBuilder sb = new StringBuilder();
        String line;
        while ((line = br.readLine()) != null) {
            sb.append(line).append('\n');
        }
        return sb.toString();
    } catch (IOException e) {
        throw new IllegalArgumentException("could not load classpath resource: " + path, e);
    }
}
/**
 * Wraps plain dictionary strings into {@code DictionaryEntry} instances,
 * passing the same fixed arguments ({@code 1L}, {@code false}) as every caller expects.
 *
 * @param entries raw dictionary values
 * @return one entry per input string, in input order
 */
private List<DictionaryEntry> toDictionaryEntry(List<String> entries) {
    List<DictionaryEntry> converted = new ArrayList<>(entries.size());
    for (String value : entries) {
        converted.add(new DictionaryEntry(value, 1L, false));
    }
    return converted;
}
}

View File

@ -0,0 +1,30 @@
package com.iqser.red.service.redaction.v1.server.redaction.utils;
import static org.assertj.core.api.Assertions.assertThat;
import java.util.HashSet;
import java.util.Set;
import org.junit.Test;
import com.iqser.red.service.redaction.v1.model.Engine;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
/**
 * Tests for {@code EntitySearchUtils#removeEntitiesContainedInLarger}.
 */
public class EntitySearchUtilsTest {

    @Test
    public void testNestedEntitiesRemoval() {
        // "nested" (offsets 10-16) lies entirely inside "nesting nested" (offsets 2-16).
        Entity inner = new Entity("nested", "fake type", 10, 16, "fake headline", 0, false, false, Engine.RULE);
        Entity outer = new Entity("nesting nested", "fake type", 2, 16, "fake headline", 0, false, false, Engine.RULE);
        Set<Entity> entities = new HashSet<>();
        entities.add(inner);
        entities.add(outer);

        EntitySearchUtils.removeEntitiesContainedInLarger(entities);

        // Only the enclosing entity may survive the removal.
        assertThat(entities.size()).isEqualTo(1);
        assertThat(entities).contains(outer);
    }
}

View File

@ -1,6 +1,6 @@
configuration-service.url: "http://configuration-service-v1:8080"
image-service.url: "http://image-service-v1:8080"
file-management-service.url: "http://file-management-service-v1:8080"
persistence-service.url: "http://persistence-service-v1:8080"
entity-recognition-service.url: "localhost:8080"
ribbon:
ConnectTimeout: 600000
@ -17,3 +17,4 @@ platform.multi-tenancy:
redaction-service:
enable-image-classification: false
enable-entity-recognition: false

View File

@ -1652,3 +1652,5 @@ Zoecon Corp.
Zoecon Corp., Palo Alto, USA
Zyma SA
Zyma SA, Nyon, Switzerland
Mambo-Tox Ltd. Biomedical Sciences Building Bassett Crescent East Southampton SO16 7PX UK
Syngenta Environmental Sciences Jealotts Hill International Research Centre Bracknell, Berkshire RG42 6EY UK

View File

@ -1676,7 +1676,6 @@ da Silva Rejane
Das R
Das, R.
Daughtry, CST
David Chubb
David Chubb|Lorraine Britton
David Clarke
Davies

View File

@ -235,4 +235,5 @@ N/A
No details reported
Not available
Test facility
TBD
TBD
David Chubb

View File

@ -56,8 +56,8 @@ rule "5: Do not redact Names and Addresses if no redaction Indicator is containe
when
Section(matchesType("vertebrate"), matchesType("published_information"))
then
section.redactNot("CBI_author", 5, "Vertebrate and Published Information found");
section.redactNot("CBI_address", 5, "Vertebrate and Published Information found");
section.redactNotAndReference("CBI_author","published_information", 5, "Vertebrate and Published Information found");
section.redactNotAndReference("CBI_address","published_information", 5, "Vertebrate and Published Information found");
end
@ -268,7 +268,7 @@ rule "18: Redact contact information if Producer is found"
rule "19: Redact AUTHOR(S)"
when
Section(searchText.contains("AUTHOR(S):"))
Section(searchText.contains("AUTHOR(S):") && fileAttributeByPlaceholderEquals("{fileattributes.vertebrateStudy}", "true"))
then
section.redactLinesBetween("AUTHOR(S):", "COMPLETION DATE:", "PII", 19, true, "AUTHOR(S) was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
end