Merge branch 'master' of ssh://git.iqser.com:2222/red/redaction-service into Test

This commit is contained in:
aoezyetimoglu 2021-07-09 12:03:28 +02:00
commit 83b22f3e14
26 changed files with 774 additions and 353 deletions

View File

@ -5,7 +5,7 @@
<parent>
<artifactId>platform-dependency</artifactId>
<groupId>com.iqser.red</groupId>
<version>1.1.2</version>
<version>1.1.3</version>
</parent>
<modelVersion>4.0.0</modelVersion>
@ -32,7 +32,7 @@
<dependency>
<groupId>com.iqser.red</groupId>
<artifactId>platform-commons-dependency</artifactId>
<version>1.3.1</version>
<version>1.3.6</version>
<scope>import</scope>
<type>pom</type>
</dependency>

View File

@ -6,6 +6,11 @@ import lombok.Data;
import lombok.NoArgsConstructor;
import java.time.OffsetDateTime;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
@Data
@Builder
@ -19,6 +24,10 @@ public class AnalyzeRequest {
private boolean reanalyseOnlyIfPossible;
private ManualRedactions manualRedactions;
private OffsetDateTime lastProcessed;
private Set<Integer> excludedPages;
@Builder.Default
private List<FileAttribute> fileAttributes = new ArrayList<>();
}

View File

@ -25,6 +25,8 @@ public class AnalyzeResult {
private long rulesVersion;
private long legalBasisVersion;
private boolean wasReanalyzed;
}

View File

@ -0,0 +1,19 @@
package com.iqser.red.service.redaction.v1.model;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * A single attribute of a file, carried in {@code AnalyzeRequest.fileAttributes}
 * and handed on to each {@code Section} built during analysis.
 * <p>
 * {@code Section} offers lookups that match an attribute by {@link #id},
 * {@link #placeholder} or {@link #label} in combination with its {@link #value}.
 * Accessors, builder and constructors are generated by Lombok.
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class FileAttribute {
// Identifier of the attribute; compared by Section.fileAttributeByIdEquals.
private String id;
// Label of the attribute; compared by Section.fileAttributeByLabelEquals.
private String label;
// Placeholder of the attribute; compared by Section.fileAttributeByPlaceholderEquals.
private String placeholder;
// Value of the attribute; every Section lookup requires an exact equals() match on it.
private String value;
}

View File

@ -0,0 +1,21 @@
package com.iqser.red.service.redaction.v1.model;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Manual request to change the category (type) of a detected image.
 * <p>
 * {@code RedactionLogCreatorService.processImageEntry} applies it to the
 * redaction log entry whose id equals {@link #id}: an APPROVED request
 * overwrites the entry's type, redacted flag and legal basis; a REQUESTED one
 * only records the proposed type as {@code recategorizationType}; any other
 * status marks the entry as DECLINED.
 * Accessors, builder and constructors are generated by Lombok.
 */
@Data
@Builder
@AllArgsConstructor
@NoArgsConstructor
public class ManualImageRecategorization {
// Id of the redaction log entry (image) this recategorization targets.
private String id;
// NOTE(review): presumably the user who issued the request — not read in the visible code, confirm.
private String user;
// Processing state of the request; decides how the log entry is changed (see class comment).
private Status status;
// New image type; becomes the entry's type when approved, its recategorizationType when requested.
private String type;
// Legal basis written to the log entry when the request is approved.
private String legalBasis;
// Whether the image counts as redacted after an approved recategorization.
private boolean redacted;
}

View File

@ -0,0 +1,19 @@
package com.iqser.red.service.redaction.v1.model;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Manual request to change the legal basis of an existing redaction.
 * <p>
 * Instances are collected in {@code ManualRedactions.manualLegalBasisChanges}.
 * Accessors, builder and constructors are generated by Lombok.
 */
@Data
@Builder
@AllArgsConstructor
@NoArgsConstructor
public class ManualLegalBasisChange {
// Id of the affected entry; ReanalyzeService gathers these ids together with the other manual-change ids.
private String id;
// NOTE(review): presumably the user who issued the request — not read in the visible code, confirm.
private String user;
// Processing state of the request (the Status enum is declared elsewhere).
private Status status;
// NOTE(review): presumably the legal basis to apply instead of the current one — confirm against the consumer.
private String legalBasis;
}

View File

@ -1,5 +1,5 @@
package com.iqser.red.service.redaction.v1.model;
public enum ManualRedactionType {
ADD, REMOVE, FORCE_REDACT
ADD, REMOVE, FORCE_REDACT, RECATEGORIZE, LEGAL_BASIS_CHANGE
}

View File

@ -26,6 +26,12 @@ public class ManualRedactions {
@Builder.Default
private Set<ManualRedactionEntry> entriesToAdd = new HashSet<>();
@Builder.Default
private Set<ManualImageRecategorization> imageRecategorizations = new HashSet<>();
@Builder.Default
private Set<ManualLegalBasisChange> manualLegalBasisChanges = new HashSet<>();
@Builder.Default
private Map<String, List<Comment>> comments = new HashMap<>();

View File

@ -44,4 +44,6 @@ public class RedactionChangeLogEntry {
private boolean isDossierDictionaryEntry;
private boolean excluded;
}

View File

@ -49,4 +49,9 @@ public class RedactionLogEntry {
private boolean isDossierDictionaryEntry;
private boolean excluded;
private String recategorizationType;
private String legalBasisChangeValue;
}

View File

@ -8,8 +8,6 @@ import org.springframework.web.bind.annotation.RequestBody;
public interface RedactionResource {
String SERVICE_NAME = "redaction-service-v1";
String RULE_SET_PARAMETER_NAME = "dossierTemplateId";
String RULE_SET_PATH_VARIABLE = "/{" + RULE_SET_PARAMETER_NAME + "}";
@ -32,4 +30,7 @@ public interface RedactionResource {
@PostMapping(value = "/rules/test", consumes = MediaType.APPLICATION_JSON_VALUE)
void testRules(@RequestBody String rules);
@PostMapping(value = "/redaction-log/preview", consumes = MediaType.APPLICATION_JSON_VALUE)
RedactionLog getRedactionLogPreview(@RequestBody RedactionRequest redactionRequest);
}

View File

@ -3,6 +3,7 @@ package com.iqser.red.service.redaction.v1.server.controller;
import com.iqser.red.service.file.management.v1.api.model.FileType;
import com.iqser.red.service.redaction.v1.model.AnnotateRequest;
import com.iqser.red.service.redaction.v1.model.AnnotateResponse;
import com.iqser.red.service.redaction.v1.model.RedactionLog;
import com.iqser.red.service.redaction.v1.model.RedactionRequest;
import com.iqser.red.service.redaction.v1.model.RedactionResult;
import com.iqser.red.service.redaction.v1.resources.RedactionResource;
@ -12,6 +13,7 @@ import com.iqser.red.service.redaction.v1.server.exception.RedactionException;
import com.iqser.red.service.redaction.v1.server.redaction.service.AnnotationService;
import com.iqser.red.service.redaction.v1.server.redaction.service.DictionaryService;
import com.iqser.red.service.redaction.v1.server.redaction.service.DroolsExecutionService;
import com.iqser.red.service.redaction.v1.server.redaction.service.RedactionLogCreatorService;
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
@ -39,7 +41,7 @@ public class RedactionController implements RedactionResource {
private final AnnotationService annotationService;
private final PdfSegmentationService pdfSegmentationService;
private final RedactionStorageService redactionStorageService;
private final RedactionLogCreatorService redactionLogCreatorService;
public AnnotateResponse annotate(@RequestBody AnnotateRequest annotateRequest) {
@ -155,6 +157,14 @@ public class RedactionController implements RedactionResource {
droolsExecutionService.testRules(rules);
}
/**
 * Returns a preview of the redaction log for the file addressed by the request.
 * <p>
 * The stored redaction log is loaded via its dossier id and file id and then
 * passed, together with the request's dossier template id and manual
 * redactions, to {@code RedactionLogCreatorService#getRedactionLogPreview}.
 *
 * @param redactionRequest identifies the dossier, file and dossier template and
 *                         carries the manual redactions to take into account
 * @return the redaction log produced by the creator service
 */
@Override
public RedactionLog getRedactionLogPreview(RedactionRequest redactionRequest) {
    // Load first, exactly as before, so a storage failure surfaces before anything else is read.
    var storedLog = redactionStorageService.getRedactionLog(redactionRequest.getDossierId(), redactionRequest.getFileId());
    var templateId = redactionRequest.getDossierTemplateId();
    var requestedManualRedactions = redactionRequest.getManualRedactions();
    return redactionLogCreatorService.getRedactionLogPreview(storedLog, templateId, requestedManualRedactions);
}
private RedactionResult convert(PDDocument document, int numberOfPages) throws IOException {

View File

@ -9,7 +9,7 @@ import java.util.List;
@Data
@EqualsAndHashCode(onlyExplicitlyIncluded = true)
public class Entity {
public class Entity implements ReasonHolder {
private final String word;

View File

@ -9,7 +9,7 @@ import lombok.NoArgsConstructor;
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class Image {
public class Image implements ReasonHolder {
private String type;
private RedRectangle2D position;

View File

@ -0,0 +1,14 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
/**
 * Common view on objects that carry a redaction decision and the textual
 * reason for it. Implemented by {@code Entity} and {@code Image}, so that
 * code such as {@code RedactionLogCreatorService.processImageEntry} can update
 * the decision and its reason without knowing the concrete type.
 */
public interface ReasonHolder {
// Current textual reason for the redaction decision.
String getRedactionReason();
// Replaces the reason, e.g. after a manual override text has been appended.
void setRedactionReason(String reason);
// Whether this object is currently treated as a redaction.
boolean isRedaction();
// Sets whether this object is treated as a redaction.
void setRedaction(boolean value);
}

View File

@ -1,5 +1,6 @@
package com.iqser.red.service.redaction.v1.server.redaction.model;
import com.iqser.red.service.redaction.v1.model.FileAttribute;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
import com.iqser.red.service.redaction.v1.server.redaction.utils.Patterns;
@ -8,9 +9,11 @@ import lombok.Data;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
@ -52,6 +55,22 @@ public class Section {
@Builder.Default
private Set<Image> images = new HashSet<>();
@Builder.Default
private List<FileAttribute> fileAttributes = new ArrayList<>();
/**
 * Tells whether this section's file attributes contain one with the given id
 * and the given value.
 *
 * @param id    attribute id to look for; must not be null when attributes exist
 * @param value value the attribute must have; must not be null when an id matches
 * @return {@code true} if at least one attribute matches both id and value,
 *         {@code false} otherwise or when no attribute list is set
 */
public boolean fileAttributeByIdEquals(String id, String value) {
    if (fileAttributes == null) {
        return false;
    }
    for (FileAttribute candidate : fileAttributes) {
        if (id.equals(candidate.getId()) && value.equals(candidate.getValue())) {
            return true;
        }
    }
    return false;
}
/**
 * Tells whether this section's file attributes contain one with the given
 * placeholder and the given value.
 *
 * @param placeholder attribute placeholder to look for; must not be null when attributes exist
 * @param value       value the attribute must have; must not be null when a placeholder matches
 * @return {@code true} if at least one attribute matches both placeholder and
 *         value, {@code false} otherwise or when no attribute list is set
 */
public boolean fileAttributeByPlaceholderEquals(String placeholder, String value) {
    if (fileAttributes == null) {
        return false;
    }
    for (FileAttribute candidate : fileAttributes) {
        if (placeholder.equals(candidate.getPlaceholder()) && value.equals(candidate.getValue())) {
            return true;
        }
    }
    return false;
}
/**
 * Tells whether this section's file attributes contain one with the given
 * label and the given value.
 *
 * @param label attribute label to look for; must not be null when attributes exist
 * @param value value the attribute must have; must not be null when a label matches
 * @return {@code true} if at least one attribute matches both label and value,
 *         {@code false} otherwise or when no attribute list is set
 */
public boolean fileAttributeByLabelEquals(String label, String value) {
    if (fileAttributes == null) {
        return false;
    }
    for (FileAttribute candidate : fileAttributes) {
        if (label.equals(candidate.getLabel()) && value.equals(candidate.getValue())) {
            return true;
        }
    }
    return false;
}
public boolean rowEquals(String headerName, String value) {

View File

@ -4,31 +4,42 @@ import com.iqser.red.service.redaction.v1.model.AnalyzeResult;
import com.iqser.red.service.redaction.v1.model.RedactionChangeLog;
import com.iqser.red.service.redaction.v1.model.RedactionLog;
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
import org.springframework.stereotype.Service;
@Service
public class AnalyzeResponseService {
public AnalyzeResult createAnalyzeResponse(String dossierId, String fileId, long duration, int pageCount, RedactionLog redactionLog, RedactionChangeLog redactionChangeLog) {
boolean hasHints = redactionLog.getRedactionLogEntry().stream().anyMatch(RedactionLogEntry::isHint);
public AnalyzeResult createAnalyzeResponse(String dossierId, String fileId, long duration, int pageCount,
RedactionLog redactionLog, RedactionChangeLog redactionChangeLog) {
boolean hasHints = redactionLog.getRedactionLogEntry()
.stream()
.filter(entry -> !entry.isExcluded())
.anyMatch(entry -> entry.isHint() && !entry.getType().equals("false_positive"));
boolean hasRequests = redactionLog.getRedactionLogEntry()
.stream()
.filter(entry -> !entry.isExcluded())
.anyMatch(entry -> entry.isManual() && entry.getStatus()
.equals(com.iqser.red.service.redaction.v1.model.Status.REQUESTED));
boolean hasRedactions = redactionLog.getRedactionLogEntry()
.stream()
.filter(entry -> !entry.isExcluded())
.anyMatch(entry -> entry.isRedacted() && !entry.isManual() || entry.isManual() && entry.getStatus()
.equals(com.iqser.red.service.redaction.v1.model.Status.APPROVED));
boolean hasImages = redactionLog.getRedactionLogEntry()
.stream()
.anyMatch(entry -> entry.isHint() && entry.getType().equals("image"));
.filter(entry -> !entry.isExcluded())
.anyMatch(entry -> entry.isHint() && entry.getType().equals("image") || entry.isImage());
boolean hasUpdates = redactionChangeLog != null && redactionChangeLog.getRedactionLogEntry() != null && !redactionChangeLog
.getRedactionLogEntry()
.isEmpty() && redactionChangeLog.getRedactionLogEntry().stream().anyMatch(entry -> !entry.getType().equals("false_positive"));
.isEmpty() && redactionChangeLog.getRedactionLogEntry()
.stream()
.anyMatch(entry -> !entry.getType().equals("false_positive"));
return AnalyzeResult.builder()
.dossierId(dossierId)
@ -46,4 +57,5 @@ public class AnalyzeResponseService {
.dossierDictionaryVersion(redactionLog.getDossierDictionaryVersion())
.build();
}
}

View File

@ -1,5 +1,6 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import com.iqser.red.service.redaction.v1.model.FileAttribute;
import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry;
import com.iqser.red.service.redaction.v1.model.ManualRedactions;
import com.iqser.red.service.redaction.v1.model.Point;
@ -33,7 +34,7 @@ public class EntityRedactionService {
private final SurroundingWordsService surroundingWordsService;
public void processDocument(Document classifiedDoc, String dossierTemplateId, ManualRedactions manualRedactions, String dossierId) {
public void processDocument(Document classifiedDoc, String dossierTemplateId, ManualRedactions manualRedactions, String dossierId, List<FileAttribute> fileAttributes) {
dictionaryService.updateDictionary(dossierTemplateId, dossierId);
KieContainer container = droolsExecutionService.updateRules(dossierTemplateId);
@ -41,7 +42,7 @@ public class EntityRedactionService {
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(dossierTemplateId, dossierId);
Set<Entity> documentEntities = new HashSet<>(findEntities(classifiedDoc, container, manualRedactions, dictionary, false, null));
Set<Entity> documentEntities = new HashSet<>(findEntities(classifiedDoc, container, manualRedactions, dictionary, false, null, fileAttributes));
if (dictionary.hasLocalEntries()) {
@ -53,7 +54,7 @@ public class EntityRedactionService {
}
});
Set<Entity> foundByLocal = findEntities(classifiedDoc, container, manualRedactions, dictionary, true, hintsPerSectionNumber);
Set<Entity> foundByLocal = findEntities(classifiedDoc, container, manualRedactions, dictionary, true, hintsPerSectionNumber, fileAttributes);
EntitySearchUtils.addEntitiesWithHigherRank(documentEntities, foundByLocal, dictionary);
EntitySearchUtils.removeEntitiesContainedInLarger(documentEntities);
}
@ -84,7 +85,7 @@ public class EntityRedactionService {
private Set<Entity> findEntities(Document classifiedDoc, KieContainer kieContainer,
ManualRedactions manualRedactions, Dictionary dictionary, boolean local,
Map<Integer, Set<Entity>> hintsPerSectionNumber) {
Map<Integer, Set<Entity>> hintsPerSectionNumber, List<FileAttribute> fileAttributes) {
Set<Entity> documentEntities = new HashSet<>();
@ -95,31 +96,31 @@ public class EntityRedactionService {
List<Table> tables = paragraph.getTables();
for (Table table : tables) {
if (table.getColCount() == 2) {
sectionSearchableTextPairs.addAll(processTableAsOneText(classifiedDoc, table, manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber));
sectionSearchableTextPairs.addAll(processTableAsOneText(classifiedDoc, table, manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber, fileAttributes));
} else {
sectionSearchableTextPairs.addAll(processTablePerRow(classifiedDoc, table, manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber));
sectionSearchableTextPairs.addAll(processTablePerRow(classifiedDoc, table, manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber, fileAttributes));
}
sectionNumber.incrementAndGet();
}
sectionSearchableTextPairs.add(processText(classifiedDoc, paragraph.getSearchableText(), paragraph.getTextBlocks(), paragraph
.getHeadline(), manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber, paragraph
.getImages()));
.getImages(), fileAttributes));
sectionNumber.incrementAndGet();
}
for (Header header : classifiedDoc.getHeaders()) {
sectionSearchableTextPairs.add(processText(classifiedDoc, header.getSearchableText(), header.getTextBlocks(), "Header", manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber, new ArrayList<>()));
sectionSearchableTextPairs.add(processText(classifiedDoc, header.getSearchableText(), header.getTextBlocks(), "Header", manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber, new ArrayList<>(), fileAttributes));
sectionNumber.incrementAndGet();
}
for (Footer footer : classifiedDoc.getFooters()) {
sectionSearchableTextPairs.add(processText(classifiedDoc, footer.getSearchableText(), footer.getTextBlocks(), "Footer", manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber, new ArrayList<>()));
sectionSearchableTextPairs.add(processText(classifiedDoc, footer.getSearchableText(), footer.getTextBlocks(), "Footer", manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber, new ArrayList<>(), fileAttributes));
sectionNumber.incrementAndGet();
}
for (UnclassifiedText unclassifiedText : classifiedDoc.getUnclassifiedTexts()) {
sectionSearchableTextPairs.add(processText(classifiedDoc, unclassifiedText.getSearchableText(), unclassifiedText
.getTextBlocks(), "", manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber, new ArrayList<>()));
.getTextBlocks(), "", manualRedactions, sectionNumber, dictionary, local, hintsPerSectionNumber, new ArrayList<>(), fileAttributes));
sectionNumber.incrementAndGet();
}
@ -164,7 +165,7 @@ public class EntityRedactionService {
ManualRedactions manualRedactions,
AtomicInteger sectionNumber, Dictionary dictionary,
boolean local,
Map<Integer, Set<Entity>> hintsPerSectionNumber) {
Map<Integer, Set<Entity>> hintsPerSectionNumber, List<FileAttribute> fileAttributes) {
List<SectionSearchableTextPair> sectionSearchableTextPairs = new ArrayList<>();
@ -229,6 +230,7 @@ public class EntityRedactionService {
.tabularData(tabularData)
.searchableText(searchableRow)
.dictionary(dictionary)
.fileAttributes(fileAttributes)
.build(), searchableRow));
if (!local) {
@ -252,7 +254,8 @@ public class EntityRedactionService {
ManualRedactions manualRedactions,
AtomicInteger sectionNumber, Dictionary dictionary,
boolean local,
Map<Integer, Set<Entity>> hintsPerSectionNumber) {
Map<Integer, Set<Entity>> hintsPerSectionNumber,
List<FileAttribute> fileAttributes) {
List<SectionSearchableTextPair> sectionSearchableTextPairs = new ArrayList<>();
SearchableText entireTableText = new SearchableText();
@ -296,6 +299,7 @@ public class EntityRedactionService {
.sectionNumber(sectionNumber.intValue())
.searchableText(entireTableText)
.dictionary(dictionary)
.fileAttributes(fileAttributes)
.build(), entireTableText));
if (!local) {
@ -315,7 +319,7 @@ public class EntityRedactionService {
ManualRedactions manualRedactions, AtomicInteger sectionNumber,
Dictionary dictionary, boolean local,
Map<Integer, Set<Entity>> hintsPerSectionNumber,
List<PdfImage> images) {
List<PdfImage> images, List<FileAttribute> fileAttributes) {
if (!local) {
SectionText sectionText = new SectionText();
@ -355,6 +359,7 @@ public class EntityRedactionService {
.images(images.stream()
.map(image -> convert(image, sectionNumber.intValue(), headline))
.collect(Collectors.toSet()))
.fileAttributes(fileAttributes)
.build(), searchableText);
}

View File

@ -1,14 +1,46 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.kie.api.runtime.KieContainer;
import org.springframework.stereotype.Service;
import org.springframework.web.bind.annotation.RequestBody;
import com.iqser.red.service.file.management.v1.api.model.FileType;
import com.iqser.red.service.redaction.v1.model.*;
import com.iqser.red.service.redaction.v1.model.AnalyzeRequest;
import com.iqser.red.service.redaction.v1.model.AnalyzeResult;
import com.iqser.red.service.redaction.v1.model.Comment;
import com.iqser.red.service.redaction.v1.model.IdRemoval;
import com.iqser.red.service.redaction.v1.model.ManualForceRedact;
import com.iqser.red.service.redaction.v1.model.ManualImageRecategorization;
import com.iqser.red.service.redaction.v1.model.ManualLegalBasisChange;
import com.iqser.red.service.redaction.v1.model.ManualRedactionEntry;
import com.iqser.red.service.redaction.v1.model.ManualRedactions;
import com.iqser.red.service.redaction.v1.model.Rectangle;
import com.iqser.red.service.redaction.v1.model.RedactionLog;
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.model.SectionArea;
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
import com.iqser.red.service.redaction.v1.server.classification.model.Text;
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
import com.iqser.red.service.redaction.v1.server.exception.RedactionException;
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
import com.iqser.red.service.redaction.v1.server.redaction.model.*;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrement;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryVersion;
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
import com.iqser.red.service.redaction.v1.server.redaction.model.RedRectangle2D;
import com.iqser.red.service.redaction.v1.server.redaction.model.Section;
import com.iqser.red.service.redaction.v1.server.redaction.model.SectionSearchableTextPair;
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
@ -17,14 +49,6 @@ import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import org.kie.api.runtime.KieContainer;
import org.springframework.stereotype.Service;
import org.springframework.web.bind.annotation.RequestBody;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@Slf4j
@Service
@RequiredArgsConstructor
@ -41,6 +65,7 @@ public class ReanalyzeService {
private final AnalyzeResponseService analyzeResponseService;
private final LegalBasisClient legalBasisClient;
public AnalyzeResult analyze(AnalyzeRequest analyzeRequest) {
long startTime = System.currentTimeMillis();
@ -59,18 +84,18 @@ public class ReanalyzeService {
log.info("Document structure analysis successful, starting redaction analysis...");
entityRedactionService.processDocument(classifiedDoc, analyzeRequest.getDossierTemplateId(), analyzeRequest.getManualRedactions(), analyzeRequest
.getDossierId());
.getDossierId(), analyzeRequest.getFileAttributes());
redactionLogCreatorService.createRedactionLog(classifiedDoc, pageCount, analyzeRequest.getManualRedactions(), analyzeRequest
.getDossierTemplateId());
log.info("Redaction analysis successful...");
var legalBasis = legalBasisClient.getLegalBasisMapping(analyzeRequest.getDossierTemplateId());
var redactionLog = new RedactionLog(classifiedDoc.getRedactionLogEntities(),legalBasis,
classifiedDoc.getDictionaryVersion().getDossierTemplateVersion(),
classifiedDoc.getDictionaryVersion().getDossierVersion(),
classifiedDoc.getRulesVersion(),
legalBasisClient.getVersion(analyzeRequest.getDossierTemplateId()));
var redactionLog = new RedactionLog(classifiedDoc.getRedactionLogEntities(), legalBasis, classifiedDoc.getDictionaryVersion()
.getDossierTemplateVersion(), classifiedDoc.getDictionaryVersion()
.getDossierVersion(), classifiedDoc.getRulesVersion(), legalBasisClient.getVersion(analyzeRequest.getDossierTemplateId()));
excludeExcludedPages(redactionLog, analyzeRequest.getExcludedPages());
log.info("Analyzed with rules {} and dictionary {} for dossierTemplate: {}", classifiedDoc.getRulesVersion(), classifiedDoc
.getDictionaryVersion(), analyzeRequest.getDossierTemplateId());
@ -165,14 +190,15 @@ public class ReanalyzeService {
KieContainer kieContainer = droolsExecutionService.updateRules(analyzeRequest.getDossierTemplateId());
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest
.getDossierId());
List<SectionSearchableTextPair> sectionSearchableTextPairs = new ArrayList<>();
for (SectionText reanalysisSection : reanalysisSections) {
Set<Entity> entities = entityRedactionService.findEntities(reanalysisSection.getSearchableText(), reanalysisSection
.getHeadline(), reanalysisSection.getSectionNumber(), dictionary, false);
if (reanalysisSection.getCellStarts() != null) {
if (reanalysisSection.getCellStarts() != null && !reanalysisSection.getCellStarts().isEmpty()) {
surroundingWordsService.addSurroundingText(entities, reanalysisSection.getSearchableText(), dictionary, reanalysisSection
.getCellStarts());
} else {
@ -191,6 +217,7 @@ public class ReanalyzeService {
.searchableText(reanalysisSection.getSearchableText())
.dictionary(dictionary)
.images(reanalysisSection.getImages())
.fileAttributes(analyzeRequest.getFileAttributes())
.build(), reanalysisSection.getSearchableText()));
}
@ -240,11 +267,11 @@ public class ReanalyzeService {
.getDossierTemplateId()));
}
redactionLog.getRedactionLogEntry()
.removeIf(entry -> sectionsToReanalyse.contains(entry.getSectionNumber()));
redactionLog.getRedactionLogEntry().removeIf(entry -> sectionsToReanalyse.contains(entry.getSectionNumber()));
redactionLog.getRedactionLogEntry().addAll(newRedactionLogEntries);
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement);
AnalyzeResult analyzeResult = finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionaryIncrement);
analyzeResult.setWasReanalyzed(true);
return analyzeResult;
}
@ -255,6 +282,8 @@ public class ReanalyzeService {
redactionLog.setDictionaryVersion(dictionaryIncrement.getDictionaryVersion().getDossierTemplateVersion());
redactionLog.setDossierDictionaryVersion(dictionaryIncrement.getDictionaryVersion().getDossierVersion());
excludeExcludedPages(redactionLog, analyzeRequest.getExcludedPages());
var changeLog = redactionChangeLogService.createAndStoreChangeLog(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), redactionLog);
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.REDACTION_LOG, redactionLog);
@ -271,9 +300,9 @@ public class ReanalyzeService {
return new HashSet<>();
}
return Stream.concat(manualRedactions.getIdsToRemove()
.stream()
.map(IdRemoval::getId), manualRedactions.getForceRedacts().stream().map(ManualForceRedact::getId))
return Stream.concat(manualRedactions.getManualLegalBasisChanges().stream().map(ManualLegalBasisChange::getId),
Stream.concat(manualRedactions.getImageRecategorizations().stream().map(ManualImageRecategorization::getId),
Stream.concat(manualRedactions.getIdsToRemove().stream().map(IdRemoval::getId), manualRedactions.getForceRedacts().stream().map(ManualForceRedact::getId))))
.collect(Collectors.toSet());
}
@ -292,4 +321,18 @@ public class ReanalyzeService {
.build();
}
/**
 * Flags every entry of the redaction log as excluded when it touches an
 * excluded page, and clears the flag otherwise.
 * <p>
 * The flag is computed once per entry from all of its positions. Previously
 * it was overwritten for each position in turn, so an entry spanning an
 * excluded and a non-excluded page ended up with whatever its last position
 * dictated.
 *
 * @param redactionLog  log whose entries are updated in place
 * @param excludedPages page numbers to exclude; {@code null} or empty means
 *                      no entry is excluded
 */
private void excludeExcludedPages(RedactionLog redactionLog, Set<Integer> excludedPages) {
    boolean nothingExcluded = excludedPages == null || excludedPages.isEmpty();
    redactionLog.getRedactionLogEntry().forEach(entry -> {
        // An entry is excluded as soon as any one of its positions lies on an excluded page.
        boolean onExcludedPage = !nothingExcluded && entry.getPositions()
                .stream()
                .anyMatch(pos -> excludedPages.contains(pos.getPage()));
        entry.setExcluded(onExcludedPage);
    });
}
}

View File

@ -90,6 +90,7 @@ public class RedactionChangeLogService {
.comments(entry.getComments())
.changeType(changeType)
.isDossierDictionaryEntry(entry.isDossierDictionaryEntry())
.excluded(entry.isExcluded())
.build();
}

View File

@ -9,19 +9,17 @@ import com.iqser.red.service.redaction.v1.server.parsing.model.TextPositionSeque
import com.iqser.red.service.redaction.v1.server.redaction.model.Entity;
import com.iqser.red.service.redaction.v1.server.redaction.model.EntityPositionSequence;
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
import com.iqser.red.service.redaction.v1.server.redaction.model.ReasonHolder;
import com.iqser.red.service.redaction.v1.server.redaction.utils.IdBuilder;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractTextContainer;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
import lombok.RequiredArgsConstructor;
import org.apache.commons.collections4.CollectionUtils;
import org.springframework.beans.BeanUtils;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.*;
import java.util.stream.Collectors;
@Service
@ -69,7 +67,7 @@ public class RedactionLogCreatorService {
RedactionLogEntry redactionLogEntry = RedactionLogEntry.builder()
.id(id)
.color(getColorForImage(image, dossierTemplateId, false))
.color(getColorForImage(image.getType(), dossierTemplateId, false, image.isRedaction()))
.isImage(true)
.type(image.getType())
.redacted(image.isRedaction())
@ -87,59 +85,7 @@ public class RedactionLogCreatorService {
.section(image.getSection())
.build();
if (manualRedactions != null && !manualRedactions.getIdsToRemove().isEmpty()) {
for (IdRemoval manualRemoval : manualRedactions.getIdsToRemove()) {
if (manualRemoval.getId().equals(id)) {
String manualOverrideReason = null;
if (manualRemoval.getStatus().equals(Status.APPROVED)) {
image.setRedaction(false);
redactionLogEntry.setRedacted(false);
redactionLogEntry.setStatus(Status.APPROVED);
manualOverrideReason = image.getRedactionReason() + ", removed by manual override";
redactionLogEntry.setColor(getColorForImage(image, dossierTemplateId, false));
} else if (manualRemoval.getStatus().equals(Status.REQUESTED)) {
manualOverrideReason = image.getRedactionReason() + ", requested to remove";
redactionLogEntry.setStatus(Status.REQUESTED);
redactionLogEntry.setColor(getColorForImage(image, dossierTemplateId, true));
} else {
redactionLogEntry.setStatus(Status.DECLINED);
}
image.setRedactionReason(manualOverrideReason != null ? manualOverrideReason : image.getRedactionReason());
redactionLogEntry.setReason(manualOverrideReason);
redactionLogEntry.setManual(true);
redactionLogEntry.setManualRedactionType(ManualRedactionType.REMOVE);
}
}
}
if (manualRedactions != null && !manualRedactions.getForceRedacts().isEmpty()) {
for (ManualForceRedact manualForceRedact : manualRedactions.getForceRedacts()) {
if (manualForceRedact.getId().equals(id)) {
String manualOverrideReason = null;
if (manualForceRedact.getStatus().equals(Status.APPROVED)) {
image.setRedaction(true);
redactionLogEntry.setRedacted(true);
redactionLogEntry.setStatus(Status.APPROVED);
redactionLogEntry.setColor(getColorForImage(image, dossierTemplateId, false));
manualOverrideReason = image.getRedactionReason() + ", forced by manual override";
redactionLogEntry.setLegalBasis(manualForceRedact.getLegalBasis());
} else if (manualForceRedact.getStatus().equals(Status.REQUESTED)) {
manualOverrideReason = image.getRedactionReason() + ", requested to force redact";
redactionLogEntry.setStatus(Status.REQUESTED);
redactionLogEntry.setColor(getColorForImage(image, dossierTemplateId, true));
redactionLogEntry.setLegalBasis(manualForceRedact.getLegalBasis());
} else {
redactionLogEntry.setStatus(Status.DECLINED);
}
image.setRedactionReason(manualOverrideReason != null ? manualOverrideReason : image.getRedactionReason());
redactionLogEntry.setReason(manualOverrideReason);
redactionLogEntry.setManual(true);
redactionLogEntry.setManualRedactionType(ManualRedactionType.FORCE_REDACT);
}
}
}
processImageEntry(manualRedactions, dossierTemplateId, image, redactionLogEntry);
redactionLogEntities.add(redactionLogEntry);
}
@ -147,6 +93,118 @@ public class RedactionLogCreatorService {
return redactionLogEntities;
}
/**
 * Applies every manual override that targets the given image entry, in this order:
 * image recategorizations, removals, force-redactions and legal-basis changes.
 * An override targets the entry when its id equals {@code redactionLogEntry.getId()}.
 * <p>
 * For each matching override the entry's status is set to the override's status
 * (APPROVED, REQUESTED, otherwise DECLINED), the entry is flagged as manual and tagged with the
 * corresponding {@link ManualRedactionType}. Approved and requested overrides additionally
 * extend the redaction reason and update colour / legal basis / redaction flag as coded below.
 *
 * @param manualRedactions  manual overrides of the file; nothing is done when {@code null}
 * @param dossierTemplateId dossier template used for hint and colour lookups
 * @param image             holder of the redaction flag and reason that is kept in sync with the entry
 * @param redactionLogEntry log entry that is updated in place
 */
private void processImageEntry(ManualRedactions manualRedactions, String dossierTemplateId, ReasonHolder image, RedactionLogEntry redactionLogEntry) {

    if (manualRedactions == null) {
        return;
    }

    String entryId = redactionLogEntry.getId();

    for (ManualImageRecategorization recategorization : manualRedactions.getImageRecategorizations()) {
        if (!recategorization.getId().equals(entryId)) {
            continue;
        }
        String manualOverrideReason = null;
        if (recategorization.getStatus().equals(Status.APPROVED)) {
            image.setRedaction(recategorization.isRedacted());
            redactionLogEntry.setType(recategorization.getType());
            redactionLogEntry.setHint(dictionaryService.isHint(recategorization.getType(), dossierTemplateId));
            redactionLogEntry.setRedacted(recategorization.isRedacted());
            redactionLogEntry.setStatus(Status.APPROVED);
            redactionLogEntry.setLegalBasis(recategorization.getLegalBasis());
            manualOverrideReason = mergeReasonIfNecessary(image.getRedactionReason(), ", recategorized by manual override");
            // colour is derived from the new type and the new redaction flag set just above
            redactionLogEntry.setColor(getColorForImage(redactionLogEntry.getType(), dossierTemplateId, false, redactionLogEntry.isRedacted()));
        } else if (recategorization.getStatus().equals(Status.REQUESTED)) {
            manualOverrideReason = mergeReasonIfNecessary(image.getRedactionReason(), ", requested to recategorize");
            redactionLogEntry.setStatus(Status.REQUESTED);
            redactionLogEntry.setColor(getColorForImage(redactionLogEntry.getType(), dossierTemplateId, true, redactionLogEntry.isRedacted()));
            redactionLogEntry.setRecategorizationType(recategorization.getType());
        } else {
            redactionLogEntry.setStatus(Status.DECLINED);
        }
        applyImageOverrideOutcome(image, redactionLogEntry, manualOverrideReason, ManualRedactionType.RECATEGORIZE);
    }

    for (IdRemoval manualRemoval : manualRedactions.getIdsToRemove()) {
        if (!manualRemoval.getId().equals(entryId)) {
            continue;
        }
        String manualOverrideReason = null;
        if (manualRemoval.getStatus().equals(Status.APPROVED)) {
            image.setRedaction(false);
            redactionLogEntry.setRedacted(false);
            redactionLogEntry.setStatus(Status.APPROVED);
            manualOverrideReason = mergeReasonIfNecessary(image.getRedactionReason(), ", removed by manual override");
            redactionLogEntry.setColor(getColorForImage(redactionLogEntry.getType(), dossierTemplateId, false, redactionLogEntry.isRedacted()));
        } else if (manualRemoval.getStatus().equals(Status.REQUESTED)) {
            manualOverrideReason = mergeReasonIfNecessary(image.getRedactionReason(), ", requested to remove");
            redactionLogEntry.setStatus(Status.REQUESTED);
            redactionLogEntry.setColor(getColorForImage(redactionLogEntry.getType(), dossierTemplateId, true, redactionLogEntry.isRedacted()));
        } else {
            redactionLogEntry.setStatus(Status.DECLINED);
        }
        applyImageOverrideOutcome(image, redactionLogEntry, manualOverrideReason, ManualRedactionType.REMOVE);
    }

    for (ManualForceRedact manualForceRedact : manualRedactions.getForceRedacts()) {
        if (!manualForceRedact.getId().equals(entryId)) {
            continue;
        }
        String manualOverrideReason = null;
        if (manualForceRedact.getStatus().equals(Status.APPROVED)) {
            image.setRedaction(true);
            redactionLogEntry.setRedacted(true);
            redactionLogEntry.setStatus(Status.APPROVED);
            redactionLogEntry.setColor(getColorForImage(redactionLogEntry.getType(), dossierTemplateId, false, redactionLogEntry.isRedacted()));
            manualOverrideReason = mergeReasonIfNecessary(image.getRedactionReason(), ", forced by manual override");
            redactionLogEntry.setLegalBasis(manualForceRedact.getLegalBasis());
        } else if (manualForceRedact.getStatus().equals(Status.REQUESTED)) {
            manualOverrideReason = mergeReasonIfNecessary(image.getRedactionReason(), ", requested to force redact");
            redactionLogEntry.setStatus(Status.REQUESTED);
            redactionLogEntry.setColor(getColorForImage(redactionLogEntry.getType(), dossierTemplateId, true, redactionLogEntry.isRedacted()));
            redactionLogEntry.setLegalBasis(manualForceRedact.getLegalBasis());
        } else {
            redactionLogEntry.setStatus(Status.DECLINED);
        }
        applyImageOverrideOutcome(image, redactionLogEntry, manualOverrideReason, ManualRedactionType.FORCE_REDACT);
    }

    for (ManualLegalBasisChange manualLegalBasisChange : manualRedactions.getManualLegalBasisChanges()) {
        if (!manualLegalBasisChange.getId().equals(entryId)) {
            continue;
        }
        String manualOverrideReason = null;
        if (manualLegalBasisChange.getStatus().equals(Status.APPROVED)) {
            redactionLogEntry.setStatus(Status.APPROVED);
            manualOverrideReason = mergeReasonIfNecessary(image.getRedactionReason(), ", legal basis was manually changed");
            redactionLogEntry.setLegalBasis(manualLegalBasisChange.getLegalBasis());
        } else if (manualLegalBasisChange.getStatus().equals(Status.REQUESTED)) {
            manualOverrideReason = mergeReasonIfNecessary(image.getRedactionReason(), ", legal basis change requested");
            redactionLogEntry.setStatus(Status.REQUESTED);
            redactionLogEntry.setColor(getColorForImage(redactionLogEntry.getType(), dossierTemplateId, true, redactionLogEntry.isRedacted()));
            // a requested change is only recorded as a proposal; the effective legal basis stays untouched
            redactionLogEntry.setLegalBasisChangeValue(manualLegalBasisChange.getLegalBasis());
        } else {
            redactionLogEntry.setStatus(Status.DECLINED);
        }
        applyImageOverrideOutcome(image, redactionLogEntry, manualOverrideReason, ManualRedactionType.LEGAL_BASIS_CHANGE);
    }
}

/**
 * Writes the outcome of one manual override to the reason holder and the log entry.
 * When the override produced no new reason (declined override) the holder's current reason is
 * kept on both objects, so a declined override never erases the reason of the log entry.
 */
private void applyImageOverrideOutcome(ReasonHolder image, RedactionLogEntry redactionLogEntry, String manualOverrideReason, ManualRedactionType type) {

    String effectiveReason = manualOverrideReason != null ? manualOverrideReason : image.getRedactionReason();
    image.setRedactionReason(effectiveReason);
    redactionLogEntry.setReason(effectiveReason);
    redactionLogEntry.setManual(true);
    redactionLogEntry.setManualRedactionType(type);
}
private Set<Integer> getManualRedactionPages(ManualRedactions manualRedactions) {
@ -176,7 +234,6 @@ public class RedactionLogCreatorService {
entityLoop:
for (Entity entity : entities.get(page)) {
List<Comment> comments = null;
for (EntityPositionSequence entityPositionSequence : entity.getPositionSequences()) {
@ -189,60 +246,8 @@ public class RedactionLogCreatorService {
processedIds.add(entityPositionSequence.getId());
}
if (manualRedactions != null && !manualRedactions.getIdsToRemove().isEmpty()) {
for (IdRemoval manualRemoval : manualRedactions.getIdsToRemove()) {
if (manualRemoval.getId().equals(entityPositionSequence.getId())) {
comments = manualRedactions.getComments().get(manualRemoval.getId());
String manualOverrideReason = null;
if (manualRemoval.getStatus().equals(Status.APPROVED)) {
entity.setRedaction(false);
redactionLogEntry.setRedacted(false);
redactionLogEntry.setStatus(Status.APPROVED);
manualOverrideReason = entity.getRedactionReason() + ", removed by manual override";
redactionLogEntry.setColor(getColor(entity, dossierTemplateId, false));
} else if (manualRemoval.getStatus().equals(Status.REQUESTED)) {
manualOverrideReason = entity.getRedactionReason() + ", requested to remove";
redactionLogEntry.setStatus(Status.REQUESTED);
redactionLogEntry.setColor(getColor(entity, dossierTemplateId, true));
} else {
redactionLogEntry.setStatus(Status.DECLINED);
}
entity.setRedactionReason(manualOverrideReason != null ? manualOverrideReason : entity.getRedactionReason());
redactionLogEntry.setReason(manualOverrideReason);
redactionLogEntry.setManual(true);
redactionLogEntry.setManualRedactionType(ManualRedactionType.REMOVE);
}
}
}
if (manualRedactions != null && !manualRedactions.getForceRedacts().isEmpty()) {
for (ManualForceRedact manualForceRedact : manualRedactions.getForceRedacts()) {
if (manualForceRedact.getId().equals(entityPositionSequence.getId())) {
String manualOverrideReason = null;
if (manualForceRedact.getStatus().equals(Status.APPROVED)) {
entity.setRedaction(true);
redactionLogEntry.setRedacted(true);
redactionLogEntry.setStatus(Status.APPROVED);
redactionLogEntry.setColor(getColor(entity, dossierTemplateId, false));
manualOverrideReason = entity.getRedactionReason() + ", forced by manual override";
redactionLogEntry.setLegalBasis(manualForceRedact.getLegalBasis());
} else if (manualForceRedact.getStatus().equals(Status.REQUESTED)) {
manualOverrideReason = entity.getRedactionReason() + ", requested to force redact";
redactionLogEntry.setStatus(Status.REQUESTED);
redactionLogEntry.setColor(getColor(entity, dossierTemplateId, true));
redactionLogEntry.setLegalBasis(manualForceRedact.getLegalBasis());
} else {
redactionLogEntry.setStatus(Status.DECLINED);
}
entity.setRedactionReason(manualOverrideReason != null ? manualOverrideReason : entity.getRedactionReason());
redactionLogEntry.setReason(manualOverrideReason);
redactionLogEntry.setManual(true);
redactionLogEntry.setManualRedactionType(ManualRedactionType.FORCE_REDACT);
}
}
}
redactionLogEntry.setId(entityPositionSequence.getId());
processRedactionLogEntry(manualRedactions, dossierTemplateId, redactionLogEntry, entity);
if (CollectionUtils.isNotEmpty(entityPositionSequence.getSequences())) {
List<Rectangle> rectanglesPerLine = getRectanglesPerLine(entityPositionSequence.getSequences()
@ -250,16 +255,11 @@ public class RedactionLogCreatorService {
.flatMap(seq -> seq.getTextPositions().stream())
.collect(Collectors.toList()), page);
if (manualRedactions != null) {
comments = manualRedactions.getComments().get(entityPositionSequence.getId());
}
redactionLogEntry.setComments(comments);
redactionLogEntry.getPositions().addAll(rectanglesPerLine);
}
redactionLogEntry.setId(entityPositionSequence.getId());
// FIXME ids should never be null. Figure out why this happens.
if (redactionLogEntry.getId() != null) {
@ -271,25 +271,130 @@ public class RedactionLogCreatorService {
return redactionLogEntities;
}
/**
 * Applies every manual override that targets the given log entry, in this order: removals,
 * force-redactions and legal-basis changes, and finally attaches the comments stored for the
 * entry's id. An override targets the entry when its id equals {@code redactionLogEntry.getId()}.
 * <p>
 * For each matching override the entry's status is set to the override's status
 * (APPROVED, REQUESTED, otherwise DECLINED), the entry is flagged as manual and tagged with the
 * corresponding {@link ManualRedactionType}.
 *
 * @param manualRedactions  manual overrides of the file; when {@code null} only the entry's
 *                          comments are reset to {@code null}
 * @param dossierTemplateId dossier template used for colour lookups
 * @param redactionLogEntry log entry that is updated in place
 * @param reasonHolder      holder of the redaction flag and reason that is kept in sync with the entry
 */
private void processRedactionLogEntry(ManualRedactions manualRedactions, String dossierTemplateId, RedactionLogEntry redactionLogEntry, ReasonHolder reasonHolder) {

    if (manualRedactions == null) {
        redactionLogEntry.setComments(null);
        return;
    }

    String entryId = redactionLogEntry.getId();

    for (IdRemoval manualRemoval : manualRedactions.getIdsToRemove()) {
        if (!manualRemoval.getId().equals(entryId)) {
            continue;
        }
        String manualOverrideReason = null;
        if (manualRemoval.getStatus().equals(Status.APPROVED)) {
            reasonHolder.setRedaction(false);
            redactionLogEntry.setRedacted(false);
            redactionLogEntry.setStatus(Status.APPROVED);
            manualOverrideReason = mergeReasonIfNecessary(reasonHolder.getRedactionReason(), ", removed by manual override");
            redactionLogEntry.setColor(getColor(redactionLogEntry.getType(), dossierTemplateId, false, redactionLogEntry.isRedacted()));
        } else if (manualRemoval.getStatus().equals(Status.REQUESTED)) {
            manualOverrideReason = mergeReasonIfNecessary(reasonHolder.getRedactionReason(), ", requested to remove");
            redactionLogEntry.setStatus(Status.REQUESTED);
            redactionLogEntry.setColor(getColor(redactionLogEntry.getType(), dossierTemplateId, true, redactionLogEntry.isRedacted()));
        } else {
            redactionLogEntry.setStatus(Status.DECLINED);
        }
        applyOverrideOutcome(reasonHolder, redactionLogEntry, manualOverrideReason, ManualRedactionType.REMOVE);
    }

    for (ManualForceRedact manualForceRedact : manualRedactions.getForceRedacts()) {
        if (!manualForceRedact.getId().equals(entryId)) {
            continue;
        }
        String manualOverrideReason = null;
        if (manualForceRedact.getStatus().equals(Status.APPROVED)) {
            reasonHolder.setRedaction(true);
            redactionLogEntry.setRedacted(true);
            redactionLogEntry.setStatus(Status.APPROVED);
            redactionLogEntry.setColor(getColor(redactionLogEntry.getType(), dossierTemplateId, false, redactionLogEntry.isRedacted()));
            manualOverrideReason = mergeReasonIfNecessary(reasonHolder.getRedactionReason(), ", forced by manual override");
            redactionLogEntry.setLegalBasis(manualForceRedact.getLegalBasis());
        } else if (manualForceRedact.getStatus().equals(Status.REQUESTED)) {
            manualOverrideReason = mergeReasonIfNecessary(reasonHolder.getRedactionReason(), ", requested to force redact");
            redactionLogEntry.setStatus(Status.REQUESTED);
            redactionLogEntry.setColor(getColor(redactionLogEntry.getType(), dossierTemplateId, true, redactionLogEntry.isRedacted()));
            redactionLogEntry.setLegalBasis(manualForceRedact.getLegalBasis());
        } else {
            redactionLogEntry.setStatus(Status.DECLINED);
        }
        applyOverrideOutcome(reasonHolder, redactionLogEntry, manualOverrideReason, ManualRedactionType.FORCE_REDACT);
    }

    for (ManualLegalBasisChange manualLegalBasisChange : manualRedactions.getManualLegalBasisChanges()) {
        if (!manualLegalBasisChange.getId().equals(entryId)) {
            continue;
        }
        String manualOverrideReason = null;
        if (manualLegalBasisChange.getStatus().equals(Status.APPROVED)) {
            redactionLogEntry.setStatus(Status.APPROVED);
            manualOverrideReason = mergeReasonIfNecessary(reasonHolder.getRedactionReason(), ", legal basis was manually changed");
            redactionLogEntry.setLegalBasis(manualLegalBasisChange.getLegalBasis());
        } else if (manualLegalBasisChange.getStatus().equals(Status.REQUESTED)) {
            manualOverrideReason = mergeReasonIfNecessary(reasonHolder.getRedactionReason(), ", legal basis change requested");
            redactionLogEntry.setStatus(Status.REQUESTED);
            redactionLogEntry.setColor(getColor(redactionLogEntry.getType(), dossierTemplateId, true, redactionLogEntry.isRedacted()));
            // a requested change is only recorded as a proposal; the effective legal basis stays untouched
            redactionLogEntry.setLegalBasisChangeValue(manualLegalBasisChange.getLegalBasis());
        } else {
            redactionLogEntry.setStatus(Status.DECLINED);
        }
        applyOverrideOutcome(reasonHolder, redactionLogEntry, manualOverrideReason, ManualRedactionType.LEGAL_BASIS_CHANGE);
    }

    // comments are looked up by the entry id, independent of which overrides matched
    redactionLogEntry.setComments(manualRedactions.getComments().get(entryId));
}

/**
 * Writes the outcome of one manual override to the reason holder and the log entry.
 * When the override produced no new reason (declined override) the holder's current reason is
 * kept on both objects, so a declined override never erases the reason of the log entry.
 */
private void applyOverrideOutcome(ReasonHolder reasonHolder, RedactionLogEntry redactionLogEntry, String manualOverrideReason, ManualRedactionType type) {

    String effectiveReason = manualOverrideReason != null ? manualOverrideReason : reasonHolder.getRedactionReason();
    reasonHolder.setRedactionReason(effectiveReason);
    redactionLogEntry.setReason(effectiveReason);
    redactionLogEntry.setManual(true);
    redactionLogEntry.setManualRedactionType(type);
}
/**
 * Appends {@code addition} to {@code currentReason} unless the reason already contains it.
 *
 * @param currentReason reason text so far; may be {@code null}
 * @param addition      text to append once
 * @return the empty string when {@code currentReason} is {@code null}; otherwise the current
 *         reason, extended by {@code addition} if it was not contained yet
 */
private String mergeReasonIfNecessary(String currentReason, String addition) {

    if (currentReason == null) {
        return "";
    }
    return currentReason.contains(addition) ? currentReason : currentReason + addition;
}
private List<Rectangle> getRectanglesPerLine(List<RedTextPosition> textPositions, int page) {
List<Rectangle> rectangles = new ArrayList<>();
if (textPositions.size() == 1) {
rectangles.add( TextPositionSequence.fromData(textPositions, page).getRectangle());
rectangles.add(TextPositionSequence.fromData(textPositions, page).getRectangle());
} else {
float y = textPositions.get(0).getYDirAdj();
int startIndex = 0;
for (int i = 1; i < textPositions.size(); i++) {
float yDirAdj = textPositions.get(i).getYDirAdj();
if (yDirAdj != y) {
rectangles.add( TextPositionSequence.fromData(textPositions.subList(startIndex, i), page).getRectangle());
rectangles.add(TextPositionSequence.fromData(textPositions.subList(startIndex, i), page)
.getRectangle());
y = yDirAdj;
startIndex = i;
}
}
if (startIndex != textPositions.size()) {
rectangles.add( TextPositionSequence.fromData(textPositions.subList(startIndex, textPositions.size()), page).getRectangle());
rectangles.add(TextPositionSequence.fromData(textPositions.subList(startIndex, textPositions.size()), page)
.getRectangle());
}
}
@ -363,14 +468,14 @@ public class RedactionLogCreatorService {
private RedactionLogEntry createRedactionLogEntry(Entity entity, String dossierTemplateId) {
return RedactionLogEntry.builder()
.color(getColor(entity, dossierTemplateId, false))
.color(getColor(entity.getType(), dossierTemplateId, false, entity.isRedaction()))
.reason(entity.getRedactionReason())
.legalBasis(entity.getLegalBasis())
.value(entity.getWord())
.type(entity.getType())
.redacted(entity.isRedaction())
.isHint(isHint(entity, dossierTemplateId))
.isRecommendation(isRecommendation(entity, dossierTemplateId))
.isHint(isHint(entity.getType(), dossierTemplateId))
.isRecommendation(isRecommendation(entity.getType(), dossierTemplateId))
.section(entity.getHeadline())
.sectionNumber(entity.getSectionNumber())
.matchedRule(entity.getMatchedRule())
@ -384,15 +489,15 @@ public class RedactionLogCreatorService {
}
private float[] getColor(Entity entity, String dossierTemplateId, boolean requestedToRemove) {
private float[] getColor(String type, String dossierTemplateId, boolean requestedToRemove, boolean isRedaction) {
if (requestedToRemove) {
return dictionaryService.getRequestRemoveColor(dossierTemplateId);
}
if (!entity.isRedaction() && !isHint(entity, dossierTemplateId)) {
if (!isRedaction && !isHint(type, dossierTemplateId)) {
return dictionaryService.getNotRedactedColor(dossierTemplateId);
}
return dictionaryService.getColor(entity.getType(), dossierTemplateId);
return dictionaryService.getColor(type, dossierTemplateId);
}
@ -413,27 +518,27 @@ public class RedactionLogCreatorService {
}
private float[] getColorForImage(Image image, String dossierTemplateId, boolean requestedToRemove) {
private float[] getColorForImage(String type, String dossierTemplateId, boolean requestedToRemove, boolean isRedaction) {
if (requestedToRemove) {
return dictionaryService.getRequestRemoveColor(dossierTemplateId);
}
if (!image.isRedaction() && !dictionaryService.isHint(image.getType(), dossierTemplateId)) {
if (!isRedaction && !dictionaryService.isHint(type, dossierTemplateId)) {
return dictionaryService.getNotRedactedColor(dossierTemplateId);
}
return dictionaryService.getColor(image.getType(), dossierTemplateId);
return dictionaryService.getColor(type, dossierTemplateId);
}
private boolean isHint(Entity entity, String dossierTemplateId) {
private boolean isHint(String type, String dossierTemplateId) {
return dictionaryService.isHint(entity.getType(), dossierTemplateId);
return dictionaryService.isHint(type, dossierTemplateId);
}
private boolean isRecommendation(Entity entity, String dossierTemplateId) {
private boolean isRecommendation(String type, String dossierTemplateId) {
return dictionaryService.isRecommendation(entity.getType(), dossierTemplateId);
return dictionaryService.isRecommendation(type, dossierTemplateId);
}
@ -480,4 +585,74 @@ public class RedactionLogCreatorService {
}
}
/**
 * Builds a preview of the redaction log by merging the manually added entries into the given log
 * and then applying the manual overrides to every entry. The passed {@code redactionLog} is
 * modified in place and returned.
 *
 * @param redactionLog      log to update
 * @param dossierTemplateId dossier template passed on to the entry processing
 * @param manualRedactions  manual entries and overrides to apply
 * @return the same {@code redactionLog} instance
 */
public RedactionLog getRedactionLogPreview(RedactionLog redactionLog, String dossierTemplateId, ManualRedactions manualRedactions) {

    // Step 1: create the manual entries of every page that carries manual redactions, keyed by id.
    var manualEntriesById = new HashMap<String, RedactionLogEntry>();
    for (var pageNumber : getManualRedactionPages(manualRedactions)) {
        for (var pageEntry : addManualAddEntries(manualRedactions.getEntriesToAdd(), manualRedactions.getComments(), pageNumber, dossierTemplateId)) {
            manualEntriesById.put(pageEntry.getId(), pageEntry);
        }
    }

    // Step 2: overwrite entries that already exist in the log, append the ones that do not.
    for (var manualEntry : manualEntriesById.values()) {
        redactionLog.getRedactionLogEntry()
                .stream()
                .filter(known -> known.getId().equals(manualEntry.getId()))
                .findAny()
                .ifPresentOrElse(
                        known -> BeanUtils.copyProperties(manualEntry, known),
                        () -> redactionLog.getRedactionLogEntry().add(manualEntry));
    }

    // Step 3: apply the manual overrides; image entries get the image-specific pass first.
    for (RedactionLogEntry logEntry : redactionLog.getRedactionLogEntry()) {
        var holder = new PreviewReasonHolder(logEntry);
        if (logEntry.isImage()) {
            processImageEntry(manualRedactions, dossierTemplateId, holder, logEntry);
        }
        processRedactionLogEntry(manualRedactions, dossierTemplateId, logEntry, holder);
    }

    return redactionLog;
}
/**
 * {@link ReasonHolder} view on a {@link RedactionLogEntry}: the redaction reason maps to the
 * entry's reason and the redaction flag maps to the entry's redacted flag, so every change made
 * through this holder is written straight to the wrapped entry.
 */
public static class PreviewReasonHolder implements ReasonHolder {

    private final RedactionLogEntry logEntry;


    public PreviewReasonHolder(RedactionLogEntry entry) {

        logEntry = entry;
    }


    /** Returns the reason of the wrapped entry. */
    @Override
    public String getRedactionReason() {

        return logEntry.getReason();
    }


    /** Stores the reason on the wrapped entry. */
    @Override
    public void setRedactionReason(String reason) {

        logEntry.setReason(reason);
    }


    /** Returns the redacted flag of the wrapped entry. */
    @Override
    public boolean isRedaction() {

        return logEntry.isRedacted();
    }


    /** Stores the redacted flag on the wrapped entry. */
    @Override
    public void setRedaction(boolean value) {

        logEntry.setRedacted(value);
    }

}
}

View File

@ -0,0 +1,165 @@
package com.iqser.red.service.redaction.v1.server.segmentation;
import java.awt.Graphics;
import java.awt.geom.Rectangle2D;
import java.awt.image.BufferedImage;
import java.util.ArrayList;
import java.util.List;
import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.server.redaction.model.PdfImage;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
/**
 * Re-assembles images that were delivered as several consecutive pieces during PDF import and
 * drops images whose position lies completely inside the position of another image.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class ImageMergeService {

    /**
     * Merges consecutive image pieces and afterwards removes every image whose position rectangle
     * is contained in the position rectangle of another image of the result.
     * <p>
     * NOTE(review): two distinct images with identical position rectangles contain each other and
     * are therefore both removed - confirm that this is intended.
     *
     * @param images   images of one page in extraction order
     * @param rotation page rotation; only the value 90 is treated specially
     * @return the merged list; for fewer than two images the passed list itself is returned
     */
    public List<PdfImage> mergeImages(List<PdfImage> images, int rotation) {

        List<PdfImage> mergedList = processImages(images, rotation);

        List<PdfImage> imagesInImage = new ArrayList<>();
        for (PdfImage image : mergedList) {
            for (PdfImage inner : mergedList) {
                if (image != inner && image.getPosition()
                        .contains(inner.getPosition().getX(), inner.getPosition().getY(), inner.getPosition()
                                .getWidth(), inner.getPosition().getHeight())) {
                    imagesInImage.add(inner);
                }
            }
        }

        // nothing to remove: leave the list untouched (it may be the caller's own list)
        if (!imagesInImage.isEmpty()) {
            mergedList.removeAll(imagesInImage);
        }
        return mergedList;
    }


    /**
     * Merges images that were separated during PDF import and returns a new list of images.
     * Entry {@code i} of the candidate list says whether image {@code i} and image {@code i + 1}
     * belong together.
     * <p>
     * If merging two pieces fails, the pieces are kept unmerged instead of being dropped, so no
     * image is lost and the list always has a last element to continue merging with.
     */
    private List<PdfImage> processImages(List<PdfImage> imageList, int rotation) {

        if (imageList.size() <= 1) {
            return imageList;
        }

        List<PdfImage> mergedList = new ArrayList<>();

        // true while image i is not yet represented in mergedList,
        // false once image i is the last element of mergedList or part of it
        boolean beginImage = true;

        // a list of booleans, true = candidate for merging, false = no merging
        List<Boolean> candidatesList = getCandidatesList(imageList, rotation);

        for (int i = 0; i < candidatesList.size(); i++) {
            PdfImage current = imageList.get(i);
            PdfImage next = imageList.get(i + 1);

            if (candidatesList.get(i)) {
                if (beginImage) {
                    // begin of an image, merge two pieces of imageList
                    PdfImage mergedImage = mergeTwoImages(current, next, rotation);
                    if (mergedImage != null) {
                        mergedList.add(mergedImage);
                    } else {
                        // merge failed: keep both pieces as they are
                        mergedList.add(current);
                        mergedList.add(next);
                    }
                } else {
                    // middle of an image, merge the current piece of mergedList with the next image of imageList
                    int lastIndex = mergedList.size() - 1;
                    PdfImage mergedImage = mergeTwoImages(mergedList.get(lastIndex), next, rotation);
                    if (mergedImage != null) {
                        mergedList.set(lastIndex, mergedImage);
                    } else {
                        // merge failed: keep the next piece on its own
                        mergedList.add(next);
                    }
                }
                beginImage = false;
            } else {
                boolean lastCandidate = i == candidatesList.size() - 1;
                if (beginImage) {
                    // image i is not split and not yet in the result
                    mergedList.add(current);
                }
                // image i + 1 starts a new image if image i was the end of one; after the last
                // candidate nothing follows that could still add it, so it is added in any case
                if (!beginImage || lastCandidate) {
                    mergedList.add(next);
                }
            }
        }
        return mergedList;
    }


    /**
     * Draws {@code image2} below {@code image1} into a new image and wraps it in a new
     * {@link PdfImage} on the page of {@code image1}.
     *
     * @return the merged image, or {@code null} if drawing or creating the result failed
     */
    private PdfImage mergeTwoImages(PdfImage image1, PdfImage image2, int rotation) {

        // the values reported by getPosition() do not seem to be correct for drawing; using them cuts off parts of the image
        double width = image1.getPosition().getWidth();
        double width2 = image2.getPosition().getWidth();
        double height1 = image1.getPosition().getHeight();
        double height2 = image2.getPosition().getHeight();
        // with the values stored in the image itself it works
        double img1height = image1.getImage().getHeight();
        double img1width = image1.getImage().getWidth();
        double img2height = image2.getImage().getHeight();

        BufferedImage mergedImage = new BufferedImage((int) img1width, (int) (img1height + img2height), BufferedImage.TYPE_INT_RGB);
        Graphics mergedImageGraphics = mergedImage.getGraphics();
        try {
            mergedImageGraphics.drawImage(image1.getImage(), 0, 0, null);
            mergedImageGraphics.drawImage(image2.getImage(), 0, (int) (img1height), null);

            // position of the merged image: x of image1, y of image2 and the combined extent of both
            Rectangle2D pos = new Rectangle2D.Float();
            pos.setRect(image1.getPosition().getX(), image2.getPosition()
                    .getY(), rotation == 90 ? width + width2 : width, rotation == 90 ? height1 : height1 + height2);

            PdfImage newPdfImage = new PdfImage(mergedImage, pos, image1.getPage());

            image1.getImage().flush();
            image2.getImage().flush();
            mergedImage.flush();

            return newPdfImage;
        } catch (Exception e) {
            // failed to merge image
            log.error("Failed to merge image", e);
            return null;
        } finally {
            // the graphics context must be disposed on the failure path as well
            mergedImageGraphics.dispose();
        }
    }


    /**
     * Builds the list of merge flags: element {@code i} tells whether image {@code i} and
     * image {@code i + 1} are candidates for merging.
     */
    private List<Boolean> getCandidatesList(List<PdfImage> imageList, int rotation) {

        List<Boolean> candidatesList = new ArrayList<>();
        for (int i = 1; i < imageList.size(); i++) {
            candidatesList.add(isCandidateForMerging(imageList.get(i - 1), imageList.get(i), rotation));
        }
        return candidatesList;
    }


    /**
     * Evaluates whether two images are candidates for merging, depending on their coordinates,
     * width and height. For a rotation of 90 the roles of x/y and width/height are swapped.
     */
    private boolean isCandidateForMerging(PdfImage image1, PdfImage image2, int rotation) {

        boolean rotated = rotation == 90;

        double x1 = rotated ? image1.getPosition().getY() : image1.getPosition().getX();
        double y1 = rotated ? image1.getPosition().getX() : image1.getPosition().getY();
        double width1 = rotated ? image1.getPosition().getHeight() : image1.getPosition().getWidth();

        double x2 = rotated ? image2.getPosition().getY() : image2.getPosition().getX();
        double y2 = rotated ? image2.getPosition().getX() : image2.getPosition().getY();
        double width2 = rotated ? image2.getPosition().getHeight() : image2.getPosition().getWidth();
        double height2 = rotated ? image2.getPosition().getWidth() : image2.getPosition().getHeight();

        // if the x-coordinates and widths of the images are equal and the height of the second image
        // equals the distance between the y-coordinates, both are pieces of the same picture;
        // very narrow pieces (width not greater than a sixth of the height) are never merged
        double distance = rotated ? y2 - y1 : y1 - y2;
        return x1 == x2 && width1 == width2 && Math.ceil(height2) == Math.ceil(distance) && width2 > (height2 / 6);
    }

}

View File

@ -1,6 +1,19 @@
package com.iqser.red.service.redaction.v1.server.segmentation;
import com.iqser.red.service.redaction.v1.model.Rectangle;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.io.IOUtils;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.classification.model.Page;
import com.iqser.red.service.redaction.v1.server.classification.model.TextBlock;
@ -15,24 +28,9 @@ import com.iqser.red.service.redaction.v1.server.tableextraction.model.AbstractT
import com.iqser.red.service.redaction.v1.server.tableextraction.model.CleanRulings;
import com.iqser.red.service.redaction.v1.server.tableextraction.service.RulingCleaningService;
import com.iqser.red.service.redaction.v1.server.tableextraction.service.TableExtractionService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.IOUtils;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.springframework.stereotype.Service;
import java.awt.Graphics;
import java.awt.geom.Rectangle2D;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
@Slf4j
@Service
@ -47,13 +45,17 @@ public class PdfSegmentationService {
private final ClassificationService classificationService;
private final SectionsBuilderService sectionsBuilderService;
private final ImageClassificationService imageClassificationService;
private final ImageMergeService imageMergeService;
/**
 * Parses the given PDF stream with image classification enabled; shorthand for
 * {@code parseDocument(documentInputStream, false)}.
 *
 * @param documentInputStream PDF content to parse
 * @return the parsed document
 * @throws IOException if the two-argument overload throws it
 */
public Document parseDocument(InputStream documentInputStream) throws IOException {

    final boolean ignoreImages = false;
    return parseDocument(documentInputStream, ignoreImages);
}
public Document parseDocument(InputStream documentInputStream, boolean ignoreImages) throws IOException {
PDDocument pdDocument = null;
try {
//create tempFile
@ -64,7 +66,6 @@ public class PdfSegmentationService {
Document document = new Document();
List<Page> pages = new ArrayList<>();
pdDocument = reinitializePDDocument(tempFile, null);
long pageCount = pdDocument.getNumberOfPages();
@ -101,32 +102,19 @@ public class PdfSegmentationService {
page.setRotation(rotation);
page.setLandscape(isLandscape || isRotated);
page.setPageNumber(pageNumber);
List<PdfImage> mergedList = processImages(stripper.getImages());
List<PdfImage> imagesInImage = new ArrayList<>();
for(PdfImage image: mergedList){
for (PdfImage inner: mergedList){
if(image != inner && image.getPosition().contains(inner.getPosition().getX(), inner.getPosition().getY(), inner.getPosition().getWidth(), inner.getPosition().getHeight())){
imagesInImage.add(inner);
}
}
}
mergedList.removeAll(imagesInImage);
List<PdfImage> mergedList = imageMergeService.mergeImages(stripper.getImages(), rotation);
page.setImages(mergedList);
tableExtractionService.extractTables(cleanRulings, page);
buildPageStatistics(page);
increaseDocumentStatistics(page, document);
if (!ignoreImages) {
imageClassificationService.classifyImages(page);
}
pages.add(page);
}
document.setPages(pages);
@ -149,7 +137,9 @@ public class PdfSegmentationService {
}
}
private PDDocument reinitializePDDocument(File tempFile, PDDocument pdDocument) throws IOException {
if (pdDocument != null) {
pdDocument.close();
}
@ -164,130 +154,6 @@ public class PdfSegmentationService {
return newPDDocument;
}
/**
 * Merges images that were separated during PDF import and returns a new list of images.
 * Entry {@code i} of the candidate list says whether image {@code i} and image {@code i + 1}
 * belong together.
 * <p>
 * If merging two pieces fails, the pieces are kept unmerged instead of being dropped, so no
 * image is lost and the list always has a last element to continue merging with.
 */
private List<PdfImage> processImages(List<PdfImage> imageList) {

    if (imageList.size() <= 1) {
        return imageList;
    }

    List<PdfImage> mergedList = new ArrayList<>();

    // true while image i is not yet represented in mergedList,
    // false once image i is the last element of mergedList or part of it
    boolean beginImage = true;

    // a list of booleans, true = candidate for merging, false = no merging
    List<Boolean> candidatesList = getCandidatesList(imageList);

    for (int i = 0; i < candidatesList.size(); i++) {
        PdfImage current = imageList.get(i);
        PdfImage next = imageList.get(i + 1);

        if (candidatesList.get(i)) {
            if (beginImage) {
                // begin of an image, merge two pieces of imageList
                PdfImage mergedImage = mergeTwoImages(current, next);
                if (mergedImage != null) {
                    mergedList.add(mergedImage);
                } else {
                    // merge failed: keep both pieces as they are
                    mergedList.add(current);
                    mergedList.add(next);
                }
            } else {
                // middle of an image, merge the current piece of mergedList with the next image of imageList
                int lastIndex = mergedList.size() - 1;
                PdfImage mergedImage = mergeTwoImages(mergedList.get(lastIndex), next);
                if (mergedImage != null) {
                    mergedList.set(lastIndex, mergedImage);
                } else {
                    // merge failed: keep the next piece on its own
                    mergedList.add(next);
                }
            }
            beginImage = false;
        } else {
            boolean lastCandidate = i == candidatesList.size() - 1;
            if (beginImage) {
                // image i is not split and not yet in the result
                mergedList.add(current);
            }
            // image i + 1 starts a new image if image i was the end of one; after the last
            // candidate nothing follows that could still add it, so it is added in any case
            if (!beginImage || lastCandidate) {
                mergedList.add(next);
            }
        }
    }
    return mergedList;
}
/**
 * Stacks two image pieces vertically into one new {@code PdfImage}.
 *
 * <p>{@code image1} is drawn on top, {@code image2} directly beneath it. The position of the
 * result uses the x-coordinate and width of {@code image1}, the y-coordinate of
 * {@code image2} and the sum of both position heights. The page is taken from
 * {@code image1}.
 *
 * @param image1 upper piece
 * @param image2 lower piece
 * @return the merged image, or {@code null} if merging failed (the failure is logged)
 */
private PdfImage mergeTwoImages(PdfImage image1, PdfImage image2) {
    Graphics mergedImageGraphics = null;
    try {
        // The sizes reported by getPosition() do not seem to be right for the pixel buffer:
        // using them cuts off parts of the picture.
        double width = image1.getPosition().getWidth();
        double height1 = image1.getPosition().getHeight();
        double height2 = image2.getPosition().getHeight();

        // With the pixel dimensions stored in the image itself it works.
        double img1height = image1.getImage().getHeight();
        double img1width = image1.getImage().getWidth();
        double img2height = image2.getImage().getHeight();

        // Allocated inside the try block so that invalid dimensions or missing image data
        // end in the logged "null" result instead of escaping to the caller.
        BufferedImage mergedImage = new BufferedImage((int) img1width, (int) (img1height + img2height), BufferedImage.TYPE_INT_RGB);
        mergedImageGraphics = mergedImage.getGraphics();
        mergedImageGraphics.drawImage(image1.getImage(), 0, 0, null);
        mergedImageGraphics.drawImage(image2.getImage(), 0, (int) (img1height), null);

        // Position of the merged image: values of image1, y of image2 and the height of both.
        Rectangle2D pos = new Rectangle2D.Float();
        pos.setRect(image1.getPosition().getX(), image2.getPosition().getY(), width, height1 + height2);
        PdfImage newPdfImage = new PdfImage(mergedImage, pos, image1.getPage());

        image1.getImage().flush();
        image2.getImage().flush();
        mergedImage.flush();
        return newPdfImage;
    } catch (Exception e) {
        // failed to merge image
        log.error("Failed to merge image", e);
        return null;
    } finally {
        // Graphics contexts must be disposed on every path, not only on success.
        if (mergedImageGraphics != null) {
            mergedImageGraphics.dispose();
        }
    }
}
/**
 * Builds the merge flags for all neighbouring image pairs.
 *
 * <p>Entry {@code k} of the result is the outcome of {@code isCandidateForMerging} for
 * {@code imageList.get(k)} and {@code imageList.get(k + 1)}, so the result has one entry less
 * than {@code imageList} (and is empty for lists with fewer than two images).
 *
 * @param imageList images whose neighbours are compared
 * @return one flag per neighbouring pair; {@code true} marks a candidate for merging
 */
private List<Boolean> getCandidatesList(List<PdfImage> imageList) {
    List<Boolean> mergeFlags = new ArrayList<>();
    for (int next = 1; next < imageList.size(); next++) {
        PdfImage previousImage = imageList.get(next - 1);
        PdfImage nextImage = imageList.get(next);
        mergeFlags.add(isCandidateForMerging(previousImage, nextImage));
    }
    return mergeFlags;
}
/**
 * Checks whether two images are candidates for merging, based on their positions.
 *
 * <p>Both images must have exactly the same x-coordinate and width, the rounded-up height of
 * {@code image2} must equal the rounded-up difference of the y-coordinates
 * ({@code image1} minus {@code image2}), and {@code image2} must be wider than one sixth of
 * its height.
 *
 * @param image1 first image of the pair
 * @param image2 second image of the pair
 * @return {@code true} if the two images are treated as parts of the same picture
 */
private boolean isCandidateForMerging(PdfImage image1, PdfImage image2) {
    double firstX = image1.getPosition().getX();
    double firstY = image1.getPosition().getY();
    double firstWidth = image1.getPosition().getWidth();

    double secondX = image2.getPosition().getX();
    double secondY = image2.getPosition().getY();
    double secondWidth = image2.getPosition().getWidth();
    double secondHeight = image2.getPosition().getHeight();

    // same column: x-coordinate and width have to match exactly
    if (firstX != secondX) {
        return false;
    }
    if (firstWidth != secondWidth) {
        return false;
    }
    // the y-offset between the images has to match the height of the second image
    if (Math.ceil(secondHeight) != Math.ceil(firstY - secondY)) {
        return false;
    }
    // the second image must be wider than a sixth of its height
    return secondWidth > (secondHeight / 6);
}
private void increaseDocumentStatistics(Page page, Document document) {
@ -319,5 +185,4 @@ public class PdfSegmentationService {
}
}

View File

@ -634,6 +634,9 @@ public class RedactionIntegrationTest {
long start = System.currentTimeMillis();
ClassPathResource pdfFileResource = new ClassPathResource("files/new/Single Study - Oral (Gavage) Mouse.pdf");
AnalyzeRequest request = prepareStorage(pdfFileResource.getInputStream());
request.setExcludedPages(Set.of(1));
request.setFileAttributes(List.of(FileAttribute.builder().id("fileAttributeId").label("Vertebrate Study").placeholder("{fileattributes.vertebrateStudy}").value("true").build()));
AnalyzeResult result = reanalyzeService.analyze(request);
@ -689,8 +692,25 @@ public class RedactionIntegrationTest {
when(dictionaryClient.getDictionaryForType(VERTEBRATE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(getDictionaryResponse(VERTEBRATE, false));
start = System.currentTimeMillis();
ManualRedactions manualRedactions = new ManualRedactions();
manualRedactions.setImageRecategorizations(Set.of(ManualImageRecategorization.builder()
.id("37eee3e9d589a5cc529bfec38c3ba479")
.status(Status.APPROVED)
.type("signature")
.redacted(true)
.legalBasis("Article 39(e)(1) and Article 39(e)(2) of Regulation (EC) No 178/2002")
.build()));
request.setManualRedactions(manualRedactions);
AnalyzeResult reanalyzeResult = reanalyzeService.reanalyze(request);
redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
end = System.currentTimeMillis();
System.out.println("reanalysis analysis duration: " + (end - start));
@ -758,6 +778,7 @@ public class RedactionIntegrationTest {
.status(Status.APPROVED)
.build()));
manualRedactions.getComments().put("e5be0f1d941bbb92a068e198648d06c4", List.of(comment));
manualRedactions.getComments().put("0836727c3508a0b2ea271da69c04cc2f", List.of(comment));
manualRedactions.getComments().put(manualAddId, List.of(comment));
@ -777,14 +798,21 @@ public class RedactionIntegrationTest {
request.setManualRedactions(manualRedactions);
AnalyzeResult result = reanalyzeService.analyze(request);
manualRedactions.getEntriesToAdd().add(manualRedactionEntry);
manualRedactions.setIdsToRemove(Set.of(IdRemoval.builder()
.id("5b940b2cb401ed9f5be6fc24f6e77bcf")
.status(Status.APPROVED)
.build()));
manualRedactions.setManualLegalBasisChanges(Set.of(ManualLegalBasisChange.builder()
.id("675eba69b0c2917de55462c817adaa05")
.legalBasis("Manual Legal Basis Change")
.status(Status.APPROVED)
.build()));
reanalyzeService.reanalyze(request);
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
AnnotateResponse annotateResponse = redactionController.annotate(AnnotateRequest.builder()
.dossierId(TEST_DOSSIER_ID)

View File

@ -151,7 +151,7 @@ public class EntityRedactionServiceTest {
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId");
entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId", null);
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
assertThat(classifiedDoc.getEntities().get(1)).hasSize(7);// 3 author cells, 1 address, 1 Y and 2 N entities
}
@ -177,7 +177,7 @@ public class EntityRedactionServiceTest {
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId");
entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId", null);
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
assertThat(classifiedDoc.getEntities().get(1)).hasSize(7);// 3 author cells, 1 address, 1 Y and 2 N entities
}
@ -202,7 +202,7 @@ public class EntityRedactionServiceTest {
.build();
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId");
entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId", null);
assertThat(classifiedDoc.getEntities()
.entrySet()
.stream()
@ -210,7 +210,7 @@ public class EntityRedactionServiceTest {
pdfFileResource = new ClassPathResource("files/Compounds/27 A8637C - EU AIR3 - MCP Section 1 - Identity of " +
"the plant protection product.pdf");
classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId");
entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId", null);
assertThat(classifiedDoc.getEntities()
.entrySet()
.stream()
@ -235,7 +235,7 @@ public class EntityRedactionServiceTest {
.build();
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId");
entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId", null);
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
assertThat(classifiedDoc.getEntities().get(1).stream()
.filter(entity -> entity.getMatchedRule() == 9)
@ -302,7 +302,7 @@ public class EntityRedactionServiceTest {
.build();
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId");
entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId", null);
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
assertThat(classifiedDoc.getEntities().get(1).stream()
.filter(entity -> entity.getMatchedRule() == 6)
@ -341,7 +341,7 @@ public class EntityRedactionServiceTest {
.build();
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(dictionaryResponse);
Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId");
entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId", null);
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
assertThat(classifiedDoc.getEntities().get(1).stream()
.filter(entity -> entity.getMatchedRule() == 11)
@ -371,7 +371,7 @@ public class EntityRedactionServiceTest {
.build();
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId");
entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId", null);
assertThat(classifiedDoc.getEntities()).hasSize(2); // two pages
assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 9).count()).isEqualTo(8);
assertThat(classifiedDoc.getEntities().get(2).stream().filter(entity -> entity.getMatchedRule() == 9).count()).isEqualTo(5); // 2 names, 1 address, 2 Y
@ -390,7 +390,7 @@ public class EntityRedactionServiceTest {
when(dictionaryClient.getDictionaryForType(ADDRESS_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(addressResponse);
classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId");
entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId", null);
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 9).count()).isEqualTo(3);
assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 8).count()).isEqualTo(9);
@ -419,7 +419,7 @@ public class EntityRedactionServiceTest {
.build();
when(dictionaryClient.getDictionaryForType(SPONSOR_CODE, TEST_DOSSIER_TEMPLATE_ID, DictionaryResource.GLOBAL_DOSSIER)).thenReturn(sponsorResponse);
Document classifiedDoc = pdfSegmentationService.parseDocument(pdfFileResource.getInputStream());
entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId");
entityRedactionService.processDocument(classifiedDoc, TEST_DOSSIER_TEMPLATE_ID, null, "dossierId", null);
assertThat(classifiedDoc.getEntities()).hasSize(1); // one page
assertThat(classifiedDoc.getEntities().get(1).stream().filter(entity -> entity.getMatchedRule() == 8).count()).isEqualTo(6);
}

View File

@ -268,7 +268,7 @@ rule "18: Redact contact information if Producer is found"
// Rule 19: for a Section whose searchText contains "AUTHOR(S):", calls
// section.redactLinesBetween("AUTHOR(S):", "COMPLETION DATE:", ...) with type "PII" and rule number 19.
// NOTE(review): the two Section(...) lines in the "when" part look like the old and the new
// version of the same condition (the second one additionally requires the file attribute
// "{fileattributes.vertebrateStudy}" to equal "true") - confirm that only one is meant to remain.
rule "19: Redact AUTHOR(S)"
when
Section(searchText.contains("AUTHOR(S):"))
Section(searchText.contains("AUTHOR(S):") && fileAttributeByPlaceholderEquals("{fileattributes.vertebrateStudy}", "true"))
then
section.redactLinesBetween("AUTHOR(S):", "COMPLETION DATE:", "PII", 19, true, "AUTHOR(S) was found", "Reg (EC) No 1107/2009 Art. 63 (2e)");
end