Compare commits

...

9 Commits

Author SHA1 Message Date
Dominique Eiflaender
681f19507f Pull request #338: RED-3327: Ignore recommendations if touches other type
Merge in RED/redaction-service from RED-3327 to release/3.54.x

* commit '07ffd900582ada731a7381d825509ed5e656b70e':
  RED-3327: Ignore recommendations if touches other type
2022-02-01 09:59:39 +01:00
deiflaender
07ffd90058 RED-3327: Ignore recommendations if touches other type 2022-02-01 09:41:21 +01:00
Dominique Eiflaender
0e9082fadc Pull request #322: RED-3190: Ignore if surrounding text cannot be calculated because of different parsing order from ui and backend
Merge in RED/redaction-service from RED-3190 to release/3.54.x

* commit '2e64c22a2eefa95a5382497d6acc05b240df6535':
  RED-3190: Ignore if surrounding text cannot be calculated because of different parsing order from ui and backend
2022-01-14 12:03:51 +01:00
deiflaender
2e64c22a2e RED-3190: Ignore if surrounding text cannot be calculated because of different parsing order from ui and backend 2022-01-14 11:50:52 +01:00
Dominique Eiflaender
687c318e4a Pull request #319: RED-3173: Fixed wrong skipped redaction after remove and re-add to dictionary
Merge in RED/redaction-service from RED-3173 to release/3.54.x

* commit '8e7b4452996a1e67c2c99d3f997162ce8648c162':
  RED-3173: Fixed wrong skipped redaction after remove and re-add to dictionary
2022-01-13 11:16:14 +01:00
deiflaender
8e7b445299 RED-3173: Fixed wrong skipped redaction after remove and re-add to dictionary 2022-01-13 10:55:19 +01:00
Timo Bejan
547380195b Pull request #317: analysis version in redactionlog and changes
Merge in RED/redaction-service from analysis-number-in-redaction-log to release/3.54.x

* commit 'f80d946c5f8231f23546a2028ca0ddf717d1f759':
  analysis version in redactionlog and changes
2022-01-12 19:56:59 +01:00
Timo Bejan
f80d946c5f analysis version in redactionlog and changes 2022-01-12 20:50:00 +02:00
deiflaender
e33e1a5f79 RED-3172: Add AI Entities as recommendations 2022-01-12 13:03:28 +01:00
9 changed files with 68 additions and 66 deletions

View File

@ -26,6 +26,7 @@ public class AnalyzeRequest {
private String dossierTemplateId;
private ManualRedactions manualRedactions;
private OffsetDateTime lastProcessed;
private int analysisNumber;
@Builder.Default
private Set<Integer> excludedPages = new HashSet<>();

View File

@ -28,6 +28,7 @@ public class AnalyzeResult {
private boolean wasReanalyzed;
private int analysisVersion;
private int analysisNumber;
private ManualRedactions manualRedactions;

View File

@ -13,6 +13,7 @@ import lombok.NoArgsConstructor;
@NoArgsConstructor
public class Change {
private int analysisNumber;
private ChangeType type;
private OffsetDateTime dateTime;
}

View File

@ -18,6 +18,11 @@ public class RedactionLog {
*/
private long analysisVersion;
/**
* Which analysis created this redactionLog.
*/
private int analysisNumber;
private List<RedactionLogEntry> redactionLogEntry;
private List<LegalBasis> legalBasis;
@ -26,4 +31,5 @@ public class RedactionLog {
private long rulesVersion = -1;
private long legalBasisVersion = -1;
}

View File

@ -1,40 +1,30 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import com.iqser.red.service.persistence.service.v1.api.model.annotations.*;
import com.iqser.red.service.persistence.service.v1.api.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.redaction.v1.model.*;
import com.iqser.red.service.redaction.v1.model.Rectangle;
import org.kie.api.runtime.KieContainer;
import org.springframework.stereotype.Service;
import org.springframework.web.bind.annotation.RequestBody;
import com.iqser.red.service.redaction.v1.model.*;
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.classification.model.SectionText;
import com.iqser.red.service.redaction.v1.server.classification.model.Text;
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
import com.iqser.red.service.redaction.v1.server.exception.RedactionException;
import com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryIncrement;
import com.iqser.red.service.redaction.v1.server.redaction.model.DictionaryVersion;
import com.iqser.red.service.redaction.v1.server.redaction.model.Image;
import com.iqser.red.service.redaction.v1.server.redaction.model.PageEntities;
import com.iqser.red.service.redaction.v1.server.redaction.model.RedRectangle2D;
import com.iqser.red.service.redaction.v1.server.redaction.model.*;
import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUtils;
import com.iqser.red.service.redaction.v1.server.segmentation.PdfSegmentationService;
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import org.kie.api.runtime.KieContainer;
import org.springframework.stereotype.Service;
import org.springframework.web.bind.annotation.RequestBody;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@Slf4j
@Service
@ -74,7 +64,7 @@ public class AnalyzeService {
List<SectionText> sectionTexts = sectionTextBuilderService.buildSectionText(classifiedDoc);
sectionGridCreatorService.createSectionGrid(classifiedDoc, pageCount);
Text text = new Text(pageCount,sectionTexts);
Text text = new Text(pageCount, sectionTexts);
// enhance section grid with headline data
sectionTexts.forEach(sectionText -> classifiedDoc
@ -97,7 +87,7 @@ public class AnalyzeService {
var text = redactionStorageService.getText(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
var nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
if(redactionServiceSettings.isEnableEntityRecognition() && nerEntities == null){
if (redactionServiceSettings.isEnableEntityRecognition() && nerEntities == null) {
nerAnalyserService.computeNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
}
@ -116,10 +106,9 @@ public class AnalyzeService {
.getDossierTemplateId());
var legalBasis = legalBasisClient.getLegalBasisMapping(analyzeRequest.getDossierTemplateId());
var redactionLog = new RedactionLog(redactionServiceSettings.getAnalysisVersion(), redactionLogEntries, legalBasis, dictionary
.getVersion()
.getDossierTemplateVersion(), dictionary.getVersion()
.getDossierVersion(), rulesVersion, legalBasisClient.getVersion(analyzeRequest.getDossierTemplateId()));
var redactionLog = new RedactionLog(redactionServiceSettings.getAnalysisVersion(), analyzeRequest.getAnalysisNumber(),
redactionLogEntries, legalBasis, dictionary.getVersion().getDossierTemplateVersion(), dictionary.getVersion().getDossierVersion(),
rulesVersion, legalBasisClient.getVersion(analyzeRequest.getDossierTemplateId()));
return finalizeAnalysis(analyzeRequest, startTime, redactionLog, text, dictionary.getVersion(), false);
}
@ -149,7 +138,7 @@ public class AnalyzeService {
}
var nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
if(redactionServiceSettings.isEnableEntityRecognition() && nerEntities == null){
if (redactionServiceSettings.isEnableEntityRecognition() && nerEntities == null) {
nerAnalyserService.computeNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
nerEntities = redactionStorageService.getNerEntities(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
}
@ -214,7 +203,8 @@ public class AnalyzeService {
excludeExcludedPages(redactionLog, analyzeRequest.getExcludedPages());
var redactionLogChange = redactionChangeLogService.computeChanges(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), redactionLog);
var redactionLogChange = redactionChangeLogService.computeChanges(analyzeRequest.getDossierId(), analyzeRequest.getFileId(),
redactionLog, analyzeRequest.getAnalysisNumber());
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.REDACTION_LOG, redactionLogChange
.getRedactionLog());
@ -227,6 +217,7 @@ public class AnalyzeService {
.numberOfPages(text.getNumberOfPages())
.hasUpdates(redactionLogChange.isHasChanges())
.analysisVersion(redactionServiceSettings.getAnalysisVersion())
.analysisNumber(analyzeRequest.getAnalysisNumber())
.rulesVersion(redactionLog.getRulesVersion())
.dictionaryVersion(redactionLog.getDictionaryVersion())
.legalBasisVersion(redactionLog.getLegalBasisVersion())
@ -243,12 +234,12 @@ public class AnalyzeService {
}
return Stream.concat(manualRedactions.getLegalBasisChanges()
.stream()
.map(ManualLegalBasisChange::getAnnotationId), Stream.concat(manualRedactions.getImageRecategorization()
.stream()
.map(ManualImageRecategorization::getAnnotationId), Stream.concat(manualRedactions.getIdsToRemove()
.stream()
.map(IdRemoval::getAnnotationId), manualRedactions.getForceRedactions().stream().map(ManualForceRedaction::getAnnotationId))))
.stream()
.map(ManualLegalBasisChange::getAnnotationId), Stream.concat(manualRedactions.getImageRecategorization()
.stream()
.map(ManualImageRecategorization::getAnnotationId), Stream.concat(manualRedactions.getIdsToRemove()
.stream()
.map(IdRemoval::getAnnotationId), manualRedactions.getForceRedactions().stream().map(ManualForceRedaction::getAnnotationId))))
.collect(Collectors.toSet());
}

View File

@ -1,5 +1,7 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import static com.iqser.red.service.redaction.v1.server.redaction.model.Dictionary.RECOMMENDATION_PREFIX;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
@ -92,7 +94,7 @@ public class EntityRedactionService {
.collect(Collectors.toList());
// only approved id removals, that haven't been forced back afterwards
var idsToRemove = analyzeRequest.getManualRedactions().getIdsToRemove().stream()
.filter(idr -> idr.getStatus() == AnnotationStatus.APPROVED)
.filter(idr -> idr.getStatus() == AnnotationStatus.APPROVED && !idr.isRemoveFromDictionary())
.filter(idr -> idr.getRequestDate() != null)
.filter(idr -> approvedForceRedactions.stream().noneMatch(forceRedact -> forceRedact.getRequestDate().isAfter(idr.getRequestDate())))
.map(IdRemoval::getAnnotationId).collect(Collectors.toSet());
@ -263,7 +265,7 @@ public class EntityRedactionService {
.containsKey(sectionNumber)) {
nerEntities.getResult().get(sectionNumber).forEach(res -> {
if (cellstarts == null || cellstarts.isEmpty()) {
nerValuesPerType.computeIfAbsent(res.getType(), (a) -> new HashSet<>())
nerValuesPerType.computeIfAbsent(RECOMMENDATION_PREFIX + res.getType(), (a) -> new HashSet<>())
.add(new String(Base64.decodeBase64(res.getValue().getBytes())));
} else {
boolean intersectsCellStart = false;
@ -273,7 +275,7 @@ public class EntityRedactionService {
}
}
if (!intersectsCellStart) {
nerValuesPerType.computeIfAbsent(res.getType(), (a) -> new HashSet<>())
nerValuesPerType.computeIfAbsent(RECOMMENDATION_PREFIX + res.getType(), (a) -> new HashSet<>())
.add(new String(Base64.decodeBase64(res.getValue().getBytes())));
}
}

View File

@ -22,7 +22,9 @@ import com.iqser.red.service.redaction.v1.server.redaction.utils.EntitySearchUti
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@Service
@RequiredArgsConstructor
public class ManualRedactionSurroundingTextService {
@ -88,6 +90,11 @@ public class ManualRedactionSurroundingTextService {
Entity correctEntity = getEntityOnCorrectPosition(entitiesWithPositions, toFindPositions);
if(correctEntity == null){
log.warn("Could not calculate surrounding text");
return Pair.of("","");
}
if (sectionText.getCellStarts() != null && !sectionText.getCellStarts().isEmpty()) {
surroundingWordsService.addSurroundingText(Set.of(correctEntity), sectionText.getSearchableText(), null, sectionText
.getCellStarts());

View File

@ -1,25 +1,14 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import java.time.OffsetDateTime;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.model.Change;
import com.iqser.red.service.redaction.v1.model.ChangeType;
import com.iqser.red.service.redaction.v1.model.RedactionLog;
import com.iqser.red.service.redaction.v1.model.RedactionLogChanges;
import com.iqser.red.service.redaction.v1.model.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.model.*;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import java.time.OffsetDateTime;
import java.util.*;
import java.util.stream.Collectors;
@Slf4j
@Service
@ -29,7 +18,7 @@ public class RedactionChangeLogService {
private final RedactionStorageService redactionStorageService;
public RedactionLogChanges computeChanges(String dossierId, String fileId, RedactionLog currentRedactionLog) {
public RedactionLogChanges computeChanges(String dossierId, String fileId, RedactionLog currentRedactionLog, int analysisNumber) {
long start = System.currentTimeMillis();
@ -37,7 +26,7 @@ public class RedactionChangeLogService {
if (previousRedactionLog == null) {
currentRedactionLog.getRedactionLogEntry().forEach(entry -> {
entry.getChanges().add(new Change(ChangeType.ADDED, OffsetDateTime.now()));
entry.getChanges().add(new Change(analysisNumber, ChangeType.ADDED, OffsetDateTime.now()));
});
return new RedactionLogChanges(currentRedactionLog, false);
}
@ -50,17 +39,16 @@ public class RedactionChangeLogService {
.equals(ChangeType.REMOVED))
.collect(Collectors.toList());
Set<RedactionLogEntry> added = new HashSet<>(currentRedactionLog.getRedactionLogEntry().stream()
Set<RedactionLogEntry> added = currentRedactionLog.getRedactionLogEntry().stream()
.filter(entry -> entry.getChanges().isEmpty() || !entry.getChanges()
.get(entry.getChanges().size() - 1)
.getType()
.equals(ChangeType.REMOVED))
.collect(Collectors.toList()));
.equals(ChangeType.REMOVED)).collect(Collectors.toSet());
added.removeAll(notRemovedPreviousEntries);
notRemovedPreviousEntries.forEach(added::remove);
Set<RedactionLogEntry> removed = new HashSet<>(notRemovedPreviousEntries);
removed.removeAll(currentRedactionLog.getRedactionLogEntry());
currentRedactionLog.getRedactionLogEntry().forEach(removed::remove);
Map<String, RedactionLogEntry> addedIds = new HashMap<>();
added.forEach(entry -> {
@ -78,16 +66,16 @@ public class RedactionChangeLogService {
newRedactionLogEntries.forEach(entry -> {
if (removedIds.contains(entry.getId()) && addedIds.containsKey(entry.getId())) {
List<Change> changes = entry.getChanges();
changes.add(new Change(ChangeType.CHANGED, OffsetDateTime.now()));
changes.add(new Change(analysisNumber, ChangeType.CHANGED, OffsetDateTime.now()));
var newEntry = addedIds.get(entry.getId());
newEntry.setChanges(changes);
addedIds.put(entry.getId(), newEntry);
toRemove.add(entry);
} else if (removedIds.contains(entry.getId())) {
entry.getChanges().add(new Change(ChangeType.REMOVED, OffsetDateTime.now()));
entry.getChanges().add(new Change(analysisNumber, ChangeType.REMOVED, OffsetDateTime.now()));
} else if (addedIds.containsKey(entry.getId())) {
List<Change> changes = entry.getChanges();
changes.add(new Change(ChangeType.ADDED, OffsetDateTime.now()));
changes.add(new Change(analysisNumber, ChangeType.ADDED, OffsetDateTime.now()));
var newEntry = addedIds.get(entry.getId());
newEntry.setChanges(changes);
addedIds.put(entry.getId(), newEntry);
@ -98,8 +86,8 @@ public class RedactionChangeLogService {
newRedactionLogEntries.removeAll(toRemove);
addedIds.forEach((k, v) -> {
if(v.getChanges().isEmpty()) {
v.getChanges().add(new Change(ChangeType.ADDED, OffsetDateTime.now()));
if (v.getChanges().isEmpty()) {
v.getChanges().add(new Change(analysisNumber,ChangeType.ADDED, OffsetDateTime.now()));
}
newRedactionLogEntries.add(v);
});

View File

@ -128,7 +128,12 @@ public class EntitySearchUtils {
for (Entity inner : entities) {
if (inner.getWord().length() < word.getWord()
.length() && inner.getStart() >= word.getStart() && inner.getEnd() <= word.getEnd() && word != inner && word.getSectionNumber() == inner.getSectionNumber()) {
wordsToRemove.add(inner);
// FIXME this is workaround for RED-3327 and should be removed in the future.
if(word.getType().contains("recommendation_") && !inner.getType().contains("recommendation_")) {
wordsToRemove.add(word);
} else {
wordsToRemove.add(inner);
}
}
}
}