RED-6009: Document Tree Structure

This commit is contained in:
Kilian Schüttler 2023-07-12 18:40:04 +02:00
parent 63f38a8708
commit 83776b6685
18 changed files with 276 additions and 91 deletions

View File

@ -9,13 +9,17 @@ import java.util.Comparator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Point;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Rectangle;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLog;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry;
@ -29,15 +33,18 @@ import com.iqser.red.service.redaction.v1.server.layoutparsing.document.services
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.services.EntityEnrichmentService;
import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.SearchImplementation;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@Service
public class RedactionLogEntryAdapter {
public class CustomEntityCreationAdapter {
private static final double MATCH_THRESHOLD = 1;
private final EntityCreationService entityCreationService;
@Autowired
public RedactionLogEntryAdapter(EntityEnrichmentService entityEnrichmentService) {
public CustomEntityCreationAdapter(EntityEnrichmentService entityEnrichmentService) {
entityCreationService = new EntityCreationService(entityEnrichmentService);
}
@ -46,24 +53,80 @@ public class RedactionLogEntryAdapter {
/**
 * Re-creates entities on {@code node} for the entries of a redaction log.
 * <p>
 * Temporary entities are created for every case-insensitive occurrence of an entry value inside the node.
 * Each log entry is then matched to the temporary entity closest to its recorded positions, and the final
 * entity is created from that match. Entries without a usable match are skipped. All temporary entities
 * are removed from the graph before this method returns, including when matching fails with an exception.
 *
 * @param redactionLog the redaction log whose entries should be mapped onto the node
 * @param node         the node to search in; it must contain every page referenced by the log entries
 * @return a stream of the entities created for the matched log entries
 * @throws IllegalArgumentException if the node does not contain a page referenced by the redaction log
 */
public Stream<RedactionEntity> toRedactionEntity(RedactionLog redactionLog, SemanticNode node) {

    List<Integer> pageNumbers = redactionLog.getRedactionLogEntry().stream().flatMap(entry -> entry.getPositions().stream().map(Rectangle::getPage)).distinct().toList();
    // Lower-case with Locale.ROOT: the temporary entities are grouped and looked up by Locale.ROOT keys,
    // so a default-locale conversion (e.g. Turkish dotless i) would produce values that never match.
    Set<String> entryValues = redactionLog.getRedactionLogEntry()
            .stream()
            .map(RedactionLogEntry::getValue)
            .map(value -> value.toLowerCase(Locale.ROOT))
            .collect(Collectors.toSet());

    Map<String, List<RedactionEntity>> tempEntitiesByValue = findAllPossibleEntitiesAndGroupByValue(node, pageNumbers, entryValues);

    try {
        assert allValuesFound(tempEntitiesByValue, entryValues);

        List<RedactionEntity> entities = redactionLog.getRedactionLogEntry()
                .stream()
                .map(entry -> findClosestEntity(entry, tempEntitiesByValue).map(tempEntity -> createCorrectEntity(entry, node, tempEntity)))
                .flatMap(Optional::stream)
                .toList();
        return entities.stream();
    } finally {
        // The temporary entities only exist to locate positions; never leave them in the graph,
        // not even when the position matching throws.
        tempEntitiesByValue.values().stream().flatMap(Collection::stream).forEach(RedactionEntity::removeFromGraph);
    }
}
/**
 * Selects, among the temporary entities sharing the entry's value, the one closest to the entry's positions.
 *
 * @param entry               the redaction log entry to locate
 * @param tempEntitiesByValue temporary entities keyed by their value lower-cased with {@link Locale#ROOT}
 * @return the closest entity, or an empty Optional (after logging a warning) if no entity has the entry's value
 */
private Optional<RedactionEntity> findClosestEntity(RedactionLogEntry entry, Map<String, List<RedactionEntity>> tempEntitiesByValue) {

    String lookupKey = entry.getValue().toLowerCase(Locale.ROOT);
    List<RedactionEntity> candidates = tempEntitiesByValue.get(lookupKey);

    boolean hasCandidates = candidates != null && !candidates.isEmpty();
    if (hasCandidates) {
        return findClosestRedactionEntity(entry.getPositions(), candidates);
    }

    log.warn("Entity could not be created for manual add entry: {}, due to the string not being found.", entry);
    return Optional.empty();
}
/**
 * Creates entities on {@code node} for manually added redaction entries.
 * <p>
 * Temporary entities are created for every case-insensitive occurrence of an entry value inside the node.
 * Each manual entry is then matched to the temporary entity closest to its positions, and the final entity
 * is created from that match. Entries whose value is not found, or whose closest match is rejected, are
 * skipped. All temporary entities are removed from the graph afterwards, including when matching fails
 * with an exception.
 *
 * @param manualRedactionEntries the manual entries to add; {@code null} or empty is a no-op
 * @param node                   the node to search in; it must contain every page referenced by the entries
 * @throws IllegalArgumentException if the node does not contain a page referenced by an entry
 */
public void createRedactionEntities(Set<ManualRedactionEntry> manualRedactionEntries, SemanticNode node) {

    // Nothing to match: skip the search over the whole node.
    if (manualRedactionEntries == null || manualRedactionEntries.isEmpty()) {
        return;
    }

    List<Integer> pageNumbers = manualRedactionEntries.stream()
            .flatMap(entry -> entry.getPositions().stream().map(com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle::getPage))
            .distinct()
            .toList();
    // Lower-case with Locale.ROOT: the temporary entities are grouped and looked up by Locale.ROOT keys,
    // so a default-locale conversion (e.g. Turkish dotless i) would produce values that never match.
    Set<String> entryValues = manualRedactionEntries.stream()
            .map(ManualRedactionEntry::getValue)
            .map(value -> value.toLowerCase(Locale.ROOT))
            .collect(Collectors.toSet());

    Map<String, List<RedactionEntity>> tempEntitiesByValue = findAllPossibleEntitiesAndGroupByValue(node, pageNumbers, entryValues);

    try {
        manualRedactionEntries.forEach(entry -> {
            List<RedactionEntity> possibleEntities = tempEntitiesByValue.get(entry.getValue().toLowerCase(Locale.ROOT));
            if (possibleEntities == null || possibleEntities.isEmpty()) {
                log.warn("Entity could not be created for manual add entry: {}, due to the string not being found.", entry);
                return;
            }
            List<Rectangle> originalPositions = entry.getPositions().stream().map(CustomEntityCreationAdapter::toRectangle).toList();
            findClosestRedactionEntity(originalPositions, possibleEntities).ifPresent(closestEntity -> createCorrectEntity(entry, node, closestEntity));
        });
    } finally {
        // The temporary entities only exist to locate positions; never leave them in the graph,
        // not even when the position matching throws.
        tempEntitiesByValue.values().stream().flatMap(Collection::stream).forEach(RedactionEntity::removeFromGraph);
    }
}
/**
 * Converts an annotation rectangle into the redaction-log rectangle type.
 *
 * @param rectangle the annotation rectangle to convert
 * @return a redaction-log rectangle with the same top-left point, width, height and page
 */
private static Rectangle toRectangle(com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle rectangle) {

    Point topLeft = new Point(rectangle.getTopLeftX(), rectangle.getTopLeftY());
    return new Rectangle(topLeft, rectangle.getWidth(), rectangle.getHeight(), rectangle.getPage());
}
private Map<String, List<RedactionEntity>> findAllPossibleEntitiesAndGroupByValue(SemanticNode node, List<Integer> pageNumbers, Set<String> entryValues) {
if (!pageNumbers.stream().allMatch(node::isOnPage)) {
throw new IllegalArgumentException(format("SemanticNode %s does not contain these pages %s present in the redaction log",
node,
pageNumbers.stream().filter(pageNumber -> !node.isOnPage(pageNumber)).toList()));
}
Set<String> entryValues = redactionLog.getRedactionLogEntry().stream().map(RedactionLogEntry::getValue).map(String::toLowerCase).collect(Collectors.toSet());
SearchImplementation searchImplementation = new SearchImplementation(entryValues, true);
Map<String, List<RedactionEntity>> tempEntitiesByValue = findAllPossibleEntitiesAndGroupByValueIgnoringCase(node, searchImplementation);
assert allValuesFound(tempEntitiesByValue, entryValues);
List<RedactionEntity> entities = redactionLog.getRedactionLogEntry()
return searchImplementation.getBoundaries(node.getTextBlock(), node.getBoundary())
.stream()
.map(entry -> findClosestRedactionEntity(entry, tempEntitiesByValue.get(entry.getValue().toLowerCase(Locale.ROOT)), node))
.toList();
tempEntitiesByValue.values().stream().flatMap(Collection::stream).forEach(RedactionEntity::removeFromGraph);
return entities.stream();
.map(boundary -> entityCreationService.forceByBoundary(boundary, "temp", EntityType.ENTITY, node))
.collect(groupingBy(entity -> entity.getValue().toLowerCase(Locale.ROOT)));
}
@ -73,31 +136,24 @@ public class RedactionLogEntryAdapter {
}
private Map<String, List<RedactionEntity>> findAllPossibleEntitiesAndGroupByValueIgnoringCase(SemanticNode node, SearchImplementation searchImplementation) {
return searchImplementation.getBoundaries(node.getTextBlock(), node.getBoundary())
.stream()
.map(boundary -> entityCreationService.forceByBoundary(boundary, "temp", EntityType.ENTITY, node))
.collect(groupingBy(entity -> entity.getValue().toLowerCase(Locale.ROOT)));
}
private RedactionEntity findClosestRedactionEntity(RedactionLogEntry redactionLogEntry, List<RedactionEntity> entitiesWithSameValue, SemanticNode node) {
private Optional<RedactionEntity> findClosestRedactionEntity(List<Rectangle> originalPositions, List<RedactionEntity> entitiesWithSameValue) {
RedactionEntity closestEntity = entitiesWithSameValue.stream()
.filter(entity -> pagesMatch(entity, redactionLogEntry))
.min(Comparator.comparingDouble(entity -> calculateMinDistance(redactionLogEntry, entity)))
.orElseThrow(() -> new NotFoundException(format("No entity with similar position found for %s", redactionLogEntry)));
.filter(entity -> pagesMatch(entity, originalPositions))
.min(Comparator.comparingDouble(entity -> calculateMinDistance(originalPositions, entity)))
.orElseThrow(() -> new NotFoundException(format("No entity with similar position found for %s", originalPositions)));
double distance = calculateMinDistance(redactionLogEntry, closestEntity);
double distance = calculateMinDistance(originalPositions, closestEntity);
if (distance > MATCH_THRESHOLD) {
throw new NotFoundException(format("Distance to closest found entity is %.2f for \n%s \n%s",
log.warn(format("Distance to closest found entity is %.2f and therefore higher than the threshold of %.2f for \n%s \n%s",
distance,
redactionLogEntry.getPositions(),
MATCH_THRESHOLD,
originalPositions,
closestEntity.getRedactionPositionsPerPage()));
return Optional.empty();
}
return createCorrectEntity(redactionLogEntry, node, closestEntity);
return Optional.of(closestEntity);
}
@ -119,20 +175,30 @@ public class RedactionLogEntryAdapter {
}
private static boolean pagesMatch(RedactionEntity entity, RedactionLogEntry redactionLogEntry) {
/**
 * Creates the final entity for a manual redaction entry at the boundary of the matched temporary entity.
 * <p>
 * The new entity gets the entry's type and is forced with rule identifier {@code MAN.0.0}, carrying the
 * entry's reason and legal basis.
 *
 * @param redactionLogEntry the manual redaction entry supplying type, reason and legal basis
 * @param node              the node the entity is created on
 * @param closestEntity     the temporary entity whose boundary is reused
 * @return the newly created, forced entity
 */
private RedactionEntity createCorrectEntity(ManualRedactionEntry redactionLogEntry, SemanticNode node, RedactionEntity closestEntity) {

    RedactionEntity manualEntity = entityCreationService.forceByBoundary(closestEntity.getBoundary(),
            redactionLogEntry.getType(),
            EntityType.ENTITY,
            node);

    manualEntity.force("MAN.0.0", redactionLogEntry.getReason(), redactionLogEntry.getLegalBasis());

    return manualEntity;
}
private static boolean pagesMatch(RedactionEntity entity, List<Rectangle> originalPositions) {
Set<Integer> entityPageNumbers = entity.getRedactionPositionsPerPage().stream().map(RedactionPosition::getPage).map(Page::getNumber).collect(Collectors.toSet());
Set<Integer> redactionLogEntryPageNumbers = redactionLogEntry.getPositions().stream().map(Rectangle::getPage).collect(Collectors.toSet());
Set<Integer> redactionLogEntryPageNumbers = originalPositions.stream().map(Rectangle::getPage).collect(Collectors.toSet());
return entityPageNumbers.equals(redactionLogEntryPageNumbers);
}
private double calculateMinDistance(RedactionLogEntry redactionLogEntry, RedactionEntity entity) {
private double calculateMinDistance(List<Rectangle> originalPositions, RedactionEntity entity) {
if (redactionLogEntry.getPositions().size() != countRectangles(entity)) {
if (originalPositions.size() != countRectangles(entity)) {
return Double.MAX_VALUE;
}
return redactionLogEntry.getPositions().stream().mapToDouble(redactionLogEntryRectangle -> calculateMinDistancePerRectangle(entity, redactionLogEntryRectangle)).sum();
return originalPositions.stream().mapToDouble(redactionLogEntryRectangle -> calculateMinDistancePerRectangle(entity, redactionLogEntryRectangle)).sum();
}
@ -142,14 +208,14 @@ public class RedactionLogEntryAdapter {
}
private double calculateMinDistancePerRectangle(RedactionEntity entity, Rectangle redactionLogEntryRectangle) {
private double calculateMinDistancePerRectangle(RedactionEntity entity, Rectangle originalRectangle) {
return entity.getRedactionPositionsPerPage()
.stream()
.filter(redactionPosition -> redactionPosition.getPage().getNumber() == redactionLogEntryRectangle.getPage())
.filter(redactionPosition -> redactionPosition.getPage().getNumber() == originalRectangle.getPage())
.map(RedactionPosition::getRectanglePerLine)
.flatMap(Collection::stream)
.mapToDouble(rectangle -> calculateDistance(rectangle, toRectangle2D(redactionLogEntryRectangle)))
.mapToDouble(rectangle -> calculateDistance(rectangle, toRectangle2D(originalRectangle)))
.min()
.orElse(Double.MAX_VALUE);
}

View File

@ -165,6 +165,7 @@ public class SectionNodeFactory {
@SuppressWarnings("PMD")
// experimental feature to be used later
private List<TextPageBlock> findTextBlocksWithSameClassificationAndAlignsYAndSameOrientationUntilConvergence(TextPageBlock originalTextBlocks,
List<AbstractPageBlock> pageBlocks) {

View File

@ -2,16 +2,21 @@ package com.iqser.red.service.redaction.v1.server.redaction.model.dictionary;
import static java.lang.String.format;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils;
import com.iqser.red.service.redaction.v1.server.exception.NotFoundException;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.MatchedRule;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.redaction.utils.Patterns;
@ -48,7 +53,7 @@ public class Dictionary {
public boolean hasLocalEntries() {
return dictionaryModels.stream().anyMatch(dm -> !dm.getLocalEntries().isEmpty());
return dictionaryModels.stream().anyMatch(dm -> !dm.getLocalEntriesWithMatchedRules().isEmpty());
}
@ -88,7 +93,7 @@ public class Dictionary {
}
public void addLocalDictionaryEntry(String type, String value, boolean alsoAddLastname) {
private void addLocalDictionaryEntry(String type, String value, Collection<MatchedRule> matchedRules, boolean alsoAddLastname) {
if (value.isBlank()) {
return;
@ -96,23 +101,30 @@ public class Dictionary {
if (localAccessMap.get(type) == null) {
throw new IllegalArgumentException(format("DictionaryModel of type %s does not exist", type));
}
if (localAccessMap.get(type).getLocalEntries() == null) {
if (localAccessMap.get(type).getLocalEntriesWithMatchedRules() == null) {
throw new IllegalArgumentException(format("DictionaryModel of type %s has no local Entries", type));
}
if (StringUtils.isEmpty(value)) {
throw new IllegalArgumentException(format("%s is not a valid dictionary entry", value));
}
localAccessMap.get(type).getLocalEntries().add(value.trim());
Set<MatchedRule> matchedRulesSet = new HashSet<>(matchedRules);
localAccessMap.get(type).getLocalEntriesWithMatchedRules().merge(value.trim(), matchedRulesSet, (set1, set2) -> Stream.concat(set1.stream(), set2.stream()).collect(Collectors.toSet()));
if (alsoAddLastname) {
String lastname = value.split(" ")[0];
localAccessMap.get(type).getLocalEntries().add(lastname);
localAccessMap.get(type).getLocalEntriesWithMatchedRules().merge(lastname, matchedRulesSet, (set1, set2) -> Stream.concat(set1.stream(), set2.stream()).collect(Collectors.toSet()));
}
}
public void addLocalDictionaryEntry(RedactionEntity redactionEntity) {
public void recommendEverywhere(RedactionEntity redactionEntity) {
addLocalDictionaryEntry(redactionEntity.getType(), redactionEntity.getValue(), false);
addLocalDictionaryEntry(redactionEntity.getType(), redactionEntity.getValue(), redactionEntity.getMatchedRuleList(), false);
}
/**
 * Adds the entity's value to the local dictionary of the entity's type, together with its matched rules,
 * and additionally adds the first space-separated token of the value as a separate entry.
 *
 * @param redactionEntity the entity whose type, value and matched rules are recorded
 */
public void recommendEverywhereWithLastNameSeparately(RedactionEntity redactionEntity) {

    final boolean alsoAddLastname = true;
    addLocalDictionaryEntry(redactionEntity.getType(), redactionEntity.getValue(), redactionEntity.getMatchedRuleList(), alsoAddLastname);
}
@ -125,7 +137,7 @@ public class Dictionary {
while (matcher.find()) {
String match = matcher.group().trim();
if (match.length() >= 3) {
addLocalDictionaryEntry(redactionEntity.getType(), match, true);
addLocalDictionaryEntry(redactionEntity.getType(), match, redactionEntity.getMatchedRuleList(), true);
}
}
}

View File

@ -1,12 +1,13 @@
package com.iqser.red.service.redaction.v1.server.redaction.model.dictionary;
import java.io.Serializable;
import java.util.HashSet;
import java.util.HashMap;
import java.util.Set;
import java.util.stream.Collectors;
import com.iqser.red.service.dictionarymerge.commons.DictionaryEntry;
import com.iqser.red.service.dictionarymerge.commons.DictionaryEntryModel;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.MatchedRule;
import lombok.AllArgsConstructor;
import lombok.Data;
@ -28,9 +29,9 @@ public class DictionaryModel implements Serializable {
private transient SearchImplementation entriesSearch;
private transient SearchImplementation falsePositiveSearch;
private transient SearchImplementation falseRecommendationsSearch;
private transient SearchImplementation localSearch;
private final Set<String> localEntries = new HashSet<>();
private final HashMap<String, Set<MatchedRule>> localEntriesWithMatchedRules = new HashMap<>();
private transient SearchImplementation localSearch;
public DictionaryModel(String type,
@ -68,8 +69,8 @@ public class DictionaryModel implements Serializable {
public SearchImplementation getLocalSearch() {
if (this.localSearch == null) {
this.localSearch = new SearchImplementation(this.localEntries, caseInsensitive);
if (this.localSearch == null || this.localSearch.getValues().size() != this.localEntriesWithMatchedRules.size()) {
this.localSearch = new SearchImplementation(this.localEntriesWithMatchedRules.keySet(), caseInsensitive);
}
return this.localSearch;
}

View File

@ -47,7 +47,12 @@ public class SearchImplementation {
if (this.values.size() == 1) {
var text = this.values.iterator().next();
this.pattern = Pattern.compile(Pattern.quote(ignoreCase ? text.toLowerCase(Locale.ROOT) : text));
if (this.ignoreCase) {
this.pattern = Pattern.compile(Pattern.quote(text.toLowerCase(Locale.ROOT)), Pattern.CASE_INSENSITIVE);
} else {
this.pattern = Pattern.compile(Pattern.quote(text));
}
} else {
var builder = Trie.builder();
if (this.ignoreCase) {

View File

@ -14,6 +14,7 @@ import org.springframework.stereotype.Service;
import org.springframework.web.bind.annotation.RequestBody;
import com.iqser.gin4.commons.metrics.meters.FunctionTimerValues;
import com.iqser.red.service.dictionarymerge.commons.DictionaryEntryModel;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult;
import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute;
@ -44,6 +45,7 @@ import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntities;
import com.iqser.red.service.redaction.v1.server.redaction.adapter.NerEntitiesAdapter;
import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.Dictionary;
import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.DictionaryIncrement;
import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.DictionaryModel;
import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.DictionaryVersion;
import com.iqser.red.service.redaction.v1.server.settings.RedactionServiceSettings;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
@ -143,6 +145,10 @@ public class AnalyzeService {
long rulesVersion = droolsExecutionService.getRulesVersion(analyzeRequest.getDossierTemplateId());
log.info("Updated Rules to Version {} for file {} in dossier {}", rulesVersion, analyzeRequest.getFileId(), analyzeRequest.getDossierId());
if (analyzeRequest.getManualRedactions() != null) {
entityRedactionService.addManualAddRedactionEntities(analyzeRequest.getManualRedactions().getEntriesToAdd(), document);
log.info("Added Manual redaction entries for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
}
entityRedactionService.addDictionaryEntities(dictionary, document);
log.info("Finished Dictionary Search for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
@ -215,7 +221,14 @@ public class AnalyzeService {
analyzeRequest.getFileId(),
analyzeRequest.getDossierId());
if (analyzeRequest.getManualRedactions() != null) {
entityRedactionService.addManualAddRedactionEntities(analyzeRequest.getManualRedactions().getEntriesToAdd(), document);
log.info("Added Manual redaction entries for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
}
Dictionary dictionary = dictionaryService.getDeepCopyDictionary(analyzeRequest.getDossierTemplateId(), analyzeRequest.getDossierId());
log.info("Updated Dictionaries for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
sectionsToReAnalyse.forEach(node -> entityRedactionService.addDictionaryEntities(dictionary, node));
log.info("Finished Dictionary Search for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());

View File

@ -1,12 +1,21 @@
package com.iqser.red.service.redaction.v1.server.redaction.service;
import static java.util.stream.Collectors.collectingAndThen;
import static java.util.stream.Collectors.groupingBy;
import static java.util.stream.Collectors.toList;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.stream.Collector;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils;
import org.kie.api.KieServices;
@ -21,6 +30,9 @@ import org.springframework.stereotype.Service;
import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.IdRemoval;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualForceRedaction;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.service.redaction.v1.server.exception.RulesValidationException;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Document;
@ -99,10 +111,9 @@ public class DroolsExecutionService {
if (manualRedactions != null) {
manualRedactions.getResizeRedactions().forEach(kieSession::insert);
manualRedactions.getForceRedactions().forEach(kieSession::insert);
manualRedactions.getIdsToRemove().forEach(kieSession::insert);
manualRedactions.getImageRecategorization().forEach(kieSession::insert);
manualRedactions.getEntriesToAdd().forEach(kieSession::insert);
insertLatestForceOrRemovalPerAnnotationId(manualRedactions.getForceRedactions(), manualRedactions.getIdsToRemove(), kieSession);
}
kieSession.insert(nerEntities);
@ -115,6 +126,23 @@ public class DroolsExecutionService {
}
/**
 * Inserts, for each annotation id, only the most recently requested force-redaction or id-removal into
 * the session.
 * <p>
 * Annotations that are not approved or have no request date are ignored.
 *
 * @param forceRedactions the manual force redactions to consider
 * @param idRemovals      the id removals to consider
 * @param kieSession      the session receiving the selected annotations
 */
private static void insertLatestForceOrRemovalPerAnnotationId(Collection<ManualForceRedaction> forceRedactions, Collection<IdRemoval> idRemovals, KieSession kieSession) {

    Stream<BaseAnnotation> forcesAndRemovals = Stream.concat(forceRedactions.stream(), idRemovals.stream());

    forcesAndRemovals.filter(annotation -> annotation.isApproved() && annotation.getRequestDate() != null)
            .collect(groupingBy(BaseAnnotation::getAnnotationId, sortByRequestDate()))
            .values()
            .stream()
            // each group is sorted newest first, so index 0 holds the latest request
            .map(newestFirst -> newestFirst.get(0))
            .forEach(kieSession::insert);
}
/**
 * Builds a collector that gathers annotations into a list ordered by request date, newest first.
 *
 * @return a collector producing an unmodifiable list sorted by descending request date
 */
private static Collector<BaseAnnotation, Object, List<BaseAnnotation>> sortByRequestDate() {

    Comparator<BaseAnnotation> newestFirst = Comparator.comparing(BaseAnnotation::getRequestDate).reversed();
    return collectingAndThen(toList(), annotations -> annotations.stream().sorted(newestFirst).toList());
}
public List<FileAttribute> getFileAttributes(KieSession kieSession) {
List<FileAttribute> fileAttributes = new LinkedList<>();

View File

@ -9,7 +9,9 @@ import org.springframework.stereotype.Service;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.CustomEntityCreationAdapter;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Document;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.SemanticNode;
@ -32,6 +34,7 @@ public class EntityRedactionService {
DroolsExecutionService droolsExecutionService;
EntityEnrichmentService entityEnrichmentService;
CustomEntityCreationAdapter customEntityCreationAdapter;
public Set<FileAttribute> addRuleEntities(Dictionary dictionary, Document document, KieContainer kieContainer, AnalyzeRequest analyzeRequest, NerEntities nerEntities) {
@ -70,6 +73,11 @@ public class EntityRedactionService {
return allFileAttributes.stream().filter(fileAttribute -> !analyzeRequest.getFileAttributes().contains(fileAttribute)).collect(Collectors.toUnmodifiableSet());
}
/**
 * Creates entities in the document for manually added redaction entries by delegating to the
 * {@link CustomEntityCreationAdapter}.
 *
 * @param manualRedactionEntries the manual redaction entries to add
 * @param document               the document the entities are created on
 */
public void addManualAddRedactionEntities(Set<ManualRedactionEntry> manualRedactionEntries, Document document) {

    customEntityCreationAdapter.createRedactionEntities(manualRedactionEntries, document);
}
public void addDictionaryEntities(Dictionary dictionary, SemanticNode node) {

View File

@ -1,6 +1,7 @@
package com.iqser.red.service.redaction.v1.server;
import static org.mockito.Mockito.when;
import static org.wildfly.common.Assert.assertFalse;
import static org.wildfly.common.Assert.assertTrue;
import java.io.FileOutputStream;
@ -109,6 +110,7 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
var publishedInformationEntry1 = findEntityByTypeAndValue(redactionLog, "published_information", "Oxford University Press").findFirst().orElseThrow();
var asyaLyon1 = findEntityByTypeAndValueAndSectionNumber(redactionLog, "CBI_author", "Asya Lyon", publishedInformationEntry1.getSectionNumber()).findFirst().orElseThrow();
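// NOTE: the assertion below passes when run from IntelliJ but fails under `mvn install`; the behavior is correct in the UI, so it is left disabled (cause of the discrepancy unknown).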
// assertFalse(asyaLyon1.isRedacted());
var idRemoval = buildIdRemoval(publishedInformationEntry1.getId());
@ -159,18 +161,20 @@ public class RedactionAcceptanceTest extends AbstractRedactionIntegrationTest {
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
System.out.println("Finished structure analysis");
AnalyzeResult result = analyzeService.analyze(request);
System.out.println("Finished analysis");
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
RedactionLogEntry desireeEtAl = findEntityByTypeAndValue(redactionLog, "CBI_author", "Desiree").filter(e -> e.getMatchedRule().startsWith("CBI.16"))
RedactionLogEntry desireeEtAl = findEntityByTypeAndValue(redactionLog, "CBI_author", "Desiree").filter(e -> !e.isRecommendation()).filter(e -> e.getMatchedRule().startsWith("CBI.16"))
.findAny()
.orElseThrow();
IdRemoval removal = buildIdRemoval(desireeEtAl.getId());
request.setManualRedactions(ManualRedactions.builder().idsToRemove(Set.of(removal)).build());
analyzeService.reanalyze(request);
System.out.println("Finished reanalysis");
var redactionLog2 = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
assertTrue(findEntityByTypeAndValue(redactionLog2, "CBI_author", "Desiree").noneMatch(e -> e.getMatchedRule().startsWith("CBI.16")));
assertTrue(findEntityByTypeAndValue(redactionLog2, "CBI_author", "Desiree").filter(entry -> !entry.isRecommendation()).noneMatch(RedactionLogEntry::isRedacted));
}

View File

@ -26,7 +26,7 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemp
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLog;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.model.StructureAnalyzeRequest;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.RedactionLogEntryAdapter;
import com.iqser.red.service.redaction.v1.server.layoutparsing.classification.adapter.CustomEntityCreationAdapter;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.data.DocumentData;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.data.mapper.DocumentGraphMapper;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
@ -41,7 +41,7 @@ public class MigrationPocTest extends BuildDocumentIntegrationTest {
private static final String RULES = loadFromClassPath("drools/rules.drl");
@Autowired
private RedactionLogEntryAdapter redactionLogAdapter;
private CustomEntityCreationAdapter redactionLogAdapter;
@Autowired
private RedactionLogCreatorService redactionLogCreatorService;

View File

@ -0,0 +1,34 @@
package com.iqser.red.service.redaction.v1.server.document.graph;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.util.List;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.entity.RedactionEntity;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.graph.nodes.Document;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.services.EntityCreationService;
import com.iqser.red.service.redaction.v1.server.layoutparsing.document.services.EntityEnrichmentService;
import com.iqser.red.service.redaction.v1.server.redaction.model.dictionary.SearchImplementation;
/**
 * Integration test for a {@link SearchImplementation} that holds exactly one search value.
 */
public class SearchImplementationTest extends BuildDocumentIntegrationTest {

    @Autowired
    private EntityEnrichmentService entityEnrichmentService;


    /**
     * Searches the test document case-insensitively for a single value and expects two entities to be created.
     */
    @Test
    public void testSearchImplementationWithSingleEntry() {

        Document graph = buildGraph("files/new/SYNGENTA_EFSA_sanitisation_GFL_v1_moreSections");
        SearchImplementation singleValueSearch = new SearchImplementation(List.of("mydossierredaction"), true);
        EntityCreationService creationService = new EntityCreationService(entityEnrichmentService);

        List<RedactionEntity> foundEntities = creationService.bySearchImplementation(singleValueSearch, "dossier_redaction", EntityType.ENTITY, graph)
                .toList();

        assertEquals(2, foundEntities.size());
    }

}

View File

@ -224,7 +224,7 @@ rule "CBI.16.0: Add CBI_author with \"et al.\" Regex (non vertebrate study)"
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.apply("CBI.16.0", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
dictionary.addLocalDictionaryEntry("CBI_author", entity.getValue(), false);
dictionary.recommendEverywhere(entity);
});
end
@ -237,7 +237,7 @@ rule "CBI.16.1: Add CBI_author with \"et al.\" Regex (vertebrate study)"
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.apply("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
dictionary.addLocalDictionaryEntry("CBI_author", entity.getValue(), false);
dictionary.recommendEverywhere(entity);
});
end
@ -270,7 +270,7 @@ rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC
entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section)
.forEach(laboratoryEntity -> {
laboratoryEntity.skip("CBI.20.0", "PERFORMING LABORATORY was found for non vertebrate study");
dictionary.addLocalDictionaryEntry(laboratoryEntity);
dictionary.recommendEverywhere(laboratoryEntity);
});
end
@ -283,7 +283,7 @@ rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC
entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section)
.forEach(laboratoryEntity -> {
laboratoryEntity.apply("CBI.20.1", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
dictionary.addLocalDictionaryEntry(laboratoryEntity);
dictionary.recommendEverywhere(laboratoryEntity);
});
end
@ -667,7 +667,11 @@ rule "LDS.0.0: run local dictionary search"
agenda-group "LOCAL_DICTIONARY_ADDS"
salience -999
when
DictionaryModel(!localEntries.isEmpty(), $type: type, $searchImplementation: localSearch) from dictionary.getDictionaryModels()
$dictionaryModel: DictionaryModel(!localEntriesWithMatchedRules.isEmpty()) from dictionary.getDictionaryModels()
then
entityCreationService.bySearchImplementation($searchImplementation, $type, EntityType.RECOMMENDATION, document).toList();
entityCreationService.bySearchImplementation($dictionaryModel.getLocalSearch(), $dictionaryModel.getType(), EntityType.RECOMMENDATION, document)
.forEach(entity -> {
Collection<MatchedRule> matchedRules = $dictionaryModel.getLocalEntriesWithMatchedRules().get(entity.getValue());
entity.addMatchedRules(matchedRules);
});
end

View File

@ -542,7 +542,7 @@ rule "CBI.16.0: Add CBI_author with \"et al.\" Regex (non vertebrate study)"
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.apply("CBI.16.0", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
dictionary.addLocalDictionaryEntry("CBI_author", entity.getValue(), false);
dictionary.recommendEverywhere(entity);
});
end
@ -555,7 +555,7 @@ rule "CBI.16.1: Add CBI_author with \"et al.\" Regex (vertebrate study)"
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.apply("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
dictionary.addLocalDictionaryEntry("CBI_author", entity.getValue(), false);
dictionary.recommendEverywhere(entity);
});
end
@ -621,7 +621,7 @@ rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC
entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section)
.forEach(laboratoryEntity -> {
laboratoryEntity.skip("CBI.20.0", "PERFORMING LABORATORY was found for non vertebrate study");
dictionary.addLocalDictionaryEntry(laboratoryEntity);
dictionary.recommendEverywhere(laboratoryEntity);
});
end
@ -634,7 +634,7 @@ rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC
entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section)
.forEach(laboratoryEntity -> {
laboratoryEntity.apply("CBI.20.1", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
dictionary.addLocalDictionaryEntry(laboratoryEntity);
dictionary.recommendEverywhere(laboratoryEntity);
});
end
@ -1363,7 +1363,11 @@ rule "LDS.0.0: run local dictionary search"
agenda-group "LOCAL_DICTIONARY_ADDS"
salience -999
when
DictionaryModel(!localEntries.isEmpty(), $type: type, $searchImplementation: localSearch) from dictionary.getDictionaryModels()
$dictionaryModel: DictionaryModel(!localEntriesWithMatchedRules.isEmpty()) from dictionary.getDictionaryModels()
then
entityCreationService.bySearchImplementation($searchImplementation, $type, EntityType.RECOMMENDATION, document).toList();
entityCreationService.bySearchImplementation($dictionaryModel.getLocalSearch(), $dictionaryModel.getType(), EntityType.RECOMMENDATION, document)
.forEach(entity -> {
Collection<MatchedRule> matchedRules = $dictionaryModel.getLocalEntriesWithMatchedRules().get(entity.getValue());
entity.addMatchedRules(matchedRules);
});
end

View File

@ -1141,11 +1141,11 @@ rule "LDS.0.0: run local dictionary search"
agenda-group "LOCAL_DICTIONARY_ADDS"
salience -999
when
DictionaryModel(!localEntries.isEmpty(), $type: type, $searchImplementation: localSearch) from dictionary.getDictionaryModels()
$dictionaryModel: DictionaryModel(!localEntriesWithMatchedRules.isEmpty()) from dictionary.getDictionaryModels()
then
entityCreationService.bySearchImplementation($searchImplementation, $type, EntityType.RECOMMENDATION, document)
.forEach(entity -> {
entity.addEngine(Engine.RULE);
insert(entity);
entityCreationService.bySearchImplementation($dictionaryModel.getLocalSearch(), $dictionaryModel.getType(), EntityType.RECOMMENDATION, document)
.forEach(entity -> {
Collection<MatchedRule> matchedRules = $dictionaryModel.getLocalEntriesWithMatchedRules().get(entity.getValue());
entity.addMatchedRules(matchedRules);
});
end

View File

@ -151,11 +151,11 @@ rule "LDS.0.0: run local dictionary search"
agenda-group "LOCAL_DICTIONARY_ADDS"
salience -999
when
DictionaryModel(!localEntries.isEmpty(), $type: type, $searchImplementation: localSearch) from dictionary.getDictionaryModels()
$dictionaryModel: DictionaryModel(!localEntriesWithMatchedRules.isEmpty()) from dictionary.getDictionaryModels()
then
entityCreationService.bySearchImplementation($searchImplementation, $type, EntityType.RECOMMENDATION, document)
.forEach(entity -> {
entity.addEngine(Engine.RULE);
insert(entity);
entityCreationService.bySearchImplementation($dictionaryModel.getLocalSearch(), $dictionaryModel.getType(), EntityType.RECOMMENDATION, document)
.forEach(entity -> {
Collection<MatchedRule> matchedRules = $dictionaryModel.getLocalEntriesWithMatchedRules().get(entity.getValue());
entity.addMatchedRules(matchedRules);
});
end

View File

@ -373,7 +373,7 @@ rule "CBI.16.0: Add CBI_author with \"et al.\" Regex (non vertebrate study)"
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.apply("CBI.16.0", "Author found by \"et al\" regex", "Article 39(e)(3) of Regulation (EC) No 178/2002");
dictionary.addLocalDictionaryEntry("CBI_author", entity.getValue(), false);
dictionary.recommendEverywhere(entity);
});
end
@ -386,7 +386,7 @@ rule "CBI.16.1: Add CBI_author with \"et al.\" Regex (vertebrate study)"
entityCreationService.byRegex("\\b([A-ZÄÖÜ][^\\s\\.,]+( [A-ZÄÖÜ]{1,2}\\.?)?( ?[A-ZÄÖÜ]\\.?)?) et al\\.?", "CBI_author", EntityType.ENTITY, 1, $section)
.forEach(entity -> {
entity.apply("CBI.16.1", "Author found by \"et al\" regex", "Article 39(e)(2) of Regulation (EC) No 178/2002");
dictionary.addLocalDictionaryEntry("CBI_author", entity.getValue(), false);
dictionary.recommendEverywhere(entity);
});
end
@ -452,7 +452,7 @@ rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC
entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section)
.forEach(laboratoryEntity -> {
laboratoryEntity.skip("CBI.20.0", "PERFORMING LABORATORY was found for non vertebrate study");
dictionary.addLocalDictionaryEntry(laboratoryEntity);
dictionary.recommendEverywhere(laboratoryEntity);
});
end
@ -465,7 +465,7 @@ rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC
entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section)
.forEach(laboratoryEntity -> {
laboratoryEntity.apply("CBI.20.1", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
dictionary.addLocalDictionaryEntry(laboratoryEntity);
dictionary.recommendEverywhere(laboratoryEntity);
});
end
@ -1077,7 +1077,11 @@ rule "LDS.0.0: run local dictionary search"
agenda-group "LOCAL_DICTIONARY_ADDS"
salience -999
when
DictionaryModel(!localEntries.isEmpty(), $type: type, $searchImplementation: localSearch) from dictionary.getDictionaryModels()
$dictionaryModel: DictionaryModel(!localEntriesWithMatchedRules.isEmpty()) from dictionary.getDictionaryModels()
then
entityCreationService.bySearchImplementation($searchImplementation, $type, EntityType.RECOMMENDATION, document).toList();
entityCreationService.bySearchImplementation($dictionaryModel.getLocalSearch(), $dictionaryModel.getType(), EntityType.RECOMMENDATION, document)
.forEach(entity -> {
Collection<MatchedRule> matchedRules = $dictionaryModel.getLocalEntriesWithMatchedRules().get(entity.getValue());
entity.addMatchedRules(matchedRules);
});
end

View File

@ -194,11 +194,12 @@ rule "LDS.0.0: run local dictionary search"
agenda-group "LOCAL_DICTIONARY_ADDS"
salience -999
when
DictionaryModel(!localEntries.isEmpty(), $type: type, $searchImplementation: localSearch) from dictionary.getDictionaryModels()
$dictionaryModel: DictionaryModel(!localEntriesWithMatchedRules.isEmpty()) from dictionary.getDictionaryModels()
then
entityCreationService.bySearchImplementation($searchImplementation, $type, EntityType.RECOMMENDATION, document)
.forEach(entity -> {
insert(entity);
entityCreationService.bySearchImplementation($dictionaryModel.getLocalSearch(), $dictionaryModel.getType(), EntityType.RECOMMENDATION, document)
.forEach(entity -> {
Collection<MatchedRule> matchedRules = $dictionaryModel.getLocalEntriesWithMatchedRules().get(entity.getValue());
entity.addMatchedRules(matchedRules);
});
end

View File

@ -272,7 +272,7 @@ rule "CBI.20.0: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC
entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section)
.forEach(laboratoryEntity -> {
laboratoryEntity.skip("CBI.20.0", "PERFORMING LABORATORY was found for non vertebrate study");
dictionary.addLocalDictionaryEntry(laboratoryEntity);
dictionary.recommendEverywhere(laboratoryEntity);
});
end
@ -285,7 +285,7 @@ rule "CBI.20.1: Redact between \"PERFORMING LABORATORY\" and \"LABORATORY PROJEC
entityCreationService.betweenStrings("PERFORMING LABORATORY:", "LABORATORY PROJECT ID:", "CBI_address", EntityType.ENTITY, $section)
.forEach(laboratoryEntity -> {
laboratoryEntity.apply("CBI.20.1", "PERFORMING LABORATORY was found", "Article 39(e)(2) of Regulation (EC) No 178/2002");
dictionary.addLocalDictionaryEntry(laboratoryEntity);
dictionary.recommendEverywhere(laboratoryEntity);
});
end