diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/MigrationMessageReceiver.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/MigrationMessageReceiver.java index da521297..c4d64299 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/MigrationMessageReceiver.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/MigrationMessageReceiver.java @@ -16,6 +16,7 @@ import com.iqser.red.service.redaction.v1.model.MigrationRequest; import com.iqser.red.service.redaction.v1.model.MigrationResponse; import com.iqser.red.service.redaction.v1.server.model.MigratedEntityLog; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; +import com.iqser.red.service.redaction.v1.server.service.DictionaryService; import com.iqser.red.service.redaction.v1.server.service.document.DocumentGraphMapper; import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService; @@ -37,6 +38,7 @@ public class MigrationMessageReceiver { LegacyRedactionLogMergeService legacyRedactionLogMergeService; LegacyVersion0MigrationService legacyVersion0MigrationService; RabbitTemplate rabbitTemplate; + DictionaryService dictionaryService; @SneakyThrows @@ -45,7 +47,8 @@ public class MigrationMessageReceiver { public void receiveMigrationRequest(Message message) { MigrationRequest migrationRequest = objectMapper.readValue(message.getBody(), MigrationRequest.class); - + log.info("--------------------------------------------------------------------"); + log.info("Starting redactionLog to entityLog migration for dossierId {} and fileId {}", migrationRequest.getDossierId(), migrationRequest.getFileId()); // TODO: if an image is not found, try to copy the old one exactly (like with TextEntities) Document document = DocumentGraphMapper.toDocumentGraph(redactionStorageService.getDocumentData(migrationRequest.getDossierId(), migrationRequest.getFileId())); @@ -57,12 +60,18 @@ public class MigrationMessageReceiver { redactionLog = legacyRedactionLogMergeService.mergeManualChanges(redactionLog, migrationRequest.getManualRedactions(), migrationRequest.getDossierTemplateId()); } + dictionaryService.updateDictionary(migrationRequest.getDossierTemplateId(), migrationRequest.getDossierId()); MigratedEntityLog migratedEntityLog = redactionLogToEntityLogMigrationService.migrate(redactionLog, document); redactionStorageService.storeObject(migrationRequest.getDossierId(), migrationRequest.getFileId(), FileType.ENTITY_LOG, migratedEntityLog.getEntityLog()); redactionStorageService.storeObject(migrationRequest.getDossierId(), migrationRequest.getFileId(), FileType.MIGRATED_IDS, migratedEntityLog.getMigratedIds()); sendFinished(MigrationResponse.builder().dossierId(migrationRequest.getDossierId()).fileId(migrationRequest.getFileId()).build()); + log.info("Migrated {} redactionLog entries for dossierId {} and fileId {}", + migratedEntityLog.getEntityLog().getEntityLogEntry().size(), + migrationRequest.getDossierId(), + migrationRequest.getFileId()); + log.info(""); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/RedactionLogToEntityLogMigrationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/RedactionLogToEntityLogMigrationService.java index 079216a0..6e6febd6 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/RedactionLogToEntityLogMigrationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/migration/RedactionLogToEntityLogMigrationService.java @@ -1,5 +1,6 @@ package com.iqser.red.service.redaction.v1.server.migration; +import java.awt.geom.Rectangle2D; import java.util.Collection; import java.util.Comparator; import java.util.LinkedList; @@ -15,6 +16,8 @@ import org.springframework.stereotype.Service; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogLegalBasis; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.migration.MigratedIds; +import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus; +import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Rectangle; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLog; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry; import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogLegalBasis; @@ -26,10 +29,12 @@ import com.iqser.red.service.redaction.v1.server.model.document.TextRange; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image; +import com.iqser.red.service.redaction.v1.server.model.document.nodes.ImageType; import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; import com.iqser.red.service.redaction.v1.server.service.document.EntityCreationService; import com.iqser.red.service.redaction.v1.server.service.document.EntityEnrichmentService; import com.iqser.red.service.redaction.v1.server.service.document.EntityFindingUtility; +import com.iqser.red.service.redaction.v1.server.utils.IdBuilder; import com.iqser.red.service.redaction.v1.server.utils.MigratedIdsCollector; import lombok.AccessLevel; @@ -66,7 +71,7 @@ public class RedactionLogToEntityLogMigrationService { Map oldToNewIDMapping = migratedIds.buildOldToNewMapping(); entityLog.setEntityLogEntry(entitiesToMigrate.stream().map(migrationEntity -> migrationEntity.toEntityLogEntry(oldToNewIDMapping)).toList()); - if (redactionLog.getRedactionLogEntry().size() != entityLog.getEntityLogEntry().size()) { + if (getNumberOfApprovedEntries(redactionLog) != entityLog.getEntityLogEntry().size()) { String message = String.format("Not all entities have been found during the migration redactionLog has %d entries and new entityLog %d", redactionLog.getRedactionLogEntry().size(), entityLog.getEntityLogEntry().size()); @@ -78,6 +83,17 @@ public class RedactionLogToEntityLogMigrationService { } + private static long getNumberOfApprovedEntries(RedactionLog redactionLog) { + + return redactionLog.getRedactionLogEntry() + .stream() + .filter(redactionLogEntry -> redactionLogEntry.getManualChanges() + .stream() + .allMatch(manualChange -> manualChange.getAnnotationStatus().equals(AnnotationStatus.APPROVED))) + .count(); + } + + private List calculateMigrationEntitiesFromRedactionLog(RedactionLog redactionLog, Document document) { List images = getImageBasedMigrationEntities(redactionLog, document); @@ -95,23 +111,32 @@ public class RedactionLogToEntityLogMigrationService { private List getImageBasedMigrationEntities(RedactionLog redactionLog, Document document) { List images = document.streamAllImages().collect(Collectors.toList()); - List redactionLogImages = redactionLog.getRedactionLogEntry().stream().filter(RedactionLogEntry::isImage).toList(); + + List redactionLogImages = redactionLog.getRedactionLogEntry() + .stream() + .filter(RedactionLogEntry::isImage) + .filter(redactionLogEntry -> redactionLogEntry.getManualChanges() + .stream() + .allMatch(manualChange -> manualChange.getAnnotationStatus().equals(AnnotationStatus.APPROVED))) + .toList(); + List migrationEntities = new LinkedList<>(); for (RedactionLogEntry redactionLogImage : redactionLogImages) { List imagePositions = redactionLogImage.getPositions().stream().map(RectangleWithPage::fromRedactionLogRectangle).toList(); assert imagePositions.size() == 1; - Image closestImage = images.stream() + Optional optionalClosestImage = images.stream() .filter(image -> image.onPage(redactionLogImage.getPositions().get(0).getPage())) .min(Comparator.comparingDouble(image -> entityFindingUtility.calculateDistance(image.getPosition(), imagePositions.get(0).rectangle2D()))) - .orElseThrow(() -> new RuntimeException("Image from redaction log not found: " + redactionLogImage)); + .filter(image -> entityFindingUtility.calculateDistance(image.getPosition(), imagePositions.get(0).rectangle2D()) <= MATCH_THRESHOLD); - double minDistance = entityFindingUtility.calculateDistance(closestImage.getPosition(), imagePositions.get(0).rectangle2D()); - if (minDistance > MATCH_THRESHOLD) { - throw new RuntimeException(String.format("Closest image has a distance of %.2f which is higher than the allowed %.2f", minDistance, MATCH_THRESHOLD)); + Image closestImage; + if (optionalClosestImage.isEmpty()) { // if no fitting image can be found create a new one with the previous values! + closestImage = buildImageDirectly(document, redactionLogImage); + } else { + closestImage = optionalClosestImage.get(); + images.remove(closestImage); } - images.remove(closestImage); - String ruleIdentifier = "OLDIMG." + redactionLogImage.getMatchedRule() + ".0"; if (redactionLogImage.lastChangeIsRemoved()) { closestImage.remove(ruleIdentifier, redactionLogImage.getReason()); @@ -126,11 +151,36 @@ public class RedactionLogToEntityLogMigrationService { } + private static Image buildImageDirectly(Document document, RedactionLogEntry redactionLogImage) { + + Image image = Image.builder() + .documentTree(document.getDocumentTree()) + .imageType(ImageType.fromString(redactionLogImage.getType())) + .transparent(redactionLogImage.isImageHasTransparency()) + .page(document.getPages().stream().filter(p -> p.getNumber() == redactionLogImage.getPositions().get(0).getPage()).findFirst().orElseThrow()) + .position(toRectangle2D(redactionLogImage.getPositions().get(0))) + .build(); + + List treeId = document.getDocumentTree().createNewMainEntryAndReturnId(image); + image.setTreeId(treeId); + image.setId(IdBuilder.buildId(image.getPages(), image.getBBox().values().stream().toList(), "", "")); + + return image; + } + + private static Rectangle2D toRectangle2D(Rectangle rect) { + + return new Rectangle2D.Double(rect.getTopLeft().getX(), rect.getTopLeft().getY(), rect.getWidth(), rect.getHeight()); + } + private List getTextBasedMigrationEntities(RedactionLog redactionLog, Document document) { List entitiesToMigrate = redactionLog.getRedactionLogEntry() .stream() .filter(redactionLogEntry -> !redactionLogEntry.isImage()) + .filter(redactionLogEntry -> redactionLogEntry.getManualChanges() + .stream() + .allMatch(manualChange -> manualChange.getAnnotationStatus().equals(AnnotationStatus.APPROVED))) .map(MigrationEntity::fromRedactionLogEntry) .peek(migrationEntity -> { if (migrationEntity.getRedactionLogEntry().lastChangeIsRemoved()) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/MigrationEntity.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/MigrationEntity.java index 0b18cad2..5146fa8f 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/MigrationEntity.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/MigrationEntity.java @@ -50,7 +50,7 @@ public final class MigrationEntity { public static ManualEntity createManualEntity(RedactionLogEntry redactionLogEntry) { - String ruleIdentifier = "OLD." + redactionLogEntry.getMatchedRule() + ".0"; + String ruleIdentifier = buildRuleIdentifier(redactionLogEntry); List rectangleWithPages = redactionLogEntry.getPositions().stream().map(RectangleWithPage::fromRedactionLogRectangle).toList(); EntityType entityType = getEntityType(redactionLogEntry); return ManualEntity.builder() @@ -72,6 +72,18 @@ public final class MigrationEntity { } + private static String buildRuleIdentifier(RedactionLogEntry redactionLogEntry) { + + String ruleIdentifier; + if (redactionLogEntry.getMatchedRule() != null) { + ruleIdentifier = "OLD." + redactionLogEntry.getMatchedRule() + ".0"; + } else { + ruleIdentifier = "MAN.5.0"; // pure ManualRedactions used to have no matched rule + } + return ruleIdentifier; + } + + private static EntityType getEntityType(RedactionLogEntry redactionLogEntry) { if (redactionLogEntry.isRecommendation()) { @@ -212,7 +224,9 @@ public final class MigrationEntity { .positions(List.of(new Position(image.getPosition(), image.getPage().getNumber()))) .containingNodeId(image.getTreeId()) .closestHeadline(image.getHeadline().getTextBlock().getSearchText()) - .section(image.getManualOverwrite().getSection().orElse(image.getParent().toString())) + .section(redactionLogEntry.getSection()) + .textAfter(redactionLogEntry.getTextAfter()) + .textBefore(redactionLogEntry.getTextBefore()) .imageHasTransparency(image.isTransparent()) .state(buildEntryState(image)) .entryType(redactionLogEntry.isHint() ? EntryType.IMAGE_HINT : EntryType.IMAGE) @@ -232,14 +246,14 @@ public final class MigrationEntity { .type(type) .state(buildEntryState(manualEntity)) .entryType(buildEntryType(manualEntity)) - .section(manualEntity.getManualOverwrite().getSection().orElse(manualEntity.getSection())) + .section(redactionLogEntry.getSection()) + .textAfter(redactionLogEntry.getTextAfter()) + .textBefore(redactionLogEntry.getTextBefore()) .containingNodeId(Collections.emptyList()) .closestHeadline("") .matchedRule(manualEntity.getMatchedRule().getRuleIdentifier().toString()) .dictionaryEntry(manualEntity.isDictionaryEntry()) .dossierDictionaryEntry(manualEntity.isDossierDictionaryEntry()) - .textAfter("") - .textBefore("") .startOffset(-1) .endOffset(-1) .positions(manualEntity.getManualOverwrite() @@ -268,13 +282,13 @@ public final class MigrationEntity { .legalBasis(entity.legalBasis()) .value(entity.getManualOverwrite().getValue().orElse(entity.getMatchedRule().isWriteValueWithLineBreaks() ? entity.getValueWithLineBreaks() : entity.getValue())) .type(entity.getType()) - .section(entity.getManualOverwrite().getSection().orElse(entity.getDeepestFullyContainingNode().toString())) + .section(redactionLogEntry.getSection()) + .textAfter(redactionLogEntry.getTextAfter()) + .textBefore(redactionLogEntry.getTextBefore()) .containingNodeId(entity.getDeepestFullyContainingNode().getTreeId()) .closestHeadline(entity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText()) .matchedRule(entity.getMatchedRule().getRuleIdentifier().toString()) .dictionaryEntry(entity.isDictionaryEntry()) - .textAfter(entity.getTextAfter()) - .textBefore(entity.getTextBefore()) .startOffset(entity.getTextRange().start()) .endOffset(entity.getTextRange().end()) .dossierDictionaryEntry(entity.isDossierDictionaryEntry()) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFindingUtility.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFindingUtility.java index d04e4633..62da7a73 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFindingUtility.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFindingUtility.java @@ -9,6 +9,7 @@ import java.util.Comparator; import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Objects; import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; @@ -44,31 +45,35 @@ public class EntityFindingUtility { } - public Optional findClosestEntityAndReturnEmptyIfNotFound(ManualEntity identifier, Map> entitiesWithSameValue, double matchThreshold) { + public Optional findClosestEntityAndReturnEmptyIfNotFound(ManualEntity manualEntity, Map> entitiesWithSameValue, double matchThreshold) { - List possibleEntities = entitiesWithSameValue.get(identifier.getValue().toLowerCase(Locale.ENGLISH)); + if (manualEntity.getValue() == null) { + return Optional.empty(); + } + + List possibleEntities = entitiesWithSameValue.get(manualEntity.getValue().toLowerCase(Locale.ENGLISH)); if (entityIdentifierValueNotFound(possibleEntities)) { - log.warn("Entity could not be created with identifier: {}, due to the value {} not being found anywhere.", identifier, identifier.getValue()); + log.warn("Entity could not be created with manualEntity: {}, due to the value {} not being found anywhere.", manualEntity, manualEntity.getValue()); return Optional.empty(); } Optional optionalClosestEntity = possibleEntities.stream() - .filter(entity -> pagesMatch(entity, identifier.getEntityPosition())) - .min(Comparator.comparingDouble(entity -> calculateMinDistance(identifier.getEntityPosition(), entity))); + .filter(entity -> pagesMatch(entity, manualEntity.getEntityPosition())) + .min(Comparator.comparingDouble(entity -> calculateMinDistance(manualEntity.getEntityPosition(), entity))); if (optionalClosestEntity.isEmpty()) { - log.warn("No Entity with value {} found on page {}", identifier.getValue(), identifier.getEntityPosition()); + log.warn("No Entity with value {} found on page {}", manualEntity.getValue(), manualEntity.getEntityPosition()); return Optional.empty(); } TextEntity closestEntity = optionalClosestEntity.get(); - double distance = calculateMinDistance(identifier.getEntityPosition(), closestEntity); + double distance = calculateMinDistance(manualEntity.getEntityPosition(), closestEntity); if (distance > matchThreshold) { log.warn("For entity {} on page {} with positions {} distance to closest found entity is {} and therefore higher than the threshold of {}", - identifier.getValue(), - identifier.getEntityPosition().get(0).pageNumber(), - identifier.getEntityPosition().stream().map(RectangleWithPage::rectangle2D).toList(), + manualEntity.getValue(), + manualEntity.getEntityPosition().get(0).pageNumber(), + manualEntity.getEntityPosition().stream().map(RectangleWithPage::rectangle2D).toList(), distance, matchThreshold); return Optional.empty(); @@ -99,7 +104,8 @@ public class EntityFindingUtility { } return originalPositions.stream() .mapToDouble(rectangleWithPage -> calculateMinDistancePerRectangle(entity, rectangleWithPage.pageNumber(), rectangleWithPage.rectangle2D())) - .sum(); + .average() + .orElse(Double.MAX_VALUE); } @@ -145,7 +151,7 @@ public class EntityFindingUtility { public Map> findAllPossibleEntitiesAndGroupByValue(SemanticNode node, List manualEntities) { Set pageNumbers = manualEntities.stream().flatMap(entry -> entry.getEntityPosition().stream().map(RectangleWithPage::pageNumber)).collect(Collectors.toSet()); - Set entryValues = manualEntities.stream().map(ManualEntity::getValue).map(String::toLowerCase).collect(Collectors.toSet()); + Set entryValues = manualEntities.stream().map(ManualEntity::getValue).filter(Objects::nonNull).map(String::toLowerCase).collect(Collectors.toSet()); if (!pageNumbers.stream().allMatch(node::onPage)) { throw new IllegalArgumentException(format("SemanticNode \"%s\" does not contain these pages %s, it has pages: %s", diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/ManualEntityCreationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/ManualEntityCreationService.java index 9acb1ac1..e5c4d2d9 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/ManualEntityCreationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/ManualEntityCreationService.java @@ -29,7 +29,7 @@ import lombok.extern.slf4j.Slf4j; @FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE) public class ManualEntityCreationService { - static double MATCH_THRESHOLD = 5; // Is compared to the sum of distances in pdf coordinates for each corner of the bounding box of the entities + static double MATCH_THRESHOLD = 10; // Is compared to the average sum of distances in pdf coordinates for each corner of the bounding box of the entities EntityFindingUtility entityFindingUtility; EntityCreationService entityCreationService; DictionaryService dictionaryService; diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/storage/RedactionStorageService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/storage/RedactionStorageService.java index 4379743c..468fa5b5 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/storage/RedactionStorageService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/storage/RedactionStorageService.java @@ -95,7 +95,7 @@ public class RedactionStorageService { RedactionLog redactionLog = storageService.readJSONObject(TenantContext.getTenantId(), StorageIdUtils.getStorageId(dossierId, fileId, FileType.REDACTION_LOG), RedactionLog.class); - redactionLog.setRedactionLogEntry(redactionLog.getRedactionLogEntry().stream().filter(entry -> !(entry.getValue() == null || entry.getValue().isEmpty())).collect(Collectors.toList())); + redactionLog.setRedactionLogEntry(redactionLog.getRedactionLogEntry().stream().filter(entry -> !(entry.getPositions() == null || entry.getPositions().isEmpty())).collect(Collectors.toList())); return redactionLog; } catch (StorageObjectDoesNotExist e) { log.debug("RedactionLog not available."); @@ -110,7 +110,7 @@ public class RedactionStorageService { try { EntityLog entityLog = storageService.readJSONObject(TenantContext.getTenantId(), StorageIdUtils.getStorageId(dossierId, fileId, FileType.ENTITY_LOG), EntityLog.class); - entityLog.setEntityLogEntry(entityLog.getEntityLogEntry().stream().filter(entry -> !entry.getValue().isEmpty()).collect(Collectors.toList())); + entityLog.setEntityLogEntry(entityLog.getEntityLogEntry().stream().filter(entry -> !(entry.getPositions() == null || entry.getPositions().isEmpty())).collect(Collectors.toList())); return entityLog; } catch (StorageObjectDoesNotExist e) { log.debug("EntityLog not available."); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/resources/application-dev.yaml b/redaction-service-v1/redaction-service-server-v1/src/main/resources/application-dev.yaml index d9bb2f18..bd5cdc54 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/resources/application-dev.yaml +++ b/redaction-service-v1/redaction-service-server-v1/src/main/resources/application-dev.yaml @@ -1,5 +1,5 @@ server: - port: 8077 + port: 8083 persistence-service.url: "http://localhost:8085" tenant-user-management-service.url: "http://localhost:8091/internal"