RED-7384: first prototype for migrating redactionLogs to entityLogs

This commit is contained in:
Kilian Schüttler 2023-10-30 16:27:05 +01:00
parent f38adabb8c
commit 8c062da5a1
34 changed files with 11793 additions and 451 deletions

View File

@ -0,0 +1,17 @@
package com.iqser.red.service.redaction.v1.model;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Message payload that requests the migration of a single file.
 * <p>
 * The file is addressed by the id of its dossier and its own file id. Getters, setters, a builder and the
 * no-args / all-args constructors are generated by Lombok.
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class MigrationRequest {
// Id of the dossier the file belongs to.
String dossierId;
// Id of the file that should be migrated.
String fileId;
}

View File

@ -0,0 +1,17 @@
package com.iqser.red.service.redaction.v1.model;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Message payload that reports a finished migration of a single file.
 * <p>
 * Carries the same dossier id / file id pair that identified the file in the request. Getters, setters, a
 * builder and the no-args / all-args constructors are generated by Lombok.
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class MigrationResponse {
// Id of the dossier the migrated file belongs to.
String dossierId;
// Id of the file that was migrated.
String fileId;
}

View File

@ -0,0 +1,14 @@
package com.iqser.red.service.redaction.v1.model;
/**
 * Central place for the names of the message queues used for redaction and migration requests.
 * <p>
 * This is a pure constants holder: it is final and cannot be instantiated.
 */
public final class QueueNames {

    public static final String REDACTION_QUEUE = "redactionQueue";
    // NOTE(review): identifier and value spell "DQL" (presumably "DLQ" was meant). Both are kept unchanged
    // because the constant name is referenced by callers and the value is the queue name used at runtime.
    public static final String REDACTION_DQL = "redactionDQL";
    public static final String REDACTION_PRIORITY_QUEUE = "redactionPriorityQueue";

    public static final String MIGRATION_QUEUE = "migrationQueue";
    public static final String MIGRATION_DLQ = "migrationDLQ";
    public static final String MIGRATION_RESPONSE_QUEUE = "migrationResponseQueue";


    private QueueNames() {
        // constants holder, not meant to be instantiated
    }

}

View File

@ -16,7 +16,7 @@ val layoutParserVersion = "0.74.0"
val jacksonVersion = "2.15.2"
val droolsVersion = "9.44.0.Final"
val pdfBoxVersion = "3.0.0"
val persistenceServiceVersion = "2.229.0"
val persistenceServiceVersion = "2.231.0"
configurations {
all {

View File

@ -4,10 +4,9 @@ import java.util.List;
import java.util.PriorityQueue;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.ManualChangeOverwrite;
import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRule;
@ -44,39 +43,6 @@ public class ManualEntity implements IEntity {
ManualChangeOverwrite manualOverwrite = new ManualChangeOverwrite();
/**
 * Creates a {@link ManualEntity} that carries the values of the given redaction log entry.
 * <p>
 * The rule identifier is composed as {@code <type>.<matchedRule>.0}; the positions are converted via
 * {@link RectangleWithPage#fromRedactionLogRectangle}.
 *
 * @param redactionLogEntry the redaction log entry to read from
 * @return the newly built manual entity
 */
public static ManualEntity fromRedactionLogEntry(RedactionLogEntry redactionLogEntry) {

    String composedRuleIdentifier = redactionLogEntry.getType() + "." + redactionLogEntry.getMatchedRule() + ".0";

    List<RectangleWithPage> convertedPositions = redactionLogEntry.getPositions()
            .stream()
            .map(RectangleWithPage::fromRedactionLogRectangle)
            .toList();

    EntityType resolvedEntityType = getEntityType(redactionLogEntry);

    return ManualEntity.builder()
            .id(redactionLogEntry.getId())
            .value(redactionLogEntry.getValue())
            .entityPosition(convertedPositions)
            .ruleIdentifier(composedRuleIdentifier)
            .reason(redactionLogEntry.getReason())
            .legalBasis(redactionLogEntry.getLegalBasis())
            .type(redactionLogEntry.getType())
            .section(redactionLogEntry.getSection())
            .entityType(resolvedEntityType)
            .applied(redactionLogEntry.isRedacted())
            .isDictionaryEntry(redactionLogEntry.isDictionaryEntry())
            .isDossierDictionaryEntry(redactionLogEntry.isDossierDictionaryEntry())
            .rectangle(redactionLogEntry.isRectangle())
            .build();
}
/**
 * Maps the flags of a redaction log entry to an {@link EntityType}.
 * The recommendation flag is checked first, then the hint flag; everything else is a plain entity.
 */
private static EntityType getEntityType(RedactionLogEntry redactionLogEntry) {

    if (redactionLogEntry.isRecommendation()) {
        return EntityType.RECOMMENDATION;
    }
    return redactionLogEntry.isHint() ? EntityType.HINT : EntityType.ENTITY;
}
public static ManualEntity fromManualRedactionEntry(ManualRedactionEntry manualRedactionEntry, boolean hint) {
List<RectangleWithPage> rectangleWithPages = manualRedactionEntry.getPositions().stream().map(RectangleWithPage::fromAnnotationRectangle).toList();

View File

@ -0,0 +1,20 @@
package com.iqser.red.service.redaction.v1.server.model;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.migration.MigratedIds;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Holder that bundles an {@link EntityLog} with the {@link MigratedIds} that belong to it.
 * <p>
 * Getters, setters, a builder and the no-args / all-args constructors are generated by Lombok.
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class MigratedEntityLog {
// The migrated ids belonging to the entity log.
MigratedIds migratedIds;
// The entity log itself.
EntityLog entityLog;
}

View File

@ -0,0 +1,305 @@
package com.iqser.red.service.redaction.v1.server.model;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ChangeType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryState;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Position;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.AnnotationStatus;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Change;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Engine;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.ManualChange;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.ManualRedactionType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.ImageType;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.RequiredArgsConstructor;
/**
 * Pairs a {@link RedactionLogEntry} with the entity it has been migrated to and converts that pair into an
 * {@link EntityLogEntry}.
 * <p>
 * An instance is created from a redaction log entry via {@link #fromRedactionLogEntry(RedactionLogEntry)}.
 * The {@code migratedEntity} has to be set before {@link #toEntityLogEntry(Map)} is called.
 */
@Data
@AllArgsConstructor
@RequiredArgsConstructor
public final class MigrationEntity {

    // Manual entity built from the redaction log entry, see createManualEntity.
    private final ManualEntity manualEntity;
    // The redaction log entry this migration entity originates from.
    private final RedactionLogEntry redactionLogEntry;
    // The entity that toEntityLogEntry converts; not assigned by fromRedactionLogEntry.
    private IEntity migratedEntity;
    // Presumably the entry id before / after the migration; neither is assigned inside this class.
    private String oldId;
    private String newId;


    /**
     * Creates a migration entity for the given redaction log entry. Only the manual entity and the redaction
     * log entry are set; the migrated entity and the ids stay {@code null}.
     *
     * @param redactionLogEntry the redaction log entry to migrate
     * @return the new migration entity
     */
    public static MigrationEntity fromRedactionLogEntry(RedactionLogEntry redactionLogEntry) {

        return new MigrationEntity(createManualEntity(redactionLogEntry), redactionLogEntry);
    }


    /**
     * Builds a {@link ManualEntity} carrying the values of the given redaction log entry.
     * The rule identifier is composed as {@code OLD.<matchedRule>.0}.
     *
     * @param redactionLogEntry the redaction log entry to read from
     * @return the newly built manual entity
     */
    public static ManualEntity createManualEntity(RedactionLogEntry redactionLogEntry) {

        String ruleIdentifier = "OLD." + redactionLogEntry.getMatchedRule() + ".0";
        List<RectangleWithPage> rectangleWithPages = redactionLogEntry.getPositions()
                .stream()
                .map(RectangleWithPage::fromRedactionLogRectangle)
                .toList();
        return ManualEntity.builder()
                .id(redactionLogEntry.getId())
                .value(redactionLogEntry.getValue())
                .entityPosition(rectangleWithPages)
                .ruleIdentifier(ruleIdentifier)
                .reason(redactionLogEntry.getReason())
                .legalBasis(redactionLogEntry.getLegalBasis())
                .type(redactionLogEntry.getType())
                .section(redactionLogEntry.getSection())
                .entityType(getEntityType(redactionLogEntry))
                .applied(redactionLogEntry.isRedacted())
                .isDictionaryEntry(redactionLogEntry.isDictionaryEntry())
                .isDossierDictionaryEntry(redactionLogEntry.isDossierDictionaryEntry())
                .rectangle(redactionLogEntry.isRectangle())
                .build();
    }


    /**
     * Maps the flags of a redaction log entry to an {@link EntityType}: recommendation wins over hint,
     * everything else is a plain entity.
     */
    private static EntityType getEntityType(RedactionLogEntry redactionLogEntry) {

        if (redactionLogEntry.isRecommendation()) {
            return EntityType.RECOMMENDATION;
        }
        if (redactionLogEntry.isHint()) {
            return EntityType.HINT;
        }
        return EntityType.ENTITY;
    }


    /** Converts a redaction log change into its entity log counterpart. */
    private static com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Change toEntityLogChanges(Change change) {

        return new com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Change(change.getAnalysisNumber(),
                toEntityLogType(change.getType()),
                change.getDateTime());
    }


    /** Maps an {@link EntityType} to the {@link EntryType} of the same name. */
    private static EntryType getEntryType(EntityType entityType) {

        return switch (entityType) {
            case ENTITY -> EntryType.ENTITY;
            case HINT -> EntryType.HINT;
            case FALSE_POSITIVE -> EntryType.FALSE_POSITIVE;
            case RECOMMENDATION -> EntryType.RECOMMENDATION;
            case FALSE_RECOMMENDATION -> EntryType.FALSE_RECOMMENDATION;
        };
    }


    /** Converts a redaction log manual change into its entity log counterpart. */
    private static com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualChange toEntityLogManualChanges(ManualChange manualChange) {

        return new com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualChange(toManualRedactionType(manualChange.getManualRedactionType()),
                manualChange.getProcessedDate(),
                manualChange.getRequestedDate(),
                manualChange.getUserId(),
                manualChange.getPropertyChanges());
    }


    /** Maps a redaction log change type to the entity log change type of the same name. */
    private static ChangeType toEntityLogType(com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.ChangeType type) {

        return switch (type) {
            case ADDED -> ChangeType.ADDED;
            case REMOVED -> ChangeType.REMOVED;
            case CHANGED -> ChangeType.CHANGED;
        };
    }


    /** Maps a redaction log manual redaction type to the entity log manual redaction type of the same name. */
    private static com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType toManualRedactionType(ManualRedactionType manualRedactionType) {

        return switch (manualRedactionType) {
            case ADD_LOCALLY -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.ADD_LOCALLY;
            case ADD_TO_DICTIONARY -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.ADD_TO_DICTIONARY;
            case REMOVE_LOCALLY -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.REMOVE_LOCALLY;
            case REMOVE_FROM_DICTIONARY -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.REMOVE_FROM_DICTIONARY;
            case FORCE_REDACT -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.FORCE_REDACT;
            case FORCE_HINT -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.FORCE_HINT;
            case RECATEGORIZE -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.RECATEGORIZE;
            case LEGAL_BASIS_CHANGE -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.LEGAL_BASIS_CHANGE;
            case RESIZE -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.ManualRedactionType.RESIZE;
        };
    }


    /**
     * Converts this migration entity into an {@link EntityLogEntry}.
     * <p>
     * The base entry is built from the migrated entity (image, text entity or manual entity). Changes, approved
     * manual changes, color, references, imported redaction intersections, engines and - if present - the legal
     * basis are then taken over from the redaction log entry.
     *
     * @param oldToNewIdMapping mapping from old entry ids to new entry ids, used to translate referenced ids
     * @return the entity log entry
     * @throws IllegalStateException         if no migrated entity has been set
     * @throws UnsupportedOperationException if the migrated entity is of an unsupported type
     */
    public EntityLogEntry toEntityLogEntry(Map<String, String> oldToNewIdMapping) {

        // Fail with a meaningful message instead of an NPE from migratedEntity.getClass() further down.
        if (migratedEntity == null) {
            throw new IllegalStateException("No migrated entity set for redaction log entry " + redactionLogEntry.getId());
        }

        EntityLogEntry entityLogEntry;
        if (migratedEntity instanceof Image image) {
            entityLogEntry = createEntityLogEntry(image);
        } else if (migratedEntity instanceof TextEntity textEntity) {
            entityLogEntry = createEntityLogEntry(textEntity);
        } else if (migratedEntity instanceof ManualEntity entity) {
            entityLogEntry = createEntityLogEntry(entity);
        } else {
            throw new UnsupportedOperationException("Unknown subclass " + migratedEntity.getClass());
        }

        entityLogEntry.setChanges(redactionLogEntry.getChanges().stream().map(MigrationEntity::toEntityLogChanges).toList());
        entityLogEntry.setManualChanges(redactionLogEntry.getManualChanges()
                .stream()
                // constant-first comparison: a manual change without a status is skipped instead of causing an NPE
                .filter(manualChange -> AnnotationStatus.APPROVED.equals(manualChange.getAnnotationStatus()))
                .map(MigrationEntity::toEntityLogManualChanges)
                .toList());
        entityLogEntry.setColor(redactionLogEntry.getColor());
        entityLogEntry.setReference(migrateSetOfIds(redactionLogEntry.getReference(), oldToNewIdMapping));
        entityLogEntry.setImportedRedactionIntersections(migrateSetOfIds(redactionLogEntry.getImportedRedactionIntersections(), oldToNewIdMapping));
        entityLogEntry.setEngines(redactionLogEntry.getEngines().stream().map(MigrationEntity::toEntityLogEngine).collect(Collectors.toSet()));
        if (redactionLogEntry.getLegalBasis() != null) {
            entityLogEntry.setLegalBasis(redactionLogEntry.getLegalBasis());
        }
        return entityLogEntry;
    }


    /**
     * Translates a set of old ids into the corresponding new ids.
     * Ids without an entry in the mapping are dropped, so the result never contains {@code null}.
     *
     * @param ids               the old ids, may be {@code null}
     * @param oldToNewIdMapping mapping from old ids to new ids
     * @return the new ids; an empty set if {@code ids} is {@code null}
     */
    private static Set<String> migrateSetOfIds(Set<String> ids, Map<String, String> oldToNewIdMapping) {

        if (ids == null) {
            return Collections.emptySet();
        }
        return ids.stream()
                .map(oldToNewIdMapping::get)
                .filter(migratedId -> migratedId != null)
                .collect(Collectors.toSet());
    }


    /** Maps a redaction log engine to the entity log engine of the same name. */
    private static com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine toEntityLogEngine(Engine engine) {

        return switch (engine) {
            case DICTIONARY -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine.DICTIONARY;
            case NER -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine.NER;
            case RULE -> com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine.RULE;
        };
    }


    /**
     * Builds the entity log entry for an image.
     * The type is {@code "image"} for {@link ImageType#OTHER}, otherwise the lower-cased image type name. The entry
     * type is {@link EntryType#IMAGE_HINT} if the redaction log entry is a hint, {@link EntryType#IMAGE} otherwise.
     *
     * @param image the migrated image
     * @return the entity log entry
     */
    public EntityLogEntry createEntityLogEntry(Image image) {

        String imageType = image.getImageType().equals(ImageType.OTHER) ? "image" : image.getImageType().toString().toLowerCase(Locale.ENGLISH);
        return EntityLogEntry.builder()
                .id(image.getId())
                .value(image.value())
                .type(imageType)
                .reason(image.buildReasonWithManualChangeDescriptions())
                .legalBasis(image.legalBasis())
                .matchedRule(image.getMatchedRule().getRuleIdentifier().toString())
                .dictionaryEntry(false)
                .positions(List.of(new Position(image.getPosition(), image.getPage().getNumber())))
                .containingNodeId(image.getTreeId())
                .closestHeadline(image.getHeadline().getTextBlock().getSearchText())
                .section(image.getManualOverwrite().getSection().orElse(image.getParent().toString()))
                .imageHasTransparency(image.isTransparent())
                .state(buildEntryState(image))
                .entryType(redactionLogEntry.isHint() ? EntryType.IMAGE_HINT : EntryType.IMAGE)
                .build();
    }


    /**
     * Builds the entity log entry for a manual entity.
     * Type, section and positions from the manual overwrite take precedence over the entity's own values. The entry
     * has no containing node, an empty headline, empty surrounding text, offsets of {@code -1} and no engines.
     *
     * @param manualEntity the manual entity
     * @return the entity log entry
     */
    public EntityLogEntry createEntityLogEntry(ManualEntity manualEntity) {

        String type = manualEntity.getManualOverwrite().getType().orElse(manualEntity.getType());
        return EntityLogEntry.builder()
                .id(manualEntity.getId())
                .reason(manualEntity.buildReasonWithManualChangeDescriptions())
                .legalBasis(manualEntity.legalBasis())
                .value(manualEntity.value())
                .type(type)
                .state(buildEntryState(manualEntity))
                .entryType(buildEntryType(manualEntity))
                .section(manualEntity.getManualOverwrite().getSection().orElse(manualEntity.getSection()))
                .containingNodeId(Collections.emptyList())
                .closestHeadline("")
                .matchedRule(manualEntity.getMatchedRule().getRuleIdentifier().toString())
                .dictionaryEntry(manualEntity.isDictionaryEntry())
                .dossierDictionaryEntry(manualEntity.isDossierDictionaryEntry())
                .textAfter("")
                .textBefore("")
                .startOffset(-1)
                .endOffset(-1)
                .positions(manualEntity.getManualOverwrite()
                        .getPositions()
                        .orElse(manualEntity.getEntityPosition())
                        .stream()
                        .map(entityPosition -> new Position(entityPosition.rectangle2D(), entityPosition.pageNumber()))
                        .toList())
                .engines(Collections.emptySet())
                .build();
    }


    /**
     * Builds the entity log entry for a text entity.
     * The id and the positions are taken from the first {@link PositionOnPage} of the entity; the entity is
     * expected to have exactly one (checked by an assertion only).
     *
     * @param entity the migrated text entity
     * @return the entity log entry
     */
    public EntityLogEntry createEntityLogEntry(TextEntity entity) {

        assert entity.getPositionsOnPagePerPage().size() == 1 : "Expected exactly one PositionOnPage per migrated TextEntity";
        PositionOnPage positionOnPage = entity.getPositionsOnPagePerPage().get(0);
        List<Position> rectanglesPerLine = positionOnPage.getRectanglePerLine()
                .stream()
                .map(rectangle2D -> new Position(rectangle2D, positionOnPage.getPage().getNumber()))
                .toList();
        return EntityLogEntry.builder()
                .id(positionOnPage.getId())
                .positions(rectanglesPerLine)
                .reason(entity.buildReasonWithManualChangeDescriptions())
                .legalBasis(entity.legalBasis())
                .value(entity.getManualOverwrite().getValue().orElse(entity.getMatchedRule().isWriteValueWithLineBreaks() ? entity.getValueWithLineBreaks() : entity.getValue()))
                .type(entity.getType())
                .section(entity.getManualOverwrite().getSection().orElse(entity.getDeepestFullyContainingNode().toString()))
                .containingNodeId(entity.getDeepestFullyContainingNode().getTreeId())
                .closestHeadline(entity.getDeepestFullyContainingNode().getHeadline().getTextBlock().getSearchText())
                .matchedRule(entity.getMatchedRule().getRuleIdentifier().toString())
                .dictionaryEntry(entity.isDictionaryEntry())
                .textAfter(entity.getTextAfter())
                .textBefore(entity.getTextBefore())
                .startOffset(entity.getTextRange().start())
                .endOffset(entity.getTextRange().end())
                .dossierDictionaryEntry(entity.isDossierDictionaryEntry())
                .engines(entity.getEngines() != null ? entity.getEngines() : Collections.emptySet())
                .state(buildEntryState(entity))
                .entryType(buildEntryType(entity))
                .build();
    }


    /**
     * Derives the entry state of an entity. Checked in this order: applied and active, skipped and active,
     * ignored; anything else counts as removed.
     */
    private EntryState buildEntryState(IEntity entity) {

        if (entity.applied() && entity.active()) {
            return EntryState.APPLIED;
        } else if (entity.skipped() && entity.active()) {
            return EntryState.SKIPPED;
        } else if (entity.ignored()) {
            return EntryState.IGNORED;
        } else {
            return EntryState.REMOVED;
        }
    }


    /**
     * Derives the entry type of an entity. Rectangle-shaped manual entities become {@link EntryType#AREA}, images
     * become {@link EntryType#IMAGE}, everything else is mapped from its {@link EntityType}.
     *
     * @throws UnsupportedOperationException if the entity is of an unsupported type
     */
    private EntryType buildEntryType(IEntity entity) {

        if (entity instanceof TextEntity textEntity) {
            return getEntryType(textEntity.getEntityType());
        } else if (entity instanceof ManualEntity manualEntity) {
            if (manualEntity.isRectangle()) {
                return EntryType.AREA;
            }
            return getEntryType(manualEntity.getEntityType());
        } else if (entity instanceof Image) {
            return EntryType.IMAGE;
        }
        throw new UnsupportedOperationException(String.format("Entity subclass %s is not implemented!", entity.getClass()));
    }

}

View File

@ -1,6 +1,5 @@
package com.iqser.red.service.redaction.v1.server.model.document.entity;
import java.awt.geom.Rectangle2D;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedList;
@ -210,10 +209,4 @@ public class ManualChangeOverwrite {
return positions == null ? Optional.empty() : Optional.of(positions);
}
// Converts a persistence-layer annotation Rectangle into a Rectangle2D.Double with the same width and height.
// NOTE(review): the origin is computed as (topLeftX - width, topLeftY - height); confirm that this matches the
// coordinate convention of the annotation Rectangle.
public static Rectangle2D toRectangle2D(com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.Rectangle rect) {
return new Rectangle2D.Double(rect.getTopLeftX() - rect.getWidth(), rect.getTopLeftY() - rect.getHeight(), rect.getWidth(), rect.getHeight());
}
}

View File

@ -1,13 +1,11 @@
package com.iqser.red.service.redaction.v1.server.queue;
import static com.iqser.red.service.redaction.v1.server.queue.MessagingConfiguration.REDACTION_QUEUE;
import static com.iqser.red.service.redaction.v1.model.QueueNames.REDACTION_QUEUE;
import org.springframework.amqp.core.Message;
import org.springframework.amqp.rabbit.annotation.RabbitHandler;
import org.springframework.amqp.rabbit.annotation.RabbitListener;
import com.fasterxml.jackson.core.JsonProcessingException;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;

View File

@ -1,6 +1,10 @@
package com.iqser.red.service.redaction.v1.server.queue;
import lombok.RequiredArgsConstructor;
import static com.iqser.red.service.redaction.v1.model.QueueNames.MIGRATION_DLQ;
import static com.iqser.red.service.redaction.v1.model.QueueNames.MIGRATION_QUEUE;
import static com.iqser.red.service.redaction.v1.model.QueueNames.REDACTION_DQL;
import static com.iqser.red.service.redaction.v1.model.QueueNames.REDACTION_PRIORITY_QUEUE;
import static com.iqser.red.service.redaction.v1.model.QueueNames.REDACTION_QUEUE;
import org.springframework.amqp.core.Queue;
import org.springframework.amqp.core.QueueBuilder;
@ -8,15 +12,12 @@ import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import lombok.RequiredArgsConstructor;
@Configuration
@RequiredArgsConstructor
public class MessagingConfiguration {
public static final String REDACTION_QUEUE = "redactionQueue";
public static final String REDACTION_DQL = "redactionDQL";
public static final String REDACTION_PRIORITY_QUEUE = "redactionPriorityQueue";
public static final String X_ERROR_INFO_HEADER = "x-error-message";
public static final String X_ERROR_INFO_TIMESTAMP_HEADER = "x-error-message-timestamp";
@ -37,6 +38,20 @@ public class MessagingConfiguration {
}
/**
 * Declares the durable migration queue with a maximum priority of 2.
 * Dead-lettered messages are routed through the default exchange to the migration DLQ.
 */
@Bean
public Queue migrationQueue() {

    return QueueBuilder.durable(MIGRATION_QUEUE)
            .withArgument("x-dead-letter-exchange", "")
            .withArgument("x-dead-letter-routing-key", MIGRATION_DLQ)
            .maxPriority(2)
            .build();
}
/**
 * Declares the durable dead letter queue for the migration queue.
 */
@Bean
public Queue migrationDLQ() {

    return QueueBuilder
            .durable(MIGRATION_DLQ)
            .build();
}
@Bean
public Queue redactionQueue() {

View File

@ -0,0 +1,66 @@
package com.iqser.red.service.redaction.v1.server.queue;
import static com.iqser.red.service.redaction.v1.model.QueueNames.MIGRATION_QUEUE;
import static com.iqser.red.service.redaction.v1.model.QueueNames.MIGRATION_RESPONSE_QUEUE;
import org.springframework.amqp.core.Message;
import org.springframework.amqp.rabbit.annotation.RabbitHandler;
import org.springframework.amqp.rabbit.annotation.RabbitListener;
import org.springframework.amqp.rabbit.core.RabbitTemplate;
import org.springframework.stereotype.Service;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLog;
import com.iqser.red.service.redaction.v1.model.MigrationRequest;
import com.iqser.red.service.redaction.v1.model.MigrationResponse;
import com.iqser.red.service.redaction.v1.server.model.MigratedEntityLog;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.service.RedactionLogToEntityLogMigrationService;
import com.iqser.red.service.redaction.v1.server.service.document.DocumentGraphMapper;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.experimental.FieldDefaults;
import lombok.extern.slf4j.Slf4j;
/**
 * Listener for migration requests.
 * <p>
 * For every request it loads the redaction log and the document data of the addressed file, runs the
 * redaction log to entity log migration, stores the resulting entity log and migrated ids and finally publishes
 * a {@link MigrationResponse}.
 */
@Slf4j
@Service
@RequiredArgsConstructor
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class MigrationMessageReceiver {

    ObjectMapper objectMapper;
    RedactionLogToEntityLogMigrationService redactionLogToEntityLogMigrationService;
    RedactionStorageService redactionStorageService;
    RabbitTemplate rabbitTemplate;


    /**
     * Handles one message from the migration queue.
     *
     * @param message the raw message; its body is deserialized into a {@link MigrationRequest}
     */
    @SneakyThrows
    @RabbitHandler
    @RabbitListener(queues = MIGRATION_QUEUE)
    public void receiveMigrationRequest(Message message) {

        MigrationRequest request = objectMapper.readValue(message.getBody(), MigrationRequest.class);
        String dossierId = request.getDossierId();
        String fileId = request.getFileId();

        // load the inputs of the migration
        RedactionLog legacyLog = redactionStorageService.getRedactionLog(dossierId, fileId);
        Document documentGraph = DocumentGraphMapper.toDocumentGraph(redactionStorageService.getDocumentData(dossierId, fileId));

        MigratedEntityLog migrationResult = redactionLogToEntityLogMigrationService.migrate(legacyLog, documentGraph);

        // persist both parts of the result before reporting back
        redactionStorageService.storeObject(dossierId, fileId, FileType.ENTITY_LOG, migrationResult.getEntityLog());
        redactionStorageService.storeObject(dossierId, fileId, FileType.MIGRATED_IDS, migrationResult.getMigratedIds());

        MigrationResponse finished = MigrationResponse.builder()
                .dossierId(dossierId)
                .fileId(fileId)
                .build();
        sendFinished(finished);
    }


    /**
     * Publishes the given response with the migration response queue name as routing key.
     *
     * @param migrationResponse the response to send
     */
    @SneakyThrows
    public void sendFinished(MigrationResponse migrationResponse) {

        rabbitTemplate.convertAndSend(MIGRATION_RESPONSE_QUEUE, migrationResponse);
    }

}

View File

@ -1,13 +1,11 @@
package com.iqser.red.service.redaction.v1.server.queue;
import static com.iqser.red.service.redaction.v1.server.queue.MessagingConfiguration.REDACTION_PRIORITY_QUEUE;
import static com.iqser.red.service.redaction.v1.model.QueueNames.REDACTION_PRIORITY_QUEUE;
import org.springframework.amqp.core.Message;
import org.springframework.amqp.rabbit.annotation.RabbitHandler;
import org.springframework.amqp.rabbit.annotation.RabbitListener;
import com.fasterxml.jackson.core.JsonProcessingException;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;

View File

@ -1,8 +1,8 @@
package com.iqser.red.service.redaction.v1.server.queue;
import static com.iqser.red.service.redaction.v1.server.queue.MessagingConfiguration.REDACTION_DQL;
import static com.iqser.red.service.redaction.v1.server.queue.MessagingConfiguration.REDACTION_PRIORITY_QUEUE;
import static com.iqser.red.service.redaction.v1.server.queue.MessagingConfiguration.REDACTION_QUEUE;
import static com.iqser.red.service.redaction.v1.model.QueueNames.REDACTION_DQL;
import static com.iqser.red.service.redaction.v1.model.QueueNames.REDACTION_PRIORITY_QUEUE;
import static com.iqser.red.service.redaction.v1.model.QueueNames.REDACTION_QUEUE;
import static com.iqser.red.service.redaction.v1.server.queue.MessagingConfiguration.X_ERROR_INFO_HEADER;
import static com.iqser.red.service.redaction.v1.server.queue.MessagingConfiguration.X_ERROR_INFO_TIMESTAMP_HEADER;
import static java.lang.String.format;
@ -23,8 +23,8 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResu
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileErrorInfo;
import com.iqser.red.service.redaction.v1.server.client.FileStatusProcessingUpdateClient;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.service.redaction.v1.server.utils.exception.DroolsTimeoutException;
import com.iqser.red.service.redaction.v1.server.service.AnalyzeService;
import com.iqser.red.service.redaction.v1.server.utils.exception.DroolsTimeoutException;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
@ -128,7 +128,7 @@ public class RedactionMessageReceiver {
@RabbitHandler
@RabbitListener(queues = REDACTION_DQL)
public void receiveAnalyzeRequestDQL(Message in) throws IOException {
public void receiveAnalyzeRequestDLQ(Message in) throws IOException {
var analyzeRequest = objectMapper.readValue(in.getBody(), AnalyzeRequest.class);
log.info("Failed to process analyze request: {}", analyzeRequest);

View File

@ -47,7 +47,7 @@ import lombok.extern.slf4j.Slf4j;
@RequiredArgsConstructor
public class DictionaryService {
private static final String DEFAULT_COLOR = "#cccccc";
public static final String DEFAULT_COLOR = "#cccccc";
private final DictionaryClient dictionaryClient;
private final RedactionServiceSettings settings;

View File

@ -60,13 +60,11 @@ public class EntityLogCreatorService {
}
public EntityLog createInitialEntityLog(AnalyzeRequest analyzeRequest,
Document document,
List<ManualEntity> notFoundManualRedactionEntries,
public EntityLog createInitialEntityLog(AnalyzeRequest analyzeRequest, Document document, List<ManualEntity> notFoundManualEntities,
DictionaryVersion dictionaryVersion,
long rulesVersion) {
List<EntityLogEntry> entityLogEntries = createEntityLogEntries(document, analyzeRequest.getDossierTemplateId(), notFoundManualRedactionEntries);
List<EntityLogEntry> entityLogEntries = createEntityLogEntries(document, analyzeRequest.getDossierTemplateId(), notFoundManualEntities);
List<LegalBasis> legalBasis = legalBasisClient.getLegalBasisMapping(analyzeRequest.getDossierTemplateId());
EntityLog entityLog = new EntityLog(redactionServiceSettings.getAnalysisVersion(),
@ -187,7 +185,7 @@ public class EntityLogCreatorService {
}
public EntityLogEntry createEntityLogEntry(Image image, String dossierTemplateId) {
private EntityLogEntry createEntityLogEntry(Image image, String dossierTemplateId) {
String imageType = image.getImageType().equals(ImageType.OTHER) ? "image" : image.getImageType().toString().toLowerCase(Locale.ENGLISH);
boolean isHint = dictionaryService.isHint(imageType, dossierTemplateId);
@ -291,6 +289,7 @@ public class EntityLogCreatorService {
return dictionaryService.getNotRedactedColor(dossierTemplateId);
}
return dictionaryService.getColor(type, dossierTemplateId);
}

View File

@ -55,22 +55,21 @@ public class ImportedRedactionService {
@Timed("redactmanager_processImportedRedactions")
public List<EntityLogEntry> processImportedEntities(String dossierTemplateId,
String dossierId,
String fileId,
List<EntityLogEntry> redactionLogEntries,
String fileId, List<EntityLogEntry> entityLogEntries,
boolean addImportedRedactions) {
ImportedRedactions importedRedactions = redactionStorageService.getImportedRedactions(dossierId, fileId);
if (importedRedactions == null) {
return redactionLogEntries;
return entityLogEntries;
}
redactionLogEntries.forEach(redactionLogEntry -> addIntersections(redactionLogEntry, importedRedactions));
entityLogEntries.forEach(redactionLogEntry -> addIntersections(redactionLogEntry, importedRedactions));
if (addImportedRedactions) {
return addImportedRedactionsEntityLogEntries(dossierTemplateId, redactionLogEntries, importedRedactions);
return addImportedRedactionsEntityLogEntries(dossierTemplateId, entityLogEntries, importedRedactions);
}
return redactionLogEntries;
return entityLogEntries;
}

View File

@ -43,8 +43,7 @@ public class RedactionLogCreatorService {
private final ManualChangeFactory manualChangeFactory;
public List<RedactionLogEntry> createRedactionLog(Document document,
String dossierTemplateId, List<ManualEntity> notFoundManualRedactionEntries) {
public List<RedactionLogEntry> createRedactionLog(Document document, String dossierTemplateId, List<ManualEntity> notFoundManualRedactionEntries) {
List<RedactionLogEntry> entries = new ArrayList<>();
Set<String> processIds = new HashSet<>();
@ -236,10 +235,8 @@ public class RedactionLogCreatorService {
.isDossierDictionaryEntry(manualEntity.isDossierDictionaryEntry())
.textAfter("")
.textBefore("")
.startOffset(-1)
.endOffset(-1).positions(manualEntity.getManualOverwrite().getPositions().orElse(manualEntity.getEntityPosition())
.stream()
.map(entityPosition -> toRedactionLogRectangle(entityPosition.rectangle2D(), entityPosition.pageNumber())).toList())
.startOffset(-1).endOffset(-1).positions(manualEntity.getManualOverwrite().getPositions().orElse(manualEntity.getEntityPosition())
.stream().map(entityPosition -> toRedactionLogRectangle(entityPosition.rectangle2D(), entityPosition.pageNumber())).toList())
.engines(Collections.emptySet())
.reference(Collections.emptySet())
.manualChanges(mapManualChanges(manualEntity.getManualOverwrite(), isHint))

View File

@ -0,0 +1,187 @@
package com.iqser.red.service.redaction.v1.server.service;
import java.util.Collection;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.springframework.stereotype.Service;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogLegalBasis;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.migration.MigratedIds;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLog;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogLegalBasis;
import com.iqser.red.service.redaction.v1.server.model.ManualEntity;
import com.iqser.red.service.redaction.v1.server.model.MigratedEntityLog;
import com.iqser.red.service.redaction.v1.server.model.MigrationEntity;
import com.iqser.red.service.redaction.v1.server.model.RectangleWithPage;
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.service.document.EntityCreationService;
import com.iqser.red.service.redaction.v1.server.service.document.EntityEnrichmentService;
import com.iqser.red.service.redaction.v1.server.service.document.EntityFindingUtility;
import com.iqser.red.service.redaction.v1.server.utils.MigratedIdsCollector;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
import lombok.experimental.FieldDefaults;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@Service
@RequiredArgsConstructor
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
//TODO: remove this, once the migration is done
public class RedactionLogToEntityLogMigrationService {
private static final double MATCH_THRESHOLD = 10;
EntityFindingUtility entityFindingUtility;
EntityEnrichmentService entityEnrichmentService;
/**
 * Converts a legacy {@link RedactionLog} into an {@link EntityLog} and records how the entry IDs changed.
 * <p>
 * Version and analysis metadata as well as the legal basis list are copied from the redaction log. The entries are
 * produced from the {@link MigrationEntity} instances calculated for the redaction log against the given document.
 *
 * @param redactionLog the redaction log to migrate
 * @param document     the document the redaction log entries are matched against
 * @return the migrated entity log together with the collected ID mappings
 * @throws AssertionError if the new entity log does not contain as many entries as the redaction log
 */
public MigratedEntityLog migrate(RedactionLog redactionLog, Document document) {

    List<MigrationEntity> migrationEntities = calculateMigrationEntitiesFromRedactionLog(redactionLog, document);
    MigratedIds migratedIds = migrationEntities.stream().collect(new MigratedIdsCollector());

    // Carry over the analysis and version metadata unchanged.
    EntityLog entityLog = new EntityLog();
    entityLog.setAnalysisNumber(redactionLog.getAnalysisNumber());
    entityLog.setRulesVersion(redactionLog.getRulesVersion());
    entityLog.setDictionaryVersion(redactionLog.getDictionaryVersion());
    entityLog.setDossierDictionaryVersion(redactionLog.getDossierDictionaryVersion());
    entityLog.setLegalBasisVersion(redactionLog.getLegalBasisVersion());
    entityLog.setAnalysisVersion(redactionLog.getAnalysisVersion());

    List<EntityLogLegalBasis> legalBases = redactionLog.getLegalBasis()
            .stream()
            .map(RedactionLogToEntityLogMigrationService::toEntityLogLegalBasis)
            .toList();
    entityLog.setLegalBasis(legalBases);

    Map<String, String> idMapping = migratedIds.buildOldToNewMapping();
    entityLog.setEntityLogEntry(migrationEntities.stream().map(entity -> entity.toEntityLogEntry(idMapping)).toList());

    // Every redaction log entry must have produced exactly one entity log entry.
    int expectedEntries = redactionLog.getRedactionLogEntry().size();
    int migratedEntries = entityLog.getEntityLogEntry().size();
    if (expectedEntries == migratedEntries) {
        return new MigratedEntityLog(migratedIds, entityLog);
    }

    String message = String.format("Not all entities have been found during the migration redactionLog has %d entries and new entityLog %d",
            expectedEntries,
            migratedEntries);
    log.error(message);
    throw new AssertionError(message);
}
/**
 * Calculates the migration entities for all entries of the redaction log.
 * <p>
 * Image entries are resolved before text entries; the returned list holds the text-based entities first,
 * followed by the image-based ones.
 */
private List<MigrationEntity> calculateMigrationEntitiesFromRedactionLog(RedactionLog redactionLog, Document document) {

    List<MigrationEntity> imageEntities = getImageBasedMigrationEntities(redactionLog, document);
    List<MigrationEntity> textEntities = getTextBasedMigrationEntities(redactionLog, document);
    return Stream.of(textEntities, imageEntities).flatMap(List::stream).toList();
}
/**
 * Copies name, description and reason of a redaction log legal basis into a new {@link EntityLogLegalBasis}.
 */
private static EntityLogLegalBasis toEntityLogLegalBasis(RedactionLogLegalBasis redactionLogLegalBasis) {

    var name = redactionLogLegalBasis.getName();
    var description = redactionLogLegalBasis.getDescription();
    var reason = redactionLogLegalBasis.getReason();
    return new EntityLogLegalBasis(name, description, reason);
}
/**
 * Matches every image entry of the redaction log to the closest not yet matched image of the document.
 * <p>
 * Only images on the page of the entry's position are considered. The matched image is removed from the candidate
 * list, so it cannot be matched twice, and receives the state of the redaction log entry (removed, applied or
 * skipped) under a synthetic rule identifier of the form {@code OLDIMG.<matchedRule>.0}.
 *
 * @param redactionLog the redaction log providing the image entries
 * @param document     the document whose images are matched
 * @return one migration entity per image entry of the redaction log
 * @throws IllegalStateException if an image entry does not have exactly one position
 * @throws RuntimeException      if no image exists on the entry's page or the closest image is further away than
 *                               {@code MATCH_THRESHOLD}
 */
private List<MigrationEntity> getImageBasedMigrationEntities(RedactionLog redactionLog, Document document) {

    List<Image> images = document.streamAllImages().collect(Collectors.toList());
    List<RedactionLogEntry> redactionLogImages = redactionLog.getRedactionLogEntry().stream().filter(RedactionLogEntry::isImage).toList();
    List<MigrationEntity> migrationEntities = new LinkedList<>();
    for (RedactionLogEntry redactionLogImage : redactionLogImages) {
        List<RectangleWithPage> imagePositions = redactionLogImage.getPositions().stream().map(RectangleWithPage::fromRedactionLogRectangle).toList();
        // Explicit check instead of `assert`: assertions are disabled by default, which would turn a position-less
        // entry into an IndexOutOfBoundsException and let a multi-position entry be matched on its first rectangle only.
        if (imagePositions.size() != 1) {
            throw new IllegalStateException(String.format("Image from redaction log must have exactly one position, but has %d: %s",
                    imagePositions.size(),
                    redactionLogImage));
        }
        RectangleWithPage imagePosition = imagePositions.get(0);

        Image closestImage = images.stream()
                .filter(image -> image.onPage(redactionLogImage.getPositions().get(0).getPage()))
                .min(Comparator.comparingDouble(image -> entityFindingUtility.calculateDistance(image.getPosition(), imagePosition.rectangle2D())))
                .orElseThrow(() -> new RuntimeException("Image from redaction log not found: " + redactionLogImage));

        double minDistance = entityFindingUtility.calculateDistance(closestImage.getPosition(), imagePosition.rectangle2D());
        if (minDistance > MATCH_THRESHOLD) {
            throw new RuntimeException(String.format("Closest image has a distance of %.2f which is higher than the allowed %.2f", minDistance, MATCH_THRESHOLD));
        }

        // Each document image may be matched by one redaction log entry only.
        images.remove(closestImage);

        String ruleIdentifier = "OLDIMG." + redactionLogImage.getMatchedRule() + ".0";
        if (redactionLogImage.lastChangeIsRemoved()) {
            closestImage.remove(ruleIdentifier, redactionLogImage.getReason());
        } else if (redactionLogImage.isRedacted()) {
            closestImage.apply(ruleIdentifier, redactionLogImage.getReason(), redactionLogImage.getLegalBasis());
        } else {
            closestImage.skip(ruleIdentifier, redactionLogImage.getReason());
        }

        migrationEntities.add(new MigrationEntity(null, redactionLogImage, closestImage, redactionLogImage.getId(), closestImage.getId()));
    }
    return migrationEntities;
}
/**
 * Creates a migration entity for every non-image entry of the redaction log and tries to locate it in the document.
 * <p>
 * For each entry the closest text occurrence with the same value is searched within {@code MATCH_THRESHOLD}. If one
 * is found, a new {@link TextEntity} is created at that text range and its ID becomes the new ID. Otherwise the
 * manual entity itself is used as the migrated entity and the ID is kept. The temporary entities created for the
 * search are removed from the graph before returning.
 *
 * @param redactionLog the redaction log providing the text entries
 * @param document     the document in which the entries are searched
 * @return one migration entity per non-image entry of the redaction log
 */
private List<MigrationEntity> getTextBasedMigrationEntities(RedactionLog redactionLog, Document document) {

    List<MigrationEntity> entitiesToMigrate = redactionLog.getRedactionLogEntry()
            .stream()
            .filter(redactionLogEntry -> !redactionLogEntry.isImage())
            .map(MigrationEntity::fromRedactionLogEntry)
            .toList();
    // Done as an explicit pass rather than inside Stream.peek, which exists mainly for debugging and whose action
    // is not guaranteed to run for every element.
    entitiesToMigrate.forEach(RedactionLogToEntityLogMigrationService::transferRedactionLogState);

    Map<String, List<TextEntity>> tempEntitiesByValue = entityFindingUtility.findAllPossibleEntitiesAndGroupByValue(document,
            entitiesToMigrate.stream().map(MigrationEntity::getManualEntity).toList());

    for (MigrationEntity migrationEntity : entitiesToMigrate) {
        ManualEntity manualEntity = migrationEntity.getManualEntity();
        Optional<TextEntity> optionalTextEntity = entityFindingUtility.findClosestEntityAndReturnEmptyIfNotFound(manualEntity, tempEntitiesByValue, MATCH_THRESHOLD);

        if (optionalTextEntity.isEmpty()) {
            // Not found in the document: keep the manual entity and its ID as they are.
            migrationEntity.setMigratedEntity(manualEntity);
            migrationEntity.setOldId(manualEntity.getId());
            migrationEntity.setNewId(manualEntity.getId());
            continue;
        }

        TextEntity entity = createCorrectEntity(manualEntity, document, optionalTextEntity.get().getTextRange());
        migrationEntity.setMigratedEntity(entity);
        migrationEntity.setOldId(manualEntity.getId());
        migrationEntity.setNewId(entity.getPositionsOnPagePerPage().get(0).getId()); // Can only be on one page, since redactionLogEntries can only be on one page
    }

    tempEntitiesByValue.values().stream().flatMap(Collection::stream).forEach(TextEntity::removeFromGraph);
    return entitiesToMigrate;
}

/**
 * Transfers the state of the redaction log entry (removed, applied or skipped) onto the manual entity of the given migration entity.
 */
private static void transferRedactionLogState(MigrationEntity migrationEntity) {

    ManualEntity manualEntity = migrationEntity.getManualEntity();
    if (migrationEntity.getRedactionLogEntry().lastChangeIsRemoved()) {
        // For removed entries the reason is taken from the redaction log entry.
        manualEntity.remove(manualEntity.getRuleIdentifier(), migrationEntity.getRedactionLogEntry().getReason());
    } else if (manualEntity.isApplied()) {
        manualEntity.apply(manualEntity.getRuleIdentifier(), manualEntity.getReason(), manualEntity.getLegalBasis());
    } else {
        manualEntity.skip(manualEntity.getRuleIdentifier(), manualEntity.getReason());
    }
}
/**
 * Creates a {@link TextEntity} at the given text range and copies matched rules, dictionary flags and manual
 * changes from the manual entity onto it.
 *
 * @param manualEntity     the entity whose properties are transferred
 * @param node             the node in which the entity is created
 * @param closestTextRange the text range of the new entity
 * @return the newly created entity
 */
private TextEntity createCorrectEntity(ManualEntity manualEntity, SemanticNode node, TextRange closestTextRange) {

    TextEntity migratedEntity = new EntityCreationService(entityEnrichmentService)
            .forceByTextRange(closestTextRange, manualEntity.getType(), manualEntity.getEntityType(), node);

    migratedEntity.addMatchedRules(manualEntity.getMatchedRuleList());
    migratedEntity.setDictionaryEntry(manualEntity.isDictionaryEntry());
    migratedEntity.setDossierDictionaryEntry(manualEntity.isDossierDictionaryEntry());

    var manualChanges = manualEntity.getManualOverwrite().getManualChangeLog();
    migratedEntity.getManualOverwrite().addChanges(manualChanges);
    return migratedEntity;
}
}

View File

@ -421,10 +421,6 @@ public class ComponentCreationService {
groupedEntities.forEach(entity -> create(ruleIdentifier, name, entity.getValue(), valueDescription, entity));
}
groupedEntities.stream()
.filter(entity -> !(entity.getContainingNode() instanceof TableCell))
.forEach(entity -> create(ruleIdentifier, name, entity.getValue(), valueDescription, entity));
groupedEntities.stream()
.filter(entity -> entity.getContainingNode() instanceof TableCell)
.collect(Collectors.groupingBy(entity -> ((TableCell) entity.getContainingNode()).getRow()))
@ -433,8 +429,7 @@ public class ComponentCreationService {
.sorted(Comparator.comparingInt(Map.Entry::getKey))
.map(Map.Entry::getValue)
.forEach(entitiesInSameRow -> create(ruleIdentifier,
name,
entitiesInSameRow.stream().sorted(Comparator.comparing(Entity::getType).reversed()).map(Entity::getValue).collect(Collectors.joining(", ")),
name, entitiesInSameRow.stream().sorted(EntityComparators.first()).map(Entity::getValue).collect(Collectors.joining(", ")),
valueDescription,
entitiesInSameRow));
});

View File

@ -0,0 +1,164 @@
package com.iqser.red.service.redaction.v1.server.service.document;
import static java.lang.String.format;
import static java.util.stream.Collectors.groupingBy;
import java.awt.geom.Rectangle2D;
import java.util.Collection;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.server.model.ManualEntity;
import com.iqser.red.service.redaction.v1.server.model.RectangleWithPage;
import com.iqser.red.service.redaction.v1.server.model.dictionary.SearchImplementation;
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.service.DictionaryService;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@Service
public class EntityFindingUtility {
EntityCreationService entityCreationService;
DictionaryService dictionaryService;
/**
 * Stores the dictionary service and creates the {@link EntityCreationService} used for building temporary entities.
 */
@Autowired
public EntityFindingUtility(EntityEnrichmentService entityEnrichmentService, DictionaryService dictionaryService) {

    this.dictionaryService = dictionaryService;
    this.entityCreationService = new EntityCreationService(entityEnrichmentService);
}
/**
 * Picks, from the entities sharing the identifier's value, the one whose rectangles are closest to the
 * identifier's positions.
 *
 * @param identifier            the entity to locate
 * @param entitiesWithSameValue candidate entities keyed by their lower-cased value
 * @param matchThreshold        maximum accepted distance between identifier and candidate
 * @return the closest candidate, or empty if the value is unknown, no candidate covers the identifier's pages,
 *         or the closest candidate is further away than {@code matchThreshold}; each of these cases is logged
 */
public Optional<TextEntity> findClosestEntityAndReturnEmptyIfNotFound(ManualEntity identifier, Map<String, List<TextEntity>> entitiesWithSameValue, double matchThreshold) {

    List<TextEntity> candidates = entitiesWithSameValue.get(identifier.getValue().toLowerCase(Locale.ENGLISH));
    if (entityIdentifierValueNotFound(candidates)) {
        log.warn("Entity could not be created with identifier: {}, due to the value {} not being found anywhere.", identifier, identifier.getValue());
        return Optional.empty();
    }

    List<RectangleWithPage> wantedPositions = identifier.getEntityPosition();
    Comparator<TextEntity> byDistanceToWanted = Comparator.comparingDouble(candidate -> calculateMinDistance(wantedPositions, candidate));
    Optional<TextEntity> closestOnSamePages = candidates.stream()
            .filter(candidate -> pagesMatch(candidate, wantedPositions))
            .min(byDistanceToWanted);
    if (closestOnSamePages.isEmpty()) {
        log.warn("No Entity with value {} found on page {}", identifier.getValue(), wantedPositions);
        return Optional.empty();
    }

    TextEntity closestEntity = closestOnSamePages.get();
    double distance = calculateMinDistance(wantedPositions, closestEntity);
    if (distance > matchThreshold) {
        log.warn("For entity {} on page {} with positions {} distance to closest found entity is {} and therefore higher than the threshold of {}",
                identifier.getValue(),
                wantedPositions.get(0).pageNumber(),
                wantedPositions.stream().map(RectangleWithPage::rectangle2D).toList(),
                distance,
                matchThreshold);
        return Optional.empty();
    }

    return Optional.of(closestEntity);
}
/**
 * Returns {@code true} if the candidate list is {@code null} or has no elements.
 */
private static boolean entityIdentifierValueNotFound(List<TextEntity> possibleEntities) {

    if (possibleEntities == null) {
        return true;
    }
    return possibleEntities.isEmpty();
}
/**
 * Checks that every page referenced by the original positions is also a page the entity has positions on.
 */
private static boolean pagesMatch(TextEntity entity, List<RectangleWithPage> originalPositions) {

    Set<Integer> pagesOfEntity = entity.getPositionsOnPagePerPage()
            .stream()
            .map(positionOnPage -> positionOnPage.getPage().getNumber())
            .collect(Collectors.toSet());
    return originalPositions.stream().map(RectangleWithPage::pageNumber).allMatch(pagesOfEntity::contains);
}
/**
 * Sums, over all original rectangles, the distance to the closest rectangle of the entity on the same page.
 *
 * @return the summed distance, or {@link Double#MAX_VALUE} if the entity does not have the same number of
 *         rectangles as the original positions
 */
private double calculateMinDistance(List<RectangleWithPage> originalPositions, TextEntity entity) {

    boolean sameRectangleCount = originalPositions.size() == countRectangles(entity);
    if (sameRectangleCount) {
        return originalPositions.stream()
                .mapToDouble(original -> calculateMinDistancePerRectangle(entity, original.pageNumber(), original.rectangle2D()))
                .sum();
    }
    return Double.MAX_VALUE;
}
/**
 * Counts the per-line rectangles of the entity across all of its page positions.
 */
private static long countRectangles(TextEntity entity) {

    long rectangleCount = 0;
    for (PositionOnPage positionOnPage : entity.getPositionsOnPagePerPage()) {
        rectangleCount += positionOnPage.getRectanglePerLine().size();
    }
    return rectangleCount;
}
/**
 * Finds the smallest distance between the original rectangle and any per-line rectangle of the entity on the given page.
 *
 * @return the smallest distance, or {@link Double#MAX_VALUE} if the entity has no rectangle on that page
 */
private double calculateMinDistancePerRectangle(TextEntity entity, int pageNumber, Rectangle2D originalRectangle) {

    return entity.getPositionsOnPagePerPage()
            .stream()
            .filter(positionOnPage -> positionOnPage.getPage().getNumber() == pageNumber)
            .flatMap(positionOnPage -> positionOnPage.getRectanglePerLine().stream())
            .mapToDouble(candidate -> calculateDistance(candidate, originalRectangle))
            .min()
            .orElse(Double.MAX_VALUE);
}
/**
 * Calculates the distance between two rectangles as the sum of the absolute differences of their four edges.
 * <p>
 * For each rectangle the lower and upper bound per axis are determined with {@code min}/{@code max} over the
 * reported edges, so the result does not depend on which edge a rectangle reports as its minimum.
 *
 * @param rectangle1 the first rectangle
 * @param rectangle2 the second rectangle
 * @return the summed edge differences; {@code 0} if both rectangles have the same bounds
 */
public double calculateDistance(Rectangle2D rectangle1, Rectangle2D rectangle2) {

    // Bounds of the first rectangle.
    double lowX1 = Math.min(rectangle1.getMinX(), rectangle1.getMaxX());
    double lowY1 = Math.min(rectangle1.getMinY(), rectangle1.getMaxY());
    double highX1 = Math.max(rectangle1.getMinX(), rectangle1.getMaxX());
    double highY1 = Math.max(rectangle1.getMinY(), rectangle1.getMaxY());

    // Bounds of the second rectangle.
    double lowX2 = Math.min(rectangle2.getMinX(), rectangle2.getMaxX());
    double lowY2 = Math.min(rectangle2.getMinY(), rectangle2.getMaxY());
    double highX2 = Math.max(rectangle2.getMinX(), rectangle2.getMaxX());
    double highY2 = Math.max(rectangle2.getMinY(), rectangle2.getMaxY());

    double lowXGap = Math.abs(lowX1 - lowX2);
    double lowYGap = Math.abs(lowY1 - lowY2);
    double highXGap = Math.abs(highX1 - highX2);
    double highYGap = Math.abs(highY1 - highY2);
    return lowXGap + lowYGap + highXGap + highYGap;
}
/**
 * Searches the node's text for every value of the given manual entities and creates a temporary entity of type
 * {@code "temp"} for each hit.
 * <p>
 * NOTE(review): the temporary entities are created via {@code forceByTextRange} on the node; the callers visible in
 * this code base remove them from the graph again after matching - confirm for new callers.
 *
 * @param node           the node whose text is searched
 * @param manualEntities the entities whose values are searched for
 * @return the temporary entities grouped by their lower-cased ({@link Locale#ROOT}) value
 * @throws IllegalArgumentException if a manual entity has a position on a page the node is not on
 */
public Map<String, List<TextEntity>> findAllPossibleEntitiesAndGroupByValue(SemanticNode node, List<ManualEntity> manualEntities) {

    Set<Integer> pageNumbers = manualEntities.stream().flatMap(entry -> entry.getEntityPosition().stream().map(RectangleWithPage::pageNumber)).collect(Collectors.toSet());
    // Lower-case with Locale.ROOT, matching the grouping key below. The default-locale variant would e.g. map "I" to
    // a dotless "ı" under a Turkish default locale, so search values and lookup keys would no longer agree.
    Set<String> entryValues = manualEntities.stream().map(ManualEntity::getValue).map(value -> value.toLowerCase(Locale.ROOT)).collect(Collectors.toSet());

    List<Integer> missingPages = pageNumbers.stream().filter(pageNumber -> !node.onPage(pageNumber)).toList();
    if (!missingPages.isEmpty()) {
        throw new IllegalArgumentException(format("SemanticNode \"%s\" does not contain these pages %s, it has pages: %s", node, missingPages, node.getPages()));
    }

    // NOTE(review): the second constructor argument presumably enables case-insensitive search - confirm in SearchImplementation.
    SearchImplementation searchImplementation = new SearchImplementation(entryValues, true);
    return searchImplementation.getBoundaries(node.getTextBlock(), node.getTextRange())
            .stream()
            .map(boundary -> entityCreationService.forceByTextRange(boundary, "temp", EntityType.ENTITY, node))
            .collect(groupingBy(entity -> entity.getValue().toLowerCase(Locale.ROOT)));
}
}

View File

@ -1,15 +1,9 @@
package com.iqser.red.service.redaction.v1.server.service.document;
import static java.lang.String.format;
import static java.util.stream.Collectors.groupingBy;
import java.awt.geom.Rectangle2D;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
@ -19,15 +13,11 @@ import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.ManualRedactionEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLog;
import com.iqser.red.service.redaction.v1.server.model.ManualEntity;
import com.iqser.red.service.redaction.v1.server.model.RectangleWithPage;
import com.iqser.red.service.redaction.v1.server.model.dictionary.SearchImplementation;
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.model.document.entity.PositionOnPage;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.service.DictionaryService;
@ -41,52 +31,20 @@ import lombok.extern.slf4j.Slf4j;
public class ManualEntityCreationService {
static double MATCH_THRESHOLD = 5; // Is compared to the sum of distances in pdf coordinates for each corner of the bounding box of the entities
EntityFindingUtility entityFindingUtility;
EntityCreationService entityCreationService;
DictionaryService dictionaryService;
@Autowired
public ManualEntityCreationService(EntityEnrichmentService entityEnrichmentService, DictionaryService dictionaryService) {
public ManualEntityCreationService(EntityEnrichmentService entityEnrichmentService, DictionaryService dictionaryService, EntityFindingUtility entityFindingUtility) {
this.entityFindingUtility = entityFindingUtility;
entityCreationService = new EntityCreationService(entityEnrichmentService);
this.dictionaryService = dictionaryService;
}
public List<ManualEntity> toTextEntity(RedactionLog redactionLog, SemanticNode node) {
List<ManualEntity> manualEntities = redactionLog.getRedactionLogEntry().stream().map(ManualEntity::fromRedactionLogEntry).peek(manualEntity -> {
if (manualEntity.isApplied()) {
manualEntity.apply(manualEntity.getRuleIdentifier(), manualEntity.getReason(), manualEntity.getLegalBasis());
} else {
manualEntity.skip(manualEntity.getRuleIdentifier(), manualEntity.getReason());
}
}).toList();
return toTextEntity(manualEntities, node);
}
public List<ManualEntity> toTextEntity(List<ManualEntity> manualEntities, SemanticNode node) {
Set<Integer> pageNumbers = manualEntities.stream().flatMap(entry -> entry.getEntityPosition().stream().map(RectangleWithPage::pageNumber)).collect(Collectors.toSet());
Set<String> entryValues = manualEntities.stream().map(ManualEntity::getValue).map(String::toLowerCase).collect(Collectors.toSet());
Map<String, List<TextEntity>> tempEntitiesByValue = findAllPossibleEntitiesAndGroupByValue(node, pageNumbers, entryValues);
List<ManualEntity> notFoundManualEntities = new LinkedList<>();
for (ManualEntity manualEntity : manualEntities) {
Optional<TextEntity> optionalRedactionEntity = findClosestEntityAndReturnEmptyIfNotFound(manualEntity, tempEntitiesByValue);
if (optionalRedactionEntity.isEmpty()) {
notFoundManualEntities.add(manualEntity);
continue;
}
createCorrectEntity(manualEntity, node, optionalRedactionEntity.get().getTextRange());
}
tempEntitiesByValue.values().stream().flatMap(Collection::stream).forEach(TextEntity::removeFromGraph);
return notFoundManualEntities;
}
public List<ManualEntity> createRedactionEntitiesIfFoundAndReturnNotFoundEntries(Set<ManualRedactionEntry> manualRedactionEntries,
SemanticNode node,
String dossierTemplateId) {
@ -102,12 +60,29 @@ public class ManualEntityCreationService {
}
/**
 * Tries to locate every manual entity in the node's text and creates the corresponding entity for each match.
 * <p>
 * The temporary entities created for the search are removed from the graph before returning.
 *
 * @param manualEntities the entities to locate
 * @param node           the node in which the entities are searched
 * @return the manual entities for which no match within {@code MATCH_THRESHOLD} was found
 */
public List<ManualEntity> toTextEntity(List<ManualEntity> manualEntities, SemanticNode node) {

    Map<String, List<TextEntity>> candidatesByValue = entityFindingUtility.findAllPossibleEntitiesAndGroupByValue(node, manualEntities);

    List<ManualEntity> unmatchedEntities = new LinkedList<>();
    for (ManualEntity manualEntity : manualEntities) {
        Optional<TextEntity> closestMatch = entityFindingUtility.findClosestEntityAndReturnEmptyIfNotFound(manualEntity, candidatesByValue, MATCH_THRESHOLD);
        if (closestMatch.isPresent()) {
            createCorrectEntity(manualEntity, node, closestMatch.get().getTextRange());
        } else {
            unmatchedEntities.add(manualEntity);
        }
    }

    // The search candidates were only needed for matching.
    for (List<TextEntity> candidates : candidatesByValue.values()) {
        candidates.forEach(TextEntity::removeFromGraph);
    }
    return unmatchedEntities;
}
/**
* Deletes the temp Entity and creates a RedactionEntity with correct values, based on the given parameters.
*
* @param manualEntity The entity identifier for the RedactionEntity.
* @param manualEntity The entity identifier for the RedactionEntity.
* @param node The SemanticNode associated with the RedactionEntity.
* @param closestTextRange The closest Boundary to the RedactionEntity.
* @param closestTextRange The closest Boundary to the RedactionEntity.
*/
private void createCorrectEntity(ManualEntity manualEntity, SemanticNode node, TextRange closestTextRange) {
@ -118,7 +93,6 @@ public class ManualEntityCreationService {
correctEntity.setDossierDictionaryEntry(manualEntity.isDossierDictionaryEntry());
correctEntity.getManualOverwrite().addChanges(manualEntity.getManualOverwrite().getManualChangeLog());
// AnnotationIds must match the IDs in the add requests, or comments break. Maybe think about migrating IDs on the fly!
List<PositionOnPage> redactionPositionsWithIdOfManualOnPage = new ArrayList<>(correctEntity.getPositionsOnPagePerPage().size());
for (PositionOnPage positionOnPage : correctEntity.getPositionsOnPagePerPage()) {
@ -127,123 +101,4 @@ public class ManualEntityCreationService {
correctEntity.setPositionsOnPagePerPage(redactionPositionsWithIdOfManualOnPage);
}
private Optional<TextEntity> findClosestEntityAndReturnEmptyIfNotFound(ManualEntity identifier, Map<String, List<TextEntity>> entitiesWithSameValue) {
List<TextEntity> possibleEntities = entitiesWithSameValue.get(identifier.getValue().toLowerCase(Locale.ENGLISH));
if (entityIdentifierValueNotFound(possibleEntities)) {
log.warn("Entity could not be created with identifier: {}, due to the value {} not being found anywhere.", identifier, identifier.getValue());
return Optional.empty();
}
Optional<TextEntity> optionalClosestEntity = possibleEntities.stream()
.filter(entity -> pagesMatch(entity, identifier.getEntityPosition()))
.min(Comparator.comparingDouble(entity -> calculateMinDistance(identifier.getEntityPosition(), entity)));
if (optionalClosestEntity.isEmpty()) {
log.warn("No Entity with value {} found on page {}", identifier.getValue(), identifier.getEntityPosition());
return Optional.empty();
}
TextEntity closestEntity = optionalClosestEntity.get();
double distance = calculateMinDistance(identifier.getEntityPosition(), closestEntity);
if (distance > MATCH_THRESHOLD) {
log.warn(format("Distance to closest found entity is %.2f and therefore higher than the threshold of %.2f for \n%s \n%s",
distance,
MATCH_THRESHOLD,
identifier.getEntityPosition(),
closestEntity.getPositionsOnPagePerPage()));
return Optional.empty();
}
return Optional.of(closestEntity);
}
private static boolean entityIdentifierValueNotFound(List<TextEntity> possibleEntities) {
return possibleEntities == null || possibleEntities.isEmpty();
}
private Map<String, List<TextEntity>> findAllPossibleEntitiesAndGroupByValue(SemanticNode node, Set<Integer> pageNumbers, Set<String> entryValues) {
if (!pageNumbers.stream().allMatch(node::onPage)) {
throw new IllegalArgumentException(format("SemanticNode \"%s\" does not contain these pages %s, it has pages: %s",
node,
pageNumbers.stream().filter(pageNumber -> !node.onPage(pageNumber)).toList(),
node.getPages()));
}
SearchImplementation searchImplementation = new SearchImplementation(entryValues, true);
return searchImplementation.getBoundaries(node.getTextBlock(), node.getTextRange())
.stream().map(boundary -> entityCreationService.forceByTextRange(boundary, "temp", EntityType.ENTITY, node))
.collect(groupingBy(entity -> entity.getValue().toLowerCase(Locale.ROOT)));
}
private static boolean allValuesFound(Map<String, List<TextEntity>> entitiesByValue, Set<String> entryValues) {
return entitiesByValue.keySet().equals(entryValues);
}
private static boolean pagesMatch(TextEntity entity, List<RectangleWithPage> originalPositions) {
Set<Integer> entityPageNumbers = entity.getPositionsOnPagePerPage().stream().map(PositionOnPage::getPage).map(Page::getNumber).collect(Collectors.toSet());
Set<Integer> originalPageNumbers = originalPositions.stream().map(RectangleWithPage::pageNumber).collect(Collectors.toSet());
return entityPageNumbers.containsAll(originalPageNumbers);
}
private double calculateMinDistance(List<RectangleWithPage> originalPositions, TextEntity entity) {
if (originalPositions.size() != countRectangles(entity)) {
return Double.MAX_VALUE;
}
return originalPositions.stream()
.mapToDouble(rectangleWithPage -> calculateMinDistancePerRectangle(entity, rectangleWithPage.pageNumber(), rectangleWithPage.rectangle2D()))
.sum();
}
private static long countRectangles(TextEntity entity) {
return entity.getPositionsOnPagePerPage().stream().mapToLong(redactionPosition -> redactionPosition.getRectanglePerLine().size()).sum();
}
private double calculateMinDistancePerRectangle(TextEntity entity, int pageNumber, Rectangle2D originalRectangle) {
return entity.getPositionsOnPagePerPage()
.stream()
.filter(redactionPosition -> redactionPosition.getPage().getNumber() == pageNumber)
.map(PositionOnPage::getRectanglePerLine)
.flatMap(Collection::stream)
.mapToDouble(rectangle -> calculateDistance(rectangle, originalRectangle))
.min()
.orElse(Double.MAX_VALUE);
}
private double calculateDistance(Rectangle2D rectangle, Rectangle2D rectangle2D) {
// mirrored coordinates safe comparison
double minX1 = Math.min(rectangle.getMinX(), rectangle.getMaxX());
double minY1 = Math.min(rectangle.getMinY(), rectangle.getMaxY());
double maxX1 = Math.max(rectangle.getMinX(), rectangle.getMaxX());
double maxY1 = Math.max(rectangle.getMinY(), rectangle.getMaxY());
double minX2 = Math.min(rectangle2D.getMinX(), rectangle2D.getMaxX());
double minY2 = Math.min(rectangle2D.getMinY(), rectangle2D.getMaxY());
double maxX2 = Math.max(rectangle2D.getMinX(), rectangle2D.getMaxX());
double maxY2 = Math.max(rectangle2D.getMinY(), rectangle2D.getMaxY());
return Math.abs(minX1 - minX2) //
+ Math.abs(minY1 - minY2) //
+ Math.abs(maxX1 - maxX2) //
+ Math.abs(maxY1 - maxY2);
}
}

View File

@ -11,10 +11,9 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLog;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.imported.ImportedRedactions;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.section.SectionGrid;
import com.iqser.red.service.redaction.v1.server.client.model.NerEntitiesModel;
import com.iqser.red.service.redaction.v1.server.utils.exception.NotFoundException;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentData;
import com.iqser.red.service.redaction.v1.server.utils.exception.NotFoundException;
import com.iqser.red.storage.commons.exception.StorageObjectDoesNotExist;
import com.iqser.red.storage.commons.service.StorageService;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPage;
@ -140,17 +139,6 @@ public class RedactionStorageService {
}
@Timed("redactmanager_getSectionGrid")
public SectionGrid getSectionGrid(String dossierId, String fileId) {
try {
return storageService.readJSONObject(TenantContext.getTenantId(), StorageIdUtils.getStorageId(dossierId, fileId, FileType.SECTION_GRID), SectionGrid.class);
} catch (StorageObjectDoesNotExist e) {
throw new NotFoundException("Section Grid is not available.");
}
}
public ComponentLog getComponentLog(String dossierId, String fileId) {
try {

View File

@ -0,0 +1,54 @@
package com.iqser.red.service.redaction.v1.server.utils;
import java.util.LinkedList;
import java.util.Set;
import java.util.function.BiConsumer;
import java.util.function.BinaryOperator;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.Collector;
import com.google.common.base.Functions;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.migration.MigratedIds;
import com.iqser.red.service.redaction.v1.server.model.MigrationEntity;
/**
 * Collects the old and new IDs of {@link MigrationEntity} instances into a single {@link MigratedIds}.
 * <p>
 * The result container is backed by a {@link LinkedList} and is therefore not safe for concurrent accumulation.
 * The collector deliberately does not declare {@link Characteristics#CONCURRENT}, so a parallel stream gives every
 * worker its own container and merges them through {@link #combiner()}.
 */
public class MigratedIdsCollector implements Collector<MigrationEntity, MigratedIds, MigratedIds> {

    /** Creates an empty {@link MigratedIds} container. */
    @Override
    public Supplier<MigratedIds> supplier() {

        return () -> new MigratedIds(new LinkedList<>());
    }


    /** Adds the old-to-new ID mapping of one migration entity to the container. */
    @Override
    public BiConsumer<MigratedIds, MigrationEntity> accumulator() {

        return (migratedIds, migrationEntity) -> migratedIds.addMapping(migrationEntity.getOldId(), migrationEntity.getNewId());
    }


    /** Merges the mappings of the second container into the first and returns the first. */
    @Override
    public BinaryOperator<MigratedIds> combiner() {

        return (migratedIds, migratedIds2) -> {
            migratedIds.getMappings().addAll(migratedIds2.getMappings());
            return migratedIds;
        };
    }


    /** The accumulated container is already the result. */
    @Override
    public Function<MigratedIds, MigratedIds> finisher() {

        return Function.identity();
    }


    /**
     * Declares the finisher as identity and the result as order-independent. {@code CONCURRENT} is intentionally
     * absent because the container is not thread-safe.
     */
    @Override
    public Set<Characteristics> characteristics() {

        return Set.of(Characteristics.IDENTITY_FINISH, Characteristics.UNORDERED);
    }

}

View File

@ -1,5 +1,28 @@
package com.iqser.red.service.redaction.v1.server;
import static org.mockito.Mockito.when;
import java.io.File;
import java.io.InputStream;
import java.net.URL;
import java.nio.file.Files;
import java.time.OffsetDateTime;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.junit.jupiter.api.AfterEach;
import org.mockito.stubbing.Answer;
import org.springframework.amqp.rabbit.core.RabbitTemplate;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.mock.mockito.MockBean;
import org.springframework.core.io.ClassPathResource;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.dictionarymerge.commons.DictionaryEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
@ -12,10 +35,10 @@ import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.service.redaction.v1.server.controller.RedactionController;
import com.iqser.red.service.redaction.v1.server.service.AnalyzeService;
import com.iqser.red.service.redaction.v1.server.utils.ResourceLoader;
import com.iqser.red.service.redaction.v1.server.utils.TextNormalizationUtilities;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import com.iqser.red.service.redaction.v1.server.utils.LayoutParsingRequestProvider;
import com.iqser.red.service.redaction.v1.server.utils.ResourceLoader;
import com.iqser.red.service.redaction.v1.server.utils.TextNormalizationUtilities;
import com.iqser.red.storage.commons.service.StorageService;
import com.iqser.red.storage.commons.utils.FileSystemBackedStorageService;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingFinishedEvent;
@ -23,23 +46,8 @@ import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsi
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingPipeline;
import com.knecon.fforesight.tenantcommons.TenantContext;
import com.knecon.fforesight.tenantcommons.TenantsClient;
import lombok.SneakyThrows;
import org.junit.jupiter.api.AfterEach;
import org.mockito.stubbing.Answer;
import org.springframework.amqp.rabbit.core.RabbitTemplate;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.mock.mockito.MockBean;
import org.springframework.core.io.ClassPathResource;
import java.io.File;
import java.io.InputStream;
import java.net.URL;
import java.nio.file.Files;
import java.time.OffsetDateTime;
import java.util.*;
import java.util.stream.Collectors;
import static org.mockito.Mockito.when;
public abstract class AbstractRedactionIntegrationTest {
@ -432,7 +440,6 @@ public abstract class AbstractRedactionIntegrationTest {
/**
 * Runs the layout parsing pipeline for the given analyze request and stores the parsing results.
 *
 * @param layoutParsingType the parsing flavour handed to the request provider
 * @param request           the analyze request the layout parsing request is built from
 * @return the event returned by the pipeline once parsing has finished and the files are saved
 */
@SneakyThrows
protected LayoutParsingFinishedEvent analyzeDocumentStructure(LayoutParsingType layoutParsingType, AnalyzeRequest request) {

    var layoutParsingRequest = LayoutParsingRequestProvider.build(layoutParsingType, request);
    return layoutParsingPipeline.parseLayoutAndSaveFilesToStorage(layoutParsingRequest);
}

View File

@ -0,0 +1,168 @@
package com.iqser.red.service.redaction.v1.server;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.File;
import java.io.FileOutputStream;
import java.nio.file.Path;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.context.annotation.Import;
import org.springframework.core.io.ClassPathResource;
import org.springframework.test.context.junit.jupiter.SpringExtension;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Position;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Rectangle;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLog;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse;
import com.iqser.red.service.redaction.v1.server.document.graph.BuildDocumentIntegrationTest;
import com.iqser.red.service.redaction.v1.server.model.MigratedEntityLog;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils;
import com.iqser.red.service.redaction.v1.server.service.RedactionLogToEntityLogMigrationService;
import lombok.SneakyThrows;
/**
 * Integration test for the migration of a legacy {@link RedactionLog} into an {@link EntityLog}.
 * <p>
 * The test builds the document graph for a fixture PDF, migrates the stored redaction log and checks that
 * every legacy entry has a matching migrated entry with equivalent content.
 */
@ExtendWith(SpringExtension.class)
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
@Import(MigrationIntegrationTest.TestConfiguration.class)
public class MigrationIntegrationTest extends BuildDocumentIntegrationTest {

    @Autowired
    RedactionLogToEntityLogMigrationService redactionLogToEntityLogMigrationService;

    @Autowired
    ObjectMapper mapper;


    /**
     * Migrates the fixture redaction log, compares log-level metadata and every entry, and finally writes an
     * annotated PDF to the temporary directory for manual inspection.
     */
    @Test
    @SneakyThrows
    public void testMigration() {

        String fileName = "files/migration/def8f960580f088b975ba806dfae1f87.ORIGIN.pdf";
        String imageFileName = "files/migration/def8f960580f088b975ba806dfae1f87.IMAGE_INFO.json";
        String tableFileName = "files/migration/def8f960580f088b975ba806dfae1f87.TABLES.json";

        Document document = buildGraph(fileName, imageFileName, tableFileName);

        RedactionLog redactionLog;
        try (var in = new ClassPathResource("files/migration/def8f960580f088b975ba806dfae1f87.REDACTION_LOG.json").getInputStream()) {
            redactionLog = mapper.readValue(in, RedactionLog.class);
        }

        MigratedEntityLog migratedEntityLog = redactionLogToEntityLogMigrationService.migrate(redactionLog, document);
        // The annotation step below reads the entity log from storage, so it has to be stored first.
        redactionStorageService.storeObject(TEST_DOSSIER_ID, TEST_FILE_ID, FileType.ENTITY_LOG, migratedEntityLog.getEntityLog());

        assertEquals(redactionLog.getRedactionLogEntry().size(), migratedEntityLog.getEntityLog().getEntityLogEntry().size());
        assertEquals(redactionLog.getRedactionLogEntry().size(), migratedEntityLog.getMigratedIds().getMappings().size());

        EntityLog entityLog = migratedEntityLog.getEntityLog();
        assertEquals(redactionLog.getAnalysisNumber(), entityLog.getAnalysisNumber());
        assertEquals(redactionLog.getAnalysisVersion(), entityLog.getAnalysisVersion());
        assertEquals(redactionLog.getDictionaryVersion(), entityLog.getDictionaryVersion());
        assertEquals(redactionLog.getDossierDictionaryVersion(), entityLog.getDossierDictionaryVersion());
        assertEquals(redactionLog.getLegalBasisVersion(), entityLog.getLegalBasisVersion());
        assertEquals(redactionLog.getRulesVersion(), entityLog.getRulesVersion());
        assertEquals(redactionLog.getLegalBasis().size(), entityLog.getLegalBasis().size());

        Map<String, String> migratedIds = migratedEntityLog.getMigratedIds().buildOldToNewMapping();
        migratedIds.forEach((oldId, newId) -> assertEntryIsEqual(oldId, newId, redactionLog, entityLog, migratedIds));

        AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build());

        // String.replace is used on purpose: replaceAll would interpret ".pdf" as a regex where '.' matches any character.
        File outputFile = Path.of(OsUtils.getTemporaryDirectory()).resolve(Path.of(fileName.replace(".pdf", "_MIGRATED.pdf")).getFileName()).toFile();
        try (FileOutputStream fileOutputStream = new FileOutputStream(outputFile)) {
            fileOutputStream.write(annotateResponse.getDocument());
        }
    }


    /**
     * Looks up the legacy entry by {@code oldId} and the migrated entry by {@code newId} and asserts that both
     * describe the same redaction.
     *
     * @param oldId           id of the entry in the legacy redaction log
     * @param newId           id of the entry in the migrated entity log
     * @param redactionLog    the legacy log
     * @param entityLog       the migrated log
     * @param oldToNewMapping mapping of legacy ids to migrated ids, used to translate references
     */
    private void assertEntryIsEqual(String oldId, String newId, RedactionLog redactionLog, EntityLog entityLog, Map<String, String> oldToNewMapping) {

        RedactionLogEntry redactionLogEntry = redactionLog.getRedactionLogEntry().stream().filter(entry -> entry.getId().equals(oldId)).findAny().orElseThrow();
        EntityLogEntry entityLogEntry = entityLog.getEntityLogEntry().stream().filter(entry -> entry.getId().equals(newId)).findAny().orElseThrow();

        // Values are only compared for non-image entries, and only case-insensitively.
        if (!redactionLogEntry.isImage()) {
            assertEquals(redactionLogEntry.getValue().toLowerCase(Locale.ENGLISH), entityLogEntry.getValue().toLowerCase(Locale.ENGLISH));
        }
        assertEquals(redactionLogEntry.getChanges().size(), entityLogEntry.getChanges().size());
        assertEquals(redactionLogEntry.getManualChanges().size(), entityLogEntry.getManualChanges().size());
        assertEquals(redactionLogEntry.getPositions().size(), entityLogEntry.getPositions().size());
        assertTrue(positionsAlmostEqual(redactionLogEntry.getPositions(), entityLogEntry.getPositions()));
        assertEquals(redactionLogEntry.getColor(), entityLogEntry.getColor());
        assertEqualsNullSafe(redactionLogEntry.getLegalBasis(), entityLogEntry.getLegalBasis());
        assertEqualsNullSafe(redactionLogEntry.getReason(), entityLogEntry.getReason());
        assertReferencesEqual(redactionLogEntry.getReference(), entityLogEntry.getReference(), oldToNewMapping);
        assertEquals(redactionLogEntry.isDictionaryEntry(), entityLogEntry.isDictionaryEntry());
        assertEquals(redactionLogEntry.isDossierDictionaryEntry(), entityLogEntry.isDossierDictionaryEntry());
        // The engine enums of both models are compared by constant name.
        assertEquals(redactionLogEntry.getEngines().stream().map(Enum::name).collect(Collectors.toSet()),
                entityLogEntry.getEngines().stream().map(Enum::name).collect(Collectors.toSet()));
    }


    /**
     * Compares the legacy rectangles with the migrated positions pairwise, index by index.
     * Page numbers must match exactly; x, y, width and height may differ by at most the tolerance.
     * NOTE(review): this assumes both lists are in the same order; confirm the migration keeps position order.
     *
     * @param positions1 rectangles of the legacy entry
     * @param positions2 positions of the migrated entry; the rectangle array is compared as [x, y, width, height]
     * @return {@code true} if both lists have the same size and every pair is within tolerance
     */
    private boolean positionsAlmostEqual(List<Rectangle> positions1, List<Position> positions2) {

        double tolerance = 2;
        if (positions1.size() != positions2.size()) {
            return false;
        }
        for (int i = 0; i < positions1.size(); i++) {
            // Compare the i-th pair; the former code always read index 0 and so only ever checked the first rectangle.
            Rectangle p1 = positions1.get(i);
            Position p2 = positions2.get(i);
            if (p1.getPage() != p2.getPageNumber()) {
                return false;
            }
            if (Math.abs(p1.getHeight() - p2.getRectangle()[3]) > tolerance) {
                return false;
            }
            if (Math.abs(p1.getWidth() - p2.getRectangle()[2]) > tolerance) {
                return false;
            }
            if (Math.abs(p1.getTopLeft().getX() - p2.getRectangle()[0]) > tolerance) {
                return false;
            }
            if (Math.abs(p1.getTopLeft().getY() - p2.getRectangle()[1]) > tolerance) {
                return false;
            }
        }
        return true;
    }


    /**
     * Asserts that two strings are equal, treating {@code null} and the empty string as the same value.
     */
    private void assertEqualsNullSafe(String string1, String string2) {

        assertEquals(Objects.requireNonNullElse(string1, ""), Objects.requireNonNullElse(string2, ""));
    }


    /**
     * Asserts that the legacy references, translated to migrated ids, equal the references of the migrated entry.
     * A missing legacy reference set requires the migrated set to be missing or empty.
     *
     * @param reference       references of the legacy entry (legacy ids), may be {@code null}
     * @param reference1      references of the migrated entry (migrated ids)
     * @param oldToNewMapping mapping of legacy ids to migrated ids
     */
    private void assertReferencesEqual(Set<String> reference, Set<String> reference1, Map<String, String> oldToNewMapping) {

        if (reference == null) {
            assertTrue(reference1 == null || reference1.isEmpty());
            return;
        }
        assertEquals(reference.stream().map(oldToNewMapping::get).collect(Collectors.toSet()), reference1);
    }

}

View File

@ -266,7 +266,7 @@ public class RedactionIntegrationTest extends AbstractRedactionIntegrationTest {
@Test
public void titleExtraction() throws IOException {
AnalyzeRequest request = uploadFileToStorage("files/Metolachlor/S-Metolachlor_RAR_02_Volume_2_2018-09-06.pdf");
AnalyzeRequest request = uploadFileToStorage("files/migration/def8f960580f088b975ba806dfae1f87.ORIGIN.pdf");
System.out.println("Start Full integration test");
analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
System.out.println("Finished structure analysis");

View File

@ -29,7 +29,6 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Position;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.dossier.file.FileType;
import com.iqser.red.service.redaction.v1.server.service.DictionaryService;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import com.iqser.red.service.redaction.v1.server.utils.RectangleTransformations;
@ -41,11 +40,10 @@ import lombok.SneakyThrows;
public class AnnotationService {
private final RedactionStorageService redactionStorageService;
private final DictionaryService dictionaryService;
/**
* This method draws a PDF Text Markup Annotation for each RedactionLogEntry and the section grid.
* This method draws a PDF Text Markup Annotation for each EntityLogEntry and the Viewer document.
* A Text Markup Annotation has two main functionalities.
* First, it highlights text with a color and second, when hovered over in a PDF Viewer it displays a popup message.
* Where the Popup message appears is defined by the Rectangle provided to the Annotation.
@ -70,7 +68,6 @@ public class AnnotationService {
try (PDDocument pdDocument = Loader.loadPDF(storedObjectFile)) {
pdDocument.setAllSecurityToBeRemoved(true);
dictionaryService.updateDictionary(annotateRequest.getDossierTemplateId(), annotateRequest.getDossierId());
annotate(pdDocument, entityLog);
PDDocumentCatalog catalog = pdDocument.getDocumentCatalog();

View File

@ -46,6 +46,7 @@ public class BuildDocumentIntegrationTest extends AbstractRedactionIntegrationTe
}
@SneakyThrows
protected Document buildGraph(String filename) {
@ -65,6 +66,20 @@ public class BuildDocumentIntegrationTest extends AbstractRedactionIntegrationTe
}
/**
 * Builds the document graph for a PDF together with explicitly given image and table fixture files.
 * The ".pdf" extension is appended to {@code filename} if it is missing.
 * Note that {@code prepareStorage} receives the table file before the image file.
 *
 * @param filename      classpath location of the PDF, with or without the ".pdf" extension
 * @param imageFileName classpath location of the image fixture
 * @param tableFileName classpath location of the table fixture
 * @return the document graph mapped from the document data read back from storage
 */
@SneakyThrows
protected Document buildGraph(String filename, String imageFileName, String tableFileName) {

    String pdfFileName = filename.endsWith(".pdf") ? filename : filename + ".pdf";

    AnalyzeRequest analyzeRequest = prepareStorage(pdfFileName, tableFileName, imageFileName);
    analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, analyzeRequest);

    var storedDocumentData = redactionStorageService.getDocumentData(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
    return DocumentGraphMapper.toDocumentGraph(storedDocumentData);
}
@SneakyThrows
protected Document buildGraphNoImages(String filename) {

View File

@ -1,162 +0,0 @@
package com.iqser.red.service.redaction.v1.server.document.graph;
import static java.util.stream.Collectors.toMap;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.when;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.stream.Stream;
import org.drools.io.ClassPathResource;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import com.google.common.base.Functions;
import com.google.common.collect.Sets;
import com.iqser.red.commons.jackson.ObjectMapperFactory;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.AnalyzeResult;
import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLog;
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.RedactionLogEntry;
import com.iqser.red.service.redaction.v1.server.model.ManualEntity;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.service.RedactionLogCreatorService;
import com.iqser.red.service.redaction.v1.server.service.document.DocumentGraphMapper;
import com.iqser.red.service.redaction.v1.server.service.document.ManualEntityCreationService;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
import com.knecon.fforesight.tenantcommons.TenantContext;
import lombok.SneakyThrows;
/**
 * Proof-of-concept test that migrates a legacy redaction log onto a freshly analysed document and compares
 * the migrated entry ids against the ids produced by a new analysis, using precision and recall.
 */
public class MigrationPocTest extends BuildDocumentIntegrationTest {

    private static final String RULES = loadFromClassPath("drools/rules.drl");

    /** Lower bound both precision and recall have to reach. */
    private static final double MIN_SCORE = 0.85;

    @Autowired
    private ManualEntityCreationService redactionLogAdapter;

    @Autowired
    private RedactionLogCreatorService redactionLogCreatorService;


    /**
     * Sets the tenant and stubs the rules and dictionary clients before each test.
     */
    @BeforeEach
    public void stubClients() {

        TenantContext.setTenantId("redaction");

        when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.ENTITY)).thenReturn(System.currentTimeMillis());
        when(rulesClient.getRules(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.ENTITY)).thenReturn(JSONPrimitive.of(RULES));
        when(rulesClient.getVersion(TEST_DOSSIER_TEMPLATE_ID, RuleFileType.COMPONENT)).thenReturn(-1L);

        loadDictionaryForTest();
        loadTypeForTest();
        loadNerForTest();

        // The dictionary version only needs to be stubbed once; an identical second stubbing was removed.
        when(dictionaryClient.getVersion(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(0L);
        when(dictionaryClient.getAllTypesForDossierTemplate(TEST_DOSSIER_TEMPLATE_ID, false)).thenReturn(getTypeResponse());
        // NOTE(review): the id is built from TEST_DOSSIER_TEMPLATE_ID while dossierTemplateId is set to
        // TEST_DOSSIER_ID - this looks swapped; confirm before relying on it.
        when(dictionaryClient.getAllTypesForDossier(TEST_DOSSIER_ID, false)).thenReturn(List.of(Type.builder()
                .id(DOSSIER_REDACTIONS_INDICATOR + ":" + TEST_DOSSIER_TEMPLATE_ID)
                .type(DOSSIER_REDACTIONS_INDICATOR)
                .dossierTemplateId(TEST_DOSSIER_ID)
                .hexColor("#ffe187")
                .isHint(hintTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
                .isCaseInsensitive(caseInSensitiveMap.get(DOSSIER_REDACTIONS_INDICATOR))
                .isRecommendation(recommendationTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
                .rank(rankTypeMap.get(DOSSIER_REDACTIONS_INDICATOR))
                .build()));

        mockDictionaryCalls(null);

        when(dictionaryClient.getColors(TEST_DOSSIER_TEMPLATE_ID)).thenReturn(colors);
    }


    /**
     * Analyses the crafted document, migrates the legacy redaction log onto it and checks that the migrated
     * ids overlap sufficiently with the ids of the new analysis.
     */
    @Test
    @Disabled // Enable if you fix the TODO in EntityCreationService
    @SneakyThrows
    public void testMigration() {

        AnalyzeRequest request = uploadFileToStorage("files/new/crafted document.pdf");
        analyzeDocumentStructure(LayoutParsingType.REDACT_MANAGER, request);
        // The returned result is not needed; the redaction log is read back from storage below.
        analyzeService.analyze(request);

        var newRedactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
        var originalRedactionLog = getOriginalRedactionLog();

        // IMPORTANT: always use the graph which is mapped from the DocumentData, since rounding errors occur during storage.
        Document document = DocumentGraphMapper.toDocumentGraph(redactionStorageService.getDocumentData(request.getDossierId(), request.getFileId()));

        List<ManualEntity> notFoundManualRedactionEntries = redactionLogAdapter.toTextEntity(originalRedactionLog, document);
        var migratedRedactionLogEntries = redactionLogCreatorService.createRedactionLog(document, TEST_DOSSIER_TEMPLATE_ID, notFoundManualRedactionEntries);

        Map<String, RedactionLogEntry> migratedIds = migratedRedactionLogEntries.stream().collect(toMap(RedactionLogEntry::getId, Functions.identity()));
        Map<String, RedactionLogEntry> newIds = newRedactionLog.getRedactionLogEntry().stream().collect(toMap(RedactionLogEntry::getId, Functions.identity()));

        logPrecision(migratedIds, newIds);
        logRecall(migratedIds, newIds);
    }


    /**
     * Prints the precision and the entries that only exist in the new analysis, then asserts the lower bound.
     */
    private static void logPrecision(Map<String, RedactionLogEntry> migratedIds, Map<String, RedactionLogEntry> newIds) {

        var precision = computePrecision(migratedIds, newIds);
        System.out.printf("precision %.2f\n", precision);
        System.out.println("New Entries");
        getAddedEntries(migratedIds, newIds).forEach(System.out::println);
        assertTrue(precision >= MIN_SCORE, "precision " + precision + " is below " + MIN_SCORE);
        System.out.println();
    }


    /**
     * Prints the recall and the migrated entries missing from the new analysis, then asserts the lower bound.
     */
    private static void logRecall(Map<String, RedactionLogEntry> migratedIds, Map<String, RedactionLogEntry> newIds) {

        var recall = computeRecall(migratedIds, newIds);
        System.out.printf("recall %.2f\n", recall);
        System.out.println("Missing entries");
        getMissingEntries(migratedIds, newIds).forEach(System.out::println);
        assertTrue(recall >= MIN_SCORE, "recall " + recall + " is below " + MIN_SCORE);
        System.out.println();
    }


    /**
     * Returns the migrated entries whose id does not occur in the new analysis.
     */
    private static Stream<RedactionLogEntry> getMissingEntries(Map<String, RedactionLogEntry> migratedIds, Map<String, RedactionLogEntry> newIds) {

        return migratedIds.entrySet().stream().filter(entry -> !newIds.containsKey(entry.getKey())).map(Map.Entry::getValue);
    }


    /**
     * Returns the entries of the new analysis whose id does not occur among the migrated entries.
     */
    private static Stream<RedactionLogEntry> getAddedEntries(Map<String, RedactionLogEntry> migratedIds, Map<String, RedactionLogEntry> newIds) {

        return newIds.entrySet().stream().filter(entry -> !migratedIds.containsKey(entry.getKey())).map(Map.Entry::getValue);
    }


    /**
     * Share of the new ids that also occur among the migrated ids. Yields NaN when {@code newIds} is empty.
     */
    private static double computePrecision(Map<String, RedactionLogEntry> migratedIds, Map<String, RedactionLogEntry> newIds) {

        return (double) countSharedIds(migratedIds, newIds) / (double) newIds.size();
    }


    /**
     * Share of the migrated ids that also occur among the new ids. Yields NaN when {@code migratedIds} is empty.
     */
    private static double computeRecall(Map<String, RedactionLogEntry> migratedIds, Map<String, RedactionLogEntry> newIds) {

        return (double) countSharedIds(migratedIds, newIds) / (double) migratedIds.size();
    }


    /**
     * Counts the ids present in both maps.
     */
    private static int countSharedIds(Map<String, RedactionLogEntry> migratedIds, Map<String, RedactionLogEntry> newIds) {

        return Sets.intersection(newIds.keySet(), migratedIds.keySet()).size();
    }


    /**
     * Loads the legacy redaction log fixture from the classpath.
     *
     * @throws IOException if the fixture cannot be read or parsed
     */
    private static RedactionLog getOriginalRedactionLog() throws IOException {

        try (var inputStream = new ClassPathResource("files/migration/legacy_redactionlog.json").getInputStream()) {
            return ObjectMapperFactory.create().readValue(inputStream, RedactionLog.class);
        }
    }

}

View File

@ -0,0 +1,891 @@
{
"dossierId": "12778ddd-0d87-4127-90f4-a15802e8f967",
"fileId": "4a15b5c89a4310eaef6243fb6f1f4f6f",
"targetFileExtension": "ORIGIN.pdf.gz",
"responseFileExtension": "IMAGE_INFO.json.gz",
"X-TENANT-ID": "documine",
"data": [
{
"classification": {
"label": "signature",
"probabilities": {
"signature": 0.7805,
"other": 0.1362,
"logo": 0.0629,
"formula": 0.0204
}
},
"representation": "F8F3FFCFCC30F0CFCFFCF37FF",
"position": {
"x1": 0,
"x2": 595,
"y1": 0,
"y2": 842,
"pageNumber": 1
},
"geometry": {
"width": 595,
"height": 842
},
"alpha": false,
"filters": {
"geometry": {
"imageSize": {
"quotient": 1.0,
"tooLarge": true,
"tooSmall": false
},
"imageFormat": {
"quotient": 0.7067,
"tooTall": false,
"tooWide": false
}
},
"probability": {
"unconfident": false
},
"allPassed": false
}
},
{
"classification": {
"label": "signature",
"probabilities": {
"signature": 0.9955,
"other": 0.0023,
"logo": 0.0022,
"formula": 0.0
}
},
"representation": "97FFFFFFFF10608F7E18107EF",
"position": {
"x1": 0,
"x2": 595,
"y1": 0,
"y2": 842,
"pageNumber": 2
},
"geometry": {
"width": 595,
"height": 842
},
"alpha": false,
"filters": {
"geometry": {
"imageSize": {
"quotient": 1.0,
"tooLarge": true,
"tooSmall": false
},
"imageFormat": {
"quotient": 0.7067,
"tooTall": false,
"tooWide": false
}
},
"probability": {
"unconfident": false
},
"allPassed": false
}
},
{
"classification": {
"label": "signature",
"probabilities": {
"signature": 0.9998,
"logo": 0.0001,
"other": 0.0001,
"formula": 0.0
}
},
"representation": "FFFFF10EFFF3E0810608307FF",
"position": {
"x1": 0,
"x2": 595,
"y1": 0,
"y2": 842,
"pageNumber": 3
},
"geometry": {
"width": 595,
"height": 842
},
"alpha": false,
"filters": {
"geometry": {
"imageSize": {
"quotient": 1.0,
"tooLarge": true,
"tooSmall": false
},
"imageFormat": {
"quotient": 0.7067,
"tooTall": false,
"tooWide": false
}
},
"probability": {
"unconfident": false
},
"allPassed": false
}
},
{
"classification": {
"label": "signature",
"probabilities": {
"signature": 0.9998,
"other": 0.0001,
"formula": 0.0,
"logo": 0.0
}
},
"representation": "93EFF76E08F0E88F1E187C7EF",
"position": {
"x1": 0,
"x2": 595,
"y1": 0,
"y2": 842,
"pageNumber": 4
},
"geometry": {
"width": 595,
"height": 842
},
"alpha": false,
"filters": {
"geometry": {
"imageSize": {
"quotient": 1.0,
"tooLarge": true,
"tooSmall": false
},
"imageFormat": {
"quotient": 0.7067,
"tooTall": false,
"tooWide": false
}
},
"probability": {
"unconfident": false
},
"allPassed": false
}
},
{
"classification": {
"label": "signature",
"probabilities": {
"signature": 0.9997,
"logo": 0.0001,
"other": 0.0001,
"formula": 0.0
}
},
"representation": "FFFFF33EFC3060818608787FF",
"position": {
"x1": 0,
"x2": 595,
"y1": 0,
"y2": 842,
"pageNumber": 5
},
"geometry": {
"width": 595,
"height": 842
},
"alpha": false,
"filters": {
"geometry": {
"imageSize": {
"quotient": 1.0,
"tooLarge": true,
"tooSmall": false
},
"imageFormat": {
"quotient": 0.7067,
"tooTall": false,
"tooWide": false
}
},
"probability": {
"unconfident": false
},
"allPassed": false
}
},
{
"classification": {
"label": "signature",
"probabilities": {
"signature": 0.9085,
"logo": 0.0453,
"other": 0.0413,
"formula": 0.005
}
},
"representation": "FFF3F506F81060CF1E18FC7FF",
"position": {
"x1": 0,
"x2": 595,
"y1": 0,
"y2": 842,
"pageNumber": 6
},
"geometry": {
"width": 595,
"height": 842
},
"alpha": false,
"filters": {
"geometry": {
"imageSize": {
"quotient": 1.0,
"tooLarge": true,
"tooSmall": false
},
"imageFormat": {
"quotient": 0.7067,
"tooTall": false,
"tooWide": false
}
},
"probability": {
"unconfident": false
},
"allPassed": false
}
},
{
"classification": {
"label": "signature",
"probabilities": {
"signature": 0.7596,
"other": 0.1218,
"logo": 0.1073,
"formula": 0.0113
}
},
"representation": "FFFFFFF70C1070C10F3C10FFF",
"position": {
"x1": 0,
"x2": 595,
"y1": 0,
"y2": 842,
"pageNumber": 7
},
"geometry": {
"width": 595,
"height": 842
},
"alpha": false,
"filters": {
"geometry": {
"imageSize": {
"quotient": 1.0,
"tooLarge": true,
"tooSmall": false
},
"imageFormat": {
"quotient": 0.7067,
"tooTall": false,
"tooWide": false
}
},
"probability": {
"unconfident": false
},
"allPassed": false
}
},
{
"classification": {
"label": "signature",
"probabilities": {
"signature": 0.4095,
"other": 0.2126,
"formula": 0.2008,
"logo": 0.1771
}
},
"representation": "FF70C1070C1070CFD70C10FFF",
"position": {
"x1": 0,
"x2": 595,
"y1": 0,
"y2": 842,
"pageNumber": 8
},
"geometry": {
"width": 595,
"height": 842
},
"alpha": false,
"filters": {
"geometry": {
"imageSize": {
"quotient": 1.0,
"tooLarge": true,
"tooSmall": false
},
"imageFormat": {
"quotient": 0.7067,
"tooTall": false,
"tooWide": false
}
},
"probability": {
"unconfident": true
},
"allPassed": false
}
},
{
"classification": {
"label": "signature",
"probabilities": {
"signature": 0.9282,
"logo": 0.0402,
"other": 0.0305,
"formula": 0.0011
}
},
"representation": "F4F1CF170CF170C1070CF7FFF",
"position": {
"x1": 0,
"x2": 595,
"y1": 0,
"y2": 842,
"pageNumber": 9
},
"geometry": {
"width": 595,
"height": 842
},
"alpha": false,
"filters": {
"geometry": {
"imageSize": {
"quotient": 1.0,
"tooLarge": true,
"tooSmall": false
},
"imageFormat": {
"quotient": 0.7067,
"tooTall": false,
"tooWide": false
}
},
"probability": {
"unconfident": false
},
"allPassed": false
}
},
{
"classification": {
"label": "signature",
"probabilities": {
"signature": 0.9699,
"logo": 0.02,
"other": 0.0097,
"formula": 0.0004
}
},
"representation": "FF70FF4E1D3070D1070CF0FFF",
"position": {
"x1": 0,
"x2": 595,
"y1": 0,
"y2": 842,
"pageNumber": 10
},
"geometry": {
"width": 595,
"height": 842
},
"alpha": false,
"filters": {
"geometry": {
"imageSize": {
"quotient": 1.0,
"tooLarge": true,
"tooSmall": false
},
"imageFormat": {
"quotient": 0.7067,
"tooTall": false,
"tooWide": false
}
},
"probability": {
"unconfident": false
},
"allPassed": false
}
},
{
"classification": {
"label": "other",
"probabilities": {
"other": 0.9181,
"signature": 0.0432,
"formula": 0.0355,
"logo": 0.0032
}
},
"representation": "F3F1F7DF1F10F0C1070CF3FFF",
"position": {
"x1": 0,
"x2": 595,
"y1": 0,
"y2": 842,
"pageNumber": 11
},
"geometry": {
"width": 595,
"height": 842
},
"alpha": false,
"filters": {
"geometry": {
"imageSize": {
"quotient": 1.0,
"tooLarge": true,
"tooSmall": false
},
"imageFormat": {
"quotient": 0.7067,
"tooTall": false,
"tooWide": false
}
},
"probability": {
"unconfident": false
},
"allPassed": false
}
},
{
"classification": {
"label": "signature",
"probabilities": {
"signature": 0.663,
"other": 0.137,
"logo": 0.1327,
"formula": 0.0674
}
},
"representation": "B770C10F7C1070C10F0C97FFF",
"position": {
"x1": 0,
"x2": 595,
"y1": 0,
"y2": 842,
"pageNumber": 12
},
"geometry": {
"width": 595,
"height": 842
},
"alpha": false,
"filters": {
"geometry": {
"imageSize": {
"quotient": 1.0,
"tooLarge": true,
"tooSmall": false
},
"imageFormat": {
"quotient": 0.7067,
"tooTall": false,
"tooWide": false
}
},
"probability": {
"unconfident": false
},
"allPassed": false
}
},
{
"classification": {
"label": "signature",
"probabilities": {
"signature": 0.5023,
"other": 0.2343,
"logo": 0.2255,
"formula": 0.0379
}
},
"representation": "B370C10FFC1070CF3F1CF0FFF",
"position": {
"x1": 0,
"x2": 595,
"y1": 0,
"y2": 842,
"pageNumber": 13
},
"geometry": {
"width": 595,
"height": 842
},
"alpha": false,
"filters": {
"geometry": {
"imageSize": {
"quotient": 1.0,
"tooLarge": true,
"tooSmall": false
},
"imageFormat": {
"quotient": 0.7067,
"tooTall": false,
"tooWide": false
}
},
"probability": {
"unconfident": false
},
"allPassed": false
}
},
{
"classification": {
"label": "signature",
"probabilities": {
"signature": 0.9999,
"other": 0.0001,
"formula": 0.0,
"logo": 0.0
}
},
"representation": "97FFFFFFFFFFFF810608907EF",
"position": {
"x1": 0,
"x2": 595,
"y1": 0,
"y2": 842,
"pageNumber": 14
},
"geometry": {
"width": 595,
"height": 842
},
"alpha": false,
"filters": {
"geometry": {
"imageSize": {
"quotient": 1.0,
"tooLarge": true,
"tooSmall": false
},
"imageFormat": {
"quotient": 0.7067,
"tooTall": false,
"tooWide": false
}
},
"probability": {
"unconfident": false
},
"allPassed": false
}
},
{
"classification": {
"label": "signature",
"probabilities": {
"signature": 0.9099,
"logo": 0.0548,
"other": 0.0339,
"formula": 0.0014
}
},
"representation": "FFFFF10F7DDD70C1070CF77FF",
"position": {
"x1": 0,
"x2": 595,
"y1": 0,
"y2": 842,
"pageNumber": 15
},
"geometry": {
"width": 595,
"height": 842
},
"alpha": false,
"filters": {
"geometry": {
"imageSize": {
"quotient": 1.0,
"tooLarge": true,
"tooSmall": false
},
"imageFormat": {
"quotient": 0.7067,
"tooTall": false,
"tooWide": false
}
},
"probability": {
"unconfident": false
},
"allPassed": false
}
},
{
"classification": {
"label": "signature",
"probabilities": {
"signature": 0.6538,
"other": 0.1607,
"logo": 0.1489,
"formula": 0.0366
}
},
"representation": "FF70CF370CFE70C1070CB3FFF",
"position": {
"x1": 0,
"x2": 595,
"y1": 0,
"y2": 842,
"pageNumber": 16
},
"geometry": {
"width": 595,
"height": 842
},
"alpha": false,
"filters": {
"geometry": {
"imageSize": {
"quotient": 1.0,
"tooLarge": true,
"tooSmall": false
},
"imageFormat": {
"quotient": 0.7067,
"tooTall": false,
"tooWide": false
}
},
"probability": {
"unconfident": false
},
"allPassed": false
}
},
{
"classification": {
"label": "signature",
"probabilities": {
"signature": 0.9637,
"logo": 0.0179,
"other": 0.0156,
"formula": 0.0028
}
},
"representation": "FF70CF170C10F0C1070CF7FFF",
"position": {
"x1": 0,
"x2": 595,
"y1": 0,
"y2": 842,
"pageNumber": 17
},
"geometry": {
"width": 595,
"height": 842
},
"alpha": false,
"filters": {
"geometry": {
"imageSize": {
"quotient": 1.0,
"tooLarge": true,
"tooSmall": false
},
"imageFormat": {
"quotient": 0.7067,
"tooTall": false,
"tooWide": false
}
},
"probability": {
"unconfident": false
},
"allPassed": false
}
},
{
"classification": {
"label": "other",
"probabilities": {
"other": 1.0,
"formula": 0.0,
"logo": 0.0,
"signature": 0.0
}
},
"representation": "FFFFF7070E1870E18608106FF",
"position": {
"x1": 0,
"x2": 595,
"y1": 0,
"y2": 842,
"pageNumber": 18
},
"geometry": {
"width": 595,
"height": 842
},
"alpha": false,
"filters": {
"geometry": {
"imageSize": {
"quotient": 1.0,
"tooLarge": true,
"tooSmall": false
},
"imageFormat": {
"quotient": 0.7067,
"tooTall": false,
"tooWide": false
}
},
"probability": {
"unconfident": false
},
"allPassed": false
}
},
{
"classification": {
"label": "other",
"probabilities": {
"other": 1.0,
"formula": 0.0,
"logo": 0.0,
"signature": 0.0
}
},
"representation": "9B70E1870E1870E1870810EFF",
"position": {
"x1": 0,
"x2": 595,
"y1": 0,
"y2": 842,
"pageNumber": 19
},
"geometry": {
"width": 595,
"height": 842
},
"alpha": false,
"filters": {
"geometry": {
"imageSize": {
"quotient": 1.0,
"tooLarge": true,
"tooSmall": false
},
"imageFormat": {
"quotient": 0.7067,
"tooTall": false,
"tooWide": false
}
},
"probability": {
"unconfident": false
},
"allPassed": false
}
},
{
"classification": {
"label": "signature",
"probabilities": {
"signature": 0.3619,
"other": 0.2659,
"logo": 0.2169,
"formula": 0.1552
}
},
"representation": "FFFFF30FE910ECF1E708106FF",
"position": {
"x1": 0,
"x2": 595,
"y1": 0,
"y2": 842,
"pageNumber": 20
},
"geometry": {
"width": 595,
"height": 842
},
"alpha": false,
"filters": {
"geometry": {
"imageSize": {
"quotient": 1.0,
"tooLarge": true,
"tooSmall": false
},
"imageFormat": {
"quotient": 0.7067,
"tooTall": false,
"tooWide": false
}
},
"probability": {
"unconfident": true
},
"allPassed": false
}
},
{
"classification": {
"label": "signature",
"probabilities": {
"signature": 0.9984,
"other": 0.001,
"logo": 0.0006,
"formula": 0.0
}
},
"representation": "B7E9F70F3C73F1C78F3F706EF",
"position": {
"x1": 0,
"x2": 595,
"y1": 0,
"y2": 842,
"pageNumber": 21
},
"geometry": {
"width": 595,
"height": 842
},
"alpha": false,
"filters": {
"geometry": {
"imageSize": {
"quotient": 1.0,
"tooLarge": true,
"tooSmall": false
},
"imageFormat": {
"quotient": 0.7067,
"tooTall": false,
"tooWide": false
}
},
"probability": {
"unconfident": false
},
"allPassed": false
}
}
]
}

View File

@ -0,0 +1,682 @@
{
"dossierId": "12778ddd-0d87-4127-90f4-a15802e8f967",
"fileId": "4a15b5c89a4310eaef6243fb6f1f4f6f",
"targetFileExtension": "ORIGIN.pdf.gz",
"responseFileExtension": "TABLES.json.gz",
"X-TENANT-ID": "documine",
"operation": "table",
"data": [
{
"pageInfo": {
"number": 4,
"rotation": 0,
"width": 595.0,
"height": 842.0
},
"tableCells": [
{
"x0": 299.5199890136719,
"y0": 262.760009765625,
"x1": 359.6400146484375,
"y1": 284.0,
"width": 60.120025634765625,
"height": 21.239990234375
},
{
"x0": 360.7200012207031,
"y0": 262.760009765625,
"x1": 383.3999938964844,
"y1": 284.0,
"width": 22.67999267578125,
"height": 21.239990234375
},
{
"x0": 384.1199951171875,
"y0": 262.4000244140625,
"x1": 477.7200012207031,
"y1": 284.0,
"width": 93.60000610351562,
"height": 21.5999755859375
},
{
"x0": 114.12000274658203,
"y0": 264.20001220703125,
"x1": 140.0399932861328,
"y1": 274.6400146484375,
"width": 25.91999053955078,
"height": 10.44000244140625
},
{
"x0": 141.1199951171875,
"y0": 263.47998046875,
"x1": 208.8000030517578,
"y1": 274.6400146484375,
"width": 67.68000793457031,
"height": 11.1600341796875
},
{
"x0": 209.52000427246094,
"y0": 263.47998046875,
"x1": 256.67999267578125,
"y1": 274.6400146484375,
"width": 47.15998840332031,
"height": 11.1600341796875
},
{
"x0": 78.83999633789062,
"y0": 253.03997802734375,
"x1": 208.8000030517578,
"y1": 262.760009765625,
"width": 129.9600067138672,
"height": 9.72003173828125
},
{
"x0": 209.52000427246094,
"y0": 253.03997802734375,
"x1": 256.67999267578125,
"y1": 262.4000244140625,
"width": 47.15998840332031,
"height": 9.36004638671875
},
{
"x0": 299.5199890136719,
"y0": 250.52001953125,
"x1": 359.6400146484375,
"y1": 262.03997802734375,
"width": 60.120025634765625,
"height": 11.51995849609375
},
{
"x0": 399.6000061035156,
"y0": 250.52001953125,
"x1": 477.7200012207031,
"y1": 261.32000732421875,
"width": 78.1199951171875,
"height": 10.79998779296875
}
]
},
{
"pageInfo": {
"number": 11,
"rotation": 0,
"width": 595.0,
"height": 842.0
},
"tableCells": [
{
"x0": 217.8000030517578,
"y0": 147.55999755859375,
"x1": 398.5199890136719,
"y1": 171.67999267578125,
"width": 180.71998596191406,
"height": 24.1199951171875
},
{
"x0": 229.67999267578125,
"y0": 121.280029296875,
"x1": 306.7200012207031,
"y1": 148.280029296875,
"width": 77.04000854492188,
"height": 27.0
},
{
"x0": 307.44000244140625,
"y0": 121.280029296875,
"x1": 399.239990234375,
"y1": 147.55999755859375,
"width": 91.79998779296875,
"height": 26.27996826171875
}
]
},
{
"pageInfo": {
"number": 18,
"rotation": 0,
"width": 595.0,
"height": 842.0
},
"tableCells": [
{
"x0": 64.08000183105469,
"y0": 585.6799926757812,
"x1": 220.67999267578125,
"y1": 614.1199951171875,
"width": 156.59999084472656,
"height": 28.44000244140625
},
{
"x0": 221.75999450683594,
"y0": 584.9599609375,
"x1": 309.9599914550781,
"y1": 614.1199951171875,
"width": 88.19999694824219,
"height": 29.1600341796875
},
{
"x0": 311.3999938964844,
"y0": 584.239990234375,
"x1": 399.9599914550781,
"y1": 614.1199951171875,
"width": 88.55999755859375,
"height": 29.8800048828125
},
{
"x0": 401.0400085449219,
"y0": 583.52001953125,
"x1": 494.2799987792969,
"y1": 614.1199951171875,
"width": 93.239990234375,
"height": 30.5999755859375
},
{
"x0": 495.3599853515625,
"y0": 583.1600341796875,
"x1": 537.1199951171875,
"y1": 614.1199951171875,
"width": 41.760009765625,
"height": 30.9599609375
},
{
"x0": 64.08000183105469,
"y0": 262.760009765625,
"x1": 220.32000732421875,
"y1": 585.3200073242188,
"width": 156.24000549316406,
"height": 322.55999755859375
},
{
"x0": 221.39999389648438,
"y0": 543.5599975585938,
"x1": 309.9599914550781,
"y1": 584.239990234375,
"width": 88.55999755859375,
"height": 40.67999267578125
},
{
"x0": 311.0400085449219,
"y0": 542.47998046875,
"x1": 399.9599914550781,
"y1": 583.1600341796875,
"width": 88.91998291015625,
"height": 40.6800537109375
},
{
"x0": 400.67999267578125,
"y0": 541.760009765625,
"x1": 493.9200134277344,
"y1": 582.4400024414062,
"width": 93.24002075195312,
"height": 40.67999267578125
},
{
"x0": 495.0,
"y0": 541.760009765625,
"x1": 537.1199951171875,
"y1": 581.719970703125,
"width": 42.1199951171875,
"height": 39.9599609375
},
{
"x0": 220.67999267578125,
"y0": 485.6000061035156,
"x1": 309.239990234375,
"y1": 543.2000122070312,
"width": 88.55999755859375,
"height": 57.600006103515625
},
{
"x0": 310.32000732421875,
"y0": 484.8800048828125,
"x1": 399.239990234375,
"y1": 542.1199951171875,
"width": 88.91998291015625,
"height": 57.239990234375
},
{
"x0": 399.9599914550781,
"y0": 484.1600036621094,
"x1": 493.9200134277344,
"y1": 541.4000244140625,
"width": 93.96002197265625,
"height": 57.240020751953125
},
{
"x0": 494.6400146484375,
"y0": 483.79998779296875,
"x1": 537.1199951171875,
"y1": 540.6799926757812,
"width": 42.47998046875,
"height": 56.8800048828125
},
{
"x0": 219.9600067138672,
"y0": 438.0799865722656,
"x1": 308.8800048828125,
"y1": 485.239990234375,
"width": 88.91999816894531,
"height": 47.160003662109375
},
{
"x0": 309.6000061035156,
"y0": 437.3599853515625,
"x1": 398.8800048828125,
"y1": 484.1600036621094,
"width": 89.27999877929688,
"height": 46.800018310546875
},
{
"x0": 399.6000061035156,
"y0": 436.6400146484375,
"x1": 493.20001220703125,
"y1": 483.44000244140625,
"width": 93.60000610351562,
"height": 46.79998779296875
},
{
"x0": 493.9200134277344,
"y0": 436.2799987792969,
"x1": 537.1199951171875,
"y1": 482.7200012207031,
"width": 43.199981689453125,
"height": 46.44000244140625
},
{
"x0": 219.60000610351562,
"y0": 388.0400085449219,
"x1": 308.1600036621094,
"y1": 437.7200012207031,
"width": 88.55999755859375,
"height": 49.67999267578125
},
{
"x0": 309.239990234375,
"y0": 387.32000732421875,
"x1": 398.5199890136719,
"y1": 436.6400146484375,
"width": 89.27999877929688,
"height": 49.32000732421875
},
{
"x0": 398.8800048828125,
"y0": 386.6000061035156,
"x1": 492.8399963378906,
"y1": 435.9200134277344,
"width": 93.95999145507812,
"height": 49.32000732421875
},
{
"x0": 493.9200134277344,
"y0": 385.8800048828125,
"x1": 537.1199951171875,
"y1": 435.20001220703125,
"width": 43.199981689453125,
"height": 49.32000732421875
},
{
"x0": 218.8800048828125,
"y0": 339.0799865722656,
"x1": 307.79998779296875,
"y1": 387.67999267578125,
"width": 88.91998291015625,
"height": 48.600006103515625
},
{
"x0": 308.5199890136719,
"y0": 338.3599853515625,
"x1": 397.79998779296875,
"y1": 386.6000061035156,
"width": 89.27999877929688,
"height": 48.240020751953125
},
{
"x0": 398.5199890136719,
"y0": 337.6400146484375,
"x1": 492.4800109863281,
"y1": 385.8800048828125,
"width": 93.96002197265625,
"height": 48.239990234375
},
{
"x0": 493.20001220703125,
"y0": 337.2799987792969,
"x1": 537.1199951171875,
"y1": 385.1600036621094,
"width": 43.91998291015625,
"height": 47.8800048828125
},
{
"x0": 218.16000366210938,
"y0": 289.03997802734375,
"x1": 307.0799865722656,
"y1": 338.7200012207031,
"width": 88.91998291015625,
"height": 49.680023193359375
},
{
"x0": 308.1600036621094,
"y0": 288.32000732421875,
"x1": 397.44000244140625,
"y1": 337.6400146484375,
"width": 89.27999877929688,
"height": 49.32000732421875
},
{
"x0": 398.1600036621094,
"y0": 287.96002197265625,
"x1": 492.1199951171875,
"y1": 336.9200134277344,
"width": 93.95999145507812,
"height": 48.959991455078125
},
{
"x0": 492.8399963378906,
"y0": 287.239990234375,
"x1": 537.1199951171875,
"y1": 336.20001220703125,
"width": 44.279998779296875,
"height": 48.96002197265625
},
{
"x0": 217.8000030517578,
"y0": 262.760009765625,
"x1": 306.7200012207031,
"y1": 288.67999267578125,
"width": 88.91999816894531,
"height": 25.91998291015625
},
{
"x0": 307.79998779296875,
"y0": 262.760009765625,
"x1": 397.0799865722656,
"y1": 287.96002197265625,
"width": 89.27999877929688,
"height": 25.20001220703125
},
{
"x0": 397.79998779296875,
"y0": 262.760009765625,
"x1": 492.1199951171875,
"y1": 287.239990234375,
"width": 94.32000732421875,
"height": 24.47998046875
},
{
"x0": 492.8399963378906,
"y0": 262.760009765625,
"x1": 537.1199951171875,
"y1": 286.52001953125,
"width": 44.279998779296875,
"height": 23.760009765625
}
]
},
{
"pageInfo": {
"number": 19,
"rotation": 0,
"width": 595.0,
"height": 842.0
},
"tableCells": [
{
"x0": 71.27999877929688,
"y0": 620.239990234375,
"x1": 231.47999572753906,
"y1": 643.280029296875,
"width": 160.1999969482422,
"height": 23.0400390625
},
{
"x0": 232.55999755859375,
"y0": 618.7999877929688,
"x1": 333.3599853515625,
"y1": 643.280029296875,
"width": 100.79998779296875,
"height": 24.48004150390625
},
{
"x0": 334.0799865722656,
"y0": 617.719970703125,
"x1": 434.1600036621094,
"y1": 643.280029296875,
"width": 100.08001708984375,
"height": 25.56005859375
},
{
"x0": 435.6000061035156,
"y0": 616.6400146484375,
"x1": 537.1199951171875,
"y1": 643.280029296875,
"width": 101.51998901367188,
"height": 26.6400146484375
},
{
"x0": 71.27999877929688,
"y0": 47.84002685546875,
"x1": 230.75999450683594,
"y1": 620.5999755859375,
"width": 159.47999572753906,
"height": 572.7599487304688
},
{
"x0": 231.83999633789062,
"y0": 559.4000244140625,
"x1": 333.0,
"y1": 618.7999877929688,
"width": 101.16000366210938,
"height": 59.39996337890625
},
{
"x0": 333.7200012207031,
"y0": 557.9599609375,
"x1": 433.79998779296875,
"y1": 617.3599853515625,
"width": 100.07998657226562,
"height": 59.4000244140625
},
{
"x0": 434.8800048828125,
"y0": 556.8800048828125,
"x1": 537.1199951171875,
"y1": 615.9199829101562,
"width": 102.239990234375,
"height": 59.03997802734375
},
{
"x0": 230.39999389648438,
"y0": 488.4800109863281,
"x1": 332.2799987792969,
"y1": 559.0400390625,
"width": 101.8800048828125,
"height": 70.56002807617188
},
{
"x0": 332.6400146484375,
"y0": 487.0400085449219,
"x1": 433.0799865722656,
"y1": 557.9599609375,
"width": 100.43997192382812,
"height": 70.91995239257812
},
{
"x0": 433.79998779296875,
"y0": 485.9599914550781,
"x1": 537.1199951171875,
"y1": 556.52001953125,
"width": 103.32000732421875,
"height": 70.56002807617188
},
{
"x0": 229.32000732421875,
"y0": 417.20001220703125,
"x1": 331.55999755859375,
"y1": 488.1199951171875,
"width": 102.239990234375,
"height": 70.91998291015625
},
{
"x0": 331.55999755859375,
"y0": 416.1199951171875,
"x1": 432.0,
"y1": 487.0400085449219,
"width": 100.44000244140625,
"height": 70.92001342773438
},
{
"x0": 432.7200012207031,
"y0": 415.0400085449219,
"x1": 537.1199951171875,
"y1": 485.6000061035156,
"width": 104.39999389648438,
"height": 70.55999755859375
},
{
"x0": 228.24000549316406,
"y0": 346.6400146484375,
"x1": 330.1199951171875,
"y1": 417.20001220703125,
"width": 101.87998962402344,
"height": 70.55999755859375
},
{
"x0": 330.4800109863281,
"y0": 345.20001220703125,
"x1": 431.2799987792969,
"y1": 416.1199951171875,
"width": 100.79998779296875,
"height": 70.91998291015625
},
{
"x0": 432.0,
"y0": 344.1199951171875,
"x1": 537.1199951171875,
"y1": 414.67999267578125,
"width": 105.1199951171875,
"height": 70.55999755859375
},
{
"x0": 227.16000366210938,
"y0": 276.08001708984375,
"x1": 329.3999938964844,
"y1": 346.2799987792969,
"width": 102.239990234375,
"height": 70.19998168945312
},
{
"x0": 329.760009765625,
"y0": 274.6400146484375,
"x1": 430.55999755859375,
"y1": 345.20001220703125,
"width": 100.79998779296875,
"height": 70.55999755859375
},
{
"x0": 431.2799987792969,
"y0": 273.55999755859375,
"x1": 537.1199951171875,
"y1": 343.760009765625,
"width": 105.83999633789062,
"height": 70.20001220703125
},
{
"x0": 226.0800018310547,
"y0": 205.15997314453125,
"x1": 328.32000732421875,
"y1": 275.719970703125,
"width": 102.24000549316406,
"height": 70.55999755859375
},
{
"x0": 328.67999267578125,
"y0": 203.719970703125,
"x1": 429.4800109863281,
"y1": 274.6400146484375,
"width": 100.80001831054688,
"height": 70.9200439453125
},
{
"x0": 430.55999755859375,
"y0": 203.0,
"x1": 537.1199951171875,
"y1": 273.20001220703125,
"width": 106.55999755859375,
"height": 70.20001220703125
},
{
"x0": 224.63999938964844,
"y0": 133.52001953125,
"x1": 327.239990234375,
"y1": 205.15997314453125,
"width": 102.59999084472656,
"height": 71.63995361328125
},
{
"x0": 327.6000061035156,
"y0": 132.08001708984375,
"x1": 428.760009765625,
"y1": 203.719970703125,
"width": 101.16000366210938,
"height": 71.63995361328125
},
{
"x0": 429.1199951171875,
"y0": 131.0,
"x1": 537.1199951171875,
"y1": 202.280029296875,
"width": 108.0,
"height": 71.280029296875
},
{
"x0": 223.9199981689453,
"y0": 61.8800048828125,
"x1": 326.5199890136719,
"y1": 133.52001953125,
"width": 102.59999084472656,
"height": 71.6400146484375
},
{
"x0": 326.5199890136719,
"y0": 60.44000244140625,
"x1": 427.67999267578125,
"y1": 132.44000244140625,
"width": 101.16000366210938,
"height": 72.0
},
{
"x0": 428.3999938964844,
"y0": 59.3599853515625,
"x1": 537.1199951171875,
"y1": 131.0,
"width": 108.72000122070312,
"height": 71.6400146484375
},
{
"x0": 223.55999755859375,
"y0": 47.84002685546875,
"x1": 325.79998779296875,
"y1": 61.8800048828125,
"width": 102.239990234375,
"height": 14.03997802734375
},
{
"x0": 326.1600036621094,
"y0": 47.84002685546875,
"x1": 426.9599914550781,
"y1": 60.44000244140625,
"width": 100.79998779296875,
"height": 12.5999755859375
},
{
"x0": 428.0400085449219,
"y0": 47.84002685546875,
"x1": 537.1199951171875,
"y1": 59.3599853515625,
"width": 109.07998657226562,
"height": 11.51995849609375
}
]
}
]
}