From 5deb2092b1dcfc6012d5bacbbb201121b7cd82df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kilian=20Sch=C3=BCttler?= Date: Tue, 4 Jun 2024 17:16:03 +0200 Subject: [PATCH] RED-9169: duplicate entities in component rules --- .../v1/server/RedactionServiceSettings.java | 2 + .../v1/server/service/AnalyzeService.java | 5 +-- .../document/EntityFindingUtility.java | 38 ++++++++----------- .../ComponentDroolsExecutionService.java | 29 +++++++++++++- .../utils/RectangleTransformations.java | 3 +- .../v1/server/AnalysisEnd2EndTest.java | 38 ++++++++++++++----- 6 files changed, 77 insertions(+), 38 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/RedactionServiceSettings.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/RedactionServiceSettings.java index 7accb9d9..3c126fae 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/RedactionServiceSettings.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/RedactionServiceSettings.java @@ -32,4 +32,6 @@ public class RedactionServiceSettings { private boolean ruleExecutionSecured = true; + private boolean annotationMode; + } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/AnalyzeService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/AnalyzeService.java index 226ded0a..5c5af376 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/AnalyzeService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/AnalyzeService.java @@ -312,13 +312,12 @@ public class AnalyzeService { } // We need the latest EntityLog entries for components rules execution - entityLog.getEntityLogEntry().addAll(redactionStorageService.getEntityLog(analyzeRequest.getDossierId(), analyzeRequest.getFileId()).getEntityLogEntry()); + entityLog.setEntityLogEntry(redactionStorageService.getEntityLog(analyzeRequest.getDossierId(), analyzeRequest.getFileId()).getEntityLogEntry()); List components = componentDroolsExecutionService.executeRules(kieWrapperComponentRules.container(), entityLog, document, - addedFileAttributes.stream() - .toList(), + addedFileAttributes, analyzeRequest.getComponentMappings()); log.info("Finished component rule execution for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId()); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFindingUtility.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFindingUtility.java index ecf0983e..70797490 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFindingUtility.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFindingUtility.java @@ -4,7 +4,6 @@ import static java.lang.String.format; import static java.util.stream.Collectors.groupingBy; import java.awt.geom.Rectangle2D; -import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.List; @@ -28,6 +27,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntit import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page; import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; import com.iqser.red.service.redaction.v1.server.service.DictionaryService; +import com.iqser.red.service.redaction.v1.server.utils.RectangleTransformations; import lombok.extern.slf4j.Slf4j; @@ -113,33 +113,27 @@ public class EntityFindingUtility { public static double calculateMinDistance(List originalPositions, TextEntity entity) { - if (originalPositions.size() != countRectangles(entity)) { - return Double.MAX_VALUE; - } - return originalPositions.stream() - .mapToDouble(rectangleWithPage -> calculateMinDistancePerRectangle(entity, rectangleWithPage.pageNumber(), rectangleWithPage.rectangle2D())).average() + Map originalBBoxPerPage = buildBBoxPerPage(originalPositions); + + return entity.getPositionsOnPagePerPage() + .stream() + .mapToDouble(positionOnPage -> calculateDistance(RectangleTransformations.rectangle2DBBox(positionOnPage.getRectanglePerLine()), + originalBBoxPerPage.getOrDefault(positionOnPage.getPage().getNumber(), new Rectangle2D.Double()))).average() .orElse(Double.MAX_VALUE); } - private static long countRectangles(TextEntity entity) { + private static Map buildBBoxPerPage(List originalPositions) { - return entity.getPositionsOnPagePerPage() + Map> originalPositionsPerPage = originalPositions.stream() + .collect(Collectors.groupingBy(RectangleWithPage::pageNumber)); + return originalPositionsPerPage.entrySet() .stream() - .mapToLong(redactionPosition -> redactionPosition.getRectanglePerLine().size()).sum(); - } - - - private static double calculateMinDistancePerRectangle(TextEntity entity, int pageNumber, Rectangle2D originalRectangle) { - - return entity.getPositionsOnPagePerPage() - .stream() - .filter(redactionPosition -> redactionPosition.getPage().getNumber() == pageNumber) - .map(PositionOnPage::getRectanglePerLine) - .flatMap(Collection::stream) - .mapToDouble(rectangle -> calculateDistance(rectangle, originalRectangle)) - .min() - .orElse(Double.MAX_VALUE); + .collect(Collectors.toMap(Map.Entry::getKey, + entry -> entry.getValue() + .stream() + .map(RectangleWithPage::rectangle2D) + .collect(RectangleTransformations.collectBBox()))); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/ComponentDroolsExecutionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/ComponentDroolsExecutionService.java index 0fcec3b4..a0fa79dd 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/ComponentDroolsExecutionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/ComponentDroolsExecutionService.java @@ -2,6 +2,7 @@ package com.iqser.red.service.redaction.v1.server.service.drools; import java.util.LinkedList; import java.util.List; +import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; @@ -16,6 +17,7 @@ import org.springframework.stereotype.Service; import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute; import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog; +import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry; import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryState; import com.iqser.red.service.persistence.service.v1.api.shared.model.component.ComponentMappingMetadata; import com.iqser.red.service.redaction.v1.server.RedactionServiceSettings; @@ -49,7 +51,7 @@ public class ComponentDroolsExecutionService { public List executeRules(KieContainer kieContainer, EntityLog entityLog, Document document, - List fileAttributes, + Set fileAttributes, List componentMappings) { KieSession kieSession = kieContainer.newKieSession(); @@ -64,7 +66,7 @@ public class ComponentDroolsExecutionService { entityLog.getEntityLogEntry() .stream() - .filter(entityLogEntry -> entityLogEntry.getState().equals(EntryState.APPLIED)) + .filter(this::isApplied) .map(entry -> Entity.fromEntityLogEntry(entry, document)) .forEach(kieSession::insert); fileAttributes.stream() @@ -94,14 +96,37 @@ public class ComponentDroolsExecutionService { } List resultingFileAttributes = getFileAttributes(kieSession); + + addOrUpdate(fileAttributes, resultingFileAttributes); + List components = getComponents(kieSession).stream() .sorted(ComponentComparator.first()) .toList(); + kieSession.dispose(); + return components; } + private static void addOrUpdate(Set fileAttributes, List resultingFileAttributes) { + + for (FileAttribute resultingFileAttribute : resultingFileAttributes) { + fileAttributes.remove(resultingFileAttribute); + fileAttributes.add(resultingFileAttribute); + } + } + + + private boolean isApplied(EntityLogEntry entityLogEntry) { + + if (settings.isAnnotationMode()) { + return entityLogEntry.getState().equals(EntryState.APPLIED) || entityLogEntry.getState().equals(EntryState.SKIPPED); + } + return entityLogEntry.getState().equals(EntryState.APPLIED); + } + + private static boolean hasComponentMappingServiceGlobal(KieSession kieSession) { return kieSession.getKieBase().getKiePackages() diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/RectangleTransformations.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/RectangleTransformations.java index d4746f21..d92829b6 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/RectangleTransformations.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/RectangleTransformations.java @@ -2,6 +2,7 @@ package com.iqser.red.service.redaction.v1.server.utils; import java.awt.geom.Rectangle2D; import java.awt.geom.RectangularShape; +import java.util.Collection; import java.util.Collections; import java.util.LinkedList; import java.util.List; @@ -37,7 +38,7 @@ public class RectangleTransformations { } - public static Rectangle2D rectangle2DBBox(List rectangle2DList) { + public static Rectangle2D rectangle2DBBox(Collection rectangle2DList) { return rectangle2DList.stream() .collect(new Rectangle2DBBoxCollector()); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AnalysisEnd2EndTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AnalysisEnd2EndTest.java index 911d4cdb..ebb83325 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AnalysisEnd2EndTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AnalysisEnd2EndTest.java @@ -17,6 +17,7 @@ import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.UUID; import java.util.concurrent.atomic.AtomicInteger; @@ -79,7 +80,7 @@ import lombok.extern.slf4j.Slf4j; * This way you can recreate what is happening on the stack almost exactly. */ public class AnalysisEnd2EndTest { - Path dossierTemplateToUse = Path.of("/home/kschuettler/iqser/testing dossier templates/Production DocuMine"); // Add your dossier-template here + Path dossierTemplateToUse = Path.of("/home/kschuettler/iqser/fforesight/dossier-templates-v2/dev/LayoutParsingDatasetEvaluation"); // Add your dossier-template here ObjectMapper mapper = ObjectMapperFactory.create(); final String TENANT_ID = "tenant"; @@ -120,7 +121,7 @@ import lombok.extern.slf4j.Slf4j; @SneakyThrows public void runAnalysisEnd2End() { - String folder = "/home/kschuettler/iqser/redaction/redaction-service/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/end2end/broken"; // Should contain all files from minio directly, still zipped. Can contain multiple files. + String folder = "/home/kschuettler/Dokumente/analysisend2end/file1"; // Should contain all files from minio directly, still zipped. Can contain multiple files. Path absoluteFolderPath; if (folder.startsWith("files")) { // if it starts with "files" it is most likely in the resources folder, else it should be an absolute path @@ -169,7 +170,7 @@ import lombok.extern.slf4j.Slf4j; when(dictionaryService.getDictionaryIncrements(any(), any(), any())).thenReturn(new DictionaryIncrement(Collections.emptySet(), new DictionaryVersion(0, 0))); when(dictionaryService.isHint(any(String.class), any())).thenAnswer(invocation -> { String type = invocation.getArgument(0); - return testDossierTemplate.testDictionary.getType(type).isHint(); + return testDossierTemplate.testDictionary.isHint(type); }); when(dictionaryService.getColor(any(String.class), any())).thenAnswer(invocation -> { String type = invocation.getArgument(0); @@ -208,9 +209,15 @@ import lombok.extern.slf4j.Slf4j; request.setDossierId(UUID.randomUUID().toString()); request.setFileId(UUID.randomUUID().toString()); request.setDossierTemplateId(testDossierTemplate.id); - request.setManualRedactions(new ManualRedactions()); request.setAnalysisNumber(-1); + Path manualRedactionFile = folder.resolve(fileId + ".MANUAL_REDACTIONS.json"); + if (Files.exists(manualRedactionFile)) { + request.setManualRedactions(mapper.readValue(manualRedactionFile.toFile(), ManualRedactions.class)); + } else { + request.setManualRedactions(new ManualRedactions()); + } + Set endingsToUpload = Set.of("ORIGIN", "DOCUMENT_PAGES", "DOCUMENT_POSITION", @@ -226,8 +233,11 @@ import lombok.extern.slf4j.Slf4j; Set uploadedFileTypes = Files.walk(folder) .filter(path -> path.toFile().isFile()) - .filter(path -> endingsToUpload.contains(parseFileTypeFromPath(path))) + .filter(path -> parseFileTypeFromPath(path).map(endingsToUpload::contains) + .orElse(false)) .map(filePath -> uploadFile(filePath, request)) + .filter(Optional::isPresent) + .map(Optional::get) .collect(Collectors.toUnmodifiableSet()); Set missingFileTypes = Sets.difference(endingsToUpload, uploadedFileTypes); @@ -243,18 +253,26 @@ import lombok.extern.slf4j.Slf4j; } - private static FileType parseFileTypeFromPath(Path path) { + private static Optional parseFileTypeFromPath(Path path) { - return FileType.valueOf(path.getFileName().toString().split("\\.")[1]); + String fileType = path.getFileName().toString().split("\\.")[1]; + try { + return Optional.of(FileType.valueOf(fileType)); + } catch (IllegalArgumentException e) { + return Optional.empty(); + } } @SneakyThrows - private FileType uploadFile(Path path, AnalyzeRequest request) { + private Optional uploadFile(Path path, AnalyzeRequest request) { - FileType fileType = parseFileTypeFromPath(path); + Optional fileType = parseFileTypeFromPath(path); + if (fileType.isEmpty()) { + return Optional.empty(); + } try (var fis = new FileInputStream(path.toFile()); var in = new GZIPInputStream(fis);) { - storageService.storeObject(TENANT_ID, RedactionStorageService.StorageIdUtils.getStorageId(request.getDossierId(), request.getFileId(), fileType), in); + storageService.storeObject(TENANT_ID, RedactionStorageService.StorageIdUtils.getStorageId(request.getDossierId(), request.getFileId(), fileType.get()), in); } return fileType;