Merge branch 'RED-9169' into 'master'

RED-9169: duplicate entities in component rules

Closes RED-9169

See merge request redactmanager/redaction-service!417
This commit is contained in:
Kilian Schüttler 2024-06-04 17:16:04 +02:00
commit 3de5d7e356
6 changed files with 77 additions and 38 deletions

View File

@ -32,4 +32,6 @@ public class RedactionServiceSettings {
private boolean ruleExecutionSecured = true;
private boolean annotationMode;
}

View File

@ -312,13 +312,12 @@ public class AnalyzeService {
}
// We need the latest EntityLog entries for components rules execution
entityLog.getEntityLogEntry().addAll(redactionStorageService.getEntityLog(analyzeRequest.getDossierId(), analyzeRequest.getFileId()).getEntityLogEntry());
entityLog.setEntityLogEntry(redactionStorageService.getEntityLog(analyzeRequest.getDossierId(), analyzeRequest.getFileId()).getEntityLogEntry());
List<Component> components = componentDroolsExecutionService.executeRules(kieWrapperComponentRules.container(),
entityLog,
document,
addedFileAttributes.stream()
.toList(),
addedFileAttributes,
analyzeRequest.getComponentMappings());
log.info("Finished component rule execution for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());

View File

@ -4,7 +4,6 @@ import static java.lang.String.format;
import static java.util.stream.Collectors.groupingBy;
import java.awt.geom.Rectangle2D;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
@ -28,6 +27,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntit
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.service.DictionaryService;
import com.iqser.red.service.redaction.v1.server.utils.RectangleTransformations;
import lombok.extern.slf4j.Slf4j;
@ -113,33 +113,27 @@ public class EntityFindingUtility {
public static double calculateMinDistance(List<RectangleWithPage> originalPositions, TextEntity entity) {
if (originalPositions.size() != countRectangles(entity)) {
return Double.MAX_VALUE;
}
return originalPositions.stream()
.mapToDouble(rectangleWithPage -> calculateMinDistancePerRectangle(entity, rectangleWithPage.pageNumber(), rectangleWithPage.rectangle2D())).average()
Map<Integer, Rectangle2D> originalBBoxPerPage = buildBBoxPerPage(originalPositions);
return entity.getPositionsOnPagePerPage()
.stream()
.mapToDouble(positionOnPage -> calculateDistance(RectangleTransformations.rectangle2DBBox(positionOnPage.getRectanglePerLine()),
originalBBoxPerPage.getOrDefault(positionOnPage.getPage().getNumber(), new Rectangle2D.Double()))).average()
.orElse(Double.MAX_VALUE);
}
private static long countRectangles(TextEntity entity) {
private static Map<Integer, Rectangle2D> buildBBoxPerPage(List<RectangleWithPage> originalPositions) {
return entity.getPositionsOnPagePerPage()
Map<Integer, List<RectangleWithPage>> originalPositionsPerPage = originalPositions.stream()
.collect(Collectors.groupingBy(RectangleWithPage::pageNumber));
return originalPositionsPerPage.entrySet()
.stream()
.mapToLong(redactionPosition -> redactionPosition.getRectanglePerLine().size()).sum();
}
private static double calculateMinDistancePerRectangle(TextEntity entity, int pageNumber, Rectangle2D originalRectangle) {
return entity.getPositionsOnPagePerPage()
.stream()
.filter(redactionPosition -> redactionPosition.getPage().getNumber() == pageNumber)
.map(PositionOnPage::getRectanglePerLine)
.flatMap(Collection::stream)
.mapToDouble(rectangle -> calculateDistance(rectangle, originalRectangle))
.min()
.orElse(Double.MAX_VALUE);
.collect(Collectors.toMap(Map.Entry::getKey,
entry -> entry.getValue()
.stream()
.map(RectangleWithPage::rectangle2D)
.collect(RectangleTransformations.collectBBox())));
}

View File

@ -2,6 +2,7 @@ package com.iqser.red.service.redaction.v1.server.service.drools;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
@ -16,6 +17,7 @@ import org.springframework.stereotype.Service;
import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute;
import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryState;
import com.iqser.red.service.persistence.service.v1.api.shared.model.component.ComponentMappingMetadata;
import com.iqser.red.service.redaction.v1.server.RedactionServiceSettings;
@ -49,7 +51,7 @@ public class ComponentDroolsExecutionService {
public List<Component> executeRules(KieContainer kieContainer,
EntityLog entityLog,
Document document,
List<FileAttribute> fileAttributes,
Set<FileAttribute> fileAttributes,
List<ComponentMappingMetadata> componentMappings) {
KieSession kieSession = kieContainer.newKieSession();
@ -64,7 +66,7 @@ public class ComponentDroolsExecutionService {
entityLog.getEntityLogEntry()
.stream()
.filter(entityLogEntry -> entityLogEntry.getState().equals(EntryState.APPLIED))
.filter(this::isApplied)
.map(entry -> Entity.fromEntityLogEntry(entry, document))
.forEach(kieSession::insert);
fileAttributes.stream()
@ -94,14 +96,37 @@ public class ComponentDroolsExecutionService {
}
List<FileAttribute> resultingFileAttributes = getFileAttributes(kieSession);
addOrUpdate(fileAttributes, resultingFileAttributes);
List<Component> components = getComponents(kieSession).stream()
.sorted(ComponentComparator.first())
.toList();
kieSession.dispose();
return components;
}
/**
 * Merges {@code resultingFileAttributes} into {@code fileAttributes}, modifying the set in place.
 *
 * <p>Each resulting attribute is removed from the set before it is added. A plain {@code add}
 * would keep an element the set already treats as equal; removing first makes the resulting
 * instance the one that ends up in the set.
 *
 * @param fileAttributes          the set to update; must support {@code remove} and {@code add}
 * @param resultingFileAttributes the attributes to add or replace, processed in list order
 */
private static void addOrUpdate(Set<FileAttribute> fileAttributes, List<FileAttribute> resultingFileAttributes) {
    resultingFileAttributes.forEach(updated -> {
        // Drop any equal element first so the add below actually stores the updated instance.
        fileAttributes.remove(updated);
        fileAttributes.add(updated);
    });
}
/**
 * Tells whether an entity log entry counts as applied for the current settings.
 *
 * <p>An entry in state {@code APPLIED} always qualifies. When annotation mode is enabled in the
 * settings, an entry in state {@code SKIPPED} qualifies as well.
 *
 * @param entityLogEntry the entry whose state is checked
 * @return {@code true} if the entry's state is accepted under the current settings
 */
private boolean isApplied(EntityLogEntry entityLogEntry) {
    boolean annotationMode = settings.isAnnotationMode();
    EntryState state = entityLogEntry.getState();
    return state.equals(EntryState.APPLIED) || (annotationMode && state.equals(EntryState.SKIPPED));
}
private static boolean hasComponentMappingServiceGlobal(KieSession kieSession) {
return kieSession.getKieBase().getKiePackages()

View File

@ -2,6 +2,7 @@ package com.iqser.red.service.redaction.v1.server.utils;
import java.awt.geom.Rectangle2D;
import java.awt.geom.RectangularShape;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
@ -37,7 +38,7 @@ public class RectangleTransformations {
}
public static Rectangle2D rectangle2DBBox(List<Rectangle2D> rectangle2DList) {
public static Rectangle2D rectangle2DBBox(Collection<Rectangle2D> rectangle2DList) {
return rectangle2DList.stream()
.collect(new Rectangle2DBBoxCollector());

View File

@ -17,6 +17,7 @@ import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicInteger;
@ -79,7 +80,7 @@ import lombok.extern.slf4j.Slf4j;
* This way you can recreate what is happening on the stack almost exactly.
*/ public class AnalysisEnd2EndTest {
Path dossierTemplateToUse = Path.of("/home/kschuettler/iqser/testing dossier templates/Production DocuMine"); // Add your dossier-template here
Path dossierTemplateToUse = Path.of("/home/kschuettler/iqser/fforesight/dossier-templates-v2/dev/LayoutParsingDatasetEvaluation"); // Add your dossier-template here
ObjectMapper mapper = ObjectMapperFactory.create();
final String TENANT_ID = "tenant";
@ -120,7 +121,7 @@ import lombok.extern.slf4j.Slf4j;
@SneakyThrows
public void runAnalysisEnd2End() {
String folder = "/home/kschuettler/iqser/redaction/redaction-service/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/end2end/broken"; // Should contain all files from minio directly, still zipped. Can contain multiple files.
String folder = "/home/kschuettler/Dokumente/analysisend2end/file1"; // Should contain all files from minio directly, still zipped. Can contain multiple files.
Path absoluteFolderPath;
if (folder.startsWith("files")) { // if it starts with "files" it is most likely in the resources folder, else it should be an absolute path
@ -169,7 +170,7 @@ import lombok.extern.slf4j.Slf4j;
when(dictionaryService.getDictionaryIncrements(any(), any(), any())).thenReturn(new DictionaryIncrement(Collections.emptySet(), new DictionaryVersion(0, 0)));
when(dictionaryService.isHint(any(String.class), any())).thenAnswer(invocation -> {
String type = invocation.getArgument(0);
return testDossierTemplate.testDictionary.getType(type).isHint();
return testDossierTemplate.testDictionary.isHint(type);
});
when(dictionaryService.getColor(any(String.class), any())).thenAnswer(invocation -> {
String type = invocation.getArgument(0);
@ -208,9 +209,15 @@ import lombok.extern.slf4j.Slf4j;
request.setDossierId(UUID.randomUUID().toString());
request.setFileId(UUID.randomUUID().toString());
request.setDossierTemplateId(testDossierTemplate.id);
request.setManualRedactions(new ManualRedactions());
request.setAnalysisNumber(-1);
Path manualRedactionFile = folder.resolve(fileId + ".MANUAL_REDACTIONS.json");
if (Files.exists(manualRedactionFile)) {
request.setManualRedactions(mapper.readValue(manualRedactionFile.toFile(), ManualRedactions.class));
} else {
request.setManualRedactions(new ManualRedactions());
}
Set<FileType> endingsToUpload = Set.of("ORIGIN",
"DOCUMENT_PAGES",
"DOCUMENT_POSITION",
@ -226,8 +233,11 @@ import lombok.extern.slf4j.Slf4j;
Set<FileType> uploadedFileTypes = Files.walk(folder)
.filter(path -> path.toFile().isFile())
.filter(path -> endingsToUpload.contains(parseFileTypeFromPath(path)))
.filter(path -> parseFileTypeFromPath(path).map(endingsToUpload::contains)
.orElse(false))
.map(filePath -> uploadFile(filePath, request))
.filter(Optional::isPresent)
.map(Optional::get)
.collect(Collectors.toUnmodifiableSet());
Set<FileType> missingFileTypes = Sets.difference(endingsToUpload, uploadedFileTypes);
@ -243,18 +253,26 @@ import lombok.extern.slf4j.Slf4j;
}
private static FileType parseFileTypeFromPath(Path path) {
private static Optional<FileType> parseFileTypeFromPath(Path path) {
return FileType.valueOf(path.getFileName().toString().split("\\.")[1]);
String fileType = path.getFileName().toString().split("\\.")[1];
try {
return Optional.of(FileType.valueOf(fileType));
} catch (IllegalArgumentException e) {
return Optional.empty();
}
}
@SneakyThrows
private FileType uploadFile(Path path, AnalyzeRequest request) {
private Optional<FileType> uploadFile(Path path, AnalyzeRequest request) {
FileType fileType = parseFileTypeFromPath(path);
Optional<FileType> fileType = parseFileTypeFromPath(path);
if (fileType.isEmpty()) {
return Optional.empty();
}
try (var fis = new FileInputStream(path.toFile()); var in = new GZIPInputStream(fis);) {
storageService.storeObject(TENANT_ID, RedactionStorageService.StorageIdUtils.getStorageId(request.getDossierId(), request.getFileId(), fileType), in);
storageService.storeObject(TENANT_ID, RedactionStorageService.StorageIdUtils.getStorageId(request.getDossierId(), request.getFileId(), fileType.get()), in);
}
return fileType;