Merge branch 'RED-9169' into 'master'
RED-9169: Duplicate entities in component rules. Closes RED-9169. See merge request redactmanager/redaction-service!417.
This commit is contained in:
commit
3de5d7e356
@ -32,4 +32,6 @@ public class RedactionServiceSettings {
|
||||
|
||||
private boolean ruleExecutionSecured = true;
|
||||
|
||||
private boolean annotationMode;
|
||||
|
||||
}
|
||||
|
||||
@ -312,13 +312,12 @@ public class AnalyzeService {
|
||||
}
|
||||
|
||||
// We need the latest EntityLog entries for component rule execution
|
||||
entityLog.getEntityLogEntry().addAll(redactionStorageService.getEntityLog(analyzeRequest.getDossierId(), analyzeRequest.getFileId()).getEntityLogEntry());
|
||||
entityLog.setEntityLogEntry(redactionStorageService.getEntityLog(analyzeRequest.getDossierId(), analyzeRequest.getFileId()).getEntityLogEntry());
|
||||
|
||||
List<Component> components = componentDroolsExecutionService.executeRules(kieWrapperComponentRules.container(),
|
||||
entityLog,
|
||||
document,
|
||||
addedFileAttributes.stream()
|
||||
.toList(),
|
||||
addedFileAttributes,
|
||||
analyzeRequest.getComponentMappings());
|
||||
|
||||
log.info("Finished component rule execution for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
|
||||
|
||||
@ -4,7 +4,6 @@ import static java.lang.String.format;
|
||||
import static java.util.stream.Collectors.groupingBy;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
@ -28,6 +27,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntit
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.service.DictionaryService;
|
||||
import com.iqser.red.service.redaction.v1.server.utils.RectangleTransformations;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@ -113,33 +113,27 @@ public class EntityFindingUtility {
|
||||
|
||||
public static double calculateMinDistance(List<RectangleWithPage> originalPositions, TextEntity entity) {
|
||||
|
||||
if (originalPositions.size() != countRectangles(entity)) {
|
||||
return Double.MAX_VALUE;
|
||||
}
|
||||
return originalPositions.stream()
|
||||
.mapToDouble(rectangleWithPage -> calculateMinDistancePerRectangle(entity, rectangleWithPage.pageNumber(), rectangleWithPage.rectangle2D())).average()
|
||||
Map<Integer, Rectangle2D> originalBBoxPerPage = buildBBoxPerPage(originalPositions);
|
||||
|
||||
return entity.getPositionsOnPagePerPage()
|
||||
.stream()
|
||||
.mapToDouble(positionOnPage -> calculateDistance(RectangleTransformations.rectangle2DBBox(positionOnPage.getRectanglePerLine()),
|
||||
originalBBoxPerPage.getOrDefault(positionOnPage.getPage().getNumber(), new Rectangle2D.Double()))).average()
|
||||
.orElse(Double.MAX_VALUE);
|
||||
}
|
||||
|
||||
|
||||
private static long countRectangles(TextEntity entity) {
|
||||
private static Map<Integer, Rectangle2D> buildBBoxPerPage(List<RectangleWithPage> originalPositions) {
|
||||
|
||||
return entity.getPositionsOnPagePerPage()
|
||||
Map<Integer, List<RectangleWithPage>> originalPositionsPerPage = originalPositions.stream()
|
||||
.collect(Collectors.groupingBy(RectangleWithPage::pageNumber));
|
||||
return originalPositionsPerPage.entrySet()
|
||||
.stream()
|
||||
.mapToLong(redactionPosition -> redactionPosition.getRectanglePerLine().size()).sum();
|
||||
}
|
||||
|
||||
|
||||
private static double calculateMinDistancePerRectangle(TextEntity entity, int pageNumber, Rectangle2D originalRectangle) {
|
||||
|
||||
return entity.getPositionsOnPagePerPage()
|
||||
.stream()
|
||||
.filter(redactionPosition -> redactionPosition.getPage().getNumber() == pageNumber)
|
||||
.map(PositionOnPage::getRectanglePerLine)
|
||||
.flatMap(Collection::stream)
|
||||
.mapToDouble(rectangle -> calculateDistance(rectangle, originalRectangle))
|
||||
.min()
|
||||
.orElse(Double.MAX_VALUE);
|
||||
.collect(Collectors.toMap(Map.Entry::getKey,
|
||||
entry -> entry.getValue()
|
||||
.stream()
|
||||
.map(RectangleWithPage::rectangle2D)
|
||||
.collect(RectangleTransformations.collectBBox())));
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -2,6 +2,7 @@ package com.iqser.red.service.redaction.v1.server.service.drools;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
@ -16,6 +17,7 @@ import org.springframework.stereotype.Service;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribute;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLog;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntityLogEntry;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.EntryState;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.component.ComponentMappingMetadata;
|
||||
import com.iqser.red.service.redaction.v1.server.RedactionServiceSettings;
|
||||
@ -49,7 +51,7 @@ public class ComponentDroolsExecutionService {
|
||||
public List<Component> executeRules(KieContainer kieContainer,
|
||||
EntityLog entityLog,
|
||||
Document document,
|
||||
List<FileAttribute> fileAttributes,
|
||||
Set<FileAttribute> fileAttributes,
|
||||
List<ComponentMappingMetadata> componentMappings) {
|
||||
|
||||
KieSession kieSession = kieContainer.newKieSession();
|
||||
@ -64,7 +66,7 @@ public class ComponentDroolsExecutionService {
|
||||
|
||||
entityLog.getEntityLogEntry()
|
||||
.stream()
|
||||
.filter(entityLogEntry -> entityLogEntry.getState().equals(EntryState.APPLIED))
|
||||
.filter(this::isApplied)
|
||||
.map(entry -> Entity.fromEntityLogEntry(entry, document))
|
||||
.forEach(kieSession::insert);
|
||||
fileAttributes.stream()
|
||||
@ -94,14 +96,37 @@ public class ComponentDroolsExecutionService {
|
||||
}
|
||||
|
||||
List<FileAttribute> resultingFileAttributes = getFileAttributes(kieSession);
|
||||
|
||||
addOrUpdate(fileAttributes, resultingFileAttributes);
|
||||
|
||||
List<Component> components = getComponents(kieSession).stream()
|
||||
.sorted(ComponentComparator.first())
|
||||
.toList();
|
||||
|
||||
kieSession.dispose();
|
||||
|
||||
return components;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Merges the given attributes into {@code fileAttributes}, mutating that set in place.
 * For every element of {@code resultingFileAttributes}, any element the set considers equal
 * is removed first and the resulting element is then added, so the resulting instance
 * is the one that remains in the set.
 * NOTE(review): which fields make two FileAttribute instances equal is not visible here - confirm
 * against FileAttribute's equals/hashCode.
 *
 * @param fileAttributes          the set to update; must support remove and add
 * @param resultingFileAttributes the attributes to add or to replace existing equal entries with
 */
private static void addOrUpdate(Set<FileAttribute> fileAttributes, List<FileAttribute> resultingFileAttributes) {
|
||||
|
||||
for (FileAttribute resultingFileAttribute : resultingFileAttributes) {
|
||||
// Remove before adding: Set.add leaves the set unchanged when an equal element is already present.
fileAttributes.remove(resultingFileAttribute);
|
||||
fileAttributes.add(resultingFileAttribute);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * Decides whether an entity log entry passes the "applied" filter.
 * An entry in state {@code EntryState.APPLIED} always passes; when
 * {@code settings.isAnnotationMode()} is true, an entry in state {@code EntryState.SKIPPED}
 * passes as well.
 *
 * @param entityLogEntry the entry whose state is inspected
 * @return {@code true} if the entry's state is accepted under the current annotation-mode setting
 */
private boolean isApplied(EntityLogEntry entityLogEntry) {
|
||||
|
||||
if (settings.isAnnotationMode()) {
|
||||
// In annotation mode, SKIPPED entries are accepted in addition to APPLIED ones.
return entityLogEntry.getState().equals(EntryState.APPLIED) || entityLogEntry.getState().equals(EntryState.SKIPPED);
|
||||
}
|
||||
return entityLogEntry.getState().equals(EntryState.APPLIED);
|
||||
}
|
||||
|
||||
|
||||
private static boolean hasComponentMappingServiceGlobal(KieSession kieSession) {
|
||||
|
||||
return kieSession.getKieBase().getKiePackages()
|
||||
|
||||
@ -2,6 +2,7 @@ package com.iqser.red.service.redaction.v1.server.utils;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.awt.geom.RectangularShape;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
@ -37,7 +38,7 @@ public class RectangleTransformations {
|
||||
}
|
||||
|
||||
|
||||
public static Rectangle2D rectangle2DBBox(List<Rectangle2D> rectangle2DList) {
|
||||
public static Rectangle2D rectangle2DBBox(Collection<Rectangle2D> rectangle2DList) {
|
||||
|
||||
return rectangle2DList.stream()
|
||||
.collect(new Rectangle2DBBoxCollector());
|
||||
|
||||
@ -17,6 +17,7 @@ import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
@ -79,7 +80,7 @@ import lombok.extern.slf4j.Slf4j;
|
||||
* This way you can recreate what is happening on the stack almost exactly.
|
||||
*/ public class AnalysisEnd2EndTest {
|
||||
|
||||
Path dossierTemplateToUse = Path.of("/home/kschuettler/iqser/testing dossier templates/Production DocuMine"); // Add your dossier-template here
|
||||
Path dossierTemplateToUse = Path.of("/home/kschuettler/iqser/fforesight/dossier-templates-v2/dev/LayoutParsingDatasetEvaluation"); // Add your dossier-template here
|
||||
ObjectMapper mapper = ObjectMapperFactory.create();
|
||||
final String TENANT_ID = "tenant";
|
||||
|
||||
@ -120,7 +121,7 @@ import lombok.extern.slf4j.Slf4j;
|
||||
@SneakyThrows
|
||||
public void runAnalysisEnd2End() {
|
||||
|
||||
String folder = "/home/kschuettler/iqser/redaction/redaction-service/redaction-service-v1/redaction-service-server-v1/src/test/resources/files/end2end/broken"; // Should contain all files from minio directly, still zipped. Can contain multiple files.
|
||||
String folder = "/home/kschuettler/Dokumente/analysisend2end/file1"; // Should contain all files from minio directly, still zipped. Can contain multiple files.
|
||||
|
||||
Path absoluteFolderPath;
|
||||
if (folder.startsWith("files")) { // if it starts with "files" it is most likely in the resources folder, else it should be an absolute path
|
||||
@ -169,7 +170,7 @@ import lombok.extern.slf4j.Slf4j;
|
||||
when(dictionaryService.getDictionaryIncrements(any(), any(), any())).thenReturn(new DictionaryIncrement(Collections.emptySet(), new DictionaryVersion(0, 0)));
|
||||
when(dictionaryService.isHint(any(String.class), any())).thenAnswer(invocation -> {
|
||||
String type = invocation.getArgument(0);
|
||||
return testDossierTemplate.testDictionary.getType(type).isHint();
|
||||
return testDossierTemplate.testDictionary.isHint(type);
|
||||
});
|
||||
when(dictionaryService.getColor(any(String.class), any())).thenAnswer(invocation -> {
|
||||
String type = invocation.getArgument(0);
|
||||
@ -208,9 +209,15 @@ import lombok.extern.slf4j.Slf4j;
|
||||
request.setDossierId(UUID.randomUUID().toString());
|
||||
request.setFileId(UUID.randomUUID().toString());
|
||||
request.setDossierTemplateId(testDossierTemplate.id);
|
||||
request.setManualRedactions(new ManualRedactions());
|
||||
request.setAnalysisNumber(-1);
|
||||
|
||||
Path manualRedactionFile = folder.resolve(fileId + ".MANUAL_REDACTIONS.json");
|
||||
if (Files.exists(manualRedactionFile)) {
|
||||
request.setManualRedactions(mapper.readValue(manualRedactionFile.toFile(), ManualRedactions.class));
|
||||
} else {
|
||||
request.setManualRedactions(new ManualRedactions());
|
||||
}
|
||||
|
||||
Set<FileType> endingsToUpload = Set.of("ORIGIN",
|
||||
"DOCUMENT_PAGES",
|
||||
"DOCUMENT_POSITION",
|
||||
@ -226,8 +233,11 @@ import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
Set<FileType> uploadedFileTypes = Files.walk(folder)
|
||||
.filter(path -> path.toFile().isFile())
|
||||
.filter(path -> endingsToUpload.contains(parseFileTypeFromPath(path)))
|
||||
.filter(path -> parseFileTypeFromPath(path).map(endingsToUpload::contains)
|
||||
.orElse(false))
|
||||
.map(filePath -> uploadFile(filePath, request))
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get)
|
||||
.collect(Collectors.toUnmodifiableSet());
|
||||
|
||||
Set<FileType> missingFileTypes = Sets.difference(endingsToUpload, uploadedFileTypes);
|
||||
@ -243,18 +253,26 @@ import lombok.extern.slf4j.Slf4j;
|
||||
}
|
||||
|
||||
|
||||
private static FileType parseFileTypeFromPath(Path path) {
|
||||
private static Optional<FileType> parseFileTypeFromPath(Path path) {
|
||||
|
||||
return FileType.valueOf(path.getFileName().toString().split("\\.")[1]);
|
||||
String fileType = path.getFileName().toString().split("\\.")[1];
|
||||
try {
|
||||
return Optional.of(FileType.valueOf(fileType));
|
||||
} catch (IllegalArgumentException e) {
|
||||
return Optional.empty();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private FileType uploadFile(Path path, AnalyzeRequest request) {
|
||||
private Optional<FileType> uploadFile(Path path, AnalyzeRequest request) {
|
||||
|
||||
FileType fileType = parseFileTypeFromPath(path);
|
||||
Optional<FileType> fileType = parseFileTypeFromPath(path);
|
||||
if (fileType.isEmpty()) {
|
||||
return Optional.empty();
|
||||
}
|
||||
try (var fis = new FileInputStream(path.toFile()); var in = new GZIPInputStream(fis);) {
|
||||
storageService.storeObject(TENANT_ID, RedactionStorageService.StorageIdUtils.getStorageId(request.getDossierId(), request.getFileId(), fileType), in);
|
||||
storageService.storeObject(TENANT_ID, RedactionStorageService.StorageIdUtils.getStorageId(request.getDossierId(), request.getFileId(), fileType.get()), in);
|
||||
|
||||
}
|
||||
return fileType;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user