Merge branch 'DM-285' into 'master'

DM-285: fix component drools execution

Closes DM-285

See merge request redactmanager/redaction-service!130
This commit is contained in:
Dominique Eifländer 2023-09-15 13:11:11 +02:00
commit 636066e8e7
11 changed files with 57 additions and 59 deletions

View File

@ -108,6 +108,10 @@ public class ManualChangeOverwrite {
type = recategorization.getType();
}
if (manualChange instanceof ManualRedactionEntry redactionEntry) {
legalBasis = redactionEntry.getLegalBasis();
}
descriptions.add(MANUAL_CHANGE_DESCRIPTIONS.get(manualChange.getClass()));
}
changed = false;

View File

@ -1,9 +1,6 @@
package com.iqser.red.service.redaction.v1.server.model.drools;
import java.util.Arrays;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
import lombok.NonNull;
@ -35,18 +32,6 @@ public record RuleIdentifier(@NonNull RuleType type, Integer unit, Integer id) {
}
public static Set<RuleIdentifier> fromListOfIdentifiersString(String input) {
return Arrays.stream(input.split(",")).map(String::trim).map(RuleIdentifier::fromString).collect(Collectors.toSet());
}
private static Integer parseIntOrStar(String value) {
return !value.equals("*") ? Integer.parseInt(value) : null;
}
/**
* This is used to filter rules, if the field Integer unit or Integer id is null, the field will match any other RuleIdentifier.
* Therefore, to compare RuleIdentifiers one should always use the function matches.
@ -66,6 +51,10 @@ public record RuleIdentifier(@NonNull RuleType type, Integer unit, Integer id) {
@Override
public String toString() {
if (type.name().isBlank() || type.name().isEmpty()) {
return "";
}
StringBuilder sb = new StringBuilder();
sb.append(type().name());
if (Objects.nonNull(unit())) {

View File

@ -120,9 +120,15 @@ public class AnalyzeService {
RedactionLog redactionLog = createRedactionLog(analyzeRequest, document, notFoundManualRedactionEntries, dictionary, kieWrapperEntityRules);
EntityLog entityLog = createEntityLog(analyzeRequest, document, notFoundManualRedactionEntries, dictionary, kieWrapperEntityRules);
return finalizeAnalysis(analyzeRequest, startTime, kieWrapperComponentRules, entityLog,
return finalizeAnalysis(analyzeRequest,
startTime,
kieWrapperComponentRules,
entityLog,
redactionLog,
document.getNumberOfPages(), dictionary.getVersion(), false, new HashSet<>(allFileAttributes));
document.getNumberOfPages(),
dictionary.getVersion(),
false,
new HashSet<>(allFileAttributes));
}
@ -191,9 +197,7 @@ public class AnalyzeService {
if (sectionsToReAnalyse.isEmpty()) {
return finalizeAnalysis(analyzeRequest,
startTime,
kieContainerCreationService.getLatestKieContainer(analyzeRequest.getDossierTemplateId(), RuleFileType.COMPONENT),
previousEntityLog,
previousRedactionLog,
kieContainerCreationService.getLatestKieContainer(analyzeRequest.getDossierTemplateId(), RuleFileType.COMPONENT), previousEntityLog, previousRedactionLog,
document.getNumberOfPages(),
dictionaryIncrement.getDictionaryVersion(),
true,
@ -226,9 +230,7 @@ public class AnalyzeService {
EntityLog entityLog = updatePreviousEntityLog(analyzeRequest, document, notFoundManualRedactionEntries, previousEntityLog, sectionsToReanalyseIds);
return finalizeAnalysis(analyzeRequest,
startTime,
kieContainerCreationService.getLatestKieContainer(analyzeRequest.getDossierTemplateId(), RuleFileType.COMPONENT),
entityLog,
startTime, kieContainerCreationService.getLatestKieContainer(analyzeRequest.getDossierTemplateId(), RuleFileType.COMPONENT), entityLog,
redactionLog,
document.getNumberOfPages(),
dictionaryIncrement.getDictionaryVersion(),
@ -283,7 +285,10 @@ public class AnalyzeService {
}
private AnalyzeResult finalizeAnalysis(AnalyzeRequest analyzeRequest, long startTime, KieWrapper kieWrapperComponentRules, EntityLog entityLog,
private AnalyzeResult finalizeAnalysis(AnalyzeRequest analyzeRequest,
long startTime,
KieWrapper kieWrapperComponentRules,
EntityLog entityLog,
RedactionLog redactionLog,
int numberOfPages,
DictionaryVersion dictionaryVersion,
@ -292,8 +297,8 @@ public class AnalyzeService {
EntityLogChanges entityLogChanges = finalizeEntityLog(analyzeRequest, entityLog, redactionLog, dictionaryVersion);
if (entityLogChanges.isHasChanges()) {
computeComponentsWhenRulesArePresent(analyzeRequest, kieWrapperComponentRules, addedFileAttributes, entityLogChanges);
if (entityLogChanges.isHasChanges() || !isReanalysis) {
computeComponentsWhenRulesArePresent(analyzeRequest, kieWrapperComponentRules, addedFileAttributes, entityLogChanges, dictionaryVersion);
}
log.info("Stored analysis logs for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
@ -310,7 +315,8 @@ public class AnalyzeService {
.hasUpdates(entityLogChanges.isHasChanges())
.analysisVersion(redactionServiceSettings.getAnalysisVersion())
.analysisNumber(analyzeRequest.getAnalysisNumber())
.rulesVersion(entityLog.getRulesVersion()).componentRulesVersion(kieWrapperComponentRules.rulesVersion())
.rulesVersion(entityLog.getRulesVersion())
.componentRulesVersion(kieWrapperComponentRules.rulesVersion())
.componentRulesVersion(kieWrapperComponentRules.rulesVersion())
.dictionaryVersion(entityLog.getDictionaryVersion())
.legalBasisVersion(entityLog.getLegalBasisVersion())
@ -325,7 +331,8 @@ public class AnalyzeService {
private void computeComponentsWhenRulesArePresent(AnalyzeRequest analyzeRequest,
KieWrapper kieWrapperComponentRules,
Set<FileAttribute> addedFileAttributes,
EntityLogChanges entityLogChanges) {
EntityLogChanges entityLogChanges,
DictionaryVersion dictionaryVersion) {
if (!kieWrapperComponentRules.isPresent()) {
return;
@ -336,34 +343,37 @@ public class AnalyzeService {
addedFileAttributes.stream().toList());
log.info("Finished component rule execution for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
ComponentLog componentLog = componentLogCreatorService.buildComponentLog(analyzeRequest.getAnalysisNumber(), components);
ComponentLog componentLog = componentLogCreatorService.buildComponentLog(analyzeRequest.getAnalysisNumber(),
components,
dictionaryVersion,
kieWrapperComponentRules.rulesVersion());
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.COMPONENT_LOG, componentLog);
}
private EntityLogChanges finalizeEntityLog(AnalyzeRequest analyzeRequest, EntityLog entityLog, RedactionLog redactionLog, DictionaryVersion dictionaryVersion) {
private EntityLogChanges finalizeEntityLog(AnalyzeRequest analyzeRequest,
EntityLog entityLog,
RedactionLog redactionLog,
DictionaryVersion dictionaryVersion) {
EntityLog previousEntityLog = redactionStorageService.getEntityLog(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
RedactionLog previousRedactionLog = redactionStorageService.getRedactionLog(analyzeRequest.getDossierId(), analyzeRequest.getFileId());
// TODO: remove redactionLog related stuff
redactionLog.setDictionaryVersion(dictionaryVersion.getDossierTemplateVersion());
redactionLog.setDossierDictionaryVersion(dictionaryVersion.getDossierVersion());
excludeExcludedPages(redactionLog, analyzeRequest.getExcludedPages());
RedactionLogChanges redactionLogChange = redactionChangeLogService.computeChanges(analyzeRequest.getDossierId(),
analyzeRequest.getFileId(),
redactionLog,
analyzeRequest.getAnalysisNumber());
RedactionLogChanges redactionLogChange = redactionChangeLogService.computeChanges(previousRedactionLog, redactionLog, analyzeRequest.getAnalysisNumber());
entityLog.setDictionaryVersion(dictionaryVersion.getDossierTemplateVersion());
entityLog.setDossierDictionaryVersion(dictionaryVersion.getDossierVersion());
excludeExcludedPages(entityLog, analyzeRequest.getExcludedPages());
EntityLogChanges entityLogChanges = entityChangeLogService.computeChanges(analyzeRequest.getDossierId(),
analyzeRequest.getFileId(),
entityLog,
analyzeRequest.getAnalysisNumber());
EntityLogChanges entityLogChanges = entityChangeLogService.computeChanges(previousEntityLog, entityLog, analyzeRequest.getAnalysisNumber());
log.info("Created entity log for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
redactionStorageService.storeObject(analyzeRequest.getDossierId(), analyzeRequest.getFileId(), FileType.REDACTION_LOG, redactionLogChange.getRedactionLog());

View File

@ -12,12 +12,13 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Position;
import com.iqser.red.service.redaction.v1.server.model.component.Component;
import com.iqser.red.service.redaction.v1.server.model.component.Entity;
import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryVersion;
import com.iqser.red.service.redaction.v1.server.service.document.EntityComparators;
@Service
public class ComponentLogCreatorService {
public ComponentLog buildComponentLog(int analysisNumber, List<Component> components) {
public ComponentLog buildComponentLog(int analysisNumber, List<Component> components, DictionaryVersion dictionaryVersion, long rulesVersion) {
List<ComponentLogCategory> componentLogCategories = components.stream()
.collect(Collectors.groupingBy(Component::getCategory, Collectors.mapping(this::buildComponentLogEntry, Collectors.toList())))
@ -25,7 +26,7 @@ public class ComponentLogCreatorService {
.stream()
.map(entry -> new ComponentLogCategory(entry.getKey(), entry.getValue()))
.toList();
return new ComponentLog(analysisNumber, componentLogCategories, -1, -1, -1);
return new ComponentLog(analysisNumber, componentLogCategories, dictionaryVersion.getDossierVersion(), dictionaryVersion.getDossierTemplateVersion(), rulesVersion);
}
@ -47,8 +48,7 @@ public class ComponentLogCreatorService {
private ComponentEntityReference toComponentEntityReference(Entity entity) {
return ComponentEntityReference.builder()
.id(entity.getId()).value(entity.getValue())
return ComponentEntityReference.builder().id(entity.getId()).value(entity.getValue())
.page(entity.getPositions().stream().findFirst().map(Position::getPageNumber).orElse(0))
.reason(entity.getReason())
.ruleIdentifier(entity.getMatchedRule())

View File

@ -31,12 +31,10 @@ public class EntityChangeLogService {
@Timed("redactmanager_computeChanges")
public EntityLogChanges computeChanges(String dossierId, String fileId, EntityLog currentEntityLog, int analysisNumber) {
public EntityLogChanges computeChanges(EntityLog previousEntityLog, EntityLog currentEntityLog, int analysisNumber) {
long start = System.currentTimeMillis();
EntityLog previousEntityLog = redactionStorageService.getEntityLog(dossierId, fileId);
if (previousEntityLog == null) {
currentEntityLog.getEntityLogEntry().forEach(entry -> {
entry.getChanges().add(new Change(analysisNumber, ChangeType.ADDED, OffsetDateTime.now()));

View File

@ -31,12 +31,10 @@ public class RedactionChangeLogService {
@Timed("redactmanager_computeChanges")
public RedactionLogChanges computeChanges(String dossierId, String fileId, RedactionLog currentRedactionLog, int analysisNumber) {
public RedactionLogChanges computeChanges(RedactionLog previousRedactionLog, RedactionLog currentRedactionLog, int analysisNumber) {
long start = System.currentTimeMillis();
RedactionLog previousRedactionLog = redactionStorageService.getRedactionLog(dossierId, fileId);
if (previousRedactionLog == null) {
currentRedactionLog.getRedactionLogEntry().forEach(entry -> {
entry.getChanges().add(new Change(analysisNumber, ChangeType.ADDED, OffsetDateTime.now()));

View File

@ -286,9 +286,8 @@ public class RedactionLogCreatorService {
private Rectangle toRedactionLogRectangle(Rectangle2D rectangle2D, int pageNumber) {
return new Rectangle(new Point((float) rectangle2D.getMinX(), (float) (rectangle2D.getMinY() + rectangle2D.getHeight())),
(float) rectangle2D.getWidth(),
-(float) rectangle2D.getHeight(),
return new Rectangle(new Point((float) rectangle2D.getMinX(), (float) rectangle2D.getMinY()),
(float) rectangle2D.getWidth(), (float) rectangle2D.getHeight(),
pageNumber);
}

View File

@ -79,7 +79,9 @@ public class ComponentCreationService {
public void createComponentsForUnMappedEntities(String ruleIdentifier, Collection<Entity> entities) {
entities.forEach(entity -> create(ruleIdentifier, entity.getType(), entity.getValue(), "Unmapped Entity", List.of(entity)));
entities.stream()
.filter(entity -> !referencedEntities.contains(entity))
.forEach(entity -> create(ruleIdentifier, entity.getType(), entity.getValue(), "Unmapped Entity", List.of(entity)));
}
@ -91,8 +93,7 @@ public class ComponentCreationService {
.matchedRule(RuleIdentifier.fromString(ruleIdentifier))
.category(category)
.value(value)
.transformation(transformation)
.references(new LinkedList<>(references)));
.transformation(transformation).references(new LinkedList<>(references)).build());
}

View File

@ -118,7 +118,7 @@ public class ManualEntityCreationService {
private Optional<TextEntity> findClosestEntityAndReturnEmptyIfNotFound(ManualEntity identifier, Map<String, List<TextEntity>> entitiesWithSameValue) {
List<TextEntity> possibleEntities = entitiesWithSameValue.get(identifier.getValue().toLowerCase(Locale.ROOT));
List<TextEntity> possibleEntities = entitiesWithSameValue.get(identifier.getValue().toLowerCase(Locale.ENGLISH));
if (entityIdentifierValueNotFound(possibleEntities)) {
log.warn("Entity could not be created with identifier: {}, due to the value {} not being found anywhere.", identifier, identifier.getValue());

View File

@ -1,6 +1,5 @@
package com.iqser.red.service.redaction.v1.server;
import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.when;
import java.io.FileOutputStream;
@ -30,7 +29,6 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemp
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateRequest;
import com.iqser.red.service.redaction.v1.server.annotate.AnnotateResponse;
import com.iqser.red.service.redaction.v1.server.redaction.utils.OsUtils;
import com.iqser.red.service.redaction.v1.server.utils.ExceptionProvider;
import com.iqser.red.storage.commons.StorageAutoConfiguration;
import com.iqser.red.storage.commons.service.StorageService;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
@ -100,7 +98,7 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
@Test
@Disabled
// @Disabled
public void testTopOfPage13InNotHeader() throws IOException {
// Fix In BodyTextFrameService destroys header detection in files/new/SYNGENTA_EFSA_sanitisation_GFL_v1_moreSections.pdf
@ -114,6 +112,7 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
AnalyzeResult result = analyzeService.analyze(request);
System.out.println("Finished analysis");
var redactionLog = redactionStorageService.getRedactionLog(TEST_DOSSIER_ID, TEST_FILE_ID);
var componentLog = redactionStorageService.getComponentLog(TEST_DOSSIER_ID, TEST_FILE_ID);
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder().dossierId(TEST_DOSSIER_ID).fileId(TEST_FILE_ID).build());

View File

@ -228,7 +228,7 @@ rule "DefaultComponents.999.0: Create components for all unmapped entities."
rule "X.0.0: merge duplicate component references"
when
$first: Component()
$duplicate: Component(this != $first, category == category, value == value)
$duplicate: Component(this != $first, category == $first.category, value == $first.value)
then
$first.getReferences().addAll($duplicate.getReferences());
retract($duplicate);