RED-10200: stuff, comments, and more stuff, and even more comments

This commit is contained in:
maverickstuder 2024-11-26 12:44:31 +01:00 committed by Kilian Schuettler
parent db59ae014b
commit 00d83f58fd
22 changed files with 525 additions and 65 deletions

View File

@ -51,6 +51,12 @@ public interface IEntity {
String type();
/**
* Marks this entity and all its intersecting nodes as updated
*/
void update();
/**
* An Entity is valid when it is active and not a false recommendation, a false positive, or a dictionary removal.
*
@ -339,7 +345,12 @@ public interface IEntity {
*/
default void addMatchedRule(MatchedRule matchedRule) {
    // Snapshot validity before mutating, so we can detect a state transition.
    boolean wasValid = valid();
    getMatchedRuleList().add(matchedRule);
    boolean validityChanged = valid() != wasValid;
    if (validityChanged) {
        // Only propagate when the rule flipped this entity's validity.
        update();
    }
}
@ -353,7 +364,12 @@ public interface IEntity {
if (getMatchedRuleList().equals(matchedRules)) {
return;
}
boolean valid = valid();
getMatchedRuleList().addAll(matchedRules);
if (valid() == valid) {
return;
}
update();
}

View File

@ -0,0 +1,27 @@
package com.iqser.red.service.redaction.v1.server.model.document.entity;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
/**
 * Abstraction over a Drools KieSession that keeps session facts in sync when
 * entities change: inserting or updating an entity also re-notifies the session
 * of the nodes affected by that change, so dependent rules re-fire.
 */
public interface IKieSessionUpdater {
/**
 * Inserts a TextEntity into the KieSession and updates intersecting nodes.
 *
 * @param textEntity the TextEntity to insert
 */
void insert(TextEntity textEntity);
/**
 * Updates a TextEntity in the KieSession and updates intersecting nodes.
 *
 * @param textEntity the TextEntity to update
 */
void update(TextEntity textEntity);
/**
 * Updates an Image in the KieSession and recursively updates its parent nodes.
 *
 * @param image the Image to update
 */
void update(Image image);
}

View File

@ -8,11 +8,13 @@ import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.PriorityQueue;
import java.util.Set;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine;
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.utils.IdBuilder;
@ -22,6 +24,7 @@ import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.NonNull;
import lombok.experimental.FieldDefaults;
/**
@ -68,6 +71,8 @@ public class TextEntity implements IEntity {
List<SemanticNode> intersectingNodes = new LinkedList<>();
SemanticNode deepestFullyContainingNode;
List<Relation> relations = new LinkedList<>();
public static TextEntity initialEntityNode(TextRange textRange, String type, EntityType entityType, SemanticNode node) {
@ -311,4 +316,25 @@ public class TextEntity implements IEntity {
.orElse(getMatchedRule().isWriteValueWithLineBreaks() ? getValueWithLineBreaks() : value);
}
/** Propagates this entity's change to the KieSession, when one is registered. */
public void update() {
    Optional<IKieSessionUpdater> updater = getKieSessionUpdater();
    if (updater.isPresent()) {
        updater.get().update(this);
    }
}
/**
 * Resolves the {@link IKieSessionUpdater} registered on the owning Document, if any.
 * <p>
 * NOTE(review): assumes that when this entity intersects the document root, the
 * Document is the FIRST intersecting node — TODO confirm the ordering guarantee
 * of {@code intersectingNodes}.
 *
 * @return the updater, or {@link Optional#empty()} when this entity has no
 *         intersecting nodes, the first one is not a Document, or no updater
 *         was registered on the Document
 */
private @NonNull Optional<IKieSessionUpdater> getKieSessionUpdater() {
    if (intersectingNodes.isEmpty()) {
        return Optional.empty();
    }
    if (intersectingNodes.get(0) instanceof Document document) {
        // ofNullable replaces the explicit null check on the registered updater
        return Optional.ofNullable(document.getKieSessionUpdater());
    }
    return Optional.empty();
}
}

View File

@ -11,6 +11,7 @@ import java.util.stream.Stream;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
import com.iqser.red.service.redaction.v1.server.model.document.entity.IKieSessionUpdater;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import lombok.AccessLevel;
@ -39,6 +40,7 @@ public class Document extends AbstractSemanticNode {
@Builder.Default
static final SectionIdentifier sectionIdentifier = SectionIdentifier.document();
IKieSessionUpdater kieSessionUpdater;
@Override
public NodeType getType() {

View File

@ -5,12 +5,14 @@ import java.util.Collections;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.PriorityQueue;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.model.document.NodeVisitor;
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.IKieSessionUpdater;
import com.iqser.red.service.redaction.v1.server.model.document.entity.ManualChangeOverwrite;
import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRule;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
@ -21,6 +23,7 @@ import lombok.Builder;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.NoArgsConstructor;
import lombok.NonNull;
import lombok.experimental.FieldDefaults;
import lombok.experimental.SuperBuilder;
@ -108,6 +111,13 @@ public class Image extends AbstractSemanticNode implements IEntity {
}
/** Propagates this image's change to the KieSession, when one is registered. */
@Override
public void update() {
    Optional<IKieSessionUpdater> updater = getKieSessionUpdater();
    if (updater.isPresent()) {
        updater.get().update(this);
    }
}
@Override
public String toString() {
@ -176,4 +186,18 @@ public class Image extends AbstractSemanticNode implements IEntity {
return true;
}
/**
 * Resolves the {@link IKieSessionUpdater} registered on the root Document of
 * this image's document tree, if any.
 *
 * @return the updater, or {@link Optional#empty()} when this image has no
 *         document tree, the tree root is not a Document, or no updater was
 *         registered on the Document
 */
private @NonNull Optional<IKieSessionUpdater> getKieSessionUpdater() {
    if (getDocumentTree() == null) {
        return Optional.empty();
    }
    if (getDocumentTree().getRoot().getNode() instanceof Document document) {
        // ofNullable replaces the explicit null check on the registered updater
        return Optional.ofNullable(document.getKieSessionUpdater());
    }
    return Optional.empty();
}
}

View File

@ -12,7 +12,7 @@ plugins {
description = "redaction-service-server-v1"
val layoutParserVersion = "0.191.0"
val layoutParserVersion = "0.193.0"
val jacksonVersion = "2.15.2"
val droolsVersion = "9.44.0.Final"
val pdfBoxVersion = "3.0.0"

View File

@ -40,7 +40,7 @@ public class RedactionServiceSettings {
private boolean annotationMode;
private boolean droolsDebug;
private boolean droolsDebug = true;
private boolean protobufJsonFallback = true;

View File

@ -182,6 +182,13 @@ public class PrecursorEntity implements IEntity {
}
@Override
public void update() {
// not in KieSession, do nothing
}
/**
* @return true when this entity is of EntityType ENTITY or HINT
*/

View File

@ -205,6 +205,7 @@ public class AnalyzeService {
dictionarySearchService.addDictionaryEntities(analysisData.dictionary(), analysisData.document());
log.info("Finished Dictionary Search for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
long start = System.currentTimeMillis();
// We could also add the imported redactions here, similarly to the manual redactions, for additional processing.
List<FileAttribute> allFileAttributes = entityDroolsExecutionService.executeRules(analysisData.kieWrapperEntityRules().container(),
analysisData.document(),
@ -214,6 +215,8 @@ public class AnalyzeService {
analysisData.nerEntities(),
context);
log.info("Finished entity rule execution for file {} in dossier {}", analyzeRequest.getFileId(), analyzeRequest.getDossierId());
long end = System.currentTimeMillis();
System.out.println("Rule exec duration: " + (end - start));
EntityLogChanges entityLogChanges = entityLogCreatorService.createInitialEntityLog(analyzeRequest,
analysisData.document(),

View File

@ -7,7 +7,6 @@ import org.springframework.stereotype.Service;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine;
import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary;
import com.iqser.red.service.redaction.v1.server.model.dictionary.SearchImplementation;
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.service.document.EntityCreationService;

View File

@ -21,7 +21,6 @@ import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang3.tuple.Pair;
import org.kie.api.runtime.KieSession;
import com.google.common.base.Functions;
import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog.entitylog.Engine;
@ -31,8 +30,10 @@ import com.iqser.red.service.redaction.v1.server.model.document.ConsecutiveBound
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType;
import com.iqser.red.service.redaction.v1.server.model.document.entity.IKieSessionUpdater;
import com.iqser.red.service.redaction.v1.server.model.document.entity.ManualChangeOverwrite;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
@ -50,22 +51,12 @@ import lombok.extern.slf4j.Slf4j;
public class EntityCreationService {
private final EntityEnrichmentService entityEnrichmentService;
private final KieSession kieSession;
private final Set<SemanticNode> nodesInKieSession; // empty set means all nodes are in kieSession
public EntityCreationService(EntityEnrichmentService entityEnrichmentService) {
this.entityEnrichmentService = entityEnrichmentService;
this.kieSession = null;
this.nodesInKieSession = Collections.emptySet();
}
public EntityCreationService(EntityEnrichmentService entityEnrichmentService, KieSession kieSession) {
this.entityEnrichmentService = entityEnrichmentService;
this.kieSession = kieSession;
this.nodesInKieSession = Collections.emptySet();
}
@ -1017,7 +1008,7 @@ public class EntityCreationService {
return Optional.empty();
}
entity.addEngines(engines);
insertToKieSession(entity);
insertToKieSession(entity, node);
return Optional.of(entity);
}
@ -1092,7 +1083,7 @@ public class EntityCreationService {
entityEnrichmentService.enrichEntity(mergedEntity, node.getTextBlock());
addEntityToGraph(mergedEntity, node);
insertToKieSession(mergedEntity);
insertToKieSession(mergedEntity, node);
entitiesToMerge.stream()
.filter(e -> !e.equals(mergedEntity))
@ -1159,10 +1150,14 @@ public class EntityCreationService {
*
* @param textEntity The merged text entity to insert.
*/
public void insertToKieSession(TextEntity textEntity) {
public void insertToKieSession(TextEntity textEntity, SemanticNode node) {
if (kieSession != null) {
kieSession.insert(textEntity);
if (node.getDocumentTree().getRoot().getNode() instanceof Document document) {
IKieSessionUpdater updater = document.getKieSessionUpdater();
if (updater == null) {
return;
}
updater.insert(textEntity);
}
}
@ -1510,18 +1505,19 @@ public class EntityCreationService {
documentTree.getRoot().getNode().addThisToEntityIfIntersects(entity);
if (!nodesInKieSession.isEmpty() && entity.getIntersectingNodes()
.stream()
.anyMatch(node -> !nodesInKieSession.contains(node))) {
entity.removeFromGraph();
return false;
}
// if (!nodesInKieSession.isEmpty() && entity.getIntersectingNodes()
// .stream()
// .anyMatch(node -> !nodesInKieSession.contains(node))) {
// entity.removeFromGraph();
// return false;
// }
TextBlock textBlock = entity.getDeepestFullyContainingNode().getTextBlock();
entityEnrichmentService.enrichEntity(entity, textBlock);
addToPages(entity);
addEntityToNodeEntitySets(entity);
// compute relationships, by looping through deepestFullyContainingNode's entities
return true;
}

View File

@ -2,7 +2,6 @@ package com.iqser.red.service.redaction.v1.server.service.drools;
import static com.iqser.red.service.redaction.v1.server.service.drools.ComponentDroolsExecutionService.RULES_LOGGER_GLOBAL;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
@ -95,10 +94,12 @@ public class EntityDroolsExecutionService {
KieSession kieSession = kieContainer.newKieSession();
Set<SemanticNode> nodesInKieSession = sectionsToAnalyze.size() == document.streamAllSubNodes()
.count() ? Collections.emptySet() : buildSet(sectionsToAnalyze, document);
Set<SemanticNode> nodesInKieSession = new HashSet<>();
EntityCreationService entityCreationService = new EntityCreationService(entityEnrichmentService, kieSession, nodesInKieSession);
KieSessionUpdater kieSessionUpdater = new KieSessionUpdater(nodesInKieSession, kieSession);
document.setKieSessionUpdater(kieSessionUpdater);
EntityCreationService entityCreationService = new EntityCreationService(entityEnrichmentService, nodesInKieSession);
RulesLogger logger = new RulesLogger(webSocketService, context);
if (settings.isDroolsDebug()) {
logger.enableAgendaTracking();
@ -116,23 +117,26 @@ public class EntityDroolsExecutionService {
kieSession.setGlobal(RULES_LOGGER_GLOBAL, logger);
}
kieSession.insert(document);
nodesInKieSession.add(document);
nodesInKieSession.addAll(sectionsToAnalyze);
nodesInKieSession.addAll(sectionsToAnalyze.stream()
.flatMap(SemanticNode::streamAllSubNodes)
.toList());
nodesInKieSession.forEach(kieSession::insert);
System.out.println("after document insert : " + kieSession.getFactCount());
document.getEntities()
.forEach(kieSession::insert);
sectionsToAnalyze.forEach(kieSession::insert);
sectionsToAnalyze.stream()
.flatMap(SemanticNode::streamAllSubNodes)
.forEach(kieSession::insert);
System.out.println("after getEntities insert : " + kieSession.getFactCount());
document.getPages()
.forEach(kieSession::insert);
System.out.println("after getPages insert : " + kieSession.getFactCount());
fileAttributes.stream()
.filter(f -> f.getValue() != null)
.forEach(kieSession::insert);
System.out.println("after fileAttributes insert : " + kieSession.getFactCount());
if (manualRedactions != null) {
manualRedactions.buildAll()
@ -140,8 +144,10 @@ public class EntityDroolsExecutionService {
.filter(BaseAnnotation::isLocal)
.forEach(kieSession::insert);
}
System.out.println("after manualRedactions insert : " + kieSession.getFactCount());
kieSession.insert(nerEntities);
System.out.println("after nerEntities insert : " + kieSession.getFactCount());
kieSession.getAgenda().getAgendaGroup("LOCAL_DICTIONARY_ADDS").setFocus();
@ -152,11 +158,30 @@ public class EntityDroolsExecutionService {
try {
completableFuture.get(settings.getDroolsExecutionTimeoutSecs(document.getNumberOfPages()), TimeUnit.SECONDS);
//
// nodesInKieSession = sectionsToAnalyze.size() == document.streamAllSubNodes()
// .count() ? Collections.emptySet() : buildSet(sectionsToAnalyze, document);
//
// kieSessionUpdater = new KieSessionUpdater(nodesInKieSession, kieSession);
//
// document.setKieSessionUpdater(kieSessionUpdater);
//
// kieSession.insert(document);
//
// kieSession.setGlobal("entityCreationService", entityCreationService);
//
// sectionsToAnalyze.stream()
// .flatMap(SemanticNode::streamAllSubNodes)
// .forEach(kieSession::insert);
// System.out.println("after SemanticNode insert : " + kieSession.getFactCount());
//
// completableFuture.get(settings.getDroolsExecutionTimeoutSecs(document.getNumberOfPages()), TimeUnit.SECONDS);
} catch (ExecutionException e) {
logger.error(e, "Exception during rule execution");
kieSession.dispose();
if (e.getCause() instanceof TimeoutException) {
throw new DroolsTimeoutException(String.format("The file %s caused a timeout",context.getFileId()), e, false, RuleFileType.ENTITY);
throw new DroolsTimeoutException(String.format("The file %s caused a timeout", context.getFileId()), e, false, RuleFileType.ENTITY);
}
throw new RuntimeException(e);
} catch (InterruptedException e) {
@ -164,7 +189,7 @@ public class EntityDroolsExecutionService {
kieSession.dispose();
throw new RuntimeException(e);
} catch (TimeoutException e) {
throw new DroolsTimeoutException(String.format("The file %s caused a timeout",context.getFileId()), e, false, RuleFileType.ENTITY);
throw new DroolsTimeoutException(String.format("The file %s caused a timeout", context.getFileId()), e, false, RuleFileType.ENTITY);
}
List<FileAttribute> resultingFileAttributes = getFileAttributes(kieSession);

View File

@ -0,0 +1,58 @@
package com.iqser.red.service.redaction.v1.server.service.drools;
import java.util.Set;
import org.kie.api.runtime.KieSession;
import org.kie.api.runtime.rule.FactHandle;
import com.iqser.red.service.redaction.v1.server.model.document.entity.IKieSessionUpdater;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
import lombok.experimental.FieldDefaults;
@RequiredArgsConstructor
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class KieSessionUpdater implements IKieSessionUpdater {
Set<SemanticNode> nodesInKieSession;
KieSession kieSession;
public void insert(TextEntity textEntity) {
kieSession.insert(textEntity);
updateIntersectingNodes(textEntity);
}
public void update(TextEntity textEntity) {
kieSession.update(kieSession.getFactHandle(textEntity), textEntity);
updateIntersectingNodes(textEntity);
}
public void update(Image image) {
kieSession.update(kieSession.getFactHandle(image), image);
SemanticNode parent = image;
while (parent.hasParent()) {
parent = parent.getParent();
kieSession.update(kieSession.getFactHandle(parent), parent);
}
}
private void updateIntersectingNodes(TextEntity textEntity) {
textEntity.getIntersectingNodes()
.stream()
.filter(nodesInKieSession::contains)
.forEach(o -> kieSession.update(kieSession.getFactHandle(o), o));
}
}

View File

@ -14,4 +14,5 @@
<appender-ref ref="${logType}"/>
</root>
<logger name="org.drools.mvel" level="ERROR"/>
<logger name="org.springframework.web.socket.config" level="WARN"/>
</configuration>

View File

@ -32,8 +32,8 @@ import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.zip.GZIPInputStream;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.springframework.amqp.rabbit.core.RabbitTemplate;
@ -61,23 +61,30 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemp
import com.iqser.red.service.redaction.v1.server.client.DictionaryClient;
import com.iqser.red.service.redaction.v1.server.client.LegalBasisClient;
import com.iqser.red.service.redaction.v1.server.client.RulesClient;
import com.iqser.red.service.redaction.v1.server.model.NerEntities;
import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary;
import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryFactory;
import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryIncrement;
import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryModel;
import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryVersion;
import com.iqser.red.service.redaction.v1.server.queue.RedactionMessageReceiver;
import com.iqser.red.service.redaction.v1.server.service.AnalyzeService;
import com.iqser.red.service.redaction.v1.server.service.DictionaryService;
import com.iqser.red.service.redaction.v1.server.service.websocket.RedisSyncedWebSocketService;
import com.iqser.red.service.redaction.v1.server.storage.RedactionStorageService;
import com.iqser.red.service.redaction.v1.server.testcontainers.MongoDBTestContainer;
import com.iqser.red.service.redaction.v1.server.utils.LayoutParsingRequestProvider;
import com.iqser.red.storage.commons.service.StorageService;
import com.knecon.fforesight.keycloakcommons.security.TenantAuthenticationManagerResolver;
import com.knecon.fforesight.mongo.database.commons.liquibase.TenantMongoLiquibaseExecutor;
import com.knecon.fforesight.mongo.database.commons.service.MongoConnectionProvider;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingRequest;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingPipeline;
import com.knecon.fforesight.tenantcommons.TenantContext;
import com.knecon.fforesight.tenantcommons.TenantProvider;
import com.knecon.fforesight.tenantcommons.model.MongoDBConnection;
import com.pdftron.pdf.PDFNet;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
@ -86,7 +93,7 @@ import lombok.extern.slf4j.Slf4j;
@ExtendWith(SpringExtension.class)
@SpringBootTest(classes = Application.class, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
@Import(AbstractRedactionIntegrationTest.TestConfiguration.class)
@Disabled
//@Disabled
/*
* This test is meant to be used directly with a download from blob storage (e.g. minio). You need to define the dossier template you want to use by supplying an absolute path.
* The dossier template will then be parsed for dictionaries, colors, entities, and rules. This is defined for the all tests once.
@ -114,7 +121,7 @@ import lombok.extern.slf4j.Slf4j;
FileType.DOCUMENT_POSITION,
FileType.DOCUMENT_STRUCTURE,
FileType.DOCUMENT_TEXT);
Path dossierTemplateToUse = Path.of("/home/kschuettler/Downloads/New Folder/DOSSIER_TEMPLATE"); // Add your dossier-template here
Path dossierTemplateToUse = Path.of("/home/kschuettler/iqser/business-logic/redactmanager/prod-cp-eu-reg/EFSA_sanitisation_pre_GFL_v1"); // Add your dossier-template here
ObjectMapper mapper = ObjectMapperFactory.create();
final String TENANT_ID = "tenant";
TestDossierTemplate testDossierTemplate;
@ -151,13 +158,26 @@ import lombok.extern.slf4j.Slf4j;
private TenantProvider tenantProvider;
@Autowired
private DictionaryFactory dictionaryFactory;
@Autowired
private LayoutParsingPipeline layoutParsingPipeline;
@MockBean
private RedactionMessageReceiver redactionMessageReceiver;
@BeforeAll
public static void init() {
synchronized (PDFNet.class) {
PDFNet.initialize("demo:1650351709282:7bd235e003000000004ec28a6743e1163a085e2115de2536ab6e2cfe5a");
}
}
@Test
@SneakyThrows
public void runAnalysisEnd2End() {
String folder = "/home/kschuettler/Downloads/New Folder/436e4a2a-0ba3-4d3c-9944-c355f5c1cca2"; // Should contain all files from minio directly, still zipped. Can contain multiple files.
String folder = "/home/kschuettler/Dokumente/analysisend2end/file0"; // Should contain all files from minio directly, still zipped. Can contain multiple files.
Path absoluteFolderPath;
if (folder.startsWith("files")) { // if it starts with "files" it is most likely in the resources folder, else it should be an absolute path
@ -171,8 +191,18 @@ import lombok.extern.slf4j.Slf4j;
List<AnalyzeRequest> analyzeRequests = prepareStorageForFolder(absoluteFolderPath);
log.info("Found {} distinct fileIds with all required files", analyzeRequests.size());
for (int i = 0; i < analyzeRequests.size(); i++) {
long start = System.currentTimeMillis();
AnalyzeRequest analyzeRequest = analyzeRequests.get(i);
Path nerEntitiesFile = absoluteFolderPath.resolve(analyzeRequest.getFileId() + ".NER_ENTITIES.json");
if (!Files.exists(nerEntitiesFile)) {
storageService.storeJSONObject(TenantContext.getTenantId(),
RedactionStorageService.StorageIdUtils.getStorageId(analyzeRequest.getDossierId(),
analyzeRequest.getFileId(),
FileType.NER_ENTITIES),
new NerEntities());
}
long start = System.currentTimeMillis();
log.info("----------------------------------------------------------------------------------");
log.info("{}/{}: Starting analysis for file {}", i + 1, analyzeRequests.size(), analyzeRequest.getFileId());
analyzeService.analyze(analyzeRequest);
@ -303,7 +333,7 @@ import lombok.extern.slf4j.Slf4j;
AnalyzeRequest request = new AnalyzeRequest();
request.setDossierId(UUID.randomUUID().toString());
request.setFileId(UUID.randomUUID().toString());
request.setFileId(fileName);
request.setDossierTemplateId(testDossierTemplate.id);
request.setAnalysisNumber(-1);
@ -320,18 +350,72 @@ import lombok.extern.slf4j.Slf4j;
Set<FileType> missingFileTypes = Sets.difference(REQUIRED_FILES, uploadedFileTypes);
if (!missingFileTypes.isEmpty()) {
log.error("Folder {} is missing files of type {}",
folder.toFile(),
missingFileTypes.stream()
.map(Enum::toString)
.collect(Collectors.joining(", ")));
return Optional.empty();
if (!missingFileTypes.isEmpty() && !missingFileTypes.contains(FileType.ORIGIN)) {
runLayoutParsingAndSaveFilesToFolder(folder, uploadedFileTypes, request);
}
// if (!missingFileTypes.isEmpty()) {
// log.error("Folder {} is missing files of type {}",
// folder.toFile(),
// missingFileTypes.stream()
// .map(Enum::toString)
// .collect(Collectors.joining(", ")));
// return Optional.empty();
// }
return Optional.of(request);
}
private void runLayoutParsingAndSaveFilesToFolder(Path folder, Set<FileType> uploadedFileTypes, AnalyzeRequest request) throws IOException {
uploadImageAndTableFilesIfMissing(uploadedFileTypes, request);
LayoutParsingRequest layoutParsingRequest = LayoutParsingRequestProvider.build(LayoutParsingType.DOCUMINE_OLD, request);
layoutParsingPipeline.parseLayoutAndSaveFilesToStorage(layoutParsingRequest);
try {
storeFileFromStorage(TENANT_ID, layoutParsingRequest.structureFileStorageId(), folder);
storeFileFromStorage(TENANT_ID, layoutParsingRequest.textBlockFileStorageId(), folder);
storeFileFromStorage(TENANT_ID, layoutParsingRequest.positionBlockFileStorageId(), folder);
storeFileFromStorage(TENANT_ID, layoutParsingRequest.pageFileStorageId(), folder);
} catch (IOException e) {
log.error("Failed to store files from storage to folder {}", folder, e);
}
}
private void uploadImageAndTableFilesIfMissing(Set<FileType> uploadedFileTypes, AnalyzeRequest request) throws IOException {
if (!uploadedFileTypes.contains(FileType.TABLES)) {
var cvServiceResponse = "files/cv_service_empty_response.json";
ClassPathResource cvServiceResponseFileResource = new ClassPathResource(cvServiceResponse);
storageService.storeObject(TenantContext.getTenantId(),
RedactionStorageService.StorageIdUtils.getStorageId(request.getDossierId(), request.getFileId(), FileType.TABLES),
cvServiceResponseFileResource.getInputStream());
}
if (!uploadedFileTypes.contains(FileType.IMAGE_INFO)) {
var imageServiceResponse = "files/empty_image_response.json";
ClassPathResource imageServiceResponseFileResource = new ClassPathResource(imageServiceResponse);
storageService.storeObject(TenantContext.getTenantId(),
RedactionStorageService.StorageIdUtils.getStorageId(request.getDossierId(), request.getFileId(), FileType.IMAGE_INFO),
imageServiceResponseFileResource.getInputStream());
}
}
private void storeFileFromStorage(String tenantId, String storageId, Path folder) throws IOException {
var inputStream = storageService.getObject(tenantId, storageId);
try (FileOutputStream fileOut = new FileOutputStream(folder.toString() + "/" + storageId.split("/")[1])) {
fileOut.write(inputStream.getContentAsByteArray());
} catch (IOException e) {
e.printStackTrace();
}
log.info("Stored file {} to {}", storageId, folder);
}
private static Stream<FileToUpload> findFilesToUpload(String fileName, Path folder, Set<FileType> endingsToUpload) throws IOException {
return Files.walk(folder)

View File

@ -46,7 +46,7 @@ public class DocumentIEntityInsertionIntegrationTest extends BuildDocumentIntegr
public void createEntityCreationService() {
MockitoAnnotations.initMocks(this);
entityCreationService = new EntityCreationService(entityEnrichmentService, kieSession);
entityCreationService = new EntityCreationService(entityEnrichmentService);
}

View File

@ -82,7 +82,7 @@ public class RulesIntegrationTest extends BuildDocumentIntegrationTest {
Dictionary dict = Mockito.mock(Dictionary.class);
kieSession = kieContainer.newKieSession();
entityCreationService = new EntityCreationService(entityEnrichmentService, kieSession);
entityCreationService = new EntityCreationService(entityEnrichmentService);
kieSession.setGlobal("manualChangesApplicationService", manualChangesApplicationService);
kieSession.setGlobal("entityCreationService", entityCreationService);
kieSession.setGlobal("dictionary", dict);

View File

@ -1660,4 +1660,104 @@ Zyma SA
Zyma SA, Nyon, Switzerland
Mambo-Tox Ltd. Biomedical Sciences Building Bassett Crescent East Southampton SO16 7PX UK
Syngenta Environmental Sciences Jealotts Hill International Research Centre Bracknell, Berkshire RG42 6EY UK
Test Ignored Hint CBI_ADDRESS
Test Ignored Hint CBI_ADDRESS
the
be
to
of
and
a
in
that
have
I
it
for
not
on
with
he
as
you
do
at
this
but
his
by
from
they
we
say
her
she
or
an
will
my
one
all
would
there
their
what
so
up
out
if
about
who
get
which
go
me
when
make
can
like
time
no
just
him
know
take
people
into
year
your
good
some
could
them
see
other
than
then
now
look
only
come
its
over
think
also
back
after
use
two
how
our
work
first
well
way
even
new
want
because
any
these
give
day
most
us

View File

@ -0,0 +1,100 @@
the
be
to
of
and
a
in
that
have
I
it
for
not
on
with
he
as
you
do
at
this
but
his
by
from
they
we
say
her
she
or
an
will
my
one
all
would
there
their
what
so
up
out
if
about
who
get
which
go
me
when
make
can
like
time
no
just
him
know
take
people
into
year
your
good
some
could
them
see
other
than
then
now
look
only
come
its
over
think
also
back
after
use
two
how
our
work
first
well
way
even
new
want
because
any
these
give
day
most
us

View File

@ -964,7 +964,6 @@ rule "ETC.5.0: Skip dossier_redaction entries if confidentiality is 'confidentia
$dossierRedaction: TextEntity(type() == "dossier_redaction")
then
$dossierRedaction.skip("ETC.5.0", "Ignore dossier_redaction when confidential");
$dossierRedaction.getIntersectingNodes().forEach(node -> update(node));
end
rule "ETC.5.1: Remove dossier_redaction entries if confidentiality is not 'confidential'"
@ -1137,7 +1136,6 @@ rule "MAN.0.0: Apply manual resize redaction"
manualChangesApplicationService.resize($entityToBeResized, $resizeRedaction);
retract($resizeRedaction);
update($entityToBeResized);
$entityToBeResized.getIntersectingNodes().forEach(node -> update(node));
end
rule "MAN.0.1: Apply manual resize redaction"
@ -1164,7 +1162,6 @@ rule "MAN.1.0: Apply id removals that are valid and not in forced redactions to
$entityToBeRemoved.getManualOverwrite().addChange($idRemoval);
update($entityToBeRemoved);
retract($idRemoval);
$entityToBeRemoved.getIntersectingNodes().forEach(node -> update(node));
end
rule "MAN.1.1: Apply id removals that are valid and not in forced redactions to Image"
@ -1189,7 +1186,6 @@ rule "MAN.2.0: Apply force redaction"
then
$entityToForce.getManualOverwrite().addChange($force);
update($entityToForce);
$entityToForce.getIntersectingNodes().forEach(node -> update(node));
retract($force);
end
@ -1214,7 +1210,6 @@ rule "MAN.3.0: Apply entity recategorization"
not ManualRecategorization($id == annotationId, requestDate.isBefore($requestDate))
$entityToBeRecategorized: TextEntity(matchesAnnotationId($id), type() != $type)
then
$entityToBeRecategorized.getIntersectingNodes().forEach(node -> update(node));
$entityToBeRecategorized.getManualOverwrite().addChange($recategorization);
update($entityToBeRecategorized);
retract($recategorization);
@ -1297,7 +1292,6 @@ rule "X.0.1: Remove Entity contained by Entity of same type with manual changes"
$larger: TextEntity($type: type(), $entityType: entityType, !removed(), hasManualChanges())
$contained: TextEntity(containedBy($larger), type() == $type, entityType == $entityType, this != $larger, !hasManualChanges())
then
$contained.getIntersectingNodes().forEach(node -> update(node));
$contained.remove("X.0.1", "remove Entity contained by Entity of same type with manual changes");
retract($contained);
end
@ -1320,7 +1314,6 @@ rule "X.2.1: Remove Entity of type HINT when contained by FALSE_POSITIVE"
$falsePositive: TextEntity($type: type(), entityType == EntityType.FALSE_POSITIVE, active())
$entity: TextEntity(containedBy($falsePositive), type() == $type, (entityType == EntityType.HINT), !hasManualChanges())
then
$entity.getIntersectingNodes().forEach(node -> update(node));
$entity.remove("X.2.1", "remove Entity of type ENTITY when contained by FALSE_POSITIVE");
retract($entity)
end
@ -1380,7 +1373,6 @@ rule "X.6.0: Remove Entity of lower rank, when contained by entity of type ENTIT
$higherRank: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active())
$lowerRank: TextEntity(containedBy($higherRank), type() != $type, dictionary.getDictionaryRank(type) < dictionary.getDictionaryRank($type), !hasManualChanges())
then
$lowerRank.getIntersectingNodes().forEach(node -> update(node));
$lowerRank.remove("X.6.0", "remove Entity of lower rank, when contained by entity of type ENTITY or HINT");
retract($lowerRank);
end
@ -1391,7 +1383,6 @@ rule "X.6.1: remove Entity, when contained in another entity of type ENTITY or H
$outer: TextEntity($type: type(), (entityType == EntityType.ENTITY || entityType == EntityType.HINT), active())
$inner: TextEntity(containedBy($outer), type() != $type, $outer.getTextRange().length > getTextRange().length(), !hasManualChanges())
then
$inner.getIntersectingNodes().forEach(node -> update(node));
$inner.remove("X.6.1", "remove Entity, when contained in another entity of type ENTITY or HINT with larger text range");
retract($inner);
end
@ -1473,7 +1464,6 @@ rule "DICT.0.0: Remove Template Dictionary Entity when contained by Dossier Dict
$dictionaryRemoval: TextEntity($type: type(), entityType == EntityType.DICTIONARY_REMOVAL, engines contains Engine.DOSSIER_DICTIONARY)
$entity: TextEntity(getTextRange().equals($dictionaryRemoval.getTextRange()), engines contains Engine.DICTIONARY, type() == $type, (entityType == EntityType.ENTITY || entityType == EntityType.HINT), !hasManualChanges())
then
$entity.getIntersectingNodes().forEach(node -> update(node));
$entity.remove("DICT.0.0", "Remove Template Dictionary Entity when contained by Dossier Dictionary DICTIONARY_REMOVAL");
$entity.addEngine(Engine.DOSSIER_DICTIONARY);
end

@ -1 +1 @@
Subproject commit 57e6e0dd3c08a3a65ec59b5dfb70f0f77ebcc7c7
Subproject commit 5705cc0782605fdca5dfff134b436f7143c9e421

View File

@ -16,5 +16,7 @@
<logger name="org.apache.fontbox.ttf" level="ERROR"/>
<logger name="org.drools.mvel" level="ERROR"/>
<logger name="org.springframework.web.socket.config" level="WARN"/>
<logger name="org.mongodb.driver.client" level="ERROR"/>
</configuration>