RED-9728: remove False Positives from table methods

This commit is contained in:
Kilian Schüttler 2024-09-06 11:07:48 +02:00
parent 03e321a824
commit bcbd4587f1
28 changed files with 407 additions and 96 deletions

View File

@ -12,7 +12,7 @@ plugins {
description = "redaction-service-server-v1"
val layoutParserVersion = "0.161.0"
val layoutParserVersion = "0.174.0"
val jacksonVersion = "2.15.2"
val droolsVersion = "9.44.0.Final"
val pdfBoxVersion = "3.0.0"

View File

@ -38,6 +38,8 @@ public class RedactionServiceSettings {
private boolean annotationMode;
private boolean droolsDebug;
public int getDroolsExecutionTimeoutSecs(int numberOfPages) {

View File

@ -0,0 +1,70 @@
package com.iqser.red.service.redaction.v1.server.logger;
import org.kie.api.definition.rule.Rule;
import org.kie.api.event.rule.DefaultRuleRuntimeEventListener;
import org.kie.api.event.rule.ObjectDeletedEvent;
import org.kie.api.event.rule.ObjectInsertedEvent;
import org.kie.api.event.rule.ObjectUpdatedEvent;
import lombok.AllArgsConstructor;
/**
 * Drools rule-runtime listener that reports every object inserted, updated or deleted
 * through the {@link RulesLogger}.
 * <p>
 * Nothing is logged unless object tracking has been enabled on the logger.
 */
@AllArgsConstructor
public class ObjectTrackingEventListener extends DefaultRuleRuntimeEventListener {

    /** Rule names longer than this many characters are shortened in log messages. */
    private static final int MAX_RULE_NAME_LENGTH = 20;

    private final RulesLogger logger;


    @Override
    public void objectInserted(ObjectInsertedEvent event) {

        if (logger.isObjectTrackingActive()) {
            logEvent("ObjectInsertedEvent", "inserted", event.getRule(), event.getObject());
        }
    }


    @Override
    public void objectDeleted(ObjectDeletedEvent event) {

        if (logger.isObjectTrackingActive()) {
            // A delete event carries the removed object as its "old" object.
            logEvent("ObjectDeletedEvent", "deleted", event.getRule(), event.getOldObject());
        }
    }


    @Override
    public void objectUpdated(ObjectUpdatedEvent event) {

        if (logger.isObjectTrackingActive()) {
            logEvent("ObjectUpdatedEvent", "updated", event.getRule(), event.getObject());
        }
    }


    /**
     * Shortens the name of a rule so that log lines stay readable.
     *
     * @param rule the rule whose name is formatted
     * @return the rule name, cut to 20 characters and suffixed with "..." when it is longer than that
     */
    public static String formatRuleName(Rule rule) {

        String name = rule.getName();
        if (name.length() > MAX_RULE_NAME_LENGTH) {
            return name.substring(0, MAX_RULE_NAME_LENGTH) + "...";
        }
        return name;
    }


    /**
     * Writes one tracking line in the uniform shape "Event: [rule: ]object has been action".
     *
     * @param eventName name of the event type, used as line prefix
     * @param action    past participle describing what happened to the object
     * @param rule      the rule reported by the event, may be null
     * @param object    the affected object
     */
    private void logEvent(String eventName, String action, Rule rule, Object object) {

        if (rule == null) {
            // No rule attached to the event: leave the rule segment out.
            logger.logObjectTracking("{}: {} has been {}", eventName, object, action);
            return;
        }
        logger.logObjectTracking("{}: {}: {} has been {}", eventName, formatRuleName(rule), object, action);
    }

}

View File

@ -1,31 +1,40 @@
package com.iqser.red.service.redaction.v1.server.logger;
import java.time.OffsetDateTime;
import java.util.regex.Pattern;
import org.slf4j.helpers.MessageFormatter;
import com.iqser.red.service.redaction.v1.server.service.websocket.WebSocketService;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
/**
* This class provides logging functionality specifically for rules execution
* in a Drools context. It is designed to log messages with different log levels
* (INFO, WARN, ERROR) and formats messages using a placeholder-based approach
* similar to popular logging frameworks like SLF4J.
* <p>
* Log messages can include placeholders (i.e., `{}`), which will be replaced by
* the corresponding arguments when the message is formatted.
* <p>
* Example usage:
* <pre>
* logger.info("Message with placeholder {}", object);
* </pre>
*/
@Slf4j
@RequiredArgsConstructor
public class RulesLogger {
private final WebSocketService webSocketService;
private final Context context;
@Getter
private boolean objectTrackingActive;
@Getter
private boolean agendaTrackingActive;
/**
* Logs a message at the INFO level.
@ -51,6 +60,75 @@ public class RulesLogger {
}
/**
 * Emits a message at the INFO level, but only while object tracking is switched on.
 *
 * @param message The log message containing optional placeholders (i.e., `{}`).
 * @param args    The arguments to replace the placeholders in the message.
 */
public void logObjectTracking(String message, Object... args) {

    if (!objectTrackingActive) {
        return;
    }
    info(message, args);
}
/**
 * Switches object tracking on. While it is on, the RulesLogger logs all inserted/retracted/updated events.
 * Object tracking is disabled initially.
 */
public void enableObjectTracking() {

    this.objectTrackingActive = true;
}
/**
 * Switches object tracking off. While it is off, the RulesLogger does not log any inserted/retracted/updated events.
 * Object tracking is disabled initially.
 */
public void disableObjectTracking() {

    this.objectTrackingActive = false;
}
/**
 * Emits a message at the INFO level, but only while agenda tracking is switched on.
 *
 * @param message The log message containing optional placeholders (i.e., `{}`).
 * @param args    The arguments to replace the placeholders in the message.
 */
public void logAgendaTracking(String message, Object... args) {

    if (!agendaTrackingActive) {
        return;
    }
    info(message, args);
}
/**
 * Switches agenda tracking on. While it is on, the RulesLogger logs each firing Rule with its name, objects and metadata.
 * Agenda tracking is disabled initially.
 */
public void enableAgendaTracking() {

    this.agendaTrackingActive = true;
}
/**
 * Switches agenda tracking off. While it is off, the RulesLogger does not log any rule firings.
 * Agenda tracking is disabled initially.
 */
public void disableAgendaTracking() {

    this.agendaTrackingActive = false;
}
/**
* Logs a message at the ERROR level, including an exception.
*
@ -67,6 +145,11 @@ public class RulesLogger {
private void log(LogLevel logLevel, String message, Object... args) {
var formattedMessage = formatMessage(message, args);
switch (logLevel) {
case INFO -> log.info(message, args);
case WARN -> log.warn(message, args);
case ERROR -> log.error(message, args);
}
var ruleLog = RuleLogEvent.builder()
.tenantId(context.getTenantId())
.ruleVersion(context.getRuleVersion())
@ -85,22 +168,7 @@ public class RulesLogger {
private String formatMessage(String message, Object... args) {

    // Fills the `{}` placeholders of the message with the given arguments.
    // Without arguments there is nothing to substitute, so the message is returned untouched.
    if (args == null || args.length == 0) {
        return message;
    }
    // Delegate to SLF4J so the text sent on matches what the SLF4J logger prints, and so that
    // argument values containing '$' or '\' are inserted literally instead of being
    // interpreted as regex replacement syntax.
    return MessageFormatter.arrayFormat(message, args).getMessage();
}
}

View File

@ -0,0 +1,65 @@
package com.iqser.red.service.redaction.v1.server.logger;
import java.util.Map;
import org.kie.api.definition.rule.Rule;
import org.kie.api.event.rule.AfterMatchFiredEvent;
import org.kie.api.event.rule.DefaultAgendaEventListener;
import org.kie.api.event.rule.MatchCreatedEvent;
import lombok.AllArgsConstructor;
/**
 * Drools agenda listener that reports created matches and fired rules through the {@link RulesLogger}.
 * <p>
 * Nothing is logged unless agenda tracking has been enabled on the logger.
 */
@AllArgsConstructor
public class TrackingAgendaEventListener extends DefaultAgendaEventListener {

    private final RulesLogger logger;


    @Override
    public void matchCreated(MatchCreatedEvent event) {

        if (logger.isAgendaTrackingActive()) {
            logger.logAgendaTracking(event.toString());
        }
    }


    /**
     * Logs the fired rule together with the objects of its match and the rule's meta-data.
     *
     * @param event the event describing the match that has just fired
     */
    @Override
    public void afterMatchFired(AfterMatchFiredEvent event) {

        if (!logger.isAgendaTrackingActive()) {
            return;
        }

        var match = event.getMatch();
        Rule rule = match.getRule();
        StringBuilder sb = new StringBuilder("AfterMatchFiredEvent: ").append(formatRuleName(rule));

        // Read the matched objects once instead of asking the match for them repeatedly.
        var objects = match.getObjects();
        if (objects != null && !objects.isEmpty()) {
            sb.append(", ").append(objects.size()).append(" objects: ");
            String separator = "";
            for (Object object : objects) {
                sb.append(separator).append(object);
                separator = ", ";
            }
        }

        Map<String, Object> ruleMetaDataMap = rule.getMetaData();
        if (ruleMetaDataMap != null && !ruleMetaDataMap.isEmpty()) {
            sb.append("\n With [").append(ruleMetaDataMap.size()).append("] meta-data:");
            // entrySet() yields key and value together, avoiding a second lookup per key.
            for (Map.Entry<String, Object> entry : ruleMetaDataMap.entrySet()) {
                sb.append("\n key=").append(entry.getKey()).append(", value=").append(entry.getValue());
            }
        }

        logger.logAgendaTracking(sb.toString());
    }


    /**
     * Formats a rule name for log output; delegates to {@link ObjectTrackingEventListener#formatRuleName(Rule)}.
     *
     * @param rule the rule whose name is formatted
     * @return the formatted rule name
     */
    public static String formatRuleName(Rule rule) {

        return ObjectTrackingEventListener.formatRuleName(rule);
    }

}

View File

@ -180,6 +180,22 @@ public class PrecursorEntity implements IEntity {
}
/**
 * Checks whether the type of this entity is one of the valid types.
 *
 * @return true when this entity is of EntityType ENTITY or HINT, false otherwise
 */
public boolean validEntityType() {

    // Enum constants are singletons, so == is exact and does not throw should entityType be null.
    return entityType == EntityType.ENTITY || entityType == EntityType.HINT;
}
@Override
public boolean valid() {

    // Valid means: active and of a valid entity type.
    if (!active()) {
        return false;
    }
    return validEntityType();
}
private static EntityType getEntityType(EntryType entryType) {
switch (entryType) {

View File

@ -11,6 +11,7 @@ import java.util.stream.Stream;
import org.ahocorasick.trie.Trie;
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import lombok.Data;
@ -104,6 +105,12 @@ public class SearchImplementation {
}
/**
 * Searches the whole text block, i.e. uses the block's own text range as the search region.
 *
 * @param textBlock the text block to search in
 * @return the stream produced by {@link #getBoundaries(CharSequence, TextRange)} for the full block
 */
public Stream<TextRange> getBoundaries(TextBlock textBlock) {

    TextRange wholeBlock = textBlock.getTextRange();
    return getBoundaries(textBlock, wholeBlock);
}
public Stream<TextRange> getBoundaries(CharSequence text, TextRange region) {
if (this.values.isEmpty()) {

View File

@ -52,6 +52,17 @@ public interface IEntity {
String type();
/**
* An Entity is valid when it is active and is not a false recommendation, a false positive or a dictionary removal.
* This default implementation only checks {@code active()}; implementing classes may override it with a stricter check.
*
* @return true if the entity is valid, false otherwise
*/
default boolean valid() {
return active();
}
/**
* Calculates the length of the entity's value.
*

View File

@ -289,6 +289,21 @@ public class TextEntity implements IEntity {
}
/**
 * Checks whether the type of this entity is one of the valid types.
 *
 * @return true when this entity is of EntityType ENTITY or HINT, false otherwise
 */
public boolean validEntityType() {

    // Enum constants are singletons, so == is exact and does not throw should entityType be null.
    return entityType == EntityType.ENTITY || entityType == EntityType.HINT;
}
/**
 * A TextEntity is valid when it is active and of a valid entity type.
 *
 * @return true if the entity is valid, false otherwise
 */
@Override
public boolean valid() {

    return active() && validEntityType();
}
@Override
public String value() {

View File

@ -115,7 +115,7 @@ public class Page {
@Override
public String toString() {

    // Labelled form, so the value is self-explanatory when printed on its own.
    return "Page: " + number;
}
}

View File

@ -17,6 +17,7 @@ import java.util.stream.Stream;
import com.iqser.red.service.redaction.v1.server.model.document.ConsecutiveTextBlockCollector;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
@ -77,6 +78,20 @@ public interface SemanticNode {
Set<TextEntity> getEntities();
/**
 * Streams the Entities of this SemanticNode, restricted to the active (APPLIED or SKIPPED) ones which are of a valid type (ENTITY or HINT).
 * This is used by all functions which check for the existence of an Entity, such as hasEntitiesOfType().
 *
 * @return Stream of valid TextEntities
 */
default Stream<TextEntity> streamValidEntities() {

    Set<TextEntity> allEntities = getEntities();
    return allEntities.stream()
            .filter(entity -> entity.active() && entity.validEntityType());
}
/**
* Each AtomicTextBlock is assigned a page, so to get the pages this node appears on, it collects the PageNodes from each AtomicTextBlock belonging to this node's TextBlock.
*
@ -277,9 +292,7 @@ public interface SemanticNode {
*/
default boolean hasEntitiesOfType(String type) {

    // Only valid entities (see streamValidEntities()) are considered.
    return streamValidEntities().anyMatch(redactionEntity -> redactionEntity.type().equals(type));
}
@ -292,10 +305,8 @@ public interface SemanticNode {
*/
default boolean hasEntitiesOfAnyType(String... types) {

    // Only valid entities (see streamValidEntities()) are considered.
    return streamValidEntities().anyMatch(redactionEntity -> Arrays.stream(types)
            .anyMatch(type -> redactionEntity.type().equals(type)));
}
@ -308,9 +319,7 @@ public interface SemanticNode {
*/
default boolean hasEntitiesOfAllTypes(String... types) {
return getEntities().stream()
.filter(TextEntity::active)
.map(TextEntity::type)
return streamValidEntities().map(TextEntity::type)
.collect(Collectors.toUnmodifiableSet())
.containsAll(Arrays.stream(types)
.toList());
@ -319,31 +328,28 @@ public interface SemanticNode {
/**
* Returns a List of Entities in this SemanticNode which are of the provided type such as "CBI_author".
* Ignores Entity with ignored == true or removed == true.
* Ignores Entity which are not active or of a removal type ignored == true or removed == true.
*
* @param type string representing the type of entities to return
* @return List of RedactionEntities of any the type
*/
default List<TextEntity> getEntitiesOfType(String type) {
return getEntities().stream()
.filter(TextEntity::active)
.filter(redactionEntity -> redactionEntity.type().equals(type))
return streamValidEntities().filter(redactionEntity -> redactionEntity.type().equals(type))
.toList();
}
/**
* Returns a List of Entities in this SemanticNode which have any of the provided types such as "CBI_author".
* Ignores Entity with ignored == true or removed == true.
* Ignores Entity that are not valid.
*
* @param types A list of strings representing the types of entities to return
* @return List of RedactionEntities of any provided type
*/
default List<TextEntity> getEntitiesOfType(List<String> types) {
return getEntities().stream()
.filter(TextEntity::active)
return streamValidEntities()//
.filter(redactionEntity -> redactionEntity.isAnyType(types))
.toList();
}
@ -351,15 +357,14 @@ public interface SemanticNode {
/**
* Returns a List of Entities in this SemanticNode which have any of the provided types.
* Ignores Entity with the ignored flag set to true or the removed flag set to true.
* Ignores Entity that are not valid.
*
* @param types A list of strings representing the types of entities to return
* @return List of RedactionEntities that match any of the provided types
*/
default List<TextEntity> getEntitiesOfType(String... types) {
return getEntities().stream()
.filter(TextEntity::active)
return streamValidEntities()//
.filter(redactionEntity -> redactionEntity.isAnyType(Arrays.stream(types)
.toList()))
.toList();
@ -463,7 +468,7 @@ public interface SemanticNode {
*/
default boolean containsStringIgnoreCase(String string) {

    // Compare against the text block's own lower-case search text instead of lower-casing the
    // search text here on every call.
    return getTextBlock().getSearchTextLowerCase().contains(string.toLowerCase(Locale.ROOT));
}
@ -774,13 +779,12 @@ public interface SemanticNode {
/**
* TODO: this produces unwanted results for sections spanning multiple columns.
* Computes the Union of the bounding boxes of all children recursively.
*
* @return The union of the BoundingBoxes of all children
*/
private Map<Page, Rectangle2D> getBBoxFromChildren() {
//TODO: this produces unwanted results for sections spanning multiple columns.
Map<Page, Rectangle2D> bBoxPerPage = new HashMap<>();
List<Map<Page, Rectangle2D>> childrenBBoxes = streamChildren().map(SemanticNode::getBBox)
.toList();

View File

@ -74,8 +74,7 @@ public class Table implements SemanticNode {
return IntStream.range(0, numberOfRows).boxed()
.filter(row -> rowContainsStringsIgnoreCase(row, strings))
.flatMap(this::streamRow)
.map(TableCell::getEntities)
.flatMap(Collection::stream);
.flatMap(TableCell::streamValidEntities);
}
@ -135,11 +134,11 @@ public class Table implements SemanticNode {
/**
* Streams all entities in this table, that appear in a row, which contains at least one entity with any of the provided types.
* Streams all entities in this table, that appear in a row, which contains at least one valid entity with any of the provided types.
* Ignores Entity with ignored == true or removed == true.
*
* @param types type strings to check whether a row contains an entity like them
* @return Stream of all entities in this table, that appear in a row, which contains at least one entity with any of the provided types.
* @return Stream of all entities in this table, that appear in a row, which contains at least one valid entity with any of the provided types.
*/
public Stream<TextEntity> streamEntitiesWhereRowContainsEntitiesOfType(List<String> types) {
@ -192,30 +191,26 @@ public class Table implements SemanticNode {
/**
 * Streams all valid Entities in the given row.
 *
 * @param rowNumber the row number to look for
 * @return stream of valid TextEntities occurring in the row
 */
public Stream<TextEntity> streamTextEntitiesInRow(int rowNumber) {

    return streamRow(rowNumber).flatMap(TableCell::streamValidEntities);
}
/**
 * Streams all valid Entities in the given col.
 *
 * @param colNumber the column number to look for
 * @return stream of valid TextEntities occurring in the column
 */
public Stream<TextEntity> streamTextEntitiesInCol(int colNumber) {

    return streamCol(colNumber).flatMap(TableCell::streamValidEntities);
}
@ -269,6 +264,7 @@ public class Table implements SemanticNode {
return streamHeaders().filter(tableCellNode -> tableCellNode.getTextBlock().getSearchText().contains(header))
.map(TableCell::getCol)
.distinct()
.flatMap(this::streamCol)
.filter(tableCellNode -> !tableCellNode.isHeader());
}

View File

@ -2,20 +2,13 @@ package com.iqser.red.service.redaction.v1.server.model.document.nodes;
import java.awt.geom.Rectangle2D;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.experimental.FieldDefaults;
@ -42,7 +35,6 @@ public class TableCell extends AbstractSemanticNode {
TextBlock textBlock;
@Override
public Map<Page, Rectangle2D> getBBox() {

View File

@ -3,6 +3,7 @@ package com.iqser.red.service.redaction.v1.server.model.document.textblock;
import static java.lang.String.format;
import java.awt.geom.Rectangle2D;
import java.lang.ref.SoftReference;
import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.Arrays;
@ -43,9 +44,11 @@ public class AtomicTextBlock implements TextBlock {
//string coordinates
TextRange textRange;
String searchText;
List<String> words;
List<Integer> lineBreaks;
SoftReference<String> searchTextLowerCaseCache;
SoftReference<List<String>> wordsCache;
//position coordinates
List<Integer> stringIdxToPositionIdx;
@Getter
@ -121,8 +124,31 @@ public class AtomicTextBlock implements TextBlock {
}
/**
 * Returns the search text converted to lower case.
 * The converted text is kept behind a SoftReference and is computed again whenever that
 * reference is unset or has been cleared.
 *
 * @return the lower-case search text
 */
@Override
public String getSearchTextLowerCase() {

    String cached = searchTextLowerCaseCache == null ? null : searchTextLowerCaseCache.get();
    if (cached != null) {
        return cached;
    }
    String lowered = getSearchText().toLowerCase(Locale.ENGLISH);
    searchTextLowerCaseCache = new SoftReference<>(lowered);
    return lowered;
}
public List<String> getWords() {
List<String> words = null;
if (wordsCache != null) {
words = wordsCache.get();
}
if (words == null) {
words = new ArrayList<>();
BreakIterator iterator = BreakIterator.getWordInstance(Locale.ENGLISH);
@ -131,6 +157,7 @@ public class AtomicTextBlock implements TextBlock {
for (int end = iterator.next(); end != BreakIterator.DONE; start = end, end = iterator.next()) {
words.add(searchText.substring(start, end));
}
wordsCache = new SoftReference<>(words);
}
return words;
}

View File

@ -3,11 +3,13 @@ package com.iqser.red.service.redaction.v1.server.model.document.textblock;
import static java.lang.String.format;
import java.awt.geom.Rectangle2D;
import java.lang.ref.SoftReference;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Collection;
import java.util.Locale;
import java.util.Map;
import java.util.stream.Stream;
@ -25,6 +27,7 @@ public class ConcatenatedTextBlock implements TextBlock {
List<AtomicTextBlock> atomicTextBlocks;
String searchText;
TextRange textRange;
SoftReference<String> searchTextLowerCaseCache;
public static ConcatenatedTextBlock empty() {
@ -100,6 +103,23 @@ public class ConcatenatedTextBlock implements TextBlock {
}
/**
 * Returns the search text converted to lower case.
 * The converted text is kept behind a SoftReference and is computed again whenever that
 * reference is unset or has been cleared.
 *
 * @return the lower-case search text
 */
@Override
public String getSearchTextLowerCase() {

    String cached = searchTextLowerCaseCache == null ? null : searchTextLowerCaseCache.get();
    if (cached != null) {
        return cached;
    }
    String lowered = getSearchText().toLowerCase(Locale.ENGLISH);
    searchTextLowerCaseCache = new SoftReference<>(lowered);
    return lowered;
}
@Override
public List<String> getWords() {

View File

@ -19,6 +19,9 @@ public interface TextBlock extends CharSequence {
String getSearchText();
String getSearchTextLowerCase();
List<String> getWords();

View File

@ -58,7 +58,7 @@ public class DictionarySearchService {
Set<Engine> engines = isDossierDictionaryEntry ? Set.of(Engine.DOSSIER_DICTIONARY) : Set.of(Engine.DICTIONARY);
EntityCreationService entityCreationService = new EntityCreationService(entityEnrichmentService);
searchImplementation.getBoundaries(node.getTextBlock(), node.getTextRange())
searchImplementation.getBoundaries(node.getTextBlock())
.filter(boundary -> entityCreationService.isValidEntityTextRange(node.getTextBlock(), boundary))
.forEach(bounds -> entityCreationService.byTextRangeWithEngine(bounds, type, entityType, node, engines)
.ifPresent(entity -> {

View File

@ -358,12 +358,6 @@ public class EntityLogCreatorService {
}
private boolean isHint(EntityType entityType) {
return entityType.equals(EntityType.HINT);
}
public static EntryState buildEntryState(IEntity entity) {
if (entity.applied() && entity.active()) {

View File

@ -7,8 +7,6 @@ import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentData;
import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree;
@ -111,7 +109,6 @@ public class DocumentGraphMapper {
}
private Headline buildHeadline(Context context) {
return Headline.builder().documentTree(context.documentTree).build();

View File

@ -475,7 +475,7 @@ public class EntityCreationService {
*/
public Stream<TextEntity> bySearchImplementation(SearchImplementation searchImplementation, String type, EntityType entityType, SemanticNode node) {
return searchImplementation.getBoundaries(node.getTextBlock(), node.getTextRange())
return searchImplementation.getBoundaries(node.getTextBlock())
.filter(boundary -> isValidEntityTextRange(node.getTextBlock(), boundary))
.map(bounds -> byTextRange(bounds, type, entityType, node))
.filter(Optional::isPresent)
@ -496,7 +496,7 @@ public class EntityCreationService {
TextBlock textBlock = node.getTextBlock();
SearchImplementation searchImplementation = new SearchImplementation(strings, false);
return searchImplementation.getBoundaries(textBlock, node.getTextRange())
return searchImplementation.getBoundaries(textBlock)
.map(boundary -> toLineAfterTextRange(textBlock, boundary))
.filter(boundary -> isValidEntityTextRange(textBlock, boundary))
.map(boundary -> byTextRange(boundary, type, entityType, node))
@ -518,7 +518,7 @@ public class EntityCreationService {
TextBlock textBlock = node.getTextBlock();
SearchImplementation searchImplementation = new SearchImplementation(strings, true);
return searchImplementation.getBoundaries(textBlock, node.getTextRange())
return searchImplementation.getBoundaries(textBlock)
.map(boundary -> toLineAfterTextRange(textBlock, boundary))
.filter(boundary -> isValidEntityTextRange(textBlock, boundary))
.map(boundary -> byTextRange(boundary, type, entityType, node))

View File

@ -188,7 +188,7 @@ public class EntityFindingUtility {
List<TextBlock> textBlocks = node.getTextBlocksByPageNumbers(pageNumbers);
return textBlocks.stream()
.flatMap(tb -> searchImplementation.getBoundaries(tb, tb.getTextRange()))
.flatMap(searchImplementation::getBoundaries)
.map(boundary -> entityCreationService.byTextRangeWithEngine(boundary, "temp", EntityType.ENTITY, node, Collections.emptySet()))
.filter(Optional::isPresent)
.map(Optional::get)
@ -214,7 +214,7 @@ public class EntityFindingUtility {
List<TextBlock> textBlocks = document.getTextBlocksByPageNumbers(pageNumbers);
return textBlocks.stream()
.flatMap(tb -> searchImplementation.getBoundaries(tb, tb.getTextRange())
.flatMap(tb -> searchImplementation.getBoundaries(tb)
.filter(textRange -> entityCreationService.isValidEntityTextRange(tb, textRange)))
.map(boundary -> entityCreationService.byTextRangeWithEngine(boundary, "temp", EntityType.ENTITY, document, Collections.emptySet()))
.filter(Optional::isPresent)
@ -228,7 +228,7 @@ public class EntityFindingUtility {
SearchImplementation searchImplementation = new SearchImplementation(value, !caseSensitive);
return searchImplementation.getBoundaries(document.getTextBlock(), document.getTextRange())
return searchImplementation.getBoundaries(document.getTextBlock())
.filter(textRange -> entityCreationService.isValidEntityTextRange(document.getTextBlock(), textRange))
.map(boundary -> entityCreationService.byTextRangeWithEngine(boundary, "temp", EntityType.ENTITY, document, Collections.emptySet()))
.filter(Optional::isPresent)

View File

@ -23,15 +23,17 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog
import com.iqser.red.service.persistence.service.v1.api.shared.model.component.ComponentMappingMetadata;
import com.iqser.red.service.redaction.v1.server.RedactionServiceSettings;
import com.iqser.red.service.redaction.v1.server.logger.Context;
import com.iqser.red.service.redaction.v1.server.logger.ObjectTrackingEventListener;
import com.iqser.red.service.redaction.v1.server.logger.RulesLogger;
import com.iqser.red.service.redaction.v1.server.logger.TrackingAgendaEventListener;
import com.iqser.red.service.redaction.v1.server.model.component.Component;
import com.iqser.red.service.redaction.v1.server.model.component.Entity;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.service.websocket.WebSocketService;
import com.iqser.red.service.redaction.v1.server.service.components.ComponentMappingMemoryCache;
import com.iqser.red.service.redaction.v1.server.service.components.ComponentMappingService;
import com.iqser.red.service.redaction.v1.server.service.document.ComponentComparator;
import com.iqser.red.service.redaction.v1.server.service.document.ComponentCreationService;
import com.iqser.red.service.redaction.v1.server.service.websocket.WebSocketService;
import com.iqser.red.service.redaction.v1.server.utils.exception.DroolsTimeoutException;
import com.knecon.fforesight.tenantcommons.TenantContext;
@ -64,6 +66,12 @@ public class ComponentDroolsExecutionService {
ComponentCreationService componentCreationService = new ComponentCreationService(kieSession);
ComponentMappingService componentMappingService = new ComponentMappingService(componentMappingMemoryCache, componentMappings);
RulesLogger logger = new RulesLogger(webSocketService, context);
if (settings.isDroolsDebug()) {
logger.enableAgendaTracking();
logger.enableObjectTracking();
}
kieSession.addEventListener(new TrackingAgendaEventListener(logger));
kieSession.addEventListener(new ObjectTrackingEventListener(logger));
kieSession.setGlobal("componentCreationService", componentCreationService);
try {
@ -72,7 +80,6 @@ public class ComponentDroolsExecutionService {
log.warn("Logger is not present");
}
if (hasComponentMappingServiceGlobal(kieSession)) {
kieSession.setGlobal(COMPONENT_MAPPING_SERVICE_GLOBAL, componentMappingService);
}
@ -85,7 +92,10 @@ public class ComponentDroolsExecutionService {
entities.add(Entity.fromEntityLogEntry(entry, document, entry.getStartOffset(), entry.getEndOffset()));
if (entry.getDuplicatedTextRanges() != null && !entry.getDuplicatedTextRanges().isEmpty()) {
entry.getDuplicatedTextRanges()
.forEach(duplicatedTextRange -> entities.add(Entity.fromEntityLogEntry(entry, document, duplicatedTextRange.getStart(), duplicatedTextRange.getEnd())));
.forEach(duplicatedTextRange -> entities.add(Entity.fromEntityLogEntry(entry,
document,
duplicatedTextRange.getStart(),
duplicatedTextRange.getEnd())));
}
return entities.stream();
})

View File

@ -22,15 +22,17 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation;
import com.iqser.red.service.redaction.v1.server.RedactionServiceSettings;
import com.iqser.red.service.redaction.v1.server.logger.Context;
import com.iqser.red.service.redaction.v1.server.logger.ObjectTrackingEventListener;
import com.iqser.red.service.redaction.v1.server.logger.RulesLogger;
import com.iqser.red.service.redaction.v1.server.logger.TrackingAgendaEventListener;
import com.iqser.red.service.redaction.v1.server.model.NerEntities;
import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.service.ManualChangesApplicationService;
import com.iqser.red.service.redaction.v1.server.service.websocket.WebSocketService;
import com.iqser.red.service.redaction.v1.server.service.document.EntityCreationService;
import com.iqser.red.service.redaction.v1.server.service.document.EntityEnrichmentService;
import com.iqser.red.service.redaction.v1.server.service.websocket.WebSocketService;
import com.iqser.red.service.redaction.v1.server.utils.exception.DroolsTimeoutException;
import io.micrometer.core.annotation.Timed;
@ -96,6 +98,12 @@ public class EntityDroolsExecutionService {
EntityCreationService entityCreationService = new EntityCreationService(entityEnrichmentService, kieSession, nodesInKieSession);
RulesLogger logger = new RulesLogger(webSocketService, context);
if (settings.isDroolsDebug()) {
logger.enableAgendaTracking();
logger.enableObjectTracking();
}
kieSession.addEventListener(new TrackingAgendaEventListener(logger));
kieSession.addEventListener(new ObjectTrackingEventListener(logger));
kieSession.setGlobal("document", document);
kieSession.setGlobal("entityCreationService", entityCreationService);
@ -144,14 +152,14 @@ public class EntityDroolsExecutionService {
try {
completableFuture.get(settings.getDroolsExecutionTimeoutSecs(document.getNumberOfPages()), TimeUnit.SECONDS);
} catch (ExecutionException e) {
logger.error(e,"Exception during rule execution");
logger.error(e, "Exception during rule execution");
kieSession.dispose();
if (e.getCause() instanceof TimeoutException) {
throw new DroolsTimeoutException(e, false, RuleFileType.ENTITY);
}
throw new RuntimeException(e);
} catch (InterruptedException e) {
logger.error(e,"Exception during rule execution");
logger.error(e, "Exception during rule execution");
kieSession.dispose();
throw new RuntimeException(e);
} catch (TimeoutException e) {

View File

@ -18,7 +18,7 @@ public class WebSocketService {
/**
 * Sends the given rule log event to the tenant-specific rule-log topic.
 *
 * @param ruleLogEvent the event to send; its tenant id selects the destination
 */
public void sendLogEvent(RuleLogEvent ruleLogEvent) {

    String destination = "/topic/" + ruleLogEvent.getTenantId() + "/rule-log-events";
    // Logged once, at DEBUG; the same line was previously also written at INFO.
    log.debug("Sending message to url {}", destination);
    template.convertAndSend(destination, ruleLogEvent);
}

View File

@ -13,5 +13,5 @@
<root level="INFO">
<appender-ref ref="${logType}"/>
</root>
<logger name="org.drools.mvel" level="ERROR"/>
</configuration>

View File

@ -94,6 +94,8 @@ import lombok.extern.slf4j.Slf4j;
FileType.DOCUMENT_TEXT,
FileType.IMAGE_INFO,
FileType.NER_ENTITIES,
FileType.LLM_NER_ENTITIES,
FileType.AZURE_NER_ENTITIES,
FileType.TABLES,
FileType.IMPORTED_REDACTIONS);
@ -104,7 +106,7 @@ import lombok.extern.slf4j.Slf4j;
FileType.DOCUMENT_STRUCTURE,
FileType.DOCUMENT_TEXT);
Path dossierTemplateToUse = Path.of("/home/kschuettler/Downloads/mainBodyFailed/DOSSIER_TEMPLATE"); // Add your dossier-template here
Path dossierTemplateToUse = Path.of("/home/kschuettler/Downloads/New Folder/DOSSIER_TEMPLATE"); // Add your dossier-template here
ObjectMapper mapper = ObjectMapperFactory.create();
final String TENANT_ID = "tenant";
TestDossierTemplate testDossierTemplate;
@ -143,7 +145,7 @@ import lombok.extern.slf4j.Slf4j;
@SneakyThrows
public void runAnalysisEnd2End() {
String folder = "/home/kschuettler/Downloads/mainBodyFailed/728d0af4-f4c4-4bc9-acf8-7d2632b02962/"; // Should contain all files from minio directly, still zipped. Can contain multiple files.
String folder = "/home/kschuettler/Downloads/New Folder/436e4a2a-0ba3-4d3c-9944-c355f5c1cca2"; // Should contain all files from minio directly, still zipped. Can contain multiple files.
Path absoluteFolderPath;
if (folder.startsWith("files")) { // if it starts with "files" it is most likely in the resources folder, else it should be an absolute path
@ -157,10 +159,12 @@ import lombok.extern.slf4j.Slf4j;
List<AnalyzeRequest> analyzeRequests = prepareStorageForFolder(absoluteFolderPath);
log.info("Found {} distinct fileIds with all required files", analyzeRequests.size());
for (int i = 0; i < analyzeRequests.size(); i++) {
long start = System.currentTimeMillis();
AnalyzeRequest analyzeRequest = analyzeRequests.get(i);
log.info("----------------------------------------------------------------------------------");
log.info("{}/{}: Starting analysis for file {}", i + 1, analyzeRequests.size(), analyzeRequest.getFileId());
analyzeService.analyze(analyzeRequest);
log.info("{}/{}: Finished analysis for file {} in {} ms", i + 1, analyzeRequests.size(), analyzeRequest.getFileId(), System.currentTimeMillis() - start);
log.info("----------------------------------------------------------------------------------");
log.info("");
}

View File

@ -2,7 +2,6 @@ package com.iqser.red.service.redaction.v1.server.document.graph;
import static com.iqser.red.service.redaction.v1.server.utils.SeparatorUtils.boundaryIsSurroundedBySeparators;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.when;
import java.awt.Color;
@ -34,7 +33,6 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribu
import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType;
import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions;
import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type;
import com.iqser.red.service.redaction.v1.server.logger.Context;
import com.iqser.red.service.redaction.v1.server.model.NerEntities;
import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary;
@ -176,7 +174,8 @@ public class DocumentPerformanceIntegrationTest extends BuildDocumentIntegration
float durationMillis = ((float) (System.currentTimeMillis() - start));
System.out.printf("%d calls of buildTextBlock() on document took %f s, average is %f ms\n", n, durationMillis / 1000, durationMillis / n);
Section section = document.getAllSections().get(9);
Section section = document.getAllSections()
.get(9);
start = System.currentTimeMillis();
for (int i = 0; i < n; i++) {
@ -308,7 +307,7 @@ public class DocumentPerformanceIntegrationTest extends BuildDocumentIntegration
String type) {
TextBlock textBlock = document.getTextBlock();
searchImplementation.getBoundaries(textBlock, textBlock.getTextRange())
searchImplementation.getBoundaries(textBlock)
.filter(boundary -> boundaryIsSurroundedBySeparators(textBlock, boundary))
.map(bounds -> TextEntity.initialEntityNode(bounds, type, entityType, document))
.forEach(foundEntities::add);

View File

@ -14,4 +14,7 @@
<appender-ref ref="${logType}"/>
</root>
<logger name="org.apache.fontbox.ttf" level="ERROR"/>
<logger name="org.drools.mvel" level="ERROR"/>
</configuration>