From bcbd4587f1fda7f4bb1551370131de3819dcf66a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kilian=20Sch=C3=BCttler?= Date: Fri, 6 Sep 2024 11:07:48 +0200 Subject: [PATCH] RED-9728: remove False Positives from table methods --- .../build.gradle.kts | 2 +- .../v1/server/RedactionServiceSettings.java | 2 + .../logger/ObjectTrackingEventListener.java | 70 ++++++++++++ .../v1/server/logger/RulesLogger.java | 106 ++++++++++++++---- .../logger/TrackingAgendaEventListener.java | 65 +++++++++++ .../v1/server/model/PrecursorEntity.java | 16 +++ .../dictionary/SearchImplementation.java | 7 ++ .../server/model/document/entity/IEntity.java | 11 ++ .../model/document/entity/TextEntity.java | 15 +++ .../v1/server/model/document/nodes/Page.java | 2 +- .../model/document/nodes/SemanticNode.java | 50 +++++---- .../v1/server/model/document/nodes/Table.java | 20 ++-- .../model/document/nodes/TableCell.java | 8 -- .../document/textblock/AtomicTextBlock.java | 29 ++++- .../textblock/ConcatenatedTextBlock.java | 22 +++- .../model/document/textblock/TextBlock.java | 3 + .../service/DictionarySearchService.java | 2 +- .../service/EntityLogCreatorService.java | 6 - .../service/document/DocumentGraphMapper.java | 3 - .../document/EntityCreationService.java | 6 +- .../document/EntityFindingUtility.java | 6 +- .../ComponentDroolsExecutionService.java | 16 ++- .../drools/EntityDroolsExecutionService.java | 14 ++- .../service/websocket/WebSocketService.java | 2 +- .../src/main/resources/logback-spring.xml | 2 +- .../v1/server/AnalysisEnd2EndTest.java | 8 +- .../DocumentPerformanceIntegrationTest.java | 7 +- .../src/test/resources/logback-spring.xml | 3 + 28 files changed, 407 insertions(+), 96 deletions(-) create mode 100644 redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/logger/ObjectTrackingEventListener.java create mode 100644 
redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/logger/TrackingAgendaEventListener.java diff --git a/redaction-service-v1/redaction-service-server-v1/build.gradle.kts b/redaction-service-v1/redaction-service-server-v1/build.gradle.kts index 2f56ffb6..94d35138 100644 --- a/redaction-service-v1/redaction-service-server-v1/build.gradle.kts +++ b/redaction-service-v1/redaction-service-server-v1/build.gradle.kts @@ -12,7 +12,7 @@ plugins { description = "redaction-service-server-v1" -val layoutParserVersion = "0.161.0" +val layoutParserVersion = "0.174.0" val jacksonVersion = "2.15.2" val droolsVersion = "9.44.0.Final" val pdfBoxVersion = "3.0.0" diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/RedactionServiceSettings.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/RedactionServiceSettings.java index 2b6e4b47..5229dcbf 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/RedactionServiceSettings.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/RedactionServiceSettings.java @@ -38,6 +38,8 @@ public class RedactionServiceSettings { private boolean annotationMode; + private boolean droolsDebug; + public int getDroolsExecutionTimeoutSecs(int numberOfPages) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/logger/ObjectTrackingEventListener.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/logger/ObjectTrackingEventListener.java new file mode 100644 index 00000000..d52b5e72 --- /dev/null +++ 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/logger/ObjectTrackingEventListener.java @@ -0,0 +1,70 @@ +package com.iqser.red.service.redaction.v1.server.logger; + +import org.kie.api.definition.rule.Rule; +import org.kie.api.event.rule.DefaultRuleRuntimeEventListener; +import org.kie.api.event.rule.ObjectDeletedEvent; +import org.kie.api.event.rule.ObjectInsertedEvent; +import org.kie.api.event.rule.ObjectUpdatedEvent; + +import lombok.AllArgsConstructor; + +@AllArgsConstructor +public class ObjectTrackingEventListener extends DefaultRuleRuntimeEventListener { + + RulesLogger logger; + + + @Override + public void objectInserted(ObjectInsertedEvent event) { + + if (!logger.isObjectTrackingActive()) { + return; + } + + if (event.getRule() == null) { + logger.logObjectTracking("ObjectInsertedEvent:{} has been inserted", event.getObject()); + return; + } + + logger.logObjectTracking("ObjectInsertedEvent:{}: {} has been inserted", formatRuleName(event.getRule()), event.getObject()); + } + + + @Override + public void objectDeleted(ObjectDeletedEvent event) { + + if (!logger.isObjectTrackingActive()) { + return; + } + if (event.getRule() == null) { + logger.logObjectTracking("ObjectDeletedEvent: {} has been deleted", event.getOldObject()); + return; + } + logger.logObjectTracking("ObjectDeletedEvent: {}: {} has been deleted", formatRuleName(event.getRule()), event.getOldObject()); + } + + + @Override + public void objectUpdated(ObjectUpdatedEvent event) { + + if (!logger.isObjectTrackingActive()) { + return; + } + if (event.getRule() == null) { + logger.logObjectTracking("ObjectUpdatedEvent:{} has been updated", event.getObject()); + return; + } + logger.logObjectTracking("ObjectUpdatedEvent:{}: {} has been updated", formatRuleName(event.getRule()), event.getObject()); + } + + + public static String formatRuleName(Rule rule) { + + String name = rule.getName(); + if (name.length() > 20) { + return 
name.substring(0, 20) + "..."; + } + return name; + } + +} diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/logger/RulesLogger.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/logger/RulesLogger.java index 37304f3e..1560d5c9 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/logger/RulesLogger.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/logger/RulesLogger.java @@ -1,31 +1,40 @@ package com.iqser.red.service.redaction.v1.server.logger; import java.time.OffsetDateTime; -import java.util.regex.Pattern; + +import org.slf4j.helpers.MessageFormatter; import com.iqser.red.service.redaction.v1.server.service.websocket.WebSocketService; +import lombok.Getter; import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; /** * This class provides logging functionality specifically for rules execution * in a Drools context. It is designed to log messages with different log levels * (INFO, WARN, ERROR) and formats messages using a placeholder-based approach * similar to popular logging frameworks like SLF4J.

- * + *

* Log messages can include placeholders (i.e., `{}`), which will be replaced by * the corresponding arguments when the message is formatted.

- * + *

* Example usage: *

  *     logger.info("Message with placeholder {}", object);
  * 
*/ +@Slf4j @RequiredArgsConstructor public class RulesLogger { private final WebSocketService webSocketService; private final Context context; + @Getter + private boolean objectTrackingActive; + @Getter + private boolean agendaTrackingActive; + /** * Logs a message at the INFO level. @@ -51,6 +60,75 @@ public class RulesLogger { } + /** + * Logs a message at the INFO level, if object tracking has been activated. + * + * @param message The log message containing optional placeholders (i.e., `{}`). + * @param args The arguments to replace the placeholders in the message. + */ + public void logObjectTracking(String message, Object... args) { + + if (objectTrackingActive) { + info(message, args); + } + } + + + /** + * If object tracking is enabled, the RulesLogger will log all inserted/retracted/updated events. + * Initial value is disabled. + */ + public void enableObjectTracking() { + + objectTrackingActive = true; + } + + + /** + * If object tracking is disabled, the RulesLogger won't log any inserted/retracted/updated events. + * Initial value is disabled. + */ + public void disableObjectTracking() { + + objectTrackingActive = false; + } + + + /** + * Logs a message at the INFO level, if agenda tracking has been activated. + * + * @param message The log message containing optional placeholders (i.e., `{}`). + * @param args The arguments to replace the placeholders in the message. + */ + public void logAgendaTracking(String message, Object... args) { + + if (agendaTrackingActive) { + info(message, args); + } + + } + + + /** + * If agenda tracking is enabled, the RulesLogger will log each firing Rule with its name, objects and metadata. + * Initial value is disabled. + */ + public void enableAgendaTracking() { + + agendaTrackingActive = true; + } + + + /** + * If agenda tracking is disabled, the RulesLogger won't log any rule firings. + * Initial value is disabled. 
+ */ + public void disableAgendaTracking() { + + agendaTrackingActive = false; + } + + /** * Logs a message at the ERROR level, including an exception. * @@ -67,6 +145,11 @@ public class RulesLogger { private void log(LogLevel logLevel, String message, Object... args) { var formattedMessage = formatMessage(message, args); + switch (logLevel) { + case INFO -> log.info(message, args); + case WARN -> log.warn(message, args); + case ERROR -> log.error(message, args); + } var ruleLog = RuleLogEvent.builder() .tenantId(context.getTenantId()) .ruleVersion(context.getRuleVersion()) @@ -85,22 +168,7 @@ public class RulesLogger { private String formatMessage(String message, Object... args) { - if (args == null || args.length == 0) { - return message; - } - - var pattern = Pattern.compile("\\{}"); - var matcher = pattern.matcher(message); - var sb = new StringBuilder(); - int i = 0; - - while (matcher.find() && i < args.length) { - matcher.appendReplacement(sb, args[i] != null ? args[i].toString() : "null"); - i++; - } - matcher.appendTail(sb); - - return sb.toString(); + return MessageFormatter.arrayFormat(message, args).getMessage(); } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/logger/TrackingAgendaEventListener.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/logger/TrackingAgendaEventListener.java new file mode 100644 index 00000000..497af362 --- /dev/null +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/logger/TrackingAgendaEventListener.java @@ -0,0 +1,65 @@ +package com.iqser.red.service.redaction.v1.server.logger; + +import java.util.Map; + +import org.kie.api.definition.rule.Rule; +import org.kie.api.event.rule.AfterMatchFiredEvent; +import org.kie.api.event.rule.DefaultAgendaEventListener; +import org.kie.api.event.rule.MatchCreatedEvent; + +import 
lombok.AllArgsConstructor; + +@AllArgsConstructor +public class TrackingAgendaEventListener extends DefaultAgendaEventListener { + + private RulesLogger logger; + + + @Override + public void matchCreated(MatchCreatedEvent event) { + + if (logger.isAgendaTrackingActive()) { + logger.logAgendaTracking(event.toString()); + } + } + + + @Override + public void afterMatchFired(AfterMatchFiredEvent event) { + + if (!logger.isAgendaTrackingActive()) { + return; + } + + Rule rule = event.getMatch().getRule(); + + String ruleName = formatRuleName(rule); + Map ruleMetaDataMap = rule.getMetaData(); + + StringBuilder sb = new StringBuilder("AfterMatchFiredEvent: " + ruleName); + + if (event.getMatch().getObjects() != null && !event.getMatch().getObjects().isEmpty()) { + sb.append(", ").append(event.getMatch().getObjects().size()).append(" objects: "); + for (Object object : event.getMatch().getObjects()) { + sb.append(object).append(", "); + } + sb.delete(sb.length() - 2, sb.length()); + } + + if (!ruleMetaDataMap.isEmpty()) { + sb.append("\n With [").append(ruleMetaDataMap.size()).append("] meta-data:"); + for (String key : ruleMetaDataMap.keySet()) { + sb.append("\n key=").append(key).append(", value=").append(ruleMetaDataMap.get(key)); + } + } + + logger.logAgendaTracking(sb.toString()); + } + + + public static String formatRuleName(Rule rule) { + + return ObjectTrackingEventListener.formatRuleName(rule); + } + +} \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/PrecursorEntity.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/PrecursorEntity.java index a3ec0376..e942c582 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/PrecursorEntity.java +++ 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/PrecursorEntity.java @@ -180,6 +180,22 @@ public class PrecursorEntity implements IEntity { } + /** + * @return true when this entity is of EntityType ENTITY or HINT + */ + public boolean validEntityType() { + + return entityType.equals(EntityType.ENTITY) || entityType.equals(EntityType.HINT); + } + + + @Override + public boolean valid() { + + return active() && validEntityType(); + } + + private static EntityType getEntityType(EntryType entryType) { switch (entryType) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/dictionary/SearchImplementation.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/dictionary/SearchImplementation.java index 42862452..0fd97aed 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/dictionary/SearchImplementation.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/dictionary/SearchImplementation.java @@ -11,6 +11,7 @@ import java.util.stream.Stream; import org.ahocorasick.trie.Trie; import com.iqser.red.service.redaction.v1.server.model.document.TextRange; +import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock; import lombok.Data; @@ -104,6 +105,12 @@ public class SearchImplementation { } + public Stream getBoundaries(TextBlock textBlock) { + + return getBoundaries(textBlock, textBlock.getTextRange()); + } + + public Stream getBoundaries(CharSequence text, TextRange region) { if (this.values.isEmpty()) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/IEntity.java 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/IEntity.java index ebf5e740..3cc3cb3c 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/IEntity.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/IEntity.java @@ -52,6 +52,17 @@ public interface IEntity { String type(); + /** + * An Entity is valid, when it is active and not a false recommendation, a false positive or a dictionary removal. + * + * @return true, if the entity is valid, false otherwise. + */ + default boolean valid() { + + return active(); + } + + /** * Calculates the length of the entity's value. * diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/TextEntity.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/TextEntity.java index a80a00c8..f8334be6 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/TextEntity.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/TextEntity.java @@ -289,6 +289,21 @@ public class TextEntity implements IEntity { } + /** + * @return true when this entity is of EntityType ENTITY or HINT + */ + public boolean validEntityType() { + + return entityType.equals(EntityType.ENTITY) || entityType.equals(EntityType.HINT); + } + + + public boolean valid() { + + return active() && validEntityType(); + } + + @Override public String value() { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Page.java 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Page.java index 0fa334d0..ef0b8ddd 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Page.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Page.java @@ -115,7 +115,7 @@ public class Page { @Override public String toString() { - return String.valueOf(number); + return "Page: " + number; } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/SemanticNode.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/SemanticNode.java index 884a39f3..5b6aa021 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/SemanticNode.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/SemanticNode.java @@ -17,6 +17,7 @@ import java.util.stream.Stream; import com.iqser.red.service.redaction.v1.server.model.document.ConsecutiveTextBlockCollector; import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree; import com.iqser.red.service.redaction.v1.server.model.document.TextRange; +import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock; import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock; @@ -77,6 +78,20 @@ public interface SemanticNode { Set getEntities(); + /** + * A view of the Entity Set of this SemanticNode 
including only the active (APPLIED or SKIPPED) Entities which are of a valid type (ENTITY or HINT). + * This is used for all functions, which check for the existence of an Entity, such as hasEntitiesOfType(). + * + * @return Stream of valid TextEntities + */ + default Stream streamValidEntities() { + + return getEntities().stream() + .filter(IEntity::active) + .filter(TextEntity::validEntityType); + } + + /** * Each AtomicTextBlock is assigned a page, so to get the pages this node appears on, it collects the PageNodes from each AtomicTextBlock belonging to this node's TextBlock. * @@ -277,9 +292,7 @@ public interface SemanticNode { */ default boolean hasEntitiesOfType(String type) { - return getEntities().stream() - .filter(TextEntity::active) - .anyMatch(redactionEntity -> redactionEntity.type().equals(type)); + return streamValidEntities().anyMatch(redactionEntity -> redactionEntity.type().equals(type)); } @@ -292,10 +305,8 @@ public interface SemanticNode { */ default boolean hasEntitiesOfAnyType(String... types) { - return getEntities().stream() - .filter(TextEntity::active) - .anyMatch(redactionEntity -> Arrays.stream(types) - .anyMatch(type -> redactionEntity.type().equals(type))); + return streamValidEntities().anyMatch(redactionEntity -> Arrays.stream(types) + .anyMatch(type -> redactionEntity.type().equals(type))); } @@ -308,9 +319,7 @@ public interface SemanticNode { */ default boolean hasEntitiesOfAllTypes(String... types) { - return getEntities().stream() - .filter(TextEntity::active) - .map(TextEntity::type) + return streamValidEntities().map(TextEntity::type) .collect(Collectors.toUnmodifiableSet()) .containsAll(Arrays.stream(types) .toList()); @@ -319,31 +328,28 @@ public interface SemanticNode { /** * Returns a List of Entities in this SemanticNode which are of the provided type such as "CBI_author". - * Ignores Entity with ignored == true or removed == true. + * Ignores Entity which are not active or of a removal type ignored == true or removed == true. 
* * @param type string representing the type of entities to return * @return List of RedactionEntities of any the type */ default List getEntitiesOfType(String type) { - return getEntities().stream() - .filter(TextEntity::active) - .filter(redactionEntity -> redactionEntity.type().equals(type)) + return streamValidEntities().filter(redactionEntity -> redactionEntity.type().equals(type)) .toList(); } /** * Returns a List of Entities in this SemanticNode which have any of the provided types such as "CBI_author". - * Ignores Entity with ignored == true or removed == true. + * Ignores Entity that are not valid. * * @param types A list of strings representing the types of entities to return * @return List of RedactionEntities of any provided type */ default List getEntitiesOfType(List types) { - return getEntities().stream() - .filter(TextEntity::active) + return streamValidEntities()// .filter(redactionEntity -> redactionEntity.isAnyType(types)) .toList(); } @@ -351,15 +357,14 @@ public interface SemanticNode { /** * Returns a List of Entities in this SemanticNode which have any of the provided types. - * Ignores Entity with the ignored flag set to true or the removed flag set to true. + * Ignores Entity that are not valid. * * @param types A list of strings representing the types of entities to return * @return List of RedactionEntities that match any of the provided types */ default List getEntitiesOfType(String... 
types) { - return getEntities().stream() - .filter(TextEntity::active) + return streamValidEntities()// .filter(redactionEntity -> redactionEntity.isAnyType(Arrays.stream(types) .toList())) .toList(); @@ -463,7 +468,7 @@ public interface SemanticNode { */ default boolean containsStringIgnoreCase(String string) { - return getTextBlock().getSearchText().toLowerCase(Locale.ROOT).contains(string.toLowerCase(Locale.ROOT)); + return getTextBlock().getSearchTextLowerCase().contains(string.toLowerCase(Locale.ROOT)); } @@ -774,13 +779,12 @@ public interface SemanticNode { /** - * TODO: this produces unwanted results for sections spanning multiple columns. * Computes the Union of the bounding boxes of all children recursively. * * @return The union of the BoundingBoxes of all children */ private Map getBBoxFromChildren() { - + //TODO: this produces unwanted results for sections spanning multiple columns. Map bBoxPerPage = new HashMap<>(); List> childrenBBoxes = streamChildren().map(SemanticNode::getBBox) .toList(); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Table.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Table.java index f648d8a9..55ff987a 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Table.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Table.java @@ -74,8 +74,7 @@ public class Table implements SemanticNode { return IntStream.range(0, numberOfRows).boxed() .filter(row -> rowContainsStringsIgnoreCase(row, strings)) .flatMap(this::streamRow) - .map(TableCell::getEntities) - .flatMap(Collection::stream); + .flatMap(TableCell::streamValidEntities); } @@ -135,11 +134,11 @@ public class Table implements SemanticNode { 
/** - * Streams all entities in this table, that appear in a row, which contains at least one entity with any of the provided types. + * Streams all entities in this table, that appear in a row, which contains at least one valid entity with any of the provided types. * Ignores Entity with ignored == true or removed == true. * * @param types type strings to check whether a row contains an entity like them - * @return Stream of all entities in this table, that appear in a row, which contains at least one entity with any of the provided types. + * @return Stream of all entities in this table, that appear in a row, which contains at least one valid entity with any of the provided types. */ public Stream streamEntitiesWhereRowContainsEntitiesOfType(List types) { @@ -192,30 +191,26 @@ public class Table implements SemanticNode { /** - * Streams all Entities in the given row. + * Streams all valid Entities in the given row. * * @param rowNumber the row number to look for * @return stream of TextEntities occurring in row */ public Stream streamTextEntitiesInRow(int rowNumber) { - return streamRow(rowNumber).map(TableCell::getEntities) - .flatMap(Collection::stream) - .filter(TextEntity::active); + return streamRow(rowNumber).flatMap(TableCell::streamValidEntities); } /** - * Streams all Entities in the given col. + * Streams all valid Entities in the given col. 
* * @param colNumber the column number to look for * @return stream of TextEntities occurring in row */ public Stream streamTextEntitiesInCol(int colNumber) { - return streamCol(colNumber).map(TableCell::getEntities) - .flatMap(Collection::stream) - .filter(TextEntity::active); + return streamCol(colNumber).flatMap(TableCell::streamValidEntities); } @@ -269,6 +264,7 @@ public class Table implements SemanticNode { return streamHeaders().filter(tableCellNode -> tableCellNode.getTextBlock().getSearchText().contains(header)) .map(TableCell::getCol) + .distinct() .flatMap(this::streamCol) .filter(tableCellNode -> !tableCellNode.isHeader()); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/TableCell.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/TableCell.java index 67fe0c84..64dce481 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/TableCell.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/TableCell.java @@ -2,20 +2,13 @@ package com.iqser.red.service.redaction.v1.server.model.document.nodes; import java.awt.geom.Rectangle2D; import java.util.HashMap; -import java.util.HashSet; -import java.util.List; import java.util.Map; -import java.util.Set; -import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity; -import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree; import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock; import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector; -import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine; import lombok.AccessLevel; import 
lombok.AllArgsConstructor; -import lombok.Builder; import lombok.Data; import lombok.EqualsAndHashCode; import lombok.experimental.FieldDefaults; @@ -42,7 +35,6 @@ public class TableCell extends AbstractSemanticNode { TextBlock textBlock; - @Override public Map getBBox() { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/textblock/AtomicTextBlock.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/textblock/AtomicTextBlock.java index 3b6a1102..de1db3d0 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/textblock/AtomicTextBlock.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/textblock/AtomicTextBlock.java @@ -3,6 +3,7 @@ package com.iqser.red.service.redaction.v1.server.model.document.textblock; import static java.lang.String.format; import java.awt.geom.Rectangle2D; +import java.lang.ref.SoftReference; import java.text.BreakIterator; import java.util.ArrayList; import java.util.Arrays; @@ -43,9 +44,11 @@ public class AtomicTextBlock implements TextBlock { //string coordinates TextRange textRange; String searchText; - List words; List lineBreaks; + SoftReference searchTextLowerCaseCache; + SoftReference> wordsCache; + //position coordinates List stringIdxToPositionIdx; @Getter @@ -121,8 +124,31 @@ public class AtomicTextBlock implements TextBlock { } + @Override + public String getSearchTextLowerCase() { + + String text = null; + if (searchTextLowerCaseCache != null) { + text = searchTextLowerCaseCache.get(); + } + + if (text == null) { + text = getSearchText().toLowerCase(Locale.ENGLISH); + searchTextLowerCaseCache = new SoftReference<>(text); + } + + return text; + } + + public List getWords() { + List words = null; + + if (wordsCache != null) { 
+ words = wordsCache.get(); + } + if (words == null) { words = new ArrayList<>(); BreakIterator iterator = BreakIterator.getWordInstance(Locale.ENGLISH); @@ -131,6 +157,7 @@ public class AtomicTextBlock implements TextBlock { for (int end = iterator.next(); end != BreakIterator.DONE; start = end, end = iterator.next()) { words.add(searchText.substring(start, end)); } + wordsCache = new SoftReference<>(words); } return words; } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/textblock/ConcatenatedTextBlock.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/textblock/ConcatenatedTextBlock.java index b69a2590..965bebc4 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/textblock/ConcatenatedTextBlock.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/textblock/ConcatenatedTextBlock.java @@ -3,11 +3,13 @@ package com.iqser.red.service.redaction.v1.server.model.document.textblock; import static java.lang.String.format; import java.awt.geom.Rectangle2D; +import java.lang.ref.SoftReference; +import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.LinkedList; import java.util.List; -import java.util.Collection; +import java.util.Locale; import java.util.Map; import java.util.stream.Stream; @@ -25,6 +27,7 @@ public class ConcatenatedTextBlock implements TextBlock { List atomicTextBlocks; String searchText; TextRange textRange; + SoftReference searchTextLowerCaseCache; public static ConcatenatedTextBlock empty() { @@ -100,6 +103,23 @@ public class ConcatenatedTextBlock implements TextBlock { } + @Override + public String getSearchTextLowerCase() { + + String text = null; + if (searchTextLowerCaseCache != null) { 
+ text = searchTextLowerCaseCache.get(); + } + + if (text == null) { + text = getSearchText().toLowerCase(Locale.ENGLISH); + searchTextLowerCaseCache = new SoftReference<>(text); + } + + return text; + } + + @Override public List getWords() { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/textblock/TextBlock.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/textblock/TextBlock.java index b8a96db4..1bfc82de 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/textblock/TextBlock.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/textblock/TextBlock.java @@ -19,6 +19,9 @@ public interface TextBlock extends CharSequence { String getSearchText(); + String getSearchTextLowerCase(); + + List getWords(); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/DictionarySearchService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/DictionarySearchService.java index 06d8237a..fe46e644 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/DictionarySearchService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/DictionarySearchService.java @@ -58,7 +58,7 @@ public class DictionarySearchService { Set engines = isDossierDictionaryEntry ? 
Set.of(Engine.DOSSIER_DICTIONARY) : Set.of(Engine.DICTIONARY); EntityCreationService entityCreationService = new EntityCreationService(entityEnrichmentService); - searchImplementation.getBoundaries(node.getTextBlock(), node.getTextRange()) + searchImplementation.getBoundaries(node.getTextBlock()) .filter(boundary -> entityCreationService.isValidEntityTextRange(node.getTextBlock(), boundary)) .forEach(bounds -> entityCreationService.byTextRangeWithEngine(bounds, type, entityType, node, engines) .ifPresent(entity -> { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java index ee5ff437..0a2cf448 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/EntityLogCreatorService.java @@ -358,12 +358,6 @@ public class EntityLogCreatorService { } - private boolean isHint(EntityType entityType) { - - return entityType.equals(EntityType.HINT); - } - - public static EntryState buildEntryState(IEntity entity) { if (entity.applied() && entity.active()) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/DocumentGraphMapper.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/DocumentGraphMapper.java index 98364c15..9319457a 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/DocumentGraphMapper.java +++ 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/DocumentGraphMapper.java @@ -7,8 +7,6 @@ import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; -import java.util.Set; -import java.util.stream.Collectors; import com.iqser.red.service.redaction.v1.server.model.document.DocumentData; import com.iqser.red.service.redaction.v1.server.model.document.DocumentTree; @@ -111,7 +109,6 @@ public class DocumentGraphMapper { } - private Headline buildHeadline(Context context) { return Headline.builder().documentTree(context.documentTree).build(); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java index cf7933f2..a6c592b5 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java @@ -475,7 +475,7 @@ public class EntityCreationService { */ public Stream bySearchImplementation(SearchImplementation searchImplementation, String type, EntityType entityType, SemanticNode node) { - return searchImplementation.getBoundaries(node.getTextBlock(), node.getTextRange()) + return searchImplementation.getBoundaries(node.getTextBlock()) .filter(boundary -> isValidEntityTextRange(node.getTextBlock(), boundary)) .map(bounds -> byTextRange(bounds, type, entityType, node)) .filter(Optional::isPresent) @@ -496,7 +496,7 @@ public class EntityCreationService { TextBlock textBlock = node.getTextBlock(); SearchImplementation searchImplementation = new 
SearchImplementation(strings, false); - return searchImplementation.getBoundaries(textBlock, node.getTextRange()) + return searchImplementation.getBoundaries(textBlock) .map(boundary -> toLineAfterTextRange(textBlock, boundary)) .filter(boundary -> isValidEntityTextRange(textBlock, boundary)) .map(boundary -> byTextRange(boundary, type, entityType, node)) @@ -518,7 +518,7 @@ public class EntityCreationService { TextBlock textBlock = node.getTextBlock(); SearchImplementation searchImplementation = new SearchImplementation(strings, true); - return searchImplementation.getBoundaries(textBlock, node.getTextRange()) + return searchImplementation.getBoundaries(textBlock) .map(boundary -> toLineAfterTextRange(textBlock, boundary)) .filter(boundary -> isValidEntityTextRange(textBlock, boundary)) .map(boundary -> byTextRange(boundary, type, entityType, node)) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFindingUtility.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFindingUtility.java index e12b818c..3fc6cf33 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFindingUtility.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityFindingUtility.java @@ -188,7 +188,7 @@ public class EntityFindingUtility { List textBlocks = node.getTextBlocksByPageNumbers(pageNumbers); return textBlocks.stream() - .flatMap(tb -> searchImplementation.getBoundaries(tb, tb.getTextRange())) + .flatMap(searchImplementation::getBoundaries) .map(boundary -> entityCreationService.byTextRangeWithEngine(boundary, "temp", EntityType.ENTITY, node, Collections.emptySet())) .filter(Optional::isPresent) .map(Optional::get) @@ -214,7 +214,7 @@ public class 
EntityFindingUtility { List textBlocks = document.getTextBlocksByPageNumbers(pageNumbers); return textBlocks.stream() - .flatMap(tb -> searchImplementation.getBoundaries(tb, tb.getTextRange()) + .flatMap(tb -> searchImplementation.getBoundaries(tb) .filter(textRange -> entityCreationService.isValidEntityTextRange(tb, textRange))) .map(boundary -> entityCreationService.byTextRangeWithEngine(boundary, "temp", EntityType.ENTITY, document, Collections.emptySet())) .filter(Optional::isPresent) @@ -228,7 +228,7 @@ public class EntityFindingUtility { SearchImplementation searchImplementation = new SearchImplementation(value, !caseSensitive); - return searchImplementation.getBoundaries(document.getTextBlock(), document.getTextRange()) + return searchImplementation.getBoundaries(document.getTextBlock()) .filter(textRange -> entityCreationService.isValidEntityTextRange(document.getTextBlock(), textRange)) .map(boundary -> entityCreationService.byTextRangeWithEngine(boundary, "temp", EntityType.ENTITY, document, Collections.emptySet())) .filter(Optional::isPresent) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/ComponentDroolsExecutionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/ComponentDroolsExecutionService.java index bd674a66..54a62b7c 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/ComponentDroolsExecutionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/ComponentDroolsExecutionService.java @@ -23,15 +23,17 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.analysislog import com.iqser.red.service.persistence.service.v1.api.shared.model.component.ComponentMappingMetadata; import 
com.iqser.red.service.redaction.v1.server.RedactionServiceSettings; import com.iqser.red.service.redaction.v1.server.logger.Context; +import com.iqser.red.service.redaction.v1.server.logger.ObjectTrackingEventListener; import com.iqser.red.service.redaction.v1.server.logger.RulesLogger; +import com.iqser.red.service.redaction.v1.server.logger.TrackingAgendaEventListener; import com.iqser.red.service.redaction.v1.server.model.component.Component; import com.iqser.red.service.redaction.v1.server.model.component.Entity; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; -import com.iqser.red.service.redaction.v1.server.service.websocket.WebSocketService; import com.iqser.red.service.redaction.v1.server.service.components.ComponentMappingMemoryCache; import com.iqser.red.service.redaction.v1.server.service.components.ComponentMappingService; import com.iqser.red.service.redaction.v1.server.service.document.ComponentComparator; import com.iqser.red.service.redaction.v1.server.service.document.ComponentCreationService; +import com.iqser.red.service.redaction.v1.server.service.websocket.WebSocketService; import com.iqser.red.service.redaction.v1.server.utils.exception.DroolsTimeoutException; import com.knecon.fforesight.tenantcommons.TenantContext; @@ -64,6 +66,12 @@ public class ComponentDroolsExecutionService { ComponentCreationService componentCreationService = new ComponentCreationService(kieSession); ComponentMappingService componentMappingService = new ComponentMappingService(componentMappingMemoryCache, componentMappings); RulesLogger logger = new RulesLogger(webSocketService, context); + if (settings.isDroolsDebug()) { + logger.enableAgendaTracking(); + logger.enableObjectTracking(); + } + kieSession.addEventListener(new TrackingAgendaEventListener(logger)); + kieSession.addEventListener(new ObjectTrackingEventListener(logger)); kieSession.setGlobal("componentCreationService", componentCreationService); try { @@ -72,7 +80,6 @@ public 
class ComponentDroolsExecutionService { log.warn("Logger is not present"); } - if (hasComponentMappingServiceGlobal(kieSession)) { kieSession.setGlobal(COMPONENT_MAPPING_SERVICE_GLOBAL, componentMappingService); } @@ -85,7 +92,10 @@ public class ComponentDroolsExecutionService { entities.add(Entity.fromEntityLogEntry(entry, document, entry.getStartOffset(), entry.getEndOffset())); if (entry.getDuplicatedTextRanges() != null && !entry.getDuplicatedTextRanges().isEmpty()) { entry.getDuplicatedTextRanges() - .forEach(duplicatedTextRange -> entities.add(Entity.fromEntityLogEntry(entry, document, duplicatedTextRange.getStart(), duplicatedTextRange.getEnd()))); + .forEach(duplicatedTextRange -> entities.add(Entity.fromEntityLogEntry(entry, + document, + duplicatedTextRange.getStart(), + duplicatedTextRange.getEnd()))); } return entities.stream(); }) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/EntityDroolsExecutionService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/EntityDroolsExecutionService.java index 54dc95c8..d95e1017 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/EntityDroolsExecutionService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/drools/EntityDroolsExecutionService.java @@ -22,15 +22,17 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.entitymapped.BaseAnnotation; import com.iqser.red.service.redaction.v1.server.RedactionServiceSettings; import com.iqser.red.service.redaction.v1.server.logger.Context; +import com.iqser.red.service.redaction.v1.server.logger.ObjectTrackingEventListener; import 
com.iqser.red.service.redaction.v1.server.logger.RulesLogger; +import com.iqser.red.service.redaction.v1.server.logger.TrackingAgendaEventListener; import com.iqser.red.service.redaction.v1.server.model.NerEntities; import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document; import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode; import com.iqser.red.service.redaction.v1.server.service.ManualChangesApplicationService; -import com.iqser.red.service.redaction.v1.server.service.websocket.WebSocketService; import com.iqser.red.service.redaction.v1.server.service.document.EntityCreationService; import com.iqser.red.service.redaction.v1.server.service.document.EntityEnrichmentService; +import com.iqser.red.service.redaction.v1.server.service.websocket.WebSocketService; import com.iqser.red.service.redaction.v1.server.utils.exception.DroolsTimeoutException; import io.micrometer.core.annotation.Timed; @@ -96,6 +98,12 @@ public class EntityDroolsExecutionService { EntityCreationService entityCreationService = new EntityCreationService(entityEnrichmentService, kieSession, nodesInKieSession); RulesLogger logger = new RulesLogger(webSocketService, context); + if (settings.isDroolsDebug()) { + logger.enableAgendaTracking(); + logger.enableObjectTracking(); + } + kieSession.addEventListener(new TrackingAgendaEventListener(logger)); + kieSession.addEventListener(new ObjectTrackingEventListener(logger)); kieSession.setGlobal("document", document); kieSession.setGlobal("entityCreationService", entityCreationService); @@ -144,14 +152,14 @@ public class EntityDroolsExecutionService { try { completableFuture.get(settings.getDroolsExecutionTimeoutSecs(document.getNumberOfPages()), TimeUnit.SECONDS); } catch (ExecutionException e) { - logger.error(e,"Exception during rule execution"); + logger.error(e, "Exception during rule execution"); kieSession.dispose(); if 
(e.getCause() instanceof TimeoutException) { throw new DroolsTimeoutException(e, false, RuleFileType.ENTITY); } throw new RuntimeException(e); } catch (InterruptedException e) { - logger.error(e,"Exception during rule execution"); + logger.error(e, "Exception during rule execution"); kieSession.dispose(); throw new RuntimeException(e); } catch (TimeoutException e) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/websocket/WebSocketService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/websocket/WebSocketService.java index c5774950..6986ba11 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/websocket/WebSocketService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/websocket/WebSocketService.java @@ -18,7 +18,7 @@ public class WebSocketService { public void sendLogEvent(RuleLogEvent ruleLogEvent) { String destination = "/topic/" + ruleLogEvent.getTenantId() + "/rule-log-events"; - log.info("Sending message to url {}", destination); + log.debug("Sending message to url {}", destination); template.convertAndSend(destination, ruleLogEvent); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/resources/logback-spring.xml b/redaction-service-v1/redaction-service-server-v1/src/main/resources/logback-spring.xml index 33b2cef7..02d4f3ec 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/resources/logback-spring.xml +++ b/redaction-service-v1/redaction-service-server-v1/src/main/resources/logback-spring.xml @@ -13,5 +13,5 @@ - + \ No newline at end of file diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AnalysisEnd2EndTest.java 
b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AnalysisEnd2EndTest.java index e22e57c3..526ac8d1 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AnalysisEnd2EndTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/AnalysisEnd2EndTest.java @@ -94,6 +94,8 @@ import lombok.extern.slf4j.Slf4j; FileType.DOCUMENT_TEXT, FileType.IMAGE_INFO, FileType.NER_ENTITIES, + FileType.LLM_NER_ENTITIES, + FileType.AZURE_NER_ENTITIES, FileType.TABLES, FileType.IMPORTED_REDACTIONS); @@ -104,7 +106,7 @@ import lombok.extern.slf4j.Slf4j; FileType.DOCUMENT_STRUCTURE, FileType.DOCUMENT_TEXT); - Path dossierTemplateToUse = Path.of("/home/kschuettler/Downloads/mainBodyFailed/DOSSIER_TEMPLATE"); // Add your dossier-template here + Path dossierTemplateToUse = Path.of("/home/kschuettler/Downloads/New Folder/DOSSIER_TEMPLATE"); // Add your dossier-template here ObjectMapper mapper = ObjectMapperFactory.create(); final String TENANT_ID = "tenant"; TestDossierTemplate testDossierTemplate; @@ -143,7 +145,7 @@ import lombok.extern.slf4j.Slf4j; @SneakyThrows public void runAnalysisEnd2End() { - String folder = "/home/kschuettler/Downloads/mainBodyFailed/728d0af4-f4c4-4bc9-acf8-7d2632b02962/"; // Should contain all files from minio directly, still zipped. Can contain multiple files. + String folder = "/home/kschuettler/Downloads/New Folder/436e4a2a-0ba3-4d3c-9944-c355f5c1cca2"; // Should contain all files from minio directly, still zipped. Can contain multiple files. 
Path absoluteFolderPath; if (folder.startsWith("files")) { // if it starts with "files" it is most likely in the resources folder, else it should be an absolute path @@ -157,10 +159,12 @@ import lombok.extern.slf4j.Slf4j; List analyzeRequests = prepareStorageForFolder(absoluteFolderPath); log.info("Found {} distinct fileIds with all required files", analyzeRequests.size()); for (int i = 0; i < analyzeRequests.size(); i++) { + long start = System.currentTimeMillis(); AnalyzeRequest analyzeRequest = analyzeRequests.get(i); log.info("----------------------------------------------------------------------------------"); log.info("{}/{}: Starting analysis for file {}", i + 1, analyzeRequests.size(), analyzeRequest.getFileId()); analyzeService.analyze(analyzeRequest); + log.info("{}/{}: Finished analysis for file {} in {} ms", i + 1, analyzeRequests.size(), analyzeRequest.getFileId(), System.currentTimeMillis() - start); log.info("----------------------------------------------------------------------------------"); log.info(""); } diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/DocumentPerformanceIntegrationTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/DocumentPerformanceIntegrationTest.java index 4ac29688..de8d388f 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/DocumentPerformanceIntegrationTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/document/graph/DocumentPerformanceIntegrationTest.java @@ -2,7 +2,6 @@ package com.iqser.red.service.redaction.v1.server.document.graph; import static com.iqser.red.service.redaction.v1.server.utils.SeparatorUtils.boundaryIsSurroundedBySeparators; import static org.junit.jupiter.api.Assertions.assertTrue; -import 
static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.when; import java.awt.Color; @@ -34,7 +33,6 @@ import com.iqser.red.service.persistence.service.v1.api.shared.model.FileAttribu import com.iqser.red.service.persistence.service.v1.api.shared.model.RuleFileType; import com.iqser.red.service.persistence.service.v1.api.shared.model.annotations.ManualRedactions; import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive; -import com.iqser.red.service.persistence.service.v1.api.shared.model.dossiertemplate.type.Type; import com.iqser.red.service.redaction.v1.server.logger.Context; import com.iqser.red.service.redaction.v1.server.model.NerEntities; import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary; @@ -176,7 +174,8 @@ public class DocumentPerformanceIntegrationTest extends BuildDocumentIntegration float durationMillis = ((float) (System.currentTimeMillis() - start)); System.out.printf("%d calls of buildTextBlock() on document took %f s, average is %f ms\n", n, durationMillis / 1000, durationMillis / n); - Section section = document.getAllSections().get(9); + Section section = document.getAllSections() + .get(9); start = System.currentTimeMillis(); for (int i = 0; i < n; i++) { @@ -308,7 +307,7 @@ public class DocumentPerformanceIntegrationTest extends BuildDocumentIntegration String type) { TextBlock textBlock = document.getTextBlock(); - searchImplementation.getBoundaries(textBlock, textBlock.getTextRange()) + searchImplementation.getBoundaries(textBlock) .filter(boundary -> boundaryIsSurroundedBySeparators(textBlock, boundary)) .map(bounds -> TextEntity.initialEntityNode(bounds, type, entityType, document)) .forEach(foundEntities::add); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/logback-spring.xml b/redaction-service-v1/redaction-service-server-v1/src/test/resources/logback-spring.xml index 33b2cef7..8e3e4cf2 100644 --- 
a/redaction-service-v1/redaction-service-server-v1/src/test/resources/logback-spring.xml +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/logback-spring.xml @@ -14,4 +14,7 @@ + + + \ No newline at end of file