From 93250d5463dc932874fe8f4b2253e731a5b9c1d1 Mon Sep 17 00:00:00 2001 From: Andrei Isvoran Date: Thu, 11 Apr 2024 14:52:33 +0200 Subject: [PATCH] RED-8694 - Add Javadoc to classes/methods used in rules --- .../v1/server/model/NerEntities.java | 19 + .../server/model/dictionary/Dictionary.java | 60 +++ .../model/dictionary/DictionaryModel.java | 54 +- .../v1/server/model/document/TextRange.java | 75 ++- .../server/model/document/entity/IEntity.java | 171 ++++++ .../model/document/entity/MatchedRule.java | 24 + .../server/model/document/nodes/Document.java | 23 + .../server/model/document/nodes/Header.java | 3 + .../server/model/document/nodes/Headline.java | 13 + .../v1/server/model/document/nodes/Image.java | 4 + .../v1/server/model/document/nodes/Page.java | 8 + .../model/document/nodes/Paragraph.java | 3 + .../server/model/document/nodes/Section.java | 20 + .../document/nodes/SectionIdentifier.java | 31 ++ .../model/document/nodes/SemanticNode.java | 23 +- .../v1/server/model/document/nodes/Table.java | 3 + .../model/document/nodes/TableCell.java | 3 + .../ManualChangesApplicationService.java | 12 + .../document/EntityCreationService.java | 488 +++++++++++++++++- .../server/utils/RedactionSearchUtility.java | 138 +++++ .../resources/drools/all_rules_documine.drl | 10 +- .../test/resources/drools/documine_flora.drl | 10 +- .../src/main/resources/all_rules_documine.drl | 10 +- 23 files changed, 1158 insertions(+), 47 deletions(-) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/NerEntities.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/NerEntities.java index c81707b0..57542164 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/NerEntities.java +++ 
b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/NerEntities.java @@ -11,6 +11,10 @@ import lombok.AllArgsConstructor; import lombok.Getter; import lombok.experimental.FieldDefaults; +/** + * Represents a collection of named entity recognition (NER) entities. + * This class provides methods to manage and query NER entities. + */ @Getter @AllArgsConstructor @FieldDefaults(level = AccessLevel.PRIVATE, makeFinal = true) @@ -25,6 +29,12 @@ public class NerEntities { } + /** + * Checks if there are any entities of a specified type. + * + * @param type The type of entity to check for. + * @return true if there is at least one entity of the specified type, false otherwise. + */ public boolean hasEntitiesOfType(String type) { return nerEntityList.stream() @@ -32,6 +42,12 @@ public class NerEntities { } + /** + * Returns a stream of NER entities of a specified type. + * + * @param type The type of entities to return. + * @return a stream of {@link NerEntity} objects of the specified type. + */ public Stream streamEntitiesOfType(String type) { return nerEntityList.stream() @@ -39,6 +55,9 @@ public class NerEntities { } + /** + * Represents a single NER entity with its value, text range, and type. 
+ */ public record NerEntity(String value, TextRange textRange, String type) { } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/dictionary/Dictionary.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/dictionary/Dictionary.java index 703154cf..472a610f 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/dictionary/Dictionary.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/dictionary/Dictionary.java @@ -23,6 +23,9 @@ import com.iqser.red.service.redaction.v1.server.utils.exception.NotFoundExcepti import lombok.Data; import lombok.Getter; +/** + * A class representing a dictionary used for redaction processes, containing various dictionary models and their versions. + */ @Data public class Dictionary { @@ -51,6 +54,11 @@ public class Dictionary { } + /** + * Checks if the dictionary contains local entries. + * + * @return true if any dictionary model contains local entries, false otherwise. + */ public boolean hasLocalEntries() { return dictionaryModels.stream() @@ -64,6 +72,13 @@ public class Dictionary { } + /** + * Retrieves the {@link DictionaryModel} of a specified type. + * + * @param type The type of dictionary model to retrieve. + * @return The {@link DictionaryModel} of the specified type. + * @throws NotFoundException If the specified type is not found in the dictionary. + */ public DictionaryModel getType(String type) { DictionaryModel model = localAccessMap.get(type); @@ -74,6 +89,12 @@ public class Dictionary { } + /** + * Checks if the dictionary of a specific type is considered a hint. + * + * @param type The type of dictionary to check. + * @return true if the dictionary model is marked as a hint, false otherwise. 
+ */ public boolean isHint(String type) { DictionaryModel model = localAccessMap.get(type); @@ -84,6 +105,12 @@ public class Dictionary { } + /** + * Checks if the dictionary of a specific type is case-insensitive. + * + * @param type The type of dictionary to check. + * @return true if the dictionary is case-insensitive, false otherwise. + */ public boolean isCaseInsensitiveDictionary(String type) { DictionaryModel dictionaryModel = localAccessMap.get(type); @@ -94,6 +121,18 @@ public class Dictionary { } + /** + * Adds a local dictionary entry of a specific type. + * + * @param type The type of dictionary to add the entry to. + * @param value The value of the entry. + * @param matchedRules A collection of {@link MatchedRule} associated with the entry. + * @param alsoAddLastname Indicates whether to also add the lastname separately as an entry. + * @throws IllegalArgumentException If the specified type does not exist within the dictionary, if the type + * does not have any local entries defined, or if the provided value is + * blank. This ensures that only valid, non-empty entries + * are added to the dictionary. + */ private void addLocalDictionaryEntry(String type, String value, Collection matchedRules, boolean alsoAddLastname) { if (value.isBlank()) { @@ -133,18 +172,33 @@ public class Dictionary { } + /** + * Recommends the value of a text entity by adding it as a local entry to the dictionary model of the entity's type, without adding the last name separately. + * + * @param textEntity The {@link TextEntity} to be recommended. + */ public void recommendEverywhere(TextEntity textEntity) { addLocalDictionaryEntry(textEntity.type(), textEntity.getValue(), textEntity.getMatchedRuleList(), false); } + /** + * Recommends the value of a text entity by adding it as a local entry to the dictionary model of the entity's type, and additionally adds the last name as a separate entry. + * + * @param textEntity The {@link TextEntity} to be recommended. 
+ */ public void recommendEverywhereWithLastNameSeparately(TextEntity textEntity) { addLocalDictionaryEntry(textEntity.type(), textEntity.getValue(), textEntity.getMatchedRuleList(), true); } + /** + * Adds multiple author names contained within a text entity as recommendations in the dictionary. + * + * @param textEntity The {@link TextEntity} containing author names to be added. + */ public void addMultipleAuthorsAsRecommendation(TextEntity textEntity) { splitIntoAuthorNames(textEntity).forEach(authorName -> addLocalDictionaryEntry(textEntity.type(), authorName, textEntity.getMatchedRuleList(), true)); @@ -152,6 +206,12 @@ public class Dictionary { } + /** + * Splits a {@link TextEntity} into individual author names based on commas or new lines. + * + * @param textEntity The {@link TextEntity} to split. + * @return A list of strings where each string is an author name. + */ public static List splitIntoAuthorNames(TextEntity textEntity) { List splitAuthorNames; diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/dictionary/DictionaryModel.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/dictionary/DictionaryModel.java index 4f780146..30ae00ad 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/dictionary/DictionaryModel.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/dictionary/DictionaryModel.java @@ -13,6 +13,12 @@ import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRu import lombok.Data; import lombok.extern.slf4j.Slf4j; +/** + * Represents a model of a dictionary containing entries for redaction processes. + * It includes various types of entries such as standard entries, false positives, + * and false recommendations. 
Additionally, it manages local entries with matched + * rules for enhanced search and matching capabilities. + */ @Data @Slf4j public class DictionaryModel implements Serializable { @@ -36,6 +42,19 @@ public class DictionaryModel implements Serializable { private transient SearchImplementation localSearch; + /** + * Constructs a new DictionaryModel with specified parameters. + * + * @param type The type of the dictionary model. + * @param rank The rank order of the dictionary model. + * @param color An array representing the color associated with this model. + * @param caseInsensitive Flag indicating whether the dictionary is case-insensitive. + * @param hint Flag indicating whether this model should be used as a hint. + * @param entries Set of dictionary entry models representing the entries. + * @param falsePositives Set of dictionary entry models representing false positives. + * @param falseRecommendations Set of dictionary entry models representing false recommendations. + * @param isDossierDictionary Flag indicating whether this model is for a dossier dictionary. + */ public DictionaryModel(String type, int rank, float[] color, @@ -52,13 +71,17 @@ public class DictionaryModel implements Serializable { this.caseInsensitive = caseInsensitive; this.hint = hint; this.isDossierDictionary = isDossierDictionary; - this.entries = entries; this.falsePositives = falsePositives; this.falseRecommendations = falseRecommendations; } + /** + * Returns the search implementation for local entries. + * + * @return The {@link SearchImplementation} for local entries. + */ public SearchImplementation getLocalSearch() { if (this.localSearch == null || this.localSearch.getValues().size() != this.localEntriesWithMatchedRules.size()) { @@ -68,6 +91,11 @@ public class DictionaryModel implements Serializable { } + /** + * Returns the search implementation for non-deleted dictionary entries. + * + * @return The {@link SearchImplementation} for non-deleted dictionary entries. 
+ */ public SearchImplementation getEntriesSearch() { if (entriesSearch == null) { @@ -80,6 +108,11 @@ public class DictionaryModel implements Serializable { } + /** + * Returns the search implementation for deleted dictionary entries. + * + * @return The {@link SearchImplementation} for deleted dictionary entries. + */ public SearchImplementation getDeletionEntriesSearch() { if (deletionEntriesSearch == null) { @@ -92,6 +125,11 @@ public class DictionaryModel implements Serializable { } + /** + * Returns the search implementation for non-deleted false positive entries. + * + * @return The {@link SearchImplementation} for non-deleted false positive entries. + */ public SearchImplementation getFalsePositiveSearch() { if (falsePositiveSearch == null) { @@ -104,6 +142,11 @@ public class DictionaryModel implements Serializable { } + /** + * Returns the search implementation for non-deleted false recommendation entries. + * + * @return The {@link SearchImplementation} for non-deleted false recommendation entries. + */ public SearchImplementation getFalseRecommendationsSearch() { if (falseRecommendationsSearch == null) { @@ -116,12 +159,17 @@ public class DictionaryModel implements Serializable { } + /** + * Retrieves the matched rules for a given value from the local dictionary entries. + * The value is processed based on the case sensitivity of the dictionary. + * + * @param value The value for which to retrieve the matched rules. + * @return A set of {@link MatchedRule} associated with the given value, or null if no rules are found. + */ public Set getMatchedRulesForLocalDictionaryEntry(String value) { var cleanedValue = isCaseInsensitive() ? 
value.toLowerCase(Locale.US) : value; - return localEntriesWithMatchedRules.get(cleanedValue); - } } diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/TextRange.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/TextRange.java index b3e76926..ef3ad47d 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/TextRange.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/TextRange.java @@ -11,6 +11,10 @@ import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBl import lombok.EqualsAndHashCode; import lombok.Setter; +/** + * Represents a range of text defined by a start and end index. + * Provides functionality to check containment, intersection, and to adjust ranges based on specified conditions. + */ @Setter @EqualsAndHashCode @SuppressWarnings("PMD.AvoidFieldNameMatchingMethodName") @@ -20,6 +24,13 @@ public class TextRange implements Comparable { private int end; + /** + * Constructs a TextRange with specified start and end indexes. + * + * @param start The starting index of the range. + * @param end The ending index of the range. + * @throws IllegalArgumentException If start is greater than end. + */ public TextRange(int start, int end) { if (start > end) { @@ -30,6 +41,11 @@ public class TextRange implements Comparable { } + /** + * Returns the length of the text range. + * + * @return The length of the range. + */ public int length() { return end - start; @@ -48,18 +64,38 @@ public class TextRange implements Comparable { } + /** + * Checks if this {@link TextRange} fully contains another TextRange. + * + * @param textRange The {@link TextRange} to check. + * @return true if this range contains the specified range, false otherwise. 
+ */ public boolean contains(TextRange textRange) { return start <= textRange.start() && textRange.end() <= end; } + /** + * Checks if this {@link TextRange} is fully contained by another TextRange. + * + * @param textRange The {@link TextRange} to check against. + * @return true if this range is contained by the specified range, false otherwise. + */ public boolean containedBy(TextRange textRange) { return textRange.contains(this); } + /** + * Checks if this {@link TextRange} contains another range specified by start and end indices. + * + * @param start The starting index of the range to check. + * @param end The ending index of the range to check. + * @return true if this range fully contains the specified range, false otherwise. + * @throws IllegalArgumentException If the start index is greater than the end index. + */ public boolean contains(int start, int end) { if (start > end) { @@ -69,6 +105,14 @@ public class TextRange implements Comparable { } + /** + * Checks if this {@link TextRange} is fully contained within another range specified by start and end indices. + * + * @param start The starting index of the outer range. + * @param end The ending index of the outer range. + * @return true if this range is fully contained within the specified range, false otherwise. + * @throws IllegalArgumentException If the start index is greater than the end index. + */ public boolean containedBy(int start, int end) { if (start > end) { @@ -78,18 +122,37 @@ public class TextRange implements Comparable { } + /** + * Determines if the specified index is within this {@link TextRange}. + * + * @param index The index to check. + * @return true if the index is within the range (inclusive of the start and exclusive of the end), false otherwise. + */ public boolean contains(int index) { return start <= index && index < end; } + /** + * Checks if this {@link TextRange} intersects with another {@link TextRange}. 
+ * + * @param textRange The {@link TextRange} to check for intersection. + * @return true if the ranges intersect, false otherwise. + */ public boolean intersects(TextRange textRange) { return textRange.start() < this.end && this.start < textRange.end(); } + /** + * Splits this TextRange into multiple ranges based on a list of indices. + * + * @param splitIndices The indices at which to split the range. + * @return A list of TextRanges resulting from the split. + * @throws IndexOutOfBoundsException If any split index is outside this TextRange. + */ public List split(List splitIndices) { if (splitIndices.stream() @@ -116,6 +179,13 @@ public class TextRange implements Comparable { } + /** + * Merges a collection of TextRanges into a single TextRange encompassing all of them. + * + * @param boundaries The collection of TextRanges to merge. + * @return A new TextRange covering the entire span of the given ranges. + * @throws IllegalArgumentException If boundaries are empty. + */ public static TextRange merge(Collection boundaries) { int minStart = boundaries.stream() @@ -152,16 +222,17 @@ public class TextRange implements Comparable { /** - * shrinks the boundary, such that textBlock.subSequence(boundary) returns a string without trailing or preceding whitespaces. 
* * @param textBlock TextBlock to check whitespaces against - * @return trimmed boundary + * @return Trimmed boundary */ public TextRange trim(TextBlock textBlock) { if (this.length() == 0) { return this; } + int trimmedStart = this.start; while (textBlock.containsIndex(trimmedStart) && trimmedStart < end && Character.isWhitespace(textBlock.charAt(trimmedStart))) { trimmedStart++; diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/IEntity.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/IEntity.java index 9d5bdfad..ebf5e740 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/IEntity.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/IEntity.java @@ -12,27 +12,64 @@ import lombok.NonNull; public interface IEntity { + /** + * Gets the list of rules matched against this entity. + * + * @return A priority queue of matched rules. + */ PriorityQueue getMatchedRuleList(); + /** + * Gets the manual overwrite actions applied to this entity, if any. + * + * @return The manual overwrite details. + */ ManualChangeOverwrite getManualOverwrite(); + /** + * Gets the value of this entity as a string. + * + * @return The string value. + */ String getValue(); + /** + * Gets the range of text in the document associated with this entity. + * + * @return The text range. + */ TextRange getTextRange(); + /** + * Gets the type of this entity. + * + * @return The entity type. + */ String type(); + /** + * Calculates the length of the entity's value. + * + * @return The length of the value. + */ default int length() { return value().length(); } + /** + * Retrieves the value of the entity, considering any manual overwrite. 
+ * If no manual overwrite value is found, return the value of the entity or an empty string + * if that value is null. + * + * @return The possibly overwritten value + */ default String value() { return getManualOverwrite().getValue() @@ -40,6 +77,11 @@ public interface IEntity { } + /** + * Determines if the entity has been applied, considering manual overwrites. + * + * @return True if applied, false otherwise. + */ // Don't use default accessor pattern (e.g. isApplied()), as it might lead to errors in drools due to property-specific optimization of the drools planner. default boolean applied() { @@ -48,12 +90,22 @@ public interface IEntity { } + /** + * Determines if the entity has been skipped, based on its applied status. + * + * @return True if skipped, false otherwise. + */ default boolean skipped() { return !applied(); } + /** + * Determines if the entity has been ignored, considering manual overwrites. + * + * @return True if ignored, false otherwise. + */ default boolean ignored() { return getManualOverwrite().getIgnored() @@ -61,6 +113,11 @@ public interface IEntity { } + /** + * Determines if the entity has been removed, considering manual overwrites. + * + * @return True if removed, false otherwise. + */ default boolean removed() { return getManualOverwrite().getRemoved() @@ -68,6 +125,11 @@ public interface IEntity { } + /** + * Checks if the entity has been resized, considering manual overwrites. + * + * @return True if resized, false otherwise. + */ default boolean resized() { return getManualOverwrite().getResized() @@ -75,24 +137,48 @@ public interface IEntity { } + /** + * Checks if the entity is considered active, based on its removed and ignored status. + * An active entry is not removed or ignored. + * + * @return True if active, false otherwise. + */ default boolean active() { return !(removed() || ignored()); } + /** + * Checks if there are any manual changes applied to the entity. 
+ * + * @return True if there are manual changes, false otherwise. + */ default boolean hasManualChanges() { return !getManualOverwrite().getManualChangeLog().isEmpty(); } + /** + * Retrieves a set of references associated with the entity's matched rule. + * + * @return A set of references. + */ default Set references() { return getMatchedRule().getReferences(); } + /** + * Applies a redaction to the entity with a specified legal basis. + * + * @param ruleIdentifier The identifier of the rule being applied. + * @param reason The reason for the redaction. + * @param legalBasis The legal basis for the redaction, which must not be blank or empty. + * @throws IllegalArgumentException If the legal basis is blank or empty. + */ default void redact(@NonNull String ruleIdentifier, String reason, @NonNull String legalBasis) { if (legalBasis.isBlank() || legalBasis.isEmpty()) { @@ -102,36 +188,75 @@ public interface IEntity { } + /** + * Applies a rule to the entity with an optional legal basis. + * + * @param ruleIdentifier The identifier of the rule being applied. + * @param reason The reason for applying the rule. + * @param legalBasis The legal basis for the application, can be a default or unspecified value. + */ default void apply(@NonNull String ruleIdentifier, String reason, String legalBasis) { addMatchedRule(MatchedRule.builder().ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier)).reason(reason).legalBasis(legalBasis).applied(true).build()); } + /** + * Applies a rule to the entity without specifying a legal basis, which will be replaced by "n-a". + * + * @param ruleIdentifier The identifier of the rule being applied. + * @param reason The reason for applying the rule. + */ default void apply(@NonNull String ruleIdentifier, String reason) { apply(ruleIdentifier, reason, "n-a"); } + /** + * Marks the entity as skipped according to a specific rule. + * + * @param ruleIdentifier The identifier of the rule being skipped. 
+ * @param reason The reason for skipping the rule. + */ default void skip(@NonNull String ruleIdentifier, String reason) { addMatchedRule(MatchedRule.builder().ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier)).reason(reason).build()); } + /** + * Marks the entity as removed according to a specific rule. + * + * @param ruleIdentifier The identifier of the rule based on which the entity is removed. + * @param reason The reason for the removal. + */ default void remove(String ruleIdentifier, String reason) { addMatchedRule(MatchedRule.builder().ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier)).reason(reason).removed(true).build()); } + /** + * Marks the entity as ignored according to a specific rule. + * + * @param ruleIdentifier The identifier of the rule based on which the entity is ignored. + * @param reason The reason for ignoring the entity. + */ default void ignore(String ruleIdentifier, String reason) { addMatchedRule(MatchedRule.builder().ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier)).reason(reason).ignored(true).build()); } + /** + * Applies a rule to the entity, indicating that the value should be written with line breaks. + * + * @param ruleIdentifier The identifier of the rule being applied. + * @param reason The reason for the rule application. + * @param legalBasis The legal basis for the rule, which must not be empty. + * @throws IllegalArgumentException If the legal basis is blank or empty. + */ default void applyWithLineBreaks(@NonNull String ruleIdentifier, String reason, @NonNull String legalBasis) { if (legalBasis.isBlank() || legalBasis.isEmpty()) { @@ -147,6 +272,15 @@ public interface IEntity { } + /** + * Applies a rule to the entity with a collection of references. + * + * @param ruleIdentifier The identifier of the rule being applied. + * @param reason The reason for the rule application. + * @param legalBasis The legal basis for the rule, which must not be empty. 
+ * @param references A collection of text entities that are referenced by this rule application. + * @throws IllegalArgumentException If the legal basis is blank or empty. + */ default void applyWithReferences(@NonNull String ruleIdentifier, String reason, @NonNull String legalBasis, Collection references) { if (legalBasis.isBlank() || legalBasis.isEmpty()) { @@ -162,18 +296,35 @@ public interface IEntity { } + /** + * Marks the entity as skipped for a specific rule and associates a collection of references. + * + * @param ruleIdentifier The identifier of the rule being skipped. + * @param reason The reason for skipping the rule. + * @param references A collection of text entities that are referenced by the skipped rule. + */ default void skipWithReferences(@NonNull String ruleIdentifier, String reason, Collection references) { getMatchedRuleList().add(MatchedRule.builder().ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier)).reason(reason).references(new HashSet<>(references)).build()); } + /** + * Adds a single matched rule to this entity. + * + * @param matchedRule The matched rule to add. + */ default void addMatchedRule(MatchedRule matchedRule) { getMatchedRuleList().add(matchedRule); } + /** + * Adds a collection of matched rules to this entity. + * + * @param matchedRules The collection of matched rules to add. + */ default void addMatchedRules(Collection matchedRules) { if (getMatchedRuleList().equals(matchedRules)) { @@ -183,12 +334,22 @@ public interface IEntity { } + /** + * Retrieves the 'unit' value of the highest priority matched rule. + * + * @return The unit value of the matched rule. + */ default int getMatchedRuleUnit() { return getMatchedRule().getRuleIdentifier().unit(); } + /** + * Gets the highest priority matched rule for this entity. + * + * @return The matched rule. 
+ */ default MatchedRule getMatchedRule() { if (getMatchedRuleList().isEmpty()) { @@ -198,6 +359,11 @@ public interface IEntity { } + /** + * Builds a reason string for this entity, incorporating descriptions from manual changes. + * + * @return The built reason string. + */ default String buildReasonWithManualChangeDescriptions() { if (getManualOverwrite().getDescriptions().isEmpty()) { @@ -210,6 +376,11 @@ public interface IEntity { } + /** + * Retrieves the legal basis for the action taken on this entity, considering any manual overwrite. + * + * @return The legal basis. + */ default String legalBasis() { return getManualOverwrite().getLegalBasis() diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/MatchedRule.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/MatchedRule.java index 658ee543..a72a9adf 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/MatchedRule.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/entity/MatchedRule.java @@ -15,6 +15,9 @@ import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.experimental.FieldDefaults; +/** + * Represents a rule that has been matched during the document redaction process. + */ @Getter @Builder @AllArgsConstructor @@ -42,12 +45,26 @@ public final class MatchedRule implements Comparable { Set references = Collections.emptySet(); + /** + * Creates an empty instance of {@link MatchedRule}. + * This can be used as a placeholder or when no rule is actually matched. + * + * @return An empty {@link MatchedRule} instance. 
+ */ public static MatchedRule empty() { return MatchedRule.builder().ruleIdentifier(RuleIdentifier.empty()).build(); } + /** + * Returns a modified instance of {@link MatchedRule} based on its applied status. + * If the rule has been applied, it returns a new {@link MatchedRule} instance that retains all properties of the original + * except for the 'applied' status, which is set to false. + * If the rule has not been applied, it returns the original instance. + * + * @return A {@link MatchedRule} instance with 'applied' set to false. + */ public MatchedRule asSkippedIfApplied() { if (!this.isApplied()) { @@ -63,6 +80,13 @@ public final class MatchedRule implements Comparable { } + /** + * Compares this rule with another {@link MatchedRule} to establish a priority order. + * The comparison is based on the rule type, unit, and ID, in that order. + * + * @param matchedRule The {@link MatchedRule} to compare against. + * @return A negative integer, zero, or a positive integer as this rule is less than, equal to, or greater than the specified rule. + */ @Override public int compareTo(MatchedRule matchedRule) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Document.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Document.java index e876c620..0f0381eb 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Document.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Document.java @@ -24,6 +24,9 @@ import lombok.EqualsAndHashCode; import lombok.NoArgsConstructor; import lombok.experimental.FieldDefaults; +/** + * Represents the entire document as a node within the document's semantic structure. 
+ */ @Data @Builder @AllArgsConstructor @@ -63,6 +66,11 @@ public class Document implements GenericSemanticNode { } + /** + * Gets the main sections of the document as a list. + * + * @return A list of main sections within the document. + */ public List
getMainSections() { return streamChildrenOfType(NodeType.SECTION).map(node -> (Section) node) @@ -70,6 +78,11 @@ public class Document implements GenericSemanticNode { } + /** + * Streams all terminal (leaf) text blocks within the document in their natural order. + * + * @return A stream of terminal {@link TextBlock}. + */ public Stream streamTerminalTextBlocksInOrder() { return streamAllNodes().filter(SemanticNode::isLeaf).map(SemanticNode::getTextBlock); @@ -99,6 +112,11 @@ public class Document implements GenericSemanticNode { } + /** + * Streams all nodes within the document, regardless of type, in their natural order. + * + * @return A stream of all {@link SemanticNode} within the document. + */ private Stream streamAllNodes() { return documentTree.allEntriesInOrder() @@ -106,6 +124,11 @@ public class Document implements GenericSemanticNode { } + /** + * Streams all image nodes contained within the document. + * + * @return A stream of {@link Image} nodes. + */ public Stream streamAllImages() { return streamAllSubNodesOfType(NodeType.IMAGE).map(node -> (Image) node); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Header.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Header.java index 5e0f15bc..5f1b217f 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Header.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Header.java @@ -19,6 +19,9 @@ import lombok.EqualsAndHashCode; import lombok.NoArgsConstructor; import lombok.experimental.FieldDefaults; +/** + * Represents the header part of a document page. 
+ */ @Data @Builder @AllArgsConstructor diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Headline.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Headline.java index 8a5ec833..0c33390b 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Headline.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Headline.java @@ -20,6 +20,9 @@ import lombok.EqualsAndHashCode; import lombok.NoArgsConstructor; import lombok.experimental.FieldDefaults; +/** + * Represents a headline in a document. + */ @Data @Builder @AllArgsConstructor @@ -98,12 +101,22 @@ public class Headline implements GenericSemanticNode { } + /** + * Creates an empty headline with no text content. + * + * @return An empty {@link Headline} instance. + */ public static Headline empty() { return Headline.builder().leafTextBlock(AtomicTextBlock.empty(-1L, 0, new Page(), -1, null)).build(); } + /** + * Checks if this headline is associated with any paragraphs within its parent section or node. + * + * @return True if there are paragraphs associated with this headline, false otherwise. 
+ */ public boolean hasParagraphs() { return getParent().streamAllSubNodesOfType(NodeType.PARAGRAPH) diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Image.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Image.java index bbb9affb..5ac8bed9 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Image.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Image.java @@ -28,6 +28,10 @@ import lombok.EqualsAndHashCode; import lombok.NoArgsConstructor; import lombok.experimental.FieldDefaults; +/** + * + * Represents an image within the document. + */ @Data @Builder @AllArgsConstructor diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Page.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Page.java index 3354372c..4216471c 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Page.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Page.java @@ -17,6 +17,9 @@ import lombok.NoArgsConstructor; import lombok.Setter; import lombok.experimental.FieldDefaults; +/** + * Represents a single page in a document. + */ @Getter @Setter @Builder @@ -43,6 +46,11 @@ public class Page { Set images = new HashSet<>(); + /** + * Constructs and returns a {@link TextBlock} representing the concatenated text of all leaf semantic nodes in the main body. + * + * @return The main body text block. 
+ */ public TextBlock getMainBodyTextBlock() { return mainBody.stream() diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Paragraph.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Paragraph.java index 21bdc4ed..81e21678 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Paragraph.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Paragraph.java @@ -19,6 +19,9 @@ import lombok.EqualsAndHashCode; import lombok.experimental.FieldDefaults; import lombok.experimental.SuperBuilder; +/** + * Represents a paragraph in the document. + */ @Data @SuperBuilder @AllArgsConstructor diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Section.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Section.java index 6199fc85..d2140552 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Section.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Section.java @@ -21,6 +21,9 @@ import lombok.RequiredArgsConstructor; import lombok.experimental.FieldDefaults; import lombok.extern.slf4j.Slf4j; +/** + * Represents a section within a document, encapsulating both its textual content and semantic structure. + */ @Slf4j @Data @Builder @@ -51,6 +54,11 @@ public class Section implements GenericSemanticNode { } + /** + * Checks if this section contains any tables. 
+ * + * @return True if the section contains at least one table, false otherwise. + */ public boolean hasTables() { return streamAllSubNodesOfType(NodeType.TABLE).findAny() @@ -91,12 +99,24 @@ public class Section implements GenericSemanticNode { } + /** + * Checks if any headline within this section or its sub-nodes contains a given string. + * + * @param value The string to search for within headlines, case-sensitive. + * @return True if at least one headline contains the specified string, false otherwise. + */ public boolean anyHeadlineContainsString(String value) { return streamAllSubNodesOfType(NodeType.HEADLINE).anyMatch(h -> h.containsString(value)); } + /** + * Checks if any headline within this section or its sub-nodes contains a given string, case-insensitive. + * + * @param value The string to search for within headlines, case-insensitive. + * @return True if at least one headline contains the specified string, false otherwise. + */ public boolean anyHeadlineContainsStringIgnoreCase(String value) { return streamAllSubNodesOfType(NodeType.HEADLINE).anyMatch(h -> h.containsStringIgnoreCase(value)); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/SectionIdentifier.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/SectionIdentifier.java index a47ffe6a..0d3cdbaa 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/SectionIdentifier.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/SectionIdentifier.java @@ -10,6 +10,9 @@ import lombok.AccessLevel; import lombok.AllArgsConstructor; import lombok.experimental.FieldDefaults; +/** + * Represents a unique identifier for a section within a document. 
+ */ @AllArgsConstructor @FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE) public class SectionIdentifier { @@ -28,6 +31,12 @@ public class SectionIdentifier { boolean asChild; + /** + * Generates a SectionIdentifier from the headline text of a section, determining its format and structure. + * + * @param headline The headline text from which to generate the section identifier. + * @return A {@link SectionIdentifier} instance corresponding to the headline text. + */ public static SectionIdentifier fromSearchText(String headline) { if (headline == null || headline.isEmpty() || headline.isBlank()) { @@ -43,18 +52,34 @@ public class SectionIdentifier { } + /** + * Creates a section identifier that is marked as a child of the given section identifier. + * + * @param sectionIdentifier The parent section identifier. + * @return A new {@link SectionIdentifier} instance marked as a child. + */ public static SectionIdentifier asChildOf(SectionIdentifier sectionIdentifier) { return new SectionIdentifier(sectionIdentifier.format, sectionIdentifier.toString(), sectionIdentifier.identifiers, true); } + /** + * Generates a SectionIdentifier that represents the entire document. + * + * @return A {@link SectionIdentifier} with a document-wide scope. + */ public static SectionIdentifier document() { return new SectionIdentifier(Format.DOCUMENT, "document", Collections.emptyList(), false); } + /** + * Generates an empty SectionIdentifier. + * + * @return An empty {@link SectionIdentifier} instance. + */ public static SectionIdentifier empty() { return new SectionIdentifier(Format.EMPTY, "empty", Collections.emptyList(), false); @@ -109,6 +134,12 @@ public class SectionIdentifier { } + /** + * Determines if the current section is a child of the given section, based on their identifiers. + * + * @param sectionIdentifier The section identifier to compare against. + * @return True if the current section is a child of the given section, false otherwise. 
+ */ public boolean isChildOf(SectionIdentifier sectionIdentifier) { if (this.format.equals(Format.DOCUMENT) || this.format.equals(Format.EMPTY)) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/SemanticNode.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/SemanticNode.java index 77f7ea76..b148832b 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/SemanticNode.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/SemanticNode.java @@ -44,11 +44,12 @@ public interface SemanticNode { */ default TextBlock getTextBlock() { - return streamAllSubNodes().filter(SemanticNode::isLeaf).map(SemanticNode::getTextBlock).collect(new TextBlockCollector()); + return streamAllSubNodes().filter(SemanticNode::isLeaf) + .map(SemanticNode::getTextBlock) + .collect(new TextBlockCollector()); } - /** * Any Node maintains its own Set of Entities. * This Set contains all Entities whose TextRange intersects the TextRange of this node. @@ -437,10 +438,10 @@ public interface SemanticNode { /** - * Checks whether this SemanticNode contains all the provided Strings ignoring case. + * Checks whether this SemanticNode contains the provided String, case-insensitive. * * @param string A String which the TextBlock might contain - * @return true, if this node's TextBlock contains the string ignoring case + * @return true, if this node's TextBlock contains the string case-insensitive */ default boolean containsStringIgnoreCase(String string) { @@ -449,7 +450,7 @@ public interface SemanticNode { /** - * Checks whether this SemanticNode contains any of the provided Strings ignoring case. 
+ * Checks whether this SemanticNode contains any of the provided Strings case-insensitive. * * @param strings A List of Strings which the TextBlock might contain * @return true, if this node's TextBlock contains any of the strings @@ -462,7 +463,7 @@ public interface SemanticNode { /** - * Checks whether this SemanticNode contains any of the provided Strings ignoring case. + * Checks whether this SemanticNode contains any of the provided Strings case-insensitive. * * @param strings A List of Strings which the TextBlock might contain * @return true, if this node's TextBlock contains any of the strings @@ -489,7 +490,7 @@ public interface SemanticNode { /** - * Checks whether this SemanticNode contains exactly the provided String as a word ignoring case. + * Checks whether this SemanticNode contains exactly the provided String as a word case-insensitive. * * @param word - String which the TextBlock might contain * @return true, if this node's TextBlock contains string @@ -519,7 +520,7 @@ public interface SemanticNode { /** - * Checks whether this SemanticNode contains any of the provided Strings as a word ignoring case. + * Checks whether this SemanticNode contains any of the provided Strings as a word case-insensitive. * * @param words - A List of Strings which the TextBlock might contain * @return true, if this node's TextBlock contains any of the provided strings @@ -551,7 +552,7 @@ public interface SemanticNode { /** - * Checks whether this SemanticNode contains all the provided Strings as word ignoring case. + * Checks whether this SemanticNode contains all the provided Strings as word case-insensitive. * * @param words - A List of Strings which the TextBlock might contain * @return true, if this node's TextBlock contains all the provided strings @@ -580,10 +581,10 @@ public interface SemanticNode { /** - * Checks whether this SemanticNode matches the provided regex pattern ignoring case. 
+ * Checks whether this SemanticNode matches the provided regex pattern case-insensitive. * * @param regexPattern A String representing a regex pattern, which the TextBlock might contain - * @return true, if this node's TextBlock contains the regex pattern ignoring case + * @return true, if this node's TextBlock contains the regex pattern case-insensitive */ default boolean matchesRegexIgnoreCase(String regexPattern) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Table.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Table.java index 95abbe0d..f648d8a9 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Table.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/Table.java @@ -26,6 +26,9 @@ import lombok.Data; import lombok.EqualsAndHashCode; import lombok.experimental.FieldDefaults; +/** + * Represents a table within a document. 
+ */ @Data @Builder @AllArgsConstructor diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/TableCell.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/TableCell.java index 0a1d3cf8..7cb66e01 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/TableCell.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/model/document/nodes/TableCell.java @@ -20,6 +20,9 @@ import lombok.Data; import lombok.EqualsAndHashCode; import lombok.experimental.FieldDefaults; +/** + * Represents a single table cell within a table. + */ @Data @Builder @AllArgsConstructor diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/ManualChangesApplicationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/ManualChangesApplicationService.java index 67ab7b56..14204d04 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/ManualChangesApplicationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/ManualChangesApplicationService.java @@ -58,6 +58,12 @@ public class ManualChangesApplicationService { } + /** + * Resizes a text entity based on manual resize redaction details. + * + * @param entityToBeResized The entity to resize. + * @param manualResizeRedaction The details of the resize operation. 
+ */ public void resize(TextEntity entityToBeResized, ManualResizeRedaction manualResizeRedaction) { resizeEntityAndReinsert(entityToBeResized, manualResizeRedaction); @@ -140,6 +146,12 @@ public class ManualChangesApplicationService { } + /** + * Resizes an image entity based on manual resize redaction instructions. + * + * @param image The image to resize. + * @param manualResizeRedaction The details of the resize operation. + */ public void resizeImage(Image image, ManualResizeRedaction manualResizeRedaction) { if (manualResizeRedaction.getPositions().isEmpty() || manualResizeRedaction.getPositions() == null) { diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java index da185c00..819a9e23 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/service/document/EntityCreationService.java @@ -54,6 +54,17 @@ public class EntityCreationService { } + /** + * Creates entities found between specified start and stop strings, case-sensitive. + * + * @param start The starting string to search for. + * @param stop The stopping string to search for. + * @param type The type of entity to create. + * @param entityType The detailed classification of the entity. + * @param node The semantic node within which to search. + * @return A stream of {@link TextEntity} identified objects. + * @throws IllegalArgumentException if both start and stop strings are empty, indicating there's nothing to search for. 
+ */ public Stream betweenStrings(String start, String stop, String type, EntityType entityType, SemanticNode node) { checkIfBothStartAndEndAreEmpty(start, stop); @@ -65,6 +76,17 @@ public class EntityCreationService { } + /** + * Creates entities found between specified start and stop strings, case-insensitive. + * + * @param start The starting string to search for. + * @param stop The stopping string to search for. + * @param type The type of entity to create. + * @param entityType The detailed classification of the entity. + * @param node The semantic node within which to search. + * @return A stream of {@link TextEntity} identified objects. + * @throws IllegalArgumentException if both start and stop strings are empty, indicating there's nothing to search for. + */ public Stream betweenStringsIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) { checkIfBothStartAndEndAreEmpty(start, stop); @@ -76,6 +98,17 @@ public class EntityCreationService { } + /** + * Creates entities found between specified start and stop strings, including the start string in the entity, case-sensitive. + * + * @param start The starting string to search for. + * @param stop The stopping string to search for. + * @param type The type of entity to create. + * @param entityType The detailed classification of the entity. + * @param node The semantic node within which to search. + * @return A stream of {@link TextEntity} identified objects. + * @throws IllegalArgumentException if both start and stop strings are empty, indicating there's nothing to search for. + */ public Stream betweenStringsIncludeStart(String start, String stop, String type, EntityType entityType, SemanticNode node) { checkIfBothStartAndEndAreEmpty(start, stop); @@ -92,6 +125,17 @@ public class EntityCreationService { } + /** + * Creates entities found between specified start and stop strings, including the start string in the entity, case-insensitive. 
+ * + * @param start The starting string to search for. + * @param stop The stopping string to search for. + * @param type The type of entity to create. + * @param entityType The detailed classification of the entity. + * @param node The semantic node within which to search. + * @return A stream of {@link TextEntity} identified objects. + * @throws IllegalArgumentException if both start and stop strings are empty, indicating there's nothing to search for. + */ public Stream betweenStringsIncludeStartIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) { checkIfBothStartAndEndAreEmpty(start, stop); @@ -108,6 +152,17 @@ public class EntityCreationService { } + /** + * Creates entities found between specified start and stop strings, including the end string in the entity, case-sensitive. + * + * @param start The starting string to search for. + * @param stop The stopping string to search for. + * @param type The type of entity to create. + * @param entityType The detailed classification of the entity. + * @param node The semantic node within which to search. + * @return A stream of {@link TextEntity} identified objects. + * @throws IllegalArgumentException if both start and stop strings are empty, indicating there's nothing to search for. + */ public Stream betweenStringsIncludeEnd(String start, String stop, String type, EntityType entityType, SemanticNode node) { checkIfBothStartAndEndAreEmpty(start, stop); @@ -124,6 +179,17 @@ public class EntityCreationService { } + /** + * Creates entities found between specified start and stop strings, including the end string in the entity, case-insensitive. + * + * @param start The starting string to search for. + * @param stop The stopping string to search for. + * @param type The type of entity to create. + * @param entityType The detailed classification of the entity. + * @param node The semantic node within which to search. 
+ * @return A stream of {@link TextEntity} identified objects. + * @throws IllegalArgumentException if both start and stop strings are empty, indicating there's nothing to search for. + */ public Stream betweenStringsIncludeEndIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) { checkIfBothStartAndEndAreEmpty(start, stop); @@ -140,6 +206,17 @@ public class EntityCreationService { } + /** + * Creates entities found between specified start and stop strings, including the start and end string in the entity, case-sensitive. + * + * @param start The starting string to search for. + * @param stop The stopping string to search for. + * @param type The type of entity to create. + * @param entityType The detailed classification of the entity. + * @param node The semantic node within which to search. + * @return A stream of {@link TextEntity} identified objects. + * @throws IllegalArgumentException if both start and stop strings are empty, indicating there's nothing to search for. + */ public Stream betweenStringsIncludeStartAndEnd(String start, String stop, String type, EntityType entityType, SemanticNode node) { checkIfBothStartAndEndAreEmpty(start, stop); @@ -160,6 +237,17 @@ public class EntityCreationService { } + /** + * Creates entities found between specified start and stop strings, including the start and end string in the entity, case-insensitive. + * + * @param start The starting string to search for. + * @param stop The stopping string to search for. + * @param type The type of entity to create. + * @param entityType The detailed classification of the entity. + * @param node The semantic node within which to search. + * @return A stream of {@link TextEntity} identified objects. + * @throws IllegalArgumentException if both start and stop strings are empty, indicating there's nothing to search for. 
+ */ public Stream betweenStringsIncludeStartAndEndIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) { checkIfBothStartAndEndAreEmpty(start, stop); @@ -180,6 +268,17 @@ public class EntityCreationService { } + /** + * Identifies the shortest text entities found between any of the given start and stop strings within a specified semantic node, case-sensitive. + * + * @param starts A list of start strings to search for. + * @param stops A list of stop strings to search for. + * @param type The type of the entity to be created. + * @param entityType The detailed classification of the entity. + * @param node The semantic node within which to search. + * @return A stream of {@link TextEntity} identified objects. + * @throws IllegalArgumentException if both start and stop strings are empty, indicating there's nothing to search for. + */ public Stream shortestBetweenAnyString(List starts, List stops, String type, EntityType entityType, SemanticNode node) { checkIfBothStartAndEndAreEmpty(starts, stops); @@ -191,6 +290,17 @@ public class EntityCreationService { } + /** + * Identifies the shortest text entities found between any of the given start and stop strings within a specified semantic node, case-insensitive. + * + * @param starts A list of start strings to search for. + * @param stops A list of stop strings to search for. + * @param type The type of the entity to be created. + * @param entityType The detailed classification of the entity. + * @param node The semantic node within which to search. + * @return A stream of {@link TextEntity} identified objects. + * @throws IllegalArgumentException if both start and stop strings are empty, indicating there's nothing to search for. 
+ */ public Stream shortestBetweenAnyStringIgnoreCase(List starts, List stops, String type, EntityType entityType, SemanticNode node) { checkIfBothStartAndEndAreEmpty(starts, stops); @@ -202,6 +312,18 @@ public class EntityCreationService { } + /** + * Identifies the shortest text entities found between any of the given start and stop strings within a specified semantic node, + * case-insensitive, with a length limit. + * + * @param starts A list of start strings to search for, case-insensitively. + * @param stops A list of stop strings to search for, case-insensitively. + * @param type The type of the entity to be created. + * @param entityType The detailed classification of the entity. + * @param node The semantic node within which the search is performed. + * @param limit The maximum length of the entity text. + * @return A stream of {@link TextEntity} objects found between any of the start and stop strings, case-insensitively, and within the specified limit. + */ public Stream shortestBetweenAnyStringIgnoreCase(List starts, List stops, String type, EntityType entityType, SemanticNode node, int limit) { checkIfBothStartAndEndAreEmpty(starts, stops); @@ -213,6 +335,16 @@ public class EntityCreationService { } + /** + * Creates entities based on the boundaries identified between start and stop regular expressions within a specified semantic node. + * + * @param regexStart The regular expression defining the start boundary. + * @param regexStop The regular expression defining the stop boundary. + * @param type The type of entity to be created. + * @param entityType The detailed classification of the entity. + * @param node The semantic node within which the search is performed. + * @return A stream of {@link TextEntity} objects identified between the start and stop regular expressions. 
+ */ public Stream betweenRegexes(String regexStart, String regexStop, String type, EntityType entityType, SemanticNode node) { TextBlock textBlock = node.getTextBlock(); @@ -223,6 +355,17 @@ public class EntityCreationService { } + /** + * Creates entities based on the boundaries identified between start and stop regular expressions within a specified semantic node, + * case-insensitive. + * + * @param regexStart The regular expression defining the start boundary, case-insensitive. + * @param regexStop The regular expression defining the stop boundary, case-insensitive. + * @param type The type of entity to be created. + * @param entityType The detailed classification of the entity. + * @param node The semantic node within which the search is performed. + * @return A stream of {@link TextEntity} objects identified between the start and stop regular expressions, case-insensitively. + */ public Stream betweenRegexesIgnoreCase(String regexStart, String regexStop, String type, EntityType entityType, SemanticNode node) { TextBlock textBlock = node.getTextBlock(); @@ -233,12 +376,35 @@ public class EntityCreationService { } + /** + * Creates entities based on the boundaries identified between specified start and stop text ranges within a semantic node. + * This is a more general method that can be used directly with lists of start and stop {@link TextRange} objects. + * + * @param startBoundaries A list of start text range boundaries. + * @param stopBoundaries A list of stop text range boundaries. + * @param type The type of entity to be created. + * @param entityType The detailed classification of the entity. + * @param node The semantic node within which the search is performed. + * @return A stream of {@link TextEntity} objects identified between the start and stop text ranges. 
+ */ public Stream betweenTextRanges(List startBoundaries, List stopBoundaries, String type, EntityType entityType, SemanticNode node) { return betweenTextRanges(startBoundaries, stopBoundaries, type, entityType, node, 0); } + /** + * Creates entities based on the boundaries identified between specified start and stop text ranges within a semantic node, + * with an optional length limit for the entities. + * + * @param startBoundaries A list of start text range boundaries. + * @param stopBoundaries A list of stop text range boundaries. + * @param type The type of entity to be created. + * @param entityType The detailed classification of the entity. + * @param node The semantic node within which the search is performed. + * @param limit The maximum length of the entity text; use 0 for no limit. + * @return A stream of {@link TextEntity} objects identified between the start and stop text ranges, within the specified limit. + */ public Stream betweenTextRanges(List startBoundaries, List stopBoundaries, String type, EntityType entityType, SemanticNode node, int limit) { List entityBoundaries = findNonOverlappingBoundariesBetweenBoundariesWithMinimalDistances(startBoundaries, stopBoundaries); @@ -283,6 +449,15 @@ public class EntityCreationService { } + /** + * Creates text entities based on boundaries identified by a search implementation within a specified semantic node. + * + * @param searchImplementation The search implementation to use for identifying boundaries. + * @param type The type of the entity to be created. + * @param entityType The detailed classification of the entity. + * @param node The semantic node within which the search is performed. + * @return A stream of {@link TextEntity} objects corresponding to the identified boundaries. 
+ */ public Stream bySearchImplementation(SearchImplementation searchImplementation, String type, EntityType entityType, SemanticNode node) { return searchImplementation.getBoundaries(node.getTextBlock(), node.getTextRange()) @@ -294,6 +469,15 @@ public class EntityCreationService { } + /** + * Identifies text entities located immediately after the specified strings within a semantic node. + * + * @param strings A list of strings to search for. The text immediately following each string is considered for entity creation. + * @param type The type of the entity to be created. + * @param entityType The detailed classification of the entity. + * @param node The semantic node within which the search is performed. + * @return A stream of {@link TextEntity} objects found immediately after the specified strings. + */ public Stream lineAfterStrings(List strings, String type, EntityType entityType, SemanticNode node) { TextBlock textBlock = node.getTextBlock(); @@ -308,6 +492,15 @@ public class EntityCreationService { } + /** + * Identifies text entities located immediately after the specified strings within a semantic node, case-insensitive. + * + * @param strings A list of strings to search for, case-insensitive. The text immediately following each string is considered for entity creation. + * @param type The type of the entity to be created. + * @param entityType The detailed classification of the entity. + * @param node The semantic node within which the search is performed. + * @return A stream of {@link TextEntity} objects found immediately after the specified strings, case-insensitively. + */ public Stream lineAfterStringsIgnoreCase(List strings, String type, EntityType entityType, SemanticNode node) { TextBlock textBlock = node.getTextBlock(); @@ -322,6 +515,15 @@ public class EntityCreationService { } + /** + * Identifies a text entity located immediately after a specified string within a semantic node. + * + * @param string The string to search for. 
The text immediately following this string is considered for entity creation. + * @param type The type of the entity to be created. + * @param entityType The detailed classification of the entity. + * @param node The semantic node within which the search is performed. + * @return A stream of {@link TextEntity} objects found immediately after the specified string. + */ public Stream lineAfterString(String string, String type, EntityType entityType, SemanticNode node) { TextBlock textBlock = node.getTextBlock(); @@ -335,6 +537,15 @@ public class EntityCreationService { } + /** + * Identifies a text entity located immediately after a specified string within a semantic node, case-insensitive. + * + * @param string The string to search for, case-insensitive. The text immediately following this string is considered for entity creation. + * @param type The type of the entity to be created. + * @param entityType The detailed classification of the entity. + * @param node The semantic node within which the search is performed. + * @return A stream of {@link TextEntity} objects found immediately after the specified string, case-insensitively. + */ public Stream lineAfterStringIgnoreCase(String string, String type, EntityType entityType, SemanticNode node) { TextBlock textBlock = node.getTextBlock(); @@ -348,6 +559,15 @@ public class EntityCreationService { } + /** + * Identifies text entities located immediately after a specified string across table cell columns within a table node. + * + * @param string The string to search for. The text immediately following this string in subsequent table cells is considered for entity creation. + * @param type The type of the entity to be created. + * @param entityType The detailed classification of the entity. + * @param tableNode The table node within which the search is performed. + * @return A stream of {@link TextEntity} objects found across table cell columns immediately after the specified string. 
+ */ public Stream lineAfterStringAcrossColumns(String string, String type, EntityType entityType, Table tableNode) { return tableNode.streamTableCells() @@ -359,6 +579,15 @@ public class EntityCreationService { } + /** + * Identifies text entities located immediately after a specified string across table cell columns within a table node, case-insensitive. + * + * @param string The string to search for, case-insensitive. The text immediately following this string in subsequent table cells is considered for entity creation. + * @param type The type of the entity to be created. + * @param entityType The detailed classification of the entity. + * @param tableNode The table node within which the search is performed. + * @return A stream of {@link TextEntity} objects found across table cell columns immediately after the specified string, case-insensitively. + */ public Stream lineAfterStringAcrossColumnsIgnoreCase(String string, String type, EntityType entityType, Table tableNode) { return tableNode.streamTableCells() @@ -397,6 +626,15 @@ public class EntityCreationService { } + /** + * Attempts to create a text entity for text within a semantic node, immediately after a specified string. + * + * @param semanticNode The semantic node within which to search for the string. + * @param string The string after which the entity should be created. + * @param type The type of entity to create. + * @param entityType The entity's classification. + * @return An {@link Optional} containing the created {@link TextEntity}, or {@link Optional#empty()} if the string is not found. + */ public Optional semanticNodeAfterString(SemanticNode semanticNode, String string, String type, EntityType entityType) { var textBlock = semanticNode.getTextBlock(); @@ -415,30 +653,77 @@ public class EntityCreationService { } + /** + * Identifies text entities based on matches to a regular expression pattern within a semantic node's text block, + * considering line breaks in the text. 
+ * + * @param regexPattern The regex pattern to match. + * @param type The type of entity to create. + * @param entityType The entity's classification. + * @param node The semantic node containing the text block to search. + * @return A stream of identified {@link TextEntity} objects. + */ public Stream byRegexWithLineBreaks(String regexPattern, String type, EntityType entityType, SemanticNode node) { return byRegexWithLineBreaks(regexPattern, type, entityType, 0, node); } + /** + * Identifies text entities based on matches to a regular expression pattern within a semantic node's text block, considering line breaks in the text, case-insensitive. + * + * @param regexPattern The regex pattern to match. + * @param type The type of entity to create. + * @param entityType The entity's classification. + * @param node The semantic node containing the text block to search. + * @return A stream of identified {@link TextEntity} objects. + */ public Stream byRegexWithLineBreaksIgnoreCase(String regexPattern, String type, EntityType entityType, SemanticNode node) { return byRegexWithLineBreaksIgnoreCase(regexPattern, type, entityType, 0, node); } + /** + * Identifies text entities based on matches to a regular expression pattern within a semantic node's text block. + * + * @param regexPattern The regex pattern to match. + * @param type The type of entity to create. + * @param entityType The entity's classification. + * @param node The semantic node containing the text block to search. + * @return A stream of identified {@link TextEntity} objects. + */ public Stream byRegex(String regexPattern, String type, EntityType entityType, SemanticNode node) { return byRegex(regexPattern, type, entityType, 0, node); } + /** + * Identifies text entities based on matches to a regular expression pattern within a semantic node's text block, case-insensitive. + * + * @param regexPattern The regex pattern to match. + * @param type The type of entity to create. 
+ * @param entityType The entity's classification. + * @param node The semantic node containing the text block to search. + * @return A stream of identified {@link TextEntity} objects. + */ public Stream byRegexIgnoreCase(String regexPattern, String type, EntityType entityType, SemanticNode node) { return byRegexIgnoreCase(regexPattern, type, entityType, 0, node); } + /** + * Identifies text entities within a semantic node's text block based on a regex pattern that includes line breaks. + * + * @param regexPattern Regex pattern to match, including handling for line breaks. + * @param type The type of entity to create. + * @param entityType The entity's classification. + * @param group The regex group to target for entity creation. + * @param node The semantic node to search within. + * @return A stream of {@link TextEntity} objects that match the regex pattern. + */ public Stream byRegexWithLineBreaks(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) { return RedactionSearchUtility.findTextRangesByRegexWithLineBreaks(regexPattern, group, node.getTextBlock()) @@ -449,6 +734,16 @@ public class EntityCreationService { } + /** + * Identifies text entities within a semantic node's text block based on a regex pattern that includes line breaks, case-insensitive. + * + * @param regexPattern Regex pattern to match, including handling for line breaks. + * @param type The type of entity to create. + * @param entityType The entity's classification. + * @param group The regex group to target for entity creation. + * @param node The semantic node to search within. + * @return A stream of {@link TextEntity} objects that match the regex pattern. 
+ */ public Stream byRegexWithLineBreaksIgnoreCase(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) { return RedactionSearchUtility.findTextRangesByRegexWithLineBreaksIgnoreCase(regexPattern, group, node.getTextBlock()) @@ -459,6 +754,16 @@ public class EntityCreationService { } + /** + * Identifies text entities based on a simple regex pattern. + * + * @param regexPattern Regex pattern to match. + * @param type The type of entity to create. + * @param entityType The entity's classification. + * @param group The regex group to target for entity creation. + * @param node The semantic node to search within. + * @return A stream of {@link TextEntity} objects that match the regex pattern. + */ public Stream byRegex(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) { return RedactionSearchUtility.findTextRangesByRegex(regexPattern, group, node.getTextBlock()) @@ -469,6 +774,16 @@ public class EntityCreationService { } + /** + * Identifies text entities based on a simple regex pattern, case-insensitive. + * + * @param regexPattern Regex pattern to match, case-insensitive. + * @param type The type of entity to create. + * @param entityType The entity's classification. + * @param group The regex group to target for entity creation. + * @param node The semantic node to search within. + * @return A stream of {@link TextEntity} objects that match the regex pattern. + */ public Stream byRegexIgnoreCase(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) { return RedactionSearchUtility.findTextRangesByRegexIgnoreCase(regexPattern, group, node.getTextBlock()) @@ -479,6 +794,15 @@ public class EntityCreationService { } + /** + * Identifies text entities based on an exact string match within a semantic node's text block. + * + * @param keyword String keyword to search for. + * @param type The type of entity to create.
+ * @param entityType The entity's classification. + * @param node The semantic node to search within. + * @return A stream of {@link TextEntity} objects that match the exact string. + */ public Stream byString(String keyword, String type, EntityType entityType, SemanticNode node) { return RedactionSearchUtility.findTextRangesByString(keyword, node.getTextBlock()) @@ -489,6 +813,15 @@ public class EntityCreationService { } + /** + * Identifies text entities based on an exact string match within a semantic node's text block, case-insensitive. + * + * @param keyword String keyword to search for. + * @param type The type of entity to create. + * @param entityType The entity's classification. + * @param node The semantic node to search within. + * @return A stream of {@link TextEntity} objects that match the exact string, case-insensitive. + */ public Stream byStringIgnoreCase(String keyword, String type, EntityType entityType, SemanticNode node) { return RedactionSearchUtility.findTextRangesByStringIgnoreCase(keyword, node.getTextBlock()) @@ -499,6 +832,14 @@ public class EntityCreationService { } + /** + * Extracts text entities from paragraphs only, within a given semantic node. + * + * @param type The type of entity to create. + * @param entityType The entity's classification. + * @param node The semantic node to search within. + * @return A stream of {@link TextEntity} objects extracted from paragraphs only. + */ public Stream bySemanticNodeParagraphsOnly(SemanticNode node, String type, EntityType entityType) { return node.streamAllSubNodesOfType(NodeType.PARAGRAPH) @@ -508,6 +849,14 @@ public class EntityCreationService { } + /** + * Merges consecutive paragraphs into a single text entity within a given semantic node. + * + * @param type The type of entity to create. + * @param entityType The entity's classification. + * @param node The semantic node to search within. + * @return A stream of merged {@link TextEntity} objects from consecutive paragraphs. 
+ */ public Stream bySemanticNodeParagraphsOnlyMergeConsecutive(SemanticNode node, String type, EntityType entityType) { return node.streamAllSubNodesOfType(NodeType.PARAGRAPH) @@ -520,6 +869,15 @@ public class EntityCreationService { } + /** + * Creates a text entity immediately following a specified string within a semantic node. + * + * @param string The string after which to create the entity. + * @param type The type of entity to create. + * @param entityType The entity's classification. + * @param node The semantic node to search within. + * @return An {@link Optional} containing the created {@link TextEntity}, or {@link Optional#empty()} if not found. + */ public Optional semanticNodeAfterString(String string, String type, EntityType entityType, SemanticNode node) { if (!node.containsString(string)) { @@ -530,6 +888,14 @@ public class EntityCreationService { } + /** + * Creates a text entity based on the entire text range of a semantic node. + * + * @param node The semantic node to base the text entity on. + * @param type The type of entity to create. + * @param entityType The entity's classification. + * @return An {@link Optional} containing the created {@link TextEntity}, or {@link Optional#empty()} if not valid. + */ public Optional bySemanticNode(SemanticNode node, String type, EntityType entityType) { TextRange textRange = node.getTextBlock().getTextRange(); @@ -544,6 +910,13 @@ public class EntityCreationService { } + /** + * Expands a text entity's start boundary based on a regex pattern match. + * + * @param entity The original text entity to expand. + * @param regexPattern The regex pattern used to find the new start boundary. + * @return An {@link Optional} containing the expanded {@link TextEntity}, or {@link Optional#empty()} if not valid. 
+ */ public Optional byPrefixExpansionRegex(TextEntity entity, String regexPattern) { int expandedStart = RedactionSearchUtility.getExpandedStartByRegex(entity, regexPattern); @@ -551,6 +924,13 @@ public class EntityCreationService { } + /** + * Expands a text entity's end boundary based on a regex pattern match. + * + * @param entity The original text entity to expand. + * @param regexPattern The regex pattern used to find the new end boundary. + * @return An {@link Optional} containing the expanded {@link TextEntity}, or {@link Optional#empty()} if not valid. + */ public Optional bySuffixExpansionRegex(TextEntity entity, String regexPattern) { int expandedEnd = RedactionSearchUtility.getExpandedEndByRegex(entity, regexPattern); @@ -594,7 +974,7 @@ public class EntityCreationService { throw new IllegalArgumentException(String.format("%s is not in the %s of the provided semantic node %s", textRange, node.getTextRange(), node)); } TextRange trimmedTextRange = textRange.trim(node.getTextBlock()); - if (trimmedTextRange.length() == 0){ + if (trimmedTextRange.length() == 0) { return Optional.empty(); } TextEntity entity = TextEntity.initialEntityNode(trimmedTextRange, type, entityType, node); @@ -646,6 +1026,16 @@ public class EntityCreationService { } + /** + * Merges a list of text entities into a single entity, assuming they intersect and are of the same type. + * + * @param entitiesToMerge The list of entities to merge. + * @param type The type for the merged entity. + * @param entityType The entity's classification. + * @param node The semantic node related to these entities. + * @return A single merged {@link TextEntity}. + * @throws IllegalArgumentException If entities do not intersect or have different types. 
+ */ public TextEntity mergeEntitiesOfSameType(List entitiesToMerge, String type, EntityType entityType, SemanticNode node) { if (!allEntitiesIntersectAndHaveSameTypes(entitiesToMerge)) { @@ -683,11 +1073,22 @@ public class EntityCreationService { addEntityToGraph(mergedEntity, node); insertToKieSession(mergedEntity); - entitiesToMerge.stream().filter(e -> !e.equals(mergedEntity)).forEach(node.getEntities()::remove); + entitiesToMerge.stream() + .filter(e -> !e.equals(mergedEntity)) + .forEach(node.getEntities()::remove); return mergedEntity; } + /** + * Copies a list of text entities, creating a new entity for each in the list with the same properties. + * + * @param entities The list of entities to copy. + * @param type The type for the copied entities. + * @param entityType The classification for the copied entities. + * @param node The semantic node related to these entities. + * @return A stream of copied {@link TextEntity} objects. + */ public Stream copyEntities(List entities, String type, EntityType entityType, SemanticNode node) { return entities.stream() @@ -695,6 +1096,15 @@ public class EntityCreationService { } + /** + * Copies a single text entity, preserving all its matched rules. + * + * @param entity The entity to copy. + * @param type The type for the copied entity. + * @param entityType The classification for the copied entity. + * @param node The semantic node related to the entity. + * @return A copied {@link TextEntity} with matched rules. + */ public TextEntity copyEntity(TextEntity entity, String type, EntityType entityType, SemanticNode node) { var newEntity = copyEntityWithoutRules(entity, type, entityType, node); @@ -703,6 +1113,15 @@ public class EntityCreationService { } + /** + * Copies a single text entity without its matched rules. + * + * @param entity The entity to copy. + * @param type The type for the copied entity. + * @param entityType The classification for the copied entity. 
+ * @param node The semantic node related to the entity. + * @return A copied {@link TextEntity} without matched rules. + */ public TextEntity copyEntityWithoutRules(TextEntity entity, String type, EntityType entityType, SemanticNode node) { TextEntity newEntity = byTextRangeWithEngine(entity.getTextRange(), type, entityType, node, entity.getEngines()).orElseThrow(() -> new NotFoundException( @@ -714,14 +1133,27 @@ public class EntityCreationService { } - public void insertToKieSession(TextEntity mergedEntity) { + /** + * Inserts a text entity into the kieSession for further processing. + * + * @param textEntity The merged text entity to insert. + */ + public void insertToKieSession(TextEntity textEntity) { if (kieSession != null) { - kieSession.insert(mergedEntity); + kieSession.insert(textEntity); } } + /** + * Creates a text entity based on a Named Entity Recognition (NER) entity. + * + * @param nerEntity The NER entity used for creating the text entity. + * @param entityType The entity's classification. + * @param semanticNode The semantic node related to the NER entity. + * @return A new {@link TextEntity} based on the NER entity. + */ public TextEntity byNerEntity(NerEntities.NerEntity nerEntity, EntityType entityType, SemanticNode semanticNode) { return byTextRangeWithEngine(nerEntity.textRange(), nerEntity.type(), entityType, semanticNode, Set.of(Engine.NER)).orElseThrow(() -> new NotFoundException( @@ -729,24 +1161,59 @@ public class EntityCreationService { } + /** + * Creates a text entity based on a Named Entity Recognition (NER) entity, with a specified type. + * + * @param nerEntity The NER entity used for creating the text entity. + * @param type Type of the entity. + * @param entityType The entity's classification. + * @param semanticNode The semantic node related to the NER entity. + * @return A new {@link TextEntity} based on the NER entity. 
+ */ public TextEntity byNerEntity(NerEntities.NerEntity nerEntity, String type, EntityType entityType, SemanticNode semanticNode) { return byTextRangeWithEngine(nerEntity.textRange(), type, entityType, semanticNode, Set.of(Engine.NER)).orElseThrow(() -> new NotFoundException("No entity present!")); } + /** + * Optionally creates a text entity based on a Named Entity Recognition (NER) entity. + * + * @param nerEntity The NER entity used for creating the text entity. + * @param entityType The entity's classification. + * @param semanticNode The semantic node related to the NER entity. + * @return An {@link Optional} containing the new {@link TextEntity} based on the NER entity, or {@link Optional#empty()} if not created. + */ public Optional optionalByNerEntity(NerEntities.NerEntity nerEntity, EntityType entityType, SemanticNode semanticNode) { return byTextRangeWithEngine(nerEntity.textRange(), nerEntity.type(), entityType, semanticNode, Set.of(Engine.NER)); } + /** + * Optionally creates a text entity based on a Named Entity Recognition (NER) entity, with a specified type. + * + * @param nerEntity The NER entity used for creating the text entity. + * @param type Type of the entity. + * @param entityType The entity's classification. + * @param semanticNode The semantic node related to the NER entity. + * @return An {@link Optional} containing the new {@link TextEntity} based on the NER entity, or {@link Optional#empty()} if not created. + */ public Optional optionalByNerEntity(NerEntities.NerEntity nerEntity, String type, EntityType entityType, SemanticNode semanticNode) { return byTextRangeWithEngine(nerEntity.textRange(), type, entityType, semanticNode, Set.of(Engine.NER)); } + /** + * Combines multiple NER entities into a single text entity. + * + * @param nerEntities The collection of NER entities to combine. + * @param type The type for the combined entity. + * @param entityType The classification for the combined entity. 
+ * @param semanticNode The semantic node related to these entities. + * @return A stream of combined {@link TextEntity} objects. + */ public Stream combineNerEntitiesToCbiAddressDefaults(NerEntities nerEntities, String type, EntityType entityType, SemanticNode semanticNode) { return NerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities) @@ -756,12 +1223,25 @@ public class EntityCreationService { } + /** + * Validates if a given text range within a text block represents a valid entity. + * + * @param textBlock The text block containing the text range. + * @param textRange The text range to validate. + * @return true if the text range represents a valid entity, false otherwise. + */ public boolean isValidEntityTextRange(TextBlock textBlock, TextRange textRange) { return textRange.length() > 0 && boundaryIsSurroundedBySeparators(textBlock, textRange); } + /** + * Adds a text entity to its related semantic node and updates the document tree accordingly. + * + * @param entity The text entity to add. + * @param node The semantic node related to the entity. 
+ */ public void addEntityToGraph(TextEntity entity, SemanticNode node) { DocumentTree documentTree = node.getDocumentTree(); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/RedactionSearchUtility.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/RedactionSearchUtility.java index d8dc287b..094e7e42 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/RedactionSearchUtility.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/utils/RedactionSearchUtility.java @@ -18,6 +18,13 @@ import lombok.experimental.UtilityClass; @UtilityClass public class RedactionSearchUtility { + /** + * Checks if any part of a CharSequence matches a given regex pattern. + * + * @param charSequence The CharSequence to be searched. + * @param regexPattern The regex pattern to match against. + * @return true if any part of the CharSequence matches the regex pattern. + */ public static boolean anyMatch(CharSequence charSequence, String regexPattern) { Pattern pattern = Patterns.getCompiledPattern(regexPattern, false); @@ -25,6 +32,13 @@ public class RedactionSearchUtility { } + /** + * Checks if any part of a CharSequence matches a given regex pattern, case-insensitive. + * + * @param charSequence The CharSequence to be searched. + * @param regexPattern The regex pattern to match against. + * @return true if any part of the CharSequence matches the regex pattern. + */ public static boolean anyMatchIgnoreCase(CharSequence charSequence, String regexPattern) { Pattern pattern = Patterns.getCompiledPattern(regexPattern, true); @@ -32,24 +46,53 @@ public class RedactionSearchUtility { } + /** + * Checks if the entirety of a CharSequence exactly matches a given regex pattern. 
+ * + * @param charSequence The CharSequence to be matched. + * @param regexPattern The regex pattern to match against. + * @return true if the CharSequence exactly matches the regex pattern. + */ public static boolean exactMatch(CharSequence charSequence, String regexPattern) { return charSequence.toString().matches(regexPattern); } + /** + * Checks if any part of a TextBlock matches a given regex pattern, case-insensitive. + * + * @param textBlock The TextBlock to be searched. + * @param regexPattern The regex pattern to match against. + * @return true if any part of the TextBlock matches the regex pattern. + */ public static boolean anyMatchIgnoreCase(TextBlock textBlock, String regexPattern) { return anyMatchIgnoreCase(textBlock.getSearchText(), regexPattern); } + /** + * Checks if any part of a TextBlock matches a given regex pattern. + * + * @param textBlock The TextBlock to be searched. + * @param regexPattern The regex pattern to match against. + * @return true if any part of the TextBlock matches the regex pattern. + */ public static boolean anyMatch(TextBlock textBlock, String regexPattern) { return anyMatch(textBlock.getSearchText(), regexPattern); } + /** + * Finds the first TextRange in a given CharSequence that matches a regex pattern. + * + * @param regexPattern The regex pattern to match against. + * @param searchText The CharSequence to be searched. + * @return The first TextRange that matches the pattern. + * @throws IllegalArgumentException If no match is found. + */ public static TextRange findFirstTextRange(String regexPattern, CharSequence searchText) { Pattern pattern = Patterns.getCompiledPattern(regexPattern, false); @@ -61,6 +104,13 @@ public class RedactionSearchUtility { } + /** + * Expands the end boundary of a TextEntity based on a subsequent regex match. + * + * @param entity The entity to expand. + * @param regexPattern The regex pattern used for expansion. + * @return The new end boundary index. 
+ */ public static int getExpandedEndByRegex(TextEntity entity, String regexPattern) { int expandedEnd; @@ -74,6 +124,13 @@ public class RedactionSearchUtility { } + /** + * Expands the start boundary of a TextEntity based on a regex match. + * + * @param entity The entity to expand. + * @param regexPattern The regex pattern used for expansion. + * @return The new start boundary index. + */ public static int getExpandedStartByRegex(TextEntity entity, String regexPattern) { int expandedStart; @@ -87,6 +144,14 @@ public class RedactionSearchUtility { } + /** + * Identifies all lines within a text block that fall within a specified vertical range. + * + * @param maxY The maximum Y-coordinate of the vertical range. + * @param minY The minimum Y-coordinate of the vertical range. + * @param textBlock The text block containing the lines to be checked. + * @return A {@link TextRange} encompassing all lines within the specified Y-coordinate range. + */ public static TextRange findTextRangesOfAllLinesInYRange(double maxY, double minY, TextBlock textBlock) { List lineBoundaries = IntStream.range(0, textBlock.numberOfLines()).boxed() @@ -107,6 +172,13 @@ public class RedactionSearchUtility { } + /** + * Finds TextRanges matching a regex pattern within a TextBlock. + * + * @param regexPattern The regex pattern to match against. + * @param textBlock The TextBlock to search within. + * @return A list of TextRanges corresponding to regex matches. + */ public static List findTextRangesByRegex(String regexPattern, TextBlock textBlock) { Pattern pattern = Patterns.getCompiledPattern(regexPattern, false); @@ -115,6 +187,14 @@ public class RedactionSearchUtility { } + /** + * Finds TextRanges matching a regex pattern within a TextBlock capturing groups. + * + * @param regexPattern The regex pattern to match against. + * @param group The group to capture within the regex pattern. + * @param textBlock The TextBlock to search within.
+ * @return A list of TextRanges corresponding to regex matches. + */ public static List findTextRangesByRegex(String regexPattern, int group, TextBlock textBlock) { Pattern pattern = Patterns.getCompiledPattern(regexPattern, false); @@ -122,6 +202,14 @@ public class RedactionSearchUtility { } + /** + * Finds text ranges that match a regex pattern with consideration for line breaks within a text block. + * + * @param regexPattern The regex pattern to search for, allowing for multiline matches. + * @param group The regex pattern group to extract from matches. + * @param textBlock The text block to search within. + * @return A list of {@link TextRange} objects corresponding to the matches found. + */ public static List findTextRangesByRegexWithLineBreaks(String regexPattern, int group, TextBlock textBlock) { Pattern pattern = Patterns.getCompiledMultilinePattern(regexPattern, false); @@ -129,6 +217,13 @@ public class RedactionSearchUtility { } + /** + * Finds text ranges that match a regex pattern with consideration for line breaks within a text block, case-insensitive. + * + * @param regexPattern The regex pattern to search for, allowing for multiline matches, with case-insensitive matching. + * @param textBlock The text block to search within. + * @return A list of {@link TextRange} objects corresponding to the matches found for the requested {@code group}. + */ public static List findTextRangesByRegexWithLineBreaksIgnoreCase(String regexPattern, int group, TextBlock textBlock) { Pattern pattern = Patterns.getCompiledMultilinePattern(regexPattern, true); @@ -136,6 +231,13 @@ public class RedactionSearchUtility { } + /** + * Finds text ranges within a text block that match a given regex pattern, case-insensitive. + * + * @param regexPattern The regex pattern to search for. + * @param textBlock The text block to search within. + * @return A list of {@link TextRange} objects corresponding to the matches found, with case-insensitive matching.
+ */ public static List findTextRangesByRegexIgnoreCase(String regexPattern, TextBlock textBlock) { Pattern pattern = Patterns.getCompiledPattern(regexPattern, true); @@ -143,6 +245,14 @@ public class RedactionSearchUtility { } + /** + * Finds text ranges within a text block that match a given regex pattern, capturing a specific group, and case-insensitive. + * + * @param regexPattern The regex pattern to search for. + * @param group The group within the regex pattern to capture. + * @param textBlock The text block to search within. + * @return A list of {@link TextRange} objects corresponding to the group matches found, with case-insensitive matching. + */ public static List findTextRangesByRegexIgnoreCase(String regexPattern, int group, TextBlock textBlock) { Pattern pattern = Patterns.getCompiledPattern(regexPattern, true); @@ -173,6 +283,13 @@ public class RedactionSearchUtility { } + /** + * Finds all occurrences of a specified string within a text block and returns their positions as text ranges. + * + * @param searchString The string to search for within the text block. + * @param textBlock The text block to search within. + * @return A list of {@link TextRange} objects representing the start and end positions of each occurrence of the search string. + */ public static List findTextRangesByString(String searchString, TextBlock textBlock) { List boundaries = new LinkedList<>(); @@ -183,6 +300,13 @@ public class RedactionSearchUtility { } + /** + * Finds all occurrences of a specified string within a text block, case-insensitive, and returns their positions as text ranges. + * + * @param searchString The string to search for within the text block, case-insensitively. + * @param textBlock The text block to search within. + * @return A list of {@link TextRange} objects representing the start and end positions of each occurrence of the search string, case-insensitive. 
+ */ public static List findTextRangesByStringIgnoreCase(String searchString, TextBlock textBlock) { Pattern pattern = Pattern.compile(Pattern.quote(searchString), Pattern.CASE_INSENSITIVE); @@ -190,6 +314,13 @@ public class RedactionSearchUtility { } + /** + * Searches a text block for all occurrences of each string in a list and returns their positions as text ranges. + * + * @param searchList A list of strings to search for within the text block. + * @param textBlock The text block to search within. + * @return A list of {@link TextRange} objects representing the start and end positions of occurrences of each string in the list. + */ public static List findTextRangesByList(List searchList, TextBlock textBlock) { List boundaries = new LinkedList<>(); @@ -200,6 +331,13 @@ public class RedactionSearchUtility { } + /** + * Searches a text block for all occurrences of each string in a list, case-insensitive, and returns their positions as text ranges. + * + * @param searchList A list of strings to search for within the text block, case-insensitively. + * @param textBlock The text block to search within. + * @return A list of {@link TextRange} objects representing the start and end positions of occurrences of each string in the list, case-insensitive. 
+ */ public static List findTextRangesByListIgnoreCase(List searchList, TextBlock textBlock) { List boundaries = new LinkedList<>(); diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/resources/drools/all_rules_documine.drl b/redaction-service-v1/redaction-service-server-v1/src/main/resources/drools/all_rules_documine.drl index 77b31d02..500db246 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/resources/drools/all_rules_documine.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/main/resources/drools/all_rules_documine.drl @@ -12,14 +12,12 @@ import java.util.Collection; import java.util.stream.Stream; import java.util.Optional; -import com.iqser.red.service.redaction.v1.server.model.document.*; import com.iqser.red.service.redaction.v1.server.model.document.TextRange; -import com.iqser.red.service.redaction.v1.server.model.document.entity.*; +import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType; import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRule; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRule -import com.iqser.red.service.redaction.v1.server.model.document.nodes.*; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Section; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table; import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell; @@ -31,14 +29,8 @@ import com.iqser.red.service.redaction.v1.server.model.document.nodes.ImageType; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Headline; import com.iqser.red.service.redaction.v1.server.model.document.nodes.SectionIdentifier; -import 
com.iqser.red.service.redaction.v1.server.model.document.nodes.Footer; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Header; import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType; -import com.iqser.red.service.redaction.v1.server.model.document.textblock.*; -import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock; -import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector; -import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock; -import com.iqser.red.service.redaction.v1.server.model.document.textblock.ConcatenatedTextBlock; import com.iqser.red.service.redaction.v1.server.model.NerEntities; import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary; import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryModel; diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl index decaa8b9..88b6ac6c 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl +++ b/redaction-service-v1/redaction-service-server-v1/src/test/resources/drools/documine_flora.drl @@ -12,14 +12,12 @@ import java.util.Collection; import java.util.stream.Stream; import java.util.Optional; -import com.iqser.red.service.redaction.v1.server.model.document.*; import com.iqser.red.service.redaction.v1.server.model.document.TextRange; -import com.iqser.red.service.redaction.v1.server.model.document.entity.*; +import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType; import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRule; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity 
import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRule -import com.iqser.red.service.redaction.v1.server.model.document.nodes.*; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Section; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table; import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell; @@ -31,14 +29,8 @@ import com.iqser.red.service.redaction.v1.server.model.document.nodes.ImageType; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Headline; import com.iqser.red.service.redaction.v1.server.model.document.nodes.SectionIdentifier; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.Footer; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Header; import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType; -import com.iqser.red.service.redaction.v1.server.model.document.textblock.*; -import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock; -import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector; -import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock; -import com.iqser.red.service.redaction.v1.server.model.document.textblock.ConcatenatedTextBlock; import com.iqser.red.service.redaction.v1.server.model.NerEntities; import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary; import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryModel; diff --git a/redaction-service-v1/rules-management/src/main/resources/all_rules_documine.drl b/redaction-service-v1/rules-management/src/main/resources/all_rules_documine.drl index 77b31d02..500db246 100644 --- a/redaction-service-v1/rules-management/src/main/resources/all_rules_documine.drl +++ 
b/redaction-service-v1/rules-management/src/main/resources/all_rules_documine.drl @@ -12,14 +12,12 @@ import java.util.Collection; import java.util.stream.Stream; import java.util.Optional; -import com.iqser.red.service.redaction.v1.server.model.document.*; import com.iqser.red.service.redaction.v1.server.model.document.TextRange; -import com.iqser.red.service.redaction.v1.server.model.document.entity.*; +import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity; import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType; import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRule; import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRule -import com.iqser.red.service.redaction.v1.server.model.document.nodes.*; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Section; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table; import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell; @@ -31,14 +29,8 @@ import com.iqser.red.service.redaction.v1.server.model.document.nodes.ImageType; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Headline; import com.iqser.red.service.redaction.v1.server.model.document.nodes.SectionIdentifier; -import com.iqser.red.service.redaction.v1.server.model.document.nodes.Footer; import com.iqser.red.service.redaction.v1.server.model.document.nodes.Header; import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType; -import com.iqser.red.service.redaction.v1.server.model.document.textblock.*; -import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock; -import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector; -import 
com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock; -import com.iqser.red.service.redaction.v1.server.model.document.textblock.ConcatenatedTextBlock; import com.iqser.red.service.redaction.v1.server.model.NerEntities; import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary; import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryModel;