Merge branch 'RED-8694' into 'master'
RED-8694 - Add Javadoc to classes/methods used in rules Closes RED-8694 See merge request redactmanager/redaction-service!369
This commit is contained in:
commit
e9043c930a
@ -11,6 +11,10 @@ import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
/**
|
||||
* Represents a collection of named entity recognition (NER) entities.
|
||||
* This class provides methods to manage and query NER entities.
|
||||
*/
|
||||
@Getter
|
||||
@AllArgsConstructor
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE, makeFinal = true)
|
||||
@ -25,6 +29,12 @@ public class NerEntities {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks if there are any entities of a specified type.
|
||||
*
|
||||
* @param type The type of entity to check for.
|
||||
* @return true if there is at least one entity of the specified type, false otherwise.
|
||||
*/
|
||||
public boolean hasEntitiesOfType(String type) {
|
||||
|
||||
return nerEntityList.stream()
|
||||
@ -32,6 +42,12 @@ public class NerEntities {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns a stream of NER entities of a specified type.
|
||||
*
|
||||
* @param type The type of entities to return.
|
||||
* @return a stream of {@link NerEntity} objects of the specified type.
|
||||
*/
|
||||
public Stream<NerEntity> streamEntitiesOfType(String type) {
|
||||
|
||||
return nerEntityList.stream()
|
||||
@ -39,6 +55,9 @@ public class NerEntities {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Represents a single NER entity with its value, text range, and type.
|
||||
*/
|
||||
public record NerEntity(String value, TextRange textRange, String type) {
|
||||
|
||||
}
|
||||
|
||||
@ -23,6 +23,9 @@ import com.iqser.red.service.redaction.v1.server.utils.exception.NotFoundExcepti
|
||||
import lombok.Data;
|
||||
import lombok.Getter;
|
||||
|
||||
/**
|
||||
* A class representing a dictionary used for redaction processes, containing various dictionary models and their versions.
|
||||
*/
|
||||
@Data
|
||||
public class Dictionary {
|
||||
|
||||
@ -51,6 +54,11 @@ public class Dictionary {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks if the dictionary contains local entries.
|
||||
*
|
||||
* @return true if any dictionary model contains local entries, false otherwise.
|
||||
*/
|
||||
public boolean hasLocalEntries() {
|
||||
|
||||
return dictionaryModels.stream()
|
||||
@ -64,6 +72,13 @@ public class Dictionary {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Retrieves the {@link DictionaryModel} of a specified type.
|
||||
*
|
||||
* @param type The type of dictionary model to retrieve.
|
||||
* @return The {@link DictionaryModel} of the specified type.
|
||||
* @throws NotFoundException If the specified type is not found in the dictionary.
|
||||
*/
|
||||
public DictionaryModel getType(String type) {
|
||||
|
||||
DictionaryModel model = localAccessMap.get(type);
|
||||
@ -74,6 +89,12 @@ public class Dictionary {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks if the dictionary of a specific type is considered a hint.
|
||||
*
|
||||
* @param type The type of dictionary to check.
|
||||
* @return true if the dictionary model is marked as a hint, false otherwise.
|
||||
*/
|
||||
public boolean isHint(String type) {
|
||||
|
||||
DictionaryModel model = localAccessMap.get(type);
|
||||
@ -84,6 +105,12 @@ public class Dictionary {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks if the dictionary of a specific type is case-insensitive.
|
||||
*
|
||||
* @param type The type of dictionary to check.
|
||||
* @return true if the dictionary is case-insensitive, false otherwise.
|
||||
*/
|
||||
public boolean isCaseInsensitiveDictionary(String type) {
|
||||
|
||||
DictionaryModel dictionaryModel = localAccessMap.get(type);
|
||||
@ -94,6 +121,18 @@ public class Dictionary {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Adds a local dictionary entry of a specific type.
|
||||
*
|
||||
* @param type The type of dictionary to add the entry to.
|
||||
* @param value The value of the entry.
|
||||
* @param matchedRules A collection of {@link MatchedRule} associated with the entry.
|
||||
* @param alsoAddLastname Indicates whether to also add the lastname separately as an entry.
|
||||
* @throws IllegalArgumentException If the specified type does not exist within the dictionary, if the type
|
||||
* does not have any local entries defined, or if the provided value is
|
||||
* blank. This ensures that only valid, non-empty entries
|
||||
* are added to the dictionary.
|
||||
*/
|
||||
private void addLocalDictionaryEntry(String type, String value, Collection<MatchedRule> matchedRules, boolean alsoAddLastname) {
|
||||
|
||||
if (value.isBlank()) {
|
||||
@ -133,18 +172,33 @@ public class Dictionary {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Recommends a text entity by adding its value as a local entry to the dictionary of the entity's type, without adding the last name separately.
|
||||
*
|
||||
* @param textEntity The {@link TextEntity} to be recommended.
|
||||
*/
|
||||
public void recommendEverywhere(TextEntity textEntity) {
|
||||
|
||||
addLocalDictionaryEntry(textEntity.type(), textEntity.getValue(), textEntity.getMatchedRuleList(), false);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Recommends a text entity by adding its value as a local entry to the dictionary of the entity's type, with the last name also added separately.
|
||||
*
|
||||
* @param textEntity The {@link TextEntity} to be recommended.
|
||||
*/
|
||||
public void recommendEverywhereWithLastNameSeparately(TextEntity textEntity) {
|
||||
|
||||
addLocalDictionaryEntry(textEntity.type(), textEntity.getValue(), textEntity.getMatchedRuleList(), true);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Adds multiple author names contained within a text entity as recommendations in the dictionary.
|
||||
*
|
||||
* @param textEntity The {@link TextEntity} containing author names to be added.
|
||||
*/
|
||||
public void addMultipleAuthorsAsRecommendation(TextEntity textEntity) {
|
||||
|
||||
splitIntoAuthorNames(textEntity).forEach(authorName -> addLocalDictionaryEntry(textEntity.type(), authorName, textEntity.getMatchedRuleList(), true));
|
||||
@ -152,6 +206,12 @@ public class Dictionary {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Splits a {@link TextEntity} into individual author names based on commas or new lines.
|
||||
*
|
||||
* @param textEntity The {@link TextEntity} to split.
|
||||
* @return A list of strings where each string is an author name.
|
||||
*/
|
||||
public static List<String> splitIntoAuthorNames(TextEntity textEntity) {
|
||||
|
||||
List<String> splitAuthorNames;
|
||||
|
||||
@ -13,6 +13,12 @@ import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRu
|
||||
import lombok.Data;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
/**
|
||||
* Represents a model of a dictionary containing entries for redaction processes.
|
||||
* It includes various types of entries such as standard entries, false positives,
|
||||
* and false recommendations. Additionally, it manages local entries with matched
|
||||
* rules for enhanced search and matching capabilities.
|
||||
*/
|
||||
@Data
|
||||
@Slf4j
|
||||
public class DictionaryModel implements Serializable {
|
||||
@ -36,6 +42,19 @@ public class DictionaryModel implements Serializable {
|
||||
private transient SearchImplementation localSearch;
|
||||
|
||||
|
||||
/**
|
||||
* Constructs a new DictionaryModel with specified parameters.
|
||||
*
|
||||
* @param type The type of the dictionary model.
|
||||
* @param rank The rank order of the dictionary model.
|
||||
* @param color An array representing the color associated with this model.
|
||||
* @param caseInsensitive Flag indicating whether the dictionary is case-insensitive.
|
||||
* @param hint Flag indicating whether this model should be used as a hint.
|
||||
* @param entries Set of dictionary entry models representing the entries.
|
||||
* @param falsePositives Set of dictionary entry models representing false positives.
|
||||
* @param falseRecommendations Set of dictionary entry models representing false recommendations.
|
||||
* @param isDossierDictionary Flag indicating whether this model is for a dossier dictionary.
|
||||
*/
|
||||
public DictionaryModel(String type,
|
||||
int rank,
|
||||
float[] color,
|
||||
@ -52,13 +71,17 @@ public class DictionaryModel implements Serializable {
|
||||
this.caseInsensitive = caseInsensitive;
|
||||
this.hint = hint;
|
||||
this.isDossierDictionary = isDossierDictionary;
|
||||
|
||||
this.entries = entries;
|
||||
this.falsePositives = falsePositives;
|
||||
this.falseRecommendations = falseRecommendations;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the search implementation for local entries.
|
||||
*
|
||||
* @return The {@link SearchImplementation} for local entries.
|
||||
*/
|
||||
public SearchImplementation getLocalSearch() {
|
||||
|
||||
if (this.localSearch == null || this.localSearch.getValues().size() != this.localEntriesWithMatchedRules.size()) {
|
||||
@ -68,6 +91,11 @@ public class DictionaryModel implements Serializable {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the search implementation for non-deleted dictionary entries.
|
||||
*
|
||||
* @return The {@link SearchImplementation} for non-deleted dictionary entries.
|
||||
*/
|
||||
public SearchImplementation getEntriesSearch() {
|
||||
|
||||
if (entriesSearch == null) {
|
||||
@ -80,6 +108,11 @@ public class DictionaryModel implements Serializable {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the search implementation for deleted dictionary entries.
|
||||
*
|
||||
* @return The {@link SearchImplementation} for deleted dictionary entries.
|
||||
*/
|
||||
public SearchImplementation getDeletionEntriesSearch() {
|
||||
|
||||
if (deletionEntriesSearch == null) {
|
||||
@ -92,6 +125,11 @@ public class DictionaryModel implements Serializable {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the search implementation for non-deleted false positive entries.
|
||||
*
|
||||
* @return The {@link SearchImplementation} for non-deleted false positive entries.
|
||||
*/
|
||||
public SearchImplementation getFalsePositiveSearch() {
|
||||
|
||||
if (falsePositiveSearch == null) {
|
||||
@ -104,6 +142,11 @@ public class DictionaryModel implements Serializable {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the search implementation for non-deleted false recommendation entries.
|
||||
*
|
||||
* @return The {@link SearchImplementation} for non-deleted false recommendation entries.
|
||||
*/
|
||||
public SearchImplementation getFalseRecommendationsSearch() {
|
||||
|
||||
if (falseRecommendationsSearch == null) {
|
||||
@ -116,12 +159,17 @@ public class DictionaryModel implements Serializable {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Retrieves the matched rules for a given value from the local dictionary entries.
|
||||
* The value is processed based on the case sensitivity of the dictionary.
|
||||
*
|
||||
* @param value The value for which to retrieve the matched rules.
|
||||
* @return A set of {@link MatchedRule} associated with the given value, or null if no rules are found.
|
||||
*/
|
||||
public Set<MatchedRule> getMatchedRulesForLocalDictionaryEntry(String value) {
|
||||
|
||||
var cleanedValue = isCaseInsensitive() ? value.toLowerCase(Locale.US) : value;
|
||||
|
||||
return localEntriesWithMatchedRules.get(cleanedValue);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -11,6 +11,10 @@ import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBl
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.Setter;
|
||||
|
||||
/**
|
||||
* Represents a range of text defined by a start and end index.
|
||||
* Provides functionality to check containment, intersection, and to adjust ranges based on specified conditions.
|
||||
*/
|
||||
@Setter
|
||||
@EqualsAndHashCode
|
||||
@SuppressWarnings("PMD.AvoidFieldNameMatchingMethodName")
|
||||
@ -20,6 +24,13 @@ public class TextRange implements Comparable<TextRange> {
|
||||
private int end;
|
||||
|
||||
|
||||
/**
|
||||
* Constructs a TextRange with specified start and end indexes.
|
||||
*
|
||||
* @param start The starting index of the range.
|
||||
* @param end The ending index of the range.
|
||||
* @throws IllegalArgumentException If start is greater than end.
|
||||
*/
|
||||
public TextRange(int start, int end) {
|
||||
|
||||
if (start > end) {
|
||||
@ -30,6 +41,11 @@ public class TextRange implements Comparable<TextRange> {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the length of the text range.
|
||||
*
|
||||
* @return The length of the range.
|
||||
*/
|
||||
public int length() {
|
||||
|
||||
return end - start;
|
||||
@ -48,18 +64,38 @@ public class TextRange implements Comparable<TextRange> {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks if this {@link TextRange} fully contains another TextRange.
|
||||
*
|
||||
* @param textRange The {@link TextRange} to check.
|
||||
* @return true if this range contains the specified range, false otherwise.
|
||||
*/
|
||||
public boolean contains(TextRange textRange) {
|
||||
|
||||
return start <= textRange.start() && textRange.end() <= end;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks if this {@link TextRange} is fully contained by another TextRange.
|
||||
*
|
||||
* @param textRange The {@link TextRange} to check against.
|
||||
* @return true if this range is contained by the specified range, false otherwise.
|
||||
*/
|
||||
public boolean containedBy(TextRange textRange) {
|
||||
|
||||
return textRange.contains(this);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks if this {@link TextRange} contains another range specified by start and end indices.
|
||||
*
|
||||
* @param start The starting index of the range to check.
|
||||
* @param end The ending index of the range to check.
|
||||
* @return true if this range fully contains the specified range, false otherwise.
|
||||
* @throws IllegalArgumentException If the start index is greater than the end index.
|
||||
*/
|
||||
public boolean contains(int start, int end) {
|
||||
|
||||
if (start > end) {
|
||||
@ -69,6 +105,14 @@ public class TextRange implements Comparable<TextRange> {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks if this {@link TextRange} is fully contained within another range specified by start and end indices.
|
||||
*
|
||||
* @param start The starting index of the outer range.
|
||||
* @param end The ending index of the outer range.
|
||||
* @return true if this range is fully contained within the specified range, false otherwise.
|
||||
* @throws IllegalArgumentException If the start index is greater than the end index.
|
||||
*/
|
||||
public boolean containedBy(int start, int end) {
|
||||
|
||||
if (start > end) {
|
||||
@ -78,18 +122,37 @@ public class TextRange implements Comparable<TextRange> {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Determines if the specified index is within this {@link TextRange}.
|
||||
*
|
||||
* @param index The index to check.
|
||||
* @return true if the index is within the range (inclusive of the start and exclusive of the end), false otherwise.
|
||||
*/
|
||||
public boolean contains(int index) {
|
||||
|
||||
return start <= index && index < end;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks if this {@link TextRange} intersects with another {@link TextRange}.
|
||||
*
|
||||
* @param textRange The {@link TextRange} to check for intersection.
|
||||
* @return true if the ranges intersect, false otherwise.
|
||||
*/
|
||||
public boolean intersects(TextRange textRange) {
|
||||
|
||||
return textRange.start() < this.end && this.start < textRange.end();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Splits this TextRange into multiple ranges based on a list of indices.
|
||||
*
|
||||
* @param splitIndices The indices at which to split the range.
|
||||
* @return A list of TextRanges resulting from the split.
|
||||
* @throws IndexOutOfBoundsException If any split index is outside this TextRange.
|
||||
*/
|
||||
public List<TextRange> split(List<Integer> splitIndices) {
|
||||
|
||||
if (splitIndices.stream()
|
||||
@ -116,6 +179,13 @@ public class TextRange implements Comparable<TextRange> {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Merges a collection of TextRanges into a single TextRange encompassing all of them.
|
||||
*
|
||||
* @param boundaries The collection of TextRanges to merge.
|
||||
* @return A new TextRange covering the entire span of the given ranges.
|
||||
* @throws IllegalArgumentException If boundaries are empty.
|
||||
*/
|
||||
public static TextRange merge(Collection<TextRange> boundaries) {
|
||||
|
||||
int minStart = boundaries.stream()
|
||||
@ -152,16 +222,17 @@ public class TextRange implements Comparable<TextRange> {
|
||||
|
||||
|
||||
/**
|
||||
* shrinks the boundary, such that textBlock.subSequence(boundary) returns a string without trailing or preceding whitespaces.
|
||||
* Shrinks the boundary, such that textBlock.subSequence(boundary) returns a string without trailing or preceding whitespaces.
|
||||
*
|
||||
* @param textBlock TextBlock to check whitespaces against
|
||||
* @return trimmed boundary
|
||||
* @return Trimmed boundary
|
||||
*/
|
||||
public TextRange trim(TextBlock textBlock) {
|
||||
|
||||
if (this.length() == 0) {
|
||||
return this;
|
||||
}
|
||||
|
||||
int trimmedStart = this.start;
|
||||
while (textBlock.containsIndex(trimmedStart) && trimmedStart < end && Character.isWhitespace(textBlock.charAt(trimmedStart))) {
|
||||
trimmedStart++;
|
||||
|
||||
@ -12,27 +12,64 @@ import lombok.NonNull;
|
||||
|
||||
public interface IEntity {
|
||||
|
||||
/**
|
||||
* Gets the list of rules matched against this entity.
|
||||
*
|
||||
* @return A priority queue of matched rules.
|
||||
*/
|
||||
PriorityQueue<MatchedRule> getMatchedRuleList();
|
||||
|
||||
|
||||
/**
|
||||
* Gets the manual overwrite actions applied to this entity, if any.
|
||||
*
|
||||
* @return The manual overwrite details.
|
||||
*/
|
||||
ManualChangeOverwrite getManualOverwrite();
|
||||
|
||||
|
||||
/**
|
||||
* Gets the value of this entity as a string.
|
||||
*
|
||||
* @return The string value.
|
||||
*/
|
||||
String getValue();
|
||||
|
||||
|
||||
/**
|
||||
* Gets the range of text in the document associated with this entity.
|
||||
*
|
||||
* @return The text range.
|
||||
*/
|
||||
TextRange getTextRange();
|
||||
|
||||
|
||||
/**
|
||||
* Gets the type of this entity.
|
||||
*
|
||||
* @return The entity type.
|
||||
*/
|
||||
String type();
|
||||
|
||||
|
||||
/**
|
||||
* Calculates the length of the entity's value.
|
||||
*
|
||||
* @return The length of the value.
|
||||
*/
|
||||
default int length() {
|
||||
|
||||
return value().length();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Retrieves the value of the entity, considering any manual overwrite.
|
||||
* If no manual overwrite value is found, returns the value of the entity or an empty string
|
||||
* if that value is null.
|
||||
*
|
||||
* @return The possibly overwritten value
|
||||
*/
|
||||
default String value() {
|
||||
|
||||
return getManualOverwrite().getValue()
|
||||
@ -40,6 +77,11 @@ public interface IEntity {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Determines if the entity has been applied, considering manual overwrites.
|
||||
*
|
||||
* @return True if applied, false otherwise.
|
||||
*/
|
||||
// Don't use default accessor pattern (e.g. isApplied()), as it might lead to errors in drools due to property-specific optimization of the drools planner.
|
||||
default boolean applied() {
|
||||
|
||||
@ -48,12 +90,22 @@ public interface IEntity {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Determines if the entity has been skipped, based on its applied status.
|
||||
*
|
||||
* @return True if skipped, false otherwise.
|
||||
*/
|
||||
default boolean skipped() {
|
||||
|
||||
return !applied();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Determines if the entity has been ignored, considering manual overwrites.
|
||||
*
|
||||
* @return True if ignored, false otherwise.
|
||||
*/
|
||||
default boolean ignored() {
|
||||
|
||||
return getManualOverwrite().getIgnored()
|
||||
@ -61,6 +113,11 @@ public interface IEntity {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Determines if the entity has been removed, considering manual overwrites.
|
||||
*
|
||||
* @return True if removed, false otherwise.
|
||||
*/
|
||||
default boolean removed() {
|
||||
|
||||
return getManualOverwrite().getRemoved()
|
||||
@ -68,6 +125,11 @@ public interface IEntity {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks if the entity has been resized, considering manual overwrites.
|
||||
*
|
||||
* @return True if resized, false otherwise.
|
||||
*/
|
||||
default boolean resized() {
|
||||
|
||||
return getManualOverwrite().getResized()
|
||||
@ -75,24 +137,48 @@ public interface IEntity {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks if the entity is considered active, based on its removed and ignored status.
|
||||
* An active entity is neither removed nor ignored.
|
||||
*
|
||||
* @return True if active, false otherwise.
|
||||
*/
|
||||
default boolean active() {
|
||||
|
||||
return !(removed() || ignored());
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks if there are any manual changes applied to the entity.
|
||||
*
|
||||
* @return True if there are manual changes, false otherwise.
|
||||
*/
|
||||
default boolean hasManualChanges() {
|
||||
|
||||
return !getManualOverwrite().getManualChangeLog().isEmpty();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Retrieves a set of references associated with the entity's matched rule.
|
||||
*
|
||||
* @return A set of references.
|
||||
*/
|
||||
default Set<TextEntity> references() {
|
||||
|
||||
return getMatchedRule().getReferences();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Applies a redaction to the entity with a specified legal basis.
|
||||
*
|
||||
* @param ruleIdentifier The identifier of the rule being applied.
|
||||
* @param reason The reason for the redaction.
|
||||
* @param legalBasis The legal basis for the redaction, which must not be blank or empty.
|
||||
* @throws IllegalArgumentException If the legal basis is blank or empty.
|
||||
*/
|
||||
default void redact(@NonNull String ruleIdentifier, String reason, @NonNull String legalBasis) {
|
||||
|
||||
if (legalBasis.isBlank() || legalBasis.isEmpty()) {
|
||||
@ -102,36 +188,75 @@ public interface IEntity {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Applies a rule to the entity with an optional legal basis.
|
||||
*
|
||||
* @param ruleIdentifier The identifier of the rule being applied.
|
||||
* @param reason The reason for applying the rule.
|
||||
* @param legalBasis The legal basis for the application, can be a default or unspecified value.
|
||||
*/
|
||||
default void apply(@NonNull String ruleIdentifier, String reason, String legalBasis) {
|
||||
|
||||
addMatchedRule(MatchedRule.builder().ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier)).reason(reason).legalBasis(legalBasis).applied(true).build());
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Applies a rule to the entity without specifying a legal basis, which will be replaced by "n-a".
|
||||
*
|
||||
* @param ruleIdentifier The identifier of the rule being applied.
|
||||
* @param reason The reason for applying the rule.
|
||||
*/
|
||||
default void apply(@NonNull String ruleIdentifier, String reason) {
|
||||
|
||||
apply(ruleIdentifier, reason, "n-a");
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Marks the entity as skipped according to a specific rule.
|
||||
*
|
||||
* @param ruleIdentifier The identifier of the rule being skipped.
|
||||
* @param reason The reason for skipping the rule.
|
||||
*/
|
||||
default void skip(@NonNull String ruleIdentifier, String reason) {
|
||||
|
||||
addMatchedRule(MatchedRule.builder().ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier)).reason(reason).build());
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Marks the entity as removed according to a specific rule.
|
||||
*
|
||||
* @param ruleIdentifier The identifier of the rule based on which the entity is removed.
|
||||
* @param reason The reason for the removal.
|
||||
*/
|
||||
default void remove(String ruleIdentifier, String reason) {
|
||||
|
||||
addMatchedRule(MatchedRule.builder().ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier)).reason(reason).removed(true).build());
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Marks the entity as ignored according to a specific rule.
|
||||
*
|
||||
* @param ruleIdentifier The identifier of the rule based on which the entity is ignored.
|
||||
* @param reason The reason for ignoring the entity.
|
||||
*/
|
||||
default void ignore(String ruleIdentifier, String reason) {
|
||||
|
||||
addMatchedRule(MatchedRule.builder().ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier)).reason(reason).ignored(true).build());
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Applies a rule to the entity, indicating that the value should be written with line breaks.
|
||||
*
|
||||
* @param ruleIdentifier The identifier of the rule being applied.
|
||||
* @param reason The reason for the rule application.
|
||||
* @param legalBasis The legal basis for the rule, which must not be blank or empty.
|
||||
* @throws IllegalArgumentException If the legal basis is blank or empty.
|
||||
*/
|
||||
default void applyWithLineBreaks(@NonNull String ruleIdentifier, String reason, @NonNull String legalBasis) {
|
||||
|
||||
if (legalBasis.isBlank() || legalBasis.isEmpty()) {
|
||||
@ -147,6 +272,15 @@ public interface IEntity {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Applies a rule to the entity with a collection of references.
|
||||
*
|
||||
* @param ruleIdentifier The identifier of the rule being applied.
|
||||
* @param reason The reason for the rule application.
|
||||
* @param legalBasis The legal basis for the rule, which must not be blank or empty.
|
||||
* @param references A collection of text entities that are referenced by this rule application.
|
||||
* @throws IllegalArgumentException If the legal basis is blank or empty.
|
||||
*/
|
||||
default void applyWithReferences(@NonNull String ruleIdentifier, String reason, @NonNull String legalBasis, Collection<TextEntity> references) {
|
||||
|
||||
if (legalBasis.isBlank() || legalBasis.isEmpty()) {
|
||||
@ -162,18 +296,35 @@ public interface IEntity {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Marks the entity as skipped for a specific rule and associates a collection of references.
|
||||
*
|
||||
* @param ruleIdentifier The identifier of the rule being skipped.
|
||||
* @param reason The reason for skipping the rule.
|
||||
* @param references A collection of text entities that are referenced by the skipped rule.
|
||||
*/
|
||||
default void skipWithReferences(@NonNull String ruleIdentifier, String reason, Collection<TextEntity> references) {
|
||||
|
||||
getMatchedRuleList().add(MatchedRule.builder().ruleIdentifier(RuleIdentifier.fromString(ruleIdentifier)).reason(reason).references(new HashSet<>(references)).build());
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Adds a single matched rule to this entity.
|
||||
*
|
||||
* @param matchedRule The matched rule to add.
|
||||
*/
|
||||
default void addMatchedRule(MatchedRule matchedRule) {
|
||||
|
||||
getMatchedRuleList().add(matchedRule);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Adds a collection of matched rules to this entity.
|
||||
*
|
||||
* @param matchedRules The collection of matched rules to add.
|
||||
*/
|
||||
default void addMatchedRules(Collection<MatchedRule> matchedRules) {
|
||||
|
||||
if (getMatchedRuleList().equals(matchedRules)) {
|
||||
@ -183,12 +334,22 @@ public interface IEntity {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Retrieves the 'unit' value of the highest priority matched rule.
|
||||
*
|
||||
* @return The unit value of the matched rule.
|
||||
*/
|
||||
default int getMatchedRuleUnit() {
|
||||
|
||||
return getMatchedRule().getRuleIdentifier().unit();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets the highest priority matched rule for this entity.
|
||||
*
|
||||
* @return The matched rule.
|
||||
*/
|
||||
default MatchedRule getMatchedRule() {
|
||||
|
||||
if (getMatchedRuleList().isEmpty()) {
|
||||
@ -198,6 +359,11 @@ public interface IEntity {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Builds a reason string for this entity, incorporating descriptions from manual changes.
|
||||
*
|
||||
* @return The built reason string.
|
||||
*/
|
||||
default String buildReasonWithManualChangeDescriptions() {
|
||||
|
||||
if (getManualOverwrite().getDescriptions().isEmpty()) {
|
||||
@ -210,6 +376,11 @@ public interface IEntity {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Retrieves the legal basis for the action taken on this entity, considering any manual overwrite.
|
||||
*
|
||||
* @return The legal basis.
|
||||
*/
|
||||
default String legalBasis() {
|
||||
|
||||
return getManualOverwrite().getLegalBasis()
|
||||
|
||||
@ -15,6 +15,9 @@ import lombok.EqualsAndHashCode;
|
||||
import lombok.Getter;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
/**
|
||||
* Represents a rule that has been matched during the document redaction process.
|
||||
*/
|
||||
@Getter
|
||||
@Builder
|
||||
@AllArgsConstructor
|
||||
@ -42,12 +45,26 @@ public final class MatchedRule implements Comparable<MatchedRule> {
|
||||
Set<TextEntity> references = Collections.emptySet();
|
||||
|
||||
|
||||
/**
|
||||
* Creates an empty instance of {@link MatchedRule}.
|
||||
* This can be used as a placeholder or when no rule is actually matched.
|
||||
*
|
||||
* @return An empty {@link MatchedRule} instance.
|
||||
*/
|
||||
public static MatchedRule empty() {
|
||||
|
||||
return MatchedRule.builder().ruleIdentifier(RuleIdentifier.empty()).build();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns a modified instance of {@link MatchedRule} based on its applied status.
|
||||
* If the rule has been applied, it returns a new {@link MatchedRule} instance that retains all properties of the original
|
||||
* except for the 'applied' status, which is set to false.
|
||||
* If the rule has not been applied, it returns the original instance.
|
||||
*
|
||||
* @return A {@link MatchedRule} instance with 'applied' set to false.
|
||||
*/
|
||||
public MatchedRule asSkippedIfApplied() {
|
||||
|
||||
if (!this.isApplied()) {
|
||||
@ -63,6 +80,13 @@ public final class MatchedRule implements Comparable<MatchedRule> {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Compares this rule with another {@link MatchedRule} to establish a priority order.
|
||||
* The comparison is based on the rule type, unit, and ID, in that order.
|
||||
*
|
||||
* @param matchedRule The {@link MatchedRule} to compare against.
|
||||
* @return A negative integer, zero, or a positive integer as this rule is less than, equal to, or greater than the specified rule.
|
||||
*/
|
||||
@Override
|
||||
public int compareTo(MatchedRule matchedRule) {
|
||||
|
||||
|
||||
@ -24,6 +24,9 @@ import lombok.EqualsAndHashCode;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
/**
|
||||
* Represents the entire document as a node within the document's semantic structure.
|
||||
*/
|
||||
@Data
|
||||
@Builder
|
||||
@AllArgsConstructor
|
||||
@ -63,6 +66,11 @@ public class Document implements GenericSemanticNode {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets the main sections of the document as a list.
|
||||
*
|
||||
* @return A list of main sections within the document.
|
||||
*/
|
||||
public List<Section> getMainSections() {
|
||||
|
||||
return streamChildrenOfType(NodeType.SECTION).map(node -> (Section) node)
|
||||
@ -70,6 +78,11 @@ public class Document implements GenericSemanticNode {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Streams all terminal (leaf) text blocks within the document in their natural order.
|
||||
*
|
||||
* @return A stream of terminal {@link TextBlock}.
|
||||
*/
|
||||
public Stream<TextBlock> streamTerminalTextBlocksInOrder() {
|
||||
|
||||
return streamAllNodes().filter(SemanticNode::isLeaf).map(SemanticNode::getTextBlock);
|
||||
@ -99,6 +112,11 @@ public class Document implements GenericSemanticNode {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Streams all nodes within the document, regardless of type, in their natural order.
|
||||
*
|
||||
* @return A stream of all {@link SemanticNode} within the document.
|
||||
*/
|
||||
private Stream<SemanticNode> streamAllNodes() {
|
||||
|
||||
return documentTree.allEntriesInOrder()
|
||||
@ -106,6 +124,11 @@ public class Document implements GenericSemanticNode {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Streams all image nodes contained within the document.
|
||||
*
|
||||
* @return A stream of {@link Image} nodes.
|
||||
*/
|
||||
public Stream<Image> streamAllImages() {
|
||||
|
||||
return streamAllSubNodesOfType(NodeType.IMAGE).map(node -> (Image) node);
|
||||
|
||||
@ -19,6 +19,9 @@ import lombok.EqualsAndHashCode;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
/**
|
||||
* Represents the header part of a document page.
|
||||
*/
|
||||
@Data
|
||||
@Builder
|
||||
@AllArgsConstructor
|
||||
|
||||
@ -20,6 +20,9 @@ import lombok.EqualsAndHashCode;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
/**
|
||||
* Represents a headline in a document.
|
||||
*/
|
||||
@Data
|
||||
@Builder
|
||||
@AllArgsConstructor
|
||||
@ -98,12 +101,22 @@ public class Headline implements GenericSemanticNode {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Creates an empty headline with no text content.
|
||||
*
|
||||
* @return An empty {@link Headline} instance.
|
||||
*/
|
||||
public static Headline empty() {
|
||||
|
||||
return Headline.builder().leafTextBlock(AtomicTextBlock.empty(-1L, 0, new Page(), -1, null)).build();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks if this headline is associated with any paragraphs within its parent section or node.
|
||||
*
|
||||
* @return True if there are paragraphs associated with this headline, false otherwise.
|
||||
*/
|
||||
public boolean hasParagraphs() {
|
||||
|
||||
return getParent().streamAllSubNodesOfType(NodeType.PARAGRAPH)
|
||||
|
||||
@ -28,6 +28,10 @@ import lombok.EqualsAndHashCode;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
/**
|
||||
*
|
||||
* Represents an image within the document.
|
||||
*/
|
||||
@Data
|
||||
@Builder
|
||||
@AllArgsConstructor
|
||||
|
||||
@ -17,6 +17,9 @@ import lombok.NoArgsConstructor;
|
||||
import lombok.Setter;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
/**
|
||||
* Represents a single page in a document.
|
||||
*/
|
||||
@Getter
|
||||
@Setter
|
||||
@Builder
|
||||
@ -43,6 +46,11 @@ public class Page {
|
||||
Set<Image> images = new HashSet<>();
|
||||
|
||||
|
||||
/**
|
||||
* Constructs and returns a {@link TextBlock} representing the concatenated text of all leaf semantic nodes in the main body.
|
||||
*
|
||||
* @return The main body text block.
|
||||
*/
|
||||
public TextBlock getMainBodyTextBlock() {
|
||||
|
||||
return mainBody.stream()
|
||||
|
||||
@ -19,6 +19,9 @@ import lombok.EqualsAndHashCode;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
import lombok.experimental.SuperBuilder;
|
||||
|
||||
/**
|
||||
* Represents a paragraph in the document.
|
||||
*/
|
||||
@Data
|
||||
@SuperBuilder
|
||||
@AllArgsConstructor
|
||||
|
||||
@ -21,6 +21,9 @@ import lombok.RequiredArgsConstructor;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
/**
|
||||
* Represents a section within a document, encapsulating both its textual content and semantic structure.
|
||||
*/
|
||||
@Slf4j
|
||||
@Data
|
||||
@Builder
|
||||
@ -51,6 +54,11 @@ public class Section implements GenericSemanticNode {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks if this section contains any tables.
|
||||
*
|
||||
* @return True if the section contains at least one table, false otherwise.
|
||||
*/
|
||||
public boolean hasTables() {
|
||||
|
||||
return streamAllSubNodesOfType(NodeType.TABLE).findAny()
|
||||
@ -91,12 +99,24 @@ public class Section implements GenericSemanticNode {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks if any headline within this section or its sub-nodes contains a given string.
|
||||
*
|
||||
* @param value The string to search for within headlines, case-sensitive.
|
||||
* @return True if at least one headline contains the specified string, false otherwise.
|
||||
*/
|
||||
public boolean anyHeadlineContainsString(String value) {
|
||||
|
||||
return streamAllSubNodesOfType(NodeType.HEADLINE).anyMatch(h -> h.containsString(value));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks if any headline within this section or its sub-nodes contains a given string, case-insensitive.
|
||||
*
|
||||
* @param value The string to search for within headlines, case-insensitive.
|
||||
* @return True if at least one headline contains the specified string, false otherwise.
|
||||
*/
|
||||
public boolean anyHeadlineContainsStringIgnoreCase(String value) {
|
||||
|
||||
return streamAllSubNodesOfType(NodeType.HEADLINE).anyMatch(h -> h.containsStringIgnoreCase(value));
|
||||
|
||||
@ -10,6 +10,9 @@ import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
/**
|
||||
* Represents a unique identifier for a section within a document.
|
||||
*/
|
||||
@AllArgsConstructor
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public class SectionIdentifier {
|
||||
@ -28,6 +31,12 @@ public class SectionIdentifier {
|
||||
boolean asChild;
|
||||
|
||||
|
||||
/**
|
||||
* Generates a SectionIdentifier from the headline text of a section, determining its format and structure.
|
||||
*
|
||||
* @param headline The headline text from which to generate the section identifier.
|
||||
* @return A {@link SectionIdentifier} instance corresponding to the headline text.
|
||||
*/
|
||||
public static SectionIdentifier fromSearchText(String headline) {
|
||||
|
||||
if (headline == null || headline.isEmpty() || headline.isBlank()) {
|
||||
@ -43,18 +52,34 @@ public class SectionIdentifier {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Creates a new section identifier that is marked as a child of the given section identifier.
|
||||
*
|
||||
* @param sectionIdentifier The parent section identifier.
|
||||
* @return A new {@link SectionIdentifier} instance marked as a child.
|
||||
*/
|
||||
public static SectionIdentifier asChildOf(SectionIdentifier sectionIdentifier) {
|
||||
|
||||
return new SectionIdentifier(sectionIdentifier.format, sectionIdentifier.toString(), sectionIdentifier.identifiers, true);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Generates a SectionIdentifier that represents the entire document.
|
||||
*
|
||||
* @return A {@link SectionIdentifier} with a document-wide scope.
|
||||
*/
|
||||
public static SectionIdentifier document() {
|
||||
|
||||
return new SectionIdentifier(Format.DOCUMENT, "document", Collections.emptyList(), false);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Generates an empty SectionIdentifier.
|
||||
*
|
||||
* @return An empty {@link SectionIdentifier} instance.
|
||||
*/
|
||||
public static SectionIdentifier empty() {
|
||||
|
||||
return new SectionIdentifier(Format.EMPTY, "empty", Collections.emptyList(), false);
|
||||
@ -109,6 +134,12 @@ public class SectionIdentifier {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Determines if the current section is a child of the given section, based on their identifiers.
|
||||
*
|
||||
* @param sectionIdentifier The section identifier to compare against.
|
||||
* @return True if the current section is a child of the given section, false otherwise.
|
||||
*/
|
||||
public boolean isChildOf(SectionIdentifier sectionIdentifier) {
|
||||
|
||||
if (this.format.equals(Format.DOCUMENT) || this.format.equals(Format.EMPTY)) {
|
||||
|
||||
@ -44,11 +44,12 @@ public interface SemanticNode {
|
||||
*/
|
||||
default TextBlock getTextBlock() {
|
||||
|
||||
return streamAllSubNodes().filter(SemanticNode::isLeaf).map(SemanticNode::getTextBlock).collect(new TextBlockCollector());
|
||||
return streamAllSubNodes().filter(SemanticNode::isLeaf)
|
||||
.map(SemanticNode::getTextBlock)
|
||||
.collect(new TextBlockCollector());
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Any Node maintains its own Set of Entities.
|
||||
* This Set contains all Entities whose TextRange intersects the TextRange of this node.
|
||||
@ -437,10 +438,10 @@ public interface SemanticNode {
|
||||
|
||||
|
||||
/**
|
||||
* Checks whether this SemanticNode contains all the provided Strings ignoring case.
|
||||
* Checks whether this SemanticNode contains the provided String, case-insensitive.
|
||||
*
|
||||
* @param string A String which the TextBlock might contain
|
||||
* @return true, if this node's TextBlock contains the string ignoring case
|
||||
* @return true, if this node's TextBlock contains the string case-insensitive
|
||||
*/
|
||||
default boolean containsStringIgnoreCase(String string) {
|
||||
|
||||
@ -449,7 +450,7 @@ public interface SemanticNode {
|
||||
|
||||
|
||||
/**
|
||||
* Checks whether this SemanticNode contains any of the provided Strings ignoring case.
|
||||
* Checks whether this SemanticNode contains any of the provided Strings case-insensitive.
|
||||
*
|
||||
* @param strings A List of Strings which the TextBlock might contain
|
||||
* @return true, if this node's TextBlock contains any of the strings
|
||||
@ -462,7 +463,7 @@ public interface SemanticNode {
|
||||
|
||||
|
||||
/**
|
||||
* Checks whether this SemanticNode contains any of the provided Strings ignoring case.
|
||||
* Checks whether this SemanticNode contains any of the provided Strings case-insensitive.
|
||||
*
|
||||
* @param strings A List of Strings which the TextBlock might contain
|
||||
* @return true, if this node's TextBlock contains any of the strings
|
||||
@ -489,7 +490,7 @@ public interface SemanticNode {
|
||||
|
||||
|
||||
/**
|
||||
* Checks whether this SemanticNode contains exactly the provided String as a word ignoring case.
|
||||
* Checks whether this SemanticNode contains exactly the provided String as a word case-insensitive.
|
||||
*
|
||||
* @param word - String which the TextBlock might contain
|
||||
* @return true, if this node's TextBlock contains string
|
||||
@ -519,7 +520,7 @@ public interface SemanticNode {
|
||||
|
||||
|
||||
/**
|
||||
* Checks whether this SemanticNode contains any of the provided Strings as a word ignoring case.
|
||||
* Checks whether this SemanticNode contains any of the provided Strings as a word case-insensitive.
|
||||
*
|
||||
* @param words - A List of Strings which the TextBlock might contain
|
||||
* @return true, if this node's TextBlock contains any of the provided strings
|
||||
@ -551,7 +552,7 @@ public interface SemanticNode {
|
||||
|
||||
|
||||
/**
|
||||
* Checks whether this SemanticNode contains all the provided Strings as word ignoring case.
|
||||
* Checks whether this SemanticNode contains all the provided Strings as words, case-insensitive.
|
||||
*
|
||||
* @param words - A List of Strings which the TextBlock might contain
|
||||
* @return true, if this node's TextBlock contains all the provided strings
|
||||
@ -580,10 +581,10 @@ public interface SemanticNode {
|
||||
|
||||
|
||||
/**
|
||||
* Checks whether this SemanticNode matches the provided regex pattern ignoring case.
|
||||
* Checks whether this SemanticNode matches the provided regex pattern case-insensitive.
|
||||
*
|
||||
* @param regexPattern A String representing a regex pattern, which the TextBlock might contain
|
||||
* @return true, if this node's TextBlock contains the regex pattern ignoring case
|
||||
* @return true, if this node's TextBlock contains the regex pattern case-insensitive
|
||||
*/
|
||||
default boolean matchesRegexIgnoreCase(String regexPattern) {
|
||||
|
||||
|
||||
@ -26,6 +26,9 @@ import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
/**
|
||||
* Represents a table within a document.
|
||||
*/
|
||||
@Data
|
||||
@Builder
|
||||
@AllArgsConstructor
|
||||
|
||||
@ -20,6 +20,9 @@ import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
/**
|
||||
* Represents a single table cell within a table.
|
||||
*/
|
||||
@Data
|
||||
@Builder
|
||||
@AllArgsConstructor
|
||||
|
||||
@ -58,6 +58,12 @@ public class ManualChangesApplicationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Resizes a text entity based on manual resize redaction details.
|
||||
*
|
||||
* @param entityToBeResized The entity to resize.
|
||||
* @param manualResizeRedaction The details of the resize operation.
|
||||
*/
|
||||
public void resize(TextEntity entityToBeResized, ManualResizeRedaction manualResizeRedaction) {
|
||||
|
||||
resizeEntityAndReinsert(entityToBeResized, manualResizeRedaction);
|
||||
@ -140,6 +146,12 @@ public class ManualChangesApplicationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Resizes an image entity based on manual resize redaction instructions.
|
||||
*
|
||||
* @param image The image to resize.
|
||||
* @param manualResizeRedaction The details of the resize operation.
|
||||
*/
|
||||
public void resizeImage(Image image, ManualResizeRedaction manualResizeRedaction) {
|
||||
|
||||
if (manualResizeRedaction.getPositions().isEmpty() || manualResizeRedaction.getPositions() == null) {
|
||||
|
||||
@ -54,6 +54,17 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Creates entities found between specified start and stop strings, case-sensitive.
|
||||
*
|
||||
* @param start The starting string to search for.
|
||||
* @param stop The stopping string to search for.
|
||||
* @param type The type of entity to create.
|
||||
* @param entityType The detailed classification of the entity.
|
||||
* @param node The semantic node within which to search.
|
||||
* @return A stream of {@link TextEntity} identified objects.
|
||||
* @throws IllegalArgumentException if both start and stop strings are empty, indicating there's nothing to search for.
|
||||
*/
|
||||
public Stream<TextEntity> betweenStrings(String start, String stop, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
checkIfBothStartAndEndAreEmpty(start, stop);
|
||||
@ -65,6 +76,17 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Creates entities found between specified start and stop strings, case-insensitive.
|
||||
*
|
||||
* @param start The starting string to search for.
|
||||
* @param stop The stopping string to search for.
|
||||
* @param type The type of entity to create.
|
||||
* @param entityType The detailed classification of the entity.
|
||||
* @param node The semantic node within which to search.
|
||||
* @return A stream of {@link TextEntity} identified objects.
|
||||
* @throws IllegalArgumentException if both start and stop strings are empty, indicating there's nothing to search for.
|
||||
*/
|
||||
public Stream<TextEntity> betweenStringsIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
checkIfBothStartAndEndAreEmpty(start, stop);
|
||||
@ -76,6 +98,17 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Creates entities found between specified start and stop strings, including the start string in the entity, case-sensitive.
|
||||
*
|
||||
* @param start The starting string to search for.
|
||||
* @param stop The stopping string to search for.
|
||||
* @param type The type of entity to create.
|
||||
* @param entityType The detailed classification of the entity.
|
||||
* @param node The semantic node within which to search.
|
||||
* @return A stream of {@link TextEntity} identified objects.
|
||||
* @throws IllegalArgumentException if both start and stop strings are empty, indicating there's nothing to search for.
|
||||
*/
|
||||
public Stream<TextEntity> betweenStringsIncludeStart(String start, String stop, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
checkIfBothStartAndEndAreEmpty(start, stop);
|
||||
@ -92,6 +125,17 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Creates entities found between specified start and stop strings, including the start string in the entity, case-insensitive.
|
||||
*
|
||||
* @param start The starting string to search for.
|
||||
* @param stop The stopping string to search for.
|
||||
* @param type The type of entity to create.
|
||||
* @param entityType The detailed classification of the entity.
|
||||
* @param node The semantic node within which to search.
|
||||
* @return A stream of {@link TextEntity} identified objects.
|
||||
* @throws IllegalArgumentException if both start and stop strings are empty, indicating there's nothing to search for.
|
||||
*/
|
||||
public Stream<TextEntity> betweenStringsIncludeStartIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
checkIfBothStartAndEndAreEmpty(start, stop);
|
||||
@ -108,6 +152,17 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Creates entities found between specified start and stop strings, including the end string in the entity, case-sensitive.
|
||||
*
|
||||
* @param start The starting string to search for.
|
||||
* @param stop The stopping string to search for.
|
||||
* @param type The type of entity to create.
|
||||
* @param entityType The detailed classification of the entity.
|
||||
* @param node The semantic node within which to search.
|
||||
* @return A stream of {@link TextEntity} identified objects.
|
||||
* @throws IllegalArgumentException if both start and stop strings are empty, indicating there's nothing to search for.
|
||||
*/
|
||||
public Stream<TextEntity> betweenStringsIncludeEnd(String start, String stop, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
checkIfBothStartAndEndAreEmpty(start, stop);
|
||||
@ -124,6 +179,17 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Creates entities found between specified start and stop strings, including the end string in the entity, case-insensitive.
|
||||
*
|
||||
* @param start The starting string to search for.
|
||||
* @param stop The stopping string to search for.
|
||||
* @param type The type of entity to create.
|
||||
* @param entityType The detailed classification of the entity.
|
||||
* @param node The semantic node within which to search.
|
||||
* @return A stream of {@link TextEntity} identified objects.
|
||||
* @throws IllegalArgumentException if both start and stop strings are empty, indicating there's nothing to search for.
|
||||
*/
|
||||
public Stream<TextEntity> betweenStringsIncludeEndIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
checkIfBothStartAndEndAreEmpty(start, stop);
|
||||
@ -140,6 +206,17 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Creates entities found between specified start and stop strings, including the start and end string in the entity, case-sensitive.
|
||||
*
|
||||
* @param start The starting string to search for.
|
||||
* @param stop The stopping string to search for.
|
||||
* @param type The type of entity to create.
|
||||
* @param entityType The detailed classification of the entity.
|
||||
* @param node The semantic node within which to search.
|
||||
* @return A stream of {@link TextEntity} identified objects.
|
||||
* @throws IllegalArgumentException if both start and stop strings are empty, indicating there's nothing to search for.
|
||||
*/
|
||||
public Stream<TextEntity> betweenStringsIncludeStartAndEnd(String start, String stop, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
checkIfBothStartAndEndAreEmpty(start, stop);
|
||||
@ -160,6 +237,17 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Creates entities found between specified start and stop strings, including the start and end string in the entity, case-insensitive.
|
||||
*
|
||||
* @param start The starting string to search for.
|
||||
* @param stop The stopping string to search for.
|
||||
* @param type The type of entity to create.
|
||||
* @param entityType The detailed classification of the entity.
|
||||
* @param node The semantic node within which to search.
|
||||
* @return A stream of {@link TextEntity} identified objects.
|
||||
* @throws IllegalArgumentException if both start and stop strings are empty, indicating there's nothing to search for.
|
||||
*/
|
||||
public Stream<TextEntity> betweenStringsIncludeStartAndEndIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
checkIfBothStartAndEndAreEmpty(start, stop);
|
||||
@ -180,6 +268,17 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Identifies the shortest text entities found between any of the given start and stop strings within a specified semantic node, case-sensitive.
|
||||
*
|
||||
* @param starts A list of start strings to search for.
|
||||
* @param stops A list of stop strings to search for.
|
||||
* @param type The type of the entity to be created.
|
||||
* @param entityType The detailed classification of the entity.
|
||||
* @param node The semantic node within which to search.
|
||||
* @return A stream of {@link TextEntity} identified objects.
|
||||
* @throws IllegalArgumentException if both start and stop strings are empty, indicating there's nothing to search for.
|
||||
*/
|
||||
public Stream<TextEntity> shortestBetweenAnyString(List<String> starts, List<String> stops, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
checkIfBothStartAndEndAreEmpty(starts, stops);
|
||||
@ -191,6 +290,17 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Identifies the shortest text entities found between any of the given start and stop strings within a specified semantic node, case-insensitive.
|
||||
*
|
||||
* @param starts A list of start strings to search for.
|
||||
* @param stops A list of stop strings to search for.
|
||||
* @param type The type of the entity to be created.
|
||||
* @param entityType The detailed classification of the entity.
|
||||
* @param node The semantic node within which to search.
|
||||
* @return A stream of {@link TextEntity} identified objects.
|
||||
* @throws IllegalArgumentException if both start and stop strings are empty, indicating there's nothing to search for.
|
||||
*/
|
||||
public Stream<TextEntity> shortestBetweenAnyStringIgnoreCase(List<String> starts, List<String> stops, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
checkIfBothStartAndEndAreEmpty(starts, stops);
|
||||
@ -202,6 +312,18 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Identifies the shortest text entities found between any of the given start and stop strings within a specified semantic node,
|
||||
* case-insensitive, with a length limit.
|
||||
*
|
||||
* @param starts A list of start strings to search for, case-insensitively.
|
||||
* @param stops A list of stop strings to search for, case-insensitively.
|
||||
* @param type The type of the entity to be created.
|
||||
* @param entityType The detailed classification of the entity.
|
||||
* @param node The semantic node within which the search is performed.
|
||||
* @param limit The maximum length of the entity text.
|
||||
* @return A stream of {@link TextEntity} objects found between any of the start and stop strings, case-insensitively, and within the specified limit.
|
||||
*/
|
||||
public Stream<TextEntity> shortestBetweenAnyStringIgnoreCase(List<String> starts, List<String> stops, String type, EntityType entityType, SemanticNode node, int limit) {
|
||||
|
||||
checkIfBothStartAndEndAreEmpty(starts, stops);
|
||||
@ -213,6 +335,16 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Creates entities based on the boundaries identified between start and stop regular expressions within a specified semantic node.
|
||||
*
|
||||
* @param regexStart The regular expression defining the start boundary.
|
||||
* @param regexStop The regular expression defining the stop boundary.
|
||||
* @param type The type of entity to be created.
|
||||
* @param entityType The detailed classification of the entity.
|
||||
* @param node The semantic node within which the search is performed.
|
||||
* @return A stream of {@link TextEntity} objects identified between the start and stop regular expressions.
|
||||
*/
|
||||
public Stream<TextEntity> betweenRegexes(String regexStart, String regexStop, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
TextBlock textBlock = node.getTextBlock();
|
||||
@ -223,6 +355,17 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Creates entities based on the boundaries identified between start and stop regular expressions within a specified semantic node,
|
||||
* case-insensitive.
|
||||
*
|
||||
* @param regexStart The regular expression defining the start boundary, case-insensitive.
|
||||
* @param regexStop The regular expression defining the stop boundary, case-insensitive.
|
||||
* @param type The type of entity to be created.
|
||||
* @param entityType The detailed classification of the entity.
|
||||
* @param node The semantic node within which the search is performed.
|
||||
* @return A stream of {@link TextEntity} objects identified between the start and stop regular expressions, case-insensitively.
|
||||
*/
|
||||
public Stream<TextEntity> betweenRegexesIgnoreCase(String regexStart, String regexStop, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
TextBlock textBlock = node.getTextBlock();
|
||||
@ -233,12 +376,35 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Creates entities based on the boundaries identified between specified start and stop text ranges within a semantic node.
|
||||
* This is a more general method that can be used directly with lists of start and stop {@link TextRange} objects.
|
||||
*
|
||||
* @param startBoundaries A list of start text range boundaries.
|
||||
* @param stopBoundaries A list of stop text range boundaries.
|
||||
* @param type The type of entity to be created.
|
||||
* @param entityType The detailed classification of the entity.
|
||||
* @param node The semantic node within which the search is performed.
|
||||
* @return A stream of {@link TextEntity} objects identified between the start and stop text ranges.
|
||||
*/
|
||||
public Stream<TextEntity> betweenTextRanges(List<TextRange> startBoundaries, List<TextRange> stopBoundaries, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
return betweenTextRanges(startBoundaries, stopBoundaries, type, entityType, node, 0);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Creates entities based on the boundaries identified between specified start and stop text ranges within a semantic node,
|
||||
* with an optional length limit for the entities.
|
||||
*
|
||||
* @param startBoundaries A list of start text range boundaries.
|
||||
* @param stopBoundaries A list of stop text range boundaries.
|
||||
* @param type The type of entity to be created.
|
||||
* @param entityType The detailed classification of the entity.
|
||||
* @param node The semantic node within which the search is performed.
|
||||
* @param limit The maximum length of the entity text; use 0 for no limit.
|
||||
* @return A stream of {@link TextEntity} objects identified between the start and stop text ranges, within the specified limit.
|
||||
*/
|
||||
public Stream<TextEntity> betweenTextRanges(List<TextRange> startBoundaries, List<TextRange> stopBoundaries, String type, EntityType entityType, SemanticNode node, int limit) {
|
||||
|
||||
List<TextRange> entityBoundaries = findNonOverlappingBoundariesBetweenBoundariesWithMinimalDistances(startBoundaries, stopBoundaries);
|
||||
@ -283,6 +449,15 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Creates text entities based on boundaries identified by a search implementation within a specified semantic node.
|
||||
*
|
||||
* @param searchImplementation The search implementation to use for identifying boundaries.
|
||||
* @param type The type of the entity to be created.
|
||||
* @param entityType The detailed classification of the entity.
|
||||
* @param node The semantic node within which the search is performed.
|
||||
* @return A stream of {@link TextEntity} objects corresponding to the identified boundaries.
|
||||
*/
|
||||
public Stream<TextEntity> bySearchImplementation(SearchImplementation searchImplementation, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
return searchImplementation.getBoundaries(node.getTextBlock(), node.getTextRange())
|
||||
@ -294,6 +469,15 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Identifies text entities located immediately after the specified strings within a semantic node.
|
||||
*
|
||||
* @param strings A list of strings to search for. The text immediately following each string is considered for entity creation.
|
||||
* @param type The type of the entity to be created.
|
||||
* @param entityType The detailed classification of the entity.
|
||||
* @param node The semantic node within which the search is performed.
|
||||
* @return A stream of {@link TextEntity} objects found immediately after the specified strings.
|
||||
*/
|
||||
public Stream<TextEntity> lineAfterStrings(List<String> strings, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
TextBlock textBlock = node.getTextBlock();
|
||||
@ -308,6 +492,15 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Identifies text entities located immediately after the specified strings within a semantic node, case-insensitive.
|
||||
*
|
||||
* @param strings A list of strings to search for, case-insensitive. The text immediately following each string is considered for entity creation.
|
||||
* @param type The type of the entity to be created.
|
||||
* @param entityType The detailed classification of the entity.
|
||||
* @param node The semantic node within which the search is performed.
|
||||
* @return A stream of {@link TextEntity} objects found immediately after the specified strings, case-insensitively.
|
||||
*/
|
||||
public Stream<TextEntity> lineAfterStringsIgnoreCase(List<String> strings, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
TextBlock textBlock = node.getTextBlock();
|
||||
@ -322,6 +515,15 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Identifies a text entity located immediately after a specified string within a semantic node.
|
||||
*
|
||||
* @param string The string to search for. The text immediately following this string is considered for entity creation.
|
||||
* @param type The type of the entity to be created.
|
||||
* @param entityType The detailed classification of the entity.
|
||||
* @param node The semantic node within which the search is performed.
|
||||
* @return A stream of {@link TextEntity} objects found immediately after the specified string.
|
||||
*/
|
||||
public Stream<TextEntity> lineAfterString(String string, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
TextBlock textBlock = node.getTextBlock();
|
||||
@ -335,6 +537,15 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Identifies a text entity located immediately after a specified string within a semantic node, case-insensitive.
|
||||
*
|
||||
* @param string The string to search for, case-insensitive. The text immediately following this string is considered for entity creation.
|
||||
* @param type The type of the entity to be created.
|
||||
* @param entityType The detailed classification of the entity.
|
||||
* @param node The semantic node within which the search is performed.
|
||||
* @return A stream of {@link TextEntity} objects found immediately after the specified string, case-insensitively.
|
||||
*/
|
||||
public Stream<TextEntity> lineAfterStringIgnoreCase(String string, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
TextBlock textBlock = node.getTextBlock();
|
||||
@ -348,6 +559,15 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Identifies text entities located immediately after a specified string across table cell columns within a table node.
|
||||
*
|
||||
* @param string The string to search for. The text immediately following this string in subsequent table cells is considered for entity creation.
|
||||
* @param type The type of the entity to be created.
|
||||
* @param entityType The detailed classification of the entity.
|
||||
* @param tableNode The table node within which the search is performed.
|
||||
* @return A stream of {@link TextEntity} objects found across table cell columns immediately after the specified string.
|
||||
*/
|
||||
public Stream<TextEntity> lineAfterStringAcrossColumns(String string, String type, EntityType entityType, Table tableNode) {
|
||||
|
||||
return tableNode.streamTableCells()
|
||||
@ -359,6 +579,15 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Identifies text entities located immediately after a specified string across table cell columns within a table node, case-insensitive.
|
||||
*
|
||||
* @param string The string to search for, case-insensitive. The text immediately following this string in subsequent table cells is considered for entity creation.
|
||||
* @param type The type of the entity to be created.
|
||||
* @param entityType The detailed classification of the entity.
|
||||
* @param tableNode The table node within which the search is performed.
|
||||
* @return A stream of {@link TextEntity} objects found across table cell columns immediately after the specified string, case-insensitively.
|
||||
*/
|
||||
public Stream<TextEntity> lineAfterStringAcrossColumnsIgnoreCase(String string, String type, EntityType entityType, Table tableNode) {
|
||||
|
||||
return tableNode.streamTableCells()
|
||||
@ -397,6 +626,15 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Attempts to create a text entity for text within a semantic node, immediately after a specified string.
|
||||
*
|
||||
* @param semanticNode The semantic node within which to search for the string.
|
||||
* @param string The string after which the entity should be created.
|
||||
* @param type The type of entity to create.
|
||||
* @param entityType The entity's classification.
|
||||
* @return An {@link Optional} containing the created {@link TextEntity}, or {@link Optional#empty()} if the string is not found.
|
||||
*/
|
||||
public Optional<TextEntity> semanticNodeAfterString(SemanticNode semanticNode, String string, String type, EntityType entityType) {
|
||||
|
||||
var textBlock = semanticNode.getTextBlock();
|
||||
@ -415,30 +653,77 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Identifies text entities based on matches to a regular expression pattern within a semantic node's text block,
|
||||
* considering line breaks in the text.
|
||||
*
|
||||
* @param regexPattern The regex pattern to match.
|
||||
* @param type The type of entity to create.
|
||||
* @param entityType The entity's classification.
|
||||
* @param node The semantic node containing the text block to search.
|
||||
* @return A stream of identified {@link TextEntity} objects.
|
||||
*/
|
||||
public Stream<TextEntity> byRegexWithLineBreaks(String regexPattern, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
return byRegexWithLineBreaks(regexPattern, type, entityType, 0, node);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Identifies text entities based on matches to a regular expression pattern within a semantic node's text block, considering line breaks in the text, case-insensitive.
|
||||
*
|
||||
* @param regexPattern The regex pattern to match.
|
||||
* @param type The type of entity to create.
|
||||
* @param entityType The entity's classification.
|
||||
* @param node The semantic node containing the text block to search.
|
||||
* @return A stream of identified {@link TextEntity} objects.
|
||||
*/
|
||||
public Stream<TextEntity> byRegexWithLineBreaksIgnoreCase(String regexPattern, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
return byRegexWithLineBreaksIgnoreCase(regexPattern, type, entityType, 0, node);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Identifies text entities based on matches to a regular expression pattern within a semantic node's text block.
|
||||
*
|
||||
* @param regexPattern The regex pattern to match.
|
||||
* @param type The type of entity to create.
|
||||
* @param entityType The entity's classification.
|
||||
* @param node The semantic node containing the text block to search.
|
||||
* @return A stream of identified {@link TextEntity} objects.
|
||||
*/
|
||||
public Stream<TextEntity> byRegex(String regexPattern, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
return byRegex(regexPattern, type, entityType, 0, node);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Identifies text entities based on matches to a regular expression pattern within a semantic node's text block, case-insensitive.
|
||||
*
|
||||
* @param regexPattern The regex pattern to match.
|
||||
* @param type The type of entity to create.
|
||||
* @param entityType The entity's classification.
|
||||
* @param node The semantic node containing the text block to search.
|
||||
* @return A stream of identified {@link TextEntity} objects.
|
||||
*/
|
||||
public Stream<TextEntity> byRegexIgnoreCase(String regexPattern, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
return byRegexIgnoreCase(regexPattern, type, entityType, 0, node);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Identifies text entities within a semantic node's text block based on a regex pattern that includes line breaks.
|
||||
*
|
||||
* @param regexPattern Regex pattern to match, including handling for line breaks.
|
||||
* @param type The type of entity to create.
|
||||
* @param entityType The entity's classification.
|
||||
* @param group The regex group to target for entity creation.
|
||||
* @param node The semantic node to search within.
|
||||
* @return A stream of {@link TextEntity} objects that match the regex pattern.
|
||||
*/
|
||||
public Stream<TextEntity> byRegexWithLineBreaks(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) {
|
||||
|
||||
return RedactionSearchUtility.findTextRangesByRegexWithLineBreaks(regexPattern, group, node.getTextBlock())
|
||||
@ -449,6 +734,16 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Identifies text entities within a semantic node's text block based on a regex pattern that includes line breaks, case-insensitive.
|
||||
*
|
||||
* @param regexPattern Regex pattern to match, including handling for line breaks.
|
||||
* @param type The type of entity to create.
|
||||
* @param entityType The entity's classification.
|
||||
* @param group The regex group to target for entity creation.
|
||||
* @param node The semantic node to search within.
|
||||
* @return A stream of {@link TextEntity} objects that match the regex pattern.
|
||||
*/
|
||||
public Stream<TextEntity> byRegexWithLineBreaksIgnoreCase(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) {
|
||||
|
||||
return RedactionSearchUtility.findTextRangesByRegexWithLineBreaksIgnoreCase(regexPattern, group, node.getTextBlock())
|
||||
@ -459,6 +754,16 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Identifies text entities based on a simple regex pattern.
|
||||
*
|
||||
* @param regexPattern The regex pattern to match.
|
||||
* @param type The type of entity to create.
|
||||
* @param entityType The entity's classification.
|
||||
* @param group The regex group to target for entity creation.
|
||||
* @param node The semantic node to search within.
|
||||
* @return A stream of {@link TextEntity} objects that match the regex pattern.
|
||||
*/
|
||||
public Stream<TextEntity> byRegex(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) {
|
||||
|
||||
return RedactionSearchUtility.findTextRangesByRegex(regexPattern, group, node.getTextBlock())
|
||||
@ -469,6 +774,16 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Identifies text entities based on a simple regex pattern, case-insensitive.
|
||||
*
|
||||
* @param regexPattern The regex pattern to match, applied case-insensitively.
|
||||
* @param type The type of entity to create.
|
||||
* @param entityType The entity's classification.
|
||||
* @param group The regex group to target for entity creation.
|
||||
* @param node The semantic node to search within.
|
||||
* @return A stream of {@link TextEntity} objects that match the regex pattern.
|
||||
*/
|
||||
public Stream<TextEntity> byRegexIgnoreCase(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) {
|
||||
|
||||
return RedactionSearchUtility.findTextRangesByRegexIgnoreCase(regexPattern, group, node.getTextBlock())
|
||||
@ -479,6 +794,15 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Identifies text entities based on an exact string match within a semantic node's text block.
|
||||
*
|
||||
* @param keyword String keyword to search for.
|
||||
* @param type The type of entity to create.
|
||||
* @param entityType The entity's classification.
|
||||
* @param node The semantic node to search within.
|
||||
* @return A stream of {@link TextEntity} objects that match the exact string.
|
||||
*/
|
||||
public Stream<TextEntity> byString(String keyword, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
return RedactionSearchUtility.findTextRangesByString(keyword, node.getTextBlock())
|
||||
@ -489,6 +813,15 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Identifies text entities based on an exact string match within a semantic node's text block, case-insensitive.
|
||||
*
|
||||
* @param keyword String keyword to search for.
|
||||
* @param type The type of entity to create.
|
||||
* @param entityType The entity's classification.
|
||||
* @param node The semantic node to search within.
|
||||
* @return A stream of {@link TextEntity} objects that match the exact string, case-insensitive.
|
||||
*/
|
||||
public Stream<TextEntity> byStringIgnoreCase(String keyword, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
return RedactionSearchUtility.findTextRangesByStringIgnoreCase(keyword, node.getTextBlock())
|
||||
@ -499,6 +832,14 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Extracts text entities from paragraphs only, within a given semantic node.
|
||||
*
|
||||
* @param type The type of entity to create.
|
||||
* @param entityType The entity's classification.
|
||||
* @param node The semantic node to search within.
|
||||
* @return A stream of {@link TextEntity} objects extracted from paragraphs only.
|
||||
*/
|
||||
public Stream<TextEntity> bySemanticNodeParagraphsOnly(SemanticNode node, String type, EntityType entityType) {
|
||||
|
||||
return node.streamAllSubNodesOfType(NodeType.PARAGRAPH)
|
||||
@ -508,6 +849,14 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Merges consecutive paragraphs into a single text entity within a given semantic node.
|
||||
*
|
||||
* @param type The type of entity to create.
|
||||
* @param entityType The entity's classification.
|
||||
* @param node The semantic node to search within.
|
||||
* @return A stream of merged {@link TextEntity} objects from consecutive paragraphs.
|
||||
*/
|
||||
public Stream<TextEntity> bySemanticNodeParagraphsOnlyMergeConsecutive(SemanticNode node, String type, EntityType entityType) {
|
||||
|
||||
return node.streamAllSubNodesOfType(NodeType.PARAGRAPH)
|
||||
@ -520,6 +869,15 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Creates a text entity immediately following a specified string within a semantic node.
|
||||
*
|
||||
* @param string The string after which to create the entity.
|
||||
* @param type The type of entity to create.
|
||||
* @param entityType The entity's classification.
|
||||
* @param node The semantic node to search within.
|
||||
* @return An {@link Optional} containing the created {@link TextEntity}, or {@link Optional#empty()} if not found.
|
||||
*/
|
||||
public Optional<TextEntity> semanticNodeAfterString(String string, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
if (!node.containsString(string)) {
|
||||
@ -530,6 +888,14 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Creates a text entity based on the entire text range of a semantic node.
|
||||
*
|
||||
* @param node The semantic node to base the text entity on.
|
||||
* @param type The type of entity to create.
|
||||
* @param entityType The entity's classification.
|
||||
* @return An {@link Optional} containing the created {@link TextEntity}, or {@link Optional#empty()} if not valid.
|
||||
*/
|
||||
public Optional<TextEntity> bySemanticNode(SemanticNode node, String type, EntityType entityType) {
|
||||
|
||||
TextRange textRange = node.getTextBlock().getTextRange();
|
||||
@ -544,6 +910,13 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Expands a text entity's start boundary based on a regex pattern match.
|
||||
*
|
||||
* @param entity The original text entity to expand.
|
||||
* @param regexPattern The regex pattern used to find the new start boundary.
|
||||
* @return An {@link Optional} containing the expanded {@link TextEntity}, or {@link Optional#empty()} if not valid.
|
||||
*/
|
||||
public Optional<TextEntity> byPrefixExpansionRegex(TextEntity entity, String regexPattern) {
|
||||
|
||||
int expandedStart = RedactionSearchUtility.getExpandedStartByRegex(entity, regexPattern);
|
||||
@ -551,6 +924,13 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Expands a text entity's end boundary based on a regex pattern match.
|
||||
*
|
||||
* @param entity The original text entity to expand.
|
||||
* @param regexPattern The regex pattern used to find the new end boundary.
|
||||
* @return An {@link Optional} containing the expanded {@link TextEntity}, or {@link Optional#empty()} if not valid.
|
||||
*/
|
||||
public Optional<TextEntity> bySuffixExpansionRegex(TextEntity entity, String regexPattern) {
|
||||
|
||||
int expandedEnd = RedactionSearchUtility.getExpandedEndByRegex(entity, regexPattern);
|
||||
@ -594,7 +974,7 @@ public class EntityCreationService {
|
||||
throw new IllegalArgumentException(String.format("%s is not in the %s of the provided semantic node %s", textRange, node.getTextRange(), node));
|
||||
}
|
||||
TextRange trimmedTextRange = textRange.trim(node.getTextBlock());
|
||||
if (trimmedTextRange.length() == 0){
|
||||
if (trimmedTextRange.length() == 0) {
|
||||
return Optional.empty();
|
||||
}
|
||||
TextEntity entity = TextEntity.initialEntityNode(trimmedTextRange, type, entityType, node);
|
||||
@ -646,6 +1026,16 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Merges a list of text entities into a single entity, assuming they intersect and are of the same type.
|
||||
*
|
||||
* @param entitiesToMerge The list of entities to merge.
|
||||
* @param type The type for the merged entity.
|
||||
* @param entityType The entity's classification.
|
||||
* @param node The semantic node related to these entities.
|
||||
* @return A single merged {@link TextEntity}.
|
||||
* @throws IllegalArgumentException If entities do not intersect or have different types.
|
||||
*/
|
||||
public TextEntity mergeEntitiesOfSameType(List<TextEntity> entitiesToMerge, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
if (!allEntitiesIntersectAndHaveSameTypes(entitiesToMerge)) {
|
||||
@ -683,11 +1073,22 @@ public class EntityCreationService {
|
||||
addEntityToGraph(mergedEntity, node);
|
||||
insertToKieSession(mergedEntity);
|
||||
|
||||
entitiesToMerge.stream().filter(e -> !e.equals(mergedEntity)).forEach(node.getEntities()::remove);
|
||||
entitiesToMerge.stream()
|
||||
.filter(e -> !e.equals(mergedEntity))
|
||||
.forEach(node.getEntities()::remove);
|
||||
return mergedEntity;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Copies a list of text entities, creating a new entity for each in the list with the same properties.
|
||||
*
|
||||
* @param entities The list of entities to copy.
|
||||
* @param type The type for the copied entities.
|
||||
* @param entityType The classification for the copied entities.
|
||||
* @param node The semantic node related to these entities.
|
||||
* @return A stream of copied {@link TextEntity} objects.
|
||||
*/
|
||||
public Stream<TextEntity> copyEntities(List<TextEntity> entities, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
return entities.stream()
|
||||
@ -695,6 +1096,15 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Copies a single text entity, preserving all its matched rules.
|
||||
*
|
||||
* @param entity The entity to copy.
|
||||
* @param type The type for the copied entity.
|
||||
* @param entityType The classification for the copied entity.
|
||||
* @param node The semantic node related to the entity.
|
||||
* @return A copied {@link TextEntity} with matched rules.
|
||||
*/
|
||||
public TextEntity copyEntity(TextEntity entity, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
var newEntity = copyEntityWithoutRules(entity, type, entityType, node);
|
||||
@ -703,6 +1113,15 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Copies a single text entity without its matched rules.
|
||||
*
|
||||
* @param entity The entity to copy.
|
||||
* @param type The type for the copied entity.
|
||||
* @param entityType The classification for the copied entity.
|
||||
* @param node The semantic node related to the entity.
|
||||
* @return A copied {@link TextEntity} without matched rules.
|
||||
*/
|
||||
public TextEntity copyEntityWithoutRules(TextEntity entity, String type, EntityType entityType, SemanticNode node) {
|
||||
|
||||
TextEntity newEntity = byTextRangeWithEngine(entity.getTextRange(), type, entityType, node, entity.getEngines()).orElseThrow(() -> new NotFoundException(
|
||||
@ -714,14 +1133,27 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
public void insertToKieSession(TextEntity mergedEntity) {
|
||||
/**
|
||||
* Inserts a text entity into the kieSession for further processing.
|
||||
*
|
||||
* @param textEntity The text entity to insert.
|
||||
*/
|
||||
public void insertToKieSession(TextEntity textEntity) {
|
||||
|
||||
if (kieSession != null) {
|
||||
kieSession.insert(mergedEntity);
|
||||
kieSession.insert(textEntity);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Creates a text entity based on a Named Entity Recognition (NER) entity.
|
||||
*
|
||||
* @param nerEntity The NER entity used for creating the text entity.
|
||||
* @param entityType The entity's classification.
|
||||
* @param semanticNode The semantic node related to the NER entity.
|
||||
* @return A new {@link TextEntity} based on the NER entity.
|
||||
*/
|
||||
public TextEntity byNerEntity(NerEntities.NerEntity nerEntity, EntityType entityType, SemanticNode semanticNode) {
|
||||
|
||||
return byTextRangeWithEngine(nerEntity.textRange(), nerEntity.type(), entityType, semanticNode, Set.of(Engine.NER)).orElseThrow(() -> new NotFoundException(
|
||||
@ -729,24 +1161,59 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Creates a text entity based on a Named Entity Recognition (NER) entity, with a specified type.
|
||||
*
|
||||
* @param nerEntity The NER entity used for creating the text entity.
|
||||
* @param type Type of the entity.
|
||||
* @param entityType The entity's classification.
|
||||
* @param semanticNode The semantic node related to the NER entity.
|
||||
* @return A new {@link TextEntity} based on the NER entity.
|
||||
*/
|
||||
public TextEntity byNerEntity(NerEntities.NerEntity nerEntity, String type, EntityType entityType, SemanticNode semanticNode) {
|
||||
|
||||
return byTextRangeWithEngine(nerEntity.textRange(), type, entityType, semanticNode, Set.of(Engine.NER)).orElseThrow(() -> new NotFoundException("No entity present!"));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Optionally creates a text entity based on a Named Entity Recognition (NER) entity.
|
||||
*
|
||||
* @param nerEntity The NER entity used for creating the text entity.
|
||||
* @param entityType The entity's classification.
|
||||
* @param semanticNode The semantic node related to the NER entity.
|
||||
* @return An {@link Optional} containing the new {@link TextEntity} based on the NER entity, or {@link Optional#empty()} if not created.
|
||||
*/
|
||||
public Optional<TextEntity> optionalByNerEntity(NerEntities.NerEntity nerEntity, EntityType entityType, SemanticNode semanticNode) {
|
||||
|
||||
return byTextRangeWithEngine(nerEntity.textRange(), nerEntity.type(), entityType, semanticNode, Set.of(Engine.NER));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Optionally creates a text entity based on a Named Entity Recognition (NER) entity, with a specified type.
|
||||
*
|
||||
* @param nerEntity The NER entity used for creating the text entity.
|
||||
* @param type Type of the entity.
|
||||
* @param entityType The entity's classification.
|
||||
* @param semanticNode The semantic node related to the NER entity.
|
||||
* @return An {@link Optional} containing the new {@link TextEntity} based on the NER entity, or {@link Optional#empty()} if not created.
|
||||
*/
|
||||
public Optional<TextEntity> optionalByNerEntity(NerEntities.NerEntity nerEntity, String type, EntityType entityType, SemanticNode semanticNode) {
|
||||
|
||||
return byTextRangeWithEngine(nerEntity.textRange(), type, entityType, semanticNode, Set.of(Engine.NER));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Combines multiple NER entities into combined text entities.
|
||||
*
|
||||
* @param nerEntities The collection of NER entities to combine.
|
||||
* @param type The type for the combined entity.
|
||||
* @param entityType The classification for the combined entity.
|
||||
* @param semanticNode The semantic node related to these entities.
|
||||
* @return A stream of combined {@link TextEntity} objects.
|
||||
*/
|
||||
public Stream<TextEntity> combineNerEntitiesToCbiAddressDefaults(NerEntities nerEntities, String type, EntityType entityType, SemanticNode semanticNode) {
|
||||
|
||||
return NerEntitiesAdapter.combineNerEntitiesToCbiAddressDefaults(nerEntities)
|
||||
@ -756,12 +1223,25 @@ public class EntityCreationService {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Validates if a given text range within a text block represents a valid entity.
|
||||
*
|
||||
* @param textBlock The text block containing the text range.
|
||||
* @param textRange The text range to validate.
|
||||
* @return true if the text range has a length greater than zero and its boundary is surrounded by separators, false otherwise.
|
||||
*/
|
||||
public boolean isValidEntityTextRange(TextBlock textBlock, TextRange textRange) {
|
||||
|
||||
return textRange.length() > 0 && boundaryIsSurroundedBySeparators(textBlock, textRange);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Adds a text entity to its related semantic node and updates the document tree accordingly.
|
||||
*
|
||||
* @param entity The text entity to add.
|
||||
* @param node The semantic node related to the entity.
|
||||
*/
|
||||
public void addEntityToGraph(TextEntity entity, SemanticNode node) {
|
||||
|
||||
DocumentTree documentTree = node.getDocumentTree();
|
||||
|
||||
@ -18,6 +18,13 @@ import lombok.experimental.UtilityClass;
|
||||
@UtilityClass
|
||||
public class RedactionSearchUtility {
|
||||
|
||||
/**
|
||||
* Checks if any part of a CharSequence matches a given regex pattern.
|
||||
*
|
||||
* @param charSequence The CharSequence to be searched.
|
||||
* @param regexPattern The regex pattern to match against.
|
||||
* @return true if any part of the CharSequence matches the regex pattern.
|
||||
*/
|
||||
public static boolean anyMatch(CharSequence charSequence, String regexPattern) {
|
||||
|
||||
Pattern pattern = Patterns.getCompiledPattern(regexPattern, false);
|
||||
@ -25,6 +32,13 @@ public class RedactionSearchUtility {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks if any part of a CharSequence matches a given regex pattern, case-insensitive.
|
||||
*
|
||||
* @param charSequence The CharSequence to be searched.
|
||||
* @param regexPattern The regex pattern to match against.
|
||||
* @return true if any part of the CharSequence matches the regex pattern.
|
||||
*/
|
||||
public static boolean anyMatchIgnoreCase(CharSequence charSequence, String regexPattern) {
|
||||
|
||||
Pattern pattern = Patterns.getCompiledPattern(regexPattern, true);
|
||||
@ -32,24 +46,53 @@ public class RedactionSearchUtility {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks if the entirety of a CharSequence exactly matches a given regex pattern.
|
||||
*
|
||||
* @param charSequence The CharSequence to be matched.
|
||||
* @param regexPattern The regex pattern to match against.
|
||||
* @return true if the CharSequence exactly matches the regex pattern.
|
||||
*/
|
||||
public static boolean exactMatch(CharSequence charSequence, String regexPattern) {
|
||||
|
||||
return charSequence.toString().matches(regexPattern);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks if any part of a TextBlock matches a given regex pattern, case-insensitive.
|
||||
*
|
||||
* @param textBlock The TextBlock to be searched.
|
||||
* @param regexPattern The regex pattern to match against.
|
||||
* @return true if any part of the TextBlock matches the regex pattern.
|
||||
*/
|
||||
public static boolean anyMatchIgnoreCase(TextBlock textBlock, String regexPattern) {
|
||||
|
||||
return anyMatchIgnoreCase(textBlock.getSearchText(), regexPattern);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks if any part of a TextBlock matches a given regex pattern.
|
||||
*
|
||||
* @param textBlock The TextBlock to be searched.
|
||||
* @param regexPattern The regex pattern to match against.
|
||||
* @return true if any part of the TextBlock matches the regex pattern.
|
||||
*/
|
||||
public static boolean anyMatch(TextBlock textBlock, String regexPattern) {
|
||||
|
||||
return anyMatch(textBlock.getSearchText(), regexPattern);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Finds the first TextRange in a given CharSequence that matches a regex pattern.
|
||||
*
|
||||
* @param regexPattern The regex pattern to match against.
|
||||
* @param searchText The CharSequence to be searched.
|
||||
* @return The first TextRange that matches the pattern.
|
||||
* @throws IllegalArgumentException If no match is found.
|
||||
*/
|
||||
public static TextRange findFirstTextRange(String regexPattern, CharSequence searchText) {
|
||||
|
||||
Pattern pattern = Patterns.getCompiledPattern(regexPattern, false);
|
||||
@ -61,6 +104,13 @@ public class RedactionSearchUtility {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Expands the end boundary of a TextEntity based on a subsequent regex match.
|
||||
*
|
||||
* @param entity The entity to expand.
|
||||
* @param regexPattern The regex pattern used for expansion.
|
||||
* @return The new end boundary index.
|
||||
*/
|
||||
public static int getExpandedEndByRegex(TextEntity entity, String regexPattern) {
|
||||
|
||||
int expandedEnd;
|
||||
@ -74,6 +124,13 @@ public class RedactionSearchUtility {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Expands the start boundary of a TextEntity based on a regex pattern match.
|
||||
*
|
||||
* @param entity The entity to expand.
|
||||
* @param regexPattern The regex pattern used for expansion.
|
||||
* @return The new start boundary index.
|
||||
*/
|
||||
public static int getExpandedStartByRegex(TextEntity entity, String regexPattern) {
|
||||
|
||||
int expandedStart;
|
||||
@ -87,6 +144,14 @@ public class RedactionSearchUtility {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Identifies all lines within a text block that fall within a specified vertical range.
|
||||
*
|
||||
* @param maxY The maximum Y-coordinate of the vertical range.
|
||||
* @param minY The minimum Y-coordinate of the vertical range.
|
||||
* @param textBlock The text block containing the lines to be checked.
|
||||
* @return A {@link TextRange} encompassing all lines within the specified Y-coordinate range.
|
||||
*/
|
||||
public static TextRange findTextRangesOfAllLinesInYRange(double maxY, double minY, TextBlock textBlock) {
|
||||
|
||||
List<TextRange> lineBoundaries = IntStream.range(0, textBlock.numberOfLines()).boxed()
|
||||
@ -107,6 +172,13 @@ public class RedactionSearchUtility {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Finds TextRanges matching a regex pattern within a TextBlock.
|
||||
*
|
||||
* @param regexPattern The regex pattern to match against.
|
||||
* @param textBlock The TextBlock to search within.
|
||||
* @return A list of TextRanges corresponding to regex matches.
|
||||
*/
|
||||
public static List<TextRange> findTextRangesByRegex(String regexPattern, TextBlock textBlock) {
|
||||
|
||||
Pattern pattern = Patterns.getCompiledPattern(regexPattern, false);
|
||||
@ -115,6 +187,14 @@ public class RedactionSearchUtility {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Finds TextRanges matching a regex pattern within a TextBlock, capturing a specific group.
|
||||
*
|
||||
* @param regexPattern The regex pattern to match against.
|
||||
* @param group The group to capture within the regex pattern.
|
||||
* @param textBlock The TextBlock to search within.
|
||||
* @return A list of TextRanges corresponding to regex matches.
|
||||
*/
|
||||
public static List<TextRange> findTextRangesByRegex(String regexPattern, int group, TextBlock textBlock) {
|
||||
|
||||
Pattern pattern = Patterns.getCompiledPattern(regexPattern, false);
|
||||
@ -122,6 +202,14 @@ public class RedactionSearchUtility {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Finds text ranges that match a regex pattern with consideration for line breaks within a text block.
|
||||
*
|
||||
* @param regexPattern The regex pattern to search for, allowing for multiline matches.
|
||||
* @param group The regex pattern group to extract from matches.
|
||||
* @param textBlock The text block to search within.
|
||||
* @return A list of {@link TextRange} objects corresponding to the matches found.
|
||||
*/
|
||||
public static List<TextRange> findTextRangesByRegexWithLineBreaks(String regexPattern, int group, TextBlock textBlock) {
|
||||
|
||||
Pattern pattern = Patterns.getCompiledMultilinePattern(regexPattern, false);
|
||||
@ -129,6 +217,13 @@ public class RedactionSearchUtility {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Finds text ranges within a text block that match a given regex pattern with consideration for line breaks, case-insensitive.
|
||||
*
|
||||
* @param regexPattern The regex pattern to search for, with case-insensitive matching.
|
||||
* @param group The regex pattern group to extract from matches.
|
||||
* @param textBlock The text block to search within.
|
||||
* @return A list of {@link TextRange} objects corresponding to the matches found.
|
||||
*/
|
||||
public static List<TextRange> findTextRangesByRegexWithLineBreaksIgnoreCase(String regexPattern, int group, TextBlock textBlock) {
|
||||
|
||||
Pattern pattern = Patterns.getCompiledMultilinePattern(regexPattern, true);
|
||||
@ -136,6 +231,13 @@ public class RedactionSearchUtility {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Finds text ranges within a text block that match a given regex pattern, case-insensitive.
|
||||
*
|
||||
* @param regexPattern The regex pattern to search for.
|
||||
* @param textBlock The text block to search within.
|
||||
* @return A list of {@link TextRange} objects corresponding to the matches found, with case-insensitive matching.
|
||||
*/
|
||||
public static List<TextRange> findTextRangesByRegexIgnoreCase(String regexPattern, TextBlock textBlock) {
|
||||
|
||||
Pattern pattern = Patterns.getCompiledPattern(regexPattern, true);
|
||||
@ -143,6 +245,14 @@ public class RedactionSearchUtility {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Finds text ranges within a text block that match a given regex pattern, capturing a specific group, with case-insensitive matching.
|
||||
*
|
||||
* @param regexPattern The regex pattern to search for.
|
||||
* @param group The group within the regex pattern to capture.
|
||||
* @param textBlock The text block to search within.
|
||||
* @return A list of {@link TextRange} objects corresponding to the group matches found, with case-insensitive matching.
|
||||
*/
|
||||
public static List<TextRange> findTextRangesByRegexIgnoreCase(String regexPattern, int group, TextBlock textBlock) {
|
||||
|
||||
Pattern pattern = Patterns.getCompiledPattern(regexPattern, true);
|
||||
@ -173,6 +283,13 @@ public class RedactionSearchUtility {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Finds all occurrences of a specified string within a text block and returns their positions as text ranges.
|
||||
*
|
||||
* @param searchString The string to search for within the text block.
|
||||
* @param textBlock The text block to search within.
|
||||
* @return A list of {@link TextRange} objects representing the start and end positions of each occurrence of the search string.
|
||||
*/
|
||||
public static List<TextRange> findTextRangesByString(String searchString, TextBlock textBlock) {
|
||||
|
||||
List<TextRange> boundaries = new LinkedList<>();
|
||||
@ -183,6 +300,13 @@ public class RedactionSearchUtility {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Finds all occurrences of a specified string within a text block, case-insensitive, and returns their positions as text ranges.
|
||||
*
|
||||
* @param searchString The string to search for within the text block, case-insensitively.
|
||||
* @param textBlock The text block to search within.
|
||||
* @return A list of {@link TextRange} objects representing the start and end positions of each occurrence of the search string, case-insensitive.
|
||||
*/
|
||||
public static List<TextRange> findTextRangesByStringIgnoreCase(String searchString, TextBlock textBlock) {
|
||||
|
||||
Pattern pattern = Pattern.compile(Pattern.quote(searchString), Pattern.CASE_INSENSITIVE);
|
||||
@ -190,6 +314,13 @@ public class RedactionSearchUtility {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Searches a text block for all occurrences of each string in a list and returns their positions as text ranges.
|
||||
*
|
||||
* @param searchList A list of strings to search for within the text block.
|
||||
* @param textBlock The text block to search within.
|
||||
* @return A list of {@link TextRange} objects representing the start and end positions of occurrences of each string in the list.
|
||||
*/
|
||||
public static List<TextRange> findTextRangesByList(List<String> searchList, TextBlock textBlock) {
|
||||
|
||||
List<TextRange> boundaries = new LinkedList<>();
|
||||
@ -200,6 +331,13 @@ public class RedactionSearchUtility {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Searches a text block for all occurrences of each string in a list, case-insensitive, and returns their positions as text ranges.
|
||||
*
|
||||
* @param searchList A list of strings to search for within the text block, case-insensitively.
|
||||
* @param textBlock The text block to search within.
|
||||
* @return A list of {@link TextRange} objects representing the start and end positions of occurrences of each string in the list, case-insensitive.
|
||||
*/
|
||||
public static List<TextRange> findTextRangesByListIgnoreCase(List<String> searchList, TextBlock textBlock) {
|
||||
|
||||
List<TextRange> boundaries = new LinkedList<>();
|
||||
|
||||
@ -12,14 +12,12 @@ import java.util.Collection;
|
||||
import java.util.stream.Stream;
|
||||
import java.util.Optional;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.*;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.*;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRule;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRule
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.*;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Section;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell;
|
||||
@ -31,14 +29,8 @@ import com.iqser.red.service.redaction.v1.server.model.document.nodes.ImageType;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Headline;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SectionIdentifier;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Footer;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Header;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.*;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.ConcatenatedTextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.model.NerEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryModel;
|
||||
|
||||
@ -12,14 +12,12 @@ import java.util.Collection;
|
||||
import java.util.stream.Stream;
|
||||
import java.util.Optional;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.*;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.*;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRule;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRule
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.*;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Section;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell;
|
||||
@ -31,14 +29,8 @@ import com.iqser.red.service.redaction.v1.server.model.document.nodes.ImageType;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Headline;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SectionIdentifier;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Footer;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Header;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.*;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.ConcatenatedTextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.model.NerEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryModel;
|
||||
|
||||
@ -12,14 +12,12 @@ import java.util.Collection;
|
||||
import java.util.stream.Stream;
|
||||
import java.util.Optional;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.*;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.TextRange;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.*;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.IEntity;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.EntityType;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRule;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.TextEntity
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.entity.MatchedRule
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.*;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Section;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell;
|
||||
@ -31,14 +29,8 @@ import com.iqser.red.service.redaction.v1.server.model.document.nodes.ImageType;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Page;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Headline;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SectionIdentifier;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Footer;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Header;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.NodeType;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.*;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.TextBlockCollector;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.AtomicTextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.textblock.ConcatenatedTextBlock;
|
||||
import com.iqser.red.service.redaction.v1.server.model.NerEntities;
|
||||
import com.iqser.red.service.redaction.v1.server.model.dictionary.Dictionary;
|
||||
import com.iqser.red.service.redaction.v1.server.model.dictionary.DictionaryModel;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user