/** * Searches the provided SemanticNode for the keyword and creates a RedactionEntity for each occurrence. * @param keyword The string to search for * @param type The type of the RedactionEntity to be created * @param entityType The EntityType of the RedactionEntity to be created * @param node The SemanticNode to search in * @return A Stream of RedactionEntities with the keyword as value, the type as type and the provided EntityType */ public Stream<RedactionEntity> byString(String keyword, String type, EntityType entityType, SemanticNode node) /** * Same as byString, but case insensitive. */ public Stream<RedactionEntity> byStringIgnoreCase(String keyword, String type, EntityType entityType, SemanticNode node) /** * Searches the provided SemanticNode with the regexPattern and creates a new RedactionEntity from the provided group for each occurrence. * @param regexPattern The regular expression to search with * @param type The type of the RedactionEntity to be created * @param entityType The EntityType of the RedactionEntity to be created * @param group The group of the regexPattern whose match should become the entity * @param node The SemanticNode to search in * @return A Stream of RedactionEntities with the text matched by the group as value, the type as type and the provided EntityType */ public Stream<RedactionEntity> byRegex(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) /** * Same as byRegex, but case insensitive. */ public Stream<RedactionEntity> byRegexIgnoreCase(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) /** * Same as byRegex, but can handle patterns with line breaks. */ public Stream<RedactionEntity> byRegexWithLineBreaks(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) /** * Same as byRegexWithLineBreaks, but case insensitive. 
*/ public Stream<RedactionEntity> byRegexWithLineBreaksIgnoreCase(String regexPattern, String type, EntityType entityType, int group, SemanticNode node) /** * Finds the provided string and creates a new RedactionEntity from the text following it, up to the end of the line in which it is found. * @param string The keyword to search for * @param type The type of the RedactionEntity to be created * @param entityType The EntityType of the RedactionEntity to be created * @param node The SemanticNode to search in * @return A Stream of RedactionEntities with the text after the keyword as value, the type as type and the provided EntityType */ public Stream<RedactionEntity> lineAfterString(String string, String type, EntityType entityType, SemanticNode node) /** * Same as lineAfterString, but with multiple keywords. */ public Stream<RedactionEntity> lineAfterStrings(List<String> strings, String type, EntityType entityType, SemanticNode node) /** * Finds the provided string in a TableCell and creates a new RedactionEntity in the adjacent table cells to the right of it in the same line. * @param string The keyword to search for * @param type The type of the RedactionEntity to be created * @param entityType The EntityType of the RedactionEntity to be created * @param table The TableNode to search in * @return A Stream of RedactionEntities with the text of the adjacent cells as value, the type as type and the provided EntityType */ public Stream<RedactionEntity> lineAfterStringAcrossColumns(String string, String type, EntityType entityType, TableNode table) /** * Creates a redaction entity based on the given boundary, type, entity type, and semantic node. * * @param boundary The boundary of the redaction entity. * @param type The type of the redaction entity. * @param entityType The entity type of the redaction entity. * @param node The semantic node where the boundary is. * @return An Optional containing the new redaction entity. 
*/ public Optional<RedactionEntity> byBoundary(Boundary boundary, String type, EntityType entityType, SemanticNode node) /** * Creates new RedactionEntities between the provided start and stop boundaries. The start and stop boundaries are excluded. * If any boundaries of the new RedactionEntities overlap, only the shortest boundary will be used. * @param startBoundaries List of start boundaries * @param stopBoundaries List of stop boundaries * @param type The type of the redaction entity. * @param entityType The entity type of the redaction entity. * @param node The semantic node where the boundaries are. * @return A Stream of new RedactionEntities between the start and stop boundaries */ public Stream<RedactionEntity> betweenBoundaries(List<Boundary> startBoundaries, List<Boundary> stopBoundaries, String type, EntityType entityType, SemanticNode node) /** * Same as betweenBoundaries, but it creates the start and stop boundaries by performing a text search on the provided SemanticNode. */ public Stream<RedactionEntity> betweenStrings(String start, String stop, String type, EntityType entityType, SemanticNode node) /** * Same as betweenStrings, but case insensitive. */ public Stream<RedactionEntity> betweenStringsIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) /** * These 6 functions work the same as betweenStrings, but they also include the start and/or stop strings or are case insensitive, depending on their name. 
*/ public Stream<RedactionEntity> betweenStringsIncludeStart(String start, String stop, String type, EntityType entityType, SemanticNode node) public Stream<RedactionEntity> betweenStringsIncludeStartIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) public Stream<RedactionEntity> betweenStringsIncludeEnd(String start, String stop, String type, EntityType entityType, SemanticNode node) public Stream<RedactionEntity> betweenStringsIncludeEndIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) public Stream<RedactionEntity> betweenStringsIncludeStartAndEnd(String start, String stop, String type, EntityType entityType, SemanticNode node) public Stream<RedactionEntity> betweenStringsIncludeStartAndEndIgnoreCase(String start, String stop, String type, EntityType entityType, SemanticNode node) /** * Same as betweenBoundaries, but it creates the start and stop boundaries by performing a regex search on the provided SemanticNode. */ public Stream<RedactionEntity> betweenRegexes(String regexStart, String regexStop, String type, EntityType entityType, SemanticNode node) /** * Same as betweenRegexes, but case insensitive. */ public Stream<RedactionEntity> betweenRegexesIgnoreCase(String regexStart, String regexStop, String type, EntityType entityType, SemanticNode node) /** * Creates a new RedactionEntity which has the same boundary as the provided SemanticNode. * @param node The SemanticNode to create a new RedactionEntity from. * @param type The type of the redaction entity. * @param entityType The entity type of the redaction entity. * @return An Optional containing the new RedactionEntity, or an empty Optional if the provided SemanticNode is empty. */ public Optional<RedactionEntity> bySemanticNode(SemanticNode node, String type, EntityType entityType) /** * Same as bySemanticNode, but ignores the SemanticNode if it is not a Paragraph, as well as all of its child SemanticNodes that are not Paragraphs. 
*/ public Stream<RedactionEntity> bySemanticNodeParagraphsOnly(SemanticNode node, String type, EntityType entityType) /** * Searches the provided SemanticNode for the provided string and creates a new RedactionEntity from the end of the first occurrence of the string until the end of the SemanticNode. * @param string The string to search for * @param type The type of the redaction entity. * @param entityType The entity type of the redaction entity. * @param node The SemanticNode to use and search in * @return An Optional containing the new RedactionEntity, or an empty Optional if the SemanticNode is empty or the string isn't found in the SemanticNode. */ public Optional<RedactionEntity> semanticNodeAfterString(String string, String type, EntityType entityType, SemanticNode node)