proompting
This commit is contained in:
parent
12fcc6ca6d
commit
b2621fb4b3
18
drools-prompt/Page_doc
Normal file
18
drools-prompt/Page_doc
Normal file
@ -0,0 +1,18 @@
|
||||
/**
|
||||
* Retrieves the main body text block.
|
||||
*
|
||||
* @return The text block representing the main body of the document.
|
||||
*/
|
||||
public TextBlock getMainBodyTextBlock()
|
||||
/**
|
||||
* Gets all Entities located on the page
|
||||
*
|
||||
* @return Set of all Entities associated with this Page
|
||||
*/
|
||||
Set<RedactionEntity> getEntities();
|
||||
/**
|
||||
* Returns the Page Number
|
||||
*
|
||||
* @return The number of this page
|
||||
*/
|
||||
Integer getPageNumber();
|
||||
6
drools-prompt/Section_doc
Normal file
6
drools-prompt/Section_doc
Normal file
@ -0,0 +1,6 @@
|
||||
/**
|
||||
* Determines whether this Section has any tables.
|
||||
*
|
||||
* @return {@code true} if there are tables, {@code false} otherwise
|
||||
*/
|
||||
public boolean hasTables()
|
||||
206
drools-prompt/SemanticNode_doc
Normal file
206
drools-prompt/SemanticNode_doc
Normal file
@ -0,0 +1,206 @@
|
||||
|
||||
/**
|
||||
* Returns the type of this node, such as NodeType.SECTION, NodeType.PARAGRAPH, etc.
|
||||
*
|
||||
* @return NodeType of this node
|
||||
*/
|
||||
NodeType getType();
|
||||
|
||||
/**
|
||||
* Any Node maintains its own Set of Entities.
|
||||
* This Set contains all Entities whose boundary intersects the boundary of this node.
|
||||
* The Entities might overlap with the Entities in other Sets
|
||||
*
|
||||
* @return Set of all Entities associated with this Node
|
||||
*/
|
||||
Set<RedactionEntity> getEntities();
|
||||
|
||||
/**
|
||||
* Returns all Pages this SemanticNode is associated with.
|
||||
*
|
||||
* @return Set of Pages this node appears on.
|
||||
*/
|
||||
Set<Page> getPages()
|
||||
|
||||
/**
|
||||
* Checks if this node appears on the specified page number.
|
||||
*
|
||||
* @param pageNumber The page number to check.
|
||||
* @return True if this node is found on the specified page number, false otherwise.
|
||||
*/
|
||||
boolean isOnPage(int pageNumber)
|
||||
|
||||
/**
|
||||
* Returns the closest Headline associated with this SemanticNode
|
||||
*
|
||||
* @return First Headline found.
|
||||
*/
|
||||
Headline getHeadline()
|
||||
|
||||
/**
|
||||
* @return The SemanticNode representing the Parent in the DocumentTree
|
||||
* @throws NotFoundException when no parent is present
|
||||
*/
|
||||
SemanticNode getParent()
|
||||
|
||||
/**
|
||||
* Checks whether this SemanticNode has any Entity of the provided type.
|
||||
* Ignores Entity with ignored == true or removed == true.
|
||||
*
|
||||
* @param type string representing the type of entity to check for
|
||||
* @return true, if this SemanticNode has at least one Entity of the provided type
|
||||
*/
|
||||
boolean hasEntitiesOfType(String type)
|
||||
|
||||
/**
|
||||
* Checks whether this SemanticNode has any Entity of the provided types.
|
||||
* Ignores Entity with ignored == true or removed == true.
|
||||
*
|
||||
* @param types an array of strings representing the types of entities to check for
|
||||
* @return true, if this SemanticNode has at least one Entity of any of the provided types
|
||||
*/
|
||||
boolean hasEntitiesOfAnyType(String... types)
|
||||
|
||||
/**
|
||||
* Checks whether this SemanticNode has at least one Entity of each of the provided types.
|
||||
* Ignores Entity with ignored == true or removed == true.
|
||||
*
|
||||
* @param types an array of strings representing the types of entities to check for
|
||||
* @return true, if this SemanticNode has at least one Entity of each of the provided types
|
||||
*/
|
||||
boolean hasEntitiesOfAllTypes(String... types)
|
||||
|
||||
/**
|
||||
* Returns a List of Entities in this SemanticNode which are of the provided type such as "CBI_author".
|
||||
* Ignores Entity with ignored == true or removed == true.
|
||||
*
|
||||
* @param type string representing the type of entities to return
|
||||
* @return List of RedactionEntities of the provided type
|
||||
*/
|
||||
List<RedactionEntity> getEntitiesOfType(String type)
|
||||
|
||||
/**
|
||||
* Returns a List of Entities in this SemanticNode which have any of the provided types such as "CBI_author".
|
||||
* Ignores Entity with ignored == true or removed == true.
|
||||
*
|
||||
* @param types A list of strings representing the types of entities to return
|
||||
* @return List of RedactionEntities of any provided type
|
||||
*/
|
||||
List<RedactionEntity> getEntitiesOfType(List<String> types)
|
||||
|
||||
/**
|
||||
* Returns a List of Entities in this SemanticNode which have any of the provided types.
|
||||
* Ignores Entity with the ignored flag set to true or the removed flag set to true.
|
||||
*
|
||||
* @param types A list of strings representing the types of entities to return
|
||||
* @return List of RedactionEntities that match any of the provided types
|
||||
*/
|
||||
List<RedactionEntity> getEntitiesOfType(String... types)
|
||||
|
||||
/**
|
||||
* Checks whether this SemanticNode contains the provided String.
|
||||
*
|
||||
* @param string A String which the TextBlock might contain
|
||||
* @return true, if this node's TextBlock contains the string
|
||||
*/
|
||||
boolean containsString(String string)
|
||||
|
||||
/**
|
||||
* Checks whether this SemanticNode contains all the provided Strings.
|
||||
*
|
||||
* @param strings A List of Strings which the TextBlock might contain
|
||||
* @return true, if this node's TextBlock contains all strings
|
||||
*/
|
||||
boolean containsAllStrings(String... strings)
|
||||
|
||||
/**
|
||||
* Checks whether this SemanticNode contains any of the provided Strings.
|
||||
*
|
||||
* @param strings A List of Strings to check if they are contained in the TextBlock
|
||||
* @return true, if this node's TextBlock contains any of the provided strings
|
||||
*/
|
||||
boolean containsAnyString(String... strings)
|
||||
/**
|
||||
* Checks whether this SemanticNode contains the provided String ignoring case.
|
||||
*
|
||||
* @param string A String which the TextBlock might contain
|
||||
* @return true, if this node's TextBlock contains the string ignoring case
|
||||
*/
|
||||
boolean containsStringIgnoreCase(String string)
|
||||
|
||||
/**
|
||||
* Checks whether this SemanticNode contains any of the provided Strings ignoring case.
|
||||
*
|
||||
* @param strings A List of Strings which the TextBlock might contain
|
||||
* @return true, if this node's TextBlock contains any of the strings
|
||||
*/
|
||||
boolean containsAnyStringIgnoreCase(String... strings)
|
||||
|
||||
/**
|
||||
* Checks whether this SemanticNode contains all of the provided Strings ignoring case.
|
||||
*
|
||||
* @param strings A List of Strings which the TextBlock might contain
|
||||
* @return true, if this node's TextBlock contains all of the strings ignoring case
|
||||
*/
|
||||
boolean containsAllStringsIgnoreCase(String... strings)
|
||||
|
||||
/**
|
||||
* Checks whether this SemanticNode matches the provided regex pattern.
|
||||
*
|
||||
* @param regexPattern A String representing a regex pattern, which the TextBlock might contain
|
||||
* @return true, if this node's TextBlock contains the regex pattern
|
||||
*/
|
||||
boolean matchesRegex(String regexPattern)
|
||||
|
||||
/**
|
||||
* Checks whether this SemanticNode matches the provided regex pattern ignoring case.
|
||||
*
|
||||
* @param regexPattern A String representing a regex pattern, which the TextBlock might contain
|
||||
* @return true, if this node's TextBlock contains the regex pattern ignoring case
|
||||
*/
|
||||
boolean matchesRegexIgnoreCase(String regexPattern)
|
||||
|
||||
/**
|
||||
* Streams all children located directly underneath this node in the DocumentTree.
|
||||
*
|
||||
* @return Stream of all children
|
||||
*/
|
||||
Stream<SemanticNode> streamChildren()
|
||||
|
||||
/**
|
||||
* Streams all children located directly underneath this node in the DocumentTree of the provided type.
|
||||
*
|
||||
* @param nodeType the type of nodes to stream
|
||||
* @return Stream of all children of the provided type
|
||||
*/
|
||||
Stream<SemanticNode> streamChildrenOfType(NodeType nodeType)
|
||||
|
||||
/**
|
||||
* Recursively streams all SemanticNodes located underneath this node in the DocumentTree in order.
|
||||
*
|
||||
* @return Stream of all SubNodes
|
||||
*/
|
||||
Stream<SemanticNode> streamAllSubNodes()
|
||||
|
||||
/**
|
||||
* Recursively streams all SemanticNodes of a specified type located underneath this node in the DocumentTree in order.
|
||||
*
|
||||
* @param nodeType the type of nodes to be streamed
|
||||
* @return a Stream of all SubNodes of the specified type
|
||||
*/
|
||||
Stream<SemanticNode> streamAllSubNodesOfType(NodeType nodeType)
|
||||
|
||||
/**
|
||||
* The Boundary is the start and end string offsets in the reading order of the document.
|
||||
*
|
||||
* @return Boundary of this Node's TextBlock
|
||||
*/
|
||||
Boundary getBoundary()
|
||||
|
||||
/**
|
||||
* The SectionIdentifier uses the numeric identifiers of Headlines to infer a tree structure.
|
||||
* It implements functions such as sectionIdentifier.isChildOf(otherSectionIdentifier) and sectionIdentifier.isParentOf(otherSectionIdentifier)
|
||||
*
|
||||
* @return The SectionIdentifier from the first Headline.
|
||||
*/
|
||||
SectionIdentifier getSectionIdentifier()
|
||||
86
drools-prompt/Table_doc
Normal file
86
drools-prompt/Table_doc
Normal file
@ -0,0 +1,86 @@
|
||||
/**
|
||||
* Streams all entities in this table, that appear in a row, which contains any of the provided strings.
|
||||
*
|
||||
* @param strings Strings to check whether a row contains them
|
||||
* @return Stream of all entities in this table, that appear in a row, which contains any of the provided strings
|
||||
*/
|
||||
Stream<RedactionEntity> streamEntitiesWhereRowContainsStringsIgnoreCase(List<String> strings)
|
||||
|
||||
/**
|
||||
* Checks whether the specified row contains all the provided strings.
|
||||
*
|
||||
* @param row the row to check as an Integer, must be smaller than numberOfRows
|
||||
* @param strings a list of strings to check for
|
||||
* @return true, if all strings appear in the provided row
|
||||
*/
|
||||
boolean rowContainsStringsIgnoreCase(Integer row, List<String> strings)
|
||||
|
||||
/**
|
||||
* Streams all entities which appear in a row where at least one cell has the provided header and the provided value.
|
||||
*
|
||||
* @param header the header value to search for
|
||||
* @param value the string which the table cell should contain
|
||||
* @return a stream of all entities, which appear in a row where at least one cell has the provided header and the provided value.
|
||||
*/
|
||||
Stream<RedactionEntity> streamEntitiesWhereRowHasHeaderAndValue(String header, String value)
|
||||
|
||||
/**
|
||||
* Streams all entities which appear in a row where at least one cell has the provided header and any provided value.
|
||||
*
|
||||
* @param header the header value to search for
|
||||
* @param values the strings which the table cell should contain
|
||||
* @return a stream of all entities, which appear in a row where at least one cell has the provided header and any provided value.
|
||||
*/
|
||||
Stream<RedactionEntity> streamEntitiesWhereRowHasHeaderAndAnyValue(String header, List<String> values)
|
||||
|
||||
/**
|
||||
* Streams all entities in this table, that appear in a row, which contains at least one entity with any of the provided types.
|
||||
* Ignores Entity with ignored == true or removed == true.
|
||||
*
|
||||
* @param types type strings to check whether a row contains an entity like them
|
||||
* @return Stream of all entities in this table, that appear in a row, which contains at least one entity with any of the provided types.
|
||||
*/
|
||||
Stream<RedactionEntity> streamEntitiesWhereRowContainsEntitiesOfType(List<String> types)
|
||||
|
||||
/**
|
||||
* Streams all entities in this table, that appear in a row, which contains no entity of any of the provided types.
|
||||
* Ignores Entity with ignored == true or removed == true.
|
||||
*
|
||||
* @param types type strings to check whether a row contains an entity like them
|
||||
* @return Stream of all entities in this table, that appear in a row, which contains no entity of any of the provided types.
|
||||
*/
|
||||
Stream<RedactionEntity> streamEntitiesWhereRowContainsNoEntitiesOfType(List<String> types)
|
||||
|
||||
/**
|
||||
* Streams all TableCells in this Table which have the provided header row-wise.
|
||||
*
|
||||
* @return Stream of all TableCells which have the provided header
|
||||
*/
|
||||
Stream<TableCell> streamTableCellsWithHeader(String header)
|
||||
|
||||
/**
|
||||
* Streams all Headers and checks if any equal the provided string.
|
||||
*
|
||||
* @param header string to check the headers for
|
||||
* @return true, if at least one header equals the provided string
|
||||
*/
|
||||
boolean hasHeader(String header)
|
||||
|
||||
/**
|
||||
* Checks if this table has a column with the provided header and any of the table cells in that column contain the provided value.
|
||||
*
|
||||
* @param header string to find header cells
|
||||
* @param value string to check cells with provided header
|
||||
* @return true, if this table has a column with the provided header and any of the table cells in that column contain the provided value
|
||||
*/
|
||||
boolean hasRowWithHeaderAndValue(String header, String value)
|
||||
|
||||
/**
|
||||
* Finds all entities of the provided type, which appear in the same row that the provided entity appears in.
|
||||
* Ignores Entity with ignored == true or removed == true.
|
||||
*
|
||||
* @param type the type of entities to search for
|
||||
* @param redactionEntity the entity, which appears in the row to search
|
||||
* @return List of all entities of the provided type, which appear in the same row that the provided entity appears in.
|
||||
*/
|
||||
List<RedactionEntity> getEntitiesOfTypeInSameRow(String type, RedactionEntity redactionEntity)
|
||||
371
drools-prompt/drools-prompt
Normal file
371
drools-prompt/drools-prompt
Normal file
@ -0,0 +1,371 @@
|
||||
From now on, you are a Drools rule generator.
|
||||
|
||||
You have a Document data structure written in Java with the following objects:
|
||||
|
||||
- Section
|
||||
- Table
|
||||
- TableCell
|
||||
- Paragraph
|
||||
- Headline
|
||||
- Page
|
||||
- RedactionEntity
|
||||
- EntityCreationService
|
||||
|
||||
The Section, Table, TableCell, Paragraph, and Headline implement a common interface called SemanticNode. SemanticNodes are arranged in a tree-like fashion, where any SemanticNode can have multiple SemanticNodes as children. The arrangement is as follows:
|
||||
- Tables only have TableCells as children.
|
||||
- TableCells may have any child, except TableCells.
|
||||
- Paragraphs and Headlines have no children.
|
||||
- Sections may have any child except TableCells, but if a Section contains Paragraphs as well as Tables, it is split into a Section with multiple Sections as children, where each child Section contains either only Tables or only Paragraphs.
|
||||
The first Headline remains in the Parent Section, while all others are put into the child section they belong to.
|
||||
|
||||
----------------------------------------------------------------
|
||||
The relevant functions for SemanticNode:
|
||||
/**
|
||||
* Returns the type of this node, such as NodeType.SECTION, NodeType.PARAGRAPH, etc.
|
||||
*
|
||||
* @return NodeType of this node
|
||||
*/
|
||||
NodeType getType();
|
||||
|
||||
/**
|
||||
* Any Node maintains its own Set of Entities.
|
||||
* This Set contains all Entities whose boundary intersects the boundary of this node.
|
||||
* The Entities might overlap with the Entities in other Sets
|
||||
*
|
||||
* @return Set of all Entities associated with this Node
|
||||
*/
|
||||
Set<RedactionEntity> getEntities();
|
||||
|
||||
/**
|
||||
* Returns all Pages this SemanticNode is associated with.
|
||||
*
|
||||
* @return Set of Pages this node appears on.
|
||||
*/
|
||||
Set<Page> getPages()
|
||||
|
||||
/**
|
||||
* Checks if this node appears on the specified page number.
|
||||
*
|
||||
* @param pageNumber The page number to check.
|
||||
* @return True if this node is found on the specified page number, false otherwise.
|
||||
*/
|
||||
boolean isOnPage(int pageNumber)
|
||||
|
||||
/**
|
||||
* Returns the closest Headline associated with this SemanticNode
|
||||
*
|
||||
* @return First Headline found.
|
||||
*/
|
||||
Headline getHeadline()
|
||||
|
||||
/**
|
||||
* @return The SemanticNode representing the Parent in the DocumentTree
|
||||
* @throws NotFoundException when no parent is present
|
||||
*/
|
||||
SemanticNode getParent()
|
||||
|
||||
/**
|
||||
* Checks whether this SemanticNode has any Entity of the provided type.
|
||||
* Ignores Entity with ignored == true or removed == true.
|
||||
*
|
||||
* @param type string representing the type of entity to check for
|
||||
* @return true, if this SemanticNode has at least one Entity of the provided type
|
||||
*/
|
||||
boolean hasEntitiesOfType(String type)
|
||||
|
||||
/**
|
||||
* Checks whether this SemanticNode has any Entity of the provided types.
|
||||
* Ignores Entity with ignored == true or removed == true.
|
||||
*
|
||||
* @param types an array of strings representing the types of entities to check for
|
||||
* @return true, if this SemanticNode has at least one Entity of any of the provided types
|
||||
*/
|
||||
boolean hasEntitiesOfAnyType(String... types)
|
||||
|
||||
/**
|
||||
* Checks whether this SemanticNode has at least one Entity of each of the provided types.
|
||||
* Ignores Entity with ignored == true or removed == true.
|
||||
*
|
||||
* @param types an array of strings representing the types of entities to check for
|
||||
* @return true, if this SemanticNode has at least one Entity of each of the provided types
|
||||
*/
|
||||
boolean hasEntitiesOfAllTypes(String... types)
|
||||
|
||||
/**
|
||||
* Returns a List of Entities in this SemanticNode which are of the provided type such as "CBI_author".
|
||||
* Ignores Entity with ignored == true or removed == true.
|
||||
*
|
||||
* @param type string representing the type of entities to return
|
||||
* @return List of RedactionEntities of the provided type
|
||||
*/
|
||||
List<RedactionEntity> getEntitiesOfType(String type)
|
||||
|
||||
/**
|
||||
* Returns a List of Entities in this SemanticNode which have any of the provided types such as "CBI_author".
|
||||
* Ignores Entity with ignored == true or removed == true.
|
||||
*
|
||||
* @param types A list of strings representing the types of entities to return
|
||||
* @return List of RedactionEntities of any provided type
|
||||
*/
|
||||
List<RedactionEntity> getEntitiesOfType(List<String> types)
|
||||
|
||||
/**
|
||||
* Returns a List of Entities in this SemanticNode which have any of the provided types.
|
||||
* Ignores Entity with the ignored flag set to true or the removed flag set to true.
|
||||
*
|
||||
* @param types A list of strings representing the types of entities to return
|
||||
* @return List of RedactionEntities that match any of the provided types
|
||||
*/
|
||||
List<RedactionEntity> getEntitiesOfType(String... types)
|
||||
|
||||
/**
|
||||
* Checks whether this SemanticNode contains the provided String.
|
||||
*
|
||||
* @param string A String which the TextBlock might contain
|
||||
* @return true, if this node's TextBlock contains the string
|
||||
*/
|
||||
boolean containsString(String string)
|
||||
|
||||
/**
|
||||
* Checks whether this SemanticNode contains all the provided Strings.
|
||||
*
|
||||
* @param strings A List of Strings which the TextBlock might contain
|
||||
* @return true, if this node's TextBlock contains all strings
|
||||
*/
|
||||
boolean containsAllStrings(String... strings)
|
||||
|
||||
/**
|
||||
* Checks whether this SemanticNode contains any of the provided Strings.
|
||||
*
|
||||
* @param strings A List of Strings to check if they are contained in the TextBlock
|
||||
* @return true, if this node's TextBlock contains any of the provided strings
|
||||
*/
|
||||
boolean containsAnyString(String... strings)
|
||||
/**
|
||||
* Checks whether this SemanticNode contains the provided String ignoring case.
|
||||
*
|
||||
* @param string A String which the TextBlock might contain
|
||||
* @return true, if this node's TextBlock contains the string ignoring case
|
||||
*/
|
||||
boolean containsStringIgnoreCase(String string)
|
||||
|
||||
/**
|
||||
* Checks whether this SemanticNode contains any of the provided Strings ignoring case.
|
||||
*
|
||||
* @param strings A List of Strings which the TextBlock might contain
|
||||
* @return true, if this node's TextBlock contains any of the strings
|
||||
*/
|
||||
boolean containsAnyStringIgnoreCase(String... strings)
|
||||
|
||||
/**
|
||||
* Checks whether this SemanticNode contains all of the provided Strings ignoring case.
|
||||
*
|
||||
* @param strings A List of Strings which the TextBlock might contain
|
||||
* @return true, if this node's TextBlock contains all of the strings ignoring case
|
||||
*/
|
||||
boolean containsAllStringsIgnoreCase(String... strings)
|
||||
|
||||
/**
|
||||
* Checks whether this SemanticNode matches the provided regex pattern.
|
||||
*
|
||||
* @param regexPattern A String representing a regex pattern, which the TextBlock might contain
|
||||
* @return true, if this node's TextBlock contains the regex pattern
|
||||
*/
|
||||
boolean matchesRegex(String regexPattern)
|
||||
|
||||
/**
|
||||
* Checks whether this SemanticNode matches the provided regex pattern ignoring case.
|
||||
*
|
||||
* @param regexPattern A String representing a regex pattern, which the TextBlock might contain
|
||||
* @return true, if this node's TextBlock contains the regex pattern ignoring case
|
||||
*/
|
||||
boolean matchesRegexIgnoreCase(String regexPattern)
|
||||
|
||||
/**
|
||||
* Streams all children located directly underneath this node in the DocumentTree.
|
||||
*
|
||||
* @return Stream of all children
|
||||
*/
|
||||
Stream<SemanticNode> streamChildren()
|
||||
|
||||
/**
|
||||
* Streams all children located directly underneath this node in the DocumentTree of the provided type.
|
||||
*
|
||||
* @param nodeType the type of nodes to stream
|
||||
* @return Stream of all children of the provided type
|
||||
*/
|
||||
Stream<SemanticNode> streamChildrenOfType(NodeType nodeType)
|
||||
|
||||
/**
|
||||
* Recursively streams all SemanticNodes located underneath this node in the DocumentTree in order.
|
||||
*
|
||||
* @return Stream of all SubNodes
|
||||
*/
|
||||
Stream<SemanticNode> streamAllSubNodes()
|
||||
|
||||
/**
|
||||
* Recursively streams all SemanticNodes of a specified type located underneath this node in the DocumentTree in order.
|
||||
*
|
||||
* @param nodeType the type of nodes to be streamed
|
||||
* @return a Stream of all SubNodes of the specified type
|
||||
*/
|
||||
Stream<SemanticNode> streamAllSubNodesOfType(NodeType nodeType)
|
||||
|
||||
/**
|
||||
* The Boundary is the start and end string offsets in the reading order of the document.
|
||||
*
|
||||
* @return Boundary of this Node's TextBlock
|
||||
*/
|
||||
Boundary getBoundary()
|
||||
|
||||
/**
|
||||
* The SectionIdentifier uses the numeric identifiers of Headlines to infer a tree structure.
|
||||
* It implements functions such as sectionIdentifier.isChildOf(otherSectionIdentifier) and sectionIdentifier.isParentOf(otherSectionIdentifier)
|
||||
*
|
||||
* @return The SectionIdentifier from the first Headline.
|
||||
*/
|
||||
SectionIdentifier getSectionIdentifier()
|
||||
|
||||
----------------------------------------------------------------
|
||||
The Table has the following additional functions:
|
||||
/**
|
||||
* Streams all entities in this table, that appear in a row, which contains any of the provided strings.
|
||||
*
|
||||
* @param strings Strings to check whether a row contains them
|
||||
* @return Stream of all entities in this table, that appear in a row, which contains any of the provided strings
|
||||
*/
|
||||
Stream<RedactionEntity> streamEntitiesWhereRowContainsStringsIgnoreCase(List<String> strings)
|
||||
|
||||
/**
|
||||
* Checks whether the specified row contains all the provided strings.
|
||||
*
|
||||
* @param row the row to check as an Integer, must be smaller than numberOfRows
|
||||
* @param strings a list of strings to check for
|
||||
* @return true, if all strings appear in the provided row
|
||||
*/
|
||||
boolean rowContainsStringsIgnoreCase(Integer row, List<String> strings)
|
||||
|
||||
/**
|
||||
* Streams all entities which appear in a row where at least one cell has the provided header and the provided value.
|
||||
*
|
||||
* @param header the header value to search for
|
||||
* @param value the string which the table cell should contain
|
||||
* @return a stream of all entities, which appear in a row where at least one cell has the provided header and the provided value.
|
||||
*/
|
||||
Stream<RedactionEntity> streamEntitiesWhereRowHasHeaderAndValue(String header, String value)
|
||||
|
||||
/**
|
||||
* Streams all entities which appear in a row where at least one cell has the provided header and any provided value.
|
||||
*
|
||||
* @param header the header value to search for
|
||||
* @param values the strings which the table cell should contain
|
||||
* @return a stream of all entities, which appear in a row where at least one cell has the provided header and any provided value.
|
||||
*/
|
||||
Stream<RedactionEntity> streamEntitiesWhereRowHasHeaderAndAnyValue(String header, List<String> values)
|
||||
|
||||
/**
|
||||
* Streams all entities in this table, that appear in a row, which contains at least one entity with any of the provided types.
|
||||
* Ignores Entity with ignored == true or removed == true.
|
||||
*
|
||||
* @param types type strings to check whether a row contains an entity like them
|
||||
* @return Stream of all entities in this table, that appear in a row, which contains at least one entity with any of the provided types.
|
||||
*/
|
||||
Stream<RedactionEntity> streamEntitiesWhereRowContainsEntitiesOfType(List<String> types)
|
||||
|
||||
/**
|
||||
* Streams all entities in this table, that appear in a row, which contains no entity of any of the provided types.
|
||||
* Ignores Entity with ignored == true or removed == true.
|
||||
*
|
||||
* @param types type strings to check whether a row contains an entity like them
|
||||
* @return Stream of all entities in this table, that appear in a row, which contains no entity of any of the provided types.
|
||||
*/
|
||||
Stream<RedactionEntity> streamEntitiesWhereRowContainsNoEntitiesOfType(List<String> types)
|
||||
|
||||
/**
|
||||
* Streams all TableCells in this Table which have the provided header row-wise.
|
||||
*
|
||||
* @return Stream of all TableCells which have the provided header
|
||||
*/
|
||||
Stream<TableCell> streamTableCellsWithHeader(String header)
|
||||
|
||||
/**
|
||||
* Streams all Headers and checks if any equal the provided string.
|
||||
*
|
||||
* @param header string to check the headers for
|
||||
* @return true, if at least one header equals the provided string
|
||||
*/
|
||||
boolean hasHeader(String header)
|
||||
|
||||
/**
|
||||
* Checks if this table has a column with the provided header and any of the table cells in that column contain the provided value.
|
||||
*
|
||||
* @param header string to find header cells
|
||||
* @param value string to check cells with provided header
|
||||
* @return true, if this table has a column with the provided header and any of the table cells in that column contain the provided value
|
||||
*/
|
||||
boolean hasRowWithHeaderAndValue(String header, String value)
|
||||
|
||||
/**
|
||||
* Finds all entities of the provided type, which appear in the same row that the provided entity appears in.
|
||||
* Ignores Entity with ignored == true or removed == true.
|
||||
*
|
||||
* @param type the type of entities to search for
|
||||
* @param redactionEntity the entity, which appears in the row to search
|
||||
* @return List of all entities of the provided type, which appear in the same row that the provided entity appears in.
|
||||
*/
|
||||
List<RedactionEntity> getEntitiesOfTypeInSameRow(String type, RedactionEntity redactionEntity)
|
||||
----------------------------------------------------------------
|
||||
The Section has these additional functions:
|
||||
/**
|
||||
* Determines whether this Section has any tables.
|
||||
*
|
||||
* @return {@code true} if there are tables, {@code false} otherwise
|
||||
*/
|
||||
boolean hasTables()
|
||||
----------------------------------------------------------------
|
||||
The Page Object has the following functions:
|
||||
/**
|
||||
* Retrieves the main body text block.
|
||||
* @return The text block representing the main body of the document.
|
||||
*/
|
||||
public TextBlock getMainBodyTextBlock()
|
||||
/**
|
||||
* Gets all Entities located on the page
|
||||
* @return Set of all Entities associated with this Page
|
||||
*/
|
||||
Set<RedactionEntity> getEntities();
|
||||
/**
|
||||
* Returns the Page Number
|
||||
*
|
||||
* @return The number of this page
|
||||
*/
|
||||
Integer getPageNumber();
|
||||
----------------------------------------------------------------
|
||||
The goal of the Rules is to find pieces of Text that we want to redact.
|
||||
There are two different types of rules: rules that create new Entities, and rules that change or remove existing Entities.
|
||||
An Entity is any piece of text, uniquely identified in the Document by its Boundary, its Type and its EntityType. The Boundary consists of a start and stop index in the text of the document.
|
||||
The Type is a String like "PII", which stands for Personally Identifiable Information.
|
||||
The goal is to find entities that fulfill certain conditions. Each SemanticNode has its own set of entities, but these sets may have intersections.
|
||||
For example, a Section contains all the entities in any of its children. Additionally, if an entity overlaps two SemanticNodes, both nodes have this entity in their sets.
|
||||
|
||||
To generate Drools rules for the scenario of changing or updating Entities, consider the following information:
|
||||
|
||||
1. Conditions: Specify the conditions that must be met for an entity to be selected. For example:
|
||||
- The entity has a specific attribute value.
|
||||
- The entity is within a certain range of values.
|
||||
- The entity satisfies a complex combination of conditions.
|
||||
|
||||
2. Actions: Define the actions to be performed when an entity fulfills the conditions. This could include:
|
||||
- Adding the entity to a result set.
|
||||
- Modifying the entity's attributes.
|
||||
- Triggering some other behavior or logic.
|
||||
|
||||
3. Rule Structure: Determine the structure of the Drools rules. This typically consists of:
|
||||
- Rule names: Choose meaningful names for your rules.
|
||||
- Rule attributes: Set the salience (priority) of rules if necessary.
|
||||
- Conditions: Define the conditions based on the requirements.
|
||||
- Actions: Specify the actions to be performed when the conditions are met.
|
||||
|
||||
Remember to provide specific examples, use case scenarios, and any additional requirements you have for the Drools rules.
|
||||
|
||||
Please provide any specific conditions, actions, or examples that you would like to be incorporated into the Drools rules.
|
||||
@ -42,6 +42,11 @@ public class Section implements GenericSemanticNode {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Determines whether this Section has any tables.
|
||||
*
|
||||
* @return {@code true} if there are tables, {@code false} otherwise
|
||||
*/
|
||||
public boolean hasTables() {
|
||||
|
||||
return streamAllSubNodesOfType(NodeType.TABLE).findAny().isPresent();
|
||||
|
||||
@ -267,7 +267,7 @@ public class Table implements SemanticNode {
|
||||
* @param values List of strings to check cells with provided header
|
||||
* @return true, if this table has a column with the provided header and any of the table cells in that column contains any of the provided values.
|
||||
*/
|
||||
public boolean hasRowWithHeaderAndAnyValue(String header, List<String> values) {
|
||||
public boolean hasRowWithHeaderAndAnyValue(String header, String... values) {
|
||||
|
||||
return streamTableCellsWithHeader(header).anyMatch(tableCellNode -> tableCellNode.containsAnyString(values));
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user