206 lines
6.8 KiB
Plaintext
206 lines
6.8 KiB
Plaintext
|
|
/**
|
|
* Returns the type of this node, such as NodeType.SECTION, NodeType.PARAGRAPH, etc.
|
|
*
|
|
* @return NodeType of this node
|
|
*/
|
|
NodeType getType();
|
|
|
|
/**
|
|
* Any Node maintains its own Set of Entities.
|
|
* This Set contains all Entities whose boundary intersects the boundary of this node.
|
|
* The Entities might overlap with the Entities in other Sets
|
|
*
|
|
* @return Set of all Entities associated with this Node
|
|
*/
|
|
Set<RedactionEntity> getEntities();
|
|
|
|
/**
|
|
* Returns all Pages this SemanticNode is associated with.
|
|
*
|
|
* @return Set of Pages this node appears on.
|
|
*/
|
|
Set<Page> getPages()
|
|
|
|
/**
|
|
* Checks if this node appears on the specified page number.
|
|
*
|
|
* @param pageNumber The page number to check.
|
|
* @return True if this node is found on the specified page number, false otherwise.
|
|
*/
|
|
boolean isOnPage(int pageNumber)
|
|
|
|
/**
|
|
* Returns the closest Headline associated with this SemanticNode
|
|
*
|
|
* @return First Headline found.
|
|
*/
|
|
Headline getHeadline()
|
|
|
|
/**
|
|
* @return The SemanticNode representing the Parent in the DocumentTree
|
|
* @throws NotFoundException when no parent is present
|
|
*/
|
|
SemanticNode getParent()
|
|
|
|
/**
|
|
* Checks whether this SemanticNode has any Entity of the provided type.
|
|
* Ignores Entity with ignored == true or removed == true.
|
|
*
|
|
* @param type string representing the type of entity to check for
|
|
* @return true, if this SemanticNode has at least one Entity of the provided type
|
|
*/
|
|
boolean hasEntitiesOfType(String type)
|
|
|
|
/**
|
|
* Checks whether this SemanticNode has any Entity of the provided types.
|
|
* Ignores Entity with ignored == true or removed == true.
|
|
*
|
|
* @param types an array of strings representing the types of entities to check for
|
|
* @return true, if this SemanticNode has at least one Entity of any of the provided types
|
|
*/
|
|
boolean hasEntitiesOfAnyType(String... types)
|
|
|
|
/**
|
|
* Checks whether this SemanticNode has at least one Entity of each of the provided types.
|
|
* Ignores Entity with ignored == true or removed == true.
|
|
*
|
|
* @param types an array of strings representing the types of entities to check for
|
|
* @return true, if this SemanticNode has at least one Entity of each of the provided types
|
|
*/
|
|
boolean hasEntitiesOfAllTypes(String... types)
|
|
|
|
/**
|
|
* Returns a List of Entities in this SemanticNode which are of the provided type such as "CBI_author".
|
|
* Ignores Entity with ignored == true or removed == true.
|
|
*
|
|
* @param type string representing the type of entities to return
|
|
* @return List of RedactionEntities of the provided type
|
|
*/
|
|
List<RedactionEntity> getEntitiesOfType(String type)
|
|
|
|
/**
|
|
* Returns a List of Entities in this SemanticNode which have any of the provided types such as "CBI_author".
|
|
* Ignores Entity with ignored == true or removed == true.
|
|
*
|
|
* @param types A list of strings representing the types of entities to return
|
|
* @return List of RedactionEntities of any provided type
|
|
*/
|
|
List<RedactionEntity> getEntitiesOfType(List<String> types)
|
|
|
|
/**
|
|
* Returns a List of Entities in this SemanticNode which have any of the provided types.
|
|
* Ignores Entity with the ignored flag set to true or the removed flag set to true.
|
|
*
|
|
* @param types one or more strings representing the types of entities to return
|
|
* @return List of RedactionEntities that match any of the provided types
|
|
*/
|
|
List<RedactionEntity> getEntitiesOfType(String... types)
|
|
|
|
/**
|
|
* Checks whether this SemanticNode contains the provided String.
|
|
*
|
|
* @param string A String which the TextBlock might contain
|
|
* @return true, if this node's TextBlock contains the string
|
|
*/
|
|
boolean containsString(String string)
|
|
|
|
/**
|
|
* Checks whether this SemanticNode contains all the provided Strings.
|
|
*
|
|
* @param strings one or more Strings which the TextBlock might contain
|
|
* @return true, if this node's TextBlock contains all strings
|
|
*/
|
|
boolean containsAllStrings(String... strings)
|
|
|
|
/**
|
|
* Checks whether this SemanticNode contains any of the provided Strings.
|
|
*
|
|
* @param strings one or more Strings to check if they are contained in the TextBlock
|
|
* @return true, if this node's TextBlock contains any of the provided strings
|
|
*/
|
|
boolean containsAnyString(String... strings)
|
|
/**
|
|
* Checks whether this SemanticNode contains the provided String ignoring case.
|
|
*
|
|
* @param string A String which the TextBlock might contain
|
|
* @return true, if this node's TextBlock contains the string ignoring case
|
|
*/
|
|
boolean containsStringIgnoreCase(String string)
|
|
|
|
/**
|
|
* Checks whether this SemanticNode contains any of the provided Strings ignoring case.
|
|
*
|
|
* @param strings one or more Strings which the TextBlock might contain
|
|
* @return true, if this node's TextBlock contains any of the strings ignoring case
|
|
*/
|
|
boolean containsAnyStringIgnoreCase(String... strings)
|
|
|
|
/**
|
|
* Checks whether this SemanticNode contains all of the provided Strings ignoring case.
|
|
*
|
|
* @param strings one or more Strings which the TextBlock might contain
|
|
* @return true, if this node's TextBlock contains all of the strings ignoring case
|
|
*/
|
|
boolean containsAllStringsIgnoreCase(String... strings)
|
|
|
|
/**
|
|
* Checks whether this SemanticNode matches the provided regex pattern.
|
|
*
|
|
* @param regexPattern A String representing a regex pattern, which the TextBlock might contain
|
|
* @return true, if this node's TextBlock contains the regex pattern
|
|
*/
|
|
boolean matchesRegex(String regexPattern)
|
|
|
|
/**
|
|
* Checks whether this SemanticNode matches the provided regex pattern ignoring case.
|
|
*
|
|
* @param regexPattern A String representing a regex pattern, which the TextBlock might contain
|
|
* @return true, if this node's TextBlock contains the regex pattern ignoring case
|
|
*/
|
|
boolean matchesRegexIgnoreCase(String regexPattern)
|
|
|
|
/**
|
|
* Streams all children located directly underneath this node in the DocumentTree.
|
|
*
|
|
* @return Stream of all children
|
|
*/
|
|
Stream<SemanticNode> streamChildren()
|
|
|
|
/**
|
|
* Streams all children located directly underneath this node in the DocumentTree of the provided type.
|
|
*
|
|
* @param nodeType the type of nodes to stream
|
|
* @return Stream of all children of the provided type
|
|
*/
|
|
Stream<SemanticNode> streamChildrenOfType(NodeType nodeType)
|
|
|
|
/**
|
|
* Recursively streams all SemanticNodes located underneath this node in the DocumentTree in order.
|
|
*
|
|
* @return Stream of all SubNodes
|
|
*/
|
|
Stream<SemanticNode> streamAllSubNodes()
|
|
|
|
/**
|
|
* Recursively streams all SemanticNodes of a specified type located underneath this node in the DocumentTree in order.
|
|
*
|
|
* @param nodeType the type of nodes to be streamed
|
|
* @return a Stream of all SubNodes of the specified type
|
|
*/
|
|
Stream<SemanticNode> streamAllSubNodesOfType(NodeType nodeType)
|
|
|
|
/**
|
|
* The Boundary is the start and end string offsets in the reading order of the document.
|
|
*
|
|
* @return Boundary of this Node's TextBlock
|
|
*/
|
|
Boundary getBoundary()
|
|
|
|
/**
|
|
* The SectionIdentifier uses the numeric identifiers of Headlines to infer a tree structure.
|
|
* It implements functions such as sectionIdentifier.isChildOf(otherSectionIdentifier) and sectionIdentifier.isParentOf(otherSectionIdentifier)
|
|
*
|
|
* @return The SectionIdentifier from the first Headline.
|
|
*/
|
|
SectionIdentifier getSectionIdentifier()