Merge branch 'DM-305' into 'master'

DM-305: Improved anyHeadlineContains

Closes DM-305

See merge request redactmanager/redaction-service!41
This commit is contained in:
Dominique Eifländer 2023-07-10 13:26:28 +02:00
commit ca8fd18a0e
6 changed files with 11 additions and 32 deletions

View File

@ -83,21 +83,13 @@ public class Section implements GenericSemanticNode {
// NOTE(review): this span is a rendered diff hunk whose +/- markers were stripped, which is why
// two return statements appear back to back. Presumably the first one (streamChildrenOfType plus
// an explicit cast to Headline) is the removed line and the second one (streamAllSubNodesOfType)
// is the line added by the commit -- confirm against the repository before treating either as
// live code.
/**
 * Reports whether at least one HEADLINE node reachable from this section contains the given text.
 *
 * @param value the text that is passed to each headline's {@code containsString}
 * @return {@code true} if any streamed headline reports that it contains {@code value}
 */
public boolean anyHeadlineContainsString(String value) {
// Variant 1: streams nodes of type HEADLINE via streamChildrenOfType and casts each to Headline.
return streamChildrenOfType(NodeType.HEADLINE)//
.map(node -> (Headline) node).anyMatch(h -> h.containsString(value));
// Variant 2: streams nodes of type HEADLINE via streamAllSubNodesOfType, no cast needed.
return streamAllSubNodesOfType(NodeType.HEADLINE).anyMatch(h -> h.containsString(value));
}
// NOTE(review): this span is a rendered diff hunk whose +/- markers were stripped, so lines of
// two methods are interleaved. Presumably the streamChildrenOfType body and the whole
// hasParagraphs() method are removed lines, and the streamAllSubNodesOfType(HEADLINE) return
// further down is the added body of anyHeadlineContainsStringIgnoreCase -- confirm against the
// repository.
/**
 * Reports whether at least one HEADLINE node reachable from this section contains the given
 * text, using each headline's {@code containsStringIgnoreCase}.
 *
 * @param value the text that is passed to each headline's {@code containsStringIgnoreCase}
 * @return {@code true} if any streamed headline reports that it contains {@code value}
 */
public boolean anyHeadlineContainsStringIgnoreCase(String value) {
// Streams nodes of type HEADLINE via streamChildrenOfType and casts each to Headline.
return streamChildrenOfType(NodeType.HEADLINE)//
.map(node -> (Headline) node).anyMatch(h -> h.containsStringIgnoreCase(value));
}
/**
 * Reports whether streamAllSubNodesOfType yields at least one PARAGRAPH node.
 *
 * @return {@code true} if the PARAGRAPH stream has a first element
 */
public boolean hasParagraphs() {
return streamAllSubNodesOfType(NodeType.PARAGRAPH).findFirst().isPresent();
// NOTE(review): the next line uses `value`, which hasParagraphs() does not declare; it looks
// like the replacement body of anyHeadlineContainsStringIgnoreCase placed here by the diff
// alignment -- confirm.
return streamAllSubNodesOfType(NodeType.HEADLINE).anyMatch(h -> h.containsStringIgnoreCase(value));
}
}

View File

@ -45,7 +45,9 @@ public class DocumineFloraTest extends AbstractRedactionIntegrationTest {
@Disabled
public void titleExtraction() throws IOException {
AnalyzeRequest request = uploadFileToStorage("files/Documine/Flora/ProblemDocs/15 - EVIDIS - Toxicidade oral aguda.pdf");
AnalyzeRequest request = prepareStorage("files/Documine/Flora/ProblemDocs/403-17_Fantom_ToxicidadeInalatoriaAguda.pdf",
"files/Documine/Flora/ProblemDocs/d75cd9358f7949552697764428183472.TABLES.json");
System.out.println("Start Full integration test");
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));

View File

@ -422,8 +422,8 @@ rule "DOC.10.0: Batch number from CoA"
when
$section: Section(
(
getHeadline().containsString("Analytical Report")
|| getHeadline().containsString("Certificate of Analysis")
anyHeadlineContainsString("Analytical Report")
|| anyHeadlineContainsString("Certificate of Analysis")
|| containsStringIgnoreCase("certificate of analysis")
)
&& (
@ -475,7 +475,7 @@ rule "DOC.10.1: Batch number"
&& containsStringIgnoreCase("batch")
)
then
Stream.of(entityCreationService.byRegex("Batch ID ([A-Z\\d\\-]{7,14})", "batch_number", EntityType.ENTITY, $section),
Stream.of(entityCreationService.byRegex("Batch ID ([A-Z\\d\\-]{7,14})", "batch_number", EntityType.ENTITY, 1, $section),
entityCreationService.lineAfterStrings(List.of("Batch Identification",
"Batch number:",
"Batch reference number:",
@ -875,8 +875,8 @@ rule "DOC.35.2: Animal Number 429"
FileAttribute(label == "OECD Number", value == "429")
$section: Section(
(
getHeadline().containsStringIgnoreCase("animal")
|| getHeadline().containsStringIgnoreCase("test system")
anyHeadlineContainsStringIgnoreCase("animal")
|| anyHeadlineContainsStringIgnoreCase("test system")
)
&& !getHeadline().containsString("selection")
&& containsStringIgnoreCase("number of animals")
@ -1126,22 +1126,6 @@ rule "X.0.0: remove Entity contained by Entity of same type"
end
// Rule unit: X.1
// Merges two distinct, intersecting RedactionEntity facts that share the same type and
// entityType into a single entity and removes the two originals.
// NOTE(review): this rule sits inside a diff hunk that shrinks from 22 to 6 lines, so it is
// presumably being deleted by the commit -- confirm against the repository.
rule "X.1.0: merge intersecting Entities of same type"
// Activation priority of this rule relative to other rules on the agenda.
salience 64
when
// First entity: binds its type and entityType for the join below; it must be active and have
// neither the resized nor the skipRemoveEntitiesContainedInLarger flag set.
$first: RedactionEntity($type: type, $entityType: entityType, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
// Second entity: a different fact (this != $first) that intersects $first, has the same type
// and entityType, and satisfies the same flag/activity conditions.
$second: RedactionEntity(intersects($first), type == $type, entityType == $entityType, this != $first, !resized, !skipRemoveEntitiesContainedInLarger, isActive())
then
// Build the combined entity from both matches, keeping their shared type and entityType.
// NOTE(review): mergedEntity is never inserted explicitly here; presumably byEntities
// registers it where needed -- confirm.
RedactionEntity mergedEntity = entityCreationService.byEntities(List.of($first, $second), $type, $entityType, document);
// Mark both originals as removed, recording this rule's identifier and a reason text.
$first.remove("X.1.0", "merge intersecting Entities of same type");
$second.remove("X.1.0", "merge intersecting Entities of same type");
// Take both original facts out of working memory.
retract($first);
retract($second);
// Notify the engine that every node the merged entity intersects has changed.
mergedEntity.getIntersectingNodes().forEach(node -> update(node));
end
//------------------------------------ File attributes rules ------------------------------------
// Rule unit: FA.1