RED-6009: Document Tree Structure

* Added a test case for collecting all entities
This commit is contained in:
Kilian Schuettler 2023-04-13 12:03:18 +02:00
parent 9ed5c3c9d4
commit e722b3df7a

View File

@ -31,16 +31,37 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest {
@Test
public void assertTextBeforeAndTextAfterForParagraphCrafted() {
public void assertCollectAllEntitiesWorks() {
DocumentGraph documentGraph = buildGraph("files/crafted document");
String searchTerm = "Clarissa";
createAndInsertEntity(documentGraph, "Clarissa");
createAndInsertEntity(documentGraph, "Lastname");
createAndInsertEntity(documentGraph, "David Ksenia");
createAndInsertEntity(documentGraph, "Michael N.");
createAndInsertEntity(documentGraph, "Page-Footer");
createAndInsertEntity(documentGraph, "CTL/with dictionary entry 1234 with Slash");
assertEquals(6, documentGraph.getEntities().size());
}
/**
 * Locates {@code searchTerm} in the text block of {@code documentGraph}, builds a
 * {@link TestEntity} whose boundary covers that match and adds it to the graph via
 * {@code entityInsertionService}.
 *
 * @param documentGraph graph whose text block is searched and whose table of contents
 *                      is handed to the insertion service
 * @param searchTerm    text to look up with {@code getTextBlock().indexOf(searchTerm)}
 * @return the entity that was created and inserted
 * @throws AssertionError if the lookup returns {@code -1}, i.e. the term was not found
 */
private TestEntity createAndInsertEntity(DocumentGraph documentGraph, String searchTerm) {

    int start = documentGraph.getTextBlock().indexOf(searchTerm);
    // Fail explicitly instead of relying on the `assert` keyword: that check is skipped
    // when the JVM runs without -ea, and a Boundary starting at -1 would then be inserted.
    if (start == -1) {
        throw new AssertionError("Search term not found in document text: \"" + searchTerm + "\"");
    }

    Boundary boundary = new Boundary(start, start + searchTerm.length());
    // NOTE(review): the two "123" arguments look like placeholder values; their meaning is not visible here.
    TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123");
    entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents());
    return entityNode;
}
@Test
public void assertTextBeforeAndTextAfterForParagraphCrafted() {
DocumentGraph documentGraph = buildGraph("files/crafted document");
String searchTerm = "Clarissa";
TestEntity entityNode = createAndInsertEntity(documentGraph, searchTerm);
assertEquals("Expand to Hint ", entityNode.getTextBefore());
assertEquals("s Donut ←", entityNode.getTextAfter());
@ -51,7 +72,7 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest {
assertEquals(5, entityNode.getDeepestFullyContainingNode().getNumberOnPage());
assertInstanceOf(ParagraphNode.class, entityNode.getDeepestFullyContainingNode());
assertSameOffsetInAllIntersectingNodes(searchTerm, start, entityNode);
assertSameOffsetInAllIntersectingNodes(searchTerm, entityNode);
}
@ -60,12 +81,7 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest {
DocumentGraph documentGraph = buildGraph("files/crafted document");
String searchTerm = "Rule 39:";
int start = documentGraph.getTextBlock().indexOf(searchTerm);
assert start != -1;
Boundary boundary = new Boundary(start, start + searchTerm.length());
TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123");
entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents());
TestEntity entityNode = createAndInsertEntity(documentGraph, searchTerm);
assertEquals("", entityNode.getTextBefore());
assertEquals(" Purity Hint", entityNode.getTextAfter());
@ -75,7 +91,7 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest {
assertEquals(6, entityNode.getDeepestFullyContainingNode().getNumberOnPage());
assertInstanceOf(HeadlineNode.class, entityNode.getDeepestFullyContainingNode());
assertSameOffsetInAllIntersectingNodes(searchTerm, start, entityNode);
assertSameOffsetInAllIntersectingNodes(searchTerm, entityNode);
}
@ -84,12 +100,7 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest {
DocumentGraph documentGraph = buildGraph("files/crafted document");
String searchTerm = "1998";
int start = documentGraph.getTextBlock().indexOf(searchTerm);
assert start != -1;
Boundary boundary = new Boundary(start, start + searchTerm.length());
TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123");
entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents());
TestEntity entityNode = createAndInsertEntity(documentGraph, searchTerm);
assertEquals("", entityNode.getTextBefore());
assertEquals("", entityNode.getTextAfter());
@ -99,7 +110,7 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest {
assertEquals(15, entityNode.getDeepestFullyContainingNode().getNumberOnPage());
assertInstanceOf(TableCellNode.class, entityNode.getDeepestFullyContainingNode());
assertSameOffsetInAllIntersectingNodes(searchTerm, start, entityNode);
assertSameOffsetInAllIntersectingNodes(searchTerm, entityNode);
}
@ -171,12 +182,7 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest {
DocumentGraph documentGraph = buildGraph("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06");
String searchTerm = "Cucurbit";
int start = documentGraph.getTextBlock().indexOf(searchTerm);
assert start != -1;
Boundary boundary = new Boundary(start, start + searchTerm.length());
TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123");
entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents());
TestEntity entityNode = createAndInsertEntity(documentGraph, searchTerm);
assertEquals("except Cranberry; Vegetable, ", entityNode.getTextBefore());
assertEquals(", Group 9;", entityNode.getTextAfter());
@ -187,7 +193,7 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest {
assertTrue(entityNode.getPages().stream().allMatch(pageNode -> pageNode.getNumber() == 10));
assertInstanceOf(ParagraphNode.class, entityNode.getDeepestFullyContainingNode());
assertSameOffsetInAllIntersectingNodes(searchTerm, start, entityNode);
assertSameOffsetInAllIntersectingNodes(searchTerm, entityNode);
}
@ -198,7 +204,7 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest {
String searchTerm = "absorption, distribution, metabolism";
int start = documentGraph.getTextBlock().indexOf(searchTerm);
assert start != -1;
start = documentGraph.getTextBlock().indexOf(searchTerm, start + 1);
start = documentGraph.getTextBlock().indexOf(searchTerm, start);
assert start != -1;
Boundary boundary = new Boundary(start, start + searchTerm.length());
@ -215,7 +221,7 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest {
assertTrue(entityNode.getPages().stream().allMatch(pageNode -> pageNode.getNumber() == 33));
assertInstanceOf(HeadlineNode.class, entityNode.getDeepestFullyContainingNode());
assertSameOffsetInAllIntersectingNodes(searchTerm, start, entityNode);
assertSameOffsetInAllIntersectingNodes(searchTerm, entityNode);
}
@ -224,12 +230,7 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest {
DocumentGraph documentGraph = buildGraph("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06");
String searchTerm = "N-deacetylation product";
int start = documentGraph.getTextBlock().indexOf(searchTerm);
assert start != -1;
Boundary boundary = new Boundary(start, start + searchTerm.length());
TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123");
entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents());
TestEntity entityNode = createAndInsertEntity(documentGraph, searchTerm);
assertEquals("2-[(2-(1-hydroxy-ethyl)-6methyl-phenyl-amino]propan-1-ol (", entityNode.getTextBefore());
assertEquals(" of metabolite of", entityNode.getTextAfter());
@ -242,18 +243,18 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest {
assertInstanceOf(TableCellNode.class, entityNode.getDeepestFullyContainingNode());
assertSameOffsetInAllIntersectingNodes(searchTerm, start, entityNode);
assertSameOffsetInAllIntersectingNodes(searchTerm, entityNode);
}
private static void assertSameOffsetInAllIntersectingNodes(String searchTerm, int start, EntityNode entityNode) {
private static void assertSameOffsetInAllIntersectingNodes(String searchTerm, EntityNode entityNode) {
List<Integer> paragraphStart = entityNode.getIntersectingNodes().stream()//
.map(SemanticNode::buildTextBlock)//
.map(textBlock -> textBlock.indexOf(searchTerm))//
.toList();
paragraphStart.forEach(nodeStart -> assertEquals(start, nodeStart));
paragraphStart.forEach(nodeStart -> assertEquals(entityNode.getBoundary().start(), nodeStart));
}
@ -272,7 +273,7 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest {
assertTrue(pageNode.getEntities().contains(entityNode));
assertTrue(documentGraph.getPages().stream().filter(page -> page != pageNode).noneMatch(page -> page.getEntities().contains(entityNode)));
assertTrue(entityNode.getPages().contains(pageNode));
assertSameOffsetInAllIntersectingNodes(searchTerm, start, entityNode);
assertSameOffsetInAllIntersectingNodes(searchTerm, entityNode);
assertTrue(entityNode.getIntersectingNodes().stream().allMatch(node -> node.getEntities().contains(entityNode)));
}