RED-6009: Document Tree Structure
* Added test case for collecting all entities
This commit is contained in:
parent
9ed5c3c9d4
commit
e722b3df7a
@ -31,16 +31,37 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest {
|
||||
|
||||
|
||||
@Test
|
||||
public void assertTextBeforeAndTextAfterForParagraphCrafted() {
|
||||
public void assertCollectAllEntitiesWorks() {
|
||||
|
||||
DocumentGraph documentGraph = buildGraph("files/crafted document");
|
||||
String searchTerm = "Clarissa";
|
||||
createAndInsertEntity(documentGraph, "Clarissa");
|
||||
createAndInsertEntity(documentGraph, "Lastname");
|
||||
createAndInsertEntity(documentGraph, "David Ksenia");
|
||||
createAndInsertEntity(documentGraph, "Michael N.");
|
||||
createAndInsertEntity(documentGraph, "Page-Footer");
|
||||
createAndInsertEntity(documentGraph, "CTL/with dictionary entry 1234 with Slash");
|
||||
assertEquals(6, documentGraph.getEntities().size());
|
||||
}
|
||||
|
||||
|
||||
private TestEntity createAndInsertEntity(DocumentGraph documentGraph, String searchTerm) {
|
||||
|
||||
int start = documentGraph.getTextBlock().indexOf(searchTerm);
|
||||
assert start != -1;
|
||||
|
||||
Boundary boundary = new Boundary(start, start + searchTerm.length());
|
||||
TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123");
|
||||
entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents());
|
||||
return entityNode;
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void assertTextBeforeAndTextAfterForParagraphCrafted() {
|
||||
|
||||
DocumentGraph documentGraph = buildGraph("files/crafted document");
|
||||
String searchTerm = "Clarissa";
|
||||
TestEntity entityNode = createAndInsertEntity(documentGraph, searchTerm);
|
||||
|
||||
assertEquals("Expand to Hint ", entityNode.getTextBefore());
|
||||
assertEquals("’s Donut ←", entityNode.getTextAfter());
|
||||
@ -51,7 +72,7 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest {
|
||||
assertEquals(5, entityNode.getDeepestFullyContainingNode().getNumberOnPage());
|
||||
assertInstanceOf(ParagraphNode.class, entityNode.getDeepestFullyContainingNode());
|
||||
|
||||
assertSameOffsetInAllIntersectingNodes(searchTerm, start, entityNode);
|
||||
assertSameOffsetInAllIntersectingNodes(searchTerm, entityNode);
|
||||
}
|
||||
|
||||
|
||||
@ -60,12 +81,7 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest {
|
||||
|
||||
DocumentGraph documentGraph = buildGraph("files/crafted document");
|
||||
String searchTerm = "Rule 39:";
|
||||
int start = documentGraph.getTextBlock().indexOf(searchTerm);
|
||||
assert start != -1;
|
||||
|
||||
Boundary boundary = new Boundary(start, start + searchTerm.length());
|
||||
TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123");
|
||||
entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents());
|
||||
TestEntity entityNode = createAndInsertEntity(documentGraph, searchTerm);
|
||||
|
||||
assertEquals("", entityNode.getTextBefore());
|
||||
assertEquals(" Purity Hint", entityNode.getTextAfter());
|
||||
@ -75,7 +91,7 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest {
|
||||
assertEquals(6, entityNode.getDeepestFullyContainingNode().getNumberOnPage());
|
||||
assertInstanceOf(HeadlineNode.class, entityNode.getDeepestFullyContainingNode());
|
||||
|
||||
assertSameOffsetInAllIntersectingNodes(searchTerm, start, entityNode);
|
||||
assertSameOffsetInAllIntersectingNodes(searchTerm, entityNode);
|
||||
}
|
||||
|
||||
|
||||
@ -84,12 +100,7 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest {
|
||||
|
||||
DocumentGraph documentGraph = buildGraph("files/crafted document");
|
||||
String searchTerm = "1998";
|
||||
int start = documentGraph.getTextBlock().indexOf(searchTerm);
|
||||
assert start != -1;
|
||||
|
||||
Boundary boundary = new Boundary(start, start + searchTerm.length());
|
||||
TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123");
|
||||
entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents());
|
||||
TestEntity entityNode = createAndInsertEntity(documentGraph, searchTerm);
|
||||
|
||||
assertEquals("", entityNode.getTextBefore());
|
||||
assertEquals("", entityNode.getTextAfter());
|
||||
@ -99,7 +110,7 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest {
|
||||
assertEquals(15, entityNode.getDeepestFullyContainingNode().getNumberOnPage());
|
||||
assertInstanceOf(TableCellNode.class, entityNode.getDeepestFullyContainingNode());
|
||||
|
||||
assertSameOffsetInAllIntersectingNodes(searchTerm, start, entityNode);
|
||||
assertSameOffsetInAllIntersectingNodes(searchTerm, entityNode);
|
||||
}
|
||||
|
||||
|
||||
@ -171,12 +182,7 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest {
|
||||
|
||||
DocumentGraph documentGraph = buildGraph("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06");
|
||||
String searchTerm = "Cucurbit";
|
||||
int start = documentGraph.getTextBlock().indexOf(searchTerm);
|
||||
assert start != -1;
|
||||
|
||||
Boundary boundary = new Boundary(start, start + searchTerm.length());
|
||||
TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123");
|
||||
entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents());
|
||||
TestEntity entityNode = createAndInsertEntity(documentGraph, searchTerm);
|
||||
|
||||
assertEquals("except Cranberry; Vegetable, ", entityNode.getTextBefore());
|
||||
assertEquals(", Group 9;", entityNode.getTextAfter());
|
||||
@ -187,7 +193,7 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest {
|
||||
assertTrue(entityNode.getPages().stream().allMatch(pageNode -> pageNode.getNumber() == 10));
|
||||
assertInstanceOf(ParagraphNode.class, entityNode.getDeepestFullyContainingNode());
|
||||
|
||||
assertSameOffsetInAllIntersectingNodes(searchTerm, start, entityNode);
|
||||
assertSameOffsetInAllIntersectingNodes(searchTerm, entityNode);
|
||||
}
|
||||
|
||||
|
||||
@ -198,7 +204,7 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest {
|
||||
String searchTerm = "absorption, distribution, metabolism";
|
||||
int start = documentGraph.getTextBlock().indexOf(searchTerm);
|
||||
assert start != -1;
|
||||
start = documentGraph.getTextBlock().indexOf(searchTerm, start + 1);
|
||||
start = documentGraph.getTextBlock().indexOf(searchTerm, start);
|
||||
assert start != -1;
|
||||
|
||||
Boundary boundary = new Boundary(start, start + searchTerm.length());
|
||||
@ -215,7 +221,7 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest {
|
||||
assertTrue(entityNode.getPages().stream().allMatch(pageNode -> pageNode.getNumber() == 33));
|
||||
assertInstanceOf(HeadlineNode.class, entityNode.getDeepestFullyContainingNode());
|
||||
|
||||
assertSameOffsetInAllIntersectingNodes(searchTerm, start, entityNode);
|
||||
assertSameOffsetInAllIntersectingNodes(searchTerm, entityNode);
|
||||
}
|
||||
|
||||
|
||||
@ -224,12 +230,7 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest {
|
||||
|
||||
DocumentGraph documentGraph = buildGraph("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06");
|
||||
String searchTerm = "N-deacetylation product";
|
||||
int start = documentGraph.getTextBlock().indexOf(searchTerm);
|
||||
assert start != -1;
|
||||
|
||||
Boundary boundary = new Boundary(start, start + searchTerm.length());
|
||||
TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123");
|
||||
entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents());
|
||||
TestEntity entityNode = createAndInsertEntity(documentGraph, searchTerm);
|
||||
|
||||
assertEquals("2-[(2-(1-hydroxy-ethyl)-6methyl-phenyl-amino]propan-1-ol (", entityNode.getTextBefore());
|
||||
assertEquals(" of metabolite of", entityNode.getTextAfter());
|
||||
@ -242,18 +243,18 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest {
|
||||
|
||||
assertInstanceOf(TableCellNode.class, entityNode.getDeepestFullyContainingNode());
|
||||
|
||||
assertSameOffsetInAllIntersectingNodes(searchTerm, start, entityNode);
|
||||
assertSameOffsetInAllIntersectingNodes(searchTerm, entityNode);
|
||||
}
|
||||
|
||||
|
||||
private static void assertSameOffsetInAllIntersectingNodes(String searchTerm, int start, EntityNode entityNode) {
|
||||
private static void assertSameOffsetInAllIntersectingNodes(String searchTerm, EntityNode entityNode) {
|
||||
|
||||
List<Integer> paragraphStart = entityNode.getIntersectingNodes().stream()//
|
||||
.map(SemanticNode::buildTextBlock)//
|
||||
.map(textBlock -> textBlock.indexOf(searchTerm))//
|
||||
.toList();
|
||||
|
||||
paragraphStart.forEach(nodeStart -> assertEquals(start, nodeStart));
|
||||
paragraphStart.forEach(nodeStart -> assertEquals(entityNode.getBoundary().start(), nodeStart));
|
||||
}
|
||||
|
||||
|
||||
@ -272,7 +273,7 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest {
|
||||
assertTrue(pageNode.getEntities().contains(entityNode));
|
||||
assertTrue(documentGraph.getPages().stream().filter(page -> page != pageNode).noneMatch(page -> page.getEntities().contains(entityNode)));
|
||||
assertTrue(entityNode.getPages().contains(pageNode));
|
||||
assertSameOffsetInAllIntersectingNodes(searchTerm, start, entityNode);
|
||||
assertSameOffsetInAllIntersectingNodes(searchTerm, entityNode);
|
||||
assertTrue(entityNode.getIntersectingNodes().stream().allMatch(node -> node.getEntities().contains(entityNode)));
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user