From e722b3df7aab7876a7f63e31db622aba59b9dc1a Mon Sep 17 00:00:00 2001 From: Kilian Schuettler Date: Thu, 13 Apr 2023 12:03:18 +0200 Subject: [PATCH] RED-6009: Document Tree Structure *added testcase for collecting all entities --- .../DocumentGraphEntityInsertionTest.java | 73 ++++++++++--------- 1 file changed, 37 insertions(+), 36 deletions(-) diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphEntityInsertionTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphEntityInsertionTest.java index 7dfd8db..a5bf575 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphEntityInsertionTest.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/graph/DocumentGraphEntityInsertionTest.java @@ -31,16 +31,37 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest { @Test - public void assertTextBeforeAndTextAfterForParagraphCrafted() { + public void assertCollectAllEntitiesWorks() { DocumentGraph documentGraph = buildGraph("files/crafted document"); - String searchTerm = "Clarissa"; + createAndInsertEntity(documentGraph, "Clarissa"); + createAndInsertEntity(documentGraph, "Lastname"); + createAndInsertEntity(documentGraph, "David Ksenia"); + createAndInsertEntity(documentGraph, "Michael N."); + createAndInsertEntity(documentGraph, "Page-Footer"); + createAndInsertEntity(documentGraph, "CTL/with dictionary entry 1234 with Slash"); + assertEquals(6, documentGraph.getEntities().size()); + } + + + private TestEntity createAndInsertEntity(DocumentGraph documentGraph, String searchTerm) { + int start = documentGraph.getTextBlock().indexOf(searchTerm); assert start != -1; Boundary boundary = new Boundary(start, start + searchTerm.length()); TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123"); entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents()); + return entityNode; + } + + + @Test + public void assertTextBeforeAndTextAfterForParagraphCrafted() { + + DocumentGraph documentGraph = buildGraph("files/crafted document"); + String searchTerm = "Clarissa"; + TestEntity entityNode = createAndInsertEntity(documentGraph, searchTerm); assertEquals("Expand to Hint ", entityNode.getTextBefore()); assertEquals("’s Donut ←", entityNode.getTextAfter()); @@ -51,7 +72,7 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest { assertEquals(5, entityNode.getDeepestFullyContainingNode().getNumberOnPage()); assertInstanceOf(ParagraphNode.class, entityNode.getDeepestFullyContainingNode()); - assertSameOffsetInAllIntersectingNodes(searchTerm, start, entityNode); + assertSameOffsetInAllIntersectingNodes(searchTerm, entityNode); } @@ -60,12 +81,7 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest { DocumentGraph documentGraph = buildGraph("files/crafted document"); String searchTerm = "Rule 39:"; - int start = documentGraph.getTextBlock().indexOf(searchTerm); - assert start != -1; - - Boundary boundary = new Boundary(start, start + searchTerm.length()); - TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123"); - entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents()); + TestEntity entityNode = createAndInsertEntity(documentGraph, searchTerm); assertEquals("", entityNode.getTextBefore()); assertEquals(" Purity Hint", entityNode.getTextAfter()); @@ -75,7 +91,7 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest { assertEquals(6, entityNode.getDeepestFullyContainingNode().getNumberOnPage()); assertInstanceOf(HeadlineNode.class, entityNode.getDeepestFullyContainingNode()); - assertSameOffsetInAllIntersectingNodes(searchTerm, start, entityNode); + assertSameOffsetInAllIntersectingNodes(searchTerm, entityNode); } @@ -84,12 +100,7 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest { DocumentGraph documentGraph = buildGraph("files/crafted document"); String searchTerm = "1998"; - int start = documentGraph.getTextBlock().indexOf(searchTerm); - assert start != -1; - - Boundary boundary = new Boundary(start, start + searchTerm.length()); - TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123"); - entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents()); + TestEntity entityNode = createAndInsertEntity(documentGraph, searchTerm); assertEquals("", entityNode.getTextBefore()); assertEquals("", entityNode.getTextAfter()); @@ -99,7 +110,7 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest { assertEquals(15, entityNode.getDeepestFullyContainingNode().getNumberOnPage()); assertInstanceOf(TableCellNode.class, entityNode.getDeepestFullyContainingNode()); - assertSameOffsetInAllIntersectingNodes(searchTerm, start, entityNode); + assertSameOffsetInAllIntersectingNodes(searchTerm, entityNode); } @@ -171,12 +182,7 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest { DocumentGraph documentGraph = buildGraph("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06"); String searchTerm = "Cucurbit"; - int start = documentGraph.getTextBlock().indexOf(searchTerm); - assert start != -1; - - Boundary boundary = new Boundary(start, start + searchTerm.length()); - TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123"); - entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents()); + TestEntity entityNode = createAndInsertEntity(documentGraph, searchTerm); assertEquals("except Cranberry; Vegetable, ", entityNode.getTextBefore()); assertEquals(", Group 9;", entityNode.getTextAfter()); @@ -187,7 +193,7 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest { assertTrue(entityNode.getPages().stream().allMatch(pageNode -> pageNode.getNumber() == 10)); assertInstanceOf(ParagraphNode.class, entityNode.getDeepestFullyContainingNode()); - assertSameOffsetInAllIntersectingNodes(searchTerm, start, entityNode); + assertSameOffsetInAllIntersectingNodes(searchTerm, entityNode); } @@ -198,7 +204,7 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest { String searchTerm = "absorption, distribution, metabolism"; int start = documentGraph.getTextBlock().indexOf(searchTerm); assert start != -1; - start = documentGraph.getTextBlock().indexOf(searchTerm, start + 1); + start = documentGraph.getTextBlock().indexOf(searchTerm, start); assert start != -1; Boundary boundary = new Boundary(start, start + searchTerm.length()); @@ -215,7 +221,7 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest { assertTrue(entityNode.getPages().stream().allMatch(pageNode -> pageNode.getNumber() == 33)); assertInstanceOf(HeadlineNode.class, entityNode.getDeepestFullyContainingNode()); - assertSameOffsetInAllIntersectingNodes(searchTerm, start, entityNode); + assertSameOffsetInAllIntersectingNodes(searchTerm, entityNode); } @@ -224,12 +230,7 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest { DocumentGraph documentGraph = buildGraph("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06"); String searchTerm = "N-deacetylation product"; - int start = documentGraph.getTextBlock().indexOf(searchTerm); - assert start != -1; - - Boundary boundary = new Boundary(start, start + searchTerm.length()); - TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123"); - entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents()); + TestEntity entityNode = createAndInsertEntity(documentGraph, searchTerm); assertEquals("2-[(2-(1-hydroxy-ethyl)-6methyl-phenyl-amino]propan-1-ol (", entityNode.getTextBefore()); assertEquals(" of metabolite of", entityNode.getTextAfter()); @@ -242,18 +243,18 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest { assertInstanceOf(TableCellNode.class, entityNode.getDeepestFullyContainingNode()); - assertSameOffsetInAllIntersectingNodes(searchTerm, start, entityNode); + assertSameOffsetInAllIntersectingNodes(searchTerm, entityNode); } - private static void assertSameOffsetInAllIntersectingNodes(String searchTerm, int start, EntityNode entityNode) { + private static void assertSameOffsetInAllIntersectingNodes(String searchTerm, EntityNode entityNode) { List paragraphStart = entityNode.getIntersectingNodes().stream()// .map(SemanticNode::buildTextBlock)// .map(textBlock -> textBlock.indexOf(searchTerm))// .toList(); - paragraphStart.forEach(nodeStart -> assertEquals(start, nodeStart)); + paragraphStart.forEach(nodeStart -> assertEquals(entityNode.getBoundary().start(), nodeStart)); } @@ -272,7 +273,7 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest { assertTrue(pageNode.getEntities().contains(entityNode)); assertTrue(documentGraph.getPages().stream().filter(page -> page != pageNode).noneMatch(page -> page.getEntities().contains(entityNode))); assertTrue(entityNode.getPages().contains(pageNode)); - assertSameOffsetInAllIntersectingNodes(searchTerm, start, entityNode); + assertSameOffsetInAllIntersectingNodes(searchTerm, entityNode); assertTrue(entityNode.getIntersectingNodes().stream().allMatch(node -> node.getEntities().contains(entityNode))); }