RED-6009: Document Tree Structure

*fixed dependency issues
This commit is contained in:
Kilian Schuettler 2023-04-12 13:55:54 +02:00
parent aac0259caf
commit 2ed617bb03
25 changed files with 310 additions and 220 deletions

6
.gitignore vendored
View File

@ -4,6 +4,12 @@ target/
!**/src/main/**/target/ !**/src/main/**/target/
!**/src/test/**/target/ !**/src/test/**/target/
### maven build ###
*.class
/out/
**/out/
**/target/
### STS ### ### STS ###
.apt_generated .apt_generated
.classpath .classpath

View File

@ -2,8 +2,8 @@
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" <project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://maven.apache.org/POM/4.0.0" xmlns="http://maven.apache.org/POM/4.0.0"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<modelVersion>4.0.0</modelVersion>
<parent> <parent>
<groupId>com.knecon.fforesight</groupId> <groupId>com.knecon.fforesight</groupId>
@ -12,9 +12,6 @@
</parent> </parent>
<artifactId>layoutparser-service-internal-api</artifactId> <artifactId>layoutparser-service-internal-api</artifactId>
<version>1.0.0</version>
<packaging>pom</packaging>
<dependencies> <dependencies>
<dependency> <dependency>
@ -29,21 +26,4 @@
</dependency> </dependency>
</dependencies> </dependencies>
<build>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<configuration>
<excludes>
<exclude>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
</exclude>
</excludes>
</configuration>
</plugin>
</plugins>
</build>
</project> </project>

View File

@ -5,8 +5,6 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.stream.Stream; import java.util.stream.Stream;
import javax.management.openmbean.InvalidKeyException;
import com.knecon.fforesight.service.layoutparser.internal.api.graph.nodes.NodeType; import com.knecon.fforesight.service.layoutparser.internal.api.graph.nodes.NodeType;
import lombok.AccessLevel; import lombok.AccessLevel;
@ -23,15 +21,15 @@ import lombok.experimental.FieldDefaults;
@FieldDefaults(level = AccessLevel.PRIVATE) @FieldDefaults(level = AccessLevel.PRIVATE)
public class TableOfContentsData { public class TableOfContentsData {
List<EntryData> entries; EntryData root;
public EntryData get(List<Integer> tocId) { public EntryData get(List<Integer> tocId) {
if (tocId.size() < 1) { if (tocId.isEmpty()) {
throw new InvalidKeyException(String.format("ClassificationSection Identifier: \"%s\" is not valid.", tocId)); return root;
} }
EntryData entry = entries.get(tocId.get(0)); EntryData entry = root.subEntries.get(tocId.get(0));
for (int id : tocId.subList(1, tocId.size())) { for (int id : tocId.subList(1, tocId.size())) {
entry = entry.subEntries().get(id); entry = entry.subEntries().get(id);
} }
@ -41,7 +39,7 @@ public class TableOfContentsData {
public Stream<EntryData> streamAllEntries() { public Stream<EntryData> streamAllEntries() {
return entries.stream().flatMap(TableOfContentsData::flatten); return Stream.concat(Stream.of(root), root.subEntries.stream()).flatMap(TableOfContentsData::flatten);
} }

View File

@ -10,6 +10,7 @@ import java.util.stream.Collectors;
import java.util.stream.Stream; import java.util.stream.Stream;
import com.knecon.fforesight.service.layoutparser.internal.api.graph.entity.EntityNode; import com.knecon.fforesight.service.layoutparser.internal.api.graph.entity.EntityNode;
import com.knecon.fforesight.service.layoutparser.internal.api.graph.nodes.NodeType;
import com.knecon.fforesight.service.layoutparser.internal.api.graph.nodes.PageNode; import com.knecon.fforesight.service.layoutparser.internal.api.graph.nodes.PageNode;
import com.knecon.fforesight.service.layoutparser.internal.api.graph.nodes.SectionNode; import com.knecon.fforesight.service.layoutparser.internal.api.graph.nodes.SectionNode;
import com.knecon.fforesight.service.layoutparser.internal.api.graph.nodes.SemanticNode; import com.knecon.fforesight.service.layoutparser.internal.api.graph.nodes.SemanticNode;
@ -20,11 +21,13 @@ import lombok.AccessLevel;
import lombok.AllArgsConstructor; import lombok.AllArgsConstructor;
import lombok.Builder; import lombok.Builder;
import lombok.Data; import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.experimental.FieldDefaults; import lombok.experimental.FieldDefaults;
@Data @Data
@Builder @Builder
@AllArgsConstructor @AllArgsConstructor
@NoArgsConstructor
@FieldDefaults(level = AccessLevel.PRIVATE) @FieldDefaults(level = AccessLevel.PRIVATE)
public class DocumentGraph implements SemanticNode { public class DocumentGraph implements SemanticNode {
@ -42,7 +45,7 @@ public class DocumentGraph implements SemanticNode {
public List<SectionNode> getMainSections() { public List<SectionNode> getMainSections() {
return tableOfContents.entries.stream().filter(entry -> entry.node() instanceof SectionNode).map(entry -> (SectionNode) entry.node()).collect(Collectors.toList()); return streamChildren().filter(node -> node instanceof SectionNode).map(node -> (SectionNode) node).collect(Collectors.toList());
} }
@ -74,14 +77,14 @@ public class DocumentGraph implements SemanticNode {
private Stream<SemanticNode> streamAllNodes() { private Stream<SemanticNode> streamAllNodes() {
return tableOfContents.streamEntriesInOrder().map(TableOfContents.Entry::node); return tableOfContents.streamAllEntriesInOrder().map(TableOfContents.Entry::node);
} }
@Override @Override
public String toString() { public String toString() {
return tableOfContents.toString(); return NodeType.DOCUMENT + ": " + buildTextBlock().buildSummary();
} }

View File

@ -1,10 +1,11 @@
package com.knecon.fforesight.service.layoutparser.internal.api.graph; package com.knecon.fforesight.service.layoutparser.internal.api.graph;
import static java.lang.String.format;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.util.Collections; import java.util.Collections;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.NoSuchElementException;
import java.util.stream.Stream; import java.util.stream.Stream;
import com.google.common.hash.Hashing; import com.google.common.hash.Hashing;
@ -19,22 +20,22 @@ import lombok.Data;
@Data @Data
public class TableOfContents { public class TableOfContents {
List<Entry> entries; private final Entry root;
public TableOfContents() { public TableOfContents(DocumentGraph documentGraph) {
entries = new LinkedList<>(); root = Entry.builder().tocId(Collections.emptyList()).type(NodeType.DOCUMENT).children(new LinkedList<>()).node(documentGraph).build();
} }
public TextBlock buildTextBlock() { public TextBlock buildTextBlock() {
return streamEntriesInOrder().map(Entry::node).filter(SemanticNode::isTerminal).map(SemanticNode::getTerminalTextBlock).collect(new TextBlockCollector()); return streamAllEntriesInOrder().map(Entry::node).filter(SemanticNode::isTerminal).map(SemanticNode::getTerminalTextBlock).collect(new TextBlockCollector());
} }
public List<Integer> createNewEntryAndReturnId(NodeType nodeType, SemanticNode node) { public List<Integer> createNewMainEntryAndReturnId(NodeType nodeType, SemanticNode node) {
return createNewChildEntryAndReturnId(Collections.emptyList(), nodeType, node); return createNewChildEntryAndReturnId(Collections.emptyList(), nodeType, node);
} }
@ -42,27 +43,25 @@ public class TableOfContents {
public List<Integer> createNewChildEntryAndReturnId(List<Integer> parentId, NodeType nodeType, SemanticNode node) { public List<Integer> createNewChildEntryAndReturnId(List<Integer> parentId, NodeType nodeType, SemanticNode node) {
List<Integer> newId; if (!entryExists(parentId)) {
if (entryExists(parentId)) { throw new UnsupportedOperationException(format("parentId %s does not exist!", parentId));
Entry parent = getEntryById(parentId);
newId = new LinkedList<>(parentId);
newId.add(parent.children().size());
parent.children().add(Entry.builder().tocId(newId).node(node).type(nodeType).children(new LinkedList<>()).build());
} else {
newId = List.of(entries.size());
entries.add(Entry.builder().tocId(newId).node(node).type(nodeType).children(new LinkedList<>()).build());
} }
Entry parent = getEntryById(parentId);
List<Integer> newId = new LinkedList<>(parentId);
newId.add(parent.children().size());
parent.children().add(Entry.builder().tocId(newId).node(node).type(nodeType).children(new LinkedList<>()).build());
return newId; return newId;
} }
private boolean entryExists(List<Integer> tocId) { private boolean entryExists(List<Integer> tocId) {
if (tocId.size() < 1) { if (tocId.isEmpty()) {
return false; return root != null;
} }
Entry entry = entries.get(tocId.get(0)); Entry entry = root.children.get(tocId.get(0));
for (int id : tocId.subList(1, tocId.size())) { for (int id : tocId.subList(1, tocId.size())) {
if (id >= entry.children.size() || 0 > id) { if (id >= entry.children.size() || 0 > id) {
return false; return false;
@ -75,22 +74,17 @@ public class TableOfContents {
public Entry getParentEntryById(List<Integer> tocId) { public Entry getParentEntryById(List<Integer> tocId) {
List<Integer> parentIds = getParentId(tocId); return getEntryById(getParentId(tocId));
if (parentIds.size() < 1) {
throw new NoSuchElementException(String.format("Node with tocId \"%s\" has no parent!", tocId));
}
return getEntryById(parentIds);
} }
public boolean hasParentById(List<Integer> tocId) { public boolean hasParentById(List<Integer> tocId) {
List<Integer> parentId = getParentId(tocId); return entryExists(getParentId(tocId));
return entryExists(parentId);
} }
public Stream<SemanticNode> streamChildren(List<Integer> tocId) { public Stream<SemanticNode> streamChildrenNodes(List<Integer> tocId) {
return getEntryById(tocId).children().stream().map(Entry::node); return getEntryById(tocId).children().stream().map(Entry::node);
} }
@ -98,13 +92,22 @@ public class TableOfContents {
private static List<Integer> getParentId(List<Integer> tocId) { private static List<Integer> getParentId(List<Integer> tocId) {
if (tocId.isEmpty()) {
throw new UnsupportedOperationException("Root has no parent!");
}
if (tocId.size() < 2) {
return Collections.emptyList();
}
return tocId.subList(0, tocId.size() - 1); return tocId.subList(0, tocId.size() - 1);
} }
public Entry getEntryById(List<Integer> tocId) { public Entry getEntryById(List<Integer> tocId) {
Entry entry = entries.get(tocId.get(0)); if (tocId.isEmpty()) {
return root;
}
Entry entry = root.children.get(tocId.get(0));
for (int id : tocId.subList(1, tocId.size())) { for (int id : tocId.subList(1, tocId.size())) {
entry = entry.children().get(id); entry = entry.children().get(id);
} }
@ -112,13 +115,19 @@ public class TableOfContents {
} }
public Stream<Entry> streamEntriesInOrder() { public Stream<Entry> streamMainEntries() {
return entries.stream().flatMap(TableOfContents::flatten); return root.children.stream();
} }
public Stream<Entry> streamSubEntriesInOrder(List<Integer> parentId) { public Stream<Entry> streamAllEntriesInOrder() {
return Stream.of(root).flatMap(TableOfContents::flatten);
}
public Stream<Entry> streamAllSubEntriesInOrder(List<Integer> parentId) {
return Stream.of(getEntryById(parentId)).flatMap(TableOfContents::flatten); return Stream.of(getEntryById(parentId)).flatMap(TableOfContents::flatten);
} }
@ -127,13 +136,13 @@ public class TableOfContents {
@Override @Override
public String toString() { public String toString() {
return String.join("\n", streamEntriesInOrder().map(Entry::toString).toList()); return String.join("\n", streamAllEntriesInOrder().map(Entry::toString).toList());
} }
public String toString(List<Integer> id) { public String toString(List<Integer> id) {
return String.join("\n", streamSubEntriesInOrder(id).map(Entry::toString).toList()); return String.join("\n", streamAllSubEntriesInOrder(id).map(Entry::toString).toList());
} }

View File

@ -1,6 +1,7 @@
package com.knecon.fforesight.service.layoutparser.internal.api.graph.nodes; package com.knecon.fforesight.service.layoutparser.internal.api.graph.nodes;
public enum NodeType { public enum NodeType {
DOCUMENT,
SECTION, SECTION,
HEADLINE, HEADLINE,
PARAGRAPH, PARAGRAPH,

View File

@ -205,7 +205,7 @@ public interface SemanticNode {
*/ */
default Stream<SemanticNode> streamChildren() { default Stream<SemanticNode> streamChildren() {
return getTableOfContents().streamChildren(getTocId()); return getTableOfContents().streamChildrenNodes(getTocId());
} }
@ -216,7 +216,7 @@ public interface SemanticNode {
*/ */
default Stream<SemanticNode> streamAllSubNodes() { default Stream<SemanticNode> streamAllSubNodes() {
return getTableOfContents().streamSubEntriesInOrder(getTocId()).map(TableOfContents.Entry::node); return getTableOfContents().streamAllSubEntriesInOrder(getTocId()).map(TableOfContents.Entry::node);
} }

View File

@ -48,7 +48,7 @@ public class DocumentDataMapper {
private TableOfContentsData toTableOfContentsData(TableOfContents tableOfContents) { private TableOfContentsData toTableOfContentsData(TableOfContents tableOfContents) {
return new TableOfContentsData(tableOfContents.getEntries().stream().map(this::toEntryData).toList()); return new TableOfContentsData(toEntryData(tableOfContents.getRoot()));
} }

View File

@ -38,8 +38,10 @@ public class DocumentGraphMapper {
public DocumentGraph toDocumentGraph(DocumentData documentData) { public DocumentGraph toDocumentGraph(DocumentData documentData) {
DocumentGraph documentGraph = new DocumentGraph();
Context context = new Context(documentData, Context context = new Context(documentData,
new TableOfContents(), new TableOfContents(documentGraph),
new LinkedList<>(), new LinkedList<>(),
new LinkedList<>(), new LinkedList<>(),
Arrays.stream(documentData.getAtomicTextBlocks()).toList(), Arrays.stream(documentData.getAtomicTextBlocks()).toList(),
@ -47,13 +49,12 @@ public class DocumentGraphMapper {
context.pages.addAll(Arrays.stream(documentData.getPages()).map(this::buildPage).toList()); context.pages.addAll(Arrays.stream(documentData.getPages()).map(this::buildPage).toList());
context.tableOfContents.setEntries(buildEntries(documentData.getTableOfContents().getEntries(), context)); context.tableOfContents.getRoot().children().addAll(buildEntries(documentData.getTableOfContents().getRoot().subEntries(), context));
documentGraph.setTableOfContents(context.tableOfContents);
documentGraph.setPages(new HashSet<>(context.pages));
documentGraph.setNumberOfPages(documentData.getPages().length);
DocumentGraph documentGraph = DocumentGraph.builder()
.numberOfPages(documentData.getPages().length)
.pages(new HashSet<>(context.pages))
.tableOfContents(context.tableOfContents)
.build();
documentGraph.setTextBlock(documentGraph.buildTextBlock()); documentGraph.setTextBlock(documentGraph.buildTextBlock());
return documentGraph; return documentGraph;
} }

View File

@ -1,5 +1,6 @@
package com.knecon.fforesight.service.layoutparser.internal.api.services; package com.knecon.fforesight.service.layoutparser.internal.api.services;
import java.util.Collections;
import java.util.NoSuchElementException; import java.util.NoSuchElementException;
import java.util.Set; import java.util.Set;
@ -21,9 +22,7 @@ public class EntityInsertionService {
public void addEntityToGraph(EntityNode entity, TableOfContents tableOfContents) { public void addEntityToGraph(EntityNode entity, TableOfContents tableOfContents) {
try { try {
SemanticNode containingNode = tableOfContents.getEntries() SemanticNode containingNode = tableOfContents.streamChildrenNodes(Collections.emptyList())
.stream()
.map(TableOfContents.Entry::node)
.filter(node -> node.buildTextBlock().containsBoundary(entity.getBoundary())) .filter(node -> node.buildTextBlock().containsBoundary(entity.getBoundary()))
.findFirst() .findFirst()
.orElseThrow(() -> new NoSuchElementException("No containing Node found!")); .orElseThrow(() -> new NoSuchElementException("No containing Node found!"));
@ -37,7 +36,6 @@ public class EntityInsertionService {
addToNodeEntitySets(entity); addToNodeEntitySets(entity);
} catch (NoSuchElementException e) { } catch (NoSuchElementException e) {
entityEnrichmentService.enrichEntity(entity, tableOfContents.buildTextBlock());
entity.removeFromGraph(); entity.removeFromGraph();
} }
} }

View File

@ -10,94 +10,78 @@
</parent> </parent>
<artifactId>layoutparser-service-processor</artifactId> <artifactId>layoutparser-service-processor</artifactId>
<version>1.0.0</version>
<dependencies> <dependencies>
<dependency> <dependency>
<groupId>com.iqser.red.service</groupId> <groupId>com.iqser.red.service</groupId>
<artifactId>persistence-service-internal-api-v1</artifactId> <artifactId>persistence-service-internal-api-v1</artifactId>
<version>2.36.0</version> <version>2.36.0</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.knecon.fforesight</groupId> <groupId>com.knecon.fforesight</groupId>
<artifactId>layoutparser-service-internal-api</artifactId> <artifactId>layoutparser-service-internal-api</artifactId>
<version>${project.version}</version> <version>${project.version}</version>
</dependency> </dependency>
<dependency>
<groupId>com.iqser.red.commons</groupId>
<artifactId>spring-commons</artifactId>
<version>6.2.0</version>
</dependency>
<dependency> <dependency>
<groupId>com.iqser.red.commons</groupId> <groupId>com.iqser.red.commons</groupId>
<artifactId>storage-commons</artifactId> <artifactId>storage-commons</artifactId>
<version>1.13.0</version> <version>1.13.0</version>
</dependency> </dependency>
<dependency>
<groupId>com.iqser.red.commons</groupId>
<artifactId>spring-commons</artifactId>
<version>6.2.0</version>
</dependency>
<dependency> <dependency>
<groupId>com.dslplatform</groupId> <groupId>com.dslplatform</groupId>
<artifactId>dsl-json-java8</artifactId> <artifactId>dsl-json-java8</artifactId>
<version>1.10.0</version> <version>1.10.0</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.pdfbox</groupId> <groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId> <artifactId>pdfbox</artifactId>
<version>${pdfbox.version}</version> <version>${pdfbox.version}</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.pdfbox</groupId> <groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox-tools</artifactId> <artifactId>pdfbox-tools</artifactId>
<version>${pdfbox.version}</version> <version>${pdfbox.version}</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.google.guava</groupId> <groupId>com.google.guava</groupId>
<artifactId>guava</artifactId> <artifactId>guava</artifactId>
<version>31.1-jre</version> <version>31.1-jre</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.fasterxml.jackson.module</groupId> <groupId>com.fasterxml.jackson.module</groupId>
<artifactId>jackson-module-afterburner</artifactId> <artifactId>jackson-module-afterburner</artifactId>
<version>${jackson.version}</version> <version>${jackson.version}</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.fasterxml.jackson.datatype</groupId> <groupId>com.fasterxml.jackson.datatype</groupId>
<artifactId>jackson-datatype-jsr310</artifactId> <artifactId>jackson-datatype-jsr310</artifactId>
<version>${jackson.version}</version> <version>${jackson.version}</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.springframework.boot</groupId> <groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-security</artifactId> <artifactId>spring-boot-starter-security</artifactId>
<version>${spring.version}</version> <version>${spring.version}</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.springframework.boot</groupId> <groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId> <artifactId>spring-boot-starter-web</artifactId>
<version>${spring.version}</version> <version>${spring.version}</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.projectlombok</groupId> <groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId> <artifactId>lombok</artifactId>
<optional>true</optional> <optional>true</optional>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.springframework.cloud</groupId> <groupId>org.springframework.cloud</groupId>
<artifactId>spring-cloud-starter-openfeign</artifactId> <artifactId>spring-cloud-starter-openfeign</artifactId>
<version>4.0.2</version> <version>4.0.2</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.springframework.boot</groupId> <groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-amqp</artifactId> <artifactId>spring-boot-starter-amqp</artifactId>
@ -105,22 +89,6 @@
</dependency> </dependency>
</dependencies> </dependencies>
<build>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<configuration>
<excludes>
<exclude>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
</exclude>
</excludes>
</configuration>
</plugin>
</plugins>
</build>
<repositories> <repositories>
<repository> <repository>
<id>spring-milestones</id> <id>spring-milestones</id>

View File

@ -52,15 +52,17 @@ public class DocumentGraphFactory {
public DocumentGraph buildDocumentGraph(ClassificationDocument document) { public DocumentGraph buildDocumentGraph(ClassificationDocument document) {
TextBlockFactory textBlockFactory = new TextBlockFactory(); TextBlockFactory textBlockFactory = new TextBlockFactory();
Context context = new Context(new TableOfContents(), new HashMap<>(), new LinkedList<>(), new LinkedList<>(), textBlockFactory); DocumentGraph documentGraph = new DocumentGraph();
Context context = new Context(new TableOfContents(documentGraph), new HashMap<>(), new LinkedList<>(), new LinkedList<>(), textBlockFactory);
document.getPages().stream().map(this::buildPage).forEach(page -> context.pages().put(page, new AtomicInteger(1))); document.getPages().stream().map(this::buildPage).forEach(page -> context.pages().put(page, new AtomicInteger(1)));
document.getSections().stream().flatMap(section -> section.getImages().stream()).forEach(image -> context.images().add(image)); document.getSections().stream().flatMap(section -> section.getImages().stream()).forEach(image -> context.images().add(image));
addSections(document, context); addSections(document, context);
addHeaderAndFooterToEachPage(document, context); addHeaderAndFooterToEachPage(document, context);
DocumentGraph documentGraph = DocumentGraph.builder().numberOfPages(context.pages.size()).pages(context.pages.keySet()).tableOfContents(context.tableOfContents).build(); documentGraph.setNumberOfPages(context.pages.size());
documentGraph.setPages(context.pages.keySet());
documentGraph.setTableOfContents(context.tableOfContents);
documentGraph.setTextBlock(documentGraph.buildTextBlock()); documentGraph.setTextBlock(documentGraph.buildTextBlock());
return documentGraph; return documentGraph;
} }
@ -82,7 +84,7 @@ public class DocumentGraphFactory {
List<Integer> tocId; List<Integer> tocId;
if (parentNode == null) { if (parentNode == null) {
tocId = context.tableOfContents.createNewEntryAndReturnId(NodeType.SECTION, sectionNode); tocId = context.tableOfContents.createNewMainEntryAndReturnId(NodeType.SECTION, sectionNode);
} else { } else {
tocId = context.tableOfContents.createNewChildEntryAndReturnId(parentNode.getTocId(), NodeType.SECTION, sectionNode); tocId = context.tableOfContents.createNewChildEntryAndReturnId(parentNode.getTocId(), NodeType.SECTION, sectionNode);
} }
@ -309,7 +311,7 @@ public class DocumentGraphFactory {
footer, footer,
context, context,
page); page);
List<Integer> tocId = context.tableOfContents().createNewEntryAndReturnId(NodeType.FOOTER, footer); List<Integer> tocId = context.tableOfContents().createNewMainEntryAndReturnId(NodeType.FOOTER, footer);
footer.setTocId(tocId); footer.setTocId(tocId);
footer.setTerminalTextBlock(textBlock); footer.setTerminalTextBlock(textBlock);
page.setFooter(footer); page.setFooter(footer);
@ -325,7 +327,7 @@ public class DocumentGraphFactory {
context, context,
0, 0,
page); page);
List<Integer> tocId = context.tableOfContents().createNewEntryAndReturnId(NodeType.HEADER, header); List<Integer> tocId = context.tableOfContents().createNewMainEntryAndReturnId(NodeType.HEADER, header);
header.setTocId(tocId); header.setTocId(tocId);
header.setTerminalTextBlock(textBlock); header.setTerminalTextBlock(textBlock);
page.setHeader(header); page.setHeader(header);
@ -337,7 +339,7 @@ public class DocumentGraphFactory {
PageNode page = getPage(pageIndex, context); PageNode page = getPage(pageIndex, context);
FooterNode footer = FooterNode.builder().tableOfContents(context.tableOfContents()).build(); FooterNode footer = FooterNode.builder().tableOfContents(context.tableOfContents()).build();
AtomicTextBlock textBlock = context.textBlockFactory.emptyTextBlock(footer, context, page); AtomicTextBlock textBlock = context.textBlockFactory.emptyTextBlock(footer, context, page);
List<Integer> tocId = context.tableOfContents().createNewEntryAndReturnId(NodeType.FOOTER, footer); List<Integer> tocId = context.tableOfContents().createNewMainEntryAndReturnId(NodeType.FOOTER, footer);
footer.setTocId(tocId); footer.setTocId(tocId);
footer.setTerminalTextBlock(textBlock); footer.setTerminalTextBlock(textBlock);
page.setFooter(footer); page.setFooter(footer);
@ -349,7 +351,7 @@ public class DocumentGraphFactory {
PageNode page = getPage(pageIndex, context); PageNode page = getPage(pageIndex, context);
HeaderNode header = HeaderNode.builder().tableOfContents(context.tableOfContents()).build(); HeaderNode header = HeaderNode.builder().tableOfContents(context.tableOfContents()).build();
AtomicTextBlock textBlock = context.textBlockFactory.emptyTextBlock(header, 0, page); AtomicTextBlock textBlock = context.textBlockFactory.emptyTextBlock(header, 0, page);
List<Integer> tocId = context.tableOfContents().createNewEntryAndReturnId(NodeType.HEADER, header); List<Integer> tocId = context.tableOfContents().createNewMainEntryAndReturnId(NodeType.HEADER, header);
header.setTocId(tocId); header.setTocId(tocId);
header.setTerminalTextBlock(textBlock); header.setTerminalTextBlock(textBlock);
page.setHeader(header); page.setHeader(header);

View File

@ -10,7 +10,6 @@
</parent> </parent>
<artifactId>layoutparser-service-server</artifactId> <artifactId>layoutparser-service-server</artifactId>
<version>1.0.0</version>
<dependencies> <dependencies>
<dependency> <dependency>
@ -18,7 +17,11 @@
<artifactId>layoutparser-service-processor</artifactId> <artifactId>layoutparser-service-processor</artifactId>
<version>${project.version}</version> <version>${project.version}</version>
</dependency> </dependency>
<dependency>
<groupId>com.iqser.red.commons</groupId>
<artifactId>spring-commons</artifactId>
<version>6.2.0</version>
</dependency>
<dependency> <dependency>
<groupId>javax.servlet</groupId> <groupId>javax.servlet</groupId>
<artifactId>javax.servlet-api</artifactId> <artifactId>javax.servlet-api</artifactId>
@ -29,13 +32,11 @@
<artifactId>spring-cloud-starter-openfeign</artifactId> <artifactId>spring-cloud-starter-openfeign</artifactId>
<version>4.0.2</version> <version>4.0.2</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.springframework.boot</groupId> <groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-amqp</artifactId> <artifactId>spring-boot-starter-amqp</artifactId>
<version>${spring.version}</version> <version>${spring.version}</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.junit.jupiter</groupId> <groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-api</artifactId> <artifactId>junit-jupiter-api</artifactId>
@ -54,5 +55,28 @@
<version>6.0.3</version> <version>6.0.3</version>
<scope>test</scope> <scope>test</scope>
</dependency> </dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
<version>5.3.0</version>
<scope>test</scope>
</dependency>
</dependencies> </dependencies>
<build>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<configuration>
<excludes>
<exclude>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
</exclude>
</excludes>
</configuration>
</plugin>
</plugins>
</build>
</project> </project>

View File

@ -1,7 +0,0 @@
package com.knecon.fforesight.service.layoutparser.server;
import static org.junit.jupiter.api.Assertions.*;
class ApplicationTest {
}

View File

@ -2,7 +2,9 @@ package com.knecon.fforesight.service.layoutparser.server;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
class LayoutParserApplicationTests extends BaseTest { import com.knecon.fforesight.service.layoutparser.server.utils.BaseTest;
class ApplicationTests extends BaseTest {
@Test @Test
void contextLoads() { void contextLoads() {

View File

@ -0,0 +1,51 @@
package com.knecon.fforesight.service.layoutparser.server.graph;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.InputStream;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.ClassPathResource;
import com.knecon.fforesight.service.layoutparser.internal.api.graph.DocumentGraph;
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingService;
import com.knecon.fforesight.service.layoutparser.processor.adapter.model.table.TableServiceResponse;
import com.knecon.fforesight.service.layoutparser.server.utils.BaseTest;
import lombok.SneakyThrows;
/**
 * Integration test that parses a PDF from the test classpath into a {@link DocumentGraph}
 * and checks page-level header and footer counts.
 *
 * <p>Also serves as a base class: {@link #buildGraph(String)} is {@code protected} so that
 * subclasses can build a graph for their own assertions.
 */
public class BuildDocumentGraphTest extends BaseTest {

    @Autowired
    private LayoutParsingService layoutParsingService;


    /**
     * Builds the graph for the S-Metolachlor sample document and asserts that it has 221 pages,
     * of which 220 have a header containing text and none has a footer containing text.
     */
    @Test
    public void buildMetolachlor() {

        DocumentGraph documentGraph = buildGraph("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06");
        assertEquals(221, documentGraph.getPages().size());
        assertEquals(220, documentGraph.getPages().stream().filter(page -> page.getHeader().hasText()).count());
        assertEquals(0, documentGraph.getPages().stream().filter(page -> page.getFooter().hasText()).count());
    }


    /**
     * Prepares the test storage for the given file, loads {@code filename + ".pdf"} from the
     * classpath and runs the layout parser on it.
     *
     * @param filename classpath location of the PDF without the {@code .pdf} extension
     * @return the document graph produced by {@code layoutParsingService.parseLayout}
     */
    @SneakyThrows
    protected DocumentGraph buildGraph(String filename) {

        // The S-Metolachlor sample is prepared with explicit table-parsing and image-service
        // response fixtures; every other file uses the single-argument prepareStorage overload.
        if (filename.equals("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06")) {
            prepareStorage(filename + ".pdf", "cv_table_parsing_response/empty.json", "image_service_response/S-Metolachlor_RAR_01_Volume_1_2018-09-06.IMAGE_INFO.json");
        } else {
            prepareStorage(filename + ".pdf");
        }

        ClassPathResource fileResource = new ClassPathResource(filename + ".pdf");
        // PDDocument is Closeable and must be released as well as the stream; resources are
        // closed in reverse order, i.e. the document first, then the stream.
        // NOTE(review): assumes the returned graph holds no lazy references into the
        // PDDocument after parseLayout returns - confirm against LayoutParsingService.
        try (InputStream inputStream = fileResource.getInputStream(); PDDocument pdDocument = Loader.loadPDF(inputStream)) {
            return layoutParsingService.parseLayout(pdDocument, layoutParsingStorageService.getImagesFile(IMAGE_FILE_ID), new TableServiceResponse());
        }
    }

}

View File

@ -4,14 +4,10 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertInstanceOf; import static org.junit.jupiter.api.Assertions.assertInstanceOf;
import static org.wildfly.common.Assert.assertTrue; import static org.wildfly.common.Assert.assertTrue;
import java.io.InputStream;
import java.util.List; import java.util.List;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.ClassPathResource;
import com.knecon.fforesight.service.layoutparser.internal.api.graph.Boundary; import com.knecon.fforesight.service.layoutparser.internal.api.graph.Boundary;
import com.knecon.fforesight.service.layoutparser.internal.api.graph.DocumentGraph; import com.knecon.fforesight.service.layoutparser.internal.api.graph.DocumentGraph;
@ -26,21 +22,13 @@ import com.knecon.fforesight.service.layoutparser.internal.api.graph.nodes.Table
import com.knecon.fforesight.service.layoutparser.internal.api.graph.nodes.TableNode; import com.knecon.fforesight.service.layoutparser.internal.api.graph.nodes.TableNode;
import com.knecon.fforesight.service.layoutparser.internal.api.graph.textblock.TextBlock; import com.knecon.fforesight.service.layoutparser.internal.api.graph.textblock.TextBlock;
import com.knecon.fforesight.service.layoutparser.internal.api.services.EntityInsertionService; import com.knecon.fforesight.service.layoutparser.internal.api.services.EntityInsertionService;
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingService; import com.knecon.fforesight.service.layoutparser.server.utils.TestEntity;
import com.knecon.fforesight.service.layoutparser.processor.adapter.model.table.TableServiceResponse;
import com.knecon.fforesight.service.layoutparser.server.BaseTest;
import com.knecon.fforesight.service.layoutparser.server.TestEntity;
import lombok.SneakyThrows; public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest {
public class DocumentGraphTest extends BaseTest {
@Autowired @Autowired
private EntityInsertionService entityInsertionService; private EntityInsertionService entityInsertionService;
@Autowired
private LayoutParsingService layoutParsingService;
@Test @Test
public void assertTextBeforeAndTextAfterForParagraphCrafted() { public void assertTextBeforeAndTextAfterForParagraphCrafted() {
@ -51,7 +39,7 @@ public class DocumentGraphTest extends BaseTest {
assert start != -1; assert start != -1;
Boundary boundary = new Boundary(start, start + searchTerm.length()); Boundary boundary = new Boundary(start, start + searchTerm.length());
TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123"); TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123");
entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents()); entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents());
assertEquals("Expand to Hint ", entityNode.getTextBefore()); assertEquals("Expand to Hint ", entityNode.getTextBefore());
@ -76,7 +64,7 @@ public class DocumentGraphTest extends BaseTest {
assert start != -1; assert start != -1;
Boundary boundary = new Boundary(start, start + searchTerm.length()); Boundary boundary = new Boundary(start, start + searchTerm.length());
TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123"); TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123");
entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents()); entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents());
assertEquals("", entityNode.getTextBefore()); assertEquals("", entityNode.getTextBefore());
@ -132,7 +120,7 @@ public class DocumentGraphTest extends BaseTest {
DocumentGraph documentGraph = buildGraph("files/crafted document"); DocumentGraph documentGraph = buildGraph("files/crafted document");
TableNode table = (TableNode) documentGraph.getTableOfContents()// TableNode table = (TableNode) documentGraph.getTableOfContents()//
.streamEntriesInOrder()// .streamAllEntriesInOrder()//
.filter(entry -> entry.type().equals(NodeType.TABLE))// .filter(entry -> entry.type().equals(NodeType.TABLE))//
.map(TableOfContents.Entry::node)// .map(TableOfContents.Entry::node)//
.findFirst().orElseThrow(); .findFirst().orElseThrow();
@ -162,7 +150,7 @@ public class DocumentGraphTest extends BaseTest {
DocumentGraph documentGraph = buildGraph("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06"); DocumentGraph documentGraph = buildGraph("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06");
TableNode table = (TableNode) documentGraph.getTableOfContents() TableNode table = (TableNode) documentGraph.getTableOfContents()
.streamEntriesInOrder() .streamAllEntriesInOrder()
.filter(entry -> entry.node().getPages().stream().anyMatch(page -> page.getNumber() == 22)) .filter(entry -> entry.node().getPages().stream().anyMatch(page -> page.getNumber() == 22))
.filter(entry -> entry.type().equals(NodeType.TABLE)) .filter(entry -> entry.type().equals(NodeType.TABLE))
.map(TableOfContents.Entry::node) .map(TableOfContents.Entry::node)
@ -187,7 +175,7 @@ public class DocumentGraphTest extends BaseTest {
assert start != -1; assert start != -1;
Boundary boundary = new Boundary(start, start + searchTerm.length()); Boundary boundary = new Boundary(start, start + searchTerm.length());
TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123"); TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123");
entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents()); entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents());
assertEquals("except Cranberry; Vegetable, ", entityNode.getTextBefore()); assertEquals("except Cranberry; Vegetable, ", entityNode.getTextBefore());
@ -214,7 +202,7 @@ public class DocumentGraphTest extends BaseTest {
assert start != -1; assert start != -1;
Boundary boundary = new Boundary(start, start + searchTerm.length()); Boundary boundary = new Boundary(start, start + searchTerm.length());
TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123"); TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123");
entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents()); entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents());
assertEquals("2.6.1 Summary of ", entityNode.getTextBefore()); assertEquals("2.6.1 Summary of ", entityNode.getTextBefore());
@ -240,7 +228,7 @@ public class DocumentGraphTest extends BaseTest {
assert start != -1; assert start != -1;
Boundary boundary = new Boundary(start, start + searchTerm.length()); Boundary boundary = new Boundary(start, start + searchTerm.length());
TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123"); TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123");
entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents()); entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents());
assertEquals("2-[(2-(1-hydroxy-ethyl)-6methyl-phenyl-amino]propan-1-ol (", entityNode.getTextBefore()); assertEquals("2-[(2-(1-hydroxy-ethyl)-6methyl-phenyl-amino]propan-1-ol (", entityNode.getTextBefore());
@ -258,23 +246,6 @@ public class DocumentGraphTest extends BaseTest {
} }
@SneakyThrows
protected DocumentGraph buildGraph(String filename) {
if (filename.equals("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06")) {
prepareStorage(filename + ".pdf", "cv_table_parsing_response/empty.json", "image_service_response/S-Metolachlor_RAR_01_Volume_1_2018-09-06.IMAGE_INFO.json");
} else {
prepareStorage(filename + ".pdf");
}
ClassPathResource fileResource = new ClassPathResource(filename + ".pdf");
try (InputStream inputStream = fileResource.getInputStream()) {
PDDocument pdDocument = Loader.loadPDF(inputStream);
return layoutParsingService.parseLayout(pdDocument, layoutParsingStorageService.getImagesFile(IMAGE_FILE_ID), new TableServiceResponse());
}
}
private static void assertSameOffsetInAllIntersectingNodes(String searchTerm, int start, EntityNode entityNode) { private static void assertSameOffsetInAllIntersectingNodes(String searchTerm, int start, EntityNode entityNode) {
List<Integer> paragraphStart = entityNode.getIntersectingNodes().stream()// List<Integer> paragraphStart = entityNode.getIntersectingNodes().stream()//
@ -293,7 +264,7 @@ public class DocumentGraphTest extends BaseTest {
assert start != -1; assert start != -1;
Boundary boundary = new Boundary(start, start + searchTerm.length()); Boundary boundary = new Boundary(start, start + searchTerm.length());
TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123"); TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123");
entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents()); entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents());
PageNode pageNode = documentGraph.getPages().stream().filter(page -> page.getNumber() == pageNumber).findFirst().orElseThrow(); PageNode pageNode = documentGraph.getPages().stream().filter(page -> page.getNumber() == pageNumber).findFirst().orElseThrow();

View File

@ -1,31 +1,18 @@
package com.knecon.fforesight.service.layoutparser.server.graph; package com.knecon.fforesight.service.layoutparser.server.graph;
import java.util.Collections;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.ClassPathResource;
import com.knecon.fforesight.service.layoutparser.internal.api.data.DocumentData; import com.knecon.fforesight.service.layoutparser.internal.api.data.DocumentData;
import com.knecon.fforesight.service.layoutparser.internal.api.graph.DocumentGraph; import com.knecon.fforesight.service.layoutparser.internal.api.graph.DocumentGraph;
import com.knecon.fforesight.service.layoutparser.internal.api.mapper.DocumentDataMapper; import com.knecon.fforesight.service.layoutparser.internal.api.mapper.DocumentDataMapper;
import com.knecon.fforesight.service.layoutparser.internal.api.mapper.DocumentGraphMapper; import com.knecon.fforesight.service.layoutparser.internal.api.mapper.DocumentGraphMapper;
import com.knecon.fforesight.service.layoutparser.processor.classification.service.PdfParsingService;
import com.knecon.fforesight.service.layoutparser.processor.factory.DocumentGraphFactory;
import com.knecon.fforesight.service.layoutparser.processor.queue.LayoutParsingRequest; import com.knecon.fforesight.service.layoutparser.processor.queue.LayoutParsingRequest;
import com.knecon.fforesight.service.layoutparser.server.BaseTest;
import lombok.SneakyThrows; import lombok.SneakyThrows;
public class DocumentGraphMappingTest extends BaseTest { public class DocumentGraphMappingTest extends BuildDocumentGraphTest {
@Autowired
private DocumentGraphFactory documentGraphFactory;
@Autowired
private PdfParsingService pdfParsingService;
@Autowired @Autowired
private DocumentDataMapper documentDataMapper; private DocumentDataMapper documentDataMapper;
@ -38,17 +25,10 @@ public class DocumentGraphMappingTest extends BaseTest {
@SneakyThrows @SneakyThrows
public void testGraphMapping() { public void testGraphMapping() {
String filename = "files/crafted document"; DocumentGraph document = buildGraph("files/crafted document");
LayoutParsingRequest layoutParsingRequest = buildStandardLayoutParsingRequest();
prepareStorage(filename + ".pdf");
ClassPathResource fileResource = new ClassPathResource(filename + ".pdf");
LayoutParsingRequest layoutParsingRequest = prepareStorage(fileResource.getInputStream());
PDDocument pdDocument = Loader.loadPDF(fileResource.getInputStream());
var classifiedDoc = pdfParsingService.parseDocument(pdDocument, Collections.emptyMap(), Collections.emptyMap());
DocumentGraph document = documentGraphFactory.buildDocumentGraph(classifiedDoc);
DocumentData documentData = documentDataMapper.toDocumentData(document); DocumentData documentData = documentDataMapper.toDocumentData(document);
layoutParsingStorageService.storeDocumentData(layoutParsingRequest, documentData); layoutParsingStorageService.storeDocumentData(layoutParsingRequest, documentData);
DocumentData documentData2 = layoutParsingStorageService.readDocumentData(layoutParsingRequest); DocumentData documentData2 = layoutParsingStorageService.readDocumentData(layoutParsingRequest);
DocumentGraph newDocumentGraph = documentGraphMapper.toDocumentGraph(documentData2); DocumentGraph newDocumentGraph = documentGraphMapper.toDocumentGraph(documentData2);

View File

@ -12,37 +12,50 @@ import org.springframework.core.io.ClassPathResource;
import com.knecon.fforesight.service.layoutparser.internal.api.graph.DocumentGraph; import com.knecon.fforesight.service.layoutparser.internal.api.graph.DocumentGraph;
import com.knecon.fforesight.service.layoutparser.internal.api.graph.textblock.TextBlock; import com.knecon.fforesight.service.layoutparser.internal.api.graph.textblock.TextBlock;
import com.knecon.fforesight.service.layoutparser.server.visualizations.PdfDraw; import com.knecon.fforesight.service.layoutparser.server.utils.visualizations.PdfDraw;
import lombok.SneakyThrows; import lombok.SneakyThrows;
public class DocumentGraphVisualizationTest extends BuildDocumentGraphTest {
@Disabled
public class DocumentGraphVisualizationTest extends DocumentGraphTest {
@Test @Test
@SneakyThrows @SneakyThrows
@Disabled
public void visualizeMetolachlor() { public void visualizeMetolachlor() {
String filename = "files/S-Metolachlor_RAR_01_Volume_1_2018-09-06"; String filename = "files/S-Metolachlor_RAR_01_Volume_1_2018-09-06";
visualizePdf(filename);
DocumentGraph documentGraph = buildGraph(filename);
TextBlock textBlock = documentGraph.buildTextBlock();
visualizeSemanticNodes(filename, documentGraph, textBlock);
} }
@Test @Test
@SneakyThrows @SneakyThrows
@Disabled
public void visualizeRotatedTestDocument() { public void visualizeRotatedTestDocument() {
String filename = "files/RotateTestFileWithImages"; String filename = "files/RotateTestFileWithImages";
visualizePdf(filename);
}
@Test
@SneakyThrows
@Disabled
public void visualizeCraftedDocument() {
String filename = "files/crafted document";
visualizePdf(filename);
}
@SneakyThrows
private void visualizePdf(String filename) {
DocumentGraph documentGraph = buildGraph(filename); DocumentGraph documentGraph = buildGraph(filename);
TextBlock textBlock = documentGraph.buildTextBlock(); TextBlock textBlock = documentGraph.buildTextBlock();
visualizeSemanticNodes(filename, documentGraph, textBlock); visualizeSemanticNodes(filename, documentGraph, textBlock);
} }

View File

@ -1,9 +1,10 @@
package com.knecon.fforesight.service.layoutparser.server; package com.knecon.fforesight.service.layoutparser.server.utils;
import java.io.InputStream; import java.io.InputStream;
import java.util.Optional; import java.util.Optional;
import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.extension.ExtendWith; import org.junit.jupiter.api.extension.ExtendWith;
import org.springframework.amqp.rabbit.core.RabbitTemplate; import org.springframework.amqp.rabbit.core.RabbitTemplate;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
@ -12,6 +13,7 @@ import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
import org.springframework.boot.test.context.SpringBootTest; import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.boot.test.mock.mockito.MockBean; import org.springframework.boot.test.mock.mockito.MockBean;
import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.ComponentScan;
import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Import; import org.springframework.context.annotation.Import;
import org.springframework.context.annotation.Primary; import org.springframework.context.annotation.Primary;
@ -22,9 +24,12 @@ import com.iqser.red.storage.commons.service.StorageService;
import com.knecon.fforesight.service.layoutparser.internal.api.mapper.DocumentDataMapper; import com.knecon.fforesight.service.layoutparser.internal.api.mapper.DocumentDataMapper;
import com.knecon.fforesight.service.layoutparser.internal.api.mapper.DocumentGraphMapper; import com.knecon.fforesight.service.layoutparser.internal.api.mapper.DocumentGraphMapper;
import com.knecon.fforesight.service.layoutparser.internal.api.services.EntityEnrichmentService; import com.knecon.fforesight.service.layoutparser.internal.api.services.EntityEnrichmentService;
import com.knecon.fforesight.service.layoutparser.internal.api.services.EntityInsertionService;
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingStorageService; import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingStorageService;
import com.knecon.fforesight.service.layoutparser.processor.multitenancy.TenantContext; import com.knecon.fforesight.service.layoutparser.processor.multitenancy.TenantContext;
import com.knecon.fforesight.service.layoutparser.processor.multitenancy.TenantsClient;
import com.knecon.fforesight.service.layoutparser.processor.queue.LayoutParsingRequest; import com.knecon.fforesight.service.layoutparser.processor.queue.LayoutParsingRequest;
import com.knecon.fforesight.service.layoutparser.server.Application;
import lombok.SneakyThrows; import lombok.SneakyThrows;
@ -39,6 +44,9 @@ public class BaseTest {
@Autowired @Autowired
protected StorageService storageService; protected StorageService storageService;
@Autowired
protected TenantsClient tenantsClient;
@MockBean @MockBean
private RabbitTemplate rabbitTemplate; private RabbitTemplate rabbitTemplate;
@ -49,6 +57,35 @@ public class BaseTest {
protected final static String TEXT_FILE_ID = "texts"; protected final static String TEXT_FILE_ID = "texts";
protected final static String POSITION_FILE_ID = "positions"; protected final static String POSITION_FILE_ID = "positions";
protected final static String PAGES_FILE_ID = "pages"; protected final static String PAGES_FILE_ID = "pages";
protected final static String TENANT_ID = "tenant";
protected LayoutParsingRequest buildStandardLayoutParsingRequest() {
return LayoutParsingRequest.builder()
.imagesFileStorageId(Optional.of(IMAGE_FILE_ID))
.originFileStorageId(ORIGIN_FILE_ID)
.tablesFileStorageId(Optional.of(TABLE_FILE_ID))
.pageFileStorageId(PAGES_FILE_ID)
.positionBlockFileStorageId(POSITION_FILE_ID)
.structureFileStorageId(STRUCTURE_FILE_ID)
.textBlockFileStorageId(TEXT_FILE_ID)
.build();
}
@BeforeEach
public void setupTenantContext() {
TenantContext.setTenantId(TENANT_ID);
}
@AfterEach
public void clearTenantContext() {
TenantContext.clear();
}
@SneakyThrows @SneakyThrows
@ -115,6 +152,7 @@ public class BaseTest {
@Configuration @Configuration
@EnableAutoConfiguration(exclude = RabbitAutoConfiguration.class) @EnableAutoConfiguration(exclude = RabbitAutoConfiguration.class)
@ComponentScan("com.knecon.fforesight.service.layoutparser")
public static class TestConfiguration { public static class TestConfiguration {
@Bean @Bean
@ -125,13 +163,6 @@ public class BaseTest {
} }
@Bean
public EntityEnrichmentService testEntityEnrichmentService() {
return new TestEntityEnrichmentService();
}
@Bean @Bean
public DocumentDataMapper documentDataMapper() { public DocumentDataMapper documentDataMapper() {
@ -145,6 +176,14 @@ public class BaseTest {
return new DocumentGraphMapper(); return new DocumentGraphMapper();
} }
@Bean
@Autowired
public EntityInsertionService entityInsertionService(EntityEnrichmentService entityEnrichmentService) {
return new EntityInsertionService(entityEnrichmentService);
}
} }
} }

View File

@ -1,4 +1,4 @@
package com.knecon.fforesight.service.layoutparser.server; package com.knecon.fforesight.service.layoutparser.server.utils;
import static java.io.File.createTempFile; import static java.io.File.createTempFile;
@ -14,6 +14,7 @@ import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.springframework.core.io.InputStreamResource; import org.springframework.core.io.InputStreamResource;
import org.springframework.stereotype.Service;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import com.iqser.red.commons.jackson.ObjectMapperFactory; import com.iqser.red.commons.jackson.ObjectMapperFactory;
@ -22,6 +23,7 @@ import com.iqser.red.storage.commons.service.StorageService;
import lombok.SneakyThrows; import lombok.SneakyThrows;
@Service
public class FileSystemBackedStorageService implements StorageService { public class FileSystemBackedStorageService implements StorageService {
private final Map<String, File> dataMap = new HashMap<>(); private final Map<String, File> dataMap = new HashMap<>();

View File

@ -1,4 +1,4 @@
package com.knecon.fforesight.service.layoutparser.server; package com.knecon.fforesight.service.layoutparser.server.utils;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.util.HashSet; import java.util.HashSet;

View File

@ -1,15 +1,18 @@
package com.knecon.fforesight.service.layoutparser.server; package com.knecon.fforesight.service.layoutparser.server.utils;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.Objects; import java.util.Objects;
import org.springframework.stereotype.Service;
import com.knecon.fforesight.service.layoutparser.internal.api.graph.entity.EntityNode; import com.knecon.fforesight.service.layoutparser.internal.api.graph.entity.EntityNode;
import com.knecon.fforesight.service.layoutparser.internal.api.graph.textblock.TextBlock; import com.knecon.fforesight.service.layoutparser.internal.api.graph.textblock.TextBlock;
import com.knecon.fforesight.service.layoutparser.internal.api.services.EntityEnrichmentService; import com.knecon.fforesight.service.layoutparser.internal.api.services.EntityEnrichmentService;
import lombok.RequiredArgsConstructor; import lombok.RequiredArgsConstructor;
@Service
@RequiredArgsConstructor @RequiredArgsConstructor
public class TestEntityEnrichmentService implements EntityEnrichmentService { public class TestEntityEnrichmentService implements EntityEnrichmentService {

View File

@ -0,0 +1,42 @@
package com.knecon.fforesight.service.layoutparser.server.utils;
import java.util.Collections;
import java.util.List;
import org.springframework.stereotype.Service;
import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
import com.iqser.red.service.persistence.service.v1.api.shared.model.multitenancy.TenantRequest;
import com.iqser.red.service.persistence.service.v1.api.shared.model.multitenancy.TenantResponse;
import com.knecon.fforesight.service.layoutparser.processor.multitenancy.TenantsClient;
/**
 * Stub {@link TenantsClient} annotated with {@link Service}.
 * <p>
 * None of the methods contact a remote service: tenant creation is a no-op, the tenant list
 * is always empty, and the single-tenant and deployment-key lookups return {@code null}.
 */
@Service
public class TestTenantsClient implements TenantsClient {

    /**
     * Ignores the request; nothing is created or stored.
     *
     * @param tenantRequest unused
     */
    @Override
    public void createTenant(TenantRequest tenantRequest) {
        // intentionally left empty
    }


    /**
     * @return an empty, immutable list
     */
    @Override
    public List<TenantResponse> getTenants() {

        List<TenantResponse> noTenants = Collections.emptyList();
        return noTenants;
    }


    /**
     * @param tenantId unused
     * @return always {@code null}
     */
    @Override
    public TenantResponse getTenant(String tenantId) {

        TenantResponse noTenant = null;
        return noTenant;
    }


    /**
     * @param tenantId unused
     * @return always {@code null}
     */
    @Override
    public JSONPrimitive<String> getDeploymentKey(String tenantId) {

        JSONPrimitive<String> noKey = null;
        return noKey;
    }

}

View File

@ -1,4 +1,4 @@
package com.knecon.fforesight.service.layoutparser.server.visualizations; package com.knecon.fforesight.service.layoutparser.server.utils.visualizations;
import java.awt.Color; import java.awt.Color;
import java.awt.geom.Point2D; import java.awt.geom.Point2D;
@ -36,7 +36,7 @@ public class PdfDraw {
public static void drawDocumentGraph(PDDocument document, DocumentGraph documentGraph) { public static void drawDocumentGraph(PDDocument document, DocumentGraph documentGraph) {
documentGraph.getTableOfContents().streamEntriesInOrder().forEach(entry -> drawNode(document, entry)); documentGraph.getTableOfContents().streamAllEntriesInOrder().forEach(entry -> drawNode(document, entry));
} }
@ -72,8 +72,11 @@ public class PdfDraw {
contentStream.setLineWidth(options.getStrokeWidth()); contentStream.setLineWidth(options.getStrokeWidth());
contentStream.beginText(); contentStream.beginText();
contentStream.setTextMatrix(Matrix.getRotateInstance(Math.toRadians(30), 0, 0)); if (rotate) {
contentStream.newLineAtOffset((float) location.getX(), (float) location.getY()); contentStream.setTextMatrix(Matrix.getRotateInstance(Math.toRadians(15), (float) location.getX(), (float) location.getY()));
} else {
contentStream.newLineAtOffset((float) location.getX(), (float) location.getY());
}
contentStream.setFont(new PDType1Font(Standard14Fonts.FontName.HELVETICA), 10); contentStream.setFont(new PDType1Font(Standard14Fonts.FontName.HELVETICA), 10);
contentStream.showText(string); contentStream.showText(string);
contentStream.endText(); contentStream.endText();
@ -136,6 +139,7 @@ public class PdfDraw {
private static Options buildStandardOptionsForNodes(TableOfContents.Entry entry) { private static Options buildStandardOptionsForNodes(TableOfContents.Entry entry) {
return Options.builder().stroke(true).strokeColor(switch (entry.type()) { return Options.builder().stroke(true).strokeColor(switch (entry.type()) {
case DOCUMENT -> Color.LIGHT_GRAY;
case HEADER, FOOTER -> Color.GREEN; case HEADER, FOOTER -> Color.GREEN;
case PARAGRAPH -> Color.BLUE; case PARAGRAPH -> Color.BLUE;
case HEADLINE -> Color.RED; case HEADLINE -> Color.RED;