RED-6009: Document Tree Structure
*fixed dependency issues
This commit is contained in:
parent
aac0259caf
commit
2ed617bb03
6
.gitignore
vendored
6
.gitignore
vendored
@ -4,6 +4,12 @@ target/
|
||||
!**/src/main/**/target/
|
||||
!**/src/test/**/target/
|
||||
|
||||
### maven build ###
|
||||
*.class
|
||||
/out/
|
||||
**/out/
|
||||
**/target/
|
||||
|
||||
### STS ###
|
||||
.apt_generated
|
||||
.classpath
|
||||
|
||||
@ -2,8 +2,8 @@
|
||||
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<parent>
|
||||
<groupId>com.knecon.fforesight</groupId>
|
||||
@ -12,9 +12,6 @@
|
||||
</parent>
|
||||
|
||||
<artifactId>layoutparser-service-internal-api</artifactId>
|
||||
<version>1.0.0</version>
|
||||
|
||||
<packaging>pom</packaging>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
@ -29,21 +26,4 @@
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-maven-plugin</artifactId>
|
||||
<configuration>
|
||||
<excludes>
|
||||
<exclude>
|
||||
<groupId>org.projectlombok</groupId>
|
||||
<artifactId>lombok</artifactId>
|
||||
</exclude>
|
||||
</excludes>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
</project>
|
||||
|
||||
@ -5,8 +5,6 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import javax.management.openmbean.InvalidKeyException;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.graph.nodes.NodeType;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
@ -23,15 +21,15 @@ import lombok.experimental.FieldDefaults;
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
public class TableOfContentsData {
|
||||
|
||||
List<EntryData> entries;
|
||||
EntryData root;
|
||||
|
||||
|
||||
public EntryData get(List<Integer> tocId) {
|
||||
|
||||
if (tocId.size() < 1) {
|
||||
throw new InvalidKeyException(String.format("ClassificationSection Identifier: \"%s\" is not valid.", tocId));
|
||||
if (tocId.isEmpty()) {
|
||||
return root;
|
||||
}
|
||||
EntryData entry = entries.get(tocId.get(0));
|
||||
EntryData entry = root.subEntries.get(tocId.get(0));
|
||||
for (int id : tocId.subList(1, tocId.size())) {
|
||||
entry = entry.subEntries().get(id);
|
||||
}
|
||||
@ -41,7 +39,7 @@ public class TableOfContentsData {
|
||||
|
||||
public Stream<EntryData> streamAllEntries() {
|
||||
|
||||
return entries.stream().flatMap(TableOfContentsData::flatten);
|
||||
return Stream.concat(Stream.of(root), root.subEntries.stream()).flatMap(TableOfContentsData::flatten);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -10,6 +10,7 @@ import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.graph.entity.EntityNode;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.graph.nodes.NodeType;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.graph.nodes.PageNode;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.graph.nodes.SectionNode;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.graph.nodes.SemanticNode;
|
||||
@ -20,11 +21,13 @@ import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.experimental.FieldDefaults;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@AllArgsConstructor
|
||||
@NoArgsConstructor
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
public class DocumentGraph implements SemanticNode {
|
||||
|
||||
@ -42,7 +45,7 @@ public class DocumentGraph implements SemanticNode {
|
||||
|
||||
public List<SectionNode> getMainSections() {
|
||||
|
||||
return tableOfContents.entries.stream().filter(entry -> entry.node() instanceof SectionNode).map(entry -> (SectionNode) entry.node()).collect(Collectors.toList());
|
||||
return streamChildren().filter(node -> node instanceof SectionNode).map(node -> (SectionNode) node).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
||||
@ -74,14 +77,14 @@ public class DocumentGraph implements SemanticNode {
|
||||
|
||||
private Stream<SemanticNode> streamAllNodes() {
|
||||
|
||||
return tableOfContents.streamEntriesInOrder().map(TableOfContents.Entry::node);
|
||||
return tableOfContents.streamAllEntriesInOrder().map(TableOfContents.Entry::node);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
return tableOfContents.toString();
|
||||
return NodeType.DOCUMENT + ": " + buildTextBlock().buildSummary();
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -1,10 +1,11 @@
|
||||
package com.knecon.fforesight.service.layoutparser.internal.api.graph;
|
||||
|
||||
import static java.lang.String.format;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Collections;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import com.google.common.hash.Hashing;
|
||||
@ -19,22 +20,22 @@ import lombok.Data;
|
||||
@Data
|
||||
public class TableOfContents {
|
||||
|
||||
List<Entry> entries;
|
||||
private final Entry root;
|
||||
|
||||
|
||||
public TableOfContents() {
|
||||
public TableOfContents(DocumentGraph documentGraph) {
|
||||
|
||||
entries = new LinkedList<>();
|
||||
root = Entry.builder().tocId(Collections.emptyList()).type(NodeType.DOCUMENT).children(new LinkedList<>()).node(documentGraph).build();
|
||||
}
|
||||
|
||||
|
||||
public TextBlock buildTextBlock() {
|
||||
|
||||
return streamEntriesInOrder().map(Entry::node).filter(SemanticNode::isTerminal).map(SemanticNode::getTerminalTextBlock).collect(new TextBlockCollector());
|
||||
return streamAllEntriesInOrder().map(Entry::node).filter(SemanticNode::isTerminal).map(SemanticNode::getTerminalTextBlock).collect(new TextBlockCollector());
|
||||
}
|
||||
|
||||
|
||||
public List<Integer> createNewEntryAndReturnId(NodeType nodeType, SemanticNode node) {
|
||||
public List<Integer> createNewMainEntryAndReturnId(NodeType nodeType, SemanticNode node) {
|
||||
|
||||
return createNewChildEntryAndReturnId(Collections.emptyList(), nodeType, node);
|
||||
}
|
||||
@ -42,27 +43,25 @@ public class TableOfContents {
|
||||
|
||||
public List<Integer> createNewChildEntryAndReturnId(List<Integer> parentId, NodeType nodeType, SemanticNode node) {
|
||||
|
||||
List<Integer> newId;
|
||||
if (entryExists(parentId)) {
|
||||
Entry parent = getEntryById(parentId);
|
||||
newId = new LinkedList<>(parentId);
|
||||
newId.add(parent.children().size());
|
||||
parent.children().add(Entry.builder().tocId(newId).node(node).type(nodeType).children(new LinkedList<>()).build());
|
||||
} else {
|
||||
newId = List.of(entries.size());
|
||||
entries.add(Entry.builder().tocId(newId).node(node).type(nodeType).children(new LinkedList<>()).build());
|
||||
if (!entryExists(parentId)) {
|
||||
throw new UnsupportedOperationException(format("parentId %s does not exist!", parentId));
|
||||
}
|
||||
|
||||
Entry parent = getEntryById(parentId);
|
||||
List<Integer> newId = new LinkedList<>(parentId);
|
||||
newId.add(parent.children().size());
|
||||
parent.children().add(Entry.builder().tocId(newId).node(node).type(nodeType).children(new LinkedList<>()).build());
|
||||
|
||||
return newId;
|
||||
}
|
||||
|
||||
|
||||
private boolean entryExists(List<Integer> tocId) {
|
||||
|
||||
if (tocId.size() < 1) {
|
||||
return false;
|
||||
if (tocId.isEmpty()) {
|
||||
return root != null;
|
||||
}
|
||||
Entry entry = entries.get(tocId.get(0));
|
||||
Entry entry = root.children.get(tocId.get(0));
|
||||
for (int id : tocId.subList(1, tocId.size())) {
|
||||
if (id >= entry.children.size() || 0 > id) {
|
||||
return false;
|
||||
@ -75,22 +74,17 @@ public class TableOfContents {
|
||||
|
||||
public Entry getParentEntryById(List<Integer> tocId) {
|
||||
|
||||
List<Integer> parentIds = getParentId(tocId);
|
||||
if (parentIds.size() < 1) {
|
||||
throw new NoSuchElementException(String.format("Node with tocId \"%s\" has no parent!", tocId));
|
||||
}
|
||||
return getEntryById(parentIds);
|
||||
return getEntryById(getParentId(tocId));
|
||||
}
|
||||
|
||||
|
||||
public boolean hasParentById(List<Integer> tocId) {
|
||||
|
||||
List<Integer> parentId = getParentId(tocId);
|
||||
return entryExists(parentId);
|
||||
return entryExists(getParentId(tocId));
|
||||
}
|
||||
|
||||
|
||||
public Stream<SemanticNode> streamChildren(List<Integer> tocId) {
|
||||
public Stream<SemanticNode> streamChildrenNodes(List<Integer> tocId) {
|
||||
|
||||
return getEntryById(tocId).children().stream().map(Entry::node);
|
||||
}
|
||||
@ -98,13 +92,22 @@ public class TableOfContents {
|
||||
|
||||
private static List<Integer> getParentId(List<Integer> tocId) {
|
||||
|
||||
if (tocId.isEmpty()) {
|
||||
throw new UnsupportedOperationException("Root has no parent!");
|
||||
}
|
||||
if (tocId.size() < 2) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
return tocId.subList(0, tocId.size() - 1);
|
||||
}
|
||||
|
||||
|
||||
public Entry getEntryById(List<Integer> tocId) {
|
||||
|
||||
Entry entry = entries.get(tocId.get(0));
|
||||
if (tocId.isEmpty()) {
|
||||
return root;
|
||||
}
|
||||
Entry entry = root.children.get(tocId.get(0));
|
||||
for (int id : tocId.subList(1, tocId.size())) {
|
||||
entry = entry.children().get(id);
|
||||
}
|
||||
@ -112,13 +115,19 @@ public class TableOfContents {
|
||||
}
|
||||
|
||||
|
||||
public Stream<Entry> streamEntriesInOrder() {
|
||||
public Stream<Entry> streamMainEntries() {
|
||||
|
||||
return entries.stream().flatMap(TableOfContents::flatten);
|
||||
return root.children.stream();
|
||||
}
|
||||
|
||||
|
||||
public Stream<Entry> streamSubEntriesInOrder(List<Integer> parentId) {
|
||||
public Stream<Entry> streamAllEntriesInOrder() {
|
||||
|
||||
return Stream.of(root).flatMap(TableOfContents::flatten);
|
||||
}
|
||||
|
||||
|
||||
public Stream<Entry> streamAllSubEntriesInOrder(List<Integer> parentId) {
|
||||
|
||||
return Stream.of(getEntryById(parentId)).flatMap(TableOfContents::flatten);
|
||||
}
|
||||
@ -127,13 +136,13 @@ public class TableOfContents {
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
return String.join("\n", streamEntriesInOrder().map(Entry::toString).toList());
|
||||
return String.join("\n", streamAllEntriesInOrder().map(Entry::toString).toList());
|
||||
}
|
||||
|
||||
|
||||
public String toString(List<Integer> id) {
|
||||
|
||||
return String.join("\n", streamSubEntriesInOrder(id).map(Entry::toString).toList());
|
||||
return String.join("\n", streamAllSubEntriesInOrder(id).map(Entry::toString).toList());
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
package com.knecon.fforesight.service.layoutparser.internal.api.graph.nodes;
|
||||
|
||||
public enum NodeType {
|
||||
DOCUMENT,
|
||||
SECTION,
|
||||
HEADLINE,
|
||||
PARAGRAPH,
|
||||
|
||||
@ -205,7 +205,7 @@ public interface SemanticNode {
|
||||
*/
|
||||
default Stream<SemanticNode> streamChildren() {
|
||||
|
||||
return getTableOfContents().streamChildren(getTocId());
|
||||
return getTableOfContents().streamChildrenNodes(getTocId());
|
||||
}
|
||||
|
||||
|
||||
@ -216,7 +216,7 @@ public interface SemanticNode {
|
||||
*/
|
||||
default Stream<SemanticNode> streamAllSubNodes() {
|
||||
|
||||
return getTableOfContents().streamSubEntriesInOrder(getTocId()).map(TableOfContents.Entry::node);
|
||||
return getTableOfContents().streamAllSubEntriesInOrder(getTocId()).map(TableOfContents.Entry::node);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -48,7 +48,7 @@ public class DocumentDataMapper {
|
||||
|
||||
private TableOfContentsData toTableOfContentsData(TableOfContents tableOfContents) {
|
||||
|
||||
return new TableOfContentsData(tableOfContents.getEntries().stream().map(this::toEntryData).toList());
|
||||
return new TableOfContentsData(toEntryData(tableOfContents.getRoot()));
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -38,8 +38,10 @@ public class DocumentGraphMapper {
|
||||
|
||||
public DocumentGraph toDocumentGraph(DocumentData documentData) {
|
||||
|
||||
|
||||
DocumentGraph documentGraph = new DocumentGraph();
|
||||
Context context = new Context(documentData,
|
||||
new TableOfContents(),
|
||||
new TableOfContents(documentGraph),
|
||||
new LinkedList<>(),
|
||||
new LinkedList<>(),
|
||||
Arrays.stream(documentData.getAtomicTextBlocks()).toList(),
|
||||
@ -47,13 +49,12 @@ public class DocumentGraphMapper {
|
||||
|
||||
context.pages.addAll(Arrays.stream(documentData.getPages()).map(this::buildPage).toList());
|
||||
|
||||
context.tableOfContents.setEntries(buildEntries(documentData.getTableOfContents().getEntries(), context));
|
||||
context.tableOfContents.getRoot().children().addAll(buildEntries(documentData.getTableOfContents().getRoot().subEntries(), context));
|
||||
|
||||
documentGraph.setTableOfContents(context.tableOfContents);
|
||||
documentGraph.setPages(new HashSet<>(context.pages));
|
||||
documentGraph.setNumberOfPages(documentData.getPages().length);
|
||||
|
||||
DocumentGraph documentGraph = DocumentGraph.builder()
|
||||
.numberOfPages(documentData.getPages().length)
|
||||
.pages(new HashSet<>(context.pages))
|
||||
.tableOfContents(context.tableOfContents)
|
||||
.build();
|
||||
documentGraph.setTextBlock(documentGraph.buildTextBlock());
|
||||
return documentGraph;
|
||||
}
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
package com.knecon.fforesight.service.layoutparser.internal.api.services;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.Set;
|
||||
|
||||
@ -21,9 +22,7 @@ public class EntityInsertionService {
|
||||
public void addEntityToGraph(EntityNode entity, TableOfContents tableOfContents) {
|
||||
|
||||
try {
|
||||
SemanticNode containingNode = tableOfContents.getEntries()
|
||||
.stream()
|
||||
.map(TableOfContents.Entry::node)
|
||||
SemanticNode containingNode = tableOfContents.streamChildrenNodes(Collections.emptyList())
|
||||
.filter(node -> node.buildTextBlock().containsBoundary(entity.getBoundary()))
|
||||
.findFirst()
|
||||
.orElseThrow(() -> new NoSuchElementException("No containing Node found!"));
|
||||
@ -37,7 +36,6 @@ public class EntityInsertionService {
|
||||
addToNodeEntitySets(entity);
|
||||
|
||||
} catch (NoSuchElementException e) {
|
||||
entityEnrichmentService.enrichEntity(entity, tableOfContents.buildTextBlock());
|
||||
entity.removeFromGraph();
|
||||
}
|
||||
}
|
||||
|
||||
@ -10,94 +10,78 @@
|
||||
</parent>
|
||||
|
||||
<artifactId>layoutparser-service-processor</artifactId>
|
||||
<version>1.0.0</version>
|
||||
|
||||
<dependencies>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.iqser.red.service</groupId>
|
||||
<artifactId>persistence-service-internal-api-v1</artifactId>
|
||||
<version>2.36.0</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.knecon.fforesight</groupId>
|
||||
<artifactId>layoutparser-service-internal-api</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.iqser.red.commons</groupId>
|
||||
<artifactId>spring-commons</artifactId>
|
||||
<version>6.2.0</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.iqser.red.commons</groupId>
|
||||
<artifactId>storage-commons</artifactId>
|
||||
<version>1.13.0</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.iqser.red.commons</groupId>
|
||||
<artifactId>spring-commons</artifactId>
|
||||
<version>6.2.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.dslplatform</groupId>
|
||||
<artifactId>dsl-json-java8</artifactId>
|
||||
<version>1.10.0</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.pdfbox</groupId>
|
||||
<artifactId>pdfbox</artifactId>
|
||||
<version>${pdfbox.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.pdfbox</groupId>
|
||||
<artifactId>pdfbox-tools</artifactId>
|
||||
<version>${pdfbox.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.google.guava</groupId>
|
||||
<artifactId>guava</artifactId>
|
||||
<version>31.1-jre</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.module</groupId>
|
||||
<artifactId>jackson-module-afterburner</artifactId>
|
||||
<version>${jackson.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.datatype</groupId>
|
||||
<artifactId>jackson-datatype-jsr310</artifactId>
|
||||
<version>${jackson.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-security</artifactId>
|
||||
<version>${spring.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-web</artifactId>
|
||||
<version>${spring.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.projectlombok</groupId>
|
||||
<artifactId>lombok</artifactId>
|
||||
<optional>true</optional>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.cloud</groupId>
|
||||
<artifactId>spring-cloud-starter-openfeign</artifactId>
|
||||
<version>4.0.2</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-amqp</artifactId>
|
||||
@ -105,22 +89,6 @@
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-maven-plugin</artifactId>
|
||||
<configuration>
|
||||
<excludes>
|
||||
<exclude>
|
||||
<groupId>org.projectlombok</groupId>
|
||||
<artifactId>lombok</artifactId>
|
||||
</exclude>
|
||||
</excludes>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
<repositories>
|
||||
<repository>
|
||||
<id>spring-milestones</id>
|
||||
|
||||
@ -52,15 +52,17 @@ public class DocumentGraphFactory {
|
||||
public DocumentGraph buildDocumentGraph(ClassificationDocument document) {
|
||||
|
||||
TextBlockFactory textBlockFactory = new TextBlockFactory();
|
||||
Context context = new Context(new TableOfContents(), new HashMap<>(), new LinkedList<>(), new LinkedList<>(), textBlockFactory);
|
||||
DocumentGraph documentGraph = new DocumentGraph();
|
||||
Context context = new Context(new TableOfContents(documentGraph), new HashMap<>(), new LinkedList<>(), new LinkedList<>(), textBlockFactory);
|
||||
|
||||
document.getPages().stream().map(this::buildPage).forEach(page -> context.pages().put(page, new AtomicInteger(1)));
|
||||
document.getSections().stream().flatMap(section -> section.getImages().stream()).forEach(image -> context.images().add(image));
|
||||
addSections(document, context);
|
||||
addHeaderAndFooterToEachPage(document, context);
|
||||
|
||||
DocumentGraph documentGraph = DocumentGraph.builder().numberOfPages(context.pages.size()).pages(context.pages.keySet()).tableOfContents(context.tableOfContents).build();
|
||||
|
||||
documentGraph.setNumberOfPages(context.pages.size());
|
||||
documentGraph.setPages(context.pages.keySet());
|
||||
documentGraph.setTableOfContents(context.tableOfContents);
|
||||
documentGraph.setTextBlock(documentGraph.buildTextBlock());
|
||||
return documentGraph;
|
||||
}
|
||||
@ -82,7 +84,7 @@ public class DocumentGraphFactory {
|
||||
|
||||
List<Integer> tocId;
|
||||
if (parentNode == null) {
|
||||
tocId = context.tableOfContents.createNewEntryAndReturnId(NodeType.SECTION, sectionNode);
|
||||
tocId = context.tableOfContents.createNewMainEntryAndReturnId(NodeType.SECTION, sectionNode);
|
||||
} else {
|
||||
tocId = context.tableOfContents.createNewChildEntryAndReturnId(parentNode.getTocId(), NodeType.SECTION, sectionNode);
|
||||
}
|
||||
@ -309,7 +311,7 @@ public class DocumentGraphFactory {
|
||||
footer,
|
||||
context,
|
||||
page);
|
||||
List<Integer> tocId = context.tableOfContents().createNewEntryAndReturnId(NodeType.FOOTER, footer);
|
||||
List<Integer> tocId = context.tableOfContents().createNewMainEntryAndReturnId(NodeType.FOOTER, footer);
|
||||
footer.setTocId(tocId);
|
||||
footer.setTerminalTextBlock(textBlock);
|
||||
page.setFooter(footer);
|
||||
@ -325,7 +327,7 @@ public class DocumentGraphFactory {
|
||||
context,
|
||||
0,
|
||||
page);
|
||||
List<Integer> tocId = context.tableOfContents().createNewEntryAndReturnId(NodeType.HEADER, header);
|
||||
List<Integer> tocId = context.tableOfContents().createNewMainEntryAndReturnId(NodeType.HEADER, header);
|
||||
header.setTocId(tocId);
|
||||
header.setTerminalTextBlock(textBlock);
|
||||
page.setHeader(header);
|
||||
@ -337,7 +339,7 @@ public class DocumentGraphFactory {
|
||||
PageNode page = getPage(pageIndex, context);
|
||||
FooterNode footer = FooterNode.builder().tableOfContents(context.tableOfContents()).build();
|
||||
AtomicTextBlock textBlock = context.textBlockFactory.emptyTextBlock(footer, context, page);
|
||||
List<Integer> tocId = context.tableOfContents().createNewEntryAndReturnId(NodeType.FOOTER, footer);
|
||||
List<Integer> tocId = context.tableOfContents().createNewMainEntryAndReturnId(NodeType.FOOTER, footer);
|
||||
footer.setTocId(tocId);
|
||||
footer.setTerminalTextBlock(textBlock);
|
||||
page.setFooter(footer);
|
||||
@ -349,7 +351,7 @@ public class DocumentGraphFactory {
|
||||
PageNode page = getPage(pageIndex, context);
|
||||
HeaderNode header = HeaderNode.builder().tableOfContents(context.tableOfContents()).build();
|
||||
AtomicTextBlock textBlock = context.textBlockFactory.emptyTextBlock(header, 0, page);
|
||||
List<Integer> tocId = context.tableOfContents().createNewEntryAndReturnId(NodeType.HEADER, header);
|
||||
List<Integer> tocId = context.tableOfContents().createNewMainEntryAndReturnId(NodeType.HEADER, header);
|
||||
header.setTocId(tocId);
|
||||
header.setTerminalTextBlock(textBlock);
|
||||
page.setHeader(header);
|
||||
|
||||
@ -10,7 +10,6 @@
|
||||
</parent>
|
||||
|
||||
<artifactId>layoutparser-service-server</artifactId>
|
||||
<version>1.0.0</version>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
@ -18,7 +17,11 @@
|
||||
<artifactId>layoutparser-service-processor</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.iqser.red.commons</groupId>
|
||||
<artifactId>spring-commons</artifactId>
|
||||
<version>6.2.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>javax.servlet</groupId>
|
||||
<artifactId>javax.servlet-api</artifactId>
|
||||
@ -29,13 +32,11 @@
|
||||
<artifactId>spring-cloud-starter-openfeign</artifactId>
|
||||
<version>4.0.2</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-amqp</artifactId>
|
||||
<version>${spring.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.junit.jupiter</groupId>
|
||||
<artifactId>junit-jupiter-api</artifactId>
|
||||
@ -54,5 +55,28 @@
|
||||
<version>6.0.3</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.mockito</groupId>
|
||||
<artifactId>mockito-core</artifactId>
|
||||
<version>5.3.0</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-maven-plugin</artifactId>
|
||||
<configuration>
|
||||
<excludes>
|
||||
<exclude>
|
||||
<groupId>org.projectlombok</groupId>
|
||||
<artifactId>lombok</artifactId>
|
||||
</exclude>
|
||||
</excludes>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
|
||||
|
||||
@ -1,7 +0,0 @@
|
||||
package com.knecon.fforesight.service.layoutparser.server;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
class ApplicationTest {
|
||||
|
||||
}
|
||||
@ -2,7 +2,9 @@ package com.knecon.fforesight.service.layoutparser.server;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
class LayoutParserApplicationTests extends BaseTest {
|
||||
import com.knecon.fforesight.service.layoutparser.server.utils.BaseTest;
|
||||
|
||||
class ApplicationTests extends BaseTest {
|
||||
|
||||
@Test
|
||||
void contextLoads() {
|
||||
@ -0,0 +1,51 @@
|
||||
package com.knecon.fforesight.service.layoutparser.server.graph;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
import java.io.InputStream;
|
||||
|
||||
import org.apache.pdfbox.Loader;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.graph.DocumentGraph;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.adapter.model.table.TableServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.server.utils.BaseTest;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
/**
 * Test that builds a {@link DocumentGraph} from a PDF test resource and checks page-level
 * header/footer counts. Also provides the protected {@link #buildGraph(String)} helper so
 * subclasses can build a graph for a given test file.
 */
public class BuildDocumentGraphTest extends BaseTest {
|
||||
|
||||
@Autowired
|
||||
private LayoutParsingService layoutParsingService;
|
||||
|
||||
/**
 * Builds the graph for the S-Metolachlor document and asserts that it has 221 pages, of which
 * 220 have a header with text and none have a footer with text.
 */
@Test
|
||||
public void buildMetolachlor() {
|
||||
|
||||
DocumentGraph documentGraph = buildGraph("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06");
|
||||
assertEquals(221, documentGraph.getPages().size());
|
||||
assertEquals(220 , documentGraph.getPages().stream().filter(page -> page.getHeader().hasText()).count());
|
||||
assertEquals(0 , documentGraph.getPages().stream().filter(page -> page.getFooter().hasText()).count());
|
||||
}
|
||||
|
||||
|
||||
/**
 * Parses the layout of the PDF found on the classpath at {@code filename + ".pdf"} and returns
 * the resulting document graph.
 *
 * @param filename classpath location of the PDF without the {@code .pdf} extension
 * @return the graph returned by {@code layoutParsingService.parseLayout}
 */
// @SneakyThrows lets checked exceptions (e.g. from reading the resource) propagate without a throws clause.
@SneakyThrows
|
||||
protected DocumentGraph buildGraph(String filename) {
|
||||
|
||||
// The S-Metolachlor document is prepared with two additional fixture files (a table-parsing
// response and an image-info response); every other file is prepared with the PDF alone.
// NOTE(review): prepareStorage is not defined in this class - presumably inherited from BaseTest; confirm what it stores.
if (filename.equals("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06")) {
|
||||
prepareStorage(filename + ".pdf", "cv_table_parsing_response/empty.json", "image_service_response/S-Metolachlor_RAR_01_Volume_1_2018-09-06.IMAGE_INFO.json");
|
||||
} else {
|
||||
prepareStorage(filename + ".pdf");
|
||||
}
|
||||
ClassPathResource fileResource = new ClassPathResource(filename + ".pdf");
|
||||
|
||||
// try-with-resources closes the classpath input stream once parsing has returned.
try (InputStream inputStream = fileResource.getInputStream()) {
|
||||
PDDocument pdDocument = Loader.loadPDF(inputStream);
|
||||
// A freshly constructed TableServiceResponse is always passed here, regardless of the fixtures prepared above.
// NOTE(review): layoutParsingStorageService and IMAGE_FILE_ID are not declared in this class - presumably from BaseTest; confirm.
return layoutParsingService.parseLayout(pdDocument, layoutParsingStorageService.getImagesFile(IMAGE_FILE_ID), new TableServiceResponse());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@ -4,14 +4,10 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertInstanceOf;
|
||||
import static org.wildfly.common.Assert.assertTrue;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.pdfbox.Loader;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.graph.Boundary;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.graph.DocumentGraph;
|
||||
@ -26,21 +22,13 @@ import com.knecon.fforesight.service.layoutparser.internal.api.graph.nodes.Table
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.graph.nodes.TableNode;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.graph.textblock.TextBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.services.EntityInsertionService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.adapter.model.table.TableServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.server.BaseTest;
|
||||
import com.knecon.fforesight.service.layoutparser.server.TestEntity;
|
||||
import com.knecon.fforesight.service.layoutparser.server.utils.TestEntity;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
public class DocumentGraphTest extends BaseTest {
|
||||
public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest {
|
||||
|
||||
@Autowired
|
||||
private EntityInsertionService entityInsertionService;
|
||||
|
||||
@Autowired
|
||||
private LayoutParsingService layoutParsingService;
|
||||
|
||||
|
||||
@Test
|
||||
public void assertTextBeforeAndTextAfterForParagraphCrafted() {
|
||||
@ -51,7 +39,7 @@ public class DocumentGraphTest extends BaseTest {
|
||||
assert start != -1;
|
||||
|
||||
Boundary boundary = new Boundary(start, start + searchTerm.length());
|
||||
TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123");
|
||||
TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123");
|
||||
entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents());
|
||||
|
||||
assertEquals("Expand to Hint ", entityNode.getTextBefore());
|
||||
@ -76,7 +64,7 @@ public class DocumentGraphTest extends BaseTest {
|
||||
assert start != -1;
|
||||
|
||||
Boundary boundary = new Boundary(start, start + searchTerm.length());
|
||||
TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123");
|
||||
TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123");
|
||||
entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents());
|
||||
|
||||
assertEquals("", entityNode.getTextBefore());
|
||||
@ -132,7 +120,7 @@ public class DocumentGraphTest extends BaseTest {
|
||||
|
||||
DocumentGraph documentGraph = buildGraph("files/crafted document");
|
||||
TableNode table = (TableNode) documentGraph.getTableOfContents()//
|
||||
.streamEntriesInOrder()//
|
||||
.streamAllEntriesInOrder()//
|
||||
.filter(entry -> entry.type().equals(NodeType.TABLE))//
|
||||
.map(TableOfContents.Entry::node)//
|
||||
.findFirst().orElseThrow();
|
||||
@ -162,7 +150,7 @@ public class DocumentGraphTest extends BaseTest {
|
||||
|
||||
DocumentGraph documentGraph = buildGraph("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06");
|
||||
TableNode table = (TableNode) documentGraph.getTableOfContents()
|
||||
.streamEntriesInOrder()
|
||||
.streamAllEntriesInOrder()
|
||||
.filter(entry -> entry.node().getPages().stream().anyMatch(page -> page.getNumber() == 22))
|
||||
.filter(entry -> entry.type().equals(NodeType.TABLE))
|
||||
.map(TableOfContents.Entry::node)
|
||||
@ -187,7 +175,7 @@ public class DocumentGraphTest extends BaseTest {
|
||||
assert start != -1;
|
||||
|
||||
Boundary boundary = new Boundary(start, start + searchTerm.length());
|
||||
TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123");
|
||||
TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123");
|
||||
entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents());
|
||||
|
||||
assertEquals("except Cranberry; Vegetable, ", entityNode.getTextBefore());
|
||||
@ -214,7 +202,7 @@ public class DocumentGraphTest extends BaseTest {
|
||||
assert start != -1;
|
||||
|
||||
Boundary boundary = new Boundary(start, start + searchTerm.length());
|
||||
TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123");
|
||||
TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123");
|
||||
entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents());
|
||||
|
||||
assertEquals("2.6.1 Summary of ", entityNode.getTextBefore());
|
||||
@ -240,7 +228,7 @@ public class DocumentGraphTest extends BaseTest {
|
||||
assert start != -1;
|
||||
|
||||
Boundary boundary = new Boundary(start, start + searchTerm.length());
|
||||
TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123");
|
||||
TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123");
|
||||
entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents());
|
||||
|
||||
assertEquals("2-[(2-(1-hydroxy-ethyl)-6methyl-phenyl-amino]propan-1-ol (", entityNode.getTextBefore());
|
||||
@ -258,23 +246,6 @@ public class DocumentGraphTest extends BaseTest {
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
protected DocumentGraph buildGraph(String filename) {
|
||||
|
||||
if (filename.equals("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06")) {
|
||||
prepareStorage(filename + ".pdf", "cv_table_parsing_response/empty.json", "image_service_response/S-Metolachlor_RAR_01_Volume_1_2018-09-06.IMAGE_INFO.json");
|
||||
} else {
|
||||
prepareStorage(filename + ".pdf");
|
||||
}
|
||||
ClassPathResource fileResource = new ClassPathResource(filename + ".pdf");
|
||||
|
||||
try (InputStream inputStream = fileResource.getInputStream()) {
|
||||
PDDocument pdDocument = Loader.loadPDF(inputStream);
|
||||
return layoutParsingService.parseLayout(pdDocument, layoutParsingStorageService.getImagesFile(IMAGE_FILE_ID), new TableServiceResponse());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private static void assertSameOffsetInAllIntersectingNodes(String searchTerm, int start, EntityNode entityNode) {
|
||||
|
||||
List<Integer> paragraphStart = entityNode.getIntersectingNodes().stream()//
|
||||
@ -293,7 +264,7 @@ public class DocumentGraphTest extends BaseTest {
|
||||
assert start != -1;
|
||||
|
||||
Boundary boundary = new Boundary(start, start + searchTerm.length());
|
||||
TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123");
|
||||
TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123");
|
||||
entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents());
|
||||
PageNode pageNode = documentGraph.getPages().stream().filter(page -> page.getNumber() == pageNumber).findFirst().orElseThrow();
|
||||
|
||||
@ -1,31 +1,18 @@
|
||||
package com.knecon.fforesight.service.layoutparser.server.graph;
|
||||
|
||||
import java.util.Collections;
|
||||
|
||||
import org.apache.pdfbox.Loader;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.DocumentData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.graph.DocumentGraph;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.mapper.DocumentDataMapper;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.mapper.DocumentGraphMapper;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.classification.service.PdfParsingService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.factory.DocumentGraphFactory;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.queue.LayoutParsingRequest;
|
||||
import com.knecon.fforesight.service.layoutparser.server.BaseTest;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
public class DocumentGraphMappingTest extends BaseTest {
|
||||
public class DocumentGraphMappingTest extends BuildDocumentGraphTest {
|
||||
|
||||
@Autowired
|
||||
private DocumentGraphFactory documentGraphFactory;
|
||||
|
||||
@Autowired
|
||||
private PdfParsingService pdfParsingService;
|
||||
|
||||
@Autowired
|
||||
private DocumentDataMapper documentDataMapper;
|
||||
@ -38,17 +25,10 @@ public class DocumentGraphMappingTest extends BaseTest {
|
||||
@SneakyThrows
|
||||
public void testGraphMapping() {
|
||||
|
||||
String filename = "files/crafted document";
|
||||
DocumentGraph document = buildGraph("files/crafted document");
|
||||
LayoutParsingRequest layoutParsingRequest = buildStandardLayoutParsingRequest();
|
||||
|
||||
prepareStorage(filename + ".pdf");
|
||||
ClassPathResource fileResource = new ClassPathResource(filename + ".pdf");
|
||||
LayoutParsingRequest layoutParsingRequest = prepareStorage(fileResource.getInputStream());
|
||||
PDDocument pdDocument = Loader.loadPDF(fileResource.getInputStream());
|
||||
|
||||
var classifiedDoc = pdfParsingService.parseDocument(pdDocument, Collections.emptyMap(), Collections.emptyMap());
|
||||
DocumentGraph document = documentGraphFactory.buildDocumentGraph(classifiedDoc);
|
||||
DocumentData documentData = documentDataMapper.toDocumentData(document);
|
||||
|
||||
layoutParsingStorageService.storeDocumentData(layoutParsingRequest, documentData);
|
||||
DocumentData documentData2 = layoutParsingStorageService.readDocumentData(layoutParsingRequest);
|
||||
DocumentGraph newDocumentGraph = documentGraphMapper.toDocumentGraph(documentData2);
|
||||
|
||||
@ -12,37 +12,50 @@ import org.springframework.core.io.ClassPathResource;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.graph.DocumentGraph;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.graph.textblock.TextBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.server.visualizations.PdfDraw;
|
||||
import com.knecon.fforesight.service.layoutparser.server.utils.visualizations.PdfDraw;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
|
||||
@Disabled
|
||||
public class DocumentGraphVisualizationTest extends DocumentGraphTest {
|
||||
public class DocumentGraphVisualizationTest extends BuildDocumentGraphTest {
|
||||
|
||||
@Test
|
||||
@SneakyThrows
|
||||
@Disabled
|
||||
public void visualizeMetolachlor() {
|
||||
|
||||
String filename = "files/S-Metolachlor_RAR_01_Volume_1_2018-09-06";
|
||||
|
||||
DocumentGraph documentGraph = buildGraph(filename);
|
||||
TextBlock textBlock = documentGraph.buildTextBlock();
|
||||
|
||||
visualizeSemanticNodes(filename, documentGraph, textBlock);
|
||||
visualizePdf(filename);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
@SneakyThrows
|
||||
@Disabled
|
||||
public void visualizeRotatedTestDocument() {
|
||||
|
||||
String filename = "files/RotateTestFileWithImages";
|
||||
visualizePdf(filename);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
@SneakyThrows
|
||||
@Disabled
|
||||
public void visualizeCraftedDocument() {
|
||||
|
||||
String filename = "files/crafted document";
|
||||
visualizePdf(filename);
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private void visualizePdf(String filename) {
|
||||
|
||||
DocumentGraph documentGraph = buildGraph(filename);
|
||||
TextBlock textBlock = documentGraph.buildTextBlock();
|
||||
|
||||
visualizeSemanticNodes(filename, documentGraph, textBlock);
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -1,9 +1,10 @@
|
||||
package com.knecon.fforesight.service.layoutparser.server;
|
||||
package com.knecon.fforesight.service.layoutparser.server.utils;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.util.Optional;
|
||||
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.springframework.amqp.rabbit.core.RabbitTemplate;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
@ -12,6 +13,7 @@ import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.boot.test.mock.mockito.MockBean;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.ComponentScan;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.context.annotation.Primary;
|
||||
@ -22,9 +24,12 @@ import com.iqser.red.storage.commons.service.StorageService;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.mapper.DocumentDataMapper;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.mapper.DocumentGraphMapper;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.services.EntityEnrichmentService;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.services.EntityInsertionService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingStorageService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.multitenancy.TenantContext;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.multitenancy.TenantsClient;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.queue.LayoutParsingRequest;
|
||||
import com.knecon.fforesight.service.layoutparser.server.Application;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
@ -39,6 +44,9 @@ public class BaseTest {
|
||||
@Autowired
|
||||
protected StorageService storageService;
|
||||
|
||||
@Autowired
|
||||
protected TenantsClient tenantsClient;
|
||||
|
||||
@MockBean
|
||||
private RabbitTemplate rabbitTemplate;
|
||||
|
||||
@ -49,6 +57,35 @@ public class BaseTest {
|
||||
protected final static String TEXT_FILE_ID = "texts";
|
||||
protected final static String POSITION_FILE_ID = "positions";
|
||||
protected final static String PAGES_FILE_ID = "pages";
|
||||
protected final static String TENANT_ID = "tenant";
|
||||
|
||||
|
||||
protected LayoutParsingRequest buildStandardLayoutParsingRequest() {
|
||||
|
||||
return LayoutParsingRequest.builder()
|
||||
.imagesFileStorageId(Optional.of(IMAGE_FILE_ID))
|
||||
.originFileStorageId(ORIGIN_FILE_ID)
|
||||
.tablesFileStorageId(Optional.of(TABLE_FILE_ID))
|
||||
.pageFileStorageId(PAGES_FILE_ID)
|
||||
.positionBlockFileStorageId(POSITION_FILE_ID)
|
||||
.structureFileStorageId(STRUCTURE_FILE_ID)
|
||||
.textBlockFileStorageId(TEXT_FILE_ID)
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
@BeforeEach
|
||||
public void setupTenantContext() {
|
||||
|
||||
TenantContext.setTenantId(TENANT_ID);
|
||||
}
|
||||
|
||||
|
||||
@AfterEach
|
||||
public void clearTenantContext() {
|
||||
|
||||
TenantContext.clear();
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
@ -115,6 +152,7 @@ public class BaseTest {
|
||||
|
||||
@Configuration
|
||||
@EnableAutoConfiguration(exclude = RabbitAutoConfiguration.class)
|
||||
@ComponentScan("com.knecon.fforesight.service.layoutparser")
|
||||
public static class TestConfiguration {
|
||||
|
||||
@Bean
|
||||
@ -125,13 +163,6 @@ public class BaseTest {
|
||||
}
|
||||
|
||||
|
||||
@Bean
|
||||
public EntityEnrichmentService testEntityEnrichmentService() {
|
||||
|
||||
return new TestEntityEnrichmentService();
|
||||
}
|
||||
|
||||
|
||||
@Bean
|
||||
public DocumentDataMapper documentDataMapper() {
|
||||
|
||||
@ -145,6 +176,14 @@ public class BaseTest {
|
||||
return new DocumentGraphMapper();
|
||||
}
|
||||
|
||||
|
||||
@Bean
|
||||
@Autowired
|
||||
public EntityInsertionService entityInsertionService(EntityEnrichmentService entityEnrichmentService) {
|
||||
|
||||
return new EntityInsertionService(entityEnrichmentService);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,4 +1,4 @@
|
||||
package com.knecon.fforesight.service.layoutparser.server;
|
||||
package com.knecon.fforesight.service.layoutparser.server.utils;
|
||||
|
||||
import static java.io.File.createTempFile;
|
||||
|
||||
@ -14,6 +14,7 @@ import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.springframework.core.io.InputStreamResource;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.iqser.red.commons.jackson.ObjectMapperFactory;
|
||||
@ -22,6 +23,7 @@ import com.iqser.red.storage.commons.service.StorageService;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
@Service
|
||||
public class FileSystemBackedStorageService implements StorageService {
|
||||
|
||||
private final Map<String, File> dataMap = new HashMap<>();
|
||||
@ -1,4 +1,4 @@
|
||||
package com.knecon.fforesight.service.layoutparser.server;
|
||||
package com.knecon.fforesight.service.layoutparser.server.utils;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.HashSet;
|
||||
@ -1,15 +1,18 @@
|
||||
package com.knecon.fforesight.service.layoutparser.server;
|
||||
package com.knecon.fforesight.service.layoutparser.server.utils;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.graph.entity.EntityNode;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.graph.textblock.TextBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.services.EntityEnrichmentService;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class TestEntityEnrichmentService implements EntityEnrichmentService {
|
||||
|
||||
@ -0,0 +1,42 @@
|
||||
package com.knecon.fforesight.service.layoutparser.server.utils;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.multitenancy.TenantRequest;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.multitenancy.TenantResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.multitenancy.TenantsClient;
|
||||
|
||||
@Service
|
||||
public class TestTenantsClient implements TenantsClient {
|
||||
|
||||
@Override
|
||||
public void createTenant(TenantRequest tenantRequest) {
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public List<TenantResponse> getTenants() {
|
||||
|
||||
return Collections.emptyList();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public TenantResponse getTenant(String tenantId) {
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public JSONPrimitive<String> getDeploymentKey(String tenantId) {
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,4 +1,4 @@
|
||||
package com.knecon.fforesight.service.layoutparser.server.visualizations;
|
||||
package com.knecon.fforesight.service.layoutparser.server.utils.visualizations;
|
||||
|
||||
import java.awt.Color;
|
||||
import java.awt.geom.Point2D;
|
||||
@ -36,7 +36,7 @@ public class PdfDraw {
|
||||
|
||||
public static void drawDocumentGraph(PDDocument document, DocumentGraph documentGraph) {
|
||||
|
||||
documentGraph.getTableOfContents().streamEntriesInOrder().forEach(entry -> drawNode(document, entry));
|
||||
documentGraph.getTableOfContents().streamAllEntriesInOrder().forEach(entry -> drawNode(document, entry));
|
||||
}
|
||||
|
||||
|
||||
@ -72,8 +72,11 @@ public class PdfDraw {
|
||||
contentStream.setLineWidth(options.getStrokeWidth());
|
||||
|
||||
contentStream.beginText();
|
||||
contentStream.setTextMatrix(Matrix.getRotateInstance(Math.toRadians(30), 0, 0));
|
||||
contentStream.newLineAtOffset((float) location.getX(), (float) location.getY());
|
||||
if (rotate) {
|
||||
contentStream.setTextMatrix(Matrix.getRotateInstance(Math.toRadians(15), (float) location.getX(), (float) location.getY()));
|
||||
} else {
|
||||
contentStream.newLineAtOffset((float) location.getX(), (float) location.getY());
|
||||
}
|
||||
contentStream.setFont(new PDType1Font(Standard14Fonts.FontName.HELVETICA), 10);
|
||||
contentStream.showText(string);
|
||||
contentStream.endText();
|
||||
@ -136,6 +139,7 @@ public class PdfDraw {
|
||||
private static Options buildStandardOptionsForNodes(TableOfContents.Entry entry) {
|
||||
|
||||
return Options.builder().stroke(true).strokeColor(switch (entry.type()) {
|
||||
case DOCUMENT -> Color.LIGHT_GRAY;
|
||||
case HEADER, FOOTER -> Color.GREEN;
|
||||
case PARAGRAPH -> Color.BLUE;
|
||||
case HEADLINE -> Color.RED;
|
||||
Loading…
x
Reference in New Issue
Block a user