RED-6009: Document Tree Structure
*fixed dependency issues
This commit is contained in:
parent
aac0259caf
commit
2ed617bb03
6
.gitignore
vendored
6
.gitignore
vendored
@ -4,6 +4,12 @@ target/
|
|||||||
!**/src/main/**/target/
|
!**/src/main/**/target/
|
||||||
!**/src/test/**/target/
|
!**/src/test/**/target/
|
||||||
|
|
||||||
|
### maven build ###
|
||||||
|
*.class
|
||||||
|
/out/
|
||||||
|
**/out/
|
||||||
|
**/target/
|
||||||
|
|
||||||
### STS ###
|
### STS ###
|
||||||
.apt_generated
|
.apt_generated
|
||||||
.classpath
|
.classpath
|
||||||
|
|||||||
@ -2,8 +2,8 @@
|
|||||||
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
xmlns="http://maven.apache.org/POM/4.0.0"
|
xmlns="http://maven.apache.org/POM/4.0.0"
|
||||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
<modelVersion>4.0.0</modelVersion>
|
|
||||||
|
|
||||||
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
<parent>
|
<parent>
|
||||||
<groupId>com.knecon.fforesight</groupId>
|
<groupId>com.knecon.fforesight</groupId>
|
||||||
@ -12,9 +12,6 @@
|
|||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
<artifactId>layoutparser-service-internal-api</artifactId>
|
<artifactId>layoutparser-service-internal-api</artifactId>
|
||||||
<version>1.0.0</version>
|
|
||||||
|
|
||||||
<packaging>pom</packaging>
|
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
@ -29,21 +26,4 @@
|
|||||||
</dependency>
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
<build>
|
|
||||||
<plugins>
|
|
||||||
<plugin>
|
|
||||||
<groupId>org.springframework.boot</groupId>
|
|
||||||
<artifactId>spring-boot-maven-plugin</artifactId>
|
|
||||||
<configuration>
|
|
||||||
<excludes>
|
|
||||||
<exclude>
|
|
||||||
<groupId>org.projectlombok</groupId>
|
|
||||||
<artifactId>lombok</artifactId>
|
|
||||||
</exclude>
|
|
||||||
</excludes>
|
|
||||||
</configuration>
|
|
||||||
</plugin>
|
|
||||||
</plugins>
|
|
||||||
</build>
|
|
||||||
|
|
||||||
</project>
|
</project>
|
||||||
|
|||||||
@ -5,8 +5,6 @@ import java.util.List;
|
|||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
import javax.management.openmbean.InvalidKeyException;
|
|
||||||
|
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.graph.nodes.NodeType;
|
import com.knecon.fforesight.service.layoutparser.internal.api.graph.nodes.NodeType;
|
||||||
|
|
||||||
import lombok.AccessLevel;
|
import lombok.AccessLevel;
|
||||||
@ -23,15 +21,15 @@ import lombok.experimental.FieldDefaults;
|
|||||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||||
public class TableOfContentsData {
|
public class TableOfContentsData {
|
||||||
|
|
||||||
List<EntryData> entries;
|
EntryData root;
|
||||||
|
|
||||||
|
|
||||||
public EntryData get(List<Integer> tocId) {
|
public EntryData get(List<Integer> tocId) {
|
||||||
|
|
||||||
if (tocId.size() < 1) {
|
if (tocId.isEmpty()) {
|
||||||
throw new InvalidKeyException(String.format("ClassificationSection Identifier: \"%s\" is not valid.", tocId));
|
return root;
|
||||||
}
|
}
|
||||||
EntryData entry = entries.get(tocId.get(0));
|
EntryData entry = root.subEntries.get(tocId.get(0));
|
||||||
for (int id : tocId.subList(1, tocId.size())) {
|
for (int id : tocId.subList(1, tocId.size())) {
|
||||||
entry = entry.subEntries().get(id);
|
entry = entry.subEntries().get(id);
|
||||||
}
|
}
|
||||||
@ -41,7 +39,7 @@ public class TableOfContentsData {
|
|||||||
|
|
||||||
public Stream<EntryData> streamAllEntries() {
|
public Stream<EntryData> streamAllEntries() {
|
||||||
|
|
||||||
return entries.stream().flatMap(TableOfContentsData::flatten);
|
return Stream.concat(Stream.of(root), root.subEntries.stream()).flatMap(TableOfContentsData::flatten);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -10,6 +10,7 @@ import java.util.stream.Collectors;
|
|||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.graph.entity.EntityNode;
|
import com.knecon.fforesight.service.layoutparser.internal.api.graph.entity.EntityNode;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.internal.api.graph.nodes.NodeType;
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.graph.nodes.PageNode;
|
import com.knecon.fforesight.service.layoutparser.internal.api.graph.nodes.PageNode;
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.graph.nodes.SectionNode;
|
import com.knecon.fforesight.service.layoutparser.internal.api.graph.nodes.SectionNode;
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.graph.nodes.SemanticNode;
|
import com.knecon.fforesight.service.layoutparser.internal.api.graph.nodes.SemanticNode;
|
||||||
@ -20,11 +21,13 @@ import lombok.AccessLevel;
|
|||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
import lombok.Builder;
|
import lombok.Builder;
|
||||||
import lombok.Data;
|
import lombok.Data;
|
||||||
|
import lombok.NoArgsConstructor;
|
||||||
import lombok.experimental.FieldDefaults;
|
import lombok.experimental.FieldDefaults;
|
||||||
|
|
||||||
@Data
|
@Data
|
||||||
@Builder
|
@Builder
|
||||||
@AllArgsConstructor
|
@AllArgsConstructor
|
||||||
|
@NoArgsConstructor
|
||||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||||
public class DocumentGraph implements SemanticNode {
|
public class DocumentGraph implements SemanticNode {
|
||||||
|
|
||||||
@ -42,7 +45,7 @@ public class DocumentGraph implements SemanticNode {
|
|||||||
|
|
||||||
public List<SectionNode> getMainSections() {
|
public List<SectionNode> getMainSections() {
|
||||||
|
|
||||||
return tableOfContents.entries.stream().filter(entry -> entry.node() instanceof SectionNode).map(entry -> (SectionNode) entry.node()).collect(Collectors.toList());
|
return streamChildren().filter(node -> node instanceof SectionNode).map(node -> (SectionNode) node).collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -74,14 +77,14 @@ public class DocumentGraph implements SemanticNode {
|
|||||||
|
|
||||||
private Stream<SemanticNode> streamAllNodes() {
|
private Stream<SemanticNode> streamAllNodes() {
|
||||||
|
|
||||||
return tableOfContents.streamEntriesInOrder().map(TableOfContents.Entry::node);
|
return tableOfContents.streamAllEntriesInOrder().map(TableOfContents.Entry::node);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
|
|
||||||
return tableOfContents.toString();
|
return NodeType.DOCUMENT + ": " + buildTextBlock().buildSummary();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -1,10 +1,11 @@
|
|||||||
package com.knecon.fforesight.service.layoutparser.internal.api.graph;
|
package com.knecon.fforesight.service.layoutparser.internal.api.graph;
|
||||||
|
|
||||||
|
import static java.lang.String.format;
|
||||||
|
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.NoSuchElementException;
|
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
import com.google.common.hash.Hashing;
|
import com.google.common.hash.Hashing;
|
||||||
@ -19,22 +20,22 @@ import lombok.Data;
|
|||||||
@Data
|
@Data
|
||||||
public class TableOfContents {
|
public class TableOfContents {
|
||||||
|
|
||||||
List<Entry> entries;
|
private final Entry root;
|
||||||
|
|
||||||
|
|
||||||
public TableOfContents() {
|
public TableOfContents(DocumentGraph documentGraph) {
|
||||||
|
|
||||||
entries = new LinkedList<>();
|
root = Entry.builder().tocId(Collections.emptyList()).type(NodeType.DOCUMENT).children(new LinkedList<>()).node(documentGraph).build();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public TextBlock buildTextBlock() {
|
public TextBlock buildTextBlock() {
|
||||||
|
|
||||||
return streamEntriesInOrder().map(Entry::node).filter(SemanticNode::isTerminal).map(SemanticNode::getTerminalTextBlock).collect(new TextBlockCollector());
|
return streamAllEntriesInOrder().map(Entry::node).filter(SemanticNode::isTerminal).map(SemanticNode::getTerminalTextBlock).collect(new TextBlockCollector());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public List<Integer> createNewEntryAndReturnId(NodeType nodeType, SemanticNode node) {
|
public List<Integer> createNewMainEntryAndReturnId(NodeType nodeType, SemanticNode node) {
|
||||||
|
|
||||||
return createNewChildEntryAndReturnId(Collections.emptyList(), nodeType, node);
|
return createNewChildEntryAndReturnId(Collections.emptyList(), nodeType, node);
|
||||||
}
|
}
|
||||||
@ -42,27 +43,25 @@ public class TableOfContents {
|
|||||||
|
|
||||||
public List<Integer> createNewChildEntryAndReturnId(List<Integer> parentId, NodeType nodeType, SemanticNode node) {
|
public List<Integer> createNewChildEntryAndReturnId(List<Integer> parentId, NodeType nodeType, SemanticNode node) {
|
||||||
|
|
||||||
List<Integer> newId;
|
if (!entryExists(parentId)) {
|
||||||
if (entryExists(parentId)) {
|
throw new UnsupportedOperationException(format("parentId %s does not exist!", parentId));
|
||||||
Entry parent = getEntryById(parentId);
|
|
||||||
newId = new LinkedList<>(parentId);
|
|
||||||
newId.add(parent.children().size());
|
|
||||||
parent.children().add(Entry.builder().tocId(newId).node(node).type(nodeType).children(new LinkedList<>()).build());
|
|
||||||
} else {
|
|
||||||
newId = List.of(entries.size());
|
|
||||||
entries.add(Entry.builder().tocId(newId).node(node).type(nodeType).children(new LinkedList<>()).build());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Entry parent = getEntryById(parentId);
|
||||||
|
List<Integer> newId = new LinkedList<>(parentId);
|
||||||
|
newId.add(parent.children().size());
|
||||||
|
parent.children().add(Entry.builder().tocId(newId).node(node).type(nodeType).children(new LinkedList<>()).build());
|
||||||
|
|
||||||
return newId;
|
return newId;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private boolean entryExists(List<Integer> tocId) {
|
private boolean entryExists(List<Integer> tocId) {
|
||||||
|
|
||||||
if (tocId.size() < 1) {
|
if (tocId.isEmpty()) {
|
||||||
return false;
|
return root != null;
|
||||||
}
|
}
|
||||||
Entry entry = entries.get(tocId.get(0));
|
Entry entry = root.children.get(tocId.get(0));
|
||||||
for (int id : tocId.subList(1, tocId.size())) {
|
for (int id : tocId.subList(1, tocId.size())) {
|
||||||
if (id >= entry.children.size() || 0 > id) {
|
if (id >= entry.children.size() || 0 > id) {
|
||||||
return false;
|
return false;
|
||||||
@ -75,22 +74,17 @@ public class TableOfContents {
|
|||||||
|
|
||||||
public Entry getParentEntryById(List<Integer> tocId) {
|
public Entry getParentEntryById(List<Integer> tocId) {
|
||||||
|
|
||||||
List<Integer> parentIds = getParentId(tocId);
|
return getEntryById(getParentId(tocId));
|
||||||
if (parentIds.size() < 1) {
|
|
||||||
throw new NoSuchElementException(String.format("Node with tocId \"%s\" has no parent!", tocId));
|
|
||||||
}
|
|
||||||
return getEntryById(parentIds);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public boolean hasParentById(List<Integer> tocId) {
|
public boolean hasParentById(List<Integer> tocId) {
|
||||||
|
|
||||||
List<Integer> parentId = getParentId(tocId);
|
return entryExists(getParentId(tocId));
|
||||||
return entryExists(parentId);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public Stream<SemanticNode> streamChildren(List<Integer> tocId) {
|
public Stream<SemanticNode> streamChildrenNodes(List<Integer> tocId) {
|
||||||
|
|
||||||
return getEntryById(tocId).children().stream().map(Entry::node);
|
return getEntryById(tocId).children().stream().map(Entry::node);
|
||||||
}
|
}
|
||||||
@ -98,13 +92,22 @@ public class TableOfContents {
|
|||||||
|
|
||||||
private static List<Integer> getParentId(List<Integer> tocId) {
|
private static List<Integer> getParentId(List<Integer> tocId) {
|
||||||
|
|
||||||
|
if (tocId.isEmpty()) {
|
||||||
|
throw new UnsupportedOperationException("Root has no parent!");
|
||||||
|
}
|
||||||
|
if (tocId.size() < 2) {
|
||||||
|
return Collections.emptyList();
|
||||||
|
}
|
||||||
return tocId.subList(0, tocId.size() - 1);
|
return tocId.subList(0, tocId.size() - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public Entry getEntryById(List<Integer> tocId) {
|
public Entry getEntryById(List<Integer> tocId) {
|
||||||
|
|
||||||
Entry entry = entries.get(tocId.get(0));
|
if (tocId.isEmpty()) {
|
||||||
|
return root;
|
||||||
|
}
|
||||||
|
Entry entry = root.children.get(tocId.get(0));
|
||||||
for (int id : tocId.subList(1, tocId.size())) {
|
for (int id : tocId.subList(1, tocId.size())) {
|
||||||
entry = entry.children().get(id);
|
entry = entry.children().get(id);
|
||||||
}
|
}
|
||||||
@ -112,13 +115,19 @@ public class TableOfContents {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public Stream<Entry> streamEntriesInOrder() {
|
public Stream<Entry> streamMainEntries() {
|
||||||
|
|
||||||
return entries.stream().flatMap(TableOfContents::flatten);
|
return root.children.stream();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public Stream<Entry> streamSubEntriesInOrder(List<Integer> parentId) {
|
public Stream<Entry> streamAllEntriesInOrder() {
|
||||||
|
|
||||||
|
return Stream.of(root).flatMap(TableOfContents::flatten);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public Stream<Entry> streamAllSubEntriesInOrder(List<Integer> parentId) {
|
||||||
|
|
||||||
return Stream.of(getEntryById(parentId)).flatMap(TableOfContents::flatten);
|
return Stream.of(getEntryById(parentId)).flatMap(TableOfContents::flatten);
|
||||||
}
|
}
|
||||||
@ -127,13 +136,13 @@ public class TableOfContents {
|
|||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
|
|
||||||
return String.join("\n", streamEntriesInOrder().map(Entry::toString).toList());
|
return String.join("\n", streamAllEntriesInOrder().map(Entry::toString).toList());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public String toString(List<Integer> id) {
|
public String toString(List<Integer> id) {
|
||||||
|
|
||||||
return String.join("\n", streamSubEntriesInOrder(id).map(Entry::toString).toList());
|
return String.join("\n", streamAllSubEntriesInOrder(id).map(Entry::toString).toList());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
package com.knecon.fforesight.service.layoutparser.internal.api.graph.nodes;
|
package com.knecon.fforesight.service.layoutparser.internal.api.graph.nodes;
|
||||||
|
|
||||||
public enum NodeType {
|
public enum NodeType {
|
||||||
|
DOCUMENT,
|
||||||
SECTION,
|
SECTION,
|
||||||
HEADLINE,
|
HEADLINE,
|
||||||
PARAGRAPH,
|
PARAGRAPH,
|
||||||
|
|||||||
@ -205,7 +205,7 @@ public interface SemanticNode {
|
|||||||
*/
|
*/
|
||||||
default Stream<SemanticNode> streamChildren() {
|
default Stream<SemanticNode> streamChildren() {
|
||||||
|
|
||||||
return getTableOfContents().streamChildren(getTocId());
|
return getTableOfContents().streamChildrenNodes(getTocId());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -216,7 +216,7 @@ public interface SemanticNode {
|
|||||||
*/
|
*/
|
||||||
default Stream<SemanticNode> streamAllSubNodes() {
|
default Stream<SemanticNode> streamAllSubNodes() {
|
||||||
|
|
||||||
return getTableOfContents().streamSubEntriesInOrder(getTocId()).map(TableOfContents.Entry::node);
|
return getTableOfContents().streamAllSubEntriesInOrder(getTocId()).map(TableOfContents.Entry::node);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -48,7 +48,7 @@ public class DocumentDataMapper {
|
|||||||
|
|
||||||
private TableOfContentsData toTableOfContentsData(TableOfContents tableOfContents) {
|
private TableOfContentsData toTableOfContentsData(TableOfContents tableOfContents) {
|
||||||
|
|
||||||
return new TableOfContentsData(tableOfContents.getEntries().stream().map(this::toEntryData).toList());
|
return new TableOfContentsData(toEntryData(tableOfContents.getRoot()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -38,8 +38,10 @@ public class DocumentGraphMapper {
|
|||||||
|
|
||||||
public DocumentGraph toDocumentGraph(DocumentData documentData) {
|
public DocumentGraph toDocumentGraph(DocumentData documentData) {
|
||||||
|
|
||||||
|
|
||||||
|
DocumentGraph documentGraph = new DocumentGraph();
|
||||||
Context context = new Context(documentData,
|
Context context = new Context(documentData,
|
||||||
new TableOfContents(),
|
new TableOfContents(documentGraph),
|
||||||
new LinkedList<>(),
|
new LinkedList<>(),
|
||||||
new LinkedList<>(),
|
new LinkedList<>(),
|
||||||
Arrays.stream(documentData.getAtomicTextBlocks()).toList(),
|
Arrays.stream(documentData.getAtomicTextBlocks()).toList(),
|
||||||
@ -47,13 +49,12 @@ public class DocumentGraphMapper {
|
|||||||
|
|
||||||
context.pages.addAll(Arrays.stream(documentData.getPages()).map(this::buildPage).toList());
|
context.pages.addAll(Arrays.stream(documentData.getPages()).map(this::buildPage).toList());
|
||||||
|
|
||||||
context.tableOfContents.setEntries(buildEntries(documentData.getTableOfContents().getEntries(), context));
|
context.tableOfContents.getRoot().children().addAll(buildEntries(documentData.getTableOfContents().getRoot().subEntries(), context));
|
||||||
|
|
||||||
|
documentGraph.setTableOfContents(context.tableOfContents);
|
||||||
|
documentGraph.setPages(new HashSet<>(context.pages));
|
||||||
|
documentGraph.setNumberOfPages(documentData.getPages().length);
|
||||||
|
|
||||||
DocumentGraph documentGraph = DocumentGraph.builder()
|
|
||||||
.numberOfPages(documentData.getPages().length)
|
|
||||||
.pages(new HashSet<>(context.pages))
|
|
||||||
.tableOfContents(context.tableOfContents)
|
|
||||||
.build();
|
|
||||||
documentGraph.setTextBlock(documentGraph.buildTextBlock());
|
documentGraph.setTextBlock(documentGraph.buildTextBlock());
|
||||||
return documentGraph;
|
return documentGraph;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,5 +1,6 @@
|
|||||||
package com.knecon.fforesight.service.layoutparser.internal.api.services;
|
package com.knecon.fforesight.service.layoutparser.internal.api.services;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.NoSuchElementException;
|
import java.util.NoSuchElementException;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
@ -21,9 +22,7 @@ public class EntityInsertionService {
|
|||||||
public void addEntityToGraph(EntityNode entity, TableOfContents tableOfContents) {
|
public void addEntityToGraph(EntityNode entity, TableOfContents tableOfContents) {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
SemanticNode containingNode = tableOfContents.getEntries()
|
SemanticNode containingNode = tableOfContents.streamChildrenNodes(Collections.emptyList())
|
||||||
.stream()
|
|
||||||
.map(TableOfContents.Entry::node)
|
|
||||||
.filter(node -> node.buildTextBlock().containsBoundary(entity.getBoundary()))
|
.filter(node -> node.buildTextBlock().containsBoundary(entity.getBoundary()))
|
||||||
.findFirst()
|
.findFirst()
|
||||||
.orElseThrow(() -> new NoSuchElementException("No containing Node found!"));
|
.orElseThrow(() -> new NoSuchElementException("No containing Node found!"));
|
||||||
@ -37,7 +36,6 @@ public class EntityInsertionService {
|
|||||||
addToNodeEntitySets(entity);
|
addToNodeEntitySets(entity);
|
||||||
|
|
||||||
} catch (NoSuchElementException e) {
|
} catch (NoSuchElementException e) {
|
||||||
entityEnrichmentService.enrichEntity(entity, tableOfContents.buildTextBlock());
|
|
||||||
entity.removeFromGraph();
|
entity.removeFromGraph();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -10,94 +10,78 @@
|
|||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
<artifactId>layoutparser-service-processor</artifactId>
|
<artifactId>layoutparser-service-processor</artifactId>
|
||||||
<version>1.0.0</version>
|
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.iqser.red.service</groupId>
|
<groupId>com.iqser.red.service</groupId>
|
||||||
<artifactId>persistence-service-internal-api-v1</artifactId>
|
<artifactId>persistence-service-internal-api-v1</artifactId>
|
||||||
<version>2.36.0</version>
|
<version>2.36.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.knecon.fforesight</groupId>
|
<groupId>com.knecon.fforesight</groupId>
|
||||||
<artifactId>layoutparser-service-internal-api</artifactId>
|
<artifactId>layoutparser-service-internal-api</artifactId>
|
||||||
<version>${project.version}</version>
|
<version>${project.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.iqser.red.commons</groupId>
|
|
||||||
<artifactId>spring-commons</artifactId>
|
|
||||||
<version>6.2.0</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.iqser.red.commons</groupId>
|
<groupId>com.iqser.red.commons</groupId>
|
||||||
<artifactId>storage-commons</artifactId>
|
<artifactId>storage-commons</artifactId>
|
||||||
<version>1.13.0</version>
|
<version>1.13.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.iqser.red.commons</groupId>
|
||||||
|
<artifactId>spring-commons</artifactId>
|
||||||
|
<version>6.2.0</version>
|
||||||
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.dslplatform</groupId>
|
<groupId>com.dslplatform</groupId>
|
||||||
<artifactId>dsl-json-java8</artifactId>
|
<artifactId>dsl-json-java8</artifactId>
|
||||||
<version>1.10.0</version>
|
<version>1.10.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.pdfbox</groupId>
|
<groupId>org.apache.pdfbox</groupId>
|
||||||
<artifactId>pdfbox</artifactId>
|
<artifactId>pdfbox</artifactId>
|
||||||
<version>${pdfbox.version}</version>
|
<version>${pdfbox.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.pdfbox</groupId>
|
<groupId>org.apache.pdfbox</groupId>
|
||||||
<artifactId>pdfbox-tools</artifactId>
|
<artifactId>pdfbox-tools</artifactId>
|
||||||
<version>${pdfbox.version}</version>
|
<version>${pdfbox.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.google.guava</groupId>
|
<groupId>com.google.guava</groupId>
|
||||||
<artifactId>guava</artifactId>
|
<artifactId>guava</artifactId>
|
||||||
<version>31.1-jre</version>
|
<version>31.1-jre</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.fasterxml.jackson.module</groupId>
|
<groupId>com.fasterxml.jackson.module</groupId>
|
||||||
<artifactId>jackson-module-afterburner</artifactId>
|
<artifactId>jackson-module-afterburner</artifactId>
|
||||||
<version>${jackson.version}</version>
|
<version>${jackson.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.fasterxml.jackson.datatype</groupId>
|
<groupId>com.fasterxml.jackson.datatype</groupId>
|
||||||
<artifactId>jackson-datatype-jsr310</artifactId>
|
<artifactId>jackson-datatype-jsr310</artifactId>
|
||||||
<version>${jackson.version}</version>
|
<version>${jackson.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.springframework.boot</groupId>
|
<groupId>org.springframework.boot</groupId>
|
||||||
<artifactId>spring-boot-starter-security</artifactId>
|
<artifactId>spring-boot-starter-security</artifactId>
|
||||||
<version>${spring.version}</version>
|
<version>${spring.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.springframework.boot</groupId>
|
<groupId>org.springframework.boot</groupId>
|
||||||
<artifactId>spring-boot-starter-web</artifactId>
|
<artifactId>spring-boot-starter-web</artifactId>
|
||||||
<version>${spring.version}</version>
|
<version>${spring.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.projectlombok</groupId>
|
<groupId>org.projectlombok</groupId>
|
||||||
<artifactId>lombok</artifactId>
|
<artifactId>lombok</artifactId>
|
||||||
<optional>true</optional>
|
<optional>true</optional>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.springframework.cloud</groupId>
|
<groupId>org.springframework.cloud</groupId>
|
||||||
<artifactId>spring-cloud-starter-openfeign</artifactId>
|
<artifactId>spring-cloud-starter-openfeign</artifactId>
|
||||||
<version>4.0.2</version>
|
<version>4.0.2</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.springframework.boot</groupId>
|
<groupId>org.springframework.boot</groupId>
|
||||||
<artifactId>spring-boot-starter-amqp</artifactId>
|
<artifactId>spring-boot-starter-amqp</artifactId>
|
||||||
@ -105,22 +89,6 @@
|
|||||||
</dependency>
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
<build>
|
|
||||||
<plugins>
|
|
||||||
<plugin>
|
|
||||||
<groupId>org.springframework.boot</groupId>
|
|
||||||
<artifactId>spring-boot-maven-plugin</artifactId>
|
|
||||||
<configuration>
|
|
||||||
<excludes>
|
|
||||||
<exclude>
|
|
||||||
<groupId>org.projectlombok</groupId>
|
|
||||||
<artifactId>lombok</artifactId>
|
|
||||||
</exclude>
|
|
||||||
</excludes>
|
|
||||||
</configuration>
|
|
||||||
</plugin>
|
|
||||||
</plugins>
|
|
||||||
</build>
|
|
||||||
<repositories>
|
<repositories>
|
||||||
<repository>
|
<repository>
|
||||||
<id>spring-milestones</id>
|
<id>spring-milestones</id>
|
||||||
|
|||||||
@ -52,15 +52,17 @@ public class DocumentGraphFactory {
|
|||||||
public DocumentGraph buildDocumentGraph(ClassificationDocument document) {
|
public DocumentGraph buildDocumentGraph(ClassificationDocument document) {
|
||||||
|
|
||||||
TextBlockFactory textBlockFactory = new TextBlockFactory();
|
TextBlockFactory textBlockFactory = new TextBlockFactory();
|
||||||
Context context = new Context(new TableOfContents(), new HashMap<>(), new LinkedList<>(), new LinkedList<>(), textBlockFactory);
|
DocumentGraph documentGraph = new DocumentGraph();
|
||||||
|
Context context = new Context(new TableOfContents(documentGraph), new HashMap<>(), new LinkedList<>(), new LinkedList<>(), textBlockFactory);
|
||||||
|
|
||||||
document.getPages().stream().map(this::buildPage).forEach(page -> context.pages().put(page, new AtomicInteger(1)));
|
document.getPages().stream().map(this::buildPage).forEach(page -> context.pages().put(page, new AtomicInteger(1)));
|
||||||
document.getSections().stream().flatMap(section -> section.getImages().stream()).forEach(image -> context.images().add(image));
|
document.getSections().stream().flatMap(section -> section.getImages().stream()).forEach(image -> context.images().add(image));
|
||||||
addSections(document, context);
|
addSections(document, context);
|
||||||
addHeaderAndFooterToEachPage(document, context);
|
addHeaderAndFooterToEachPage(document, context);
|
||||||
|
|
||||||
DocumentGraph documentGraph = DocumentGraph.builder().numberOfPages(context.pages.size()).pages(context.pages.keySet()).tableOfContents(context.tableOfContents).build();
|
documentGraph.setNumberOfPages(context.pages.size());
|
||||||
|
documentGraph.setPages(context.pages.keySet());
|
||||||
|
documentGraph.setTableOfContents(context.tableOfContents);
|
||||||
documentGraph.setTextBlock(documentGraph.buildTextBlock());
|
documentGraph.setTextBlock(documentGraph.buildTextBlock());
|
||||||
return documentGraph;
|
return documentGraph;
|
||||||
}
|
}
|
||||||
@ -82,7 +84,7 @@ public class DocumentGraphFactory {
|
|||||||
|
|
||||||
List<Integer> tocId;
|
List<Integer> tocId;
|
||||||
if (parentNode == null) {
|
if (parentNode == null) {
|
||||||
tocId = context.tableOfContents.createNewEntryAndReturnId(NodeType.SECTION, sectionNode);
|
tocId = context.tableOfContents.createNewMainEntryAndReturnId(NodeType.SECTION, sectionNode);
|
||||||
} else {
|
} else {
|
||||||
tocId = context.tableOfContents.createNewChildEntryAndReturnId(parentNode.getTocId(), NodeType.SECTION, sectionNode);
|
tocId = context.tableOfContents.createNewChildEntryAndReturnId(parentNode.getTocId(), NodeType.SECTION, sectionNode);
|
||||||
}
|
}
|
||||||
@ -309,7 +311,7 @@ public class DocumentGraphFactory {
|
|||||||
footer,
|
footer,
|
||||||
context,
|
context,
|
||||||
page);
|
page);
|
||||||
List<Integer> tocId = context.tableOfContents().createNewEntryAndReturnId(NodeType.FOOTER, footer);
|
List<Integer> tocId = context.tableOfContents().createNewMainEntryAndReturnId(NodeType.FOOTER, footer);
|
||||||
footer.setTocId(tocId);
|
footer.setTocId(tocId);
|
||||||
footer.setTerminalTextBlock(textBlock);
|
footer.setTerminalTextBlock(textBlock);
|
||||||
page.setFooter(footer);
|
page.setFooter(footer);
|
||||||
@ -325,7 +327,7 @@ public class DocumentGraphFactory {
|
|||||||
context,
|
context,
|
||||||
0,
|
0,
|
||||||
page);
|
page);
|
||||||
List<Integer> tocId = context.tableOfContents().createNewEntryAndReturnId(NodeType.HEADER, header);
|
List<Integer> tocId = context.tableOfContents().createNewMainEntryAndReturnId(NodeType.HEADER, header);
|
||||||
header.setTocId(tocId);
|
header.setTocId(tocId);
|
||||||
header.setTerminalTextBlock(textBlock);
|
header.setTerminalTextBlock(textBlock);
|
||||||
page.setHeader(header);
|
page.setHeader(header);
|
||||||
@ -337,7 +339,7 @@ public class DocumentGraphFactory {
|
|||||||
PageNode page = getPage(pageIndex, context);
|
PageNode page = getPage(pageIndex, context);
|
||||||
FooterNode footer = FooterNode.builder().tableOfContents(context.tableOfContents()).build();
|
FooterNode footer = FooterNode.builder().tableOfContents(context.tableOfContents()).build();
|
||||||
AtomicTextBlock textBlock = context.textBlockFactory.emptyTextBlock(footer, context, page);
|
AtomicTextBlock textBlock = context.textBlockFactory.emptyTextBlock(footer, context, page);
|
||||||
List<Integer> tocId = context.tableOfContents().createNewEntryAndReturnId(NodeType.FOOTER, footer);
|
List<Integer> tocId = context.tableOfContents().createNewMainEntryAndReturnId(NodeType.FOOTER, footer);
|
||||||
footer.setTocId(tocId);
|
footer.setTocId(tocId);
|
||||||
footer.setTerminalTextBlock(textBlock);
|
footer.setTerminalTextBlock(textBlock);
|
||||||
page.setFooter(footer);
|
page.setFooter(footer);
|
||||||
@ -349,7 +351,7 @@ public class DocumentGraphFactory {
|
|||||||
PageNode page = getPage(pageIndex, context);
|
PageNode page = getPage(pageIndex, context);
|
||||||
HeaderNode header = HeaderNode.builder().tableOfContents(context.tableOfContents()).build();
|
HeaderNode header = HeaderNode.builder().tableOfContents(context.tableOfContents()).build();
|
||||||
AtomicTextBlock textBlock = context.textBlockFactory.emptyTextBlock(header, 0, page);
|
AtomicTextBlock textBlock = context.textBlockFactory.emptyTextBlock(header, 0, page);
|
||||||
List<Integer> tocId = context.tableOfContents().createNewEntryAndReturnId(NodeType.HEADER, header);
|
List<Integer> tocId = context.tableOfContents().createNewMainEntryAndReturnId(NodeType.HEADER, header);
|
||||||
header.setTocId(tocId);
|
header.setTocId(tocId);
|
||||||
header.setTerminalTextBlock(textBlock);
|
header.setTerminalTextBlock(textBlock);
|
||||||
page.setHeader(header);
|
page.setHeader(header);
|
||||||
|
|||||||
@ -10,7 +10,6 @@
|
|||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
<artifactId>layoutparser-service-server</artifactId>
|
<artifactId>layoutparser-service-server</artifactId>
|
||||||
<version>1.0.0</version>
|
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
@ -18,7 +17,11 @@
|
|||||||
<artifactId>layoutparser-service-processor</artifactId>
|
<artifactId>layoutparser-service-processor</artifactId>
|
||||||
<version>${project.version}</version>
|
<version>${project.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.iqser.red.commons</groupId>
|
||||||
|
<artifactId>spring-commons</artifactId>
|
||||||
|
<version>6.2.0</version>
|
||||||
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>javax.servlet</groupId>
|
<groupId>javax.servlet</groupId>
|
||||||
<artifactId>javax.servlet-api</artifactId>
|
<artifactId>javax.servlet-api</artifactId>
|
||||||
@ -29,13 +32,11 @@
|
|||||||
<artifactId>spring-cloud-starter-openfeign</artifactId>
|
<artifactId>spring-cloud-starter-openfeign</artifactId>
|
||||||
<version>4.0.2</version>
|
<version>4.0.2</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.springframework.boot</groupId>
|
<groupId>org.springframework.boot</groupId>
|
||||||
<artifactId>spring-boot-starter-amqp</artifactId>
|
<artifactId>spring-boot-starter-amqp</artifactId>
|
||||||
<version>${spring.version}</version>
|
<version>${spring.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.junit.jupiter</groupId>
|
<groupId>org.junit.jupiter</groupId>
|
||||||
<artifactId>junit-jupiter-api</artifactId>
|
<artifactId>junit-jupiter-api</artifactId>
|
||||||
@ -54,5 +55,28 @@
|
|||||||
<version>6.0.3</version>
|
<version>6.0.3</version>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.mockito</groupId>
|
||||||
|
<artifactId>mockito-core</artifactId>
|
||||||
|
<version>5.3.0</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
|
<build>
|
||||||
|
<plugins>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.springframework.boot</groupId>
|
||||||
|
<artifactId>spring-boot-maven-plugin</artifactId>
|
||||||
|
<configuration>
|
||||||
|
<excludes>
|
||||||
|
<exclude>
|
||||||
|
<groupId>org.projectlombok</groupId>
|
||||||
|
<artifactId>lombok</artifactId>
|
||||||
|
</exclude>
|
||||||
|
</excludes>
|
||||||
|
</configuration>
|
||||||
|
</plugin>
|
||||||
|
</plugins>
|
||||||
|
</build>
|
||||||
</project>
|
</project>
|
||||||
|
|||||||
@ -1,7 +0,0 @@
|
|||||||
package com.knecon.fforesight.service.layoutparser.server;
|
|
||||||
|
|
||||||
import static org.junit.jupiter.api.Assertions.*;
|
|
||||||
|
|
||||||
class ApplicationTest {
|
|
||||||
|
|
||||||
}
|
|
||||||
@ -2,7 +2,9 @@ package com.knecon.fforesight.service.layoutparser.server;
|
|||||||
|
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
class LayoutParserApplicationTests extends BaseTest {
|
import com.knecon.fforesight.service.layoutparser.server.utils.BaseTest;
|
||||||
|
|
||||||
|
class ApplicationTests extends BaseTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void contextLoads() {
|
void contextLoads() {
|
||||||
@ -0,0 +1,51 @@
|
|||||||
|
package com.knecon.fforesight.service.layoutparser.server.graph;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
|
||||||
|
import java.io.InputStream;
|
||||||
|
|
||||||
|
import org.apache.pdfbox.Loader;
|
||||||
|
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.springframework.beans.factory.annotation.Autowired;
|
||||||
|
import org.springframework.core.io.ClassPathResource;
|
||||||
|
|
||||||
|
import com.knecon.fforesight.service.layoutparser.internal.api.graph.DocumentGraph;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingService;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.processor.adapter.model.table.TableServiceResponse;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.server.utils.BaseTest;
|
||||||
|
|
||||||
|
import lombok.SneakyThrows;
|
||||||
|
|
||||||
|
public class BuildDocumentGraphTest extends BaseTest {
|
||||||
|
|
||||||
|
@Autowired
|
||||||
|
private LayoutParsingService layoutParsingService;
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void buildMetolachlor() {
|
||||||
|
|
||||||
|
DocumentGraph documentGraph = buildGraph("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06");
|
||||||
|
assertEquals(221, documentGraph.getPages().size());
|
||||||
|
assertEquals(220 , documentGraph.getPages().stream().filter(page -> page.getHeader().hasText()).count());
|
||||||
|
assertEquals(0 , documentGraph.getPages().stream().filter(page -> page.getFooter().hasText()).count());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@SneakyThrows
|
||||||
|
protected DocumentGraph buildGraph(String filename) {
|
||||||
|
|
||||||
|
if (filename.equals("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06")) {
|
||||||
|
prepareStorage(filename + ".pdf", "cv_table_parsing_response/empty.json", "image_service_response/S-Metolachlor_RAR_01_Volume_1_2018-09-06.IMAGE_INFO.json");
|
||||||
|
} else {
|
||||||
|
prepareStorage(filename + ".pdf");
|
||||||
|
}
|
||||||
|
ClassPathResource fileResource = new ClassPathResource(filename + ".pdf");
|
||||||
|
|
||||||
|
try (InputStream inputStream = fileResource.getInputStream()) {
|
||||||
|
PDDocument pdDocument = Loader.loadPDF(inputStream);
|
||||||
|
return layoutParsingService.parseLayout(pdDocument, layoutParsingStorageService.getImagesFile(IMAGE_FILE_ID), new TableServiceResponse());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@ -4,14 +4,10 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
|
|||||||
import static org.junit.jupiter.api.Assertions.assertInstanceOf;
|
import static org.junit.jupiter.api.Assertions.assertInstanceOf;
|
||||||
import static org.wildfly.common.Assert.assertTrue;
|
import static org.wildfly.common.Assert.assertTrue;
|
||||||
|
|
||||||
import java.io.InputStream;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.pdfbox.Loader;
|
|
||||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
import org.springframework.beans.factory.annotation.Autowired;
|
import org.springframework.beans.factory.annotation.Autowired;
|
||||||
import org.springframework.core.io.ClassPathResource;
|
|
||||||
|
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.graph.Boundary;
|
import com.knecon.fforesight.service.layoutparser.internal.api.graph.Boundary;
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.graph.DocumentGraph;
|
import com.knecon.fforesight.service.layoutparser.internal.api.graph.DocumentGraph;
|
||||||
@ -26,21 +22,13 @@ import com.knecon.fforesight.service.layoutparser.internal.api.graph.nodes.Table
|
|||||||
import com.knecon.fforesight.service.layoutparser.internal.api.graph.nodes.TableNode;
|
import com.knecon.fforesight.service.layoutparser.internal.api.graph.nodes.TableNode;
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.graph.textblock.TextBlock;
|
import com.knecon.fforesight.service.layoutparser.internal.api.graph.textblock.TextBlock;
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.services.EntityInsertionService;
|
import com.knecon.fforesight.service.layoutparser.internal.api.services.EntityInsertionService;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingService;
|
import com.knecon.fforesight.service.layoutparser.server.utils.TestEntity;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.adapter.model.table.TableServiceResponse;
|
|
||||||
import com.knecon.fforesight.service.layoutparser.server.BaseTest;
|
|
||||||
import com.knecon.fforesight.service.layoutparser.server.TestEntity;
|
|
||||||
|
|
||||||
import lombok.SneakyThrows;
|
public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest {
|
||||||
|
|
||||||
public class DocumentGraphTest extends BaseTest {
|
|
||||||
|
|
||||||
@Autowired
|
@Autowired
|
||||||
private EntityInsertionService entityInsertionService;
|
private EntityInsertionService entityInsertionService;
|
||||||
|
|
||||||
@Autowired
|
|
||||||
private LayoutParsingService layoutParsingService;
|
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void assertTextBeforeAndTextAfterForParagraphCrafted() {
|
public void assertTextBeforeAndTextAfterForParagraphCrafted() {
|
||||||
@ -51,7 +39,7 @@ public class DocumentGraphTest extends BaseTest {
|
|||||||
assert start != -1;
|
assert start != -1;
|
||||||
|
|
||||||
Boundary boundary = new Boundary(start, start + searchTerm.length());
|
Boundary boundary = new Boundary(start, start + searchTerm.length());
|
||||||
TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123");
|
TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123");
|
||||||
entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents());
|
entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents());
|
||||||
|
|
||||||
assertEquals("Expand to Hint ", entityNode.getTextBefore());
|
assertEquals("Expand to Hint ", entityNode.getTextBefore());
|
||||||
@ -76,7 +64,7 @@ public class DocumentGraphTest extends BaseTest {
|
|||||||
assert start != -1;
|
assert start != -1;
|
||||||
|
|
||||||
Boundary boundary = new Boundary(start, start + searchTerm.length());
|
Boundary boundary = new Boundary(start, start + searchTerm.length());
|
||||||
TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123");
|
TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123");
|
||||||
entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents());
|
entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents());
|
||||||
|
|
||||||
assertEquals("", entityNode.getTextBefore());
|
assertEquals("", entityNode.getTextBefore());
|
||||||
@ -132,7 +120,7 @@ public class DocumentGraphTest extends BaseTest {
|
|||||||
|
|
||||||
DocumentGraph documentGraph = buildGraph("files/crafted document");
|
DocumentGraph documentGraph = buildGraph("files/crafted document");
|
||||||
TableNode table = (TableNode) documentGraph.getTableOfContents()//
|
TableNode table = (TableNode) documentGraph.getTableOfContents()//
|
||||||
.streamEntriesInOrder()//
|
.streamAllEntriesInOrder()//
|
||||||
.filter(entry -> entry.type().equals(NodeType.TABLE))//
|
.filter(entry -> entry.type().equals(NodeType.TABLE))//
|
||||||
.map(TableOfContents.Entry::node)//
|
.map(TableOfContents.Entry::node)//
|
||||||
.findFirst().orElseThrow();
|
.findFirst().orElseThrow();
|
||||||
@ -162,7 +150,7 @@ public class DocumentGraphTest extends BaseTest {
|
|||||||
|
|
||||||
DocumentGraph documentGraph = buildGraph("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06");
|
DocumentGraph documentGraph = buildGraph("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06");
|
||||||
TableNode table = (TableNode) documentGraph.getTableOfContents()
|
TableNode table = (TableNode) documentGraph.getTableOfContents()
|
||||||
.streamEntriesInOrder()
|
.streamAllEntriesInOrder()
|
||||||
.filter(entry -> entry.node().getPages().stream().anyMatch(page -> page.getNumber() == 22))
|
.filter(entry -> entry.node().getPages().stream().anyMatch(page -> page.getNumber() == 22))
|
||||||
.filter(entry -> entry.type().equals(NodeType.TABLE))
|
.filter(entry -> entry.type().equals(NodeType.TABLE))
|
||||||
.map(TableOfContents.Entry::node)
|
.map(TableOfContents.Entry::node)
|
||||||
@ -187,7 +175,7 @@ public class DocumentGraphTest extends BaseTest {
|
|||||||
assert start != -1;
|
assert start != -1;
|
||||||
|
|
||||||
Boundary boundary = new Boundary(start, start + searchTerm.length());
|
Boundary boundary = new Boundary(start, start + searchTerm.length());
|
||||||
TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123");
|
TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123");
|
||||||
entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents());
|
entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents());
|
||||||
|
|
||||||
assertEquals("except Cranberry; Vegetable, ", entityNode.getTextBefore());
|
assertEquals("except Cranberry; Vegetable, ", entityNode.getTextBefore());
|
||||||
@ -214,7 +202,7 @@ public class DocumentGraphTest extends BaseTest {
|
|||||||
assert start != -1;
|
assert start != -1;
|
||||||
|
|
||||||
Boundary boundary = new Boundary(start, start + searchTerm.length());
|
Boundary boundary = new Boundary(start, start + searchTerm.length());
|
||||||
TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123");
|
TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123");
|
||||||
entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents());
|
entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents());
|
||||||
|
|
||||||
assertEquals("2.6.1 Summary of ", entityNode.getTextBefore());
|
assertEquals("2.6.1 Summary of ", entityNode.getTextBefore());
|
||||||
@ -240,7 +228,7 @@ public class DocumentGraphTest extends BaseTest {
|
|||||||
assert start != -1;
|
assert start != -1;
|
||||||
|
|
||||||
Boundary boundary = new Boundary(start, start + searchTerm.length());
|
Boundary boundary = new Boundary(start, start + searchTerm.length());
|
||||||
TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123");
|
TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123");
|
||||||
entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents());
|
entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents());
|
||||||
|
|
||||||
assertEquals("2-[(2-(1-hydroxy-ethyl)-6methyl-phenyl-amino]propan-1-ol (", entityNode.getTextBefore());
|
assertEquals("2-[(2-(1-hydroxy-ethyl)-6methyl-phenyl-amino]propan-1-ol (", entityNode.getTextBefore());
|
||||||
@ -258,23 +246,6 @@ public class DocumentGraphTest extends BaseTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@SneakyThrows
|
|
||||||
protected DocumentGraph buildGraph(String filename) {
|
|
||||||
|
|
||||||
if (filename.equals("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06")) {
|
|
||||||
prepareStorage(filename + ".pdf", "cv_table_parsing_response/empty.json", "image_service_response/S-Metolachlor_RAR_01_Volume_1_2018-09-06.IMAGE_INFO.json");
|
|
||||||
} else {
|
|
||||||
prepareStorage(filename + ".pdf");
|
|
||||||
}
|
|
||||||
ClassPathResource fileResource = new ClassPathResource(filename + ".pdf");
|
|
||||||
|
|
||||||
try (InputStream inputStream = fileResource.getInputStream()) {
|
|
||||||
PDDocument pdDocument = Loader.loadPDF(inputStream);
|
|
||||||
return layoutParsingService.parseLayout(pdDocument, layoutParsingStorageService.getImagesFile(IMAGE_FILE_ID), new TableServiceResponse());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
private static void assertSameOffsetInAllIntersectingNodes(String searchTerm, int start, EntityNode entityNode) {
|
private static void assertSameOffsetInAllIntersectingNodes(String searchTerm, int start, EntityNode entityNode) {
|
||||||
|
|
||||||
List<Integer> paragraphStart = entityNode.getIntersectingNodes().stream()//
|
List<Integer> paragraphStart = entityNode.getIntersectingNodes().stream()//
|
||||||
@ -293,7 +264,7 @@ public class DocumentGraphTest extends BaseTest {
|
|||||||
assert start != -1;
|
assert start != -1;
|
||||||
|
|
||||||
Boundary boundary = new Boundary(start, start + searchTerm.length());
|
Boundary boundary = new Boundary(start, start + searchTerm.length());
|
||||||
TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123");
|
TestEntity entityNode = TestEntity.initialEntityNode(boundary, "123", "123");
|
||||||
entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents());
|
entityInsertionService.addEntityToGraph(entityNode, documentGraph.getTableOfContents());
|
||||||
PageNode pageNode = documentGraph.getPages().stream().filter(page -> page.getNumber() == pageNumber).findFirst().orElseThrow();
|
PageNode pageNode = documentGraph.getPages().stream().filter(page -> page.getNumber() == pageNumber).findFirst().orElseThrow();
|
||||||
|
|
||||||
@ -1,31 +1,18 @@
|
|||||||
package com.knecon.fforesight.service.layoutparser.server.graph;
|
package com.knecon.fforesight.service.layoutparser.server.graph;
|
||||||
|
|
||||||
import java.util.Collections;
|
|
||||||
|
|
||||||
import org.apache.pdfbox.Loader;
|
|
||||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
import org.springframework.beans.factory.annotation.Autowired;
|
import org.springframework.beans.factory.annotation.Autowired;
|
||||||
import org.springframework.core.io.ClassPathResource;
|
|
||||||
|
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.DocumentData;
|
import com.knecon.fforesight.service.layoutparser.internal.api.data.DocumentData;
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.graph.DocumentGraph;
|
import com.knecon.fforesight.service.layoutparser.internal.api.graph.DocumentGraph;
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.mapper.DocumentDataMapper;
|
import com.knecon.fforesight.service.layoutparser.internal.api.mapper.DocumentDataMapper;
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.mapper.DocumentGraphMapper;
|
import com.knecon.fforesight.service.layoutparser.internal.api.mapper.DocumentGraphMapper;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.classification.service.PdfParsingService;
|
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.factory.DocumentGraphFactory;
|
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.queue.LayoutParsingRequest;
|
import com.knecon.fforesight.service.layoutparser.processor.queue.LayoutParsingRequest;
|
||||||
import com.knecon.fforesight.service.layoutparser.server.BaseTest;
|
|
||||||
|
|
||||||
import lombok.SneakyThrows;
|
import lombok.SneakyThrows;
|
||||||
|
|
||||||
public class DocumentGraphMappingTest extends BaseTest {
|
public class DocumentGraphMappingTest extends BuildDocumentGraphTest {
|
||||||
|
|
||||||
@Autowired
|
|
||||||
private DocumentGraphFactory documentGraphFactory;
|
|
||||||
|
|
||||||
@Autowired
|
|
||||||
private PdfParsingService pdfParsingService;
|
|
||||||
|
|
||||||
@Autowired
|
@Autowired
|
||||||
private DocumentDataMapper documentDataMapper;
|
private DocumentDataMapper documentDataMapper;
|
||||||
@ -38,17 +25,10 @@ public class DocumentGraphMappingTest extends BaseTest {
|
|||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
public void testGraphMapping() {
|
public void testGraphMapping() {
|
||||||
|
|
||||||
String filename = "files/crafted document";
|
DocumentGraph document = buildGraph("files/crafted document");
|
||||||
|
LayoutParsingRequest layoutParsingRequest = buildStandardLayoutParsingRequest();
|
||||||
|
|
||||||
prepareStorage(filename + ".pdf");
|
|
||||||
ClassPathResource fileResource = new ClassPathResource(filename + ".pdf");
|
|
||||||
LayoutParsingRequest layoutParsingRequest = prepareStorage(fileResource.getInputStream());
|
|
||||||
PDDocument pdDocument = Loader.loadPDF(fileResource.getInputStream());
|
|
||||||
|
|
||||||
var classifiedDoc = pdfParsingService.parseDocument(pdDocument, Collections.emptyMap(), Collections.emptyMap());
|
|
||||||
DocumentGraph document = documentGraphFactory.buildDocumentGraph(classifiedDoc);
|
|
||||||
DocumentData documentData = documentDataMapper.toDocumentData(document);
|
DocumentData documentData = documentDataMapper.toDocumentData(document);
|
||||||
|
|
||||||
layoutParsingStorageService.storeDocumentData(layoutParsingRequest, documentData);
|
layoutParsingStorageService.storeDocumentData(layoutParsingRequest, documentData);
|
||||||
DocumentData documentData2 = layoutParsingStorageService.readDocumentData(layoutParsingRequest);
|
DocumentData documentData2 = layoutParsingStorageService.readDocumentData(layoutParsingRequest);
|
||||||
DocumentGraph newDocumentGraph = documentGraphMapper.toDocumentGraph(documentData2);
|
DocumentGraph newDocumentGraph = documentGraphMapper.toDocumentGraph(documentData2);
|
||||||
|
|||||||
@ -12,37 +12,50 @@ import org.springframework.core.io.ClassPathResource;
|
|||||||
|
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.graph.DocumentGraph;
|
import com.knecon.fforesight.service.layoutparser.internal.api.graph.DocumentGraph;
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.graph.textblock.TextBlock;
|
import com.knecon.fforesight.service.layoutparser.internal.api.graph.textblock.TextBlock;
|
||||||
import com.knecon.fforesight.service.layoutparser.server.visualizations.PdfDraw;
|
import com.knecon.fforesight.service.layoutparser.server.utils.visualizations.PdfDraw;
|
||||||
|
|
||||||
import lombok.SneakyThrows;
|
import lombok.SneakyThrows;
|
||||||
|
|
||||||
|
public class DocumentGraphVisualizationTest extends BuildDocumentGraphTest {
|
||||||
@Disabled
|
|
||||||
public class DocumentGraphVisualizationTest extends DocumentGraphTest {
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
|
@Disabled
|
||||||
public void visualizeMetolachlor() {
|
public void visualizeMetolachlor() {
|
||||||
|
|
||||||
String filename = "files/S-Metolachlor_RAR_01_Volume_1_2018-09-06";
|
String filename = "files/S-Metolachlor_RAR_01_Volume_1_2018-09-06";
|
||||||
|
visualizePdf(filename);
|
||||||
DocumentGraph documentGraph = buildGraph(filename);
|
|
||||||
TextBlock textBlock = documentGraph.buildTextBlock();
|
|
||||||
|
|
||||||
visualizeSemanticNodes(filename, documentGraph, textBlock);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
|
@Disabled
|
||||||
public void visualizeRotatedTestDocument() {
|
public void visualizeRotatedTestDocument() {
|
||||||
|
|
||||||
String filename = "files/RotateTestFileWithImages";
|
String filename = "files/RotateTestFileWithImages";
|
||||||
|
visualizePdf(filename);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@SneakyThrows
|
||||||
|
@Disabled
|
||||||
|
public void visualizeCraftedDocument() {
|
||||||
|
|
||||||
|
String filename = "files/crafted document";
|
||||||
|
visualizePdf(filename);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@SneakyThrows
|
||||||
|
private void visualizePdf(String filename) {
|
||||||
|
|
||||||
DocumentGraph documentGraph = buildGraph(filename);
|
DocumentGraph documentGraph = buildGraph(filename);
|
||||||
TextBlock textBlock = documentGraph.buildTextBlock();
|
TextBlock textBlock = documentGraph.buildTextBlock();
|
||||||
|
|
||||||
visualizeSemanticNodes(filename, documentGraph, textBlock);
|
visualizeSemanticNodes(filename, documentGraph, textBlock);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -1,9 +1,10 @@
|
|||||||
package com.knecon.fforesight.service.layoutparser.server;
|
package com.knecon.fforesight.service.layoutparser.server.utils;
|
||||||
|
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
|
||||||
import org.junit.jupiter.api.AfterEach;
|
import org.junit.jupiter.api.AfterEach;
|
||||||
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
import org.junit.jupiter.api.extension.ExtendWith;
|
import org.junit.jupiter.api.extension.ExtendWith;
|
||||||
import org.springframework.amqp.rabbit.core.RabbitTemplate;
|
import org.springframework.amqp.rabbit.core.RabbitTemplate;
|
||||||
import org.springframework.beans.factory.annotation.Autowired;
|
import org.springframework.beans.factory.annotation.Autowired;
|
||||||
@ -12,6 +13,7 @@ import org.springframework.boot.autoconfigure.amqp.RabbitAutoConfiguration;
|
|||||||
import org.springframework.boot.test.context.SpringBootTest;
|
import org.springframework.boot.test.context.SpringBootTest;
|
||||||
import org.springframework.boot.test.mock.mockito.MockBean;
|
import org.springframework.boot.test.mock.mockito.MockBean;
|
||||||
import org.springframework.context.annotation.Bean;
|
import org.springframework.context.annotation.Bean;
|
||||||
|
import org.springframework.context.annotation.ComponentScan;
|
||||||
import org.springframework.context.annotation.Configuration;
|
import org.springframework.context.annotation.Configuration;
|
||||||
import org.springframework.context.annotation.Import;
|
import org.springframework.context.annotation.Import;
|
||||||
import org.springframework.context.annotation.Primary;
|
import org.springframework.context.annotation.Primary;
|
||||||
@ -22,9 +24,12 @@ import com.iqser.red.storage.commons.service.StorageService;
|
|||||||
import com.knecon.fforesight.service.layoutparser.internal.api.mapper.DocumentDataMapper;
|
import com.knecon.fforesight.service.layoutparser.internal.api.mapper.DocumentDataMapper;
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.mapper.DocumentGraphMapper;
|
import com.knecon.fforesight.service.layoutparser.internal.api.mapper.DocumentGraphMapper;
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.services.EntityEnrichmentService;
|
import com.knecon.fforesight.service.layoutparser.internal.api.services.EntityEnrichmentService;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.internal.api.services.EntityInsertionService;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingStorageService;
|
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingStorageService;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.multitenancy.TenantContext;
|
import com.knecon.fforesight.service.layoutparser.processor.multitenancy.TenantContext;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.processor.multitenancy.TenantsClient;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.queue.LayoutParsingRequest;
|
import com.knecon.fforesight.service.layoutparser.processor.queue.LayoutParsingRequest;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.server.Application;
|
||||||
|
|
||||||
import lombok.SneakyThrows;
|
import lombok.SneakyThrows;
|
||||||
|
|
||||||
@ -39,6 +44,9 @@ public class BaseTest {
|
|||||||
@Autowired
|
@Autowired
|
||||||
protected StorageService storageService;
|
protected StorageService storageService;
|
||||||
|
|
||||||
|
@Autowired
|
||||||
|
protected TenantsClient tenantsClient;
|
||||||
|
|
||||||
@MockBean
|
@MockBean
|
||||||
private RabbitTemplate rabbitTemplate;
|
private RabbitTemplate rabbitTemplate;
|
||||||
|
|
||||||
@ -49,6 +57,35 @@ public class BaseTest {
|
|||||||
protected final static String TEXT_FILE_ID = "texts";
|
protected final static String TEXT_FILE_ID = "texts";
|
||||||
protected final static String POSITION_FILE_ID = "positions";
|
protected final static String POSITION_FILE_ID = "positions";
|
||||||
protected final static String PAGES_FILE_ID = "pages";
|
protected final static String PAGES_FILE_ID = "pages";
|
||||||
|
protected final static String TENANT_ID = "tenant";
|
||||||
|
|
||||||
|
|
||||||
|
protected LayoutParsingRequest buildStandardLayoutParsingRequest() {
|
||||||
|
|
||||||
|
return LayoutParsingRequest.builder()
|
||||||
|
.imagesFileStorageId(Optional.of(IMAGE_FILE_ID))
|
||||||
|
.originFileStorageId(ORIGIN_FILE_ID)
|
||||||
|
.tablesFileStorageId(Optional.of(TABLE_FILE_ID))
|
||||||
|
.pageFileStorageId(PAGES_FILE_ID)
|
||||||
|
.positionBlockFileStorageId(POSITION_FILE_ID)
|
||||||
|
.structureFileStorageId(STRUCTURE_FILE_ID)
|
||||||
|
.textBlockFileStorageId(TEXT_FILE_ID)
|
||||||
|
.build();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@BeforeEach
|
||||||
|
public void setupTenantContext() {
|
||||||
|
|
||||||
|
TenantContext.setTenantId(TENANT_ID);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@AfterEach
|
||||||
|
public void clearTenantContext() {
|
||||||
|
|
||||||
|
TenantContext.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
@ -115,6 +152,7 @@ public class BaseTest {
|
|||||||
|
|
||||||
@Configuration
|
@Configuration
|
||||||
@EnableAutoConfiguration(exclude = RabbitAutoConfiguration.class)
|
@EnableAutoConfiguration(exclude = RabbitAutoConfiguration.class)
|
||||||
|
@ComponentScan("com.knecon.fforesight.service.layoutparser")
|
||||||
public static class TestConfiguration {
|
public static class TestConfiguration {
|
||||||
|
|
||||||
@Bean
|
@Bean
|
||||||
@ -125,13 +163,6 @@ public class BaseTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Bean
|
|
||||||
public EntityEnrichmentService testEntityEnrichmentService() {
|
|
||||||
|
|
||||||
return new TestEntityEnrichmentService();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@Bean
|
@Bean
|
||||||
public DocumentDataMapper documentDataMapper() {
|
public DocumentDataMapper documentDataMapper() {
|
||||||
|
|
||||||
@ -145,6 +176,14 @@ public class BaseTest {
|
|||||||
return new DocumentGraphMapper();
|
return new DocumentGraphMapper();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Bean
|
||||||
|
@Autowired
|
||||||
|
public EntityInsertionService entityInsertionService(EntityEnrichmentService entityEnrichmentService) {
|
||||||
|
|
||||||
|
return new EntityInsertionService(entityEnrichmentService);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -1,4 +1,4 @@
|
|||||||
package com.knecon.fforesight.service.layoutparser.server;
|
package com.knecon.fforesight.service.layoutparser.server.utils;
|
||||||
|
|
||||||
import static java.io.File.createTempFile;
|
import static java.io.File.createTempFile;
|
||||||
|
|
||||||
@ -14,6 +14,7 @@ import java.util.stream.Collectors;
|
|||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.springframework.core.io.InputStreamResource;
|
import org.springframework.core.io.InputStreamResource;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import com.iqser.red.commons.jackson.ObjectMapperFactory;
|
import com.iqser.red.commons.jackson.ObjectMapperFactory;
|
||||||
@ -22,6 +23,7 @@ import com.iqser.red.storage.commons.service.StorageService;
|
|||||||
|
|
||||||
import lombok.SneakyThrows;
|
import lombok.SneakyThrows;
|
||||||
|
|
||||||
|
@Service
|
||||||
public class FileSystemBackedStorageService implements StorageService {
|
public class FileSystemBackedStorageService implements StorageService {
|
||||||
|
|
||||||
private final Map<String, File> dataMap = new HashMap<>();
|
private final Map<String, File> dataMap = new HashMap<>();
|
||||||
@ -1,4 +1,4 @@
|
|||||||
package com.knecon.fforesight.service.layoutparser.server;
|
package com.knecon.fforesight.service.layoutparser.server.utils;
|
||||||
|
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
@ -1,15 +1,18 @@
|
|||||||
package com.knecon.fforesight.service.layoutparser.server;
|
package com.knecon.fforesight.service.layoutparser.server.utils;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.graph.entity.EntityNode;
|
import com.knecon.fforesight.service.layoutparser.internal.api.graph.entity.EntityNode;
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.graph.textblock.TextBlock;
|
import com.knecon.fforesight.service.layoutparser.internal.api.graph.textblock.TextBlock;
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.services.EntityEnrichmentService;
|
import com.knecon.fforesight.service.layoutparser.internal.api.services.EntityEnrichmentService;
|
||||||
|
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
|
|
||||||
|
@Service
|
||||||
@RequiredArgsConstructor
|
@RequiredArgsConstructor
|
||||||
public class TestEntityEnrichmentService implements EntityEnrichmentService {
|
public class TestEntityEnrichmentService implements EntityEnrichmentService {
|
||||||
|
|
||||||
@ -0,0 +1,42 @@
|
|||||||
|
package com.knecon.fforesight.service.layoutparser.server.utils;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.common.JSONPrimitive;
|
||||||
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.multitenancy.TenantRequest;
|
||||||
|
import com.iqser.red.service.persistence.service.v1.api.shared.model.multitenancy.TenantResponse;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.processor.multitenancy.TenantsClient;
|
||||||
|
|
||||||
|
@Service
|
||||||
|
public class TestTenantsClient implements TenantsClient {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void createTenant(TenantRequest tenantRequest) {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<TenantResponse> getTenants() {
|
||||||
|
|
||||||
|
return Collections.emptyList();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public TenantResponse getTenant(String tenantId) {
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public JSONPrimitive<String> getDeploymentKey(String tenantId) {
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@ -1,4 +1,4 @@
|
|||||||
package com.knecon.fforesight.service.layoutparser.server.visualizations;
|
package com.knecon.fforesight.service.layoutparser.server.utils.visualizations;
|
||||||
|
|
||||||
import java.awt.Color;
|
import java.awt.Color;
|
||||||
import java.awt.geom.Point2D;
|
import java.awt.geom.Point2D;
|
||||||
@ -36,7 +36,7 @@ public class PdfDraw {
|
|||||||
|
|
||||||
public static void drawDocumentGraph(PDDocument document, DocumentGraph documentGraph) {
|
public static void drawDocumentGraph(PDDocument document, DocumentGraph documentGraph) {
|
||||||
|
|
||||||
documentGraph.getTableOfContents().streamEntriesInOrder().forEach(entry -> drawNode(document, entry));
|
documentGraph.getTableOfContents().streamAllEntriesInOrder().forEach(entry -> drawNode(document, entry));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -72,8 +72,11 @@ public class PdfDraw {
|
|||||||
contentStream.setLineWidth(options.getStrokeWidth());
|
contentStream.setLineWidth(options.getStrokeWidth());
|
||||||
|
|
||||||
contentStream.beginText();
|
contentStream.beginText();
|
||||||
contentStream.setTextMatrix(Matrix.getRotateInstance(Math.toRadians(30), 0, 0));
|
if (rotate) {
|
||||||
contentStream.newLineAtOffset((float) location.getX(), (float) location.getY());
|
contentStream.setTextMatrix(Matrix.getRotateInstance(Math.toRadians(15), (float) location.getX(), (float) location.getY()));
|
||||||
|
} else {
|
||||||
|
contentStream.newLineAtOffset((float) location.getX(), (float) location.getY());
|
||||||
|
}
|
||||||
contentStream.setFont(new PDType1Font(Standard14Fonts.FontName.HELVETICA), 10);
|
contentStream.setFont(new PDType1Font(Standard14Fonts.FontName.HELVETICA), 10);
|
||||||
contentStream.showText(string);
|
contentStream.showText(string);
|
||||||
contentStream.endText();
|
contentStream.endText();
|
||||||
@ -136,6 +139,7 @@ public class PdfDraw {
|
|||||||
private static Options buildStandardOptionsForNodes(TableOfContents.Entry entry) {
|
private static Options buildStandardOptionsForNodes(TableOfContents.Entry entry) {
|
||||||
|
|
||||||
return Options.builder().stroke(true).strokeColor(switch (entry.type()) {
|
return Options.builder().stroke(true).strokeColor(switch (entry.type()) {
|
||||||
|
case DOCUMENT -> Color.LIGHT_GRAY;
|
||||||
case HEADER, FOOTER -> Color.GREEN;
|
case HEADER, FOOTER -> Color.GREEN;
|
||||||
case PARAGRAPH -> Color.BLUE;
|
case PARAGRAPH -> Color.BLUE;
|
||||||
case HEADLINE -> Color.RED;
|
case HEADLINE -> Color.RED;
|
||||||
Loading…
x
Reference in New Issue
Block a user