RED-6009: Document Tree Structure

* Fix PMD violations
This commit is contained in:
Kilian Schuettler 2023-04-12 15:28:03 +02:00
parent f9d258e1fc
commit d5635e9660
42 changed files with 158 additions and 228 deletions

View File

@ -12,6 +12,6 @@
</parent>
<artifactId>layoutparser-service-image</artifactId>
<version>1.0.0</version>
</project>

View File

@ -1,6 +1,5 @@
package com.knecon.fforesight.service.layoutparser.internal.api.data;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Stream;
@ -11,6 +10,7 @@ import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.experimental.FieldDefaults;
@ -31,7 +31,7 @@ public class TableOfContentsData {
}
EntryData entry = root.subEntries.get(tocId.get(0));
for (int id : tocId.subList(1, tocId.size())) {
entry = entry.subEntries().get(id);
entry = entry.subEntries.get(id);
}
return entry;
}
@ -43,12 +43,6 @@ public class TableOfContentsData {
}
/**
 * Parses a dot-separated id string (for example {@code "0.2.1"}) into its integer components.
 *
 * @param idsAsString the ids joined by {@code '.'}
 * @return an unmodifiable list holding one {@link Integer} per dot-separated part, in order
 * @throws NumberFormatException if any part is not a parsable integer
 */
private static List<Integer> getIds(String idsAsString) {
    // the dot is a regex metacharacter and therefore has to be escaped for split
    String[] idParts = idsAsString.split("\\.");
    return Arrays.stream(idParts).map(Integer::valueOf).toList();
}
public String toString() {
return String.join("\n", streamAllEntries().map(EntryData::toString).toList());
@ -57,12 +51,23 @@ public class TableOfContentsData {
private static Stream<EntryData> flatten(EntryData entry) {
return Stream.concat(Stream.of(entry), entry.subEntries().stream().flatMap(TableOfContentsData::flatten));
return Stream.concat(Stream.of(entry), entry.subEntries.stream().flatMap(TableOfContentsData::flatten));
}
@Builder
public record EntryData(NodeType type, int[] tocId, Long[] atomicBlocks, Long[] pages, Map<String, String> properties, List<EntryData> subEntries) {
@Getter
@AllArgsConstructor
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public static class EntryData {
NodeType type;
int[] tocId;
Long[] atomicBlocks;
Long[] pages;
Map<String, String> properties;
List<EntryData> subEntries;
@Override
public String toString() {

View File

@ -77,7 +77,7 @@ public class DocumentGraph implements SemanticNode {
private Stream<SemanticNode> streamAllNodes() {
return tableOfContents.streamAllEntriesInOrder().map(TableOfContents.Entry::node);
return tableOfContents.streamAllEntriesInOrder().map(TableOfContents.Entry::getNode);
}

View File

@ -14,8 +14,12 @@ import com.knecon.fforesight.service.layoutparser.internal.api.graph.nodes.Seman
import com.knecon.fforesight.service.layoutparser.internal.api.graph.textblock.TextBlock;
import com.knecon.fforesight.service.layoutparser.internal.api.graph.textblock.TextBlockCollector;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.Getter;
import lombok.experimental.FieldDefaults;
@Data
public class TableOfContents {
@ -31,7 +35,7 @@ public class TableOfContents {
public TextBlock buildTextBlock() {
return streamAllEntriesInOrder().map(Entry::node).filter(SemanticNode::isTerminal).map(SemanticNode::getTerminalTextBlock).collect(new TextBlockCollector());
return streamAllEntriesInOrder().map(Entry::getNode).filter(SemanticNode::isTerminal).map(SemanticNode::getTerminalTextBlock).collect(new TextBlockCollector());
}
@ -49,8 +53,8 @@ public class TableOfContents {
Entry parent = getEntryById(parentId);
List<Integer> newId = new LinkedList<>(parentId);
newId.add(parent.children().size());
parent.children().add(Entry.builder().tocId(newId).node(node).type(nodeType).children(new LinkedList<>()).build());
newId.add(parent.children.size());
parent.children.add(Entry.builder().tocId(newId).node(node).type(nodeType).children(new LinkedList<>()).build());
return newId;
}
@ -66,7 +70,7 @@ public class TableOfContents {
if (id >= entry.children.size() || 0 > id) {
return false;
}
entry = entry.children().get(id);
entry = entry.children.get(id);
}
return true;
}
@ -86,7 +90,7 @@ public class TableOfContents {
public Stream<SemanticNode> streamChildrenNodes(List<Integer> tocId) {
return getEntryById(tocId).children().stream().map(Entry::node);
return getEntryById(tocId).children.stream().map(Entry::getNode);
}
@ -109,7 +113,7 @@ public class TableOfContents {
}
Entry entry = root.children.get(tocId.get(0));
for (int id : tocId.subList(1, tocId.size())) {
entry = entry.children().get(id);
entry = entry.children.get(id);
}
return entry;
}
@ -148,17 +152,26 @@ public class TableOfContents {
private static Stream<Entry> flatten(Entry entry) {
return Stream.concat(Stream.of(entry), entry.children().stream().flatMap(TableOfContents::flatten));
return Stream.concat(Stream.of(entry), entry.children.stream().flatMap(TableOfContents::flatten));
}
@Builder
public record Entry(List<Integer> tocId, NodeType type, SemanticNode node, List<Entry> children) {
@Getter
@AllArgsConstructor
@FieldDefaults(level = AccessLevel.PRIVATE, makeFinal = true)
public static class Entry {
List<Integer> tocId;
NodeType type;
SemanticNode node;
List<Entry> children;
@Override
public String toString() {
return node().toString();
return node.toString();
}
@ -168,6 +181,13 @@ public class TableOfContents {
return Hashing.murmur3_32_fixed().hashString(toString(), StandardCharsets.UTF_8).hashCode();
}
/**
 * Two entries are equal when their string representations are equal.
 * <p>
 * {@link #hashCode()} is a 32-bit hash of {@link #toString()}. Comparing the hash codes alone would
 * report two different entries as equal whenever their hashes collide, so the strings the hash is
 * derived from are compared instead. Equal strings always yield equal hash codes, which keeps this
 * method consistent with {@link #hashCode()}.
 *
 * @param o the object to compare with
 * @return {@code true} if {@code o} is an {@code Entry} with the same string representation
 */
@Override
public boolean equals(Object o) {
    if (this == o) {
        return true;
    }
    return o instanceof Entry other && toString().equals(other.toString());
}
}
}

View File

@ -28,7 +28,7 @@ public interface EntityNode {
/**
* The deepest fully containing node represents the node which is the deepest node in the document tree structure,
* whose boundary also fully contains the boundary of this entity
* whose boundary also fully contains the boundary of this entity.
*
* @return the deepest fully containing node
*/
@ -62,7 +62,7 @@ public interface EntityNode {
/**
* removes all occurrences of this node in the graph and resets all graph specific fields
* removes all occurrences of this node in the graph and resets all graph specific fields.
*/
default void removeFromGraph() {

View File

@ -36,4 +36,10 @@ public class EntityPosition {
return Hashing.murmur3_128().hashString(sb.toString(), StandardCharsets.UTF_8).hashCode();
}
/**
 * Compares this position with another object by type and hash code.
 * <p>
 * NOTE(review): equality is decided by the hash code alone, so two distinct positions whose hashes
 * collide are treated as equal. Consider comparing the values that feed {@link #hashCode()} directly.
 *
 * @param o the object to compare with
 * @return {@code true} if {@code o} is an {@code EntityPosition} with the same hash code
 */
@Override
public boolean equals(Object o) {
    if (!(o instanceof EntityPosition)) {
        return false;
    }
    return o.hashCode() == this.hashCode();
}
}

View File

@ -34,10 +34,10 @@ public class ImageNode implements SemanticNode {
boolean transparency;
Rectangle2D position;
@Builder.Default
boolean redaction = false;
@Builder.Default
boolean ignored = false;
boolean redaction;
boolean ignored;
@Builder.Default
String redactionReason = "";
@Builder.Default
@ -55,7 +55,6 @@ public class ImageNode implements SemanticNode {
@EqualsAndHashCode.Exclude
Set<EntityNode> entities = new HashSet<>();
@Override
public TextBlock buildTextBlock() {

View File

@ -37,7 +37,7 @@ public interface SemanticNode {
/**
* Each AtomicTextBlock is assigned a page, so to get the pages this node appears on, it collects the PageNodes from each AtomicTextBlock belonging to this node's ClassificationTextBlock
* Each AtomicTextBlock is assigned a page, so to get the pages this node appears on, it collects the PageNodes from each AtomicTextBlock belonging to this node's ClassificationTextBlock.
*
* @return Set of PageNodes this node appears on.
*/
@ -54,7 +54,7 @@ public interface SemanticNode {
/**
* The id is a List of Integers uniquely identifying this node in the TableOfContents
* The id is a List of Integers uniquely identifying this node in the TableOfContents.
*
* @return the TableOfContents ID
*/
@ -62,7 +62,7 @@ public interface SemanticNode {
/**
* This should only be used during graph construction
* This should only be used during graph construction.
*
* @param tocId List of Integers
*/
@ -96,7 +96,7 @@ public interface SemanticNode {
*/
default SemanticNode getParent() {
return getTableOfContents().getParentEntryById(getTocId()).node();
return getTableOfContents().getParentEntryById(getTocId()).getNode();
}
@ -199,7 +199,7 @@ public interface SemanticNode {
/**
* Streams all children located directly underneath this node in the TableOfContents
* Streams all children located directly underneath this node in the TableOfContents.
*
* @return Stream of all children
*/
@ -216,7 +216,7 @@ public interface SemanticNode {
*/
default Stream<SemanticNode> streamAllSubNodes() {
return getTableOfContents().streamAllSubEntriesInOrder(getTocId()).map(TableOfContents.Entry::node);
return getTableOfContents().streamAllSubEntriesInOrder(getTocId()).map(TableOfContents.Entry::getNode);
}
@ -247,7 +247,7 @@ public interface SemanticNode {
/**
* TODO this does not yet work for sections spanning multiple columns
* TODO this does not yet work for sections spanning multiple columns.
*
* @param bBoxPerPage initial empty BoundingBox
* @return The union of the BoundingBoxes of all children

View File

@ -54,7 +54,7 @@ public class ConcatenatedTextBlock implements TextBlock {
private AtomicTextBlock getAtomicTextBlockByStringIndex(int stringIdx) {
return atomicTextBlocks.stream().filter(textBlock -> (textBlock.getBoundary().contains(stringIdx))).findAny().orElseThrow(IndexOutOfBoundsException::new);
return atomicTextBlocks.stream().filter(textBlock -> textBlock.getBoundary().contains(stringIdx)).findAny().orElseThrow(IndexOutOfBoundsException::new);
}

View File

@ -59,25 +59,25 @@ public class DocumentDataMapper {
Long[] atomicTextBlocks;
if (entry.node().isTerminal()) {
atomicTextBlocks = toAtomicTextBlockIds(entry.node().getTerminalTextBlock());
if (entry.getNode().isTerminal()) {
atomicTextBlocks = toAtomicTextBlockIds(entry.getNode().getTerminalTextBlock());
} else {
atomicTextBlocks = new Long[]{};
}
Map<String, String> properties = switch (entry.type()) {
case TABLE -> PropertiesMapper.buildTableProperties((TableNode) entry.node());
case TABLE_CELL -> PropertiesMapper.buildTableCellProperties((TableCellNode) entry.node());
case IMAGE -> PropertiesMapper.buildImageProperties((ImageNode) entry.node());
Map<String, String> properties = switch (entry.getType()) {
case TABLE -> PropertiesMapper.buildTableProperties((TableNode) entry.getNode());
case TABLE_CELL -> PropertiesMapper.buildTableCellProperties((TableCellNode) entry.getNode());
case IMAGE -> PropertiesMapper.buildImageProperties((ImageNode) entry.getNode());
default -> new HashMap<>();
};
return TableOfContentsData.EntryData.builder()
.tocId(toPrimitiveIntArray(entry.tocId()))
.subEntries(entry.children().stream().map(DocumentDataMapper::toEntryData).toList())
.type(entry.type())
.tocId(toPrimitiveIntArray(entry.getTocId()))
.subEntries(entry.getChildren().stream().map(DocumentDataMapper::toEntryData).toList())
.type(entry.getType())
.atomicBlocks(atomicTextBlocks)
.pages(entry.node().getPages().stream().map(PageNode::getNumber).map(Integer::longValue).toArray(Long[]::new))
.pages(entry.getNode().getPages().stream().map(PageNode::getNumber).map(Integer::longValue).toArray(Long[]::new))
.properties(properties)
.build();
}

View File

@ -52,7 +52,7 @@ public class DocumentGraphMapper {
context.pages.addAll(Arrays.stream(documentData.getPages()).map(DocumentGraphMapper::buildPage).toList());
context.tableOfContents.getRoot().children().addAll(buildEntries(documentData.getTableOfContents().getRoot().subEntries(), context));
context.tableOfContents.getRoot().getChildren().addAll(buildEntries(documentData.getTableOfContents().getRoot().getSubEntries(), context));
documentGraph.setTableOfContents(context.tableOfContents);
documentGraph.setPages(new HashSet<>(context.pages));
@ -70,35 +70,35 @@ public class DocumentGraphMapper {
for (TableOfContentsData.EntryData entryData : entries) {
boolean terminal = isTerminal(entryData);
List<PageNode> pages = Arrays.stream(entryData.pages()).map(pageNumber -> getPage(pageNumber, context)).toList();
List<PageNode> pages = Arrays.stream(entryData.getPages()).map(pageNumber -> getPage(pageNumber, context)).toList();
SemanticNode node = switch (entryData.type()) {
SemanticNode node = switch (entryData.getType()) {
case SECTION -> buildSection(context);
case PARAGRAPH -> buildParagraph(context, terminal);
case HEADLINE -> buildHeadline(context, terminal);
case HEADER -> buildHeader(context, terminal);
case FOOTER -> buildFooter(context, terminal);
case TABLE -> buildTable(context, entryData.properties());
case TABLE_CELL -> buildTableCell(context, entryData.properties(), terminal);
case IMAGE -> buildImage(context, entryData.properties());
default -> throw new UnsupportedOperationException("Not yet implemented for type " + entryData.type());
case TABLE -> buildTable(context, entryData.getProperties());
case TABLE_CELL -> buildTableCell(context, entryData.getProperties(), terminal);
case IMAGE -> buildImage(context, entryData.getProperties());
default -> throw new UnsupportedOperationException("Not yet implemented for type " + entryData.getType());
};
if (node.isTerminal()) {
TextBlock textBlock = toTextBlock(entryData.atomicBlocks(), context, node);
TextBlock textBlock = toTextBlock(entryData.getAtomicBlocks(), context, node);
node.setTerminalTextBlock(textBlock);
}
List<Integer> tocId = Arrays.stream(entryData.tocId()).boxed().toList();
List<Integer> tocId = Arrays.stream(entryData.getTocId()).boxed().toList();
node.setTocId(tocId);
if (entryData.type() == HEADER) {
if (entryData.getType() == HEADER) {
pages.forEach(page -> page.setHeader((HeaderNode) node));
} else if (entryData.type() == FOOTER) {
} else if (entryData.getType() == FOOTER) {
pages.forEach(page -> page.setFooter((FooterNode) node));
} else {
pages.forEach(page -> page.getMainBody().add(node));
}
newEntries.add(TableOfContents.Entry.builder().tocId(tocId).type(entryData.type()).children(buildEntries(entryData.subEntries(), context)).node(node).build());
newEntries.add(TableOfContents.Entry.builder().tocId(tocId).type(entryData.getType()).children(buildEntries(entryData.getSubEntries(), context)).node(node).build());
}
return newEntries;
}
@ -112,7 +112,7 @@ public class DocumentGraphMapper {
private static boolean isTerminal(TableOfContentsData.EntryData entryData) {
return entryData.atomicBlocks().length > 0;
return entryData.getAtomicBlocks().length > 0;
}

View File

@ -4,7 +4,6 @@ import java.util.Collections;
import java.util.NoSuchElementException;
import java.util.Set;
import com.knecon.fforesight.service.layoutparser.internal.api.graph.Boundary;
import com.knecon.fforesight.service.layoutparser.internal.api.graph.TableOfContents;
import com.knecon.fforesight.service.layoutparser.internal.api.graph.entity.EntityNode;
import com.knecon.fforesight.service.layoutparser.internal.api.graph.nodes.PageNode;
@ -54,10 +53,4 @@ public class EntityInsertionService {
entity.getIntersectingNodes().forEach(node -> node.getEntities().add(entity));
}
/**
 * Builds the boundary that starts where the given boundary ends and reaches up to the position
 * returned by {@code textBlock.getNextLinebreak} for that end.
 *
 * @param textBlock the text block queried for the next linebreak
 * @param boundary  the boundary whose end marks the start of the result
 * @return a new boundary from {@code boundary.end()} to the next linebreak reported by the text block
 */
private static Boundary toLineAfterBoundary(TextBlock textBlock, Boundary boundary) {
    var start = boundary.end();
    var nextLinebreak = textBlock.getNextLinebreak(boundary.end());
    return new Boundary(start, nextLinebreak);
}
}

View File

@ -2,8 +2,6 @@ package com.knecon.fforesight.service.layoutparser.processor.classification.dto.
import java.util.List;
import com.knecon.fforesight.service.layoutparser.processor.classification.dto.table.Ruling;
import lombok.Builder;
import lombok.Data;

View File

@ -1,132 +0,0 @@
package com.knecon.fforesight.service.layoutparser.processor.factory;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
import org.springframework.stereotype.Service;
import com.knecon.fforesight.service.layoutparser.processor.classification.dto.AbstractTextContainer;
import com.knecon.fforesight.service.layoutparser.processor.classification.dto.ClassificationDocument;
import com.knecon.fforesight.service.layoutparser.processor.classification.dto.ClassificationSection;
import com.knecon.fforesight.service.layoutparser.processor.classification.dto.image.ClassifiedImage;
import com.knecon.fforesight.service.layoutparser.processor.classification.dto.table.Table;
import com.knecon.fforesight.service.layoutparser.processor.classification.dto.table.TableCell;
import com.knecon.fforesight.service.layoutparser.processor.classification.dto.text.ClassificationTextBlock;
/**
 * Assigns the classified images of a document to the text block or table cell that contains them.
 */
@Service
public class ImageSortService {

    /**
     * Collects the distinct images of all sections, groups them by page and sorts each image into the
     * first text container on the same page whose rectangle contains the image position.
     *
     * @param document the classified document whose section images are sorted
     * @return the sorted images; only {@code containedInCell} and {@code containedInTextContainer} are
     * populated by this service, the remaining maps are returned empty
     */
    public SortedImages sortImagesIntoStructure(ClassificationDocument document) {

        SortedImages sortedImages = new SortedImages(new HashMap<>(), new HashMap<>(), new HashMap<>(), new HashMap<>(), new HashMap<>());

        Map<Integer, List<ClassifiedImage>> imagesByPage = document.getSections()
                .stream()
                .flatMap(section -> section.getImages().stream())
                .distinct()
                .collect(Collectors.groupingBy(ClassifiedImage::getPage));

        // iterate the entries directly instead of looking every key up again
        for (Map.Entry<Integer, List<ClassifiedImage>> imagesOnPage : imagesByPage.entrySet()) {
            int pageNumber = imagesOnPage.getKey();
            List<AbstractTextContainer> textContainersOnPage = document.getSections()
                    .stream()
                    .flatMap(section -> section.getPageBlocks().stream())
                    .filter(textContainer -> textContainer.getPage() == pageNumber)
                    .toList();

            for (ClassifiedImage image : imagesOnPage.getValue()) {
                sortImage(textContainersOnPage, image, sortedImages);
            }
        }
        return sortedImages;
    }


    /**
     * Sorts a single image into the first text container on its page that contains it, if there is one.
     * Images without a containing text container are left unsorted.
     */
    private void sortImage(List<AbstractTextContainer> textContainersOnPage, ClassifiedImage image, SortedImages sortedImages) {

        getContainingTextContainer(image, textContainersOnPage).ifPresent(container -> sortImageIntoTextContainerOrCell(image, sortedImages, container));
    }


    /**
     * Registers the image under the given container if it is a text block, or under the containing
     * cell if the container is a table.
     *
     * @return {@code true} if the image was registered, {@code false} otherwise
     */
    private static boolean sortImageIntoTextContainerOrCell(ClassifiedImage image, SortedImages sortedImages, AbstractTextContainer containingTextContainer) {

        if (containingTextContainer instanceof ClassificationTextBlock) {
            sortedImages.containedInTextContainer().computeIfAbsent(containingTextContainer, key -> new ArrayList<>()).add(image);
            return true;
        }
        if (containingTextContainer instanceof Table table) {
            Optional<TableCell> containingCell = getContainingCell(table, image);
            if (containingCell.isPresent()) {
                sortedImages.containedInCell().computeIfAbsent(containingCell.get(), key -> new ArrayList<>()).add(image);
                return true;
            }
        }
        return false;
    }


    /**
     * Finds the first cell of the table, scanning row by row, that contains the image position.
     */
    private static Optional<TableCell> getContainingCell(Table table, ClassifiedImage image) {

        return table.getRows().stream().flatMap(List::stream).filter(cell -> cell.contains(image.getPosition())).findFirst();
    }


    /**
     * Returns the sections whose blocks on the image's page, united into one bounding box, lie inside the image.
     * <p>
     * NOTE(review): not called from within this class at present; wire it in or delete it.
     */
    private List<ClassificationSection> getContainedSections(ClassifiedImage image, List<ClassificationSection> sectionsOnPage) {

        return sectionsOnPage.stream()
                .filter(section -> image.getPosition().contains(RectangleTransformations.bBoxUnionAbstractTextContainer(section.getPageBlocks()
                        .stream()
                        .filter(block -> block.getPage() == image.getPage())
                        .toList())))
                .toList();
    }


    /**
     * Returns the text containers whose rectangle lies inside the image position.
     * <p>
     * NOTE(review): not called from within this class at present; wire it in or delete it.
     */
    private List<AbstractTextContainer> getContainedTextContainers(ClassifiedImage image, List<AbstractTextContainer> textContainersOnPage) {

        return textContainersOnPage.stream().filter(textContainer -> image.getPosition().contains(RectangleTransformations.toRectangle2D(textContainer))).toList();
    }


    /**
     * Returns the first section whose blocks on the image's page, united into one bounding box, contain the image.
     * <p>
     * NOTE(review): not called from within this class at present; wire it in or delete it.
     */
    private Optional<ClassificationSection> getContainingSection(ClassifiedImage image, List<ClassificationSection> sectionsOnPage) {

        return sectionsOnPage.stream()
                .filter(section -> RectangleTransformations.bBoxUnionAbstractTextContainer(section.getPageBlocks()
                        .stream()
                        .filter(block -> block.getPage() == image.getPage())
                        .toList()).contains(image.getPosition()))
                .findFirst();
    }


    /**
     * Returns the first text container whose rectangle contains the image position.
     */
    private Optional<AbstractTextContainer> getContainingTextContainer(ClassifiedImage image, List<AbstractTextContainer> textContainersOnPage) {

        return textContainersOnPage.stream().filter(textContainer -> RectangleTransformations.toRectangle2D(textContainer).contains(image.getPosition())).findFirst();
    }


    /**
     * Result of sorting images into the document structure.
     *
     * @param containedInCell          images keyed by the table cell containing them
     * @param containedInTextContainer images keyed by the text container containing them
     * @param containedInSection       images keyed by section; not populated by this service
     * @param containedByImage         text containers keyed by image; not populated by this service
     * @param sectionContainedByImage  sections keyed by image; not populated by this service
     */
    public record SortedImages(
            Map<TableCell, List<ClassifiedImage>> containedInCell,
            Map<AbstractTextContainer, List<ClassifiedImage>> containedInTextContainer,
            Map<ClassificationSection, List<ClassifiedImage>> containedInSection,
            Map<ClassifiedImage, List<AbstractTextContainer>> containedByImage,
            Map<ClassifiedImage, List<ClassificationSection>> sectionContainedByImage) {

    }

}

View File

@ -6,6 +6,7 @@ import java.security.SecureRandom;
import java.security.spec.KeySpec;
import java.util.Base64;
import javax.annotation.PostConstruct;
import javax.crypto.Cipher;
import javax.crypto.SecretKey;
import javax.crypto.SecretKeyFactory;
@ -16,7 +17,6 @@ import javax.crypto.spec.SecretKeySpec;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import jakarta.annotation.PostConstruct;
import lombok.SneakyThrows;
@Service

View File

@ -65,18 +65,54 @@
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<annotationProcessors>
<annotationProcessor>lombok.launch.AnnotationProcessorHider$AnnotationProcessor</annotationProcessor>
<annotationProcessor>com.dslplatform.json.processor.CompiledJsonAnnotationProcessor</annotationProcessor>
</annotationProcessors>
</configuration>
</plugin>
<plugin>
<!-- generate git.properties for exposure in /info -->
<groupId>pl.project13.maven</groupId>
<artifactId>git-commit-id-plugin</artifactId>
<executions>
<execution>
<goals>
<goal>revision</goal>
</goals>
<configuration>
<generateGitPropertiesFile>true</generateGitPropertiesFile>
<gitDescribe>
<tags>true</tags>
</gitDescribe>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<!-- repackages the generated jar into a runnable fat-jar and makes it
executable -->
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<configuration>
<excludes>
<exclude>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
</exclude>
</excludes>
</configuration>
<executions>
<execution>
<goals>
<goal>repackage</goal>
</goals>
<configuration>
<executable>true</executable>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@ -13,9 +13,10 @@ import com.knecon.fforesight.service.layoutparser.processor.LayoutparserServiceP
import com.knecon.fforesight.service.layoutparser.processor.multitenancy.AsyncConfig;
import com.knecon.fforesight.service.layoutparser.processor.multitenancy.MultiTenancyMessagingConfiguration;
import com.knecon.fforesight.service.layoutparser.processor.multitenancy.MultiTenancyWebConfiguration;
import com.knecon.fforesight.service.layoutparser.processor.multitenancy.TenantsClient;
@Import({MultiTenancyWebConfiguration.class, AsyncConfig.class, MultiTenancyMessagingConfiguration.class, MetricsConfiguration.class, LayoutparserServiceProcessorConfiguration.class, StorageAutoConfiguration.class})
@EnableFeignClients
@EnableFeignClients(basePackageClasses = TenantsClient.class)
@SpringBootApplication(exclude = {SecurityAutoConfiguration.class, ManagementWebSecurityAutoConfiguration.class})
public class Application {

View File

@ -121,8 +121,8 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest {
DocumentGraph documentGraph = buildGraph("files/crafted document");
TableNode table = (TableNode) documentGraph.getTableOfContents()//
.streamAllEntriesInOrder()//
.filter(entry -> entry.type().equals(NodeType.TABLE))//
.map(TableOfContents.Entry::node)//
.filter(entry -> entry.getType().equals(NodeType.TABLE))//
.map(TableOfContents.Entry::getNode)//
.findFirst().orElseThrow();
assertEquals(5, table.getNumberOfCols());
assertEquals(4, table.getNumberOfRows());
@ -151,9 +151,9 @@ public class DocumentGraphEntityInsertionTest extends BuildDocumentGraphTest {
DocumentGraph documentGraph = buildGraph("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06");
TableNode table = (TableNode) documentGraph.getTableOfContents()
.streamAllEntriesInOrder()
.filter(entry -> entry.node().getPages().stream().anyMatch(page -> page.getNumber() == 22))
.filter(entry -> entry.type().equals(NodeType.TABLE))
.map(TableOfContents.Entry::node)
.filter(entry -> entry.getNode().getPages().stream().anyMatch(page -> page.getNumber() == 22))
.filter(entry -> entry.getType().equals(NodeType.TABLE))
.map(TableOfContents.Entry::getNode)
.findFirst()
.orElseThrow();
assertEquals(5, table.getNumberOfCols());

View File

@ -138,7 +138,7 @@ public class PdfDraw {
private static Options buildStandardOptionsForNodes(TableOfContents.Entry entry) {
return Options.builder().stroke(true).strokeColor(switch (entry.type()) {
return Options.builder().stroke(true).strokeColor(switch (entry.getType()) {
case DOCUMENT -> Color.LIGHT_GRAY;
case HEADER, FOOTER -> Color.GREEN;
case PARAGRAPH -> Color.BLUE;
@ -153,20 +153,20 @@ public class PdfDraw {
private static void drawBBoxAndLabelAndNumberOnPage(PDDocument document, TableOfContents.Entry entry, Options options) {
Map<PageNode, Rectangle2D> rectanglesPerPage = entry.node().getBBox();
Map<PageNode, Rectangle2D> rectanglesPerPage = entry.getNode().getBBox();
rectanglesPerPage.forEach((page, rectangle2D) -> {
if (entry.type() == NodeType.SECTION) {
if (entry.getType() == NodeType.SECTION) {
rectangle2D = RectangleTransformations.pad(rectangle2D, 10, 10);
}
drawRectangle2DList(document, page.getNumber(), List.of(rectangle2D), options);
drawText(buildString(entry), document, new Point2D.Double(rectangle2D.getMinX(), rectangle2D.getMaxY() + 2), page.getNumber(), options, entry.type() == NodeType.TABLE_CELL);
drawText(buildString(entry), document, new Point2D.Double(rectangle2D.getMinX(), rectangle2D.getMaxY() + 2), page.getNumber(), options, entry.getType() == NodeType.TABLE_CELL);
});
}
private static String buildString(TableOfContents.Entry entry) {
return entry.node().getNumberOnPage() + ": " + entry.tocId() + ": " + entry.type().toString();
return entry.getNode().getNumberOnPage() + ": " + entry.getTocId() + ": " + entry.getType();
}
}

View File

@ -4,11 +4,13 @@
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.knecon.fforesight</groupId>
<artifactId>layoutparser</artifactId>
<version>1.0.0</version>
<groupId>com.iqser.red</groupId>
<artifactId>platform-dependency</artifactId>
<version>1.17.0</version>
<relativePath/>
</parent>
<groupId>com.knecon.fforesight</groupId>
<artifactId>layoutparser-service</artifactId>
<version>1.0.0</version>
@ -25,10 +27,12 @@
<jackson.version>2.13.2</jackson.version>
<slf4j.version>2.0.7</slf4j.version>
<pdfbox.version>3.0.0-alpha2</pdfbox.version>
<lombok.version>1.18.26</lombok.version>
<spring.version>3.0.1</spring.version>
<spring.cloud.version>2022.0.1</spring.cloud.version>
<jackson.version>2.15.0-rc2</jackson.version>
<dsljson.version>1.9.9</dsljson.version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>

View File

@ -3,11 +3,6 @@
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>3.0.1</version>
</parent>
<groupId>com.knecon.fforesight</groupId>
<artifactId>layoutparser</artifactId>