diff --git a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/NodeType.java b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/NodeType.java index e0d08fd..e446fb0 100644 --- a/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/NodeType.java +++ b/layoutparser-service/layoutparser-service-internal-api/src/main/java/com/knecon/fforesight/service/layoutparser/internal/api/data/redaction/NodeType.java @@ -6,6 +6,7 @@ import java.util.Locale; public enum NodeType implements Serializable { DOCUMENT, SECTION, + SUPER_SECTION, HEADLINE, PARAGRAPH, TABLE, diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Section.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Section.java index 5aed41d..995dc7d 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Section.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/Section.java @@ -21,81 +21,17 @@ import lombok.experimental.FieldDefaults; import lombok.experimental.SuperBuilder; import lombok.extern.slf4j.Slf4j; -@Slf4j @Data @SuperBuilder @AllArgsConstructor @FieldDefaults(level = AccessLevel.PRIVATE) -public class Section implements GenericSemanticNode { - - @Builder.Default - Set engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM)); - List treeId; - - TextBlock textBlock; - @EqualsAndHashCode.Exclude - DocumentTree documentTree; - - @Builder.Default - @EqualsAndHashCode.Exclude - Set entities = new HashSet<>(); - - @EqualsAndHashCode.Exclude - Map bBoxCache; - - - @Override - public NodeType getType() { - - return NodeType.SECTION; - } - - - public boolean hasTables() { - - return streamAllSubNodesOfType(NodeType.TABLE).findAny() - .isPresent(); - } - - public boolean isLeafSection() { - - return streamAllSubNodesOfType(NodeType.SECTION).findAny() - .isEmpty(); - } - - @Override - public TextBlock getTextBlock() { - - if (textBlock == null) { - textBlock = GenericSemanticNode.super.getTextBlock(); - } - return textBlock; - } - +@EqualsAndHashCode(callSuper = true) +public class Section extends SectionNode { @Override public String toString() { - return treeId.toString() + ": " + NodeType.SECTION + ": " + this.getTextBlock().buildSummary(); - } - - - public Headline getHeadline() { - - return streamChildrenOfType(NodeType.HEADLINE)// - .map(node -> (Headline) node)// - .findFirst()// - .orElseGet(() -> getParent().getHeadline()); - } - - - @Override - public Map getBBox() { - - if (bBoxCache == null) { - bBoxCache = GenericSemanticNode.super.getBBox(); - } - return bBoxCache; + return super.toString(); } } diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/SectionNode.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/SectionNode.java new file mode 100644 index 0000000..7ce8111 --- /dev/null +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/SectionNode.java @@ -0,0 +1,103 @@ +package com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes; + +import java.awt.geom.Rectangle2D; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine; +import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType; +import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree; +import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity; +import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock; + +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.NoArgsConstructor; +import lombok.experimental.FieldDefaults; +import lombok.experimental.SuperBuilder; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +@Data +@SuperBuilder +@AllArgsConstructor +@NoArgsConstructor +@FieldDefaults(level = AccessLevel.PRIVATE) +public abstract class SectionNode implements GenericSemanticNode { + + @Builder.Default + Set engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM)); + List treeId; + + TextBlock textBlock; + @EqualsAndHashCode.Exclude + DocumentTree documentTree; + + @Builder.Default + @EqualsAndHashCode.Exclude + Set entities = new HashSet<>(); + + @EqualsAndHashCode.Exclude + Map bBoxCache; + + + @Override + public NodeType getType() { + + return NodeType.SECTION; + } + + + public boolean hasTables() { + + return streamAllSubNodesOfType(NodeType.TABLE).findAny() + .isPresent(); + } + + public boolean isLeafSection() { + + return streamAllSubNodesOfType(NodeType.SECTION).findAny() + .isEmpty(); + } + + @Override + public TextBlock getTextBlock() { + + if (textBlock == null) { + textBlock = GenericSemanticNode.super.getTextBlock(); + } + return textBlock; + } + + + @Override + public String toString() { + + return treeId.toString() + ": " + NodeType.SECTION + ": " + this.getTextBlock().buildSummary(); + } + + + public Headline getHeadline() { + + return streamChildrenOfType(NodeType.HEADLINE)// + .map(node -> (Headline) node)// + .findFirst()// + .orElseGet(() -> getParent().getHeadline()); + } + + + @Override + public Map getBBox() { + + if (bBoxCache == null) { + bBoxCache = GenericSemanticNode.super.getBBox(); + } + return bBoxCache; + } + +} diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/SuperSection.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/SuperSection.java index d839c06..179ac3f 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/SuperSection.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/graph/nodes/SuperSection.java @@ -1,14 +1,35 @@ package com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes; +import java.util.List; +import java.util.Set; + +import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine; +import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType; +import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree; +import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity; + +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; import lombok.Data; import lombok.EqualsAndHashCode; import lombok.ToString; +import lombok.experimental.FieldDefaults; import lombok.experimental.SuperBuilder; @Data @SuperBuilder +@AllArgsConstructor +@FieldDefaults(level = AccessLevel.PRIVATE) @EqualsAndHashCode(callSuper = true) -public class SuperSection extends Section { +public class SuperSection extends SectionNode { + + @Override + public NodeType getType() { + + return NodeType.SUPER_SECTION; + } + @Override public String toString() { diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/TableOfContentItem.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/TableOfContentItem.java index 2d229bf..d324795 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/TableOfContentItem.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/TableOfContentItem.java @@ -6,6 +6,7 @@ import java.util.stream.Collectors; import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock; import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Section; +import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SectionNode; import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage; import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock; @@ -24,7 +25,7 @@ public class TableOfContentItem { private List sectionBlocks = new ArrayList<>(); private List images = new ArrayList<>(); - private Section section; + private SectionNode section; public TableOfContentItem(TextPageBlock headline) { diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/DocumentGraphFactory.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/DocumentGraphFactory.java index 50a0251..df8816c 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/DocumentGraphFactory.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/DocumentGraphFactory.java @@ -32,6 +32,7 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Im import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Page; import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Paragraph; import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Section; +import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SectionNode; import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.AtomicTextBlock; import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage; import com.knecon.fforesight.service.layoutparser.processor.model.outline.TableOfContentItem; @@ -74,7 +75,7 @@ public class DocumentGraphFactory { for (TableOfContentItem tocItem : classificationDocument.getTableOfContents()) { var parent = tocItem.getParent() == null ? null : tocItem.getParent().getSection(); - Optional
section = SectionNodeFactory.addSection(layoutParsingType, + Optional section = SectionNodeFactory.addSection(layoutParsingType, parent, tocItem.getChildren().isEmpty(), tocItem.getNonEmptySectionBlocks(), @@ -239,7 +240,7 @@ public class DocumentGraphFactory { DocumentTree documentTree; Map pages; - List
sections; + List sections; List images; TextBlockFactory textBlockFactory; diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/SectionNodeFactory.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/SectionNodeFactory.java index 6d068ea..6ca5503 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/SectionNodeFactory.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/factory/SectionNodeFactory.java @@ -18,6 +18,7 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Do import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.GenericSemanticNode; import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Page; import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Section; +import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SectionNode; import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SuperSection; import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage; import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock; @@ -29,7 +30,7 @@ import lombok.experimental.UtilityClass; @UtilityClass public class SectionNodeFactory { - public Optional
addSection(LayoutParsingType layoutParsingType, + public Optional addSection(LayoutParsingType layoutParsingType, GenericSemanticNode parentNode, boolean isLeaf, List pageBlocks, @@ -53,7 +54,7 @@ public class SectionNodeFactory { Map> blocksPerPage = pageBlocks.stream() .collect(groupingBy(AbstractPageBlock::getPage)); - Section section; + SectionNode section; if (isLeaf) { section = Section.builder().documentTree(context.getDocumentTree()).build(); } else { @@ -98,7 +99,7 @@ public class SectionNodeFactory { } - private List getTreeId(GenericSemanticNode parentNode, DocumentGraphFactory.Context context, Section section) { + private List getTreeId(GenericSemanticNode parentNode, DocumentGraphFactory.Context context, SectionNode section) { if (parentNode == null) { return context.getDocumentTree().createNewMainEntryAndReturnId(section); @@ -111,7 +112,7 @@ public class SectionNodeFactory { private void addFirstHeadlineDirectlyToSection(LayoutParsingType layoutParsingType, List pageBlocks, DocumentGraphFactory.Context context, - Section section, + SectionNode section, Document document) { if (pageBlocks.get(0).isHeadline()) { @@ -124,7 +125,7 @@ public class SectionNodeFactory { private void addTablesAndParagraphsAndHeadlinesToSection(LayoutParsingType layoutParsingType, List pageBlocks, DocumentGraphFactory.Context context, - Section section, + SectionNode section, Document document) { Set alreadyMerged = new HashSet<>(); @@ -249,7 +250,7 @@ public class SectionNodeFactory { } - private void addSectionNodeToPageNode(DocumentGraphFactory.Context context, Section section, Integer pageNumber) { + private void addSectionNodeToPageNode(DocumentGraphFactory.Context context, SectionNode section, Integer pageNumber) { Page page = context.getPage(pageNumber); page.getMainBody().add(section); diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/mapper/DocumentGraphMapper.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/mapper/DocumentGraphMapper.java index a53c6d8..5e664f9 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/mapper/DocumentGraphMapper.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/mapper/DocumentGraphMapper.java @@ -61,7 +61,7 @@ public class DocumentGraphMapper { List pages = Arrays.stream(entryData.getPageNumbers()).map(pageNumber -> getPage(pageNumber, context)).toList(); SemanticNode node = switch (entryData.getType()) { - case SECTION -> buildSection(context); + case SECTION, SUPER_SECTION -> buildSection(context); case PARAGRAPH -> buildParagraph(context, entryData.getProperties()); case HEADLINE -> buildHeadline(context); case HEADER -> buildHeader(context); diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/utils/PdfVisualisationUtility.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/utils/PdfVisualisationUtility.java index bf64c12..bcba9e2 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/utils/PdfVisualisationUtility.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/utils/PdfVisualisationUtility.java @@ -112,8 +112,8 @@ public class PdfVisualisationUtility { case DOCUMENT -> Color.LIGHT_GRAY; case HEADER, FOOTER -> Color.GREEN; case PARAGRAPH -> Color.BLUE; + case SUPER_SECTION, SECTION -> Color.BLACK; case HEADLINE -> Color.RED; - case SECTION -> Color.BLACK; case TABLE -> Color.ORANGE; case TABLE_CELL -> Color.GRAY; case IMAGE -> Color.MAGENTA; diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/visualizations/PdfDraw.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/visualizations/PdfDraw.java index 4e3280f..cdd247d 100644 --- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/visualizations/PdfDraw.java +++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/utils/visualizations/PdfDraw.java @@ -229,7 +229,7 @@ public class PdfDraw { case HEADER, FOOTER -> Color.GREEN; case PARAGRAPH -> Color.BLUE; case HEADLINE -> Color.RED; - case SECTION -> Color.BLACK; + case SECTION, SUPER_SECTION -> Color.BLACK; case TABLE -> Color.ORANGE; case TABLE_CELL -> Color.GRAY; case IMAGE -> Color.MAGENTA;