RED-8481: Use visual layout parsing to detect signatures
Added a new layer for visual parsing results. Added a source label to image properties to enable rules.
This commit is contained in:
parent
f4b6386e1c
commit
a1521877d7
@ -3,8 +3,10 @@ package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.io.Serializable;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
@ -43,7 +45,6 @@ public class DocumentStructure implements Serializable {
|
||||
public static final String ID = "id";
|
||||
|
||||
}
|
||||
|
||||
@Schema(description = "Object containing the extra field names, a table cell has in its properties field.")
|
||||
public static class TableCellProperties implements Serializable {
|
||||
|
||||
@ -115,6 +116,8 @@ public class DocumentStructure implements Serializable {
|
||||
Map<String, String> properties;
|
||||
@Schema(description = "All child Entries of this Entry.", example = "[1, 2, 3]")
|
||||
List<EntryData> children;
|
||||
@Schema(description = "Describes the origin of the semantic node",example = "[ALGORITHM]")
|
||||
Set<LayoutEngine> engines;
|
||||
|
||||
|
||||
@Override
|
||||
|
||||
@ -0,0 +1,5 @@
|
||||
package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction;
|
||||
|
||||
/**
 * Identifies which layout engine a semantic node originates from.
 * Nodes hold a {@code Set<LayoutEngine>}, so a node can carry more than one engine.
 */
public enum LayoutEngine {
|
||||
// ALGORITHM is the default engine assigned to node builders; AI is set on images flagged as sourced by AI.
ALGORITHM, AI
|
||||
}
|
||||
@ -134,6 +134,8 @@ public class LayoutParsingPipeline {
|
||||
|
||||
layoutGridService.addLayoutGrid(viewerDocumentFile, documentGraph, viewerDocumentFile, false);
|
||||
|
||||
layoutGridService.addLayoutGrid(viewerDocumentFile,documentGraph,viewerDocumentFile,false,true);
|
||||
|
||||
log.info("Storing resulting files for {}", layoutParsingRequest.identifier());
|
||||
|
||||
layoutParsingStorageService.storeDocumentData(layoutParsingRequest, DocumentDataMapper.toDocumentData(documentGraph));
|
||||
@ -219,11 +221,7 @@ public class LayoutParsingPipeline {
|
||||
addNumberOfPagesToTrace(originDocument.getNumberOfPages(), Files.size(originFile.toPath()));
|
||||
Map<Integer, List<TableCells>> pdfTableCells = cvTableParsingAdapter.buildCvParsedTablesPerPage(tableServiceResponse);
|
||||
Map<Integer, List<ClassifiedImage>> pdfImages = imageServiceResponseAdapter.buildClassifiedImagesPerPage(imageServiceResponse);
|
||||
Map<Integer, List<ClassifiedImage>> signatures = new HashMap<>();
|
||||
if(signatures.size() > 0) {
|
||||
visualLayoutParsingAdapter.buildExtractedSignaturesPerPage(visualLayoutParsingResponse);
|
||||
}
|
||||
|
||||
Map<Integer, List<ClassifiedImage>> signatures = visualLayoutParsingAdapter.buildExtractedSignaturesPerPage(visualLayoutParsingResponse);
|
||||
ClassificationDocument classificationDocument = new ClassificationDocument();
|
||||
List<ClassificationPage> classificationPages = new ArrayList<>();
|
||||
|
||||
@ -284,7 +282,12 @@ public class LayoutParsingPipeline {
|
||||
}
|
||||
|
||||
if(signatures.containsKey(pageNumber)) {
|
||||
classificationPage.setImages(signatures.get(pageNumber));
|
||||
if(classificationPage.getImages() == null ||classificationPage.getImages().size() == 0) {
|
||||
classificationPage.setImages(signatures.get(pageNumber));
|
||||
}
|
||||
else {
|
||||
classificationPage.getImages().addAll(signatures.get(pageNumber));
|
||||
}
|
||||
}
|
||||
|
||||
tableExtractionService.extractTables(cleanRulings, classificationPage);
|
||||
|
||||
@ -10,6 +10,7 @@ import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
|
||||
@ -30,6 +31,9 @@ import lombok.experimental.FieldDefaults;
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
public class Document implements GenericSemanticNode {
|
||||
|
||||
@Builder.Default
|
||||
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
|
||||
|
||||
Set<Page> pages;
|
||||
DocumentTree documentTree;
|
||||
Integer numberOfPages;
|
||||
|
||||
@ -6,6 +6,7 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
|
||||
@ -26,6 +27,9 @@ import lombok.experimental.FieldDefaults;
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
public class Footer implements GenericSemanticNode {
|
||||
|
||||
@Builder.Default
|
||||
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
|
||||
|
||||
List<Integer> treeId;
|
||||
TextBlock leafTextBlock;
|
||||
|
||||
|
||||
@ -6,6 +6,7 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
|
||||
@ -26,6 +27,8 @@ import lombok.experimental.FieldDefaults;
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
public class Header implements GenericSemanticNode {
|
||||
|
||||
@Builder.Default
|
||||
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
|
||||
List<Integer> treeId;
|
||||
TextBlock leafTextBlock;
|
||||
|
||||
|
||||
@ -6,6 +6,7 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
|
||||
@ -26,6 +27,8 @@ import lombok.experimental.FieldDefaults;
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
public class Headline implements GenericSemanticNode {
|
||||
|
||||
@Builder.Default
|
||||
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
|
||||
List<Integer> treeId;
|
||||
TextBlock leafTextBlock;
|
||||
|
||||
|
||||
@ -8,6 +8,7 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
|
||||
@ -29,6 +30,9 @@ import lombok.experimental.FieldDefaults;
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
public class Image implements GenericSemanticNode {
|
||||
|
||||
@Builder.Default
|
||||
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
|
||||
|
||||
List<Integer> treeId;
|
||||
String id;
|
||||
|
||||
|
||||
@ -6,6 +6,8 @@ public enum ImageType {
|
||||
LOGO,
|
||||
FORMULA,
|
||||
SIGNATURE,
|
||||
|
||||
SIGNATURE_VISUAL,
|
||||
OTHER,
|
||||
OCR;
|
||||
|
||||
|
||||
@ -6,6 +6,7 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
|
||||
@ -24,6 +25,9 @@ import lombok.experimental.FieldDefaults;
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
public class Paragraph implements GenericSemanticNode {
|
||||
|
||||
@Builder.Default
|
||||
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
|
||||
|
||||
List<Integer> treeId;
|
||||
TextBlock leafTextBlock;
|
||||
|
||||
|
||||
@ -6,6 +6,7 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
|
||||
@ -27,6 +28,8 @@ import lombok.extern.slf4j.Slf4j;
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
public class Section implements GenericSemanticNode {
|
||||
|
||||
@Builder.Default
|
||||
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
|
||||
List<Integer> treeId;
|
||||
|
||||
TextBlock textBlock;
|
||||
|
||||
@ -12,6 +12,7 @@ import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.Boundary;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
|
||||
@ -21,6 +22,8 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.textbloc
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations;
|
||||
|
||||
import ch.qos.logback.core.Layout;
|
||||
|
||||
public interface SemanticNode {
|
||||
|
||||
/**
|
||||
@ -334,6 +337,11 @@ public interface SemanticNode {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Returns the set of layout engines recorded for this node.
 *
 * @return the engines of this node; {@link #addEngine(LayoutEngine)} adds to this same set
 */
Set<LayoutEngine> getEngines();
|
||||
|
||||
/**
 * Adds the given engine to the set returned by {@link #getEngines()}.
 * The returned set must therefore support {@code add}.
 *
 * @param engine the layout engine to record on this node
 */
default void addEngine(LayoutEngine engine) {
|
||||
// Mutates the node's own engine set in place.
getEngines().add(engine);
|
||||
}
|
||||
|
||||
/**
|
||||
* Streams all children located directly underneath this node in the DocumentTree.
|
||||
|
||||
@ -12,6 +12,7 @@ import java.util.Set;
|
||||
import java.util.stream.IntStream;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
|
||||
@ -31,6 +32,8 @@ import lombok.experimental.FieldDefaults;
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
public class Table implements SemanticNode {
|
||||
|
||||
@Builder.Default
|
||||
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
|
||||
List<Integer> treeId;
|
||||
DocumentTree documentTree;
|
||||
|
||||
|
||||
@ -7,6 +7,7 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
|
||||
@ -26,6 +27,8 @@ import lombok.experimental.FieldDefaults;
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
public class TableCell implements GenericSemanticNode {
|
||||
|
||||
@Builder.Default
|
||||
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
|
||||
List<Integer> treeId;
|
||||
int row;
|
||||
int col;
|
||||
|
||||
@ -4,18 +4,21 @@ import java.awt.geom.Rectangle2D;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.ImageType;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
import lombok.NonNull;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
|
||||
@Data
|
||||
@RequiredArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class ClassifiedImage {
|
||||
|
||||
@NonNull
|
||||
private Rectangle2D position;
|
||||
@NonNull
|
||||
private ImageType imageType;
|
||||
private boolean sourceByAi;
|
||||
private boolean isAppendedToSection;
|
||||
@NonNull
|
||||
private boolean hasTransparency;
|
||||
|
||||
@ -37,7 +37,7 @@ public class VisualLayoutParsingAdapter {
|
||||
public Map<Integer, List<ClassifiedImage>> buildExtractedSignaturesPerPage(VisualLayoutParsingResponse visualLayoutParsingResponse) {
|
||||
|
||||
Map<Integer, List<ClassifiedImage>> signatures = new HashMap<>();
|
||||
visualLayoutParsingResponse.getData().forEach(tableData -> signatures.computeIfAbsent(tableData.getPage_idx(), tableCell -> new ArrayList<>()).addAll(convertSignatures(tableData.getPage_idx(), tableData.getBoxes())));
|
||||
visualLayoutParsingResponse.getData().forEach(tableData -> signatures.computeIfAbsent(tableData.getPage_idx()+1, tableCell -> new ArrayList<>()).addAll(convertSignatures(tableData.getPage_idx(), tableData.getBoxes())));
|
||||
|
||||
return signatures;
|
||||
}
|
||||
@ -70,7 +70,7 @@ public class VisualLayoutParsingAdapter {
|
||||
tableObjects.stream().forEach(t -> {
|
||||
if(t.getLabel().equals(SIGNATURES)) {
|
||||
ClassifiedImage signature = new ClassifiedImage(new Rectangle2D.Float(t.getBox().getX1(),t.getBox().getY1(),t.getBox().getX2() - t.getBox().getX1(),t.getBox().getY2() - t.getBox().getY1()),
|
||||
ImageType.SIGNATURE,false,pageNumber);
|
||||
ImageType.SIGNATURE,true,false,false,pageNumber);
|
||||
|
||||
signatures.add(signature);
|
||||
}
|
||||
|
||||
@ -7,12 +7,14 @@ import static java.util.stream.Collectors.toList;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.Set;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationDocument;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationFooter;
|
||||
@ -95,14 +97,17 @@ public class DocumentGraphFactory {
|
||||
|
||||
Rectangle2D position = image.getPosition();
|
||||
Page page = context.getPage(image.getPage());
|
||||
Image imageNode = Image.builder()
|
||||
var imageBuilder = Image.builder()
|
||||
.id(IdBuilder.buildId(Set.of(page), List.of(position)))
|
||||
.imageType(image.getImageType())
|
||||
.position(position)
|
||||
.transparent(image.isHasTransparency())
|
||||
.page(page)
|
||||
.documentTree(context.getDocumentTree())
|
||||
.build();
|
||||
.documentTree(context.getDocumentTree());
|
||||
if(image.isSourceByAi()) {
|
||||
imageBuilder.engines(new HashSet<>(Set.of(LayoutEngine.AI)));
|
||||
}
|
||||
Image imageNode = imageBuilder.build();
|
||||
page.getMainBody().add(imageNode);
|
||||
|
||||
List<Integer> tocId = context.getDocumentTree().createNewChildEntryAndReturnId(section, imageNode);
|
||||
|
||||
@ -80,6 +80,7 @@ public class DocumentDataMapper {
|
||||
.treeId(toPrimitiveIntArray(entry.getTreeId()))
|
||||
.children(entry.getChildren().stream().map(DocumentDataMapper::toEntryData).toList())
|
||||
.type(entry.getType())
|
||||
.engines(entry.getNode().getEngines())
|
||||
.atomicBlockIds(atomicTextBlocks)
|
||||
.pageNumbers(entry.getNode().getPages().stream().map(Page::getNumber).map(Integer::longValue).toArray(Long[]::new))
|
||||
.properties(properties)
|
||||
|
||||
@ -17,6 +17,7 @@ import java.util.stream.Stream;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Page;
|
||||
@ -58,52 +59,56 @@ public class LayoutGridService {
|
||||
static Color HEADER_COLOR = new Color(171, 131, 6);
|
||||
static Color IMAGE_COLOR = new Color(253, 63, 146);
|
||||
|
||||
static Color IMAGE_VISUAL_COLOR = new Color(122, 0, 255);
|
||||
|
||||
/**
 * Convenience overload that delegates to the five-argument {@code addLayoutGrid}
 * with {@code visualParsingGrid} set to {@code false}.
 *
 * @param originFile                  passed through unchanged to the five-argument overload
 * @param document                    passed through unchanged to the five-argument overload
 * @param destinationFile             passed through unchanged to the five-argument overload
 * @param layerVisibilityDefaultValue passed through unchanged to the five-argument overload
 */
@SneakyThrows
|
||||
@Observed(name = "ViewerDocumentService", contextualName = "create-viewer-document")
|
||||
public void addLayoutGrid(File originFile, Document document, File destinationFile, boolean layerVisibilityDefaultValue) {
|
||||
// false = not the visual-parsing grid
this.addLayoutGrid(originFile,document,destinationFile,layerVisibilityDefaultValue,false);
|
||||
}
|
||||
|
||||
LayoutGrid layoutGrid = createLayoutGrid(document);
|
||||
@SneakyThrows
|
||||
@Observed(name = "ViewerDocumentService", contextualName = "create-viewer-document")
|
||||
public void addLayoutGrid(File originFile, Document document, File destinationFile, boolean layerVisibilityDefaultValue, boolean visualParsingGrid) {
|
||||
|
||||
LayoutGrid layoutGrid = createLayoutGrid(document, visualParsingGrid);
|
||||
|
||||
viewerDocumentService.addVisualizationsOnPage(originFile,
|
||||
destinationFile,
|
||||
Visualizations.builder()
|
||||
.layer(ContentStreams.KNECON_LAYOUT)
|
||||
.layer(visualParsingGrid ? ContentStreams.KNECON_VISUAL_PARSING : ContentStreams.KNECON_LAYOUT)
|
||||
.visualizationsOnPages(layoutGrid.getVisualizationsPerPages())
|
||||
.layerVisibilityDefaultValue(layerVisibilityDefaultValue)
|
||||
.build());
|
||||
}
|
||||
|
||||
|
||||
private LayoutGrid createLayoutGrid(Document document) {
|
||||
private LayoutGrid createLayoutGrid(Document document, boolean visualParsingGrid) {
|
||||
|
||||
LayoutGrid layoutGrid = new LayoutGrid(document.getNumberOfPages());
|
||||
document.streamAllSubNodes().forEach(semanticNode -> {
|
||||
Color color = switch (semanticNode.getType()) {
|
||||
case PARAGRAPH -> PARAGRAPH_COLOR;
|
||||
case TABLE -> TABLE_COLOR;
|
||||
case SECTION -> SECTION_COLOR;
|
||||
case HEADLINE -> HEADLINE_COLOR;
|
||||
case HEADER, FOOTER -> HEADER_COLOR;
|
||||
case IMAGE -> IMAGE_COLOR;
|
||||
default -> null;
|
||||
};
|
||||
if (isNotSectionOrTableCellOrDocument(semanticNode)) {
|
||||
addAsRectangle(semanticNode, layoutGrid, color);
|
||||
}
|
||||
if (semanticNode.getType().equals(NodeType.SECTION)) {
|
||||
addSection(semanticNode, layoutGrid, color);
|
||||
}
|
||||
if (semanticNode.getType().equals(NodeType.TABLE)) {
|
||||
Table table = (Table) semanticNode;
|
||||
addInnerTableLines(table, layoutGrid);
|
||||
}
|
||||
});
|
||||
|
||||
document.streamAllSubNodes().filter(node -> (node.getEngines().contains(LayoutEngine.AI) && visualParsingGrid ) || (node.getEngines().contains(LayoutEngine.ALGORITHM) && !visualParsingGrid)).forEach(semanticNode -> {
|
||||
Color color = switch (semanticNode.getType()) {
|
||||
case PARAGRAPH -> PARAGRAPH_COLOR;
|
||||
case TABLE -> TABLE_COLOR;
|
||||
case SECTION -> SECTION_COLOR;
|
||||
case HEADLINE -> HEADLINE_COLOR;
|
||||
case HEADER, FOOTER -> HEADER_COLOR;
|
||||
case IMAGE -> IMAGE_COLOR;
|
||||
default -> null;
|
||||
};
|
||||
if (isNotSectionOrTableCellOrDocument(semanticNode)) {
|
||||
addAsRectangle(semanticNode, layoutGrid, color);
|
||||
}
|
||||
if (semanticNode.getType().equals(NodeType.SECTION)) {
|
||||
addSection(semanticNode, layoutGrid, color);
|
||||
}
|
||||
if (semanticNode.getType().equals(NodeType.TABLE)) {
|
||||
Table table = (Table) semanticNode;
|
||||
addInnerTableLines(table, layoutGrid);
|
||||
}
|
||||
});
|
||||
return layoutGrid;
|
||||
}
|
||||
|
||||
|
||||
private void addInnerTableLines(Table table, LayoutGrid layoutGrid) {
|
||||
|
||||
if (table.getNumberOfCols() < 1 || table.getNumberOfRows() < 1) {
|
||||
|
||||
@ -12,6 +12,8 @@ public class ContentStreams {
|
||||
|
||||
public static Identifier KNECON_LAYOUT = new Identifier("Layout grid", COSName.getPDFName("KNECON_LAYOUT"), true);
|
||||
|
||||
public static Identifier KNECON_VISUAL_PARSING = new Identifier("Layout grid - visual", COSName.getPDFName("KNECON_VISUAL_PARSING"), true);
|
||||
|
||||
public static Identifier KNECON_OCR = new Identifier("OCR", COSName.getPDFName("KNECON_OCR"), false);
|
||||
|
||||
public static Identifier KNECON_OCR_TEXT_DEBUG = new Identifier("OCR Text", COSName.getPDFName("KNECON_OCR_TEXT_DEBUG"), true);
|
||||
@ -24,7 +26,7 @@ public class ContentStreams {
|
||||
|
||||
// NOTE(review): the display name is "escape start" although this constant is ESCAPE_END and its COS name is "ESCAPE_END".
// Possibly a copy-paste slip; confirm before changing, since the string is runtime data.
public static Identifier ESCAPE_END = new Identifier("escape start", COSName.getPDFName("ESCAPE_END"), false);
|
||||
|
||||
public static List<Identifier> allContentStreams = List.of(KNECON_LAYOUT, KNECON_OCR, KNECON_OCR_BBOX_DEBUG, KNECON_OCR_TEXT_DEBUG, OTHER, ESCAPE_START, ESCAPE_END);
|
||||
public static List<Identifier> allContentStreams = List.of(KNECON_LAYOUT, KNECON_VISUAL_PARSING,KNECON_OCR, KNECON_OCR_BBOX_DEBUG, KNECON_OCR_TEXT_DEBUG, OTHER, ESCAPE_START, ESCAPE_END);
|
||||
|
||||
public record Identifier(String name, COSName cosName, boolean optionalContent) {
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user