RED-8481: Use visual layout parsing to detect signatures

Added a new viewer-document layer for the visual parsing results.

Added a source label to the image properties so that rules can make use of it.
This commit is contained in:
yhampe 2024-02-23 12:20:11 +01:00
parent f4b6386e1c
commit a1521877d7
20 changed files with 108 additions and 40 deletions

View File

@ -3,8 +3,10 @@ package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction;
import java.awt.geom.Rectangle2D;
import java.io.Serializable;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Stream;
import io.swagger.v3.oas.annotations.media.Schema;
@ -43,7 +45,6 @@ public class DocumentStructure implements Serializable {
public static final String ID = "id";
}
@Schema(description = "Object containing the extra field names, a table cell has in its properties field.")
public static class TableCellProperties implements Serializable {
@ -115,6 +116,8 @@ public class DocumentStructure implements Serializable {
Map<String, String> properties;
@Schema(description = "All child Entries of this Entry.", example = "[1, 2, 3]")
List<EntryData> children;
@Schema(description = "Describes the origin of the semantic node",example = "[ALGORITHM]")
Set<LayoutEngine> engines;
@Override

View File

@ -0,0 +1,5 @@
package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction;
/**
 * Names the layout engine a semantic node originates from.
 *
 * <p>The declaration order of the constants is kept as-is so that ordinals and
 * serialized names stay stable.
 */
public enum LayoutEngine {

    /** Engine used as the default value for the {@code engines} set of semantic nodes. */
    ALGORITHM,

    /** Engine recorded for nodes whose source is flagged as AI-based (e.g. images found by visual layout parsing). */
    AI
}

View File

@ -134,6 +134,8 @@ public class LayoutParsingPipeline {
layoutGridService.addLayoutGrid(viewerDocumentFile, documentGraph, viewerDocumentFile, false);
layoutGridService.addLayoutGrid(viewerDocumentFile,documentGraph,viewerDocumentFile,false,true);
log.info("Storing resulting files for {}", layoutParsingRequest.identifier());
layoutParsingStorageService.storeDocumentData(layoutParsingRequest, DocumentDataMapper.toDocumentData(documentGraph));
@ -219,11 +221,7 @@ public class LayoutParsingPipeline {
addNumberOfPagesToTrace(originDocument.getNumberOfPages(), Files.size(originFile.toPath()));
Map<Integer, List<TableCells>> pdfTableCells = cvTableParsingAdapter.buildCvParsedTablesPerPage(tableServiceResponse);
Map<Integer, List<ClassifiedImage>> pdfImages = imageServiceResponseAdapter.buildClassifiedImagesPerPage(imageServiceResponse);
Map<Integer, List<ClassifiedImage>> signatures = new HashMap<>();
if(signatures.size() > 0) {
visualLayoutParsingAdapter.buildExtractedSignaturesPerPage(visualLayoutParsingResponse);
}
Map<Integer, List<ClassifiedImage>> signatures = visualLayoutParsingAdapter.buildExtractedSignaturesPerPage(visualLayoutParsingResponse);
ClassificationDocument classificationDocument = new ClassificationDocument();
List<ClassificationPage> classificationPages = new ArrayList<>();
@ -284,7 +282,12 @@ public class LayoutParsingPipeline {
}
if(signatures.containsKey(pageNumber)) {
classificationPage.setImages(signatures.get(pageNumber));
if(classificationPage.getImages() == null ||classificationPage.getImages().size() == 0) {
classificationPage.setImages(signatures.get(pageNumber));
}
else {
classificationPage.getImages().addAll(signatures.get(pageNumber));
}
}
tableExtractionService.extractTables(cleanRulings, classificationPage);

View File

@ -10,6 +10,7 @@ import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
@ -30,6 +31,9 @@ import lombok.experimental.FieldDefaults;
@FieldDefaults(level = AccessLevel.PRIVATE)
public class Document implements GenericSemanticNode {
@Builder.Default
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
Set<Page> pages;
DocumentTree documentTree;
Integer numberOfPages;

View File

@ -6,6 +6,7 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
@ -26,6 +27,9 @@ import lombok.experimental.FieldDefaults;
@FieldDefaults(level = AccessLevel.PRIVATE)
public class Footer implements GenericSemanticNode {
@Builder.Default
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
List<Integer> treeId;
TextBlock leafTextBlock;

View File

@ -6,6 +6,7 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
@ -26,6 +27,8 @@ import lombok.experimental.FieldDefaults;
@FieldDefaults(level = AccessLevel.PRIVATE)
public class Header implements GenericSemanticNode {
@Builder.Default
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
List<Integer> treeId;
TextBlock leafTextBlock;

View File

@ -6,6 +6,7 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
@ -26,6 +27,8 @@ import lombok.experimental.FieldDefaults;
@FieldDefaults(level = AccessLevel.PRIVATE)
public class Headline implements GenericSemanticNode {
@Builder.Default
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
List<Integer> treeId;
TextBlock leafTextBlock;

View File

@ -8,6 +8,7 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
@ -29,6 +30,9 @@ import lombok.experimental.FieldDefaults;
@FieldDefaults(level = AccessLevel.PRIVATE)
public class Image implements GenericSemanticNode {
@Builder.Default
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
List<Integer> treeId;
String id;

View File

@ -6,6 +6,8 @@ public enum ImageType {
LOGO,
FORMULA,
SIGNATURE,
SIGNATURE_VISUAL,
OTHER,
OCR;

View File

@ -6,6 +6,7 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
@ -24,6 +25,9 @@ import lombok.experimental.FieldDefaults;
@FieldDefaults(level = AccessLevel.PRIVATE)
public class Paragraph implements GenericSemanticNode {
@Builder.Default
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
List<Integer> treeId;
TextBlock leafTextBlock;

View File

@ -6,6 +6,7 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
@ -27,6 +28,8 @@ import lombok.extern.slf4j.Slf4j;
@FieldDefaults(level = AccessLevel.PRIVATE)
public class Section implements GenericSemanticNode {
@Builder.Default
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
List<Integer> treeId;
TextBlock textBlock;

View File

@ -12,6 +12,7 @@ import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.Boundary;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
@ -21,6 +22,8 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.textbloc
import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations;
import ch.qos.logback.core.Layout;
public interface SemanticNode {
/**
@ -334,6 +337,11 @@ public interface SemanticNode {
}
}
/**
 * Returns the layout engines recorded for this node.
 * {@link #addEngine(LayoutEngine)} adds to the returned set directly, so implementations
 * have to hand out a modifiable set.
 *
 * @return the set of layout engines of this node
 */
Set<LayoutEngine> getEngines();
/**
 * Records an additional layout engine for this node by adding it to the set
 * returned from {@link #getEngines()}.
 *
 * @param engine the layout engine to add
 */
default void addEngine(LayoutEngine engine) {

    final Set<LayoutEngine> nodeEngines = getEngines();
    nodeEngines.add(engine);
}
/**
* Streams all children located directly underneath this node in the DocumentTree.

View File

@ -12,6 +12,7 @@ import java.util.Set;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
@ -31,6 +32,8 @@ import lombok.experimental.FieldDefaults;
@FieldDefaults(level = AccessLevel.PRIVATE)
public class Table implements SemanticNode {
@Builder.Default
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
List<Integer> treeId;
DocumentTree documentTree;

View File

@ -7,6 +7,7 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
@ -26,6 +27,8 @@ import lombok.experimental.FieldDefaults;
@FieldDefaults(level = AccessLevel.PRIVATE)
public class TableCell implements GenericSemanticNode {
@Builder.Default
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
List<Integer> treeId;
int row;
int col;

View File

@ -4,18 +4,21 @@ import java.awt.geom.Rectangle2D;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.ImageType;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NonNull;
import lombok.RequiredArgsConstructor;
@Data
@RequiredArgsConstructor
@AllArgsConstructor
public class ClassifiedImage {
@NonNull
private Rectangle2D position;
@NonNull
private ImageType imageType;
private boolean sourceByAi;
private boolean isAppendedToSection;
@NonNull
private boolean hasTransparency;

View File

@ -37,7 +37,7 @@ public class VisualLayoutParsingAdapter {
public Map<Integer, List<ClassifiedImage>> buildExtractedSignaturesPerPage(VisualLayoutParsingResponse visualLayoutParsingResponse) {
Map<Integer, List<ClassifiedImage>> signatures = new HashMap<>();
visualLayoutParsingResponse.getData().forEach(tableData -> signatures.computeIfAbsent(tableData.getPage_idx(), tableCell -> new ArrayList<>()).addAll(convertSignatures(tableData.getPage_idx(), tableData.getBoxes())));
visualLayoutParsingResponse.getData().forEach(tableData -> signatures.computeIfAbsent(tableData.getPage_idx()+1, tableCell -> new ArrayList<>()).addAll(convertSignatures(tableData.getPage_idx(), tableData.getBoxes())));
return signatures;
}
@ -70,7 +70,7 @@ public class VisualLayoutParsingAdapter {
tableObjects.stream().forEach(t -> {
if(t.getLabel().equals(SIGNATURES)) {
ClassifiedImage signature = new ClassifiedImage(new Rectangle2D.Float(t.getBox().getX1(),t.getBox().getY1(),t.getBox().getX2() - t.getBox().getX1(),t.getBox().getY2() - t.getBox().getY1()),
ImageType.SIGNATURE,false,pageNumber);
ImageType.SIGNATURE,true,false,false,pageNumber);
signatures.add(signature);
}

View File

@ -7,12 +7,14 @@ import static java.util.stream.Collectors.toList;
import java.awt.geom.Rectangle2D;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationDocument;
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationFooter;
@ -95,14 +97,17 @@ public class DocumentGraphFactory {
Rectangle2D position = image.getPosition();
Page page = context.getPage(image.getPage());
Image imageNode = Image.builder()
var imageBuilder = Image.builder()
.id(IdBuilder.buildId(Set.of(page), List.of(position)))
.imageType(image.getImageType())
.position(position)
.transparent(image.isHasTransparency())
.page(page)
.documentTree(context.getDocumentTree())
.build();
.documentTree(context.getDocumentTree());
if(image.isSourceByAi()) {
imageBuilder.engines(new HashSet<>(Set.of(LayoutEngine.AI)));
}
Image imageNode = imageBuilder.build();
page.getMainBody().add(imageNode);
List<Integer> tocId = context.getDocumentTree().createNewChildEntryAndReturnId(section, imageNode);

View File

@ -80,6 +80,7 @@ public class DocumentDataMapper {
.treeId(toPrimitiveIntArray(entry.getTreeId()))
.children(entry.getChildren().stream().map(DocumentDataMapper::toEntryData).toList())
.type(entry.getType())
.engines(entry.getNode().getEngines())
.atomicBlockIds(atomicTextBlocks)
.pageNumbers(entry.getNode().getPages().stream().map(Page::getNumber).map(Integer::longValue).toArray(Long[]::new))
.properties(properties)

View File

@ -17,6 +17,7 @@ import java.util.stream.Stream;
import org.springframework.stereotype.Service;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Page;
@ -58,52 +59,56 @@ public class LayoutGridService {
static Color HEADER_COLOR = new Color(171, 131, 6);
static Color IMAGE_COLOR = new Color(253, 63, 146);
static Color IMAGE_VISUAL_COLOR = new Color(122, 0, 255);
/**
 * Convenience overload that adds the layout grid without the visual parsing mode.
 * Forwards all arguments to the five-argument {@code addLayoutGrid} with
 * {@code visualParsingGrid} set to {@code false}.
 *
 * @param originFile                  file passed on as the origin of the viewer document
 * @param document                    document the layout grid is created from
 * @param destinationFile             file passed on as the destination of the viewer document
 * @param layerVisibilityDefaultValue default visibility passed on for the created layer
 */
@SneakyThrows
@Observed(name = "ViewerDocumentService", contextualName = "create-viewer-document")
public void addLayoutGrid(File originFile, Document document, File destinationFile, boolean layerVisibilityDefaultValue) {

    final boolean visualParsingGrid = false;
    addLayoutGrid(originFile, document, destinationFile, layerVisibilityDefaultValue, visualParsingGrid);
}
LayoutGrid layoutGrid = createLayoutGrid(document);
@SneakyThrows
@Observed(name = "ViewerDocumentService", contextualName = "create-viewer-document")
public void addLayoutGrid(File originFile, Document document, File destinationFile, boolean layerVisibilityDefaultValue, boolean visualParsingGrid) {
LayoutGrid layoutGrid = createLayoutGrid(document, visualParsingGrid);
viewerDocumentService.addVisualizationsOnPage(originFile,
destinationFile,
Visualizations.builder()
.layer(ContentStreams.KNECON_LAYOUT)
.layer(visualParsingGrid ? ContentStreams.KNECON_VISUAL_PARSING : ContentStreams.KNECON_LAYOUT)
.visualizationsOnPages(layoutGrid.getVisualizationsPerPages())
.layerVisibilityDefaultValue(layerVisibilityDefaultValue)
.build());
}
private LayoutGrid createLayoutGrid(Document document) {
private LayoutGrid createLayoutGrid(Document document, boolean visualParsingGrid) {
LayoutGrid layoutGrid = new LayoutGrid(document.getNumberOfPages());
document.streamAllSubNodes().forEach(semanticNode -> {
Color color = switch (semanticNode.getType()) {
case PARAGRAPH -> PARAGRAPH_COLOR;
case TABLE -> TABLE_COLOR;
case SECTION -> SECTION_COLOR;
case HEADLINE -> HEADLINE_COLOR;
case HEADER, FOOTER -> HEADER_COLOR;
case IMAGE -> IMAGE_COLOR;
default -> null;
};
if (isNotSectionOrTableCellOrDocument(semanticNode)) {
addAsRectangle(semanticNode, layoutGrid, color);
}
if (semanticNode.getType().equals(NodeType.SECTION)) {
addSection(semanticNode, layoutGrid, color);
}
if (semanticNode.getType().equals(NodeType.TABLE)) {
Table table = (Table) semanticNode;
addInnerTableLines(table, layoutGrid);
}
});
document.streamAllSubNodes().filter(node -> (node.getEngines().contains(LayoutEngine.AI) && visualParsingGrid ) || (node.getEngines().contains(LayoutEngine.ALGORITHM) && !visualParsingGrid)).forEach(semanticNode -> {
Color color = switch (semanticNode.getType()) {
case PARAGRAPH -> PARAGRAPH_COLOR;
case TABLE -> TABLE_COLOR;
case SECTION -> SECTION_COLOR;
case HEADLINE -> HEADLINE_COLOR;
case HEADER, FOOTER -> HEADER_COLOR;
case IMAGE -> IMAGE_COLOR;
default -> null;
};
if (isNotSectionOrTableCellOrDocument(semanticNode)) {
addAsRectangle(semanticNode, layoutGrid, color);
}
if (semanticNode.getType().equals(NodeType.SECTION)) {
addSection(semanticNode, layoutGrid, color);
}
if (semanticNode.getType().equals(NodeType.TABLE)) {
Table table = (Table) semanticNode;
addInnerTableLines(table, layoutGrid);
}
});
return layoutGrid;
}
private void addInnerTableLines(Table table, LayoutGrid layoutGrid) {
if (table.getNumberOfCols() < 1 || table.getNumberOfRows() < 1) {

View File

@ -12,6 +12,8 @@ public class ContentStreams {
public static Identifier KNECON_LAYOUT = new Identifier("Layout grid", COSName.getPDFName("KNECON_LAYOUT"), true);
public static Identifier KNECON_VISUAL_PARSING = new Identifier("Layout grid - visual", COSName.getPDFName("KNECON_VISUAL_PARSING"), true);
public static Identifier KNECON_OCR = new Identifier("OCR", COSName.getPDFName("KNECON_OCR"), false);
public static Identifier KNECON_OCR_TEXT_DEBUG = new Identifier("OCR Text", COSName.getPDFName("KNECON_OCR_TEXT_DEBUG"), true);
@ -24,7 +26,7 @@ public class ContentStreams {
public static Identifier ESCAPE_END = new Identifier("escape start", COSName.getPDFName("ESCAPE_END"), false);
public static List<Identifier> allContentStreams = List.of(KNECON_LAYOUT, KNECON_OCR, KNECON_OCR_BBOX_DEBUG, KNECON_OCR_TEXT_DEBUG, OTHER, ESCAPE_START, ESCAPE_END);
public static List<Identifier> allContentStreams = List.of(KNECON_LAYOUT, KNECON_VISUAL_PARSING,KNECON_OCR, KNECON_OCR_BBOX_DEBUG, KNECON_OCR_TEXT_DEBUG, OTHER, ESCAPE_START, ESCAPE_END);
public record Identifier(String name, COSName cosName, boolean optionalContent) {