RED-8481: Use visual layout parsing to detect signatures
Added a new layer for visual parsing results; added a source label to image properties to enable rules.
This commit is contained in:
parent
f4b6386e1c
commit
a1521877d7
@ -3,8 +3,10 @@ package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction;
|
|||||||
import java.awt.geom.Rectangle2D;
|
import java.awt.geom.Rectangle2D;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
import io.swagger.v3.oas.annotations.media.Schema;
|
import io.swagger.v3.oas.annotations.media.Schema;
|
||||||
@ -43,7 +45,6 @@ public class DocumentStructure implements Serializable {
|
|||||||
public static final String ID = "id";
|
public static final String ID = "id";
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Schema(description = "Object containing the extra field names, a table cell has in its properties field.")
|
@Schema(description = "Object containing the extra field names, a table cell has in its properties field.")
|
||||||
public static class TableCellProperties implements Serializable {
|
public static class TableCellProperties implements Serializable {
|
||||||
|
|
||||||
@ -115,6 +116,8 @@ public class DocumentStructure implements Serializable {
|
|||||||
Map<String, String> properties;
|
Map<String, String> properties;
|
||||||
@Schema(description = "All child Entries of this Entry.", example = "[1, 2, 3]")
|
@Schema(description = "All child Entries of this Entry.", example = "[1, 2, 3]")
|
||||||
List<EntryData> children;
|
List<EntryData> children;
|
||||||
|
@Schema(description = "Describes the origin of the semantic node",example = "[ALGORITHM]")
|
||||||
|
Set<LayoutEngine> engines;
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|||||||
@ -0,0 +1,5 @@
|
|||||||
|
package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction;
|
||||||
|
|
||||||
|
public enum LayoutEngine {
|
||||||
|
ALGORITHM, AI
|
||||||
|
}
|
||||||
@ -134,6 +134,8 @@ public class LayoutParsingPipeline {
|
|||||||
|
|
||||||
layoutGridService.addLayoutGrid(viewerDocumentFile, documentGraph, viewerDocumentFile, false);
|
layoutGridService.addLayoutGrid(viewerDocumentFile, documentGraph, viewerDocumentFile, false);
|
||||||
|
|
||||||
|
layoutGridService.addLayoutGrid(viewerDocumentFile,documentGraph,viewerDocumentFile,false,true);
|
||||||
|
|
||||||
log.info("Storing resulting files for {}", layoutParsingRequest.identifier());
|
log.info("Storing resulting files for {}", layoutParsingRequest.identifier());
|
||||||
|
|
||||||
layoutParsingStorageService.storeDocumentData(layoutParsingRequest, DocumentDataMapper.toDocumentData(documentGraph));
|
layoutParsingStorageService.storeDocumentData(layoutParsingRequest, DocumentDataMapper.toDocumentData(documentGraph));
|
||||||
@ -219,11 +221,7 @@ public class LayoutParsingPipeline {
|
|||||||
addNumberOfPagesToTrace(originDocument.getNumberOfPages(), Files.size(originFile.toPath()));
|
addNumberOfPagesToTrace(originDocument.getNumberOfPages(), Files.size(originFile.toPath()));
|
||||||
Map<Integer, List<TableCells>> pdfTableCells = cvTableParsingAdapter.buildCvParsedTablesPerPage(tableServiceResponse);
|
Map<Integer, List<TableCells>> pdfTableCells = cvTableParsingAdapter.buildCvParsedTablesPerPage(tableServiceResponse);
|
||||||
Map<Integer, List<ClassifiedImage>> pdfImages = imageServiceResponseAdapter.buildClassifiedImagesPerPage(imageServiceResponse);
|
Map<Integer, List<ClassifiedImage>> pdfImages = imageServiceResponseAdapter.buildClassifiedImagesPerPage(imageServiceResponse);
|
||||||
Map<Integer, List<ClassifiedImage>> signatures = new HashMap<>();
|
Map<Integer, List<ClassifiedImage>> signatures = visualLayoutParsingAdapter.buildExtractedSignaturesPerPage(visualLayoutParsingResponse);
|
||||||
if(signatures.size() > 0) {
|
|
||||||
visualLayoutParsingAdapter.buildExtractedSignaturesPerPage(visualLayoutParsingResponse);
|
|
||||||
}
|
|
||||||
|
|
||||||
ClassificationDocument classificationDocument = new ClassificationDocument();
|
ClassificationDocument classificationDocument = new ClassificationDocument();
|
||||||
List<ClassificationPage> classificationPages = new ArrayList<>();
|
List<ClassificationPage> classificationPages = new ArrayList<>();
|
||||||
|
|
||||||
@ -284,7 +282,12 @@ public class LayoutParsingPipeline {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if(signatures.containsKey(pageNumber)) {
|
if(signatures.containsKey(pageNumber)) {
|
||||||
classificationPage.setImages(signatures.get(pageNumber));
|
if(classificationPage.getImages() == null ||classificationPage.getImages().size() == 0) {
|
||||||
|
classificationPage.setImages(signatures.get(pageNumber));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
classificationPage.getImages().addAll(signatures.get(pageNumber));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
tableExtractionService.extractTables(cleanRulings, classificationPage);
|
tableExtractionService.extractTables(cleanRulings, classificationPage);
|
||||||
|
|||||||
@ -10,6 +10,7 @@ import java.util.Set;
|
|||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
|
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
|
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
|
||||||
@ -30,6 +31,9 @@ import lombok.experimental.FieldDefaults;
|
|||||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||||
public class Document implements GenericSemanticNode {
|
public class Document implements GenericSemanticNode {
|
||||||
|
|
||||||
|
@Builder.Default
|
||||||
|
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
|
||||||
|
|
||||||
Set<Page> pages;
|
Set<Page> pages;
|
||||||
DocumentTree documentTree;
|
DocumentTree documentTree;
|
||||||
Integer numberOfPages;
|
Integer numberOfPages;
|
||||||
|
|||||||
@ -6,6 +6,7 @@ import java.util.List;
|
|||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
|
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
|
||||||
@ -26,6 +27,9 @@ import lombok.experimental.FieldDefaults;
|
|||||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||||
public class Footer implements GenericSemanticNode {
|
public class Footer implements GenericSemanticNode {
|
||||||
|
|
||||||
|
@Builder.Default
|
||||||
|
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
|
||||||
|
|
||||||
List<Integer> treeId;
|
List<Integer> treeId;
|
||||||
TextBlock leafTextBlock;
|
TextBlock leafTextBlock;
|
||||||
|
|
||||||
|
|||||||
@ -6,6 +6,7 @@ import java.util.List;
|
|||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
|
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
|
||||||
@ -26,6 +27,8 @@ import lombok.experimental.FieldDefaults;
|
|||||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||||
public class Header implements GenericSemanticNode {
|
public class Header implements GenericSemanticNode {
|
||||||
|
|
||||||
|
@Builder.Default
|
||||||
|
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
|
||||||
List<Integer> treeId;
|
List<Integer> treeId;
|
||||||
TextBlock leafTextBlock;
|
TextBlock leafTextBlock;
|
||||||
|
|
||||||
|
|||||||
@ -6,6 +6,7 @@ import java.util.List;
|
|||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
|
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
|
||||||
@ -26,6 +27,8 @@ import lombok.experimental.FieldDefaults;
|
|||||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||||
public class Headline implements GenericSemanticNode {
|
public class Headline implements GenericSemanticNode {
|
||||||
|
|
||||||
|
@Builder.Default
|
||||||
|
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
|
||||||
List<Integer> treeId;
|
List<Integer> treeId;
|
||||||
TextBlock leafTextBlock;
|
TextBlock leafTextBlock;
|
||||||
|
|
||||||
|
|||||||
@ -8,6 +8,7 @@ import java.util.List;
|
|||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
|
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
|
||||||
@ -29,6 +30,9 @@ import lombok.experimental.FieldDefaults;
|
|||||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||||
public class Image implements GenericSemanticNode {
|
public class Image implements GenericSemanticNode {
|
||||||
|
|
||||||
|
@Builder.Default
|
||||||
|
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
|
||||||
|
|
||||||
List<Integer> treeId;
|
List<Integer> treeId;
|
||||||
String id;
|
String id;
|
||||||
|
|
||||||
|
|||||||
@ -6,6 +6,8 @@ public enum ImageType {
|
|||||||
LOGO,
|
LOGO,
|
||||||
FORMULA,
|
FORMULA,
|
||||||
SIGNATURE,
|
SIGNATURE,
|
||||||
|
|
||||||
|
SIGNATURE_VISUAL,
|
||||||
OTHER,
|
OTHER,
|
||||||
OCR;
|
OCR;
|
||||||
|
|
||||||
|
|||||||
@ -6,6 +6,7 @@ import java.util.List;
|
|||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
|
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
|
||||||
@ -24,6 +25,9 @@ import lombok.experimental.FieldDefaults;
|
|||||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||||
public class Paragraph implements GenericSemanticNode {
|
public class Paragraph implements GenericSemanticNode {
|
||||||
|
|
||||||
|
@Builder.Default
|
||||||
|
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
|
||||||
|
|
||||||
List<Integer> treeId;
|
List<Integer> treeId;
|
||||||
TextBlock leafTextBlock;
|
TextBlock leafTextBlock;
|
||||||
|
|
||||||
|
|||||||
@ -6,6 +6,7 @@ import java.util.List;
|
|||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
|
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
|
||||||
@ -27,6 +28,8 @@ import lombok.extern.slf4j.Slf4j;
|
|||||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||||
public class Section implements GenericSemanticNode {
|
public class Section implements GenericSemanticNode {
|
||||||
|
|
||||||
|
@Builder.Default
|
||||||
|
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
|
||||||
List<Integer> treeId;
|
List<Integer> treeId;
|
||||||
|
|
||||||
TextBlock textBlock;
|
TextBlock textBlock;
|
||||||
|
|||||||
@ -12,6 +12,7 @@ import java.util.Set;
|
|||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
|
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
|
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.Boundary;
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.Boundary;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
|
||||||
@ -21,6 +22,8 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.textbloc
|
|||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations;
|
import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations;
|
||||||
|
|
||||||
|
import ch.qos.logback.core.Layout;
|
||||||
|
|
||||||
public interface SemanticNode {
|
public interface SemanticNode {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -334,6 +337,11 @@ public interface SemanticNode {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Set<LayoutEngine> getEngines();
|
||||||
|
|
||||||
|
default void addEngine(LayoutEngine engine) {
|
||||||
|
getEngines().add(engine);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Streams all children located directly underneath this node in the DocumentTree.
|
* Streams all children located directly underneath this node in the DocumentTree.
|
||||||
|
|||||||
@ -12,6 +12,7 @@ import java.util.Set;
|
|||||||
import java.util.stream.IntStream;
|
import java.util.stream.IntStream;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
|
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
|
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
|
||||||
@ -31,6 +32,8 @@ import lombok.experimental.FieldDefaults;
|
|||||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||||
public class Table implements SemanticNode {
|
public class Table implements SemanticNode {
|
||||||
|
|
||||||
|
@Builder.Default
|
||||||
|
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
|
||||||
List<Integer> treeId;
|
List<Integer> treeId;
|
||||||
DocumentTree documentTree;
|
DocumentTree documentTree;
|
||||||
|
|
||||||
|
|||||||
@ -7,6 +7,7 @@ import java.util.List;
|
|||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
|
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.entity.RedactionEntity;
|
||||||
@ -26,6 +27,8 @@ import lombok.experimental.FieldDefaults;
|
|||||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||||
public class TableCell implements GenericSemanticNode {
|
public class TableCell implements GenericSemanticNode {
|
||||||
|
|
||||||
|
@Builder.Default
|
||||||
|
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
|
||||||
List<Integer> treeId;
|
List<Integer> treeId;
|
||||||
int row;
|
int row;
|
||||||
int col;
|
int col;
|
||||||
|
|||||||
@ -4,18 +4,21 @@ import java.awt.geom.Rectangle2D;
|
|||||||
|
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.ImageType;
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.ImageType;
|
||||||
|
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
import lombok.Data;
|
import lombok.Data;
|
||||||
import lombok.NonNull;
|
import lombok.NonNull;
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
|
|
||||||
@Data
|
@Data
|
||||||
@RequiredArgsConstructor
|
@RequiredArgsConstructor
|
||||||
|
@AllArgsConstructor
|
||||||
public class ClassifiedImage {
|
public class ClassifiedImage {
|
||||||
|
|
||||||
@NonNull
|
@NonNull
|
||||||
private Rectangle2D position;
|
private Rectangle2D position;
|
||||||
@NonNull
|
@NonNull
|
||||||
private ImageType imageType;
|
private ImageType imageType;
|
||||||
|
private boolean sourceByAi;
|
||||||
private boolean isAppendedToSection;
|
private boolean isAppendedToSection;
|
||||||
@NonNull
|
@NonNull
|
||||||
private boolean hasTransparency;
|
private boolean hasTransparency;
|
||||||
|
|||||||
@ -37,7 +37,7 @@ public class VisualLayoutParsingAdapter {
|
|||||||
public Map<Integer, List<ClassifiedImage>> buildExtractedSignaturesPerPage(VisualLayoutParsingResponse visualLayoutParsingResponse) {
|
public Map<Integer, List<ClassifiedImage>> buildExtractedSignaturesPerPage(VisualLayoutParsingResponse visualLayoutParsingResponse) {
|
||||||
|
|
||||||
Map<Integer, List<ClassifiedImage>> signatures = new HashMap<>();
|
Map<Integer, List<ClassifiedImage>> signatures = new HashMap<>();
|
||||||
visualLayoutParsingResponse.getData().forEach(tableData -> signatures.computeIfAbsent(tableData.getPage_idx(), tableCell -> new ArrayList<>()).addAll(convertSignatures(tableData.getPage_idx(), tableData.getBoxes())));
|
visualLayoutParsingResponse.getData().forEach(tableData -> signatures.computeIfAbsent(tableData.getPage_idx()+1, tableCell -> new ArrayList<>()).addAll(convertSignatures(tableData.getPage_idx(), tableData.getBoxes())));
|
||||||
|
|
||||||
return signatures;
|
return signatures;
|
||||||
}
|
}
|
||||||
@ -70,7 +70,7 @@ public class VisualLayoutParsingAdapter {
|
|||||||
tableObjects.stream().forEach(t -> {
|
tableObjects.stream().forEach(t -> {
|
||||||
if(t.getLabel().equals(SIGNATURES)) {
|
if(t.getLabel().equals(SIGNATURES)) {
|
||||||
ClassifiedImage signature = new ClassifiedImage(new Rectangle2D.Float(t.getBox().getX1(),t.getBox().getY1(),t.getBox().getX2() - t.getBox().getX1(),t.getBox().getY2() - t.getBox().getY1()),
|
ClassifiedImage signature = new ClassifiedImage(new Rectangle2D.Float(t.getBox().getX1(),t.getBox().getY1(),t.getBox().getX2() - t.getBox().getX1(),t.getBox().getY2() - t.getBox().getY1()),
|
||||||
ImageType.SIGNATURE,false,pageNumber);
|
ImageType.SIGNATURE,true,false,false,pageNumber);
|
||||||
|
|
||||||
signatures.add(signature);
|
signatures.add(signature);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -7,12 +7,14 @@ import static java.util.stream.Collectors.toList;
|
|||||||
import java.awt.geom.Rectangle2D;
|
import java.awt.geom.Rectangle2D;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.NoSuchElementException;
|
import java.util.NoSuchElementException;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
|
import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationDocument;
|
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationDocument;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationFooter;
|
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationFooter;
|
||||||
@ -95,14 +97,17 @@ public class DocumentGraphFactory {
|
|||||||
|
|
||||||
Rectangle2D position = image.getPosition();
|
Rectangle2D position = image.getPosition();
|
||||||
Page page = context.getPage(image.getPage());
|
Page page = context.getPage(image.getPage());
|
||||||
Image imageNode = Image.builder()
|
var imageBuilder = Image.builder()
|
||||||
.id(IdBuilder.buildId(Set.of(page), List.of(position)))
|
.id(IdBuilder.buildId(Set.of(page), List.of(position)))
|
||||||
.imageType(image.getImageType())
|
.imageType(image.getImageType())
|
||||||
.position(position)
|
.position(position)
|
||||||
.transparent(image.isHasTransparency())
|
.transparent(image.isHasTransparency())
|
||||||
.page(page)
|
.page(page)
|
||||||
.documentTree(context.getDocumentTree())
|
.documentTree(context.getDocumentTree());
|
||||||
.build();
|
if(image.isSourceByAi()) {
|
||||||
|
imageBuilder.engines(new HashSet<>(Set.of(LayoutEngine.AI)));
|
||||||
|
}
|
||||||
|
Image imageNode = imageBuilder.build();
|
||||||
page.getMainBody().add(imageNode);
|
page.getMainBody().add(imageNode);
|
||||||
|
|
||||||
List<Integer> tocId = context.getDocumentTree().createNewChildEntryAndReturnId(section, imageNode);
|
List<Integer> tocId = context.getDocumentTree().createNewChildEntryAndReturnId(section, imageNode);
|
||||||
|
|||||||
@ -80,6 +80,7 @@ public class DocumentDataMapper {
|
|||||||
.treeId(toPrimitiveIntArray(entry.getTreeId()))
|
.treeId(toPrimitiveIntArray(entry.getTreeId()))
|
||||||
.children(entry.getChildren().stream().map(DocumentDataMapper::toEntryData).toList())
|
.children(entry.getChildren().stream().map(DocumentDataMapper::toEntryData).toList())
|
||||||
.type(entry.getType())
|
.type(entry.getType())
|
||||||
|
.engines(entry.getNode().getEngines())
|
||||||
.atomicBlockIds(atomicTextBlocks)
|
.atomicBlockIds(atomicTextBlocks)
|
||||||
.pageNumbers(entry.getNode().getPages().stream().map(Page::getNumber).map(Integer::longValue).toArray(Long[]::new))
|
.pageNumbers(entry.getNode().getPages().stream().map(Page::getNumber).map(Integer::longValue).toArray(Long[]::new))
|
||||||
.properties(properties)
|
.properties(properties)
|
||||||
|
|||||||
@ -17,6 +17,7 @@ import java.util.stream.Stream;
|
|||||||
|
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
|
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeType;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Page;
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Page;
|
||||||
@ -58,52 +59,56 @@ public class LayoutGridService {
|
|||||||
static Color HEADER_COLOR = new Color(171, 131, 6);
|
static Color HEADER_COLOR = new Color(171, 131, 6);
|
||||||
static Color IMAGE_COLOR = new Color(253, 63, 146);
|
static Color IMAGE_COLOR = new Color(253, 63, 146);
|
||||||
|
|
||||||
|
static Color IMAGE_VISUAL_COLOR = new Color(122, 0, 255);
|
||||||
|
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
@Observed(name = "ViewerDocumentService", contextualName = "create-viewer-document")
|
@Observed(name = "ViewerDocumentService", contextualName = "create-viewer-document")
|
||||||
public void addLayoutGrid(File originFile, Document document, File destinationFile, boolean layerVisibilityDefaultValue) {
|
public void addLayoutGrid(File originFile, Document document, File destinationFile, boolean layerVisibilityDefaultValue) {
|
||||||
|
this.addLayoutGrid(originFile,document,destinationFile,layerVisibilityDefaultValue,false);
|
||||||
|
}
|
||||||
|
|
||||||
LayoutGrid layoutGrid = createLayoutGrid(document);
|
@SneakyThrows
|
||||||
|
@Observed(name = "ViewerDocumentService", contextualName = "create-viewer-document")
|
||||||
|
public void addLayoutGrid(File originFile, Document document, File destinationFile, boolean layerVisibilityDefaultValue, boolean visualParsingGrid) {
|
||||||
|
|
||||||
|
LayoutGrid layoutGrid = createLayoutGrid(document, visualParsingGrid);
|
||||||
|
|
||||||
viewerDocumentService.addVisualizationsOnPage(originFile,
|
viewerDocumentService.addVisualizationsOnPage(originFile,
|
||||||
destinationFile,
|
destinationFile,
|
||||||
Visualizations.builder()
|
Visualizations.builder()
|
||||||
.layer(ContentStreams.KNECON_LAYOUT)
|
.layer(visualParsingGrid ? ContentStreams.KNECON_VISUAL_PARSING : ContentStreams.KNECON_LAYOUT)
|
||||||
.visualizationsOnPages(layoutGrid.getVisualizationsPerPages())
|
.visualizationsOnPages(layoutGrid.getVisualizationsPerPages())
|
||||||
.layerVisibilityDefaultValue(layerVisibilityDefaultValue)
|
.layerVisibilityDefaultValue(layerVisibilityDefaultValue)
|
||||||
.build());
|
.build());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private LayoutGrid createLayoutGrid(Document document, boolean visualParsingGrid) {
|
||||||
private LayoutGrid createLayoutGrid(Document document) {
|
|
||||||
|
|
||||||
LayoutGrid layoutGrid = new LayoutGrid(document.getNumberOfPages());
|
LayoutGrid layoutGrid = new LayoutGrid(document.getNumberOfPages());
|
||||||
document.streamAllSubNodes().forEach(semanticNode -> {
|
document.streamAllSubNodes().filter(node -> (node.getEngines().contains(LayoutEngine.AI) && visualParsingGrid ) || (node.getEngines().contains(LayoutEngine.ALGORITHM) && !visualParsingGrid)).forEach(semanticNode -> {
|
||||||
Color color = switch (semanticNode.getType()) {
|
Color color = switch (semanticNode.getType()) {
|
||||||
case PARAGRAPH -> PARAGRAPH_COLOR;
|
case PARAGRAPH -> PARAGRAPH_COLOR;
|
||||||
case TABLE -> TABLE_COLOR;
|
case TABLE -> TABLE_COLOR;
|
||||||
case SECTION -> SECTION_COLOR;
|
case SECTION -> SECTION_COLOR;
|
||||||
case HEADLINE -> HEADLINE_COLOR;
|
case HEADLINE -> HEADLINE_COLOR;
|
||||||
case HEADER, FOOTER -> HEADER_COLOR;
|
case HEADER, FOOTER -> HEADER_COLOR;
|
||||||
case IMAGE -> IMAGE_COLOR;
|
case IMAGE -> IMAGE_COLOR;
|
||||||
default -> null;
|
default -> null;
|
||||||
};
|
};
|
||||||
if (isNotSectionOrTableCellOrDocument(semanticNode)) {
|
if (isNotSectionOrTableCellOrDocument(semanticNode)) {
|
||||||
addAsRectangle(semanticNode, layoutGrid, color);
|
addAsRectangle(semanticNode, layoutGrid, color);
|
||||||
}
|
}
|
||||||
if (semanticNode.getType().equals(NodeType.SECTION)) {
|
if (semanticNode.getType().equals(NodeType.SECTION)) {
|
||||||
addSection(semanticNode, layoutGrid, color);
|
addSection(semanticNode, layoutGrid, color);
|
||||||
}
|
}
|
||||||
if (semanticNode.getType().equals(NodeType.TABLE)) {
|
if (semanticNode.getType().equals(NodeType.TABLE)) {
|
||||||
Table table = (Table) semanticNode;
|
Table table = (Table) semanticNode;
|
||||||
addInnerTableLines(table, layoutGrid);
|
addInnerTableLines(table, layoutGrid);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
return layoutGrid;
|
return layoutGrid;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private void addInnerTableLines(Table table, LayoutGrid layoutGrid) {
|
private void addInnerTableLines(Table table, LayoutGrid layoutGrid) {
|
||||||
|
|
||||||
if (table.getNumberOfCols() < 1 || table.getNumberOfRows() < 1) {
|
if (table.getNumberOfCols() < 1 || table.getNumberOfRows() < 1) {
|
||||||
|
|||||||
@ -12,6 +12,8 @@ public class ContentStreams {
|
|||||||
|
|
||||||
public static Identifier KNECON_LAYOUT = new Identifier("Layout grid", COSName.getPDFName("KNECON_LAYOUT"), true);
|
public static Identifier KNECON_LAYOUT = new Identifier("Layout grid", COSName.getPDFName("KNECON_LAYOUT"), true);
|
||||||
|
|
||||||
|
public static Identifier KNECON_VISUAL_PARSING = new Identifier("Layout grid - visual", COSName.getPDFName("KNECON_VISUAL_PARSING"), true);
|
||||||
|
|
||||||
public static Identifier KNECON_OCR = new Identifier("OCR", COSName.getPDFName("KNECON_OCR"), false);
|
public static Identifier KNECON_OCR = new Identifier("OCR", COSName.getPDFName("KNECON_OCR"), false);
|
||||||
|
|
||||||
public static Identifier KNECON_OCR_TEXT_DEBUG = new Identifier("OCR Text", COSName.getPDFName("KNECON_OCR_TEXT_DEBUG"), true);
|
public static Identifier KNECON_OCR_TEXT_DEBUG = new Identifier("OCR Text", COSName.getPDFName("KNECON_OCR_TEXT_DEBUG"), true);
|
||||||
@ -24,7 +26,7 @@ public class ContentStreams {
|
|||||||
|
|
||||||
public static Identifier ESCAPE_END = new Identifier("escape start", COSName.getPDFName("ESCAPE_END"), false);
|
public static Identifier ESCAPE_END = new Identifier("escape start", COSName.getPDFName("ESCAPE_END"), false);
|
||||||
|
|
||||||
public static List<Identifier> allContentStreams = List.of(KNECON_LAYOUT, KNECON_OCR, KNECON_OCR_BBOX_DEBUG, KNECON_OCR_TEXT_DEBUG, OTHER, ESCAPE_START, ESCAPE_END);
|
public static List<Identifier> allContentStreams = List.of(KNECON_LAYOUT, KNECON_VISUAL_PARSING,KNECON_OCR, KNECON_OCR_BBOX_DEBUG, KNECON_OCR_TEXT_DEBUG, OTHER, ESCAPE_START, ESCAPE_END);
|
||||||
|
|
||||||
public record Identifier(String name, COSName cosName, boolean optionalContent) {
|
public record Identifier(String name, COSName cosName, boolean optionalContent) {
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user