RED-8481: Use visual layout parsing to detect signatures
Added a new layer for visual parsing results and applied code-style formatting.
This commit is contained in:
parent
71477dabde
commit
2c171b6a9e
@ -1,5 +1,6 @@
|
|||||||
package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction;
|
package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction;
|
||||||
|
|
||||||
public enum LayoutEngine {
|
public enum LayoutEngine {
|
||||||
ALGORITHM, AI
|
ALGORITHM,
|
||||||
|
AI
|
||||||
}
|
}
|
||||||
|
|||||||
@ -101,8 +101,10 @@ public class LayoutParsingPipeline {
|
|||||||
.orElse(originFile);
|
.orElse(originFile);
|
||||||
|
|
||||||
VisualLayoutParsingResponse visualLayoutParsingResponse = new VisualLayoutParsingResponse();
|
VisualLayoutParsingResponse visualLayoutParsingResponse = new VisualLayoutParsingResponse();
|
||||||
if (layoutParsingRequest.visualLayoutParsingFileId().isPresent()) {
|
if (layoutParsingRequest.visualLayoutParsingFileId()
|
||||||
visualLayoutParsingResponse = layoutParsingStorageService.getVisualLayoutParsingFile(layoutParsingRequest.visualLayoutParsingFileId().get());
|
.isPresent()) {
|
||||||
|
visualLayoutParsingResponse = layoutParsingStorageService.getVisualLayoutParsingFile(layoutParsingRequest.visualLayoutParsingFileId()
|
||||||
|
.get());
|
||||||
}
|
}
|
||||||
|
|
||||||
ImageServiceResponse imageServiceResponse = new ImageServiceResponse();
|
ImageServiceResponse imageServiceResponse = new ImageServiceResponse();
|
||||||
@ -124,7 +126,7 @@ public class LayoutParsingPipeline {
|
|||||||
imageServiceResponse,
|
imageServiceResponse,
|
||||||
tableServiceResponse,
|
tableServiceResponse,
|
||||||
visualLayoutParsingResponse,
|
visualLayoutParsingResponse,
|
||||||
layoutParsingRequest.identifier().toString());
|
layoutParsingRequest.identifier().toString());
|
||||||
|
|
||||||
log.info("Building document graph for {}", layoutParsingRequest.identifier());
|
log.info("Building document graph for {}", layoutParsingRequest.identifier());
|
||||||
|
|
||||||
@ -134,7 +136,7 @@ public class LayoutParsingPipeline {
|
|||||||
|
|
||||||
layoutGridService.addLayoutGrid(viewerDocumentFile, documentGraph, viewerDocumentFile, false);
|
layoutGridService.addLayoutGrid(viewerDocumentFile, documentGraph, viewerDocumentFile, false);
|
||||||
|
|
||||||
layoutGridService.addLayoutGrid(viewerDocumentFile,documentGraph,viewerDocumentFile,false,true);
|
layoutGridService.addLayoutGrid(viewerDocumentFile, documentGraph, viewerDocumentFile, false, true);
|
||||||
|
|
||||||
log.info("Storing resulting files for {}", layoutParsingRequest.identifier());
|
log.info("Storing resulting files for {}", layoutParsingRequest.identifier());
|
||||||
|
|
||||||
@ -281,11 +283,10 @@ public class LayoutParsingPipeline {
|
|||||||
imageServiceResponseAdapter.findOcr(classificationPage);
|
imageServiceResponseAdapter.findOcr(classificationPage);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(signatures.containsKey(pageNumber)) {
|
if (signatures.containsKey(pageNumber)) {
|
||||||
if(classificationPage.getImages() == null ||classificationPage.getImages().size() == 0) {
|
if (classificationPage.getImages() == null || classificationPage.getImages().size() == 0) {
|
||||||
classificationPage.setImages(signatures.get(pageNumber));
|
classificationPage.setImages(signatures.get(pageNumber));
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
classificationPage.getImages().addAll(signatures.get(pageNumber));
|
classificationPage.getImages().addAll(signatures.get(pageNumber));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -60,13 +60,15 @@ public class Document implements GenericSemanticNode {
|
|||||||
|
|
||||||
public List<Section> getMainSections() {
|
public List<Section> getMainSections() {
|
||||||
|
|
||||||
return streamChildrenOfType(NodeType.SECTION).map(node -> (Section) node).collect(Collectors.toList());
|
return streamChildrenOfType(NodeType.SECTION).map(node -> (Section) node)
|
||||||
|
.collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public Stream<TextBlock> streamTerminalTextBlocksInOrder() {
|
public Stream<TextBlock> streamTerminalTextBlocksInOrder() {
|
||||||
|
|
||||||
return streamAllNodes().filter(SemanticNode::isLeaf).map(SemanticNode::getLeafTextBlock);
|
return streamAllNodes().filter(SemanticNode::isLeaf)
|
||||||
|
.map(SemanticNode::getLeafTextBlock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -87,13 +89,16 @@ public class Document implements GenericSemanticNode {
|
|||||||
@Override
|
@Override
|
||||||
public Headline getHeadline() {
|
public Headline getHeadline() {
|
||||||
|
|
||||||
return streamAllSubNodesOfType(NodeType.HEADLINE).map(node -> (Headline) node).findFirst().orElse(Headline.builder().build());
|
return streamAllSubNodesOfType(NodeType.HEADLINE).map(node -> (Headline) node)
|
||||||
|
.findFirst()
|
||||||
|
.orElse(Headline.builder().build());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private Stream<SemanticNode> streamAllNodes() {
|
private Stream<SemanticNode> streamAllNodes() {
|
||||||
|
|
||||||
return documentTree.allEntriesInOrder().map(DocumentTree.Entry::getNode);
|
return documentTree.allEntriesInOrder()
|
||||||
|
.map(DocumentTree.Entry::getNode);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -70,7 +70,9 @@ public class Image implements GenericSemanticNode {
|
|||||||
@Override
|
@Override
|
||||||
public TextBlock getTextBlock() {
|
public TextBlock getTextBlock() {
|
||||||
|
|
||||||
return streamAllSubNodes().filter(SemanticNode::isLeaf).map(SemanticNode::getLeafTextBlock).collect(new TextBlockCollector());
|
return streamAllSubNodes().filter(SemanticNode::isLeaf)
|
||||||
|
.map(SemanticNode::getLeafTextBlock)
|
||||||
|
.collect(new TextBlockCollector());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -53,7 +53,8 @@ public class Section implements GenericSemanticNode {
|
|||||||
|
|
||||||
public boolean hasTables() {
|
public boolean hasTables() {
|
||||||
|
|
||||||
return streamAllSubNodesOfType(NodeType.TABLE).findAny().isPresent();
|
return streamAllSubNodesOfType(NodeType.TABLE).findAny()
|
||||||
|
.isPresent();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -61,7 +62,9 @@ public class Section implements GenericSemanticNode {
|
|||||||
public TextBlock getTextBlock() {
|
public TextBlock getTextBlock() {
|
||||||
|
|
||||||
if (textBlock == null) {
|
if (textBlock == null) {
|
||||||
textBlock = streamAllSubNodes().filter(SemanticNode::isLeaf).map(SemanticNode::getLeafTextBlock).collect(new TextBlockCollector());
|
textBlock = streamAllSubNodes().filter(SemanticNode::isLeaf)
|
||||||
|
.map(SemanticNode::getLeafTextBlock)
|
||||||
|
.collect(new TextBlockCollector());
|
||||||
}
|
}
|
||||||
return textBlock;
|
return textBlock;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -22,8 +22,6 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.textbloc
|
|||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations;
|
import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations;
|
||||||
|
|
||||||
import ch.qos.logback.core.Layout;
|
|
||||||
|
|
||||||
public interface SemanticNode {
|
public interface SemanticNode {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -337,12 +335,24 @@ public interface SemanticNode {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the set of layout engines.
|
||||||
|
*
|
||||||
|
* @return the set of layout engines
|
||||||
|
*/
|
||||||
Set<LayoutEngine> getEngines();
|
Set<LayoutEngine> getEngines();
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adds a layout engine to the set.
|
||||||
|
*/
|
||||||
default void addEngine(LayoutEngine engine) {
|
default void addEngine(LayoutEngine engine) {
|
||||||
|
|
||||||
getEngines().add(engine);
|
getEngines().add(engine);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Streams all children located directly underneath this node in the DocumentTree.
|
* Streams all children located directly underneath this node in the DocumentTree.
|
||||||
*
|
*
|
||||||
@ -434,6 +444,7 @@ public interface SemanticNode {
|
|||||||
/**
|
/**
|
||||||
* TODO: this produces unwanted results for sections spanning multiple columns.
|
* TODO: this produces unwanted results for sections spanning multiple columns.
|
||||||
* Computes the Union of the bounding boxes of all children recursively.
|
* Computes the Union of the bounding boxes of all children recursively.
|
||||||
|
*
|
||||||
* @return The union of the BoundingBoxes of all children
|
* @return The union of the BoundingBoxes of all children
|
||||||
*/
|
*/
|
||||||
private Map<Page, Rectangle2D> getBBoxFromChildren() {
|
private Map<Page, Rectangle2D> getBBoxFromChildren() {
|
||||||
|
|||||||
@ -48,6 +48,7 @@ public class Table implements SemanticNode {
|
|||||||
@EqualsAndHashCode.Exclude
|
@EqualsAndHashCode.Exclude
|
||||||
Map<Page, Rectangle2D> bBoxCache;
|
Map<Page, Rectangle2D> bBoxCache;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Streams all entities in this table, that appear in a row, which contains any of the provided strings.
|
* Streams all entities in this table, that appear in a row, which contains any of the provided strings.
|
||||||
*
|
*
|
||||||
@ -56,8 +57,7 @@ public class Table implements SemanticNode {
|
|||||||
*/
|
*/
|
||||||
public Stream<RedactionEntity> streamEntitiesWhereRowContainsStringsIgnoreCase(List<String> strings) {
|
public Stream<RedactionEntity> streamEntitiesWhereRowContainsStringsIgnoreCase(List<String> strings) {
|
||||||
|
|
||||||
return IntStream.range(0, numberOfRows)
|
return IntStream.range(0, numberOfRows).boxed()
|
||||||
.boxed()
|
|
||||||
.filter(row -> rowContainsStringsIgnoreCase(row, strings))
|
.filter(row -> rowContainsStringsIgnoreCase(row, strings))
|
||||||
.flatMap(this::streamRow)
|
.flatMap(this::streamRow)
|
||||||
.map(TableCell::getEntities)
|
.map(TableCell::getEntities)
|
||||||
@ -74,8 +74,11 @@ public class Table implements SemanticNode {
|
|||||||
*/
|
*/
|
||||||
public boolean rowContainsStringsIgnoreCase(Integer row, List<String> strings) {
|
public boolean rowContainsStringsIgnoreCase(Integer row, List<String> strings) {
|
||||||
|
|
||||||
String rowText = streamRow(row).map(TableCell::getTextBlock).collect(new TextBlockCollector()).getSearchText().toLowerCase(Locale.ROOT);
|
String rowText = streamRow(row).map(TableCell::getTextBlock)
|
||||||
return strings.stream().map(String::toLowerCase).allMatch(rowText::contains);
|
.collect(new TextBlockCollector()).getSearchText().toLowerCase(Locale.ROOT);
|
||||||
|
return strings.stream()
|
||||||
|
.map(String::toLowerCase)
|
||||||
|
.allMatch(rowText::contains);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -88,9 +91,13 @@ public class Table implements SemanticNode {
|
|||||||
*/
|
*/
|
||||||
public Stream<RedactionEntity> streamEntitiesWhereRowHasHeaderAndValue(String header, String value) {
|
public Stream<RedactionEntity> streamEntitiesWhereRowHasHeaderAndValue(String header, String value) {
|
||||||
|
|
||||||
List<Integer> vertebrateStudyCols = streamHeaders().filter(headerNode -> headerNode.containsString(header)).map(TableCell::getCol).toList();
|
List<Integer> vertebrateStudyCols = streamHeaders().filter(headerNode -> headerNode.containsString(header))
|
||||||
|
.map(TableCell::getCol)
|
||||||
|
.toList();
|
||||||
return streamTableCells().filter(tableCellNode -> vertebrateStudyCols.stream()
|
return streamTableCells().filter(tableCellNode -> vertebrateStudyCols.stream()
|
||||||
.anyMatch(vertebrateStudyCol -> getCell(tableCellNode.getRow(), vertebrateStudyCol).containsString(value))).map(TableCell::getEntities).flatMap(Collection::stream);
|
.anyMatch(vertebrateStudyCol -> getCell(tableCellNode.getRow(), vertebrateStudyCol).containsString(value)))
|
||||||
|
.map(TableCell::getEntities)
|
||||||
|
.flatMap(Collection::stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -103,9 +110,13 @@ public class Table implements SemanticNode {
|
|||||||
*/
|
*/
|
||||||
public Stream<RedactionEntity> streamEntitiesWhereRowHasHeaderAndAnyValue(String header, List<String> values) {
|
public Stream<RedactionEntity> streamEntitiesWhereRowHasHeaderAndAnyValue(String header, List<String> values) {
|
||||||
|
|
||||||
List<Integer> colsWithHeader = streamHeaders().filter(headerNode -> headerNode.containsString(header)).map(TableCell::getCol).toList();
|
List<Integer> colsWithHeader = streamHeaders().filter(headerNode -> headerNode.containsString(header))
|
||||||
|
.map(TableCell::getCol)
|
||||||
|
.toList();
|
||||||
return streamTableCells().filter(tableCellNode -> colsWithHeader.stream()
|
return streamTableCells().filter(tableCellNode -> colsWithHeader.stream()
|
||||||
.anyMatch(colWithHeader -> getCell(tableCellNode.getRow(), colWithHeader).containsAnyString(values))).map(TableCell::getEntities).flatMap(Collection::stream);
|
.anyMatch(colWithHeader -> getCell(tableCellNode.getRow(), colWithHeader).containsAnyString(values)))
|
||||||
|
.map(TableCell::getEntities)
|
||||||
|
.flatMap(Collection::stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -117,12 +128,15 @@ public class Table implements SemanticNode {
|
|||||||
*/
|
*/
|
||||||
public Stream<RedactionEntity> streamEntitiesWhereRowContainsEntitiesOfType(List<String> types) {
|
public Stream<RedactionEntity> streamEntitiesWhereRowContainsEntitiesOfType(List<String> types) {
|
||||||
|
|
||||||
List<Integer> rowsWithEntityOfType = IntStream.range(0, numberOfRows)
|
List<Integer> rowsWithEntityOfType = IntStream.range(0, numberOfRows).boxed()
|
||||||
.boxed()
|
.filter(rowNumber -> streamEntityTypesInRow(rowNumber).anyMatch(existingType -> types.stream()
|
||||||
.filter(rowNumber -> streamEntityTypesInRow(rowNumber).anyMatch(existingType -> types.stream().anyMatch(typeToCheck -> typeToCheck.equals(existingType))))
|
.anyMatch(typeToCheck -> typeToCheck.equals(existingType))))
|
||||||
.toList();
|
.toList();
|
||||||
|
|
||||||
return rowsWithEntityOfType.stream().flatMap(this::streamRow).map(TableCell::getEntities).flatMap(Collection::stream);
|
return rowsWithEntityOfType.stream()
|
||||||
|
.flatMap(this::streamRow)
|
||||||
|
.map(TableCell::getEntities)
|
||||||
|
.flatMap(Collection::stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -134,18 +148,24 @@ public class Table implements SemanticNode {
|
|||||||
*/
|
*/
|
||||||
public Stream<RedactionEntity> streamEntitiesWhereRowContainsNoEntitiesOfType(List<String> types) {
|
public Stream<RedactionEntity> streamEntitiesWhereRowContainsNoEntitiesOfType(List<String> types) {
|
||||||
|
|
||||||
List<Integer> rowsWithNoEntityOfType = IntStream.range(0, numberOfRows)
|
List<Integer> rowsWithNoEntityOfType = IntStream.range(0, numberOfRows).boxed()
|
||||||
.boxed()
|
.filter(rowNumber -> streamEntityTypesInRow(rowNumber).noneMatch(existingType -> types.stream()
|
||||||
.filter(rowNumber -> streamEntityTypesInRow(rowNumber).noneMatch(existingType -> types.stream().anyMatch(typeToCheck -> typeToCheck.equals(existingType))))
|
.anyMatch(typeToCheck -> typeToCheck.equals(existingType))))
|
||||||
.toList();
|
.toList();
|
||||||
|
|
||||||
return rowsWithNoEntityOfType.stream().flatMap(this::streamRow).map(TableCell::getEntities).flatMap(Collection::stream);
|
return rowsWithNoEntityOfType.stream()
|
||||||
|
.flatMap(this::streamRow)
|
||||||
|
.map(TableCell::getEntities)
|
||||||
|
.flatMap(Collection::stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private Stream<String> streamEntityTypesInRow(Integer rowNumber) {
|
private Stream<String> streamEntityTypesInRow(Integer rowNumber) {
|
||||||
|
|
||||||
return streamRow(rowNumber).map(TableCell::getEntities).flatMap(Collection::stream).map(RedactionEntity::getType).distinct();
|
return streamRow(rowNumber).map(TableCell::getEntities)
|
||||||
|
.flatMap(Collection::stream)
|
||||||
|
.map(RedactionEntity::getType)
|
||||||
|
.distinct();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -162,7 +182,8 @@ public class Table implements SemanticNode {
|
|||||||
throw new IllegalArgumentException(format("row %d, col %d is out of bounds for number of rows of %d and number of cols %d", row, col, numberOfRows, numberOfCols));
|
throw new IllegalArgumentException(format("row %d, col %d is out of bounds for number of rows of %d and number of cols %d", row, col, numberOfRows, numberOfCols));
|
||||||
}
|
}
|
||||||
int idx = row * numberOfCols + col;
|
int idx = row * numberOfCols + col;
|
||||||
return (TableCell) documentTree.getEntryById(treeId).getChildren().get(idx).getNode();
|
return (TableCell) documentTree.getEntryById(treeId).getChildren()
|
||||||
|
.get(idx).getNode();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -199,7 +220,8 @@ public class Table implements SemanticNode {
|
|||||||
*/
|
*/
|
||||||
public Stream<TableCell> streamCol(int col) {
|
public Stream<TableCell> streamCol(int col) {
|
||||||
|
|
||||||
return IntStream.range(0, numberOfRows).boxed().map(row -> getCell(row, col));
|
return IntStream.range(0, numberOfRows).boxed()
|
||||||
|
.map(row -> getCell(row, col));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -211,9 +233,11 @@ public class Table implements SemanticNode {
|
|||||||
*/
|
*/
|
||||||
public Stream<TableCell> streamRow(int row) {
|
public Stream<TableCell> streamRow(int row) {
|
||||||
|
|
||||||
return IntStream.range(0, numberOfCols).boxed().map(col -> getCell(row, col));
|
return IntStream.range(0, numberOfCols).boxed()
|
||||||
|
.map(col -> getCell(row, col));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Streams all TableCells row-wise and filters them with header == true.
|
* Streams all TableCells row-wise and filters them with header == true.
|
||||||
*
|
*
|
||||||
@ -234,7 +258,8 @@ public class Table implements SemanticNode {
|
|||||||
*/
|
*/
|
||||||
public Stream<TableCell> streamHeadersForCell(int row, int col) {
|
public Stream<TableCell> streamHeadersForCell(int row, int col) {
|
||||||
|
|
||||||
return Stream.concat(streamRow(row), streamCol(col)).filter(TableCell::isHeader);
|
return Stream.concat(streamRow(row), streamCol(col))
|
||||||
|
.filter(TableCell::isHeader);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -307,7 +332,9 @@ public class Table implements SemanticNode {
|
|||||||
public TextBlock getTextBlock() {
|
public TextBlock getTextBlock() {
|
||||||
|
|
||||||
if (textBlock == null) {
|
if (textBlock == null) {
|
||||||
textBlock = streamAllSubNodes().filter(SemanticNode::isLeaf).map(SemanticNode::getLeafTextBlock).collect(new TextBlockCollector());
|
textBlock = streamAllSubNodes().filter(SemanticNode::isLeaf)
|
||||||
|
.map(SemanticNode::getLeafTextBlock)
|
||||||
|
.collect(new TextBlockCollector());
|
||||||
}
|
}
|
||||||
return textBlock;
|
return textBlock;
|
||||||
}
|
}
|
||||||
@ -318,6 +345,8 @@ public class Table implements SemanticNode {
|
|||||||
|
|
||||||
return treeId.toString() + ": " + NodeType.TABLE + ": #cols: " + numberOfCols + ", #rows: " + numberOfRows + ", " + this.getTextBlock().buildSummary();
|
return treeId.toString() + ": " + NodeType.TABLE + ": #cols: " + numberOfCols + ", #rows: " + numberOfRows + ", " + this.getTextBlock().buildSummary();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Map<Page, Rectangle2D> getBBox() {
|
public Map<Page, Rectangle2D> getBBox() {
|
||||||
|
|
||||||
@ -326,4 +355,5 @@ public class Table implements SemanticNode {
|
|||||||
}
|
}
|
||||||
return bBoxCache;
|
return bBoxCache;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -24,6 +24,7 @@ public class VisualLayoutParsingAdapter {
|
|||||||
|
|
||||||
private static String SIGNATURES = "signature";
|
private static String SIGNATURES = "signature";
|
||||||
|
|
||||||
|
|
||||||
public Map<Integer, List<VisualLayoutParsingResult>> buildExtractedTablesPerPage(VisualLayoutParsingResponse visualLayoutParsingResponse) {
|
public Map<Integer, List<VisualLayoutParsingResult>> buildExtractedTablesPerPage(VisualLayoutParsingResponse visualLayoutParsingResponse) {
|
||||||
|
|
||||||
Map<Integer, List<VisualLayoutParsingResult>> tableCells = new HashMap<>();
|
Map<Integer, List<VisualLayoutParsingResult>> tableCells = new HashMap<>();
|
||||||
@ -34,10 +35,11 @@ public class VisualLayoutParsingAdapter {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public Map<Integer, List<ClassifiedImage>> buildExtractedSignaturesPerPage(VisualLayoutParsingResponse visualLayoutParsingResponse) {
|
public Map<Integer, List<ClassifiedImage>> buildExtractedSignaturesPerPage(VisualLayoutParsingResponse visualLayoutParsingResponse) {
|
||||||
|
|
||||||
Map<Integer, List<ClassifiedImage>> signatures = new HashMap<>();
|
Map<Integer, List<ClassifiedImage>> signatures = new HashMap<>();
|
||||||
if(visualLayoutParsingResponse.getData() != null) {
|
if (visualLayoutParsingResponse.getData() != null) {
|
||||||
visualLayoutParsingResponse.getData()
|
visualLayoutParsingResponse.getData()
|
||||||
.forEach(tableData -> signatures.computeIfAbsent(tableData.getPage_idx() + 1, tableCell -> new ArrayList<>())
|
.forEach(tableData -> signatures.computeIfAbsent(tableData.getPage_idx() + 1, tableCell -> new ArrayList<>())
|
||||||
.addAll(convertSignatures(tableData.getPage_idx(), tableData.getBoxes())));
|
.addAll(convertSignatures(tableData.getPage_idx(), tableData.getBoxes())));
|
||||||
@ -67,14 +69,17 @@ public class VisualLayoutParsingAdapter {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public List<ClassifiedImage> convertSignatures(int pageNumber, List<VisualLayoutParsingBox> tableObjects) {
|
public List<ClassifiedImage> convertSignatures(int pageNumber, List<VisualLayoutParsingBox> tableObjects) {
|
||||||
|
|
||||||
List<ClassifiedImage> signatures = new ArrayList<>();
|
List<ClassifiedImage> signatures = new ArrayList<>();
|
||||||
|
|
||||||
tableObjects.stream().forEach(t -> {
|
tableObjects.stream().forEach(t -> {
|
||||||
if(t.getLabel().equals(SIGNATURES)) {
|
if (t.getLabel().equals(SIGNATURES)) {
|
||||||
ClassifiedImage signature = new ClassifiedImage(new Rectangle2D.Float(t.getBox().getX1(),t.getBox().getY1(),t.getBox().getX2() - t.getBox().getX1(),t.getBox().getY2() - t.getBox().getY1()),
|
ClassifiedImage signature = new ClassifiedImage(new Rectangle2D.Float(t.getBox().getX1(),
|
||||||
ImageType.SIGNATURE,true,false,false,pageNumber);
|
t.getBox().getY1(),
|
||||||
|
t.getBox().getX2() - t.getBox().getX1(),
|
||||||
|
t.getBox().getY2() - t.getBox().getY1()), ImageType.SIGNATURE, true, false, false, pageNumber);
|
||||||
|
|
||||||
signatures.add(signature);
|
signatures.add(signature);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -20,8 +20,6 @@ import com.knecon.fforesight.service.layoutparser.processor.model.Classification
|
|||||||
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationFooter;
|
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationFooter;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationHeader;
|
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationHeader;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
|
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
|
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
|
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Footer;
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Footer;
|
||||||
@ -33,6 +31,8 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Pa
|
|||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Paragraph;
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Paragraph;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Section;
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Section;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.AtomicTextBlock;
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.AtomicTextBlock;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.utils.IdBuilder;
|
import com.knecon.fforesight.service.layoutparser.processor.utils.IdBuilder;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.utils.TextPositionOperations;
|
import com.knecon.fforesight.service.layoutparser.processor.utils.TextPositionOperations;
|
||||||
|
|
||||||
@ -104,7 +104,7 @@ public class DocumentGraphFactory {
|
|||||||
.transparent(image.isHasTransparency())
|
.transparent(image.isHasTransparency())
|
||||||
.page(page)
|
.page(page)
|
||||||
.documentTree(context.getDocumentTree());
|
.documentTree(context.getDocumentTree());
|
||||||
if(image.isSourceByAi()) {
|
if (image.isSourceByAi()) {
|
||||||
imageBuilder.engines(new HashSet<>(Set.of(LayoutEngine.AI)));
|
imageBuilder.engines(new HashSet<>(Set.of(LayoutEngine.AI)));
|
||||||
}
|
}
|
||||||
Image imageNode = imageBuilder.build();
|
Image imageNode = imageBuilder.build();
|
||||||
|
|||||||
@ -26,7 +26,14 @@ public class ContentStreams {
|
|||||||
|
|
||||||
public static Identifier ESCAPE_END = new Identifier("escape start", COSName.getPDFName("ESCAPE_END"), false);
|
public static Identifier ESCAPE_END = new Identifier("escape start", COSName.getPDFName("ESCAPE_END"), false);
|
||||||
|
|
||||||
public static List<Identifier> allContentStreams = List.of(KNECON_LAYOUT, KNECON_VISUAL_PARSING,KNECON_OCR, KNECON_OCR_BBOX_DEBUG, KNECON_OCR_TEXT_DEBUG, OTHER, ESCAPE_START, ESCAPE_END);
|
public static List<Identifier> allContentStreams = List.of(KNECON_LAYOUT,
|
||||||
|
KNECON_VISUAL_PARSING,
|
||||||
|
KNECON_OCR,
|
||||||
|
KNECON_OCR_BBOX_DEBUG,
|
||||||
|
KNECON_OCR_TEXT_DEBUG,
|
||||||
|
OTHER,
|
||||||
|
ESCAPE_START,
|
||||||
|
ESCAPE_END);
|
||||||
|
|
||||||
public record Identifier(String name, COSName cosName, boolean optionalContent) {
|
public record Identifier(String name, COSName cosName, boolean optionalContent) {
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user