Merge branch 'RED-8481-hotfix' into 'main'
RED-8481: Use visual layout parsing to detect signatures. See merge request fforesight/layout-parser!106
This commit is contained in:
commit
f146beeb44
@@ -42,7 +42,7 @@ public class VisualLayoutParsingAdapter {
|
||||
if (visualLayoutParsingResponse.getData() != null) {
|
||||
visualLayoutParsingResponse.getData()
|
||||
.forEach(tableData -> signatures.computeIfAbsent(tableData.getPage_idx() + 1, tableCell -> new ArrayList<>())
|
||||
.addAll(convertSignatures(tableData.getPage_idx(), tableData.getBoxes())));
|
||||
.addAll(convertSignatures(tableData.getPage_idx()+1, tableData.getBoxes())));
|
||||
}
|
||||
return signatures;
|
||||
|
||||
|
||||
@@ -2,16 +2,20 @@ package com.knecon.fforesight.service.layoutparser.processor.services.mapper;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import javax.xml.parsers.DocumentBuilder;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPage;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositionData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructure;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentTextData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Image;
|
||||
@@ -29,20 +33,27 @@ public class DocumentDataMapper {
|
||||
public DocumentData toDocumentData(Document document) {
|
||||
|
||||
List<DocumentTextData> documentTextData = document.streamTerminalTextBlocksInOrder()
|
||||
.flatMap(textBlock -> textBlock.getAtomicTextBlocks().stream())
|
||||
.flatMap(textBlock -> textBlock.getAtomicTextBlocks()
|
||||
.stream())
|
||||
.distinct()
|
||||
.map(DocumentDataMapper::toAtomicTextBlockData)
|
||||
.toList();
|
||||
|
||||
List<DocumentPositionData> atomicPositionBlockData = document.streamTerminalTextBlocksInOrder()
|
||||
.flatMap(textBlock -> textBlock.getAtomicTextBlocks().stream())
|
||||
.flatMap(textBlock -> textBlock.getAtomicTextBlocks()
|
||||
.stream())
|
||||
.distinct()
|
||||
.map(DocumentDataMapper::toAtomicPositionBlockData)
|
||||
.toList();
|
||||
|
||||
Set<Long> nonEmptyTextBlocks = documentTextData.stream().mapToLong(DocumentTextData::getId).boxed().collect(Collectors.toSet());
|
||||
Set<Long> nonEmptyTextBlocks = documentTextData.stream()
|
||||
.mapToLong(DocumentTextData::getId).boxed()
|
||||
.collect(Collectors.toSet());
|
||||
|
||||
List<DocumentPage> documentPageData = document.getPages().stream().map(DocumentDataMapper::toPageData).toList();
|
||||
List<DocumentPage> documentPageData = document.getPages()
|
||||
.stream()
|
||||
.map(DocumentDataMapper::toPageData)
|
||||
.toList();
|
||||
DocumentStructure tableOfContentsData = toDocumentTreeData(document.getDocumentTree());
|
||||
return DocumentData.builder()
|
||||
.documentTextData(documentTextData.toArray(new DocumentTextData[0]))
|
||||
@@ -76,21 +87,35 @@ public class DocumentDataMapper {
|
||||
default -> new HashMap<>();
|
||||
};
|
||||
|
||||
return DocumentStructure.EntryData.builder()
|
||||
DocumentStructure.EntryData.EntryDataBuilder documentBuilder = DocumentStructure.EntryData.builder()
|
||||
.treeId(toPrimitiveIntArray(entry.getTreeId()))
|
||||
.children(entry.getChildren().stream().map(DocumentDataMapper::toEntryData).toList())
|
||||
.children(entry.getChildren()
|
||||
.stream()
|
||||
.map(DocumentDataMapper::toEntryData)
|
||||
.toList())
|
||||
.type(entry.getType())
|
||||
.engines(entry.getNode().getEngines())
|
||||
.atomicBlockIds(atomicTextBlocks)
|
||||
.pageNumbers(entry.getNode().getPages().stream().map(Page::getNumber).map(Integer::longValue).toArray(Long[]::new))
|
||||
.properties(properties)
|
||||
.build();
|
||||
.pageNumbers(entry.getNode().getPages()
|
||||
.stream()
|
||||
.map(Page::getNumber)
|
||||
.map(Integer::longValue)
|
||||
.toArray(Long[]::new))
|
||||
.properties(properties);
|
||||
if (entry.getNode() != null) {
|
||||
documentBuilder.engines(entry.getNode().getEngines());
|
||||
} else {
|
||||
documentBuilder.engines(new HashSet<>(Set.of(LayoutEngine.ALGORITHM)));
|
||||
}
|
||||
return documentBuilder.build();
|
||||
}
|
||||
|
||||
|
||||
private Long[] toAtomicTextBlockIds(TextBlock textBlock) {
|
||||
|
||||
return textBlock.getAtomicTextBlocks().stream().map(AtomicTextBlock::getId).toArray(Long[]::new);
|
||||
return textBlock.getAtomicTextBlocks()
|
||||
.stream()
|
||||
.map(AtomicTextBlock::getId)
|
||||
.toArray(Long[]::new);
|
||||
}
|
||||
|
||||
|
||||
@@ -142,7 +167,9 @@ public class DocumentDataMapper {
|
||||
|
||||
private int[] toPrimitiveIntArray(List<Integer> list) {
|
||||
|
||||
return list.stream().mapToInt(Integer::intValue).toArray();
|
||||
return list.stream()
|
||||
.mapToInt(Integer::intValue)
|
||||
.toArray();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user