RED-8481: Use visual layout parsing to detect signatures
Fixed some NullPointerExceptions
This commit is contained in:
parent
c324d3815e
commit
a6ba501fa8
@ -42,7 +42,7 @@ public class VisualLayoutParsingAdapter {
|
|||||||
if (visualLayoutParsingResponse.getData() != null) {
|
if (visualLayoutParsingResponse.getData() != null) {
|
||||||
visualLayoutParsingResponse.getData()
|
visualLayoutParsingResponse.getData()
|
||||||
.forEach(tableData -> signatures.computeIfAbsent(tableData.getPage_idx() + 1, tableCell -> new ArrayList<>())
|
.forEach(tableData -> signatures.computeIfAbsent(tableData.getPage_idx() + 1, tableCell -> new ArrayList<>())
|
||||||
.addAll(convertSignatures(tableData.getPage_idx(), tableData.getBoxes())));
|
.addAll(convertSignatures(tableData.getPage_idx()+1, tableData.getBoxes())));
|
||||||
}
|
}
|
||||||
return signatures;
|
return signatures;
|
||||||
|
|
||||||
|
|||||||
@ -2,16 +2,20 @@ package com.knecon.fforesight.service.layoutparser.processor.services.mapper;
|
|||||||
|
|
||||||
import java.awt.geom.Rectangle2D;
|
import java.awt.geom.Rectangle2D;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import javax.xml.parsers.DocumentBuilder;
|
||||||
|
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentData;
|
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentData;
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPage;
|
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPage;
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositionData;
|
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositionData;
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructure;
|
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructure;
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentTextData;
|
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentTextData;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Image;
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Image;
|
||||||
@ -29,20 +33,27 @@ public class DocumentDataMapper {
|
|||||||
public DocumentData toDocumentData(Document document) {
|
public DocumentData toDocumentData(Document document) {
|
||||||
|
|
||||||
List<DocumentTextData> documentTextData = document.streamTerminalTextBlocksInOrder()
|
List<DocumentTextData> documentTextData = document.streamTerminalTextBlocksInOrder()
|
||||||
.flatMap(textBlock -> textBlock.getAtomicTextBlocks().stream())
|
.flatMap(textBlock -> textBlock.getAtomicTextBlocks()
|
||||||
|
.stream())
|
||||||
.distinct()
|
.distinct()
|
||||||
.map(DocumentDataMapper::toAtomicTextBlockData)
|
.map(DocumentDataMapper::toAtomicTextBlockData)
|
||||||
.toList();
|
.toList();
|
||||||
|
|
||||||
List<DocumentPositionData> atomicPositionBlockData = document.streamTerminalTextBlocksInOrder()
|
List<DocumentPositionData> atomicPositionBlockData = document.streamTerminalTextBlocksInOrder()
|
||||||
.flatMap(textBlock -> textBlock.getAtomicTextBlocks().stream())
|
.flatMap(textBlock -> textBlock.getAtomicTextBlocks()
|
||||||
|
.stream())
|
||||||
.distinct()
|
.distinct()
|
||||||
.map(DocumentDataMapper::toAtomicPositionBlockData)
|
.map(DocumentDataMapper::toAtomicPositionBlockData)
|
||||||
.toList();
|
.toList();
|
||||||
|
|
||||||
Set<Long> nonEmptyTextBlocks = documentTextData.stream().mapToLong(DocumentTextData::getId).boxed().collect(Collectors.toSet());
|
Set<Long> nonEmptyTextBlocks = documentTextData.stream()
|
||||||
|
.mapToLong(DocumentTextData::getId).boxed()
|
||||||
|
.collect(Collectors.toSet());
|
||||||
|
|
||||||
List<DocumentPage> documentPageData = document.getPages().stream().map(DocumentDataMapper::toPageData).toList();
|
List<DocumentPage> documentPageData = document.getPages()
|
||||||
|
.stream()
|
||||||
|
.map(DocumentDataMapper::toPageData)
|
||||||
|
.toList();
|
||||||
DocumentStructure tableOfContentsData = toDocumentTreeData(document.getDocumentTree());
|
DocumentStructure tableOfContentsData = toDocumentTreeData(document.getDocumentTree());
|
||||||
return DocumentData.builder()
|
return DocumentData.builder()
|
||||||
.documentTextData(documentTextData.toArray(new DocumentTextData[0]))
|
.documentTextData(documentTextData.toArray(new DocumentTextData[0]))
|
||||||
@ -76,21 +87,35 @@ public class DocumentDataMapper {
|
|||||||
default -> new HashMap<>();
|
default -> new HashMap<>();
|
||||||
};
|
};
|
||||||
|
|
||||||
return DocumentStructure.EntryData.builder()
|
DocumentStructure.EntryData.EntryDataBuilder documentBuilder = DocumentStructure.EntryData.builder()
|
||||||
.treeId(toPrimitiveIntArray(entry.getTreeId()))
|
.treeId(toPrimitiveIntArray(entry.getTreeId()))
|
||||||
.children(entry.getChildren().stream().map(DocumentDataMapper::toEntryData).toList())
|
.children(entry.getChildren()
|
||||||
|
.stream()
|
||||||
|
.map(DocumentDataMapper::toEntryData)
|
||||||
|
.toList())
|
||||||
.type(entry.getType())
|
.type(entry.getType())
|
||||||
.engines(entry.getNode().getEngines())
|
|
||||||
.atomicBlockIds(atomicTextBlocks)
|
.atomicBlockIds(atomicTextBlocks)
|
||||||
.pageNumbers(entry.getNode().getPages().stream().map(Page::getNumber).map(Integer::longValue).toArray(Long[]::new))
|
.pageNumbers(entry.getNode().getPages()
|
||||||
.properties(properties)
|
.stream()
|
||||||
.build();
|
.map(Page::getNumber)
|
||||||
|
.map(Integer::longValue)
|
||||||
|
.toArray(Long[]::new))
|
||||||
|
.properties(properties);
|
||||||
|
if (entry.getNode() != null) {
|
||||||
|
documentBuilder.engines(entry.getNode().getEngines());
|
||||||
|
} else {
|
||||||
|
documentBuilder.engines(new HashSet<>(Set.of(LayoutEngine.ALGORITHM)));
|
||||||
|
}
|
||||||
|
return documentBuilder.build();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private Long[] toAtomicTextBlockIds(TextBlock textBlock) {
|
private Long[] toAtomicTextBlockIds(TextBlock textBlock) {
|
||||||
|
|
||||||
return textBlock.getAtomicTextBlocks().stream().map(AtomicTextBlock::getId).toArray(Long[]::new);
|
return textBlock.getAtomicTextBlocks()
|
||||||
|
.stream()
|
||||||
|
.map(AtomicTextBlock::getId)
|
||||||
|
.toArray(Long[]::new);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -142,7 +167,9 @@ public class DocumentDataMapper {
|
|||||||
|
|
||||||
private int[] toPrimitiveIntArray(List<Integer> list) {
|
private int[] toPrimitiveIntArray(List<Integer> list) {
|
||||||
|
|
||||||
return list.stream().mapToInt(Integer::intValue).toArray();
|
return list.stream()
|
||||||
|
.mapToInt(Integer::intValue)
|
||||||
|
.toArray();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user