diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/VisualLayoutParsingAdapter.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/VisualLayoutParsingAdapter.java index 6fe668b..9972310 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/VisualLayoutParsingAdapter.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/VisualLayoutParsingAdapter.java @@ -42,7 +42,7 @@ public class VisualLayoutParsingAdapter { if (visualLayoutParsingResponse.getData() != null) { visualLayoutParsingResponse.getData() .forEach(tableData -> signatures.computeIfAbsent(tableData.getPage_idx() + 1, tableCell -> new ArrayList<>()) - .addAll(convertSignatures(tableData.getPage_idx(), tableData.getBoxes()))); + .addAll(convertSignatures(tableData.getPage_idx()+1, tableData.getBoxes()))); } return signatures; diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/mapper/DocumentDataMapper.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/mapper/DocumentDataMapper.java index 4f05c1b..ac42f15 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/mapper/DocumentDataMapper.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/mapper/DocumentDataMapper.java @@ -2,16 +2,20 @@ package com.knecon.fforesight.service.layoutparser.processor.services.mapper; import java.awt.geom.Rectangle2D; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.stream.Collectors; +import javax.xml.parsers.DocumentBuilder; + import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentData; import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPage; import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositionData; import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructure; import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentTextData; +import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine; import com.knecon.fforesight.service.layoutparser.processor.model.graph.DocumentTree; import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document; import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Image; @@ -29,20 +33,27 @@ public class DocumentDataMapper { public DocumentData toDocumentData(Document document) { List documentTextData = document.streamTerminalTextBlocksInOrder() - .flatMap(textBlock -> textBlock.getAtomicTextBlocks().stream()) + .flatMap(textBlock -> textBlock.getAtomicTextBlocks() + .stream()) .distinct() .map(DocumentDataMapper::toAtomicTextBlockData) .toList(); List atomicPositionBlockData = document.streamTerminalTextBlocksInOrder() - .flatMap(textBlock -> textBlock.getAtomicTextBlocks().stream()) + .flatMap(textBlock -> textBlock.getAtomicTextBlocks() + .stream()) .distinct() .map(DocumentDataMapper::toAtomicPositionBlockData) .toList(); - Set nonEmptyTextBlocks = documentTextData.stream().mapToLong(DocumentTextData::getId).boxed().collect(Collectors.toSet()); + Set nonEmptyTextBlocks = documentTextData.stream() + .mapToLong(DocumentTextData::getId).boxed() + .collect(Collectors.toSet()); - List documentPageData = document.getPages().stream().map(DocumentDataMapper::toPageData).toList(); + List documentPageData = document.getPages() + .stream() + .map(DocumentDataMapper::toPageData) + .toList(); DocumentStructure tableOfContentsData = toDocumentTreeData(document.getDocumentTree()); return DocumentData.builder() .documentTextData(documentTextData.toArray(new DocumentTextData[0])) @@ -76,21 +87,35 @@ public class DocumentDataMapper { default -> new HashMap<>(); }; - return DocumentStructure.EntryData.builder() + DocumentStructure.EntryData.EntryDataBuilder documentBuilder = DocumentStructure.EntryData.builder() .treeId(toPrimitiveIntArray(entry.getTreeId())) - .children(entry.getChildren().stream().map(DocumentDataMapper::toEntryData).toList()) + .children(entry.getChildren() + .stream() + .map(DocumentDataMapper::toEntryData) + .toList()) .type(entry.getType()) - .engines(entry.getNode().getEngines()) .atomicBlockIds(atomicTextBlocks) - .pageNumbers(entry.getNode().getPages().stream().map(Page::getNumber).map(Integer::longValue).toArray(Long[]::new)) - .properties(properties) - .build(); + .pageNumbers(entry.getNode().getPages() + .stream() + .map(Page::getNumber) + .map(Integer::longValue) + .toArray(Long[]::new)) + .properties(properties); + if (entry.getNode() != null) { + documentBuilder.engines(entry.getNode().getEngines()); + } else { + documentBuilder.engines(new HashSet<>(Set.of(LayoutEngine.ALGORITHM))); + } + return documentBuilder.build(); } private Long[] toAtomicTextBlockIds(TextBlock textBlock) { - return textBlock.getAtomicTextBlocks().stream().map(AtomicTextBlock::getId).toArray(Long[]::new); + return textBlock.getAtomicTextBlocks() + .stream() + .map(AtomicTextBlock::getId) + .toArray(Long[]::new); } @@ -142,7 +167,9 @@ public class DocumentDataMapper { private int[] toPrimitiveIntArray(List list) { - return list.stream().mapToInt(Integer::intValue).toArray(); + return list.stream() + .mapToInt(Integer::intValue) + .toArray(); } }