Merge branch 'RED-8873-bp' into 'release/0.89.x'
RED-8773 - Fix images not appearing on a specific file. See merge request fforesight/layout-parser!124
This commit is contained in:
commit
cafbcbefc6
@ -49,9 +49,14 @@ public class DocumentGraphFactory {
|
|||||||
Document documentGraph = new Document();
|
Document documentGraph = new Document();
|
||||||
Context context = new Context(documentGraph);
|
Context context = new Context(documentGraph);
|
||||||
|
|
||||||
document.getPages().forEach(context::buildAndAddPageWithCounter);
|
document.getPages()
|
||||||
document.getSections().stream().flatMap(section -> section.getImages().stream()).forEach(image -> context.getImages().add(image));
|
.forEach(context::buildAndAddPageWithCounter);
|
||||||
addSections(document, context);
|
document.getSections()
|
||||||
|
.stream()
|
||||||
|
.flatMap(section -> section.getImages()
|
||||||
|
.stream())
|
||||||
|
.forEach(image -> context.getImages().add(image));
|
||||||
|
addSections(document, context, documentGraph);
|
||||||
addHeaderAndFooterToEachPage(document, context);
|
addHeaderAndFooterToEachPage(document, context);
|
||||||
|
|
||||||
documentGraph.setNumberOfPages(context.pages.size());
|
documentGraph.setNumberOfPages(context.pages.size());
|
||||||
@ -62,9 +67,10 @@ public class DocumentGraphFactory {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private void addSections(ClassificationDocument document, Context context) {
|
private void addSections(ClassificationDocument classificationDocument, Context context, Document document) {
|
||||||
|
|
||||||
document.getSections().forEach(section -> SectionNodeFactory.addSection(null, section.getNonEmptyPageBlocks(), section.getImages(), context));
|
classificationDocument.getSections()
|
||||||
|
.forEach(section -> SectionNodeFactory.addSection(null, section.getNonEmptyPageBlocks(), section.getImages(), context, document));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -74,9 +80,11 @@ public class DocumentGraphFactory {
|
|||||||
|
|
||||||
GenericSemanticNode node;
|
GenericSemanticNode node;
|
||||||
if (originalTextBlock.isHeadline()) {
|
if (originalTextBlock.isHeadline()) {
|
||||||
node = Headline.builder().documentTree(context.getDocumentTree()).build();
|
node = Headline.builder().documentTree(context.getDocumentTree())
|
||||||
|
.build();
|
||||||
} else {
|
} else {
|
||||||
node = Paragraph.builder().documentTree(context.getDocumentTree()).build();
|
node = Paragraph.builder().documentTree(context.getDocumentTree())
|
||||||
|
.build();
|
||||||
}
|
}
|
||||||
|
|
||||||
page.getMainBody().add(node);
|
page.getMainBody().add(node);
|
||||||
@ -93,6 +101,22 @@ public class DocumentGraphFactory {
|
|||||||
|
|
||||||
public void addImage(Section section, ClassifiedImage image, Context context) {
|
public void addImage(Section section, ClassifiedImage image, Context context) {
|
||||||
|
|
||||||
|
Image imageNode = createImage(image, context);
|
||||||
|
List<Integer> treeId = context.getDocumentTree().createNewChildEntryAndReturnId(section, imageNode);
|
||||||
|
imageNode.setTreeId(treeId);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public void addImage(Document document, ClassifiedImage image, Context context) {
|
||||||
|
|
||||||
|
Image imageNode = createImage(image, context);
|
||||||
|
List<Integer> treeId = context.getDocumentTree().createNewChildEntryAndReturnId(document, imageNode);
|
||||||
|
imageNode.setTreeId(treeId);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private Image createImage(ClassifiedImage image, Context context) {
|
||||||
|
|
||||||
Rectangle2D position = image.getPosition();
|
Rectangle2D position = image.getPosition();
|
||||||
Page page = context.getPage(image.getPage());
|
Page page = context.getPage(image.getPage());
|
||||||
Image imageNode = Image.builder()
|
Image imageNode = Image.builder()
|
||||||
@ -104,9 +128,7 @@ public class DocumentGraphFactory {
|
|||||||
.documentTree(context.getDocumentTree())
|
.documentTree(context.getDocumentTree())
|
||||||
.build();
|
.build();
|
||||||
page.getMainBody().add(imageNode);
|
page.getMainBody().add(imageNode);
|
||||||
|
return imageNode;
|
||||||
List<Integer> treeId = context.getDocumentTree().createNewChildEntryAndReturnId(section, imageNode);
|
|
||||||
imageNode.setTreeId(treeId);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -145,11 +167,12 @@ public class DocumentGraphFactory {
|
|||||||
private void addFooter(List<TextPageBlock> textBlocks, Context context) {
|
private void addFooter(List<TextPageBlock> textBlocks, Context context) {
|
||||||
|
|
||||||
Page page = context.getPage(textBlocks.get(0).getPage());
|
Page page = context.getPage(textBlocks.get(0).getPage());
|
||||||
Footer footer = Footer.builder().documentTree(context.getDocumentTree()).build();
|
Footer footer = Footer.builder().documentTree(context.getDocumentTree())
|
||||||
|
.build();
|
||||||
AtomicTextBlock textBlock = context.textBlockFactory.buildAtomicTextBlock(TextPositionOperations.mergeAndSortTextPositionSequenceByYThenX(textBlocks),
|
AtomicTextBlock textBlock = context.textBlockFactory.buildAtomicTextBlock(TextPositionOperations.mergeAndSortTextPositionSequenceByYThenX(textBlocks),
|
||||||
footer,
|
footer,
|
||||||
context,
|
context,
|
||||||
page);
|
page);
|
||||||
List<Integer> tocId = context.getDocumentTree().createNewMainEntryAndReturnId(footer);
|
List<Integer> tocId = context.getDocumentTree().createNewMainEntryAndReturnId(footer);
|
||||||
footer.setTreeId(tocId);
|
footer.setTreeId(tocId);
|
||||||
footer.setLeafTextBlock(textBlock);
|
footer.setLeafTextBlock(textBlock);
|
||||||
@ -160,7 +183,8 @@ public class DocumentGraphFactory {
|
|||||||
public void addHeader(List<TextPageBlock> textBlocks, Context context) {
|
public void addHeader(List<TextPageBlock> textBlocks, Context context) {
|
||||||
|
|
||||||
Page page = context.getPage(textBlocks.get(0).getPage());
|
Page page = context.getPage(textBlocks.get(0).getPage());
|
||||||
Header header = Header.builder().documentTree(context.getDocumentTree()).build();
|
Header header = Header.builder().documentTree(context.getDocumentTree())
|
||||||
|
.build();
|
||||||
AtomicTextBlock textBlock = context.textBlockFactory.buildAtomicTextBlock(TextPositionOperations.mergeAndSortTextPositionSequenceByYThenX(textBlocks), header, 0, page);
|
AtomicTextBlock textBlock = context.textBlockFactory.buildAtomicTextBlock(TextPositionOperations.mergeAndSortTextPositionSequenceByYThenX(textBlocks), header, 0, page);
|
||||||
List<Integer> tocId = context.getDocumentTree().createNewMainEntryAndReturnId(header);
|
List<Integer> tocId = context.getDocumentTree().createNewMainEntryAndReturnId(header);
|
||||||
header.setTreeId(tocId);
|
header.setTreeId(tocId);
|
||||||
@ -172,7 +196,8 @@ public class DocumentGraphFactory {
|
|||||||
private void addEmptyFooter(int pageIndex, Context context) {
|
private void addEmptyFooter(int pageIndex, Context context) {
|
||||||
|
|
||||||
Page page = context.getPage(pageIndex);
|
Page page = context.getPage(pageIndex);
|
||||||
Footer footer = Footer.builder().documentTree(context.getDocumentTree()).build();
|
Footer footer = Footer.builder().documentTree(context.getDocumentTree())
|
||||||
|
.build();
|
||||||
AtomicTextBlock textBlock = context.textBlockFactory.emptyTextBlock(footer, context, page);
|
AtomicTextBlock textBlock = context.textBlockFactory.emptyTextBlock(footer, context, page);
|
||||||
List<Integer> tocId = context.getDocumentTree().createNewMainEntryAndReturnId(footer);
|
List<Integer> tocId = context.getDocumentTree().createNewMainEntryAndReturnId(footer);
|
||||||
footer.setTreeId(tocId);
|
footer.setTreeId(tocId);
|
||||||
@ -184,7 +209,8 @@ public class DocumentGraphFactory {
|
|||||||
private void addEmptyHeader(int pageIndex, Context context) {
|
private void addEmptyHeader(int pageIndex, Context context) {
|
||||||
|
|
||||||
Page page = context.getPage(pageIndex);
|
Page page = context.getPage(pageIndex);
|
||||||
Header header = Header.builder().documentTree(context.getDocumentTree()).build();
|
Header header = Header.builder().documentTree(context.getDocumentTree())
|
||||||
|
.build();
|
||||||
AtomicTextBlock textBlock = context.textBlockFactory.emptyTextBlock(header, 0, page);
|
AtomicTextBlock textBlock = context.textBlockFactory.emptyTextBlock(header, 0, page);
|
||||||
List<Integer> tocId = context.getDocumentTree().createNewMainEntryAndReturnId(header);
|
List<Integer> tocId = context.getDocumentTree().createNewMainEntryAndReturnId(header);
|
||||||
header.setTreeId(tocId);
|
header.setTreeId(tocId);
|
||||||
|
|||||||
@ -11,6 +11,7 @@ import java.util.Map;
|
|||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
|
import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
|
import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock;
|
import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
|
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
|
||||||
@ -24,27 +25,46 @@ import lombok.experimental.UtilityClass;
|
|||||||
@UtilityClass
|
@UtilityClass
|
||||||
public class SectionNodeFactory {
|
public class SectionNodeFactory {
|
||||||
|
|
||||||
public void addSection(GenericSemanticNode parentNode, List<AbstractPageBlock> pageBlocks, List<ClassifiedImage> images, DocumentGraphFactory.Context context) {
|
public void addSection(GenericSemanticNode parentNode,
|
||||||
|
List<AbstractPageBlock> pageBlocks,
|
||||||
|
List<ClassifiedImage> images,
|
||||||
|
DocumentGraphFactory.Context context,
|
||||||
|
Document document) {
|
||||||
|
|
||||||
|
// This is for the case where we have images on a page without any text/footer/header.
|
||||||
|
// The pageBlocks list is empty, but we still need to add those images to the document.
|
||||||
|
if (!images.isEmpty() && pageBlocks.isEmpty()) {
|
||||||
|
images.stream()
|
||||||
|
.distinct()
|
||||||
|
.forEach(image -> DocumentGraphFactory.addImage(document, image, context));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (pageBlocks.isEmpty()) {
|
if (pageBlocks.isEmpty()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
Map<Integer, List<AbstractPageBlock>> blocksPerPage = pageBlocks.stream().collect(groupingBy(AbstractPageBlock::getPage));
|
|
||||||
Section section = Section.builder().documentTree(context.getDocumentTree()).build();
|
Map<Integer, List<AbstractPageBlock>> blocksPerPage = pageBlocks.stream()
|
||||||
|
.collect(groupingBy(AbstractPageBlock::getPage));
|
||||||
|
Section section = Section.builder().documentTree(context.getDocumentTree())
|
||||||
|
.build();
|
||||||
|
|
||||||
context.getSections().add(section);
|
context.getSections().add(section);
|
||||||
blocksPerPage.keySet().forEach(pageNumber -> addSectionNodeToPageNode(context, section, pageNumber));
|
blocksPerPage.keySet()
|
||||||
|
.forEach(pageNumber -> addSectionNodeToPageNode(context, section, pageNumber));
|
||||||
|
|
||||||
section.setTreeId(getTreeId(parentNode, context, section));
|
section.setTreeId(getTreeId(parentNode, context, section));
|
||||||
|
|
||||||
addFirstHeadlineDirectlyToSection(pageBlocks, context, section);
|
addFirstHeadlineDirectlyToSection(pageBlocks, context, section, document);
|
||||||
if (containsTablesAndTextBlocks(pageBlocks)) {
|
if (containsTablesAndTextBlocks(pageBlocks)) {
|
||||||
splitPageBlocksIntoSubSections(pageBlocks).forEach(subSectionPageBlocks -> addSection(section, subSectionPageBlocks, emptyList(), context));
|
splitPageBlocksIntoSubSections(pageBlocks).forEach(subSectionPageBlocks -> addSection(section, subSectionPageBlocks, emptyList(), context, document));
|
||||||
} else {
|
} else {
|
||||||
addTablesAndParagraphsAndHeadlinesToSection(pageBlocks, context, section);
|
addTablesAndParagraphsAndHeadlinesToSection(pageBlocks, context, section, document);
|
||||||
}
|
}
|
||||||
|
|
||||||
images.stream().distinct().forEach(image -> DocumentGraphFactory.addImage(section, image, context));
|
images.stream()
|
||||||
|
.distinct()
|
||||||
|
.forEach(image -> DocumentGraphFactory.addImage(section, image, context));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -58,16 +78,16 @@ public class SectionNodeFactory {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private void addFirstHeadlineDirectlyToSection(List<AbstractPageBlock> pageBlocks, DocumentGraphFactory.Context context, Section section) {
|
private void addFirstHeadlineDirectlyToSection(List<AbstractPageBlock> pageBlocks, DocumentGraphFactory.Context context, Section section, Document document) {
|
||||||
|
|
||||||
if (pageBlocks.get(0).isHeadline()) {
|
if (pageBlocks.get(0).isHeadline()) {
|
||||||
addTablesAndParagraphsAndHeadlinesToSection(List.of(pageBlocks.get(0)), context, section);
|
addTablesAndParagraphsAndHeadlinesToSection(List.of(pageBlocks.get(0)), context, section, document);
|
||||||
pageBlocks.remove(0);
|
pageBlocks.remove(0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private void addTablesAndParagraphsAndHeadlinesToSection(List<AbstractPageBlock> pageBlocks, DocumentGraphFactory.Context context, Section section) {
|
private void addTablesAndParagraphsAndHeadlinesToSection(List<AbstractPageBlock> pageBlocks, DocumentGraphFactory.Context context, Section section, Document document) {
|
||||||
|
|
||||||
Set<AbstractPageBlock> alreadyMerged = new HashSet<>();
|
Set<AbstractPageBlock> alreadyMerged = new HashSet<>();
|
||||||
List<AbstractPageBlock> remainingBlocks = new LinkedList<>(pageBlocks);
|
List<AbstractPageBlock> remainingBlocks = new LinkedList<>(pageBlocks);
|
||||||
@ -86,7 +106,7 @@ public class SectionNodeFactory {
|
|||||||
} else if (abstractPageBlock instanceof TablePageBlock tablePageBlock) {
|
} else if (abstractPageBlock instanceof TablePageBlock tablePageBlock) {
|
||||||
List<TablePageBlock> tablesToMerge = TableMergingUtility.findConsecutiveTablesWithSameColCountAndSameHeaders(tablePageBlock, remainingBlocks);
|
List<TablePageBlock> tablesToMerge = TableMergingUtility.findConsecutiveTablesWithSameColCountAndSameHeaders(tablePageBlock, remainingBlocks);
|
||||||
alreadyMerged.addAll(tablesToMerge);
|
alreadyMerged.addAll(tablesToMerge);
|
||||||
TableNodeFactory.addTable(section, tablesToMerge, context);
|
TableNodeFactory.addTable(section, tablesToMerge, context, document);
|
||||||
} else {
|
} else {
|
||||||
throw new RuntimeException(format("Unhandled AbstractPageBlockType %s!", abstractPageBlock.getClass()));
|
throw new RuntimeException(format("Unhandled AbstractPageBlockType %s!", abstractPageBlock.getClass()));
|
||||||
}
|
}
|
||||||
@ -96,7 +116,9 @@ public class SectionNodeFactory {
|
|||||||
|
|
||||||
private boolean containsTablesAndTextBlocks(List<AbstractPageBlock> pageBlocks) {
|
private boolean containsTablesAndTextBlocks(List<AbstractPageBlock> pageBlocks) {
|
||||||
|
|
||||||
return pageBlocks.stream().anyMatch(pageBlock -> pageBlock instanceof TablePageBlock) && pageBlocks.stream().anyMatch(pageBlock -> pageBlock instanceof TextPageBlock);
|
return pageBlocks.stream()
|
||||||
|
.anyMatch(pageBlock -> pageBlock instanceof TablePageBlock) && pageBlocks.stream()
|
||||||
|
.anyMatch(pageBlock -> pageBlock instanceof TextPageBlock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -112,7 +134,9 @@ public class SectionNodeFactory {
|
|||||||
|
|
||||||
List<List<AbstractPageBlock>> splitList = splitIntoCoherentList(pageBlocks);
|
List<List<AbstractPageBlock>> splitList = splitIntoCoherentList(pageBlocks);
|
||||||
movePrecedingHeadlineToTableList(splitList);
|
movePrecedingHeadlineToTableList(splitList);
|
||||||
return splitList.stream().filter(list -> !list.isEmpty()).toList();
|
return splitList.stream()
|
||||||
|
.filter(list -> !list.isEmpty())
|
||||||
|
.toList();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -133,7 +157,8 @@ public class SectionNodeFactory {
|
|||||||
|
|
||||||
private boolean listIsTablesOnly(List<AbstractPageBlock> abstractPageBlocks) {
|
private boolean listIsTablesOnly(List<AbstractPageBlock> abstractPageBlocks) {
|
||||||
|
|
||||||
return abstractPageBlocks.stream().allMatch(abstractPageBlock -> abstractPageBlock instanceof TablePageBlock);
|
return abstractPageBlocks.stream()
|
||||||
|
.allMatch(abstractPageBlock -> abstractPageBlock instanceof TablePageBlock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -8,6 +8,7 @@ import java.util.Set;
|
|||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
|
import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell;
|
import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock;
|
import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
|
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
|
||||||
@ -27,23 +28,26 @@ public class TableNodeFactory {
|
|||||||
public final double TABLE_CELL_MERGE_CONTENTS_SIZE_THRESHOLD = 0.05;
|
public final double TABLE_CELL_MERGE_CONTENTS_SIZE_THRESHOLD = 0.05;
|
||||||
|
|
||||||
|
|
||||||
public void addTable(GenericSemanticNode parentNode, List<TablePageBlock> tablesToMerge, DocumentGraphFactory.Context context) {
|
public void addTable(GenericSemanticNode parentNode, List<TablePageBlock> tablesToMerge, DocumentGraphFactory.Context context, Document document) {
|
||||||
|
|
||||||
setPageNumberInCells(tablesToMerge);
|
setPageNumberInCells(tablesToMerge);
|
||||||
Set<Page> pages = tablesToMerge.stream().map(AbstractPageBlock::getPage).map(context::getPage).collect(Collectors.toSet());
|
Set<Page> pages = tablesToMerge.stream()
|
||||||
List<List<Cell>> mergedRows = tablesToMerge.stream().map(TablePageBlock::getRows).flatMap(Collection::stream).toList();
|
.map(AbstractPageBlock::getPage)
|
||||||
|
.map(context::getPage)
|
||||||
|
.collect(Collectors.toSet());
|
||||||
|
List<List<Cell>> mergedRows = tablesToMerge.stream()
|
||||||
|
.map(TablePageBlock::getRows)
|
||||||
|
.flatMap(Collection::stream)
|
||||||
|
.toList();
|
||||||
|
|
||||||
Table table = Table.builder()
|
Table table = Table.builder().documentTree(context.getDocumentTree()).numberOfCols(mergedRows.isEmpty() ? 0 : mergedRows.get(0).size()).numberOfRows(mergedRows.size())
|
||||||
.documentTree(context.getDocumentTree())
|
|
||||||
.numberOfCols(mergedRows.isEmpty() ? 0 : mergedRows.get(0).size())
|
|
||||||
.numberOfRows(mergedRows.size())
|
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
pages.forEach(page -> addTableToPage(page, parentNode, table));
|
pages.forEach(page -> addTableToPage(page, parentNode, table));
|
||||||
|
|
||||||
List<Integer> treeId = context.getDocumentTree().createNewChildEntryAndReturnId(parentNode, table);
|
List<Integer> treeId = context.getDocumentTree().createNewChildEntryAndReturnId(parentNode, table);
|
||||||
table.setTreeId(treeId);
|
table.setTreeId(treeId);
|
||||||
addTableCells(mergedRows, table, context);
|
addTableCells(mergedRows, table, context, document);
|
||||||
|
|
||||||
ifTableHasNoHeadersSetFirstRowAsHeaders(table);
|
ifTableHasNoHeadersSetFirstRowAsHeaders(table);
|
||||||
}
|
}
|
||||||
@ -63,7 +67,8 @@ public class TableNodeFactory {
|
|||||||
|
|
||||||
private void setPageNumberInTextBlocksWithPageNumberSetTo0(TablePageBlock table, Cell cell) {
|
private void setPageNumberInTextBlocksWithPageNumberSetTo0(TablePageBlock table, Cell cell) {
|
||||||
|
|
||||||
cell.getTextBlocks().stream()//
|
cell.getTextBlocks()
|
||||||
|
.stream()//
|
||||||
.filter(tb -> tb.getPage() == 0)//
|
.filter(tb -> tb.getPage() == 0)//
|
||||||
.forEach(tb -> tb.setPage(table.getPage()));
|
.forEach(tb -> tb.setPage(table.getPage()));
|
||||||
}
|
}
|
||||||
@ -82,28 +87,32 @@ public class TableNodeFactory {
|
|||||||
|
|
||||||
private void ifTableHasNoHeadersSetFirstRowAsHeaders(Table table) {
|
private void ifTableHasNoHeadersSetFirstRowAsHeaders(Table table) {
|
||||||
|
|
||||||
if (table.streamHeaders().findAny().isEmpty()) {
|
if (table.streamHeaders()
|
||||||
table.streamRow(0).forEach(tableCellNode -> tableCellNode.setHeader(true));
|
.findAny().isEmpty()) {
|
||||||
|
table.streamRow(0)
|
||||||
|
.forEach(tableCellNode -> tableCellNode.setHeader(true));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private void addTableCells(List<List<Cell>> rows, Table table, DocumentGraphFactory.Context context) {
|
private void addTableCells(List<List<Cell>> rows, Table table, DocumentGraphFactory.Context context, Document document) {
|
||||||
|
|
||||||
for (int rowIndex = 0; rowIndex < rows.size(); rowIndex++) {
|
for (int rowIndex = 0; rowIndex < rows.size(); rowIndex++) {
|
||||||
for (int colIndex = 0; colIndex < rows.get(rowIndex).size(); colIndex++) {
|
for (int colIndex = 0; colIndex < rows.get(rowIndex).size(); colIndex++) {
|
||||||
addTableCell(rows.get(rowIndex).get(colIndex), rowIndex, colIndex, table, context);
|
addTableCell(rows.get(rowIndex)
|
||||||
|
.get(colIndex), rowIndex, colIndex, table, context, document);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@SuppressWarnings("PMD.UnusedPrivateMethod") // PMD actually flags this wrong
|
@SuppressWarnings("PMD.UnusedPrivateMethod") // PMD actually flags this wrong
|
||||||
private void addTableCell(Cell cell, int rowIndex, int colIndex, Table tableNode, DocumentGraphFactory.Context context) {
|
private void addTableCell(Cell cell, int rowIndex, int colIndex, Table tableNode, DocumentGraphFactory.Context context, Document document) {
|
||||||
|
|
||||||
Page page = context.getPage(cell.getPageNumber());
|
Page page = context.getPage(cell.getPageNumber());
|
||||||
|
|
||||||
TableCell tableCell = TableCell.builder().documentTree(context.getDocumentTree()).row(rowIndex).col(colIndex).header(cell.isHeaderCell()).bBox(cell.getBounds2D()).build();
|
TableCell tableCell = TableCell.builder().documentTree(context.getDocumentTree()).row(rowIndex).col(colIndex).header(cell.isHeaderCell()).bBox(cell.getBounds2D())
|
||||||
|
.build();
|
||||||
page.getMainBody().add(tableCell);
|
page.getMainBody().add(tableCell);
|
||||||
|
|
||||||
List<Integer> treeId = context.getDocumentTree().createNewTableChildEntryAndReturnId(tableNode, tableCell);
|
List<Integer> treeId = context.getDocumentTree().createNewTableChildEntryAndReturnId(tableNode, tableCell);
|
||||||
@ -113,16 +122,26 @@ public class TableNodeFactory {
|
|||||||
if (cell.getTextBlocks().isEmpty()) {
|
if (cell.getTextBlocks().isEmpty()) {
|
||||||
tableCell.setLeafTextBlock(context.getTextBlockFactory().emptyTextBlock(tableNode, context, page));
|
tableCell.setLeafTextBlock(context.getTextBlockFactory().emptyTextBlock(tableNode, context, page));
|
||||||
} else if (cell.getTextBlocks().size() == 1) {
|
} else if (cell.getTextBlocks().size() == 1) {
|
||||||
textBlock = context.getTextBlockFactory().buildAtomicTextBlock(cell.getTextBlocks().get(0).getSequences(), tableCell, context, page);
|
textBlock = context.getTextBlockFactory()
|
||||||
|
.buildAtomicTextBlock(cell.getTextBlocks()
|
||||||
|
.get(0).getSequences(), tableCell, context, page);
|
||||||
tableCell.setLeafTextBlock(textBlock);
|
tableCell.setLeafTextBlock(textBlock);
|
||||||
} else if (firstTextBlockIsHeadline(cell)) {
|
} else if (firstTextBlockIsHeadline(cell)) {
|
||||||
SectionNodeFactory.addSection(tableCell, cell.getTextBlocks().stream().map(tb -> (AbstractPageBlock) tb).toList(), emptyList(), context);
|
SectionNodeFactory.addSection(tableCell,
|
||||||
|
cell.getTextBlocks()
|
||||||
|
.stream()
|
||||||
|
.map(tb -> (AbstractPageBlock) tb)
|
||||||
|
.toList(),
|
||||||
|
emptyList(),
|
||||||
|
context,
|
||||||
|
document);
|
||||||
} else if (cellAreaIsSmallerThanPageAreaTimesThreshold(cell, page)) {
|
} else if (cellAreaIsSmallerThanPageAreaTimesThreshold(cell, page)) {
|
||||||
List<TextPositionSequence> sequences = TextPositionOperations.mergeAndSortTextPositionSequenceByYThenX(cell.getTextBlocks());
|
List<TextPositionSequence> sequences = TextPositionOperations.mergeAndSortTextPositionSequenceByYThenX(cell.getTextBlocks());
|
||||||
textBlock = context.getTextBlockFactory().buildAtomicTextBlock(sequences, tableCell, context, page);
|
textBlock = context.getTextBlockFactory().buildAtomicTextBlock(sequences, tableCell, context, page);
|
||||||
tableCell.setLeafTextBlock(textBlock);
|
tableCell.setLeafTextBlock(textBlock);
|
||||||
} else {
|
} else {
|
||||||
cell.getTextBlocks().forEach(tb -> DocumentGraphFactory.addParagraphOrHeadline(tableCell, tb, context, emptyList()));
|
cell.getTextBlocks()
|
||||||
|
.forEach(tb -> DocumentGraphFactory.addParagraphOrHeadline(tableCell, tb, context, emptyList()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -135,7 +154,8 @@ public class TableNodeFactory {
|
|||||||
|
|
||||||
private boolean firstTextBlockIsHeadline(Cell cell) {
|
private boolean firstTextBlockIsHeadline(Cell cell) {
|
||||||
|
|
||||||
return cell.getTextBlocks().get(0).isHeadline();
|
return cell.getTextBlocks()
|
||||||
|
.get(0).isHeadline();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user