Merge branch 'hotfix-bp' into 'release/0.159.x'
hotfix: unmerge super large tables. See merge request fforesight/layout-parser!219.
This commit is contained in:
commit
f6c60aa5eb
@ -1,6 +1,7 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.model.outline;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
@ -185,12 +186,8 @@ public class TOCEnrichmentService {
|
||||
List<Cell> previousTableNonHeaderRow = getRowWithNonHeaderCells(previousTable);
|
||||
List<Cell> tableNonHeaderRow = getRowWithNonHeaderCells(currentTable);
|
||||
// Allow merging of tables if header row is separated from first logical non-header row
|
||||
if (previousTableNonHeaderRow.isEmpty()
|
||||
&& previousTable.getRowCount() == 1
|
||||
&& previousTable.getRows()
|
||||
.get(0).size() == tableNonHeaderRow.size()) {
|
||||
previousTableNonHeaderRow = previousTable.getRows()
|
||||
.get(0)
|
||||
if (previousTableNonHeaderRow.isEmpty() && previousTable.getRowCount() == 1 && previousTable.getRows().get(0).size() == tableNonHeaderRow.size()) {
|
||||
previousTableNonHeaderRow = previousTable.getRows().get(0)
|
||||
.stream()
|
||||
.map(cell -> {
|
||||
Cell fakeCell = Cell.copy(cell);
|
||||
@ -201,8 +198,7 @@ public class TOCEnrichmentService {
|
||||
}
|
||||
if (previousTableNonHeaderRow.size() == tableNonHeaderRow.size()) {
|
||||
for (int i = currentTable.getRowCount() - 1; i >= 0; i--) { // Non header rows are most likely at bottom of table
|
||||
List<Cell> row = currentTable.getRows()
|
||||
.get(i);
|
||||
List<Cell> row = currentTable.getRows().get(i);
|
||||
if (row.size() == tableNonHeaderRow.size() && row.stream()
|
||||
.allMatch(cell -> cell.getHeaderCells().isEmpty())) {
|
||||
for (int j = 0; j < row.size(); j++) {
|
||||
@ -225,18 +221,15 @@ public class TOCEnrichmentService {
|
||||
|
||||
return table.getRows()
|
||||
.stream()
|
||||
.flatMap(row -> row.stream()
|
||||
.filter(cell -> !cell.getHeaderCells().isEmpty()))
|
||||
.findAny().isEmpty();
|
||||
|
||||
.flatMap(Collection::stream)
|
||||
.allMatch(cell -> cell.getHeaderCells().isEmpty());
|
||||
}
|
||||
|
||||
|
||||
private List<Cell> getRowWithNonHeaderCells(TablePageBlock table) {
|
||||
|
||||
for (int i = table.getRowCount() - 1; i >= 0; i--) { // Non header rows are most likely at bottom of table
|
||||
List<Cell> row = table.getRows()
|
||||
.get(i);
|
||||
List<Cell> row = table.getRows().get(i);
|
||||
if (row.size() == 1) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -36,10 +36,7 @@ public class TableNodeFactory {
|
||||
Document document) {
|
||||
|
||||
setPageNumberInCells(tablesToMerge);
|
||||
Set<Page> pages = tablesToMerge.stream()
|
||||
.map(AbstractPageBlock::getPage)
|
||||
.map(context::getPage)
|
||||
.collect(Collectors.toSet());
|
||||
|
||||
List<List<Cell>> mergedRows = tablesToMerge.stream()
|
||||
.map(TablePageBlock::getRows)
|
||||
.flatMap(Collection::stream)
|
||||
|
||||
@ -36,7 +36,7 @@ public class TableMergingUtility {
|
||||
TablePageBlock consecutiveTable = consecutiveTables.get(i);
|
||||
|
||||
if (consecutiveTable.getColCount() == originalTablePageBlock.getColCount() //
|
||||
&& headersMatch(originalTablePageBlock, consecutiveTable) //
|
||||
&& getHeaders(consecutiveTable).isEmpty() //
|
||||
&& outerBoundaryAlignsX(originalTablePageBlock, consecutiveTable) //
|
||||
&& consecutiveOrSamePage(currentTable, consecutiveTable) //
|
||||
&& !tableBetween(currentTable, consecutiveTable, findTablesBetween(consecutiveTables, currentTableIndex, i))) {
|
||||
@ -80,12 +80,6 @@ public class TableMergingUtility {
|
||||
}
|
||||
|
||||
|
||||
/**
 * Decides whether the headers of a consecutive table are compatible with the headers of the
 * original table.
 *
 * @param originalTable    the table whose headers serve as the reference
 * @param consecutiveTable the following table whose headers are compared against the reference
 * @return {@code true} if {@code getHeaders(consecutiveTable)} is empty, or if
 *         {@code getHeaders(originalTable)} equals {@code getHeaders(consecutiveTable)};
 *         {@code false} otherwise
 */
private static boolean headersMatch(TablePageBlock originalTable, TablePageBlock consecutiveTable) {
|
||||
|
||||
// A consecutive table without any headers always matches; otherwise both header collections must be equal.
return getHeaders(consecutiveTable).isEmpty() || getHeaders(originalTable).equals(getHeaders(consecutiveTable));
|
||||
}
|
||||
|
||||
|
||||
private static boolean outerBoundaryAlignsX(TablePageBlock originalTablePageBlock, TablePageBlock consecutiveTable) {
|
||||
|
||||
return Math.abs(consecutiveTable.getMinX() - originalTablePageBlock.getMinX()) < TABLE_ALIGNMENT_THRESHOLD
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user