Fix merging of tables when the previous table consists of a single header row
This commit is contained in:
parent
8c08bb3664
commit
00a960ee23
@ -89,10 +89,15 @@ public class SectionsBuilderService {
|
|||||||
if (previousTable != null && hasInvalidHeaderInformation(table) && hasValidHeaderInformation(previousTable)) {
|
if (previousTable != null && hasInvalidHeaderInformation(table) && hasValidHeaderInformation(previousTable)) {
|
||||||
List<Cell> previousTableNonHeaderRow = getRowWithNonHeaderCells(previousTable);
|
List<Cell> previousTableNonHeaderRow = getRowWithNonHeaderCells(previousTable);
|
||||||
List<Cell> tableNonHeaderRow = getRowWithNonHeaderCells(table);
|
List<Cell> tableNonHeaderRow = getRowWithNonHeaderCells(table);
|
||||||
|
// Allow merging of tables if header row is separated from first logical non-header row
|
||||||
|
if (previousTableNonHeaderRow.isEmpty() && previousTable.getRowCount() == 1
|
||||||
|
&& previousTable.getRows().get(0).size() == tableNonHeaderRow.size()) {
|
||||||
|
previousTableNonHeaderRow = previousTable.getRows().get(0);
|
||||||
|
}
|
||||||
if (previousTableNonHeaderRow.size() == tableNonHeaderRow.size()) {
|
if (previousTableNonHeaderRow.size() == tableNonHeaderRow.size()) {
|
||||||
for (int i = table.getRows().size() - 1; i >= 0; i--) { // Non header rows are most likely at bottom of table
|
for (int i = table.getRows().size() - 1; i >= 0; i--) { // Non header rows are most likely at bottom of table
|
||||||
List<Cell> row = table.getRows().get(i);
|
List<Cell> row = table.getRows().get(i);
|
||||||
if (row.size() == previousTableNonHeaderRow.size()
|
if (row.size() == tableNonHeaderRow.size()
|
||||||
&& row.stream().allMatch(cell -> cell.getHeaderCells().isEmpty())) {
|
&& row.stream().allMatch(cell -> cell.getHeaderCells().isEmpty())) {
|
||||||
for (int j = 0; j < row.size(); j++) {
|
for (int j = 0; j < row.size(); j++) {
|
||||||
row.get(j).setHeaderCells(previousTableNonHeaderRow.get(j).getHeaderCells());
|
row.get(j).setHeaderCells(previousTableNonHeaderRow.get(j).getHeaderCells());
|
||||||
|
|||||||
@ -18,6 +18,7 @@ import org.springframework.test.context.junit4.SpringRunner;
|
|||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||||
import com.iqser.red.service.redaction.v1.server.classification.service.BlockificationService;
|
import com.iqser.red.service.redaction.v1.server.classification.service.BlockificationService;
|
||||||
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.service.RulingCleaningService;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.service.RulingCleaningService;
|
||||||
import com.iqser.red.service.redaction.v1.server.tableextraction.service.TableExtractionService;
|
import com.iqser.red.service.redaction.v1.server.tableextraction.service.TableExtractionService;
|
||||||
@ -64,4 +65,44 @@ public class PdfSegmentationServiceTest {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTableExtraction() throws IOException {
|
||||||
|
|
||||||
|
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Merge Table.pdf");
|
||||||
|
|
||||||
|
try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
|
||||||
|
Document document = pdfSegmentationService.parseDocument(pdDocument);
|
||||||
|
assertThat(document.getParagraphs()
|
||||||
|
.stream()
|
||||||
|
.flatMap(paragraph -> paragraph.getTables().stream())
|
||||||
|
.collect(Collectors.toList())).isNotEmpty();
|
||||||
|
Table firstTable = document.getParagraphs()
|
||||||
|
.stream()
|
||||||
|
.flatMap(paragraph -> paragraph.getTables().stream())
|
||||||
|
.collect(Collectors.toList())
|
||||||
|
.get(0);
|
||||||
|
assertThat(firstTable.getColCount()).isEqualTo(8);
|
||||||
|
assertThat(firstTable.getRowCount()).isEqualTo(1);
|
||||||
|
Table secondTable = document.getParagraphs()
|
||||||
|
.stream()
|
||||||
|
.flatMap(paragraph -> paragraph.getTables().stream())
|
||||||
|
.collect(Collectors.toList())
|
||||||
|
.get(1);
|
||||||
|
assertThat(secondTable.getColCount()).isEqualTo(8);
|
||||||
|
assertThat(secondTable.getRowCount()).isEqualTo(2);
|
||||||
|
List<List<Cell>> firstTableHeaderCells = firstTable.getRows()
|
||||||
|
.get(0)
|
||||||
|
.stream()
|
||||||
|
.map(Cell::getHeaderCells)
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
assertThat(secondTable.getRows().stream()
|
||||||
|
.allMatch(row -> row.stream()
|
||||||
|
.map(Cell::getHeaderCells)
|
||||||
|
.collect(Collectors.toList())
|
||||||
|
.equals(firstTableHeaderCells)))
|
||||||
|
.isTrue();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user