Pull request #27: Fix merging of tables when the preceding table is a single header row

Merge in RED/redaction-service from bugfix/merge-single-row-tables to master

* commit '00a960ee2308e7ed7da23ebd3e85f5ef9db14610':
  Fix merging of tables when the preceding table is a single header row
This commit is contained in:
Dominique Eiflaender 2020-08-24 12:10:53 +02:00
commit bb1112d0d7
3 changed files with 47 additions and 1 deletions

View File

@ -89,10 +89,15 @@ public class SectionsBuilderService {
if (previousTable != null && hasInvalidHeaderInformation(table) && hasValidHeaderInformation(previousTable)) {
List<Cell> previousTableNonHeaderRow = getRowWithNonHeaderCells(previousTable);
List<Cell> tableNonHeaderRow = getRowWithNonHeaderCells(table);
// Allow merging of tables if header row is separated from first logical non-header row
if (previousTableNonHeaderRow.isEmpty() && previousTable.getRowCount() == 1
&& previousTable.getRows().get(0).size() == tableNonHeaderRow.size()) {
previousTableNonHeaderRow = previousTable.getRows().get(0);
}
if (previousTableNonHeaderRow.size() == tableNonHeaderRow.size()) {
for (int i = table.getRows().size() - 1; i >= 0; i--) { // Non header rows are most likely at bottom of table
List<Cell> row = table.getRows().get(i);
if (row.size() == previousTableNonHeaderRow.size()
if (row.size() == tableNonHeaderRow.size()
&& row.stream().allMatch(cell -> cell.getHeaderCells().isEmpty())) {
for (int j = 0; j < row.size(); j++) {
row.get(j).setHeaderCells(previousTableNonHeaderRow.get(j).getHeaderCells());

View File

@ -18,6 +18,7 @@ import org.springframework.test.context.junit4.SpringRunner;
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
import com.iqser.red.service.redaction.v1.server.classification.service.BlockificationService;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
import com.iqser.red.service.redaction.v1.server.tableextraction.service.RulingCleaningService;
import com.iqser.red.service.redaction.v1.server.tableextraction.service.TableExtractionService;
@ -64,4 +65,44 @@ public class PdfSegmentationServiceTest {
}
}
/**
 * Parses {@code files/Minimal Examples/Merge Table.pdf} and checks that the header cells of
 * the first extracted table are also assigned to every row of the second extracted table.
 *
 * <p>Expected layout of the fixture: the first table has 8 columns and a single row, the
 * second table has 8 columns and two rows.
 * NOTE(review): presumably this covers the case where a header row was extracted as its own
 * table, separated from its data rows - confirm against the fixture PDF.
 *
 * @throws IOException if the PDF resource cannot be read
 */
@Test
public void testTableExtraction() throws IOException {

    ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Merge Table.pdf");

    try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
        Document document = pdfSegmentationService.parseDocument(pdDocument);

        // Flatten the tables of all paragraphs once instead of re-streaming for every lookup.
        List<Table> tables = document.getParagraphs()
                .stream()
                .flatMap(paragraph -> paragraph.getTables().stream())
                .collect(Collectors.toList());

        // Both tables are indexed below; fail with an assertion error rather than an
        // IndexOutOfBoundsException when fewer than two tables were extracted.
        assertThat(tables.size()).isGreaterThanOrEqualTo(2);

        Table firstTable = tables.get(0);
        assertThat(firstTable.getColCount()).isEqualTo(8);
        assertThat(firstTable.getRowCount()).isEqualTo(1);

        Table secondTable = tables.get(1);
        assertThat(secondTable.getColCount()).isEqualTo(8);
        assertThat(secondTable.getRowCount()).isEqualTo(2);

        // Header cells of each cell in the first table's only row, in column order.
        List<List<Cell>> firstTableHeaderCells = firstTable.getRows()
                .get(0)
                .stream()
                .map(Cell::getHeaderCells)
                .collect(Collectors.toList());

        // Every row of the second table must carry exactly those header cells, column by column.
        assertThat(secondTable.getRows()
                .stream()
                .allMatch(row -> row.stream()
                        .map(Cell::getHeaderCells)
                        .collect(Collectors.toList())
                        .equals(firstTableHeaderCells)))
                .isTrue();
    }
}
}