Pull request #27: Fix merging of tables with single header row table
Merge in RED/redaction-service from bugfix/merge-single-row-tables to master * commit '00a960ee2308e7ed7da23ebd3e85f5ef9db14610': Fix merging of tables with single header row table
This commit is contained in:
commit
bb1112d0d7
@ -89,10 +89,15 @@ public class SectionsBuilderService {
|
||||
if (previousTable != null && hasInvalidHeaderInformation(table) && hasValidHeaderInformation(previousTable)) {
|
||||
List<Cell> previousTableNonHeaderRow = getRowWithNonHeaderCells(previousTable);
|
||||
List<Cell> tableNonHeaderRow = getRowWithNonHeaderCells(table);
|
||||
// Allow merging of tables if header row is separated from first logical non-header row
|
||||
if (previousTableNonHeaderRow.isEmpty() && previousTable.getRowCount() == 1
|
||||
&& previousTable.getRows().get(0).size() == tableNonHeaderRow.size()) {
|
||||
previousTableNonHeaderRow = previousTable.getRows().get(0);
|
||||
}
|
||||
if (previousTableNonHeaderRow.size() == tableNonHeaderRow.size()) {
|
||||
for (int i = table.getRows().size() - 1; i >= 0; i--) { // Non header rows are most likely at bottom of table
|
||||
List<Cell> row = table.getRows().get(i);
|
||||
if (row.size() == previousTableNonHeaderRow.size()
|
||||
if (row.size() == tableNonHeaderRow.size()
|
||||
&& row.stream().allMatch(cell -> cell.getHeaderCells().isEmpty())) {
|
||||
for (int j = 0; j < row.size(); j++) {
|
||||
row.get(j).setHeaderCells(previousTableNonHeaderRow.get(j).getHeaderCells());
|
||||
|
||||
@ -18,6 +18,7 @@ import org.springframework.test.context.junit4.SpringRunner;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.classification.model.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.classification.service.BlockificationService;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Cell;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.model.Table;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.service.RulingCleaningService;
|
||||
import com.iqser.red.service.redaction.v1.server.tableextraction.service.TableExtractionService;
|
||||
@ -64,4 +65,44 @@ public class PdfSegmentationServiceTest {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testTableExtraction() throws IOException {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource("files/Minimal Examples/Merge Table.pdf");
|
||||
|
||||
try (PDDocument pdDocument = PDDocument.load(pdfFileResource.getInputStream())) {
|
||||
Document document = pdfSegmentationService.parseDocument(pdDocument);
|
||||
assertThat(document.getParagraphs()
|
||||
.stream()
|
||||
.flatMap(paragraph -> paragraph.getTables().stream())
|
||||
.collect(Collectors.toList())).isNotEmpty();
|
||||
Table firstTable = document.getParagraphs()
|
||||
.stream()
|
||||
.flatMap(paragraph -> paragraph.getTables().stream())
|
||||
.collect(Collectors.toList())
|
||||
.get(0);
|
||||
assertThat(firstTable.getColCount()).isEqualTo(8);
|
||||
assertThat(firstTable.getRowCount()).isEqualTo(1);
|
||||
Table secondTable = document.getParagraphs()
|
||||
.stream()
|
||||
.flatMap(paragraph -> paragraph.getTables().stream())
|
||||
.collect(Collectors.toList())
|
||||
.get(1);
|
||||
assertThat(secondTable.getColCount()).isEqualTo(8);
|
||||
assertThat(secondTable.getRowCount()).isEqualTo(2);
|
||||
List<List<Cell>> firstTableHeaderCells = firstTable.getRows()
|
||||
.get(0)
|
||||
.stream()
|
||||
.map(Cell::getHeaderCells)
|
||||
.collect(Collectors.toList());
|
||||
assertThat(secondTable.getRows().stream()
|
||||
.allMatch(row -> row.stream()
|
||||
.map(Cell::getHeaderCells)
|
||||
.collect(Collectors.toList())
|
||||
.equals(firstTableHeaderCells)))
|
||||
.isTrue();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user