Pull request #30: Fix the test and logic of the same-page table merge for one-row tables

Merge in RED/redaction-service from bugfix/fix-test-table-merge to master

* commit '38ddb1d9c8dbf0e83d40ec916f1ebeb6d0f86f42':
  Fix the test and logic of the same-page table merge for one-row tables
This commit is contained in:
Dominique Eiflaender 2020-08-24 14:22:41 +02:00
commit d026f4e1db
2 changed files with 13 additions and 9 deletions

View File

@ -4,6 +4,7 @@ import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.commons.collections4.CollectionUtils;
import org.springframework.stereotype.Service;
@ -39,7 +40,7 @@ public class SectionsBuilderService {
current.setPage(page.getPageNumber());
if (prev != null && current.getClassification().startsWith("H ") || !document.isHeadlines()) {
Paragraph chunkBlock = buildTextBlock(chunkWords, lastHeadline, previousTable);
Paragraph chunkBlock = buildTextBlock(chunkWords, lastHeadline);
chunkBlock.setHeadline(lastHeadline);
lastHeadline = current.getText();
chunkBlockList.add(chunkBlock);
@ -47,7 +48,8 @@ public class SectionsBuilderService {
if (CollectionUtils.isNotEmpty(chunkBlock.getTables())) {
previousTable = chunkBlock.getTables().get(chunkBlock.getTables().size() - 1);
}
} else if (current instanceof Table) {
}
if (current instanceof Table) {
Table table = (Table) current;
// Distribute header information for subsequent tables
mergeTableMetadata(table, previousTable);
@ -58,7 +60,7 @@ public class SectionsBuilderService {
}
}
Paragraph chunkBlock = buildTextBlock(chunkWords, lastHeadline, previousTable);
Paragraph chunkBlock = buildTextBlock(chunkWords, lastHeadline);
chunkBlock.setHeadline(lastHeadline);
chunkBlockList.add(chunkBlock);
@ -76,7 +78,11 @@ public class SectionsBuilderService {
if (previousTableNonHeaderRow.isEmpty() && previousTable.getRowCount() == 1 && previousTable.getRows()
.get(0)
.size() == tableNonHeaderRow.size()) {
previousTableNonHeaderRow = previousTable.getRows().get(0);
previousTableNonHeaderRow = previousTable.getRows().get(0).stream().map(cell -> {
Cell fakeCell = new Cell(cell.getPoints()[0], cell.getPoints()[2]);
fakeCell.setHeaderCells(Collections.singletonList(cell));
return fakeCell;
}).collect(Collectors.toList());
}
if (previousTableNonHeaderRow.size() == tableNonHeaderRow.size()) {
for (int i = currentTable.getRows()
@ -94,7 +100,7 @@ public class SectionsBuilderService {
}
private Paragraph buildTextBlock(List<AbstractTextContainer> wordBlockList, String lastHeadline, Table previousTable) {
private Paragraph buildTextBlock(List<AbstractTextContainer> wordBlockList, String lastHeadline) {
Paragraph paragraph = new Paragraph();
TextBlock textBlock = null;
@ -105,7 +111,6 @@ public class SectionsBuilderService {
Iterator<AbstractTextContainer> itty = wordBlockList.iterator();
boolean alreadyAdded = false;
AbstractTextContainer previous = null;
Table sectionTable = previousTable;
while (itty.hasNext()) {
AbstractTextContainer container = itty.next();
@ -118,8 +123,6 @@ public class SectionsBuilderService {
} else {
table.setHeadline("Table in: " + lastHeadline);
}
mergeTableMetadata(table, sectionTable);
sectionTable = table;
if (textBlock != null && !alreadyAdded) {
paragraph.getPageBlocks().add(textBlock);

View File

@ -3,6 +3,7 @@ package com.iqser.red.service.redaction.v1.server.segmentation;
import static org.assertj.core.api.Assertions.assertThat;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
@ -94,7 +95,7 @@ public class PdfSegmentationServiceTest {
List<List<Cell>> firstTableHeaderCells = firstTable.getRows()
.get(0)
.stream()
.map(Cell::getHeaderCells)
.map(Collections::singletonList)
.collect(Collectors.toList());
assertThat(secondTable.getRows().stream()
.allMatch(row -> row.stream()