Pull request #30: Fix test and logic of same-page table merge for one-row tables
Merge in RED/redaction-service from bugfix/fix-test-table-merge to master. Commit '38ddb1d9c8dbf0e83d40ec916f1ebeb6d0f86f42': Fix test and logic of same-page table merge for one-row tables
This commit is contained in:
commit
d026f4e1db
@ -4,6 +4,7 @@ import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
@ -39,7 +40,7 @@ public class SectionsBuilderService {
|
||||
current.setPage(page.getPageNumber());
|
||||
|
||||
if (prev != null && current.getClassification().startsWith("H ") || !document.isHeadlines()) {
|
||||
Paragraph chunkBlock = buildTextBlock(chunkWords, lastHeadline, previousTable);
|
||||
Paragraph chunkBlock = buildTextBlock(chunkWords, lastHeadline);
|
||||
chunkBlock.setHeadline(lastHeadline);
|
||||
lastHeadline = current.getText();
|
||||
chunkBlockList.add(chunkBlock);
|
||||
@ -47,7 +48,8 @@ public class SectionsBuilderService {
|
||||
if (CollectionUtils.isNotEmpty(chunkBlock.getTables())) {
|
||||
previousTable = chunkBlock.getTables().get(chunkBlock.getTables().size() - 1);
|
||||
}
|
||||
} else if (current instanceof Table) {
|
||||
}
|
||||
if (current instanceof Table) {
|
||||
Table table = (Table) current;
|
||||
// Distribute header information for subsequent tables
|
||||
mergeTableMetadata(table, previousTable);
|
||||
@ -58,7 +60,7 @@ public class SectionsBuilderService {
|
||||
}
|
||||
}
|
||||
|
||||
Paragraph chunkBlock = buildTextBlock(chunkWords, lastHeadline, previousTable);
|
||||
Paragraph chunkBlock = buildTextBlock(chunkWords, lastHeadline);
|
||||
chunkBlock.setHeadline(lastHeadline);
|
||||
chunkBlockList.add(chunkBlock);
|
||||
|
||||
@ -76,7 +78,11 @@ public class SectionsBuilderService {
|
||||
if (previousTableNonHeaderRow.isEmpty() && previousTable.getRowCount() == 1 && previousTable.getRows()
|
||||
.get(0)
|
||||
.size() == tableNonHeaderRow.size()) {
|
||||
previousTableNonHeaderRow = previousTable.getRows().get(0);
|
||||
previousTableNonHeaderRow = previousTable.getRows().get(0).stream().map(cell -> {
|
||||
Cell fakeCell = new Cell(cell.getPoints()[0], cell.getPoints()[2]);
|
||||
fakeCell.setHeaderCells(Collections.singletonList(cell));
|
||||
return fakeCell;
|
||||
}).collect(Collectors.toList());
|
||||
}
|
||||
if (previousTableNonHeaderRow.size() == tableNonHeaderRow.size()) {
|
||||
for (int i = currentTable.getRows()
|
||||
@ -94,7 +100,7 @@ public class SectionsBuilderService {
|
||||
}
|
||||
|
||||
|
||||
private Paragraph buildTextBlock(List<AbstractTextContainer> wordBlockList, String lastHeadline, Table previousTable) {
|
||||
private Paragraph buildTextBlock(List<AbstractTextContainer> wordBlockList, String lastHeadline) {
|
||||
|
||||
Paragraph paragraph = new Paragraph();
|
||||
TextBlock textBlock = null;
|
||||
@ -105,7 +111,6 @@ public class SectionsBuilderService {
|
||||
Iterator<AbstractTextContainer> itty = wordBlockList.iterator();
|
||||
boolean alreadyAdded = false;
|
||||
AbstractTextContainer previous = null;
|
||||
Table sectionTable = previousTable;
|
||||
while (itty.hasNext()) {
|
||||
AbstractTextContainer container = itty.next();
|
||||
|
||||
@ -118,8 +123,6 @@ public class SectionsBuilderService {
|
||||
} else {
|
||||
table.setHeadline("Table in: " + lastHeadline);
|
||||
}
|
||||
mergeTableMetadata(table, sectionTable);
|
||||
sectionTable = table;
|
||||
|
||||
if (textBlock != null && !alreadyAdded) {
|
||||
paragraph.getPageBlocks().add(textBlock);
|
||||
|
||||
@ -3,6 +3,7 @@ package com.iqser.red.service.redaction.v1.server.segmentation;
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@ -94,7 +95,7 @@ public class PdfSegmentationServiceTest {
|
||||
List<List<Cell>> firstTableHeaderCells = firstTable.getRows()
|
||||
.get(0)
|
||||
.stream()
|
||||
.map(Cell::getHeaderCells)
|
||||
.map(Collections::singletonList)
|
||||
.collect(Collectors.toList());
|
||||
assertThat(secondTable.getRows().stream()
|
||||
.allMatch(row -> row.stream()
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user