diff --git a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/SectionsBuilderService.java b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/SectionsBuilderService.java index 0e84afa1..11d5d5d4 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/SectionsBuilderService.java +++ b/redaction-service-v1/redaction-service-server-v1/src/main/java/com/iqser/red/service/redaction/v1/server/segmentation/SectionsBuilderService.java @@ -4,6 +4,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.Iterator; import java.util.List; +import java.util.stream.Collectors; import org.apache.commons.collections4.CollectionUtils; import org.springframework.stereotype.Service; @@ -39,7 +40,7 @@ public class SectionsBuilderService { current.setPage(page.getPageNumber()); if (prev != null && current.getClassification().startsWith("H ") || !document.isHeadlines()) { - Paragraph chunkBlock = buildTextBlock(chunkWords, lastHeadline, previousTable); + Paragraph chunkBlock = buildTextBlock(chunkWords, lastHeadline); chunkBlock.setHeadline(lastHeadline); lastHeadline = current.getText(); chunkBlockList.add(chunkBlock); @@ -47,7 +48,8 @@ public class SectionsBuilderService { if (CollectionUtils.isNotEmpty(chunkBlock.getTables())) { previousTable = chunkBlock.getTables().get(chunkBlock.getTables().size() - 1); } - } else if (current instanceof Table) { + } + if (current instanceof Table) { Table table = (Table) current; // Distribute header information for subsequent tables mergeTableMetadata(table, previousTable); @@ -58,7 +60,7 @@ public class SectionsBuilderService { } } - Paragraph chunkBlock = buildTextBlock(chunkWords, lastHeadline, previousTable); + Paragraph chunkBlock = buildTextBlock(chunkWords, lastHeadline); chunkBlock.setHeadline(lastHeadline); chunkBlockList.add(chunkBlock); @@ -76,7 +78,11 @@ public class SectionsBuilderService { if (previousTableNonHeaderRow.isEmpty() && previousTable.getRowCount() == 1 && previousTable.getRows() .get(0) .size() == tableNonHeaderRow.size()) { - previousTableNonHeaderRow = previousTable.getRows().get(0); + previousTableNonHeaderRow = previousTable.getRows().get(0).stream().map(cell -> { + Cell fakeCell = new Cell(cell.getPoints()[0], cell.getPoints()[2]); + fakeCell.setHeaderCells(Collections.singletonList(cell)); + return fakeCell; + }).collect(Collectors.toList()); } if (previousTableNonHeaderRow.size() == tableNonHeaderRow.size()) { for (int i = currentTable.getRows() @@ -94,7 +100,7 @@ public class SectionsBuilderService { } - private Paragraph buildTextBlock(List wordBlockList, String lastHeadline, Table previousTable) { + private Paragraph buildTextBlock(List wordBlockList, String lastHeadline) { Paragraph paragraph = new Paragraph(); TextBlock textBlock = null; @@ -105,7 +111,6 @@ public class SectionsBuilderService { Iterator itty = wordBlockList.iterator(); boolean alreadyAdded = false; AbstractTextContainer previous = null; - Table sectionTable = previousTable; while (itty.hasNext()) { AbstractTextContainer container = itty.next(); @@ -118,8 +123,6 @@ public class SectionsBuilderService { } else { table.setHeadline("Table in: " + lastHeadline); } - mergeTableMetadata(table, sectionTable); - sectionTable = table; if (textBlock != null && !alreadyAdded) { paragraph.getPageBlocks().add(textBlock); diff --git a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationServiceTest.java b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationServiceTest.java index 19498e67..852dc91b 100644 --- a/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationServiceTest.java +++ b/redaction-service-v1/redaction-service-server-v1/src/test/java/com/iqser/red/service/redaction/v1/server/segmentation/PdfSegmentationServiceTest.java @@ -3,6 +3,7 @@ package com.iqser.red.service.redaction.v1.server.segmentation; import static org.assertj.core.api.Assertions.assertThat; import java.io.IOException; +import java.util.Collections; import java.util.List; import java.util.stream.Collectors; @@ -94,7 +95,7 @@ public class PdfSegmentationServiceTest { List> firstTableHeaderCells = firstTable.getRows() .get(0) .stream() - .map(Cell::getHeaderCells) + .map(Collections::singletonList) .collect(Collectors.toList()); assertThat(secondTable.getRows().stream() .allMatch(row -> row.stream()