diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java index 8cbd322..890862d 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java @@ -39,8 +39,8 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Do import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.ImageType; import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage; import com.knecon.fforesight.service.layoutparser.processor.model.outline.OutlineExtractorService; -import com.knecon.fforesight.service.layoutparser.processor.model.outline.OutlineValidationService; import com.knecon.fforesight.service.layoutparser.processor.model.outline.SectionTreeBuilderService; +import com.knecon.fforesight.service.layoutparser.processor.model.outline.SectionTreeEnhancementService; import com.knecon.fforesight.service.layoutparser.processor.model.outline.SectionTree; import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell; import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings; @@ -105,8 +105,8 @@ public class LayoutParsingPipeline { VisualLayoutParsingAdapter visualLayoutParsingAdapter; GraphicExtractorService graphicExtractorService; OutlineExtractorService outlineExtractorService; - OutlineValidationService outlineValidationService; SectionTreeBuilderService sectionTreeBuilderService; + SectionTreeEnhancementService sectionTreeEnhancementService; LayoutparserSettings settings; ClassificationService classificationService; @@ -344,14 +344,14 @@ public class LayoutParsingPipeline { classificationService.classify(classificationDocument, layoutParsingType, identifier); - SectionTree sectionTree = outlineValidationService.createSectionTree(classificationDocument); + SectionTree sectionTree = sectionTreeBuilderService.createSectionTree(classificationDocument); classificationDocument.setSectionTree(sectionTree); log.info("Building Sections for {}", identifier); switch (layoutParsingType) { case CLARIFYND_PARAGRAPH_DEBUG, REDACT_MANAGER_PARAGRAPH_DEBUG -> sectionsBuilderService.buildParagraphDebugSections(classificationDocument); - default -> sectionTreeBuilderService.assignSectionBlocksAndImages(classificationDocument); + default -> sectionTreeEnhancementService.assignSectionBlocksAndImages(classificationDocument); } return classificationDocument; diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/OutlineValidationService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/OutlineValidationService.java deleted file mode 100644 index 820d1e2..0000000 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/OutlineValidationService.java +++ /dev/null @@ -1,84 +0,0 @@ -package com.knecon.fforesight.service.layoutparser.processor.model.outline; - -import static com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType.TABLE_OF_CONTENTS_HEADLINE; -import static com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType.getHeadlineNumber; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.TreeSet; - -import org.springframework.stereotype.Service; - -import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationDocument; -import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock; - -import io.micrometer.observation.annotation.Observed; -import lombok.extern.slf4j.Slf4j; - -@Service -@Slf4j -public class OutlineValidationService { - - @Observed(name = "OutlineValidationService", contextualName = "create-toc") - public SectionTree createSectionTree(ClassificationDocument classificationDocument) { - - List headlines = extractHeadlines(classificationDocument); - - List mainSections = new ArrayList<>(); - Map lastItemsPerDepth = new HashMap<>(); - SectionTreeEntry last = null; - TreeSet depths = new TreeSet<>(); - - for (TextPageBlock current : headlines) { - int currentDepth = getHeadlineNumber(current.getClassification()); - Integer parentDepth = depths.floor(currentDepth - 1); - - var tocItem = new SectionTreeEntry(current); - - if (parentDepth == null) { - mainSections.add(tocItem); - lastItemsPerDepth = new HashMap<>(); - depths = new TreeSet<>(); - - } else { - assert last != null; - int lastDepth = getHeadlineNumber(last.getHeadline().getClassification()); - if (last.getHeadline().getClassification().equals(TABLE_OF_CONTENTS_HEADLINE) && !current.getClassification().equals(TABLE_OF_CONTENTS_HEADLINE)) { - // headline after toc should always start a main section - parentDepth = 1; - } else if (lastDepth < parentDepth) { - parentDepth = lastDepth; - } else if (lastDepth == currentDepth && last.getParent() != null) { - parentDepth = getHeadlineNumber(last.getParent().getHeadline().getClassification()); - } - - SectionTreeEntry parent = lastItemsPerDepth.get(parentDepth); - parent.addChild(tocItem); - } - - last = tocItem; - lastItemsPerDepth.put(currentDepth, tocItem); - depths.add(currentDepth); - } - - return new - - SectionTree(mainSections); - - } - - - private static List extractHeadlines(ClassificationDocument classificationDocument) { - - return classificationDocument.getPages() - .stream() - .flatMap(classificationPage -> classificationPage.getTextBlocks() - .stream() - .filter(tb -> tb instanceof TextPageBlock && tb.getClassification() != null && tb.getClassification().isHeadline()) - .map(tb -> (TextPageBlock) tb)) - .toList(); - } - -} diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/SectionTreeBuilderService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/SectionTreeBuilderService.java index a4778a1..37174a2 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/SectionTreeBuilderService.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/SectionTreeBuilderService.java @@ -1,252 +1,82 @@ package com.knecon.fforesight.service.layoutparser.processor.model.outline; +import static com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType.TABLE_OF_CONTENTS_HEADLINE; +import static com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType.getHeadlineNumber; + import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.Iterator; +import java.util.HashMap; import java.util.List; +import java.util.Map; +import java.util.TreeSet; import org.springframework.stereotype.Service; -import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock; import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationDocument; -import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationFooter; -import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationHeader; -import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage; -import com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType; -import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage; -import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell; -import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock; import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock; +import io.micrometer.observation.annotation.Observed; import lombok.extern.slf4j.Slf4j; -@Slf4j @Service +@Slf4j public class SectionTreeBuilderService { - public void assignSectionBlocksAndImages(ClassificationDocument document) { + @Observed(name = "OutlineValidationService", contextualName = "create-toc") + public SectionTree createSectionTree(ClassificationDocument classificationDocument) { - SectionTree toc = document.getSectionTree(); - Iterator iterator = toc.iterator(); - SectionTreeEntry currentTOCItem = null; - if (iterator.hasNext()) { - currentTOCItem = iterator.next(); - } - List startBlocks = new ArrayList<>(); - List startImages = new ArrayList<>(); - SectionTreeEntry currentSection = null; - boolean foundFirstHeadline = false; + List headlines = extractHeadlines(classificationDocument); - List headers = new ArrayList<>(); - List footers = new ArrayList<>(); - TablePageBlock previousTable = null; - List lastFoundTOCItems = new ArrayList<>(); + List mainSections = new ArrayList<>(); + Map lastItemsPerDepth = new HashMap<>(); + SectionTreeEntry last = null; + TreeSet depths = new TreeSet<>(); - for (ClassificationPage page : document.getPages()) { - List currentPageTOCItems = new ArrayList<>(); - List header = new ArrayList<>(); - List footer = new ArrayList<>(); - for (AbstractPageBlock current : page.getTextBlocks()) { + for (TextPageBlock current : headlines) { + int currentDepth = getHeadlineNumber(current.getClassification()); + Integer parentDepth = depths.floor(currentDepth - 1); - if (current.getClassification() == null) { - continue; + var tocItem = new SectionTreeEntry(current); + + if (parentDepth == null) { + mainSections.add(tocItem); + lastItemsPerDepth = new HashMap<>(); + depths = new TreeSet<>(); + + } else { + assert last != null; + int lastDepth = getHeadlineNumber(last.getHeadline().getClassification()); + if (last.getHeadline().getClassification().equals(TABLE_OF_CONTENTS_HEADLINE) && !current.getClassification().equals(TABLE_OF_CONTENTS_HEADLINE)) { + // headline after toc should always start a main section + parentDepth = 1; + } else if (lastDepth < parentDepth) { + parentDepth = lastDepth; + } else if (lastDepth == currentDepth && last.getParent() != null) { + parentDepth = getHeadlineNumber(last.getParent().getHeadline().getClassification()); } - current.setPage(page.getPageNumber()); - - if (current.getClassification().equals(PageBlockType.HEADER)) { - header.add((TextPageBlock) current); - continue; - } - - if (current.getClassification().equals(PageBlockType.FOOTER)) { - footer.add((TextPageBlock) current); - continue; - } - - if (current instanceof TablePageBlock table) { - if (previousTable != null) { - mergeTableMetadata(table, previousTable); - } - previousTable = table; - } - - if (current instanceof TextPageBlock && currentTOCItem != null && currentTOCItem.getHeadline().getText().equals(current.getText())) { - if (!foundFirstHeadline) { - foundFirstHeadline = true; - } - currentSection = currentTOCItem; - currentTOCItem.getSectionBlocks().add(current); - currentPageTOCItems.add(currentTOCItem); - - if (iterator.hasNext()) { - currentTOCItem = iterator.next(); - } - } else if (!foundFirstHeadline) { - startBlocks.add(current); - } else { - currentSection.getSectionBlocks().add(current); - } + SectionTreeEntry parent = lastItemsPerDepth.get(parentDepth); + parent.addChild(tocItem); } - if (!currentPageTOCItems.isEmpty()) { - lastFoundTOCItems = currentPageTOCItems; - } - - for (ClassifiedImage image : page.getImages()) { - - Double xMin = null; - Double yMin = null; - Double xMax = null; - Double yMax = null; - - for (SectionTreeEntry tocItem : lastFoundTOCItems) { - var headline = tocItem.getHeadline(); - - if (headline.getPage() != page.getPageNumber()) { - continue; - } - - if (headline.getMinX() < headline.getMaxX()) { - if (xMin == null || headline.getMinX() < xMin) { - xMin = headline.getMinX(); - } - if (xMax == null || headline.getMaxX() > xMax) { - xMax = headline.getMaxX(); - } - } else { - if (xMin == null || headline.getMaxX() < xMin) { - xMin = headline.getMaxX(); - } - if (xMax == null || headline.getMinX() > xMax) { - xMax = headline.getMinX(); - } - } - - if (headline.getMinY() < headline.getMaxY()) { - if (yMin == null || headline.getMinY() < yMin) { - yMin = headline.getMinY(); - } - if (yMax == null || headline.getMaxY() > yMax) { - yMax = headline.getMaxY(); - } - } else { - if (yMin == null || headline.getMaxY() < yMin) { - yMin = headline.getMaxY(); - } - if (yMax == null || headline.getMinY() > yMax) { - yMax = headline.getMinY(); - } - } - - log.debug("Image position x: {}, y: {}", image.getPosition().getX(), image.getPosition().getY()); - log.debug("Headline position xMin: {}, xMax: {}, yMin: {}, yMax: {}", xMin, xMax, yMin, yMax); - - if (image.getPosition().getX() >= xMin && image.getPosition().getX() <= xMax && image.getPosition().getY() >= yMin && image.getPosition().getY() <= yMax) { - tocItem.getImages().add(image); - image.setAppendedToSection(true); - break; - } - } - if (!image.isAppendedToSection()) { - log.debug("Image uses last found section"); - if (!lastFoundTOCItems.isEmpty()) { - lastFoundTOCItems.get(lastFoundTOCItems.size() - 1).getImages().add(image); - } else { - startImages.add(image); - } - image.setAppendedToSection(true); - } - } - - if (!header.isEmpty()) { - headers.add(new ClassificationHeader(header)); - } - if (!footer.isEmpty()) { - footers.add(new ClassificationFooter(footer)); - } + last = tocItem; + lastItemsPerDepth.put(currentDepth, tocItem); + depths.add(currentDepth); } - if (!startBlocks.isEmpty() || !startImages.isEmpty()) { - SectionTreeEntry unassigned = new SectionTreeEntry(null); - unassigned.setSectionBlocks(startBlocks); - unassigned.setImages(startImages); - document.getSectionTree().getMainSections().add(0, unassigned); - } - document.setHeaders(headers); - document.setFooters(footers); + return new SectionTree(mainSections); + } - private void mergeTableMetadata(TablePageBlock currentTable, TablePageBlock previousTable) { + private static List extractHeadlines(ClassificationDocument classificationDocument) { - // Distribute header information for subsequent tables - if (previousTable != null && hasInvalidHeaderInformation(currentTable) && hasValidHeaderInformation(previousTable)) { - List previousTableNonHeaderRow = getRowWithNonHeaderCells(previousTable); - List tableNonHeaderRow = getRowWithNonHeaderCells(currentTable); - // Allow merging of tables if header row is separated from first logical non-header row - if (previousTableNonHeaderRow.isEmpty() && previousTable.getRowCount() == 1 && previousTable.getRows().get(0).size() == tableNonHeaderRow.size()) { - previousTableNonHeaderRow = previousTable.getRows().get(0) - .stream() - .map(cell -> { - Cell fakeCell = Cell.copy(cell); - fakeCell.setHeaderCells(Collections.singletonList(cell)); - return fakeCell; - }) - .toList(); - } - if (previousTableNonHeaderRow.size() == tableNonHeaderRow.size()) { - for (int i = currentTable.getRowCount() - 1; i >= 0; i--) { // Non header rows are most likely at bottom of table - List row = currentTable.getRows().get(i); - if (row.size() == tableNonHeaderRow.size() && row.stream() - .allMatch(cell -> cell.getHeaderCells().isEmpty())) { - for (int j = 0; j < row.size(); j++) { - row.get(j).setHeaderCells(previousTableNonHeaderRow.get(j).getHeaderCells()); - } - } - } - } - } - } - - - private boolean hasValidHeaderInformation(TablePageBlock table) { - - return !hasInvalidHeaderInformation(table); - } - - - private boolean hasInvalidHeaderInformation(TablePageBlock table) { - - return table.getRows() + return classificationDocument.getPages() .stream() - .flatMap(Collection::stream) - .allMatch(cell -> cell.getHeaderCells().isEmpty()); - } - - - private List getRowWithNonHeaderCells(TablePageBlock table) { - - for (int i = table.getRowCount() - 1; i >= 0; i--) { // Non header rows are most likely at bottom of table - List row = table.getRows().get(i); - if (row.size() == 1) { - continue; - } - boolean allNonHeader = true; - for (Cell cell : row) { - if (cell.isHeaderCell()) { - allNonHeader = false; - break; - } - } - if (allNonHeader) { - return row; - } - } - - return Collections.emptyList(); - + .flatMap(classificationPage -> classificationPage.getTextBlocks() + .stream() + .filter(tb -> tb instanceof TextPageBlock && tb.getClassification() != null && tb.getClassification().isHeadline()) + .map(tb -> (TextPageBlock) tb)) + .toList(); } } diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/SectionTreeEnhancementService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/SectionTreeEnhancementService.java new file mode 100644 index 0000000..af4d6a2 --- /dev/null +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/SectionTreeEnhancementService.java @@ -0,0 +1,252 @@ +package com.knecon.fforesight.service.layoutparser.processor.model.outline; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; + +import org.springframework.stereotype.Service; + +import com.knecon.fforesight.service.layoutparser.processor.model.AbstractPageBlock; +import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationDocument; +import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationFooter; +import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationHeader; +import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage; +import com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType; +import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage; +import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell; +import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock; +import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock; + +import lombok.extern.slf4j.Slf4j; + +@Slf4j +@Service +public class SectionTreeEnhancementService { + + public void assignSectionBlocksAndImages(ClassificationDocument document) { + + SectionTree toc = document.getSectionTree(); + Iterator iterator = toc.iterator(); + SectionTreeEntry currentTOCItem = null; + if (iterator.hasNext()) { + currentTOCItem = iterator.next(); + } + List startBlocks = new ArrayList<>(); + List startImages = new ArrayList<>(); + SectionTreeEntry currentSection = null; + boolean foundFirstHeadline = false; + + List headers = new ArrayList<>(); + List footers = new ArrayList<>(); + TablePageBlock previousTable = null; + List lastFoundTOCItems = new ArrayList<>(); + + for (ClassificationPage page : document.getPages()) { + List currentPageTOCItems = new ArrayList<>(); + List header = new ArrayList<>(); + List footer = new ArrayList<>(); + for (AbstractPageBlock current : page.getTextBlocks()) { + + if (current.getClassification() == null) { + continue; + } + + current.setPage(page.getPageNumber()); + + if (current.getClassification().equals(PageBlockType.HEADER)) { + header.add((TextPageBlock) current); + continue; + } + + if (current.getClassification().equals(PageBlockType.FOOTER)) { + footer.add((TextPageBlock) current); + continue; + } + + if (current instanceof TablePageBlock table) { + if (previousTable != null) { + mergeTableMetadata(table, previousTable); + } + previousTable = table; + } + + if (current instanceof TextPageBlock && currentTOCItem != null && currentTOCItem.getHeadline().getText().equals(current.getText())) { + if (!foundFirstHeadline) { + foundFirstHeadline = true; + } + currentSection = currentTOCItem; + currentTOCItem.getSectionBlocks().add(current); + currentPageTOCItems.add(currentTOCItem); + + if (iterator.hasNext()) { + currentTOCItem = iterator.next(); + } + } else if (!foundFirstHeadline) { + startBlocks.add(current); + } else { + currentSection.getSectionBlocks().add(current); + } + } + + if (!currentPageTOCItems.isEmpty()) { + lastFoundTOCItems = currentPageTOCItems; + } + + for (ClassifiedImage image : page.getImages()) { + + Double xMin = null; + Double yMin = null; + Double xMax = null; + Double yMax = null; + + for (SectionTreeEntry tocItem : lastFoundTOCItems) { + var headline = tocItem.getHeadline(); + + if (headline.getPage() != page.getPageNumber()) { + continue; + } + + if (headline.getMinX() < headline.getMaxX()) { + if (xMin == null || headline.getMinX() < xMin) { + xMin = headline.getMinX(); + } + if (xMax == null || headline.getMaxX() > xMax) { + xMax = headline.getMaxX(); + } + } else { + if (xMin == null || headline.getMaxX() < xMin) { + xMin = headline.getMaxX(); + } + if (xMax == null || headline.getMinX() > xMax) { + xMax = headline.getMinX(); + } + } + + if (headline.getMinY() < headline.getMaxY()) { + if (yMin == null || headline.getMinY() < yMin) { + yMin = headline.getMinY(); + } + if (yMax == null || headline.getMaxY() > yMax) { + yMax = headline.getMaxY(); + } + } else { + if (yMin == null || headline.getMaxY() < yMin) { + yMin = headline.getMaxY(); + } + if (yMax == null || headline.getMinY() > yMax) { + yMax = headline.getMinY(); + } + } + + log.debug("Image position x: {}, y: {}", image.getPosition().getX(), image.getPosition().getY()); + log.debug("Headline position xMin: {}, xMax: {}, yMin: {}, yMax: {}", xMin, xMax, yMin, yMax); + + if (image.getPosition().getX() >= xMin && image.getPosition().getX() <= xMax && image.getPosition().getY() >= yMin && image.getPosition().getY() <= yMax) { + tocItem.getImages().add(image); + image.setAppendedToSection(true); + break; + } + } + if (!image.isAppendedToSection()) { + log.debug("Image uses last found section"); + if (!lastFoundTOCItems.isEmpty()) { + lastFoundTOCItems.get(lastFoundTOCItems.size() - 1).getImages().add(image); + } else { + startImages.add(image); + } + image.setAppendedToSection(true); + } + } + + if (!header.isEmpty()) { + headers.add(new ClassificationHeader(header)); + } + if (!footer.isEmpty()) { + footers.add(new ClassificationFooter(footer)); + } + } + + if (!startBlocks.isEmpty() || !startImages.isEmpty()) { + SectionTreeEntry unassigned = new SectionTreeEntry(null); + unassigned.setSectionBlocks(startBlocks); + unassigned.setImages(startImages); + document.getSectionTree().getMainSections().add(0, unassigned); + } + document.setHeaders(headers); + document.setFooters(footers); + } + + + private void mergeTableMetadata(TablePageBlock currentTable, TablePageBlock previousTable) { + + // Distribute header information for subsequent tables + if (previousTable != null && hasInvalidHeaderInformation(currentTable) && hasValidHeaderInformation(previousTable)) { + List previousTableNonHeaderRow = getRowWithNonHeaderCells(previousTable); + List tableNonHeaderRow = getRowWithNonHeaderCells(currentTable); + // Allow merging of tables if header row is separated from first logical non-header row + if (previousTableNonHeaderRow.isEmpty() && previousTable.getRowCount() == 1 && previousTable.getRows().get(0).size() == tableNonHeaderRow.size()) { + previousTableNonHeaderRow = previousTable.getRows().get(0) + .stream() + .map(cell -> { + Cell fakeCell = Cell.copy(cell); + fakeCell.setHeaderCells(Collections.singletonList(cell)); + return fakeCell; + }) + .toList(); + } + if (previousTableNonHeaderRow.size() == tableNonHeaderRow.size()) { + for (int i = currentTable.getRowCount() - 1; i >= 0; i--) { // Non header rows are most likely at bottom of table + List row = currentTable.getRows().get(i); + if (row.size() == tableNonHeaderRow.size() && row.stream() + .allMatch(cell -> cell.getHeaderCells().isEmpty())) { + for (int j = 0; j < row.size(); j++) { + row.get(j).setHeaderCells(previousTableNonHeaderRow.get(j).getHeaderCells()); + } + } + } + } + } + } + + + private boolean hasValidHeaderInformation(TablePageBlock table) { + + return !hasInvalidHeaderInformation(table); + } + + + private boolean hasInvalidHeaderInformation(TablePageBlock table) { + + return table.getRows() + .stream() + .flatMap(Collection::stream) + .allMatch(cell -> cell.getHeaderCells().isEmpty()); + } + + + private List getRowWithNonHeaderCells(TablePageBlock table) { + + for (int i = table.getRowCount() - 1; i >= 0; i--) { // Non header rows are most likely at bottom of table + List row = table.getRows().get(i); + if (row.size() == 1) { + continue; + } + boolean allNonHeader = true; + for (Cell cell : row) { + if (cell.isHeaderCell()) { + allNonHeader = false; + break; + } + } + if (allNonHeader) { + return row; + } + } + + return Collections.emptyList(); + + } + +}