Merge branch 'RED-8995' into 'release/0.89.x'

RED-8995: unclassified text might be missing from document data

See merge request fforesight/layout-parser!133
This commit is contained in:
Kilian Schüttler 2024-04-18 10:01:12 +02:00
commit bdbac18169

View File

@@ -43,7 +43,6 @@ public class SectionsBuilderService {
for (ClassificationPage page : document.getPages()) {
List<TextPageBlock> header = new ArrayList<>();
List<TextPageBlock> footer = new ArrayList<>();
List<TextPageBlock> unclassifiedText = new ArrayList<>();
for (AbstractPageBlock current : page.getTextBlocks()) {
if (current.getClassification() == null) {
@@ -62,11 +61,6 @@ public class SectionsBuilderService {
continue;
}
if (current.getClassification().equals(PageBlockType.OTHER)) {
unclassifiedText.add((TextPageBlock) current);
continue;
}
if (prev != null && current.getClassification().isHeadline() && !prev.getClassification().isHeadline() || !document.isHeadlines()) {
ClassificationSection chunkBlock = buildTextBlock(chunkWords, lastHeadline);
chunkBlock.setHeadline(lastHeadline);
@@ -94,9 +88,6 @@ public class SectionsBuilderService {
if (!footer.isEmpty()) {
footers.add(new ClassificationFooter(footer));
}
if (!unclassifiedText.isEmpty()) {
unclassifiedTexts.add(new UnclassifiedText(unclassifiedText));
}
}
ClassificationSection chunkBlock = buildTextBlock(chunkWords, lastHeadline);