RED-8995: unclassified text might be missing from document data
* treat PageBlockType.OTHER like PARAGRAPH (no special treatment)
This commit is contained in:
parent
778bae0f7f
commit
2addf63baf
@@ -43,7 +43,6 @@ public class SectionsBuilderService {
|
|||||||
for (ClassificationPage page : document.getPages()) {
|
for (ClassificationPage page : document.getPages()) {
|
||||||
List<TextPageBlock> header = new ArrayList<>();
|
List<TextPageBlock> header = new ArrayList<>();
|
||||||
List<TextPageBlock> footer = new ArrayList<>();
|
List<TextPageBlock> footer = new ArrayList<>();
|
||||||
List<TextPageBlock> unclassifiedText = new ArrayList<>();
|
|
||||||
for (AbstractPageBlock current : page.getTextBlocks()) {
|
for (AbstractPageBlock current : page.getTextBlocks()) {
|
||||||
|
|
||||||
if (current.getClassification() == null) {
|
if (current.getClassification() == null) {
|
||||||
@@ -62,11 +61,6 @@ public class SectionsBuilderService {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (current.getClassification().equals(PageBlockType.OTHER)) {
|
|
||||||
unclassifiedText.add((TextPageBlock) current);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (prev != null && current.getClassification().isHeadline() && !prev.getClassification().isHeadline() || !document.isHeadlines()) {
|
if (prev != null && current.getClassification().isHeadline() && !prev.getClassification().isHeadline() || !document.isHeadlines()) {
|
||||||
ClassificationSection chunkBlock = buildTextBlock(chunkWords, lastHeadline);
|
ClassificationSection chunkBlock = buildTextBlock(chunkWords, lastHeadline);
|
||||||
chunkBlock.setHeadline(lastHeadline);
|
chunkBlock.setHeadline(lastHeadline);
|
||||||
@@ -94,9 +88,6 @@ public class SectionsBuilderService {
|
|||||||
if (!footer.isEmpty()) {
|
if (!footer.isEmpty()) {
|
||||||
footers.add(new ClassificationFooter(footer));
|
footers.add(new ClassificationFooter(footer));
|
||||||
}
|
}
|
||||||
if (!unclassifiedText.isEmpty()) {
|
|
||||||
unclassifiedTexts.add(new UnclassifiedText(unclassifiedText));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ClassificationSection chunkBlock = buildTextBlock(chunkWords, lastHeadline);
|
ClassificationSection chunkBlock = buildTextBlock(chunkWords, lastHeadline);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user