Merge branch 'RED-7806' into 'main'

RED-7806 - Specific customer document cannot be processed

See merge request fforesight/layout-parser!79
This commit is contained in:
Kilian Schüttler 2023-10-25 10:50:34 +02:00
commit 1b1f777706
3 changed files with 14 additions and 10 deletions

View File

@ -143,13 +143,13 @@ public class LayoutParsingPipeline {
return String.format("%d pages with %d sections, %d headlines, %d paragraphs, %d tables with %d cells, %d headers, and %d footers parsed",
numberOfPages,
semanticNodeCounts.get(NodeType.SECTION),
semanticNodeCounts.get(NodeType.HEADLINE),
semanticNodeCounts.get(NodeType.PARAGRAPH),
semanticNodeCounts.get(NodeType.TABLE),
semanticNodeCounts.get(NodeType.TABLE_CELL),
semanticNodeCounts.get(NodeType.HEADER),
semanticNodeCounts.get(NodeType.FOOTER));
semanticNodeCounts.get(NodeType.SECTION) == null ? 0 : semanticNodeCounts.get(NodeType.SECTION),
semanticNodeCounts.get(NodeType.HEADLINE) == null ? 0 : semanticNodeCounts.get(NodeType.HEADLINE),
semanticNodeCounts.get(NodeType.PARAGRAPH) == null ? 0 : semanticNodeCounts.get(NodeType.PARAGRAPH),
semanticNodeCounts.get(NodeType.TABLE) == null ? 0 : semanticNodeCounts.get(NodeType.TABLE),
semanticNodeCounts.get(NodeType.TABLE_CELL) == null ? 0 : semanticNodeCounts.get(NodeType.TABLE_CELL),
semanticNodeCounts.get(NodeType.HEADER) == null ? 0 : semanticNodeCounts.get(NodeType.HEADER),
semanticNodeCounts.get(NodeType.FOOTER) == null ? 0 : semanticNodeCounts.get(NodeType.FOOTER));
}

View File

@ -224,6 +224,9 @@ public class TextPositionSequence implements CharSequence {
@JsonAttribute(ignore = true)
public String getFont() {
    // The font of the whole sequence is taken from its first text position.
    // Read the name once so the null check and the normalization use the same value.
    String fontName = textPositions.get(0).getFontName();
    if (fontName == null) {
        // No font name available: report the "none" placeholder instead of failing.
        return "none";
    }
    // Lower-case locale-independently, then strip the ",bold" / ",italic" style suffixes.
    // These are literal substrings, so String.replace is used rather than regex-based replaceAll.
    return fontName.toLowerCase(Locale.ROOT).replace(",bold", "").replace(",italic", "");
}
@ -231,7 +234,9 @@ public class TextPositionSequence implements CharSequence {
@JsonIgnore
@JsonAttribute(ignore = true)
public String getFontStyle() {
if (textPositions.get(0).getFontName() == null) {
return "standard";
}
String lowercaseFontName = textPositions.get(0).getFontName().toLowerCase(Locale.ROOT);
if (lowercaseFontName.contains("bold") && lowercaseFontName.contains("italic")) {
@ -243,7 +248,6 @@ public class TextPositionSequence implements CharSequence {
} else {
return "standard";
}
}

View File

@ -133,7 +133,7 @@ public class SearchTextWithTextPositionFactory {
private static void addTextPositionWithFontType(RedTextPosition currentTextPosition, String fontType, List<Integer> fontTypePositions, int stringIdx) {
if (currentTextPosition.getFontName().toLowerCase(Locale.ROOT).contains(fontType)) {
if (currentTextPosition.getFontName() != null && currentTextPosition.getFontName().toLowerCase(Locale.ROOT).contains(fontType)) {
fontTypePositions.add(stringIdx);
}
}