diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/DocuMineBlockificationService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/DocuMineBlockificationService.java index 6f8fd3b..ca72723 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/DocuMineBlockificationService.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/DocuMineBlockificationService.java @@ -61,8 +61,8 @@ public class DocuMineBlockificationService { boolean newLineAfterSplit = prev != null && word.getMinYDirAdj() != prev.getMinYDirAdj() && wasSplitted && splitX1 != word.getMinXDirAdj(); boolean isSplitByRuling = isSplitByRuling(minX, minY, maxX, maxY, word, horizontalRulingLines, verticalRulingLines); boolean splitByDir = prev != null && !prev.getDir().equals(word.getDir()); - boolean splitByOtherFontAndOtherY = prev != null && prev.getMaxYDirAdj() != word.getMaxYDirAdj() && (word.getFontStyle().contains("bold") && !prev.getFontStyle() - .contains("bold") || prev.getFontStyle().contains("bold") && !word.getFontStyle().contains("bold")); + boolean splitByOtherFontAndOtherY = prev != null && Math.abs(prev.getMaxYDirAdj() - word.getMaxYDirAdj()) > word.getTextHeight() * 0.2 && (word.getFontStyle() + .contains("bold") && !prev.getFontStyle().contains("bold") || prev.getFontStyle().contains("bold") && !word.getFontStyle().contains("bold")); Matcher matcher = pattern.matcher(chunkWords.stream().collect(Collectors.joining(" ")).toString()); boolean startsOnSameX = Math.abs(minX - word.getMinXDirAdj()) < 5 && matcher.matches(); diff --git a/layoutparser-service/layoutparser-service-server/src/test/resources/files/SinglePages/DocumineIdentifierProblem.pdf b/layoutparser-service/layoutparser-service-server/src/test/resources/files/SinglePages/DocumineIdentifierProblem.pdf new file mode 100644 index 0000000..a13ba29 Binary files /dev/null and b/layoutparser-service/layoutparser-service-server/src/test/resources/files/SinglePages/DocumineIdentifierProblem.pdf differ