diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/TOCEnrichmentService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/TOCEnrichmentService.java index 4e257cb..d5526f6 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/TOCEnrichmentService.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/TOCEnrichmentService.java @@ -97,10 +97,10 @@ public class TOCEnrichmentService { for (ClassifiedImage image : page.getImages()) { - Float xMin = null; - Float yMin = null; - Float xMax = null; - Float yMax = null; + Double xMin = null; + Double yMin = null; + Double xMax = null; + Double yMax = null; for (TableOfContentItem tocItem : lastFoundTOCItems) { var headline = tocItem.getHeadline(); @@ -195,7 +195,7 @@ public class TOCEnrichmentService { .get(0) .stream() .map(cell -> { - Cell fakeCell = new Cell(cell.getPoints()[0], cell.getPoints()[2]); + Cell fakeCell = Cell.copy(cell); fakeCell.setHeaderCells(Collections.singletonList(cell)); return fakeCell; }) diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextPageBlock.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextPageBlock.java index 77ae789..af0a3fd 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextPageBlock.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/TextPageBlock.java @@ -58,17 +58,6 @@ public class TextPageBlock extends AbstractPageBlock { } - private void calculateBBox() { - - if (sequences == null) { - this.bBox = new Rectangle2D.Double(); - this.bBoxInitialUserSpace = new Rectangle2D.Double(); - return; - } - setToBBoxOfComponents(sequences); - } - - @JsonIgnore public float getPageHeight() { @@ -83,6 +72,17 @@ public class TextPageBlock extends AbstractPageBlock { } + public void calculateBBox() { + + if (sequences == null) { + this.bBox = new Rectangle2D.Double(); + this.bBoxInitialUserSpace = new Rectangle2D.Double(); + return; + } + setToBBoxOfComponents(sequences); + } + + public static TextPageBlock merge(List textBlocksToMerge) { if (textBlocksToMerge.isEmpty()) { @@ -105,7 +105,6 @@ public class TextPageBlock extends AbstractPageBlock { } - private void calculateFrequencyCounters() { FloatFrequencyCounter lineHeightFrequencyCounter = new FloatFrequencyCounter(); diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/BlockificationPostprocessingService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/BlockificationPostprocessingService.java index 7863e71..9a82796 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/BlockificationPostprocessingService.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/BlockificationPostprocessingService.java @@ -237,7 +237,7 @@ public class BlockificationPostprocessingService { boolean modifiedBlockToSplit = false; if (!wordSequenceResult.inSequence.isEmpty()) { blockToSplit.setSequences(wordSequenceResult.inSequence); - blockToSplit.resize(); + blockToSplit.calculateBBox(); modifiedBlockToSplit = true; } @@ -368,7 +368,7 @@ public class BlockificationPostprocessingService { assert firstBlock != null; firstBlock.setToDuplicate(false); - firstBlock.resize(); + firstBlock.calculateBBox(); classificationPage.getTextBlocks().removeAll(mergedBlocks); } diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/DocstrumBlockificationService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/DocstrumBlockificationService.java index 7e65d99..1b2b2df 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/DocstrumBlockificationService.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/blockification/DocstrumBlockificationService.java @@ -101,18 +101,6 @@ public class DocstrumBlockificationService { abstractPageBlocks.add(buildTextBlock(textPositionSequences, 0)); }); - if (xyOrder) { - abstractPageBlocks.sort(Comparator.comparing(AbstractPageBlock::getMinY, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD)) - .thenComparing(AbstractPageBlock::getMinX, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD))); - abstractPageBlocks.sort(new Comparator() { - @Override - public int compare(AbstractPageBlock o1, AbstractPageBlock o2) { - - return Math.abs(o1.getMinY() - o2.getMinY()) < 5 && o1.getMinX() < o2.getMinX() == true ? -1 : 0; - } - }); - } - return abstractPageBlocks; } @@ -196,8 +184,7 @@ public class DocstrumBlockificationService { private boolean intersectsYWithPreviousHavingMaxOneLine(TextPageBlock previous, TextPageBlock current, ClassificationPage page) { - return previous.intersectsY(current)//(Math.abs(previous.getMaxY() - current.getMaxY()) < THRESHOLD && Math.abs(previous.getMinY() - current.getMinY()) < THRESHOLD) // - && (previous.getNumberOfLines() == 1 && current.getNumberOfLines() >= 1); + return previous.intersectsY(current) && (previous.getNumberOfLines() == 1 && current.getNumberOfLines() >= 1); } @@ -326,67 +313,6 @@ public class DocstrumBlockificationService { } - public List splitZonesAtRulings(List textPositions, List horizontalRulingLines, List verticalRulingLines) { - - int indexOnPage = 0; - List chunkWords = new ArrayList<>(); - List chunkBlockList = new ArrayList<>(); - - float minX = 1000, maxX = 0, minY = 1000, maxY = 0; - TextPositionSequence prev = null; - - for (TextPositionSequence word : textPositions) { - - boolean isSplitByRuling = isSplitByRuling(minX, minY, maxX, maxY, word, horizontalRulingLines, verticalRulingLines); - boolean splitByDir = prev != null && !prev.getDir().equals(word.getDir()); - - if (prev != null && (splitByDir || isSplitByRuling)) { - - TextPageBlock cb1 = buildTextBlock(chunkWords, indexOnPage); - indexOnPage++; - - chunkBlockList.add(cb1); - chunkWords = new ArrayList<>(); - - minX = 1000; - maxX = 0; - minY = 1000; - maxY = 0; - prev = null; - } - - chunkWords.add(word); - - prev = word; - if (word.getMinXDirAdj() < minX) { - minX = word.getMinXDirAdj(); - } - if (word.getMaxXDirAdj() > maxX) { - maxX = word.getMaxXDirAdj(); - } - if (word.getMinYDirAdj() < minY) { - minY = word.getMinYDirAdj(); - } - if (word.getMaxYDirAdj() > maxY) { - maxY = word.getMaxYDirAdj(); - } - } - - TextPageBlock cb1 = buildTextBlock(chunkWords, indexOnPage); - if (cb1 != null) { - chunkBlockList.add(cb1); - } - - return chunkBlockList; - } - - - private boolean equalsWithThreshold(float f1, float f2) { - - return Math.abs(f1 - f2) < THRESHOLD; - } - - public static TextPageBlock buildTextBlock(List wordBlockList, int indexOnPage) { return new TextPageBlock(wordBlockList); diff --git a/layoutparser-service/layoutparser-service-server/src/test/resources/files/Minimal Examples/RotateTextWithRulingsTestFile.pdf b/layoutparser-service/layoutparser-service-server/src/test/resources/files/Minimal Examples/RotateTextWithRulingsTestFile.pdf index da05904..88846ba 100644 Binary files a/layoutparser-service/layoutparser-service-server/src/test/resources/files/Minimal Examples/RotateTextWithRulingsTestFile.pdf and b/layoutparser-service/layoutparser-service-server/src/test/resources/files/Minimal Examples/RotateTextWithRulingsTestFile.pdf differ diff --git a/layoutparser-service/layoutparser-service-server/src/test/resources/files/Minimal Examples/simpleTablesRotated.pdf b/layoutparser-service/layoutparser-service-server/src/test/resources/files/Minimal Examples/simpleTablesRotated.pdf index f6571ef..6084303 100644 Binary files a/layoutparser-service/layoutparser-service-server/src/test/resources/files/Minimal Examples/simpleTablesRotated.pdf and b/layoutparser-service/layoutparser-service-server/src/test/resources/files/Minimal Examples/simpleTablesRotated.pdf differ diff --git a/layoutparser-service/layoutparser-service-server/src/test/resources/files/SinglePages/AbsolutelyEnormousTable.pdf b/layoutparser-service/layoutparser-service-server/src/test/resources/files/SinglePages/AbsolutelyEnormousTable.pdf index e6d9a07..f0e1f7e 100644 Binary files a/layoutparser-service/layoutparser-service-server/src/test/resources/files/SinglePages/AbsolutelyEnormousTable.pdf and b/layoutparser-service/layoutparser-service-server/src/test/resources/files/SinglePages/AbsolutelyEnormousTable.pdf differ