RED-7074: Design Subsection section tree structure algorithm

* post rebase fixup
This commit is contained in:
maverickstuder 2024-05-15 15:09:31 +02:00
parent 61c90fc30d
commit 49f13d1f03
7 changed files with 19 additions and 94 deletions

View File

@ -97,10 +97,10 @@ public class TOCEnrichmentService {
for (ClassifiedImage image : page.getImages()) {
Float xMin = null;
Float yMin = null;
Float xMax = null;
Float yMax = null;
Double xMin = null;
Double yMin = null;
Double xMax = null;
Double yMax = null;
for (TableOfContentItem tocItem : lastFoundTOCItems) {
var headline = tocItem.getHeadline();
@ -195,7 +195,7 @@ public class TOCEnrichmentService {
.get(0)
.stream()
.map(cell -> {
Cell fakeCell = new Cell(cell.getPoints()[0], cell.getPoints()[2]);
Cell fakeCell = Cell.copy(cell);
fakeCell.setHeaderCells(Collections.singletonList(cell));
return fakeCell;
})

View File

@ -58,17 +58,6 @@ public class TextPageBlock extends AbstractPageBlock {
}
/**
 * Recomputes the bounding boxes of this block.
 *
 * <p>With sequences present the work is delegated to
 * {@code setToBBoxOfComponents(sequences)}. When {@code sequences} is
 * {@code null}, both {@code bBox} and {@code bBoxInitialUserSpace} are
 * replaced by freshly constructed {@code Rectangle2D.Double} instances.
 */
private void calculateBBox() {

    if (sequences != null) {
        setToBBoxOfComponents(sequences);
        return;
    }

    // Nothing to measure: fall back to default-constructed rectangles.
    this.bBox = new Rectangle2D.Double();
    this.bBoxInitialUserSpace = new Rectangle2D.Double();
}
@JsonIgnore
public float getPageHeight() {
@ -83,6 +72,17 @@ public class TextPageBlock extends AbstractPageBlock {
}
/**
 * Recomputes the bounding boxes of this block.
 *
 * <p>With sequences present the work is delegated to
 * {@code setToBBoxOfComponents(sequences)}. When {@code sequences} is
 * {@code null}, both {@code bBox} and {@code bBoxInitialUserSpace} are
 * replaced by freshly constructed {@code Rectangle2D.Double} instances.
 */
public void calculateBBox() {

    if (sequences != null) {
        setToBBoxOfComponents(sequences);
        return;
    }

    // Nothing to measure: fall back to default-constructed rectangles.
    this.bBox = new Rectangle2D.Double();
    this.bBoxInitialUserSpace = new Rectangle2D.Double();
}
public static TextPageBlock merge(List<TextPageBlock> textBlocksToMerge) {
if (textBlocksToMerge.isEmpty()) {
@ -105,7 +105,6 @@ public class TextPageBlock extends AbstractPageBlock {
}
private void calculateFrequencyCounters() {
FloatFrequencyCounter lineHeightFrequencyCounter = new FloatFrequencyCounter();

View File

@ -237,7 +237,7 @@ public class BlockificationPostprocessingService {
boolean modifiedBlockToSplit = false;
if (!wordSequenceResult.inSequence.isEmpty()) {
blockToSplit.setSequences(wordSequenceResult.inSequence);
blockToSplit.resize();
blockToSplit.calculateBBox();
modifiedBlockToSplit = true;
}
@ -368,7 +368,7 @@ public class BlockificationPostprocessingService {
assert firstBlock != null;
firstBlock.setToDuplicate(false);
firstBlock.resize();
firstBlock.calculateBBox();
classificationPage.getTextBlocks().removeAll(mergedBlocks);
}

View File

@ -101,18 +101,6 @@ public class DocstrumBlockificationService {
abstractPageBlocks.add(buildTextBlock(textPositionSequences, 0));
});
if (xyOrder) {
abstractPageBlocks.sort(Comparator.comparing(AbstractPageBlock::getMinY, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD))
.thenComparing(AbstractPageBlock::getMinX, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD)));
abstractPageBlocks.sort(new Comparator<AbstractPageBlock>() {
@Override
public int compare(AbstractPageBlock o1, AbstractPageBlock o2) {
return Math.abs(o1.getMinY() - o2.getMinY()) < 5 && o1.getMinX() < o2.getMinX() == true ? -1 : 0;
}
});
}
return abstractPageBlocks;
}
@ -196,8 +184,7 @@ public class DocstrumBlockificationService {
/**
 * Checks whether {@code previous.intersectsY(current)} holds while
 * {@code previous} has exactly one line and {@code current} has at least one.
 *
 * <p>NOTE(review): two {@code return} statements follow each other below; they
 * look like the before and after forms of the same statement in this change
 * listing rather than two live statements - confirm against the real file.
 *
 * @param previous the block that comes first
 * @param current  the block compared against {@code previous}
 * @param page     not read by the visible body
 * @return {@code true} when all three conditions above hold
 */
private boolean intersectsYWithPreviousHavingMaxOneLine(TextPageBlock previous, TextPageBlock current, ClassificationPage page) {
return previous.intersectsY(current)//(Math.abs(previous.getMaxY() - current.getMaxY()) < THRESHOLD && Math.abs(previous.getMinY() - current.getMinY()) < THRESHOLD) //
&& (previous.getNumberOfLines() == 1 && current.getNumberOfLines() >= 1);
return previous.intersectsY(current) && (previous.getNumberOfLines() == 1 && current.getNumberOfLines() >= 1);
}
@ -326,67 +313,6 @@ public class DocstrumBlockificationService {
}
/**
 * Walks the words in order, groups consecutive words into chunks and builds one
 * text block per chunk via {@code buildTextBlock}.
 *
 * <p>A chunk is closed in front of a word when the chunk already holds a word
 * and either the two words report different {@code getDir()} values or
 * {@code isSplitByRuling} returns {@code true} for the chunk's running extent,
 * the word and the given rulings. The block built from the words left over
 * after the loop is appended whenever it is non-null; this step also runs when
 * {@code textPositions} is empty.
 *
 * @param textPositions        the words to group, in processing order
 * @param horizontalRulingLines rulings handed through to {@code isSplitByRuling}
 * @param verticalRulingLines   rulings handed through to {@code isSplitByRuling}
 * @return the blocks in the order their chunks were closed
 */
public List<AbstractPageBlock> splitZonesAtRulings(List<TextPositionSequence> textPositions, List<Ruling> horizontalRulingLines, List<Ruling> verticalRulingLines) {

    List<AbstractPageBlock> blocks = new ArrayList<>();
    List<TextPositionSequence> currentChunk = new ArrayList<>();
    int nextIndexOnPage = 0;

    // Running extent of the current chunk.
    // NOTE(review): the 1000 / 0 start values presumably assume coordinates within 0..1000 - confirm.
    float lowX = 1000;
    float highX = 0;
    float lowY = 1000;
    float highY = 0;

    TextPositionSequence previousWord = null;
    for (TextPositionSequence word : textPositions) {

        // The ruling check is evaluated for every word, including the first word of a chunk.
        boolean cutByRuling = isSplitByRuling(lowX, lowY, highX, highY, word, horizontalRulingLines, verticalRulingLines);
        boolean directionChanged = previousWord != null && !previousWord.getDir().equals(word.getDir());

        if (previousWord != null && (directionChanged || cutByRuling)) {
            // Close the running chunk and start a fresh one with reset extent.
            blocks.add(buildTextBlock(currentChunk, nextIndexOnPage));
            nextIndexOnPage++;
            currentChunk = new ArrayList<>();
            lowX = 1000;
            highX = 0;
            lowY = 1000;
            highY = 0;
        }

        currentChunk.add(word);
        previousWord = word;

        // Grow the extent so it covers the word just added.
        float wordLowX = word.getMinXDirAdj();
        if (wordLowX < lowX) {
            lowX = wordLowX;
        }
        float wordHighX = word.getMaxXDirAdj();
        if (wordHighX > highX) {
            highX = wordHighX;
        }
        float wordLowY = word.getMinYDirAdj();
        if (wordLowY < lowY) {
            lowY = wordLowY;
        }
        float wordHighY = word.getMaxYDirAdj();
        if (wordHighY > highY) {
            highY = wordHighY;
        }
    }

    // Whatever is left after the last split forms the final block.
    TextPageBlock trailingBlock = buildTextBlock(currentChunk, nextIndexOnPage);
    if (trailingBlock != null) {
        blocks.add(trailingBlock);
    }

    return blocks;
}
/**
 * Compares two floats with a tolerance.
 *
 * @param f1 first value
 * @param f2 second value
 * @return {@code true} when the absolute difference of the two values is
 *         strictly below {@code THRESHOLD}
 */
private boolean equalsWithThreshold(float f1, float f2) {

    float distance = Math.abs(f1 - f2);
    return distance < THRESHOLD;
}
public static TextPageBlock buildTextBlock(List<TextPositionSequence> wordBlockList, int indexOnPage) {
return new TextPageBlock(wordBlockList);