RED-7074: Design Subsection section tree structure algorithm
* post rebase fixup
This commit is contained in:
parent
61c90fc30d
commit
49f13d1f03
@ -97,10 +97,10 @@ public class TOCEnrichmentService {
|
||||
|
||||
for (ClassifiedImage image : page.getImages()) {
|
||||
|
||||
Float xMin = null;
|
||||
Float yMin = null;
|
||||
Float xMax = null;
|
||||
Float yMax = null;
|
||||
Double xMin = null;
|
||||
Double yMin = null;
|
||||
Double xMax = null;
|
||||
Double yMax = null;
|
||||
|
||||
for (TableOfContentItem tocItem : lastFoundTOCItems) {
|
||||
var headline = tocItem.getHeadline();
|
||||
@ -195,7 +195,7 @@ public class TOCEnrichmentService {
|
||||
.get(0)
|
||||
.stream()
|
||||
.map(cell -> {
|
||||
Cell fakeCell = new Cell(cell.getPoints()[0], cell.getPoints()[2]);
|
||||
Cell fakeCell = Cell.copy(cell);
|
||||
fakeCell.setHeaderCells(Collections.singletonList(cell));
|
||||
return fakeCell;
|
||||
})
|
||||
|
||||
@ -58,17 +58,6 @@ public class TextPageBlock extends AbstractPageBlock {
|
||||
}
|
||||
|
||||
|
||||
private void calculateBBox() {
|
||||
|
||||
if (sequences == null) {
|
||||
this.bBox = new Rectangle2D.Double();
|
||||
this.bBoxInitialUserSpace = new Rectangle2D.Double();
|
||||
return;
|
||||
}
|
||||
setToBBoxOfComponents(sequences);
|
||||
}
|
||||
|
||||
|
||||
@JsonIgnore
|
||||
public float getPageHeight() {
|
||||
|
||||
@ -83,6 +72,17 @@ public class TextPageBlock extends AbstractPageBlock {
|
||||
}
|
||||
|
||||
|
||||
public void calculateBBox() {
|
||||
|
||||
if (sequences == null) {
|
||||
this.bBox = new Rectangle2D.Double();
|
||||
this.bBoxInitialUserSpace = new Rectangle2D.Double();
|
||||
return;
|
||||
}
|
||||
setToBBoxOfComponents(sequences);
|
||||
}
|
||||
|
||||
|
||||
public static TextPageBlock merge(List<TextPageBlock> textBlocksToMerge) {
|
||||
|
||||
if (textBlocksToMerge.isEmpty()) {
|
||||
@ -105,7 +105,6 @@ public class TextPageBlock extends AbstractPageBlock {
|
||||
}
|
||||
|
||||
|
||||
|
||||
private void calculateFrequencyCounters() {
|
||||
|
||||
FloatFrequencyCounter lineHeightFrequencyCounter = new FloatFrequencyCounter();
|
||||
|
||||
@ -237,7 +237,7 @@ public class BlockificationPostprocessingService {
|
||||
boolean modifiedBlockToSplit = false;
|
||||
if (!wordSequenceResult.inSequence.isEmpty()) {
|
||||
blockToSplit.setSequences(wordSequenceResult.inSequence);
|
||||
blockToSplit.resize();
|
||||
blockToSplit.calculateBBox();
|
||||
modifiedBlockToSplit = true;
|
||||
}
|
||||
|
||||
@ -368,7 +368,7 @@ public class BlockificationPostprocessingService {
|
||||
|
||||
assert firstBlock != null;
|
||||
firstBlock.setToDuplicate(false);
|
||||
firstBlock.resize();
|
||||
firstBlock.calculateBBox();
|
||||
classificationPage.getTextBlocks().removeAll(mergedBlocks);
|
||||
}
|
||||
|
||||
|
||||
@ -101,18 +101,6 @@ public class DocstrumBlockificationService {
|
||||
abstractPageBlocks.add(buildTextBlock(textPositionSequences, 0));
|
||||
});
|
||||
|
||||
if (xyOrder) {
|
||||
abstractPageBlocks.sort(Comparator.comparing(AbstractPageBlock::getMinY, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD))
|
||||
.thenComparing(AbstractPageBlock::getMinX, (o1, o2) -> DoubleUtils.compareDouble(o1, o2, THRESHOLD)));
|
||||
abstractPageBlocks.sort(new Comparator<AbstractPageBlock>() {
|
||||
@Override
|
||||
public int compare(AbstractPageBlock o1, AbstractPageBlock o2) {
|
||||
|
||||
return Math.abs(o1.getMinY() - o2.getMinY()) < 5 && o1.getMinX() < o2.getMinX() == true ? -1 : 0;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
return abstractPageBlocks;
|
||||
}
|
||||
|
||||
@ -196,8 +184,7 @@ public class DocstrumBlockificationService {
|
||||
|
||||
private boolean intersectsYWithPreviousHavingMaxOneLine(TextPageBlock previous, TextPageBlock current, ClassificationPage page) {
|
||||
|
||||
return previous.intersectsY(current)//(Math.abs(previous.getMaxY() - current.getMaxY()) < THRESHOLD && Math.abs(previous.getMinY() - current.getMinY()) < THRESHOLD) //
|
||||
&& (previous.getNumberOfLines() == 1 && current.getNumberOfLines() >= 1);
|
||||
return previous.intersectsY(current) && (previous.getNumberOfLines() == 1 && current.getNumberOfLines() >= 1);
|
||||
}
|
||||
|
||||
|
||||
@ -326,67 +313,6 @@ public class DocstrumBlockificationService {
|
||||
}
|
||||
|
||||
|
||||
public List<AbstractPageBlock> splitZonesAtRulings(List<TextPositionSequence> textPositions, List<Ruling> horizontalRulingLines, List<Ruling> verticalRulingLines) {
|
||||
|
||||
int indexOnPage = 0;
|
||||
List<TextPositionSequence> chunkWords = new ArrayList<>();
|
||||
List<AbstractPageBlock> chunkBlockList = new ArrayList<>();
|
||||
|
||||
float minX = 1000, maxX = 0, minY = 1000, maxY = 0;
|
||||
TextPositionSequence prev = null;
|
||||
|
||||
for (TextPositionSequence word : textPositions) {
|
||||
|
||||
boolean isSplitByRuling = isSplitByRuling(minX, minY, maxX, maxY, word, horizontalRulingLines, verticalRulingLines);
|
||||
boolean splitByDir = prev != null && !prev.getDir().equals(word.getDir());
|
||||
|
||||
if (prev != null && (splitByDir || isSplitByRuling)) {
|
||||
|
||||
TextPageBlock cb1 = buildTextBlock(chunkWords, indexOnPage);
|
||||
indexOnPage++;
|
||||
|
||||
chunkBlockList.add(cb1);
|
||||
chunkWords = new ArrayList<>();
|
||||
|
||||
minX = 1000;
|
||||
maxX = 0;
|
||||
minY = 1000;
|
||||
maxY = 0;
|
||||
prev = null;
|
||||
}
|
||||
|
||||
chunkWords.add(word);
|
||||
|
||||
prev = word;
|
||||
if (word.getMinXDirAdj() < minX) {
|
||||
minX = word.getMinXDirAdj();
|
||||
}
|
||||
if (word.getMaxXDirAdj() > maxX) {
|
||||
maxX = word.getMaxXDirAdj();
|
||||
}
|
||||
if (word.getMinYDirAdj() < minY) {
|
||||
minY = word.getMinYDirAdj();
|
||||
}
|
||||
if (word.getMaxYDirAdj() > maxY) {
|
||||
maxY = word.getMaxYDirAdj();
|
||||
}
|
||||
}
|
||||
|
||||
TextPageBlock cb1 = buildTextBlock(chunkWords, indexOnPage);
|
||||
if (cb1 != null) {
|
||||
chunkBlockList.add(cb1);
|
||||
}
|
||||
|
||||
return chunkBlockList;
|
||||
}
|
||||
|
||||
|
||||
private boolean equalsWithThreshold(float f1, float f2) {
|
||||
|
||||
return Math.abs(f1 - f2) < THRESHOLD;
|
||||
}
|
||||
|
||||
|
||||
public static TextPageBlock buildTextBlock(List<TextPositionSequence> wordBlockList, int indexOnPage) {
|
||||
|
||||
return new TextPageBlock(wordBlockList);
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user