RED-7074: Design Subsection section tree structure algorithm
This commit is contained in:
parent
3a2ee903af
commit
c05f67cf44
@ -2,5 +2,6 @@ package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction;
|
||||
|
||||
/**
 * Enumerates the layout engines that can be recorded in a page block's or node's {@code engines} set.
 * NOTE(review): the precise meaning of each constant is not visible here; in the visible code, OUTLINE is
 * added to a block's engines at the moment its classification is set from an outline match.
 */
public enum LayoutEngine {
|
||||
ALGORITHM,
|
||||
AI
|
||||
AI,
|
||||
OUTLINE
|
||||
}
|
||||
|
||||
@ -1,9 +1,10 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.model;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.Rectangle;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.docstrum.model.BoundingBox;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
|
||||
|
||||
@ -20,6 +21,9 @@ public abstract class AbstractPageBlock extends BoundingBox {
|
||||
|
||||
@JsonIgnore
|
||||
protected PageBlockType classification;
|
||||
|
||||
Set<LayoutEngine> engines = new HashSet<>();
|
||||
|
||||
@JsonIgnore
|
||||
protected int page;
|
||||
|
||||
|
||||
@ -12,6 +12,7 @@ import java.util.function.Function;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.PageBlockType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.SectionIdentifier;
|
||||
@ -204,21 +205,28 @@ public class BlockificationPostprocessingService {
|
||||
return false;
|
||||
}
|
||||
if (minDistance == distanceToDirectMatch) {
|
||||
directMatch.setClassification(headlineType);
|
||||
setClassificationAndAddOutlineEngine(directMatch, headlineType);
|
||||
} else if (minDistance == distanceToSplitCandidate) {
|
||||
SplitBlockResult splitBlockResult = splitBlock(classificationPage, splitCandidate, context.sectionIdentifier, outlineObject.getTitle());
|
||||
if (splitBlockResult.modifiedBlockToSplit) {
|
||||
splitCandidate.setClassification(headlineType);
|
||||
setClassificationAndAddOutlineEngine(splitCandidate, headlineType);
|
||||
}
|
||||
splitBlockResult.otherBlocks.forEach(other -> other.setClassification(null));
|
||||
} else {
|
||||
var merged = mergeBlocks(classificationPage, bestMergeCandidateCombination);
|
||||
merged.setClassification(headlineType);
|
||||
setClassificationAndAddOutlineEngine(merged, headlineType);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Assigns the given classification to a block and records that the outline engine contributed to it.
 *
 * @param block        the block to update; its classification is overwritten and
 *                     {@link LayoutEngine#OUTLINE} is added to its engine set
 * @param headlineType the classification to assign to the block
 */
private static void setClassificationAndAddOutlineEngine(TextPageBlock block, PageBlockType headlineType) {
|
||||
|
||||
block.setClassification(headlineType);
|
||||
// remember that this classification originated from the outline, in addition to any engines already recorded
block.getEngines().add(LayoutEngine.OUTLINE);
|
||||
}
|
||||
|
||||
|
||||
private SplitBlockResult splitBlock(ClassificationPage classificationPage, TextPageBlock blockToSplit, SectionIdentifier sectionIdentifier, String title) {
|
||||
|
||||
List<TextPageBlock> otherBlocks = new ArrayList<>();
|
||||
|
||||
@ -122,6 +122,7 @@ public class DocumentGraphFactory {
|
||||
List<Integer> treeId = context.documentTree.createNewChildEntryAndReturnId(parentNode, node);
|
||||
node.setLeafTextBlock(textBlock);
|
||||
node.setTreeId(treeId);
|
||||
node.getEngines().addAll(originalTextBlock.getEngines());
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -9,6 +9,7 @@ import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
@ -23,6 +24,7 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Do
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.DuplicatedParagraph;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Page;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SemanticNode;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SuperSection;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Table;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableCell;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations;
|
||||
@ -62,6 +64,12 @@ public class LayoutGridService {
|
||||
static Color HEADER_COLOR = new Color(171, 131, 6);
|
||||
static Color IMAGE_COLOR = new Color(253, 63, 146);
|
||||
|
||||
/**
 * Map key identifying one drawn rectangle: the tree id of a node combined with a page number.
 *
 * @param treeId     tree id of the node the rectangle belongs to
 * @param pageNumber number of the page the rectangle was drawn on
 */
private record RectangleIdentifier(List<Integer> treeId, Integer pageNumber) {
|
||||
|
||||
}
|
||||
|
||||
// Padded rectangles stored per (tree id, page number); createLinesAndPlaceText writes an entry for every
// SuperSection it processes and looks up the entry of a node's highest parent to position label text.
// NOTE(review): this is mutable instance state and nothing in the visible code clears it — confirm the
// lifetime of this object (per document vs. shared) and whether concurrent use is possible.
HashMap<RectangleIdentifier, Rectangle2D> rectangleMap = new HashMap<>();
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
@Observed(name = "ViewerDocumentService", contextualName = "create-viewer-document")
|
||||
@ -192,31 +200,20 @@ public class LayoutGridService {
|
||||
private void addSection(SemanticNode semanticNode, LayoutGrid layoutGrid, Color color) {
|
||||
|
||||
Map<Page, Rectangle2D> bBoxMap = semanticNode.getBBox();
|
||||
|
||||
List<SemanticNode> subSections = semanticNode.streamAllSubNodesOfType(NodeType.SECTION)
|
||||
.toList();
|
||||
float lineWidthModifier = (subSections.stream()
|
||||
.map(node -> node.getTreeId().size())
|
||||
.max(Integer::compareTo)
|
||||
.orElse(semanticNode.getTreeId().size()) - semanticNode.getTreeId().size());
|
||||
Integer maxChildDepth = subSections.stream()
|
||||
.map(node -> node.getTreeId().size())
|
||||
.max(Integer::compareTo)
|
||||
.orElse(semanticNode.getTreeId().size());
|
||||
int ownDepth = semanticNode.getTreeId().size();
|
||||
|
||||
Page firstPage = semanticNode.getFirstPage();
|
||||
String treeIdString = buildTreeIdString(semanticNode);
|
||||
if (!subSections.isEmpty()) {
|
||||
addPlacedText(firstPage, bBoxMap.get(firstPage), treeIdString, layoutGrid);
|
||||
} else {
|
||||
bBoxMap.forEach(((page, textBBox) -> addPlacedText(page, textBBox, treeIdString, layoutGrid)));
|
||||
}
|
||||
|
||||
if (bBoxMap.values().size() == 1) {
|
||||
Rectangle2D r = RectangleTransformations.pad(bBoxMap.get(firstPage), LINE_WIDTH * (1 + lineWidthModifier), LINE_WIDTH * (1 + lineWidthModifier));
|
||||
List<ColoredLine> coloredLines = layoutGrid.getVisualizationsPerPages()
|
||||
.get(firstPage.getNumber() - 1).getColoredLines();
|
||||
List<Line2D> lines = createLinesFromRectangle(r, firstPage.getRotation());
|
||||
// add string to top line
|
||||
var firstLine = lines.remove(0);
|
||||
coloredLines.add(new ColoredLine(firstLine, color, LINE_WIDTH));
|
||||
for (Line2D line : lines) {
|
||||
coloredLines.add(new ColoredLine(line, color, LINE_WIDTH));
|
||||
}
|
||||
handleSinglePage(semanticNode, layoutGrid, color, firstPage, bBoxMap.get(firstPage), treeIdString, maxChildDepth, ownDepth);
|
||||
return;
|
||||
}
|
||||
List<Page> pagesInOrder = bBoxMap.keySet()
|
||||
@ -224,106 +221,203 @@ public class LayoutGridService {
|
||||
.sorted(Comparator.comparingInt(Page::getNumber))
|
||||
.collect(Collectors.toList());
|
||||
pagesInOrder.remove(0);
|
||||
addLinesForFirstPageOfSection(semanticNode, color, firstPage, layoutGrid, lineWidthModifier);
|
||||
var lastPage = pagesInOrder.remove(pagesInOrder.size() - 1);
|
||||
addLinesForLastPageOfSection(semanticNode, color, lastPage, layoutGrid, lineWidthModifier);
|
||||
for (Page middlePage : pagesInOrder) {
|
||||
addLinesForMiddlePageOfSection(semanticNode, color, middlePage, layoutGrid, lineWidthModifier);
|
||||
handleFirstPageOfSection(semanticNode, color, firstPage, bBoxMap.get(firstPage), treeIdString, layoutGrid, maxChildDepth, ownDepth);
|
||||
if (semanticNode instanceof SuperSection) {
|
||||
return;
|
||||
}
|
||||
for (Page middlePage : pagesInOrder.subList(0, pagesInOrder.size() - 1)) {
|
||||
handleForMiddlePageOfSection(semanticNode, color, middlePage, bBoxMap.get(middlePage), treeIdString, layoutGrid, maxChildDepth, ownDepth);
|
||||
}
|
||||
var lastPage = pagesInOrder.remove(pagesInOrder.size() - 1);
|
||||
handleLastPageOfSection(semanticNode, color, lastPage, bBoxMap.get(lastPage), treeIdString, layoutGrid, maxChildDepth, ownDepth);
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private void addPlacedText(Page page, Rectangle2D textBBox, String s, LayoutGrid layoutGrid) {
|
||||
private void addPlacedText(Page page, Rectangle2D textBBox, Rectangle2D highestParentRect, String s, LayoutGrid layoutGrid, Integer maxChildDepth) {
|
||||
|
||||
// translates text, such that its right edge is a bit to the left of the drawn box
|
||||
float translationAmount = ((FONT.getStringWidth(s) / 1000) * FONT_SIZE + (2 * LINE_WIDTH) + 4);
|
||||
float translationAmount = ((FONT.getStringWidth(s) / 1000) * FONT_SIZE + LINE_WIDTH + 2 * maxChildDepth);
|
||||
|
||||
Point2D upperLeftCorner;
|
||||
Point2D translationVector;
|
||||
switch (page.getRotation()) {
|
||||
case 90 -> {
|
||||
upperLeftCorner = new Point2D.Double(textBBox.getMinX(), textBBox.getMinY());
|
||||
if (highestParentRect != null) {
|
||||
upperLeftCorner = new Point2D.Double(highestParentRect.getMinX(), textBBox.getMinY());
|
||||
} else {
|
||||
upperLeftCorner = new Point2D.Double(textBBox.getMinX(), textBBox.getMinY());
|
||||
}
|
||||
translationVector = new Point2D.Double(FONT_SIZE, -translationAmount);
|
||||
}
|
||||
case 180 -> {
|
||||
upperLeftCorner = new Point2D.Double(textBBox.getMaxX(), textBBox.getMinY());
|
||||
if (highestParentRect != null) {
|
||||
upperLeftCorner = new Point2D.Double(highestParentRect.getMaxX(), textBBox.getMinY());
|
||||
} else {
|
||||
upperLeftCorner = new Point2D.Double(textBBox.getMaxX(), textBBox.getMinY());
|
||||
}
|
||||
translationVector = new Point2D.Double(translationAmount, FONT_SIZE);
|
||||
}
|
||||
case 270 -> {
|
||||
upperLeftCorner = new Point2D.Double(textBBox.getMaxX(), textBBox.getMaxY());
|
||||
|
||||
if (highestParentRect != null) {
|
||||
upperLeftCorner = new Point2D.Double(highestParentRect.getMaxX(), textBBox.getMaxY());
|
||||
} else {
|
||||
upperLeftCorner = new Point2D.Double(textBBox.getMaxX(), textBBox.getMaxY());
|
||||
}
|
||||
translationVector = new Point2D.Double(-FONT_SIZE, translationAmount);
|
||||
}
|
||||
default -> {
|
||||
upperLeftCorner = new Point2D.Double(textBBox.getMinX(), textBBox.getMaxY());
|
||||
|
||||
if (highestParentRect != null) {
|
||||
upperLeftCorner = new Point2D.Double(highestParentRect.getMinX(), textBBox.getMaxY());
|
||||
} else {
|
||||
upperLeftCorner = new Point2D.Double(textBBox.getMinX(), textBBox.getMaxY());
|
||||
}
|
||||
translationVector = new Point2D.Double(-translationAmount, -FONT_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
upperLeftCorner = add(upperLeftCorner, translationVector);
|
||||
|
||||
var placedTexts = layoutGrid.getVisualizationsPerPages()
|
||||
List<PlacedText> placedTexts = layoutGrid.getVisualizationsPerPages()
|
||||
.get(page.getNumber() - 1).getPlacedTexts();
|
||||
placedTexts.add(PlacedText.textFacingUp(s, upperLeftCorner, FONT_SIZE, Color.BLACK, FONT));
|
||||
}
|
||||
|
||||
PlacedText newText = PlacedText.textFacingUp(s, upperLeftCorner, FONT_SIZE, Color.BLACK, FONT);
|
||||
|
||||
private void addLinesForMiddlePageOfSection(SemanticNode semanticNode, Color color, Page middlePage, LayoutGrid layoutGrid, float lineWidthModifier) {
|
||||
Optional<PlacedText> conflictingText = placedTexts.stream()
|
||||
.filter(pt -> Math.abs(pt.lineStart().getY() - newText.lineStart().getY()) <= FONT_SIZE)
|
||||
.findFirst();
|
||||
|
||||
List<ColoredLine> coloredLines = layoutGrid.getVisualizationsPerPages()
|
||||
.get(middlePage.getNumber() - 1).getColoredLines();
|
||||
Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox()
|
||||
.get(middlePage), LINE_WIDTH * (1 + lineWidthModifier), LINE_WIDTH * (1 + lineWidthModifier));
|
||||
var midPageLines = createLinesFromRectangle(r, middlePage.getRotation());
|
||||
// remove top line
|
||||
midPageLines.remove(0);
|
||||
// remove top line
|
||||
midPageLines.remove(1);
|
||||
// add string to left line
|
||||
var leftLine = midPageLines.remove(1);
|
||||
coloredLines.add(new ColoredLine(leftLine, color, LINE_WIDTH));
|
||||
for (Line2D line : midPageLines) {
|
||||
coloredLines.add(new ColoredLine(line, color, LINE_WIDTH));
|
||||
if (conflictingText.isPresent()) {
|
||||
PlacedText existingText = conflictingText.get();
|
||||
if (newText.text().length() > existingText.text().length()) {
|
||||
placedTexts.remove(existingText);
|
||||
placedTexts.add(newText);
|
||||
}
|
||||
} else {
|
||||
placedTexts.add(newText);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
private void addLinesForLastPageOfSection(SemanticNode semanticNode, Color color, Page lastPage, LayoutGrid layoutGrid, float lineWidthModifier) {
|
||||
private void handleSinglePage(SemanticNode semanticNode,
|
||||
LayoutGrid layoutGrid,
|
||||
Color color,
|
||||
Page page,
|
||||
Rectangle2D rectangle2D,
|
||||
String treeIdString,
|
||||
Integer maxChildDepth,
|
||||
Integer ownDepth) {
|
||||
|
||||
List<ColoredLine> coloredLines = layoutGrid.getVisualizationsPerPages()
|
||||
.get(lastPage.getNumber() - 1).getColoredLines();
|
||||
Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox()
|
||||
.get(lastPage), LINE_WIDTH * (1 + lineWidthModifier), LINE_WIDTH * (1 + lineWidthModifier));
|
||||
var lastPageLines = createLinesFromRectangle(r, lastPage.getRotation());
|
||||
// remove top line
|
||||
lastPageLines.remove(0);
|
||||
// add string to left line
|
||||
var leftLine = lastPageLines.remove(2);
|
||||
coloredLines.add(new ColoredLine(leftLine, color, LINE_WIDTH));
|
||||
for (Line2D line : lastPageLines) {
|
||||
coloredLines.add(new ColoredLine(line, color, LINE_WIDTH));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void addLinesForFirstPageOfSection(SemanticNode semanticNode, Color color, Page firstPage, LayoutGrid layoutGrid, float lineWidthModifier) {
|
||||
|
||||
List<ColoredLine> coloredLines = layoutGrid.getVisualizationsPerPages()
|
||||
.get(firstPage.getNumber() - 1).getColoredLines();
|
||||
Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox()
|
||||
.get(firstPage), LINE_WIDTH * (1 + lineWidthModifier), LINE_WIDTH * (1 + lineWidthModifier));
|
||||
var firstPageLines = createLinesFromRectangle(r, firstPage.getRotation());
|
||||
// remove bottom line
|
||||
firstPageLines.remove(2);
|
||||
RectangleAndLinesResult result = createLinesAndPlaceText(semanticNode, page, rectangle2D, treeIdString, layoutGrid, maxChildDepth, ownDepth);
|
||||
// add string to top line
|
||||
var firstLine = firstPageLines.remove(0);
|
||||
coloredLines.add(new ColoredLine(firstLine, color, LINE_WIDTH));
|
||||
for (Line2D line : firstPageLines) {
|
||||
coloredLines.add(new ColoredLine(line, color, LINE_WIDTH));
|
||||
var firstLine = result.pageLines().remove(0);
|
||||
result.coloredLines().add(new ColoredLine(firstLine, color, LINE_WIDTH));
|
||||
for (Line2D line : result.pageLines()) {
|
||||
result.coloredLines().add(new ColoredLine(line, color, LINE_WIDTH));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * Handles the first page of a section whose bounding boxes span more than one page: delegates to
 * {@code createLinesAndPlaceText} (which places the tree id label and builds the rectangle lines) and
 * then adds every line except the one at index 2 to the page's colored lines.
 * NOTE(review): the edge order returned by createLinesFromRectangle is not visible here; the comments in
 * this file treat index 0 as the top edge and index 2 as the bottom edge — confirm.
 *
 * @param semanticNode  the section node being visualized
 * @param color         color used for all lines added by this method
 * @param firstPage     the first page of the section
 * @param rectangle2D   bounding box passed through to {@code createLinesAndPlaceText}
 * @param treeIdString  label text passed through to {@code createLinesAndPlaceText}
 * @param layoutGrid    grid that receives the lines and the label
 * @param maxChildDepth passed through to {@code createLinesAndPlaceText}
 * @param ownDepth      passed through to {@code createLinesAndPlaceText}
 */
private void handleFirstPageOfSection(SemanticNode semanticNode,
|
||||
Color color,
|
||||
Page firstPage,
|
||||
Rectangle2D rectangle2D,
|
||||
String treeIdString,
|
||||
LayoutGrid layoutGrid,
|
||||
Integer maxChildDepth,
|
||||
Integer ownDepth) {
|
||||
|
||||
RectangleAndLinesResult result = createLinesAndPlaceText(semanticNode, firstPage, rectangle2D, treeIdString, layoutGrid, maxChildDepth, ownDepth);
|
||||
// drop the line at index 2 (the bottom edge per this file's comments), so it is not drawn on this page
|
||||
result.pageLines().remove(2);
|
||||
// take the line at index 0 (the top edge per this file's comments) and add it first; no text is attached here, the label was already placed by createLinesAndPlaceText
|
||||
var firstLine = result.pageLines().remove(0);
|
||||
result.coloredLines().add(new ColoredLine(firstLine, color, LINE_WIDTH));
|
||||
for (Line2D line : result.pageLines()) {
|
||||
result.coloredLines().add(new ColoredLine(line, color, LINE_WIDTH));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * Handles a page that lies between the first and last page of a section: delegates to
 * {@code createLinesAndPlaceText} (which places the tree id label and builds the rectangle lines) and
 * then adds only the lines originally at index 3 and index 1 to the page's colored lines, in that order.
 * NOTE(review): the edge order returned by createLinesFromRectangle is not visible here; the comments in
 * this file treat index 0 as top, index 2 as bottom and index 3 as the left edge — confirm.
 *
 * @param semanticNode  the section node being visualized
 * @param color         color used for all lines added by this method
 * @param middlePage    the page being handled
 * @param rectangle2D   bounding box passed through to {@code createLinesAndPlaceText}
 * @param treeIdString  label text passed through to {@code createLinesAndPlaceText}
 * @param layoutGrid    grid that receives the lines and the label
 * @param maxChildDepth passed through to {@code createLinesAndPlaceText}
 * @param ownDepth      passed through to {@code createLinesAndPlaceText}
 */
private void handleForMiddlePageOfSection(SemanticNode semanticNode,
|
||||
Color color,
|
||||
Page middlePage,
|
||||
Rectangle2D rectangle2D,
|
||||
String treeIdString,
|
||||
LayoutGrid layoutGrid,
|
||||
Integer maxChildDepth,
|
||||
Integer ownDepth) {
|
||||
|
||||
RectangleAndLinesResult result = createLinesAndPlaceText(semanticNode, middlePage, rectangle2D, treeIdString, layoutGrid, maxChildDepth, ownDepth);
|
||||
// drop the line at index 0 (the top edge per this file's comments)
|
||||
result.pageLines().remove(0);
|
||||
// after the removal above the indices have shifted: index 1 now holds the line originally at index 2 (the bottom edge per this file's comments)
|
||||
result.pageLines().remove(1);
|
||||
// index 1 now holds the line originally at index 3 (the left edge per this file's comments); it is added first, no text is attached here
|
||||
var leftLine = result.pageLines().remove(1);
|
||||
result.coloredLines().add(new ColoredLine(leftLine, color, LINE_WIDTH));
|
||||
for (Line2D line : result.pageLines()) {
|
||||
result.coloredLines().add(new ColoredLine(line, color, LINE_WIDTH));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * Handles the last page of a section that spans more than one page: delegates to
 * {@code createLinesAndPlaceText} (which places the tree id label and builds the rectangle lines) and
 * then adds every line except the one at index 0 to the page's colored lines.
 * NOTE(review): the edge order returned by createLinesFromRectangle is not visible here; the comments in
 * this file treat index 0 as the top edge and index 3 as the left edge — confirm.
 *
 * @param semanticNode  the section node being visualized
 * @param color         color used for all lines added by this method
 * @param lastPage      the last page of the section
 * @param rectangle2D   bounding box passed through to {@code createLinesAndPlaceText}
 * @param treeIdString  label text passed through to {@code createLinesAndPlaceText}
 * @param layoutGrid    grid that receives the lines and the label
 * @param maxChildDepth passed through to {@code createLinesAndPlaceText}
 * @param ownDepth      passed through to {@code createLinesAndPlaceText}
 */
private void handleLastPageOfSection(SemanticNode semanticNode,
|
||||
Color color,
|
||||
Page lastPage,
|
||||
Rectangle2D rectangle2D,
|
||||
String treeIdString,
|
||||
LayoutGrid layoutGrid,
|
||||
Integer maxChildDepth,
|
||||
Integer ownDepth) {
|
||||
|
||||
RectangleAndLinesResult result = createLinesAndPlaceText(semanticNode, lastPage, rectangle2D, treeIdString, layoutGrid, maxChildDepth, ownDepth);
|
||||
// drop the line at index 0 (the top edge per this file's comments), so it is not drawn on this page
|
||||
result.pageLines().remove(0);
|
||||
// after the removal above, index 2 holds the line originally at index 3 (the left edge per this file's comments); it is added first, no text is attached here
|
||||
var leftLine = result.pageLines().remove(2);
|
||||
result.coloredLines().add(new ColoredLine(leftLine, color, LINE_WIDTH));
|
||||
for (Line2D line : result.pageLines()) {
|
||||
result.coloredLines().add(new ColoredLine(line, color, LINE_WIDTH));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * Shared preparation step for the per-page handlers: pads the node's bounding box on the given page,
 * builds the lines of that padded rectangle, places the tree id label, and returns everything the
 * caller needs to decide which lines to draw. No lines are added to the layout grid here.
 * Side effects: a label is placed via {@code addPlacedText}, and for {@link SuperSection} nodes the padded
 * rectangle is stored in {@code rectangleMap} under the node's tree id and the page number.
 *
 * @param semanticNode  node whose bounding box on {@code page} is padded and outlined
 * @param page          page being processed; its number (1-based, as it is used with {@code - 1} as list
 *                      index) selects the visualizations and its rotation is passed to createLinesFromRectangle
 * @param rectangle2D   bounding box handed to {@code addPlacedText}; it is not used for the padded rectangle,
 *                      which is read from {@code semanticNode.getBBox()} instead
 * @param treeIdString  label text to place
 * @param layoutGrid    grid whose per-page colored lines are returned and which receives the label
 * @param maxChildDepth depth value from which {@code ownDepth} is subtracted to scale the padding; also
 *                      passed to {@code addPlacedText}
 * @param ownDepth      depth of {@code semanticNode} itself
 * @return the page's colored-line list, the padded rectangle and the lines created from it
 */
private RectangleAndLinesResult createLinesAndPlaceText(SemanticNode semanticNode,
|
||||
Page page,
|
||||
Rectangle2D rectangle2D,
|
||||
String treeIdString,
|
||||
LayoutGrid layoutGrid,
|
||||
Integer maxChildDepth,
|
||||
Integer ownDepth) {
|
||||
|
||||
List<ColoredLine> coloredLines = layoutGrid.getVisualizationsPerPages()
|
||||
.get(page.getNumber() - 1).getColoredLines();
|
||||
// the padding grows by one LINE_WIDTH per level between this node and maxChildDepth
int lineWidthModifier = maxChildDepth - ownDepth;
|
||||
Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox()
|
||||
.get(page), LINE_WIDTH * (1 + lineWidthModifier), LINE_WIDTH * (1 + lineWidthModifier));
|
||||
var lastPageLines = createLinesFromRectangle(r, page.getRotation());
|
||||
|
||||
// look up the rectangle stored earlier for the highest parent on this page; the result is null when
// no entry exists, and addPlacedText then falls back to the text bounding box
SemanticNode highestParent = semanticNode.getHighestParent();
|
||||
Rectangle2D highestParentRect = rectangleMap.get(new RectangleIdentifier(highestParent.getTreeId(), page.getNumber()));
|
||||
addPlacedText(page, rectangle2D, highestParentRect, treeIdString, layoutGrid, maxChildDepth);
|
||||
|
||||
// only SuperSection rectangles are stored, and only after the lookup above
if (semanticNode instanceof SuperSection) {
|
||||
rectangleMap.put(new RectangleIdentifier(semanticNode.getTreeId(), page.getNumber()), r);
|
||||
}
|
||||
|
||||
return new RectangleAndLinesResult(coloredLines, r, lastPageLines);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Result bundle returned by {@code createLinesAndPlaceText}.
 *
 * @param coloredLines the colored-line list of the processed page, taken from the layout grid; callers add to it
 * @param rectangle    the padded rectangle of the node on that page
 * @param pageLines    the lines created from {@code rectangle}; callers remove entries from this list by index
 */
private record RectangleAndLinesResult(List<ColoredLine> coloredLines, Rectangle2D rectangle, List<Line2D> pageLines) {
|
||||
|
||||
}
|
||||
|
||||
|
||||
private String buildTreeIdString(SemanticNode semanticNode) {
|
||||
|
||||
return semanticNode.getTreeId()
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user