Merge branch 'main' into RED-9206
This commit is contained in:
commit
5f5a6258c5
@ -26,6 +26,7 @@ import lombok.RequiredArgsConstructor;
|
||||
@RequiredArgsConstructor
|
||||
public class DocstrumBlockificationService {
|
||||
|
||||
public static final float Y_THRESHOLD = 5f;
|
||||
private final DocstrumSegmentationService docstrumSegmentationService;
|
||||
|
||||
static final float THRESHOLD = 1f;
|
||||
@ -163,7 +164,7 @@ public class DocstrumBlockificationService {
|
||||
previous = current;
|
||||
}
|
||||
|
||||
mergeIntersectingBlocks(page, usedRulings, 0, 6.5f);
|
||||
mergeIntersectingBlocks(page, usedRulings, 0, Y_THRESHOLD);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -165,8 +165,10 @@ public class LayoutGridService {
|
||||
List<Double> ys = yStream.collect(Collectors.toList());
|
||||
ys.remove(0);
|
||||
|
||||
Rectangle2D tableBBox = table.getBBox().get(page);
|
||||
List<ColoredLine> coloredLines = layoutGrid.getVisualizationsPerPages().get(page.getNumber() - 1).getColoredLines();
|
||||
Rectangle2D tableBBox = table.getBBox()
|
||||
.get(page);
|
||||
List<ColoredLine> coloredLines = layoutGrid.getVisualizationsPerPages()
|
||||
.get(page.getNumber() - 1).getColoredLines();
|
||||
xs.forEach(x -> {
|
||||
Line2D line = new Line2D.Double(new Point2D.Double(x, tableBBox.getMaxY()), new Point2D.Double(x, tableBBox.getMinY()));
|
||||
coloredLines.add(new ColoredLine(line, INNER_LINES_COLOR, LINE_WIDTH));
|
||||
@ -192,6 +194,11 @@ public class LayoutGridService {
|
||||
Map<Page, Rectangle2D> bBoxMap = semanticNode.getBBox();
|
||||
List<SemanticNode> subSections = semanticNode.streamAllSubNodesOfType(NodeType.SECTION)
|
||||
.toList();
|
||||
float lineWidthModifier = (subSections.stream()
|
||||
.map(node -> node.getTreeId().size())
|
||||
.max(Integer::compareTo)
|
||||
.orElse(semanticNode.getTreeId().size()) - semanticNode.getTreeId().size());
|
||||
|
||||
Page firstPage = semanticNode.getFirstPage();
|
||||
String treeIdString = buildTreeIdString(semanticNode);
|
||||
if (!subSections.isEmpty()) {
|
||||
@ -200,8 +207,9 @@ public class LayoutGridService {
|
||||
bBoxMap.forEach(((page, textBBox) -> addPlacedText(page, textBBox, treeIdString, layoutGrid)));
|
||||
}
|
||||
if (bBoxMap.values().size() == 1) {
|
||||
Rectangle2D r = RectangleTransformations.pad(bBoxMap.get(firstPage), LINE_WIDTH, LINE_WIDTH);
|
||||
List<ColoredLine> coloredLines = layoutGrid.getVisualizationsPerPages().get(firstPage.getNumber() - 1).getColoredLines();
|
||||
Rectangle2D r = RectangleTransformations.pad(bBoxMap.get(firstPage), LINE_WIDTH * (1 + lineWidthModifier), LINE_WIDTH * (1 + lineWidthModifier));
|
||||
List<ColoredLine> coloredLines = layoutGrid.getVisualizationsPerPages()
|
||||
.get(firstPage.getNumber() - 1).getColoredLines();
|
||||
List<Line2D> lines = createLinesFromRectangle(r, firstPage.getRotation());
|
||||
// add string to top line
|
||||
var firstLine = lines.remove(0);
|
||||
@ -216,11 +224,11 @@ public class LayoutGridService {
|
||||
.sorted(Comparator.comparingInt(Page::getNumber))
|
||||
.collect(Collectors.toList());
|
||||
pagesInOrder.remove(0);
|
||||
addLinesForFirstPageOfSection(semanticNode, color, firstPage, layoutGrid);
|
||||
addLinesForFirstPageOfSection(semanticNode, color, firstPage, layoutGrid, lineWidthModifier);
|
||||
var lastPage = pagesInOrder.remove(pagesInOrder.size() - 1);
|
||||
addLinesForLastPageOfSection(semanticNode, color, lastPage, layoutGrid);
|
||||
addLinesForLastPageOfSection(semanticNode, color, lastPage, layoutGrid, lineWidthModifier);
|
||||
for (Page middlePage : pagesInOrder) {
|
||||
addLinesForMiddlePageOfSection(semanticNode, color, middlePage, layoutGrid);
|
||||
addLinesForMiddlePageOfSection(semanticNode, color, middlePage, layoutGrid, lineWidthModifier);
|
||||
}
|
||||
}
|
||||
|
||||
@ -254,15 +262,18 @@ public class LayoutGridService {
|
||||
|
||||
upperLeftCorner = add(upperLeftCorner, translationVector);
|
||||
|
||||
var placedTexts = layoutGrid.getVisualizationsPerPages().get(page.getNumber() - 1).getPlacedTexts();
|
||||
var placedTexts = layoutGrid.getVisualizationsPerPages()
|
||||
.get(page.getNumber() - 1).getPlacedTexts();
|
||||
placedTexts.add(PlacedText.textFacingUp(s, upperLeftCorner, FONT_SIZE, Color.BLACK, FONT));
|
||||
}
|
||||
|
||||
|
||||
private void addLinesForMiddlePageOfSection(SemanticNode semanticNode, Color color, Page middlePage, LayoutGrid layoutGrid) {
|
||||
private void addLinesForMiddlePageOfSection(SemanticNode semanticNode, Color color, Page middlePage, LayoutGrid layoutGrid, float lineWidthModifier) {
|
||||
|
||||
List<ColoredLine> coloredLines = layoutGrid.getVisualizationsPerPages().get(middlePage.getNumber() - 1).getColoredLines();
|
||||
Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox().get(middlePage), LINE_WIDTH, LINE_WIDTH);
|
||||
List<ColoredLine> coloredLines = layoutGrid.getVisualizationsPerPages()
|
||||
.get(middlePage.getNumber() - 1).getColoredLines();
|
||||
Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox()
|
||||
.get(middlePage), LINE_WIDTH * (1 + lineWidthModifier), LINE_WIDTH * (1 + lineWidthModifier));
|
||||
var midPageLines = createLinesFromRectangle(r, middlePage.getRotation());
|
||||
// remove top line
|
||||
midPageLines.remove(0);
|
||||
@ -277,10 +288,12 @@ public class LayoutGridService {
|
||||
}
|
||||
|
||||
|
||||
private void addLinesForLastPageOfSection(SemanticNode semanticNode, Color color, Page lastPage, LayoutGrid layoutGrid) {
|
||||
private void addLinesForLastPageOfSection(SemanticNode semanticNode, Color color, Page lastPage, LayoutGrid layoutGrid, float lineWidthModifier) {
|
||||
|
||||
List<ColoredLine> coloredLines = layoutGrid.getVisualizationsPerPages().get(lastPage.getNumber() - 1).getColoredLines();
|
||||
Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox().get(lastPage), LINE_WIDTH, LINE_WIDTH);
|
||||
List<ColoredLine> coloredLines = layoutGrid.getVisualizationsPerPages()
|
||||
.get(lastPage.getNumber() - 1).getColoredLines();
|
||||
Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox()
|
||||
.get(lastPage), LINE_WIDTH * (1 + lineWidthModifier), LINE_WIDTH * (1 + lineWidthModifier));
|
||||
var lastPageLines = createLinesFromRectangle(r, lastPage.getRotation());
|
||||
// remove top line
|
||||
lastPageLines.remove(0);
|
||||
@ -293,10 +306,12 @@ public class LayoutGridService {
|
||||
}
|
||||
|
||||
|
||||
private void addLinesForFirstPageOfSection(SemanticNode semanticNode, Color color, Page firstPage, LayoutGrid layoutGrid) {
|
||||
private void addLinesForFirstPageOfSection(SemanticNode semanticNode, Color color, Page firstPage, LayoutGrid layoutGrid, float lineWidthModifier) {
|
||||
|
||||
List<ColoredLine> coloredLines = layoutGrid.getVisualizationsPerPages().get(firstPage.getNumber() - 1).getColoredLines();
|
||||
Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox().get(firstPage), LINE_WIDTH, LINE_WIDTH);
|
||||
List<ColoredLine> coloredLines = layoutGrid.getVisualizationsPerPages()
|
||||
.get(firstPage.getNumber() - 1).getColoredLines();
|
||||
Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox()
|
||||
.get(firstPage), LINE_WIDTH * (1 + lineWidthModifier), LINE_WIDTH * (1 + lineWidthModifier));
|
||||
var firstPageLines = createLinesFromRectangle(r, firstPage.getRotation());
|
||||
// remove bottom line
|
||||
firstPageLines.remove(2);
|
||||
@ -366,7 +381,10 @@ public class LayoutGridService {
|
||||
|
||||
private static boolean isNotSectionOrTableCellOrDocument(SemanticNode semanticNode) {
|
||||
|
||||
return !(semanticNode.getType().equals(NodeType.DOCUMENT) || semanticNode.getType().equals(NodeType.SECTION) || semanticNode.getType().equals(NodeType.TABLE_CELL));
|
||||
return !(semanticNode.getType().equals(NodeType.DOCUMENT)
|
||||
|| semanticNode.getType().equals(NodeType.SECTION)
|
||||
|| semanticNode.getType().equals(NodeType.SUPER_SECTION)
|
||||
|| semanticNode.getType().equals(NodeType.TABLE_CELL));
|
||||
}
|
||||
|
||||
|
||||
@ -374,9 +392,7 @@ public class LayoutGridService {
|
||||
|
||||
semanticNode.getBBox()
|
||||
.forEach((page, textBBox) -> layoutGrid.getVisualizationsPerPages()
|
||||
.get(page.getNumber() - 1)
|
||||
.getColoredRectangles()
|
||||
.add(new ColoredRectangle(textBBox, color, LINE_WIDTH)));
|
||||
.get(page.getNumber() - 1).getColoredRectangles().add(new ColoredRectangle(textBBox, color, LINE_WIDTH)));
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -31,7 +31,7 @@ public class ViewerDocumentTest extends BuildDocumentTest {
|
||||
@SneakyThrows
|
||||
public void testViewerDocument() {
|
||||
|
||||
String fileName = "files/new/crafted_outline_test_doc.pdf";
|
||||
String fileName = "files/syngenta/CustomerFiles/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf";
|
||||
|
||||
String tmpFileName = "/tmp/" + Path.of(fileName).getFileName() + "_VIEWER.pdf";
|
||||
|
||||
|
||||
@ -114,7 +114,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
||||
ClassificationDocument classificationDocument = buildClassificationDocument(pdfFileResource.getFile());
|
||||
|
||||
assertThat(classificationDocument.getHeaders()
|
||||
.get(0).getTextBlocks().size()).isEqualTo(2);
|
||||
.get(0).getTextBlocks().size()).isEqualTo(3);
|
||||
assertThat(classificationDocument.getHeaders()
|
||||
.get(0).getTextBlocks()
|
||||
.get(0).getSequences().size()).isEqualTo(8);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user