Merge branch 'main' into RED-9206

This commit is contained in:
Corina Olariu 2024-06-05 13:34:14 +03:00
commit 5f5a6258c5
4 changed files with 41 additions and 24 deletions

View File

@ -26,6 +26,7 @@ import lombok.RequiredArgsConstructor;
@RequiredArgsConstructor
public class DocstrumBlockificationService {
public static final float Y_THRESHOLD = 5f;
private final DocstrumSegmentationService docstrumSegmentationService;
static final float THRESHOLD = 1f;
@ -163,7 +164,7 @@ public class DocstrumBlockificationService {
previous = current;
}
mergeIntersectingBlocks(page, usedRulings, 0, 6.5f);
mergeIntersectingBlocks(page, usedRulings, 0, Y_THRESHOLD);
}

View File

@ -165,8 +165,10 @@ public class LayoutGridService {
List<Double> ys = yStream.collect(Collectors.toList());
ys.remove(0);
Rectangle2D tableBBox = table.getBBox().get(page);
List<ColoredLine> coloredLines = layoutGrid.getVisualizationsPerPages().get(page.getNumber() - 1).getColoredLines();
Rectangle2D tableBBox = table.getBBox()
.get(page);
List<ColoredLine> coloredLines = layoutGrid.getVisualizationsPerPages()
.get(page.getNumber() - 1).getColoredLines();
xs.forEach(x -> {
Line2D line = new Line2D.Double(new Point2D.Double(x, tableBBox.getMaxY()), new Point2D.Double(x, tableBBox.getMinY()));
coloredLines.add(new ColoredLine(line, INNER_LINES_COLOR, LINE_WIDTH));
@ -192,6 +194,11 @@ public class LayoutGridService {
Map<Page, Rectangle2D> bBoxMap = semanticNode.getBBox();
List<SemanticNode> subSections = semanticNode.streamAllSubNodesOfType(NodeType.SECTION)
.toList();
float lineWidthModifier = (subSections.stream()
.map(node -> node.getTreeId().size())
.max(Integer::compareTo)
.orElse(semanticNode.getTreeId().size()) - semanticNode.getTreeId().size());
Page firstPage = semanticNode.getFirstPage();
String treeIdString = buildTreeIdString(semanticNode);
if (!subSections.isEmpty()) {
@ -200,8 +207,9 @@ public class LayoutGridService {
bBoxMap.forEach(((page, textBBox) -> addPlacedText(page, textBBox, treeIdString, layoutGrid)));
}
if (bBoxMap.values().size() == 1) {
Rectangle2D r = RectangleTransformations.pad(bBoxMap.get(firstPage), LINE_WIDTH, LINE_WIDTH);
List<ColoredLine> coloredLines = layoutGrid.getVisualizationsPerPages().get(firstPage.getNumber() - 1).getColoredLines();
Rectangle2D r = RectangleTransformations.pad(bBoxMap.get(firstPage), LINE_WIDTH * (1 + lineWidthModifier), LINE_WIDTH * (1 + lineWidthModifier));
List<ColoredLine> coloredLines = layoutGrid.getVisualizationsPerPages()
.get(firstPage.getNumber() - 1).getColoredLines();
List<Line2D> lines = createLinesFromRectangle(r, firstPage.getRotation());
// add string to top line
var firstLine = lines.remove(0);
@ -216,11 +224,11 @@ public class LayoutGridService {
.sorted(Comparator.comparingInt(Page::getNumber))
.collect(Collectors.toList());
pagesInOrder.remove(0);
addLinesForFirstPageOfSection(semanticNode, color, firstPage, layoutGrid);
addLinesForFirstPageOfSection(semanticNode, color, firstPage, layoutGrid, lineWidthModifier);
var lastPage = pagesInOrder.remove(pagesInOrder.size() - 1);
addLinesForLastPageOfSection(semanticNode, color, lastPage, layoutGrid);
addLinesForLastPageOfSection(semanticNode, color, lastPage, layoutGrid, lineWidthModifier);
for (Page middlePage : pagesInOrder) {
addLinesForMiddlePageOfSection(semanticNode, color, middlePage, layoutGrid);
addLinesForMiddlePageOfSection(semanticNode, color, middlePage, layoutGrid, lineWidthModifier);
}
}
@ -254,15 +262,18 @@ public class LayoutGridService {
upperLeftCorner = add(upperLeftCorner, translationVector);
var placedTexts = layoutGrid.getVisualizationsPerPages().get(page.getNumber() - 1).getPlacedTexts();
var placedTexts = layoutGrid.getVisualizationsPerPages()
.get(page.getNumber() - 1).getPlacedTexts();
placedTexts.add(PlacedText.textFacingUp(s, upperLeftCorner, FONT_SIZE, Color.BLACK, FONT));
}
private void addLinesForMiddlePageOfSection(SemanticNode semanticNode, Color color, Page middlePage, LayoutGrid layoutGrid) {
private void addLinesForMiddlePageOfSection(SemanticNode semanticNode, Color color, Page middlePage, LayoutGrid layoutGrid, float lineWidthModifier) {
List<ColoredLine> coloredLines = layoutGrid.getVisualizationsPerPages().get(middlePage.getNumber() - 1).getColoredLines();
Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox().get(middlePage), LINE_WIDTH, LINE_WIDTH);
List<ColoredLine> coloredLines = layoutGrid.getVisualizationsPerPages()
.get(middlePage.getNumber() - 1).getColoredLines();
Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox()
.get(middlePage), LINE_WIDTH * (1 + lineWidthModifier), LINE_WIDTH * (1 + lineWidthModifier));
var midPageLines = createLinesFromRectangle(r, middlePage.getRotation());
// remove top line
midPageLines.remove(0);
@ -277,10 +288,12 @@ public class LayoutGridService {
}
private void addLinesForLastPageOfSection(SemanticNode semanticNode, Color color, Page lastPage, LayoutGrid layoutGrid) {
private void addLinesForLastPageOfSection(SemanticNode semanticNode, Color color, Page lastPage, LayoutGrid layoutGrid, float lineWidthModifier) {
List<ColoredLine> coloredLines = layoutGrid.getVisualizationsPerPages().get(lastPage.getNumber() - 1).getColoredLines();
Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox().get(lastPage), LINE_WIDTH, LINE_WIDTH);
List<ColoredLine> coloredLines = layoutGrid.getVisualizationsPerPages()
.get(lastPage.getNumber() - 1).getColoredLines();
Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox()
.get(lastPage), LINE_WIDTH * (1 + lineWidthModifier), LINE_WIDTH * (1 + lineWidthModifier));
var lastPageLines = createLinesFromRectangle(r, lastPage.getRotation());
// remove top line
lastPageLines.remove(0);
@ -293,10 +306,12 @@ public class LayoutGridService {
}
private void addLinesForFirstPageOfSection(SemanticNode semanticNode, Color color, Page firstPage, LayoutGrid layoutGrid) {
private void addLinesForFirstPageOfSection(SemanticNode semanticNode, Color color, Page firstPage, LayoutGrid layoutGrid, float lineWidthModifier) {
List<ColoredLine> coloredLines = layoutGrid.getVisualizationsPerPages().get(firstPage.getNumber() - 1).getColoredLines();
Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox().get(firstPage), LINE_WIDTH, LINE_WIDTH);
List<ColoredLine> coloredLines = layoutGrid.getVisualizationsPerPages()
.get(firstPage.getNumber() - 1).getColoredLines();
Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox()
.get(firstPage), LINE_WIDTH * (1 + lineWidthModifier), LINE_WIDTH * (1 + lineWidthModifier));
var firstPageLines = createLinesFromRectangle(r, firstPage.getRotation());
// remove bottom line
firstPageLines.remove(2);
@ -366,7 +381,10 @@ public class LayoutGridService {
private static boolean isNotSectionOrTableCellOrDocument(SemanticNode semanticNode) {
return !(semanticNode.getType().equals(NodeType.DOCUMENT) || semanticNode.getType().equals(NodeType.SECTION) || semanticNode.getType().equals(NodeType.TABLE_CELL));
return !(semanticNode.getType().equals(NodeType.DOCUMENT)
|| semanticNode.getType().equals(NodeType.SECTION)
|| semanticNode.getType().equals(NodeType.SUPER_SECTION)
|| semanticNode.getType().equals(NodeType.TABLE_CELL));
}
@ -374,9 +392,7 @@ public class LayoutGridService {
semanticNode.getBBox()
.forEach((page, textBBox) -> layoutGrid.getVisualizationsPerPages()
.get(page.getNumber() - 1)
.getColoredRectangles()
.add(new ColoredRectangle(textBBox, color, LINE_WIDTH)));
.get(page.getNumber() - 1).getColoredRectangles().add(new ColoredRectangle(textBBox, color, LINE_WIDTH)));
}

View File

@ -31,7 +31,7 @@ public class ViewerDocumentTest extends BuildDocumentTest {
@SneakyThrows
public void testViewerDocument() {
String fileName = "files/new/crafted_outline_test_doc.pdf";
String fileName = "files/syngenta/CustomerFiles/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf";
String tmpFileName = "/tmp/" + Path.of(fileName).getFileName() + "_VIEWER.pdf";

View File

@ -114,7 +114,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
ClassificationDocument classificationDocument = buildClassificationDocument(pdfFileResource.getFile());
assertThat(classificationDocument.getHeaders()
.get(0).getTextBlocks().size()).isEqualTo(2);
.get(0).getTextBlocks().size()).isEqualTo(3);
assertThat(classificationDocument.getHeaders()
.get(0).getTextBlocks()
.get(0).getSequences().size()).isEqualTo(8);