diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/ImagePositionRetrievalService.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/ImagePositionRetrievalService.java index 7f2a190..9f32d3c 100644 --- a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/ImagePositionRetrievalService.java +++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/ImagePositionRetrievalService.java @@ -43,20 +43,22 @@ public class ImagePositionRetrievalService { Map pageIdToImagePositions = new HashMap<>(); ElementReader reader = new ElementReader(); for (int pageId = 1; pageId <= pdfDoc.getPageCount(); ++pageId) { - reader.begin(pdfDoc.getPage(pageId)); RectCollection imagePositions = new RectCollection(); - processElements(reader, imagePositions, pdfDoc.getPage(pageId), mirrorY); + reader.begin(pdfDoc.getPage(pageId)); + + findImagePositionsOnPage(reader, imagePositions, pdfDoc.getPage(pageId), mirrorY); imagePositions = mergeOverlappingRects(imagePositions); + + reader.end(); if (imagePositions.getNumRects() > 0) { pageIdToImagePositions.put(pageId, imagePositions); } - reader.end(); } return pageIdToImagePositions; } - private void processElements(ElementReader reader, RectCollection imagePositions, Page currentPage, boolean mirrorY) throws PDFNetException { + private void findImagePositionsOnPage(ElementReader reader, RectCollection imagePositions, Page currentPage, boolean mirrorY) throws PDFNetException { Element element; while ((element = reader.next()) != null) { @@ -64,10 +66,9 @@ public class ImagePositionRetrievalService { case Element.e_image, Element.e_inline_image -> imagePositions.addRect(toRotationAdjustedRect(element.getBBox(), currentPage, mirrorY)); case Element.e_form -> { reader.formBegin(); - processElements(reader, imagePositions, currentPage, mirrorY); + findImagePositionsOnPage(reader, imagePositions, currentPage, mirrorY); reader.end(); } - } } } @@ -76,28 +77,31 @@ public class ImagePositionRetrievalService { @SneakyThrows public RectCollection mergeOverlappingRects(RectCollection imagePositions) { - if (imagePositions.getNumRects() < 2) { + if (imagePositions.getNumRects() == 1) { return imagePositions; } + List rectangleList = toSortedRectangleList(imagePositions); rectangleList = mergeRectangleListRecursive(rectangleList, 0); - return toRectCollection(rectangleList); } - // Sometimes images are split up into stripes, here we try to merge the positions into one larger rectangle + // Sometimes images are split up into stripes, here we merge the positions of aligned and intersecting rectangles into one larger rectangle private List mergeRectangleListRecursive(List rectangleList, int currentIdx) { if (rectangleList.size() < currentIdx + 2) { return rectangleList; } + var rect1 = rectangleList.get(currentIdx); var rect2 = rectangleList.get(currentIdx + 1); + boolean isAlignedX = Math.abs(rect1.getMinX() - rect2.getMinX()) < TOLERANCE && Math.abs(rect1.getMaxX() - rect2.getMaxX()) < TOLERANCE; boolean isAlignedY = Math.abs(rect1.getMinY() - rect2.getMinY()) < TOLERANCE && Math.abs(rect1.getMaxY() - rect2.getMaxY()) < TOLERANCE; - boolean intersects = rect1.intersects(rect2.getMinX() - TOLERANCE, rect2.getMinY() - TOLERANCE, rect2.getWidth() + 2 * TOLERANCE, rect2.getHeight() + 2 * TOLERANCE); + boolean intersects = rect1.intersects(rect2.getMinX() - TOLERANCE, rect2.getMinY() - TOLERANCE, rect2.getWidth() + (2 * TOLERANCE), rect2.getHeight() + (2 * TOLERANCE)); + if (intersects && (isAlignedX || isAlignedY)) { rectangleList.remove(currentIdx + 1); rectangleList.remove(currentIdx); @@ -114,26 +118,29 @@ public class ImagePositionRetrievalService { int rotation = page.getRotation(); double height = page.getPageHeight(); double width = page.getPageWidth(); - //Even though the getBBox() method returns coordinates with (0,0) in the lower left corner, the OCRModule's addTextZonesForPage() wants to have its coordinates with (0,0) in the upper left corner + + // Even though PDFTron almost always has the origin in the lower left corner, for some reason, the OCRModule's addTextZonesForPage() uses the upper left corner as origin... Matrix2D mirrorMatrix; if (mirrorY) { mirrorMatrix = new Matrix2D(1, 0, 0, -1, 0, height); } else { mirrorMatrix = new Matrix2D(); } + // We need to rotate the rects to fit to the page rotation Matrix2D rotationMatrix = switch (rotation) { case 1 -> new Matrix2D(0, -1, 1, 0, 0, height); case 2 -> new Matrix2D(-1, 0, 0, -1, width, height); case 3 -> new Matrix2D(0, 1, -1, 0, width, 0); - default -> new Matrix2D(1, 0, 0, 1, 0, 0); + default -> new Matrix2D(); }; + Matrix2D finalMatrix = mirrorMatrix.multiply(rotationMatrix); Point2D.Double p1 = finalMatrix.multPoint(bbox.getX1(), bbox.getY1()); Point2D.Double p2 = finalMatrix.multPoint(bbox.getX2(), bbox.getY2()); - //PDFTron Rect needs lower left and upper right coordinates to calculate width and height correctly + // PDFTron Rect *needs* lower left and upper right coordinates to calculate width and height correctly, even though the documentation states otherwise Point2D.Double lowerLeft = new Point2D.Double(Math.min(p1.x, p2.x), Math.min(p1.y, p2.y)); Point2D.Double upperRight = new Point2D.Double(Math.max(p1.x, p2.x), Math.max(p1.y, p2.y)); diff --git a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/OCRService.java b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/OCRService.java index a90ade4..49e7f33 100644 --- a/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/OCRService.java +++ b/ocr-service-v1/ocr-service-server-v1/src/main/java/com/iqser/red/service/ocr/v1/server/service/OCRService.java @@ -112,12 +112,15 @@ public class OCRService { .numberOfPagesToOCR(pageIdToRectCollection.size()) .numberOfOCRedPages(numProcessedPages) .build())); + } catch (PDFNetException e) { log.error("failed to process page {}", pageId); throw new RuntimeException(e); } } + ocrPageDoc.close(); + rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_QUEUE, objectMapper.writeValueAsString(OCRStatusUpdateResponse.builder() .fileId(fileId)