From 439499143244b17941cd768e70476174d3333361 Mon Sep 17 00:00:00 2001 From: RaphaelArnold Date: Thu, 31 Aug 2023 15:27:38 +0200 Subject: [PATCH] RED-7075: Some Styling improvements --- .../commons/WatermarkRemovalService.java | 65 +++++++++---------- 1 file changed, 31 insertions(+), 34 deletions(-) diff --git a/src/main/java/com/iqser/red/pdftronlogic/commons/WatermarkRemovalService.java b/src/main/java/com/iqser/red/pdftronlogic/commons/WatermarkRemovalService.java index 0a42f2f..2d8e6a2 100644 --- a/src/main/java/com/iqser/red/pdftronlogic/commons/WatermarkRemovalService.java +++ b/src/main/java/com/iqser/red/pdftronlogic/commons/WatermarkRemovalService.java @@ -36,7 +36,7 @@ public class WatermarkRemovalService { final static double TEXT_POSITION_THRESHOLD = 0.15; - final static double MIN_TEXTWATERMAK_HEIGHT_THRESHOLD = 0.125; // multiplied with page height + final static double MIN_TEXTWATERMARK_HEIGHT_THRESHOLD = 0.125; // multiplied with page height final static int PAGE_NUMBER_TEXT_SEARCH_THRESHOLD = 5; // stop text based search after 5 pages without watermark final static double ROTATED_TEXT_THRESHOLD = 12.5; //this is in degrees @@ -126,31 +126,12 @@ public class WatermarkRemovalService { return; } - if (element.getType() == Element.e_form) { - if (element.getBBox().getHeight() * element.getBBox().getWidth() < minAreaCoveringPage) { - return; - } - processXObject(element, visitedXObjIds, elementFeaturesLinkedList, formObjectsOccuringMoreThanOnceOnAPage, minAreaCoveringPage, page); - } else if (element.getType() == Element.e_image || element.getType() == Element.e_inline_image) { - if (element.getXObject() == null) { - return; - } - if (element.getBBox().getHeight() * element.getBBox().getWidth() < minAreaCoveringPage) { - boolean isLocatedNearBorder = element.getBBox().getY1() < page.getVisibleContentBox() - .getY1() + page.getPageHeight() * IMAGE_POSITION_HEIGHT_THRESHOLD || element.getBBox().getY2() > page.getVisibleContentBox() - .getY2() - page.getPageHeight() * IMAGE_POSITION_HEIGHT_THRESHOLD || element.getBBox().getX1() < page.getVisibleContentBox() - .getX1() + page.getPageWidth() * IMAGE_POSITION_WIDTH_THRESHOLD || element.getBBox().getX2() > page.getVisibleContentBox() - .getX2() - page.getPageWidth() * IMAGE_POSITION_WIDTH_THRESHOLD; - - if (isLocatedNearBorder) { - return; - } - } - - processImages(element, elementFeaturesLinkedList); - } else if (element.getType() == Element.e_text) { - processText(element, elementFeaturesLinkedList, page); + switch (element.getType()) { + case Element.e_form -> processXObject(element, visitedXObjIds, elementFeaturesLinkedList, formObjectsOccuringMoreThanOnceOnAPage, minAreaCoveringPage, page); + case Element.e_image, Element.e_inline_image -> processImages(element, elementFeaturesLinkedList, page, minAreaCoveringPage); + case Element.e_text -> processText(element, elementFeaturesLinkedList, page); } + } @@ -174,7 +155,7 @@ public class WatermarkRemovalService { return; } - boolean isBigEnough = Math.abs(element.getBBox().getY1() - element.getBBox().getY2()) > page.getPageHeight() * MIN_TEXTWATERMAK_HEIGHT_THRESHOLD; + boolean isBigEnough = Math.abs(element.getBBox().getY1() - element.getBBox().getY2()) > page.getPageHeight() * MIN_TEXTWATERMARK_HEIGHT_THRESHOLD; if (isBigEnough) { ElementFeatures elementFeatures = ElementFeatureFactory.extractFeatures(element); @@ -199,7 +180,14 @@ public class WatermarkRemovalService { @SneakyThrows - private void processImages(Element element, List elementFeaturesLinkedList) { + private void processImages(Element element, List elementFeaturesLinkedList, Page page, double minAreaCoveringPage) { + + if (element.getXObject() == null) { + return; + } + if (element.getBBox().getHeight() * element.getBBox().getWidth() < minAreaCoveringPage && isLocatedNearBorder(element, page)) { + return; + } String hashOfImage = ImageHashFactory.calculate(element); ElementFeatures elementFeatures = ElementFeatureFactory.extractFeaturesWithHash(element, hashOfImage); @@ -207,6 +195,16 @@ public class WatermarkRemovalService { } + @SneakyThrows + private boolean isLocatedNearBorder(Element element, Page page) { + + return element.getBBox().getY1() < page.getVisibleContentBox().getY1() + page.getPageHeight() * IMAGE_POSITION_HEIGHT_THRESHOLD || element.getBBox() + .getY2() > page.getVisibleContentBox().getY2() - page.getPageHeight() * IMAGE_POSITION_HEIGHT_THRESHOLD || element.getBBox().getX1() < page.getVisibleContentBox() + .getX1() + page.getPageWidth() * IMAGE_POSITION_WIDTH_THRESHOLD || element.getBBox().getX2() > page.getVisibleContentBox() + .getX2() - page.getPageWidth() * IMAGE_POSITION_WIDTH_THRESHOLD; + } + + @SneakyThrows private void processXObject(Element element, Set visitedXObjIds, @@ -215,6 +213,10 @@ public class WatermarkRemovalService { double minAreaCoveringPage, Page page) { + if (element.getBBox().getHeight() * element.getBBox().getWidth() < minAreaCoveringPage) { + return; + } + if (visitedXObjIds.add(element.getXObject().getObjNum())) { ElementReader xObjectReader = new ElementReader(); xObjectReader.begin(element.getXObject()); @@ -301,13 +303,8 @@ public class WatermarkRemovalService { continue; } if (element.getBBox().getHeight() * element.getBBox().getWidth() < minAreaCoveringFromPage) { - boolean isLocatedNearBorder = element.getBBox().getY1() < page.getVisibleContentBox() - .getY1() + page.getPageHeight() * IMAGE_POSITION_HEIGHT_THRESHOLD || element.getBBox().getY2() > page.getVisibleContentBox() - .getY2() - page.getPageHeight() * IMAGE_POSITION_HEIGHT_THRESHOLD || element.getBBox().getX1() < page.getVisibleContentBox() - .getX1() + page.getPageWidth() * IMAGE_POSITION_WIDTH_THRESHOLD || element.getBBox().getX2() > page.getVisibleContentBox() - .getX2() - page.getPageWidth() * IMAGE_POSITION_WIDTH_THRESHOLD; - - if (isLocatedNearBorder && element.getBBox().getHeight() * element.getBBox().getWidth() < minAreaCoveringFromPage || element.getXObject() == null) { + if (isLocatedNearBorder(element, page) && element.getBBox().getHeight() * element.getBBox() + .getWidth() < minAreaCoveringFromPage || element.getXObject() == null) { writer.writeElement(element); continue; }