From 9ce48f181ca5dceb20e671a2545672d61e5789c8 Mon Sep 17 00:00:00 2001
From: deiflaender
Date: Mon, 14 Aug 2023 15:53:54 +0200
Subject: [PATCH] RED-7080: Remove all watermarks that are named as watermarks in OCG

---
 .../commons/OCGWatermarkRemovalService.java   | 226 ++++++++++++++++++
 .../commons/WatermarkRemovalService.java      |  29 +--
 .../commons/WatermarkRemovalServiceTest.java  |  14 +-
 3 files changed, 236 insertions(+), 33 deletions(-)
 create mode 100644 src/main/java/com/iqser/red/pdftronlogic/commons/OCGWatermarkRemovalService.java

diff --git a/src/main/java/com/iqser/red/pdftronlogic/commons/OCGWatermarkRemovalService.java b/src/main/java/com/iqser/red/pdftronlogic/commons/OCGWatermarkRemovalService.java
new file mode 100644
index 0000000..85b2b59
--- /dev/null
+++ b/src/main/java/com/iqser/red/pdftronlogic/commons/OCGWatermarkRemovalService.java
@@ -0,0 +1,226 @@
+package com.iqser.red.pdftronlogic.commons;
+
+import com.pdftron.common.PDFNetException;
+import com.pdftron.pdf.*;
+import com.pdftron.pdf.ocg.Group;
+import com.pdftron.pdf.ocg.OCMD;
+import com.pdftron.sdf.Obj;
+import lombok.SneakyThrows;
+import lombok.experimental.UtilityClass;
+import lombok.extern.slf4j.Slf4j;
+
+import java.util.Set;
+import java.util.TreeSet;
+
+/**
+ * Strips page content that is bound to an optional content group (OCG, a PDF layer) named "Watermark".
+ *
+ * Every page and every form XObject reachable from it is rewritten element by element. XObjects whose
+ * {@code /OC} entry resolves to such a group are left out of the rewritten content streams.
+ */
+@Slf4j
+@UtilityClass
+public class OCGWatermarkRemovalService {
+
+    private static final String WATERMARK_OCG_NAME = "Watermark";
+
+
+    /**
+     * Removes all XObjects assigned to a watermark OCG from the given document, in place.
+     * Documents that declare no such group are not rewritten at all.
+     *
+     * @param pdfDoc the document to clean
+     */
+    @SneakyThrows
+    public void removeWatermarks(PDFDoc pdfDoc) {
+
+        if (hasOCGWatermarks(pdfDoc)) {
+            log.info("OCG watermark found and will be removed!");
+            removeOCGWatermarks(pdfDoc);
+        }
+    }
+
+
+    /**
+     * Checks whether the document declares at least one OCG named "Watermark".
+     */
+    @SneakyThrows
+    private boolean hasOCGWatermarks(PDFDoc pdfDoc) {
+
+        // getOCGs() returns null for documents without optional content; the helper treats that as "none"
+        return containsWatermarkGroup(pdfDoc.getOCGs());
+    }
+
+
+    /**
+     * Tells whether {@code ocgs} (a single OCG dictionary, an array of them, or null) holds a watermark group.
+     */
+    @SneakyThrows
+    private boolean containsWatermarkGroup(Obj ocgs) {
+
+        if (ocgs == null) {
+            return false;
+        }
+        if (!ocgs.isArray()) {
+            return isWatermarkGroup(ocgs);
+        }
+        for (int i = 0; i < ocgs.size(); i++) {
+            if (isWatermarkGroup(ocgs.getAt(i))) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+
+    /**
+     * Tells whether the given object is a valid OCG carrying the watermark name.
+     */
+    @SneakyThrows
+    private boolean isWatermarkGroup(Obj ocg) {
+
+        if (ocg == null) {
+            return false;
+        }
+        Group group = new Group(ocg);
+        return group.isValid() && WATERMARK_OCG_NAME.equals(group.getName());
+    }
+
+
+    /**
+     * Rewrites the content of every page, leaving out watermark XObjects.
+     */
+    @SneakyThrows
+    private void removeOCGWatermarks(PDFDoc pdfDoc) {
+
+        ElementReader reader = new ElementReader();
+        ElementWriter writer = new ElementWriter();
+        Set<Long> visitedXObjIds = new TreeSet<>();
+
+        try {
+            for (PageIterator iterator = pdfDoc.getPageIterator(); iterator.hasNext(); ) {
+                writeAllElementsExceptWatermarks(iterator.next(), reader, writer, visitedXObjIds);
+            }
+        } finally {
+            // release the native reader/writer even when rewriting a page fails
+            reader.destroy();
+            writer.destroy();
+        }
+    }
+
+
+    @SneakyThrows
+    private void writeAllElementsExceptWatermarks(Page page,
+                                                  ElementReader reader,
+                                                  ElementWriter writer,
+                                                  Set<Long> visitedXObjIds) {
+
+        reader.begin(page);
+        writer.begin(page, ElementWriter.e_replacement, false, true, page.getResourceDict());
+        processElements(page, reader, writer, visitedXObjIds);
+        writer.end();
+        reader.end();
+    }
+
+
+    /**
+     * Copies the elements of the content stream currently opened on {@code reader} to {@code writer},
+     * dropping watermark XObjects and descending into form XObjects.
+     */
+    private void processElements(Page page,
+                                 ElementReader reader,
+                                 ElementWriter writer,
+                                 Set<Long> visitedXObjIds) throws PDFNetException {
+
+        for (Element element = reader.next(); element != null; element = reader.next()) {
+
+            if (inOCGWatermark(element)) {
+                continue;
+            }
+
+            switch (element.getType()) {
+                case Element.e_form -> processForms(page, element, reader, writer, visitedXObjIds);
+                default -> writer.writeElement(element);
+            }
+        }
+    }
+
+
+    /**
+     * Tells whether the element is an XObject whose {@code /OC} entry refers to a watermark OCG,
+     * either directly or through an optional content membership dictionary (OCMD).
+     */
+    @SneakyThrows
+    private boolean inOCGWatermark(Element element) {
+
+        Obj xObj = element.getXObject();
+        if (xObj == null) {
+            return false;
+        }
+        Obj oc = xObj.findObj("OC");
+        if (oc == null) {
+            return false;
+        }
+        if (isOcgDictionary(oc)) {
+            // /OC may name the group itself instead of going through an OCMD
+            return isWatermarkGroup(oc);
+        }
+        OCMD ocmd = new OCMD(oc);
+        // an OCMD may list one group or an array of groups, and may omit /OCGs altogether
+        return ocmd.isValid() && containsWatermarkGroup(ocmd.getOCGs());
+    }
+
+
+    /**
+     * Tells whether the object is a dictionary with {@code /Type /OCG}.
+     */
+    @SneakyThrows
+    private boolean isOcgDictionary(Obj obj) {
+
+        if (!obj.isDict()) {
+            return false;
+        }
+        Obj type = obj.findObj("Type");
+        return type != null && type.isName() && "OCG".equals(type.getName());
+    }
+
+
+    /**
+     * Writes the form element itself and, the first time a form XObject is seen, rewrites its own
+     * content stream without watermark XObjects.
+     */
+    @SneakyThrows
+    private void processForms(Page page,
+                              Element element,
+                              ElementReader reader,
+                              ElementWriter writer,
+                              Set<Long> visitedXObjIds) {
+
+        writer.writeElement(element);
+
+        Obj formXObj = element.getXObject();
+        // add() returns false for an already known object number, so shared forms are rewritten once
+        if (!visitedXObjIds.add(formXObj.getObjNum())) {
+            return;
+        }
+
+        // writer needs to be newly initialized when entering a new content stream
+        // see ElementEditTest in PDFTron (https://www.pdftron.com/documentation/samples/android/java/ElementEditTest)
+        ElementWriter formWriter = new ElementWriter();
+        try {
+            reader.formBegin();
+            formWriter.begin(formXObj);
+
+            reader.clearChangeList();
+            formWriter.setDefaultGState(reader);
+
+            processElements(page, reader, formWriter, visitedXObjIds);
+            formWriter.end();
+            reader.end();
+        } finally {
+            // release the native writer even when processing the nested stream fails
+            formWriter.destroy();
+        }
+    }
+
+}
diff --git a/src/main/java/com/iqser/red/pdftronlogic/commons/WatermarkRemovalService.java b/src/main/java/com/iqser/red/pdftronlogic/commons/WatermarkRemovalService.java
index 7aab0c4..ce926ae 100644
--- a/src/main/java/com/iqser/red/pdftronlogic/commons/WatermarkRemovalService.java
+++ b/src/main/java/com/iqser/red/pdftronlogic/commons/WatermarkRemovalService.java
@@ -3,8 +3,6 @@ package com.iqser.red.pdftronlogic.commons;
 import com.pdftron.common.PDFNetException;
 import com.pdftron.pdf.*;
 import com.pdftron.pdf.ocg.Group;
-import com.pdftron.pdf.ocg.OCMD;
-import com.pdftron.sdf.Obj;
 import com.pdftron.sdf.SDFDoc;
 import lombok.SneakyThrows;
 import lombok.extern.slf4j.Slf4j;
@@ -36,6 +34,8 @@ public class WatermarkRemovalService {
         PDFDoc pdfDoc = new PDFDoc(pdfFile);
+        OCGWatermarkRemovalService.removeWatermarks(pdfDoc);
+
         if (pdfDoc.getPageCount() < MIN_PAGES_THRESHOLD) {
             log.info("Document page count {} is below threshold {}", pdfDoc.getPageCount(), MIN_PAGES_THRESHOLD);
         } else {
@@ -43,7 +43,8 @@ public class WatermarkRemovalService {
             List watermarkElementFeatures = filterSameFormObjectsOccuringOnMostPages(formObjectsForPages);
-            if (watermarkElementFeatures.size() > 0) {
+            // OCG watermarks were already removed above; pdfDoc.getOCGs() is an array (or null), not a single Group
+            if (!watermarkElementFeatures.isEmpty()) {
                 log.info("Watermark found and will be removed!");
                 removeAllWatermarks(pdfDoc, watermarkElementFeatures);
             } else {
@@ -211,9 +212,6 @@ public class WatermarkRemovalService {
         double minAreaCoveringFromPage = AREA_THRESHOLD * page.getPageHeight() * page.getPageWidth();
         for (Element element = reader.next(); element != null; element = reader.next()) {
-            if (inOCGWatermark(element)) {
-                continue;
-            }
             switch (element.getType()) {
                 case Element.e_image, Element.e_inline_image -> {
@@ -235,25 +233,6 @@ public class WatermarkRemovalService {
     }
-    @SneakyThrows
-    private boolean inOCGWatermark(Element element) {
-        var xObj = element.getXObject();
-        if (xObj != null) {
-            Obj oc = xObj.findObj("OC");
-            if (oc != null) {
-                OCMD ocmd = new OCMD(oc);
-                if (ocmd.isValid()) {
-                    Group group = new Group(ocmd.getOCGs());
-                    if (group.isValid() && group.getName().equals("Watermark")) {
-                        return true;
-                    }
-                }
-            }
-        }
-        return false;
-    }
-
-
-
     @SneakyThrows
     private void removeImages(Element element, ElementWriter writer, List watermarksElementFeaturesList) {
diff --git a/src/test/java/com/iqser/red/pdftronlogic/commons/WatermarkRemovalServiceTest.java b/src/test/java/com/iqser/red/pdftronlogic/commons/WatermarkRemovalServiceTest.java
index 7f54c9c..5e91d8d 100644
--- a/src/test/java/com/iqser/red/pdftronlogic/commons/WatermarkRemovalServiceTest.java
+++ b/src/test/java/com/iqser/red/pdftronlogic/commons/WatermarkRemovalServiceTest.java
@@ -1,16 +1,14 @@
 package com.iqser.red.pdftronlogic.commons;
-import java.io.FileOutputStream;
-import java.nio.file.Path;
-import java.util.Locale;
-
+import com.pdftron.pdf.PDFNet;
+import lombok.SneakyThrows;
 import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;
 import org.junit.platform.commons.util.StringUtils;
-import com.pdftron.pdf.PDFNet;
-
-import lombok.SneakyThrows;
+import java.io.FileOutputStream;
+import java.nio.file.Path;
+import java.util.Locale;
 @Disabled
 class WatermarkRemovalServiceTest {
@@ -23,7 +21,7 @@ class WatermarkRemovalServiceTest {
         WatermarkRemovalService watermarkRemovalService = new WatermarkRemovalService();
-        String filename = "files/18_TiltPlus_IrritacaoOcularAguda.pdf";
+        String filename = "files/1.A16148F - Toxicidade oral aguda (1).pdf";
         String tmpFilename = createTmpFileName(filename, "WATERMARK_REMOVAL");
         try (var in = this.getClass().getClassLoader().getResourceAsStream(filename); var out = new FileOutputStream(tmpFilename)) {