RED-7080: Remove all content that belongs to an optional content group (OCG) named "Watermark"

This commit is contained in:
deiflaender 2023-08-14 15:53:54 +02:00
parent ac92602c32
commit 9ce48f181c
3 changed files with 150 additions and 33 deletions

View File

@ -0,0 +1,140 @@
package com.iqser.red.pdftronlogic.commons;
import com.pdftron.common.PDFNetException;
import com.pdftron.pdf.*;
import com.pdftron.pdf.ocg.Group;
import com.pdftron.pdf.ocg.OCMD;
import com.pdftron.sdf.Obj;
import lombok.SneakyThrows;
import lombok.experimental.UtilityClass;
import lombok.extern.slf4j.Slf4j;
import java.util.Set;
import java.util.TreeSet;
/**
 * Strips content that belongs to an optional content group (OCG, i.e. a PDF "layer")
 * named {@value #WATERMARK_GROUP_NAME} from every page of a document.
 *
 * <p>Pages are only rewritten when the document actually declares such a group; otherwise the
 * document is left untouched.
 */
@Slf4j
@UtilityClass
public class OCGWatermarkRemovalService {

    /** Name an optional content group must carry to be treated as a watermark layer. */
    private static final String WATERMARK_GROUP_NAME = "Watermark";

    /**
     * Removes all elements whose XObject is bound to a watermark OCG.
     *
     * @param pdfDoc the document to modify in place
     */
    @SneakyThrows
    public void removeWatermarks(PDFDoc pdfDoc) {
        if (hasOCGWatermarks(pdfDoc)) {
            removeOCGWatermarks(pdfDoc);
        }
    }

    /**
     * Checks whether the document declares at least one OCG named {@value #WATERMARK_GROUP_NAME}.
     *
     * @param pdfDoc the document to inspect
     * @return {@code true} if a matching group exists, {@code false} otherwise (including
     *         documents without any optional content)
     */
    @SneakyThrows
    private boolean hasOCGWatermarks(PDFDoc pdfDoc) {
        Obj ocgs = pdfDoc.getOCGs();
        // Documents without optional content have no OCG array at all.
        if (ocgs == null) {
            return false;
        }
        for (int i = 0; i < ocgs.size(); i++) {
            if (isWatermarkGroup(ocgs.getAt(i))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Rewrites the content of every page, dropping watermark elements.
     *
     * @param pdfDoc the document to modify in place
     */
    @SneakyThrows
    private void removeOCGWatermarks(PDFDoc pdfDoc) {
        ElementReader reader = new ElementReader();
        try {
            ElementWriter writer = new ElementWriter();
            try {
                // Shared across pages so a form XObject reused on several pages is rewritten once.
                Set<Long> visitedXObjIds = new TreeSet<>();
                for (PageIterator iterator = pdfDoc.getPageIterator(); iterator.hasNext(); ) {
                    Page page = iterator.next();
                    writeAllElementsExceptWatermarks(page, reader, writer, visitedXObjIds);
                }
            } finally {
                // Release the native handle even when processing a page fails.
                writer.destroy();
            }
        } finally {
            reader.destroy();
        }
    }

    /**
     * Replaces the content stream of a single page with a copy that lacks watermark elements.
     *
     * @param page           the page to rewrite
     * @param reader         reader used to iterate over the page content
     * @param writer         writer used to emit the replacement content
     * @param visitedXObjIds object numbers of form XObjects that were already rewritten
     */
    @SneakyThrows
    private void writeAllElementsExceptWatermarks(Page page,
                                                  ElementReader reader,
                                                  ElementWriter writer,
                                                  Set<Long> visitedXObjIds) {
        reader.begin(page);
        writer.begin(page, ElementWriter.e_replacement, false, true, page.getResourceDict());
        processElements(page, reader, writer, visitedXObjIds);
        writer.end();
        reader.end();
    }

    /**
     * Copies all elements of the reader's current content stream to the writer, skipping
     * elements that belong to a watermark OCG and descending into form XObjects.
     *
     * @param page           the page currently being processed (passed through to nested forms)
     * @param reader         reader positioned on the content stream to copy
     * @param writer         writer receiving the surviving elements
     * @param visitedXObjIds object numbers of form XObjects that were already rewritten
     * @throws PDFNetException if reading or writing an element fails
     */
    private void processElements(Page page,
                                 ElementReader reader,
                                 ElementWriter writer,
                                 Set<Long> visitedXObjIds) throws PDFNetException {
        for (Element element = reader.next(); element != null; element = reader.next()) {
            if (inOCGWatermark(element)) {
                continue;
            }
            switch (element.getType()) {
                case Element.e_form -> processForms(page, element, reader, writer, visitedXObjIds);
                default -> writer.writeElement(element);
            }
        }
    }

    /**
     * Tells whether the element's XObject is bound, through its {@code OC} entry, to an OCG
     * named {@value #WATERMARK_GROUP_NAME}.
     *
     * @param element the content element to test
     * @return {@code true} if the element belongs to a watermark group
     */
    @SneakyThrows
    private boolean inOCGWatermark(Element element) {
        Obj xObj = element.getXObject();
        if (xObj == null) {
            return false;
        }
        Obj oc = xObj.findObj("OC");
        if (oc == null) {
            return false;
        }
        OCMD ocmd = new OCMD(oc);
        if (!ocmd.isValid()) {
            return false;
        }
        Obj ocgs = ocmd.getOCGs();
        if (ocgs == null) {
            return false;
        }
        // A membership dictionary may reference either a single group or an array of groups.
        if (ocgs.isArray()) {
            for (int i = 0; i < ocgs.size(); i++) {
                if (isWatermarkGroup(ocgs.getAt(i))) {
                    return true;
                }
            }
            return false;
        }
        return isWatermarkGroup(ocgs);
    }

    /**
     * Tells whether the given object is a valid OCG named {@value #WATERMARK_GROUP_NAME}.
     *
     * @param ocg candidate optional content group object
     * @return {@code true} if it is a valid group carrying the watermark name
     */
    @SneakyThrows
    private boolean isWatermarkGroup(Obj ocg) {
        Group group = new Group(ocg);
        // Constant-first comparison stays safe should the group report no name.
        return group.isValid() && WATERMARK_GROUP_NAME.equals(group.getName());
    }

    /**
     * Writes the form reference to the current stream and, the first time a given form XObject
     * is encountered, rewrites the form's own content stream without watermark elements.
     *
     * @param page           the page currently being processed
     * @param element        the form element
     * @param reader         reader positioned on the form element
     * @param writer         writer of the enclosing content stream
     * @param visitedXObjIds object numbers of form XObjects that were already rewritten
     */
    @SneakyThrows
    private void processForms(Page page,
                              Element element,
                              ElementReader reader,
                              ElementWriter writer,
                              Set<Long> visitedXObjIds) {
        writer.writeElement(element);
        Obj formObj = element.getXObject();
        // add() returns false when the id is already present, i.e. the form was handled before.
        if (!visitedXObjIds.add(formObj.getObjNum())) {
            return;
        }
        // writer needs to be newly initialized when entering a new content stream
        // see ElementEditTest in PDFTron (https://www.pdftron.com/documentation/samples/android/java/ElementEditTest)
        ElementWriter formWriter = new ElementWriter();
        try {
            reader.formBegin();
            formWriter.begin(formObj);
            reader.clearChangeList();
            formWriter.setDefaultGState(reader);
            processElements(page, reader, formWriter, visitedXObjIds);
            formWriter.end();
            reader.end();
        } finally {
            // Release the native handle even when rewriting the form fails.
            formWriter.destroy();
        }
    }
}

View File

@ -3,8 +3,6 @@ package com.iqser.red.pdftronlogic.commons;
import com.pdftron.common.PDFNetException;
import com.pdftron.pdf.*;
import com.pdftron.pdf.ocg.Group;
import com.pdftron.pdf.ocg.OCMD;
import com.pdftron.sdf.Obj;
import com.pdftron.sdf.SDFDoc;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
@ -36,6 +34,8 @@ public class WatermarkRemovalService {
PDFDoc pdfDoc = new PDFDoc(pdfFile);
OCGWatermarkRemovalService.removeWatermarks(pdfDoc);
if (pdfDoc.getPageCount() < MIN_PAGES_THRESHOLD) {
log.info("Document page count {} is below threshold {}", pdfDoc.getPageCount(), MIN_PAGES_THRESHOLD);
} else {
@ -43,7 +43,8 @@ public class WatermarkRemovalService {
List<ElementFeatures> watermarkElementFeatures = filterSameFormObjectsOccuringOnMostPages(formObjectsForPages);
if (watermarkElementFeatures.size() > 0) {
Group group = new Group(pdfDoc.getOCGs());
if (watermarkElementFeatures.size() > 0 || group.isValid() && group.getName().equals("Watermark")) {
log.info("Watermark found and will be removed!");
removeAllWatermarks(pdfDoc, watermarkElementFeatures);
} else {
@ -211,9 +212,6 @@ public class WatermarkRemovalService {
double minAreaCoveringFromPage = AREA_THRESHOLD * page.getPageHeight() * page.getPageWidth();
for (Element element = reader.next(); element != null; element = reader.next()) {
if (inOCGWatermark(element)) {
continue;
}
switch (element.getType()) {
case Element.e_image, Element.e_inline_image -> {
@ -235,25 +233,6 @@ public class WatermarkRemovalService {
}
@SneakyThrows
private boolean inOCGWatermark(Element element) {
var xObj = element.getXObject();
if (xObj != null) {
Obj oc = xObj.findObj("OC");
if (oc != null) {
OCMD ocmd = new OCMD(oc);
if (ocmd.isValid()) {
Group group = new Group(ocmd.getOCGs());
if (group.isValid() && group.getName().equals("Watermark")) {
return true;
}
}
}
}
return false;
}
@SneakyThrows
private void removeImages(Element element, ElementWriter
writer, List<ElementFeatures> watermarksElementFeaturesList) {

View File

@ -1,16 +1,14 @@
package com.iqser.red.pdftronlogic.commons;
import java.io.FileOutputStream;
import java.nio.file.Path;
import java.util.Locale;
import com.pdftron.pdf.PDFNet;
import lombok.SneakyThrows;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.platform.commons.util.StringUtils;
import com.pdftron.pdf.PDFNet;
import lombok.SneakyThrows;
import java.io.FileOutputStream;
import java.nio.file.Path;
import java.util.Locale;
@Disabled
class WatermarkRemovalServiceTest {
@ -23,7 +21,7 @@ class WatermarkRemovalServiceTest {
WatermarkRemovalService watermarkRemovalService = new WatermarkRemovalService();
String filename = "files/18_TiltPlus_IrritacaoOcularAguda.pdf";
String filename = "files/1.A16148F - Toxicidade oral aguda (1).pdf";
String tmpFilename = createTmpFileName(filename, "WATERMARK_REMOVAL");
try (var in = this.getClass().getClassLoader().getResourceAsStream(filename); var out = new FileOutputStream(tmpFilename)) {