RED-7080: Remove all content that belongs to an optional content group (OCG) named "Watermark"
This commit is contained in:
parent
ac92602c32
commit
9ce48f181c
@ -0,0 +1,140 @@
|
||||
package com.iqser.red.pdftronlogic.commons;
|
||||
|
||||
import com.pdftron.common.PDFNetException;
|
||||
import com.pdftron.pdf.*;
|
||||
import com.pdftron.pdf.ocg.Group;
|
||||
import com.pdftron.pdf.ocg.OCMD;
|
||||
import com.pdftron.sdf.Obj;
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.experimental.UtilityClass;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
@Slf4j
|
||||
@UtilityClass
|
||||
public class OCGWatermarkRemovalService {
|
||||
|
||||
@SneakyThrows
|
||||
public void removeWatermarks(PDFDoc pdfDoc) {
|
||||
|
||||
if (hasOCGWatermarks(pdfDoc)) {
|
||||
removeOCGWatermarks(pdfDoc);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private boolean hasOCGWatermarks(PDFDoc pdfDoc) {
|
||||
Obj ocgs = pdfDoc.getOCGs();
|
||||
for (int i = 0; i < ocgs.size(); i++) {
|
||||
Group group = new Group(ocgs.getAt(i));
|
||||
if (group.isValid() && group.getName().equals("Watermark")) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private void removeOCGWatermarks(PDFDoc pdfDoc) {
|
||||
|
||||
ElementReader reader = new ElementReader();
|
||||
ElementWriter writer = new ElementWriter();
|
||||
Set<Long> visitedXObjIds = new TreeSet<>();
|
||||
|
||||
for (PageIterator iterator = pdfDoc.getPageIterator(); iterator.hasNext(); ) {
|
||||
|
||||
Page page = iterator.next();
|
||||
writeAllElementsExceptWatermarks(page, reader, writer, visitedXObjIds);
|
||||
}
|
||||
|
||||
reader.destroy();
|
||||
writer.destroy();
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private void writeAllElementsExceptWatermarks(Page page,
|
||||
ElementReader reader,
|
||||
ElementWriter writer,
|
||||
Set<Long> visitedXObjIds) {
|
||||
|
||||
reader.begin(page);
|
||||
writer.begin(page, ElementWriter.e_replacement, false, true, page.getResourceDict());
|
||||
processElements(page, reader, writer, visitedXObjIds);
|
||||
writer.end();
|
||||
reader.end();
|
||||
}
|
||||
|
||||
|
||||
private void processElements(Page page,
|
||||
ElementReader reader,
|
||||
ElementWriter writer,
|
||||
Set<Long> visitedXObjIds) throws PDFNetException {
|
||||
|
||||
for (Element element = reader.next(); element != null; element = reader.next()) {
|
||||
|
||||
if (inOCGWatermark(element)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
switch (element.getType()) {
|
||||
case Element.e_form -> processForms(page, element, reader, writer, visitedXObjIds);
|
||||
default -> writer.writeElement(element);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private boolean inOCGWatermark(Element element) {
|
||||
var xObj = element.getXObject();
|
||||
if (xObj != null) {
|
||||
Obj oc = xObj.findObj("OC");
|
||||
if (oc != null) {
|
||||
OCMD ocmd = new OCMD(oc);
|
||||
if (ocmd.isValid()) {
|
||||
Group group = new Group(ocmd.getOCGs());
|
||||
if (group.isValid() && group.getName().equals("Watermark")) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private void processForms(Page page,
|
||||
Element element,
|
||||
ElementReader reader,
|
||||
ElementWriter writer,
|
||||
Set<Long> visitedXObjIds) {
|
||||
|
||||
|
||||
writer.writeElement(element);
|
||||
|
||||
if (!visitedXObjIds.contains(element.getXObject().getObjNum())) {
|
||||
visitedXObjIds.add(element.getXObject().getObjNum());
|
||||
// writer needs to be newly initialized when entering a new content stream
|
||||
// see ElementEditTest in PDFTron (https://www.pdftron.com/documentation/samples/android/java/ElementEditTest)
|
||||
ElementWriter formWriter = new ElementWriter();
|
||||
reader.formBegin();
|
||||
formWriter.begin(element.getXObject());
|
||||
|
||||
reader.clearChangeList();
|
||||
formWriter.setDefaultGState(reader);
|
||||
|
||||
processElements(page, reader, formWriter, visitedXObjIds);
|
||||
formWriter.end();
|
||||
formWriter.destroy();
|
||||
reader.end();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -3,8 +3,6 @@ package com.iqser.red.pdftronlogic.commons;
|
||||
import com.pdftron.common.PDFNetException;
|
||||
import com.pdftron.pdf.*;
|
||||
import com.pdftron.pdf.ocg.Group;
|
||||
import com.pdftron.pdf.ocg.OCMD;
|
||||
import com.pdftron.sdf.Obj;
|
||||
import com.pdftron.sdf.SDFDoc;
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
@ -36,6 +34,8 @@ public class WatermarkRemovalService {
|
||||
|
||||
PDFDoc pdfDoc = new PDFDoc(pdfFile);
|
||||
|
||||
OCGWatermarkRemovalService.removeWatermarks(pdfDoc);
|
||||
|
||||
if (pdfDoc.getPageCount() < MIN_PAGES_THRESHOLD) {
|
||||
log.info("Document page count {} is below threshold {}", pdfDoc.getPageCount(), MIN_PAGES_THRESHOLD);
|
||||
} else {
|
||||
@ -43,7 +43,8 @@ public class WatermarkRemovalService {
|
||||
|
||||
List<ElementFeatures> watermarkElementFeatures = filterSameFormObjectsOccuringOnMostPages(formObjectsForPages);
|
||||
|
||||
if (watermarkElementFeatures.size() > 0) {
|
||||
Group group = new Group(pdfDoc.getOCGs());
|
||||
if (watermarkElementFeatures.size() > 0 || group.isValid() && group.getName().equals("Watermark")) {
|
||||
log.info("Watermark found and will be removed!");
|
||||
removeAllWatermarks(pdfDoc, watermarkElementFeatures);
|
||||
} else {
|
||||
@ -211,9 +212,6 @@ public class WatermarkRemovalService {
|
||||
double minAreaCoveringFromPage = AREA_THRESHOLD * page.getPageHeight() * page.getPageWidth();
|
||||
for (Element element = reader.next(); element != null; element = reader.next()) {
|
||||
|
||||
if (inOCGWatermark(element)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
switch (element.getType()) {
|
||||
case Element.e_image, Element.e_inline_image -> {
|
||||
@ -235,25 +233,6 @@ public class WatermarkRemovalService {
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private boolean inOCGWatermark(Element element) {
|
||||
var xObj = element.getXObject();
|
||||
if (xObj != null) {
|
||||
Obj oc = xObj.findObj("OC");
|
||||
if (oc != null) {
|
||||
OCMD ocmd = new OCMD(oc);
|
||||
if (ocmd.isValid()) {
|
||||
Group group = new Group(ocmd.getOCGs());
|
||||
if (group.isValid() && group.getName().equals("Watermark")) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private void removeImages(Element element, ElementWriter
|
||||
writer, List<ElementFeatures> watermarksElementFeaturesList) {
|
||||
|
||||
@ -1,16 +1,14 @@
|
||||
package com.iqser.red.pdftronlogic.commons;
|
||||
|
||||
import java.io.FileOutputStream;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Locale;
|
||||
|
||||
import com.pdftron.pdf.PDFNet;
|
||||
import lombok.SneakyThrows;
|
||||
import org.junit.jupiter.api.Disabled;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.platform.commons.util.StringUtils;
|
||||
|
||||
import com.pdftron.pdf.PDFNet;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
import java.io.FileOutputStream;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Locale;
|
||||
|
||||
@Disabled
|
||||
class WatermarkRemovalServiceTest {
|
||||
@ -23,7 +21,7 @@ class WatermarkRemovalServiceTest {
|
||||
|
||||
WatermarkRemovalService watermarkRemovalService = new WatermarkRemovalService();
|
||||
|
||||
String filename = "files/18_TiltPlus_IrritacaoOcularAguda.pdf";
|
||||
String filename = "files/1.A16148F - Toxicidade oral aguda (1).pdf";
|
||||
|
||||
String tmpFilename = createTmpFileName(filename, "WATERMARK_REMOVAL");
|
||||
try (var in = this.getClass().getClassLoader().getResourceAsStream(filename); var out = new FileOutputStream(tmpFilename)) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user