Merge branch 'RED-7075' into 'master'
RED-7075: Improved watermark removal to recognize smaller images and text Closes RED-7075 See merge request redactmanager/commons/pdftron-logic-commons!15
This commit is contained in:
commit
612bb5a63a
@ -3,7 +3,6 @@ package com.iqser.red.pdftronlogic.commons;
|
||||
import static com.iqser.red.pdftronlogic.commons.InvisibleElementRemovalService.TOLERANCE;
|
||||
|
||||
import java.awt.Color;
|
||||
import java.awt.geom.Area;
|
||||
import java.awt.geom.GeneralPath;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
|
||||
@ -23,6 +22,9 @@ import lombok.experimental.SuperBuilder;
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public class ElementFeatures {
|
||||
|
||||
final private static double RECT_POSITION_SIMILARITY_THRESHOLD_FACTOR = 0.2; // specify how much the x and y value are allowed to differ
|
||||
final private static double RECT_SIZE_SIMILARITY_THRESHOLD_FACTOR = 0.1; // the scale the images are allowed to differ
|
||||
final private static double HAMMING_DISTANCE_THRESHOLD = 4; // defines the similarity of the hash of images
|
||||
int elementType;
|
||||
Rectangle2D boundingBox;
|
||||
|
||||
@ -34,18 +36,6 @@ public class ElementFeatures {
|
||||
rectsAlmostMatch(element.getBBox());
|
||||
}
|
||||
|
||||
public boolean almostMatches(ElementFeatures elementFeatures){
|
||||
return elementFeatures.getElementType() == elementType &&
|
||||
elementFeatures.getBoundingBox() != null &&
|
||||
rectsAlmostMatch(elementFeatures.getBoundingBox());
|
||||
}
|
||||
|
||||
|
||||
protected boolean almostEqual(double a, double b) {
|
||||
|
||||
return Math.abs(a - b) < TOLERANCE;
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private boolean rectsAlmostMatch(Rect bBox) {
|
||||
@ -57,6 +47,19 @@ public class ElementFeatures {
|
||||
almostEqual(bBox.getHeight(), boundingBox.getHeight());
|
||||
}
|
||||
|
||||
|
||||
protected boolean almostEqual(double a, double b) {
|
||||
|
||||
return Math.abs(a - b) < TOLERANCE;
|
||||
}
|
||||
|
||||
|
||||
public boolean almostMatches(ElementFeatures elementFeatures) {
|
||||
|
||||
return elementFeatures.getElementType() == elementType && elementFeatures.getBoundingBox() != null && rectsAlmostMatch(elementFeatures.getBoundingBox());
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private boolean rectsAlmostMatch(Rectangle2D bBox) {
|
||||
// To address the inconsistencies in the calculation of the bounding box we check equality with a tolerance
|
||||
@ -68,6 +71,34 @@ public class ElementFeatures {
|
||||
}
|
||||
|
||||
|
||||
public boolean isSimilarTo(ElementFeatures elementFeatures) {
|
||||
|
||||
return elementFeatures.getElementType() == elementType && elementFeatures.getBoundingBox() != null && areRectsSimilar(elementFeatures.getBoundingBox());
|
||||
}
|
||||
|
||||
|
||||
private boolean areRectsSimilar(Rectangle2D rectangle2D) {
|
||||
// To address the inconsistencies in the calculation of the bounding box we check equality with a tolerance
|
||||
|
||||
return isPositionSimilar(rectangle2D.getX(), boundingBox.getX(), rectangle2D.getWidth()) && //
|
||||
isPositionSimilar(rectangle2D.getY(), boundingBox.getY(), rectangle2D.getHeight()) && //
|
||||
isSizeSimilar(rectangle2D.getWidth(), boundingBox.getWidth()) && //
|
||||
isSizeSimilar(rectangle2D.getHeight(), boundingBox.getHeight());
|
||||
}
|
||||
|
||||
|
||||
protected boolean isPositionSimilar(double a, double b, double boxSize) {
|
||||
|
||||
return Math.abs(a - b) < boxSize * RECT_POSITION_SIMILARITY_THRESHOLD_FACTOR;
|
||||
}
|
||||
|
||||
|
||||
protected boolean isSizeSimilar(double a, double b) {
|
||||
|
||||
return Math.abs(a - b) < a * RECT_SIZE_SIMILARITY_THRESHOLD_FACTOR;
|
||||
}
|
||||
|
||||
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@Getter
|
||||
@SuperBuilder
|
||||
@ -148,6 +179,7 @@ public class ElementFeatures {
|
||||
int bitsPerComponent;
|
||||
String hashOfImage;
|
||||
|
||||
|
||||
@Override
|
||||
public boolean almostMatches(Element element) throws PDFNetException {
|
||||
|
||||
@ -160,22 +192,27 @@ public class ElementFeatures {
|
||||
bitsPerComponent == element.getBitsPerComponent();
|
||||
}
|
||||
|
||||
public boolean almostMatches(ElementFeatures elementFeatures){
|
||||
if(elementFeatures.getClass() != this.getClass()){
|
||||
|
||||
public boolean almostMatches(ElementFeatures elementFeatures) {
|
||||
|
||||
if (elementFeatures.getClass() != this.getClass()) {
|
||||
return false;
|
||||
}
|
||||
return super.almostMatches(elementFeatures) &&
|
||||
this.dataSize == ((Image) elementFeatures).getDataSize() &&
|
||||
this.height == ((Image) elementFeatures).getHeight() &&
|
||||
this.width == ((Image) elementFeatures).getWidth() &&
|
||||
this.renderingIntent == ((Image) elementFeatures).getRenderingIntent() &&
|
||||
this.componentNum == ((Image) elementFeatures).getComponentNum() &&
|
||||
this.bitsPerComponent == ((Image) elementFeatures).getBitsPerComponent() &&
|
||||
calculateHammingDistance(((Image) elementFeatures).getHashOfImage()) <=4;
|
||||
return super.almostMatches(elementFeatures) && this.dataSize == ((Image) elementFeatures).getDataSize() && this.height == ((Image) elementFeatures).getHeight() && this.width == ((Image) elementFeatures).getWidth() && this.renderingIntent == ((Image) elementFeatures).getRenderingIntent() && this.componentNum == ((Image) elementFeatures).getComponentNum() && this.bitsPerComponent == ((Image) elementFeatures).getBitsPerComponent() && calculateHammingDistance(
|
||||
((Image) elementFeatures).getHashOfImage()) <= HAMMING_DISTANCE_THRESHOLD;
|
||||
}
|
||||
|
||||
|
||||
public boolean isSimilarTo(ElementFeatures elementFeatures) {
|
||||
|
||||
return super.isSimilarTo(elementFeatures) && //
|
||||
calculateHammingDistance(((Image) elementFeatures).getHashOfImage()) <= HAMMING_DISTANCE_THRESHOLD;
|
||||
}
|
||||
|
||||
|
||||
// Helper method to calculate the Hamming distance between two hexadecimal strings
|
||||
private int calculateHammingDistance(String hash2) {
|
||||
|
||||
int distance = 0;
|
||||
int maxLength = Math.max(this.hashOfImage.length(), hash2.length());
|
||||
for (int i = 0; i < maxLength; i++) {
|
||||
@ -202,34 +239,32 @@ public class ElementFeatures {
|
||||
|
||||
@Override
|
||||
public boolean almostMatches(Element element) throws PDFNetException {
|
||||
|
||||
return element.getType() == getElementType() && //
|
||||
element.getBBox() != null && //
|
||||
(super.rectsAlmostMatch(element.getBBox()) || almostRotateMatches(element.getBBox().getRectangle())) &&
|
||||
xObjectType == element.getXObject().getType() &&
|
||||
dictOrArrayOrStreamLength == element.getXObject().getDecodedStream().size();
|
||||
(super.rectsAlmostMatch(element.getBBox()) || almostRotateMatches(element.getBBox().getRectangle())) && xObjectType == element.getXObject()
|
||||
.getType() && dictOrArrayOrStreamLength == element.getXObject().getDecodedStream().size();
|
||||
}
|
||||
|
||||
public boolean almostMatches(ElementFeatures elementFeatures){
|
||||
if(elementFeatures.getClass() != this.getClass()){
|
||||
|
||||
public boolean almostMatches(ElementFeatures elementFeatures) {
|
||||
|
||||
if (elementFeatures.getClass() != this.getClass()) {
|
||||
return false;
|
||||
}
|
||||
return elementFeatures.getElementType() == getElementType() &&
|
||||
elementFeatures.getBoundingBox() != null &&
|
||||
(super.rectsAlmostMatch(elementFeatures.getBoundingBox()) || almostRotateMatches(elementFeatures.getBoundingBox().getBounds2D())) &&
|
||||
xObjectType == ((Form)elementFeatures).getXObjectType() &&
|
||||
dictOrArrayOrStreamLength == ((Form)elementFeatures).getDictOrArrayOrStreamLength();
|
||||
return elementFeatures.getElementType() == getElementType() && elementFeatures.getBoundingBox() != null && (super.rectsAlmostMatch(elementFeatures.getBoundingBox()) || almostRotateMatches(
|
||||
elementFeatures.getBoundingBox()
|
||||
.getBounds2D())) && xObjectType == ((Form) elementFeatures).getXObjectType() && dictOrArrayOrStreamLength == ((Form) elementFeatures).getDictOrArrayOrStreamLength();
|
||||
|
||||
}
|
||||
|
||||
|
||||
private boolean almostRotateMatches(Rectangle2D bBox) {
|
||||
|
||||
return almostEqual(bBox.getWidth(), getBoundingBox().getHeight()) && //
|
||||
almostEqual(bBox.getHeight(), getBoundingBox().getWidth());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
@ -1,28 +1,54 @@
|
||||
package com.iqser.red.pdftronlogic.commons;
|
||||
|
||||
import com.pdftron.common.PDFNetException;
|
||||
import com.pdftron.pdf.*;
|
||||
import com.pdftron.sdf.SDFDoc;
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.util.*;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import com.pdftron.common.PDFNetException;
|
||||
import com.pdftron.pdf.Element;
|
||||
import com.pdftron.pdf.ElementReader;
|
||||
import com.pdftron.pdf.ElementWriter;
|
||||
import com.pdftron.pdf.PDFDoc;
|
||||
import com.pdftron.pdf.Page;
|
||||
import com.pdftron.pdf.PageIterator;
|
||||
import com.pdftron.sdf.SDFDoc;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
public class WatermarkRemovalService {
|
||||
|
||||
final static double AREA_THRESHOLD = 0.6; // multiplied with page area
|
||||
final static double AREA_THRESHOLD = 0.5; // multiplied with page area
|
||||
final static double OCCURING_ON_PAGES_THRESHOLD_FACTOR = 0.75; // multiplied with number of pages
|
||||
|
||||
final static int MIN_PAGES_THRESHOLD = 3;
|
||||
|
||||
final static double IMAGE_POSITION_HEIGHT_THRESHOLD = 0.2; // multiplied with page height
|
||||
|
||||
final static double IMAGE_POSITION_WIDTH_THRESHOLD = 0.125; // multiplied with page width
|
||||
|
||||
final static double TEXT_POSITION_THRESHOLD = 0.15;
|
||||
|
||||
final static double MIN_TEXTWATERMARK_HEIGHT_THRESHOLD = 0.125; // multiplied with page height
|
||||
|
||||
final static int PAGE_NUMBER_TEXT_SEARCH_THRESHOLD = 5; // stop text based search after 5 pages without watermark
|
||||
final static double ROTATED_TEXT_THRESHOLD = 12.5; //this is in degrees
|
||||
static boolean foundTextWatermark = true;
|
||||
|
||||
|
||||
/**
|
||||
* Watermark removal only works for documents whose size (page count) is greater than MIN_PAGES_THRESHOLD.
|
||||
* First the possible watermarks (big XObjects or Images) will be detected and then checked if those appear on most pages according to the
|
||||
* OCCURING_ON_PAGES_THRESHOLD_FACTOR by using image hashing for similarity and size and stream size of the xobjects.
|
||||
* The following watermarks will be found: big XObjects, big Images, small Images that appear in the middle of the page, and
|
||||
* text that is rotated and big enough compared to height of page.
|
||||
* First the possible watermarks will be detected and then checked if those appear on most pages according to the
|
||||
* OCCURING_ON_PAGES_THRESHOLD_FACTOR. We use image hashing for similarity between pictures, and the size and stream size of the XObjects.
|
||||
* If so, the detected and confirmed watermarks will not be written to the PDF file.
|
||||
*
|
||||
* @param pdfFile PDFFile to remove watermarks
|
||||
@ -46,7 +72,7 @@ public class WatermarkRemovalService {
|
||||
log.info("Watermark found and will be removed!");
|
||||
removeAllWatermarks(pdfDoc, watermarkElementFeatures);
|
||||
} else {
|
||||
log.info("No watermark found!");
|
||||
log.info("No unlabeled watermark found!");
|
||||
}
|
||||
}
|
||||
|
||||
@ -69,7 +95,6 @@ public class WatermarkRemovalService {
|
||||
|
||||
ElementReader reader = new ElementReader();
|
||||
|
||||
|
||||
for (PageIterator iterator = pdfDoc.getPageIterator(); iterator.hasNext(); ) {
|
||||
|
||||
Page page = iterator.next();
|
||||
@ -80,7 +105,7 @@ public class WatermarkRemovalService {
|
||||
|
||||
reader.begin(page);
|
||||
for (Element element = reader.next(); element != null; element = reader.next()) {
|
||||
processElement(element, visitedXObjIds, elementFeaturesLinkedList, formObjectsOccuringMoreThanOnceOnAPage, minAreaCoveringFromPage);
|
||||
processElement(element, visitedXObjIds, elementFeaturesLinkedList, formObjectsOccuringMoreThanOnceOnAPage, minAreaCoveringFromPage, page);
|
||||
}
|
||||
|
||||
formObjectsAndImagesForPages.put(page.getSDFObj().getObjNum(), elementFeaturesLinkedList);
|
||||
@ -96,28 +121,74 @@ public class WatermarkRemovalService {
|
||||
Set<Long> visitedXObjIds,
|
||||
List<ElementFeatures> elementFeaturesLinkedList,
|
||||
List<ElementFeatures> formObjectsOccuringMoreThanOnceOnAPage,
|
||||
double minAreaCoveringPage) throws PDFNetException {
|
||||
double minAreaCoveringPage,
|
||||
Page page) throws PDFNetException {
|
||||
|
||||
if (element.getBBox() == null) {
|
||||
return;
|
||||
}
|
||||
if (element.getBBox().getHeight() * element.getBBox().getWidth() < minAreaCoveringPage) {
|
||||
|
||||
switch (element.getType()) {
|
||||
case Element.e_form -> processXObject(element, visitedXObjIds, elementFeaturesLinkedList, formObjectsOccuringMoreThanOnceOnAPage, minAreaCoveringPage, page);
|
||||
case Element.e_image, Element.e_inline_image -> processImages(element, elementFeaturesLinkedList, page, minAreaCoveringPage);
|
||||
case Element.e_text -> processText(element, elementFeaturesLinkedList, page);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private void processText(Element element, List<ElementFeatures> elementFeaturesLinkedList, Page page) {
|
||||
|
||||
if (page.getIndex() == PAGE_NUMBER_TEXT_SEARCH_THRESHOLD) {
|
||||
shouldTextSearchBeContinued(elementFeaturesLinkedList);
|
||||
}
|
||||
|
||||
if (!couldTextBeAWatermark(element, page)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (element.getType() == Element.e_form) {
|
||||
processXObject(element, visitedXObjIds, elementFeaturesLinkedList, formObjectsOccuringMoreThanOnceOnAPage, minAreaCoveringPage);
|
||||
} else if (element.getType() == Element.e_image || element.getType() == Element.e_inline_image) {
|
||||
if (element.getXObject() == null) {
|
||||
return;
|
||||
boolean isBigEnough = Math.abs(element.getBBox().getY1() - element.getBBox().getY2()) > page.getPageHeight() * MIN_TEXTWATERMARK_HEIGHT_THRESHOLD;
|
||||
|
||||
if (isBigEnough) {
|
||||
ElementFeatures elementFeatures = ElementFeatureFactory.extractFeatures(element);
|
||||
elementFeaturesLinkedList.add(elementFeatures);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private boolean isTextRotated(Element element) {
|
||||
|
||||
return Math.abs(element.getCTM().getB()) < Math.sin(Math.toRadians(ROTATED_TEXT_THRESHOLD)) || Math.abs(element.getCTM()
|
||||
.getB()) > Math.sin(Math.toRadians(70 - ROTATED_TEXT_THRESHOLD));
|
||||
}
|
||||
|
||||
|
||||
private void shouldTextSearchBeContinued(List<ElementFeatures> elementFeaturesLinkedList) {
|
||||
|
||||
int countTextWatermarks = 0;
|
||||
for (ElementFeatures elementFeatures : elementFeaturesLinkedList) {
|
||||
if (elementFeatures.getElementType() == Element.e_text) {
|
||||
countTextWatermarks++;
|
||||
}
|
||||
processImages(element, elementFeaturesLinkedList);
|
||||
}
|
||||
if (countTextWatermarks < elementFeaturesLinkedList.size() * OCCURING_ON_PAGES_THRESHOLD_FACTOR) {
|
||||
foundTextWatermark = false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private void processImages(Element element, List<ElementFeatures> elementFeaturesLinkedList) {
|
||||
private void processImages(Element element, List<ElementFeatures> elementFeaturesLinkedList, Page page, double minAreaCoveringPage) {
|
||||
|
||||
if (element.getXObject() == null) {
|
||||
return;
|
||||
}
|
||||
if (element.getBBox().getHeight() * element.getBBox().getWidth() < minAreaCoveringPage && isLocatedNearBorder(element, page)) {
|
||||
return;
|
||||
}
|
||||
|
||||
String hashOfImage = ImageHashFactory.calculate(element);
|
||||
ElementFeatures elementFeatures = ElementFeatureFactory.extractFeaturesWithHash(element, hashOfImage);
|
||||
@ -125,18 +196,34 @@ public class WatermarkRemovalService {
|
||||
}
|
||||
|
||||
|
||||
// Typically company logos on dossier pages are located near the border and should be excluded from the watermark removal
|
||||
@SneakyThrows
|
||||
private boolean isLocatedNearBorder(Element element, Page page) {
|
||||
|
||||
return element.getBBox().getY1() < page.getVisibleContentBox().getY1() + page.getPageHeight() * IMAGE_POSITION_HEIGHT_THRESHOLD || element.getBBox()
|
||||
.getY2() > page.getVisibleContentBox().getY2() - page.getPageHeight() * IMAGE_POSITION_HEIGHT_THRESHOLD || element.getBBox().getX1() < page.getVisibleContentBox()
|
||||
.getX1() + page.getPageWidth() * IMAGE_POSITION_WIDTH_THRESHOLD || element.getBBox().getX2() > page.getVisibleContentBox()
|
||||
.getX2() - page.getPageWidth() * IMAGE_POSITION_WIDTH_THRESHOLD;
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private void processXObject(Element element,
|
||||
Set<Long> visitedXObjIds,
|
||||
List<ElementFeatures> elementFeaturesLinkedList,
|
||||
List<ElementFeatures> formObjectsOccuringMoreThanOnceOnAPage,
|
||||
double minAreaCoveringPage) {
|
||||
double minAreaCoveringPage,
|
||||
Page page) {
|
||||
|
||||
if (element.getBBox().getHeight() * element.getBBox().getWidth() < minAreaCoveringPage) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (visitedXObjIds.add(element.getXObject().getObjNum())) {
|
||||
ElementReader xObjectReader = new ElementReader();
|
||||
xObjectReader.begin(element.getXObject());
|
||||
for (Element element1 = xObjectReader.next(); element1 != null; element1 = xObjectReader.next()) {
|
||||
processElement(element1, visitedXObjIds, elementFeaturesLinkedList, formObjectsOccuringMoreThanOnceOnAPage, minAreaCoveringPage);
|
||||
processElement(element1, visitedXObjIds, elementFeaturesLinkedList, formObjectsOccuringMoreThanOnceOnAPage, minAreaCoveringPage, page);
|
||||
}
|
||||
elementFeaturesLinkedList.add(ElementFeatureFactory.extractFeatures(element));
|
||||
xObjectReader.destroy();
|
||||
@ -159,7 +246,8 @@ public class WatermarkRemovalService {
|
||||
.flatMap(Collection::stream)
|
||||
.filter(elementFeature -> formObjectsPerPage.values()
|
||||
.stream()
|
||||
.filter(elementFeaturesOnPage -> elementFeaturesOnPage.stream().anyMatch(elementFeature::almostMatches))
|
||||
.filter(elementFeaturesOnPage -> elementFeaturesOnPage.stream()
|
||||
.anyMatch(elementFeature.getElementType() == Element.e_image || elementFeature.getElementType() == Element.e_inline_image ? elementFeature::isSimilarTo : elementFeature::almostMatches))
|
||||
.count() >= minPagesFilter)
|
||||
.toList();
|
||||
}
|
||||
@ -210,21 +298,23 @@ public class WatermarkRemovalService {
|
||||
double minAreaCoveringFromPage = AREA_THRESHOLD * page.getPageHeight() * page.getPageWidth();
|
||||
for (Element element = reader.next(); element != null; element = reader.next()) {
|
||||
|
||||
|
||||
switch (element.getType()) {
|
||||
case Element.e_image, Element.e_inline_image -> {
|
||||
if (element.getBBox() == null) {
|
||||
writer.writeElement(element);
|
||||
continue;
|
||||
}
|
||||
if (element.getBBox().getHeight() * element.getBBox().getWidth() < minAreaCoveringFromPage || element.getXObject() == null) {
|
||||
if (element.getBBox().getHeight() * element.getBBox().getWidth() < minAreaCoveringFromPage && isLocatedNearBorder(element, page) && element.getBBox()
|
||||
.getHeight() * element.getBBox().getWidth() < minAreaCoveringFromPage || element.getXObject() == null) {
|
||||
|
||||
writer.writeElement(element);
|
||||
continue;
|
||||
|
||||
}
|
||||
removeImages(element, writer, watermarksElementFeaturesList);
|
||||
}
|
||||
case Element.e_form ->
|
||||
processForms(page, element, reader, writer, watermarksElementFeaturesList, visitedXObjIds);
|
||||
case Element.e_form -> processForms(page, element, reader, writer, watermarksElementFeaturesList, visitedXObjIds);
|
||||
case Element.e_text -> processText(element, writer, watermarksElementFeaturesList, page);
|
||||
default -> writer.writeElement(element);
|
||||
}
|
||||
}
|
||||
@ -232,13 +322,47 @@ public class WatermarkRemovalService {
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private void removeImages(Element element, ElementWriter
|
||||
writer, List<ElementFeatures> watermarksElementFeaturesList) {
|
||||
private void processText(Element element, ElementWriter writer, List<ElementFeatures> watermarksElementFeaturesList, Page page) {
|
||||
|
||||
if (!couldTextBeAWatermark(element, page)) {
|
||||
writer.writeElement(element);
|
||||
return;
|
||||
}
|
||||
|
||||
for (ElementFeatures elementFeatures : watermarksElementFeaturesList) {
|
||||
if (elementFeatures.almostMatches(element)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
writer.writeElement(element);
|
||||
|
||||
}
|
||||
|
||||
|
||||
private boolean couldTextBeAWatermark(Element element, Page page) throws PDFNetException {
|
||||
|
||||
if (!foundTextWatermark) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (isTextRotated(element)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (Math.max(element.getBBox().getY1(), element.getBBox().getY2()) < page.getVisibleContentBox().getY1() + page.getPageHeight() * TEXT_POSITION_THRESHOLD) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private void removeImages(Element element, ElementWriter writer, List<ElementFeatures> watermarksElementFeaturesList) {
|
||||
|
||||
String hashValueOfImage = ImageHashFactory.calculate(element);
|
||||
ElementFeatures imageFeatures = ElementFeatureFactory.extractFeaturesWithHash(element, hashValueOfImage);
|
||||
for (ElementFeatures elementFeatures : watermarksElementFeaturesList) {
|
||||
if (elementFeatures.almostMatches(imageFeatures)) {
|
||||
if (elementFeatures.isSimilarTo(imageFeatures)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,15 +1,17 @@
|
||||
package com.iqser.red.pdftronlogic.commons;
|
||||
|
||||
import com.pdftron.pdf.PDFNet;
|
||||
import lombok.SneakyThrows;
|
||||
import org.junit.jupiter.api.Disabled;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.platform.commons.util.StringUtils;
|
||||
|
||||
import java.io.FileOutputStream;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Locale;
|
||||
|
||||
import org.junit.jupiter.api.Disabled;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.platform.commons.util.StringUtils;
|
||||
|
||||
import com.pdftron.pdf.PDFNet;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
@Disabled
|
||||
class WatermarkRemovalServiceTest {
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user