RED-7075: Watermark Removal finished

This commit is contained in:
RaphaelArnold 2023-08-02 10:46:31 +02:00
parent e609fc450f
commit 5e4edbec66
2 changed files with 28 additions and 48 deletions

View File

@ -16,6 +16,22 @@ import lombok.experimental.UtilityClass;
@UtilityClass
public class ImageHashFactory {
/**
 * Calculates the hash of the image referenced by the given PDF element.
 * <p>
 * The element's XObject is wrapped in a {@code com.pdftron.pdf.Image}, its bytes are obtained via
 * {@code getBytesOfImage}, decoded with {@code ImageIO} and finally hashed with {@code getSimplePHash}.
 *
 * @param element the element whose XObject is read as an image
 * @return the hash string produced by {@code getSimplePHash} for the decoded image
 * @throws IllegalArgumentException if the extracted bytes cannot be decoded by any registered ImageIO reader
 */
@SneakyThrows
public String calculate(Element element) {

    com.pdftron.pdf.Image pdfImage = new com.pdftron.pdf.Image(element.getXObject());
    byte[] imageBytes = getBytesOfImage(pdfImage);

    // ImageIO.read does not throw for unknown formats; it returns null when no registered reader claims the stream.
    BufferedImage decodedImage = ImageIO.read(new ByteArrayInputStream(imageBytes));
    if (decodedImage == null) {
        throw new IllegalArgumentException("Image of element could not be decoded for hashing: no suitable ImageIO reader found");
    }

    return getSimplePHash(decodedImage);
}
@SneakyThrows
private byte[] getBytesOfImage(com.pdftron.pdf.Image inputImage) {
// 0 because the memory filter determines the size
@ -31,19 +47,7 @@ public class ImageHashFactory {
filterWriter.destroy();
return res;
}
/**
 * Calculates the hash of the image referenced by the given PDF element.
 * <p>
 * The element's XObject is wrapped in a {@code com.pdftron.pdf.Image}, its bytes are obtained via
 * {@code getBytesOfImage}, decoded with {@code ImageIO} and finally hashed with {@code getSimplePHash}.
 *
 * @param element the element whose XObject is read as an image
 * @return the hash string produced by {@code getSimplePHash} for the decoded image
 * @throws IllegalArgumentException if the extracted bytes cannot be decoded by any registered ImageIO reader
 */
@SneakyThrows
public String calculate(Element element) {

    com.pdftron.pdf.Image pdfImage = new com.pdftron.pdf.Image(element.getXObject());
    byte[] imageBytes = getBytesOfImage(pdfImage);

    // ImageIO.read does not throw for unknown formats; it returns null when no registered reader claims the stream.
    BufferedImage decodedImage = ImageIO.read(new ByteArrayInputStream(imageBytes));
    if (decodedImage == null) {
        throw new IllegalArgumentException("Image of element could not be decoded for hashing: no suitable ImageIO reader found");
    }

    return getSimplePHash(decodedImage);
}
public String getSimplePHash(BufferedImage image) {
// Resize the image to a fixed size (e.g., 8x8 pixels)
@ -74,8 +78,10 @@ public class ImageHashFactory {
return hashBuilder.toString();
}
// Helper method to calculate the average grayscale pixel value
private int calculateAverage(BufferedImage image) {
int total = 0;
int width = image.getWidth();
int height = image.getHeight();
@ -87,30 +93,4 @@ public class ImageHashFactory {
return total / (width * height);
}
// Images can be hashed with either getDHash or getSimplePHash.
/**
 * Builds a 64-bit difference hash of the given image.
 * <p>
 * The image is scaled to 9x8 pixels via {@code resizeImage}. For each of the 8 rows, every one of the
 * first 8 pixels is compared with its right-hand neighbour; a 1 bit is appended when the left value is
 * smaller than the right value, otherwise a 0 bit.
 * <p>
 * NOTE(review): the comparison is performed on the packed int values returned by
 * {@code BufferedImage.getRGB}, not on a derived brightness value - confirm this is intended.
 *
 * @param image the image to hash
 * @return the accumulated bits rendered by {@code Long.toHexString} (no leading zeros)
 * @throws Exception declared by the signature; kept for compatibility with existing callers
 */
public String getDHash(BufferedImage image) throws Exception {

    final int gridSize = 8;

    // One extra column so that each of the 8 compared columns has a right-hand neighbour.
    BufferedImage scaled = resizeImage(image, gridSize + 1, gridSize);

    long bits = 0L;
    for (int row = 0; row < gridSize; row++) {
        for (int col = 0; col < gridSize; col++) {
            int current = scaled.getRGB(col, row);
            int neighbour = scaled.getRGB(col + 1, row);
            long bit = current < neighbour ? 1L : 0L;
            bits = (bits << 1) | bit;
        }
    }

    return Long.toHexString(bits);
}
/**
 * Helper method that scales an image to the desired dimensions.
 * <p>
 * A new {@code TYPE_INT_ARGB} image of the requested size is created and the source image is drawn
 * onto it, stretched to fill the whole target area.
 *
 * @param image  the source image to draw
 * @param width  the width of the resulting image in pixels
 * @param height the height of the resulting image in pixels
 * @return a new image of size {@code width} x {@code height} containing the scaled source
 */
private BufferedImage resizeImage(BufferedImage image, int width, int height) {

    BufferedImage resizedImage = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB);

    // Fully qualified so that no additional import is required in this file.
    java.awt.Graphics2D canvas = resizedImage.createGraphics();
    try {
        canvas.drawImage(image, 0, 0, width, height, null);
    } finally {
        // Graphics contexts hold system resources and should be released explicitly once drawing is done.
        canvas.dispose();
    }

    return resizedImage;
}
}

View File

@ -66,7 +66,7 @@ public class WatermarkRemovalService {
@SneakyThrows
private static Map<Long, List<ElementFeatures>> findAllFormObjectsAndImages(PDFDoc pdfDoc) {
private Map<Long, List<ElementFeatures>> findAllFormObjectsAndImages(PDFDoc pdfDoc) {
List<ElementFeatures> formObjectsOccuringMoreThanOnceOnAPage = new LinkedList<>();
Map<Long, List<ElementFeatures>> formObjectsAndImagesForPages = new HashMap<>();
@ -98,7 +98,7 @@ public class WatermarkRemovalService {
}
private static void processElement(Element element,
private void processElement(Element element,
Set<Long> visitedXObjIds,
List<ElementFeatures> elementFeaturesLinkedList,
List<ElementFeatures> formObjectsOccuringMoreThanOnceOnAPage,
@ -120,7 +120,7 @@ public class WatermarkRemovalService {
@SneakyThrows
private static void processImages(Element element, List<ElementFeatures> elementFeaturesLinkedList) {
private void processImages(Element element, List<ElementFeatures> elementFeaturesLinkedList) {
String hashOfImage = ImageHashFactory.calculate(element);
ElementFeatures elementFeatures = ElementFeatureFactory.extractFeaturesWithHash(element, hashOfImage);
@ -129,7 +129,7 @@ public class WatermarkRemovalService {
@SneakyThrows
private static void processXObject(Element element,
private void processXObject(Element element,
Set<Long> visitedXObjIds,
List<ElementFeatures> elementFeaturesLinkedList,
List<ElementFeatures> formObjectsOccuringMoreThanOnceOnAPage,
@ -152,7 +152,7 @@ public class WatermarkRemovalService {
/*
parameter
*/
private static List<ElementFeatures> filterSameFormObjectsOccuringOnMostPages(Map<Long, List<ElementFeatures>> formObjectsPerPage) {
private List<ElementFeatures> filterSameFormObjectsOccuringOnMostPages(Map<Long, List<ElementFeatures>> formObjectsPerPage) {
int pageCount = formObjectsPerPage.keySet().size();
int minPagesFilter = (int) (OCCURING_ON_PAGES_THRESHOLD_FACTOR * pageCount);
@ -169,7 +169,7 @@ public class WatermarkRemovalService {
@SneakyThrows
private static void removeAllWatermarks(PDFDoc pdfDoc, List<ElementFeatures> watermarksElementFeaturesList) {
private void removeAllWatermarks(PDFDoc pdfDoc, List<ElementFeatures> watermarksElementFeaturesList) {
ElementReader reader = new ElementReader();
ElementWriter writer = new ElementWriter();
@ -190,7 +190,7 @@ public class WatermarkRemovalService {
@SneakyThrows
private static void writeAllElementsExceptWatermarks(Page page,
private void writeAllElementsExceptWatermarks(Page page,
ElementReader reader,
ElementWriter writer,
List<ElementFeatures> watermarksElementFeaturesList,
@ -204,7 +204,7 @@ public class WatermarkRemovalService {
}
private static void processElements(Page page,
private void processElements(Page page,
ElementReader reader,
ElementWriter writer,
List<ElementFeatures> watermarksElementFeaturesList,
@ -232,7 +232,7 @@ public class WatermarkRemovalService {
@SneakyThrows
private static void removeImages(Element element, ElementWriter writer, List<ElementFeatures> watermarksElementFeaturesList) {
private void removeImages(Element element, ElementWriter writer, List<ElementFeatures> watermarksElementFeaturesList) {
String hashValueOfImage = ImageHashFactory.calculate(element);
ElementFeatures imageFeatures = ElementFeatureFactory.extractFeaturesWithHash(element, hashValueOfImage);
@ -246,7 +246,7 @@ public class WatermarkRemovalService {
}
private static void processForms(Page page,
private void processForms(Page page,
Element element,
ElementReader reader,
ElementWriter writer,