RED-7075: Watermark Removal finished
This commit is contained in:
parent
e609fc450f
commit
5e4edbec66
@ -16,6 +16,22 @@ import lombok.experimental.UtilityClass;
|
||||
@UtilityClass
|
||||
public class ImageHashFactory {
|
||||
|
||||
@SneakyThrows
|
||||
public String calculate(Element element) {
|
||||
|
||||
com.pdftron.pdf.Image image = new com.pdftron.pdf.Image(element.getXObject());
|
||||
|
||||
byte[] imageBytes = getBytesOfImage(image);
|
||||
ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(imageBytes);
|
||||
BufferedImage image1 = ImageIO.read(byteArrayInputStream);
|
||||
|
||||
String hash = getSimplePHash(image1);
|
||||
|
||||
return hash;
|
||||
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private byte[] getBytesOfImage(com.pdftron.pdf.Image inputImage) {
|
||||
// 0 because the memory filter determines the size
|
||||
@ -31,19 +47,7 @@ public class ImageHashFactory {
|
||||
filterWriter.destroy();
|
||||
return res;
|
||||
}
|
||||
@SneakyThrows
|
||||
public String calculate(Element element) {
|
||||
com.pdftron.pdf.Image image = new com.pdftron.pdf.Image(element.getXObject());
|
||||
|
||||
byte[] imageBytes = getBytesOfImage(image);
|
||||
ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(imageBytes);
|
||||
BufferedImage image1 = ImageIO.read(byteArrayInputStream);
|
||||
|
||||
String hash = getSimplePHash(image1);
|
||||
|
||||
return hash;
|
||||
|
||||
}
|
||||
|
||||
public String getSimplePHash(BufferedImage image) {
|
||||
// Resize the image to a fixed size (e.g., 8x8 pixels)
|
||||
@ -74,8 +78,10 @@ public class ImageHashFactory {
|
||||
return hashBuilder.toString();
|
||||
}
|
||||
|
||||
|
||||
// Helper method to calculate the average grayscale pixel value
|
||||
private int calculateAverage(BufferedImage image) {
|
||||
|
||||
int total = 0;
|
||||
int width = image.getWidth();
|
||||
int height = image.getHeight();
|
||||
@ -87,30 +93,4 @@ public class ImageHashFactory {
|
||||
return total / (width * height);
|
||||
}
|
||||
|
||||
// to hash images either use getDHash or getSimplePHash
|
||||
public String getDHash(BufferedImage image) throws Exception {
|
||||
BufferedImage resizedImage = resizeImage(image, 9, 8); // Resize image to 9x8 for dHash
|
||||
|
||||
long hash = 0L;
|
||||
for (int y = 0; y < 8; y++) {
|
||||
for (int x = 0; x < 8; x++) {
|
||||
int leftPixel = resizedImage.getRGB(x, y);
|
||||
int rightPixel = resizedImage.getRGB(x + 1, y);
|
||||
hash <<= 1;
|
||||
hash |= leftPixel < rightPixel ? 1 : 0;
|
||||
}
|
||||
}
|
||||
|
||||
return Long.toHexString(hash);
|
||||
}
|
||||
|
||||
// Helper method to resize the image to the desired dimensions
|
||||
private BufferedImage resizeImage(BufferedImage image, int width, int height) {
|
||||
BufferedImage resizedImage = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB);
|
||||
resizedImage.getGraphics().drawImage(image, 0, 0, width, height, null);
|
||||
return resizedImage;
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
@ -66,7 +66,7 @@ public class WatermarkRemovalService {
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private static Map<Long, List<ElementFeatures>> findAllFormObjectsAndImages(PDFDoc pdfDoc) {
|
||||
private Map<Long, List<ElementFeatures>> findAllFormObjectsAndImages(PDFDoc pdfDoc) {
|
||||
|
||||
List<ElementFeatures> formObjectsOccuringMoreThanOnceOnAPage = new LinkedList<>();
|
||||
Map<Long, List<ElementFeatures>> formObjectsAndImagesForPages = new HashMap<>();
|
||||
@ -98,7 +98,7 @@ public class WatermarkRemovalService {
|
||||
}
|
||||
|
||||
|
||||
private static void processElement(Element element,
|
||||
private void processElement(Element element,
|
||||
Set<Long> visitedXObjIds,
|
||||
List<ElementFeatures> elementFeaturesLinkedList,
|
||||
List<ElementFeatures> formObjectsOccuringMoreThanOnceOnAPage,
|
||||
@ -120,7 +120,7 @@ public class WatermarkRemovalService {
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private static void processImages(Element element, List<ElementFeatures> elementFeaturesLinkedList) {
|
||||
private void processImages(Element element, List<ElementFeatures> elementFeaturesLinkedList) {
|
||||
|
||||
String hashOfImage = ImageHashFactory.calculate(element);
|
||||
ElementFeatures elementFeatures = ElementFeatureFactory.extractFeaturesWithHash(element, hashOfImage);
|
||||
@ -129,7 +129,7 @@ public class WatermarkRemovalService {
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private static void processXObject(Element element,
|
||||
private void processXObject(Element element,
|
||||
Set<Long> visitedXObjIds,
|
||||
List<ElementFeatures> elementFeaturesLinkedList,
|
||||
List<ElementFeatures> formObjectsOccuringMoreThanOnceOnAPage,
|
||||
@ -152,7 +152,7 @@ public class WatermarkRemovalService {
|
||||
/*
|
||||
parameter
|
||||
*/
|
||||
private static List<ElementFeatures> filterSameFormObjectsOccuringOnMostPages(Map<Long, List<ElementFeatures>> formObjectsPerPage) {
|
||||
private List<ElementFeatures> filterSameFormObjectsOccuringOnMostPages(Map<Long, List<ElementFeatures>> formObjectsPerPage) {
|
||||
|
||||
int pageCount = formObjectsPerPage.keySet().size();
|
||||
int minPagesFilter = (int) (OCCURING_ON_PAGES_THRESHOLD_FACTOR * pageCount);
|
||||
@ -169,7 +169,7 @@ public class WatermarkRemovalService {
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private static void removeAllWatermarks(PDFDoc pdfDoc, List<ElementFeatures> watermarksElementFeaturesList) {
|
||||
private void removeAllWatermarks(PDFDoc pdfDoc, List<ElementFeatures> watermarksElementFeaturesList) {
|
||||
|
||||
ElementReader reader = new ElementReader();
|
||||
ElementWriter writer = new ElementWriter();
|
||||
@ -190,7 +190,7 @@ public class WatermarkRemovalService {
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private static void writeAllElementsExceptWatermarks(Page page,
|
||||
private void writeAllElementsExceptWatermarks(Page page,
|
||||
ElementReader reader,
|
||||
ElementWriter writer,
|
||||
List<ElementFeatures> watermarksElementFeaturesList,
|
||||
@ -204,7 +204,7 @@ public class WatermarkRemovalService {
|
||||
}
|
||||
|
||||
|
||||
private static void processElements(Page page,
|
||||
private void processElements(Page page,
|
||||
ElementReader reader,
|
||||
ElementWriter writer,
|
||||
List<ElementFeatures> watermarksElementFeaturesList,
|
||||
@ -232,7 +232,7 @@ public class WatermarkRemovalService {
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private static void removeImages(Element element, ElementWriter writer, List<ElementFeatures> watermarksElementFeaturesList) {
|
||||
private void removeImages(Element element, ElementWriter writer, List<ElementFeatures> watermarksElementFeaturesList) {
|
||||
|
||||
String hashValueOfImage = ImageHashFactory.calculate(element);
|
||||
ElementFeatures imageFeatures = ElementFeatureFactory.extractFeaturesWithHash(element, hashValueOfImage);
|
||||
@ -246,7 +246,7 @@ public class WatermarkRemovalService {
|
||||
}
|
||||
|
||||
|
||||
private static void processForms(Page page,
|
||||
private void processForms(Page page,
|
||||
Element element,
|
||||
ElementReader reader,
|
||||
ElementWriter writer,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user