RED-7075: Styling changes
This commit is contained in:
parent
42836ae35a
commit
bf3015161b
@ -22,6 +22,9 @@ import lombok.experimental.SuperBuilder;
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public class ElementFeatures {
|
||||
|
||||
final private static double RECT_POSITION_SIMILARITY_THRESHOLD_FACTOR = 0.2; // specify how much the x and y value are allowed to differ
|
||||
final private static double RECT_SIZE_SIMILARITY_THRESHOLD_FACTOR = 0.1; // the scale the images are allowed to differ
|
||||
final private static double HAMMING_DISTANCE_THRESHOLD = 4; // defines the similarity of the hash of images
|
||||
int elementType;
|
||||
Rectangle2D boundingBox;
|
||||
|
||||
@ -68,53 +71,33 @@ public class ElementFeatures {
|
||||
}
|
||||
|
||||
|
||||
public boolean similarMatches(Element element) throws PDFNetException {
|
||||
public boolean isSimilarTo(ElementFeatures elementFeatures) {
|
||||
|
||||
return element.getType() == elementType && //
|
||||
element.getBBox() != null && //
|
||||
rectsSimilarMatch(element.getBBox());
|
||||
return elementFeatures.getElementType() == elementType && elementFeatures.getBoundingBox() != null && areRectsSimilar(elementFeatures.getBoundingBox());
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private boolean rectsSimilarMatch(Rect rect) {
|
||||
private boolean areRectsSimilar(Rectangle2D rectangle2D) {
|
||||
// To address the inconsistencies in the calculation of the bounding box we check equality with a tolerance
|
||||
|
||||
return similarEqualPosition(rect.getX1(), boundingBox.getX(), rect.getWidth()) && //
|
||||
similarEqualPosition(rect.getY1(), boundingBox.getY(), rect.getHeight()) && //
|
||||
similarEqualSize(rect.getWidth(), boundingBox.getWidth()) && //
|
||||
similarEqualSize(rect.getHeight(), boundingBox.getHeight());
|
||||
return isPositionSimilar(rectangle2D.getX(), boundingBox.getX(), rectangle2D.getWidth()) && //
|
||||
isPositionSimilar(rectangle2D.getY(), boundingBox.getY(), rectangle2D.getHeight()) && //
|
||||
isSizeSimilar(rectangle2D.getWidth(), boundingBox.getWidth()) && //
|
||||
isSizeSimilar(rectangle2D.getHeight(), boundingBox.getHeight());
|
||||
}
|
||||
|
||||
|
||||
protected boolean similarEqualPosition(double a, double b, double boxSize) {
|
||||
protected boolean isPositionSimilar(double a, double b, double boxSize) {
|
||||
|
||||
return Math.abs(a - b) < boxSize * 0.2;
|
||||
return Math.abs(a - b) < boxSize * RECT_POSITION_SIMILARITY_THRESHOLD_FACTOR;
|
||||
}
|
||||
|
||||
|
||||
protected boolean similarEqualSize(double a, double b) {
|
||||
protected boolean isSizeSimilar(double a, double b) {
|
||||
|
||||
return Math.abs(a - b) < a * 0.1;
|
||||
return Math.abs(a - b) < a * RECT_SIZE_SIMILARITY_THRESHOLD_FACTOR;
|
||||
}
|
||||
|
||||
|
||||
public boolean similarMatches(ElementFeatures elementFeatures) {
|
||||
|
||||
return elementFeatures.getElementType() == elementType && elementFeatures.getBoundingBox() != null && rectsSimilarMatch(elementFeatures.getBoundingBox());
|
||||
}
|
||||
|
||||
|
||||
private boolean rectsSimilarMatch(Rectangle2D rectangle2D) {
|
||||
// To address the inconsistencies in the calculation of the bounding box we check equality with a tolerance
|
||||
|
||||
return similarEqualPosition(rectangle2D.getX(), boundingBox.getX(), rectangle2D.getWidth()) && //
|
||||
similarEqualPosition(rectangle2D.getY(), boundingBox.getY(), rectangle2D.getHeight()) && //
|
||||
similarEqualSize(rectangle2D.getWidth(), boundingBox.getWidth()) && //
|
||||
similarEqualSize(rectangle2D.getHeight(), boundingBox.getHeight());
|
||||
}
|
||||
|
||||
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@Getter
|
||||
@SuperBuilder
|
||||
@ -215,26 +198,20 @@ public class ElementFeatures {
|
||||
return false;
|
||||
}
|
||||
return super.almostMatches(elementFeatures) && this.dataSize == ((Image) elementFeatures).getDataSize() && this.height == ((Image) elementFeatures).getHeight() && this.width == ((Image) elementFeatures).getWidth() && this.renderingIntent == ((Image) elementFeatures).getRenderingIntent() && this.componentNum == ((Image) elementFeatures).getComponentNum() && this.bitsPerComponent == ((Image) elementFeatures).getBitsPerComponent() && calculateHammingDistance(
|
||||
((Image) elementFeatures).getHashOfImage()) <= 4;
|
||||
((Image) elementFeatures).getHashOfImage()) <= HAMMING_DISTANCE_THRESHOLD;
|
||||
}
|
||||
|
||||
|
||||
public boolean similarMatches(Element element) throws PDFNetException {
|
||||
public boolean isSimilarTo(ElementFeatures elementFeatures) {
|
||||
|
||||
return super.almostMatches(element) && //
|
||||
dataSize == element.getImageDataSize() && //
|
||||
height == element.getImageHeight() && //
|
||||
width == element.getImageWidth() && //
|
||||
renderingIntent == element.getImageRenderingIntent() && //
|
||||
componentNum == element.getComponentNum() && //
|
||||
bitsPerComponent == element.getBitsPerComponent();
|
||||
}
|
||||
|
||||
|
||||
public boolean similarMatches(ElementFeatures elementFeatures) {
|
||||
|
||||
return super.similarMatches(elementFeatures) && this.dataSize == ((Image) elementFeatures).getDataSize() && this.height == ((Image) elementFeatures).getHeight() && this.width == ((Image) elementFeatures).getWidth() && this.renderingIntent == ((Image) elementFeatures).getRenderingIntent() && this.componentNum == ((Image) elementFeatures).getComponentNum() && this.bitsPerComponent == ((Image) elementFeatures).getBitsPerComponent() && calculateHammingDistance(
|
||||
((Image) elementFeatures).getHashOfImage()) <= 4;
|
||||
return super.isSimilarTo(elementFeatures) && //
|
||||
//this.dataSize == ((Image) elementFeatures).getDataSize() && //
|
||||
//this.height == ((Image) elementFeatures).getHeight() && //
|
||||
//this.width == ((Image) elementFeatures).getWidth() && //
|
||||
//this.renderingIntent == ((Image) elementFeatures).getRenderingIntent() && //
|
||||
//this.componentNum == ((Image) elementFeatures).getComponentNum() && //
|
||||
//this.bitsPerComponent == ((Image) elementFeatures).getBitsPerComponent() && //
|
||||
calculateHammingDistance(((Image) elementFeatures).getHashOfImage()) <= HAMMING_DISTANCE_THRESHOLD;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -45,8 +45,10 @@ public class WatermarkRemovalService {
|
||||
|
||||
/**
|
||||
* Removes watermarks from the document; this only works for documents with a size greater than MIN_PAGES_THRESHOLD.
|
||||
* First the possible watermarks (big XObjects or Images) will be detected and then checked if those appear on most pages according to the
|
||||
* OCCURING_ON_PAGES_THRESHOLD_FACTOR by using image hashing for similarity and size and stream size of the xobjects.
|
||||
* The following watermarks will be found: big XObjects, big Images, small Images that appear in the middle of the page, and
|
||||
* text that is rotated and big enough compared to height of page.
|
||||
* First the possible watermarks will be detected and then checked if those appear on most pages according to the
|
||||
* OCCURING_ON_PAGES_THRESHOLD_FACTOR. We use image hashing for similarity between pictures, and the size and stream size for the XObjects.
|
||||
* If so, the detected and confirmed watermarks will not be written to the PDF file.
|
||||
*
|
||||
* @param pdfFile PDFFile to remove watermarks
|
||||
@ -142,16 +144,7 @@ public class WatermarkRemovalService {
|
||||
shouldTextSearchBeContinued(elementFeaturesLinkedList);
|
||||
}
|
||||
|
||||
if (!foundTextWatermark) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (Math.abs(element.getCTM().getB()) < Math.sin(Math.toRadians(ROTATED_TEXT_THRESHOLD)) || Math.abs(element.getCTM()
|
||||
.getB()) > Math.sin(Math.toRadians(70 - ROTATED_TEXT_THRESHOLD))) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (Math.max(element.getBBox().getY1(), element.getBBox().getY2()) < page.getVisibleContentBox().getY1() + page.getPageHeight() * TEXT_POSITION_THRESHOLD) {
|
||||
if (!couldTextBeAWatermark(element, page)) {
|
||||
return;
|
||||
}
|
||||
|
||||
@ -165,6 +158,14 @@ public class WatermarkRemovalService {
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private boolean isTextRotated(Element element) {
|
||||
|
||||
return Math.abs(element.getCTM().getB()) < Math.sin(Math.toRadians(ROTATED_TEXT_THRESHOLD)) || Math.abs(element.getCTM()
|
||||
.getB()) > Math.sin(Math.toRadians(70 - ROTATED_TEXT_THRESHOLD));
|
||||
}
|
||||
|
||||
|
||||
private void shouldTextSearchBeContinued(List<ElementFeatures> elementFeaturesLinkedList) {
|
||||
|
||||
int countTextWatermarks = 0;
|
||||
@ -245,7 +246,7 @@ public class WatermarkRemovalService {
|
||||
.filter(elementFeature -> formObjectsPerPage.values()
|
||||
.stream()
|
||||
.filter(elementFeaturesOnPage -> elementFeaturesOnPage.stream()
|
||||
.anyMatch(elementFeature.getElementType() == Element.e_image || elementFeature.getElementType() == Element.e_inline_image ? elementFeature::similarMatches : elementFeature::almostMatches))
|
||||
.anyMatch(elementFeature.getElementType() == Element.e_image || elementFeature.getElementType() == Element.e_inline_image ? elementFeature::isSimilarTo : elementFeature::almostMatches))
|
||||
.count() >= minPagesFilter)
|
||||
.toList();
|
||||
}
|
||||
@ -322,18 +323,7 @@ public class WatermarkRemovalService {
|
||||
@SneakyThrows
|
||||
private void processText(Element element, ElementWriter writer, List<ElementFeatures> watermarksElementFeaturesList, Page page) {
|
||||
|
||||
if (!foundTextWatermark) {
|
||||
writer.writeElement(element);
|
||||
return;
|
||||
}
|
||||
|
||||
if (Math.abs(element.getCTM().getB()) < Math.sin(Math.toRadians(ROTATED_TEXT_THRESHOLD)) || Math.abs(element.getCTM()
|
||||
.getB()) > Math.sin(Math.toRadians(70 - ROTATED_TEXT_THRESHOLD))) {
|
||||
writer.writeElement(element);
|
||||
return;
|
||||
}
|
||||
|
||||
if (Math.max(element.getBBox().getY1(), element.getBBox().getY2()) < page.getVisibleContentBox().getY1() + page.getPageHeight() * TEXT_POSITION_THRESHOLD) {
|
||||
if (!couldTextBeAWatermark(element, page)) {
|
||||
writer.writeElement(element);
|
||||
return;
|
||||
}
|
||||
@ -348,13 +338,30 @@ public class WatermarkRemovalService {
|
||||
}
|
||||
|
||||
|
||||
private boolean couldTextBeAWatermark(Element element, Page page) throws PDFNetException {
|
||||
|
||||
if (!foundTextWatermark) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (isTextRotated(element)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (Math.max(element.getBBox().getY1(), element.getBBox().getY2()) < page.getVisibleContentBox().getY1() + page.getPageHeight() * TEXT_POSITION_THRESHOLD) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private void removeImages(Element element, ElementWriter writer, List<ElementFeatures> watermarksElementFeaturesList) {
|
||||
|
||||
String hashValueOfImage = ImageHashFactory.calculate(element);
|
||||
ElementFeatures imageFeatures = ElementFeatureFactory.extractFeaturesWithHash(element, hashValueOfImage);
|
||||
for (ElementFeatures elementFeatures : watermarksElementFeaturesList) {
|
||||
if (elementFeatures.similarMatches(imageFeatures)) {
|
||||
if (elementFeatures.isSimilarTo(imageFeatures)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user