RED-7075: Some Styling improvements

This commit is contained in:
RaphaelArnold 2023-08-31 15:27:38 +02:00
parent 89c2ab02ea
commit 4394991432

View File

@ -36,7 +36,7 @@ public class WatermarkRemovalService {
final static double TEXT_POSITION_THRESHOLD = 0.15;
final static double MIN_TEXTWATERMAK_HEIGHT_THRESHOLD = 0.125; // multiplied with page height
final static double MIN_TEXTWATERMARK_HEIGHT_THRESHOLD = 0.125; // multiplied with page height
final static int PAGE_NUMBER_TEXT_SEARCH_THRESHOLD = 5; // stop text based search after 5 pages without watermark
final static double ROTATED_TEXT_THRESHOLD = 12.5; //this is in degrees
@ -126,31 +126,12 @@ public class WatermarkRemovalService {
return;
}
if (element.getType() == Element.e_form) {
if (element.getBBox().getHeight() * element.getBBox().getWidth() < minAreaCoveringPage) {
return;
}
processXObject(element, visitedXObjIds, elementFeaturesLinkedList, formObjectsOccuringMoreThanOnceOnAPage, minAreaCoveringPage, page);
} else if (element.getType() == Element.e_image || element.getType() == Element.e_inline_image) {
if (element.getXObject() == null) {
return;
}
if (element.getBBox().getHeight() * element.getBBox().getWidth() < minAreaCoveringPage) {
boolean isLocatedNearBorder = element.getBBox().getY1() < page.getVisibleContentBox()
.getY1() + page.getPageHeight() * IMAGE_POSITION_HEIGHT_THRESHOLD || element.getBBox().getY2() > page.getVisibleContentBox()
.getY2() - page.getPageHeight() * IMAGE_POSITION_HEIGHT_THRESHOLD || element.getBBox().getX1() < page.getVisibleContentBox()
.getX1() + page.getPageWidth() * IMAGE_POSITION_WIDTH_THRESHOLD || element.getBBox().getX2() > page.getVisibleContentBox()
.getX2() - page.getPageWidth() * IMAGE_POSITION_WIDTH_THRESHOLD;
if (isLocatedNearBorder) {
return;
}
}
processImages(element, elementFeaturesLinkedList);
} else if (element.getType() == Element.e_text) {
processText(element, elementFeaturesLinkedList, page);
switch (element.getType()) {
case Element.e_form -> processXObject(element, visitedXObjIds, elementFeaturesLinkedList, formObjectsOccuringMoreThanOnceOnAPage, minAreaCoveringPage, page);
case Element.e_image, Element.e_inline_image -> processImages(element, elementFeaturesLinkedList, page, minAreaCoveringPage);
case Element.e_text -> processText(element, elementFeaturesLinkedList, page);
}
}
@ -174,7 +155,7 @@ public class WatermarkRemovalService {
return;
}
boolean isBigEnough = Math.abs(element.getBBox().getY1() - element.getBBox().getY2()) > page.getPageHeight() * MIN_TEXTWATERMAK_HEIGHT_THRESHOLD;
boolean isBigEnough = Math.abs(element.getBBox().getY1() - element.getBBox().getY2()) > page.getPageHeight() * MIN_TEXTWATERMARK_HEIGHT_THRESHOLD;
if (isBigEnough) {
ElementFeatures elementFeatures = ElementFeatureFactory.extractFeatures(element);
@ -199,7 +180,14 @@ public class WatermarkRemovalService {
@SneakyThrows
private void processImages(Element element, List<ElementFeatures> elementFeaturesLinkedList) {
private void processImages(Element element, List<ElementFeatures> elementFeaturesLinkedList, Page page, double minAreaCoveringPage) {
if (element.getXObject() == null) {
return;
}
if (element.getBBox().getHeight() * element.getBBox().getWidth() < minAreaCoveringPage && isLocatedNearBorder(element, page)) {
return;
}
String hashOfImage = ImageHashFactory.calculate(element);
ElementFeatures elementFeatures = ElementFeatureFactory.extractFeaturesWithHash(element, hashOfImage);
@ -207,6 +195,16 @@ public class WatermarkRemovalService {
}
/**
 * Tells whether the element's bounding box reaches into the border zone of the page.
 * <p>
 * The border zone is measured from each edge of the page's visible content box: its depth is the
 * page height times {@code IMAGE_POSITION_HEIGHT_THRESHOLD} for the two Y edges and the page width
 * times {@code IMAGE_POSITION_WIDTH_THRESHOLD} for the two X edges. The edges are tested in the
 * order y1, y2, x1, x2 and the first match ends the check.
 *
 * @param element the element whose bounding box is inspected
 * @param page    the page supplying the visible content box and the page dimensions
 * @return {@code true} if at least one edge of the bounding box lies inside the border zone,
 * {@code false} otherwise
 */
@SneakyThrows
private boolean isLocatedNearBorder(Element element, Page page) {
    // y1 edge of the element falls below the inner limit at the y1 side of the visible content box
    if (element.getBBox().getY1() < page.getVisibleContentBox().getY1() + page.getPageHeight() * IMAGE_POSITION_HEIGHT_THRESHOLD) {
        return true;
    }

    // y2 edge of the element exceeds the inner limit at the y2 side of the visible content box
    if (element.getBBox().getY2() > page.getVisibleContentBox().getY2() - page.getPageHeight() * IMAGE_POSITION_HEIGHT_THRESHOLD) {
        return true;
    }

    // x1 edge of the element falls below the inner limit at the x1 side of the visible content box
    if (element.getBBox().getX1() < page.getVisibleContentBox().getX1() + page.getPageWidth() * IMAGE_POSITION_WIDTH_THRESHOLD) {
        return true;
    }

    // x2 edge is the last candidate, so its result is the final answer
    return element.getBBox().getX2() > page.getVisibleContentBox().getX2() - page.getPageWidth() * IMAGE_POSITION_WIDTH_THRESHOLD;
}
@SneakyThrows
private void processXObject(Element element,
Set<Long> visitedXObjIds,
@ -215,6 +213,10 @@ public class WatermarkRemovalService {
double minAreaCoveringPage,
Page page) {
if (element.getBBox().getHeight() * element.getBBox().getWidth() < minAreaCoveringPage) {
return;
}
if (visitedXObjIds.add(element.getXObject().getObjNum())) {
ElementReader xObjectReader = new ElementReader();
xObjectReader.begin(element.getXObject());
@ -301,13 +303,8 @@ public class WatermarkRemovalService {
continue;
}
if (element.getBBox().getHeight() * element.getBBox().getWidth() < minAreaCoveringFromPage) {
boolean isLocatedNearBorder = element.getBBox().getY1() < page.getVisibleContentBox()
.getY1() + page.getPageHeight() * IMAGE_POSITION_HEIGHT_THRESHOLD || element.getBBox().getY2() > page.getVisibleContentBox()
.getY2() - page.getPageHeight() * IMAGE_POSITION_HEIGHT_THRESHOLD || element.getBBox().getX1() < page.getVisibleContentBox()
.getX1() + page.getPageWidth() * IMAGE_POSITION_WIDTH_THRESHOLD || element.getBBox().getX2() > page.getVisibleContentBox()
.getX2() - page.getPageWidth() * IMAGE_POSITION_WIDTH_THRESHOLD;
if (isLocatedNearBorder && element.getBBox().getHeight() * element.getBBox().getWidth() < minAreaCoveringFromPage || element.getXObject() == null) {
if (isLocatedNearBorder(element, page) && element.getBBox().getHeight() * element.getBBox()
.getWidth() < minAreaCoveringFromPage || element.getXObject() == null) {
writer.writeElement(element);
continue;
}