RED-7075: Some Styling improvements
This commit is contained in:
parent
89c2ab02ea
commit
4394991432
@ -36,7 +36,7 @@ public class WatermarkRemovalService {
|
||||
|
||||
final static double TEXT_POSITION_THRESHOLD = 0.15;
|
||||
|
||||
final static double MIN_TEXTWATERMAK_HEIGHT_THRESHOLD = 0.125; // multiplied with page height
|
||||
final static double MIN_TEXTWATERMARK_HEIGHT_THRESHOLD = 0.125; // multiplied with page height
|
||||
|
||||
final static int PAGE_NUMBER_TEXT_SEARCH_THRESHOLD = 5; // stop text based search after 5 pages without watermark
|
||||
final static double ROTATED_TEXT_THRESHOLD = 12.5; //this is in degrees
|
||||
@ -126,31 +126,12 @@ public class WatermarkRemovalService {
|
||||
return;
|
||||
}
|
||||
|
||||
if (element.getType() == Element.e_form) {
|
||||
if (element.getBBox().getHeight() * element.getBBox().getWidth() < minAreaCoveringPage) {
|
||||
return;
|
||||
}
|
||||
processXObject(element, visitedXObjIds, elementFeaturesLinkedList, formObjectsOccuringMoreThanOnceOnAPage, minAreaCoveringPage, page);
|
||||
} else if (element.getType() == Element.e_image || element.getType() == Element.e_inline_image) {
|
||||
if (element.getXObject() == null) {
|
||||
return;
|
||||
}
|
||||
if (element.getBBox().getHeight() * element.getBBox().getWidth() < minAreaCoveringPage) {
|
||||
boolean isLocatedNearBorder = element.getBBox().getY1() < page.getVisibleContentBox()
|
||||
.getY1() + page.getPageHeight() * IMAGE_POSITION_HEIGHT_THRESHOLD || element.getBBox().getY2() > page.getVisibleContentBox()
|
||||
.getY2() - page.getPageHeight() * IMAGE_POSITION_HEIGHT_THRESHOLD || element.getBBox().getX1() < page.getVisibleContentBox()
|
||||
.getX1() + page.getPageWidth() * IMAGE_POSITION_WIDTH_THRESHOLD || element.getBBox().getX2() > page.getVisibleContentBox()
|
||||
.getX2() - page.getPageWidth() * IMAGE_POSITION_WIDTH_THRESHOLD;
|
||||
|
||||
if (isLocatedNearBorder) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
processImages(element, elementFeaturesLinkedList);
|
||||
} else if (element.getType() == Element.e_text) {
|
||||
processText(element, elementFeaturesLinkedList, page);
|
||||
switch (element.getType()) {
|
||||
case Element.e_form -> processXObject(element, visitedXObjIds, elementFeaturesLinkedList, formObjectsOccuringMoreThanOnceOnAPage, minAreaCoveringPage, page);
|
||||
case Element.e_image, Element.e_inline_image -> processImages(element, elementFeaturesLinkedList, page, minAreaCoveringPage);
|
||||
case Element.e_text -> processText(element, elementFeaturesLinkedList, page);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -174,7 +155,7 @@ public class WatermarkRemovalService {
|
||||
return;
|
||||
}
|
||||
|
||||
boolean isBigEnough = Math.abs(element.getBBox().getY1() - element.getBBox().getY2()) > page.getPageHeight() * MIN_TEXTWATERMAK_HEIGHT_THRESHOLD;
|
||||
boolean isBigEnough = Math.abs(element.getBBox().getY1() - element.getBBox().getY2()) > page.getPageHeight() * MIN_TEXTWATERMARK_HEIGHT_THRESHOLD;
|
||||
|
||||
if (isBigEnough) {
|
||||
ElementFeatures elementFeatures = ElementFeatureFactory.extractFeatures(element);
|
||||
@ -199,7 +180,14 @@ public class WatermarkRemovalService {
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private void processImages(Element element, List<ElementFeatures> elementFeaturesLinkedList) {
|
||||
private void processImages(Element element, List<ElementFeatures> elementFeaturesLinkedList, Page page, double minAreaCoveringPage) {
|
||||
|
||||
if (element.getXObject() == null) {
|
||||
return;
|
||||
}
|
||||
if (element.getBBox().getHeight() * element.getBBox().getWidth() < minAreaCoveringPage && isLocatedNearBorder(element, page)) {
|
||||
return;
|
||||
}
|
||||
|
||||
String hashOfImage = ImageHashFactory.calculate(element);
|
||||
ElementFeatures elementFeatures = ElementFeatureFactory.extractFeaturesWithHash(element, hashOfImage);
|
||||
@ -207,6 +195,16 @@ public class WatermarkRemovalService {
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private boolean isLocatedNearBorder(Element element, Page page) {
|
||||
|
||||
return element.getBBox().getY1() < page.getVisibleContentBox().getY1() + page.getPageHeight() * IMAGE_POSITION_HEIGHT_THRESHOLD || element.getBBox()
|
||||
.getY2() > page.getVisibleContentBox().getY2() - page.getPageHeight() * IMAGE_POSITION_HEIGHT_THRESHOLD || element.getBBox().getX1() < page.getVisibleContentBox()
|
||||
.getX1() + page.getPageWidth() * IMAGE_POSITION_WIDTH_THRESHOLD || element.getBBox().getX2() > page.getVisibleContentBox()
|
||||
.getX2() - page.getPageWidth() * IMAGE_POSITION_WIDTH_THRESHOLD;
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private void processXObject(Element element,
|
||||
Set<Long> visitedXObjIds,
|
||||
@ -215,6 +213,10 @@ public class WatermarkRemovalService {
|
||||
double minAreaCoveringPage,
|
||||
Page page) {
|
||||
|
||||
if (element.getBBox().getHeight() * element.getBBox().getWidth() < minAreaCoveringPage) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (visitedXObjIds.add(element.getXObject().getObjNum())) {
|
||||
ElementReader xObjectReader = new ElementReader();
|
||||
xObjectReader.begin(element.getXObject());
|
||||
@ -301,13 +303,8 @@ public class WatermarkRemovalService {
|
||||
continue;
|
||||
}
|
||||
if (element.getBBox().getHeight() * element.getBBox().getWidth() < minAreaCoveringFromPage) {
|
||||
boolean isLocatedNearBorder = element.getBBox().getY1() < page.getVisibleContentBox()
|
||||
.getY1() + page.getPageHeight() * IMAGE_POSITION_HEIGHT_THRESHOLD || element.getBBox().getY2() > page.getVisibleContentBox()
|
||||
.getY2() - page.getPageHeight() * IMAGE_POSITION_HEIGHT_THRESHOLD || element.getBBox().getX1() < page.getVisibleContentBox()
|
||||
.getX1() + page.getPageWidth() * IMAGE_POSITION_WIDTH_THRESHOLD || element.getBBox().getX2() > page.getVisibleContentBox()
|
||||
.getX2() - page.getPageWidth() * IMAGE_POSITION_WIDTH_THRESHOLD;
|
||||
|
||||
if (isLocatedNearBorder && element.getBBox().getHeight() * element.getBBox().getWidth() < minAreaCoveringFromPage || element.getXObject() == null) {
|
||||
if (isLocatedNearBorder(element, page) && element.getBBox().getHeight() * element.getBBox()
|
||||
.getWidth() < minAreaCoveringFromPage || element.getXObject() == null) {
|
||||
writer.writeElement(element);
|
||||
continue;
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user