Ignore images that are contained in others
This commit is contained in:
parent
8ece616229
commit
d87cbdeaeb
@ -11,6 +11,8 @@ import lombok.NoArgsConstructor;
|
||||
@AllArgsConstructor
|
||||
public class RedRectangle2D {
|
||||
|
||||
public static final double THRESHOLD = 0.01;
|
||||
|
||||
private double x;
|
||||
private double y;
|
||||
private double width;
|
||||
@ -27,9 +29,14 @@ public class RedRectangle2D {
|
||||
}
|
||||
double x0 = getX();
|
||||
double y0 = getY();
|
||||
return x >= x0 &&
|
||||
y >= y0 &&
|
||||
(x + w) <= x0 + getWidth() &&
|
||||
(y + h) <= y0 + getHeight();
|
||||
return round(x) >= round(x0) &&
|
||||
round(y) >= round(y0) &&
|
||||
(x + w) - (x0 + getWidth()) <= THRESHOLD &&
|
||||
(y + h) - (y0 + getHeight()) <= THRESHOLD;
|
||||
}
|
||||
|
||||
private double round(double value) {
|
||||
double d = Math.pow(10, 2);
|
||||
return Math.round(value * d) / d;
|
||||
}
|
||||
}
|
||||
|
||||
@ -102,6 +102,17 @@ public class PdfSegmentationService {
|
||||
page.setLandscape(isLandscape || isRotated);
|
||||
page.setPageNumber(pageNumber);
|
||||
List<PdfImage> mergedList = processImages(stripper.getImages());
|
||||
|
||||
List<PdfImage> imagesInImage = new ArrayList<>();
|
||||
for(PdfImage image: mergedList){
|
||||
for (PdfImage inner: mergedList){
|
||||
if(image != inner && image.getPosition().contains(inner.getPosition().getX(), inner.getPosition().getY(), inner.getPosition().getWidth(), inner.getPosition().getHeight())){
|
||||
imagesInImage.add(inner);
|
||||
}
|
||||
}
|
||||
}
|
||||
mergedList.removeAll(imagesInImage);
|
||||
|
||||
page.setImages(mergedList);
|
||||
|
||||
tableExtractionService.extractTables(cleanRulings, page);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user