Ignore images that are contained in others

This commit is contained in:
Dominique Eifländer 2021-05-19 12:54:25 +02:00
parent 8ece616229
commit d87cbdeaeb
2 changed files with 22 additions and 4 deletions

View File

@ -11,6 +11,8 @@ import lombok.NoArgsConstructor;
@AllArgsConstructor
public class RedRectangle2D {
public static final double THRESHOLD = 0.01;
private double x;
private double y;
private double width;
@ -27,9 +29,14 @@ public class RedRectangle2D {
}
double x0 = getX();
double y0 = getY();
return x >= x0 &&
y >= y0 &&
(x + w) <= x0 + getWidth() &&
(y + h) <= y0 + getHeight();
return round(x) >= round(x0) &&
round(y) >= round(y0) &&
(x + w) - (x0 + getWidth()) <= THRESHOLD &&
(y + h) - (y0 + getHeight()) <= THRESHOLD;
}
/**
 * Rounds the given value to two decimal places.
 *
 * <p>The value is scaled by 100, rounded to the nearest whole number with
 * {@link Math#round(double)}, and scaled back down.
 *
 * @param value the number to round
 * @return {@code value} rounded to two decimal places
 */
private double round(double value) {
    // 10^2: shift two decimal digits in front of the decimal point before rounding.
    final double scale = 100.0;
    final long scaledAndRounded = Math.round(value * scale);
    return scaledAndRounded / scale;
}
}

View File

@ -102,6 +102,17 @@ public class PdfSegmentationService {
page.setLandscape(isLandscape || isRotated);
page.setPageNumber(pageNumber);
List<PdfImage> mergedList = processImages(stripper.getImages());
List<PdfImage> imagesInImage = new ArrayList<>();
for(PdfImage image: mergedList){
for (PdfImage inner: mergedList){
if(image != inner && image.getPosition().contains(inner.getPosition().getX(), inner.getPosition().getY(), inner.getPosition().getWidth(), inner.getPosition().getHeight())){
imagesInImage.add(inner);
}
}
}
mergedList.removeAll(imagesInImage);
page.setImages(mergedList);
tableExtractionService.extractTables(cleanRulings, page);