RED-10249: regex found incorrectly due to wrong text sorting

This commit is contained in:
Kilian Schuettler 2024-11-04 12:16:44 +01:00
parent 7338e06fb0
commit ca2f3512d2

View File

@@ -243,10 +243,7 @@ public class LayoutParsingPipeline {
List<ClassificationPage> classificationPages = new ArrayList<>();
// parsing the structure elements could be useful as well
- if (layoutParsingType != LayoutParsingType.REDACT_MANAGER_OLD) {
- classificationDocument.setOutlineObjectTree(outlineExtractorService.getOutlineObjectTree(originDocument));
- }
+ classificationDocument.setOutlineObjectTree(outlineExtractorService.getOutlineObjectTree(originDocument));
long pageCount = originDocument.getNumberOfPages();
@@ -363,11 +360,11 @@ public class LayoutParsingPipeline {
private static void updateClassificationPage(PDPage pdPage,
- PDRectangle pdr,
- ClassificationPage classificationPage,
- CleanRulings cleanRulings,
- int pageNumber,
- PageInformation pageInformation) {
+ PDRectangle pdr,
+ ClassificationPage classificationPage,
+ CleanRulings cleanRulings,
+ int pageNumber,
+ PageInformation pageInformation) {
int rotation = pdPage.getRotation();
boolean isLandscape = pdr.getWidth() > pdr.getHeight() && (rotation == 0 || rotation == 180) || pdr.getHeight() > pdr.getWidth() && (rotation == 90 || rotation == 270);