Pull request #161: Fixed missing whitespaces

Merge in RED/redaction-service from fixMissingSpaces to master

* commit '35dec94ccdfc569f24d6983c5b2810dfd0f80e7c':
  Fixed missing whitespaces
This commit is contained in:
Dominique Eiflaender 2021-05-28 15:04:47 +02:00
commit b7d399c461
2 changed files with 23 additions and 0 deletions

View File

@@ -46,6 +46,17 @@ public class PDFAreaTextStripper extends PDFTextStripperByArea {
startIndex = i;
}
if (textPositions.get(i).getRotation() == 0 && i > 0 && textPositions.get(i).getX() > textPositions.get(i - 1).getEndX() + 1) {
List<TextPosition> sublist = textPositions.subList(startIndex, i);
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0)
.getUnicode()
.equals(" ") || sublist.get(0).getUnicode().equals("\u00A0")))) {
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
}
startIndex = i;
}
if (i > 0 && (textPositions.get(i).getUnicode().equals(" ") || textPositions.get(i)
.getUnicode()
.equals("\u00A0")) && i <= textPositions.size() - 2) {

View File

@@ -300,6 +300,18 @@ public class PDFLinesTextStripper extends PDFTextStripper {
startIndex = i;
}
if (textPositions.get(i).getRotation() == 0 && i > 0 && textPositions.get(i).getX() > textPositions.get(i - 1).getEndX() + 1) {
List<TextPosition> sublist = textPositions.subList(startIndex, i);
if (!(sublist.isEmpty() || sublist.size() == 1 && (sublist.get(0)
.getUnicode()
.equals(" ") || sublist.get(0).getUnicode().equals("\u00A0")))) {
textPositionSequences.add(new TextPositionSequence(sublist, pageNumber));
}
startIndex = i;
}
if (i > 0 && (textPositions.get(i).getUnicode().equals(" ") || textPositions.get(i)
.getUnicode()
.equals("\u00A0")) && i <= textPositions.size() - 2) {