Pull request #2: RED-5911 Bugfix for removed texts within tables

Merge in RED/ocr-service from RED-5911 to master

* commit '0e8dfed4410b28a6316f8e328fc166339852565f':
  RED-5911 Bugfix for removed texts within tables
This commit is contained in:
Philipp Schramm 2023-01-04 15:02:15 +01:00
commit e535861da8
2 changed files with 4 additions and 2 deletions

View File

@@ -285,7 +285,7 @@ public class OCRService {
var gState = element.getGState();
//See PDF Reference 5.3 Text rendering modes, 3 = Invisible, however this ocr does not use it.
-if (!filledRectangleIntersection && gState.getTextRenderMode() != 3) {
+if (!filledRectangleIntersection && gState.getTextRenderMode() != 3 || filledRectangleIntersection && gState.getTextRenderMode() == 0) {
writer.writeElement(element);
}
}
@@ -294,10 +294,10 @@ public class OCRService {
@SneakyThrows
private void processPath(Element element, ElementWriter writer, Set<Rect> filledRectangles) {
writer.writeElement(element);
if (element.getPathData() != null && element.getPathData().getPoints().length > 4) {
filledRectangles.add(element.getBBox());
}
writer.writeElement(element);
}

View File

@@ -76,6 +76,8 @@ public class OcrServiceIntegrationTest {
var out = FileUtils.openOutputStream(new File(getTemporaryDirectory() + "/" + fileName + ".pdf"));
IOUtils.copy(response, out);
System.out.println("File:" + getTemporaryDirectory() + "/" + fileName + ".pdf");
}