RED-4254: Added dirty hack in pdfbox classes to find words that contain Unicode characters with 2 chars, like 'RA'
This commit is contained in:
parent
6b89572c87
commit
264d7e3a87
@ -301,12 +301,24 @@ class LegacyPDFStreamEngine extends PDFStreamEngine
|
|||||||
nextY -= pageSize.getLowerLeftY();
|
nextY -= pageSize.getLowerLeftY();
|
||||||
}
|
}
|
||||||
|
|
||||||
processTextPosition(new TextPosition(pageRotation, pageSize.getWidth(),
|
// This is a hack for Unicode letters that map to 2 chars, e.g. RA; see unicodeProblem.pdf
|
||||||
pageSize.getHeight(), translatedTextRenderingMatrix, nextX, nextY,
|
if(unicodeMapping.length() == 2){
|
||||||
Math.abs(dyDisplay), dxDisplay,
|
processTextPosition(new TextPosition(pageRotation, pageSize.getWidth(),
|
||||||
Math.abs(spaceWidthDisplay), unicodeMapping, new int[] { code }, font,
|
pageSize.getHeight(), translatedTextRenderingMatrix, nextX, nextY,
|
||||||
fontSize,
|
Math.abs(dyDisplay), dxDisplay,
|
||||||
(int)(fontSize * textMatrix.getScalingFactorX())));
|
Math.abs(spaceWidthDisplay), Character.toString(unicodeMapping.charAt(0)), new int[] { code }, font,
|
||||||
|
fontSize,
|
||||||
|
(int)(fontSize * textMatrix.getScalingFactorX())));
|
||||||
|
processTextPosition(new TextPosition(pageRotation, pageSize.getWidth(),
|
||||||
|
pageSize.getHeight(), translatedTextRenderingMatrix, nextX, nextY,
|
||||||
|
Math.abs(dyDisplay), dxDisplay,
|
||||||
|
Math.abs(spaceWidthDisplay), Character.toString(unicodeMapping.charAt(1)), new int[] { code }, font,
|
||||||
|
fontSize,
|
||||||
|
(int)(fontSize * textMatrix.getScalingFactorX())));
|
||||||
|
} else {
|
||||||
|
|
||||||
|
processTextPosition(new TextPosition(pageRotation, pageSize.getWidth(), pageSize.getHeight(), translatedTextRenderingMatrix, nextX, nextY, Math.abs(dyDisplay), dxDisplay, Math.abs(spaceWidthDisplay), unicodeMapping, new int[]{code}, font, fontSize, (int) (fontSize * textMatrix.getScalingFactorX())));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@ -807,6 +807,30 @@ public class RedactionIntegrationTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testUnicodeProblem() throws IOException {
|
||||||
|
|
||||||
|
long start = System.currentTimeMillis();
|
||||||
|
|
||||||
|
AnalyzeRequest request = prepareStorage("files/new/unicodeProblem.pdf");
|
||||||
|
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||||
|
AnalyzeResult result = analyzeService.analyze(request);
|
||||||
|
|
||||||
|
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder()
|
||||||
|
.dossierId(TEST_DOSSIER_ID)
|
||||||
|
.fileId(TEST_FILE_ID)
|
||||||
|
.build());
|
||||||
|
|
||||||
|
try (FileOutputStream fileOutputStream = new FileOutputStream(OsUtils.getTemporaryDirectory() + "/Annotated.pdf")) {
|
||||||
|
fileOutputStream.write(annotateResponse.getDocument());
|
||||||
|
}
|
||||||
|
long end = System.currentTimeMillis();
|
||||||
|
|
||||||
|
System.out.println("duration: " + (end - start));
|
||||||
|
System.out.println("numberOfPages: " + result.getNumberOfPages());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testRotations() throws IOException {
|
public void testRotations() throws IOException {
|
||||||
|
|
||||||
|
|||||||
@ -12,4 +12,5 @@ Xinyi Y. Tao
|
|||||||
Dorn
|
Dorn
|
||||||
Prasher
|
Prasher
|
||||||
David
|
David
|
||||||
annotation
|
annotation
|
||||||
|
J.B. RASCLE
|
||||||
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user