RED-4254: Added a dirty hack in the pdfbox classes to find words that contain Unicode characters mapped to 2 chars, like 'RA'
This commit is contained in:
parent
6b89572c87
commit
264d7e3a87
@ -301,12 +301,24 @@ class LegacyPDFStreamEngine extends PDFStreamEngine
|
||||
nextY -= pageSize.getLowerLeftY();
|
||||
}
|
||||
|
||||
// This is a hack for glyphs whose Unicode mapping consists of 2 chars, e.g. "RA"; see unicodeProblem.pdf.
|
||||
if(unicodeMapping.length() == 2){
|
||||
processTextPosition(new TextPosition(pageRotation, pageSize.getWidth(),
|
||||
pageSize.getHeight(), translatedTextRenderingMatrix, nextX, nextY,
|
||||
Math.abs(dyDisplay), dxDisplay,
|
||||
Math.abs(spaceWidthDisplay), unicodeMapping, new int[] { code }, font,
|
||||
Math.abs(spaceWidthDisplay), Character.toString(unicodeMapping.charAt(0)), new int[] { code }, font,
|
||||
fontSize,
|
||||
(int)(fontSize * textMatrix.getScalingFactorX())));
|
||||
processTextPosition(new TextPosition(pageRotation, pageSize.getWidth(),
|
||||
pageSize.getHeight(), translatedTextRenderingMatrix, nextX, nextY,
|
||||
Math.abs(dyDisplay), dxDisplay,
|
||||
Math.abs(spaceWidthDisplay), Character.toString(unicodeMapping.charAt(1)), new int[] { code }, font,
|
||||
fontSize,
|
||||
(int)(fontSize * textMatrix.getScalingFactorX())));
|
||||
} else {
|
||||
|
||||
processTextPosition(new TextPosition(pageRotation, pageSize.getWidth(), pageSize.getHeight(), translatedTextRenderingMatrix, nextX, nextY, Math.abs(dyDisplay), dxDisplay, Math.abs(spaceWidthDisplay), unicodeMapping, new int[]{code}, font, fontSize, (int) (fontSize * textMatrix.getScalingFactorX())));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@ -807,6 +807,30 @@ public class RedactionIntegrationTest {
|
||||
}
|
||||
|
||||
|
||||
/**
 * Smoke test that runs "files/new/unicodeProblem.pdf" through structure analysis,
 * analysis and annotation, then writes the annotated document to the temporary
 * directory as "Annotated.pdf".
 *
 * The method contains no assertions: it only fails if one of the calls throws.
 * The elapsed time and the number of pages are printed to standard output.
 *
 * @throws IOException if writing the annotated PDF fails
 */
@Test
|
||||
public void testUnicodeProblem() throws IOException {
|
||||
|
||||
// Start of the wall-clock measurement that is printed at the end of the test.
long start = System.currentTimeMillis();
|
||||
|
||||
AnalyzeRequest request = prepareStorage("files/new/unicodeProblem.pdf");
|
||||
// Structure analysis is invoked before analyze(), using the ids carried by the request.
analyzeService.analyzeDocumentStructure(new StructureAnalyzeRequest(request.getDossierId(), request.getFileId()));
|
||||
AnalyzeResult result = analyzeService.analyze(request);
|
||||
|
||||
// NOTE(review): annotation uses TEST_DOSSIER_ID / TEST_FILE_ID instead of the ids from
// `request`; presumably prepareStorage() registers the file under these constants - confirm.
AnnotateResponse annotateResponse = annotationService.annotate(AnnotateRequest.builder()
|
||||
.dossierId(TEST_DOSSIER_ID)
|
||||
.fileId(TEST_FILE_ID)
|
||||
.build());
|
||||
|
||||
// Write the annotated document to the temporary directory; try-with-resources closes the stream.
try (FileOutputStream fileOutputStream = new FileOutputStream(OsUtils.getTemporaryDirectory() + "/Annotated.pdf")) {
|
||||
fileOutputStream.write(annotateResponse.getDocument());
|
||||
}
|
||||
long end = System.currentTimeMillis();
|
||||
|
||||
// Diagnostic output only; nothing below is verified.
System.out.println("duration: " + (end - start));
|
||||
System.out.println("numberOfPages: " + result.getNumberOfPages());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testRotations() throws IOException {
|
||||
|
||||
|
||||
@ -13,3 +13,4 @@ Dorn
|
||||
Prasher
|
||||
David
|
||||
annotation
|
||||
J.B. RASCLE
|
||||
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user