RED-6126: performance-test
* re-enabled overlap detection * re-creating the helper document for every page instead of reusing a single document and adding/removing pages
This commit is contained in:
parent
e705f869fd
commit
6ccf3f80fc
@ -89,7 +89,7 @@ public class InvisibleElementRemovalService {
|
||||
|
||||
context.visitedXObjIds().clear();
|
||||
|
||||
// removeOverlappedElements(page, writer, context);
|
||||
removeOverlappedElements(page, writer, context);
|
||||
}
|
||||
pdfDoc.save(out, SDFDoc.SaveMode.REMOVE_UNUSED, null);
|
||||
}
|
||||
|
||||
@ -6,7 +6,6 @@ import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.util.Map;
|
||||
|
||||
import io.micrometer.core.annotation.Timed;
|
||||
import org.springframework.amqp.rabbit.core.RabbitTemplate;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
@ -23,6 +22,7 @@ import com.pdftron.pdf.Page;
|
||||
import com.pdftron.pdf.RectCollection;
|
||||
import com.pdftron.sdf.SDFDoc;
|
||||
|
||||
import io.micrometer.core.annotation.Timed;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
@ -81,15 +81,16 @@ public class OCRService {
|
||||
|
||||
Map<Integer, RectCollection> pageIdToRectCollection = imagePositionRetrievalService.getImagePositionPerPage(pdfDoc, true);
|
||||
|
||||
OCROptions options = new OCROptions();
|
||||
PDFDoc ocrPageDoc = new PDFDoc();
|
||||
|
||||
int numProcessedPages = 0;
|
||||
for (Integer pageId : pageIdToRectCollection.keySet()) {
|
||||
try {
|
||||
// optimization by only scanning pages that contain images
|
||||
// optimization: creating a new document is faster than reusing the same and adding/removing pages one by one
|
||||
OCROptions options = new OCROptions();
|
||||
PDFDoc ocrPageDoc = new PDFDoc();
|
||||
// optimization: only scanning pages that contain images
|
||||
Page pdfPage = pdfDoc.getPage(pageId);
|
||||
pdfPage.setMediaBox(pdfPage.getCropBox()); // this line ensures the ocr text is placed correctly by PDFTron
|
||||
// note: setting the media box to the crop box ensures the ocr text is placed correctly by PDFTron
|
||||
pdfPage.setMediaBox(pdfPage.getCropBox());
|
||||
ocrPageDoc.pagePushBack(pdfPage);
|
||||
options.addTextZonesForPage(pageIdToRectCollection.get(pageId), 1);
|
||||
options.addLang(ENGLISH);
|
||||
@ -109,7 +110,7 @@ public class OCRService {
|
||||
Page ocrPage = ocrPageDoc.getPage(1);
|
||||
pdfDoc.pageInsert(pdfDoc.getPageIterator(pageId), ocrPage);
|
||||
pdfDoc.pageRemove(pdfDoc.getPageIterator(pageId + 1));
|
||||
ocrPageDoc.pageRemove(ocrPageDoc.getPageIterator(1));
|
||||
ocrPageDoc.close();
|
||||
|
||||
rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_QUEUE,
|
||||
objectMapper.writeValueAsString(OCRStatusUpdateResponse.builder()
|
||||
@ -124,8 +125,6 @@ public class OCRService {
|
||||
}
|
||||
}
|
||||
|
||||
ocrPageDoc.close();
|
||||
|
||||
rabbitTemplate.convertAndSend(MessagingConfiguration.OCR_STATUS_UPDATE_RESPONSE_QUEUE,
|
||||
objectMapper.writeValueAsString(OCRStatusUpdateResponse.builder()
|
||||
.fileId(fileId)
|
||||
@ -137,4 +136,5 @@ public class OCRService {
|
||||
Optimizer.optimize(pdfDoc);
|
||||
pdfDoc.save(out, SDFDoc.SaveMode.LINEARIZED, null);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -0,0 +1,16 @@
|
||||
<Configuration>
|
||||
|
||||
<Appenders>
|
||||
<Console name="CONSOLE" target="SYSTEM_OUT">
|
||||
<PatternLayout pattern="%d{HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg%n"/>
|
||||
</Console>
|
||||
</Appenders>
|
||||
|
||||
<Loggers>
|
||||
<Root level="warn">
|
||||
<AppenderRef ref="CONSOLE"/>
|
||||
</Root>
|
||||
<Logger name="com.iqser" level="info"/>
|
||||
</Loggers>
|
||||
|
||||
</Configuration>
|
||||
Loading…
x
Reference in New Issue
Block a user