From 007cbfd1eedddee46538c1b069b87f909a4b7a41 Mon Sep 17 00:00:00 2001 From: Kilian Schuettler Date: Fri, 8 Mar 2024 12:42:40 +0100 Subject: [PATCH] RED-7384: Fixes for ClassCastException * changed save -> incrementalSave * always use origin file instead of reusing viewerdoc * Sometimes the viewer document is corrupted after saving and missing the content streams on a random page, for the files we viewed it did not seem to happen with incrementalSave. It might only be a timing issue though --- .../layoutparser/processor/LayoutParsingPipeline.java | 6 ++++-- .../processor/model/text/RedTextPosition.java | 9 +++++++-- .../service/viewerdoc/service/ViewerDocumentService.java | 8 +++++--- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java index 053fbdf..9f4afcb 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/LayoutParsingPipeline.java @@ -94,8 +94,10 @@ public class LayoutParsingPipeline { log.info("Starting layout parsing for {}", layoutParsingRequest.identifier()); File originFile = layoutParsingStorageService.getOriginFile(layoutParsingRequest.originFileStorageId()); - File viewerDocumentFile = layoutParsingStorageService.getViewerDocFile(layoutParsingRequest.viewerDocumentStorageId()) - .orElse(originFile); +// File viewerDocumentFile = layoutParsingStorageService.getViewerDocFile(layoutParsingRequest.viewerDocumentStorageId()) +// .orElse(originFile); + File viewerDocumentFile = originFile; + ImageServiceResponse imageServiceResponse = 
new ImageServiceResponse(); if (layoutParsingRequest.imagesFileStorageId() diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/RedTextPosition.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/RedTextPosition.java index ccea113..8826d7b 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/RedTextPosition.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/text/RedTextPosition.java @@ -1,7 +1,6 @@ package com.knecon.fforesight.service.layoutparser.processor.model.text; import org.apache.pdfbox.text.TextPosition; -import org.springframework.beans.BeanUtils; import com.fasterxml.jackson.annotation.JsonIgnore; @@ -50,7 +49,13 @@ public class RedTextPosition { public static RedTextPosition fromTextPosition(TextPosition textPosition) { var pos = new RedTextPosition(); - BeanUtils.copyProperties(textPosition, pos); + pos.setRotation(textPosition.getRotation()); + pos.setPageHeight(textPosition.getPageHeight()); + pos.setPageWidth(textPosition.getPageWidth()); + pos.setUnicode(textPosition.getUnicode()); + pos.setDir(textPosition.getDir()); + pos.setWidthOfSpace(textPosition.getWidthOfSpace()); + pos.setFontSizeInPt(textPosition.getFontSizeInPt()); pos.setFontName(textPosition.getFont().getName()); pos.setFontSizeInPt(textPosition.getFontSizeInPt()); diff --git a/layoutparser-service/viewer-doc-processor/src/main/java/com/knecon/fforesight/service/viewerdoc/service/ViewerDocumentService.java b/layoutparser-service/viewer-doc-processor/src/main/java/com/knecon/fforesight/service/viewerdoc/service/ViewerDocumentService.java index cc63845..1585f9d 100644 --- 
a/layoutparser-service/viewer-doc-processor/src/main/java/com/knecon/fforesight/service/viewerdoc/service/ViewerDocumentService.java +++ b/layoutparser-service/viewer-doc-processor/src/main/java/com/knecon/fforesight/service/viewerdoc/service/ViewerDocumentService.java @@ -126,8 +126,8 @@ public class ViewerDocumentService { pdDocument = openPDDocument(tmpFile.toFile()); } } - observedIncrementalSave(pdDocument, destinationFile); + observedIncrementalSave(pdDocument, destinationFile); pdDocument.close(); assert tmpFile.toFile().delete(); } @@ -282,10 +282,12 @@ public class ViewerDocumentService { @SneakyThrows private void observedIncrementalSave(PDDocument pdDocument, File outputFile) { - + /* + Sometimes the viewer document is corrupted after saving and missing the content streams on a random page, for the files we viewed it did not seem to happen with incrementalSave. It might only be a timing issue though + */ Observation.createNotStarted("ViewerDocumentService", registry).contextualName("incremental-save").observe(() -> { try (var out = new FileOutputStream(outputFile)) { - pdDocument.save(out); + pdDocument.saveIncremental(out); } catch (IOException e) { throw new RuntimeException(e); }