From 5b6a706c28581cb057f06fe4dd7f88960d64a585 Mon Sep 17 00:00:00 2001
From: Timo Bejan
Date: Thu, 8 Aug 2024 16:17:44 +0300
Subject: [PATCH] CLAR-139 - fixed outline error for unparsable object

---
Review note: the caught exception is passed to String.format as a "%s"
argument instead of being concatenated into the format string, so a '%' in
the exception text cannot raise IllegalFormatException inside the catch block.

 .../model/outline/OutlineExtractorService.java     | 12 ++++++++++--
 .../layoutparser/server/OutlineDetectionTest.java  | 10 ++++++++++
 .../src/test/resources/files/syngenta              |  2 +-
 3 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/OutlineExtractorService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/OutlineExtractorService.java
index 3cc94ce..99a34c4 100644
--- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/OutlineExtractorService.java
+++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/model/outline/OutlineExtractorService.java
@@ -82,10 +82,18 @@ public class OutlineExtractorService {
 
         String title = item.getTitle();
 
-        PDPage page = item.findDestinationPage(document);
-        if (page == null) {
+        PDPage page;
+        try {
+            // Can throw: "Error: can't convert to Destination COSArray" for some OCR'd PDFs
+            page = item.findDestinationPage(document);
+            if (page == null) {
+                return Optional.empty();
+            }
+        } catch (IOException e) {
+            log.info(String.format("Error occurred during position resolution for outline item with title %s: %s", title, e));
             return Optional.empty();
         }
+
         int pageNumber = document.getPages().indexOf(page);
 
         Optional outlinePosition = Optional.empty();
diff --git a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/OutlineDetectionTest.java b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/OutlineDetectionTest.java
index 0ed162b..0f486eb 100644
--- a/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/OutlineDetectionTest.java
+++ b/layoutparser-service/layoutparser-service-server/src/test/java/com/knecon/fforesight/service/layoutparser/server/OutlineDetectionTest.java
@@ -1,5 +1,6 @@
 package com.knecon.fforesight.service.layoutparser.server;
 
+import static org.assertj.core.api.Assertions.assertThat;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
@@ -56,6 +57,15 @@ public class OutlineDetectionTest extends AbstractTest {
         pdfNetInitializer.init();
     }
 
+    @Test
+    @SneakyThrows
+    public void testOutlineError() {
+        String fileName = "files/syngenta/CustomerFiles/Clarifynd/VV-470942.pdf";
+
+        ClassificationDocument classificationDocument = parseLayout(fileName, LayoutParsingType.CLARIFYND);
+        assertThat(classificationDocument).isNotNull();
+    }
+
     @Test
     @SneakyThrows
 
diff --git a/layoutparser-service/layoutparser-service-server/src/test/resources/files/syngenta b/layoutparser-service/layoutparser-service-server/src/test/resources/files/syngenta
index 21fefb6..c6fd9e8 160000
--- a/layoutparser-service/layoutparser-service-server/src/test/resources/files/syngenta
+++ b/layoutparser-service/layoutparser-service-server/src/test/resources/files/syngenta
@@ -1 +1 @@
-Subproject commit 21fefb64bf27ca2b3329a6c69d90a27450b17930
+Subproject commit c6fd9e849f3efd7d1507401f63629b91dec9f4ec