Merge branch 'CLARI-139' into 'main'

CLARI-139 - fixed outline error for unparsable object

See merge request fforesight/layout-parser!188
This commit is contained in:
Timo Bejan 2024-08-08 16:36:41 +02:00
commit a9287ec406
3 changed files with 21 additions and 3 deletions

View File

@ -82,10 +82,18 @@ public class OutlineExtractorService {
String title = item.getTitle();
PDPage page = item.findDestinationPage(document);
if (page == null) {
PDPage page;
try {
// Can throw: "Error: can't convert to Destination COSArray" for some OCR'd PDFs
page = item.findDestinationPage(document);
if (page == null) {
return Optional.empty();
}
}catch (IOException e){
log.info(String.format("Error occurred during position resolution for outline item with title %s: " + e, title));
return Optional.empty();
}
int pageNumber = document.getPages().indexOf(page);
Optional<Point2D> outlinePosition = Optional.empty();

View File

@ -1,5 +1,6 @@
package com.knecon.fforesight.service.layoutparser.server;
import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
@ -56,6 +57,15 @@ public class OutlineDetectionTest extends AbstractTest {
pdfNetInitializer.init();
}
/**
 * Parses the sample PDF below with the {@code CLARIFYND} layout parsing type and
 * verifies that a non-null {@link ClassificationDocument} is returned.
 *
 * NOTE(review): presumably this is the customer file whose outline item could not be
 * resolved to a destination page — confirm against the linked ticket.
 */
@Test
@SneakyThrows
public void testOutlineError() {
    final String pdfPath = "files/syngenta/CustomerFiles/Clarifynd/VV-470942.pdf";

    // Layout parsing must complete without propagating an exception.
    final ClassificationDocument parsedDocument = parseLayout(pdfPath, LayoutParsingType.CLARIFYND);

    assertThat(parsedDocument).isNotNull();
}
@Test
@SneakyThrows

@ -1 +1 @@
Subproject commit 21fefb64bf27ca2b3329a6c69d90a27450b17930
Subproject commit c6fd9e849f3efd7d1507401f63629b91dec9f4ec