RED-7461: improve header/footer recognition
This commit is contained in:
parent
3722fff476
commit
2b15fd1d3c
@ -103,7 +103,7 @@ public class LayoutParsingPipeline {
|
||||
layoutParsingStorageService.storeSimplifiedText(layoutParsingRequest, simplifiedSectionTextService.toSimplifiedText(documentGraph));
|
||||
|
||||
try (var out = new ByteArrayOutputStream()) {
|
||||
viewerDocumentService.createViewerDocument(originDocument, documentGraph, out);
|
||||
viewerDocumentService.createViewerDocument(originDocument, documentGraph, out, false);
|
||||
layoutParsingStorageService.storeViewerDocument(layoutParsingRequest, out);
|
||||
}
|
||||
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.services;
|
||||
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.utils.MarkedContentUtils;
|
||||
@ -13,6 +14,7 @@ import com.knecon.fforesight.service.layoutparser.processor.model.Classification
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationPage;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.FloatFrequencyCounter;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.Cell;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.Ruling;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.TablePageBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.utils.PositionUtils;
|
||||
@ -20,65 +22,61 @@ import com.knecon.fforesight.service.layoutparser.processor.utils.PositionUtils;
|
||||
@Service
|
||||
public class BodyTextFrameService {
|
||||
|
||||
private static final float RULING_HEIGHT_THRESHOLD = 0.15f; // multiplied with page height. Header/Footer Rulings must be within that border of the page.
|
||||
private static final float RULING_WIDTH_THRESHOLD = 0.75f; // multiplied with page width. Header/Footer Rulings must be at least that wide.
|
||||
|
||||
/**
 * Determines the body text frame for every page of the given document.
 * <p>
 * Two document-wide frames are calculated with {@code calculateBodyTextFrame} over all pages: one with the
 * landscape flag set to {@code false} and one with it set to {@code true}. Each page is then passed to
 * {@code getBodyTextFrameFromRulings} together with both frames, and the frame returned for that page is
 * handed to {@code setBodyTextFrameAdjustedToPage} as both of its frame arguments.
 *
 * @param classificationDocument document whose pages and font size counter are read
 * @param layoutParsingType      parsing type forwarded to {@code calculateBodyTextFrame}
 */
public void setBodyTextFrames(ClassificationDocument classificationDocument, LayoutParsingType layoutParsingType) {
|
||||
|
||||
Rectangle bodyTextFrame = calculateBodyTextFrame(classificationDocument.getPages(), classificationDocument.getFontSizeCounter(), false, layoutParsingType);
|
||||
Rectangle landscapeBodyTextFrame = calculateBodyTextFrame(classificationDocument.getPages(), classificationDocument.getFontSizeCounter(), true, layoutParsingType);
|
||||
for (ClassificationPage page : classificationDocument.getPages()) {
|
||||
// NOTE(review): this call and the ruling-based call two statements below both set the frame for the same
// page. This one looks like the pre-change line of the diff - confirm which call is meant to remain.
setBodyTextFrameAdjustedToPage(page, bodyTextFrame, landscapeBodyTextFrame);
|
||||
// Per-page refinement: derive a frame for this page from the document-wide frames and the page's rulings.
var updatedBodyTextFrame = getBodyTextFrameFromRulings(page, bodyTextFrame, landscapeBodyTextFrame);
|
||||
// The refined frame is passed in both the portrait and the landscape position.
setBodyTextFrameAdjustedToPage(page, updatedBodyTextFrame, updatedBodyTextFrame);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
private Rectangle calculateBodyTextFrameByRulings(List<ClassificationPage> pages) {
|
||||
private Rectangle getBodyTextFrameFromRulings(ClassificationPage page, Rectangle bodyTextFrame, Rectangle landscapeBodyTextFrame) {
|
||||
|
||||
Map<ClassificationPage, List<Ruling>> potentialHeaderRulingsPerPage = new HashMap<>();
|
||||
Map<ClassificationPage, List<Ruling>> potentialFooterRulingsPerPage = new HashMap<>();
|
||||
|
||||
for (var page : pages) {
|
||||
potentialHeaderRulingsPerPage.put(page,
|
||||
page.getCleanRulings()
|
||||
.getHorizontal()
|
||||
.stream()
|
||||
.filter(ruling -> ruling.getY1() > page.getPageHeight() * 0.8)
|
||||
.filter(ruling -> ruling.getWidth() > 0.6 * page.getPageWidth())
|
||||
.toList());
|
||||
potentialFooterRulingsPerPage.put(page,
|
||||
page.getCleanRulings()
|
||||
.getHorizontal()
|
||||
.stream()
|
||||
.filter(ruling -> ruling.getY1() < page.getPageHeight() * 0.2)
|
||||
.filter(ruling -> ruling.getWidth() > 0.6 * page.getPageWidth())
|
||||
.toList());
|
||||
List<Ruling> potentialFooterRulings = getPotentialFooterRulings(page);
|
||||
List<Ruling> potentialHeaderRulings = getPotentialHeaderRulings(page);
|
||||
var x = bodyTextFrame.getTopLeft().getX();
|
||||
var y = bodyTextFrame.getTopLeft().getY();
|
||||
var w = bodyTextFrame.getWidth();
|
||||
var h = bodyTextFrame.getHeight();
|
||||
if (!potentialFooterRulings.isEmpty()) {
|
||||
h = y + h - potentialFooterRulings.get(0).getTop();
|
||||
y = potentialFooterRulings.get(0).getTop();
|
||||
}
|
||||
|
||||
Optional<Ruling> headerRuling = potentialHeaderRulingsPerPage.values()
|
||||
.stream()
|
||||
.flatMap(Collection::stream)
|
||||
.filter(ruling -> potentialHeaderRulingsPerPage.values()
|
||||
.stream()
|
||||
.filter(rulingsPerPage -> rulingsPerPage.stream().anyMatch(ruling::almostMatches))
|
||||
.count() > pages.size() * RULING_THRESHOLD_FACTOR)
|
||||
.min(Comparator.comparingDouble(Ruling::getY1));
|
||||
|
||||
Optional<Ruling> footerRuling = potentialFooterRulingsPerPage.values()
|
||||
.stream()
|
||||
.flatMap(Collection::stream)
|
||||
.filter(ruling -> potentialHeaderRulingsPerPage.values()
|
||||
.stream()
|
||||
.filter(rulingsPerPage -> rulingsPerPage.stream().anyMatch(ruling::almostMatches))
|
||||
.count() > pages.size() * RULING_THRESHOLD_FACTOR)
|
||||
.max(Comparator.comparingDouble(Ruling::getY1));
|
||||
|
||||
double maxY = headerRuling.isPresent() ? headerRuling.get().y1 : pages.stream().mapToDouble(ClassificationPage::getPageHeight).max().orElse(Double.MAX_VALUE);
|
||||
double minY = footerRuling.map(ruling -> ruling.y1).orElse(0F);
|
||||
double maxX = pages.stream().mapToDouble(ClassificationPage::getPageWidth).max().orElse(Double.MAX_VALUE);
|
||||
|
||||
return new Rectangle(new Point((float) maxX, (float) maxY), (float) 0, (float) minY, -1);
|
||||
if (!potentialHeaderRulings.isEmpty()) {
|
||||
h = potentialHeaderRulings.get(0).getBottom() - bodyTextFrame.getTopLeft().getY();
|
||||
}
|
||||
return new Rectangle(new Point(x, y), w, h, page.getPageNumber());
|
||||
}
|
||||
|
||||
|
||||
private List<Ruling> getPotentialFooterRulings(ClassificationPage page) {
|
||||
|
||||
return page.getCleanRulings()
|
||||
.getHorizontal()
|
||||
.stream()
|
||||
.filter(ruling -> ruling.getY1() < page.getPageHeight() * RULING_HEIGHT_THRESHOLD)
|
||||
.filter(ruling -> ruling.getWidth() > RULING_WIDTH_THRESHOLD * page.getPageWidth())
|
||||
.sorted(Comparator.comparingDouble(Ruling::getTop))
|
||||
.toList();
|
||||
}
|
||||
|
||||
|
||||
private List<Ruling> getPotentialHeaderRulings(ClassificationPage page) {
|
||||
|
||||
return page.getCleanRulings()
|
||||
.getHorizontal()
|
||||
.stream()
|
||||
.filter(ruling -> ruling.getY1() > page.getPageHeight() * (1 - RULING_HEIGHT_THRESHOLD))
|
||||
.filter(ruling -> ruling.getWidth() > RULING_WIDTH_THRESHOLD * page.getPageWidth())
|
||||
.sorted(Comparator.comparingDouble(Ruling::getBottom).reversed())
|
||||
.toList();
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
@ -129,10 +127,10 @@ public class BodyTextFrameService {
|
||||
* @param landscape Calculate for landscape or portrait
|
||||
* @return Rectangle of the text frame
|
||||
*/
|
||||
private Rectangle calculateBodyTextFrame(List<ClassificationPage> pages,
|
||||
FloatFrequencyCounter documentFontSizeCounter,
|
||||
boolean landscape,
|
||||
LayoutParsingType layoutParsingType) {
|
||||
protected Rectangle calculateBodyTextFrame(List<ClassificationPage> pages,
|
||||
FloatFrequencyCounter documentFontSizeCounter,
|
||||
boolean landscape,
|
||||
LayoutParsingType layoutParsingType) {
|
||||
|
||||
float approximateHeaderLineCount;
|
||||
if (layoutParsingType.equals(LayoutParsingType.TAAS)) {
|
||||
|
||||
@ -39,8 +39,8 @@ import lombok.extern.slf4j.Slf4j;
|
||||
@RequiredArgsConstructor
|
||||
public class ViewerDocumentService {
|
||||
|
||||
private static final String layerName = "Layout grid";
|
||||
|
||||
private static final String LAYER_NAME = "Layout grid";
|
||||
private static final int FONT_SIZE = 10;
|
||||
public static final float LINE_WIDTH = 1f;
|
||||
|
||||
@ -48,14 +48,14 @@ public class ViewerDocumentService {
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public void createViewerDocument(PDDocument pdDocument, Document document, OutputStream outputStream) {
|
||||
public void createViewerDocument(PDDocument pdDocument, Document document, OutputStream outputStream, boolean layerVisibilityDefaultValue) {
|
||||
|
||||
log.info("Start Viewer Document Creation");
|
||||
LayoutGrid layoutGrid = layoutGridService.createLayoutGrid(document);
|
||||
// PDDocument.save() is very slow, since it actually traverses the entire pdf and writes a new one.
|
||||
// If we collect all COSDictionaries we changed and tell it explicitly to only add the changed ones by using saveIncremental it's very fast.
|
||||
Set<COSDictionary> dictionariesToUpdate = new HashSet<>();
|
||||
PDOptionalContentGroup layer = addLayerToDocument(pdDocument, dictionariesToUpdate);
|
||||
PDOptionalContentGroup layer = addLayerToDocument(pdDocument, dictionariesToUpdate, layerVisibilityDefaultValue);
|
||||
PDFont font = PDType1Font.HELVETICA;
|
||||
|
||||
for (int pageNumber = 0; pageNumber < pdDocument.getNumberOfPages(); pageNumber++) {
|
||||
@ -119,6 +119,7 @@ public class ViewerDocumentService {
|
||||
dictionariesToUpdate.add(pdPage.getResources().getCOSObject());
|
||||
}
|
||||
dictionariesToUpdate.add(pdDocument.getDocumentInformation().getCOSObject());
|
||||
// dictionariesToUpdate.add(pdDocument.getDocument().getTrailer());
|
||||
pdDocument.saveIncremental(outputStream, dictionariesToUpdate);
|
||||
log.info("Saved Viewer Document");
|
||||
}
|
||||
@ -145,7 +146,7 @@ public class ViewerDocumentService {
|
||||
}
|
||||
|
||||
|
||||
private static PDOptionalContentGroup addLayerToDocument(PDDocument pdDocument, Set<COSDictionary> dictionariesToUpdate) {
|
||||
private static PDOptionalContentGroup addLayerToDocument(PDDocument pdDocument, Set<COSDictionary> dictionariesToUpdate, boolean layerVisibilityDefaultValue) {
|
||||
|
||||
PDDocumentCatalog catalog = pdDocument.getDocumentCatalog();
|
||||
PDOptionalContentProperties ocprops = catalog.getOCProperties();
|
||||
@ -154,13 +155,13 @@ public class ViewerDocumentService {
|
||||
catalog.setOCProperties(ocprops);
|
||||
}
|
||||
PDOptionalContentGroup layer = null;
|
||||
if (ocprops.hasGroup(layerName)) {
|
||||
layer = ocprops.getGroup(layerName);
|
||||
if (ocprops.hasGroup(LAYER_NAME)) {
|
||||
layer = ocprops.getGroup(LAYER_NAME);
|
||||
} else {
|
||||
layer = new PDOptionalContentGroup(layerName);
|
||||
layer = new PDOptionalContentGroup(LAYER_NAME);
|
||||
ocprops.addGroup(layer);
|
||||
}
|
||||
ocprops.setGroupEnabled(layer, false);
|
||||
ocprops.setGroupEnabled(layer, layerVisibilityDefaultValue);
|
||||
dictionariesToUpdate.add(catalog.getCOSObject());
|
||||
return layer;
|
||||
}
|
||||
|
||||
@ -12,10 +12,11 @@ import org.springframework.core.io.ClassPathResource;
|
||||
import com.iqser.red.commons.jackson.ObjectMapperFactory;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentData;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.mapper.DocumentDataMapper;
|
||||
import com.knecon.fforesight.service.layoutparser.server.utils.BuildDocumentTest;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
public class DocumentDataTests extends BuildDocumentGraphTest{
|
||||
public class DocumentDataTests extends BuildDocumentTest {
|
||||
@Test
|
||||
@SneakyThrows
|
||||
public void createDocumentDataForAllFiles() {
|
||||
|
||||
@ -20,10 +20,11 @@ import com.knecon.fforesight.service.layoutparser.processor.python_api.model.ima
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.mapper.DocumentDataMapper;
|
||||
import com.knecon.fforesight.service.layoutparser.server.utils.BuildDocumentTest;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
public class DocumentGraphJsonWritingTest extends BuildDocumentGraphTest {
|
||||
public class DocumentGraphJsonWritingTest extends BuildDocumentTest {
|
||||
|
||||
@Test
|
||||
@SneakyThrows
|
||||
|
||||
@ -16,11 +16,12 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Ta
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.mapper.DocumentDataMapper;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.mapper.DocumentGraphMapper;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.mapper.PropertiesMapper;
|
||||
import com.knecon.fforesight.service.layoutparser.server.utils.BuildDocumentTest;
|
||||
import com.knecon.fforesight.tenantcommons.TenantContext;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
public class DocumentGraphMappingTest extends BuildDocumentGraphTest {
|
||||
public class DocumentGraphMappingTest extends BuildDocumentTest {
|
||||
|
||||
@Test
|
||||
@SneakyThrows
|
||||
|
||||
@ -13,13 +13,14 @@ import org.springframework.core.io.ClassPathResource;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.server.utils.BuildDocumentTest;
|
||||
import com.knecon.fforesight.service.layoutparser.server.utils.visualizations.PdfDraw;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
public class DocumentGraphVisualizationTest extends BuildDocumentGraphTest {
|
||||
public class DocumentGraphVisualizationTest extends BuildDocumentTest {
|
||||
|
||||
@Test
|
||||
@SneakyThrows
|
||||
|
||||
@ -12,10 +12,11 @@ import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsi
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.visualization.LayoutGridService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.visualization.ViewerDocumentService;
|
||||
import com.knecon.fforesight.service.layoutparser.server.utils.BuildDocumentTest;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
public class ViewerDocumentTest extends BuildDocumentGraphTest {
|
||||
public class ViewerDocumentTest extends BuildDocumentTest {
|
||||
|
||||
@Test
|
||||
@Disabled
|
||||
@ -28,7 +29,7 @@ public class ViewerDocumentTest extends BuildDocumentGraphTest {
|
||||
Document document = buildGraph(fileName, LayoutParsingType.DOCUMINE);
|
||||
String tmpFileName = "/tmp/" + Path.of(fileName).getFileName() + "_VIEWER.pdf";
|
||||
try (var pdDocument = Loader.loadPDF(new ClassPathResource(fileName).getInputStream()); var out = new FileOutputStream(tmpFileName)) {
|
||||
viewerDocumentService.createViewerDocument(pdDocument, document, out);
|
||||
viewerDocumentService.createViewerDocument(pdDocument, document, out, true);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -0,0 +1,31 @@
|
||||
package com.knecon.fforesight.service.layoutparser.server.services;
|
||||
|
||||
import java.nio.file.Path;
|
||||
import java.util.List;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationDocument;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations;
|
||||
import com.knecon.fforesight.service.layoutparser.server.utils.BuildDocumentTest;
|
||||
import com.knecon.fforesight.service.layoutparser.server.utils.visualizations.PdfDraw;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
class BodyTextFrameServiceTest extends BuildDocumentTest {
|
||||
|
||||
@Test
|
||||
@SneakyThrows
|
||||
public void testCalculateBodyTextFrame() {
|
||||
|
||||
String filename = "files/211.pdf";
|
||||
String outputFilename = "/tmp/" + Path.of(filename).getFileName() + "_MAINBODY.pdf";
|
||||
ClassificationDocument document = parseLayout(filename, LayoutParsingType.TAAS);
|
||||
PdfDraw.drawRectanglesPerPage(filename,
|
||||
document.getPages().stream().map(page -> List.of(RectangleTransformations.toRectangle2D(page.getBodyTextFrame()))).toList(),
|
||||
outputFilename);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -5,7 +5,6 @@ import java.util.Collections;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
import org.junit.jupiter.api.Disabled;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.PageContents;
|
||||
@ -19,13 +18,13 @@ import lombok.SneakyThrows;
|
||||
public class RulingCleaningServiceTest {
|
||||
|
||||
@Test
|
||||
@Disabled
|
||||
// @Disabled
|
||||
@SneakyThrows
|
||||
public void textRulingExtraction() {
|
||||
|
||||
String fileName = "files/BASF/2013-1110704.pdf";
|
||||
String fileName = "files/211.pdf";
|
||||
String lineFileName = "/tmp/" + Path.of(fileName).getFileName().toString() + "_LINES.pdf";
|
||||
List<PageContents> pageContents = PageContentExtractor.getSortedPageContents("files/BASF/2013-1110704.pdf");
|
||||
List<PageContents> pageContents = PageContentExtractor.getSortedPageContents(fileName);
|
||||
PdfDraw.drawLinesPerPage(fileName, pageContents.stream().map(PageContents::getRulings).toList(), lineFileName);
|
||||
|
||||
RulingCleaningService rulingCleaningService = new RulingCleaningService();
|
||||
|
||||
@ -1,39 +1,35 @@
|
||||
package com.knecon.fforesight.service.layoutparser.server.graph;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
package com.knecon.fforesight.service.layoutparser.server.utils;
|
||||
|
||||
import java.io.InputStream;
|
||||
|
||||
import org.apache.pdfbox.Loader;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.junit.jupiter.api.Disabled;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingPipeline;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationDocument;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory;
|
||||
import com.knecon.fforesight.service.layoutparser.server.utils.AbstractTest;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
|
||||
public class BuildDocumentGraphTest extends AbstractTest {
|
||||
public abstract class BuildDocumentTest extends AbstractTest {
|
||||
|
||||
@Autowired
|
||||
protected LayoutParsingPipeline layoutParsingPipeline;
|
||||
|
||||
|
||||
@Test
|
||||
@Disabled
|
||||
public void buildMetolachlor() {
|
||||
/**
 * Runs layout parsing for a PDF on the test classpath and returns the resulting classification document.
 * <p>
 * Storage is prepared for the file via {@code prepareStorage}. The PDF is then loaded from the classpath
 * and passed to {@code layoutParsingPipeline.parseLayout} together with the images file stored under
 * {@code IMAGE_FILE_ID} and a newly constructed {@code TableServiceResponse}. The input stream and the
 * PDF document are closed by the try-with-resources statement.
 *
 * @param filename          classpath location of the PDF to parse
 * @param layoutParsingType parsing type forwarded to the pipeline
 * @return the document returned by {@code layoutParsingPipeline.parseLayout}
 */
@SneakyThrows
|
||||
protected ClassificationDocument parseLayout(String filename, LayoutParsingType layoutParsingType) {
|
||||
|
||||
// NOTE(review): the next four statements appear to be the body of the removed buildMetolachlor test,
// interleaved here by the diff view - they do not use this method's parameters. Confirm before relying on them.
Document documentGraph = buildGraph("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf");
|
||||
assertEquals(221, documentGraph.getPages().size());
|
||||
assertEquals(220, documentGraph.getPages().stream().filter(page -> page.getHeader().hasText()).count());
|
||||
assertEquals(0, documentGraph.getPages().stream().filter(page -> page.getFooter().hasText()).count());
|
||||
ClassPathResource fileResource = new ClassPathResource(filename);
|
||||
prepareStorage(filename);
|
||||
// Both resources are closed automatically once parsing has returned.
try (InputStream inputStream = fileResource.getInputStream(); PDDocument pdDocument = Loader.loadPDF(inputStream)) {
|
||||
return layoutParsingPipeline.parseLayout(layoutParsingType, pdDocument, layoutParsingStorageService.getImagesFile(IMAGE_FILE_ID), new TableServiceResponse());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -52,14 +48,9 @@ public class BuildDocumentGraphTest extends AbstractTest {
|
||||
} else {
|
||||
prepareStorage(filename);
|
||||
}
|
||||
ClassPathResource fileResource = new ClassPathResource(filename);
|
||||
|
||||
try (InputStream inputStream = fileResource.getInputStream(); PDDocument pdDocument = Loader.loadPDF(inputStream)) {
|
||||
return DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(layoutParsingType,
|
||||
pdDocument,
|
||||
layoutParsingStorageService.getImagesFile(IMAGE_FILE_ID),
|
||||
new TableServiceResponse()));
|
||||
}
|
||||
return DocumentGraphFactory.buildDocumentGraph(parseLayout(filename, layoutParsingType));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user