diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/TableExtractorResponseAdapter.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/TableExtractorResponseAdapter.java index 1cb0b49..4b8d553 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/TableExtractorResponseAdapter.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/python_api/adapter/TableExtractorResponseAdapter.java @@ -50,7 +50,7 @@ public class TableExtractorResponseAdapter { tableCells.setWidth(tableCells.getX1()- tableCells.getX0()); tableCells.setHeight(tableCells.getY1()- tableCells.getY0()); tableCells.setLabel(t.getTable().getLabel()); - log.info("Parsed table cell {}",tableCells); + log.info("Parsed table cell {} with label {}",tableCells, tableCells.getLabel()); parsedTableCells.add(tableCells); t.getObjects().forEach(o -> { TableExtractorCells objectCell = new TableExtractorCells(); @@ -61,7 +61,7 @@ public class TableExtractorResponseAdapter { objectCell.setWidth(objectCell.getX1()- objectCell.getX0()); objectCell.setHeight(objectCell.getY1()- objectCell.getY0()); objectCell.setLabel(o.getLabel()); - log.info("Parsed object cell {}",objectCell); + log.info("Parsed object cell {} with label {}",objectCell, objectCell.getLabel()); parsedTableCells.add(objectCell); }); }); diff --git a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/visualization/ViewerDocumentService.java b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/visualization/ViewerDocumentService.java index 7267f17..435b4ae 100644 --- a/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/visualization/ViewerDocumentService.java +++ b/layoutparser-service/layoutparser-service-processor/src/main/java/com/knecon/fforesight/service/layoutparser/processor/services/visualization/ViewerDocumentService.java @@ -5,7 +5,6 @@ import java.awt.geom.AffineTransform; import java.awt.geom.Rectangle2D; import java.io.IOException; import java.io.OutputStream; -import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -28,19 +27,13 @@ import org.apache.pdfbox.util.Matrix; import org.springframework.stereotype.Service; import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document; -import com.knecon.fforesight.service.layoutparser.processor.model.table.Rectangle; import com.knecon.fforesight.service.layoutparser.processor.model.visualization.ColoredLine; import com.knecon.fforesight.service.layoutparser.processor.model.visualization.ColoredRectangle; import com.knecon.fforesight.service.layoutparser.processor.model.visualization.FilledRectangle; import com.knecon.fforesight.service.layoutparser.processor.model.visualization.LayoutGrid; import com.knecon.fforesight.service.layoutparser.processor.model.visualization.PlacedText; import com.knecon.fforesight.service.layoutparser.processor.model.visualization.VisualizationsOnPage; -import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.ExtractedTable; -import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.ExtractedTableData; -import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableCells; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableExtractorCells; -import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableExtractorData; -import com.knecon.fforesight.service.layoutparser.processor.utils.PdfVisualisationUtility; import lombok.RequiredArgsConstructor; import lombok.SneakyThrows; @@ -58,17 +51,19 @@ public class ViewerDocumentService { private final LayoutGridService layoutGridService; - @SneakyThrows - public void createViewerDocument(PDDocument pdDocument, Document document, OutputStream outputStream, Map> extractedTableCells, boolean layerVisibilityDefaultValue) { + public void createViewerDocument(PDDocument pdDocument, + Document document, + OutputStream outputStream, + Map> extractedTableCells, + boolean layerVisibilityDefaultValue) { LayoutGrid layoutGrid = layoutGridService.createLayoutGrid(document); // PDDocument.save() is very slow, since it actually traverses the entire pdf and writes a new one. // If we collect all COSDictionaries we changed and tell it explicitly to only add the changed ones by using saveIncremental it's very fast. Set dictionariesToUpdate = new HashSet<>(); - - PDOptionalContentGroup tableExtractorLayer = addLayerToDocument(pdDocument, dictionariesToUpdate, true); - + PDOptionalContentGroup layer = addLayerToDocument(pdDocument, dictionariesToUpdate, layerVisibilityDefaultValue); + PDOptionalContentGroup visualLayoutParsingLayer = addLayerToDocument(pdDocument, dictionariesToUpdate, true); PDFont font = new PDType1Font(Standard14Fonts.FontName.HELVETICA); for (int pageNumber = 0; pageNumber < pdDocument.getNumberOfPages(); pageNumber++) { @@ -85,9 +80,7 @@ public class ViewerDocumentService { assert pageNumber == visualizationsOnPage.getPageNumber(); // We need to append to the content stream, otherwise the content could be overlapped by following content. try (var contentStream = new PDPageContentStream(pdDocument, pdPage, PDPageContentStream.AppendMode.APPEND, true)) { - - - contentStream.beginMarkedContent(COSName.OC, tableExtractorLayer); + contentStream.beginMarkedContent(COSName.OC, visualLayoutParsingLayer); contentStream.saveGraphicsState(); contentStream.setLineWidth(LINE_WIDTH); @@ -110,7 +103,6 @@ public class ViewerDocumentService { } contentStream.restoreGraphicsState(); contentStream.endMarkedContent(); - } dictionariesToUpdate.add(pdPage.getCOSObject()); dictionariesToUpdate.add(pdPage.getResources().getCOSObject());