Compare commits
8 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ef23ee0ade | ||
|
|
af31f52b47 | ||
|
|
b5152112ee | ||
|
|
85ea4ef455 | ||
|
|
01f8c01fff | ||
|
|
0b6a292c75 | ||
|
|
e24020589c | ||
|
|
c619b845e8 |
@ -8,13 +8,20 @@ import lombok.Builder;
|
||||
@Builder
|
||||
@Schema(description = "Object containing information about the layout parsing.")
|
||||
public record LayoutParsingFinishedEvent(
|
||||
@Schema(description = "General purpose identifier. It is returned exactly the same way it is inserted with the LayoutParsingRequest.")
|
||||
Map<String, String> identifier,//
|
||||
@Schema(description = "The duration of a single layout parsing in ms.")
|
||||
long duration,//
|
||||
@Schema(description = "The number of pages of the parsed document.")
|
||||
int numberOfPages,//
|
||||
@Schema(description = "A general message. It contains some information useful for a developer, like the paths where the files are stored. Not meant to be machine readable.")
|
||||
String message) {
|
||||
@Schema(description = "General purpose identifier. It is returned exactly the same way it is inserted with the LayoutParsingRequest.") //
|
||||
Map<String, String> identifier,
|
||||
|
||||
@Schema(description = "The duration of a single layout parsing in ms.") //
|
||||
long duration,
|
||||
|
||||
@Schema(description = "The number of pages of the parsed document.") //
|
||||
int numberOfPages,
|
||||
|
||||
@Schema(description = "A general message. It contains some information useful for a developer, like the paths where the files are stored. Not meant to be machine readable.") //
|
||||
String message,
|
||||
|
||||
@Schema(description = "The app version of the layout parser.") //
|
||||
String layoutParserVersion
|
||||
) {
|
||||
|
||||
}
|
||||
|
||||
@ -13,9 +13,8 @@ import lombok.experimental.FieldDefaults;
|
||||
@Configuration
|
||||
@ConfigurationProperties("layoutparser")
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
public class LayoutparserSettings {
|
||||
public class LayoutParserSettings {
|
||||
|
||||
boolean debug;
|
||||
LayoutParsingType layoutParsingTypeOverride;
|
||||
String pdftronLicense;
|
||||
}
|
||||
@ -20,6 +20,7 @@ import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
||||
import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDMarkedContent;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.mapper.DocumentDataMapper;
|
||||
@ -87,29 +88,32 @@ import lombok.extern.slf4j.Slf4j;
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
public class LayoutParsingPipeline {
|
||||
|
||||
ImageServiceResponseAdapter imageServiceResponseAdapter;
|
||||
CvTableParsingAdapter cvTableParsingAdapter;
|
||||
LayoutParsingStorageService layoutParsingStorageService;
|
||||
SectionsBuilderService sectionsBuilderService;
|
||||
SimplifiedSectionTextService simplifiedSectionTextService;
|
||||
RulingCleaningService rulingCleaningService;
|
||||
TableExtractionService tableExtractionService;
|
||||
DocuMineBlockificationService docuMineBlockificationService;
|
||||
RedactManagerBlockificationService redactManagerBlockificationService;
|
||||
BlockificationPostprocessingService blockificationPostprocessingService;
|
||||
DocstrumBlockificationService docstrumBlockificationService;
|
||||
LayoutGridService layoutGridService;
|
||||
ObservationRegistry observationRegistry;
|
||||
VisualLayoutParsingAdapter visualLayoutParsingAdapter;
|
||||
GraphicExtractorService graphicExtractorService;
|
||||
OutlineExtractorService outlineExtractorService;
|
||||
SectionTreeBuilderService sectionTreeBuilderService;
|
||||
SectionTreeEnhancementService sectionTreeEnhancementService;
|
||||
LayoutparserSettings settings;
|
||||
ClassificationService classificationService;
|
||||
final ImageServiceResponseAdapter imageServiceResponseAdapter;
|
||||
final CvTableParsingAdapter cvTableParsingAdapter;
|
||||
final LayoutParsingStorageService layoutParsingStorageService;
|
||||
final SectionsBuilderService sectionsBuilderService;
|
||||
final SimplifiedSectionTextService simplifiedSectionTextService;
|
||||
final RulingCleaningService rulingCleaningService;
|
||||
final TableExtractionService tableExtractionService;
|
||||
final DocuMineBlockificationService docuMineBlockificationService;
|
||||
final RedactManagerBlockificationService redactManagerBlockificationService;
|
||||
final BlockificationPostprocessingService blockificationPostprocessingService;
|
||||
final DocstrumBlockificationService docstrumBlockificationService;
|
||||
final LayoutGridService layoutGridService;
|
||||
final ObservationRegistry observationRegistry;
|
||||
final VisualLayoutParsingAdapter visualLayoutParsingAdapter;
|
||||
final GraphicExtractorService graphicExtractorService;
|
||||
final OutlineExtractorService outlineExtractorService;
|
||||
final SectionTreeBuilderService sectionTreeBuilderService;
|
||||
final SectionTreeEnhancementService sectionTreeEnhancementService;
|
||||
final LayoutParserSettings settings;
|
||||
final ClassificationService classificationService;
|
||||
|
||||
@Value("${LAYOUT_PARSER_VERSION:}")
|
||||
private String layoutParserVersion;
|
||||
|
||||
|
||||
public LayoutParsingFinishedEvent parseLayoutAndSaveFilesToStorage(LayoutParsingRequest layoutParsingRequest) throws IOException {
|
||||
@ -118,17 +122,23 @@ public class LayoutParsingPipeline {
|
||||
log.info("Starting layout parsing for {}", layoutParsingRequest.identifier());
|
||||
|
||||
File originFile = layoutParsingStorageService.getOriginFile(layoutParsingRequest.originFileStorageId());
|
||||
File viewerDocumentFile = layoutParsingStorageService.getViewerDocFile(layoutParsingRequest.viewerDocumentStorageId()).orElse(originFile);
|
||||
File viewerDocumentFile = layoutParsingStorageService.getViewerDocFile(layoutParsingRequest.viewerDocumentStorageId())
|
||||
.orElse(originFile);
|
||||
|
||||
VisualLayoutParsingResponse visualLayoutParsingResponse = layoutParsingRequest.visualLayoutParsingFileId()
|
||||
.map(layoutParsingStorageService::getVisualLayoutParsingFile).orElse(new VisualLayoutParsingResponse());
|
||||
.map(layoutParsingStorageService::getVisualLayoutParsingFile)
|
||||
.orElse(new VisualLayoutParsingResponse());
|
||||
ImageServiceResponse imageServiceResponse = layoutParsingRequest.imagesFileStorageId()
|
||||
.map(layoutParsingStorageService::getImagesFile).orElse(new ImageServiceResponse());
|
||||
.map(layoutParsingStorageService::getImagesFile)
|
||||
.orElse(new ImageServiceResponse());
|
||||
TableServiceResponse tableServiceResponse = layoutParsingRequest.tablesFileStorageId()
|
||||
.map(layoutParsingStorageService::getTablesFile).orElse(new TableServiceResponse());
|
||||
.map(layoutParsingStorageService::getTablesFile)
|
||||
.orElse(new TableServiceResponse());
|
||||
|
||||
ClassificationDocument classificationDocument = parseLayout(settings.getLayoutParsingTypeOverride() == null //
|
||||
? layoutParsingRequest.layoutParsingType() : settings.getLayoutParsingTypeOverride(),
|
||||
LayoutParsingType layoutParsingType = settings.getLayoutParsingTypeOverride() == null //
|
||||
? layoutParsingRequest.layoutParsingType() : settings.getLayoutParsingTypeOverride();
|
||||
|
||||
ClassificationDocument classificationDocument = parseLayout(layoutParsingType,
|
||||
originFile,
|
||||
imageServiceResponse,
|
||||
tableServiceResponse,
|
||||
@ -137,18 +147,19 @@ public class LayoutParsingPipeline {
|
||||
|
||||
log.info("Building document graph for {}", layoutParsingRequest.identifier());
|
||||
|
||||
DocumentWithVisualization documentWithVisualization = observeBuildDocumentGraph(settings.getLayoutParsingTypeOverride() == null //
|
||||
? layoutParsingRequest.layoutParsingType() : settings.getLayoutParsingTypeOverride(), classificationDocument);
|
||||
DocumentWithVisualization documentWithVisualization = observeBuildDocumentGraph(layoutParsingType, classificationDocument);
|
||||
|
||||
log.info("Creating viewer document for {}", layoutParsingRequest.identifier());
|
||||
|
||||
layoutGridService.addLayoutGrid(viewerDocumentFile, documentWithVisualization, viewerDocumentFile, false);
|
||||
layoutGridService.addLayoutGrid(viewerDocumentFile, documentWithVisualization, viewerDocumentFile, layoutParsingType, layoutParserVersion, false);
|
||||
|
||||
log.info("Storing resulting files for {}", layoutParsingRequest.identifier());
|
||||
|
||||
layoutParsingStorageService.storeDocumentData(layoutParsingRequest, DocumentDataMapper.toDocumentData(documentWithVisualization.document()));
|
||||
if (layoutParsingRequest.documentMarkdownFileStorageId().isPresent()) {
|
||||
layoutParsingStorageService.storeMarkdownFile(layoutParsingRequest.documentMarkdownFileStorageId().get(), new MarkdownMapper().toMarkdownContent(documentWithVisualization.document()));
|
||||
if (layoutParsingRequest.documentMarkdownFileStorageId()
|
||||
.isPresent()) {
|
||||
layoutParsingStorageService.storeMarkdownFile(layoutParsingRequest.documentMarkdownFileStorageId()
|
||||
.get(), new MarkdownMapper().toMarkdownContent(documentWithVisualization.document()));
|
||||
}
|
||||
layoutParsingStorageService.storeSimplifiedText(layoutParsingRequest, simplifiedSectionTextService.toSimplifiedText(documentWithVisualization.document()));
|
||||
layoutParsingStorageService.storeViewerDocument(layoutParsingRequest, viewerDocumentFile);
|
||||
@ -188,6 +199,7 @@ public class LayoutParsingPipeline {
|
||||
layoutParsingRequest.pageFileStorageId(),
|
||||
layoutParsingRequest.simplifiedTextStorageId(),
|
||||
layoutParsingRequest.viewerDocumentStorageId()))
|
||||
.layoutParserVersion(layoutParserVersion)
|
||||
.build();
|
||||
|
||||
}
|
||||
@ -385,7 +397,8 @@ public class LayoutParsingPipeline {
|
||||
.flatMap(Collection::stream)
|
||||
.map(Character::getTextPosition)
|
||||
.filter(pos -> pos.getDir().equals(dir))
|
||||
.mapToDouble(RedTextPosition::getExactDir).average().orElse(0);
|
||||
.mapToDouble(RedTextPosition::getExactDir).average()
|
||||
.orElse(0);
|
||||
|
||||
if (averageRotation == 0) {
|
||||
continue;
|
||||
|
||||
@ -24,7 +24,7 @@ import lombok.EqualsAndHashCode;
|
||||
@EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = false)
|
||||
public class Line extends TextBoundingBox {
|
||||
|
||||
private static final double WORD_DISTANCE_MULTIPLIER = 0.18;
|
||||
private static final double WORD_DISTANCE_MULTIPLIER = 0.17;
|
||||
|
||||
@EqualsAndHashCode.Include
|
||||
private final double x0;
|
||||
@ -157,6 +157,9 @@ public class Line extends TextBoundingBox {
|
||||
|
||||
private void computeWords(List<Character> characters, double wordSpacing) {
|
||||
|
||||
// In my opinion, the width of a space should scale with the font size, but it currently depends only on the median distance between horizontal neighbours.
|
||||
// If font sizes differ widely on a page, this might lead to missing spaces for the smaller fonts and too many spaces for the larger fonts.
|
||||
// For now, only the scaling factor was changed. If you come across this comment because whitespace is missing again, try scaling with the font size instead of simply changing the factor again.
|
||||
Word word = new Word();
|
||||
Character previous = null;
|
||||
for (Character current : characters) {
|
||||
|
||||
@ -71,6 +71,9 @@ public class TableOfContentsClassificationService {
|
||||
|
||||
private int identifyTOCItems(int start, List<TextBlockOnPage> textBlocks, ClassificationDocument document) {
|
||||
|
||||
if (start >= textBlocks.size()) {
|
||||
return start;
|
||||
}
|
||||
ClassificationPage startPage = textBlocks.get(start).page();
|
||||
List<TextBlockOnPage> initialLookAhead = textBlocks.subList(start, Math.min(start + SURROUNDING_BLOCKS_RADIUS, textBlocks.size()));
|
||||
HashMap<NumberWord, TextBlockOnPage> numberToBlockLookup = new HashMap<>();
|
||||
|
||||
@ -10,6 +10,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.nodes.Headline;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Paragraph;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.DocumentWithVisualization;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.mapper.OutlineMapper;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.visualization.LayoutGrid;
|
||||
@ -32,9 +33,15 @@ public class LayoutGridService {
|
||||
|
||||
@SneakyThrows
|
||||
@Observed(name = "ViewerDocumentService", contextualName = "create-viewer-document")
|
||||
public void addLayoutGrid(File originFile, DocumentWithVisualization document, File destinationFile, boolean layerVisibilityDefaultValue) {
|
||||
public void addLayoutGrid(File originFile,
|
||||
DocumentWithVisualization document,
|
||||
File destinationFile,
|
||||
LayoutParsingType layoutParsingType,
|
||||
String layoutParserVersion,
|
||||
boolean layerVisibilityDefaultValue) {
|
||||
|
||||
LayoutGrid layoutGrid = createLayoutGrid(document.document());
|
||||
String layoutParsingTypeName = layoutParsingType.name();
|
||||
LayoutGrid layoutGrid = createLayoutGrid(document.document(), layoutParserVersion, layoutParsingTypeName);
|
||||
Outline outline = OutlineMapper.createOutline(document.document());
|
||||
layoutGrid.setVisibleByDefault(layerVisibilityDefaultValue);
|
||||
|
||||
@ -42,16 +49,23 @@ public class LayoutGridService {
|
||||
document.layoutDebugLayer().addOutlineHeadlines(document.document());
|
||||
|
||||
if (document.layoutDebugLayer().isActive()) {
|
||||
viewerDocumentService.addLayerGroups(originFile, destinationFile, List.of(layoutGrid, document.layoutDebugLayer()), outline);
|
||||
viewerDocumentService.addLayerGroups(originFile,
|
||||
destinationFile,
|
||||
List.of(layoutGrid, document.layoutDebugLayer()),
|
||||
layoutParserVersion,
|
||||
layoutParsingTypeName,
|
||||
outline);
|
||||
} else {
|
||||
viewerDocumentService.addLayerGroups(originFile, destinationFile, List.of(layoutGrid), outline);
|
||||
viewerDocumentService.addLayerGroups(originFile, destinationFile, List.of(layoutGrid), layoutParserVersion, layoutParsingTypeName, outline);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private LayoutGrid createLayoutGrid(Document document) {
|
||||
private LayoutGrid createLayoutGrid(Document document, String layoutParserVersion, String layoutParsingType) {
|
||||
|
||||
LayoutGrid layoutGrid = new LayoutGrid();
|
||||
layoutGrid.addVersionAndLayoutParsingType(layoutParserVersion, layoutParsingType, document.getFirstPage());
|
||||
|
||||
document.streamAllSubNodes()
|
||||
.peek(layoutGrid::addTreeId)
|
||||
.forEach(semanticNode -> {
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.visualization;
|
||||
|
||||
import java.awt.Color;
|
||||
import java.awt.geom.AffineTransform;
|
||||
import java.awt.geom.Line2D;
|
||||
import java.awt.geom.Point2D;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
@ -25,6 +26,9 @@ import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNo
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SuperSection;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.utils.CoordinateTransforms;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.utils.PageInformation;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations;
|
||||
import com.knecon.fforesight.service.viewerdoc.layers.LayoutGridLayerConfig;
|
||||
import com.knecon.fforesight.service.viewerdoc.model.ColoredLine;
|
||||
@ -89,10 +93,19 @@ public class LayoutGrid extends LayoutGridLayerConfig {
|
||||
public void addTreeId(SemanticNode semanticNode) {
|
||||
|
||||
Page page = semanticNode.getFirstPage();
|
||||
if (semanticNode.getBBox().get(page) == null) {
|
||||
if (semanticNode.getBBox()
|
||||
.get(page) == null) {
|
||||
return;
|
||||
}
|
||||
addPlacedText(page, semanticNode.getBBox().get(page), semanticNode.getBBox().get(page), buildTreeIdString(semanticNode), 1, treeIds, TREEID_COLOR);
|
||||
addPlacedText(page,
|
||||
semanticNode.getBBox()
|
||||
.get(page),
|
||||
semanticNode.getBBox()
|
||||
.get(page),
|
||||
buildTreeIdString(semanticNode),
|
||||
1,
|
||||
treeIds,
|
||||
TREEID_COLOR);
|
||||
}
|
||||
|
||||
|
||||
@ -121,7 +134,8 @@ public class LayoutGrid extends LayoutGridLayerConfig {
|
||||
.toList();
|
||||
Integer maxChildDepth = subSections.stream()
|
||||
.map(node -> node.getTreeId().size())
|
||||
.max(Integer::compareTo).orElse(section.getTreeId().size());
|
||||
.max(Integer::compareTo)
|
||||
.orElse(section.getTreeId().size());
|
||||
int ownDepth = section.getTreeId().size();
|
||||
|
||||
Page firstPage = section.getFirstPage();
|
||||
@ -307,7 +321,8 @@ public class LayoutGrid extends LayoutGridLayerConfig {
|
||||
Visualizations visualizations = semanticNode.getType().equals(NodeType.TABLE_OF_CONTENTS) ? toc : sections;
|
||||
List<ColoredLine> coloredLines = getOrCreateVisualizationsOnPage(page.getNumber(), visualizations).getColoredLines();
|
||||
int lineWidthModifier = maxChildDepth - ownDepth;
|
||||
Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox().get(page), LINE_WIDTH * (1 + lineWidthModifier), LINE_WIDTH * (1 + lineWidthModifier));
|
||||
Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox()
|
||||
.get(page), LINE_WIDTH * (1 + lineWidthModifier), LINE_WIDTH * (1 + lineWidthModifier));
|
||||
|
||||
SemanticNode highestParent = semanticNode.getHighestParent();
|
||||
Rectangle2D highestParentRect = rectangleMap.get(new RectangleIdentifier(highestParent.getTreeId(), page.getNumber()));
|
||||
@ -356,7 +371,8 @@ public class LayoutGrid extends LayoutGridLayerConfig {
|
||||
List<Double> ys = yStream.collect(Collectors.toList());
|
||||
ys.remove(0);
|
||||
|
||||
Rectangle2D tableBBox = table.getBBox().get(page);
|
||||
Rectangle2D tableBBox = table.getBBox()
|
||||
.get(page);
|
||||
List<ColoredLine> coloredLines = getOrCreateVisualizationsOnPage(page.getNumber(), tables).getColoredLines();
|
||||
|
||||
xs.forEach(x -> {
|
||||
@ -398,6 +414,21 @@ public class LayoutGrid extends LayoutGridLayerConfig {
|
||||
}
|
||||
|
||||
|
||||
/**
 * Adds two black text labels to the given page: the layout parser version and,
 * on a second line below it, the layout parsing type. Both are stored in the
 * {@code versionAndType} visualizations of the page's number.
 *
 * @param version           version string to print; rendered via {@code String.valueOf}, so a
 *                          {@code null} value is printed as the text "null"
 * @param layoutParsingType layout parsing type name to print; also rendered via {@code String.valueOf}
 * @param page              page that receives the labels
 */
public void addVersionAndLayoutParsingType(String version, String layoutParsingType, Page page) {
|
||||
|
||||
PageInformation pageInformation = PageInformation.fromPage(page);
|
||||
// Start 5 units below heightRot() (presumably the rotation-adjusted page height - TODO confirm).
double startHeight = pageInformation.heightRot() - 5;
|
||||
// First label anchor at x = 0.
Point2D point1 = new Point2D.Double(0, startHeight);
|
||||
// Second label anchor sits 1.5 * FONT_SIZE below the first one.
Point2D point2 = new Point2D.Double(0, startHeight - FONT_SIZE * 1.5);
|
||||
AffineTransform affineTransform = CoordinateTransforms.calculatePageCoordsToInitialUserSpaceCoords(pageInformation);
|
||||
// Source and destination are the same object, so both points are transformed in place.
affineTransform.transform(point1, point1);
|
||||
affineTransform.transform(point2, point2);
|
||||
getOrCreateVisualizationsOnPage(page.getNumber(), this.versionAndType).getPlacedTexts()
|
||||
.addAll(List.of(PlacedText.textFacingUp(String.valueOf(version), point1, FONT_SIZE, Color.BLACK, FONT),
|
||||
PlacedText.textFacingUp(String.valueOf(layoutParsingType), point2, FONT_SIZE, Color.BLACK, FONT)));
|
||||
}
|
||||
|
||||
|
||||
private record RectangleAndLinesResult(List<ColoredLine> coloredLines, Rectangle2D rectangle, List<Line2D> pageLines) {
|
||||
|
||||
}
|
||||
|
||||
@ -39,6 +39,7 @@ dependencies {
|
||||
implementation("org.springframework.cloud:spring-cloud-starter-openfeign:4.0.4")
|
||||
implementation("net.logstash.logback:logstash-logback-encoder:7.4")
|
||||
implementation("ch.qos.logback:logback-classic")
|
||||
api("com.iqser.red.commons:metric-commons:2.3.0")
|
||||
|
||||
implementation("com.pdftron:PDFNet:10.11.0")
|
||||
|
||||
|
||||
@ -17,7 +17,6 @@ import org.junit.jupiter.api.Test;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Section;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SuperSection;
|
||||
@ -80,7 +79,7 @@ public class OutlineDetectionTest extends AbstractTest {
|
||||
long start = System.currentTimeMillis();
|
||||
ClassificationDocument classificationDocument = parseLayout(fileName, LayoutParsingType.DOCUMINE_OLD);
|
||||
var document = buildGraph(fileName, classificationDocument);
|
||||
layoutGridService.addLayoutGrid(documentFile, document, new File(tmpFileName), true);
|
||||
layoutGridService.addLayoutGrid(documentFile, document, new File(tmpFileName), LayoutParsingType.DOCUMINE_OLD, "TEST_VERSION", true);
|
||||
OutlineObjectTree outlineObjectTree = classificationDocument.getOutlineObjectTree();
|
||||
assertEquals(outlineObjectTree.getRootNodes().size(), 8);
|
||||
assertEquals(outlineObjectTree.getOutlineObjectsPerPage().get(2).size(), 1);
|
||||
|
||||
@ -11,7 +11,6 @@ import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
|
||||
import com.iqser.red.commons.jackson.ObjectMapperFactory;
|
||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
|
||||
@ -31,6 +30,8 @@ public class ViewerDocumentTest extends BuildDocumentTest {
|
||||
PDFTronViewerDocumentService viewerDocumentService = new PDFTronViewerDocumentService(null);
|
||||
LayoutGridService layoutGridService = new LayoutGridService(viewerDocumentService);
|
||||
|
||||
static String TEST_VERSION = "TEST_VERSION";
|
||||
|
||||
|
||||
@BeforeEach
|
||||
public void init() {
|
||||
@ -51,7 +52,7 @@ public class ViewerDocumentTest extends BuildDocumentTest {
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
var document = buildGraph(fileName, LayoutParsingType.REDACT_MANAGER_WITHOUT_DUPLICATE_PARAGRAPH);
|
||||
layoutGridService.addLayoutGrid(documentFile, document, new File(tmpFileName), true);
|
||||
layoutGridService.addLayoutGrid(documentFile, document, new File(tmpFileName), LayoutParsingType.REDACT_MANAGER_WITHOUT_DUPLICATE_PARAGRAPH, TEST_VERSION, true);
|
||||
System.out.printf("Total time: %.2fs%n", ((float) (System.currentTimeMillis() - start)) / 1000);
|
||||
}
|
||||
|
||||
@ -79,7 +80,7 @@ public class ViewerDocumentTest extends BuildDocumentTest {
|
||||
LayoutGridService layoutGridService = new LayoutGridService(viewerDocumentService);
|
||||
var document = DocumentGraphFactory.buildDocumentGraph(LayoutParsingType.DOCUMINE_OLD, classificationDocument);
|
||||
|
||||
layoutGridService.addLayoutGrid(documentFile, document, new File(tmpFileName), true);
|
||||
layoutGridService.addLayoutGrid(documentFile, document, new File(tmpFileName), LayoutParsingType.REDACT_MANAGER_WITHOUT_DUPLICATE_PARAGRAPH, TEST_VERSION, true);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -10,6 +10,7 @@ If optionalContent is false, the layer will not be created as a OCG, and will no
|
||||
*/
|
||||
public record LayerIdentifier(String name, String markedContentName) {
|
||||
|
||||
|
||||
public String markedContentName() {
|
||||
// The prefix KNECON_ is used to identify marked contents as knecon contents later on
|
||||
return KNECON_IDENTIFIER_PREFIX + markedContentName;
|
||||
@ -40,6 +41,7 @@ public record LayerIdentifier(String name, String markedContentName) {
|
||||
public static final LayerIdentifier KNECON_LAYOUT_FIGURES = new LayerIdentifier("Figures", "LAYOUT_FIGURES");
|
||||
public static final LayerIdentifier KNECON_LAYOUT_IMAGES = new LayerIdentifier("Images", "LAYOUT_IMAGES");
|
||||
public static final LayerIdentifier KNECON_LAYOUT_TREE_IDs = new LayerIdentifier("Tree IDs", "LAYOUT_TREE_IDs");
|
||||
public static final LayerIdentifier KNECON_LAYOUT_VERSION_AND_TYPE = new LayerIdentifier("Version and Type", "LAYOUT_PARSER_VERSION_AND_TYPE");
|
||||
public static final LayerIdentifier KNECON_LAYOUT_TOC = new LayerIdentifier("Table of Contents", "TABLE_OF_CONTENTS");
|
||||
|
||||
//layout grid debug
|
||||
|
||||
@ -46,12 +46,13 @@ public class LayoutGridLayerConfig extends AbstractLayerGroup {
|
||||
protected final Visualizations images = Visualizations.builder().layer(LayerIdentifier.KNECON_LAYOUT_IMAGES).build();
|
||||
protected final Visualizations keyValue = Visualizations.builder().layer(LayerIdentifier.KNECON_LAYOUT_KEY_VALUE).build();
|
||||
protected final Visualizations treeIds = Visualizations.builder().layer(LayerIdentifier.KNECON_LAYOUT_TREE_IDs).build();
|
||||
protected final Visualizations versionAndType = Visualizations.builder().layer(LayerIdentifier.KNECON_LAYOUT_VERSION_AND_TYPE).build();
|
||||
|
||||
|
||||
@Override
|
||||
public List<Visualizations> getVisualizations() {
|
||||
|
||||
return List.of(headlines, paragraphs, tables, sections, headerFooter, toc, keyValue, figures, images, treeIds);
|
||||
return List.of(headlines, paragraphs, tables, sections, headerFooter, toc, keyValue, figures, images, treeIds, versionAndType);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -54,7 +54,7 @@ public class PDFTronViewerDocumentService {
|
||||
|
||||
@SneakyThrows
|
||||
@Observed(name = "PDFTronViewerDocumentService", contextualName = "add-visualizations")
|
||||
public void addLayerGroups(File originFile, File destinationFile, List<LayerGroup> layerGroups, Outline outline) {
|
||||
public void addLayerGroups(File originFile, File destinationFile, List<LayerGroup> layerGroups, String layoutParserVersion, String layoutParsingType, Outline outline) {
|
||||
|
||||
synchronized (PDFNet.class) { // synchronized with class, to ensure multiple instances are also synchronized
|
||||
|
||||
@ -116,7 +116,7 @@ public class PDFTronViewerDocumentService {
|
||||
|
||||
// OutlineUtility.addOutline(pdfDoc, outline);
|
||||
|
||||
ViewerDocVersioningUtility.setVersionInDocument(pdfDoc);
|
||||
ViewerDocVersioningUtility.setVersionInDocument(pdfDoc, layoutParserVersion, layoutParsingType);
|
||||
|
||||
saveDocument(pdfDoc, destinationFile);
|
||||
} finally {
|
||||
@ -128,9 +128,9 @@ public class PDFTronViewerDocumentService {
|
||||
|
||||
@SneakyThrows
|
||||
@Observed(name = "PDFTronViewerDocumentService", contextualName = "add-visualizations")
|
||||
public void addLayerGroups(File originFile, File destinationFile, List<LayerGroup> layerGroups) {
|
||||
public void addLayerGroups(File originFile, File destinationFile, List<LayerGroup> layerGroups, String layoutParserVersion, String layoutParsingType) {
|
||||
|
||||
addLayerGroups(originFile, destinationFile, layerGroups, new Outline());
|
||||
addLayerGroups(originFile, destinationFile, layerGroups, layoutParserVersion, layoutParsingType, new Outline());
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -8,6 +8,7 @@ import org.apache.pdfbox.Loader;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
|
||||
import com.pdftron.pdf.PDFDoc;
|
||||
import com.pdftron.sdf.Obj;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.experimental.UtilityClass;
|
||||
@ -21,10 +22,15 @@ public class ViewerDocVersioningUtility {
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public void setVersionInDocument(PDFDoc pdfDoc) {
|
||||
public void setVersionInDocument(PDFDoc pdfDoc, String layoutParserVersion, String layoutParsingType) {
|
||||
|
||||
pdfDoc.getDocInfo().setAuthor(AUTHOR);
|
||||
pdfDoc.getDocInfo().setKeywords(CUSTOM_DICT + ":" + currentVersion);
|
||||
|
||||
Obj versionInfo = pdfDoc.getSDFDoc().createIndirectDict();
|
||||
versionInfo.putString("LayoutParserVersion", layoutParserVersion);
|
||||
versionInfo.putString("LayoutParsingType", layoutParsingType);
|
||||
pdfDoc.getRoot().put("KneconVersionInfo", versionInfo);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -1,26 +1,18 @@
|
||||
package com.knecon.fforesight.service.viewerdoc.service;
|
||||
|
||||
import java.awt.geom.AffineTransform;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
import javax.swing.table.AbstractTableModel;
|
||||
|
||||
import org.junit.jupiter.api.AfterAll;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
|
||||
import com.pdftron.common.Matrix2D;
|
||||
import com.pdftron.pdf.ElementBuilder;
|
||||
import com.pdftron.pdf.ElementReader;
|
||||
import com.pdftron.pdf.ElementWriter;
|
||||
import com.pdftron.pdf.PDFDoc;
|
||||
import com.pdftron.pdf.PDFNet;
|
||||
import com.pdftron.pdf.Page;
|
||||
import com.pdftron.sdf.SDFDoc;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
@ -48,7 +40,7 @@ class ViewerDocVersioningUtilityTest {
|
||||
File file = new ClassPathResource("files/empty.pdf").getFile();
|
||||
Path tmpFile = Files.createTempFile("markedDocument", ".pdf");
|
||||
try (var in = new FileInputStream(file); var doc = new PDFDoc(in); var out = new FileOutputStream(tmpFile.toFile())) {
|
||||
ViewerDocVersioningUtility.setVersionInDocument(doc);
|
||||
ViewerDocVersioningUtility.setVersionInDocument(doc, "layoutParserVersion", "layoutParsingType");
|
||||
doc.save(out, SDFDoc.SaveMode.LINEARIZED, null);
|
||||
}
|
||||
assert ViewerDocVersioningUtility.isCurrentVersion(tmpFile.toFile());
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user