Compare commits

..

1 Commits

Author SHA1 Message Date
Kilian Schuettler
1fb67454ec RED-9139: update document version 2024-11-15 16:35:07 +01:00
18 changed files with 78 additions and 158 deletions

View File

@ -8,7 +8,7 @@ plugins {
group = "com.knecon.fforesight"
val documentVersion by rootProject.extra { "4.433.0" }
val documentVersion by rootProject.extra { "4.432.0" }
java.sourceCompatibility = JavaVersion.VERSION_17
java.targetCompatibility = JavaVersion.VERSION_17

View File

@ -8,20 +8,13 @@ import lombok.Builder;
@Builder
@Schema(description = "Object containing information about the layout parsing.")
public record LayoutParsingFinishedEvent(
@Schema(description = "General purpose identifier. It is returned exactly the same way it is inserted with the LayoutParsingRequest.") //
Map<String, String> identifier,
@Schema(description = "The duration of a single layout parsing in ms.") //
long duration,
@Schema(description = "The number of pages of the parsed document.") //
int numberOfPages,
@Schema(description = "A general message. It contains some information useful for a developer, like the paths where the files are stored. Not meant to be machine readable.") //
String message,
@Schema(description = "The app version of the layout parser.") //
String layoutParserVersion
) {
@Schema(description = "General purpose identifier. It is returned exactly the same way it is inserted with the LayoutParsingRequest.")
Map<String, String> identifier,//
@Schema(description = "The duration of a single layout parsing in ms.")
long duration,//
@Schema(description = "The number of pages of the parsed document.")
int numberOfPages,//
@Schema(description = "A general message. It contains some information useful for a developer, like the paths where the files are stored. Not meant to be machine readable.")
String message) {
}

View File

@ -20,7 +20,6 @@ import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDMarkedContent;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import com.iqser.red.service.redaction.v1.server.mapper.DocumentDataMapper;
@ -88,32 +87,29 @@ import lombok.extern.slf4j.Slf4j;
@Slf4j
@Service
@RequiredArgsConstructor
@FieldDefaults(level = AccessLevel.PRIVATE)
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
public class LayoutParsingPipeline {
final ImageServiceResponseAdapter imageServiceResponseAdapter;
final CvTableParsingAdapter cvTableParsingAdapter;
final LayoutParsingStorageService layoutParsingStorageService;
final SectionsBuilderService sectionsBuilderService;
final SimplifiedSectionTextService simplifiedSectionTextService;
final RulingCleaningService rulingCleaningService;
final TableExtractionService tableExtractionService;
final DocuMineBlockificationService docuMineBlockificationService;
final RedactManagerBlockificationService redactManagerBlockificationService;
final BlockificationPostprocessingService blockificationPostprocessingService;
final DocstrumBlockificationService docstrumBlockificationService;
final LayoutGridService layoutGridService;
final ObservationRegistry observationRegistry;
final VisualLayoutParsingAdapter visualLayoutParsingAdapter;
final GraphicExtractorService graphicExtractorService;
final OutlineExtractorService outlineExtractorService;
final SectionTreeBuilderService sectionTreeBuilderService;
final SectionTreeEnhancementService sectionTreeEnhancementService;
final LayoutParserSettings settings;
final ClassificationService classificationService;
@Value("${LAYOUT_PARSER_VERSION:}")
private String layoutParserVersion;
ImageServiceResponseAdapter imageServiceResponseAdapter;
CvTableParsingAdapter cvTableParsingAdapter;
LayoutParsingStorageService layoutParsingStorageService;
SectionsBuilderService sectionsBuilderService;
SimplifiedSectionTextService simplifiedSectionTextService;
RulingCleaningService rulingCleaningService;
TableExtractionService tableExtractionService;
DocuMineBlockificationService docuMineBlockificationService;
RedactManagerBlockificationService redactManagerBlockificationService;
BlockificationPostprocessingService blockificationPostprocessingService;
DocstrumBlockificationService docstrumBlockificationService;
LayoutGridService layoutGridService;
ObservationRegistry observationRegistry;
VisualLayoutParsingAdapter visualLayoutParsingAdapter;
GraphicExtractorService graphicExtractorService;
OutlineExtractorService outlineExtractorService;
SectionTreeBuilderService sectionTreeBuilderService;
SectionTreeEnhancementService sectionTreeEnhancementService;
LayoutparserSettings settings;
ClassificationService classificationService;
public LayoutParsingFinishedEvent parseLayoutAndSaveFilesToStorage(LayoutParsingRequest layoutParsingRequest) throws IOException {
@ -122,23 +118,17 @@ public class LayoutParsingPipeline {
log.info("Starting layout parsing for {}", layoutParsingRequest.identifier());
File originFile = layoutParsingStorageService.getOriginFile(layoutParsingRequest.originFileStorageId());
File viewerDocumentFile = layoutParsingStorageService.getViewerDocFile(layoutParsingRequest.viewerDocumentStorageId())
.orElse(originFile);
File viewerDocumentFile = layoutParsingStorageService.getViewerDocFile(layoutParsingRequest.viewerDocumentStorageId()).orElse(originFile);
VisualLayoutParsingResponse visualLayoutParsingResponse = layoutParsingRequest.visualLayoutParsingFileId()
.map(layoutParsingStorageService::getVisualLayoutParsingFile)
.orElse(new VisualLayoutParsingResponse());
.map(layoutParsingStorageService::getVisualLayoutParsingFile).orElse(new VisualLayoutParsingResponse());
ImageServiceResponse imageServiceResponse = layoutParsingRequest.imagesFileStorageId()
.map(layoutParsingStorageService::getImagesFile)
.orElse(new ImageServiceResponse());
.map(layoutParsingStorageService::getImagesFile).orElse(new ImageServiceResponse());
TableServiceResponse tableServiceResponse = layoutParsingRequest.tablesFileStorageId()
.map(layoutParsingStorageService::getTablesFile)
.orElse(new TableServiceResponse());
.map(layoutParsingStorageService::getTablesFile).orElse(new TableServiceResponse());
LayoutParsingType layoutParsingType = settings.getLayoutParsingTypeOverride() == null //
? layoutParsingRequest.layoutParsingType() : settings.getLayoutParsingTypeOverride();
ClassificationDocument classificationDocument = parseLayout(layoutParsingType,
ClassificationDocument classificationDocument = parseLayout(settings.getLayoutParsingTypeOverride() == null //
? layoutParsingRequest.layoutParsingType() : settings.getLayoutParsingTypeOverride(),
originFile,
imageServiceResponse,
tableServiceResponse,
@ -147,19 +137,18 @@ public class LayoutParsingPipeline {
log.info("Building document graph for {}", layoutParsingRequest.identifier());
DocumentWithVisualization documentWithVisualization = observeBuildDocumentGraph(layoutParsingType, classificationDocument);
DocumentWithVisualization documentWithVisualization = observeBuildDocumentGraph(settings.getLayoutParsingTypeOverride() == null //
? layoutParsingRequest.layoutParsingType() : settings.getLayoutParsingTypeOverride(), classificationDocument);
log.info("Creating viewer document for {}", layoutParsingRequest.identifier());
layoutGridService.addLayoutGrid(viewerDocumentFile, documentWithVisualization, viewerDocumentFile, layoutParsingType, layoutParserVersion, false);
layoutGridService.addLayoutGrid(viewerDocumentFile, documentWithVisualization, viewerDocumentFile, false);
log.info("Storing resulting files for {}", layoutParsingRequest.identifier());
layoutParsingStorageService.storeDocumentData(layoutParsingRequest, DocumentDataMapper.toDocumentData(documentWithVisualization.document()));
if (layoutParsingRequest.documentMarkdownFileStorageId()
.isPresent()) {
layoutParsingStorageService.storeMarkdownFile(layoutParsingRequest.documentMarkdownFileStorageId()
.get(), new MarkdownMapper().toMarkdownContent(documentWithVisualization.document()));
if (layoutParsingRequest.documentMarkdownFileStorageId().isPresent()) {
layoutParsingStorageService.storeMarkdownFile(layoutParsingRequest.documentMarkdownFileStorageId().get(), new MarkdownMapper().toMarkdownContent(documentWithVisualization.document()));
}
layoutParsingStorageService.storeSimplifiedText(layoutParsingRequest, simplifiedSectionTextService.toSimplifiedText(documentWithVisualization.document()));
layoutParsingStorageService.storeViewerDocument(layoutParsingRequest, viewerDocumentFile);
@ -199,7 +188,6 @@ public class LayoutParsingPipeline {
layoutParsingRequest.pageFileStorageId(),
layoutParsingRequest.simplifiedTextStorageId(),
layoutParsingRequest.viewerDocumentStorageId()))
.layoutParserVersion(layoutParserVersion)
.build();
}
@ -397,8 +385,7 @@ public class LayoutParsingPipeline {
.flatMap(Collection::stream)
.map(Character::getTextPosition)
.filter(pos -> pos.getDir().equals(dir))
.mapToDouble(RedTextPosition::getExactDir).average()
.orElse(0);
.mapToDouble(RedTextPosition::getExactDir).average().orElse(0);
if (averageRotation == 0) {
continue;

View File

@ -13,8 +13,9 @@ import lombok.experimental.FieldDefaults;
@Configuration
@ConfigurationProperties("layoutparser")
@FieldDefaults(level = AccessLevel.PRIVATE)
public class LayoutParserSettings {
public class LayoutparserSettings {
boolean debug;
LayoutParsingType layoutParsingTypeOverride;
String pdftronLicense;
}

View File

@ -24,7 +24,7 @@ import lombok.EqualsAndHashCode;
@EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = false)
public class Line extends TextBoundingBox {
private static final double WORD_DISTANCE_MULTIPLIER = 0.17;
private static final double WORD_DISTANCE_MULTIPLIER = 0.18;
@EqualsAndHashCode.Include
private final double x0;
@ -157,9 +157,6 @@ public class Line extends TextBoundingBox {
private void computeWords(List<Character> characters, double wordSpacing) {
// Imo, the width of space should be scaled with the font size, but it only depends on the median distance between horizontal neighbours.
// If there are large differences in fontsize on a page, this might lead to missing spaces for the smaller fonts and too many for larger fonts.
// I've just now changed the scaling factor. If you come across this comment with missing whitespaces again, try scaling the fontsize instead of simply changing the factor again.
Word word = new Word();
Character previous = null;
for (Character current : characters) {

View File

@ -18,14 +18,10 @@ public class ClassificationPatterns {
public static final Pattern TABLE_OR_FIGURE_HEADLINE_PATTERN = Pattern.compile(
public static final Pattern TABLE_OR_FIGURE_PATTERN = Pattern.compile(
"^\\s*(?:table|continued\\s+table|appendix|figure)\\s+(?:[xvi]+|[a-z0-9]{1,3}(?:\\.[0-9]{1,3})*(?:-[0-9]{1,3})?)\\b",
Pattern.CASE_INSENSITIVE);
public static final Pattern TABLE_MID_SENTENCE_PATTERN = Pattern.compile(
"(?:table|continued\\s+table|appendix|figure)\\s+(?:[xvi]+|[a-z0-9]{1,3}(?:\\.[0-9]{1,3})*(?:-[0-9]{1,3})?)\\b",
Pattern.CASE_INSENSITIVE);
public static final Pattern ALPHANUMERIC = Pattern.compile("[a-zA-Z0-9]");
public static final Pattern NUMERIC = Pattern.compile("[0-9]+");

View File

@ -6,8 +6,7 @@ import static com.knecon.fforesight.service.layoutparser.processor.services.clas
import static com.knecon.fforesight.service.layoutparser.processor.services.classification.ClassificationPatterns.HEADLINE_PATTERN_WITH_SLASHES;
import static com.knecon.fforesight.service.layoutparser.processor.services.classification.ClassificationPatterns.HEADLINE_WITH_2_IDENTIFER_PATTERN;
import static com.knecon.fforesight.service.layoutparser.processor.services.classification.ClassificationPatterns.HEADLINE_WITH_SINGLE_IDENTIFER_PATTERN;
import static com.knecon.fforesight.service.layoutparser.processor.services.classification.ClassificationPatterns.TABLE_MID_SENTENCE_PATTERN;
import static com.knecon.fforesight.service.layoutparser.processor.services.classification.ClassificationPatterns.TABLE_OR_FIGURE_HEADLINE_PATTERN;
import static com.knecon.fforesight.service.layoutparser.processor.services.classification.ClassificationPatterns.TABLE_OR_FIGURE_PATTERN;
import java.util.ArrayList;
import java.util.Comparator;
@ -84,8 +83,7 @@ public class DocuMineClassificationService {
Matcher atLeast3Matcher = AT_LEAST_3_CHARS_PATTERN.matcher(textBlock.toString());
Matcher headlineWithSlashesMatcher = HEADLINE_PATTERN_WITH_SLASHES.matcher(textBlock.toString());
Matcher amountMatcher = AMOUNT_PATTERN.matcher(textBlock.toString());
Matcher tableOrFigureMatcher = TABLE_OR_FIGURE_HEADLINE_PATTERN.matcher(textBlock.toString());
Matcher tableMidSentenceMatcher = TABLE_MID_SENTENCE_PATTERN.matcher(textBlock.toString());
Matcher tableOrFigureMatcher = TABLE_OR_FIGURE_PATTERN.matcher(textBlock.toString());
Matcher headlineWithSingleIdentifierMatcher = HEADLINE_WITH_SINGLE_IDENTIFER_PATTERN.matcher(textBlock.toString());
boolean isAtLeast3Characters = atLeast3Matcher.reset().find();
boolean headlineWithSlashesMatches = headlineWithSlashesMatcher.reset().matches();
@ -150,8 +148,6 @@ public class DocuMineClassificationService {
&& greaterOrEqualFontThanPageAverage(textBlock, page)//
&& PositionUtils.getApproxLineCount(textBlock) < 2.9//
&& (tableOrFigureMatcher.reset().find() || (headlineWithSingleIdentifierMatcher.reset().find() && listIdentifiers.isEmpty())) //
&& tableMidSentenceMatcher.reset().results()
.count() <= 1 //
&& !isAmount//
&& !headlineWithSlashesMatches) {

View File

@ -71,9 +71,6 @@ public class TableOfContentsClassificationService {
private int identifyTOCItems(int start, List<TextBlockOnPage> textBlocks, ClassificationDocument document) {
if (start >= textBlocks.size()) {
return start;
}
ClassificationPage startPage = textBlocks.get(start).page();
List<TextBlockOnPage> initialLookAhead = textBlocks.subList(start, Math.min(start + SURROUNDING_BLOCKS_RADIUS, textBlocks.size()));
HashMap<NumberWord, TextBlockOnPage> numberToBlockLookup = new HashMap<>();

View File

@ -10,7 +10,6 @@ import com.iqser.red.service.redaction.v1.server.model.document.nodes.Headline;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Paragraph;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
import com.knecon.fforesight.service.layoutparser.processor.model.DocumentWithVisualization;
import com.knecon.fforesight.service.layoutparser.processor.services.mapper.OutlineMapper;
import com.knecon.fforesight.service.layoutparser.processor.visualization.LayoutGrid;
@ -33,15 +32,9 @@ public class LayoutGridService {
@SneakyThrows
@Observed(name = "ViewerDocumentService", contextualName = "create-viewer-document")
public void addLayoutGrid(File originFile,
DocumentWithVisualization document,
File destinationFile,
LayoutParsingType layoutParsingType,
String layoutParserVersion,
boolean layerVisibilityDefaultValue) {
public void addLayoutGrid(File originFile, DocumentWithVisualization document, File destinationFile, boolean layerVisibilityDefaultValue) {
String layoutParsingTypeName = layoutParsingType.name();
LayoutGrid layoutGrid = createLayoutGrid(document.document(), layoutParserVersion, layoutParsingTypeName);
LayoutGrid layoutGrid = createLayoutGrid(document.document());
Outline outline = OutlineMapper.createOutline(document.document());
layoutGrid.setVisibleByDefault(layerVisibilityDefaultValue);
@ -49,23 +42,16 @@ public class LayoutGridService {
document.layoutDebugLayer().addOutlineHeadlines(document.document());
if (document.layoutDebugLayer().isActive()) {
viewerDocumentService.addLayerGroups(originFile,
destinationFile,
List.of(layoutGrid, document.layoutDebugLayer()),
layoutParserVersion,
layoutParsingTypeName,
outline);
viewerDocumentService.addLayerGroups(originFile, destinationFile, List.of(layoutGrid, document.layoutDebugLayer()), outline);
} else {
viewerDocumentService.addLayerGroups(originFile, destinationFile, List.of(layoutGrid), layoutParserVersion, layoutParsingTypeName, outline);
viewerDocumentService.addLayerGroups(originFile, destinationFile, List.of(layoutGrid), outline);
}
}
private LayoutGrid createLayoutGrid(Document document, String layoutParserVersion, String layoutParsingType) {
private LayoutGrid createLayoutGrid(Document document) {
LayoutGrid layoutGrid = new LayoutGrid();
layoutGrid.addVersionAndLayoutParsingType(layoutParserVersion, layoutParsingType, document.getFirstPage());
document.streamAllSubNodes()
.peek(layoutGrid::addTreeId)
.forEach(semanticNode -> {

View File

@ -1,7 +1,6 @@
package com.knecon.fforesight.service.layoutparser.processor.visualization;
import java.awt.Color;
import java.awt.geom.AffineTransform;
import java.awt.geom.Line2D;
import java.awt.geom.Point2D;
import java.awt.geom.Rectangle2D;
@ -26,9 +25,6 @@ import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNo
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SuperSection;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
import com.knecon.fforesight.service.layoutparser.processor.utils.CoordinateTransforms;
import com.knecon.fforesight.service.layoutparser.processor.utils.PageInformation;
import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations;
import com.knecon.fforesight.service.viewerdoc.layers.LayoutGridLayerConfig;
import com.knecon.fforesight.service.viewerdoc.model.ColoredLine;
@ -93,19 +89,10 @@ public class LayoutGrid extends LayoutGridLayerConfig {
public void addTreeId(SemanticNode semanticNode) {
Page page = semanticNode.getFirstPage();
if (semanticNode.getBBox()
.get(page) == null) {
if (semanticNode.getBBox().get(page) == null) {
return;
}
addPlacedText(page,
semanticNode.getBBox()
.get(page),
semanticNode.getBBox()
.get(page),
buildTreeIdString(semanticNode),
1,
treeIds,
TREEID_COLOR);
addPlacedText(page, semanticNode.getBBox().get(page), semanticNode.getBBox().get(page), buildTreeIdString(semanticNode), 1, treeIds, TREEID_COLOR);
}
@ -134,8 +121,7 @@ public class LayoutGrid extends LayoutGridLayerConfig {
.toList();
Integer maxChildDepth = subSections.stream()
.map(node -> node.getTreeId().size())
.max(Integer::compareTo)
.orElse(section.getTreeId().size());
.max(Integer::compareTo).orElse(section.getTreeId().size());
int ownDepth = section.getTreeId().size();
Page firstPage = section.getFirstPage();
@ -321,8 +307,7 @@ public class LayoutGrid extends LayoutGridLayerConfig {
Visualizations visualizations = semanticNode.getType().equals(NodeType.TABLE_OF_CONTENTS) ? toc : sections;
List<ColoredLine> coloredLines = getOrCreateVisualizationsOnPage(page.getNumber(), visualizations).getColoredLines();
int lineWidthModifier = maxChildDepth - ownDepth;
Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox()
.get(page), LINE_WIDTH * (1 + lineWidthModifier), LINE_WIDTH * (1 + lineWidthModifier));
Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox().get(page), LINE_WIDTH * (1 + lineWidthModifier), LINE_WIDTH * (1 + lineWidthModifier));
SemanticNode highestParent = semanticNode.getHighestParent();
Rectangle2D highestParentRect = rectangleMap.get(new RectangleIdentifier(highestParent.getTreeId(), page.getNumber()));
@ -371,8 +356,7 @@ public class LayoutGrid extends LayoutGridLayerConfig {
List<Double> ys = yStream.collect(Collectors.toList());
ys.remove(0);
Rectangle2D tableBBox = table.getBBox()
.get(page);
Rectangle2D tableBBox = table.getBBox().get(page);
List<ColoredLine> coloredLines = getOrCreateVisualizationsOnPage(page.getNumber(), tables).getColoredLines();
xs.forEach(x -> {
@ -414,21 +398,6 @@ public class LayoutGrid extends LayoutGridLayerConfig {
}
public void addVersionAndLayoutParsingType(String version, String layoutParsingType, Page page) {
PageInformation pageInformation = PageInformation.fromPage(page);
double startHeight = pageInformation.heightRot() - 5;
Point2D point1 = new Point2D.Double(0, startHeight);
Point2D point2 = new Point2D.Double(0, startHeight - FONT_SIZE * 1.5);
AffineTransform affineTransform = CoordinateTransforms.calculatePageCoordsToInitialUserSpaceCoords(pageInformation);
affineTransform.transform(point1, point1);
affineTransform.transform(point2, point2);
getOrCreateVisualizationsOnPage(page.getNumber(), this.versionAndType).getPlacedTexts()
.addAll(List.of(PlacedText.textFacingUp(String.valueOf(version), point1, FONT_SIZE, Color.BLACK, FONT),
PlacedText.textFacingUp(String.valueOf(layoutParsingType), point2, FONT_SIZE, Color.BLACK, FONT)));
}
private record RectangleAndLinesResult(List<ColoredLine> coloredLines, Rectangle2D rectangle, List<Line2D> pageLines) {
}

View File

@ -39,7 +39,6 @@ dependencies {
implementation("org.springframework.cloud:spring-cloud-starter-openfeign:4.0.4")
implementation("net.logstash.logback:logstash-logback-encoder:7.4")
implementation("ch.qos.logback:logback-classic")
api("com.iqser.red.commons:metric-commons:2.3.0")
implementation("com.pdftron:PDFNet:10.11.0")

View File

@ -17,6 +17,7 @@ import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.ClassPathResource;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Section;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SuperSection;
@ -79,7 +80,7 @@ public class OutlineDetectionTest extends AbstractTest {
long start = System.currentTimeMillis();
ClassificationDocument classificationDocument = parseLayout(fileName, LayoutParsingType.DOCUMINE_OLD);
var document = buildGraph(fileName, classificationDocument);
layoutGridService.addLayoutGrid(documentFile, document, new File(tmpFileName), LayoutParsingType.DOCUMINE_OLD, "TEST_VERSION", true);
layoutGridService.addLayoutGrid(documentFile, document, new File(tmpFileName), true);
OutlineObjectTree outlineObjectTree = classificationDocument.getOutlineObjectTree();
assertEquals(outlineObjectTree.getRootNodes().size(), 8);
assertEquals(outlineObjectTree.getOutlineObjectsPerPage().get(2).size(), 1);

View File

@ -11,6 +11,7 @@ import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.ClassPathResource;
import com.iqser.red.commons.jackson.ObjectMapperFactory;
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
@ -30,8 +31,6 @@ public class ViewerDocumentTest extends BuildDocumentTest {
PDFTronViewerDocumentService viewerDocumentService = new PDFTronViewerDocumentService(null);
LayoutGridService layoutGridService = new LayoutGridService(viewerDocumentService);
static String TEST_VERSION = "TEST_VERSION";
@BeforeEach
public void init() {
@ -52,7 +51,7 @@ public class ViewerDocumentTest extends BuildDocumentTest {
long start = System.currentTimeMillis();
var document = buildGraph(fileName, LayoutParsingType.REDACT_MANAGER_WITHOUT_DUPLICATE_PARAGRAPH);
layoutGridService.addLayoutGrid(documentFile, document, new File(tmpFileName), LayoutParsingType.REDACT_MANAGER_WITHOUT_DUPLICATE_PARAGRAPH, TEST_VERSION, true);
layoutGridService.addLayoutGrid(documentFile, document, new File(tmpFileName), true);
System.out.printf("Total time: %.2fs%n", ((float) (System.currentTimeMillis() - start)) / 1000);
}
@ -80,7 +79,7 @@ public class ViewerDocumentTest extends BuildDocumentTest {
LayoutGridService layoutGridService = new LayoutGridService(viewerDocumentService);
var document = DocumentGraphFactory.buildDocumentGraph(LayoutParsingType.DOCUMINE_OLD, classificationDocument);
layoutGridService.addLayoutGrid(documentFile, document, new File(tmpFileName), LayoutParsingType.REDACT_MANAGER_WITHOUT_DUPLICATE_PARAGRAPH, TEST_VERSION, true);
layoutGridService.addLayoutGrid(documentFile, document, new File(tmpFileName), true);
}
}

View File

@ -10,7 +10,6 @@ If optionalContent is false, the layer will not be created as a OCG, and will no
*/
public record LayerIdentifier(String name, String markedContentName) {
public String markedContentName() {
// The prefix KNECON_ is used to identify marked contents as knecon contents later on
return KNECON_IDENTIFIER_PREFIX + markedContentName;
@ -41,7 +40,6 @@ public record LayerIdentifier(String name, String markedContentName) {
public static final LayerIdentifier KNECON_LAYOUT_FIGURES = new LayerIdentifier("Figures", "LAYOUT_FIGURES");
public static final LayerIdentifier KNECON_LAYOUT_IMAGES = new LayerIdentifier("Images", "LAYOUT_IMAGES");
public static final LayerIdentifier KNECON_LAYOUT_TREE_IDs = new LayerIdentifier("Tree IDs", "LAYOUT_TREE_IDs");
public static final LayerIdentifier KNECON_LAYOUT_VERSION_AND_TYPE = new LayerIdentifier("Version and Type", "LAYOUT_PARSER_VERSION_AND_TYPE");
public static final LayerIdentifier KNECON_LAYOUT_TOC = new LayerIdentifier("Table of Contents", "TABLE_OF_CONTENTS");
//layout grid debug

View File

@ -46,13 +46,12 @@ public class LayoutGridLayerConfig extends AbstractLayerGroup {
protected final Visualizations images = Visualizations.builder().layer(LayerIdentifier.KNECON_LAYOUT_IMAGES).build();
protected final Visualizations keyValue = Visualizations.builder().layer(LayerIdentifier.KNECON_LAYOUT_KEY_VALUE).build();
protected final Visualizations treeIds = Visualizations.builder().layer(LayerIdentifier.KNECON_LAYOUT_TREE_IDs).build();
protected final Visualizations versionAndType = Visualizations.builder().layer(LayerIdentifier.KNECON_LAYOUT_VERSION_AND_TYPE).build();
@Override
public List<Visualizations> getVisualizations() {
return List.of(headlines, paragraphs, tables, sections, headerFooter, toc, keyValue, figures, images, treeIds, versionAndType);
return List.of(headlines, paragraphs, tables, sections, headerFooter, toc, keyValue, figures, images, treeIds);
}
}

View File

@ -54,7 +54,7 @@ public class PDFTronViewerDocumentService {
@SneakyThrows
@Observed(name = "PDFTronViewerDocumentService", contextualName = "add-visualizations")
public void addLayerGroups(File originFile, File destinationFile, List<LayerGroup> layerGroups, String layoutParserVersion, String layoutParsingType, Outline outline) {
public void addLayerGroups(File originFile, File destinationFile, List<LayerGroup> layerGroups, Outline outline) {
synchronized (PDFNet.class) { // synchronized with class, to ensure multiple instances are also synchronized
@ -116,7 +116,7 @@ public class PDFTronViewerDocumentService {
// OutlineUtility.addOutline(pdfDoc, outline);
ViewerDocVersioningUtility.setVersionInDocument(pdfDoc, layoutParserVersion, layoutParsingType);
ViewerDocVersioningUtility.setVersionInDocument(pdfDoc);
saveDocument(pdfDoc, destinationFile);
} finally {
@ -128,9 +128,9 @@ public class PDFTronViewerDocumentService {
@SneakyThrows
@Observed(name = "PDFTronViewerDocumentService", contextualName = "add-visualizations")
public void addLayerGroups(File originFile, File destinationFile, List<LayerGroup> layerGroups, String layoutParserVersion, String layoutParsingType) {
public void addLayerGroups(File originFile, File destinationFile, List<LayerGroup> layerGroups) {
addLayerGroups(originFile, destinationFile, layerGroups, layoutParserVersion, layoutParsingType, new Outline());
addLayerGroups(originFile, destinationFile, layerGroups, new Outline());
}

View File

@ -8,7 +8,6 @@ import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import com.pdftron.pdf.PDFDoc;
import com.pdftron.sdf.Obj;
import lombok.SneakyThrows;
import lombok.experimental.UtilityClass;
@ -22,15 +21,10 @@ public class ViewerDocVersioningUtility {
@SneakyThrows
public void setVersionInDocument(PDFDoc pdfDoc, String layoutParserVersion, String layoutParsingType) {
public void setVersionInDocument(PDFDoc pdfDoc) {
pdfDoc.getDocInfo().setAuthor(AUTHOR);
pdfDoc.getDocInfo().setKeywords(CUSTOM_DICT + ":" + currentVersion);
Obj versionInfo = pdfDoc.getSDFDoc().createIndirectDict();
versionInfo.putString("LayoutParserVersion", layoutParserVersion);
versionInfo.putString("LayoutParsingType", layoutParsingType);
pdfDoc.getRoot().put("KneconVersionInfo", versionInfo);
}

View File

@ -1,18 +1,26 @@
package com.knecon.fforesight.service.viewerdoc.service;
import java.awt.geom.AffineTransform;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import javax.swing.table.AbstractTableModel;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.springframework.core.io.ClassPathResource;
import com.pdftron.common.Matrix2D;
import com.pdftron.pdf.ElementBuilder;
import com.pdftron.pdf.ElementReader;
import com.pdftron.pdf.ElementWriter;
import com.pdftron.pdf.PDFDoc;
import com.pdftron.pdf.PDFNet;
import com.pdftron.pdf.Page;
import com.pdftron.sdf.SDFDoc;
import lombok.SneakyThrows;
@ -40,7 +48,7 @@ class ViewerDocVersioningUtilityTest {
File file = new ClassPathResource("files/empty.pdf").getFile();
Path tmpFile = Files.createTempFile("markedDocument", ".pdf");
try (var in = new FileInputStream(file); var doc = new PDFDoc(in); var out = new FileOutputStream(tmpFile.toFile())) {
ViewerDocVersioningUtility.setVersionInDocument(doc, "layoutParserVersion", "layoutParsingType");
ViewerDocVersioningUtility.setVersionInDocument(doc);
doc.save(out, SDFDoc.SaveMode.LINEARIZED, null);
}
assert ViewerDocVersioningUtility.isCurrentVersion(tmpFile.toFile());