Compare commits
11 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ef23ee0ade | ||
|
|
af31f52b47 | ||
|
|
b5152112ee | ||
|
|
85ea4ef455 | ||
|
|
01f8c01fff | ||
|
|
0b6a292c75 | ||
|
|
e24020589c | ||
|
|
c619b845e8 | ||
|
|
ed0371ca11 | ||
|
|
89b5be8d67 | ||
|
|
077ce60c9d |
@ -8,7 +8,7 @@ plugins {
|
|||||||
|
|
||||||
group = "com.knecon.fforesight"
|
group = "com.knecon.fforesight"
|
||||||
|
|
||||||
val documentVersion by rootProject.extra { "4.425.0-RED9139.13-RED9139.0-RED9139.0" }
|
val documentVersion by rootProject.extra { "4.433.0" }
|
||||||
|
|
||||||
java.sourceCompatibility = JavaVersion.VERSION_17
|
java.sourceCompatibility = JavaVersion.VERSION_17
|
||||||
java.targetCompatibility = JavaVersion.VERSION_17
|
java.targetCompatibility = JavaVersion.VERSION_17
|
||||||
|
|||||||
@ -8,13 +8,20 @@ import lombok.Builder;
|
|||||||
@Builder
|
@Builder
|
||||||
@Schema(description = "Object containing information about the layout parsing.")
|
@Schema(description = "Object containing information about the layout parsing.")
|
||||||
public record LayoutParsingFinishedEvent(
|
public record LayoutParsingFinishedEvent(
|
||||||
@Schema(description = "General purpose identifier. It is returned exactly the same way it is inserted with the LayoutParsingRequest.")
|
@Schema(description = "General purpose identifier. It is returned exactly the same way it is inserted with the LayoutParsingRequest.") //
|
||||||
Map<String, String> identifier,//
|
Map<String, String> identifier,
|
||||||
@Schema(description = "The duration of a single layout parsing in ms.")
|
|
||||||
long duration,//
|
@Schema(description = "The duration of a single layout parsing in ms.") //
|
||||||
@Schema(description = "The number of pages of the parsed document.")
|
long duration,
|
||||||
int numberOfPages,//
|
|
||||||
@Schema(description = "A general message. It contains some information useful for a developer, like the paths where the files are stored. Not meant to be machine readable.")
|
@Schema(description = "The number of pages of the parsed document.") //
|
||||||
String message) {
|
int numberOfPages,
|
||||||
|
|
||||||
|
@Schema(description = "A general message. It contains some information useful for a developer, like the paths where the files are stored. Not meant to be machine readable.") //
|
||||||
|
String message,
|
||||||
|
|
||||||
|
@Schema(description = "The app version of the layout parser.") //
|
||||||
|
String layoutParserVersion
|
||||||
|
) {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -13,9 +13,8 @@ import lombok.experimental.FieldDefaults;
|
|||||||
@Configuration
|
@Configuration
|
||||||
@ConfigurationProperties("layoutparser")
|
@ConfigurationProperties("layoutparser")
|
||||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||||
public class LayoutparserSettings {
|
public class LayoutParserSettings {
|
||||||
|
|
||||||
boolean debug;
|
boolean debug;
|
||||||
LayoutParsingType layoutParsingTypeOverride;
|
LayoutParsingType layoutParsingTypeOverride;
|
||||||
String pdftronLicense;
|
|
||||||
}
|
}
|
||||||
@ -20,6 +20,7 @@ import org.apache.pdfbox.pdmodel.PDDocument;
|
|||||||
import org.apache.pdfbox.pdmodel.PDPage;
|
import org.apache.pdfbox.pdmodel.PDPage;
|
||||||
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
||||||
import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDMarkedContent;
|
import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDMarkedContent;
|
||||||
|
import org.springframework.beans.factory.annotation.Value;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.mapper.DocumentDataMapper;
|
import com.iqser.red.service.redaction.v1.server.mapper.DocumentDataMapper;
|
||||||
@ -87,29 +88,32 @@ import lombok.extern.slf4j.Slf4j;
|
|||||||
@Slf4j
|
@Slf4j
|
||||||
@Service
|
@Service
|
||||||
@RequiredArgsConstructor
|
@RequiredArgsConstructor
|
||||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||||
public class LayoutParsingPipeline {
|
public class LayoutParsingPipeline {
|
||||||
|
|
||||||
ImageServiceResponseAdapter imageServiceResponseAdapter;
|
final ImageServiceResponseAdapter imageServiceResponseAdapter;
|
||||||
CvTableParsingAdapter cvTableParsingAdapter;
|
final CvTableParsingAdapter cvTableParsingAdapter;
|
||||||
LayoutParsingStorageService layoutParsingStorageService;
|
final LayoutParsingStorageService layoutParsingStorageService;
|
||||||
SectionsBuilderService sectionsBuilderService;
|
final SectionsBuilderService sectionsBuilderService;
|
||||||
SimplifiedSectionTextService simplifiedSectionTextService;
|
final SimplifiedSectionTextService simplifiedSectionTextService;
|
||||||
RulingCleaningService rulingCleaningService;
|
final RulingCleaningService rulingCleaningService;
|
||||||
TableExtractionService tableExtractionService;
|
final TableExtractionService tableExtractionService;
|
||||||
DocuMineBlockificationService docuMineBlockificationService;
|
final DocuMineBlockificationService docuMineBlockificationService;
|
||||||
RedactManagerBlockificationService redactManagerBlockificationService;
|
final RedactManagerBlockificationService redactManagerBlockificationService;
|
||||||
BlockificationPostprocessingService blockificationPostprocessingService;
|
final BlockificationPostprocessingService blockificationPostprocessingService;
|
||||||
DocstrumBlockificationService docstrumBlockificationService;
|
final DocstrumBlockificationService docstrumBlockificationService;
|
||||||
LayoutGridService layoutGridService;
|
final LayoutGridService layoutGridService;
|
||||||
ObservationRegistry observationRegistry;
|
final ObservationRegistry observationRegistry;
|
||||||
VisualLayoutParsingAdapter visualLayoutParsingAdapter;
|
final VisualLayoutParsingAdapter visualLayoutParsingAdapter;
|
||||||
GraphicExtractorService graphicExtractorService;
|
final GraphicExtractorService graphicExtractorService;
|
||||||
OutlineExtractorService outlineExtractorService;
|
final OutlineExtractorService outlineExtractorService;
|
||||||
SectionTreeBuilderService sectionTreeBuilderService;
|
final SectionTreeBuilderService sectionTreeBuilderService;
|
||||||
SectionTreeEnhancementService sectionTreeEnhancementService;
|
final SectionTreeEnhancementService sectionTreeEnhancementService;
|
||||||
LayoutparserSettings settings;
|
final LayoutParserSettings settings;
|
||||||
ClassificationService classificationService;
|
final ClassificationService classificationService;
|
||||||
|
|
||||||
|
@Value("${LAYOUT_PARSER_VERSION:}")
|
||||||
|
private String layoutParserVersion;
|
||||||
|
|
||||||
|
|
||||||
public LayoutParsingFinishedEvent parseLayoutAndSaveFilesToStorage(LayoutParsingRequest layoutParsingRequest) throws IOException {
|
public LayoutParsingFinishedEvent parseLayoutAndSaveFilesToStorage(LayoutParsingRequest layoutParsingRequest) throws IOException {
|
||||||
@ -118,17 +122,23 @@ public class LayoutParsingPipeline {
|
|||||||
log.info("Starting layout parsing for {}", layoutParsingRequest.identifier());
|
log.info("Starting layout parsing for {}", layoutParsingRequest.identifier());
|
||||||
|
|
||||||
File originFile = layoutParsingStorageService.getOriginFile(layoutParsingRequest.originFileStorageId());
|
File originFile = layoutParsingStorageService.getOriginFile(layoutParsingRequest.originFileStorageId());
|
||||||
File viewerDocumentFile = layoutParsingStorageService.getViewerDocFile(layoutParsingRequest.viewerDocumentStorageId()).orElse(originFile);
|
File viewerDocumentFile = layoutParsingStorageService.getViewerDocFile(layoutParsingRequest.viewerDocumentStorageId())
|
||||||
|
.orElse(originFile);
|
||||||
|
|
||||||
VisualLayoutParsingResponse visualLayoutParsingResponse = layoutParsingRequest.visualLayoutParsingFileId()
|
VisualLayoutParsingResponse visualLayoutParsingResponse = layoutParsingRequest.visualLayoutParsingFileId()
|
||||||
.map(layoutParsingStorageService::getVisualLayoutParsingFile).orElse(new VisualLayoutParsingResponse());
|
.map(layoutParsingStorageService::getVisualLayoutParsingFile)
|
||||||
|
.orElse(new VisualLayoutParsingResponse());
|
||||||
ImageServiceResponse imageServiceResponse = layoutParsingRequest.imagesFileStorageId()
|
ImageServiceResponse imageServiceResponse = layoutParsingRequest.imagesFileStorageId()
|
||||||
.map(layoutParsingStorageService::getImagesFile).orElse(new ImageServiceResponse());
|
.map(layoutParsingStorageService::getImagesFile)
|
||||||
|
.orElse(new ImageServiceResponse());
|
||||||
TableServiceResponse tableServiceResponse = layoutParsingRequest.tablesFileStorageId()
|
TableServiceResponse tableServiceResponse = layoutParsingRequest.tablesFileStorageId()
|
||||||
.map(layoutParsingStorageService::getTablesFile).orElse(new TableServiceResponse());
|
.map(layoutParsingStorageService::getTablesFile)
|
||||||
|
.orElse(new TableServiceResponse());
|
||||||
|
|
||||||
ClassificationDocument classificationDocument = parseLayout(settings.getLayoutParsingTypeOverride() == null //
|
LayoutParsingType layoutParsingType = settings.getLayoutParsingTypeOverride() == null //
|
||||||
? layoutParsingRequest.layoutParsingType() : settings.getLayoutParsingTypeOverride(),
|
? layoutParsingRequest.layoutParsingType() : settings.getLayoutParsingTypeOverride();
|
||||||
|
|
||||||
|
ClassificationDocument classificationDocument = parseLayout(layoutParsingType,
|
||||||
originFile,
|
originFile,
|
||||||
imageServiceResponse,
|
imageServiceResponse,
|
||||||
tableServiceResponse,
|
tableServiceResponse,
|
||||||
@ -137,18 +147,19 @@ public class LayoutParsingPipeline {
|
|||||||
|
|
||||||
log.info("Building document graph for {}", layoutParsingRequest.identifier());
|
log.info("Building document graph for {}", layoutParsingRequest.identifier());
|
||||||
|
|
||||||
DocumentWithVisualization documentWithVisualization = observeBuildDocumentGraph(settings.getLayoutParsingTypeOverride() == null //
|
DocumentWithVisualization documentWithVisualization = observeBuildDocumentGraph(layoutParsingType, classificationDocument);
|
||||||
? layoutParsingRequest.layoutParsingType() : settings.getLayoutParsingTypeOverride(), classificationDocument);
|
|
||||||
|
|
||||||
log.info("Creating viewer document for {}", layoutParsingRequest.identifier());
|
log.info("Creating viewer document for {}", layoutParsingRequest.identifier());
|
||||||
|
|
||||||
layoutGridService.addLayoutGrid(viewerDocumentFile, documentWithVisualization, viewerDocumentFile, false);
|
layoutGridService.addLayoutGrid(viewerDocumentFile, documentWithVisualization, viewerDocumentFile, layoutParsingType, layoutParserVersion, false);
|
||||||
|
|
||||||
log.info("Storing resulting files for {}", layoutParsingRequest.identifier());
|
log.info("Storing resulting files for {}", layoutParsingRequest.identifier());
|
||||||
|
|
||||||
layoutParsingStorageService.storeDocumentData(layoutParsingRequest, DocumentDataMapper.toDocumentData(documentWithVisualization.document()));
|
layoutParsingStorageService.storeDocumentData(layoutParsingRequest, DocumentDataMapper.toDocumentData(documentWithVisualization.document()));
|
||||||
if (layoutParsingRequest.documentMarkdownFileStorageId().isPresent()) {
|
if (layoutParsingRequest.documentMarkdownFileStorageId()
|
||||||
layoutParsingStorageService.storeMarkdownFile(layoutParsingRequest.documentMarkdownFileStorageId().get(), new MarkdownMapper().toMarkdownContent(documentWithVisualization.document()));
|
.isPresent()) {
|
||||||
|
layoutParsingStorageService.storeMarkdownFile(layoutParsingRequest.documentMarkdownFileStorageId()
|
||||||
|
.get(), new MarkdownMapper().toMarkdownContent(documentWithVisualization.document()));
|
||||||
}
|
}
|
||||||
layoutParsingStorageService.storeSimplifiedText(layoutParsingRequest, simplifiedSectionTextService.toSimplifiedText(documentWithVisualization.document()));
|
layoutParsingStorageService.storeSimplifiedText(layoutParsingRequest, simplifiedSectionTextService.toSimplifiedText(documentWithVisualization.document()));
|
||||||
layoutParsingStorageService.storeViewerDocument(layoutParsingRequest, viewerDocumentFile);
|
layoutParsingStorageService.storeViewerDocument(layoutParsingRequest, viewerDocumentFile);
|
||||||
@ -188,6 +199,7 @@ public class LayoutParsingPipeline {
|
|||||||
layoutParsingRequest.pageFileStorageId(),
|
layoutParsingRequest.pageFileStorageId(),
|
||||||
layoutParsingRequest.simplifiedTextStorageId(),
|
layoutParsingRequest.simplifiedTextStorageId(),
|
||||||
layoutParsingRequest.viewerDocumentStorageId()))
|
layoutParsingRequest.viewerDocumentStorageId()))
|
||||||
|
.layoutParserVersion(layoutParserVersion)
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -385,7 +397,8 @@ public class LayoutParsingPipeline {
|
|||||||
.flatMap(Collection::stream)
|
.flatMap(Collection::stream)
|
||||||
.map(Character::getTextPosition)
|
.map(Character::getTextPosition)
|
||||||
.filter(pos -> pos.getDir().equals(dir))
|
.filter(pos -> pos.getDir().equals(dir))
|
||||||
.mapToDouble(RedTextPosition::getExactDir).average().orElse(0);
|
.mapToDouble(RedTextPosition::getExactDir).average()
|
||||||
|
.orElse(0);
|
||||||
|
|
||||||
if (averageRotation == 0) {
|
if (averageRotation == 0) {
|
||||||
continue;
|
continue;
|
||||||
|
|||||||
@ -24,7 +24,7 @@ import lombok.EqualsAndHashCode;
|
|||||||
@EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = false)
|
@EqualsAndHashCode(onlyExplicitlyIncluded = true, callSuper = false)
|
||||||
public class Line extends TextBoundingBox {
|
public class Line extends TextBoundingBox {
|
||||||
|
|
||||||
private static final double WORD_DISTANCE_MULTIPLIER = 0.18;
|
private static final double WORD_DISTANCE_MULTIPLIER = 0.17;
|
||||||
|
|
||||||
@EqualsAndHashCode.Include
|
@EqualsAndHashCode.Include
|
||||||
private final double x0;
|
private final double x0;
|
||||||
@ -157,6 +157,9 @@ public class Line extends TextBoundingBox {
|
|||||||
|
|
||||||
private void computeWords(List<Character> characters, double wordSpacing) {
|
private void computeWords(List<Character> characters, double wordSpacing) {
|
||||||
|
|
||||||
|
// Imo, the width of space should be scaled with the font size, but it only depends on the median distance between horizontal neighbours.
|
||||||
|
// If there are large differences in fontsize on a page, this might lead to missing spaces for the smaller fonts and too many for larger fonts.
|
||||||
|
// I've just now changed the scaling factor. If you come across this comment with missing whitespaces again, try scaling the fontsize instead of simply changing the factor again.
|
||||||
Word word = new Word();
|
Word word = new Word();
|
||||||
Character previous = null;
|
Character previous = null;
|
||||||
for (Character current : characters) {
|
for (Character current : characters) {
|
||||||
|
|||||||
@ -18,10 +18,14 @@ public class ClassificationPatterns {
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
public static final Pattern TABLE_OR_FIGURE_PATTERN = Pattern.compile(
|
public static final Pattern TABLE_OR_FIGURE_HEADLINE_PATTERN = Pattern.compile(
|
||||||
"^\\s*(?:table|continued\\s+table|appendix|figure)\\s+(?:[xvi]+|[a-z0-9]{1,3}(?:\\.[0-9]{1,3})*(?:-[0-9]{1,3})?)\\b",
|
"^\\s*(?:table|continued\\s+table|appendix|figure)\\s+(?:[xvi]+|[a-z0-9]{1,3}(?:\\.[0-9]{1,3})*(?:-[0-9]{1,3})?)\\b",
|
||||||
Pattern.CASE_INSENSITIVE);
|
Pattern.CASE_INSENSITIVE);
|
||||||
|
|
||||||
|
public static final Pattern TABLE_MID_SENTENCE_PATTERN = Pattern.compile(
|
||||||
|
"(?:table|continued\\s+table|appendix|figure)\\s+(?:[xvi]+|[a-z0-9]{1,3}(?:\\.[0-9]{1,3})*(?:-[0-9]{1,3})?)\\b",
|
||||||
|
Pattern.CASE_INSENSITIVE);
|
||||||
|
|
||||||
public static final Pattern ALPHANUMERIC = Pattern.compile("[a-zA-Z0-9]");
|
public static final Pattern ALPHANUMERIC = Pattern.compile("[a-zA-Z0-9]");
|
||||||
|
|
||||||
public static final Pattern NUMERIC = Pattern.compile("[0-9]+");
|
public static final Pattern NUMERIC = Pattern.compile("[0-9]+");
|
||||||
|
|||||||
@ -6,7 +6,8 @@ import static com.knecon.fforesight.service.layoutparser.processor.services.clas
|
|||||||
import static com.knecon.fforesight.service.layoutparser.processor.services.classification.ClassificationPatterns.HEADLINE_PATTERN_WITH_SLASHES;
|
import static com.knecon.fforesight.service.layoutparser.processor.services.classification.ClassificationPatterns.HEADLINE_PATTERN_WITH_SLASHES;
|
||||||
import static com.knecon.fforesight.service.layoutparser.processor.services.classification.ClassificationPatterns.HEADLINE_WITH_2_IDENTIFER_PATTERN;
|
import static com.knecon.fforesight.service.layoutparser.processor.services.classification.ClassificationPatterns.HEADLINE_WITH_2_IDENTIFER_PATTERN;
|
||||||
import static com.knecon.fforesight.service.layoutparser.processor.services.classification.ClassificationPatterns.HEADLINE_WITH_SINGLE_IDENTIFER_PATTERN;
|
import static com.knecon.fforesight.service.layoutparser.processor.services.classification.ClassificationPatterns.HEADLINE_WITH_SINGLE_IDENTIFER_PATTERN;
|
||||||
import static com.knecon.fforesight.service.layoutparser.processor.services.classification.ClassificationPatterns.TABLE_OR_FIGURE_PATTERN;
|
import static com.knecon.fforesight.service.layoutparser.processor.services.classification.ClassificationPatterns.TABLE_MID_SENTENCE_PATTERN;
|
||||||
|
import static com.knecon.fforesight.service.layoutparser.processor.services.classification.ClassificationPatterns.TABLE_OR_FIGURE_HEADLINE_PATTERN;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
@ -83,7 +84,8 @@ public class DocuMineClassificationService {
|
|||||||
Matcher atLeast3Matcher = AT_LEAST_3_CHARS_PATTERN.matcher(textBlock.toString());
|
Matcher atLeast3Matcher = AT_LEAST_3_CHARS_PATTERN.matcher(textBlock.toString());
|
||||||
Matcher headlineWithSlashesMatcher = HEADLINE_PATTERN_WITH_SLASHES.matcher(textBlock.toString());
|
Matcher headlineWithSlashesMatcher = HEADLINE_PATTERN_WITH_SLASHES.matcher(textBlock.toString());
|
||||||
Matcher amountMatcher = AMOUNT_PATTERN.matcher(textBlock.toString());
|
Matcher amountMatcher = AMOUNT_PATTERN.matcher(textBlock.toString());
|
||||||
Matcher tableOrFigureMatcher = TABLE_OR_FIGURE_PATTERN.matcher(textBlock.toString());
|
Matcher tableOrFigureMatcher = TABLE_OR_FIGURE_HEADLINE_PATTERN.matcher(textBlock.toString());
|
||||||
|
Matcher tableMidSentenceMatcher = TABLE_MID_SENTENCE_PATTERN.matcher(textBlock.toString());
|
||||||
Matcher headlineWithSingleIdentifierMatcher = HEADLINE_WITH_SINGLE_IDENTIFER_PATTERN.matcher(textBlock.toString());
|
Matcher headlineWithSingleIdentifierMatcher = HEADLINE_WITH_SINGLE_IDENTIFER_PATTERN.matcher(textBlock.toString());
|
||||||
boolean isAtLeast3Characters = atLeast3Matcher.reset().find();
|
boolean isAtLeast3Characters = atLeast3Matcher.reset().find();
|
||||||
boolean headlineWithSlashesMatches = headlineWithSlashesMatcher.reset().matches();
|
boolean headlineWithSlashesMatches = headlineWithSlashesMatcher.reset().matches();
|
||||||
@ -148,6 +150,8 @@ public class DocuMineClassificationService {
|
|||||||
&& greaterOrEqualFontThanPageAverage(textBlock, page)//
|
&& greaterOrEqualFontThanPageAverage(textBlock, page)//
|
||||||
&& PositionUtils.getApproxLineCount(textBlock) < 2.9//
|
&& PositionUtils.getApproxLineCount(textBlock) < 2.9//
|
||||||
&& (tableOrFigureMatcher.reset().find() || (headlineWithSingleIdentifierMatcher.reset().find() && listIdentifiers.isEmpty())) //
|
&& (tableOrFigureMatcher.reset().find() || (headlineWithSingleIdentifierMatcher.reset().find() && listIdentifiers.isEmpty())) //
|
||||||
|
&& tableMidSentenceMatcher.reset().results()
|
||||||
|
.count() <= 1 //
|
||||||
&& !isAmount//
|
&& !isAmount//
|
||||||
&& !headlineWithSlashesMatches) {
|
&& !headlineWithSlashesMatches) {
|
||||||
|
|
||||||
|
|||||||
@ -71,6 +71,9 @@ public class TableOfContentsClassificationService {
|
|||||||
|
|
||||||
private int identifyTOCItems(int start, List<TextBlockOnPage> textBlocks, ClassificationDocument document) {
|
private int identifyTOCItems(int start, List<TextBlockOnPage> textBlocks, ClassificationDocument document) {
|
||||||
|
|
||||||
|
if (start >= textBlocks.size()) {
|
||||||
|
return start;
|
||||||
|
}
|
||||||
ClassificationPage startPage = textBlocks.get(start).page();
|
ClassificationPage startPage = textBlocks.get(start).page();
|
||||||
List<TextBlockOnPage> initialLookAhead = textBlocks.subList(start, Math.min(start + SURROUNDING_BLOCKS_RADIUS, textBlocks.size()));
|
List<TextBlockOnPage> initialLookAhead = textBlocks.subList(start, Math.min(start + SURROUNDING_BLOCKS_RADIUS, textBlocks.size()));
|
||||||
HashMap<NumberWord, TextBlockOnPage> numberToBlockLookup = new HashMap<>();
|
HashMap<NumberWord, TextBlockOnPage> numberToBlockLookup = new HashMap<>();
|
||||||
|
|||||||
@ -10,6 +10,7 @@ import com.iqser.red.service.redaction.v1.server.model.document.nodes.Headline;
|
|||||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Image;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Paragraph;
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Paragraph;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.DocumentWithVisualization;
|
import com.knecon.fforesight.service.layoutparser.processor.model.DocumentWithVisualization;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.services.mapper.OutlineMapper;
|
import com.knecon.fforesight.service.layoutparser.processor.services.mapper.OutlineMapper;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.visualization.LayoutGrid;
|
import com.knecon.fforesight.service.layoutparser.processor.visualization.LayoutGrid;
|
||||||
@ -32,9 +33,15 @@ public class LayoutGridService {
|
|||||||
|
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
@Observed(name = "ViewerDocumentService", contextualName = "create-viewer-document")
|
@Observed(name = "ViewerDocumentService", contextualName = "create-viewer-document")
|
||||||
public void addLayoutGrid(File originFile, DocumentWithVisualization document, File destinationFile, boolean layerVisibilityDefaultValue) {
|
public void addLayoutGrid(File originFile,
|
||||||
|
DocumentWithVisualization document,
|
||||||
|
File destinationFile,
|
||||||
|
LayoutParsingType layoutParsingType,
|
||||||
|
String layoutParserVersion,
|
||||||
|
boolean layerVisibilityDefaultValue) {
|
||||||
|
|
||||||
LayoutGrid layoutGrid = createLayoutGrid(document.document());
|
String layoutParsingTypeName = layoutParsingType.name();
|
||||||
|
LayoutGrid layoutGrid = createLayoutGrid(document.document(), layoutParserVersion, layoutParsingTypeName);
|
||||||
Outline outline = OutlineMapper.createOutline(document.document());
|
Outline outline = OutlineMapper.createOutline(document.document());
|
||||||
layoutGrid.setVisibleByDefault(layerVisibilityDefaultValue);
|
layoutGrid.setVisibleByDefault(layerVisibilityDefaultValue);
|
||||||
|
|
||||||
@ -42,16 +49,23 @@ public class LayoutGridService {
|
|||||||
document.layoutDebugLayer().addOutlineHeadlines(document.document());
|
document.layoutDebugLayer().addOutlineHeadlines(document.document());
|
||||||
|
|
||||||
if (document.layoutDebugLayer().isActive()) {
|
if (document.layoutDebugLayer().isActive()) {
|
||||||
viewerDocumentService.addLayerGroups(originFile, destinationFile, List.of(layoutGrid, document.layoutDebugLayer()), outline);
|
viewerDocumentService.addLayerGroups(originFile,
|
||||||
|
destinationFile,
|
||||||
|
List.of(layoutGrid, document.layoutDebugLayer()),
|
||||||
|
layoutParserVersion,
|
||||||
|
layoutParsingTypeName,
|
||||||
|
outline);
|
||||||
} else {
|
} else {
|
||||||
viewerDocumentService.addLayerGroups(originFile, destinationFile, List.of(layoutGrid), outline);
|
viewerDocumentService.addLayerGroups(originFile, destinationFile, List.of(layoutGrid), layoutParserVersion, layoutParsingTypeName, outline);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private LayoutGrid createLayoutGrid(Document document) {
|
private LayoutGrid createLayoutGrid(Document document, String layoutParserVersion, String layoutParsingType) {
|
||||||
|
|
||||||
LayoutGrid layoutGrid = new LayoutGrid();
|
LayoutGrid layoutGrid = new LayoutGrid();
|
||||||
|
layoutGrid.addVersionAndLayoutParsingType(layoutParserVersion, layoutParsingType, document.getFirstPage());
|
||||||
|
|
||||||
document.streamAllSubNodes()
|
document.streamAllSubNodes()
|
||||||
.peek(layoutGrid::addTreeId)
|
.peek(layoutGrid::addTreeId)
|
||||||
.forEach(semanticNode -> {
|
.forEach(semanticNode -> {
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
package com.knecon.fforesight.service.layoutparser.processor.visualization;
|
package com.knecon.fforesight.service.layoutparser.processor.visualization;
|
||||||
|
|
||||||
import java.awt.Color;
|
import java.awt.Color;
|
||||||
|
import java.awt.geom.AffineTransform;
|
||||||
import java.awt.geom.Line2D;
|
import java.awt.geom.Line2D;
|
||||||
import java.awt.geom.Point2D;
|
import java.awt.geom.Point2D;
|
||||||
import java.awt.geom.Rectangle2D;
|
import java.awt.geom.Rectangle2D;
|
||||||
@ -25,6 +26,9 @@ import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNo
|
|||||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SuperSection;
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SuperSection;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Table;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell;
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.TableCell;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.processor.utils.CoordinateTransforms;
|
||||||
|
import com.knecon.fforesight.service.layoutparser.processor.utils.PageInformation;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations;
|
import com.knecon.fforesight.service.layoutparser.processor.utils.RectangleTransformations;
|
||||||
import com.knecon.fforesight.service.viewerdoc.layers.LayoutGridLayerConfig;
|
import com.knecon.fforesight.service.viewerdoc.layers.LayoutGridLayerConfig;
|
||||||
import com.knecon.fforesight.service.viewerdoc.model.ColoredLine;
|
import com.knecon.fforesight.service.viewerdoc.model.ColoredLine;
|
||||||
@ -89,10 +93,19 @@ public class LayoutGrid extends LayoutGridLayerConfig {
|
|||||||
public void addTreeId(SemanticNode semanticNode) {
|
public void addTreeId(SemanticNode semanticNode) {
|
||||||
|
|
||||||
Page page = semanticNode.getFirstPage();
|
Page page = semanticNode.getFirstPage();
|
||||||
if (semanticNode.getBBox().get(page) == null) {
|
if (semanticNode.getBBox()
|
||||||
|
.get(page) == null) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
addPlacedText(page, semanticNode.getBBox().get(page), semanticNode.getBBox().get(page), buildTreeIdString(semanticNode), 1, treeIds, TREEID_COLOR);
|
addPlacedText(page,
|
||||||
|
semanticNode.getBBox()
|
||||||
|
.get(page),
|
||||||
|
semanticNode.getBBox()
|
||||||
|
.get(page),
|
||||||
|
buildTreeIdString(semanticNode),
|
||||||
|
1,
|
||||||
|
treeIds,
|
||||||
|
TREEID_COLOR);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -121,7 +134,8 @@ public class LayoutGrid extends LayoutGridLayerConfig {
|
|||||||
.toList();
|
.toList();
|
||||||
Integer maxChildDepth = subSections.stream()
|
Integer maxChildDepth = subSections.stream()
|
||||||
.map(node -> node.getTreeId().size())
|
.map(node -> node.getTreeId().size())
|
||||||
.max(Integer::compareTo).orElse(section.getTreeId().size());
|
.max(Integer::compareTo)
|
||||||
|
.orElse(section.getTreeId().size());
|
||||||
int ownDepth = section.getTreeId().size();
|
int ownDepth = section.getTreeId().size();
|
||||||
|
|
||||||
Page firstPage = section.getFirstPage();
|
Page firstPage = section.getFirstPage();
|
||||||
@ -307,7 +321,8 @@ public class LayoutGrid extends LayoutGridLayerConfig {
|
|||||||
Visualizations visualizations = semanticNode.getType().equals(NodeType.TABLE_OF_CONTENTS) ? toc : sections;
|
Visualizations visualizations = semanticNode.getType().equals(NodeType.TABLE_OF_CONTENTS) ? toc : sections;
|
||||||
List<ColoredLine> coloredLines = getOrCreateVisualizationsOnPage(page.getNumber(), visualizations).getColoredLines();
|
List<ColoredLine> coloredLines = getOrCreateVisualizationsOnPage(page.getNumber(), visualizations).getColoredLines();
|
||||||
int lineWidthModifier = maxChildDepth - ownDepth;
|
int lineWidthModifier = maxChildDepth - ownDepth;
|
||||||
Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox().get(page), LINE_WIDTH * (1 + lineWidthModifier), LINE_WIDTH * (1 + lineWidthModifier));
|
Rectangle2D r = RectangleTransformations.pad(semanticNode.getBBox()
|
||||||
|
.get(page), LINE_WIDTH * (1 + lineWidthModifier), LINE_WIDTH * (1 + lineWidthModifier));
|
||||||
|
|
||||||
SemanticNode highestParent = semanticNode.getHighestParent();
|
SemanticNode highestParent = semanticNode.getHighestParent();
|
||||||
Rectangle2D highestParentRect = rectangleMap.get(new RectangleIdentifier(highestParent.getTreeId(), page.getNumber()));
|
Rectangle2D highestParentRect = rectangleMap.get(new RectangleIdentifier(highestParent.getTreeId(), page.getNumber()));
|
||||||
@ -356,7 +371,8 @@ public class LayoutGrid extends LayoutGridLayerConfig {
|
|||||||
List<Double> ys = yStream.collect(Collectors.toList());
|
List<Double> ys = yStream.collect(Collectors.toList());
|
||||||
ys.remove(0);
|
ys.remove(0);
|
||||||
|
|
||||||
Rectangle2D tableBBox = table.getBBox().get(page);
|
Rectangle2D tableBBox = table.getBBox()
|
||||||
|
.get(page);
|
||||||
List<ColoredLine> coloredLines = getOrCreateVisualizationsOnPage(page.getNumber(), tables).getColoredLines();
|
List<ColoredLine> coloredLines = getOrCreateVisualizationsOnPage(page.getNumber(), tables).getColoredLines();
|
||||||
|
|
||||||
xs.forEach(x -> {
|
xs.forEach(x -> {
|
||||||
@ -398,6 +414,21 @@ public class LayoutGrid extends LayoutGridLayerConfig {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public void addVersionAndLayoutParsingType(String version, String layoutParsingType, Page page) {
|
||||||
|
|
||||||
|
PageInformation pageInformation = PageInformation.fromPage(page);
|
||||||
|
double startHeight = pageInformation.heightRot() - 5;
|
||||||
|
Point2D point1 = new Point2D.Double(0, startHeight);
|
||||||
|
Point2D point2 = new Point2D.Double(0, startHeight - FONT_SIZE * 1.5);
|
||||||
|
AffineTransform affineTransform = CoordinateTransforms.calculatePageCoordsToInitialUserSpaceCoords(pageInformation);
|
||||||
|
affineTransform.transform(point1, point1);
|
||||||
|
affineTransform.transform(point2, point2);
|
||||||
|
getOrCreateVisualizationsOnPage(page.getNumber(), this.versionAndType).getPlacedTexts()
|
||||||
|
.addAll(List.of(PlacedText.textFacingUp(String.valueOf(version), point1, FONT_SIZE, Color.BLACK, FONT),
|
||||||
|
PlacedText.textFacingUp(String.valueOf(layoutParsingType), point2, FONT_SIZE, Color.BLACK, FONT)));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
private record RectangleAndLinesResult(List<ColoredLine> coloredLines, Rectangle2D rectangle, List<Line2D> pageLines) {
|
private record RectangleAndLinesResult(List<ColoredLine> coloredLines, Rectangle2D rectangle, List<Line2D> pageLines) {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -39,6 +39,7 @@ dependencies {
|
|||||||
implementation("org.springframework.cloud:spring-cloud-starter-openfeign:4.0.4")
|
implementation("org.springframework.cloud:spring-cloud-starter-openfeign:4.0.4")
|
||||||
implementation("net.logstash.logback:logstash-logback-encoder:7.4")
|
implementation("net.logstash.logback:logstash-logback-encoder:7.4")
|
||||||
implementation("ch.qos.logback:logback-classic")
|
implementation("ch.qos.logback:logback-classic")
|
||||||
|
api("com.iqser.red.commons:metric-commons:2.3.0")
|
||||||
|
|
||||||
implementation("com.pdftron:PDFNet:10.11.0")
|
implementation("com.pdftron:PDFNet:10.11.0")
|
||||||
|
|
||||||
|
|||||||
@ -17,7 +17,6 @@ import org.junit.jupiter.api.Test;
|
|||||||
import org.springframework.beans.factory.annotation.Autowired;
|
import org.springframework.beans.factory.annotation.Autowired;
|
||||||
import org.springframework.core.io.ClassPathResource;
|
import org.springframework.core.io.ClassPathResource;
|
||||||
|
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
|
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Section;
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Section;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SemanticNode;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SuperSection;
|
import com.iqser.red.service.redaction.v1.server.model.document.nodes.SuperSection;
|
||||||
@ -80,7 +79,7 @@ public class OutlineDetectionTest extends AbstractTest {
|
|||||||
long start = System.currentTimeMillis();
|
long start = System.currentTimeMillis();
|
||||||
ClassificationDocument classificationDocument = parseLayout(fileName, LayoutParsingType.DOCUMINE_OLD);
|
ClassificationDocument classificationDocument = parseLayout(fileName, LayoutParsingType.DOCUMINE_OLD);
|
||||||
var document = buildGraph(fileName, classificationDocument);
|
var document = buildGraph(fileName, classificationDocument);
|
||||||
layoutGridService.addLayoutGrid(documentFile, document, new File(tmpFileName), true);
|
layoutGridService.addLayoutGrid(documentFile, document, new File(tmpFileName), LayoutParsingType.DOCUMINE_OLD, "TEST_VERSION", true);
|
||||||
OutlineObjectTree outlineObjectTree = classificationDocument.getOutlineObjectTree();
|
OutlineObjectTree outlineObjectTree = classificationDocument.getOutlineObjectTree();
|
||||||
assertEquals(outlineObjectTree.getRootNodes().size(), 8);
|
assertEquals(outlineObjectTree.getRootNodes().size(), 8);
|
||||||
assertEquals(outlineObjectTree.getOutlineObjectsPerPage().get(2).size(), 1);
|
assertEquals(outlineObjectTree.getOutlineObjectsPerPage().get(2).size(), 1);
|
||||||
|
|||||||
@ -11,7 +11,6 @@ import org.springframework.beans.factory.annotation.Autowired;
|
|||||||
import org.springframework.core.io.ClassPathResource;
|
import org.springframework.core.io.ClassPathResource;
|
||||||
|
|
||||||
import com.iqser.red.commons.jackson.ObjectMapperFactory;
|
import com.iqser.red.commons.jackson.ObjectMapperFactory;
|
||||||
import com.iqser.red.service.redaction.v1.server.model.document.nodes.Document;
|
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
|
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
|
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
|
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
|
||||||
@ -31,6 +30,8 @@ public class ViewerDocumentTest extends BuildDocumentTest {
|
|||||||
PDFTronViewerDocumentService viewerDocumentService = new PDFTronViewerDocumentService(null);
|
PDFTronViewerDocumentService viewerDocumentService = new PDFTronViewerDocumentService(null);
|
||||||
LayoutGridService layoutGridService = new LayoutGridService(viewerDocumentService);
|
LayoutGridService layoutGridService = new LayoutGridService(viewerDocumentService);
|
||||||
|
|
||||||
|
static String TEST_VERSION = "TEST_VERSION";
|
||||||
|
|
||||||
|
|
||||||
@BeforeEach
|
@BeforeEach
|
||||||
public void init() {
|
public void init() {
|
||||||
@ -51,7 +52,7 @@ public class ViewerDocumentTest extends BuildDocumentTest {
|
|||||||
|
|
||||||
long start = System.currentTimeMillis();
|
long start = System.currentTimeMillis();
|
||||||
var document = buildGraph(fileName, LayoutParsingType.REDACT_MANAGER_WITHOUT_DUPLICATE_PARAGRAPH);
|
var document = buildGraph(fileName, LayoutParsingType.REDACT_MANAGER_WITHOUT_DUPLICATE_PARAGRAPH);
|
||||||
layoutGridService.addLayoutGrid(documentFile, document, new File(tmpFileName), true);
|
layoutGridService.addLayoutGrid(documentFile, document, new File(tmpFileName), LayoutParsingType.REDACT_MANAGER_WITHOUT_DUPLICATE_PARAGRAPH, TEST_VERSION, true);
|
||||||
System.out.printf("Total time: %.2fs%n", ((float) (System.currentTimeMillis() - start)) / 1000);
|
System.out.printf("Total time: %.2fs%n", ((float) (System.currentTimeMillis() - start)) / 1000);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -79,7 +80,7 @@ public class ViewerDocumentTest extends BuildDocumentTest {
|
|||||||
LayoutGridService layoutGridService = new LayoutGridService(viewerDocumentService);
|
LayoutGridService layoutGridService = new LayoutGridService(viewerDocumentService);
|
||||||
var document = DocumentGraphFactory.buildDocumentGraph(LayoutParsingType.DOCUMINE_OLD, classificationDocument);
|
var document = DocumentGraphFactory.buildDocumentGraph(LayoutParsingType.DOCUMINE_OLD, classificationDocument);
|
||||||
|
|
||||||
layoutGridService.addLayoutGrid(documentFile, document, new File(tmpFileName), true);
|
layoutGridService.addLayoutGrid(documentFile, document, new File(tmpFileName), LayoutParsingType.REDACT_MANAGER_WITHOUT_DUPLICATE_PARAGRAPH, TEST_VERSION, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -10,6 +10,7 @@ If optionalContent is false, the layer will not be created as a OCG, and will no
|
|||||||
*/
|
*/
|
||||||
public record LayerIdentifier(String name, String markedContentName) {
|
public record LayerIdentifier(String name, String markedContentName) {
|
||||||
|
|
||||||
|
|
||||||
public String markedContentName() {
|
public String markedContentName() {
|
||||||
// The prefix KNECON_ is used to identify marked contents as knecon contents later on
|
// The prefix KNECON_ is used to identify marked contents as knecon contents later on
|
||||||
return KNECON_IDENTIFIER_PREFIX + markedContentName;
|
return KNECON_IDENTIFIER_PREFIX + markedContentName;
|
||||||
@ -40,6 +41,7 @@ public record LayerIdentifier(String name, String markedContentName) {
|
|||||||
public static final LayerIdentifier KNECON_LAYOUT_FIGURES = new LayerIdentifier("Figures", "LAYOUT_FIGURES");
|
public static final LayerIdentifier KNECON_LAYOUT_FIGURES = new LayerIdentifier("Figures", "LAYOUT_FIGURES");
|
||||||
public static final LayerIdentifier KNECON_LAYOUT_IMAGES = new LayerIdentifier("Images", "LAYOUT_IMAGES");
|
public static final LayerIdentifier KNECON_LAYOUT_IMAGES = new LayerIdentifier("Images", "LAYOUT_IMAGES");
|
||||||
public static final LayerIdentifier KNECON_LAYOUT_TREE_IDs = new LayerIdentifier("Tree IDs", "LAYOUT_TREE_IDs");
|
public static final LayerIdentifier KNECON_LAYOUT_TREE_IDs = new LayerIdentifier("Tree IDs", "LAYOUT_TREE_IDs");
|
||||||
|
public static final LayerIdentifier KNECON_LAYOUT_VERSION_AND_TYPE = new LayerIdentifier("Version and Type", "LAYOUT_PARSER_VERSION_AND_TYPE");
|
||||||
public static final LayerIdentifier KNECON_LAYOUT_TOC = new LayerIdentifier("Table of Contents", "TABLE_OF_CONTENTS");
|
public static final LayerIdentifier KNECON_LAYOUT_TOC = new LayerIdentifier("Table of Contents", "TABLE_OF_CONTENTS");
|
||||||
|
|
||||||
//layout grid debug
|
//layout grid debug
|
||||||
|
|||||||
@ -46,12 +46,13 @@ public class LayoutGridLayerConfig extends AbstractLayerGroup {
|
|||||||
protected final Visualizations images = Visualizations.builder().layer(LayerIdentifier.KNECON_LAYOUT_IMAGES).build();
|
protected final Visualizations images = Visualizations.builder().layer(LayerIdentifier.KNECON_LAYOUT_IMAGES).build();
|
||||||
protected final Visualizations keyValue = Visualizations.builder().layer(LayerIdentifier.KNECON_LAYOUT_KEY_VALUE).build();
|
protected final Visualizations keyValue = Visualizations.builder().layer(LayerIdentifier.KNECON_LAYOUT_KEY_VALUE).build();
|
||||||
protected final Visualizations treeIds = Visualizations.builder().layer(LayerIdentifier.KNECON_LAYOUT_TREE_IDs).build();
|
protected final Visualizations treeIds = Visualizations.builder().layer(LayerIdentifier.KNECON_LAYOUT_TREE_IDs).build();
|
||||||
|
protected final Visualizations versionAndType = Visualizations.builder().layer(LayerIdentifier.KNECON_LAYOUT_VERSION_AND_TYPE).build();
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<Visualizations> getVisualizations() {
|
public List<Visualizations> getVisualizations() {
|
||||||
|
|
||||||
return List.of(headlines, paragraphs, tables, sections, headerFooter, toc, keyValue, figures, images, treeIds);
|
return List.of(headlines, paragraphs, tables, sections, headerFooter, toc, keyValue, figures, images, treeIds, versionAndType);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -54,7 +54,7 @@ public class PDFTronViewerDocumentService {
|
|||||||
|
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
@Observed(name = "PDFTronViewerDocumentService", contextualName = "add-visualizations")
|
@Observed(name = "PDFTronViewerDocumentService", contextualName = "add-visualizations")
|
||||||
public void addLayerGroups(File originFile, File destinationFile, List<LayerGroup> layerGroups, Outline outline) {
|
public void addLayerGroups(File originFile, File destinationFile, List<LayerGroup> layerGroups, String layoutParserVersion, String layoutParsingType, Outline outline) {
|
||||||
|
|
||||||
synchronized (PDFNet.class) { // synchronized with class, to ensure multiple instances are also synchronized
|
synchronized (PDFNet.class) { // synchronized with class, to ensure multiple instances are also synchronized
|
||||||
|
|
||||||
@ -116,7 +116,7 @@ public class PDFTronViewerDocumentService {
|
|||||||
|
|
||||||
// OutlineUtility.addOutline(pdfDoc, outline);
|
// OutlineUtility.addOutline(pdfDoc, outline);
|
||||||
|
|
||||||
ViewerDocVersioningUtility.setVersionInDocument(pdfDoc);
|
ViewerDocVersioningUtility.setVersionInDocument(pdfDoc, layoutParserVersion, layoutParsingType);
|
||||||
|
|
||||||
saveDocument(pdfDoc, destinationFile);
|
saveDocument(pdfDoc, destinationFile);
|
||||||
} finally {
|
} finally {
|
||||||
@ -128,9 +128,9 @@ public class PDFTronViewerDocumentService {
|
|||||||
|
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
@Observed(name = "PDFTronViewerDocumentService", contextualName = "add-visualizations")
|
@Observed(name = "PDFTronViewerDocumentService", contextualName = "add-visualizations")
|
||||||
public void addLayerGroups(File originFile, File destinationFile, List<LayerGroup> layerGroups) {
|
public void addLayerGroups(File originFile, File destinationFile, List<LayerGroup> layerGroups, String layoutParserVersion, String layoutParsingType) {
|
||||||
|
|
||||||
addLayerGroups(originFile, destinationFile, layerGroups, new Outline());
|
addLayerGroups(originFile, destinationFile, layerGroups, layoutParserVersion, layoutParsingType, new Outline());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -8,6 +8,7 @@ import org.apache.pdfbox.Loader;
|
|||||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||||
|
|
||||||
import com.pdftron.pdf.PDFDoc;
|
import com.pdftron.pdf.PDFDoc;
|
||||||
|
import com.pdftron.sdf.Obj;
|
||||||
|
|
||||||
import lombok.SneakyThrows;
|
import lombok.SneakyThrows;
|
||||||
import lombok.experimental.UtilityClass;
|
import lombok.experimental.UtilityClass;
|
||||||
@ -21,10 +22,15 @@ public class ViewerDocVersioningUtility {
|
|||||||
|
|
||||||
|
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
public void setVersionInDocument(PDFDoc pdfDoc) {
|
public void setVersionInDocument(PDFDoc pdfDoc, String layoutParserVersion, String layoutParsingType) {
|
||||||
|
|
||||||
pdfDoc.getDocInfo().setAuthor(AUTHOR);
|
pdfDoc.getDocInfo().setAuthor(AUTHOR);
|
||||||
pdfDoc.getDocInfo().setKeywords(CUSTOM_DICT + ":" + currentVersion);
|
pdfDoc.getDocInfo().setKeywords(CUSTOM_DICT + ":" + currentVersion);
|
||||||
|
|
||||||
|
Obj versionInfo = pdfDoc.getSDFDoc().createIndirectDict();
|
||||||
|
versionInfo.putString("LayoutParserVersion", layoutParserVersion);
|
||||||
|
versionInfo.putString("LayoutParsingType", layoutParsingType);
|
||||||
|
pdfDoc.getRoot().put("KneconVersionInfo", versionInfo);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -1,26 +1,18 @@
|
|||||||
package com.knecon.fforesight.service.viewerdoc.service;
|
package com.knecon.fforesight.service.viewerdoc.service;
|
||||||
|
|
||||||
import java.awt.geom.AffineTransform;
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
import java.io.FileOutputStream;
|
import java.io.FileOutputStream;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
|
|
||||||
import javax.swing.table.AbstractTableModel;
|
|
||||||
|
|
||||||
import org.junit.jupiter.api.AfterAll;
|
import org.junit.jupiter.api.AfterAll;
|
||||||
import org.junit.jupiter.api.BeforeEach;
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
import org.springframework.core.io.ClassPathResource;
|
import org.springframework.core.io.ClassPathResource;
|
||||||
|
|
||||||
import com.pdftron.common.Matrix2D;
|
|
||||||
import com.pdftron.pdf.ElementBuilder;
|
|
||||||
import com.pdftron.pdf.ElementReader;
|
|
||||||
import com.pdftron.pdf.ElementWriter;
|
|
||||||
import com.pdftron.pdf.PDFDoc;
|
import com.pdftron.pdf.PDFDoc;
|
||||||
import com.pdftron.pdf.PDFNet;
|
import com.pdftron.pdf.PDFNet;
|
||||||
import com.pdftron.pdf.Page;
|
|
||||||
import com.pdftron.sdf.SDFDoc;
|
import com.pdftron.sdf.SDFDoc;
|
||||||
|
|
||||||
import lombok.SneakyThrows;
|
import lombok.SneakyThrows;
|
||||||
@ -48,7 +40,7 @@ class ViewerDocVersioningUtilityTest {
|
|||||||
File file = new ClassPathResource("files/empty.pdf").getFile();
|
File file = new ClassPathResource("files/empty.pdf").getFile();
|
||||||
Path tmpFile = Files.createTempFile("markedDocument", ".pdf");
|
Path tmpFile = Files.createTempFile("markedDocument", ".pdf");
|
||||||
try (var in = new FileInputStream(file); var doc = new PDFDoc(in); var out = new FileOutputStream(tmpFile.toFile())) {
|
try (var in = new FileInputStream(file); var doc = new PDFDoc(in); var out = new FileOutputStream(tmpFile.toFile())) {
|
||||||
ViewerDocVersioningUtility.setVersionInDocument(doc);
|
ViewerDocVersioningUtility.setVersionInDocument(doc, "layoutParserVersion", "layoutParsingType");
|
||||||
doc.save(out, SDFDoc.SaveMode.LINEARIZED, null);
|
doc.save(out, SDFDoc.SaveMode.LINEARIZED, null);
|
||||||
}
|
}
|
||||||
assert ViewerDocVersioningUtility.isCurrentVersion(tmpFile.toFile());
|
assert ViewerDocVersioningUtility.isCurrentVersion(tmpFile.toFile());
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user