RED-7074: Design Subsection section tree structure algorithm
* temp
This commit is contained in:
parent
f7aeb9a406
commit
d2dc369df3
@ -1,8 +1,10 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
@ -42,6 +44,10 @@ public class Section implements GenericSemanticNode {
|
||||
@EqualsAndHashCode.Exclude
|
||||
Map<Page, Rectangle2D> bBoxCache;
|
||||
|
||||
@EqualsAndHashCode.Exclude
|
||||
boolean isMainSection;
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
public NodeType getType() {
|
||||
@ -56,6 +62,11 @@ public class Section implements GenericSemanticNode {
|
||||
.isPresent();
|
||||
}
|
||||
|
||||
public boolean isLeafSection() {
|
||||
|
||||
return streamAllSubNodesOfType(NodeType.SECTION).findAny()
|
||||
.isEmpty();
|
||||
}
|
||||
|
||||
@Override
|
||||
public TextBlock getTextBlock() {
|
||||
|
||||
@ -50,14 +50,17 @@ public class ConcatenatedTextBlock implements TextBlock {
|
||||
|
||||
public ConcatenatedTextBlock concat(TextBlock textBlock) {
|
||||
|
||||
int start = textBlock.getBoundary().start();
|
||||
int end = textBlock.getBoundary().end();
|
||||
if (this.atomicTextBlocks.isEmpty()) {
|
||||
boundary.setStart(textBlock.getBoundary().start());
|
||||
boundary.setEnd(textBlock.getBoundary().end());
|
||||
} else if (boundary.end() != textBlock.getBoundary().start()) {
|
||||
throw new UnsupportedOperationException(format("Can only concat consecutive TextBlocks, trying to concat %s and %s", boundary, textBlock.getBoundary()));
|
||||
boundary.setStart(start);
|
||||
boundary.setEnd(end);
|
||||
} else if (boundary.end() != start) {
|
||||
//throw new UnsupportedOperationException(format("Can only concat consecutive TextBlocks, trying to concat %s and %s", boundary, textBlock.getBoundary()));
|
||||
return this;
|
||||
}
|
||||
this.atomicTextBlocks.addAll(textBlock.getAtomicTextBlocks());
|
||||
boundary.setEnd(textBlock.getBoundary().end());
|
||||
boundary.setEnd(end);
|
||||
this.searchText = null;
|
||||
return this;
|
||||
}
|
||||
|
||||
@ -51,7 +51,8 @@ public class OutlineExtractorService {
|
||||
List<OutlineObjectTreeNode> rootNodes = new ArrayList<>();
|
||||
if (documentOutline != null) {
|
||||
for (PDOutlineItem child : documentOutline.children()) {
|
||||
rootNodes.add(createOutlineObjectWithChildren(child, document, 1));
|
||||
Optional<OutlineObjectTreeNode> outlineObjectWithChildren = createOutlineObjectWithChildren(child, document, 1);
|
||||
outlineObjectWithChildren.ifPresent(rootNodes::add);
|
||||
}
|
||||
}
|
||||
|
||||
@ -60,12 +61,14 @@ public class OutlineExtractorService {
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private OutlineObjectTreeNode createOutlineObjectWithChildren(PDOutlineItem item, PDDocument document, int depth) {
|
||||
private Optional<OutlineObjectTreeNode> createOutlineObjectWithChildren(PDOutlineItem item, PDDocument document, int depth) {
|
||||
|
||||
OutlineObjectTreeNode outlineObject = createOutlineObject(item, document, depth);
|
||||
for (var child : item.children()) {
|
||||
OutlineObjectTreeNode outlineObjectWithChildren = createOutlineObjectWithChildren(child, document, depth + 1);
|
||||
outlineObject.addChild(outlineObjectWithChildren);
|
||||
Optional<OutlineObjectTreeNode> outlineObject = createOutlineObject(item, document, depth);
|
||||
if (outlineObject.isPresent()) {
|
||||
for (var child : item.children()) {
|
||||
Optional<OutlineObjectTreeNode> outlineObjectWithChildren = createOutlineObjectWithChildren(child, document, depth + 1);
|
||||
outlineObjectWithChildren.ifPresent(outlineObjectTreeNode -> outlineObject.get().addChild(outlineObjectTreeNode));
|
||||
}
|
||||
}
|
||||
|
||||
return outlineObject;
|
||||
@ -75,11 +78,14 @@ public class OutlineExtractorService {
|
||||
// if the structure elements are processed beforehand, another case can be handled here as well:
|
||||
// outline objects can reference structure elements (see pdf documentation)
|
||||
@SneakyThrows
|
||||
private OutlineObjectTreeNode createOutlineObject(PDOutlineItem item, PDDocument document, int depth) {
|
||||
private Optional<OutlineObjectTreeNode> createOutlineObject(PDOutlineItem item, PDDocument document, int depth) {
|
||||
|
||||
String title = item.getTitle();
|
||||
|
||||
PDPage page = item.findDestinationPage(document);
|
||||
if (page == null) {
|
||||
return Optional.empty();
|
||||
}
|
||||
int pageNumber = document.getPages().indexOf(page);
|
||||
|
||||
Optional<Point2D> outlinePosition = Optional.empty();
|
||||
@ -109,7 +115,7 @@ public class OutlineExtractorService {
|
||||
log.info(String.format("Error occurred during position resolution for outline item on page %s with title %s: " + e, pageNumber, title));
|
||||
}
|
||||
|
||||
return new OutlineObjectTreeNode(new OutlineObject(title, pageNumber, outlinePosition.orElse(new Point2D.Float(0, 0)), depth));
|
||||
return Optional.of(new OutlineObjectTreeNode(new OutlineObject(title, pageNumber, outlinePosition.orElse(new Point2D.Float(0, 0)), depth)));
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -50,8 +50,7 @@ public class SectionNodeFactory {
|
||||
|
||||
Map<Integer, List<AbstractPageBlock>> blocksPerPage = pageBlocks.stream()
|
||||
.collect(groupingBy(AbstractPageBlock::getPage));
|
||||
Section section = Section.builder().documentTree(context.getDocumentTree())
|
||||
.build();
|
||||
Section section = Section.builder().isMainSection(parentNode == null).documentTree(context.getDocumentTree()).build();
|
||||
|
||||
context.getSections().add(section);
|
||||
blocksPerPage.keySet()
|
||||
|
||||
@ -29,6 +29,7 @@ public class ViewerDocumentTest extends BuildDocumentTest {
|
||||
|
||||
@Test
|
||||
@SneakyThrows
|
||||
@Disabled
|
||||
public void testViewerDocuments() {
|
||||
|
||||
String directory = "files/syngenta_190_deduplicated/";
|
||||
@ -81,7 +82,9 @@ public class ViewerDocumentTest extends BuildDocumentTest {
|
||||
@SneakyThrows
|
||||
public void testViewerDocument() {
|
||||
|
||||
String fileName = "files/new/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf";
|
||||
|
||||
String fileName = "files/documine/20_TiltPlus_SensibilizacaoCutanea.pdf";
|
||||
//String fileName = "files/new/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf";
|
||||
//String fileName = "files/syngenta_190_deduplicated/1 Abamectin_prr.pdf";
|
||||
//String fileName = "files/new/abschlussarbeiten-template-institut-fur-informatik-padagogische-hochschule-karlsruhe.pdf";
|
||||
//String fileName = "files/new/kaust-official-thesis-template.pdf";
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user