RED-7074: Design Subsection section tree structure algorithm
This commit is contained in:
parent
23985b14be
commit
efb1a748af
@ -43,9 +43,9 @@ public class Document extends AbstractSemanticNode {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public List<Section> getMainSections() {
|
public List<Section> getAllSections() {
|
||||||
|
|
||||||
return streamChildrenOfType(NodeType.SECTION).map(node -> (Section) node)
|
return streamAllSubNodesOfType(NodeType.SECTION).map(node -> (Section) node)
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -82,9 +82,7 @@ public class TOCEnrichmentService {
|
|||||||
if(iterator.hasNext()) {
|
if(iterator.hasNext()) {
|
||||||
currentTOCItem = iterator.next();
|
currentTOCItem = iterator.next();
|
||||||
}
|
}
|
||||||
}
|
} else if (!foundFirstHeadline) {
|
||||||
|
|
||||||
if (!foundFirstHeadline) {
|
|
||||||
startBlocks.add(current);
|
startBlocks.add(current);
|
||||||
} else {
|
} else {
|
||||||
currentSection.getSectionBlocks().add(current);
|
currentSection.getSectionBlocks().add(current);
|
||||||
|
|||||||
@ -68,7 +68,7 @@ public class RedTextPosition extends BoundingBox {
|
|||||||
// I guess if we start with the initial user space positions and transform them the same way we do the rulings it would work.
|
// I guess if we start with the initial user space positions and transform them the same way we do the rulings it would work.
|
||||||
pos.setBBox(new Rectangle2D.Float(textPosition.getX(), textPosition.getY(), textPosition.getWidthDirAdj(), textPosition.getHeight()));
|
pos.setBBox(new Rectangle2D.Float(textPosition.getX(), textPosition.getY(), textPosition.getWidthDirAdj(), textPosition.getHeight()));
|
||||||
|
|
||||||
float textHeight = textPosition.getHeight() + HEIGHT_PADDING;
|
float textHeight = textPosition.getHeight() + 2 * HEIGHT_PADDING;
|
||||||
Rectangle2D.Float dirAdjPosition = new Rectangle2D.Float(textPosition.getXDirAdj(),
|
Rectangle2D.Float dirAdjPosition = new Rectangle2D.Float(textPosition.getXDirAdj(),
|
||||||
textPosition.getYDirAdj() - textHeight,
|
textPosition.getYDirAdj() - textHeight,
|
||||||
textPosition.getWidthDirAdj(),
|
textPosition.getWidthDirAdj(),
|
||||||
|
|||||||
@ -9,7 +9,6 @@ import org.springframework.stereotype.Service;
|
|||||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.SimplifiedSectionText;
|
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.SimplifiedSectionText;
|
||||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.SimplifiedText;
|
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.SimplifiedText;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.GenericSemanticNode;
|
|
||||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SemanticNode;
|
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SemanticNode;
|
||||||
|
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
@ -20,7 +19,7 @@ public class SimplifiedSectionTextService {
|
|||||||
|
|
||||||
public SimplifiedText toSimplifiedText(Document document) {
|
public SimplifiedText toSimplifiedText(Document document) {
|
||||||
|
|
||||||
List<SimplifiedSectionText> simplifiedMainSectionsList = document.getMainSections()
|
List<SimplifiedSectionText> simplifiedMainSectionsList = document.getAllSections()
|
||||||
.stream()
|
.stream()
|
||||||
.map(this::toSimplifiedSectionText)
|
.map(this::toSimplifiedSectionText)
|
||||||
.toList();
|
.toList();
|
||||||
|
|||||||
@ -69,11 +69,6 @@ public class SectionNodeFactory {
|
|||||||
|
|
||||||
addFirstHeadlineDirectlyToSection(layoutParsingType, pageBlocks, context, section, document);
|
addFirstHeadlineDirectlyToSection(layoutParsingType, pageBlocks, context, section, document);
|
||||||
if (containsTablesAndTextBlocks(pageBlocks)) {
|
if (containsTablesAndTextBlocks(pageBlocks)) {
|
||||||
|
|
||||||
if (pageBlocks.get(0).isHeadline()) {
|
|
||||||
pageBlocks.remove(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
splitPageBlocksIntoSubSections(pageBlocks).forEach(subSectionPageBlocks -> addSection(layoutParsingType,
|
splitPageBlocksIntoSubSections(pageBlocks).forEach(subSectionPageBlocks -> addSection(layoutParsingType,
|
||||||
section,
|
section,
|
||||||
true,
|
true,
|
||||||
@ -82,10 +77,6 @@ public class SectionNodeFactory {
|
|||||||
context,
|
context,
|
||||||
document));
|
document));
|
||||||
} else if (!isLeaf) {
|
} else if (!isLeaf) {
|
||||||
|
|
||||||
if (pageBlocks.get(0).isHeadline()) {
|
|
||||||
pageBlocks.remove(0);
|
|
||||||
}
|
|
||||||
addSection(layoutParsingType, section, true, pageBlocks, emptyList(), context, document);
|
addSection(layoutParsingType, section, true, pageBlocks, emptyList(), context, document);
|
||||||
} else {
|
} else {
|
||||||
addTablesAndParagraphsAndHeadlinesToSection(layoutParsingType, pageBlocks, context, section, document);
|
addTablesAndParagraphsAndHeadlinesToSection(layoutParsingType, pageBlocks, context, section, document);
|
||||||
|
|||||||
@ -31,7 +31,7 @@ public class ViewerDocumentTest extends BuildDocumentTest {
|
|||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
public void testViewerDocument() {
|
public void testViewerDocument() {
|
||||||
|
|
||||||
String fileName = "files/syngenta/CustomerFiles/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf";
|
String fileName = "files/syngenta/CustomerFiles/SinglePages/S4_S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf";
|
||||||
|
|
||||||
String tmpFileName = "/tmp/" + Path.of(fileName).getFileName() + "_VIEWER.pdf";
|
String tmpFileName = "/tmp/" + Path.of(fileName).getFileName() + "_VIEWER.pdf";
|
||||||
|
|
||||||
|
|||||||
@ -114,7 +114,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
|||||||
ClassificationDocument classificationDocument = buildClassificationDocument(pdfFileResource.getFile());
|
ClassificationDocument classificationDocument = buildClassificationDocument(pdfFileResource.getFile());
|
||||||
|
|
||||||
assertThat(classificationDocument.getHeaders()
|
assertThat(classificationDocument.getHeaders()
|
||||||
.get(0).getTextBlocks().size()).isEqualTo(3);
|
.get(0).getTextBlocks().size()).isEqualTo(2);
|
||||||
assertThat(classificationDocument.getHeaders()
|
assertThat(classificationDocument.getHeaders()
|
||||||
.get(0).getTextBlocks()
|
.get(0).getTextBlocks()
|
||||||
.get(0).getSequences().size()).isEqualTo(8);
|
.get(0).getSequences().size()).isEqualTo(8);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user