Merge branch 'RED-7074_2' into 'main'
RED-7074: Design Subsection section tree structure algorithm. See merge request fforesight/layout-parser!160
This commit is contained in:
commit
b6742c1e89
@ -43,9 +43,9 @@ public class Document extends AbstractSemanticNode {
|
||||
}
|
||||
|
||||
|
||||
public List<Section> getMainSections() {
|
||||
public List<Section> getAllSections() {
|
||||
|
||||
return streamChildrenOfType(NodeType.SECTION).map(node -> (Section) node)
|
||||
return streamAllSubNodesOfType(NodeType.SECTION).map(node -> (Section) node)
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
||||
@ -82,9 +82,7 @@ public class TOCEnrichmentService {
|
||||
if(iterator.hasNext()) {
|
||||
currentTOCItem = iterator.next();
|
||||
}
|
||||
}
|
||||
|
||||
if (!foundFirstHeadline) {
|
||||
} else if (!foundFirstHeadline) {
|
||||
startBlocks.add(current);
|
||||
} else {
|
||||
currentSection.getSectionBlocks().add(current);
|
||||
|
||||
@ -68,7 +68,7 @@ public class RedTextPosition extends BoundingBox {
|
||||
// I guess if we start with the initial user space positions and transform them the same way we do the rulings it would work.
|
||||
pos.setBBox(new Rectangle2D.Float(textPosition.getX(), textPosition.getY(), textPosition.getWidthDirAdj(), textPosition.getHeight()));
|
||||
|
||||
float textHeight = textPosition.getHeight() + HEIGHT_PADDING;
|
||||
float textHeight = textPosition.getHeight() + 2 * HEIGHT_PADDING;
|
||||
Rectangle2D.Float dirAdjPosition = new Rectangle2D.Float(textPosition.getXDirAdj(),
|
||||
textPosition.getYDirAdj() - textHeight,
|
||||
textPosition.getWidthDirAdj(),
|
||||
|
||||
@ -9,7 +9,6 @@ import org.springframework.stereotype.Service;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.SimplifiedSectionText;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.SimplifiedText;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.GenericSemanticNode;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SemanticNode;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
@ -20,7 +19,7 @@ public class SimplifiedSectionTextService {
|
||||
|
||||
public SimplifiedText toSimplifiedText(Document document) {
|
||||
|
||||
List<SimplifiedSectionText> simplifiedMainSectionsList = document.getMainSections()
|
||||
List<SimplifiedSectionText> simplifiedMainSectionsList = document.getAllSections()
|
||||
.stream()
|
||||
.map(this::toSimplifiedSectionText)
|
||||
.toList();
|
||||
|
||||
@ -69,11 +69,6 @@ public class SectionNodeFactory {
|
||||
|
||||
addFirstHeadlineDirectlyToSection(layoutParsingType, pageBlocks, context, section, document);
|
||||
if (containsTablesAndTextBlocks(pageBlocks)) {
|
||||
|
||||
if (pageBlocks.get(0).isHeadline()) {
|
||||
pageBlocks.remove(0);
|
||||
}
|
||||
|
||||
splitPageBlocksIntoSubSections(pageBlocks).forEach(subSectionPageBlocks -> addSection(layoutParsingType,
|
||||
section,
|
||||
true,
|
||||
@ -82,10 +77,6 @@ public class SectionNodeFactory {
|
||||
context,
|
||||
document));
|
||||
} else if (!isLeaf) {
|
||||
|
||||
if (pageBlocks.get(0).isHeadline()) {
|
||||
pageBlocks.remove(0);
|
||||
}
|
||||
addSection(layoutParsingType, section, true, pageBlocks, emptyList(), context, document);
|
||||
} else {
|
||||
addTablesAndParagraphsAndHeadlinesToSection(layoutParsingType, pageBlocks, context, section, document);
|
||||
|
||||
@ -31,7 +31,7 @@ public class ViewerDocumentTest extends BuildDocumentTest {
|
||||
@SneakyThrows
|
||||
public void testViewerDocument() {
|
||||
|
||||
String fileName = "files/syngenta/CustomerFiles/S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf";
|
||||
String fileName = "files/syngenta/CustomerFiles/SinglePages/S4_S-Metolachlor_RAR_01_Volume_1_2018-09-06.pdf";
|
||||
|
||||
String tmpFileName = "/tmp/" + Path.of(fileName).getFileName() + "_VIEWER.pdf";
|
||||
|
||||
|
||||
@ -114,7 +114,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
||||
ClassificationDocument classificationDocument = buildClassificationDocument(pdfFileResource.getFile());
|
||||
|
||||
assertThat(classificationDocument.getHeaders()
|
||||
.get(0).getTextBlocks().size()).isEqualTo(3);
|
||||
.get(0).getTextBlocks().size()).isEqualTo(2);
|
||||
assertThat(classificationDocument.getHeaders()
|
||||
.get(0).getTextBlocks()
|
||||
.get(0).getSequences().size()).isEqualTo(8);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user