Compare commits

...

3 Commits

Author SHA1 Message Date
Kilian Schuettler
93ecd592d7 clari-added-section-chunk: change structure object from paragraph/headline to sections
* add sections without tables
2024-06-26 12:46:37 +02:00
Kilian Schuettler
6b63596516 clari-section-chunk: change structure object from paragraph/headline to sections
* add sections
2024-06-26 12:35:14 +02:00
Kilian Schuettler
80e701a0fd clari-section-chunk: change structure object from paragraph/headline to sections
* filter headers/footers
2024-06-26 12:28:53 +02:00
2 changed files with 4 additions and 2 deletions

View File

@@ -163,7 +163,7 @@ public class LayoutParsingPipeline {
layoutParsingStorageService.storeSimplifiedText(layoutParsingRequest, simplifiedSectionTextService.toSimplifiedText(documentGraph));
layoutParsingStorageService.storeViewerDocument(layoutParsingRequest, viewerDocumentFile);
- if (layoutParsingRequest.layoutParsingType().equals(LayoutParsingType.CLARIFYND)) {
+ if (layoutParsingRequest.researchDocumentStorageId() != null) {
log.info("Building research document data for {}", layoutParsingRequest.identifier());
var researchDocumentData = TaasDocumentDataMapper.fromDocument(documentGraph);
layoutParsingStorageService.storeResearchDocumentData(layoutParsingRequest, researchDocumentData);

View File

@@ -16,6 +16,7 @@ import com.knecon.fforesight.service.layoutparser.internal.api.data.taas.TableDa
import com.knecon.fforesight.service.layoutparser.processor.model.graph.Boundary;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Page;
+ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Section;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SemanticNode;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Table;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.TableCell;
@@ -28,7 +29,8 @@ public class TaasDocumentDataMapper {
AtomicInteger structureObjectNumber = new AtomicInteger();
List<StructureObject> structureObjects = document.streamAllSubNodes()
.filter(node -> !node.getType().equals(NodeType.TABLE_CELL))
- .filter(node -> !node.getType().equals(NodeType.SECTION))
+ .filter(node -> !node.getType().equals(NodeType.SUPER_SECTION))
+ .filter(node -> !(node.getType().equals(NodeType.SECTION) && ((Section) node).hasTables()))
.map(node -> {
if (node.getType().equals(NodeType.TABLE)) {
return TaasDocumentDataMapper.fromTableWithTableData((Table) node, structureObjectNumber.getAndIncrement());