add simplifiedText

This commit is contained in:
Kilian Schuettler 2023-07-31 15:29:47 +02:00
parent 7496914b37
commit 2a55654fcf
6 changed files with 40 additions and 3 deletions

View File

@ -1,4 +1,4 @@
package com.knecon.fforesight.service.layoutparser.processor.model.text;
package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction;
import lombok.AllArgsConstructor;
import lombok.Builder;

View File

@ -1,4 +1,4 @@
package com.knecon.fforesight.service.layoutparser.processor.model.text;
package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction;
import java.util.ArrayList;
import java.util.List;

View File

@ -18,6 +18,7 @@ public record LayoutParsingRequest(
String textBlockFileStorageId,
String positionBlockFileStorageId,
String pageFileStorageId,
String simplifiedTextStorageId,
String sectionGridStorageId) {
}

View File

@ -10,10 +10,12 @@ import org.springframework.stereotype.Service;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingFinishedEvent;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingRequest;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.SimplifiedSectionText;
import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.CvTableParsingAdapter;
import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.ImageServiceResponseAdapter;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
import com.knecon.fforesight.service.layoutparser.processor.services.SimplifiedSectionTextService;
import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
import com.knecon.fforesight.service.layoutparser.processor.services.mapper.DocumentDataMapper;
@ -43,6 +45,7 @@ public class LayoutParsingPipeline {
private final TaasClassificationService taasClassificationService;
private final RedactManagerClassificationService redactManagerClassificationService;
private final DocuMineClassificationService docuMineClassificationService;
private final SimplifiedSectionTextService simplifiedSectionTextService;
public LayoutParsingFinishedEvent parseLayoutAndSaveFilesToStorage(LayoutParsingRequest layoutParsingRequest) throws IOException {
@ -65,7 +68,7 @@ public class LayoutParsingPipeline {
layoutParsingStorageService.storeSectionGrid(layoutParsingRequest, sectionGridCreatorService.createSectionGrid(documentGraph));
layoutParsingStorageService.storeDocumentData(layoutParsingRequest, DocumentDataMapper.toDocumentData(documentGraph));
layoutParsingStorageService.storeSimplifiedText(layoutParsingRequest, simplifiedSectionTextService.toSimplifiedText(documentGraph));
if (layoutParsingRequest.layoutParsingType().equals(LayoutParsingType.TAAS)) {
var researchDocumentData = TaasDocumentDataMapper.fromDocument(documentGraph);
layoutParsingStorageService.storeResearchDocumentData(layoutParsingRequest, researchDocumentData);

View File

@ -18,6 +18,7 @@ import com.iqser.red.storage.commons.service.StorageService;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentData;
import com.knecon.fforesight.service.layoutparser.internal.api.data.taas.ResearchDocumentData;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingRequest;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.SimplifiedText;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
import com.knecon.fforesight.tenantcommons.TenantContext;
@ -110,4 +111,10 @@ public class LayoutParsingStorageService {
}
}
/**
 * Stores the given simplified text as a JSON object through the storage service.
 * <p>
 * The tenant id is read from {@link TenantContext}; the storage id is taken from
 * {@code layoutParsingRequest.simplifiedTextStorageId()}.
 *
 * @param layoutParsingRequest request that supplies the storage id for the simplified text
 * @param simplifiedText       the object handed to the storage service
 */
public void storeSimplifiedText(LayoutParsingRequest layoutParsingRequest, SimplifiedText simplifiedText) {

    // Resolve both identifiers up front, in the same order the call evaluates them.
    var tenantId = TenantContext.getTenantId();
    var storageId = layoutParsingRequest.simplifiedTextStorageId();

    storageService.storeJSONObject(tenantId, storageId, simplifiedText);
}
}

View File

@ -0,0 +1,26 @@
package com.knecon.fforesight.service.layoutparser.processor.services;
import java.util.List;
import org.springframework.stereotype.Service;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.SimplifiedSectionText;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Section;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.SimplifiedText;
/**
 * Converts a {@link Document} into its {@link SimplifiedText} form.
 */
@Service
public class SimplifiedSectionTextService {

    /**
     * Builds a {@link SimplifiedText} from the given document.
     * <p>
     * Every section returned by {@code document.getMainSections()} is mapped to one
     * {@link SimplifiedSectionText}; the result also carries the document's number of pages.
     *
     * @param document the document to convert
     * @return the simplified text built from the document's main sections and page count
     */
    public SimplifiedText toSimplifiedText(Document document) {

        List<SimplifiedSectionText> perSection = document.getMainSections()
                .stream()
                .map(mainSection -> toSimplifiedSectionText(mainSection))
                .toList();

        var result = SimplifiedText.builder();
        result.numberOfPages(document.getNumberOfPages());
        result.sectionTexts(perSection);
        return result.build();
    }


    /**
     * Builds the simplified representation of a single section.
     * <p>
     * The section number is the first element of the section's tree id and the text is the
     * search text of the section's text block.
     *
     * @param section the section to convert
     * @return the simplified section text
     */
    private SimplifiedSectionText toSimplifiedSectionText(Section section) {

        var firstTreeIdEntry = section.getTreeId().get(0);
        var searchText = section.getTextBlock().getSearchText();

        return SimplifiedSectionText.builder()
                .sectionNumber(firstTreeIdEntry)
                .text(searchText)
                .build();
    }

}