add simplifiedText

This commit is contained in:
Kilian Schuettler 2023-07-31 15:29:47 +02:00
parent 7496914b37
commit 2a55654fcf
6 changed files with 40 additions and 3 deletions

View File

@ -1,4 +1,4 @@
package com.knecon.fforesight.service.layoutparser.processor.model.text; package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction;
import lombok.AllArgsConstructor; import lombok.AllArgsConstructor;
import lombok.Builder; import lombok.Builder;

View File

@ -1,4 +1,4 @@
package com.knecon.fforesight.service.layoutparser.processor.model.text; package com.knecon.fforesight.service.layoutparser.internal.api.data.redaction;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;

View File

@ -18,6 +18,7 @@ public record LayoutParsingRequest(
String textBlockFileStorageId, String textBlockFileStorageId,
String positionBlockFileStorageId, String positionBlockFileStorageId,
String pageFileStorageId, String pageFileStorageId,
String simplifiedTextStorageId,
String sectionGridStorageId) { String sectionGridStorageId) {
} }

View File

@ -10,10 +10,12 @@ import org.springframework.stereotype.Service;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingFinishedEvent; import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingFinishedEvent;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingRequest; import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingRequest;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType; import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingType;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.SimplifiedSectionText;
import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.CvTableParsingAdapter; import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.CvTableParsingAdapter;
import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.ImageServiceResponseAdapter; import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.ImageServiceResponseAdapter;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
import com.knecon.fforesight.service.layoutparser.processor.services.SimplifiedSectionTextService;
import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory; import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document; import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
import com.knecon.fforesight.service.layoutparser.processor.services.mapper.DocumentDataMapper; import com.knecon.fforesight.service.layoutparser.processor.services.mapper.DocumentDataMapper;
@ -43,6 +45,7 @@ public class LayoutParsingPipeline {
private final TaasClassificationService taasClassificationService; private final TaasClassificationService taasClassificationService;
private final RedactManagerClassificationService redactManagerClassificationService; private final RedactManagerClassificationService redactManagerClassificationService;
private final DocuMineClassificationService docuMineClassificationService; private final DocuMineClassificationService docuMineClassificationService;
private final SimplifiedSectionTextService simplifiedSectionTextService;
public LayoutParsingFinishedEvent parseLayoutAndSaveFilesToStorage(LayoutParsingRequest layoutParsingRequest) throws IOException { public LayoutParsingFinishedEvent parseLayoutAndSaveFilesToStorage(LayoutParsingRequest layoutParsingRequest) throws IOException {
@ -65,7 +68,7 @@ public class LayoutParsingPipeline {
layoutParsingStorageService.storeSectionGrid(layoutParsingRequest, sectionGridCreatorService.createSectionGrid(documentGraph)); layoutParsingStorageService.storeSectionGrid(layoutParsingRequest, sectionGridCreatorService.createSectionGrid(documentGraph));
layoutParsingStorageService.storeDocumentData(layoutParsingRequest, DocumentDataMapper.toDocumentData(documentGraph)); layoutParsingStorageService.storeDocumentData(layoutParsingRequest, DocumentDataMapper.toDocumentData(documentGraph));
layoutParsingStorageService.storeSimplifiedText(layoutParsingRequest, simplifiedSectionTextService.toSimplifiedText(documentGraph));
if (layoutParsingRequest.layoutParsingType().equals(LayoutParsingType.TAAS)) { if (layoutParsingRequest.layoutParsingType().equals(LayoutParsingType.TAAS)) {
var researchDocumentData = TaasDocumentDataMapper.fromDocument(documentGraph); var researchDocumentData = TaasDocumentDataMapper.fromDocument(documentGraph);
layoutParsingStorageService.storeResearchDocumentData(layoutParsingRequest, researchDocumentData); layoutParsingStorageService.storeResearchDocumentData(layoutParsingRequest, researchDocumentData);

View File

@ -18,6 +18,7 @@ import com.iqser.red.storage.commons.service.StorageService;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentData; import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentData;
import com.knecon.fforesight.service.layoutparser.internal.api.data.taas.ResearchDocumentData; import com.knecon.fforesight.service.layoutparser.internal.api.data.taas.ResearchDocumentData;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingRequest; import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingRequest;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.SimplifiedText;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse; import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
import com.knecon.fforesight.tenantcommons.TenantContext; import com.knecon.fforesight.tenantcommons.TenantContext;
@ -110,4 +111,10 @@ public class LayoutParsingStorageService {
} }
} }
public void storeSimplifiedText(LayoutParsingRequest layoutParsingRequest, SimplifiedText simplifiedText) {
storageService.storeJSONObject(TenantContext.getTenantId(), layoutParsingRequest.simplifiedTextStorageId(), simplifiedText);
}
} }

View File

@ -0,0 +1,26 @@
package com.knecon.fforesight.service.layoutparser.processor.services;
import java.util.List;
import org.springframework.stereotype.Service;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.SimplifiedSectionText;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Section;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.SimplifiedText;
@Service
public class SimplifiedSectionTextService {
public SimplifiedText toSimplifiedText(Document document) {
List<SimplifiedSectionText> simplifiedSectionTexts = document.getMainSections().stream().map(this::toSimplifiedSectionText).toList();
return SimplifiedText.builder().numberOfPages(document.getNumberOfPages()).sectionTexts(simplifiedSectionTexts).build();
}
private SimplifiedSectionText toSimplifiedSectionText(Section section) {
return SimplifiedSectionText.builder().sectionNumber(section.getTreeId().get(0)).text(section.getTextBlock().getSearchText()).build();
}
}