store new document type

This commit is contained in:
Timo Bejan 2023-07-13 13:00:56 +03:00
parent 15a6d46f5c
commit 3bc88bc9b7
6 changed files with 19 additions and 3 deletions

View File

@ -13,6 +13,7 @@ public record LayoutParsingRequest(
Optional<String> tablesFileStorageId,
Optional<String> imagesFileStorageId,
String structureFileStorageId,
String researchDocumentStorageId,
String textBlockFileStorageId,
String positionBlockFileStorageId,
String pageFileStorageId) {

View File

@ -7,6 +7,7 @@ import java.io.IOException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.springframework.stereotype.Service;
import com.knecon.fforesight.service.layoutparser.internal.api.data.taas.ResearchDocumentData;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingFinishedEvent;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingRequest;
import com.knecon.fforesight.service.layoutparser.processor.adapter.CvTableParsingAdapter;
@ -20,6 +21,7 @@ import com.knecon.fforesight.service.layoutparser.processor.classification.servi
import com.knecon.fforesight.service.layoutparser.processor.factory.DocumentGraphFactory;
import com.knecon.fforesight.service.layoutparser.processor.graph.nodes.Document;
import com.knecon.fforesight.service.layoutparser.processor.mapper.redaction.DocumentDataMapper;
import com.knecon.fforesight.service.layoutparser.processor.mapper.taas.TaasDocumentDataMapper;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
@ -56,7 +58,9 @@ public class LayoutParsingService {
int numberOfPages = originDocument.getNumberOfPages();
originDocument.close();
layoutParsingStorageService.storeDocumentData(layoutParsingRequest, DocumentDataMapper.toDocumentData(documentGraph));
var researchDocumentData = TaasDocumentDataMapper.fromDocument(documentGraph);
layoutParsingStorageService.storeDocumentData(layoutParsingRequest, researchDocumentData, DocumentDataMapper.toDocumentData(documentGraph));
return LayoutParsingFinishedEvent.builder()
.identifier(layoutParsingRequest.identifier())

View File

@ -19,6 +19,7 @@ import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.At
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentData;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentTreeData;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.PageData;
import com.knecon.fforesight.service.layoutparser.internal.api.data.taas.ResearchDocumentData;
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingRequest;
import com.knecon.fforesight.service.layoutparser.processor.adapter.model.image.ImageServiceResponse;
import com.knecon.fforesight.service.layoutparser.processor.adapter.model.table.TableServiceResponse;
@ -67,8 +68,9 @@ public class LayoutParsingStorageService {
}
public void storeDocumentData(LayoutParsingRequest layoutParsingRequest, DocumentData documentData) throws IOException {
public void storeDocumentData(LayoutParsingRequest layoutParsingRequest, ResearchDocumentData researchDocumentData, DocumentData documentData) {
storageService.storeJSONObject(TenantContext.getTenantId(), layoutParsingRequest.researchDocumentStorageId(), researchDocumentData);
storageService.storeJSONObject(TenantContext.getTenantId(), layoutParsingRequest.structureFileStorageId(), documentData.getDocumentTreeData());
storageService.storeJSONObject(TenantContext.getTenantId(), layoutParsingRequest.textBlockFileStorageId(), documentData.getAtomicTextBlocks());
storageService.storeJSONObject(TenantContext.getTenantId(), layoutParsingRequest.positionBlockFileStorageId(), documentData.getAtomicPositionBlocks());

View File

@ -14,6 +14,7 @@ import java.util.List;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
@ -50,6 +51,7 @@ public class BdrJsonBuildTest extends BaseTest {
@Test
@Disabled
public void writeBDRDocumentData() throws IOException {
String sourcePath = "/tmp/bdr_files";

View File

@ -6,6 +6,7 @@ import java.io.InputStream;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.ClassPathResource;
@ -23,6 +24,7 @@ public class BuildDocumentGraphTest extends BaseTest {
private LayoutParsingService layoutParsingService;
@Test
@Disabled
public void buildMetolachlor() {
Document documentGraph = buildGraph("files/S-Metolachlor_RAR_01_Volume_1_2018-09-06");

View File

@ -1,5 +1,6 @@
package com.knecon.fforesight.service.layoutparser.server.graph;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentData;
@ -7,6 +8,7 @@ import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsi
import com.knecon.fforesight.service.layoutparser.processor.graph.nodes.Document;
import com.knecon.fforesight.service.layoutparser.processor.mapper.redaction.DocumentDataMapper;
import com.knecon.fforesight.service.layoutparser.processor.mapper.redaction.DocumentGraphMapper;
import com.knecon.fforesight.service.layoutparser.processor.mapper.taas.TaasDocumentDataMapper;
import lombok.SneakyThrows;
@ -14,13 +16,16 @@ public class DocumentGraphMappingTest extends BuildDocumentGraphTest {
@Test
@SneakyThrows
@Disabled
public void testGraphMapping() {
Document document = buildGraph("files/crafted document");
LayoutParsingRequest layoutParsingRequest = buildStandardLayoutParsingRequest();
DocumentData documentData = DocumentDataMapper.toDocumentData(document);
layoutParsingStorageService.storeDocumentData(layoutParsingRequest, documentData);
var researchDocumentData = TaasDocumentDataMapper.fromDocument(document);
layoutParsingStorageService.storeDocumentData(layoutParsingRequest, researchDocumentData, documentData);
DocumentData documentData2 = layoutParsingStorageService.readDocumentData(layoutParsingRequest);
Document newDocumentGraph = DocumentGraphMapper.toDocumentGraph(documentData2);