Merge branch 'RED-8481' into 'main'
RED-8481: Use visual layout parsing to detect signatures. See merge request fforesight/layout-parser!101
This commit is contained in:
commit
0979a267d4
@ -18,11 +18,15 @@ public record LayoutParsingRequest(
|
||||
|
||||
@Schema(description = "Path to the original PDF file.")//
|
||||
@NonNull String originFileStorageId,//
|
||||
|
||||
|
||||
@Schema(description = "Optional Path to the table extraction file.")//
|
||||
Optional<String> tablesFileStorageId,//
|
||||
@Schema(description = "Optional Path to the image classification file.")//
|
||||
Optional<String> imagesFileStorageId,//
|
||||
|
||||
@Schema(description = "Optional Path to the the visual layout parsing service file") Optional<String> visualLayoutParsingFileId,//
|
||||
|
||||
@Schema(description = "Path where the Document Structure File will be stored.")//
|
||||
@NonNull String structureFileStorageId,//
|
||||
@Schema(description = "Path where the Research Data File will be stored.")//
|
||||
|
||||
@ -33,9 +33,11 @@ import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPageB
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.text.TextPositionSequence;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.CvTableParsingAdapter;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.ImageServiceResponseAdapter;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.VisualLayoutParsingAdapter;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableCells;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.BodyTextFrameService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.RulingCleaningService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.SectionsBuilderService;
|
||||
@ -86,6 +88,7 @@ public class LayoutParsingPipeline {
|
||||
RedactManagerBlockificationService redactManagerBlockificationService;
|
||||
LayoutGridService layoutGridService;
|
||||
ObservationRegistry observationRegistry;
|
||||
VisualLayoutParsingAdapter visualLayoutParsingAdapter;
|
||||
|
||||
|
||||
public LayoutParsingFinishedEvent parseLayoutAndSaveFilesToStorage(LayoutParsingRequest layoutParsingRequest) throws IOException {
|
||||
@ -96,6 +99,11 @@ public class LayoutParsingPipeline {
|
||||
File originFile = layoutParsingStorageService.getOriginFile(layoutParsingRequest.originFileStorageId());
|
||||
File viewerDocumentFile = layoutParsingStorageService.getViewerDocFile(layoutParsingRequest.viewerDocumentStorageId()).orElse(originFile);
|
||||
|
||||
VisualLayoutParsingResponse visualLayoutParsingResponse = new VisualLayoutParsingResponse();
|
||||
if (layoutParsingRequest.visualLayoutParsingFileId().isPresent()) {
|
||||
visualLayoutParsingResponse = layoutParsingStorageService.getVisualLayoutParsingFile(layoutParsingRequest.visualLayoutParsingFileId().get());
|
||||
}
|
||||
|
||||
ImageServiceResponse imageServiceResponse = new ImageServiceResponse();
|
||||
if (layoutParsingRequest.imagesFileStorageId().isPresent()) {
|
||||
imageServiceResponse = layoutParsingStorageService.getImagesFile(layoutParsingRequest.imagesFileStorageId().get());
|
||||
@ -110,6 +118,7 @@ public class LayoutParsingPipeline {
|
||||
originFile,
|
||||
imageServiceResponse,
|
||||
tableServiceResponse,
|
||||
visualLayoutParsingResponse,
|
||||
layoutParsingRequest.identifier().toString());
|
||||
|
||||
log.info("Building document graph for {}", layoutParsingRequest.identifier());
|
||||
@ -198,12 +207,17 @@ public class LayoutParsingPipeline {
|
||||
File originFile,
|
||||
ImageServiceResponse imageServiceResponse,
|
||||
TableServiceResponse tableServiceResponse,
|
||||
VisualLayoutParsingResponse visualLayoutParsingResponse,
|
||||
String identifier) {
|
||||
|
||||
PDDocument originDocument = openDocument(originFile);
|
||||
addNumberOfPagesToTrace(originDocument.getNumberOfPages(), Files.size(originFile.toPath()));
|
||||
Map<Integer, List<TableCells>> pdfTableCells = cvTableParsingAdapter.buildCvParsedTablesPerPage(tableServiceResponse);
|
||||
Map<Integer, List<ClassifiedImage>> pdfImages = imageServiceResponseAdapter.buildClassifiedImagesPerPage(imageServiceResponse);
|
||||
Map<Integer, List<ClassifiedImage>> signatures = new HashMap<>();
|
||||
if(signatures.size() > 0) {
|
||||
visualLayoutParsingAdapter.buildExtractedSignaturesPerPage(visualLayoutParsingResponse);
|
||||
}
|
||||
|
||||
ClassificationDocument classificationDocument = new ClassificationDocument();
|
||||
List<ClassificationPage> classificationPages = new ArrayList<>();
|
||||
@ -264,6 +278,10 @@ public class LayoutParsingPipeline {
|
||||
imageServiceResponseAdapter.findOcr(classificationPage);
|
||||
}
|
||||
|
||||
if(signatures.containsKey(pageNumber)) {
|
||||
classificationPage.setImages(signatures.get(pageNumber));
|
||||
}
|
||||
|
||||
tableExtractionService.extractTables(cleanRulings, classificationPage);
|
||||
|
||||
buildPageStatistics(classificationPage);
|
||||
|
||||
@ -10,8 +10,6 @@ import java.nio.file.Paths;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
import java.util.Optional;
|
||||
|
||||
import org.apache.pdfbox.Loader;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
@ -22,6 +20,7 @@ import com.knecon.fforesight.service.layoutparser.internal.api.data.taas.Researc
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingRequest;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse;
|
||||
import com.knecon.fforesight.tenantcommons.TenantContext;
|
||||
|
||||
import io.micrometer.observation.annotation.Observed;
|
||||
@ -79,6 +78,14 @@ public class LayoutParsingStorageService {
|
||||
}
|
||||
}
|
||||
|
||||
public VisualLayoutParsingResponse getVisualLayoutParsingFile(String storageId) throws IOException {
|
||||
|
||||
try (InputStream inputStream = getObject(storageId)) {
|
||||
VisualLayoutParsingResponse visualLayoutParsingResponse = objectMapper.readValue(inputStream, VisualLayoutParsingResponse.class);
|
||||
return visualLayoutParsingResponse;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Observed(name = "LayoutParsingStorageService", contextualName = "store-document-data")
|
||||
public void storeDocumentData(LayoutParsingRequest layoutParsingRequest, DocumentData documentData) {
|
||||
|
||||
@ -0,0 +1,83 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.python_api.adapter;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.ImageType;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.image.ClassifiedImage;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingBox;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResult;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@Slf4j
|
||||
public class VisualLayoutParsingAdapter {
|
||||
|
||||
private static String SIGNATURES = "signature";
|
||||
|
||||
public Map<Integer, List<VisualLayoutParsingResult>> buildExtractedTablesPerPage(VisualLayoutParsingResponse visualLayoutParsingResponse) {
|
||||
|
||||
Map<Integer, List<VisualLayoutParsingResult>> tableCells = new HashMap<>();
|
||||
visualLayoutParsingResponse.getData()
|
||||
.forEach(tableData -> tableCells.computeIfAbsent(tableData.getPage_idx(), tableCell -> new ArrayList<>()).addAll(convertTableCells(tableData.getBoxes())));
|
||||
|
||||
return tableCells;
|
||||
|
||||
}
|
||||
|
||||
public Map<Integer, List<ClassifiedImage>> buildExtractedSignaturesPerPage(VisualLayoutParsingResponse visualLayoutParsingResponse) {
|
||||
|
||||
Map<Integer, List<ClassifiedImage>> signatures = new HashMap<>();
|
||||
visualLayoutParsingResponse.getData().forEach(tableData -> signatures.computeIfAbsent(tableData.getPage_idx(), tableCell -> new ArrayList<>()).addAll(convertSignatures(tableData.getPage_idx(), tableData.getBoxes())));
|
||||
|
||||
return signatures;
|
||||
}
|
||||
|
||||
|
||||
public List<VisualLayoutParsingResult> convertTableCells(List<VisualLayoutParsingBox> tableObjects) {
|
||||
|
||||
List<VisualLayoutParsingResult> parsedTableCells = new ArrayList<>();
|
||||
|
||||
tableObjects.stream().forEach(t -> {
|
||||
VisualLayoutParsingResult result = new VisualLayoutParsingResult();
|
||||
result.setX0(t.getBox().getX1());
|
||||
result.setX1(t.getBox().getX2());
|
||||
result.setY0(t.getBox().getY1());
|
||||
result.setY1(t.getBox().getY2());
|
||||
result.setWidth(result.getX1() - result.getX0());
|
||||
result.setHeight(result.getY1() - result.getY0());
|
||||
result.setLabel(t.getLabel());
|
||||
parsedTableCells.add(result);
|
||||
});
|
||||
|
||||
return parsedTableCells;
|
||||
|
||||
}
|
||||
|
||||
public List<ClassifiedImage> convertSignatures(int pageNumber, List<VisualLayoutParsingBox> tableObjects) {
|
||||
|
||||
List<ClassifiedImage> signatures = new ArrayList<>();
|
||||
|
||||
tableObjects.stream().forEach(t -> {
|
||||
if(t.getLabel().equals(SIGNATURES)) {
|
||||
ClassifiedImage signature = new ClassifiedImage(new Rectangle2D.Float(t.getBox().getX1(),t.getBox().getY1(),t.getBox().getX2() - t.getBox().getX1(),t.getBox().getY2() - t.getBox().getY1()),
|
||||
ImageType.SIGNATURE,false,pageNumber);
|
||||
|
||||
signatures.add(signature);
|
||||
}
|
||||
});
|
||||
|
||||
return signatures;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,18 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.python_api.model.table;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
/**
 * One labelled box of a visual layout parsing result.
 * Instances are read by VisualLayoutParsingAdapter, which turns them into table results and signature images.
 */
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class VisualLayoutParsingBox {
|
||||
|
||||
// Corner coordinates of the box.
private VisualLayoutParsingBoxValue box;
|
||||
// Class label of the box; the adapter treats the value "signature" as a signature.
private String label;
|
||||
// NOTE(review): presumably the confidence reported for the label - not read anywhere in the visible code, confirm.
private float probability;
|
||||
|
||||
}
|
||||
@ -0,0 +1,19 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.python_api.model.table;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
/**
 * Coordinates of a box given as two corner points (x1, y1) and (x2, y2).
 * VisualLayoutParsingAdapter derives the width as x2 - x1 and the height as y2 - y1.
 * NOTE(review): coordinate system and units are not visible here - confirm against the producing service.
 */
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class VisualLayoutParsingBoxValue {
|
||||
|
||||
// x coordinate of the first corner; used as the x origin of the resulting rectangle.
private float x1;
|
||||
// y coordinate of the first corner; used as the y origin of the resulting rectangle.
private float y1;
|
||||
// x coordinate of the second corner.
private float x2;
|
||||
// y coordinate of the second corner.
private float y2;
|
||||
|
||||
}
|
||||
@ -0,0 +1,21 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.python_api.model.table;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
/**
 * All boxes that the visual layout parsing result reports for a single page.
 */
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class VisualLayoutParsingData {
|
||||
|
||||
// Page the boxes belong to; VisualLayoutParsingAdapter uses it as the key of its per-page maps.
// The snake_case name determines the generated accessor getPage_idx().
// NOTE(review): whether this index is 0- or 1-based is not visible here - confirm before matching it to page numbers.
private int page_idx;
|
||||
|
||||
// Boxes found on the page; no initializer, so null unless set.
private List<VisualLayoutParsingBox> boxes;
|
||||
|
||||
}
|
||||
@ -0,0 +1,23 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.python_api.model.table;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
/**
 * Content of a visual layout parsing file.
 * None of the fields has an initializer, so an instance created with the no-args constructor has a null data list.
 */
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class VisualLayoutParsingResponse {
|
||||
|
||||
// Request metadata; not read by the adapter in the visible code.
private String dossierId;
|
||||
private String fileId;
|
||||
private String targetFileExtension;
|
||||
private String responseFileExtension;
|
||||
// NOTE(review): upper-case name presumably mirrors the key used in the stored file - confirm before renaming.
private String X_TENANT_ID;
|
||||
// Per-page results; may be null (see class comment).
private List<VisualLayoutParsingData> data;
|
||||
|
||||
}
|
||||
@ -0,0 +1,22 @@
|
||||
package com.knecon.fforesight.service.layoutparser.processor.python_api.model.table;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
/**
 * A box converted by VisualLayoutParsingAdapter.convertTableCells: both corner points plus the derived size and the label.
 */
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class VisualLayoutParsingResult {
|
||||
|
||||
// First corner; the adapter fills these from the source box's x1 / y1.
private float x0;
|
||||
private float y0;
|
||||
// Second corner; the adapter fills these from the source box's x2 / y2.
private float x1;
|
||||
private float y1;
|
||||
// Set by the adapter to x1 - x0.
private float width;
|
||||
// Set by the adapter to y1 - y0.
private float height;
|
||||
// Label copied unchanged from the source box.
private String label;
|
||||
|
||||
}
|
||||
@ -27,6 +27,7 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Do
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.textblock.TextBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.mapper.TaasDocumentDataMapper;
|
||||
import com.knecon.fforesight.service.layoutparser.server.utils.AbstractTest;
|
||||
@ -50,6 +51,7 @@ public class BdrJsonBuildTest extends AbstractTest {
|
||||
file,
|
||||
new ImageServiceResponse(),
|
||||
new TableServiceResponse(),
|
||||
new VisualLayoutParsingResponse(),
|
||||
file.toString()));
|
||||
}
|
||||
|
||||
|
||||
@ -33,6 +33,7 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Do
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.SemanticNode;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory;
|
||||
import com.knecon.fforesight.tenantcommons.TenantsClient;
|
||||
|
||||
@ -98,6 +99,7 @@ public class HeadlinesGoldStandardIntegrationTest {
|
||||
pdfFileResource.getFile(),
|
||||
new ImageServiceResponse(),
|
||||
new TableServiceResponse(),
|
||||
new VisualLayoutParsingResponse(),
|
||||
filePath));
|
||||
|
||||
var foundHeadlines = documentGraph.streamAllSubNodes()
|
||||
|
||||
@ -16,6 +16,7 @@ import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsi
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.mapper.DocumentDataMapper;
|
||||
import com.knecon.fforesight.service.layoutparser.server.utils.BuildDocumentTest;
|
||||
@ -58,6 +59,7 @@ public class DocumentGraphJsonWritingTest extends BuildDocumentTest {
|
||||
filename.toFile(),
|
||||
new ImageServiceResponse(),
|
||||
new TableServiceResponse(),
|
||||
new VisualLayoutParsingResponse(),
|
||||
filename.toFile().toString()));
|
||||
|
||||
DocumentData documentData = DocumentDataMapper.toDocumentData(documentGraph);
|
||||
|
||||
@ -12,6 +12,7 @@ import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsi
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.visualization.LayoutGridService;
|
||||
import com.knecon.fforesight.service.layoutparser.server.utils.BuildDocumentTest;
|
||||
@ -51,7 +52,7 @@ public class ViewerDocumentTest extends BuildDocumentTest {
|
||||
var tableResponse = mapper.readValue(new ClassPathResource(tableFileName).getInputStream(), TableServiceResponse.class);
|
||||
var documentFile = new ClassPathResource(fileName).getFile();
|
||||
|
||||
var classificationDocument = layoutParsingPipeline.parseLayout(LayoutParsingType.DOCUMINE, documentFile, new ImageServiceResponse(), tableResponse, Path.of(fileName).getFileName().toFile().toString());
|
||||
var classificationDocument = layoutParsingPipeline.parseLayout(LayoutParsingType.DOCUMINE, documentFile, new ImageServiceResponse(), tableResponse, new VisualLayoutParsingResponse(),Path.of(fileName).getFileName().toFile().toString());
|
||||
ViewerDocumentService viewerDocumentService = new ViewerDocumentService(null);
|
||||
LayoutGridService layoutGridService = new LayoutGridService(viewerDocumentService);
|
||||
Document document = DocumentGraphFactory.buildDocumentGraph(classificationDocument);
|
||||
|
||||
@ -33,6 +33,7 @@ import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.C
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.adapter.ImageServiceResponseAdapter;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.SectionsBuilderService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.classification.RedactManagerClassificationService;
|
||||
import com.knecon.fforesight.service.layoutparser.server.utils.AbstractTest;
|
||||
@ -67,6 +68,7 @@ public class PdfSegmentationServiceTest extends AbstractTest {
|
||||
originDocument,
|
||||
new ImageServiceResponse(),
|
||||
tableServiceResponse,
|
||||
new VisualLayoutParsingResponse(),
|
||||
"document");
|
||||
|
||||
redactManagerClassificationService.classifyDocument(classificationDocument);
|
||||
|
||||
@ -21,6 +21,7 @@ import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Ta
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.table.CleanRulings;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.image.ImageServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.PageContentExtractor;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.RulingCleaningService;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory;
|
||||
@ -80,11 +81,13 @@ public class RulingCleaningServiceTest extends BuildDocumentTest {
|
||||
filename.toFile(),
|
||||
new ImageServiceResponse(),
|
||||
new TableServiceResponse(),
|
||||
new VisualLayoutParsingResponse(),
|
||||
filename.toFile().toString()));
|
||||
Document documentGraphAfter = DocumentGraphFactory.buildDocumentGraph(layoutParsingPipeline.parseLayout(LayoutParsingType.REDACT_MANAGER,
|
||||
filename.toFile(),
|
||||
new ImageServiceResponse(),
|
||||
new TableServiceResponse(),
|
||||
new VisualLayoutParsingResponse(),
|
||||
filename.toFile().toString()));
|
||||
DocumentData documentDataBefore = DocumentDataMapper.toDocumentData(documentGraphBefore);
|
||||
DocumentData documentDataAfter = DocumentDataMapper.toDocumentData(documentGraphAfter);
|
||||
|
||||
@ -20,6 +20,7 @@ import org.springframework.context.annotation.Import;
|
||||
import org.springframework.context.annotation.Primary;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
import org.springframework.test.context.junit.jupiter.SpringExtension;
|
||||
import org.xmlunit.builder.Input;
|
||||
|
||||
import com.iqser.red.commons.jackson.ObjectMapperFactory;
|
||||
import com.iqser.red.storage.commons.service.StorageService;
|
||||
@ -51,6 +52,8 @@ public abstract class AbstractTest {
|
||||
private RabbitTemplate rabbitTemplate;
|
||||
|
||||
protected final static String ORIGIN_FILE_ID = "origin";
|
||||
|
||||
protected final static String VISUAL_LAYOUT_FILE = "visual";
|
||||
protected final static String TABLE_FILE_ID = "table";
|
||||
protected final static String IMAGE_FILE_ID = "image";
|
||||
protected final static String STRUCTURE_FILE_ID = "structure";
|
||||
@ -96,7 +99,7 @@ public abstract class AbstractTest {
|
||||
@SneakyThrows
|
||||
protected LayoutParsingRequest prepareStorage(String file) {
|
||||
|
||||
return prepareStorage(file, "cv_table_parsing_response/empty.json", "image_service_response/empty.json");
|
||||
return prepareStorage(file, "cv_table_parsing_response/empty.json", "image_service_response/empty.json","visual_layout_parsing_response/empty.json");
|
||||
}
|
||||
|
||||
|
||||
@ -116,6 +119,7 @@ public abstract class AbstractTest {
|
||||
.originFileStorageId(ORIGIN_FILE_ID)
|
||||
.tablesFileStorageId(Optional.of(TABLE_FILE_ID))
|
||||
.imagesFileStorageId(Optional.of(IMAGE_FILE_ID))
|
||||
.visualLayoutParsingFileId(Optional.of(VISUAL_LAYOUT_FILE))
|
||||
.structureFileStorageId(STRUCTURE_FILE_ID)
|
||||
.textBlockFileStorageId(TEXT_FILE_ID)
|
||||
.positionBlockFileStorageId(POSITION_FILE_ID)
|
||||
@ -136,6 +140,16 @@ public abstract class AbstractTest {
|
||||
return prepareStorage(pdfFileResource.getInputStream(), cvServiceResponseFileResource.getInputStream(), imageInfoFileResource.getInputStream());
|
||||
}
|
||||
|
||||
@SneakyThrows
|
||||
protected LayoutParsingRequest prepareStorage(String file, String cvServiceResponseFile, String imageInfoFile, String visualLayoutParsingResponseFile) {
|
||||
|
||||
ClassPathResource pdfFileResource = new ClassPathResource(file);
|
||||
ClassPathResource cvServiceResponseFileResource = new ClassPathResource(cvServiceResponseFile);
|
||||
ClassPathResource imageInfoFileResource = new ClassPathResource(imageInfoFile);
|
||||
ClassPathResource visualLayoutParsingResponseResource = new ClassPathResource(visualLayoutParsingResponseFile);
|
||||
|
||||
return prepareStorage(pdfFileResource.getInputStream(), cvServiceResponseFileResource.getInputStream(), imageInfoFileResource.getInputStream(), visualLayoutParsingResponseResource.getInputStream());
|
||||
}
|
||||
|
||||
@SneakyThrows
|
||||
protected LayoutParsingRequest prepareStorage(InputStream fileStream, InputStream cvServiceResponseFileStream, InputStream imageInfoStream) {
|
||||
@ -147,6 +161,17 @@ public abstract class AbstractTest {
|
||||
return buildDefaultLayoutParsingRequest(LayoutParsingType.REDACT_MANAGER);
|
||||
}
|
||||
|
||||
@SneakyThrows
|
||||
protected LayoutParsingRequest prepareStorage(InputStream fileStream, InputStream cvServiceResponseFileStream, InputStream imageInfoStream, InputStream visualLayoutParsingResponseFileStream) {
|
||||
|
||||
storageService.storeObject(TenantContext.getTenantId(), IMAGE_FILE_ID, imageInfoStream);
|
||||
storageService.storeObject(TenantContext.getTenantId(), TABLE_FILE_ID, cvServiceResponseFileStream);
|
||||
storageService.storeObject(TenantContext.getTenantId(), ORIGIN_FILE_ID, fileStream);
|
||||
storageService.storeObject(TenantContext.getTenantId(),VISUAL_LAYOUT_FILE,visualLayoutParsingResponseFileStream );
|
||||
|
||||
return buildDefaultLayoutParsingRequest(LayoutParsingType.REDACT_MANAGER);
|
||||
}
|
||||
|
||||
|
||||
@AfterEach
|
||||
public void cleanupStorage() {
|
||||
|
||||
@ -10,6 +10,7 @@ import com.knecon.fforesight.service.layoutparser.processor.LayoutParsingPipelin
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.ClassificationDocument;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.model.graph.nodes.Document;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.TableServiceResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.python_api.model.table.VisualLayoutParsingResponse;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.services.factory.DocumentGraphFactory;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
@ -25,7 +26,7 @@ public abstract class BuildDocumentTest extends AbstractTest {
|
||||
|
||||
File fileResource = new ClassPathResource(filename).getFile();
|
||||
prepareStorage(filename);
|
||||
return layoutParsingPipeline.parseLayout(layoutParsingType, fileResource, layoutParsingStorageService.getImagesFile(IMAGE_FILE_ID), new TableServiceResponse(), filename);
|
||||
return layoutParsingPipeline.parseLayout(layoutParsingType, fileResource, layoutParsingStorageService.getImagesFile(IMAGE_FILE_ID), new TableServiceResponse(), new VisualLayoutParsingResponse(),filename);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -0,0 +1,8 @@
|
||||
{
|
||||
"dossierId": "123",
|
||||
"fileId": "123",
|
||||
"targetFileExtension": "ORIGIN.pdf.gz",
|
||||
"responseFileExtension": "EXTRACTED_TABLES.json.gz",
|
||||
"data": [],
|
||||
"X_TENANT_ID": ""
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user