rename Data classes
This commit is contained in:
parent
653f280fd1
commit
47fd8e05d1
@ -12,10 +12,10 @@ import lombok.experimental.FieldDefaults;
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public class DocumentData {
|
||||
|
||||
PageData[] pages;
|
||||
AtomicTextBlockData[] atomicTextBlocks;
|
||||
AtomicPositionBlockData[] atomicPositionBlocks;
|
||||
DocumentTreeData documentTreeData;
|
||||
DocumentPage[] pages;
|
||||
DocumentText[] atomicTextBlocks;
|
||||
DocumentPositions[] atomicPositionBlocks;
|
||||
DocumentStructure documentStructure;
|
||||
|
||||
|
||||
}
|
||||
|
||||
@ -10,7 +10,7 @@ import lombok.experimental.FieldDefaults;
|
||||
@Builder
|
||||
@AllArgsConstructor
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public class PageData {
|
||||
public class DocumentPage {
|
||||
|
||||
int number;
|
||||
int height;
|
||||
@ -10,7 +10,7 @@ import lombok.experimental.FieldDefaults;
|
||||
@Builder
|
||||
@AllArgsConstructor
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public class AtomicPositionBlockData {
|
||||
public class DocumentPositions {
|
||||
|
||||
Long id;
|
||||
int[] stringIdxToPositionIdx;
|
||||
@ -17,7 +17,7 @@ import lombok.experimental.FieldDefaults;
|
||||
@AllArgsConstructor
|
||||
@NoArgsConstructor
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
public class DocumentTreeData {
|
||||
public class DocumentStructure {
|
||||
|
||||
EntryData root;
|
||||
|
||||
@ -37,7 +37,7 @@ public class DocumentTreeData {
|
||||
|
||||
public Stream<EntryData> streamAllEntries() {
|
||||
|
||||
return Stream.concat(Stream.of(root), root.children.stream()).flatMap(DocumentTreeData::flatten);
|
||||
return Stream.concat(Stream.of(root), root.children.stream()).flatMap(DocumentStructure::flatten);
|
||||
}
|
||||
|
||||
|
||||
@ -49,7 +49,7 @@ public class DocumentTreeData {
|
||||
|
||||
private static Stream<EntryData> flatten(EntryData entry) {
|
||||
|
||||
return Stream.concat(Stream.of(entry), entry.children.stream().flatMap(DocumentTreeData::flatten));
|
||||
return Stream.concat(Stream.of(entry), entry.children.stream().flatMap(DocumentStructure::flatten));
|
||||
}
|
||||
|
||||
|
||||
@ -12,7 +12,7 @@ import lombok.experimental.FieldDefaults;
|
||||
@Builder
|
||||
@AllArgsConstructor
|
||||
@FieldDefaults(makeFinal = true, level = AccessLevel.PRIVATE)
|
||||
public class AtomicTextBlockData {
|
||||
public class DocumentText {
|
||||
|
||||
Long id;
|
||||
Long page;
|
||||
@ -15,11 +15,11 @@ import org.springframework.stereotype.Service;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.iqser.red.service.persistence.service.v1.api.shared.model.redactionlog.section.SectionGrid;
|
||||
import com.iqser.red.storage.commons.service.StorageService;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.AtomicPositionBlockData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.AtomicTextBlockData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositions;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentText;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentTreeData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.PageData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructure;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPage;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.taas.ResearchDocumentData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.queue.LayoutParsingRequest;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.adapter.model.image.ImageServiceResponse;
|
||||
@ -71,7 +71,7 @@ public class LayoutParsingStorageService {
|
||||
|
||||
public void storeDocumentData(LayoutParsingRequest layoutParsingRequest, DocumentData documentData) {
|
||||
|
||||
storageService.storeJSONObject(TenantContext.getTenantId(), layoutParsingRequest.structureFileStorageId(), documentData.getDocumentTreeData());
|
||||
storageService.storeJSONObject(TenantContext.getTenantId(), layoutParsingRequest.structureFileStorageId(), documentData.getDocumentStructure());
|
||||
storageService.storeJSONObject(TenantContext.getTenantId(), layoutParsingRequest.textBlockFileStorageId(), documentData.getAtomicTextBlocks());
|
||||
storageService.storeJSONObject(TenantContext.getTenantId(), layoutParsingRequest.positionBlockFileStorageId(), documentData.getAtomicPositionBlocks());
|
||||
storageService.storeJSONObject(TenantContext.getTenantId(), layoutParsingRequest.pageFileStorageId(), documentData.getPages());
|
||||
@ -92,20 +92,20 @@ public class LayoutParsingStorageService {
|
||||
|
||||
public DocumentData readDocumentData(LayoutParsingRequest layoutParsingRequest) throws IOException {
|
||||
|
||||
PageData[] pageData = storageService.readJSONObject(TenantContext.getTenantId(), layoutParsingRequest.pageFileStorageId(), PageData[].class);
|
||||
AtomicTextBlockData[] atomicTextBlockData = storageService.readJSONObject(TenantContext.getTenantId(),
|
||||
DocumentPage[] documentPageData = storageService.readJSONObject(TenantContext.getTenantId(), layoutParsingRequest.pageFileStorageId(), DocumentPage[].class);
|
||||
DocumentText[] documentTextBlockData = storageService.readJSONObject(TenantContext.getTenantId(),
|
||||
layoutParsingRequest.textBlockFileStorageId(),
|
||||
AtomicTextBlockData[].class);
|
||||
AtomicPositionBlockData[] atomicPositionBlockData = storageService.readJSONObject(TenantContext.getTenantId(),
|
||||
DocumentText[].class);
|
||||
DocumentPositions[] atomicPositionBlockData = storageService.readJSONObject(TenantContext.getTenantId(),
|
||||
layoutParsingRequest.positionBlockFileStorageId(),
|
||||
AtomicPositionBlockData[].class);
|
||||
DocumentTreeData tableOfContentsData = storageService.readJSONObject(TenantContext.getTenantId(), layoutParsingRequest.structureFileStorageId(), DocumentTreeData.class);
|
||||
DocumentPositions[].class);
|
||||
DocumentStructure tableOfContentsData = storageService.readJSONObject(TenantContext.getTenantId(), layoutParsingRequest.structureFileStorageId(), DocumentStructure.class);
|
||||
|
||||
return DocumentData.builder()
|
||||
.documentTreeData(tableOfContentsData)
|
||||
.documentStructure(tableOfContentsData)
|
||||
.atomicPositionBlocks(atomicPositionBlockData)
|
||||
.atomicTextBlocks(atomicTextBlockData)
|
||||
.pages(pageData)
|
||||
.atomicTextBlocks(documentTextBlockData)
|
||||
.pages(documentPageData)
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
@ -11,8 +11,8 @@ import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.AtomicPositionBlockData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.AtomicTextBlockData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositions;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentText;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.graph.Boundary;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.graph.nodes.Page;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.graph.nodes.SemanticNode;
|
||||
@ -109,20 +109,20 @@ public class AtomicTextBlock implements TextBlock {
|
||||
}
|
||||
|
||||
|
||||
public static AtomicTextBlock fromAtomicTextBlockData(AtomicTextBlockData atomicTextBlockData,
|
||||
AtomicPositionBlockData atomicPositionBlockData,
|
||||
public static AtomicTextBlock fromAtomicTextBlockData(DocumentText documentText,
|
||||
DocumentPositions documentPositions,
|
||||
SemanticNode parent,
|
||||
Page page) {
|
||||
|
||||
return AtomicTextBlock.builder()
|
||||
.id(atomicTextBlockData.getId())
|
||||
.numberOnPage(atomicTextBlockData.getNumberOnPage())
|
||||
.id(documentText.getId())
|
||||
.numberOnPage(documentText.getNumberOnPage())
|
||||
.page(page)
|
||||
.boundary(new Boundary(atomicTextBlockData.getStart(), atomicTextBlockData.getEnd()))
|
||||
.searchText(atomicTextBlockData.getSearchText())
|
||||
.lineBreaks(Arrays.stream(atomicTextBlockData.getLineBreaks()).boxed().toList())
|
||||
.stringIdxToPositionIdx(Arrays.stream(atomicPositionBlockData.getStringIdxToPositionIdx()).boxed().toList())
|
||||
.positions(toRectangle2DList(atomicPositionBlockData.getPositions()))
|
||||
.boundary(new Boundary(documentText.getStart(), documentText.getEnd()))
|
||||
.searchText(documentText.getSearchText())
|
||||
.lineBreaks(Arrays.stream(documentText.getLineBreaks()).boxed().toList())
|
||||
.stringIdxToPositionIdx(Arrays.stream(documentPositions.getStringIdxToPositionIdx()).boxed().toList())
|
||||
.positions(toRectangle2DList(documentPositions.getPositions()))
|
||||
.parent(parent)
|
||||
.build();
|
||||
}
|
||||
|
||||
@ -5,11 +5,11 @@ import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.AtomicPositionBlockData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.AtomicTextBlockData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositions;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentText;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.PageData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentTreeData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPage;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructure;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.graph.DocumentTree;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.graph.nodes.Document;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.graph.nodes.Image;
|
||||
@ -26,36 +26,36 @@ public class DocumentDataMapper {
|
||||
|
||||
public DocumentData toDocumentData(Document document) {
|
||||
|
||||
List<AtomicTextBlockData> atomicTextBlockData = document.streamTerminalTextBlocksInOrder()
|
||||
List<DocumentText> documentTextBlockData = document.streamTerminalTextBlocksInOrder()
|
||||
.flatMap(textBlock -> textBlock.getAtomicTextBlocks().stream())
|
||||
.distinct()
|
||||
.map(DocumentDataMapper::toAtomicTextBlockData)
|
||||
.toList();
|
||||
|
||||
List<AtomicPositionBlockData> atomicPositionBlockData = document.streamTerminalTextBlocksInOrder()
|
||||
List<DocumentPositions> atomicPositionBlockData = document.streamTerminalTextBlocksInOrder()
|
||||
.flatMap(textBlock -> textBlock.getAtomicTextBlocks().stream())
|
||||
.distinct()
|
||||
.map(DocumentDataMapper::toAtomicPositionBlockData)
|
||||
.toList();
|
||||
|
||||
List<PageData> pageData = document.getPages().stream().map(DocumentDataMapper::toPageData).toList();
|
||||
DocumentTreeData tableOfContentsData = toDocumentTreeData(document.getDocumentTree());
|
||||
List<DocumentPage> documentPageData = document.getPages().stream().map(DocumentDataMapper::toPageData).toList();
|
||||
DocumentStructure tableOfContentsData = toDocumentTreeData(document.getDocumentTree());
|
||||
return DocumentData.builder()
|
||||
.atomicTextBlocks(atomicTextBlockData.toArray(new AtomicTextBlockData[0]))
|
||||
.atomicPositionBlocks(atomicPositionBlockData.toArray(new AtomicPositionBlockData[0]))
|
||||
.pages(pageData.toArray(new PageData[0]))
|
||||
.documentTreeData(tableOfContentsData)
|
||||
.atomicTextBlocks(documentTextBlockData.toArray(new DocumentText[0]))
|
||||
.atomicPositionBlocks(atomicPositionBlockData.toArray(new DocumentPositions[0]))
|
||||
.pages(documentPageData.toArray(new DocumentPage[0]))
|
||||
.documentStructure(tableOfContentsData)
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
private DocumentTreeData toDocumentTreeData(DocumentTree documentTree) {
|
||||
private DocumentStructure toDocumentTreeData(DocumentTree documentTree) {
|
||||
|
||||
return new DocumentTreeData(toEntryData(documentTree.getRoot()));
|
||||
return new DocumentStructure(toEntryData(documentTree.getRoot()));
|
||||
}
|
||||
|
||||
|
||||
private DocumentTreeData.EntryData toEntryData(DocumentTree.Entry entry) {
|
||||
private DocumentStructure.EntryData toEntryData(DocumentTree.Entry entry) {
|
||||
|
||||
Long[] atomicTextBlocks;
|
||||
|
||||
@ -72,7 +72,7 @@ public class DocumentDataMapper {
|
||||
default -> new HashMap<>();
|
||||
};
|
||||
|
||||
return DocumentTreeData.EntryData.builder()
|
||||
return DocumentStructure.EntryData.builder()
|
||||
.treeId(toPrimitiveIntArray(entry.getTreeId()))
|
||||
.children(entry.getChildren().stream().map(DocumentDataMapper::toEntryData).toList())
|
||||
.type(entry.getType())
|
||||
@ -89,15 +89,15 @@ public class DocumentDataMapper {
|
||||
}
|
||||
|
||||
|
||||
private PageData toPageData(Page p) {
|
||||
private DocumentPage toPageData(Page p) {
|
||||
|
||||
return PageData.builder().rotation(p.getRotation()).height(p.getHeight()).width(p.getWidth()).number(p.getNumber()).build();
|
||||
return DocumentPage.builder().rotation(p.getRotation()).height(p.getHeight()).width(p.getWidth()).number(p.getNumber()).build();
|
||||
}
|
||||
|
||||
|
||||
private AtomicTextBlockData toAtomicTextBlockData(AtomicTextBlock atomicTextBlock) {
|
||||
private DocumentText toAtomicTextBlockData(AtomicTextBlock atomicTextBlock) {
|
||||
|
||||
return AtomicTextBlockData.builder()
|
||||
return DocumentText.builder()
|
||||
.id(atomicTextBlock.getId())
|
||||
.page(atomicTextBlock.getPage().getNumber().longValue())
|
||||
.searchText(atomicTextBlock.getSearchText())
|
||||
@ -109,9 +109,9 @@ public class DocumentDataMapper {
|
||||
}
|
||||
|
||||
|
||||
private AtomicPositionBlockData toAtomicPositionBlockData(AtomicTextBlock atomicTextBlock) {
|
||||
private DocumentPositions toAtomicPositionBlockData(AtomicTextBlock atomicTextBlock) {
|
||||
|
||||
return AtomicPositionBlockData.builder()
|
||||
return DocumentPositions.builder()
|
||||
.id(atomicTextBlock.getId())
|
||||
.positions(toPrimitiveFloatMatrix(atomicTextBlock.getPositions()))
|
||||
.stringIdxToPositionIdx(toPrimitiveIntArray(atomicTextBlock.getStringIdxToPositionIdx()))
|
||||
|
||||
@ -7,11 +7,11 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.NoSuchElementException;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.AtomicPositionBlockData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.AtomicTextBlockData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositions;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentText;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentTreeData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.PageData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructure;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPage;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.graph.DocumentTree;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.graph.nodes.Document;
|
||||
import com.knecon.fforesight.service.layoutparser.processor.graph.nodes.Footer;
|
||||
@ -41,7 +41,7 @@ public class DocumentGraphMapper {
|
||||
|
||||
context.pages.addAll(Arrays.stream(documentData.getPages()).map(DocumentGraphMapper::buildPage).toList());
|
||||
|
||||
context.documentTree.getRoot().getChildren().addAll(buildEntries(documentData.getDocumentTreeData().getRoot().getChildren(), context));
|
||||
context.documentTree.getRoot().getChildren().addAll(buildEntries(documentData.getDocumentStructure().getRoot().getChildren(), context));
|
||||
|
||||
document.setDocumentTree(context.documentTree);
|
||||
document.setPages(new HashSet<>(context.pages));
|
||||
@ -52,10 +52,10 @@ public class DocumentGraphMapper {
|
||||
}
|
||||
|
||||
|
||||
private List<DocumentTree.Entry> buildEntries(List<DocumentTreeData.EntryData> entries, Context context) {
|
||||
private List<DocumentTree.Entry> buildEntries(List<DocumentStructure.EntryData> entries, Context context) {
|
||||
|
||||
List<DocumentTree.Entry> newEntries = new LinkedList<>();
|
||||
for (DocumentTreeData.EntryData entryData : entries) {
|
||||
for (DocumentStructure.EntryData entryData : entries) {
|
||||
|
||||
List<Page> pages = Arrays.stream(entryData.getPageNumbers()).map(pageNumber -> getPage(pageNumber, context)).toList();
|
||||
|
||||
@ -154,14 +154,14 @@ public class DocumentGraphMapper {
|
||||
|
||||
private AtomicTextBlock getAtomicTextBlock(Context context, SemanticNode parent, Long atomicTextBlockId) {
|
||||
|
||||
return AtomicTextBlock.fromAtomicTextBlockData(context.atomicTextBlockData.get(Math.toIntExact(atomicTextBlockId)),
|
||||
return AtomicTextBlock.fromAtomicTextBlockData(context.documentTextBlockData.get(Math.toIntExact(atomicTextBlockId)),
|
||||
context.atomicPositionBlockData.get(Math.toIntExact(atomicTextBlockId)),
|
||||
parent,
|
||||
getPage(context.atomicTextBlockData.get(Math.toIntExact(atomicTextBlockId)).getPage(), context));
|
||||
getPage(context.documentTextBlockData.get(Math.toIntExact(atomicTextBlockId)).getPage(), context));
|
||||
}
|
||||
|
||||
|
||||
private Page buildPage(PageData p) {
|
||||
private Page buildPage(DocumentPage p) {
|
||||
|
||||
return Page.builder().rotation(p.getRotation()).height(p.getHeight()).width(p.getWidth()).number(p.getNumber()).mainBody(new LinkedList<>()).build();
|
||||
}
|
||||
@ -180,15 +180,15 @@ public class DocumentGraphMapper {
|
||||
|
||||
private final DocumentTree documentTree;
|
||||
private final List<Page> pages;
|
||||
private final List<AtomicTextBlockData> atomicTextBlockData;
|
||||
private final List<AtomicPositionBlockData> atomicPositionBlockData;
|
||||
private final List<DocumentText> documentTextBlockData;
|
||||
private final List<DocumentPositions> atomicPositionBlockData;
|
||||
|
||||
|
||||
Context(DocumentData documentData, DocumentTree documentTree) {
|
||||
|
||||
this.documentTree = documentTree;
|
||||
this.pages = new LinkedList<>();
|
||||
this.atomicTextBlockData = Arrays.stream(documentData.getAtomicTextBlocks()).toList();
|
||||
this.documentTextBlockData = Arrays.stream(documentData.getAtomicTextBlocks()).toList();
|
||||
this.atomicPositionBlockData = Arrays.stream(documentData.getAtomicPositionBlocks()).toList();
|
||||
|
||||
}
|
||||
|
||||
@ -29,7 +29,7 @@ public class DocumentGraphJsonWritingTest extends BuildDocumentGraphTest {
|
||||
Document documentGraph = buildGraph(filename);
|
||||
DocumentData documentData = DocumentDataMapper.toDocumentData(documentGraph);
|
||||
ObjectMapper mapper = ObjectMapperFactory.create();
|
||||
mapper.writeValue(new FileOutputStream(File.createTempFile(filename + "_structure", ".json")), documentData.getDocumentTreeData());
|
||||
mapper.writeValue(new FileOutputStream(File.createTempFile(filename + "_structure", ".json")), documentData.getDocumentStructure());
|
||||
mapper.writeValue(new FileOutputStream(File.createTempFile(filename + "_text", ".json")), documentData.getAtomicTextBlocks());
|
||||
mapper.writeValue(new FileOutputStream(File.createTempFile(filename + "_positions", ".json")), documentData.getAtomicPositionBlocks());
|
||||
mapper.writeValue(new FileOutputStream(File.createTempFile(filename + "_pages", ".json")), documentData.getPages());
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user