Merge branch 'RED-9123' into 'main'
RED-9123: Protobuf serialization of document data files See merge request fforesight/llm-service!24
This commit is contained in:
commit
2d66b1e5d4
@ -13,10 +13,13 @@ extra["testcontainersVersion"] = "1.20.0"
|
||||
|
||||
dependencies {
|
||||
implementation(project(":llm-service-api"))
|
||||
implementation("com.knecon.fforesight:layoutparser-service-internal-api:0.159.0")
|
||||
implementation("com.iqser.red.commons:storage-commons:2.49.0")
|
||||
implementation("com.knecon.fforesight:layoutparser-service-internal-api:0.181.0")
|
||||
implementation("com.iqser.red.commons:storage-commons:2.50.0")
|
||||
implementation("org.springframework.boot:spring-boot-starter:3.1.1")
|
||||
implementation("com.knecon.fforesight:tenant-commons:0.30.0")
|
||||
implementation("com.knecon.fforesight:tenant-commons:0.30.0") {
|
||||
exclude(group = "com.iqser.red.commons", module = "storage-commons")
|
||||
}
|
||||
implementation("com.azure:azure-ai-openai:1.0.0-beta.10")
|
||||
implementation("ch.qos.logback:logback-classic:1.5.7")
|
||||
implementation("com.google.protobuf:protobuf-java:4.27.1")
|
||||
}
|
||||
|
||||
@ -2,10 +2,11 @@ package com.knecon.fforesight.llm.service.document;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPage;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositionData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructure;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentTextData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPageProto;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositionDataProto;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructureProto;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructureWrapper;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentTextDataProto;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
@ -21,9 +22,15 @@ import lombok.experimental.FieldDefaults;
|
||||
@FieldDefaults(level = AccessLevel.PRIVATE)
|
||||
public class DocumentData implements Serializable {
|
||||
|
||||
DocumentPage[] documentPages;
|
||||
DocumentTextData[] documentTextData;
|
||||
DocumentPositionData[] documentPositionData;
|
||||
DocumentStructure documentStructure;
|
||||
DocumentPageProto.AllDocumentPages documentPages;
|
||||
DocumentTextDataProto.AllDocumentTextData documentTextData;
|
||||
DocumentPositionDataProto.AllDocumentPositionData documentPositionData;
|
||||
DocumentStructureWrapper documentStructureWrapper;
|
||||
|
||||
|
||||
public DocumentStructureProto.DocumentStructure getDocumentStructure() {
|
||||
|
||||
return documentStructureWrapper.getDocumentStructure();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -1,9 +1,11 @@
|
||||
package com.knecon.fforesight.llm.service.document;
|
||||
|
||||
import static com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPageProto.DocumentPage;
|
||||
import static com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositionDataProto.AllDocumentPositionData;
|
||||
import static com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentTextDataProto.AllDocumentTextData;
|
||||
import static com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.EntryDataProto.EntryData;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
@ -25,10 +27,6 @@ import com.knecon.fforesight.llm.service.document.nodes.TableCell;
|
||||
import com.knecon.fforesight.llm.service.document.textblock.AtomicTextBlock;
|
||||
import com.knecon.fforesight.llm.service.document.textblock.TextBlock;
|
||||
import com.knecon.fforesight.llm.service.document.textblock.TextBlockCollector;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPage;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositionData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructure;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentTextData;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
|
||||
@ -41,28 +39,30 @@ public class DocumentGraphMapper {
|
||||
DocumentTree documentTree = new DocumentTree(document);
|
||||
Context context = new Context(documentData, documentTree);
|
||||
|
||||
context.pageData.addAll(Arrays.stream(documentData.getDocumentPages())
|
||||
context.pageData.addAll(documentData.getDocumentPages().getDocumentPagesList()
|
||||
.stream()
|
||||
.map(DocumentGraphMapper::buildPage)
|
||||
.toList());
|
||||
|
||||
context.documentTree.getRoot().getChildren().addAll(buildEntries(documentData.getDocumentStructure().getRoot().getChildren(), context));
|
||||
context.documentTree.getRoot().getChildren().addAll(buildEntries(documentData.getDocumentStructure().getRoot().getChildrenList(), context));
|
||||
|
||||
document.setDocumentTree(context.documentTree);
|
||||
document.setPages(new HashSet<>(context.pageData));
|
||||
document.setNumberOfPages(documentData.getDocumentPages().length);
|
||||
document.setNumberOfPages(documentData.getDocumentPages().getDocumentPagesCount());
|
||||
|
||||
document.setTextBlock(document.getTextBlock());
|
||||
return document;
|
||||
}
|
||||
|
||||
|
||||
private List<DocumentTree.Entry> buildEntries(List<DocumentStructure.EntryData> entries, Context context) {
|
||||
private List<DocumentTree.Entry> buildEntries(List<EntryData> entries, Context context) {
|
||||
|
||||
List<DocumentTree.Entry> newEntries = new ArrayList<>(entries.size());
|
||||
for (DocumentStructure.EntryData entryData : entries) {
|
||||
for (EntryData entryData : entries) {
|
||||
|
||||
List<Page> pages = Arrays.stream(entryData.getPageNumbers())
|
||||
.map(pageNumber -> getPage(pageNumber, context))
|
||||
List<Page> pages = entryData.getPageNumbersList()
|
||||
.stream()
|
||||
.map(context::getPage)
|
||||
.toList();
|
||||
|
||||
SemanticNode node = switch (entryData.getType()) {
|
||||
@ -74,33 +74,30 @@ public class DocumentGraphMapper {
|
||||
case FOOTER -> buildFooter(context);
|
||||
case TABLE -> buildTable(context, entryData.getProperties());
|
||||
case TABLE_CELL -> buildTableCell(context, entryData.getProperties());
|
||||
case IMAGE -> buildImage(context, entryData.getProperties(), entryData.getPageNumbers());
|
||||
case IMAGE -> buildImage(context, entryData.getProperties(), entryData.getPageNumbersList());
|
||||
default -> throw new UnsupportedOperationException("Not yet implemented for type " + entryData.getType());
|
||||
};
|
||||
|
||||
if (entryData.getAtomicBlockIds().length > 0) {
|
||||
TextBlock textBlock = toTextBlock(entryData.getAtomicBlockIds(), context, node);
|
||||
if (entryData.getAtomicBlockIdsCount() > 0) {
|
||||
TextBlock textBlock = toTextBlock(entryData.getAtomicBlockIdsList(), context, node);
|
||||
node.setLeafTextBlock(textBlock);
|
||||
|
||||
switch (entryData.getType()) {
|
||||
case HEADER -> pages.forEach(page -> page.setHeader((Header) node));
|
||||
case FOOTER -> pages.forEach(page -> page.setFooter((Footer) node));
|
||||
case IMAGE -> pages.forEach(page -> page.getImages().add((Image) node));
|
||||
default -> textBlock.getAtomicTextBlocks()
|
||||
.forEach(atb -> atb.getPage().getTextBlocksOnPage().add(atb));
|
||||
}
|
||||
|
||||
}
|
||||
List<Integer> treeId = Arrays.stream(entryData.getTreeId()).boxed()
|
||||
.toList();
|
||||
if (entryData.getEngines() != null) {
|
||||
entryData.getEngines()
|
||||
.forEach(node::addEngine);
|
||||
} else {
|
||||
entryData.setEngines(Collections.emptySet());
|
||||
}
|
||||
List<Integer> treeId = entryData.getTreeIdList();
|
||||
entryData.getEnginesList()
|
||||
.forEach(node::addEngine);
|
||||
node.setTreeId(treeId);
|
||||
|
||||
switch (entryData.getType()) {
|
||||
case HEADER -> pages.forEach(page -> page.setHeader((Header) node));
|
||||
case FOOTER -> pages.forEach(page -> page.setFooter((Footer) node));
|
||||
default -> pages.forEach(page -> page.getMainBody().add(node));
|
||||
}
|
||||
|
||||
newEntries.add(DocumentTree.Entry.builder().treeId(treeId).children(buildEntries(entryData.getChildren(), context)).node(node).build());
|
||||
}
|
||||
return newEntries;
|
||||
newEntries.add(DocumentTree.Entry.builder().treeId(treeId).children(buildEntries(entryData.getChildrenList(), context)).node(node).build());
|
||||
} return newEntries;
|
||||
}
|
||||
|
||||
|
||||
@ -110,10 +107,10 @@ public class DocumentGraphMapper {
|
||||
}
|
||||
|
||||
|
||||
private Image buildImage(Context context, Map<String, String> properties, Long[] pageNumbers) {
|
||||
private Image buildImage(Context context, Map<String, String> properties, List<Long> pageNumbers) {
|
||||
|
||||
assert pageNumbers.length == 1;
|
||||
Page page = getPage(pageNumbers[0], context);
|
||||
assert pageNumbers.size() == 1;
|
||||
Page page = context.getPage(pageNumbers.get(0));
|
||||
var builder = Image.builder();
|
||||
PropertiesMapper.parseImageProperties(properties, builder);
|
||||
return builder.documentTree(context.documentTree).page(page).build();
|
||||
@ -159,13 +156,14 @@ public class DocumentGraphMapper {
|
||||
return SuperSection.builder().documentTree(context.documentTree).build();
|
||||
}
|
||||
|
||||
|
||||
private Paragraph buildParagraph(Context context, Map<String, String> properties) {
|
||||
|
||||
if (PropertiesMapper.isDuplicateParagraph(properties)) {
|
||||
|
||||
DuplicatedParagraph duplicatedParagraph = DuplicatedParagraph.builder().documentTree(context.documentTree).build();
|
||||
|
||||
Long[] unsortedTextblockIds = PropertiesMapper.getUnsortedTextblockIds(properties);
|
||||
var unsortedTextblockIds = PropertiesMapper.getUnsortedTextblockIds(properties);
|
||||
duplicatedParagraph.setUnsortedLeafTextBlock(toTextBlock(unsortedTextblockIds, context, duplicatedParagraph));
|
||||
return duplicatedParagraph;
|
||||
}
|
||||
@ -174,9 +172,9 @@ public class DocumentGraphMapper {
|
||||
}
|
||||
|
||||
|
||||
private TextBlock toTextBlock(Long[] atomicTextBlockIds, Context context, SemanticNode parent) {
|
||||
private TextBlock toTextBlock(List<Long> atomicTextBlockIds, Context context, SemanticNode parent) {
|
||||
|
||||
return Arrays.stream(atomicTextBlockIds)
|
||||
return atomicTextBlockIds.stream()
|
||||
.map(atomicTextBlockId -> getAtomicTextBlock(context, parent, atomicTextBlockId))
|
||||
.collect(new TextBlockCollector());
|
||||
}
|
||||
@ -184,24 +182,16 @@ public class DocumentGraphMapper {
|
||||
|
||||
private AtomicTextBlock getAtomicTextBlock(Context context, SemanticNode parent, Long atomicTextBlockId) {
|
||||
|
||||
return AtomicTextBlock.fromAtomicTextBlockData(context.documentTextData.get(Math.toIntExact(atomicTextBlockId)),
|
||||
context.documentPositionData.get(Math.toIntExact(atomicTextBlockId)),
|
||||
return AtomicTextBlock.fromAtomicTextBlockData(context.documentTextData.getDocumentTextData(Math.toIntExact(atomicTextBlockId)),
|
||||
context.documentPositionData.getDocumentPositionData(Math.toIntExact(atomicTextBlockId)),
|
||||
parent,
|
||||
getPage(context.documentTextData.get(Math.toIntExact(atomicTextBlockId)).getPage(), context));
|
||||
context.getPage(context.documentTextData.getDocumentTextData(Math.toIntExact(atomicTextBlockId)).getPage()));
|
||||
}
|
||||
|
||||
|
||||
private Page buildPage(DocumentPage p) {
|
||||
|
||||
return Page.builder().rotation(p.getRotation()).height(p.getHeight()).width(p.getWidth()).number(p.getNumber()).mainBody(new LinkedList<>()).build();
|
||||
}
|
||||
|
||||
|
||||
private Page getPage(Long pageIndex, Context context) {
|
||||
|
||||
Page page = context.pageData.get(Math.toIntExact(pageIndex) - 1);
|
||||
assert page.getNumber() == Math.toIntExact(pageIndex);
|
||||
return page;
|
||||
return Page.builder().rotation(p.getRotation()).height(p.getHeight()).width(p.getWidth()).number(p.getNumber()).textBlocksOnPage(new LinkedList<>()).build();
|
||||
}
|
||||
|
||||
|
||||
@ -209,21 +199,27 @@ public class DocumentGraphMapper {
|
||||
|
||||
private final DocumentTree documentTree;
|
||||
private final List<Page> pageData;
|
||||
private final List<DocumentTextData> documentTextData;
|
||||
private final List<DocumentPositionData> documentPositionData;
|
||||
private final AllDocumentTextData documentTextData;
|
||||
private final AllDocumentPositionData documentPositionData;
|
||||
|
||||
|
||||
Context(DocumentData documentData, DocumentTree documentTree) {
|
||||
|
||||
this.documentTree = documentTree;
|
||||
this.pageData = new ArrayList<>();
|
||||
this.documentTextData = Arrays.stream(documentData.getDocumentTextData())
|
||||
.toList();
|
||||
this.documentPositionData = Arrays.stream(documentData.getDocumentPositionData())
|
||||
.toList();
|
||||
this.documentTextData = documentData.getDocumentTextData();
|
||||
this.documentPositionData = documentData.getDocumentPositionData();
|
||||
|
||||
}
|
||||
|
||||
|
||||
private Page getPage(Long pageIndex) {
|
||||
|
||||
Page page = pageData.get(Math.toIntExact(pageIndex) - 1);
|
||||
assert page.getNumber() == Math.toIntExact(pageIndex);
|
||||
return page;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -9,7 +9,7 @@ import com.knecon.fforesight.llm.service.document.nodes.Image;
|
||||
import com.knecon.fforesight.llm.service.document.nodes.ImageType;
|
||||
import com.knecon.fforesight.llm.service.document.nodes.Table;
|
||||
import com.knecon.fforesight.llm.service.document.nodes.TableCell;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructure;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructureWrapper;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
|
||||
@ -18,32 +18,32 @@ public class PropertiesMapper {
|
||||
|
||||
public void parseImageProperties(Map<String, String> properties, Image.ImageBuilder builder) {
|
||||
|
||||
builder.imageType(ImageType.fromString(properties.get(DocumentStructure.ImageProperties.IMAGE_TYPE)));
|
||||
builder.transparent(Boolean.parseBoolean(properties.get(DocumentStructure.ImageProperties.TRANSPARENT)));
|
||||
builder.position(parseRectangle2D(properties.get(DocumentStructure.ImageProperties.POSITION)));
|
||||
builder.id(properties.get(DocumentStructure.ImageProperties.ID));
|
||||
builder.imageType(ImageType.fromString(properties.get(DocumentStructureWrapper.ImageProperties.IMAGE_TYPE)));
|
||||
builder.transparent(Boolean.parseBoolean(properties.get(DocumentStructureWrapper.ImageProperties.TRANSPARENT)));
|
||||
builder.position(parseRectangle2D(properties.get(DocumentStructureWrapper.ImageProperties.POSITION)));
|
||||
builder.id(properties.get(DocumentStructureWrapper.ImageProperties.ID));
|
||||
}
|
||||
|
||||
|
||||
public void parseTableCellProperties(Map<String, String> properties, TableCell.TableCellBuilder builder) {
|
||||
|
||||
builder.row(Integer.parseInt(properties.get(DocumentStructure.TableCellProperties.ROW)));
|
||||
builder.col(Integer.parseInt(properties.get(DocumentStructure.TableCellProperties.COL)));
|
||||
builder.header(Boolean.parseBoolean(properties.get(DocumentStructure.TableCellProperties.HEADER)));
|
||||
builder.bBox(parseRectangle2D(properties.get(DocumentStructure.TableCellProperties.B_BOX)));
|
||||
builder.row(Integer.parseInt(properties.get(DocumentStructureWrapper.TableCellProperties.ROW)));
|
||||
builder.col(Integer.parseInt(properties.get(DocumentStructureWrapper.TableCellProperties.COL)));
|
||||
builder.header(Boolean.parseBoolean(properties.get(DocumentStructureWrapper.TableCellProperties.HEADER)));
|
||||
builder.bBox(parseRectangle2D(properties.get(DocumentStructureWrapper.TableCellProperties.B_BOX)));
|
||||
}
|
||||
|
||||
|
||||
public void parseTableProperties(Map<String, String> properties, Table.TableBuilder builder) {
|
||||
|
||||
builder.numberOfRows(Integer.parseInt(properties.get(DocumentStructure.TableProperties.NUMBER_OF_ROWS)));
|
||||
builder.numberOfCols(Integer.parseInt(properties.get(DocumentStructure.TableProperties.NUMBER_OF_COLS)));
|
||||
builder.numberOfRows(Integer.parseInt(properties.get(DocumentStructureWrapper.TableProperties.NUMBER_OF_ROWS)));
|
||||
builder.numberOfCols(Integer.parseInt(properties.get(DocumentStructureWrapper.TableProperties.NUMBER_OF_COLS)));
|
||||
}
|
||||
|
||||
|
||||
private Rectangle2D parseRectangle2D(String bBox) {
|
||||
|
||||
List<Float> floats = Arrays.stream(bBox.split(DocumentStructure.RECTANGLE_DELIMITER))
|
||||
List<Float> floats = Arrays.stream(bBox.split(DocumentStructureWrapper.RECTANGLE_DELIMITER))
|
||||
.map(Float::parseFloat)
|
||||
.toList();
|
||||
return new Rectangle2D.Float(floats.get(0), floats.get(1), floats.get(2), floats.get(3));
|
||||
@ -52,21 +52,21 @@ public class PropertiesMapper {
|
||||
|
||||
public static boolean isDuplicateParagraph(Map<String, String> properties) {
|
||||
|
||||
return properties.containsKey(DocumentStructure.DuplicateParagraphProperties.UNSORTED_TEXTBLOCK_ID);
|
||||
return properties.containsKey(DocumentStructureWrapper.DuplicateParagraphProperties.UNSORTED_TEXTBLOCK_ID);
|
||||
}
|
||||
|
||||
|
||||
public static Long[] getUnsortedTextblockIds(Map<String, String> properties) {
|
||||
public static List<Long> getUnsortedTextblockIds(Map<String, String> properties) {
|
||||
|
||||
return toLongArray(properties.get(DocumentStructure.DuplicateParagraphProperties.UNSORTED_TEXTBLOCK_ID));
|
||||
return toLongList(properties.get(DocumentStructureWrapper.DuplicateParagraphProperties.UNSORTED_TEXTBLOCK_ID));
|
||||
}
|
||||
|
||||
|
||||
public static Long[] toLongArray(String ids) {
|
||||
public static List<Long> toLongList(String ids) {
|
||||
|
||||
return Arrays.stream(ids.substring(1, ids.length() - 1).trim().split(","))
|
||||
.map(Long::valueOf)
|
||||
.toArray(Long[]::new);
|
||||
.toList();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -9,7 +9,7 @@ import java.util.Set;
|
||||
import com.knecon.fforesight.llm.service.document.DocumentTree;
|
||||
import com.knecon.fforesight.llm.service.document.entity.TextEntity;
|
||||
import com.knecon.fforesight.llm.service.document.textblock.TextBlock;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngineProto;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
@ -31,7 +31,7 @@ import lombok.extern.slf4j.Slf4j;
|
||||
public abstract class AbstractSemanticNode implements GenericSemanticNode {
|
||||
|
||||
@Builder.Default
|
||||
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
|
||||
Set<LayoutEngineProto.LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngineProto.LayoutEngine.ALGORITHM));
|
||||
@EqualsAndHashCode.Include
|
||||
List<Integer> treeId;
|
||||
|
||||
|
||||
@ -5,6 +5,7 @@ import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import com.knecon.fforesight.llm.service.document.entity.TextEntity;
|
||||
import com.knecon.fforesight.llm.service.document.textblock.AtomicTextBlock;
|
||||
import com.knecon.fforesight.llm.service.document.textblock.TextBlock;
|
||||
import com.knecon.fforesight.llm.service.document.textblock.TextBlockCollector;
|
||||
|
||||
@ -35,7 +36,8 @@ public class Page {
|
||||
Integer width;
|
||||
Integer rotation;
|
||||
|
||||
List<SemanticNode> mainBody;
|
||||
|
||||
List<AtomicTextBlock> textBlocksOnPage;
|
||||
Header header;
|
||||
Footer footer;
|
||||
|
||||
@ -53,13 +55,36 @@ public class Page {
|
||||
*/
|
||||
public TextBlock getMainBodyTextBlock() {
|
||||
|
||||
return mainBody.stream()
|
||||
.filter(SemanticNode::isLeaf)
|
||||
.map(SemanticNode::getTextBlock)
|
||||
return textBlocksOnPage.stream()
|
||||
.filter(atb -> !atb.isEmpty())
|
||||
.collect(new TextBlockCollector());
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Retrieves the highest SemanticNodes, which appear only on this page. It is achieved by traversing the DocumentTree up, until a SemanticNode's direct parent is no longer exclusively on this page.
|
||||
*
|
||||
* @return A list which contains the highes SemanticNodes, which appear only on this page.
|
||||
*/
|
||||
public List<SemanticNode> getMainBody() {
|
||||
|
||||
return textBlocksOnPage.stream()
|
||||
.map(AtomicTextBlock::getParent)
|
||||
.map(this::getHighestParentOnlyOnPage)
|
||||
.distinct()
|
||||
.toList();
|
||||
}
|
||||
|
||||
private SemanticNode getHighestParentOnlyOnPage(SemanticNode node) {
|
||||
|
||||
SemanticNode currentNode = node;
|
||||
while (currentNode.hasParent() && currentNode.getParent().onlyOnPage(this)) {
|
||||
currentNode = currentNode.getParent();
|
||||
}
|
||||
return currentNode;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
||||
|
||||
@ -21,7 +21,7 @@ import com.knecon.fforesight.llm.service.document.entity.TextEntity;
|
||||
import com.knecon.fforesight.llm.service.document.textblock.AtomicTextBlock;
|
||||
import com.knecon.fforesight.llm.service.document.textblock.TextBlock;
|
||||
import com.knecon.fforesight.llm.service.document.textblock.TextBlockCollector;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngineProto;
|
||||
|
||||
public interface SemanticNode {
|
||||
|
||||
@ -287,11 +287,10 @@ public interface SemanticNode {
|
||||
return getTextBlock().getSearchText().contains(string);
|
||||
}
|
||||
|
||||
|
||||
Set<LayoutEngine> getEngines();
|
||||
Set<LayoutEngineProto.LayoutEngine> getEngines();
|
||||
|
||||
|
||||
default void addEngine(LayoutEngine engine) {
|
||||
default void addEngine(LayoutEngineProto.LayoutEngine engine) {
|
||||
|
||||
getEngines().add(engine);
|
||||
}
|
||||
@ -669,4 +668,17 @@ public interface SemanticNode {
|
||||
return bBoxPerPage;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks wether this SemanticNode appears on a single page only, and if that page is the provided one.
|
||||
*
|
||||
* @param page the page to check
|
||||
* @return true, when SemanticNode is on a single page only and the page is the provided page. Otherwise, false.
|
||||
*/
|
||||
default boolean onlyOnPage(Page page) {
|
||||
|
||||
Set<Page> pages = getPages();
|
||||
return pages.size() == 1 && pages.contains(page);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -16,7 +16,7 @@ import com.knecon.fforesight.llm.service.document.DocumentTree;
|
||||
import com.knecon.fforesight.llm.service.document.entity.TextEntity;
|
||||
import com.knecon.fforesight.llm.service.document.textblock.TextBlock;
|
||||
import com.knecon.fforesight.llm.service.document.textblock.TextBlockCollector;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngine;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.LayoutEngineProto;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
@ -36,7 +36,7 @@ import lombok.experimental.FieldDefaults;
|
||||
public class Table implements SemanticNode {
|
||||
|
||||
@Builder.Default
|
||||
Set<LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngine.ALGORITHM));
|
||||
Set<LayoutEngineProto.LayoutEngine> engines = new HashSet<>(Set.of(LayoutEngineProto.LayoutEngine.ALGORITHM));
|
||||
@EqualsAndHashCode.Include
|
||||
List<Integer> treeId;
|
||||
DocumentTree documentTree;
|
||||
|
||||
@ -5,7 +5,6 @@ import static java.lang.String.format;
|
||||
import java.awt.geom.Rectangle2D;
|
||||
import java.text.BreakIterator;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
@ -19,8 +18,8 @@ import com.knecon.fforesight.llm.service.document.RectangleTransformations;
|
||||
import com.knecon.fforesight.llm.service.document.TextRange;
|
||||
import com.knecon.fforesight.llm.service.document.nodes.Page;
|
||||
import com.knecon.fforesight.llm.service.document.nodes.SemanticNode;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositionData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentTextData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositionDataProto;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentTextDataProto;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
import lombok.AllArgsConstructor;
|
||||
@ -78,7 +77,10 @@ public class AtomicTextBlock implements TextBlock {
|
||||
}
|
||||
|
||||
|
||||
public static AtomicTextBlock fromAtomicTextBlockData(DocumentTextData atomicTextBlockData, DocumentPositionData atomicPositionBlockData, SemanticNode parent, Page page) {
|
||||
public static AtomicTextBlock fromAtomicTextBlockData(DocumentTextDataProto.DocumentTextData atomicTextBlockData,
|
||||
DocumentPositionDataProto.DocumentPositionData atomicPositionBlockData,
|
||||
SemanticNode parent,
|
||||
Page page) {
|
||||
|
||||
return AtomicTextBlock.builder()
|
||||
.id(atomicTextBlockData.getId())
|
||||
@ -86,20 +88,18 @@ public class AtomicTextBlock implements TextBlock {
|
||||
.page(page)
|
||||
.textRange(new TextRange(atomicTextBlockData.getStart(), atomicTextBlockData.getEnd()))
|
||||
.searchText(atomicTextBlockData.getSearchText())
|
||||
.lineBreaks(Arrays.stream(atomicTextBlockData.getLineBreaks()).boxed()
|
||||
.toList())
|
||||
.stringIdxToPositionIdx(Arrays.stream(atomicPositionBlockData.getStringIdxToPositionIdx()).boxed()
|
||||
.toList())
|
||||
.positions(toRectangle2DList(atomicPositionBlockData.getPositions()))
|
||||
.lineBreaks(atomicTextBlockData.getLineBreaksList())
|
||||
.stringIdxToPositionIdx(atomicPositionBlockData.getStringIdxToPositionIdxList())
|
||||
.positions(toRectangle2DList(atomicPositionBlockData.getPositionsList()))
|
||||
.parent(parent)
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
private static List<Rectangle2D> toRectangle2DList(float[][] positions) {
|
||||
private static List<Rectangle2D> toRectangle2DList(List<DocumentPositionDataProto.DocumentPositionData.Position> positions) {
|
||||
|
||||
return Arrays.stream(positions)
|
||||
.map(floatArr -> (Rectangle2D) new Rectangle2D.Float(floatArr[0], floatArr[1], floatArr[2], floatArr[3]))
|
||||
return positions.stream()
|
||||
.map(pos -> (Rectangle2D) new Rectangle2D.Float(pos.getValue(0), pos.getValue(1), pos.getValue(2), pos.getValue(3)))
|
||||
.toList();
|
||||
}
|
||||
|
||||
|
||||
@ -1,7 +1,14 @@
|
||||
package com.knecon.fforesight.llm.service.services;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Paths;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Arrays;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
@ -24,11 +31,14 @@ import com.azure.ai.openai.models.CompletionsUsage;
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.core.type.TypeReference;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.google.common.primitives.Floats;
|
||||
import com.iqser.red.storage.commons.exception.StorageException;
|
||||
import com.iqser.red.storage.commons.service.StorageService;
|
||||
import com.knecon.fforesight.llm.service.ChunkingResponse;
|
||||
import com.knecon.fforesight.llm.service.LlmNerEntities;
|
||||
import com.knecon.fforesight.llm.service.LlmNerEntity;
|
||||
import com.knecon.fforesight.llm.service.LlmNerMessage;
|
||||
import com.knecon.fforesight.llm.service.LlmServiceSettings;
|
||||
import com.knecon.fforesight.llm.service.SystemMessages;
|
||||
import com.knecon.fforesight.llm.service.document.DocumentData;
|
||||
import com.knecon.fforesight.llm.service.document.DocumentGraphMapper;
|
||||
@ -36,10 +46,18 @@ import com.knecon.fforesight.llm.service.document.nodes.Document;
|
||||
import com.knecon.fforesight.llm.service.document.textblock.TextBlock;
|
||||
import com.knecon.fforesight.llm.service.models.Chunk;
|
||||
import com.knecon.fforesight.llm.service.utils.FormattingUtils;
|
||||
import com.knecon.fforesight.llm.service.utils.StorageIdUtils;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPage;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPageProto;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositionData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentPositionDataProto;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructure;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructureProto;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentStructureWrapper;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentTextData;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.DocumentTextDataProto;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.EntryDataProto;
|
||||
import com.knecon.fforesight.service.layoutparser.internal.api.data.redaction.NodeTypeProto;
|
||||
import com.knecon.fforesight.tenantcommons.TenantContext;
|
||||
|
||||
import lombok.AccessLevel;
|
||||
@ -217,16 +235,227 @@ public class LlmNerService {
|
||||
private Document buildDocument(LlmNerMessage llmNerMessage) {
|
||||
|
||||
DocumentData documentData = new DocumentData();
|
||||
documentData.setDocumentStructure(storageService.readJSONObject(TenantContext.getTenantId(), llmNerMessage.getDocumentStructureStorageId(), DocumentStructure.class));
|
||||
documentData.setDocumentTextData(storageService.readJSONObject(TenantContext.getTenantId(), llmNerMessage.getDocumentTextStorageId(), DocumentTextData[].class));
|
||||
documentData.setDocumentPositionData(storageService.readJSONObject(TenantContext.getTenantId(),
|
||||
llmNerMessage.getDocumentPositionStorageId(),
|
||||
DocumentPositionData[].class));
|
||||
documentData.setDocumentPages(storageService.readJSONObject(TenantContext.getTenantId(), llmNerMessage.getDocumentPagesStorageId(), DocumentPage[].class));
|
||||
documentData.setDocumentStructureWrapper(new DocumentStructureWrapper(fetchDocumentStructure(llmNerMessage.getDocumentStructureStorageId())));
|
||||
documentData.setDocumentTextData(fetchDocumentTextData(llmNerMessage.getDocumentTextStorageId()));
|
||||
documentData.setDocumentPositionData(fetchDocumentPositionData(llmNerMessage.getDocumentPositionStorageId()));
|
||||
documentData.setDocumentPages(fetchAllDocumentPages(llmNerMessage.getDocumentPagesStorageId()));
|
||||
return DocumentGraphMapper.toDocumentGraph(documentData);
|
||||
}
|
||||
|
||||
|
||||
private DocumentStructureProto.DocumentStructure fetchDocumentStructure(String storageId) {
|
||||
|
||||
DocumentStructureProto.DocumentStructure documentStructure;
|
||||
StorageIdUtils.StorageInfo storageInfo = StorageIdUtils.parseStorageId(storageId);
|
||||
|
||||
if (storageInfo.fileTypeExtension().contains("proto")) {
|
||||
documentStructure = storageService.readProtoObject(TenantContext.getTenantId(), storageId, DocumentStructureProto.DocumentStructure.parser());
|
||||
} else {
|
||||
DocumentStructure oldDocumentStructure = getOldData(storageInfo.dossierId(), storageInfo.fileId(), storageInfo.fileTypeName(), DocumentStructure.class);
|
||||
if (oldDocumentStructure == null) {
|
||||
return null;
|
||||
}
|
||||
documentStructure = convertDocumentStructure(oldDocumentStructure);
|
||||
}
|
||||
|
||||
return documentStructure;
|
||||
}
|
||||
|
||||
|
||||
private DocumentTextDataProto.AllDocumentTextData fetchDocumentTextData(String storageId) {
|
||||
|
||||
DocumentTextDataProto.AllDocumentTextData documentTextData;
|
||||
StorageIdUtils.StorageInfo storageInfo = StorageIdUtils.parseStorageId(storageId);
|
||||
|
||||
if (storageInfo.fileTypeExtension().contains("proto")) {
|
||||
documentTextData = storageService.readProtoObject(TenantContext.getTenantId(), storageId, DocumentTextDataProto.AllDocumentTextData.parser());
|
||||
} else {
|
||||
DocumentTextData[] oldDocumentTextData = getOldData(storageInfo.dossierId(), storageInfo.fileId(), storageInfo.fileTypeName(), DocumentTextData[].class);
|
||||
if (oldDocumentTextData == null) {
|
||||
return null;
|
||||
}
|
||||
documentTextData = convertAllDocumentTextData(oldDocumentTextData);
|
||||
}
|
||||
|
||||
return documentTextData;
|
||||
}
|
||||
|
||||
|
||||
private DocumentPositionDataProto.AllDocumentPositionData fetchDocumentPositionData(String storageId) {
|
||||
|
||||
DocumentPositionDataProto.AllDocumentPositionData documentPositionData;
|
||||
StorageIdUtils.StorageInfo storageInfo = StorageIdUtils.parseStorageId(storageId);
|
||||
|
||||
if (storageInfo.fileTypeExtension().contains("proto")) {
|
||||
documentPositionData = storageService.readProtoObject(TenantContext.getTenantId(), storageId, DocumentPositionDataProto.AllDocumentPositionData.parser());
|
||||
} else {
|
||||
DocumentPositionData[] oldDocumentPositionData = getOldData(storageInfo.dossierId(), storageInfo.fileId(), storageInfo.fileTypeName(), DocumentPositionData[].class);
|
||||
if (oldDocumentPositionData == null) {
|
||||
return null;
|
||||
}
|
||||
documentPositionData = convertAllDocumentPositionData(oldDocumentPositionData);
|
||||
}
|
||||
|
||||
return documentPositionData;
|
||||
}
|
||||
|
||||
|
||||
private DocumentPageProto.AllDocumentPages fetchAllDocumentPages(String storageId) {
|
||||
|
||||
DocumentPageProto.AllDocumentPages allDocumentPages;
|
||||
StorageIdUtils.StorageInfo storageInfo = StorageIdUtils.parseStorageId(storageId);
|
||||
|
||||
if (storageInfo.fileTypeExtension().contains("proto")) {
|
||||
allDocumentPages = storageService.readProtoObject(TenantContext.getTenantId(), storageId, DocumentPageProto.AllDocumentPages.parser());
|
||||
} else {
|
||||
DocumentPage[] oldDocumentPages = getOldData(storageInfo.dossierId(), storageInfo.fileId(), storageInfo.fileTypeName(), DocumentPage[].class);
|
||||
if (oldDocumentPages == null) {
|
||||
return null;
|
||||
}
|
||||
allDocumentPages = convertAllDocumentPages(oldDocumentPages);
|
||||
}
|
||||
|
||||
return allDocumentPages;
|
||||
}
|
||||
|
||||
|
||||
private <T> T getOldData(String dossierId, String fileId, String fileType, Class<T> valueType) {
|
||||
|
||||
String oldStorageId = StorageIdUtils.getStorageId(dossierId, fileId, fileType, ".json");
|
||||
System.out.println("----------------> LOOKING FOR " + oldStorageId);
|
||||
try (InputStream inputStream = getObject(TenantContext.getTenantId(), oldStorageId)) {
|
||||
return mapper.readValue(inputStream, valueType);
|
||||
} catch (IOException e) {
|
||||
log.error("Could not read JSON for " + fileType + ", error was: " + e);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private static EntryDataProto.EntryData convertEntryData(DocumentStructure.EntryData oldEntryData) {
|
||||
|
||||
EntryDataProto.EntryData.Builder builder = EntryDataProto.EntryData.newBuilder();
|
||||
|
||||
builder.setType(NodeTypeProto.NodeType.valueOf(oldEntryData.getType().name()));
|
||||
builder.addAllTreeId(Arrays.stream(oldEntryData.getTreeId()).boxed()
|
||||
.collect(Collectors.toList()));
|
||||
builder.addAllAtomicBlockIds(Arrays.asList(oldEntryData.getAtomicBlockIds()));
|
||||
builder.addAllPageNumbers(Arrays.asList(oldEntryData.getPageNumbers()));
|
||||
|
||||
builder.putAllProperties(oldEntryData.getProperties());
|
||||
|
||||
if (oldEntryData.getChildren() != null) {
|
||||
oldEntryData.getChildren()
|
||||
.forEach(child -> builder.addChildren(convertEntryData(child)));
|
||||
}
|
||||
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
|
||||
private static DocumentStructureProto.DocumentStructure convertDocumentStructure(DocumentStructure oldStructure) {
|
||||
|
||||
DocumentStructureProto.DocumentStructure.Builder newBuilder = DocumentStructureProto.DocumentStructure.newBuilder();
|
||||
|
||||
if (oldStructure.getRoot() != null) {
|
||||
newBuilder.setRoot(convertEntryData(oldStructure.getRoot()));
|
||||
}
|
||||
|
||||
return newBuilder.build();
|
||||
}
|
||||
|
||||
|
||||
private static DocumentPageProto.DocumentPage convertDocumentPage(DocumentPage oldPage) {
|
||||
|
||||
return DocumentPageProto.DocumentPage.newBuilder()
|
||||
.setNumber(oldPage.getNumber())
|
||||
.setHeight(oldPage.getHeight())
|
||||
.setWidth(oldPage.getWidth())
|
||||
.setRotation(oldPage.getRotation())
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
private static DocumentPageProto.AllDocumentPages convertAllDocumentPages(DocumentPage[] oldPages) {
|
||||
|
||||
DocumentPageProto.AllDocumentPages.Builder allPagesBuilder = DocumentPageProto.AllDocumentPages.newBuilder();
|
||||
|
||||
for (DocumentPage oldPage : oldPages) {
|
||||
DocumentPageProto.DocumentPage newPage = convertDocumentPage(oldPage);
|
||||
allPagesBuilder.addDocumentPages(newPage);
|
||||
}
|
||||
|
||||
return allPagesBuilder.build();
|
||||
}
|
||||
|
||||
|
||||
private static DocumentPositionDataProto.DocumentPositionData convertDocumentPositionData(DocumentPositionData oldData) {
|
||||
|
||||
DocumentPositionDataProto.DocumentPositionData.Builder builder = DocumentPositionDataProto.DocumentPositionData.newBuilder()
|
||||
.setId(oldData.getId())
|
||||
.addAllStringIdxToPositionIdx(Arrays.stream(oldData.getStringIdxToPositionIdx()).boxed()
|
||||
.collect(Collectors.toList()));
|
||||
|
||||
for (float[] pos : oldData.getPositions()) {
|
||||
DocumentPositionDataProto.DocumentPositionData.Position position = DocumentPositionDataProto.DocumentPositionData.Position.newBuilder()
|
||||
.addAllValue(Floats.asList(pos))
|
||||
.build();
|
||||
builder.addPositions(position);
|
||||
}
|
||||
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
|
||||
private static DocumentPositionDataProto.AllDocumentPositionData convertAllDocumentPositionData(DocumentPositionData[] oldDataList) {
|
||||
|
||||
DocumentPositionDataProto.AllDocumentPositionData.Builder allDataBuilder = DocumentPositionDataProto.AllDocumentPositionData.newBuilder();
|
||||
|
||||
for (DocumentPositionData oldData : oldDataList) {
|
||||
allDataBuilder.addDocumentPositionData(convertDocumentPositionData(oldData));
|
||||
}
|
||||
|
||||
return allDataBuilder.build();
|
||||
}
|
||||
|
||||
|
||||
private static DocumentTextDataProto.DocumentTextData convertDocumentTextData(DocumentTextData oldData) {
|
||||
|
||||
DocumentTextDataProto.DocumentTextData.Builder builder = DocumentTextDataProto.DocumentTextData.newBuilder()
|
||||
.setId(oldData.getId())
|
||||
.setPage(oldData.getPage())
|
||||
.setSearchText(oldData.getSearchText())
|
||||
.setNumberOnPage(oldData.getNumberOnPage())
|
||||
.setStart(oldData.getStart())
|
||||
.setEnd(oldData.getEnd())
|
||||
.addAllLineBreaks(Arrays.stream(oldData.getLineBreaks()).boxed()
|
||||
.collect(Collectors.toList()));
|
||||
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
|
||||
private static DocumentTextDataProto.AllDocumentTextData convertAllDocumentTextData(DocumentTextData[] oldDataList) {
|
||||
|
||||
DocumentTextDataProto.AllDocumentTextData.Builder allDataBuilder = DocumentTextDataProto.AllDocumentTextData.newBuilder();
|
||||
|
||||
for (DocumentTextData oldData : oldDataList) {
|
||||
allDataBuilder.addDocumentTextData(convertDocumentTextData(oldData));
|
||||
}
|
||||
|
||||
return allDataBuilder.build();
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private InputStream getObject(String tenantId, String storageId) {
|
||||
|
||||
File tempFile = File.createTempFile("temp", ".data");
|
||||
storageService.downloadTo(tenantId, storageId, tempFile);
|
||||
return new BufferedInputStream(Files.newInputStream(Paths.get(tempFile.getPath()), StandardOpenOption.DELETE_ON_CLOSE));
|
||||
}
|
||||
|
||||
|
||||
// Pairs the entities extracted from one completion call with the token-usage
// statistics reported for that call.
private record EntitiesWithUsage(List<LlmNerEntity> entities, CompletionsUsage completionsUsage) {

}
|
||||
|
||||
@ -0,0 +1,48 @@
|
||||
package com.knecon.fforesight.llm.service.utils;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
import lombok.experimental.UtilityClass;
|
||||
|
||||
@UtilityClass
|
||||
public class StorageIdUtils {
|
||||
|
||||
public static final String INVALID_STORAGE_ID_FORMAT = "Invalid storageId format";
|
||||
|
||||
|
||||
public String getStorageId(String dossierId, String fileId, String fileName, String fileExtension) {
|
||||
|
||||
return dossierId + "/" + fileId + "." + fileName + fileExtension;
|
||||
}
|
||||
|
||||
|
||||
public static StorageInfo parseStorageId(String storageId) {
|
||||
|
||||
String[] parts = storageId.split("/", 2);
|
||||
|
||||
if (parts.length < 2) {
|
||||
throw new IllegalArgumentException(INVALID_STORAGE_ID_FORMAT);
|
||||
}
|
||||
|
||||
String dossierId = parts[0];
|
||||
String fileAndType = parts[1];
|
||||
|
||||
String[] fileParts = fileAndType.split("\\.");
|
||||
|
||||
if (fileParts.length < 3) {
|
||||
throw new IllegalArgumentException(INVALID_STORAGE_ID_FORMAT);
|
||||
}
|
||||
|
||||
String fileId = fileParts[0];
|
||||
String fileTypeExtension = fileParts[fileParts.length - 1];
|
||||
String fileTypeName = String.join(".", Arrays.copyOfRange(fileParts, 1, fileParts.length - 1));
|
||||
|
||||
return new StorageInfo(dossierId, fileId, fileTypeName, fileTypeExtension);
|
||||
}
|
||||
|
||||
|
||||
public record StorageInfo(String dossierId, String fileId, String fileTypeName, String fileTypeExtension) {
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -32,8 +32,10 @@ dependencies {
|
||||
implementation("org.springframework.cloud:spring-cloud-starter-openfeign:4.1.3")
|
||||
implementation("org.springframework.boot:spring-boot-starter-websocket:$springBootVersion")
|
||||
implementation("org.springframework.security:spring-security-messaging:$springSecurityVersion")
|
||||
implementation("com.iqser.red.commons:storage-commons:2.49.0")
|
||||
implementation("com.knecon.fforesight:keycloak-commons:0.30.0")
|
||||
implementation("com.iqser.red.commons:storage-commons:2.50.0")
|
||||
implementation("com.knecon.fforesight:keycloak-commons:0.30.0") {
|
||||
exclude(group = "com.knecon.fforesight", module = "tenant-commons")
|
||||
}
|
||||
implementation("com.knecon.fforesight:tenant-commons:0.30.0")
|
||||
implementation("com.knecon.fforesight:swagger-commons:0.7.0")
|
||||
implementation("ch.qos.logback:logback-classic")
|
||||
|
||||
@ -0,0 +1,45 @@
|
||||
package com.knecon.fforesight.llm.service;
|
||||
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import com.knecon.fforesight.llm.service.utils.StorageIdUtils;
|
||||
|
||||
public class StorageIdUtilsTest {
|
||||
|
||||
@Test
|
||||
void testParseStorageId_ValidInput() {
|
||||
StorageIdUtils.StorageInfo storageInfo = StorageIdUtils.parseStorageId("dossierId/fileId.DOCUMENT_STRUCTURE.json");
|
||||
assertEquals("dossierId", storageInfo.dossierId(), "Incorrect dossierId");
|
||||
assertEquals("fileId", storageInfo.fileId(), "Incorrect fileId");
|
||||
assertEquals("DOCUMENT_STRUCTURE", storageInfo.fileTypeName(), "Incorrect fileTypeName");
|
||||
assertEquals("json", storageInfo.fileTypeExtension(), "Incorrect fileTypeExtension");
|
||||
}
|
||||
|
||||
@Test
|
||||
void testParseStorageId_MissingFileTypeExtension() {
|
||||
Exception exception = assertThrows(IllegalArgumentException.class, () ->
|
||||
StorageIdUtils.parseStorageId("dossierId/fileId.DOCUMENT_STRUCTURE")
|
||||
);
|
||||
assertEquals("Invalid storageId format", exception.getMessage());
|
||||
}
|
||||
|
||||
@Test
|
||||
void testParseStorageId_InvalidFormat() {
|
||||
Exception exception = assertThrows(IllegalArgumentException.class, () ->
|
||||
StorageIdUtils.parseStorageId("invalidFormat")
|
||||
);
|
||||
assertEquals("Invalid storageId format", exception.getMessage());
|
||||
}
|
||||
|
||||
@Test
|
||||
void testParseStorageId_NoDotsInFilePart() {
|
||||
Exception exception = assertThrows(IllegalArgumentException.class, () ->
|
||||
StorageIdUtils.parseStorageId("dossierId/fileId")
|
||||
);
|
||||
assertEquals("Invalid storageId format", exception.getMessage());
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user