pyinfra/doc/signatures.txt
2022-04-29 12:01:13 +02:00

77 lines
2.8 KiB
Plaintext

Processing service interface
image classification now : JSON (Mdat PDF) -> (Data PDF -> JSON [Mdat ImObj])
image classification future: JSON [Mdat FunkIm] | Mdat PDF -> (Data [FunkIm] -> JSON [Mdat FunkIm])
object detection : JSON [Mdat PagIm] | Mdat PDF -> (Data [PagIm] -> JSON [[Mdat SemIm]])
NER : JSON [Mdat Dict] -> (Data [Dict] -> JSON [Mdat])
table parsing : JSON [Mdat FunkIm] | Mdat PDF -> (Data [PagIm] -> JSON [[Mdat FunkIm]])
pdf2image : Mdat (fn, [Int], PDF) -> (JSON ([Int], Data PDF) -> [(FunkIm, Mdat)])
image classification now : Mdat (fn, [Int], file) -> (Data PDF -> JSON [Mdat ImObj])
image classification future: Mdat (fn, [Int], dir) -> (Data [FunkIm] -> JSON [Mdat FunkIm])
object detection : Mdat (fn, [Int], dir) -> (Data [PagIm] -> JSON [[Mdat SemIm]])
table parsing : Mdat (fn, [Int], dir) -> (Data [PagIm] -> JSON [[Mdat FunkIm]])
NER : Mdat (fn, [Int], file) -> (Data [Dict] -> JSON [Mdat])
pdf2image : Mdat (fn, [Int], file) -> (JSON ([Int], Data PDF) -> [(FunkIm, Mdat)])
from funcy import identity
access(mdat):
if mdat.path is file:
request = {"data": load(mdat.path), "metadata": mdat}
elif mdat.path is dir:
get_indexed = identity if not mdat.idx else itemgetter(*mdat.idx)
request = {"data": get_indexed(get_files(mdat.path)), "metadata": mdat}
else:
raise BadRequest
storage:
fileId: {
pages: [PagIm]
images: [FunkIm]
sections: gz
}
---------------
assert if targetPath is file then response list must be singleton
{index: [], dir: fileID.pdf.gz, targetPath: fileID.images.json.gz} -> [{data: pdf bytes, metadata: {request: ...}}] -> [{data: null, metadata: {request: null, response: {classification infos: ...}}}]
image classification now : Mdat (fn, [Int], file) -> [JSON (Data PDF, Mdat)] -> [JSON (Data null, Mdat [ImObj])] | 1 -> 1
assert if targetPath is file then response list must be singleton
{index: [], dir: fileID/images, targetPath: fileID.images.json.gz} -> [{data: image bytes, metadata: {request: {image location...}}}] -> [{data: null, metadata: {request: null, response: {classification infos: ...}}}]
image classification future: Mdat (fn, [Int], dir) -> JSON (Data [FunkIm], Mdat) -> [JSON (Data null, Mdat [FunkIm])] |
object detection : Mdat (fn, [Int], dir) -> (Data [PagIm] -> JSON [[Mdat SemIm]])
table parsing : Mdat (fn, [Int], dir) -> (Data [PagIm] -> JSON [[Mdat FunkIm]])
NER : Mdat (fn, [Int], file) -> (Data [Dict] -> JSON [Mdat])
pdf2image : Mdat (fn, [Int], file) -> (JSON ([Int], Data PDF) -> [(FunkIm, Mdat)])
aggregate <==> targetPath is file and index is empty