Processing service interface

image classification now   : JSON (Mdat PDF) -> (Data PDF -> JSON [Mdat ImObj])
image classification future: JSON [Mdat FunkIm] | Mdat PDF ->  (Data [FunkIm] -> JSON [Mdat FunkIm])
object detection           : JSON [Mdat PagIm] | Mdat PDF -> (Data [PagIm] -> JSON [[Mdat SemIm]])
NER                        : JSON [Mdat Dict] -> (Data [Dict] -> JSON [Mdat])
table parsing              : JSON [Mdat FunkIm] | Mdat PDF -> (Data [PagIm] -> JSON [[Mdat FunkIm]])
pdf2image                  : Mdat (fn, [Int], PDF) -> (JSON ([Int], Data PDF) -> [(FunkIm, Mdat)])


image classification now   : Mdat (fn, [Int], file) -> (Data PDF -> JSON [Mdat ImObj])
image classification future: Mdat (fn, [Int], dir) -> (Data [FunkIm] -> JSON [Mdat FunkIm])
object detection           : Mdat (fn, [Int], dir) -> (Data [PagIm] -> JSON [[Mdat SemIm]])
table parsing              : Mdat (fn, [Int], dir) -> (Data [PagIm] -> JSON [[Mdat FunkIm]])
NER                        : Mdat (fn, [Int], file) -> (Data [Dict] -> JSON [Mdat])
pdf2image                  : Mdat (fn, [Int], file) -> (JSON ([Int], Data PDF) -> [(FunkIm, Mdat)])


from funcy import identity

access(mdat):
	if mdat.path is file:
		request = {"data": load(mdat.path), "metadata": mdat}
	elif mdat.path is dir:
		get_indexed = identity if not mdat.idx else itemgetter(*mdat.idx)
		request = {"data": get_indexed(get_files(mdat.path)), "metadata": mdat}
	else:
		raise BadRequest


storage:

fileId: {
	pages: [PagIm]
	images: [FunkIm]
	sections: gz
}


---------------



							 assert if targetPath is file then response list must be singleton
                             {index: [], dir: fileID.pdf.gz, targetPath: fileID.images.json.gz} -> [{data: pdf bytes, metadata: {request: ...}}] -> [{data: null, metadata: {request: null, response: {classification infos: ...}}}]
image classification now   : Mdat (fn, [Int], file) -> [JSON (Data PDF, Mdat)] -> [JSON (Data null, Mdat [ImObj])]  | 1 -> 1
							 assert if targetPath is file then response list must be singleton
                             {index: [], dir: fileID/images, targetPath: fileID.images.json.gz} -> [{data: image bytes, metadata: {request: {image location...}}}] -> [{data: null, metadata: {request: null, response: {classification infos: ...}}}]
image classification future: Mdat (fn, [Int], dir) -> JSON (Data [FunkIm], Mdat) -> [JSON (Data null, Mdat [FunkIm])]                 |
object detection           : Mdat (fn, [Int], dir) -> (Data [PagIm] -> JSON [[Mdat SemIm]])
table parsing              : Mdat (fn, [Int], dir) -> (Data [PagIm] -> JSON [[Mdat FunkIm]])
NER                        : Mdat (fn, [Int], file) -> (Data [Dict] -> JSON [Mdat])
pdf2image                  : Mdat (fn, [Int], file) -> (JSON ([Int], Data PDF) -> [(FunkIm, Mdat)])

 aggregate <==> targetPath is a file and index is empty





















