update script for extraction
This commit is contained in:
parent
c2d7127a84
commit
90f8f9da36
@@ -10,7 +10,7 @@ from pyinfra.storage.storages import get_s3_storage
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--bucket_name", "-b", required=True)
|
||||
-parser.add_argument("--analysis_container", "-a", choices=["detr", "ner", "image", "conversion"], required=True)
|
||||
+parser.add_argument("--analysis_container", "-a", choices=["detr", "ner", "image", "conversion", "extraction"], required=True)
|
||||
args = parser.parse_args()
|
||||
return args
|
||||
|
||||
@@ -49,6 +49,8 @@ def build_message_bodies(analyse_container_type, bucket_name):
|
||||
message_dict.update({"targetFileExtension": "ORIGIN.pdf.gz", "responseFileExtension": "IMAGE_INFO.json.gz"})
|
||||
if analyse_container_type == "conversion":
|
||||
message_dict.update({"targetFileExtension": "ORIGIN.pdf.gz", "responseFileExtension": "png.gz", "operation": "conversion", "pages": [1,2,3]})
|
||||
+if analyse_container_type == "extraction":
|
||||
+message_dict.update({"targetFileExtension": "ORIGIN.pdf.gz", "responseFileExtension": "json.gz", "operation": "extraction"})
|
||||
if analyse_container_type == "ner":
|
||||
message_dict.update(
|
||||
{"targetFileExtension": "TEXT.json.gz", "responseFileExtension": "NER_ENTITIES.json.gz"}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user