update script for extraction

This commit is contained in:
Julius Unverfehrt 2022-06-02 15:12:33 +02:00
parent c2d7127a84
commit 90f8f9da36

View File

@ -10,7 +10,7 @@ from pyinfra.storage.storages import get_s3_storage
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument("--bucket_name", "-b", required=True)
parser.add_argument("--analysis_container", "-a", choices=["detr", "ner", "image", "conversion"], required=True)
parser.add_argument("--analysis_container", "-a", choices=["detr", "ner", "image", "conversion", "extraction"], required=True)
args = parser.parse_args()
return args
@ -49,6 +49,8 @@ def build_message_bodies(analyse_container_type, bucket_name):
message_dict.update({"targetFileExtension": "ORIGIN.pdf.gz", "responseFileExtension": "IMAGE_INFO.json.gz"})
if analyse_container_type == "conversion":
message_dict.update({"targetFileExtension": "ORIGIN.pdf.gz", "responseFileExtension": "png.gz", "operation": "conversion", "pages": [1,2,3]})
if analyse_container_type == "extraction":
message_dict.update({"targetFileExtension": "ORIGIN.pdf.gz", "responseFileExtension": "json.gz", "operation": "extraction"})
if analyse_container_type == "ner":
message_dict.update(
{"targetFileExtension": "TEXT.json.gz", "responseFileExtension": "NER_ENTITIES.json.gz"}