Add logging for ConsumerError; fix page index hash function

This commit is contained in:
Julius Unverfehrt 2022-06-02 14:14:54 +02:00
parent bfe8bbb8cb
commit c2d7127a84
3 changed files with 7 additions and 3 deletions

View File

@ -19,7 +19,7 @@ from pyinfra.storage.storage import Storage
def unique_hash(pages, seed=""):
assert isinstance(seed, str)
pages_str = "-".join(pages)
pages_str = "-".join(map(str, pages))
seed = seed or str(time.time())
rand_str = (pages_str + seed).encode(encoding="UTF-8", errors="strict")
hsh = hashlib.md5(rand_str).hexdigest()

View File

@ -48,7 +48,7 @@ def build_message_bodies(analyse_container_type, bucket_name):
if analyse_container_type == "detr" or analyse_container_type == "image":
message_dict.update({"targetFileExtension": "ORIGIN.pdf.gz", "responseFileExtension": "IMAGE_INFO.json.gz"})
if analyse_container_type == "conversion":
message_dict.update({"targetFileExtension": "ORIGIN.pdf.gz", "responseFileExtension": "png.gz", "operation": "conversion"})
message_dict.update({"targetFileExtension": "ORIGIN.pdf.gz", "responseFileExtension": "png.gz", "operation": "conversion", "pages": [1,2,3]})
if analyse_container_type == "ner":
message_dict.update(
{"targetFileExtension": "TEXT.json.gz", "responseFileExtension": "NER_ENTITIES.json.gz"}

View File

@ -10,12 +10,16 @@ from pyinfra.flask import run_probing_webserver, set_up_probing_webserver
from pyinfra.utils.banner import show_banner
logger = logging.getLogger()
@retry(ConsumerError, tries=3, delay=5, jitter=(1, 3))
def consume():
    """Run the consume-and-publish loop, wrapping failures in ConsumerError.

    The consumer is obtained outside the guarded section, so only exceptions
    raised by ``basic_consume_and_publish`` are logged and converted.

    NOTE(review): the ``retry`` decorator presumably re-invokes this function
    when ConsumerError is raised (tries=3, delay=5, jitter=(1, 3)) -- confirm
    against the retry library in use.

    Raises:
        ConsumerError: chained to the exception raised while consuming.
    """
    active_consumer = get_consumer()
    try:
        active_consumer.basic_consume_and_publish()
    except Exception as failure:
        # Log the failure before it is replaced by the domain-level error.
        logger.exception(failure)
        raise ConsumerError() from failure
@ -44,4 +48,4 @@ if __name__ == "__main__":
logging.getLogger("flask").setLevel(logging.ERROR)
logging.getLogger("urllib3").setLevel(logging.ERROR)
main()
main()