misc minor fixes while integrating with pdf2image

This commit is contained in:
Matthias Bisping 2022-05-31 17:58:28 +02:00
parent dd591bd24b
commit 3046b4dc26
5 changed files with 19 additions and 11 deletions

View File

@ -88,7 +88,7 @@ class Callback:
raise AnalysisFailure from err
def __call__(self, body: dict):
operation = body.get("operations", "submit")
operation = body.get("operation", "submit")
endpoint = self.__make_endpoint(operation)
pipeline = self.__get_pipeline(endpoint)

View File

@ -21,6 +21,12 @@ def __set_up_processing_server(queued_stream_function: QueuedStreamFunction):
resp.status_code = 200
return resp
@app.route("/health", methods=["GET"])
def healthy():
    """Health-check endpoint: answer GET /health with a JSON "OK" and status 200."""
    response = jsonify("OK")
    # Set the status explicitly on the response object before returning it.
    response.status_code = 200
    return response
@app.route("/submit", methods=["POST", "PATCH"])
def submit():
    """Pass the incoming request to the processor and return its result."""
    outcome = processor.push(request)
    return outcome

View File

@ -6,7 +6,7 @@ import logging
import time
from collections import deque
from operator import itemgetter
from typing import Callable, Iterable
from typing import Callable
from funcy import omit
from more_itertools import peekable
@ -105,11 +105,6 @@ class IdentifierDispatchCallback(DispatchCallback):
return identifier != self.identifier
# def data_is_non_empty(self, data):
#
# if isinstance(data, str):
# self.put_object(data, metadata)
def __call__(self, metadata):
    """Return the result of ``has_new_identifier`` for the given *metadata*."""
    identifier_changed = self.has_new_identifier(metadata)
    return identifier_changed
@ -200,12 +195,17 @@ class QueueVisitor:
return {"data": data, "metadata": {}}
try:
data = json.loads(data.decode())
except json.JSONDecodeError: # case 1 fallback
return wrap(data.decode())
data = data.decode()
try:
data = json.loads(data)
except json.JSONDecodeError: # case 1 fallback
return wrap(data)
except Exception:
return wrap(data)
if not isinstance(data, dict): # case 1
return wrap(string_to_bytes(data))
else: # case 2
validate(data)
data["data"] = string_to_bytes(data["data"])

View File

@ -54,7 +54,7 @@ def build_message_bodies(analyse_container_type, bucket_name):
return message_dict
storage = get_s3_storage()
for bucket_name, pdf_name in storage.get_all_object_names(bucket_name):
for pdf_name in storage.get_all_object_names(bucket_name):
if "pdf" not in pdf_name:
continue
file_id = pdf_name.split(".")[0]

View File

@ -153,3 +153,5 @@ def test_serving(
uploaded_files = starmap(storage.get_object, zip(repeat(bucket_name), names_of_uploaded_files))
outputs = sorted(chain(*map(decode, uploaded_files)), key=itemgetter(0))
assert outputs == targets
storage.clear_bucket(bucket_name)