operations section in config cleaned up

This commit is contained in:
Matthias Bisping 2022-06-23 16:39:22 +02:00
parent 671129af3e
commit afffdeb993
2 changed files with 11 additions and 10 deletions

View File

@ -9,34 +9,35 @@ service:
# specified in the request
download_strategy: $DOWNLOAD_STRATEGY|multi
response_formatter: default # TODO: write formatter for analysis tasks that pulls metadata content into root of response json
# Note: This is not really the right place for this. It should be configured on a per-service basis.
operations:
conversion:
input:
subdir: ""
extension: ORIGIN.pdf.gz
output: # FIXME
subdir: "conversion_new" # FIXME: currently operation string is used for output dir
output:
subdir: "conversion"
extension: json.gz
extraction:
input:
subdir: ""
extension: ORIGIN.pdf.gz
output: # FIXME
subdir: "extraction_new" # FIXME: currently operation string is used for output dir
output:
subdir: "extraction"
extension: json.gz
table_parsing:
input:
subdir: "conversion_new"
subdir: "conversion"
extension: json.gz
output: # FIXME
subdir: "table_new" # FIXME: currently operation string is used for output dir
output:
subdir: "table_new"
extension: json.gz
default:
input:
subdir: ""
extension: IN.gz
output: # FIXME
subdir: "" # FIXME: currently operation string is used for output dir
output:
subdir: ""
extension: out.gz
probing_webserver:

View File

@ -160,7 +160,7 @@ def build_filepath(object_descriptor, page):
object_name = object_descriptor["object_name"]
parts = object_name.split("/")
path = "/".join(parts)
path = re.sub("id:\d", f"id:{page}", path)
path = re.sub(r"id:\d", f"id:{page}", path)
return path