operations section in config cleaned up

This commit is contained in:
Matthias Bisping 2022-06-23 16:39:22 +02:00
parent 671129af3e
commit afffdeb993
2 changed files with 11 additions and 10 deletions

View File

@ -9,34 +9,35 @@ service:
# specified in the request # specified in the request
download_strategy: $DOWNLOAD_STRATEGY|multi download_strategy: $DOWNLOAD_STRATEGY|multi
response_formatter: default # TODO: write formatter for analysis tasks that pulls metadata content into root of response json response_formatter: default # TODO: write formatter for analysis tasks that pulls metadata content into root of response json
# Note: This is not really the right place for this. It should be configured on a per-service basis.
operations: operations:
conversion: conversion:
input: input:
subdir: "" subdir: ""
extension: ORIGIN.pdf.gz extension: ORIGIN.pdf.gz
output: # FIXME output:
subdir: "conversion_new" # FIXME: currently operation string is used for output dir subdir: "conversion"
extension: json.gz extension: json.gz
extraction: extraction:
input: input:
subdir: "" subdir: ""
extension: ORIGIN.pdf.gz extension: ORIGIN.pdf.gz
output: # FIXME output:
subdir: "extraction_new" # FIXME: currently operation string is used for output dir subdir: "extraction"
extension: json.gz extension: json.gz
table_parsing: table_parsing:
input: input:
subdir: "conversion_new" subdir: "conversion"
extension: json.gz extension: json.gz
output: # FIXME output:
subdir: "table_new" # FIXME: currently operation string is used for output dir subdir: "table_new"
extension: json.gz extension: json.gz
default: default:
input: input:
subdir: "" subdir: ""
extension: IN.gz extension: IN.gz
output: # FIXME output:
subdir: "" # FIXME: currently operation string is used for output dir subdir: ""
extension: out.gz extension: out.gz
probing_webserver: probing_webserver:

View File

@ -160,7 +160,7 @@ def build_filepath(object_descriptor, page):
object_name = object_descriptor["object_name"] object_name = object_descriptor["object_name"]
parts = object_name.split("/") parts = object_name.split("/")
path = "/".join(parts) path = "/".join(parts)
path = re.sub("id:\d", f"id:{page}", path) path = re.sub(r"id:\d", f"id:{page}", path)
return path return path