Merge in RR/pyinfra from 2.0.0-input-output-file-pattern-for-download-strategy to 2.0.0
Squashed commit of the following:
commit c7ce79ebbeace6a8cb7925ed69eda2d7cd2a4783
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Fri Jun 24 12:35:29 2022 +0200
refactor
commit 80f04e544962760adb2dc60c9dd03ccca22167d6
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date: Fri Jun 24 11:06:10 2022 +0200
refactoring of component factory, callback and client-pipeline getter
commit 6c024e1a789e1d55f0739c6846e5c02e8b7c943d
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date: Thu Jun 23 20:04:10 2022 +0200
operations section in config cleaned up; added upload formatter
commit c85800aefc224967cea591c1ec4cf1aaa3ac8215
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date: Thu Jun 23 19:22:51 2022 +0200
refactoring; removed obsolete config entries and code
commit 4be125952d82dc868935c8c73ad87fd8f0bd1d6c
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date: Thu Jun 23 19:14:47 2022 +0200
removed obsolete code
commit ac69a5c8e3f1e2fd7e828a17eeab97984f4f9746
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date: Thu Jun 23 18:58:41 2022 +0200
refactoring: rm dl strat module
commit efd36d0fc4f8f36d267bfa9d35415811fe723ccc
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date: Thu Jun 23 18:33:51 2022 +0200
refactoring: multi dl strat -> downloader, rm single dl strat
commit afffdeb993500a6abdb6fe85a549e3d6e97e9ee7
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date: Thu Jun 23 16:39:22 2022 +0200
operations section in config cleaned up
commit 671129af3e343490e0fb277a2b0329aa3027fd73
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Thu Jun 23 16:09:16 2022 +0200
rename prometheus metric name to include service name
commit 932a3e314b382315492aecab95b1f02f2916f8a6
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date: Thu Jun 23 14:43:23 2022 +0200
cleaned up file descr mngr
commit 79350b4ce71fcd095ed6a5e1d3a598ea246fae53
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date: Thu Jun 23 12:26:15 2022 +0200
refactoring WIP: moving response strategy logic into storage strategy (needs to be refactored as well, later) and file descr mngr. Here the moved code needs to be cleaned up.
commit 7e48c66f0c378b25a433a4034eefdc8a0957e775
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date: Thu Jun 23 12:00:48 2022 +0200
refactoring; removed operation / response folder from output path
commit 8e6cbdaf23c48f6eeb52512b7f382d5727e206d6
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date: Thu Jun 23 11:08:37 2022 +0200
refactoring; added operation -> file pattern mapping to file descr mngr (mainly for self-documentation purposes)
commit 2c80d7cec0cc171e099e5b13aadd2ae0f9bf4f02
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date: Thu Jun 23 10:59:57 2022 +0200
refactoring: introduced input- and output-file specific methods to file descr mngr
commit ecced37150eaac3008cc1b01b235e5f7135e504b
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date: Thu Jun 23 10:43:26 2022 +0200
refactoring
commit 3828341e98861ff8d63035ee983309ad5064bb30
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date: Thu Jun 23 10:42:46 2022 +0200
refactoring
commit 9a7c412523d467af40feb6924823ca89e28aadfe
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Wed Jun 22 17:04:54 2022 +0200
add prometheus metric name for default operation
commit d207b2e274ba53b2a21a18c367bb130fb05ee1cd
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Wed Jun 22 17:02:55 2022 +0200
Merge config
commit d3fdf36b12d8def18810454765e731599b833bfc
Author: Matthias Bisping <matthias.bisping@iqser.com>
Date: Wed Jun 22 17:01:12 2022 +0200
added fixmes / todos
commit f49d0b9cb7764473ef9d127bc5d88525a4a16a23
Author: Julius Unverfehrt <julius.unverfehrt@iqser.com>
Date: Wed Jun 22 16:28:25 2022 +0200
update script
... and 47 more commits
74 lines
2.6 KiB
YAML
Executable File
74 lines
2.6 KiB
YAML
Executable File
# Service-level settings: logging level, service name, payload formatters and
# the per-operation input/output file patterns.
# NOTE(review): values written as `$NAME|fallback` look like "environment
# variable NAME, else fallback" -- confirm against the config loader.
service:
|
|
logging_level: $LOGGING_LEVEL_ROOT|DEBUG # Logging level for service logger
|
|
name: $SERVICE_NAME|research # Default service name for research service, used for prometheus metric name
|
|
response_formatter: default # formats analysis payloads of response messages
|
|
upload_formatter: projecting # formats analysis payloads of objects uploaded to storage
|
|
# Note: This is not really the right place for this. It should be configured on a per-service basis.
|
|
# Each operation below declares an input and an output file pattern, each
# made of a `subdir` and an `extension`.
operations:
|
|
conversion:
|
|
input:
|
|
subdir: ""
|
|
extension: ORIGIN.pdf.gz
|
|
output:
|
|
subdir: "pages_as_images"
|
|
extension: json.gz
|
|
# Shares its input pattern (empty subdir, ORIGIN.pdf.gz) with `conversion`.
extraction:
|
|
input:
|
|
subdir: ""
|
|
extension: ORIGIN.pdf.gz
|
|
output:
|
|
subdir: "extracted_images"
|
|
extension: json.gz
|
|
# The input pattern here equals the output pattern of `conversion`
# (pages_as_images / json.gz).
table_parsing:
|
|
input:
|
|
subdir: "pages_as_images"
|
|
extension: json.gz
|
|
output:
|
|
subdir: "table_parses"
|
|
extension: json.gz
|
|
# NOTE(review): presumably the fallback pattern for requests that name no
# specific operation -- confirm against the code that reads `operations`.
default:
|
|
input:
|
|
subdir: ""
|
|
extension: IN.gz
|
|
output:
|
|
subdir: ""
|
|
extension: out.gz
|
|
|
|
# Probe webserver settings: bind address, port and run mode.
probing_webserver:
|
|
host: $PROBING_WEBSERVER_HOST|"0.0.0.0" # Probe webserver address
|
|
port: $PROBING_WEBSERVER_PORT|8080 # Probe webserver port
|
|
mode: $PROBING_WEBSERVER_MODE|production # webserver mode: {development, production}
|
|
|
|
# RabbitMQ connection settings: host, port, credentials and heartbeat.
rabbitmq:
|
|
host: $RABBITMQ_HOST|localhost # RabbitMQ host address
|
|
port: $RABBITMQ_PORT|5672 # RabbitMQ host port
|
|
# NOTE(review): the fallback username and password are committed in plain
# text; make sure deployed environments always set the env vars instead.
user: $RABBITMQ_USERNAME|user # RabbitMQ username
|
|
password: $RABBITMQ_PASSWORD|bitnami # RabbitMQ password
|
|
# The fallback of 7200 seconds is two hours.
heartbeat: $RABBITMQ_HEARTBEAT|7200 # Controls AMQP heartbeat timeout in seconds
|
|
|
|
# Names of the queues for incoming requests, outgoing responses and messages
# that failed to process.
queues:
|
|
input: $REQUEST_QUEUE|request_queue # Requests to service
|
|
output: $RESPONSE_QUEUE|response_queue # Responses by service
|
|
dead_letter: $DEAD_LETTER_QUEUE|dead_letter_queue # Messages that failed to process
|
|
|
|
# NOTE(review): presumably the HTTP endpoint the message callback forwards
# analysis requests to -- confirm against the callback implementation.
# The fallback points at the loopback address on port 5000.
callback:
|
|
analysis_endpoint: $ANALYSIS_ENDPOINT|"http://127.0.0.1:5000"
|
|
|
|
# Storage selection: which backend to use and which bucket / container to read.
storage:
|
|
backend: $STORAGE_BACKEND|s3 # The type of storage to use {s3, azure}
|
|
# NOTE(review): unlike every other entry, this value has no leading `$`, is
# quoted as a whole, and chains two names before the fallback. Confirm the
# config loader resolves this form; otherwise the literal string would be
# used as the bucket name.
bucket: "STORAGE_BUCKET_NAME|STORAGE_AZURECONTAINERNAME|pyinfra-test-bucket" # The bucket / container to pull files specified in queue requests from
|
|
|
|
# S3 backend settings: endpoint, credentials and region.
# The fallback endpoint points at the loopback address on port 9000.
s3:
|
|
endpoint: $STORAGE_ENDPOINT|"http://127.0.0.1:9000"
|
|
# NOTE(review): the fallback access key and secret are committed in plain
# text; make sure deployed environments always set the env vars instead.
access_key: $STORAGE_KEY|root
|
|
secret_key: $STORAGE_SECRET|password
|
|
region: $STORAGE_REGION|"eu-west-1"
|
|
|
|
# Azure Blob Storage backend settings.
azure:
|
|
# SECURITY: the fallback used to be a connection string containing a real
# storage account key. A credential that has been committed to version
# control must be treated as leaked: rotate that key and supply the real
# connection string only through STORAGE_AZURECONNECTIONSTRING.
# The fallback below is the publicly documented connection string of the
# local Azurite emulator (account devstoreaccount1), so the default targets
# a local development store.
connection_string: $STORAGE_AZURECONNECTIONSTRING|"DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;"
|
|
|
|
# Retry policy: number of tries, delay between tries and a jitter range.
# NOTE(review): units are not stated here; presumably seconds, with jitter
# given as a [min, max] pair -- confirm against the code that reads `retry`.
retry:
|
|
tries: 3
|
|
delay: 5
|
|
jitter: [1, 3]
|