76 lines
2.7 KiB
YAML
Executable File
76 lines
2.7 KiB
YAML
Executable File
service:
  # Logging level for the service logger.
  logging_level: $LOGGING_LEVEL_ROOT|DEBUG
  # Default service name for the research service; also used for the
  # Prometheus metric name.
  name: $SERVICE_NAME|research

  # Specifies how to handle the `page` key of a request. "multi" will download
  # all pages matching the list of pages specified in the request.
  # NOTE(review): no key visible in this section takes a "multi" value; this
  # comment may describe a setting that is no longer present - confirm.
  #
  # TODO: write a formatter for analysis tasks that pulls metadata content
  # into the root of the response JSON.
  response_formatter: default
  # Note: This is not really the right place for this. It should be configured
  # on a per-service basis.
  # Each operation names the storage sub-directory and file extension used for
  # its input and its output objects.
  operations:
    conversion:
      input:
        subdir: ""
        extension: ORIGIN.pdf.gz
      output:
        subdir: "conversion"
        extension: json.gz
    extraction:
      input:
        subdir: ""
        extension: ORIGIN.pdf.gz
      output:
        subdir: "extraction"
        extension: json.gz
    table_parsing:
      input:
        subdir: "conversion"
        extension: json.gz
      output:
        subdir: "table_new"
        extension: json.gz
    default:
      input:
        subdir: ""
        extension: IN.gz
      output:
        subdir: ""
        extension: out.gz

probing_webserver:
  # Address of the probe webserver.
  host: $PROBING_WEBSERVER_HOST|"0.0.0.0"
  # Port of the probe webserver.
  port: $PROBING_WEBSERVER_PORT|8080
  # Webserver mode; one of: development, production.
  mode: $PROBING_WEBSERVER_MODE|production

rabbitmq:
  # NOTE(review): this copy of the file had lost its indentation; nesting
  # `queues` and `callback` under `rabbitmq` is a reconstruction - confirm
  # against the code that reads this configuration.

  # RabbitMQ host address.
  host: $RABBITMQ_HOST|localhost
  # RabbitMQ host port.
  port: $RABBITMQ_PORT|5672
  # RabbitMQ username.
  user: $RABBITMQ_USERNAME|user
  # RabbitMQ password.
  password: $RABBITMQ_PASSWORD|bitnami
  # Controls the AMQP heartbeat timeout, in seconds.
  heartbeat: $RABBITMQ_HEARTBEAT|7200

  queues:
    # Requests to the service.
    input: $REQUEST_QUEUE|request_queue
    # Responses by the service.
    output: $RESPONSE_QUEUE|response_queue
    # Messages that failed to process.
    dead_letter: $DEAD_LETTER_QUEUE|dead_letter_queue

  callback:
    analysis_endpoint: $ANALYSIS_ENDPOINT|"http://127.0.0.1:5000"

storage:
  # NOTE(review): this copy of the file had lost its indentation; nesting
  # `s3` and `azure` under `storage` is a reconstruction - confirm against
  # the code that reads this configuration.

  # The type of storage to use; one of: s3, azure.
  backend: $STORAGE_BACKEND|s3
  # The bucket / container to pull files specified in queue requests from.
  # NOTE(review): unlike every other entry, this value has no leading dollar
  # sign and is quoted as a whole, so YAML yields the literal pipe-separated
  # string. Presumably the consumer splits it and tries each name in turn -
  # confirm before changing it.
  bucket: "STORAGE_BUCKET_NAME|STORAGE_AZURECONTAINERNAME|pyinfra-test-bucket"

  s3:
    endpoint: $STORAGE_ENDPOINT|"http://127.0.0.1:9000"
    access_key: $STORAGE_KEY|root
    secret_key: $STORAGE_SECRET|password
    region: $STORAGE_REGION|"eu-west-1"

  azure:
    # SECURITY: never commit a real storage account key to this file. Supply
    # the connection string through the STORAGE_AZURECONNECTIONSTRING
    # environment variable. The default below is a non-functional placeholder;
    # any account key that was ever committed here should be treated as
    # compromised and rotated.
    connection_string: $STORAGE_AZURECONNECTIONSTRING|"DefaultEndpointsProtocol=https;AccountName=<account-name>;AccountKey=<account-key>;EndpointSuffix=core.windows.net"

# Retry settings.
# NOTE(review): units and exact semantics are not stated in this file;
# presumably `delay` and `jitter` are in seconds and `jitter` is a
# [min, max] range - confirm against the code that consumes them.
retry:
  tries: 3
  delay: 5
  jitter: [1, 3]