# Service configuration. Values written as VAR|default are environment placeholders: the part after the
# pipe is the fallback used when the variable is not set.
# NOTE(review): the nesting below was restored from a copy whose line breaks and indentation were lost.
# Confirm the placement of `operations` (under service), `callback` (under rabbitmq) and `retry`
# (top level) against the code that reads this file.

service:
  logging_level: $LOGGING_LEVEL_ROOT|DEBUG  # Logging level for service logger
  # Default service name for research service, used for prometheus metric name
  name: $SERVICE_NAME|research
  # target_file_extension: $TARGET_FILE_EXTENSION|"json.gz"  # Extension for files to download from storage and process
  # TODO: will become obsolete by below changes
  # Extension for response files to upload to storage
  response_file_extension: $RESPONSE_FILE_EXTENSION|"json.gz"
  # Specifies how to handle the `page` key of a request. "multi" will download all pages matching the
  # list of pages specified in the request.
  download_strategy: $DOWNLOAD_STRATEGY|multi
  # TODO: write formatter for analysis tasks that pulls metadata content into root of response json
  response_formatter: default
  # Note: This is not really the right place for this. It should be configured on a per-service basis.
  # Each operation names the storage subdirectory and file extension of its input and output files.
  operations:
    conversion:
      input:
        subdir: ""
        extension: ORIGIN.pdf.gz
      output:
        subdir: "conversion"
        extension: json.gz
    extraction:
      input:
        subdir: ""
        extension: ORIGIN.pdf.gz
      output:
        subdir: "extraction"
        extension: json.gz
    table_parsing:
      input:
        subdir: "conversion"
        extension: json.gz
      output:
        subdir: "table_new"
        extension: json.gz
    # presumably the fallback for operations not listed above -- verify against the consumer
    default:
      input:
        subdir: ""
        extension: IN.gz
      output:
        subdir: ""
        extension: out.gz

probing_webserver:
  host: $PROBING_WEBSERVER_HOST|"0.0.0.0"  # Probe webserver address
  port: $PROBING_WEBSERVER_PORT|8080  # Probe webserver port
  mode: $PROBING_WEBSERVER_MODE|production  # webserver mode: {development, production}

rabbitmq:
  host: $RABBITMQ_HOST|localhost  # RabbitMQ host address
  port: $RABBITMQ_PORT|5672  # RabbitMQ host port
  user: $RABBITMQ_USERNAME|user  # RabbitMQ username
  # NOTE(review): a default password is committed here; prefer supplying it via the environment only.
  password: $RABBITMQ_PASSWORD|bitnami  # RabbitMQ password
  heartbeat: $RABBITMQ_HEARTBEAT|7200  # Controls AMQP heartbeat timeout in seconds
  queues:
    input: $REQUEST_QUEUE|request_queue  # Requests to service
    output: $RESPONSE_QUEUE|response_queue  # Responses by service
    dead_letter: $DEAD_LETTER_QUEUE|dead_letter_queue  # Messages that failed to process

  callback:
    analysis_endpoint: $ANALYSIS_ENDPOINT|"http://127.0.0.1:5000"

storage:
  backend: $STORAGE_BACKEND|s3  # The type of storage to use {s3, azure}
  # The bucket / container to pull files specified in queue requests from.
  # NOTE(review): unlike the other placeholders this value is quoted and has no leading dollar sign;
  # presumably the consumer tries the listed variable names in order and falls back to the last
  # entry -- confirm before normalizing it.
  bucket: "STORAGE_BUCKET_NAME|STORAGE_AZURECONTAINERNAME|pyinfra-test-bucket"
  s3:
    endpoint: $STORAGE_ENDPOINT|"http://127.0.0.1:9000"
    access_key: $STORAGE_KEY|root
    secret_key: $STORAGE_SECRET|password
    region: $STORAGE_REGION|"eu-west-1"
  azure:
    # NOTE(review): SECURITY -- this fallback embeds what looks like a real storage account key.
    # Consider rotating the key and providing the connection string through the environment only.
    connection_string: $STORAGE_AZURECONNECTIONSTRING|"DefaultEndpointsProtocol=https;AccountName=iqserdevelopment;AccountKey=4imAbV9PYXaztSOMpIyAClg88bAZCXuXMGJG0GA1eIBpdh2PlnFGoRBnKqLy2YZUSTmZ3wJfC7tzfHtuC6FEhQ==;EndpointSuffix=core.windows.net"

# Retry settings; the units of `delay` and `jitter` are not stated here (presumably seconds -- confirm).
retry:
  tries: 3
  delay: 5
  jitter: [1, 3]