Introduces a preprocessing step that scans each page for page-sized images. If one is encountered, all images that are below a configured ratio with respect to the page size are dropped. This step has to occur before the image stitching logic, but MIGHT introduce the problem of dropping image parts that together might constitute an image. This, however, is not solvable, since we want to drop the small images before further processing, because the faulty character images are otherwise also stitched into a seemingly valid image that in reality isn't an image.
39 lines
1.3 KiB
TOML
39 lines
1.3 KiB
TOML
[logging]
|
|
level = "INFO"
|
|
|
|
[service]
|
|
# Print document processing progress to stdout
|
|
verbose = false
|
|
batch_size = 16
|
|
mlflow_run_id = "fabfb1f192c745369b88cab34471aba7"
|
|
|
|
# These variables control filters that are applied to either images, image metadata or service_estimator predictions.
|
|
# The filter result values are reported in the service responses. For convenience the response to a request contains a
|
|
# "filters.allPassed" field, which is set to false if any of the values returned by the filters did not meet its
|
|
# specified required value.
|
|
[filters.confidence]
|
|
# Minimum permissible prediction confidence
|
|
min = 0.5
|
|
|
|
# Image size to page size ratio (ratio of geometric means of areas)
|
|
[filters.image_to_page_quotient]
|
|
min = 0.05
|
|
max = 0.75
|
|
|
|
[filters.is_scanned_page]
|
|
# Minimum permissible image to page ratio tolerance for a page to be considered scanned.
|
|
# This is only used for filtering small images on scanned pages and is applied before processing the image, therefore
|
|
# superseding the image_to_page_quotient filter that only applies a tag to the image after processing.
|
|
tolerance = 0
|
|
|
|
# Image width to height ratio
|
|
[filters.image_width_to_height_quotient]
|
|
min = 0.1
|
|
max = 10
|
|
|
|
# Put class-specific filters here ['signature', 'formula', 'logo']
|
|
[filters.overrides.signature.image_to_page_quotient]
|
|
max = 0.4
|
|
|
|
|