Compare commits
227 Commits
master
...
fuzzy_stit
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
03e7b00cfd | ||
|
|
7aee00cb49 | ||
|
|
2cc52c4630 | ||
|
|
daa1da3a50 | ||
|
|
6a7debde14 | ||
|
|
b4f279c549 | ||
|
|
f5881f2229 | ||
|
|
62bfedfea8 | ||
|
|
1d88876ab1 | ||
|
|
bbafad5561 | ||
|
|
f17a232009 | ||
|
|
88a46ae7cd | ||
|
|
e82a81f5c8 | ||
|
|
35c5b15e32 | ||
|
|
698e647c6f | ||
|
|
d8f86d14a5 | ||
|
|
bb7c1be630 | ||
|
|
79cd31850d | ||
|
|
3d335783dc | ||
|
|
bb79f9dd55 | ||
|
|
585cdf5c70 | ||
|
|
04cf0245ed | ||
|
|
3530ef72c5 | ||
|
|
d80af336eb | ||
|
|
bcf6dc5c47 | ||
|
|
f4c0547405 | ||
|
|
1bea5fb9a8 | ||
|
|
57440f5106 | ||
|
|
710783a2f8 | ||
|
|
887b8339a2 | ||
|
|
43cb0fffed | ||
|
|
6e7645e319 | ||
|
|
3b18fc6158 | ||
|
|
1b10445f91 | ||
|
|
5967149c49 | ||
|
|
303970db51 | ||
|
|
51793d19e9 | ||
|
|
e276a5ec27 | ||
|
|
7e6fe7cf11 | ||
|
|
bb5db1b4ef | ||
|
|
8ac9fcb19f | ||
|
|
160973e2be | ||
|
|
803cc57155 | ||
|
|
50b4d239cb | ||
|
|
9bb07f95fb | ||
|
|
29028cc1a5 | ||
|
|
2fcb0bd149 | ||
|
|
3e882dc247 | ||
|
|
2b1e7cbb08 | ||
|
|
5e8b55ef10 | ||
|
|
3266e0af58 | ||
|
|
7e2696d5c5 | ||
|
|
302613bf2b | ||
|
|
66fd103d1b | ||
|
|
6e5d6912ed | ||
|
|
b1efb5ed09 | ||
|
|
ef70e11352 | ||
|
|
315679468b | ||
|
|
64e3350dee | ||
|
|
6a7e0e1000 | ||
|
|
11fc63035d | ||
|
|
4bc295b212 | ||
|
|
4c46be4abc | ||
|
|
37ee086b5d | ||
|
|
1fd30e68b6 | ||
|
|
2c908162f1 | ||
|
|
4756b8c9bd | ||
|
|
e0885c545a | ||
|
|
fdb7ebe618 | ||
|
|
ce69f7d160 | ||
|
|
8f61c4cba2 | ||
|
|
f3e2b2335f | ||
|
|
9cda65ad41 | ||
|
|
692e72b3b2 | ||
|
|
38869d52c6 | ||
|
|
e01b5c9acd | ||
|
|
6a6fc19958 | ||
|
|
1b1f1aafef | ||
|
|
caef37376b | ||
|
|
16aa951c96 | ||
|
|
89afb8f920 | ||
|
|
1ffc9dcc68 | ||
|
|
0976971117 | ||
|
|
b4b0058475 | ||
|
|
2ee36dcb54 | ||
|
|
ab382646b7 | ||
|
|
8c916a79c3 | ||
|
|
3ff6dac2e0 | ||
|
|
d134884553 | ||
|
|
2d0545c928 | ||
|
|
65a4a8e34e | ||
|
|
39c111fd42 | ||
|
|
0376223c9d | ||
|
|
bf85ef357c | ||
|
|
f6a7a14a20 | ||
|
|
41f783dc5d | ||
|
|
32397256c8 | ||
|
|
f44e6f4fd7 | ||
|
|
3d2c97bc10 | ||
|
|
9663cec12d | ||
|
|
c1c3f541d4 | ||
|
|
4d86e78307 | ||
|
|
1cf6ab256c | ||
|
|
a89e374c67 | ||
|
|
0861e22542 | ||
|
|
7827869af4 | ||
|
|
613bba8cfc | ||
|
|
5c23898280 | ||
|
|
e8d0299e46 | ||
|
|
cb00aed62c | ||
|
|
1501653673 | ||
|
|
b4b929b65f | ||
|
|
3d1c251e10 | ||
|
|
c80549d5d3 | ||
|
|
070749880e | ||
|
|
94783c54f2 | ||
|
|
2b48c6108b | ||
|
|
da9b3d0cb9 | ||
|
|
c372529ee5 | ||
|
|
1a1ece1f95 | ||
|
|
426061e5ea | ||
|
|
7c2cf44ad0 | ||
|
|
c125e1ff6c | ||
|
|
dd007891c7 | ||
|
|
d3257fdeda | ||
|
|
1581880ec6 | ||
|
|
268b83a1ff | ||
|
|
5caa9807e2 | ||
|
|
82added50a | ||
|
|
b6ccfbcf8f | ||
|
|
e17912caa9 | ||
|
|
3eaf9dc0e1 | ||
|
|
0cefef4e15 | ||
|
|
4f94cbd68d | ||
|
|
2517b45d44 | ||
|
|
2a62ad7aba | ||
|
|
20c980dbe6 | ||
|
|
726298b155 | ||
|
|
479afbcd34 | ||
|
|
4ab9f0d89b | ||
|
|
d4604a2cb5 | ||
|
|
4ebb36247e | ||
|
|
7ec7390e90 | ||
|
|
dc1cdde458 | ||
|
|
0921ef9a4f | ||
|
|
91dd467142 | ||
|
|
b3e1604ecc | ||
|
|
20718996bd | ||
|
|
cc8d87338c | ||
|
|
258c1ab02d | ||
|
|
ce3d33955e | ||
|
|
a95cc4e06b | ||
|
|
6d1ace473b | ||
|
|
0a22a35912 | ||
|
|
a5d3232dd0 | ||
|
|
49f9847d9a | ||
|
|
1c6f5749dd | ||
|
|
8bccec277f | ||
|
|
7f37f841dd | ||
|
|
8c7e3e29f5 | ||
|
|
99d8e921db | ||
|
|
6835394d30 | ||
|
|
ad6bb80900 | ||
|
|
95209a5c9d | ||
|
|
45a07c620a | ||
|
|
81ab9a5f53 | ||
|
|
8b15ac6df4 | ||
|
|
e9489287bd | ||
|
|
15c0b73034 | ||
|
|
7a64af156b | ||
|
|
60617fd622 | ||
|
|
ade318c7b7 | ||
|
|
3339ed2eab | ||
|
|
7340fb6dda | ||
|
|
358d7ecd91 | ||
|
|
d33a882d65 | ||
|
|
06adedac57 | ||
|
|
edbc5c3f84 | ||
|
|
f60bafd007 | ||
|
|
a1c7dd4a8d | ||
|
|
6b58756103 | ||
|
|
3b4c2a40b2 | ||
|
|
c06905625d | ||
|
|
d44622dddc | ||
|
|
3c6dfed508 | ||
|
|
f18e183ab0 | ||
|
|
86f2abc553 | ||
|
|
f0a8f2224c | ||
|
|
9bf1dcbe1d | ||
|
|
9ce7b6e6da | ||
|
|
e818b05472 | ||
|
|
b818ee4724 | ||
|
|
9461be29d5 | ||
|
|
2631eb5c0f | ||
|
|
643ab99bd3 | ||
|
|
e0ab365bb9 | ||
|
|
48737d9439 | ||
|
|
a5147c9a58 | ||
|
|
4c939464b0 | ||
|
|
334dc79f7e | ||
|
|
9d58ae714f | ||
|
|
0f811bdc56 | ||
|
|
d11333981f | ||
|
|
4fcd1e79d3 | ||
|
|
5c5d132d7f | ||
|
|
0f9510906d | ||
|
|
6343229c1e | ||
|
|
7d21b0a585 | ||
|
|
364111db89 | ||
|
|
ea298dacfa | ||
|
|
373c619b0c | ||
|
|
8aa0717007 | ||
|
|
a3215e0bc3 | ||
|
|
c64bff0843 | ||
|
|
dd18087261 | ||
|
|
d97b477208 | ||
|
|
981d7816a0 | ||
|
|
2e36a9d46d | ||
|
|
03f269c2d7 | ||
|
|
6853d862ed | ||
|
|
31591bef0f | ||
|
|
7834a65ff5 | ||
|
|
8b7293be09 | ||
|
|
9c9070e8bf | ||
|
|
e8fb01b4b7 | ||
|
|
41f0cc8a41 | ||
|
|
ee959346b7 |
@ -1,8 +1,6 @@
|
||||
[core]
|
||||
remote = azure_remote
|
||||
remote = vector
|
||||
autostage = true
|
||||
['remote "vector"']
|
||||
url = ssh://vector.iqser.com/research/image-prediction/
|
||||
port = 22
|
||||
['remote "azure_remote"']
|
||||
url = azure://image-classification-dvc/
|
||||
7
.gitignore
vendored
7
.gitignore
vendored
@ -1,8 +1,7 @@
|
||||
.vscode/
|
||||
*.h5
|
||||
*venv
|
||||
/venv/
|
||||
.idea/
|
||||
src/data
|
||||
|
||||
!.gitignore
|
||||
*.project
|
||||
@ -34,7 +33,6 @@ src/data
|
||||
**/dependencies-and-licenses-overview.txt
|
||||
|
||||
.coverage
|
||||
.coverage\.*\.*
|
||||
|
||||
|
||||
*__pycache__
|
||||
@ -48,6 +46,7 @@ src/data
|
||||
*misc
|
||||
|
||||
/coverage_html_report/
|
||||
.coverage\.*
|
||||
|
||||
# Created by https://www.toptal.com/developers/gitignore/api/linux,pycharm
|
||||
# Edit at https://www.toptal.com/developers/gitignore?templates=linux,pycharm
|
||||
@ -174,3 +173,5 @@ fabric.properties
|
||||
.idea/codestream.xml
|
||||
|
||||
# End of https://www.toptal.com/developers/gitignore/api/linux,pycharm
|
||||
/image_prediction/data/mlruns/
|
||||
#/data/mlruns/
|
||||
|
||||
@ -1,51 +0,0 @@
|
||||
include:
|
||||
- project: "Gitlab/gitlab"
|
||||
ref: main
|
||||
file: "/ci-templates/research/dvc.gitlab-ci.yml"
|
||||
- project: "Gitlab/gitlab"
|
||||
ref: main
|
||||
file: "/ci-templates/research/versioning-build-test-release.gitlab-ci.yml"
|
||||
|
||||
variables:
|
||||
NEXUS_PROJECT_DIR: red
|
||||
IMAGENAME: "${CI_PROJECT_NAME}"
|
||||
INTEGRATION_TEST_FILE: "${CI_PROJECT_ID}.pdf"
|
||||
FF_USE_FASTZIP: "true" # enable fastzip - a faster zip implementation that also supports level configuration.
|
||||
ARTIFACT_COMPRESSION_LEVEL: default # can also be set to fastest, fast, slow and slowest. If just enabling fastzip is not enough try setting this to fastest or fast.
|
||||
CACHE_COMPRESSION_LEVEL: default # same as above, but for caches
|
||||
# TRANSFER_METER_FREQUENCY: 5s # will display transfer progress every 5 seconds for artifacts and remote caches. For debugging purposes.
|
||||
|
||||
stages:
|
||||
- data
|
||||
- setup
|
||||
- tests
|
||||
- sonarqube
|
||||
- versioning
|
||||
- build
|
||||
- integration-tests
|
||||
- release
|
||||
|
||||
docker-build:
|
||||
extends: .docker-build
|
||||
needs:
|
||||
- job: dvc-pull
|
||||
artifacts: true
|
||||
- !reference [.needs-versioning, needs] # leave this line as is
|
||||
|
||||
###################
|
||||
# INTEGRATION TESTS
|
||||
trigger-integration-tests:
|
||||
extends: .integration-tests
|
||||
# ADD THE MODEL BUILD WHICH SHOULD TRIGGER THE INTEGRATION TESTS
|
||||
# needs:
|
||||
# - job: docker-build::model_name
|
||||
# artifacts: true
|
||||
rules:
|
||||
- when: never
|
||||
|
||||
#########
|
||||
# RELEASE
|
||||
release:
|
||||
extends: .release
|
||||
needs:
|
||||
- !reference [.needs-versioning, needs] # leave this line as is
|
||||
3
.gitmodules
vendored
Normal file
3
.gitmodules
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
[submodule "incl/redai_image"]
|
||||
path = incl/redai_image
|
||||
url = ssh://git@git.iqser.com:2222/rr/redai_image.git
|
||||
@ -1 +0,0 @@
|
||||
3.10
|
||||
80
Dockerfile
80
Dockerfile
@ -1,73 +1,25 @@
|
||||
FROM python:3.10-slim AS builder
|
||||
ARG BASE_ROOT="nexus.iqser.com:5001/red/"
|
||||
ARG VERSION_TAG="latest"
|
||||
|
||||
ARG GITLAB_USER
|
||||
ARG GITLAB_ACCESS_TOKEN
|
||||
FROM ${BASE_ROOT}image-prediction-base:${VERSION_TAG}
|
||||
|
||||
ARG PYPI_REGISTRY_RESEARCH=https://gitlab.knecon.com/api/v4/groups/19/-/packages/pypi
|
||||
ARG POETRY_SOURCE_REF_RESEARCH=gitlab-research
|
||||
WORKDIR /app/service
|
||||
|
||||
ARG PYPI_REGISTRY_RED=https://gitlab.knecon.com/api/v4/groups/12/-/packages/pypi
|
||||
ARG POETRY_SOURCE_REF_RED=gitlab-red
|
||||
COPY src src
|
||||
COPY data data
|
||||
COPY image_prediction image_prediction
|
||||
COPY incl/redai_image/redai incl/redai_image/redai
|
||||
COPY setup.py setup.py
|
||||
COPY requirements.txt requirements.txt
|
||||
COPY config.yaml config.yaml
|
||||
|
||||
ARG PYPI_REGISTRY_FFORESIGHT=https://gitlab.knecon.com/api/v4/groups/269/-/packages/pypi
|
||||
ARG POETRY_SOURCE_REF_FFORESIGHT=gitlab-fforesight
|
||||
# Install dependencies differing from base image.
|
||||
RUN python3 -m pip install -r requirements.txt
|
||||
|
||||
ARG VERSION=dev
|
||||
|
||||
LABEL maintainer="Research <research@knecon.com>"
|
||||
LABEL version="${VERSION}"
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
###########
|
||||
# ENV SETUP
|
||||
ENV PYTHONDONTWRITEBYTECODE=true
|
||||
ENV PYTHONUNBUFFERED=true
|
||||
ENV POETRY_HOME=/opt/poetry
|
||||
ENV PATH="$POETRY_HOME/bin:$PATH"
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y curl git bash build-essential libffi-dev libssl-dev && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN curl -sSL https://install.python-poetry.org | python3 -
|
||||
RUN poetry --version
|
||||
|
||||
COPY pyproject.toml poetry.lock ./
|
||||
|
||||
RUN poetry config virtualenvs.create true && \
|
||||
poetry config virtualenvs.in-project true && \
|
||||
poetry config installer.max-workers 10 && \
|
||||
poetry config repositories.${POETRY_SOURCE_REF_RESEARCH} ${PYPI_REGISTRY_RESEARCH} && \
|
||||
poetry config http-basic.${POETRY_SOURCE_REF_RESEARCH} ${GITLAB_USER} ${GITLAB_ACCESS_TOKEN} && \
|
||||
poetry config repositories.${POETRY_SOURCE_REF_RED} ${PYPI_REGISTRY_RED} && \
|
||||
poetry config http-basic.${POETRY_SOURCE_REF_RED} ${GITLAB_USER} ${GITLAB_ACCESS_TOKEN} && \
|
||||
poetry config repositories.${POETRY_SOURCE_REF_FFORESIGHT} ${PYPI_REGISTRY_FFORESIGHT} && \
|
||||
poetry config http-basic.${POETRY_SOURCE_REF_FFORESIGHT} ${GITLAB_USER} ${GITLAB_ACCESS_TOKEN} && \
|
||||
poetry install --without=dev -vv --no-interaction --no-root
|
||||
|
||||
###############
|
||||
# WORKING IMAGE
|
||||
FROM python:3.10-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# COPY SOURCE CODE FROM BUILDER IMAGE
|
||||
COPY --from=builder /app /app
|
||||
# COPY BILL OF MATERIALS (BOM)
|
||||
COPY bom.json /bom.json
|
||||
|
||||
ENV PATH="/app/.venv/bin:$PATH"
|
||||
|
||||
###################
|
||||
# COPY SOURCE CODE
|
||||
COPY ./src ./src
|
||||
COPY ./config ./config
|
||||
COPY ./data ./data
|
||||
COPY banner.txt ./
|
||||
RUN python3 -m pip install -e .
|
||||
RUN python3 -m pip install -e incl/redai_image/redai
|
||||
|
||||
EXPOSE 5000
|
||||
EXPOSE 8080
|
||||
|
||||
CMD [ "python", "src/serve.py"]
|
||||
CMD ["python3", "src/serve.py"]
|
||||
|
||||
25
Dockerfile_base
Normal file
25
Dockerfile_base
Normal file
@ -0,0 +1,25 @@
|
||||
FROM python:3.8 as builder1
|
||||
|
||||
# Use a virtual environment.
|
||||
RUN python -m venv /app/venv
|
||||
ENV PATH="/app/venv/bin:$PATH"
|
||||
|
||||
# Upgrade pip.
|
||||
RUN python -m pip install --upgrade pip
|
||||
|
||||
# Make a directory for the service files and copy the service repo into the container.
|
||||
WORKDIR /app/service
|
||||
COPY ./requirements.txt ./requirements.txt
|
||||
|
||||
# Install dependencies.
|
||||
RUN python3 -m pip install -r requirements.txt
|
||||
|
||||
# Make a new container and copy all relevant files over to filter out temporary files
|
||||
# produced during setup to reduce the final container's size.
|
||||
FROM python:3.8
|
||||
|
||||
WORKDIR /app/
|
||||
COPY --from=builder1 /app .
|
||||
ENV PATH="/app/venv/bin:$PATH"
|
||||
|
||||
WORKDIR /app/service
|
||||
@ -1,43 +0,0 @@
|
||||
FROM python:3.10
|
||||
|
||||
ARG USERNAME
|
||||
ARG TOKEN
|
||||
ARG PYPI_REGISTRY_RESEARCH=https://gitlab.knecon.com/api/v4/groups/19/-/packages/pypi
|
||||
ARG POETRY_SOURCE_REF_RESEARCH=gitlab-research
|
||||
ARG PYPI_REGISTRY_RED=https://gitlab.knecon.com/api/v4/groups/12/-/packages/pypi
|
||||
ARG POETRY_SOURCE_REF_RED=gitlab-red
|
||||
ARG VERSION=dev
|
||||
|
||||
LABEL maintainer="Research <research@knecon.com>"
|
||||
LABEL version="${VERSION}"
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
ENV PYTHONUNBUFFERED=true
|
||||
ENV POETRY_HOME=/opt/poetry
|
||||
ENV PATH="$POETRY_HOME/bin:$PATH"
|
||||
|
||||
RUN curl -sSL https://install.python-poetry.org | python3 -
|
||||
|
||||
COPY ./data ./data
|
||||
COPY ./test ./test
|
||||
COPY ./config ./config
|
||||
COPY ./src ./src
|
||||
COPY pyproject.toml poetry.lock banner.txt config.yaml./
|
||||
|
||||
RUN poetry config virtualenvs.create false && \
|
||||
poetry config installer.max-workers 10 && \
|
||||
poetry config repositories.${POETRY_SOURCE_REF_RESEARCH} ${PYPI_REGISTRY_RESEARCH} && \
|
||||
poetry config http-basic.${POETRY_SOURCE_REF_RESEARCH} ${USERNAME} ${TOKEN} && \
|
||||
poetry config repositories.${POETRY_SOURCE_REF_RED} ${PYPI_REGISTRY_RED} && \
|
||||
poetry config http-basic.${POETRY_SOURCE_REF_RED} ${USERNAME} ${TOKEN} && \
|
||||
poetry install --without=dev -vv --no-interaction --no-root
|
||||
|
||||
EXPOSE 5000
|
||||
EXPOSE 8080
|
||||
|
||||
RUN apt update --yes
|
||||
RUN apt install vim --yes
|
||||
RUN apt install poppler-utils --yes
|
||||
|
||||
CMD coverage run -m pytest test/ --tb=native -q -s -vvv -x && coverage combine && coverage report -m && coverage xml
|
||||
136
README.md
136
README.md
@ -1,143 +1,25 @@
|
||||
### Setup
|
||||
### Building
|
||||
|
||||
Build base image
|
||||
```bash
|
||||
docker build -t image-classification-image --progress=plain --no-cache \
|
||||
-f Dockerfile \
|
||||
--build-arg USERNAME=$GITLAB_USER \
|
||||
--build-arg TOKEN=$GITLAB_ACCESS_TOKEN \
|
||||
.
|
||||
setup/docker.sh
|
||||
```
|
||||
|
||||
Build head image
|
||||
```bash
|
||||
docker build -f Dockerfile -t image-prediction . --build-arg BASE_ROOT=""
|
||||
```
|
||||
|
||||
### Usage
|
||||
|
||||
#### Without Docker
|
||||
|
||||
|
||||
```bash
|
||||
py scripts/run_pipeline.py /path/to/a/pdf
|
||||
```
|
||||
|
||||
#### With Docker
|
||||
|
||||
Shell 1
|
||||
|
||||
```bash
|
||||
docker run --rm --net=host image-prediction
|
||||
docker run --rm --net=host --rm image-prediction
|
||||
```
|
||||
|
||||
Shell 2
|
||||
|
||||
```bash
|
||||
python scripts/pyinfra_mock.py /path/to/a/pdf
|
||||
python scripts/pyinfra_mock.py --pdf_path /path/to/a/pdf
|
||||
```
|
||||
|
||||
### Tests
|
||||
|
||||
For example, run this command to execute all tests and get a coverage report:
|
||||
|
||||
```bash
|
||||
coverage run -m pytest test --tb=native -q -s -vvv -x && coverage combine && coverage report -m
|
||||
```
|
||||
|
||||
After having built the service container as specified above, you can also run tests in a container as follows:
|
||||
|
||||
```bash
|
||||
./run_tests.sh
|
||||
```
|
||||
|
||||
### Message Body Formats
|
||||
|
||||
|
||||
#### Request Format
|
||||
|
||||
Request messages must provide the fields `"dossierId"` and `"fileId"`. A request looks like this:
|
||||
|
||||
```json
|
||||
{
|
||||
"dossierId": "<string identifier>",
|
||||
"fileId": "<string identifier>"
|
||||
}
|
||||
```
|
||||
|
||||
Any additional keys are ignored.
|
||||
|
||||
|
||||
#### Response Format
|
||||
|
||||
Response bodies contain information about the identified class of the image, the confidence of the classification, the
|
||||
position and size of the image as well as the results of additional convenience filters which can be configured through
|
||||
environment variables. A response body looks like this:
|
||||
|
||||
```json
|
||||
{
|
||||
"dossierId": "debug",
|
||||
"fileId": "13ffa9851740c8d20c4c7d1706d72f2a",
|
||||
"data": [...]
|
||||
}
|
||||
```
|
||||
|
||||
An image metadata record (entry in `"data"` field of a response body) looks like this:
|
||||
|
||||
```json
|
||||
{
|
||||
"classification": {
|
||||
"label": "logo",
|
||||
"probabilities": {
|
||||
"logo": 1.0,
|
||||
"signature": 1.1599173226749333e-17,
|
||||
"other": 2.994595513398207e-23,
|
||||
"formula": 4.352109377281029e-31
|
||||
}
|
||||
},
|
||||
"position": {
|
||||
"x1": 475.95,
|
||||
"x2": 533.4,
|
||||
"y1": 796.47,
|
||||
"y2": 827.62,
|
||||
"pageNumber": 6
|
||||
},
|
||||
"geometry": {
|
||||
"width": 57.44999999999999,
|
||||
"height": 31.149999999999977
|
||||
},
|
||||
"alpha": false,
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": 0.05975350599135938,
|
||||
"tooLarge": false,
|
||||
"tooSmall": false
|
||||
},
|
||||
"imageFormat": {
|
||||
"quotient": 1.8443017656500813,
|
||||
"tooTall": false,
|
||||
"tooWide": false
|
||||
}
|
||||
},
|
||||
"probability": {
|
||||
"unconfident": false
|
||||
},
|
||||
"allPassed": true
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
## Configuration
|
||||
|
||||
A configuration file is located at `config.yaml`. All relevant variables can be configured by
|
||||
exporting environment variables.
|
||||
|
||||
| __Environment Variable__ | Default | Description |
|
||||
|------------------------------------|------------------------------------|----------------------------------------------------------------------------------------|
|
||||
| __LOGGING_LEVEL_ROOT__ | "INFO" | Logging level for log file messages |
|
||||
| __VERBOSE__ | *true* | Service prints document processing progress to stdout |
|
||||
| __BATCH_SIZE__ | 16 | Number of images in memory simultaneously per service instance |
|
||||
| __RUN_ID__ | "fabfb1f192c745369b88cab34471aba7" | The ID of the mlflow run to load the image classifier from |
|
||||
| __MIN_REL_IMAGE_SIZE__ | 0.05 | Minimally permissible image size to page size ratio |
|
||||
| __MAX_REL_IMAGE_SIZE__ | 0.75 | Maximally permissible image size to page size ratio |
|
||||
| __MIN_IMAGE_FORMAT__ | 0.1 | Minimally permissible image width to height ratio |
|
||||
| __MAX_IMAGE_FORMAT__ | 10 | Maximally permissible image width to height ratio |
|
||||
|
||||
See also: https://git.iqser.com/projects/RED/repos/helm/browse/redaction/templates/image-service-v2
|
||||
|
||||
40
bamboo-specs/pom.xml
Normal file
40
bamboo-specs/pom.xml
Normal file
@ -0,0 +1,40 @@
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<parent>
|
||||
<groupId>com.atlassian.bamboo</groupId>
|
||||
<artifactId>bamboo-specs-parent</artifactId>
|
||||
<version>7.1.2</version>
|
||||
<relativePath/>
|
||||
</parent>
|
||||
|
||||
<artifactId>bamboo-specs</artifactId>
|
||||
<version>1.0.0-SNAPSHOT</version>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<properties>
|
||||
<sonar.skip>true</sonar.skip>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>com.atlassian.bamboo</groupId>
|
||||
<artifactId>bamboo-specs-api</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.atlassian.bamboo</groupId>
|
||||
<artifactId>bamboo-specs</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Test dependencies -->
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<!-- run 'mvn test' to perform offline validation of the plan -->
|
||||
<!-- run 'mvn -Ppublish-specs' to upload the plan to your Bamboo server -->
|
||||
</project>
|
||||
182
bamboo-specs/src/main/java/buildjob/PlanSpec.java
Normal file
182
bamboo-specs/src/main/java/buildjob/PlanSpec.java
Normal file
@ -0,0 +1,182 @@
|
||||
package buildjob;
|
||||
|
||||
import com.atlassian.bamboo.specs.api.BambooSpec;
|
||||
import com.atlassian.bamboo.specs.api.builders.BambooKey;
|
||||
import com.atlassian.bamboo.specs.api.builders.docker.DockerConfiguration;
|
||||
import com.atlassian.bamboo.specs.api.builders.permission.PermissionType;
|
||||
import com.atlassian.bamboo.specs.api.builders.permission.Permissions;
|
||||
import com.atlassian.bamboo.specs.api.builders.permission.PlanPermissions;
|
||||
import com.atlassian.bamboo.specs.api.builders.plan.Job;
|
||||
import com.atlassian.bamboo.specs.api.builders.plan.Plan;
|
||||
import com.atlassian.bamboo.specs.api.builders.plan.PlanIdentifier;
|
||||
import com.atlassian.bamboo.specs.api.builders.plan.Stage;
|
||||
import com.atlassian.bamboo.specs.api.builders.plan.branches.BranchCleanup;
|
||||
import com.atlassian.bamboo.specs.api.builders.plan.branches.PlanBranchManagement;
|
||||
import com.atlassian.bamboo.specs.api.builders.project.Project;
|
||||
import com.atlassian.bamboo.specs.builders.task.CheckoutItem;
|
||||
import com.atlassian.bamboo.specs.builders.task.InjectVariablesTask;
|
||||
import com.atlassian.bamboo.specs.builders.task.ScriptTask;
|
||||
import com.atlassian.bamboo.specs.builders.task.VcsCheckoutTask;
|
||||
import com.atlassian.bamboo.specs.builders.task.CleanWorkingDirectoryTask;
|
||||
import com.atlassian.bamboo.specs.builders.task.VcsTagTask;
|
||||
import com.atlassian.bamboo.specs.builders.trigger.BitbucketServerTrigger;
|
||||
import com.atlassian.bamboo.specs.model.task.InjectVariablesScope;
|
||||
import com.atlassian.bamboo.specs.api.builders.Variable;
|
||||
import com.atlassian.bamboo.specs.util.BambooServer;
|
||||
import com.atlassian.bamboo.specs.builders.task.ScriptTask;
|
||||
import com.atlassian.bamboo.specs.model.task.ScriptTaskProperties.Location;
|
||||
|
||||
/**
|
||||
* Plan configuration for Bamboo.
|
||||
* Learn more on: <a href="https://confluence.atlassian.com/display/BAMBOO/Bamboo+Specs">https://confluence.atlassian.com/display/BAMBOO/Bamboo+Specs</a>
|
||||
*/
|
||||
@BambooSpec
|
||||
public class PlanSpec {
|
||||
|
||||
private static final String SERVICE_NAME = "image-prediction";
|
||||
private static final String SERVICE_NAME_BASE = "image-prediction-base";
|
||||
|
||||
private static final String SERVICE_KEY = SERVICE_NAME.toUpperCase().replaceAll("-","").replaceAll("_","");
|
||||
|
||||
/**
|
||||
* Run main to publish plan on Bamboo
|
||||
*/
|
||||
public static void main(final String[] args) throws Exception {
|
||||
//By default credentials are read from the '.credentials' file.
|
||||
BambooServer bambooServer = new BambooServer("http://localhost:8085");
|
||||
|
||||
Plan plan = new PlanSpec().createDockerBuildPlan();
|
||||
bambooServer.publish(plan);
|
||||
PlanPermissions planPermission = new PlanSpec().createPlanPermission(plan.getIdentifier());
|
||||
bambooServer.publish(planPermission);
|
||||
}
|
||||
|
||||
private PlanPermissions createPlanPermission(PlanIdentifier planIdentifier) {
|
||||
Permissions permission = new Permissions()
|
||||
.userPermissions("atlbamboo", PermissionType.EDIT, PermissionType.VIEW, PermissionType.ADMIN, PermissionType.CLONE, PermissionType.BUILD)
|
||||
.groupPermissions("research", PermissionType.EDIT, PermissionType.VIEW, PermissionType.CLONE, PermissionType.BUILD)
|
||||
.groupPermissions("Development", PermissionType.EDIT, PermissionType.VIEW, PermissionType.CLONE, PermissionType.BUILD)
|
||||
.groupPermissions("QA", PermissionType.EDIT, PermissionType.VIEW, PermissionType.CLONE, PermissionType.BUILD)
|
||||
.loggedInUserPermissions(PermissionType.VIEW)
|
||||
.anonymousUserPermissionView();
|
||||
return new PlanPermissions(planIdentifier.getProjectKey(), planIdentifier.getPlanKey()).permissions(permission);
|
||||
}
|
||||
|
||||
private Project project() {
|
||||
return new Project()
|
||||
.name("RED")
|
||||
.key(new BambooKey("RED"));
|
||||
}
|
||||
|
||||
public Plan createDockerBuildPlan() {
|
||||
return new Plan(
|
||||
project(),
|
||||
SERVICE_NAME, new BambooKey(SERVICE_KEY))
|
||||
.description("Docker build for image-prediction.")
|
||||
// .variables()
|
||||
.stages(new Stage("Build Stage")
|
||||
.jobs(
|
||||
new Job("Build Job", new BambooKey("BUILD"))
|
||||
.tasks(
|
||||
new CleanWorkingDirectoryTask()
|
||||
.description("Clean working directory.")
|
||||
.enabled(true),
|
||||
new VcsCheckoutTask()
|
||||
.description("Checkout default repository.")
|
||||
.checkoutItems(new CheckoutItem().defaultRepository()),
|
||||
new VcsCheckoutTask()
|
||||
.description("Checkout redai_image research repository.")
|
||||
.checkoutItems(new CheckoutItem().repository("RR / redai_image").path("redai_image")),
|
||||
new ScriptTask()
|
||||
.description("Set config and keys.")
|
||||
.inlineBody("mkdir -p ~/.ssh\n" +
|
||||
"echo \"${bamboo.bamboo_agent_ssh}\" | base64 -d >> ~/.ssh/id_rsa\n" +
|
||||
"echo \"host vector.iqser.com\" > ~/.ssh/config\n" +
|
||||
"echo \" user bamboo-agent\" >> ~/.ssh/config\n" +
|
||||
"chmod 600 ~/.ssh/config ~/.ssh/id_rsa"),
|
||||
new ScriptTask()
|
||||
.description("Build Docker container.")
|
||||
.location(Location.FILE)
|
||||
.fileFromPath("bamboo-specs/src/main/resources/scripts/docker-build.sh")
|
||||
.argument(SERVICE_NAME + " " + SERVICE_NAME_BASE))
|
||||
.dockerConfiguration(
|
||||
new DockerConfiguration()
|
||||
.image("nexus.iqser.com:5001/infra/release_build:4.2.0")
|
||||
.volume("/var/run/docker.sock", "/var/run/docker.sock")),
|
||||
new Job("Sonar Job", new BambooKey("SONAR"))
|
||||
.tasks(
|
||||
new CleanWorkingDirectoryTask()
|
||||
.description("Clean working directory.")
|
||||
.enabled(true),
|
||||
new VcsCheckoutTask()
|
||||
.description("Checkout default repository.")
|
||||
.checkoutItems(new CheckoutItem().defaultRepository()),
|
||||
new VcsCheckoutTask()
|
||||
.description("Checkout redai_image repository.")
|
||||
.checkoutItems(new CheckoutItem().repository("RR / redai_image").path("redai_image")),
|
||||
new ScriptTask()
|
||||
.description("Set config and keys.")
|
||||
.inlineBody("mkdir -p ~/.ssh\n" +
|
||||
"echo \"${bamboo.bamboo_agent_ssh}\" | base64 -d >> ~/.ssh/id_rsa\n" +
|
||||
"echo \"host vector.iqser.com\" > ~/.ssh/config\n" +
|
||||
"echo \" user bamboo-agent\" >> ~/.ssh/config\n" +
|
||||
"chmod 600 ~/.ssh/config ~/.ssh/id_rsa"),
|
||||
new ScriptTask()
|
||||
.description("Run Sonarqube scan.")
|
||||
.location(Location.FILE)
|
||||
.fileFromPath("bamboo-specs/src/main/resources/scripts/sonar-scan.sh")
|
||||
.argument(SERVICE_NAME))
|
||||
.dockerConfiguration(
|
||||
new DockerConfiguration()
|
||||
.image("nexus.iqser.com:5001/infra/release_build:4.2.0")
|
||||
.volume("/var/run/docker.sock", "/var/run/docker.sock"))),
|
||||
new Stage("Licence Stage")
|
||||
.jobs(
|
||||
new Job("Git Tag Job", new BambooKey("GITTAG"))
|
||||
.tasks(
|
||||
new VcsCheckoutTask()
|
||||
.description("Checkout default repository.")
|
||||
.checkoutItems(new CheckoutItem().defaultRepository()),
|
||||
new ScriptTask()
|
||||
.description("Build git tag.")
|
||||
.location(Location.FILE)
|
||||
.fileFromPath("bamboo-specs/src/main/resources/scripts/git-tag.sh"),
|
||||
new InjectVariablesTask()
|
||||
.description("Inject git tag.")
|
||||
.path("git.tag")
|
||||
.namespace("g")
|
||||
.scope(InjectVariablesScope.LOCAL),
|
||||
new VcsTagTask()
|
||||
.description("${bamboo.g.gitTag}")
|
||||
.tagName("${bamboo.g.gitTag}")
|
||||
.defaultRepository())
|
||||
.dockerConfiguration(
|
||||
new DockerConfiguration()
|
||||
.image("nexus.iqser.com:5001/infra/release_build:4.4.1")),
|
||||
new Job("Licence Job", new BambooKey("LICENCE"))
|
||||
.enabled(false)
|
||||
.tasks(
|
||||
new VcsCheckoutTask()
|
||||
.description("Checkout default repository.")
|
||||
.checkoutItems(new CheckoutItem().defaultRepository()),
|
||||
new ScriptTask()
|
||||
.description("Build licence.")
|
||||
.location(Location.FILE)
|
||||
.fileFromPath("bamboo-specs/src/main/resources/scripts/create-licence.sh"))
|
||||
.dockerConfiguration(
|
||||
new DockerConfiguration()
|
||||
.image("nexus.iqser.com:5001/infra/maven:3.6.2-jdk-13-3.0.0")
|
||||
.volume("/etc/maven/settings.xml", "/usr/share/maven/ref/settings.xml")
|
||||
.volume("/var/run/docker.sock", "/var/run/docker.sock"))))
|
||||
.linkedRepositories("RR / " + SERVICE_NAME)
|
||||
.linkedRepositories("RR / redai_image")
|
||||
.triggers(new BitbucketServerTrigger())
|
||||
.planBranchManagement(new PlanBranchManagement()
|
||||
.createForVcsBranch()
|
||||
.delete(new BranchCleanup()
|
||||
.whenInactiveInRepositoryAfterDays(14))
|
||||
.notificationForCommitters());
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
19
bamboo-specs/src/main/resources/scripts/create-licence.sh
Executable file
19
bamboo-specs/src/main/resources/scripts/create-licence.sh
Executable file
@ -0,0 +1,19 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
if [[ \"${bamboo_version_tag}\" != \"dev\" ]]
|
||||
then
|
||||
${bamboo_capability_system_builder_mvn3_Maven_3}/bin/mvn \
|
||||
-f ${bamboo_build_working_directory}/pom.xml \
|
||||
versions:set \
|
||||
-DnewVersion=${bamboo_version_tag}
|
||||
|
||||
${bamboo_capability_system_builder_mvn3_Maven_3}/bin/mvn \
|
||||
-f ${bamboo_build_working_directory}/pom.xml \
|
||||
-B clean deploy \
|
||||
-e -DdeployAtEnd=true \
|
||||
-Dmaven.wagon.http.ssl.insecure=true \
|
||||
-Dmaven.wagon.http.ssl.allowall=true \
|
||||
-Dmaven.wagon.http.ssl.ignore.validity.dates=true \
|
||||
-DaltDeploymentRepository=iqser_release::default::https://nexus.iqser.com/repository/gin4-platform-releases
|
||||
fi
|
||||
19
bamboo-specs/src/main/resources/scripts/docker-build.sh
Executable file
19
bamboo-specs/src/main/resources/scripts/docker-build.sh
Executable file
@ -0,0 +1,19 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
SERVICE_NAME=$1
|
||||
SERVICE_NAME_BASE=$2
|
||||
|
||||
python3 -m venv build_venv
|
||||
source build_venv/bin/activate
|
||||
python3 -m pip install --upgrade pip
|
||||
|
||||
pip install dvc
|
||||
pip install 'dvc[ssh]'
|
||||
dvc pull
|
||||
|
||||
echo "index-url = https://${bamboo_nexus_user}:${bamboo_nexus_password}@nexus.iqser.com/repository/python-combind/simple" >> pip.conf
|
||||
docker build -f Dockerfile_base -t nexus.iqser.com:5001/red/$SERVICE_NAME_BASE:${bamboo_version_tag} .
|
||||
docker build -f Dockerfile -t nexus.iqser.com:5001/red/$SERVICE_NAME:${bamboo_version_tag} --build-arg VERSION_TAG=${bamboo_version_tag} .
|
||||
echo "${bamboo_nexus_password}" | docker login --username "${bamboo_nexus_user}" --password-stdin nexus.iqser.com:5001
|
||||
docker push nexus.iqser.com:5001/red/$SERVICE_NAME:${bamboo_version_tag}
|
||||
9
bamboo-specs/src/main/resources/scripts/git-tag.sh
Executable file
9
bamboo-specs/src/main/resources/scripts/git-tag.sh
Executable file
@ -0,0 +1,9 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
if [[ "${bamboo_version_tag}" = "dev" ]]
|
||||
then
|
||||
echo "gitTag=${bamboo_planRepository_1_branch}_${bamboo_buildNumber}" > git.tag
|
||||
else
|
||||
echo "gitTag=${bamboo_version_tag}" > git.tag
|
||||
fi
|
||||
51
bamboo-specs/src/main/resources/scripts/sonar-scan.sh
Executable file
51
bamboo-specs/src/main/resources/scripts/sonar-scan.sh
Executable file
@ -0,0 +1,51 @@
|
||||
#!/bin/bash
# Runs the OWASP dependency-check and a SonarQube scan for this repository.
#
# Usage: sonar-scan.sh <service-name>
#
# When bamboo_repository_pr_key is empty the scan is reported for the branch
# bamboo_planRepository_1_branch; otherwise it is reported as a pull-request
# analysis using the bamboo_repository_pr_* variables.
set -e

# Fail early with a usage message instead of scanning under the project key "RED_".
SERVICE_NAME=${1:?usage: sonar-scan.sh <service-name>}

export JAVA_HOME=/usr/bin/sonar-scanner/jre

python3 -m venv build_venv
source build_venv/bin/activate
python3 -m pip install --upgrade pip

echo "dev setup for unit test and coverage 💖"

pip install -e .
pip install -r requirements.txt

echo "dependency-check:aggregate"
mkdir -p reports
dependency-check --enableExperimental -f JSON -f HTML -f XML \
    --disableAssembly -s . -o reports --project "$SERVICE_NAME" --exclude ".git/**" --exclude "venv/**" \
    --exclude "build_venv/**" --exclude "**/__pycache__/**" --exclude "bamboo-specs/**"

# Arguments shared by the branch scan and the pull-request scan.
# NOTE(review): reports/coverage.xml is referenced here but not produced by this
# script -- presumably an earlier build step creates it; confirm.
scanner_args=(
    "-Dsonar.projectKey=RED_${SERVICE_NAME}"
    "-Dsonar.sources=image_prediction"
    "-Dsonar.host.url=https://sonarqube.iqser.com"
    "-Dsonar.login=${bamboo_sonarqube_api_token_secret}"
    "-Dsonar.dependencyCheck.jsonReportPath=reports/dependency-check-report.json"
    "-Dsonar.dependencyCheck.xmlReportPath=reports/dependency-check-report.xml"
    "-Dsonar.dependencyCheck.htmlReportPath=reports/dependency-check-report.html"
    "-Dsonar.python.coverage.reportPaths=reports/coverage.xml"
)

if [[ -z "${bamboo_repository_pr_key}" ]]
then
    echo "Sonar Scan for branch: ${bamboo_planRepository_1_branch}"
    scanner_args+=("-Dsonar.branch.name=${bamboo_planRepository_1_branch}")
else
    echo "Sonar Scan for PR with key1: ${bamboo_repository_pr_key}"
    scanner_args+=(
        "-Dsonar.pullrequest.key=${bamboo_repository_pr_key}"
        "-Dsonar.pullrequest.branch=${bamboo_repository_pr_sourceBranch}"
        "-Dsonar.pullrequest.base=${bamboo_repository_pr_targetBranch}"
    )
fi

/usr/bin/sonar-scanner/bin/sonar-scanner "${scanner_args[@]}"
|
||||
16
bamboo-specs/src/test/java/buildjob/PlanSpecTest.java
Normal file
16
bamboo-specs/src/test/java/buildjob/PlanSpecTest.java
Normal file
@ -0,0 +1,16 @@
|
||||
package buildjob;
|
||||
|
||||
|
||||
import com.atlassian.bamboo.specs.api.builders.plan.Plan;
|
||||
import com.atlassian.bamboo.specs.api.exceptions.PropertiesValidationException;
|
||||
import com.atlassian.bamboo.specs.api.util.EntityPropertiesBuilders;
|
||||
import org.junit.Test;
|
||||
|
||||
public class PlanSpecTest {
|
||||
@Test
|
||||
public void checkYourPlanOffline() throws PropertiesValidationException {
|
||||
Plan plan = new PlanSpec().createDockerBuildPlan();
|
||||
|
||||
EntityPropertiesBuilders.build(plan);
|
||||
}
|
||||
}
|
||||
28
config.yaml
Normal file
28
config.yaml
Normal file
@ -0,0 +1,28 @@
|
||||
webserver:
|
||||
host: $SERVER_HOST|"127.0.0.1" # webserver address
|
||||
port: $SERVER_PORT|5000 # webserver port
|
||||
mode: $SERVER_MODE|production # webserver mode: {development, production}
|
||||
|
||||
service:
|
||||
logging_level: INFO # Logging level for service logger
|
||||
progressbar: True # Whether a progress bar over the pages of a document is displayed while processing
|
||||
batch_size: $BATCH_SIZE|32 # Number of images in memory simultaneously
|
||||
verbose: $VERBOSE|True # Service prints document processing progress to stdout
|
||||
run_id: $RUN_ID|fabfb1f192c745369b88cab34471aba7 # The ID of the mlflow run to load the service_estimator from
|
||||
|
||||
|
||||
# These variables control filters that are applied to either images, image metadata or service_estimator predictions. The filter
|
||||
# result values are reported in the service responses. For convenience the response to a request contains a
|
||||
# "filters.allPassed" field, which is set to false if any of the values returned by the filters did not meet its specified
|
||||
# required value.
|
||||
filters:
|
||||
|
||||
image_to_page_quotient: # Image size to page size ratio (ratio of geometric means of areas)
|
||||
min: $MIN_REL_IMAGE_SIZE|0.05 # Minimum permissible
|
||||
max: $MAX_REL_IMAGE_SIZE|0.75 # Maximum permissible
|
||||
|
||||
image_width_to_height_quotient: # Image width to height ratio
|
||||
min: $MIN_IMAGE_FORMAT|0.1 # Minimum permissible
|
||||
max: $MAX_IMAGE_FORMAT|10 # Maximum permissible
|
||||
|
||||
min_confidence: $MIN_CONFIDENCE|0.5 # Minimum permissible prediction confidence
|
||||
@ -1,68 +0,0 @@
|
||||
|
||||
[asyncio]
|
||||
max_concurrent_tasks = 10
|
||||
|
||||
[dynamic_tenant_queues]
|
||||
enabled = true
|
||||
|
||||
[metrics.prometheus]
|
||||
enabled = true
|
||||
prefix = "redactmanager_image_service"
|
||||
|
||||
[tracing]
|
||||
enabled = true
|
||||
# possible values "opentelemetry" | "azure_monitor" (Expects APPLICATIONINSIGHTS_CONNECTION_STRING environment variable.)
|
||||
type = "azure_monitor"
|
||||
|
||||
[tracing.opentelemetry]
|
||||
endpoint = "http://otel-collector-opentelemetry-collector.otel-collector:4318/v1/traces"
|
||||
service_name = "redactmanager_image_service"
|
||||
exporter = "otlp"
|
||||
|
||||
[webserver]
|
||||
host = "0.0.0.0"
|
||||
port = 8080
|
||||
|
||||
[rabbitmq]
|
||||
host = "localhost"
|
||||
port = 5672
|
||||
username = ""
|
||||
password = ""
|
||||
heartbeat = 60
|
||||
# Has to be a divisor of heartbeat, and shouldn't be too big, since queue interactions (like receiving new messages) only happen at these intervals
|
||||
# This is also the minimum time the service needs to process a message
|
||||
connection_sleep = 5
|
||||
input_queue = "request_queue"
|
||||
output_queue = "response_queue"
|
||||
dead_letter_queue = "dead_letter_queue"
|
||||
|
||||
tenant_event_queue_suffix = "_tenant_event_queue"
|
||||
tenant_event_dlq_suffix = "_tenant_events_dlq"
|
||||
tenant_exchange_name = "tenants-exchange"
|
||||
queue_expiration_time = 300000 # 5 minutes in milliseconds
|
||||
|
||||
service_request_queue_prefix = "image_request_queue"
|
||||
service_request_exchange_name = "image_request_exchange"
|
||||
service_response_exchange_name = "image_response_exchange"
|
||||
service_dlq_name = "image_dlq"
|
||||
|
||||
[storage]
|
||||
backend = "s3"
|
||||
|
||||
[storage.s3]
|
||||
bucket = "redaction"
|
||||
endpoint = "http://127.0.0.1:9000"
|
||||
key = ""
|
||||
secret = ""
|
||||
region = "eu-central-1"
|
||||
|
||||
[storage.azure]
|
||||
container = "redaction"
|
||||
connection_string = ""
|
||||
|
||||
[storage.tenant_server]
|
||||
public_key = ""
|
||||
endpoint = "http://tenant-user-management:8081/internal-api/tenants"
|
||||
|
||||
[kubernetes]
|
||||
pod_name = "test_pod"
|
||||
@ -1,42 +0,0 @@
|
||||
[logging]
|
||||
level = "INFO"
|
||||
|
||||
[service]
|
||||
# Print document processing progress to stdout
|
||||
verbose = false
|
||||
batch_size = 6
|
||||
image_stiching_tolerance = 1 # in pixels
|
||||
mlflow_run_id = "fabfb1f192c745369b88cab34471aba7"
|
||||
|
||||
# These variables control filters that are applied to either images, image metadata or service_estimator predictions.
|
||||
# The filter result values are reported in the service responses. For convenience the response to a request contains a
|
||||
# "filters.allPassed" field, which is set to false if any of the values returned by the filters did not meet its
|
||||
# specified required value.
|
||||
[filters.confidence]
|
||||
# Minimum permissible prediction confidence
|
||||
min = 0.5
|
||||
|
||||
# Image size to page size ratio (ratio of geometric means of areas)
|
||||
[filters.image_to_page_quotient]
|
||||
min = 0.05
|
||||
max = 0.75
|
||||
|
||||
[filters.is_scanned_page]
|
||||
# Minimum permissible image to page ratio tolerance for a page to be considered scanned.
|
||||
# This is only used for filtering small images on scanned pages and is applied before processing the image, therefore
|
||||
# superseding the image_to_page_quotient filter that only applies a tag to the image after processing.
|
||||
tolerance = 0
|
||||
|
||||
# Image width to height ratio
|
||||
[filters.image_width_to_height_quotient]
|
||||
min = 0.1
|
||||
max = 10
|
||||
|
||||
# put class specific filters here ['signature', 'formula', 'logo']
|
||||
[filters.overrides.signature.image_to_page_quotient]
|
||||
max = 0.4
|
||||
|
||||
[filters.overrides.logo.image_to_page_quotient]
|
||||
min = 0.06
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
outs:
|
||||
- md5: ad061d607f615afc149643f62dbf37cc.dir
|
||||
size: 166952700
|
||||
- md5: 4219c52caf5f87f5a94f1ae00c60fb91.dir
|
||||
size: 166952679
|
||||
nfiles: 179
|
||||
path: mlruns
|
||||
|
||||
@ -24,11 +24,10 @@ class Classifier:
|
||||
self.__pipe = rcompose(self.__estimator_adapter, self.__label_mapper)
|
||||
|
||||
def predict(self, batch: Union[np.array, Tuple[Image]]) -> List[str]:
|
||||
|
||||
if isinstance(batch, np.ndarray) and batch.shape[0] == 0:
|
||||
if not isinstance(batch, tuple) and batch.shape[0] == 0:
|
||||
return []
|
||||
|
||||
return self.__pipe(batch)
|
||||
return list(self.__pipe(batch))
|
||||
|
||||
def __call__(self, batch: np.array) -> List[str]:
|
||||
logger.debug("Classifier.predict")
|
||||
40
image_prediction/config.py
Normal file
40
image_prediction/config.py
Normal file
@ -0,0 +1,40 @@
|
||||
"""Implements a config object with dot-indexing syntax."""
|
||||
|
||||
|
||||
from envyaml import EnvYAML
|
||||
|
||||
from image_prediction.locations import CONFIG_FILE
|
||||
|
||||
|
||||
def _get_item_and_maybe_make_dotindexable(container, item):
|
||||
ret = container[item]
|
||||
return DotIndexable(ret) if isinstance(ret, dict) else ret
|
||||
|
||||
|
||||
class _MissingKeyError(AttributeError, KeyError):
    """Raised when a key is missing during attribute-style access.

    It is an ``AttributeError`` so that ``hasattr`` and ``getattr(obj, name, default)``
    work, and a ``KeyError`` so that callers catching ``KeyError`` keep working.
    """


class DotIndexable:
    """Wrapper that exposes the items of a mapping as attributes.

    Nested dicts are wrapped on access, so ``cfg.a.b`` is equivalent to
    ``cfg["a"]["b"]``. The wrapped object is stored in the attribute ``x``.
    """

    def __init__(self, x):
        self.x = x

    def __getattr__(self, item):
        """Look ``item`` up in the wrapped mapping.

        Only called when regular attribute lookup fails.

        Raises:
            AttributeError: If ``item`` is not a key of the wrapped mapping (the
                raised exception is also a ``KeyError``).
        """
        # ``x`` can only be missing on an instance created without ``__init__``
        # (e.g. by copy/pickle); resolving it through ``self.x`` would recurse forever.
        if item == "x":
            raise AttributeError(item)
        try:
            return self[item]
        except KeyError as err:
            raise _MissingKeyError(item) from err

    def __repr__(self):
        return repr(self.x)

    def __getitem__(self, item):
        """Return ``self.x[item]``, wrapping dict values in ``DotIndexable``."""
        value = self.x[item]
        return DotIndexable(value) if isinstance(value, dict) else value
|
||||
|
||||
|
||||
class Config:
    """Configuration loaded through ``EnvYAML`` with attribute and item access.

    Top-level entries are available as ``config.name`` or ``config["name"]``;
    dict values are returned wrapped so that nested entries support the same
    dot syntax. Unknown top-level names yield ``None``.
    """

    def __init__(self, config_path):
        self.__config = EnvYAML(config_path)

    def __getattr__(self, item):
        """Return the top-level config entry ``item``, or ``None`` if it does not exist.

        Only called when regular attribute lookup fails.
        """
        # The backing attribute is only missing on an instance created without
        # ``__init__`` (e.g. by copy/pickle); looking it up via ``self.__config``
        # below would then re-enter this method forever.
        if item == "_Config__config":
            raise AttributeError(item)
        if item in self.__config:
            return _get_item_and_maybe_make_dotindexable(self.__config, item)
        # NOTE(review): unknown names silently resolve to None, so a misspelled
        # config key is not reported here.
        return None

    def __getitem__(self, item):
        return self.__getattr__(item)
|
||||
|
||||
|
||||
CONFIG = Config(CONFIG_FILE)
|
||||
@ -3,17 +3,17 @@ from funcy import juxt
|
||||
from image_prediction.classifier.classifier import Classifier
|
||||
from image_prediction.classifier.image_classifier import ImageClassifier
|
||||
from image_prediction.compositor.compositor import TransformerCompositor
|
||||
from image_prediction.encoder.encoders.hash_encoder import HashEncoder
|
||||
from image_prediction.estimator.adapter.adapter import EstimatorAdapter
|
||||
from image_prediction.extractor_classifier.extractor_classifier import ExtractorClassifier
|
||||
from image_prediction.formatter.formatters.camel_case import Snake2CamelCaseKeyFormatter
|
||||
from image_prediction.formatter.formatters.enum import EnumFormatter
|
||||
from image_prediction.transformer.transformers.response import ResponseTransformer
|
||||
from image_prediction.image_extractor.extractors.parsable import ParsablePDFImageExtractor
|
||||
from image_prediction.label_mapper.mappers.probability import ProbabilityMapper
|
||||
from image_prediction.model_loader.loader import ModelLoader
|
||||
from image_prediction.model_loader.loaders.mlflow import MlflowConnector
|
||||
from image_prediction.redai_adapter.mlflow import MlflowModelReader
|
||||
from image_prediction.transformer.transformers.coordinate.pdfnet import PDFNetCoordinateTransformer
|
||||
from image_prediction.transformer.transformers.response import ResponseTransformer
|
||||
|
||||
|
||||
def get_mlflow_model_loader(mlruns_dir):
|
||||
@ -32,12 +32,16 @@ def get_extractor(**kwargs):
|
||||
return image_extractor
|
||||
|
||||
|
||||
def get_extractor_classifier(model_loader, model_identifier, **kwargs):
    """Build an ``ExtractorClassifier`` from the default extractor and image classifier.

    ``kwargs`` are forwarded to ``get_extractor``; ``model_loader`` and
    ``model_identifier`` are forwarded to ``get_image_classifier``.
    """
    image_extractor = get_extractor(**kwargs)
    image_classifier = get_image_classifier(model_loader, model_identifier)
    return ExtractorClassifier(image_extractor, image_classifier)
|
||||
|
||||
|
||||
def get_formatter():
    """Build the default ``TransformerCompositor`` from its four component transformers."""
    stages = (
        PDFNetCoordinateTransformer(),
        EnumFormatter(),
        ResponseTransformer(),
        Snake2CamelCaseKeyFormatter(),
    )
    return TransformerCompositor(*stages)
|
||||
|
||||
|
||||
def get_encoder():
    """Return a new ``HashEncoder`` instance."""
    encoder = HashEncoder()
    return encoder
|
||||
@ -32,11 +32,3 @@ class IntentionalTestException(RuntimeError):
|
||||
|
||||
class InvalidBox(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class ParsingError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class BadXref(ValueError):
|
||||
pass
|
||||
@ -0,0 +1,32 @@
|
||||
from itertools import chain
|
||||
from typing import Iterable
|
||||
|
||||
from funcy import chunks
|
||||
|
||||
from image_prediction.classifier.image_classifier import ImageClassifier
|
||||
from image_prediction.image_extractor.extractor import ImageExtractor
|
||||
|
||||
|
||||
class ExtractorClassifier:
    """Pairs image classifications with image metadata.

    Images are extracted from an input object and classified in batches; each
    prediction is then combined with the metadata of the image it belongs to.
    The result is an iterable of dictionaries, each holding the prediction under
    the key ``"classification"`` plus the metadata fields supplied by the
    extractor (possibly none).
    """

    def __init__(self, image_extractor: ImageExtractor, image_classifier: ImageClassifier, batch_size: int = 16):
        """
        Args:
            image_extractor: Callable yielding (image, metadata) pairs for an object.
            image_classifier: Callable mapping a tuple of images to predictions.
            batch_size: Maximum number of images passed to the classifier at once.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError(f"batch_size must be at least 1, got {batch_size}")

        self.classifier = image_classifier
        self.extractor = image_extractor
        self.batch_size = batch_size

    def __process_batch(self, batch):
        """Classify one batch of (image, metadata) pairs and merge predictions with metadata."""
        images, metadata = zip(*batch)

        predictions = self.classifier(images)
        # Metadata fields are unpacked after "classification", so a metadata key
        # of the same name would take precedence.
        return ({"classification": prd, **mdt} for prd, mdt in zip(predictions, metadata))

    def __call__(self, obj, **kwargs) -> Iterable[dict]:
        """Lazily extract, classify and annotate all images of ``obj``.

        ``kwargs`` are forwarded to the extractor.
        """
        image_metadata_pairs = self.extractor(obj, **kwargs)
        batches = chunks(self.batch_size, image_metadata_pairs)
        return chain.from_iterable(map(self.__process_batch, batches))
|
||||
@ -1,20 +1,39 @@
|
||||
import multiprocessing
|
||||
import traceback
|
||||
from typing import Callable
|
||||
|
||||
from flask import Flask, request, jsonify
|
||||
from prometheus_client import generate_latest, CollectorRegistry, Summary
|
||||
|
||||
from image_prediction.utils import get_logger
|
||||
from image_prediction.utils.process_wrapping import wrap_in_process
|
||||
|
||||
logger = get_logger()
|
||||
|
||||
|
||||
def run_in_process(func):
    """Run the zero-argument callable ``func`` in a child process and wait for it to exit."""
    worker = multiprocessing.Process(target=func)
    worker.start()
    worker.join()
|
||||
|
||||
|
||||
def wrap_in_process(func_to_wrap):
    """Return a wrapper that executes ``func_to_wrap`` in a child process.

    The wrapper blocks until the child has finished and returns the value
    ``func_to_wrap`` returned, or ``None`` if it raised (the traceback is
    logged) or produced no result. The result travels through a
    ``multiprocessing`` manager dict.
    """

    def build_function_and_run_in_process(*args, **kwargs):
        def func():
            try:
                return_dict["result"] = func_to_wrap(*args, **kwargs)
            except Exception:
                # Runs in the child: report the failure; the parent then sees no result.
                logger.error(traceback.format_exc())

        # The context manager shuts the manager's server process down again;
        # without it every call would leave one behind until garbage collection.
        with multiprocessing.Manager() as manager:
            return_dict = manager.dict()
            run_in_process(func)
            return return_dict.get("result", None)

    return build_function_and_run_in_process
|
||||
|
||||
|
||||
def make_prediction_server(predict_fn: Callable):
|
||||
app = Flask(__name__)
|
||||
registry = CollectorRegistry(auto_describe=True)
|
||||
metric = Summary(
|
||||
f"redactmanager_imageClassification_seconds", f"Time spent on image-service classification.", registry=registry
|
||||
)
|
||||
|
||||
@app.route("/ready", methods=["GET"])
|
||||
def ready():
|
||||
@ -34,8 +53,6 @@ def make_prediction_server(predict_fn: Callable):
|
||||
return response
|
||||
|
||||
@app.route("/predict", methods=["POST"])
|
||||
@app.route("/", methods=["POST"])
|
||||
@metric.time()
|
||||
def predict():
|
||||
|
||||
# Tensorflow does not free RAM. Workaround: Run prediction function (which instantiates a model) in sub-process.
|
||||
@ -45,7 +62,7 @@ def make_prediction_server(predict_fn: Callable):
|
||||
logger.info("Analysing...")
|
||||
predictions = predict_fn_wrapped(request.data)
|
||||
|
||||
if predictions is not None:
|
||||
if predictions:
|
||||
response = jsonify(predictions)
|
||||
logger.info("Analysis completed.")
|
||||
return response
|
||||
@ -53,8 +70,4 @@ def make_prediction_server(predict_fn: Callable):
|
||||
logger.error("Analysis failed.")
|
||||
return __failure()
|
||||
|
||||
@app.route("/prometheus", methods=["GET"])
|
||||
def prometheus():
|
||||
return generate_latest(registry=registry)
|
||||
|
||||
return app
|
||||
181
image_prediction/image_extractor/extractors/parsable.py
Normal file
181
image_prediction/image_extractor/extractors/parsable.py
Normal file
@ -0,0 +1,181 @@
|
||||
import atexit
|
||||
import io
|
||||
from functools import partial, lru_cache
|
||||
from itertools import chain, starmap, filterfalse, repeat
|
||||
from operator import itemgetter
|
||||
from typing import List
|
||||
|
||||
import fitz
|
||||
from PIL import Image
|
||||
from funcy import rcompose, merge, zipdict
|
||||
from tqdm import tqdm
|
||||
|
||||
from image_prediction.image_extractor.extractor import ImageExtractor, ImageMetadataPair
|
||||
from image_prediction.info import Info
|
||||
from image_prediction.stitching.stitching import stitch_pairs
|
||||
from image_prediction.stitching.utils import validate_box_coords, validate_box_size
|
||||
|
||||
|
||||
class ParsablePDFImageExtractor(ImageExtractor):
    """Extracts the embedded images of a PDF, page by page, together with their metadata."""

    def __init__(self, verbose=False, tolerance=0):
        """
        Args:
            verbose: Whether to show a progress bar while extracting.
            tolerance: Tolerance in pixels forwarded to ``stitch_pairs``: the
                distance between images beyond which they are not stitched together.
        """
        self.doc: fitz.fitz.Document = None
        self.verbose = verbose
        self.tolerance = tolerance

    def extract(self, pdf: bytes, page_range: range = None):
        """Yield the image-metadata pairs found in ``pdf``.

        Args:
            pdf: The PDF document as bytes.
            page_range: Pages to process, passed to ``extract_pages``; ``None``
                processes every page of the document.
        """
        self.doc = fitz.Document(stream=pdf)

        # Test against None explicitly: an empty range is falsy and must select
        # no pages rather than fall back to the whole document.
        restrict_pages = page_range is not None
        pages = extract_pages(self.doc, page_range) if restrict_pages else self.doc

        progress = tqdm(
            pages,
            desc="Extracting",
            disable=not self.verbose,
            total=len(page_range) if restrict_pages else None,
        )

        yield from chain.from_iterable(map(self.__process_images_on_page, progress))

    def __process_images_on_page(self, page: fitz.fitz.Page):
        """Yield the stitched image-metadata pairs of a single page."""
        images = get_images_on_page(self.doc, page)
        metadata = get_metadata_for_images_on_page(self.doc, page)
        # NOTE(review): both iterables above are still lazy here, so the caches are
        # cleared before any image has actually been loaded -- confirm this is intended.
        clear_caches()

        # Pair images with metadata by position and drop pairs with a falsy member.
        image_metadata_pairs = starmap(ImageMetadataPair, filter(all, zip(images, metadata)))
        image_metadata_pairs = stitch_pairs(list(image_metadata_pairs), tolerance=self.tolerance)

        yield from image_metadata_pairs
|
||||
|
||||
|
||||
def extract_pages(doc, page_range):
    """Lazily load the pages of ``doc`` selected by ``page_range``.

    Start and stop of ``page_range`` are both shifted up by one before being
    passed to ``doc.load_page``; the step of ``page_range`` is not used.
    """
    # NOTE(review): the +1 shift suggests page_range uses a different page
    # numbering than doc.load_page -- confirm against the callers.
    shifted = range(page_range.start + 1, page_range.stop + 1)
    return map(doc.load_page, shifted)
|
||||
|
||||
|
||||
@lru_cache(maxsize=None)
def get_images_on_page(doc, page: fitz.Page):
    """Return a lazy iterator over the images referenced on ``page``.

    Each image info of the page is reduced to its ``"xref"`` entry, which is
    then resolved through ``xref_to_image``.
    """
    # NOTE(review): the cached value is a one-shot ``map`` object; a cache hit
    # would hand out the same, possibly exhausted, iterator.
    load_image = partial(xref_to_image, doc)
    return map(load_image, map(itemgetter("xref"), get_image_infos(page)))
|
||||
|
||||
|
||||
def get_metadata_for_images_on_page(doc, page: fitz.Page):
    """Yield the metadata of the images on ``page``.

    The metadata built from the page's image infos passes, in this order,
    through coordinate validation, removal of tiny images, size validation,
    addition of page metadata and addition of alpha-channel information.
    This is a generator function: nothing runs until it is iterated.
    """
    # NOTE(review): tiny images are dropped before alpha info (and later the
    # images themselves) are paired by position -- verify the pairing stays aligned.
    stages = (
        validate_coords_and_passthrough,
        filter_out_tiny_images,
        validate_size_and_passthrough,
        partial(add_page_metadata, page),
        partial(add_alpha_channel_info, doc, page),
    )

    metadata = map(get_image_metadata, get_image_infos(page))
    for stage in stages:
        metadata = stage(metadata)

    yield from metadata
|
||||
|
||||
|
||||
@lru_cache(maxsize=None)
def get_image_infos(page: fitz.Page) -> List[dict]:
    """Return ``page.get_image_info(xrefs=True)``, cached per page object."""
    image_infos = page.get_image_info(xrefs=True)
    return image_infos
|
||||
|
||||
|
||||
@lru_cache(maxsize=None)
def xref_to_image(doc, xref) -> Image:
    """Open the image stored under ``xref`` in ``doc`` as a PIL image.

    Returns ``None`` when ``load_image_handle_from_xref`` yields a falsy handle.
    """
    handle = load_image_handle_from_xref(doc, xref)
    if not handle:
        return None
    return Image.open(io.BytesIO(handle["image"]))
|
||||
|
||||
|
||||
def get_image_metadata(image_info):
    """Build the geometry metadata of one image from the ``"bbox"`` of its image info.

    The four bounding-box values are rounded to integers; width and height are
    the absolute differences of the rounded coordinates.
    """
    left, top, right, bottom = (rounder(coordinate) for coordinate in image_info["bbox"])

    return {
        Info.WIDTH: abs(right - left),
        Info.HEIGHT: abs(bottom - top),
        Info.X1: left,
        Info.X2: right,
        Info.Y1: top,
        Info.Y2: bottom,
    }
|
||||
|
||||
|
||||
def validate_coords_and_passthrough(metadata):
    """Yield the result of ``validate_box_coords`` for every metadata entry."""
    for entry in metadata:
        yield validate_box_coords(entry)
|
||||
|
||||
|
||||
def filter_out_tiny_images(metadata):
    """Return a lazy iterator over the metadata entries that ``tiny`` does not flag."""
    return (entry for entry in metadata if not tiny(entry))
|
||||
|
||||
|
||||
def validate_size_and_passthrough(metadata):
    """Yield the result of ``validate_box_size`` for every metadata entry."""
    for entry in metadata:
        yield validate_box_size(entry)
|
||||
|
||||
|
||||
def add_page_metadata(page, metadata):
    """Lazily merge the metadata of ``page`` into every image metadata entry.

    The page metadata is computed once, when this function is called.
    """
    page_metadata = get_page_metadata(page)
    return (merge(page_metadata, entry) for entry in metadata)
|
||||
|
||||
|
||||
def add_alpha_channel_info(doc, page, metadata):
    """Lazily add an ``Info.ALPHA`` flag to every metadata entry.

    The flags are computed with ``has_alpha_channel`` for the xrefs of the
    page's image infos and paired with the metadata entries by position.
    """
    # NOTE(review): pairing is purely positional -- verify that ``metadata`` still
    # has one entry per image info when it arrives here.
    xrefs = (info["xref"] for info in get_image_infos(page))
    alpha_flags = ({Info.ALPHA: has_alpha_channel(doc, xref)} for xref in xrefs)
    return (merge(flag, entry) for flag, entry in zip(alpha_flags, metadata))
|
||||
|
||||
|
||||
@lru_cache(maxsize=None)
def load_image_handle_from_xref(doc, xref):
    """Return ``doc.extract_image(xref)``, cached per (doc, xref) pair."""
    handle = doc.extract_image(xref)
    return handle
|
||||
|
||||
|
||||
# Composition of ``round`` followed by ``int``; used to turn coordinates and page sizes into integers.
rounder = rcompose(round, int)
|
||||
|
||||
|
||||
def get_page_metadata(page):
    """Return the rounded media-box width and height of ``page`` and its page number."""
    width, height = (rounder(dimension) for dimension in page.mediabox_size)

    return {
        Info.PAGE_WIDTH: width,
        Info.PAGE_HEIGHT: height,
        Info.PAGE_IDX: page.number,
    }
|
||||
|
||||
|
||||
def has_alpha_channel(doc, xref):
    """Report whether the image under ``xref`` has an alpha channel.

    If the image handle carries a truthy ``"smask"`` entry, the result is true
    when the mask can be extracted from ``doc`` or its pixmap has alpha.
    Otherwise the alpha attribute of the image's own pixmap decides.
    """
    handle = load_image_handle_from_xref(doc, xref)
    smask_xref = handle["smask"] if handle else None

    if not smask_xref:
        return bool(fitz.Pixmap(doc, xref).alpha)

    # Both checks are always evaluated, in this order.
    smask_is_extractable = doc.extract_image(smask_xref) is not None
    smask_has_alpha = bool(fitz.Pixmap(doc, smask_xref).alpha)
    return smask_is_extractable or smask_has_alpha
|
||||
|
||||
|
||||
def tiny(metadata):
    """Return whether the image area (width times height) is at most 4."""
    area = metadata[Info.WIDTH] * metadata[Info.HEIGHT]
    return area <= 4
|
||||
|
||||
|
||||
def clear_caches():
    """Empty the ``lru_cache`` of every cached helper in this module."""
    cached_helpers = (
        get_image_infos,
        load_image_handle_from_xref,
        get_images_on_page,
        xref_to_image,
    )
    for helper in cached_helpers:
        helper.cache_clear()
|
||||
|
||||
|
||||
atexit.register(clear_caches)
|
||||
@ -12,4 +12,3 @@ class Info(Enum):
|
||||
Y1 = "y1"
|
||||
Y2 = "y2"
|
||||
ALPHA = "alpha"
|
||||
XREF = "xref"
|
||||
17
image_prediction/locations.py
Normal file
17
image_prediction/locations.py
Normal file
@ -0,0 +1,17 @@
|
||||
"""Defines constant paths relative to the module root path."""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
# Directory containing this module.
MODULE_DIR = Path(__file__).resolve().parent

# Directory one level above the module directory.
PACKAGE_ROOT_DIR = MODULE_DIR.parent

# Files expected directly in the package root.
CONFIG_FILE = PACKAGE_ROOT_DIR.joinpath("config.yaml")
BANNER_FILE = PACKAGE_ROOT_DIR.joinpath("banner.txt")

DATA_DIR = PACKAGE_ROOT_DIR.joinpath("data")

# Kept as a plain string rather than a Path.
MLRUNS_DIR = str(DATA_DIR.joinpath("mlruns"))

TEST_DATA_DIR = PACKAGE_ROOT_DIR.joinpath("test", "data")
|
||||
26
image_prediction/pipeline.py
Normal file
26
image_prediction/pipeline.py
Normal file
@ -0,0 +1,26 @@
|
||||
import os
|
||||
|
||||
from funcy import rcompose
|
||||
|
||||
from image_prediction.config import CONFIG
|
||||
from image_prediction.default_objects import get_extractor_classifier, get_formatter, get_mlflow_model_loader
|
||||
from image_prediction.locations import MLRUNS_DIR
|
||||
|
||||
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
|
||||
|
||||
|
||||
def load_pipeline(**kwargs):
    """Create a ``Pipeline`` using the MLflow model loader and the configured run ID.

    The model loader is built for ``MLRUNS_DIR``, the model identifier is read
    from ``CONFIG.service.run_id``, and ``kwargs`` are forwarded to ``Pipeline``.
    """
    return Pipeline(get_mlflow_model_loader(MLRUNS_DIR), CONFIG.service.run_id, **kwargs)
|
||||
|
||||
|
||||
class Pipeline:
    """Callable that chains the extractor-classifier with the default formatter."""

    def __init__(self, model_loader, model_identifier, **kwargs):
        """Compose the pipeline; ``kwargs`` are forwarded to ``get_extractor_classifier``."""
        extract_and_classify = get_extractor_classifier(model_loader, model_identifier, **kwargs)
        formatter = get_formatter()
        self.pipe = rcompose(extract_and_classify, formatter)

    def __call__(self, pdf: bytes, page_range: range = None):
        """Yield the pipeline results for ``pdf``, passing ``page_range`` on as a keyword argument."""
        results = self.pipe(pdf, page_range=page_range)
        yield from results
|
||||
@ -3,7 +3,7 @@ from functools import reduce
|
||||
from typing import Iterable, Callable, List
|
||||
|
||||
from PIL import Image
|
||||
from funcy import juxt, first, rest, rcompose, rpartial, complement, ilen
|
||||
from funcy import juxt, first, rest, rcompose, rpartial
|
||||
|
||||
from image_prediction.image_extractor.extractor import ImageMetadataPair
|
||||
from image_prediction.info import Info
|
||||
@ -13,22 +13,8 @@ from image_prediction.stitching.utils import make_coord_getter, flatten_groups_o
|
||||
from image_prediction.utils.generic import until
|
||||
|
||||
|
||||
def make_merger_sentinel():
    """Create a stateful predicate that detects when the number of pairs stops changing.

    The returned function takes a collection of pairs and returns ``True`` if
    its length equals the length seen on the previous call, otherwise it
    remembers the new length and returns ``False``. The first call never
    matches because the remembered length starts at -1.
    """
    previous_count = -1

    def no_new_mergers(pairs):
        nonlocal previous_count

        current_count = len(pairs)
        unchanged = current_count == previous_count
        if not unchanged:
            previous_count = current_count
        return unchanged

    return no_new_mergers
|
||||
def no_new_merges(pairs1, pairs2):
    """Return whether both collections of pairs have the same length."""
    count_before, count_after = len(pairs1), len(pairs2)
    return count_before == count_after
|
||||
|
||||
|
||||
def merge_along_both_axes(pairs: Iterable[ImageMetadataPair], tolerance=0) -> List[ImageMetadataPair]:
|
||||
@ -86,8 +72,7 @@ def merge_group_horizontally(group: Iterable[ImageMetadataPair], tolerance=0):
|
||||
|
||||
def merge_group(group: Iterable[ImageMetadataPair], direction, tolerance=0):
|
||||
reduce_group = make_merger_aggregator(direction, tolerance=tolerance)
|
||||
no_new_mergers = make_merger_sentinel()
|
||||
return until(no_new_mergers, reduce_group, group)
|
||||
return until(no_new_merges, reduce_group, group)
|
||||
|
||||
|
||||
def make_merger_aggregator(axis, tolerance=0) -> Callable[[Iterable[ImageMetadataPair]], Iterable[ImageMetadataPair]]:
|
||||
@ -3,13 +3,11 @@ from typing import Iterable, List
|
||||
from funcy import rpartial
|
||||
|
||||
from image_prediction.image_extractor.extractor import ImageMetadataPair
|
||||
from image_prediction.stitching.merging import merge_along_both_axes, make_merger_sentinel
|
||||
from image_prediction.stitching.merging import merge_along_both_axes, no_new_merges
|
||||
from image_prediction.utils.generic import until
|
||||
|
||||
|
||||
def stitch_pairs(pairs: Iterable[ImageMetadataPair], tolerance=0) -> List[ImageMetadataPair]:
|
||||
"""Given a collection of image-metadata pairs from the same pages, combines all pairs that constitute adjacent
|
||||
images."""
|
||||
no_new_mergers = make_merger_sentinel()
|
||||
merge = rpartial(merge_along_both_axes, tolerance)
|
||||
return until(no_new_mergers, merge, pairs)
|
||||
return until(no_new_merges, rpartial(merge_along_both_axes, tolerance), pairs)
|
||||
@ -1,5 +1,4 @@
|
||||
import math
|
||||
from dynaconf import Dynaconf
|
||||
from operator import itemgetter
|
||||
|
||||
from image_prediction.config import CONFIG
|
||||
@ -16,45 +15,38 @@ class ResponseTransformer(Transformer):
|
||||
|
||||
|
||||
def build_image_info(data: dict) -> dict:
|
||||
page_width, page_height, x1, x2, y1, y2, width, height, alpha = itemgetter(
|
||||
"page_width", "page_height", "x1", "x2", "y1", "y2", "width", "height", "alpha"
|
||||
def compute_geometric_quotient():
|
||||
page_area_sqrt = math.sqrt(abs(page_width * page_height))
|
||||
image_area_sqrt = math.sqrt(abs(x2 - x1) * abs(y2 - y1))
|
||||
return image_area_sqrt / page_area_sqrt
|
||||
|
||||
page_width, page_height, x1, x2, y1, y2, width, height = itemgetter(
|
||||
"page_width", "page_height", "x1", "x2", "y1", "y2", "width", "height"
|
||||
)(data)
|
||||
|
||||
classification = data["classification"]
|
||||
label = classification["label"]
|
||||
representation = data["representation"]
|
||||
|
||||
geometric_quotient = round(compute_geometric_quotient(page_width, page_height, x2, x1, y2, y1), 4)
|
||||
|
||||
min_image_to_page_quotient_breached = bool(
|
||||
geometric_quotient < get_class_specific_filter_value(label, CONFIG, "image_to_page_quotient", "min")
|
||||
)
|
||||
max_image_to_page_quotient_breached = bool(
|
||||
geometric_quotient > get_class_specific_filter_value(label, CONFIG, "image_to_page_quotient", "max")
|
||||
)
|
||||
quotient = round(compute_geometric_quotient(), 4)
|
||||
|
||||
min_image_to_page_quotient_breached = bool(quotient < CONFIG.filters.image_to_page_quotient.min)
|
||||
max_image_to_page_quotient_breached = bool(quotient > CONFIG.filters.image_to_page_quotient.max)
|
||||
min_image_width_to_height_quotient_breached = bool(
|
||||
width / height < get_class_specific_filter_value(label, CONFIG, "image_width_to_height_quotient", "min")
|
||||
width / height < CONFIG.filters.image_width_to_height_quotient.min
|
||||
)
|
||||
max_image_width_to_height_quotient_breached = bool(
|
||||
width / height > get_class_specific_filter_value(label, CONFIG, "image_width_to_height_quotient", "max")
|
||||
width / height > CONFIG.filters.image_width_to_height_quotient.max
|
||||
)
|
||||
|
||||
min_confidence_breached = bool(
|
||||
max(classification["probabilities"].values())
|
||||
< get_class_specific_filter_value(label, CONFIG, "confidence", "min")
|
||||
)
|
||||
classification = data["classification"]
|
||||
|
||||
min_confidence_breached = bool(max(classification["probabilities"].values()) < CONFIG.filters.min_confidence)
|
||||
|
||||
image_info = {
|
||||
"classification": classification,
|
||||
"representation": representation,
|
||||
"position": {"x1": x1, "x2": x2, "y1": y1, "y2": y2, "pageNumber": data["page_idx"] + 1},
|
||||
"geometry": {"width": width, "height": height},
|
||||
"alpha": alpha,
|
||||
"filters": {
|
||||
"geometry": {
|
||||
"imageSize": {
|
||||
"quotient": geometric_quotient,
|
||||
"quotient": quotient,
|
||||
"tooLarge": max_image_to_page_quotient_breached,
|
||||
"tooSmall": min_image_to_page_quotient_breached,
|
||||
},
|
||||
@ -78,23 +70,3 @@ def build_image_info(data: dict) -> dict:
|
||||
}
|
||||
|
||||
return image_info
|
||||
|
||||
|
||||
def compute_geometric_quotient(page_width, page_height, x2, x1, y2, y1):
    """Return the square root of the image area divided by the square root of the page area.

    The image area is taken from the absolute extents of the box given by
    ``x1``, ``x2``, ``y1`` and ``y2``; the page area is the absolute value of
    ``page_width * page_height``. A page area of zero raises ``ZeroDivisionError``.
    """
    image_area = abs(x2 - x1) * abs(y2 - y1)
    page_area = abs(page_width * page_height)
    return math.sqrt(image_area) / math.sqrt(page_area)
|
||||
|
||||
|
||||
def get_class_specific_filter_value(label: str, settings: Dynaconf, filter_type: str, bound: str = None):
    """Return the filter value for ``label``, preferring a class-specific override.

    Args:
        label: Class label whose override is looked up under ``settings.filters.overrides``.
        settings: Settings object providing ``filters``.
        filter_type: Name of the filter, e.g. ``"image_to_page_quotient"``.
        bound: Optional sub-key of the filter (such as ``"min"`` or ``"max"``).
            When omitted, the whole filter entry is returned.

    Returns:
        The override value if one exists for ``label``; otherwise the general
        value from ``settings.filters[filter_type]``.
    """
    try:
        value = settings.filters.overrides[label][filter_type]
        if bound:
            value = value[bound]
    except KeyError:
        # No override for this label: fall back to the general setting, indexing
        # by ``bound`` only when one was actually requested.
        default = settings.filters[filter_type]
        return default[bound] if bound else default

    logger.warning(f"Using {label=} specific {bound=} {filter_type=} {value=}.")
    return value
|
||||
@ -4,7 +4,8 @@ from image_prediction.locations import BANNER_FILE
|
||||
|
||||
|
||||
def show_banner():
|
||||
banner = load_banner()
|
||||
with open(BANNER_FILE) as f:
|
||||
banner = "\n" + "".join(f.readlines()) + "\n"
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.propagate = False
|
||||
@ -18,9 +19,3 @@ def show_banner():
|
||||
logger.addHandler(handler)
|
||||
|
||||
logger.info(banner)
|
||||
|
||||
|
||||
def load_banner():
    """Return the contents of ``BANNER_FILE`` with a newline added before and after."""
    with open(BANNER_FILE) as banner_file:
        contents = banner_file.read()
    return f"\n{contents}\n"
|
||||
7
image_prediction/utils/generic.py
Normal file
7
image_prediction/utils/generic.py
Normal file
@ -0,0 +1,7 @@
|
||||
from funcy import iterate, chunks
|
||||
|
||||
|
||||
def until(cond, func, *args, **kwargs):
    """Apply ``func`` repeatedly until ``cond`` holds for two consecutive values.

    Starting from the single start value ``x`` (given positionally or as ``x=``),
    the sequence ``x, func(x), func(func(x)), ...`` is generated and every pair
    of neighbours ``(a, b)`` is passed to ``cond``. The first ``a`` for which
    ``cond(a, b)`` is true is returned. Does not terminate if ``cond`` never holds.
    """

    def _seed(x):
        # Accepts exactly one start value, positional or named ``x``.
        return x

    current = _seed(*args, **kwargs)
    while True:
        following = func(current)
        # Every consecutive pair is checked, so no neighbouring pair is skipped
        # and ``func`` is not applied more often than needed.
        if cond(current, following):
            return current
        current = following
|
||||
29
image_prediction/utils/logger.py
Normal file
29
image_prediction/utils/logger.py
Normal file
@ -0,0 +1,29 @@
|
||||
import logging
|
||||
|
||||
from image_prediction.config import CONFIG
|
||||
|
||||
logging.basicConfig()
|
||||
|
||||
|
||||
def make_logger_getter():
    """Configure the ``"imclf"`` logger once and return a function that hands it out.

    The logger gets a stream handler with a timestamped format, does not
    propagate to parent loggers, and uses ``CONFIG.service.logging_level`` for
    both the handler and the logger itself.
    """
    level = CONFIG.service.logging_level

    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(level)
    stream_handler.setFormatter(
        logging.Formatter("%(asctime)s %(levelname)-8s %(message)s", datefmt="%Y-%m-%d %H:%M:%S")
    )

    logger = logging.getLogger("imclf")
    logger.propagate = False
    logger.addHandler(stream_handler)
    logger.setLevel(level)

    def get_logger():
        return logger

    return get_logger
|
||||
|
||||
|
||||
get_logger = make_logger_getter()
|
||||
@ -56,8 +56,7 @@ def annotate_image(doc, image_info):
|
||||
|
||||
def init():
|
||||
PDFNet.Initialize(
|
||||
# "Knecon AG(en.knecon.swiss):OEM:DDA-R::WL+:AMS(20211029):BECC974307DAB4F34B513BC9B2531B24496F6FCB83CD8AC574358A959730B622FABEF5C7"
|
||||
"Knecon AG:OEM:DDA-R::WL+:AMS(20270129):EA5FDFB23C7F36B9C2AE606F4F0D9197DE1FB649119F9730B622FABEF5C7"
|
||||
"Knecon AG(en.knecon.swiss):OEM:DDA-R::WL+:AMS(20211029):BECC974307DAB4F34B513BC9B2531B24496F6FCB83CD8AC574358A959730B622FABEF5C7"
|
||||
)
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user