Compare commits

...

13 Commits

Author SHA1 Message Date
Julius Unverfehrt
1776e3083c blacckkkyykykykyk 2022-03-21 13:54:27 +01:00
Julius Unverfehrt
4c9e6c38bd add predicting as subprocess, add workaround for keras not working if the model was loaded in different process 2022-03-21 13:53:40 +01:00
Julius Unverfehrt
530de2ff89 refactor 2022-03-21 13:36:23 +01:00
Julius Unverfehrt
130d0e8b23 add minimal not working example for keras bug in multiprocess process 2022-03-21 13:34:54 +01:00
Julius Unverfehrt
2589598b05 test 2022-03-21 11:13:45 +01:00
Julius Unverfehrt
eb6f211f02 hardcoded test 2022-03-21 11:07:32 +01:00
Julius Unverfehrt
3e9bfac5cf test 2022-03-21 11:01:21 +01:00
Julius Unverfehrt
3d9c4d8856 change test 2022-03-21 10:57:03 +01:00
Julius Unverfehrt
58ca784d6c fix test 2022-03-21 10:21:38 +01:00
Julius Unverfehrt
6faad5ad5b add predictor test 2022-03-21 10:00:28 +01:00
Julius Unverfehrt
3fbca0ac23 refactor folder structure 2022-03-18 13:04:13 +01:00
Julius Unverfehrt
90e3058c71 add response test 2022-03-18 12:58:02 +01:00
Julius Unverfehrt
2a2deffd0b add test infrastructure 2022-03-18 12:56:32 +01:00
12 changed files with 244 additions and 4 deletions

54
.coveragerc Normal file
View File

@ -0,0 +1,54 @@
# .coveragerc to control coverage.py
[run]
branch = True
omit =
*/site-packages/*
*/distutils/*
*/test/*
*/__init__.py
*/setup.py
*/venv/*
*/env/*
*/build_venv/*
*/build_env/*
source =
image_prediction
src
relative_files = True
data_file = .coverage
[report]
# Regexes for lines to exclude from consideration
exclude_lines =
# Have to re-enable the standard pragma
pragma: no cover
# Don't complain about missing debug-only code:
def __repr__
if self\.debug
# Don't complain if tests don't hit defensive assertion code:
raise AssertionError
raise NotImplementedError
# Don't complain if non-runnable code isn't run:
if 0:
if __name__ == .__main__.:
omit =
*/site-packages/*
*/distutils/*
*/test/*
*/__init__.py
*/setup.py
*/venv/*
*/env/*
*/build_venv/*
*/build_env/*
ignore_errors = True
[html]
directory = reports
[xml]
output = reports/coverage.xml

View File

@ -1,3 +1,4 @@
import multiprocessing
from typing import Callable
from flask import Flask, request, jsonify
@ -25,11 +26,32 @@ def make_prediction_server(predict_fn: Callable):
@app.route("/", methods=["POST"])
def predict():
pdf = request.data
def predict_fn_wrapper(pdf, return_dict):
    """Run ``predict_fn`` on ``pdf`` and store the outcome under ``"result"``.

    Nothing is returned; the caller reads the value back from ``return_dict``.
    """
    outcome = predict_fn(pdf)
    return_dict["result"] = outcome
def process():
    """Run the prediction for the current request in a child process.

    Returns:
        The value the child stored under ``"result"`` in the shared dict.

    Raises:
        KeyError: If the child finished without storing a ``"result"`` entry.
    """
    # Tensorflow does not free RAM. Workaround is running model in process.
    # https://stackoverflow.com/questions/39758094/clearing-tensorflow-gpu-memory-after-model-execution
    pdf = request.data
    # Using the manager as a context manager shuts its server process down
    # deterministically instead of leaving that to garbage collection.
    with multiprocessing.Manager() as manager:
        return_dict = manager.dict()
        p = multiprocessing.Process(
            target=predict_fn_wrapper,
            args=(
                pdf,
                return_dict,
            ),
        )
        p.start()
        p.join()
        # Copy out of the proxy while the manager is still running; a missing
        # key propagates as KeyError to the caller.
        return dict(return_dict)["result"]
logger.debug("Running predictor on document...")
try:
predictions = predict_fn(pdf)
predictions = process()
response = jsonify(predictions)
logger.info("Analysis completed.")
return response

2
pytest.ini Normal file
View File

@ -0,0 +1,2 @@
[pytest]
norecursedirs = incl

View File

@ -19,3 +19,5 @@ PDFNetPython3~=9.1.0
Pillow~=8.3.2
PyYAML~=5.4.1
scikit_learn~=0.24.2
pytest~=7.1.0

58
scripts/keras_MnWE.py Normal file
View File

@ -0,0 +1,58 @@
import multiprocessing
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
def process(predict_fn_wrapper):
    """Run ``predict_fn_wrapper`` in a child process and return its result.

    Args:
        predict_fn_wrapper: Callable taking a single argument, a
            manager-backed dict, in which it is expected to store its
            output under the key ``"result"``.

    Returns:
        The stored value, or ``None`` when the child stored nothing.
    """
    # We observed memory doesn't get properly deallocated unless we do this:
    # The ``with`` block shuts the manager's server process down on exit
    # instead of leaving that to garbage collection.
    with multiprocessing.Manager() as manager:
        return_dict = manager.dict()
        p = multiprocessing.Process(
            target=predict_fn_wrapper,
            args=(return_dict,),
        )
        p.start()
        p.join()
        # Copy out of the proxy while the manager is still alive; a missing
        # key explicitly yields None.
        return dict(return_dict).get("result")
def make_model():
    """Build and compile the small two-layer Keras model named "mnist_model".

    The model takes inputs of shape ``(784,)``, applies a 64-unit ReLU dense
    layer followed by a 10-unit dense layer, and is compiled with sparse
    categorical cross-entropy (from logits), RMSprop and an accuracy metric.
    """
    inputs = keras.Input(shape=(784,))
    hidden = layers.Dense(64, activation="relu")(inputs)
    outputs = layers.Dense(10)(hidden)

    model = keras.Model(inputs=inputs, outputs=outputs, name="mnist_model")
    loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    optimizer = keras.optimizers.RMSprop()
    model.compile(loss=loss, optimizer=optimizer, metrics=["accuracy"])
    return model
def make_predict_fn():
    """Return a ``predict`` callable bound to a model built at factory time.

    The returned callable ignores its arguments and runs the model on one
    random sample of shape ``(1, 784)``.
    """
    # Keras bug: doesn't work in outer scope
    shared_model = make_model()

    def predict(*args):
        # Alternative kept for reference: build the model here instead.
        # model = make_model()
        sample = np.random.random(size=(1, 784))
        return shared_model.predict(sample)

    return predict
def make_predict_fn_wrapper(predict_fn):
    """Wrap ``predict_fn`` so its output is written into a dict-like object.

    The returned callable takes one mapping argument, calls ``predict_fn``
    with no arguments and stores the outcome under the key ``"result"``.
    """

    def store_result(return_dict):
        outcome = predict_fn()
        return_dict["result"] = outcome

    return store_result
if __name__ == "__main__":
    # Build the predictor in this process, then run it in a child process
    # and print whatever the child reported back.
    wrapped_predict = make_predict_fn_wrapper(make_predict_fn())
    print(process(wrapped_predict))

View File

@ -12,13 +12,14 @@ logger = get_logger()
def main():
def predict(pdf):
    """Run the image predictor on ``pdf`` and return the built response.

    A fresh ``Predictor`` is constructed on every call.
    """
    # Keras model.predict stalls when model was loaded in different process
    # https://stackoverflow.com/questions/42504669/keras-tensorflow-and-multiprocessing-in-python
    predictions, metadata = Predictor().predict_pdf(pdf, verbose=CONFIG.service.progressbar)
    return build_response(predictions, metadata)
predictor = Predictor()
logger.info("Predictor ready.")
prediction_server = make_prediction_server(predict)

0
test/__init__.py Normal file
View File

70
test/conftest.py Normal file
View File

@ -0,0 +1,70 @@
import os.path
import pytest
from image_prediction.predictor import Predictor
@pytest.fixture
def predictions():
    """Provide a one-element list with a sample "signature" prediction."""
    probabilities = {
        "signature": 1.0,
        "logo": 9.150285377746546e-19,
        "other": 4.374506412383356e-19,
        "formula": 3.582569597002796e-24,
    }
    return [{"class": "signature", "probabilities": probabilities}]
@pytest.fixture
def metadata():
    """Provide a one-element list with sample page and box metadata."""
    entry = dict(
        page_height=612.0,
        page_width=792.0,
        height=61.049999999999955,
        width=139.35000000000002,
        page_idx=8,
        x1=63.5,
        x2=202.85000000000002,
        y1=472.0,
        y2=533.05,
    )
    return [entry]
@pytest.fixture
def response():
    """Provide the response list the response test compares against."""
    classification = {
        "label": "signature",
        "probabilities": {"formula": 0.0, "logo": 0.0, "other": 0.0, "signature": 1.0},
    }
    geometry_filters = {
        "imageFormat": {"quotient": 2.282555282555285, "tooTall": False, "tooWide": False},
        "imageSize": {"quotient": 0.13248234868245012, "tooLarge": False, "tooSmall": False},
    }
    filters = {
        "allPassed": True,
        "geometry": geometry_filters,
        "probability": {"unconfident": False},
    }
    geometry = {"height": 61.049999999999955, "width": 139.35000000000002}
    position = {"pageNumber": 9, "x1": 63.5, "x2": 202.85000000000002, "y1": 472.0, "y2": 533.05}
    return [
        {
            "classification": classification,
            "filters": filters,
            "geometry": geometry,
            "position": position,
        }
    ]
@pytest.fixture
def predictor():
    """Provide a newly constructed ``Predictor`` for each requesting test."""
    instance = Predictor()
    return instance
@pytest.fixture
def test_pdf():
    """Return the raw bytes of the sample PDF stored under ``test_data``.

    The path is resolved relative to this file rather than the current
    working directory, so the fixture also works when pytest is started
    from somewhere other than the repository root.
    """
    pdf_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        "test_data",
        "f2dc689ca794fccb8cd38b95f2bf6ba9.pdf",
    )
    with open(pdf_path, "rb") as f:
        return f.read()

Binary file not shown.

View File

View File

@ -0,0 +1,26 @@
def test_predict_pdf_works(predictor, test_pdf):
    """Check class, probability keys and metadata of the first prediction."""
    # FIXME ugly test since there are '\n's in the dict of unknown origin
    predictions, metadata = predictor.predict_pdf(test_pdf)

    first_prediction = list(predictions)[0]
    assert first_prediction["class"] == "formula"

    # Floating point precision problem for output so test only that keys exist not the values
    probabilities = first_prediction["probabilities"]
    for key in ("formula", "other", "signature", "logo"):
        assert key in probabilities

    first_metadata = dict(**list(metadata)[0])
    del first_metadata["document_filename"]  # temp filename cannot be tested

    expected_metadata = {
        "px_width": 389.0,
        "px_height": 389.0,
        "width": 194.49999000000003,
        "height": 194.49998999999997,
        "x1": 320.861,
        "x2": 515.36099,
        "y1": 347.699,
        "y2": 542.19899,
        "page_width": 595.2800000000001,
        "page_height": 841.89,
        "page_rotation": 0,
        "page_idx": 1,
        "n_pages": 3,
    }
    assert first_metadata == expected_metadata

View File

@ -0,0 +1,5 @@
from image_prediction.response import build_response
def test_build_response_returns_valid_response(predictions, metadata, response):
    """``build_response`` must turn the sample fixtures into ``response``."""
    actual = build_response(predictions, metadata)
    assert actual == response