Compare commits
13 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1776e3083c | ||
|
|
4c9e6c38bd | ||
|
|
530de2ff89 | ||
|
|
130d0e8b23 | ||
|
|
2589598b05 | ||
|
|
eb6f211f02 | ||
|
|
3e9bfac5cf | ||
|
|
3d9c4d8856 | ||
|
|
58ca784d6c | ||
|
|
6faad5ad5b | ||
|
|
3fbca0ac23 | ||
|
|
90e3058c71 | ||
|
|
2a2deffd0b |
54
.coveragerc
Normal file
54
.coveragerc
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
# .coveragerc to control coverage.py
|
||||||
|
[run]
|
||||||
|
branch = True
|
||||||
|
omit =
|
||||||
|
*/site-packages/*
|
||||||
|
*/distutils/*
|
||||||
|
*/test/*
|
||||||
|
*/__init__.py
|
||||||
|
*/setup.py
|
||||||
|
*/venv/*
|
||||||
|
*/env/*
|
||||||
|
*/build_venv/*
|
||||||
|
*/build_env/*
|
||||||
|
source =
|
||||||
|
image_prediction
|
||||||
|
src
|
||||||
|
relative_files = True
|
||||||
|
data_file = .coverage
|
||||||
|
|
||||||
|
[report]
|
||||||
|
# Regexes for lines to exclude from consideration
|
||||||
|
exclude_lines =
|
||||||
|
# Have to re-enable the standard pragma
|
||||||
|
pragma: no cover
|
||||||
|
|
||||||
|
# Don't complain about missing debug-only code:
|
||||||
|
def __repr__
|
||||||
|
if self\.debug
|
||||||
|
|
||||||
|
# Don't complain if tests don't hit defensive assertion code:
|
||||||
|
raise AssertionError
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
# Don't complain if non-runnable code isn't run:
|
||||||
|
if 0:
|
||||||
|
if __name__ == .__main__.:
|
||||||
|
omit =
|
||||||
|
*/site-packages/*
|
||||||
|
*/distutils/*
|
||||||
|
*/test/*
|
||||||
|
*/__init__.py
|
||||||
|
*/setup.py
|
||||||
|
*/venv/*
|
||||||
|
*/env/*
|
||||||
|
*/build_venv/*
|
||||||
|
*/build_env/*
|
||||||
|
|
||||||
|
ignore_errors = True
|
||||||
|
|
||||||
|
[html]
|
||||||
|
directory = reports
|
||||||
|
|
||||||
|
[xml]
|
||||||
|
output = reports/coverage.xml
|
||||||
@ -1,3 +1,4 @@
|
|||||||
|
import multiprocessing
|
||||||
from typing import Callable
|
from typing import Callable
|
||||||
|
|
||||||
from flask import Flask, request, jsonify
|
from flask import Flask, request, jsonify
|
||||||
@ -25,11 +26,32 @@ def make_prediction_server(predict_fn: Callable):
|
|||||||
|
|
||||||
@app.route("/", methods=["POST"])
|
@app.route("/", methods=["POST"])
|
||||||
def predict():
|
def predict():
|
||||||
pdf = request.data
|
def predict_fn_wrapper(pdf, return_dict):
|
||||||
|
return_dict["result"] = predict_fn(pdf)
|
||||||
|
|
||||||
|
def process():
|
||||||
|
# TensorFlow does not free RAM after model execution. The workaround is to run the model in a separate process.
|
||||||
|
# https://stackoverflow.com/questions/39758094/clearing-tensorflow-gpu-memory-after-model-execution
|
||||||
|
pdf = request.data
|
||||||
|
manager = multiprocessing.Manager()
|
||||||
|
return_dict = manager.dict()
|
||||||
|
p = multiprocessing.Process(
|
||||||
|
target=predict_fn_wrapper,
|
||||||
|
args=(
|
||||||
|
pdf,
|
||||||
|
return_dict,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
p.start()
|
||||||
|
p.join()
|
||||||
|
try:
|
||||||
|
return dict(return_dict)["result"]
|
||||||
|
except KeyError:
|
||||||
|
raise
|
||||||
|
|
||||||
logger.debug("Running predictor on document...")
|
logger.debug("Running predictor on document...")
|
||||||
try:
|
try:
|
||||||
predictions = predict_fn(pdf)
|
predictions = process()
|
||||||
response = jsonify(predictions)
|
response = jsonify(predictions)
|
||||||
logger.info("Analysis completed.")
|
logger.info("Analysis completed.")
|
||||||
return response
|
return response
|
||||||
|
|||||||
2
pytest.ini
Normal file
2
pytest.ini
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
[pytest]
|
||||||
|
norecursedirs = incl
|
||||||
@ -19,3 +19,5 @@ PDFNetPython3~=9.1.0
|
|||||||
Pillow~=8.3.2
|
Pillow~=8.3.2
|
||||||
PyYAML~=5.4.1
|
PyYAML~=5.4.1
|
||||||
scikit_learn~=0.24.2
|
scikit_learn~=0.24.2
|
||||||
|
|
||||||
|
pytest~=7.1.0
|
||||||
58
scripts/keras_MnWE.py
Normal file
58
scripts/keras_MnWE.py
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
import multiprocessing
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from tensorflow import keras
|
||||||
|
from tensorflow.keras import layers
|
||||||
|
|
||||||
|
|
||||||
|
def process(predict_fn_wrapper):
|
||||||
|
# We observed that memory is not properly deallocated unless the prediction runs in a separate process:
|
||||||
|
manager = multiprocessing.Manager()
|
||||||
|
return_dict = manager.dict()
|
||||||
|
p = multiprocessing.Process(
|
||||||
|
target=predict_fn_wrapper,
|
||||||
|
args=(return_dict,),
|
||||||
|
)
|
||||||
|
p.start()
|
||||||
|
p.join()
|
||||||
|
try:
|
||||||
|
return dict(return_dict)["result"]
|
||||||
|
except KeyError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def make_model():
|
||||||
|
inputs = keras.Input(shape=(784,))
|
||||||
|
dense = layers.Dense(64, activation="relu")
|
||||||
|
x = dense(inputs)
|
||||||
|
outputs = layers.Dense(10)(x)
|
||||||
|
model = keras.Model(inputs=inputs, outputs=outputs, name="mnist_model")
|
||||||
|
model.compile(
|
||||||
|
loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
|
||||||
|
optimizer=keras.optimizers.RMSprop(),
|
||||||
|
metrics=["accuracy"],
|
||||||
|
)
|
||||||
|
return model
|
||||||
|
|
||||||
|
|
||||||
|
def make_predict_fn():
|
||||||
|
# Keras bug: doesn't work in outer scope
|
||||||
|
model = make_model()
|
||||||
|
|
||||||
|
def predict(*args):
|
||||||
|
# model = make_model()
|
||||||
|
return model.predict(np.random.random(size=(1, 784)))
|
||||||
|
|
||||||
|
return predict
|
||||||
|
|
||||||
|
|
||||||
|
def make_predict_fn_wrapper(predict_fn):
|
||||||
|
def predict_fn_wrapper(return_dict):
|
||||||
|
return_dict["result"] = predict_fn()
|
||||||
|
|
||||||
|
return predict_fn_wrapper
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
predict_fn = make_predict_fn()
|
||||||
|
print(process(make_predict_fn_wrapper(predict_fn)))
|
||||||
@ -12,13 +12,14 @@ logger = get_logger()
|
|||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
|
||||||
def predict(pdf):
|
def predict(pdf):
|
||||||
|
# Keras model.predict stalls when the model was loaded in a different process
|
||||||
|
# https://stackoverflow.com/questions/42504669/keras-tensorflow-and-multiprocessing-in-python
|
||||||
|
predictor = Predictor()
|
||||||
predictions, metadata = predictor.predict_pdf(pdf, verbose=CONFIG.service.progressbar)
|
predictions, metadata = predictor.predict_pdf(pdf, verbose=CONFIG.service.progressbar)
|
||||||
response = build_response(predictions, metadata)
|
response = build_response(predictions, metadata)
|
||||||
return response
|
return response
|
||||||
|
|
||||||
predictor = Predictor()
|
|
||||||
logger.info("Predictor ready.")
|
logger.info("Predictor ready.")
|
||||||
|
|
||||||
prediction_server = make_prediction_server(predict)
|
prediction_server = make_prediction_server(predict)
|
||||||
|
|||||||
0
test/__init__.py
Normal file
0
test/__init__.py
Normal file
70
test/conftest.py
Normal file
70
test/conftest.py
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
import os.path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from image_prediction.predictor import Predictor
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def predictions():
|
||||||
|
return [
|
||||||
|
{
|
||||||
|
"class": "signature",
|
||||||
|
"probabilities": {
|
||||||
|
"signature": 1.0,
|
||||||
|
"logo": 9.150285377746546e-19,
|
||||||
|
"other": 4.374506412383356e-19,
|
||||||
|
"formula": 3.582569597002796e-24,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def metadata():
|
||||||
|
return [
|
||||||
|
{
|
||||||
|
"page_height": 612.0,
|
||||||
|
"page_width": 792.0,
|
||||||
|
"height": 61.049999999999955,
|
||||||
|
"width": 139.35000000000002,
|
||||||
|
"page_idx": 8,
|
||||||
|
"x1": 63.5,
|
||||||
|
"x2": 202.85000000000002,
|
||||||
|
"y1": 472.0,
|
||||||
|
"y2": 533.05,
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def response():
|
||||||
|
return [
|
||||||
|
{
|
||||||
|
"classification": {
|
||||||
|
"label": "signature",
|
||||||
|
"probabilities": {"formula": 0.0, "logo": 0.0, "other": 0.0, "signature": 1.0},
|
||||||
|
},
|
||||||
|
"filters": {
|
||||||
|
"allPassed": True,
|
||||||
|
"geometry": {
|
||||||
|
"imageFormat": {"quotient": 2.282555282555285, "tooTall": False, "tooWide": False},
|
||||||
|
"imageSize": {"quotient": 0.13248234868245012, "tooLarge": False, "tooSmall": False},
|
||||||
|
},
|
||||||
|
"probability": {"unconfident": False},
|
||||||
|
},
|
||||||
|
"geometry": {"height": 61.049999999999955, "width": 139.35000000000002},
|
||||||
|
"position": {"pageNumber": 9, "x1": 63.5, "x2": 202.85000000000002, "y1": 472.0, "y2": 533.05},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def predictor():
|
||||||
|
return Predictor()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def test_pdf():
|
||||||
|
with open("./test/test_data/f2dc689ca794fccb8cd38b95f2bf6ba9.pdf", "rb") as f:
|
||||||
|
return f.read()
|
||||||
BIN
test/test_data/f2dc689ca794fccb8cd38b95f2bf6ba9.pdf
Normal file
BIN
test/test_data/f2dc689ca794fccb8cd38b95f2bf6ba9.pdf
Normal file
Binary file not shown.
0
test/unit_tests/__init__.py
Normal file
0
test/unit_tests/__init__.py
Normal file
26
test/unit_tests/test_predictor.py
Normal file
26
test/unit_tests/test_predictor.py
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
def test_predict_pdf_works(predictor, test_pdf):
|
||||||
|
# FIXME: ugly test, since the dict contains '\n's of unknown origin
|
||||||
|
predictions, metadata = predictor.predict_pdf(test_pdf)
|
||||||
|
predictions = [p for p in predictions][0]
|
||||||
|
assert predictions["class"] == "formula"
|
||||||
|
probabilities = predictions["probabilities"]
|
||||||
|
# Floating-point precision makes the output values unstable, so test only that the keys exist, not their values
|
||||||
|
assert all(key in probabilities for key in ("formula", "other", "signature", "logo"))
|
||||||
|
metadata = list(metadata)
|
||||||
|
metadata = dict(**metadata[0])
|
||||||
|
metadata.pop("document_filename") # temp filename cannot be tested
|
||||||
|
assert metadata == {
|
||||||
|
"px_width": 389.0,
|
||||||
|
"px_height": 389.0,
|
||||||
|
"width": 194.49999000000003,
|
||||||
|
"height": 194.49998999999997,
|
||||||
|
"x1": 320.861,
|
||||||
|
"x2": 515.36099,
|
||||||
|
"y1": 347.699,
|
||||||
|
"y2": 542.19899,
|
||||||
|
"page_width": 595.2800000000001,
|
||||||
|
"page_height": 841.89,
|
||||||
|
"page_rotation": 0,
|
||||||
|
"page_idx": 1,
|
||||||
|
"n_pages": 3,
|
||||||
|
}
|
||||||
5
test/unit_tests/test_response.py
Normal file
5
test/unit_tests/test_response.py
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
from image_prediction.response import build_response
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_response_returns_valid_response(predictions, metadata, response):
|
||||||
|
assert build_response(predictions, metadata) == response
|
||||||
Loading…
x
Reference in New Issue
Block a user