adapt service-container to image-service-v2

This commit is contained in:
cdietrich 2022-03-01 14:17:37 +01:00
parent a4fa73deaa
commit 684aca364f
26 changed files with 128 additions and 214 deletions

1
.dvc/.gitignore vendored
View File

@ -1,3 +1,4 @@
/config.local
/tmp
/cache
/plots/

View File

@ -1,6 +1,5 @@
[core]
remote = vector
autostage = true
['remote "vector"']
url = ssh://vector.iqser.com/research/detr_server/
url = ssh://vector.iqser.com/research/image_service/
port = 22

6
.gitmodules vendored
View File

@ -1,3 +1,3 @@
[submodule "incl/detr"]
path = incl/detr
url = ssh://git@git.iqser.com:2222/rr/detr.git
[submodule "incl/redai_image"]
path = incl/redai_image
url = ssh://git@git.iqser.com:2222/rr/redai_image.git

View File

@ -1,13 +1,13 @@
ARG BASE_ROOT="nexus.iqser.com:5001/red/"
ARG VERSION_TAG="latest"
FROM ${BASE_ROOT}fb-detr-base:${VERSION_TAG}
FROM ${BASE_ROOT}image-prediction-base:${VERSION_TAG}
WORKDIR /app/service
COPY ./src ./src
COPY ./incl/detr ./incl/detr
COPY ./fb_detr ./fb_detr
COPY ./incl/redai_image ./incl/redai_image
COPY image_prediction ./image_prediction
COPY ./setup.py ./setup.py
COPY ./requirements.txt ./requirements.txt
COPY ./config.yaml ./config.yaml
@ -17,7 +17,7 @@ RUN python3 -m pip install -r requirements.txt
RUN python3 -m pip install -e .
WORKDIR /app/service/incl/detr
WORKDIR /app/service/incl/redai_image
RUN python3 -m pip install -e .
WORKDIR /app/service

View File

@ -7,7 +7,7 @@ setup/docker.sh
Build head image
```bash
docker build -f Dockerfile -t detr-server . --build-arg BASE_ROOT=""
docker build -f Dockerfile -t image-prediction . --build-arg BASE_ROOT=""
```
### Usage
@ -15,7 +15,7 @@ docker build -f Dockerfile -t detr-server . --build-arg BASE_ROOT=""
Shell 1
```bash
docker run --rm --net=host --rm detr-server
docker run --rm --net=host --rm image-prediction
```
Shell 2

View File

@ -33,8 +33,8 @@ import com.atlassian.bamboo.specs.model.task.ScriptTaskProperties.Location;
@BambooSpec
public class PlanSpec {
private static final String SERVICE_NAME = "fb-detr";
private static final String SERVICE_NAME_BASE = "fb-detr-base";
private static final String SERVICE_NAME = "image-prediction";
private static final String SERVICE_NAME_BASE = "image-prediction-base";
private static final String SERVICE_KEY = SERVICE_NAME.toUpperCase().replaceAll("-","").replaceAll("_","");
@ -72,7 +72,7 @@ public class PlanSpec {
return new Plan(
project(),
SERVICE_NAME, new BambooKey(SERVICE_KEY))
.description("Docker build for fb-detr.")
.description("Docker build for image-prediction.")
// .variables()
.stages(new Stage("Build Stage")
.jobs(
@ -86,7 +86,7 @@ public class PlanSpec {
.checkoutItems(new CheckoutItem().defaultRepository()),
new VcsCheckoutTask()
.description("Checkout detr research repository.")
.checkoutItems(new CheckoutItem().repository("RR / DETR").path("DETR")),
.checkoutItems(new CheckoutItem().repository("RR / redai_image").path("redai_image")),
new ScriptTask()
.description("Set config and keys.")
.inlineBody("mkdir -p ~/.ssh\n" +
@ -112,8 +112,8 @@ public class PlanSpec {
.description("Checkout default repository.")
.checkoutItems(new CheckoutItem().defaultRepository()),
new VcsCheckoutTask()
.description("Checkout detr research repository.")
.checkoutItems(new CheckoutItem().repository("RR / DETR").path("DETR")),
.description("Checkout redai_image repository.")
.checkoutItems(new CheckoutItem().repository("RR / redai_image").path("redai_image")),
new ScriptTask()
.description("Set config and keys.")
.inlineBody("mkdir -p ~/.ssh\n" +
@ -174,7 +174,7 @@ public class PlanSpec {
.volume("/etc/maven/settings.xml", "/usr/share/maven/ref/settings.xml")
.volume("/var/run/docker.sock", "/var/run/docker.sock"))))
.linkedRepositories("RR / " + SERVICE_NAME)
.linkedRepositories("RR / DETR")
.linkedRepositories("RR / redai_image")
.triggers(new BitbucketServerTrigger())
.planBranchManagement(new PlanBranchManagement()
.createForVcsBranch()

1
data/.gitignore vendored
View File

@ -1 +0,0 @@
/checkpoint.pth

View File

@ -1,4 +0,0 @@
outs:
- md5: 9face65530febd41a0722e0513da2264
size: 496696129
path: checkpoint.pth

View File

@ -1 +0,0 @@
/resnet50-0676ba61.pth

View File

@ -1,4 +0,0 @@
outs:
- md5: b94941323912291bb67db6fdb1d80c11
size: 102530333
path: resnet50-0676ba61.pth

View File

@ -1,162 +0,0 @@
import argparse
import logging
from itertools import compress, starmap, chain
from operator import itemgetter
from pathlib import Path
from typing import Iterable
import torch
from iteration_utilities import starfilter
from tqdm import tqdm
from detr.models import build_model
from detr.prediction import get_args_parser, infer
from fb_detr.config import CONFIG
from fb_detr.utils.non_max_supprs import greedy_non_max_supprs
from fb_detr.utils.stream import stream_pages, chunk_iterable, get_page_count
def load_model(checkpoint_path):
    """Build the DETR model and restore its weights from *checkpoint_path*.

    The architecture is configured via detr's upstream CLI argument parser,
    the checkpoint is loaded onto CPU first, and the finished model is moved
    to the device named in ``CONFIG.estimator.device`` before being returned.
    """
    # Reuse detr's own argument parser so architecture flags stay in sync.
    arg_parser = argparse.ArgumentParser(parents=[get_args_parser()])
    cli_args = arg_parser.parse_args()
    if cli_args.output_dir:
        Path(cli_args.output_dir).mkdir(parents=True, exist_ok=True)

    target_device = torch.device(CONFIG.estimator.device)
    detr_model, _, _ = build_model(cli_args)

    # Load to CPU first; .to() relocates the weights afterwards.
    state = torch.load(checkpoint_path, map_location="cpu")
    detr_model.load_state_dict(state["model"])
    detr_model.to(target_device)
    return detr_model
# DETR-based object detector over images and PDF pages.
# NOTE(review): indentation was stripped by the diff rendering; tokens kept as-is.
class Predictor:
# checkpoint_path: weights file passed to load_model.
# classes: optional mapping from numeric class ids to readable labels.
# rejection_class: optional label whose detections are dropped entirely.
def __init__(self, checkpoint_path, classes=None, rejection_class=None):
self.model = load_model(checkpoint_path)
self.classes = classes
self.rejection_class = rejection_class
# Convert an (n, 4) box tensor into a list of {"x1","y1","x2","y2"} dicts.
@staticmethod
def __format_boxes(boxes):
keys = "x1", "y1", "x2", "y2"
x1s = boxes[:, 0].tolist()
y1s = boxes[:, 1].tolist()
x2s = boxes[:, 2].tolist()
y2s = boxes[:, 3].tolist()
boxes = [dict(zip(keys, vs)) for vs in zip(x1s, y1s, x2s, y2s)]
return boxes
# itemgetter with a single key returns a scalar, not a tuple — wrap it.
@staticmethod
def __normalize_to_list(maybe_multiple):
return maybe_multiple if isinstance(maybe_multiple, tuple) else tuple([maybe_multiple])
# Map numeric class ids to readable labels when a mapping was supplied.
def __format_classes(self, classes):
if self.classes:
return self.__normalize_to_list(itemgetter(*classes.tolist())(self.classes))
else:
return classes.tolist()
# Keep only the max probability per detection.
@staticmethod
def __format_probas(probas):
return probas.max(axis=1).tolist()
# Replace tensor fields of one prediction dict with plain Python values (in place).
def __format_prediction(self, predictions: dict):
boxes, classes, probas = itemgetter("bboxes", "classes", "probas")(predictions)
if len(boxes):
boxes = self.__format_boxes(boxes)
classes = self.__format_classes(classes)
probas = self.__format_probas(probas)
else:
boxes, classes, probas = [], [], []
predictions["bboxes"] = boxes
predictions["classes"] = classes
predictions["probas"] = probas
return predictions
# Drop every detection whose class equals self.rejection_class (in place).
def __filter_predictions_for_image(self, predictions):
boxes, classes, probas = itemgetter("bboxes", "classes", "probas")(predictions)
if boxes:
keep = map(lambda c: c != self.rejection_class, classes)
compressed = list(compress(zip(boxes, classes, probas), keep))
boxes, classes, probas = map(list, zip(*compressed)) if compressed else ([], [], [])
predictions["bboxes"] = boxes
predictions["classes"] = classes
predictions["probas"] = probas
return predictions
# Lazily: remove rejected detections, then drop now-empty pages, attaching
# each surviving page's index as "page_idx". Returns an iterator.
def filter_predictions(self, predictions):
def detections_present(_, prediction):
return bool(prediction["classes"])
# TODO: set page_idx even when not filtering
def build_return_dict(page_idx, predictions):
return {"page_idx": page_idx, **predictions}
filtered_rejections = map(self.__filter_predictions_for_image, predictions)
filtered_no_detections = starfilter(detections_present, enumerate(filtered_rejections))
filtered_no_detections = starmap(build_return_dict, filtered_no_detections)
return filtered_no_detections
# Lazy per-item formatting of raw model outputs.
def format_predictions(self, outputs: Iterable):
return map(self.__format_prediction, outputs)
# Greedy non-maximum suppression over each image's detections (lazy).
def __non_max_supprs(self, predictions):
predictions = map(greedy_non_max_supprs, predictions)
return predictions
# Run inference on a batch of images; threshold falls back to the config value.
# NOTE(review): `if not threshold` also overrides an explicit 0.0 — confirm intended.
def predict(self, images, threshold=None):
if not threshold:
threshold = CONFIG.estimator.threshold
predictions = infer(images, self.model, CONFIG.estimator.device, threshold)
predictions = self.format_predictions(predictions)
if self.rejection_class:
predictions = self.filter_predictions(predictions)
predictions = self.__non_max_supprs(predictions)
predictions = list(predictions)
return predictions
# Stream pages out of a PDF, predict batch-by-batch, and re-offset each
# prediction's page_idx from batch-local to document-global.
def predict_pdf(self, pdf: bytes):
def progress(generator):
page_count = get_page_count(pdf)
batch_count = int(page_count / CONFIG.service.batch_size)
yield from tqdm(
generator, total=batch_count, position=1, leave=True
) if CONFIG.service.verbose else generator
# NOTE(review): "page_idx" += batch_idx assumes predict() set batch-local
# indices; only true when rejection filtering ran — confirm against caller.
def predict_batch(batch_idx, batch):
predictions = self.predict(batch)
for p in predictions:
p["page_idx"] += batch_idx
return predictions
page_stream = stream_pages(pdf)
page_batches = chunk_iterable(page_stream, CONFIG.service.batch_size)
predictions = list(chain(*starmap(predict_batch, progress(enumerate(page_batches)))))
return predictions

View File

@ -3,7 +3,7 @@
from envyaml import EnvYAML
from fb_detr.locations import CONFIG_FILE
from image_prediction.locations import CONFIG_FILE
def _get_item_and_maybe_make_dotindexable(container, item):

View File

@ -0,0 +1,89 @@
import logging
from operator import itemgetter
from image_prediction.config import CONFIG
# NOTE(review): indentation was stripped by the diff rendering; tokens kept as-is.
# Several names (ModelHandle, MlflowModelReader, np, List, Dict, traceback,
# message_queue, MLRUNS_DIR, BASE_WEIGHTS) are presumably imported in lines not
# shown in this diff view — verify against the full file.
class Predictor:
"""`ModelHandle` wrapper. Forwards to wrapped model handle for prediction and produces structured output that is
interpretable independently of the wrapped model (e.g. with regard to a .classes_ attribute).
"""
def __init__(self, model_handle: ModelHandle = None):
"""Initializes a ServiceEstimator.
Args:
model_handle: ModelHandle object to forward to for prediction. By default, a model handle is loaded from the
mlflow database via CONFIG.service.run_id.
"""
# NOTE(review): on failure this only logs and does not re-raise, so the
# instance is left without model_handle/classes attributes and later calls
# will fail with AttributeError — confirm this is intended.
try:
if model_handle is None:
reader = MlflowModelReader(
run_id=CONFIG.service.run_id, mlruns_dir=MLRUNS_DIR
)
# message_queue.put(text="Loading model...", level=logging.DEBUG)
self.model_handle = reader.get_model_handle(BASE_WEIGHTS)
# message_queue.put(text="Model loaded.", level=logging.DEBUG)
else:
self.model_handle = model_handle
# classes: numeric class ids exposed by the wrapped model.
self.classes = self.model_handle.model.classes_
# classes_readable: human-readable labels from the handle.
self.classes_readable = np.array(self.model_handle.classes)
# NOTE(review): indexing readable labels by self.classes[range(len(...))]
# appears to align labels with the model's class order — verify.
self.classes_readable_aligned = self.classes_readable[self.classes[list(range(len(self.classes)))]]
except Exception as e:
message_queue.put(
text="Service estimator initialization failed.",
exception=e,
level=logging.CRITICAL,
trace=traceback.format_exc(),
)
def __make_predictions_human_readable(self, probs: np.ndarray) -> List[Dict[str, float]]:
"""Translates an n x m matrix of probabilities over classes into an n-element list of mappings from classes to
probabilities.
Args:
probs: probability matrix (items x classes)
Returns:
list of mappings from classes to probabilities.
"""
# Pick the argmax column per row, map through the model's class ids,
# then to readable label strings.
# NOTE(review): despite the annotated return type, this returns a list of
# label strings, not dicts of probabilities — docstring/annotation mismatch.
classes = np.argmax(probs, axis=1)
classes = self.classes[classes]
classes_readable = [self.model_handle.classes[c] for c in classes]
return classes_readable
def predict(self, images: List, probabilities: bool = False, **kwargs):
"""Gathers predictions for list of images. Assigns each image a class and optionally a probability distribution
over all classes.
Args:
images (List[PIL.Image]) : Images to gather predictions for.
probabilities: Whether to return dictionaries of the following form instead of strings:
{
"class": predicted class,
"probabilities": {
"class 1" : class 1 probability,
"class 2" : class 2 probability,
...
}
}
Returns:
By default the return value is a list of classes (meaningful class name strings). Alternatively a list of
dictionaries with an additional probability field for estimated class probabilities per image can be
returned.
"""
# Preprocess images via the handle, then score with the wrapped model.
X = self.model_handle.prep_images(list(images))
probs_per_item = self.model_handle.model.predict_proba(X, **kwargs).astype(float)
classes = self.__make_predictions_human_readable(probs_per_item)
# Pair each readable label with its probability, sorted descending.
class2prob_per_item = [dict(zip(self.classes_readable_aligned, probs)) for probs in probs_per_item]
class2prob_per_item = [
dict(sorted(c2p.items(), key=itemgetter(1), reverse=True)) for c2p in class2prob_per_item
]
predictions = [{"class": c, "probabilities": c2p} for c, c2p in zip(classes, class2prob_per_item)]
return predictions if probabilities else classes

View File

@ -1,8 +1,8 @@
import os
from fb_detr.config import CONFIG
from fb_detr.locations import DATA_DIR, TORCH_HOME
from fb_detr.predictor import Predictor
from image_prediction.config import CONFIG
from image_prediction.locations import DATA_DIR, TORCH_HOME
from image_prediction.predictor import Predictor
def suppress_userwarnings():

@ -1 +0,0 @@
Subproject commit 772023801e4fd3deef7953f7f49fd6fb2bf60236

View File

@ -1,10 +1,3 @@
torch==1.10.2
numpy==1.22.1
opencv-python-headless==4.5.5.62
torchvision==0.11.3
pycocotools==2.0.4
scipy==1.7.3
pdf2image==1.16.0
Flask==2.0.2
requests==2.27.1
iteration-utilities==0.11.0
@ -12,5 +5,10 @@ dvc==2.9.3
dvc[ssh]
frozendict==2.3.0
waitress==2.0.0
envyaml==1.10.211231
envyaml~=1.8.210417
dependency-check==0.6.*
envyaml~=1.8.210417
mlflow~=1.20.2
numpy~=1.19.3
PDFNetPython3~=9.1.0
tqdm~=4.62.2

View File

@ -9,7 +9,7 @@ app = Flask(__name__)
@app.before_first_request
def init():
from fb_detr.predictor import Predictor
from image_prediction.predictor import Predictor
global PRED

View File

@ -3,11 +3,11 @@
from distutils.core import setup
setup(
name="fb_detr",
name="image_prediction",
version="0.1.0",
description="",
author="",
author_email="",
url="",
packages=["fb_detr"],
packages=["image_prediction"],
)

View File

@ -5,11 +5,11 @@ python3 -m venv build_venv
source build_venv/bin/activate
python3 -m pip install --upgrade pip
pip install dvc
pip install 'dvc[ssh]'
dvc pull
#pip install dvc
#pip install 'dvc[ssh]'
#dvc pull
git submodule update --init --recursive
docker build -f Dockerfile_base -t fb-detr-base .
docker build -f Dockerfile -t fb-detr .
docker build -f Dockerfile_base -t image-prediction-base .
docker build -f Dockerfile -t image-prediction .

View File

@ -1,4 +1,4 @@
sonar.exclusions=bamboo-specs/**, **/test_data/**, **/detr/**
sonar.exclusions=bamboo-specs/**, **/test_data/**
sonar.c.file.suffixes=-
sonar.cpp.file.suffixes=-
sonar.objc.file.suffixes=-

View File

@ -5,8 +5,8 @@ from typing import Callable
from flask import Flask, request, jsonify
from waitress import serve
from fb_detr.config import CONFIG
from fb_detr.utils.estimator import suppress_userwarnings, initialize_predictor
from image_prediction.config import CONFIG
from image_prediction.utils.estimator import suppress_userwarnings, initialize_predictor
def parse_args():