adapt service-container to image-service-v2

This commit is contained in:
cdietrich 2022-03-01 14:17:37 +01:00
parent a4fa73deaa
commit 684aca364f
26 changed files with 128 additions and 214 deletions

1
.dvc/.gitignore vendored
View File

@ -1,3 +1,4 @@
/config.local
/tmp
/cache
/plots/

View File

@ -1,6 +1,5 @@
[core]
remote = vector
autostage = true
['remote "vector"']
url = ssh://vector.iqser.com/research/detr_server/
url = ssh://vector.iqser.com/research/image_service/
port = 22

6
.gitmodules vendored
View File

@ -1,3 +1,3 @@
[submodule "incl/detr"]
path = incl/detr
url = ssh://git@git.iqser.com:2222/rr/detr.git
[submodule "incl/redai_image"]
path = incl/redai_image
url = ssh://git@git.iqser.com:2222/rr/redai_image.git

View File

@ -1,13 +1,13 @@
ARG BASE_ROOT="nexus.iqser.com:5001/red/"
ARG VERSION_TAG="latest"
FROM ${BASE_ROOT}fb-detr-base:${VERSION_TAG}
FROM ${BASE_ROOT}image-prediction-base:${VERSION_TAG}
WORKDIR /app/service
COPY ./src ./src
COPY ./incl/detr ./incl/detr
COPY ./fb_detr ./fb_detr
COPY ./incl/redai_image ./incl/redai_image
COPY image_prediction ./image_prediction
COPY ./setup.py ./setup.py
COPY ./requirements.txt ./requirements.txt
COPY ./config.yaml ./config.yaml
@ -17,7 +17,7 @@ RUN python3 -m pip install -r requirements.txt
RUN python3 -m pip install -e .
WORKDIR /app/service/incl/detr
WORKDIR /app/service/incl/redai_image
RUN python3 -m pip install -e .
WORKDIR /app/service

View File

@ -7,7 +7,7 @@ setup/docker.sh
Build head image
```bash
docker build -f Dockerfile -t detr-server . --build-arg BASE_ROOT=""
docker build -f Dockerfile -t image-prediction . --build-arg BASE_ROOT=""
```
### Usage
@ -15,7 +15,7 @@ docker build -f Dockerfile -t detr-server . --build-arg BASE_ROOT=""
Shell 1
```bash
docker run --rm --net=host --rm detr-server
docker run --rm --net=host --rm image-prediction
```
Shell 2

View File

@ -33,8 +33,8 @@ import com.atlassian.bamboo.specs.model.task.ScriptTaskProperties.Location;
@BambooSpec
public class PlanSpec {
private static final String SERVICE_NAME = "fb-detr";
private static final String SERVICE_NAME_BASE = "fb-detr-base";
private static final String SERVICE_NAME = "image-prediction";
private static final String SERVICE_NAME_BASE = "image-prediction-base";
private static final String SERVICE_KEY = SERVICE_NAME.toUpperCase().replaceAll("-","").replaceAll("_","");
@ -72,7 +72,7 @@ public class PlanSpec {
return new Plan(
project(),
SERVICE_NAME, new BambooKey(SERVICE_KEY))
.description("Docker build for fb-detr.")
.description("Docker build for image-prediction.")
// .variables()
.stages(new Stage("Build Stage")
.jobs(
@ -86,7 +86,7 @@ public class PlanSpec {
.checkoutItems(new CheckoutItem().defaultRepository()),
new VcsCheckoutTask()
.description("Checkout detr research repository.")
.checkoutItems(new CheckoutItem().repository("RR / DETR").path("DETR")),
.checkoutItems(new CheckoutItem().repository("RR / redai_image").path("redai_image")),
new ScriptTask()
.description("Set config and keys.")
.inlineBody("mkdir -p ~/.ssh\n" +
@ -112,8 +112,8 @@ public class PlanSpec {
.description("Checkout default repository.")
.checkoutItems(new CheckoutItem().defaultRepository()),
new VcsCheckoutTask()
.description("Checkout detr research repository.")
.checkoutItems(new CheckoutItem().repository("RR / DETR").path("DETR")),
.description("Checkout redai_image repository.")
.checkoutItems(new CheckoutItem().repository("RR / redai_image").path("redai_image")),
new ScriptTask()
.description("Set config and keys.")
.inlineBody("mkdir -p ~/.ssh\n" +
@ -174,7 +174,7 @@ public class PlanSpec {
.volume("/etc/maven/settings.xml", "/usr/share/maven/ref/settings.xml")
.volume("/var/run/docker.sock", "/var/run/docker.sock"))))
.linkedRepositories("RR / " + SERVICE_NAME)
.linkedRepositories("RR / DETR")
.linkedRepositories("RR / redai_image")
.triggers(new BitbucketServerTrigger())
.planBranchManagement(new PlanBranchManagement()
.createForVcsBranch()

1
data/.gitignore vendored
View File

@ -1 +0,0 @@
/checkpoint.pth

View File

@ -1,4 +0,0 @@
outs:
- md5: 9face65530febd41a0722e0513da2264
size: 496696129
path: checkpoint.pth

View File

@ -1 +0,0 @@
/resnet50-0676ba61.pth

View File

@ -1,4 +0,0 @@
outs:
- md5: b94941323912291bb67db6fdb1d80c11
size: 102530333
path: resnet50-0676ba61.pth

View File

@ -1,162 +0,0 @@
import argparse
import logging
from itertools import compress, starmap, chain
from operator import itemgetter
from pathlib import Path
from typing import Iterable
import torch
from iteration_utilities import starfilter
from tqdm import tqdm
from detr.models import build_model
from detr.prediction import get_args_parser, infer
from fb_detr.config import CONFIG
from fb_detr.utils.non_max_supprs import greedy_non_max_supprs
from fb_detr.utils.stream import stream_pages, chunk_iterable, get_page_count
def load_model(checkpoint_path):
    """Build the DETR model and restore its weights from *checkpoint_path*.

    The architecture is configured via detr's upstream CLI argument parser,
    the checkpoint is loaded onto CPU first, and the finished model is moved
    to the device named in ``CONFIG.estimator.device`` before being returned.
    """
    # Reuse detr's own argument parser so architecture flags stay in sync.
    arg_parser = argparse.ArgumentParser(parents=[get_args_parser()])
    cli_args = arg_parser.parse_args()
    if cli_args.output_dir:
        Path(cli_args.output_dir).mkdir(parents=True, exist_ok=True)

    target_device = torch.device(CONFIG.estimator.device)
    detr_model, _, _ = build_model(cli_args)

    # Load to CPU first; .to() relocates the weights afterwards.
    state = torch.load(checkpoint_path, map_location="cpu")
    detr_model.load_state_dict(state["model"])
    detr_model.to(target_device)
    return detr_model
# DETR-based object detector over images and PDF pages.
# NOTE(review): indentation was stripped by the diff rendering; tokens kept as-is.
class Predictor:
# checkpoint_path: weights file passed to load_model.
# classes: optional mapping from numeric class ids to readable labels.
# rejection_class: optional label whose detections are dropped entirely.
def __init__(self, checkpoint_path, classes=None, rejection_class=None):
self.model = load_model(checkpoint_path)
self.classes = classes
self.rejection_class = rejection_class
# Convert an (n, 4) box tensor into a list of {"x1","y1","x2","y2"} dicts.
@staticmethod
def __format_boxes(boxes):
keys = "x1", "y1", "x2", "y2"
x1s = boxes[:, 0].tolist()
y1s = boxes[:, 1].tolist()
x2s = boxes[:, 2].tolist()
y2s = boxes[:, 3].tolist()
boxes = [dict(zip(keys, vs)) for vs in zip(x1s, y1s, x2s, y2s)]
return boxes
# itemgetter with a single key returns a scalar, not a tuple — wrap it.
@staticmethod
def __normalize_to_list(maybe_multiple):
return maybe_multiple if isinstance(maybe_multiple, tuple) else tuple([maybe_multiple])
# Map numeric class ids to readable labels when a mapping was supplied.
def __format_classes(self, classes):
if self.classes:
return self.__normalize_to_list(itemgetter(*classes.tolist())(self.classes))
else:
return classes.tolist()
# Keep only the max probability per detection.
@staticmethod
def __format_probas(probas):
return probas.max(axis=1).tolist()
# Replace tensor fields of one prediction dict with plain Python values (in place).
def __format_prediction(self, predictions: dict):
boxes, classes, probas = itemgetter("bboxes", "classes", "probas")(predictions)
if len(boxes):
boxes = self.__format_boxes(boxes)
classes = self.__format_classes(classes)
probas = self.__format_probas(probas)
else:
boxes, classes, probas = [], [], []
predictions["bboxes"] = boxes
predictions["classes"] = classes
predictions["probas"] = probas
return predictions
# Drop every detection whose class equals self.rejection_class (in place).
def __filter_predictions_for_image(self, predictions):
boxes, classes, probas = itemgetter("bboxes", "classes", "probas")(predictions)
if boxes:
keep = map(lambda c: c != self.rejection_class, classes)
compressed = list(compress(zip(boxes, classes, probas), keep))
boxes, classes, probas = map(list, zip(*compressed)) if compressed else ([], [], [])
predictions["bboxes"] = boxes
predictions["classes"] = classes
predictions["probas"] = probas
return predictions
# Lazily: remove rejected detections, then drop now-empty pages, attaching
# each surviving page's index as "page_idx". Returns an iterator.
def filter_predictions(self, predictions):
def detections_present(_, prediction):
return bool(prediction["classes"])
# TODO: set page_idx even when not filtering
def build_return_dict(page_idx, predictions):
return {"page_idx": page_idx, **predictions}
filtered_rejections = map(self.__filter_predictions_for_image, predictions)
filtered_no_detections = starfilter(detections_present, enumerate(filtered_rejections))
filtered_no_detections = starmap(build_return_dict, filtered_no_detections)
return filtered_no_detections
# Lazy per-item formatting of raw model outputs.
def format_predictions(self, outputs: Iterable):
return map(self.__format_prediction, outputs)
# Greedy non-maximum suppression over each image's detections (lazy).
def __non_max_supprs(self, predictions):
predictions = map(greedy_non_max_supprs, predictions)
return predictions
# Run inference on a batch of images; threshold falls back to the config value.
# NOTE(review): `if not threshold` also overrides an explicit 0.0 — confirm intended.
def predict(self, images, threshold=None):
if not threshold:
threshold = CONFIG.estimator.threshold
predictions = infer(images, self.model, CONFIG.estimator.device, threshold)
predictions = self.format_predictions(predictions)
if self.rejection_class:
predictions = self.filter_predictions(predictions)
predictions = self.__non_max_supprs(predictions)
predictions = list(predictions)
return predictions
# Stream pages out of a PDF, predict batch-by-batch, and re-offset each
# prediction's page_idx from batch-local to document-global.
def predict_pdf(self, pdf: bytes):
def progress(generator):
page_count = get_page_count(pdf)
batch_count = int(page_count / CONFIG.service.batch_size)
yield from tqdm(
generator, total=batch_count, position=1, leave=True
) if CONFIG.service.verbose else generator
# NOTE(review): "page_idx" += batch_idx assumes predict() set batch-local
# indices; only true when rejection filtering ran — confirm against caller.
def predict_batch(batch_idx, batch):
predictions = self.predict(batch)
for p in predictions:
p["page_idx"] += batch_idx
return predictions
page_stream = stream_pages(pdf)
page_batches = chunk_iterable(page_stream, CONFIG.service.batch_size)
predictions = list(chain(*starmap(predict_batch, progress(enumerate(page_batches)))))
return predictions

View File

@ -3,7 +3,7 @@
from envyaml import EnvYAML
from fb_detr.locations import CONFIG_FILE
from image_prediction.locations import CONFIG_FILE
def _get_item_and_maybe_make_dotindexable(container, item):

View File

@ -0,0 +1,89 @@
import logging
from operator import itemgetter
from image_prediction.config import CONFIG
# NOTE(review): indentation was stripped by the diff rendering; tokens kept as-is.
# Several names (ModelHandle, MlflowModelReader, np, List, Dict, traceback,
# message_queue, MLRUNS_DIR, BASE_WEIGHTS) are presumably imported in lines not
# shown in this diff view — verify against the full file.
class Predictor:
"""`ModelHandle` wrapper. Forwards to wrapped model handle for prediction and produces structured output that is
interpretable independently of the wrapped model (e.g. with regard to a .classes_ attribute).
"""
def __init__(self, model_handle: ModelHandle = None):
"""Initializes a ServiceEstimator.
Args:
model_handle: ModelHandle object to forward to for prediction. By default, a model handle is loaded from the
mlflow database via CONFIG.service.run_id.
"""
# NOTE(review): on failure this only logs and does not re-raise, so the
# instance is left without model_handle/classes attributes and later calls
# will fail with AttributeError — confirm this is intended.
try:
if model_handle is None:
reader = MlflowModelReader(
run_id=CONFIG.service.run_id, mlruns_dir=MLRUNS_DIR
)
# message_queue.put(text="Loading model...", level=logging.DEBUG)
self.model_handle = reader.get_model_handle(BASE_WEIGHTS)
# message_queue.put(text="Model loaded.", level=logging.DEBUG)
else:
self.model_handle = model_handle
# classes: numeric class ids exposed by the wrapped model.
self.classes = self.model_handle.model.classes_
# classes_readable: human-readable labels from the handle.
self.classes_readable = np.array(self.model_handle.classes)
# NOTE(review): indexing readable labels by self.classes[range(len(...))]
# appears to align labels with the model's class order — verify.
self.classes_readable_aligned = self.classes_readable[self.classes[list(range(len(self.classes)))]]
except Exception as e:
message_queue.put(
text="Service estimator initialization failed.",
exception=e,
level=logging.CRITICAL,
trace=traceback.format_exc(),
)
def __make_predictions_human_readable(self, probs: np.ndarray) -> List[Dict[str, float]]:
"""Translates an n x m matrix of probabilities over classes into an n-element list of mappings from classes to
probabilities.
Args:
probs: probability matrix (items x classes)
Returns:
list of mappings from classes to probabilities.
"""
# Pick the argmax column per row, map through the model's class ids,
# then to readable label strings.
# NOTE(review): despite the annotated return type, this returns a list of
# label strings, not dicts of probabilities — docstring/annotation mismatch.
classes = np.argmax(probs, axis=1)
classes = self.classes[classes]
classes_readable = [self.model_handle.classes[c] for c in classes]
return classes_readable
def predict(self, images: List, probabilities: bool = False, **kwargs):
"""Gathers predictions for list of images. Assigns each image a class and optionally a probability distribution
over all classes.
Args:
images (List[PIL.Image]) : Images to gather predictions for.
probabilities: Whether to return dictionaries of the following form instead of strings:
{
"class": predicted class,
"probabilities": {
"class 1" : class 1 probability,
"class 2" : class 2 probability,
...
}
}
Returns:
By default the return value is a list of classes (meaningful class name strings). Alternatively a list of
dictionaries with an additional probability field for estimated class probabilities per image can be
returned.
"""
# Preprocess images via the handle, then score with the wrapped model.
X = self.model_handle.prep_images(list(images))
probs_per_item = self.model_handle.model.predict_proba(X, **kwargs).astype(float)
classes = self.__make_predictions_human_readable(probs_per_item)
# Pair each readable label with its probability, sorted descending.
class2prob_per_item = [dict(zip(self.classes_readable_aligned, probs)) for probs in probs_per_item]
class2prob_per_item = [
dict(sorted(c2p.items(), key=itemgetter(1), reverse=True)) for c2p in class2prob_per_item
]
predictions = [{"class": c, "probabilities": c2p} for c, c2p in zip(classes, class2prob_per_item)]
return predictions if probabilities else classes

View File

@ -1,8 +1,8 @@
import os
from fb_detr.config import CONFIG
from fb_detr.locations import DATA_DIR, TORCH_HOME
from fb_detr.predictor import Predictor
from image_prediction.config import CONFIG
from image_prediction.locations import DATA_DIR, TORCH_HOME
from image_prediction.predictor import Predictor
def suppress_userwarnings():

@ -1 +0,0 @@
Subproject commit 772023801e4fd3deef7953f7f49fd6fb2bf60236

View File

@ -1,10 +1,3 @@
torch==1.10.2
numpy==1.22.1
opencv-python-headless==4.5.5.62
torchvision==0.11.3
pycocotools==2.0.4
scipy==1.7.3
pdf2image==1.16.0
Flask==2.0.2
requests==2.27.1
iteration-utilities==0.11.0
@ -12,5 +5,10 @@ dvc==2.9.3
dvc[ssh]
frozendict==2.3.0
waitress==2.0.0
envyaml==1.10.211231
envyaml~=1.8.210417
dependency-check==0.6.*
envyaml~=1.8.210417
mlflow~=1.20.2
numpy~=1.19.3
PDFNetPython3~=9.1.0
tqdm~=4.62.2

View File

@ -9,7 +9,7 @@ app = Flask(__name__)
@app.before_first_request
def init():
from fb_detr.predictor import Predictor
from image_prediction.predictor import Predictor
global PRED

View File

@ -3,11 +3,11 @@
from distutils.core import setup
setup(
name="fb_detr",
name="image_prediction",
version="0.1.0",
description="",
author="",
author_email="",
url="",
packages=["fb_detr"],
packages=["image_prediction"],
)

View File

@ -5,11 +5,11 @@ python3 -m venv build_venv
source build_venv/bin/activate
python3 -m pip install --upgrade pip
pip install dvc
pip install 'dvc[ssh]'
dvc pull
#pip install dvc
#pip install 'dvc[ssh]'
#dvc pull
git submodule update --init --recursive
docker build -f Dockerfile_base -t fb-detr-base .
docker build -f Dockerfile -t fb-detr .
docker build -f Dockerfile_base -t image-prediction-base .
docker build -f Dockerfile -t image-prediction .

View File

@ -1,4 +1,4 @@
sonar.exclusions=bamboo-specs/**, **/test_data/**, **/detr/**
sonar.exclusions=bamboo-specs/**, **/test_data/**
sonar.c.file.suffixes=-
sonar.cpp.file.suffixes=-
sonar.objc.file.suffixes=-

View File

@ -5,8 +5,8 @@ from typing import Callable
from flask import Flask, request, jsonify
from waitress import serve
from fb_detr.config import CONFIG
from fb_detr.utils.estimator import suppress_userwarnings, initialize_predictor
from image_prediction.config import CONFIG
from image_prediction.utils.estimator import suppress_userwarnings, initialize_predictor
def parse_args():