From b3e1604eccb60e04bcdc73148bfe5a9f9d0a29c9 Mon Sep 17 00:00:00 2001 From: Matthias Bisping Date: Wed, 30 Mar 2022 19:36:45 +0200 Subject: [PATCH] added floating point conversion to label mapper for json serializability --- .../label_mapper/mappers/probability.py | 7 ++++- image_prediction/pipeline.py | 4 ++- scripts/run_pipeline.py | 29 +++++++++++++++++++ test/unit_tests/conftest.py | 5 +++- 4 files changed, 42 insertions(+), 3 deletions(-) create mode 100644 scripts/run_pipeline.py diff --git a/image_prediction/label_mapper/mappers/probability.py b/image_prediction/label_mapper/mappers/probability.py index 9808d5f..354d0c4 100644 --- a/image_prediction/label_mapper/mappers/probability.py +++ b/image_prediction/label_mapper/mappers/probability.py @@ -1,7 +1,9 @@ +from functools import partial from operator import itemgetter from typing import Mapping, Iterable import numpy as np +from funcy import rcompose from image_prediction.exceptions import UnexpectedLabelFormat from image_prediction.label_mapper.mapper import LabelMapper @@ -10,6 +12,9 @@ from image_prediction.label_mapper.mapper import LabelMapper class ProbabilityMapper(LabelMapper): def __init__(self, labels: Mapping[int, str]): self.__labels = labels + # String conversion in the middle due to floating point precision issues. + # See: https://stackoverflow.com/questions/56820/round-doesnt-seem-to-be-rounding-properly + self.__rounder = rcompose(lambda d: round(d, 4), str, float) def __validate_array_label_format(self, probabilities: np.ndarray) -> None: if not len(probabilities) == len(self.__labels): @@ -19,7 +24,7 @@ class ProbabilityMapper(LabelMapper): def __map_array(self, probabilities: np.ndarray) -> dict: self.__validate_array_label_format(probabilities) - cls2prob = dict(sorted(zip(self.__labels, probabilities), key=itemgetter(1), reverse=True)) + cls2prob = dict(sorted(zip(self.__labels, list(map(self.__rounder, probabilities))), key=itemgetter(1), reverse=True)) most_likely = [*cls2prob][0] return {"label": most_likely, "probabilities": cls2prob} diff --git a/image_prediction/pipeline.py b/image_prediction/pipeline.py index af4e416..92913b7 100644 --- a/image_prediction/pipeline.py +++ b/image_prediction/pipeline.py @@ -9,6 +9,7 @@ from image_prediction.estimator.adapter.adapter import EstimatorAdapter from image_prediction.extractor_classifier.extractor_classifier import ExtractorClassifier from image_prediction.formatter.formatters.info_formatter import EnumFormatter from image_prediction.image_extractor.extractors.parsable import ParsablePDFImageExtractor +from image_prediction.label_mapper.mappers.probability import ProbabilityMapper from image_prediction.locations import MLRUNS_DIR from image_prediction.model_loader.loader import ModelLoader from image_prediction.model_loader.loaders.mlflow import MlflowConnector @@ -21,7 +22,8 @@ def get_image_classifier(): model_loader = ModelLoader(MlflowConnector(MlflowModelReader(MLRUNS_DIR))) model = model_loader.load_model(CONFIG.service.run_id) classes = model_loader.load_classes(CONFIG.service.run_id) - classifier = Classifier(EstimatorAdapter(model), classes) + label_mapper = ProbabilityMapper(classes) + classifier = Classifier(EstimatorAdapter(model), label_mapper) image_classifier = ImageClassifier(classifier) return image_classifier diff --git a/scripts/run_pipeline.py b/scripts/run_pipeline.py new file mode 100644 index 0000000..9668ce9 --- /dev/null +++ b/scripts/run_pipeline.py @@ -0,0 +1,29 @@ +import argparse +import json + +from image_prediction.pipeline import Pipeline + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("pdf") + + args = parser.parse_args() + + return args + + +def main(args): + + pipeline = Pipeline() + + with open(args.pdf, "rb") as f: + predictions = pipeline(f.read()) + + for prd in predictions: + print(prd) + + +if __name__ == "__main__": + args = parse_args() + main(args) \ No newline at end of file diff --git a/test/unit_tests/conftest.py b/test/unit_tests/conftest.py index c7f399c..671ff3f 100644 --- a/test/unit_tests/conftest.py +++ b/test/unit_tests/conftest.py @@ -1,6 +1,7 @@ import random import string import tempfile +from functools import partial from itertools import starmap from operator import itemgetter @@ -8,6 +9,7 @@ import fpdf import numpy as np import pytest from PIL import Image +from funcy import rcompose from image_prediction.classifier.classifier import Classifier from image_prediction.classifier.image_classifier import ImageClassifier @@ -196,10 +198,11 @@ def batch_of_expected_numeric_labels(batch_size, classes): @pytest.fixture def batch_of_expected_label_to_probability_mappings(batch_of_expected_probability_arrays, classes): def map_probabilities(probabilities): - lbl2prob = dict(sorted(zip(classes, probabilities), key=itemgetter(1), reverse=True)) + lbl2prob = dict(sorted(zip(classes, map(rounder, probabilities)), key=itemgetter(1), reverse=True)) most_likely = [*lbl2prob][0] return {"label": most_likely, "probabilities": lbl2prob} + rounder = rcompose(partial(np.round, decimals=4), float) return list(map(map_probabilities, batch_of_expected_probability_arrays))