Added floating-point conversion to the label mapper for JSON serializability

This commit is contained in:
Matthias Bisping 2022-03-30 19:36:45 +02:00
parent 20718996bd
commit b3e1604ecc
4 changed files with 42 additions and 3 deletions

View File

@ -1,7 +1,9 @@
from functools import partial
from operator import itemgetter
from typing import Mapping, Iterable
import numpy as np
from funcy import rcompose
from image_prediction.exceptions import UnexpectedLabelFormat
from image_prediction.label_mapper.mapper import LabelMapper
@ -10,6 +12,9 @@ from image_prediction.label_mapper.mapper import LabelMapper
class ProbabilityMapper(LabelMapper):
def __init__(self, labels: Mapping[int, str]):
    """Store the labels and build the function used to round probabilities.

    Args:
        labels: The labels that probabilities get paired with.
    """

    def round_to_four_decimals(value):
        return round(value, 4)

    # The rounded value is passed through ``str`` before being turned into a
    # ``float``; the string conversion in the middle is there because of
    # floating point precision issues.
    # See: https://stackoverflow.com/questions/56820/round-doesnt-seem-to-be-rounding-properly
    self.__rounder = rcompose(round_to_four_decimals, str, float)
    self.__labels = labels
def __validate_array_label_format(self, probabilities: np.ndarray) -> None:
if not len(probabilities) == len(self.__labels):
@ -19,7 +24,7 @@ class ProbabilityMapper(LabelMapper):
def __map_array(self, probabilities: np.ndarray) -> dict:
# Maps an array of probabilities to a dict with two entries: "probabilities", which
# pairs each entry of self.__labels with its probability ordered from highest to
# lowest, and "label", which is the first (highest-probability) entry.
# The format check is delegated to the validator before any mapping happens.
self.__validate_array_label_format(probabilities)
# NOTE(review): this unrounded mapping is overwritten by the very next statement, so
# it does not influence the result; it looks like the pre-change version of that line.
cls2prob = dict(sorted(zip(self.__labels, probabilities), key=itemgetter(1), reverse=True))
# Same mapping, but every probability goes through self.__rounder first. The pairs are
# sorted on their second element (the probability) in descending order.
# NOTE(review): self.__labels is annotated as a Mapping and iterating a Mapping yields
# its keys; confirm those keys are the intended labels.
cls2prob = dict(sorted(zip(self.__labels, list(map(self.__rounder, probabilities))), key=itemgetter(1), reverse=True))
# Dicts keep insertion order, so the first key belongs to the highest probability.
most_likely = [*cls2prob][0]
return {"label": most_likely, "probabilities": cls2prob}

View File

@ -9,6 +9,7 @@ from image_prediction.estimator.adapter.adapter import EstimatorAdapter
from image_prediction.extractor_classifier.extractor_classifier import ExtractorClassifier
from image_prediction.formatter.formatters.info_formatter import EnumFormatter
from image_prediction.image_extractor.extractors.parsable import ParsablePDFImageExtractor
from image_prediction.label_mapper.mappers.probability import ProbabilityMapper
from image_prediction.locations import MLRUNS_DIR
from image_prediction.model_loader.loader import ModelLoader
from image_prediction.model_loader.loaders.mlflow import MlflowConnector
@ -21,7 +22,8 @@ def get_image_classifier():
model_loader = ModelLoader(MlflowConnector(MlflowModelReader(MLRUNS_DIR)))
model = model_loader.load_model(CONFIG.service.run_id)
classes = model_loader.load_classes(CONFIG.service.run_id)
classifier = Classifier(EstimatorAdapter(model), classes)
label_mapper = ProbabilityMapper(classes)
classifier = Classifier(EstimatorAdapter(model), label_mapper)
image_classifier = ImageClassifier(classifier)
return image_classifier

29
scripts/run_pipeline.py Normal file
View File

@ -0,0 +1,29 @@
import argparse
import json
from image_prediction.pipeline import Pipeline
def parse_args():
    """Parse the command-line arguments of the script.

    Returns:
        The ``argparse.Namespace`` whose ``pdf`` attribute holds the single
        positional argument.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("pdf")
    return cli.parse_args()
def main(args):
    """Run the prediction pipeline on a PDF and print every prediction.

    Args:
        args: Parsed command-line arguments; only ``args.pdf`` (the path of
            the file to read) is used.
    """
    pipeline = Pipeline()

    # The pipeline is called with the raw bytes of the file, not with its path.
    with open(args.pdf, "rb") as pdf_file:
        pdf_bytes = pdf_file.read()

    for prediction in pipeline(pdf_bytes):
        print(prediction)
if __name__ == "__main__":
    # Script entry point: parse the command line and hand the result to main().
    main(parse_args())

View File

@ -1,6 +1,7 @@
import random
import string
import tempfile
from functools import partial
from itertools import starmap
from operator import itemgetter
@ -8,6 +9,7 @@ import fpdf
import numpy as np
import pytest
from PIL import Image
from funcy import rcompose
from image_prediction.classifier.classifier import Classifier
from image_prediction.classifier.image_classifier import ImageClassifier
@ -196,10 +198,11 @@ def batch_of_expected_numeric_labels(batch_size, classes):
@pytest.fixture
def batch_of_expected_label_to_probability_mappings(batch_of_expected_probability_arrays, classes):
# Fixture that builds, for every array in batch_of_expected_probability_arrays, the
# expected {"label": ..., "probabilities": ...} mapping and returns them as a list.
def map_probabilities(probabilities):
# NOTE(review): this unrounded mapping is overwritten by the very next statement, so
# it does not influence the result; it looks like the pre-change version of that line.
lbl2prob = dict(sorted(zip(classes, probabilities), key=itemgetter(1), reverse=True))
# Pair each class with its rounded probability, ordered by descending probability.
lbl2prob = dict(sorted(zip(classes, map(rounder, probabilities)), key=itemgetter(1), reverse=True))
# Dicts keep insertion order, so the first key belongs to the highest probability.
most_likely = [*lbl2prob][0]
return {"label": most_likely, "probabilities": lbl2prob}
# Rounds to four decimals with np.round and then converts the result with float().
# It is assigned after map_probabilities is defined but before that function is first
# called below, so the name is resolvable when the closure runs.
rounder = rcompose(partial(np.round, decimals=4), float)
return list(map(map_probabilities, batch_of_expected_probability_arrays))