Added floating-point conversion to the label mapper for JSON serializability

This commit is contained in:
Matthias Bisping 2022-03-30 19:36:45 +02:00
parent 20718996bd
commit b3e1604ecc
4 changed files with 42 additions and 3 deletions

View File

@ -1,7 +1,9 @@
from functools import partial
from operator import itemgetter from operator import itemgetter
from typing import Mapping, Iterable from typing import Mapping, Iterable
import numpy as np import numpy as np
from funcy import rcompose
from image_prediction.exceptions import UnexpectedLabelFormat from image_prediction.exceptions import UnexpectedLabelFormat
from image_prediction.label_mapper.mapper import LabelMapper from image_prediction.label_mapper.mapper import LabelMapper
@ -10,6 +12,9 @@ from image_prediction.label_mapper.mapper import LabelMapper
class ProbabilityMapper(LabelMapper): class ProbabilityMapper(LabelMapper):
def __init__(self, labels: Mapping[int, str]): def __init__(self, labels: Mapping[int, str]):
self.__labels = labels self.__labels = labels
# String conversion in the middle due to floating point precision issues.
# See: https://stackoverflow.com/questions/56820/round-doesnt-seem-to-be-rounding-properly
self.__rounder = rcompose(lambda d: round(d, 4), str, float)
def __validate_array_label_format(self, probabilities: np.ndarray) -> None: def __validate_array_label_format(self, probabilities: np.ndarray) -> None:
if not len(probabilities) == len(self.__labels): if not len(probabilities) == len(self.__labels):
@ -19,7 +24,7 @@ class ProbabilityMapper(LabelMapper):
def __map_array(self, probabilities: np.ndarray) -> dict: def __map_array(self, probabilities: np.ndarray) -> dict:
self.__validate_array_label_format(probabilities) self.__validate_array_label_format(probabilities)
cls2prob = dict(sorted(zip(self.__labels, probabilities), key=itemgetter(1), reverse=True)) cls2prob = dict(sorted(zip(self.__labels, list(map(self.__rounder, probabilities))), key=itemgetter(1), reverse=True))
most_likely = [*cls2prob][0] most_likely = [*cls2prob][0]
return {"label": most_likely, "probabilities": cls2prob} return {"label": most_likely, "probabilities": cls2prob}

View File

@ -9,6 +9,7 @@ from image_prediction.estimator.adapter.adapter import EstimatorAdapter
from image_prediction.extractor_classifier.extractor_classifier import ExtractorClassifier from image_prediction.extractor_classifier.extractor_classifier import ExtractorClassifier
from image_prediction.formatter.formatters.info_formatter import EnumFormatter from image_prediction.formatter.formatters.info_formatter import EnumFormatter
from image_prediction.image_extractor.extractors.parsable import ParsablePDFImageExtractor from image_prediction.image_extractor.extractors.parsable import ParsablePDFImageExtractor
from image_prediction.label_mapper.mappers.probability import ProbabilityMapper
from image_prediction.locations import MLRUNS_DIR from image_prediction.locations import MLRUNS_DIR
from image_prediction.model_loader.loader import ModelLoader from image_prediction.model_loader.loader import ModelLoader
from image_prediction.model_loader.loaders.mlflow import MlflowConnector from image_prediction.model_loader.loaders.mlflow import MlflowConnector
@ -21,7 +22,8 @@ def get_image_classifier():
model_loader = ModelLoader(MlflowConnector(MlflowModelReader(MLRUNS_DIR))) model_loader = ModelLoader(MlflowConnector(MlflowModelReader(MLRUNS_DIR)))
model = model_loader.load_model(CONFIG.service.run_id) model = model_loader.load_model(CONFIG.service.run_id)
classes = model_loader.load_classes(CONFIG.service.run_id) classes = model_loader.load_classes(CONFIG.service.run_id)
classifier = Classifier(EstimatorAdapter(model), classes) label_mapper = ProbabilityMapper(classes)
classifier = Classifier(EstimatorAdapter(model), label_mapper)
image_classifier = ImageClassifier(classifier) image_classifier = ImageClassifier(classifier)
return image_classifier return image_classifier

29
scripts/run_pipeline.py Normal file
View File

@ -0,0 +1,29 @@
import argparse
import json
from image_prediction.pipeline import Pipeline
def parse_args(argv=None):
    """Parse the command-line arguments of this script.

    Args:
        argv: Optional sequence of argument strings to parse. When ``None``
            (the default), argparse falls back to ``sys.argv[1:]``, which is
            what the script entry point relies on.

    Returns:
        argparse.Namespace: Namespace with a single attribute ``pdf`` that
        holds the positional argument exactly as it was given.
    """
    parser = argparse.ArgumentParser(
        description="Run the image prediction pipeline on a PDF and print the predictions.",
    )
    parser.add_argument("pdf", help="Path of the PDF file to process.")
    # Passing argv through keeps the function usable outside of a CLI call.
    return parser.parse_args(argv)
def main(args):
    """Feed one PDF through the pipeline and print each prediction.

    Args:
        args: Object with a ``pdf`` attribute that is handed to ``open``;
            the script entry point passes the result of ``parse_args``.
    """
    run_pipeline = Pipeline()

    # Binary mode: the pipeline receives the raw bytes of the file.
    with open(args.pdf, "rb") as pdf_file:
        predictions = run_pipeline(pdf_file.read())

    for prediction in predictions:
        print(prediction)
if __name__ == "__main__":
    # Script entry point: parse the CLI arguments and run the pipeline on them.
    main(parse_args())

View File

@ -1,6 +1,7 @@
import random import random
import string import string
import tempfile import tempfile
from functools import partial
from itertools import starmap from itertools import starmap
from operator import itemgetter from operator import itemgetter
@ -8,6 +9,7 @@ import fpdf
import numpy as np import numpy as np
import pytest import pytest
from PIL import Image from PIL import Image
from funcy import rcompose
from image_prediction.classifier.classifier import Classifier from image_prediction.classifier.classifier import Classifier
from image_prediction.classifier.image_classifier import ImageClassifier from image_prediction.classifier.image_classifier import ImageClassifier
@ -196,10 +198,11 @@ def batch_of_expected_numeric_labels(batch_size, classes):
@pytest.fixture @pytest.fixture
def batch_of_expected_label_to_probability_mappings(batch_of_expected_probability_arrays, classes): def batch_of_expected_label_to_probability_mappings(batch_of_expected_probability_arrays, classes):
def map_probabilities(probabilities): def map_probabilities(probabilities):
lbl2prob = dict(sorted(zip(classes, probabilities), key=itemgetter(1), reverse=True)) lbl2prob = dict(sorted(zip(classes, map(rounder, probabilities)), key=itemgetter(1), reverse=True))
most_likely = [*lbl2prob][0] most_likely = [*lbl2prob][0]
return {"label": most_likely, "probabilities": lbl2prob} return {"label": most_likely, "probabilities": lbl2prob}
rounder = rcompose(partial(np.round, decimals=4), float)
return list(map(map_probabilities, batch_of_expected_probability_arrays)) return list(map(map_probabilities, batch_of_expected_probability_arrays))