Source code for meshed.tests.test_getitem

import pytest


from collections import Counter
from meshed import FuncNode
from meshed.dag import DAG
from pytest import fixture


def X_test(train_test_split):
    return train_test_split[1]


def y_test(train_test_split):
    return train_test_split[3]


def truth(y_test):  # to link up truth and y_test
    return y_test


def confusion_count(prediction, truth):
    """Get a dict containing the counts of all combinations of prediction and
    corresponding truth values."""
    return Counter(zip(prediction, truth))

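# A minimal illustration (not part of the original test module; the input
# lists are made up): confusion_count just tallies (prediction, truth) pairs.
#
#   confusion_count([1, 0, 1], [1, 1, 1])
#   # -> Counter({(1, 1): 2, (0, 1): 1})
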
def prediction(predict_proba, threshold):
    """Get an array of predictions from thresholding the scores of a
    predict_proba array."""
    return list(map(lambda x: x >= threshold, predict_proba))

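# A quick sketch with made-up scores (not from the original module): a 0.5
# threshold turns probability-like scores into boolean predictions, and the
# comparison is inclusive (>=).
#
#   prediction([0.2, 0.7, 0.5], 0.5)
#   # -> [False, True, True]
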
def predict_proba(model, X_test):
    """Get the predict_proba scores of a model given some test data."""
    return model.predict_proba(X_test)

def _aligned_items(a, b):
    """Yield (k, a_value, b_value) triples for all k that are a key of both
    a and b."""
    # The reason for casting to dict is to make sure things like pd.Series
    # use the right keys. Could also use `k in a.keys()` etc. to solve this.
    a = dict(a)
    b = dict(b)
    for k in a:
        if k in b:
            yield k, a[k], b[k]

def dot_product(a, b):
    """
    >>> dot_product({'a': 1, 'b': 2, 'c': 3}, {'b': 4, 'c': -1, 'd': 'whatever'})
    5
    """
    return sum(ak * bk for _, ak, bk in _aligned_items(a, b))

def classifier_score(confusion_count, confusion_value):
    """Compute a score for a classifier that produced the `confusion_count`,
    based on the given `confusion_value`.

    Meant to be curried by fixing the confusion_value dict.

    The function is purposely general -- it is not specific to binary
    classifier outcomes, or even any classifier outcomes. It simply computes
    a normalized dot product, using the keys of the inputs to align the
    values to multiply, and considering a missing key as an expression of a
    null value.
    """
    return dot_product(confusion_count, confusion_value) / sum(confusion_count.values())

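# A worked example with illustrative values (not from the original module):
# with a confusion_value dict that rewards only the correct outcomes, the
# normalized dot product reduces to plain accuracy.
#
#   counts = Counter({(True, True): 80, (False, False): 10,
#                     (True, False): 5, (False, True): 5})
#   values = {(True, True): 1, (False, False): 1}
#   classifier_score(counts, values)
#   # -> (80 * 1 + 10 * 1) / 100 == 0.9
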
@fixture
def bigger_dag():
    return DAG(
        [
            classifier_score,
            confusion_count,
            prediction,
            predict_proba,
            X_test,
            y_test,
            truth,
        ]
    )


def test_full_subgraph(bigger_dag):
    result = bigger_dag[['truth', 'prediction']:'confusion_count']
    expected = (
        'DAG(func_nodes=[FuncNode(prediction,truth -> confusion_count_ '
        '-> confusion_count)], name=None)'
    )
    assert repr(result) == expected
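
# A sketch of how the sliced sub-DAG might be used, assuming meshed's DAG
# call convention of passing root-node values as keyword arguments (the
# argument values below are hypothetical):
#
#   sub_dag = bigger_dag[['truth', 'prediction']:'confusion_count']
#   sub_dag(prediction=[True, False], truth=[True, True])
#   # -> Counter({(True, True): 1, (False, True): 1})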