from sklearn.datasets import make_blobs
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Create a synthetic dataset with make_blobs (fixed seed for reproducibility).
X, y = make_blobs(random_state=0)

# Split samples and labels into a training part and a held-out test part.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Fit a logistic regression on the training split only.
# `multi_class` is intentionally not passed: 'auto' is already the default, and
# the parameter is deprecated in recent scikit-learn releases, where passing it
# explicitly raises a FutureWarning.
logreg = LogisticRegression(solver='liblinear').fit(X_train, y_train)

# Report the mean accuracy on the held-out test split.
print("Test set score: {:.2f}".format(logreg.score(X_test, y_test)))

Test set score: 0.88


from sklearn.model_selection import cross_val_score
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

iris = load_iris()

# Unfitted estimator; cross_val_score fits a fresh clone of it on every fold.
# `multi_class` is intentionally not passed: 'auto' is already the default, and
# the parameter is deprecated in recent scikit-learn releases, where passing it
# explicitly raises a FutureWarning.
logreg = LogisticRegression(solver='liblinear')

# 3-fold cross-validation: one accuracy value per fold.
scores = cross_val_score(logreg, iris.data, iris.target, cv=3)
print("Cross-validation scores: {}".format(scores))

Cross-validation scores: [0.96 0.96 0.94]


# Same evaluation, but without an explicit `cv`: the library's default
# number of folds is used.
scores = cross_val_score(estimator=logreg, X=iris.data, y=iris.target)
print("Cross-validation scores: {}".format(scores))

Cross-validation scores: [1.         0.96666667 0.93333333 0.9        1.        ]


# Summarise the per-fold accuracies by their mean.
average_cv_score = scores.mean()
print("Average cross-validation score: {:.2f}".format(average_cv_score))

Average cross-validation score: 0.96


from sklearn.model_selection import cross_validate
# cross_validate returns a dict: besides the per-fold test scores it holds the
# fit/score timings and, because return_train_score=True, the training scores.
res = cross_validate(
    estimator=logreg,
    X=iris.data,
    y=iris.target,
    cv=5,
    return_train_score=True,
)
display(res)

{'fit_time': array([0.00200438, 0.00100017, 0.00099945, 0.00099897, 0.00199986]),
 'score_time': array([0.00099516, 0.00099921, 0.00099945, 0.00099778, 0.        ]),
 'test_score': array([1.        , 0.96666667, 0.93333333, 0.9       , 1.        ]),
 'train_score': array([0.95      , 0.96666667, 0.96666667, 0.975     , 0.95833333])}


import pandas as pd
# Tabulate the cross_validate result: one row per fold, one column per entry.
res_df = pd.DataFrame(data=res)
display(res_df)

# Column-wise averages over all folds.
column_means = res_df.mean()
print("Mean times and scores:\n", column_means)

Mean times and scores:
 fit_time       0.001401
score_time     0.000798
test_score     0.960000
train_score    0.963333
dtype: float64


from sklearn.datasets import load_iris
# Reload iris and print the raw labels to inspect the order in which the
# classes are stored.
iris = load_iris()
iris_labels = iris.target
print("Iris labels:\n{}".format(iris_labels))

Iris labels:
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]


from sklearn.model_selection import KFold
# Plain 3-fold splitter with no shuffling requested, so the folds follow the
# stored sample order.
kfold = KFold(n_splits=3)
unshuffled_scores = cross_val_score(logreg, iris.data, iris.target, cv=kfold)
print("Cross-validation scores:\n{}".format(unshuffled_scores))

Cross-validation scores:
[0. 0. 0.]


# Same 3-fold splitter, but the samples are shuffled (with a fixed seed)
# before being divided into folds.
kfold = KFold(n_splits=3, shuffle=True, random_state=0)
shuffled_scores = cross_val_score(logreg, iris.data, iris.target, cv=kfold)
print("Cross-validation scores:\n{}".format(shuffled_scores))

Cross-validation scores:
[0.9  0.96 0.96]


from sklearn.model_selection import train_test_split 

# First split: hold out the final test set; the remainder is train+validation.
X_trainval, X_test, y_trainval, y_test = train_test_split(
    iris.data, iris.target, random_state=0)

# Second split: carve a validation set out of the train+validation portion.
X_train, X_valid, y_train, y_valid = train_test_split(
    X_trainval, y_trainval, random_state=1)

# Report how many samples ended up in each of the three parts.
size_report = ("Size of training set: {}   size of validation set: {}   size of test set:"
               " {}\n")
print(size_report.format(len(X_train), len(X_valid), len(X_test)))

Size of training set: 84   size of validation set: 28   size of test set: 38


from sklearn.tree import DecisionTreeClassifier

# Manual grid search: try every parameter combination, score each one on the
# validation set and remember the best.
best_score = 0

# All (max_depth, random_state) pairs, in the same order two nested loops
# (depth outer, random_state inner) would visit them.
candidates = [(depth, rs)
              for depth in [1, 2, 3, 4, 5]
              for rs in [0, 5, 10, 20, 40]]

for depth, rs in candidates:
    # train one decision tree for this combination
    tree = DecisionTreeClassifier(max_depth=depth, random_state=rs)
    tree.fit(X_train, y_train)
    # accuracy on the validation set decides which combination wins
    score = tree.score(X_valid, y_valid)
    # only a strictly better score replaces the current best, so ties keep
    # the combination that was seen first
    if score > best_score:
        best_score = score
        best_parameters = {'max_depth': depth, 'random_state': rs}

print("Best parameters: ", best_parameters)

Best parameters:  {'max_depth': 3, 'random_state': 0}


# Rebuild a model with the selected parameters on the combined
# training + validation data, then evaluate it on the test set.
tree = DecisionTreeClassifier(**best_parameters)
tree.fit(X_trainval, y_trainval)
# The training score is measured on exactly the data the model was fitted on
# (training + validation), not on the smaller X_train subset, so that it
# describes the final model.
training_score = tree.score(X_trainval, y_trainval)
test_score = tree.score(X_test, y_test)

print("Training set score with best parameters : {:.2f}".format(training_score))
print("Best score on validation set: {:.2f}".format(best_score))
print("Test set score with best parameters: {:.2f}".format(test_score))

Training set score with best parameters : 1.00
Best score on validation set: 0.93
Test set score with best parameters: 0.97


# Manual grid search combined with cross-validation: every parameter
# combination is scored by its mean 5-fold cross-validation accuracy on the
# train+validation data.

# Start from a clean slate. Without this reset the search would inherit
# whatever values earlier code left in these names, and a combination would
# only be recorded if it beat that stale score.
best_score = 0
best_parameters = None

for depth in [1, 2, 3, 4, 5]:
    for rs in [0, 5, 10, 20, 40]:
        # build (but do not fit) one decision tree per parameter combination
        tree = DecisionTreeClassifier(max_depth=depth, random_state=rs)
        # extra step: cross-validation splits the train+validation data into
        # training and validation parts several times
        scores = cross_val_score(tree, X_trainval, y_trainval, cv=5)
        # mean cross-validation accuracy for this combination
        score = scores.mean()
        # keep the combination only if it is strictly better than the best so far
        if score > best_score:
            best_score = score
            best_parameters = {'max_depth': depth, 'random_state': rs}

print("Best parameters: ", best_parameters)

Best parameters:  {'max_depth': 3, 'random_state': 0}


# Refit a tree with the selected parameters on all train+validation data
# (fit returns the estimator itself, so the call can be chained).
tree = DecisionTreeClassifier(**best_parameters).fit(X_trainval, y_trainval)

# Accuracy on the held-out test set and on the data used for fitting.
test_score = tree.score(X_test, y_test)
training_score = tree.score(X_trainval, y_trainval)

print("Training set score with best parameters : {:.2f}".format(training_score))
print("Best score on validation set: {:.2f}".format(best_score))
print("Test set score with best parameters: {:.2f}".format(test_score))

Training set score with best parameters : 0.98
Best score on validation set: 0.96
Test set score with best parameters: 0.97


# Search space for the grid search: each key is a DecisionTreeClassifier
# parameter name, each value the list of candidate settings to try.
param_grid = dict(
    max_depth=[1, 2, 3, 4, 5],
    random_state=[0, 5, 10, 20, 40],
)
print("Parameter grid:\n{}".format(param_grid))

Parameter grid:
{'max_depth': [1, 2, 3, 4, 5], 'random_state': [0, 5, 10, 20, 40]}


from sklearn.model_selection import GridSearchCV

# Keep a test set apart so the tuned parameters are judged on data the search
# never saw (do not overfit the parameters to the evaluation data).
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, random_state=0)

# Grid search over param_grid, scoring every combination with 5-fold
# cross-validation on the training split.
grid_search = GridSearchCV(
    estimator=DecisionTreeClassifier(),
    param_grid=param_grid,
    cv=5,
    return_train_score=True,
)
grid_search.fit(X_train, y_train)

print("Test set score: {:.2f}".format(grid_search.score(X_test, y_test)))
print("Best parameters: {}".format(grid_search.best_params_))
print("Best cross-validation score: {:.2f}".format(grid_search.best_score_))
print("Best estimator:\n{}".format(grid_search.best_estimator_))

Test set score: 0.97
Best parameters: {'max_depth': 3, 'random_state': 0}
Best cross-validation score: 0.96
Best estimator:
DecisionTreeClassifier(max_depth=3, random_state=0)


from sklearn.model_selection import train_test_split
from sklearn.datasets import load_digits

digits = load_digits()

# Make the target binary: True when the digit is a 9, False otherwise.
y = (digits.target == 9)

# This makes the dataset imbalanced: there are approximately nine times as many
# False as True examples.
X_train, X_test, y_train, y_test = train_test_split(
    digits.data, y, random_state=0)


import numpy as np
from sklearn.dummy import DummyClassifier

# Baseline: a dummy classifier that always predicts the most frequent
# training label (here False, i.e. "not 9").
dummy_majority = DummyClassifier(strategy='most_frequent')
dummy_majority.fit(X_train, y_train)

pred_most_frequent = dummy_majority.predict(X_test)
majority_accuracy = dummy_majority.score(X_test, y_test)
print("Test score: {:.2f}".format(majority_accuracy))

Test score: 0.90


from sklearn.tree import DecisionTreeClassifier
# A deliberately shallow decision tree (at most two levels) as a first real model.
tree = DecisionTreeClassifier(max_depth=2)
tree.fit(X_train, y_train)

pred_tree = tree.predict(X_test)
tree_accuracy = tree.score(X_test, y_test)
print("Test score: {:.2f}".format(tree_accuracy))

Test score: 0.92


from sklearn.linear_model import LogisticRegression

# Dummy classifier with its default strategy.
# NOTE(review): the name `random` suggests random/stratified guessing, but no
# strategy is passed, so the behaviour depends on the library default for the
# installed scikit-learn version - confirm which strategy is intended.
random = DummyClassifier()
random.fit(X_train, y_train)
pred_dummy = random.predict(X_test)
print("dummy score: {:.2f}".format(random.score(X_test, y_test)))

# Logistic regression with C=0.1 as the model to compare against the dummy.
logreg = LogisticRegression(C=0.1, solver='liblinear')
logreg.fit(X_train, y_train)
pred_logreg = logreg.predict(X_test)
print("logreg score: {:.2f}".format(logreg.score(X_test, y_test)))

dummy score: 0.90
logreg score: 0.98


from sklearn.metrics import confusion_matrix

# Print one confusion matrix per model, always against the same test labels.
# Each entry pairs the exact message template with that model's predictions.
matrix_reports = (
    ("Confusion matrix Most frequent :\n{}", pred_most_frequent),
    ("Confusion matrix Random dummy :\n{}", pred_dummy),
    ("Confusion matrix Decision Tree :\n{}", pred_tree),
    ("Confusion matrix LogistRegression :\n{}", pred_logreg),
)
for template, predictions in matrix_reports:
    print(template.format(confusion_matrix(y_test, predictions)))

Confusion matrix Most frequent :
[[403   0]
 [ 47   0]]
Confusion matrix Random dummy :
[[403   0]
 [ 47   0]]
Confusion matrix Decision Tree :
[[390  13]
 [ 24  23]]
Confusion matrix LogistRegression :
[[401   2]
 [  8  39]]

Modellen evalueren en verbeteren¶

K. Verbeeck, J. Maervoet¶

Deel 1 : Hoe een model evalueren en verbeteren ?¶

Deel 2. Metrieken om beter te evalueren¶

Deel 1 : Hoe een model evalueren en verbeteren ?¶

1.1 Cross-Validatie¶

Cross-validatie¶

Cross-Validatie in scikit-learn¶

Overzicht Cross-validatie :¶

Stratified Cross-validatie¶

Stratified Cross-validation¶

Leave-one-out cross validation¶

GroupKFold¶

Deel 1 : Hoe een model evalueren en verbeteren ?¶

1.2 Grid Search¶

Unpacking met *args en **kwargs¶

Deel 1 : Hoe een model evalueren en verbeteren ?¶

1.3 Grid search in combinatie met Cross-validatie¶

Overzicht :¶

Visualisatie van de GridSearchCV aan de hand van een heat map¶

Samenvatting deel 1¶

Deel 2. Metrieken om beter te evalueren¶

Metrieken voor binaire classificatie¶

Sommige fouten kunnen erger zijn dan andere :¶

Metrieken voor binaire classificatie¶

In realiteit zijn de meeste datasets niet gebalanceerd :¶

Confusion matrices¶

Modellen vergelijken a.h.v. hun confusion matrix¶

Relatie met accuracy¶

Precision, recall en f-score¶

De AUC-ROC curve¶

Modellen vergelijken met ROC-AUC¶

Samenvatting deel 2¶

Tutorials :¶

	fit_time	score_time	test_score	train_score
0	0.002004	0.000995	1.000000	0.950000
1	0.001000	0.000999	0.966667	0.966667
2	0.000999	0.000999	0.933333	0.966667
3	0.000999	0.000998	0.900000	0.975000
4	0.002000	0.000000	1.000000	0.958333