import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Build the "wave" regression dataset with 60 samples.
# NOTE(review): make_wave is neither defined nor imported in this file --
# presumably mglearn.datasets.make_wave; confirm and import it before running.
X, y = make_wave(n_samples=60)

# No test_size is given, so scikit-learn's default split proportion applies;
# the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42
)

# Ordinary least-squares fit on the training portion only.
lr = LinearRegression()
lr.fit(X_train, y_train)

# Learned coefficient(s) and intercept of the fitted model.
print(f"lr.coef_: {lr.coef_}")
print("lr.intercept_: " + str(lr.intercept_))

lr.coef_: [0.39390555]
lr.intercept_: -0.031804343026759746

# Compare the model's score on the data it was fitted on with its score on
# the held-out data (both rounded to two decimals).
print(f"Training set score: {lr.score(X_train, y_train):.2f}")
print(f"Test set score: {lr.score(X_test, y_test):.2f}")

Training set score: 0.67
Test set score: 0.66

# Load the housing table: whitespace-separated values with no header row,
# so pandas labels the columns with integers.
# The separator is a raw string so that "\s" reaches the regex engine intact
# instead of being parsed as an (invalid) string escape sequence, which
# newer Python versions warn about.
housing = pd.read_csv('housing.csv', header=None, sep=r'\s+')
housing

# Summarise the column dtypes and non-null counts.
housing.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 506 entries, 0 to 505
Data columns (total 14 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   0       506 non-null    float64
 1   1       506 non-null    float64
 2   2       506 non-null    float64
 3   3       506 non-null    int64  
 4   4       506 non-null    float64
 5   5       506 non-null    float64
 6   6       506 non-null    float64
 7   7       506 non-null    float64
 8   8       506 non-null    int64  
 9   9       506 non-null    float64
 10  10      506 non-null    float64
 11  11      506 non-null    float64
 12  12      506 non-null    float64
 13  13      506 non-null    float64
dtypes: float64(12), int64(2)
memory usage: 55.5 KB

# Every column except the last is a feature; the last column is the target.
X, target = housing.iloc[:, :-1], housing.iloc[:, -1]
X

# Keep 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    target,
    test_size=0.2,
    random_state=42,
)

# Unregularised least-squares baseline for the later Ridge/Lasso comparisons.
lr = LinearRegression()
lr.fit(X_train, y_train)

print(f"Training set score: {lr.score(X_train, y_train):.2f}")
print(f"Test set score: {lr.score(X_test, y_test):.2f}")

Training set score: 0.75
Test set score: 0.67

from sklearn.linear_model import Ridge

# Ridge regression with the default regularisation strength (alpha = 1.0).
ridge = Ridge()
ridge.fit(X_train, y_train)
print(f"Training set score: {ridge.score(X_train, y_train):.2f}")
print(f"Test set score: {ridge.score(X_test, y_test):.2f}")

Training set score: 0.75
Test set score: 0.67

# Much stronger regularisation than the default.
# NOTE: despite the variable name, this model uses alpha=250, not 10.
ridge10 = Ridge(alpha=250)
ridge10.fit(X_train, y_train)
print(f"Training set score: {ridge10.score(X_train, y_train):.2f}")
print(f"Test set score: {ridge10.score(X_test, y_test):.2f}")

Training set score: 0.70
Test set score: 0.68

# Weaker regularisation than the default (alpha = 0.1).
ridge01 = Ridge(alpha=0.1)
ridge01.fit(X_train, y_train)
print(f"Training set score: {ridge01.score(X_train, y_train):.2f}")
print(f"Test set score: {ridge01.score(X_test, y_test):.2f}")

Training set score: 0.75
Test set score: 0.67

from sklearn.linear_model import Lasso

# Lasso with the default regularisation strength (alpha = 1.0).
lasso = Lasso()
lasso.fit(X_train, y_train)
print(f"Training set score: {lasso.score(X_train, y_train):.2f}")
print(f"Test set score: {lasso.score(X_test, y_test):.2f}")
# Count the coefficients that were not shrunk to exactly zero.
print(f"Number of features used: {np.sum(lasso.coef_ != 0)}")

Training set score: 0.70
Test set score: 0.67
Number of features used: 10

# max_iter is raised above its default; per the original note, the model
# would otherwise warn that max_iter should be increased.
# NOTE: despite the variable name, this model uses alpha=10, not 0.01.
lasso001 = Lasso(alpha=10, max_iter=100000)
lasso001.fit(X_train, y_train)
print(f"Training set score: {lasso001.score(X_train, y_train):.2f}")
print(f"Test set score: {lasso001.score(X_test, y_test):.2f}")
# Count the coefficients that were not shrunk to exactly zero.
print(f"Number of features used: {np.sum(lasso001.coef_ != 0)}")

Training set score: 0.52
Test set score: 0.53
Number of features used: 4

# Very weak regularisation (alpha = 0.0001), again with a raised max_iter.
lasso00001 = Lasso(alpha=0.0001, max_iter=100000)
lasso00001.fit(X_train, y_train)
print(f"Training set score: {lasso00001.score(X_train, y_train):.2f}")
print(f"Test set score: {lasso00001.score(X_test, y_test):.2f}")
# Count the coefficients that were not shrunk to exactly zero.
print(f"Number of features used: {np.sum(lasso00001.coef_ != 0)}")

Training set score: 0.75
Test set score: 0.67
Number of features used: 13

from sklearn.datasets import load_digits
# Load the digits dataset and report the shapes of its feature matrix
# and its label vector.
digits = load_digits()
print(f"Image Data Shape {digits.data.shape}")
print(f"Label Data Shape {digits.target.shape}")

Image Data Shape (1797, 64)
Label Data Shape (1797,)

import numpy as np 
import matplotlib.pyplot as plt
# Show the first five digits side by side, each titled with its label.
plt.figure(figsize=(20, 4))
for position, (pixels, digit) in enumerate(
    zip(digits.data[:5], digits.target[:5]), start=1
):
    plt.subplot(1, 5, position)
    # Each sample is stored as a flat vector; reshape it to 8x8 for display.
    plt.imshow(pixels.reshape(8, 8), cmap=plt.cm.gray)
    plt.title('Training: %i\n' % digit, fontsize=20)

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
# Hold out 25% of the digits for evaluation; the fixed seed keeps the split
# reproducible. The call is wrapped in parentheses so it can span several
# lines -- a bare line break right after "=" is a syntax error.
# Note: this rebinds y_train / y_test, which earlier held the housing targets.
x_train, x_test, y_train, y_test = train_test_split(
    digits.data,
    digits.target,
    test_size=0.25,
    random_state=0,
)

# Logistic regression classifier using the liblinear solver.
# multi_class is no longer passed: 'auto' was already the default (the
# estimator's printed repr shows only solver='liblinear'), and recent
# scikit-learn releases deprecate the parameter, so spelling it out only
# triggers a FutureWarning.
# NOTE(review): recent scikit-learn releases also appear to deprecate
# liblinear for multiclass targets -- confirm against the installed version.
logisticRegr = LogisticRegression(solver='liblinear')
logisticRegr

LogisticRegression(solver='liblinear')

LogisticRegression(solver='liblinear')

# Fit the classifier on the training split of the digits data.
logisticRegr.fit(x_train, y_train)

LogisticRegression(solver='liblinear')

LogisticRegression(solver='liblinear')

# Predict the first test sample on its own (a length-1 slice keeps the input
# two-dimensional), then the first ten test samples together.
print(logisticRegr.predict(x_test[:1]))
print(logisticRegr.predict(x_test[:10]))

[2]
[2 8 2 6 6 7 1 9 8 5]

# Evaluate the fitted classifier on the held-out test split and print the
# resulting score (mean accuracy for scikit-learn classifiers).
score = logisticRegr.score(x_test, y_test)
print(score)

0.9533333333333334

	0	1	2	3	4	5	6	7	8	9	10	11	12	13
0	0.00632	18.0	2.31	0	0.538	6.575	65.2	4.0900	1	296.0	15.3	396.90	4.98	24.0
1	0.02731	0.0	7.07	0	0.469	6.421	78.9	4.9671	2	242.0	17.8	396.90	9.14	21.6
2	0.02729	0.0	7.07	0	0.469	7.185	61.1	4.9671	2	242.0	17.8	392.83	4.03	34.7
3	0.03237	0.0	2.18	0	0.458	6.998	45.8	6.0622	3	222.0	18.7	394.63	2.94	33.4
4	0.06905	0.0	2.18	0	0.458	7.147	54.2	6.0622	3	222.0	18.7	396.90	5.33	36.2
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
501	0.06263	0.0	11.93	0	0.573	6.593	69.1	2.4786	1	273.0	21.0	391.99	9.67	22.4
502	0.04527	0.0	11.93	0	0.573	6.120	76.7	2.2875	1	273.0	21.0	396.90	9.08	20.6
503	0.06076	0.0	11.93	0	0.573	6.976	91.0	2.1675	1	273.0	21.0	396.90	5.64	23.9
504	0.10959	0.0	11.93	0	0.573	6.794	89.3	2.3889	1	273.0	21.0	393.45	6.48	22.0
505	0.04741	0.0	11.93	0	0.573	6.030	80.8	2.5050	1	273.0	21.0	396.90	7.88	11.9

	0	1	2	3	4	5	6	7	8	9	10	11	12
0	0.00632	18.0	2.31	0	0.538	6.575	65.2	4.0900	1	296.0	15.3	396.90	4.98
1	0.02731	0.0	7.07	0	0.469	6.421	78.9	4.9671	2	242.0	17.8	396.90	9.14
2	0.02729	0.0	7.07	0	0.469	7.185	61.1	4.9671	2	242.0	17.8	392.83	4.03
3	0.03237	0.0	2.18	0	0.458	6.998	45.8	6.0622	3	222.0	18.7	394.63	2.94
4	0.06905	0.0	2.18	0	0.458	7.147	54.2	6.0622	3	222.0	18.7	396.90	5.33
...	...	...	...	...	...	...	...	...	...	...	...	...	...
501	0.06263	0.0	11.93	0	0.573	6.593	69.1	2.4786	1	273.0	21.0	391.99	9.67
502	0.04527	0.0	11.93	0	0.573	6.120	76.7	2.2875	1	273.0	21.0	396.90	9.08
503	0.06076	0.0	11.93	0	0.573	6.976	91.0	2.1675	1	273.0	21.0	396.90	5.64
504	0.10959	0.0	11.93	0	0.573	6.794	89.3	2.3889	1	273.0	21.0	393.45	6.48
505	0.04741	0.0	11.93	0	0.573	6.030	80.8	2.5050	1	273.0	21.0	396.90	7.88

Data Science: Leren a.d.h.v. lineaire Modellen¶

K. Verbeeck¶

Supervised Machine Learning¶

Classification & Regression¶

Lineaire modellen voor Regressie¶

Waar komt de term regressie vandaan ?¶

De taak van regressie is voornamelijk om voorspellingen te maken¶

Model representatie van enkelvoudige lineaire regressie = de vergelijking van een rechte¶

Model representatie van meervoudige lineaire regressie = de vergelijking van een hypervlak¶

Kost-functie¶

Verschillen tussen geschatte en werkelijke waarden¶

Gradient descent¶

LinearRegression in Python / sklearn¶

Artificial wave dataset¶

Boston Housing dataset¶

Reminder : overfitting¶

Correlatie¶

Correlatie en causaliteit¶

Correlatiematrix van de Boston Housing dataset¶

Regularisatie¶

Regularization : Ridge Regression¶

Ridge Regression : Boston Housing Dataset¶

Ridge versus Linear Regression:¶

Regularization : Lasso Regression¶

Lasso Regression : Boston Housing Dataset¶

Lasso versus Ridge¶

Lineaire modellen voor Classificatie¶

Binaire Classificatie¶

Een hypervlak als decision boundary¶

Modelrepresentatie voor binaire classificatie¶

LogisticRegression¶

LogisticRegression¶

LogisticRegression¶

Voorspellingen maken via Logistic Regression :¶

Multi-class Classificatie¶

Voorbeeld Multi-class classificatie via Logistic Regression¶

Samenvatting van lineaire modellen¶

Inductive Bias¶

Bias : verschillende vormen¶

Referenties :¶