You are on page 1of 4

14/11/2023 00:10 Scikit-learn1 - Jupyter Notebook

Entrée [22]: import pandas as pd


import numpy as np
import seaborn as sns
from sklearn.datasets import load_wine
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report

wine_data = load_wine()

Entrée [3]: wine_data.data

Out[3]: array([[1.423e+01, 1.710e+00, 2.430e+00, ..., 1.040e+00, 3.920e+00,


1.065e+03],
[1.320e+01, 1.780e+00, 2.140e+00, ..., 1.050e+00, 3.400e+00,
1.050e+03],
[1.316e+01, 2.360e+00, 2.670e+00, ..., 1.030e+00, 3.170e+00,
1.185e+03],
...,
[1.327e+01, 4.280e+00, 2.260e+00, ..., 5.900e-01, 1.560e+00,
8.350e+02],
[1.317e+01, 2.590e+00, 2.370e+00, ..., 6.000e-01, 1.620e+00,
8.400e+02],
[1.413e+01, 4.100e+00, 2.740e+00, ..., 6.100e-01, 1.600e+00,
5.600e+02]])

Entrée [4]: # Convert data to pandas dataframe


wine_df = pd.DataFrame(wine_data.data, columns=wine_data.feature_names)

Entrée [5]: # Add the target label


wine_df["target"] = wine_data.target

Entrée [6]: # Take a preview


wine_df.head()

Out[6]: alcohol malic_acid ash alcalinity_of_ash magnesium total_phenols flavanoids nonflavanoid

0 14.23 1.71 2.43 15.6 127.0 2.80 3.06

1 13.20 1.78 2.14 11.2 100.0 2.65 2.76

2 13.16 2.36 2.67 18.6 101.0 2.80 3.24

3 14.37 1.95 2.50 16.8 113.0 3.85 3.49

4 13.24 2.59 2.87 21.0 118.0 2.80 2.69

localhost:8888/notebooks/Scikit-learn1.ipynb 1/4
14/11/2023 00:10 Scikit-learn1 - Jupyter Notebook

Entrée [7]: wine_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 178 entries, 0 to 177
Data columns (total 14 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 alcohol 178 non-null float64
1 malic_acid 178 non-null float64
2 ash 178 non-null float64
3 alcalinity_of_ash 178 non-null float64
4 magnesium 178 non-null float64
5 total_phenols 178 non-null float64
6 flavanoids 178 non-null float64
7 nonflavanoid_phenols 178 non-null float64
8 proanthocyanins 178 non-null float64
9 color_intensity 178 non-null float64
10 hue 178 non-null float64
11 od280/od315_of_diluted_wines 178 non-null float64
12 proline 178 non-null float64
13 target 178 non-null int32
dtypes: float64(13), int32(1)
memory usage: 18.9 KB

Entrée [8]: wine_df.describe()

Out[8]: alcohol malic_acid ash alcalinity_of_ash magnesium total_phenols flavanoid

count 178.000000 178.000000 178.000000 178.000000 178.000000 178.000000 178.00000

mean 13.000618 2.336348 2.366517 19.494944 99.741573 2.295112 2.02927

std 0.811827 1.117146 0.274344 3.339564 14.282484 0.625851 0.99885

min 11.030000 0.740000 1.360000 10.600000 70.000000 0.980000 0.34000

25% 12.362500 1.602500 2.210000 17.200000 88.000000 1.742500 1.20500

50% 13.050000 1.865000 2.360000 19.500000 98.000000 2.355000 2.13500

75% 13.677500 3.082500 2.557500 21.500000 107.000000 2.800000 2.87500

max 14.830000 5.800000 3.230000 30.000000 162.000000 3.880000 5.08000

Entrée [9]: wine_df.tail()

Out[9]: alcohol malic_acid ash alcalinity_of_ash magnesium total_phenols flavanoids nonflavan

173 13.71 5.65 2.45 20.5 95.0 1.68 0.61

174 13.40 3.91 2.48 23.0 102.0 1.80 0.75

175 13.27 4.28 2.26 20.0 120.0 1.59 0.69

176 13.17 2.59 2.37 20.0 120.0 1.65 0.68

177 14.13 4.10 2.74 24.5 96.0 2.05 0.76

localhost:8888/notebooks/Scikit-learn1.ipynb 2/4
14/11/2023 00:10 Scikit-learn1 - Jupyter Notebook

Entrée [11]: # Split data into features and label


X = wine_df[wine_data.feature_names].copy()
y = wine_df["target"].copy()

Entrée [12]: # Instantiate scaler and fit on features


scaler = StandardScaler()
scaler.fit(X)

Out[12]: StandardScaler()

Entrée [13]: # Transform features


X_scaled = scaler.transform(X.values)

Entrée [14]: # View first instance


print(X_scaled[0])

[ 1.51861254 -0.5622498 0.23205254 -1.16959318 1.91390522 0.80899739


1.03481896 -0.65956311 1.22488398 0.25171685 0.36217728 1.84791957
1.01300893]

Entrée [16]: # Split data into train and test


X_train_scaled, X_test_scaled, y_train, y_test = train_test_split(X_scaled, y,

Entrée [17]: # Check the splits are correct


print(f"Train size: {round(len(X_train_scaled) / len(X) * 100)}% \n\
Test size: {round(len(X_test_scaled) / len(X) * 100)}%")

Train size: 70%


Test size: 30%

Entrée [19]: # Instnatiating the models


logistic_regression = LogisticRegression()
svm = SVC()
tree = DecisionTreeClassifier()

Entrée [20]: # Training the models


logistic_regression.fit(X_train_scaled, y_train)
svm.fit(X_train_scaled, y_train)
tree.fit(X_train_scaled, y_train)

Out[20]: DecisionTreeClassifier()

Entrée [21]: # Making predictions with each model


log_reg_preds = logistic_regression.predict(X_test_scaled)
svm_preds = svm.predict(X_test_scaled)
tree_preds = tree.predict(X_test_scaled)

localhost:8888/notebooks/Scikit-learn1.ipynb 3/4
14/11/2023 00:10 Scikit-learn1 - Jupyter Notebook

Entrée [23]: # Store model predictions in a dictionary


# this makes it's easier to iterate through each model
# and print the results.
model_preds = {
"Logistic Regression": log_reg_preds,
"Support Vector Machine": svm_preds,
"Decision Tree": tree_preds
}

Entrée [24]: for model, preds in model_preds.items():


print(f"{model} Results:\n{classification_report(y_test, preds)}", sep="\n\

Logistic Regression Results:


precision recall f1-score support

0 1.00 1.00 1.00 17


1 1.00 0.92 0.96 25
2 0.86 1.00 0.92 12

accuracy 0.96 54
macro avg 0.95 0.97 0.96 54
weighted avg 0.97 0.96 0.96 54

Support Vector Machine Results:


precision recall f1-score support

0 1.00 1.00 1.00 17


1 1.00 1.00 1.00 25
2 1.00 1.00 1.00 12

accuracy 1.00 54
macro avg 1.00 1.00 1.00 54
weighted avg 1.00 1.00 1.00 54

Decision Tree Results:


precision recall f1-score support

0 0.94 0.94 0.94 17


1 0.92 0.92 0.92 25
2 0.92 0.92 0.92 12

accuracy 0.93 54
macro avg 0.93 0.93 0.93 54
weighted avg 0.93 0.93 0.93 54

Entrée [ ]: ​

localhost:8888/notebooks/Scikit-learn1.ipynb 4/4

You might also like