Professional Documents
Culture Documents
AI
Machine Learning
Practical Implementation
# Standard data-stack imports for EDA on the Algerian forest-fires dataset.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# IPython magic: render matplotlib figures inline in the notebook.
%matplotlib inline
# Load the raw CSV. NOTE(review): later cells show the file contains a
# duplicated header row embedded in the data and stray whitespace in several
# column names (' RH', ' Ws', 'Rain ', 'Classes ') — cleaned up downstream.
df=pd.read_csv('Algerian_forest_fires_dataset_UPDATE (8).csv')
df.head()
day month year Temperature RH Ws Rain FFMC DMC DC ISI BUI FWI Classes
2 3 6 2012 26 82 22 13.1 47.1 2.5 7.1 0.3 2.7 0.1 not fire
df.columns
Index(['day', 'month', 'year', 'Temperature', ' RH', ' Ws', 'Rain ', 'FFMC',
'DMC', 'DC', 'ISI', 'BUI', 'FWI', 'Classes '],
dtype='object')
df.head()
df.head()
df['Classes '].value_counts()
1 138
0 109
Name: Classes , dtype: int64
df['Classes']=df['Classes ']
df.drop(['Classes '],axis=1,inplace=True)
## Encoding
df['Classes ']=np.where(df['Classes '].str.contains("not fire"),0,1)
df['Classes'].value_counts()
1 138
0 109
Name: Classes, dtype: int64
df.tail()
https://colab.research.google.com/drive/1F2TKQtJV1ATKzgfYLDKIPKZxPOQ_jwjv#scrollTo=Z8Q8HGqYIZdE&printMode=true 1/7
12/4/23, 2:10 PM 3.0-Model Training.ipynb - Colaboratory
df['Classes'].value_counts()
1 137
0 106
Name: Classes, dtype: int64
X.head()
0 0.5
1 0.4
2 0.1
3 0
4 0.5
...
242 6.5
243 0
244 0.2
245 0.7
246 0.5
Name: FWI, Length: 247, dtype: object
# Inspect the train/test split.
X_train.shape, X_test.shape
X_train.head()
# The raw CSV has a repeated header row embedded in the data; it surfaces as
# a row whose 'Temperature' value is the literal string 'Temperature'.
X_train[X_train['Temperature'] == 'Temperature']
# FIX: drop every such contaminated row instead of hard-coding index 124,
# which only happened to hold the header row in this particular split.
X_train.drop(index=X_train[X_train['Temperature'] == 'Temperature'].index,
             inplace=True)
https://colab.research.google.com/drive/1F2TKQtJV1ATKzgfYLDKIPKZxPOQ_jwjv#scrollTo=Z8Q8HGqYIZdE&printMode=true 2/7
12/4/23, 2:10 PM 3.0-Model Training.ipynb - Colaboratory
X_train.corr()
<ipython-input-60-1d31ae5364df>:1: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future ver
X_train.corr()
index Classes
Classes
Show 25 per page
Like what you see? Visit the data table notebook to learn more about interactive tables.
X_train[X_train.Temperature !='Temperature'].corr()
<ipython-input-54-d6f31b53d14c>:1: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future ver
X_train[X_train.Temperature != 'Temperature'].corr()
Classes
Classes 1.0
X_train.columns
Index(['Temperature', ' RH', ' Ws', 'Rain ', 'FFMC', 'DMC', 'DC', 'ISI', 'BUI',
'Classes'],
dtype='object')
# Coerce string-typed numeric columns to numbers.
# FIX: the original `col.dropna().astype(int)` assigned back to the frame is
# self-defeating — column assignment re-aligns on the index, so the dropped
# rows come back as NaN and the column ends up float/object anyway.
# pd.to_numeric(errors='coerce') turns unparseable values into NaN explicitly.
X_train['Temperature'] = pd.to_numeric(X_train['Temperature'], errors='coerce')
# Repair a known data-entry typo ('14.6 9' -> '14.69') before conversion.
X_train['DC'] = pd.to_numeric(X_train['DC'].replace('14.6 9', '14.69'),
                              errors='coerce')
X_train.corr()
Temperature 1.000000 -0.689393 -0.321891 -0.359438 0.707745 0.490281 0.376328 0.598660 0.463008 0.515195
RH -0.689393 1.000000 0.166559 0.244101 -0.660022 -0.410668 -0.219077 -0.732962 -0.352303 -0.438307
Ws -0.321891 0.166559 1.000000 0.229595 -0.141418 0.015022 0.081155 0.029341 0.039326 -0.030138
Rain -0.359438 0.244101 0.229595 1.000000 -0.557421 -0.286336 -0.294696 -0.337800 -0.295782 -0.365927
FFMC 0.707745 -0.660022 -0.141418 -0.557421 1.000000 0.614965 0.510088 0.740773 0.597772 0.773751
DMC 0.490281 -0.410668 0.015022 -0.286336 0.614965 1.000000 0.871724 0.676476 0.983552 0.599769
DC 0.376328 -0.219077 0.081155 -0.294696 0.510088 0.871724 1.000000 0.475461 0.943763 0.517169
ISI 0.598660 -0.732962 0.029341 -0.337800 0.740773 0.676476 0.475461 1.000000 0.623201 0.703945
BUI 0.463008 -0.352303 0.039326 -0.295782 0.597772 0.983552 0.943763 0.623201 1.000000 0.591169
Classes 0.515195 -0.438307 -0.030138 -0.365927 0.773751 0.599769 0.517169 0.703945 0.591169 1.000000
https://colab.research.google.com/drive/1F2TKQtJV1ATKzgfYLDKIPKZxPOQ_jwjv#scrollTo=Z8Q8HGqYIZdE&printMode=true 3/7
12/4/23, 2:10 PM 3.0-Model Training.ipynb - Colaboratory
<Axes: >
X_train.corr()
Temperature 1.000000 -0.689393 -0.321891 -0.359438 0.707745 0.490281 0.376328 0.598660 0.463008 0.515195
RH -0.689393 1.000000 0.166559 0.244101 -0.660022 -0.410668 -0.219077 -0.732962 -0.352303 -0.438307
Ws -0.321891 0.166559 1.000000 0.229595 -0.141418 0.015022 0.081155 0.029341 0.039326 -0.030138
Rain -0.359438 0.244101 0.229595 1.000000 -0.557421 -0.286336 -0.294696 -0.337800 -0.295782 -0.365927
FFMC 0.707745 -0.660022 -0.141418 -0.557421 1.000000 0.614965 0.510088 0.740773 0.597772 0.773751
DMC 0.490281 -0.410668 0.015022 -0.286336 0.614965 1.000000 0.871724 0.676476 0.983552 0.599769
DC 0.376328 -0.219077 0.081155 -0.294696 0.510088 0.871724 1.000000 0.475461 0.943763 0.517169
ISI 0.598660 -0.732962 0.029341 -0.337800 0.740773 0.676476 0.475461 1.000000 0.623201 0.703945
BUI 0.463008 -0.352303 0.039326 -0.295782 0.597772 0.983552 0.943763 0.623201 1.000000 0.591169
Classes 0.515195 -0.438307 -0.030138 -0.365927 0.773751 0.599769 0.517169 0.703945 0.591169 1.000000
## threshold--Domain expertise
corr_features=correlation(X_train,0.85)
corr_features
https://colab.research.google.com/drive/1F2TKQtJV1ATKzgfYLDKIPKZxPOQ_jwjv#scrollTo=Z8Q8HGqYIZdE&printMode=true 4/7
12/4/23, 2:10 PM 3.0-Model Training.ipynb - Colaboratory
{'BUI', 'DC'}
Temperature 0
RH 0
Ws 0
Rain 0
FFMC 0
DMC 0
ISI 0
Classes 0
dtype: int64
X_train_scaled
https://colab.research.google.com/drive/1F2TKQtJV1ATKzgfYLDKIPKZxPOQ_jwjv#scrollTo=Z8Q8HGqYIZdE&printMode=true 5/7
12/4/23, 2:10 PM 3.0-Model Training.ipynb - Colaboratory
<ipython-input-96-41fb1d7ced73>:2: MatplotlibDeprecationWarning: Auto-removal of overlapping axes is deprecated since 3.6 and will b
plt.subplot(1, 2, 1)
Text(0.5, 1.0, 'X_train After Scaling')
pd.DataFrame(X_train_scaled).isnull().sum()
0 0
1 0
2 0
3 0
4 0
5 0
6 0
7 0
dtype: int64
pd.DataFrame(X_test_scaled).isnull().sum()
0 0
1 0
2 0
3 0
4 0
5 0
6 0
7 0
dtype: int64
# Clean the regression target (FWI), which was read in as strings.
# NOTE(review): 'FWI' is the duplicated CSV header value leaking into the
# data; replacing it with '0' keeps the row but fabricates a 0.0 target —
# presumably that row should be dropped instead. 'fire ' looks like a value
# shifted over from the Classes column; verify against the raw CSV.
y_train=y_train.replace('FWI','0').replace('fire ','0').astype(float)
# Drop any targets that are still missing after coercion.
y_train.dropna(inplace=True)
y_train.shape
(183,)
https://colab.research.google.com/drive/1F2TKQtJV1ATKzgfYLDKIPKZxPOQ_jwjv#scrollTo=Z8Q8HGqYIZdE&printMode=true 6/7
12/4/23, 2:10 PM 3.0-Model Training.ipynb - Colaboratory
from sklearn.metrics import r2_score
# Fit a ridge regressor on the scaled features.
ridge=Ridge()
# NOTE(review): `y_train[1:]` skips the first target to force the length to
# match X_train_scaled after the earlier row drops — this only re-aligns
# lengths, not the actual row correspondence. Confirm that X rows and y rows
# still refer to the same observations.
ridge.fit(X_train_scaled,y_train[1:])
y_pred=ridge.predict(X_test_scaled)
# Held-out-set error metrics.
mae=mean_absolute_error(y_test,y_pred)
score=r2_score(y_test,y_pred)
print("Mean absolute error", mae)
print("R2 Score", score)
import pickle

# Persist the fitted scaler and model for the deployment app.
# FIX: the original passed `open(..., 'wb')` inline to pickle.dump, leaking
# the file handles; `with` guarantees they are flushed and closed.
with open('scaler.pkl', 'wb') as f:
    pickle.dump(scaler, f)
with open('ridge.pkl', 'wb') as f:
    pickle.dump(ridge, f)
https://colab.research.google.com/drive/1F2TKQtJV1ATKzgfYLDKIPKZxPOQ_jwjv#scrollTo=Z8Q8HGqYIZdE&printMode=true 7/7
12/4/23, 2:10 PM 5.0-Logistic Regression.ipynb - Colaboratory
# Load the bundled iris dataset (a sklearn Bunch) and print its description.
dataset=load_iris()
print(dataset.DESCR)
:Summary Statistics:
The famous Iris database, first used by Sir R.A. Fisher. The dataset is taken
from Fisher's paper. Note that it's the same as in R, but not as in the UCI
Machine Learning Repository, which has two wrong data points.
.. topic:: References
dataset.keys()
import pandas as pd
import numpy as np
# Wrap the iris feature matrix in a DataFrame with the original column names.
df=pd.DataFrame(dataset.data,columns=dataset.feature_names)
df.head()
https://colab.research.google.com/drive/1RRNicYWmnoysy85JZADuWXcnm1moVvcw#printMode=true 1/5
12/4/23, 2:10 PM 5.0-Logistic Regression.ipynb - Colaboratory
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm)
df['target']=dataset.target
4 5.0 3.6 1.4 0.2
df.head()
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) target
dataset.target
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
## Binary Classification
# Keep only targets 0 and 1 so the problem is binary classification.
# FIX: take an explicit .copy() — the original kept a view-like slice of df,
# which triggers SettingWithCopyWarning (or silent write-through) if df_copy
# is ever assigned to later.
df_copy = df[df['target'] != 2].copy()
df_copy.head()
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) target
# Fit a logistic regression with default hyper-parameters on the train split.
classifier=LogisticRegression()
classifier.fit(X_train,y_train)
▾ LogisticRegression
LogisticRegression()
classifier.predict_proba(X_test)
array([[0.00118085, 0.99881915],
[0.01580857, 0.98419143],
[0.00303433, 0.99696567],
[0.96964813, 0.03035187],
https://colab.research.google.com/drive/1RRNicYWmnoysy85JZADuWXcnm1moVvcw#printMode=true 2/5
12/4/23, 2:10 PM 5.0-Logistic Regression.ipynb - Colaboratory
[0.94251523, 0.05748477],
[0.97160984, 0.02839016],
[0.99355615, 0.00644385],
[0.03169836, 0.96830164],
[0.97459743, 0.02540257],
[0.97892756, 0.02107244],
[0.95512297, 0.04487703],
[0.9607199 , 0.0392801 ],
[0.00429472, 0.99570528],
[0.9858324 , 0.0141676 ],
[0.00924893, 0.99075107],
[0.98144334, 0.01855666],
[0.00208036, 0.99791964],
[0.00125422, 0.99874578],
[0.97463766, 0.02536234],
[0.96123726, 0.03876274]])
## Prediction
y_pred=classifier.predict(X_test)
y_pred
array([1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0])
y_test
83 1
53 1
70 1
45 0
44 0
39 0
22 0
80 1
10 0
0 0
18 0
30 0
73 1
33 0
90 1
4 0
76 1
77 1
12 0
31 0
Name: target, dtype: int64
# Evaluate on the held-out set.
# FIX: sklearn's metric convention is (y_true, y_pred); the original passed
# (y_pred, y_test), which transposes the confusion matrix and swaps
# precision/recall (and per-class support) in the classification report.
print(confusion_matrix(y_test, y_pred))
print(accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
[[12 0]
[ 0 8]]
1.0
precision recall f1-score support
accuracy 1.00 20
macro avg 1.00 1.00 1.00 20
weighted avg 1.00 1.00 1.00 20
# Hyper-parameter grid for LogisticRegression.
# NOTE(review): the default solver (lbfgs) supports only 'l2'/None penalties;
# the 'l1' and 'elasticnet' candidates will yield failed fits unless a
# compatible solver (e.g. 'saga') is searched as well — confirm intent.
parameters={'penalty':('l1','l2','elasticnet',None),'C':[1,10,20]}
https://colab.research.google.com/drive/1RRNicYWmnoysy85JZADuWXcnm1moVvcw#printMode=true 3/5
12/4/23, 2:10 PM 5.0-Logistic Regression.ipynb - Colaboratory
clf=GridSearchCV(classifier,param_grid=parameters,cv=5)
▸ GridSearchCV
▸ estimator: LogisticRegression
▸ LogisticRegression
clf.best_params_
# Refit with the best parameters reported by the grid search (C=1, l2).
classifier=LogisticRegression(C=1,penalty='l2')
classifier.fit(X_train,y_train)
▾ LogisticRegression
LogisticRegression(C=1)
## Prediction
y_pred = classifier.predict(X_test)
# FIX: sklearn's metric convention is (y_true, y_pred); the original passed
# (y_pred, y_test), which transposes the confusion matrix and swaps
# precision/recall in the classification report.
print(confusion_matrix(y_test, y_pred))
print(accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
[[12 0]
[ 0 8]]
1.0
precision recall f1-score support
accuracy 1.00 20
macro avg 1.00 1.00 1.00 20
weighted avg 1.00 1.00 1.00 20
## Randomized SearchCV
from sklearn.model_selection import RandomizedSearchCV
# Randomized search over the same parameter grid with 5-fold CV.
# NOTE(review): the default n_iter=10 samples from a grid of only 4*3=12
# combinations, so this is barely cheaper than the exhaustive GridSearchCV.
random_clf=RandomizedSearchCV(LogisticRegression(),param_distributions=parameters,cv=5)
random_clf.fit(X_train,y_train)
▸ RandomizedSearchCV
▸ estimator: LogisticRegression
▸ LogisticRegression
random_clf.best_params_
https://colab.research.google.com/drive/1RRNicYWmnoysy85JZADuWXcnm1moVvcw#printMode=true 4/5
THANK - YOU
Slide 15