Professional Documents
Culture Documents
Question 2
In [110]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from sklearn.metrics import confusion_matrix,precision_recall_curve,f1_score,acc
uracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
In [111]:
df = pd.read_csv("http://archive.ics.uci.edu/ml/machine-learning-databases/breas
t-cancer-wisconsin/wdbc.data",header=None)
df.head()
Out[111]:
0 1 2 3 4 5 6 7 8 9 ... 22
0 842302 M 17.99 10.38 122.80 1001.0 0.11840 0.27760 0.3001 0.14710 ... 25.38 1
1 842517 M 20.57 17.77 132.90 1326.0 0.08474 0.07864 0.0869 0.07017 ... 24.99 2
2 84300903 M 19.69 21.25 130.00 1203.0 0.10960 0.15990 0.1974 0.12790 ... 23.57 2
3 84348301 M 11.42 20.38 77.58 386.1 0.14250 0.28390 0.2414 0.10520 ... 14.91 2
4 84358402 M 20.29 14.34 135.10 1297.0 0.10030 0.13280 0.1980 0.10430 ... 22.54 1
5 rows × 32 columns
In [112]:
df.columns
Out[112]:
localhost:8888/lab 1/6
01/10/2019 Untitled
In [113]:
df.rename(columns={'842302':'Id', 'M':'diagnosis','17.99':'radius_mean','10.38':
'texture_mean', '122.8':'perimeter_mean', '1001':'area_mean','0.1184':'smoothnes
s_mean', '0.2776':'compactness_mean','0.3001':'concavity_mean', '0.1471':'concav
e_points_mean','0.2419':'symmetry_mean', '0.07871':'fractal_dimension_mean','1.0
95':'radius_error', '0.9053':'texture_error', '8.589':'perimeter_error','153.4':
'area_error', '0.006399':'smoothness_error', '0.04904':'compactness_error', '0.0
5373':'concavity_error', '0.01587':'concave_points_error', '0.03003':'symmetry_e
rror','0.006193':'fractal_dimension_error','25.38':'radius_worst', '17.33':'text
ure_worst', '184.6':'perimeter_worst', '2019':'area_worst', '0.1622':'smoothness
_worst', '0.6656':'compactness_worst','0.7119':'concavity_worst', '0.2654':'conc
ave_points_worst', '0.4601':'symmetry_worst', '0.1189':'fractal_dimension_worst'
}, inplace=True)
In [114]:
df.head()
Out[114]:
0 1 2 3 4 5 6 7 8 9 ... 22
0 842302 M 17.99 10.38 122.80 1001.0 0.11840 0.27760 0.3001 0.14710 ... 25.38 1
1 842517 M 20.57 17.77 132.90 1326.0 0.08474 0.07864 0.0869 0.07017 ... 24.99 2
2 84300903 M 19.69 21.25 130.00 1203.0 0.10960 0.15990 0.1974 0.12790 ... 23.57 2
3 84348301 M 11.42 20.38 77.58 386.1 0.14250 0.28390 0.2414 0.10520 ... 14.91 2
4 84358402 M 20.29 14.34 135.10 1297.0 0.10030 0.13280 0.1980 0.10430 ... 22.54 1
5 rows × 32 columns
In [115]:
Out[115]:
0 1 2 3 4 5 6 7 8 9 ... 22
24 852552 M 16.65 21.38 110.00 904.6 0.11210 0.14570 0.15250 0.09170 ... 26.46
224 8813129 B 13.27 17.02 84.55 546.4 0.08445 0.04994 0.03554 0.02456 ... 15.14
455 9112085 B 13.38 30.72 86.34 557.2 0.09245 0.07426 0.02819 0.03264 ... 15.05
443 909777 B 10.57 18.32 66.82 340.9 0.08142 0.04462 0.01993 0.01111 ... 10.94
5 843786 M 12.45 15.70 82.57 477.1 0.12780 0.17000 0.15780 0.08089 ... 15.47
5 rows × 32 columns
localhost:8888/lab 2/6
01/10/2019 Untitled
In [116]:
df1.describe()
Out[116]:
0 2 3 4 5 6
8 rows × 31 columns
In [117]:
df1.columns
Out[117]:
In [118]:
df1.shape
Out[118]:
(569, 32)
In [ ]:
In [132]:
X = df1.iloc[:,2:]
y = df1.iloc[:,1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_
state=42, stratify=y)
localhost:8888/lab 3/6
01/10/2019 Untitled
In [133]:
dt_default = DecisionTreeClassifier()
dt_default.fit(X_train, y_train)
Out[133]:
In [134]:
y_pred = dt_default.predict(X_test)
In [135]:
print("Accuracy:",accuracy_score(y_test, y_pred))
Accuracy: 0.9298245614035088
In [136]:
rf = RandomForestClassifier(n_estimators=100)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)
print("Accuracy:",accuracy_score(y_test, y_pred))
Accuracy: 0.9824561403508771
In [137]:
confusion_matrix(y_test,dt_default.predict(X_test))
Out[137]:
array([[69, 3],
[ 5, 37]])
In [138]:
confusion_matrix(y_test,rf.predict(X_test))
Out[138]:
array([[72, 0],
[ 2, 40]])
In [139]:
confusion_matrix(y_test,dt_default.predict(X_test))
Out[139]:
array([[69, 3],
[ 5, 37]])
localhost:8888/lab 4/6
01/10/2019 Untitled
In [ ]:
f1_score(y_test,rf.predict(X_test))
In [ ]:
f1_score(y_test,dt_default.predict(X_test))
In [ ]:
Question 3
part (a)
In [14]:
In [53]:
import numpy as np
import matplotlib.pyplot as plt
X,Y = np.meshgrid(x,y)
z = J(X,Y)
plt.contour(X,Y,z, 100)
Out[53]:
<matplotlib.contour.QuadContourSet at 0x12583c198>
part(c)
localhost:8888/lab 5/6
01/10/2019 Untitled
In [ ]:
def grd_fun(X,Y):
delta_x = 2*X-Y-1
delta_y = Y-X
return [delta_x,delta_y]
arb_x = np.random.randint(-20,20,3)
arb_y = np.random.randint(-20,20,3)
grad = grd_fun(arb_x,arb_y)
plt.quiver(arb_x,arb_y,grad[0],grad[1])
plt.contour(X,Y,z,100)
In [ ]:
In [ ]:
localhost:8888/lab 6/6