2 and 3

01/10/2019 Untitled
Question 2
In [110]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from sklearn.metrics import confusion_matrix,precision_recall_curve,f1_score,acc
uracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
In [111]:
df = pd.read_csv("http://archive.ics.uci.edu/ml/machine-learning-databases/breas
t-cancer-wisconsin/wdbc.data",header=None)
df.head()
Out[111]:
0 1 2 3 4 5 6 7 8 9 ... 22
0 842302 M 17.99 10.38 122.80 1001.0 0.11840 0.27760 0.3001 0.14710 ... 25.38 1
1 842517 M 20.57 17.77 132.90 1326.0 0.08474 0.07864 0.0869 0.07017 ... 24.99 2
2 84300903 M 19.69 21.25 130.00 1203.0 0.10960 0.15990 0.1974 0.12790 ... 23.57 2
3 84348301 M 11.42 20.38 77.58 386.1 0.14250 0.28390 0.2414 0.10520 ... 14.91 2
4 84358402 M 20.29 14.34 135.10 1297.0 0.10030 0.13280 0.1980 0.10430 ... 22.54 1
5 rows × 32 columns
In [112]:
df.columns
Out[112]:
Int64Index([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,

14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
31],
dtype='int64')
localhost:8888/lab 1/6
01/10/2019 Untitled
In [113]:
df.rename(columns={'842302':'Id', 'M':'diagnosis','17.99':'radius_mean','10.38':
'texture_mean', '122.8':'perimeter_mean', '1001':'area_mean','0.1184':'smoothnes
s_mean', '0.2776':'compactness_mean','0.3001':'concavity_mean', '0.1471':'concav
e_points_mean','0.2419':'symmetry_mean', '0.07871':'fractal_dimension_mean','1.0
95':'radius_error', '0.9053':'texture_error', '8.589':'perimeter_error','153.4':
'area_error', '0.006399':'smoothness_error', '0.04904':'compactness_error', '0.0
5373':'concavity_error', '0.01587':'concave_points_error', '0.03003':'symmetry_e
rror','0.006193':'fractal_dimension_error','25.38':'radius_worst', '17.33':'text
ure_worst', '184.6':'perimeter_worst', '2019':'area_worst', '0.1622':'smoothness
_worst', '0.6656':'compactness_worst','0.7119':'concavity_worst', '0.2654':'conc
ave_points_worst', '0.4601':'symmetry_worst', '0.1189':'fractal_dimension_worst'
}, inplace=True)
In [114]:
df.head()
Out[114]:
0 1 2 3 4 5 6 7 8 9 ... 22
0 842302 M 17.99 10.38 122.80 1001.0 0.11840 0.27760 0.3001 0.14710 ... 25.38 1
1 842517 M 20.57 17.77 132.90 1326.0 0.08474 0.07864 0.0869 0.07017 ... 24.99 2
2 84300903 M 19.69 21.25 130.00 1203.0 0.10960 0.15990 0.1974 0.12790 ... 23.57 2
3 84348301 M 11.42 20.38 77.58 386.1 0.14250 0.28390 0.2414 0.10520 ... 14.91 2
4 84358402 M 20.29 14.34 135.10 1297.0 0.10030 0.13280 0.1980 0.10430 ... 22.54 1
In [115]:
from sklearn.utils import shuffle

df1 = shuffle(df)
df1.head()
Out[115]:
0 1 2 3 4 5 6 7 8 9 ... 22
24 852552 M 16.65 21.38 110.00 904.6 0.11210 0.14570 0.15250 0.09170 ... 26.46
224 8813129 B 13.27 17.02 84.55 546.4 0.08445 0.04994 0.03554 0.02456 ... 15.14
455 9112085 B 13.38 30.72 86.34 557.2 0.09245 0.07426 0.02819 0.03264 ... 15.05
443 909777 B 10.57 18.32 66.82 340.9 0.08142 0.04462 0.01993 0.01111 ... 10.94
5 843786 M 12.45 15.70 82.57 477.1 0.12780 0.17000 0.15780 0.08089 ... 15.47
01/10/2019 Untitled
In [116]:
df1.describe()
Out[116]:
0 2 3 4 5 6
count 5.690000e+02 569.000000 569.000000 569.000000 569.000000 569.000000 569.00000
mean 3.037183e+07 14.127292 19.289649 91.969033 654.889104 0.096360 0.10434
std 1.250206e+08 3.524049 4.301036 24.298981 351.914129 0.014064 0.05281
min 8.670000e+03 6.981000 9.710000 43.790000 143.500000 0.052630 0.01938
25% 8.692180e+05 11.700000 16.170000 75.170000 420.300000 0.086370 0.06492
50% 9.060240e+05 13.370000 18.840000 86.240000 551.100000 0.095870 0.09263
75% 8.813129e+06 15.780000 21.800000 104.100000 782.700000 0.105300 0.13040
max 9.113205e+08 28.110000 39.280000 188.500000 2501.000000 0.163400 0.34540
In [117]:
df1.columns
Out[117]:
Int64Index([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,

14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
31],
dtype='int64')
In [118]:
df1.shape
Out[118]:
(569, 32)
In [ ]:
In [132]:
X = df1.iloc[:,2:]
y = df1.iloc[:,1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_
state=42, stratify=y)
01/10/2019 Untitled
In [133]:
dt_default = DecisionTreeClassifier()
dt_default.fit(X_train, y_train)
Out[133]:
DecisionTreeClassifier(class_weight=None, criterion='gini', max_dept

h=None,
max_features=None, max_leaf_nodes=None,
min_impurity_decrease=0.0, min_impurity_split
=None,
min_samples_leaf=1, min_samples_split=2,
min_weight_fraction_leaf=0.0, presort=False,
random_state=None, splitter='best')
In [134]:
y_pred = dt_default.predict(X_test)
In [135]:
print("Accuracy:",accuracy_score(y_test, y_pred))
Accuracy: 0.9298245614035088
In [136]:
rf = RandomForestClassifier(n_estimators=100)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)
print("Accuracy:",accuracy_score(y_test, y_pred))
Accuracy: 0.9824561403508771
In [137]:
confusion_matrix(y_test,dt_default.predict(X_test))
Out[137]:
array([[69, 3],
[ 5, 37]])
In [138]:
confusion_matrix(y_test,rf.predict(X_test))
Out[138]:
array([[72, 0],
[ 2, 40]])
In [139]:
confusion_matrix(y_test,dt_default.predict(X_test))
Out[139]:
array([[69, 3],
[ 5, 37]])
01/10/2019 Untitled
In [ ]:
f1_score(y_test,rf.predict(X_test))
In [ ]:
f1_score(y_test,dt_default.predict(X_test))
In [ ]:
Question 3
part (a)
In [14]:
def J(w1, w2):

J = 0.5 * ((w2-w1)**2 + (1-w1)**2)
return J
In [53]:
import numpy as np
import matplotlib.pyplot as plt
x = np.linspace(-20, 20, 80)

y = np.linspace(-20, 20, 80)
X,Y = np.meshgrid(x,y)
z = J(X,Y)
plt.contour(X,Y,z, 100)
Out[53]:
<matplotlib.contour.QuadContourSet at 0x12583c198>
part(c)
01/10/2019 Untitled
In [ ]:
def grd_fun(X,Y):
delta_x = 2*X-Y-1
delta_y = Y-X
return [delta_x,delta_y]
arb_x = np.random.randint(-20,20,3)
arb_y = np.random.randint(-20,20,3)
grad = grd_fun(arb_x,arb_y)
plt.quiver(arb_x,arb_y,grad[0],grad[1])
plt.contour(X,Y,z,100)
In [ ]:
In [ ]:

2 and 3

Uploaded by

Document Information

Original Description:

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

2 and 3

Uploaded by

Copyright:

Available Formats

01/10/2019 Untitled

Int64Index([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,

from sklearn.utils import shuffle

count 5.690000e+02 569.000000 569.000000 569.000000 569.000000 569.000000 569.00000

mean 3.037183e+07 14.127292 19.289649 91.969033 654.889104 0.096360 0.10434

std 1.250206e+08 3.524049 4.301036 24.298981 351.914129 0.014064 0.05281

min 8.670000e+03 6.981000 9.710000 43.790000 143.500000 0.052630 0.01938

25% 8.692180e+05 11.700000 16.170000 75.170000 420.300000 0.086370 0.06492

50% 9.060240e+05 13.370000 18.840000 86.240000 551.100000 0.095870 0.09263

75% 8.813129e+06 15.780000 21.800000 104.100000 782.700000 0.105300 0.13040

max 9.113205e+08 28.110000 39.280000 188.500000 2501.000000 0.163400 0.34540

Int64Index([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,

DecisionTreeClassifier(class_weight=None, criterion='gini', max_dept

def J(w1, w2):

x = np.linspace(-20, 20, 80)

You might also like