You are on page 1of 6

01/10/2019 Untitled

Question 2

In [110]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from sklearn.metrics import confusion_matrix,precision_recall_curve,f1_score,acc
uracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

In [111]:

df = pd.read_csv("http://archive.ics.uci.edu/ml/machine-learning-databases/breas
t-cancer-wisconsin/wdbc.data",header=None)
df.head()

Out[111]:

0 1 2 3 4 5 6 7 8 9 ... 22

0 842302 M 17.99 10.38 122.80 1001.0 0.11840 0.27760 0.3001 0.14710 ... 25.38 1

1 842517 M 20.57 17.77 132.90 1326.0 0.08474 0.07864 0.0869 0.07017 ... 24.99 2

2 84300903 M 19.69 21.25 130.00 1203.0 0.10960 0.15990 0.1974 0.12790 ... 23.57 2

3 84348301 M 11.42 20.38 77.58 386.1 0.14250 0.28390 0.2414 0.10520 ... 14.91 2

4 84358402 M 20.29 14.34 135.10 1297.0 0.10030 0.13280 0.1980 0.10430 ... 22.54 1

5 rows × 32 columns

In [112]:

df.columns

Out[112]:

Int64Index([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,


14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
31],
dtype='int64')

localhost:8888/lab 1/6
01/10/2019 Untitled

In [113]:

df.rename(columns={'842302':'Id', 'M':'diagnosis','17.99':'radius_mean','10.38':
'texture_mean', '122.8':'perimeter_mean', '1001':'area_mean','0.1184':'smoothnes
s_mean', '0.2776':'compactness_mean','0.3001':'concavity_mean', '0.1471':'concav
e_points_mean','0.2419':'symmetry_mean', '0.07871':'fractal_dimension_mean','1.0
95':'radius_error', '0.9053':'texture_error', '8.589':'perimeter_error','153.4':
'area_error', '0.006399':'smoothness_error', '0.04904':'compactness_error', '0.0
5373':'concavity_error', '0.01587':'concave_points_error', '0.03003':'symmetry_e
rror','0.006193':'fractal_dimension_error','25.38':'radius_worst', '17.33':'text
ure_worst', '184.6':'perimeter_worst', '2019':'area_worst', '0.1622':'smoothness
_worst', '0.6656':'compactness_worst','0.7119':'concavity_worst', '0.2654':'conc
ave_points_worst', '0.4601':'symmetry_worst', '0.1189':'fractal_dimension_worst'
}, inplace=True)

In [114]:

df.head()

Out[114]:

0 1 2 3 4 5 6 7 8 9 ... 22

0 842302 M 17.99 10.38 122.80 1001.0 0.11840 0.27760 0.3001 0.14710 ... 25.38 1

1 842517 M 20.57 17.77 132.90 1326.0 0.08474 0.07864 0.0869 0.07017 ... 24.99 2

2 84300903 M 19.69 21.25 130.00 1203.0 0.10960 0.15990 0.1974 0.12790 ... 23.57 2

3 84348301 M 11.42 20.38 77.58 386.1 0.14250 0.28390 0.2414 0.10520 ... 14.91 2

4 84358402 M 20.29 14.34 135.10 1297.0 0.10030 0.13280 0.1980 0.10430 ... 22.54 1

5 rows × 32 columns

In [115]:

from sklearn.utils import shuffle


df1 = shuffle(df)
df1.head()

Out[115]:

0 1 2 3 4 5 6 7 8 9 ... 22

24 852552 M 16.65 21.38 110.00 904.6 0.11210 0.14570 0.15250 0.09170 ... 26.46

224 8813129 B 13.27 17.02 84.55 546.4 0.08445 0.04994 0.03554 0.02456 ... 15.14

455 9112085 B 13.38 30.72 86.34 557.2 0.09245 0.07426 0.02819 0.03264 ... 15.05

443 909777 B 10.57 18.32 66.82 340.9 0.08142 0.04462 0.01993 0.01111 ... 10.94

5 843786 M 12.45 15.70 82.57 477.1 0.12780 0.17000 0.15780 0.08089 ... 15.47

5 rows × 32 columns

localhost:8888/lab 2/6
01/10/2019 Untitled

In [116]:

df1.describe()

Out[116]:

0 2 3 4 5 6

count 5.690000e+02 569.000000 569.000000 569.000000 569.000000 569.000000 569.00000

mean 3.037183e+07 14.127292 19.289649 91.969033 654.889104 0.096360 0.10434

std 1.250206e+08 3.524049 4.301036 24.298981 351.914129 0.014064 0.05281

min 8.670000e+03 6.981000 9.710000 43.790000 143.500000 0.052630 0.01938

25% 8.692180e+05 11.700000 16.170000 75.170000 420.300000 0.086370 0.06492

50% 9.060240e+05 13.370000 18.840000 86.240000 551.100000 0.095870 0.09263

75% 8.813129e+06 15.780000 21.800000 104.100000 782.700000 0.105300 0.13040

max 9.113205e+08 28.110000 39.280000 188.500000 2501.000000 0.163400 0.34540

8 rows × 31 columns

In [117]:

df1.columns

Out[117]:

Int64Index([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,


14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
31],
dtype='int64')

In [118]:

df1.shape

Out[118]:

(569, 32)

In [ ]:

In [132]:

X = df1.iloc[:,2:]
y = df1.iloc[:,1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_
state=42, stratify=y)

localhost:8888/lab 3/6
01/10/2019 Untitled

In [133]:

dt_default = DecisionTreeClassifier()
dt_default.fit(X_train, y_train)

Out[133]:

DecisionTreeClassifier(class_weight=None, criterion='gini', max_dept


h=None,
max_features=None, max_leaf_nodes=None,
min_impurity_decrease=0.0, min_impurity_split
=None,
min_samples_leaf=1, min_samples_split=2,
min_weight_fraction_leaf=0.0, presort=False,
random_state=None, splitter='best')

In [134]:

y_pred = dt_default.predict(X_test)

In [135]:

print("Accuracy:",accuracy_score(y_test, y_pred))

Accuracy: 0.9298245614035088

In [136]:

rf = RandomForestClassifier(n_estimators=100)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)
print("Accuracy:",accuracy_score(y_test, y_pred))

Accuracy: 0.9824561403508771

In [137]:

confusion_matrix(y_test,dt_default.predict(X_test))

Out[137]:

array([[69, 3],
[ 5, 37]])

In [138]:

confusion_matrix(y_test,rf.predict(X_test))

Out[138]:

array([[72, 0],
[ 2, 40]])

In [139]:

confusion_matrix(y_test,dt_default.predict(X_test))

Out[139]:

array([[69, 3],
[ 5, 37]])

localhost:8888/lab 4/6
01/10/2019 Untitled

In [ ]:

f1_score(y_test,rf.predict(X_test))

In [ ]:

f1_score(y_test,dt_default.predict(X_test))

In [ ]:

Question 3

part (a)

In [14]:

def J(w1, w2):


J = 0.5 * ((w2-w1)**2 + (1-w1)**2)
return J

In [53]:

import numpy as np
import matplotlib.pyplot as plt

x = np.linspace(-20, 20, 80)


y = np.linspace(-20, 20, 80)

X,Y = np.meshgrid(x,y)
z = J(X,Y)

plt.contour(X,Y,z, 100)

Out[53]:

<matplotlib.contour.QuadContourSet at 0x12583c198>

part(c)

localhost:8888/lab 5/6
01/10/2019 Untitled

In [ ]:

def grd_fun(X,Y):
delta_x = 2*X-Y-1
delta_y = Y-X
return [delta_x,delta_y]

arb_x = np.random.randint(-20,20,3)
arb_y = np.random.randint(-20,20,3)

grad = grd_fun(arb_x,arb_y)

plt.quiver(arb_x,arb_y,grad[0],grad[1])
plt.contour(X,Y,z,100)

In [ ]:

In [ ]:

localhost:8888/lab 6/6

You might also like