You are on page 1of 6

B2_47_Sewank_Nande _Breast_Cancer

In [1]: import pandas as pd import numpy


as np import matplotlib.pyplot
as plt import seaborn as sns

In [2]: data=pd.read_csv('data.csv')

In [3]: data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 569 entries, 0 to 568 Data
columns (total 33 columns):
# Column Non-Null Count Dtype -
-- ------ -------------- -----
0 id 569 non-null int64
1 diagnosis 569 non-null object
2 radius_mean 569 non-null float64
3 texture_mean 569 non-null float64
4 perimeter_mean 569 non-null
float64
5 area_mean 569 non-null float64
6 smoothness_mean 569 non-null float64
7 compactness_mean 569 non-null float64
8 concavity_mean 569 non-null
float64
9 concave points_mean 569 non-null float64
10 symmetry_mean 569 non-null float64
11 fractal_dimension_mean 569 non-null float64
12 radius_se 569 non-null
float64
13 texture_se 569 non-null float64
14 perimeter_se 569 non-null float64
15 area_se 569 non-null float64
16 smoothness_se 569 non-null
float64
17 compactness_se 569 non-null float64
18 concavity_se 569 non-null float64
19 concave points_se 569 non-null float64
20 symmetry_se 569 non-null
float64
21 fractal_dimension_se 569 non-null float64
22 radius_worst 569 non-null float64
23 texture_worst 569 non-null float64
24 perimeter_worst 569 non-null
float64
25 area_worst 569 non-null float64
26 smoothness_worst 569 non-null float64
27 compactness_worst 569 non-null float64
28 concavity_worst 569 non-null
float64
29 concave points_worst 569 non-null float64
30 symmetry_worst 569 non-null float64
31 fractal_dimension_worst 569 non-null float64
32 Unnamed: 32 0 non-null float64
dtypes: float64(31), int64(1), object(1) memory
usage: 146.8+ KB

In [4]: data.head()
Out[4]:
id diagnosis radius_mean texture_mean perimeter_mean area_mean smoothness_mea

1/6
0 842302 M 17.99 10.38 122.80 1001.0 0.1184

1 842517 M 20.57 17.77 132.90 1326.0 0.0847

2 84300903 M 19.69 21.25 130.00 1203.0 0.1096

3 84348301 M 11.42 20.38 77.58 386.1 0.1425

4 84358402 M 20.29 14.34 135.10 1297.0 0.1003

5 rows × 33 columns
 

In [5]: data.drop(data.columns[[-1,0]],axis=1,inplace=True)
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 569 entries, 0 to 568 Data
columns (total 31 columns):
# Column Non-Null Count Dtype -
-- ------ -------------- -----
0 diagnosis 569 non-null object
1 radius_mean 569 non-null float64
2 texture_mean 569 non-null float64
3 perimeter_mean 569 non-null float64
4 area_mean 569 non-null
float64
5 smoothness_mean 569 non-null float64
6 compactness_mean 569 non-null float64
7 concavity_mean 569 non-null float64
8 concave points_mean 569 non-null
float64
9 symmetry_mean 569 non-null float64
10 fractal_dimension_mean 569 non-null float64
11 radius_se 569 non-null float64
12 texture_se 569 non-null
float64
13 perimeter_se 569 non-null float64
14 area_se 569 non-null float64
15 smoothness_se 569 non-null float64
16 compactness_se 569 non-null
float64
17 concavity_se 569 non-null float64
18 concave points_se 569 non-null float64
19 symmetry_se 569 non-null float64
20 fractal_dimension_se 569 non-null
float64
21 radius_worst 569 non-null float64
22 texture_worst 569 non-null float64
23 perimeter_worst 569 non-null float64
24 area_worst 569 non-null
float64
25 smoothness_worst 569 non-null float64
26 compactness_worst 569 non-null float64
27 concavity_worst 569 non-null float64
28 concave points_worst 569 non-null
float64
29 symmetry_worst 569 non-null float64
30 fractal_dimension_worst 569 non-null float64
dtypes: float64(30), object(1) memory usage: 137.9+ KB

In [6]: X=data.iloc[:,2:32].values Y=data.iloc[:,1].values

2/6
In [7]: Y
array([17.99 , 20.57 , 19.69 , 11.42 , 20.29 , 12.45 , 18.25 , 13.71 ,
Out[7]:
13. , 12.46 , 16.02 , 15.78 , 19.17 , 15.85 , 13.73 , 14.54 ,
14.68 , 16.13 , 19.81 , 13.54 , 13.08 , 9.504, 15.34 , 21.16 ,
16.65 , 17.14 , 14.58 , 18.61 , 15.3 , 17.57 , 18.63 , 11.84 ,
17.02 , 19.27 , 16.13 , 16.74 , 14.25 , 13.03 , 14.99 , 13.48 ,
13.44 , 10.95 , 19.07 , 13.28 , 13.17 , 18.65 , 8.196, 13.17 ,
12.05 , 13.49 , 11.76 , 13.64 , 11.94 , 18.22 , 15.1 , 11.52 ,
19.21 , 14.71 , 13.05 , 8.618, 10.17 , 8.598, 14.25 , 9.173,
12.68 , 14.78 , 9.465, 11.31 , 9.029, 12.78 , 18.94 , 8.888, 17.2
, 13.8 , 12.31 , 16.07 , 13.53 , 18.05 , 20.18 , 12.86 , 11.45 ,
13.34 , 25.22 , 19.1 , 12. , 18.46 , 14.48 , 19.02 , 12.36 ,
14.64 , 14.62 , 15.37 , 13.27 , 13.45 , 15.06 , 20.26 , 12.18 ,
9.787, 11.6 , 14.42 , 13.61 , 6.981, 12.18 , 9.876, 10.49 , 13.11
, 11.64 , 12.36 , 22.27 , 11.34 , 9.777, 12.63 , 14.26 , 10.51 ,
8.726, 11.93 , 8.95 , 14.87 , 15.78 , 17.95 , 11.41 , 18.66 , 24.25
, 14.5 , 13.37 , 13.85 , 13.61 , 19. , 15.1 , 19.79 , 12.19 ,
15.46 , 16.16 , 15.71 , 18.45 , 12.77 , 11.71 , 11.43 , 14.95 ,
11.28 , 9.738, 16.11 , 11.43 , 12.9 , 10.75 , 11.9 , 11.8 ,
14.95 , 14.44 , 13.74 , 13. , 8.219,
9.731, 11.15 , 13.15 , 12.25 , 17.68 , 16.84 , 12.06 , 10.9 ,
11.75 , 19.19 , 19.59 , 12.34 , 23.27 , 14.97 , 10.8 , 16.78 ,
17.47 , 14.97 , 12.32 , 13.43 , 15.46 , 11.08 , 10.66 , 8.671,
9.904, 16.46 , 13.01 , 12.81 , 27.22 , 21.09 , 15.7 , 11.41 ,
15.28 , 10.08 , 18.31 , 11.71 , 11.81 , 12.3 , 14.22 , 12.77 ,
9.72 , 12.34 , 14.86 , 12.91 , 13.77 , 18.08 , 19.18 , 14.45 ,
12.23 , 17.54 , 23.29 , 13.81 , 12.47 , 15.12 , 9.876, 17.01 ,
13.11 , 15.27 , 20.58 , 11.84 , 28.11 , 17.42 , 14.19 , 13.86 ,
11.89 , 10.2 , 19.8 , 19.53 , 13.65 , 13.56 , 10.18 , 15.75 ,
13.27 , 14.34 , 10.44 , 15. , 12.62 , 12.83 , 17.05 , 11.32 ,
11.22 , 20.51 , 9.567, 14.03 , 23.21 , 20.48 , 14.22 , 17.46 ,
13.64 , 12.42 , 11.3 , 13.75 , 19.4 , 10.48 , 13.2 , 12.89 ,
10.65 , 11.52 , 20.94 , 11.5 , 19.73 , 17.3 , 19.45 , 13.96 ,
19.55 , 15.32 , 15.66 , 15.53 , 20.31 , 17.35 , 17.29 , 15.61 ,
17.19 , 20.73 , 10.6 , 13.59 , 12.87 , 10.71 , 14.29 , 11.29 ,
21.75 , 9.742, 17.93 , 11.89 , 11.33 , 18.81 , 13.59 , 13.85 ,
19.16 , 11.74 , 19.4 , 16.24 , 12.89 , 12.58 , 11.94 , 12.89 ,
11.26 , 11.37 , 14.41 , 14.96 , 12.95 , 11.85 , 12.72 , 13.77 ,
10.91 , 11.76 , 14.26 , 10.51 , 19.53 , 12.46 , 20.09 , 10.49 ,
11.46 , 11.6 , 13.2 , 9. , 13.5 , 13.05 , 11.7 , 14.61 ,
12.76 , 11.54 , 8.597, 12.49 , 12.18 , 18.22 , 9.042, 12.43 ,
10.25 , 20.16 , 12.86 , 20.34 , 12.2 , 12.67 , 14.11 , 12.03 ,
16.27 , 16.26 , 16.03 , 12.98 , 11.22 , 11.25 , 12.3 , 17.06 ,
12.99 , 18.77 , 10.05 , 23.51 , 14.42 , 9.606, 11.06 , 19.68 ,
11.71 , 10.26 , 12.06 , 14.76 , 11.47 , 11.95 , 11.66 , 15.75 ,
25.73 , 15.08 , 11.14 , 12.56 , 13.05 , 13.87 , 8.878, 9.436,
12.54 , 13.3 , 12.76 , 16.5 , 13.4 , 20.44 , 20.2 , 12.21 ,
21.71 , 22.01 , 16.35 , 15.19 , 21.37 , 20.64 , 13.69 , 16.17 ,
10.57 , 13.46 , 13.66 , 11.08 , 11.27 , 11.04 , 12.05 , 12.39 ,
13.28 , 14.6 , 12.21 , 13.88 , 11.27 , 19.55 , 10.26 , 8.734,
15.49 , 21.61 , 12.1 , 14.06 , 13.51 , 12.8 , 11.06 , 11.8 ,
17.91 , 11.93 , 12.96 , 12.94 , 12.34 , 10.94 , 16.14 , 12.85 ,
17.99 , 12.27 , 11.36 , 11.04 , 9.397, 14.99 , 15.13 , 11.89 ,
9.405, 15.5 , 12.7 , 11.16 , 11.57 , 14.69 , 11.61 , 13.66 ,
9.742, 10.03 , 10.48 , 10.8 , 11.13 , 12.72 , 14.9 , 12.4 ,
20.18 , 18.82 , 14.86 , 13.98 , 12.87 , 14.04 , 13.85 , 14.02 ,
10.97 , 17.27 , 13.78 , 10.57 , 18.03 , 11.99 , 17.75 , 14.8 ,
14.53 , 21.1 , 11.87 , 19.59 , 12. , 14.53 , 12.62 , 13.38 ,
11.63 , 13.21 , 13. , 9.755, 17.08 , 27.42 , 14.4 , 11.6 ,
13.17 , 13.24 , 13.14 , 9.668, 17.6 , 11.62 , 9.667, 12.04 ,
14.92 , 12.27 , 10.88 , 12.83 , 14.2 , 13.9 , 11.49 , 16.25 ,
12.16 , 13.9 , 13.47 , 13.7 , 15.73 , 12.45 , 14.64 , 19.44 ,
11.68 , 16.69 , 12.25 , 17.85 , 18.01 , 12.46 , 13.16 , 14.87 ,
12.65 , 12.47 , 18.49 , 20.59 , 15.04 , 13.82 , 12.54 , 23.09 ,
9.268, 9.676, 12.22 , 11.06 , 16.3 , 15.46 , 11.74 , 14.81 ,

3/6
13.4 , 14.58 , 15.05 , 11.34 , 18.31 , 19.89 , 12.88 , 12.75 ,
9.295, 24.63 , 11.26 , 13.71 , 9.847, 8.571, 13.46 , 12.34 ,
13.94 , 12.07 , 11.75 , 11.67 , 13.68 , 20.47 , 10.96 , 20.55 ,
14.27 , 11.69 , 7.729, 7.691, 11.54 , 14.47 , 14.74 , 13.21 ,
13.87 , 13.62 , 10.32 , 10.26 , 9.683, 10.82 , 10.86 , 11.13 ,
12.77 , 9.333, 12.88 , 10.29 , 10.16 , 9.423, 14.59 , 11.51 ,
14.05 , 11.2 , 15.22 , 20.92 , 21.56 , 20.13 , 16.6 , 20.6 ,
7.76 ])

In [8]: display(data.isna().sum()) data=data.drop_duplicates()


data=pd.get_dummies(data,drop_first=True)

diagnosis 0
radius_mean 0
texture_mean 0
perimeter_mean 0
area_mean 0
smoothness_mean 0
compactness_mean 0
concavity_mean 0
concave points_mean 0
symmetry_mean 0
fractal_dimension_mean 0
radius_se 0
texture_se 0
perimeter_se 0
area_se 0
smoothness_se 0
compactness_se 0
concavity_se 0
concave points_se 0
symmetry_se 0
fractal_dimension_se 0
radius_worst 0
texture_worst 0
perimeter_worst 0
area_worst 0
smoothness_worst 0
compactness_worst 0
concavity_worst 0
concave points_worst 0
symmetry_worst 0
fractal_dimension_worst 0
dtype: int64

In [9]: from sklearn.model_selection import train_test_split


from sklearn.preprocessing import StandardScaler
sc=StandardScaler() X=sc.fit_transform(X)
y=data["diagnosis_M"]
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.30,random_state=42)

In [10]: from sklearn.ensemble import RandomForestClassifier


forest=RandomForestClassifier()
forest.fit(X_train,y_train)

RandomForestClassifier()
Out[10]:

In [11]: y_pred_1=forest.predict(X_test) print('\nRandom


forest:')
print('Misclassified samples: %d' % (y_test !=y_pred_1).sum())

4/6
from sklearn.metrics import accuracy_score print('Accuracy:
%.2F' % accuracy_score(y_test,y_pred_1))

Random forest:
Misclassified samples: 6
Accuracy: 0.96

In [12]: from sklearn.preprocessing import LabelEncoder


labelencoder = LabelEncoder()

X[:,0]=labelencoder.fit_transform(X[:,0])

In [13]: y_pred=np.zeros(10000)
y_test=np.zeros(10000)
indices1=np.random.randint(0,10000,30000)
indices2=np.random.randint(0,10000,30000)
indices3=np.random.randint(0,10000,30000)
indices4=np.random.randint(0,10000,30000)
y_pred[indices1]=1 y_test[indices2]=1
y_pred[indices3]=1 y_test[indices4]=1

In [14]: np.sum(y_test)
Out[14]: 9974.0
In [15]: np.sum(y_pred)
Out[15]: 9981.0
In [16]: from sklearn.metrics import accuracy_score print('Accuracy
score:',accuracy_score(y_test,y_pred))

Accuracy score: 0.9955

In [17]: from sklearn.metrics import confusion_matrix

confusion_matrix(y_test,y_pred)

array([[ 0, 26],
Out[17]:
[ 19, 9955]], dtype=int64)

In [18]: from sklearn.metrics import classification_report


print(classification_report(y_test,y_pred))

precision recall f1-score support

0.0 0.00 0.00 0.00 26


1.0 1.00 1.00 1.00 9974

accuracy 1.00 10000


macro avg 0.50 0.50 0.50 10000
weighted avg 0.99 1.00 1.00 10000

In [19]: from sklearn.metrics import precision_recall_curve precision,


recall,thresholds =precision_recall_curve(y_test,y_pred)

In [20]: precision

array([0.9974 , 0.99739505, 1. ])
Out[20]:
In [21]: recall

array([1. , 0.99809505, 0. ])

Out[21]:

5/6
In [22]: thresholds

array([0., 1.])
Out[22]:

6/6

You might also like