Breast Cancer

B2_47_Sewank_Nande _Breast_Cancer
In [1]: import pandas as pd import numpy

as np import matplotlib.pyplot
as plt import seaborn as sns
In [2]: data=pd.read_csv('data.csv')
In [3]: data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 569 entries, 0 to 568 Data
columns (total 33 columns):
# Column Non-Null Count Dtype -
-- ------ -------------- -----
0 id 569 non-null int64
1 diagnosis 569 non-null object
2 radius_mean 569 non-null float64
3 texture_mean 569 non-null float64
4 perimeter_mean 569 non-null
float64
5 area_mean 569 non-null float64
6 smoothness_mean 569 non-null float64
7 compactness_mean 569 non-null float64
8 concavity_mean 569 non-null
float64
9 concave points_mean 569 non-null float64
10 symmetry_mean 569 non-null float64
11 fractal_dimension_mean 569 non-null float64
12 radius_se 569 non-null
float64
13 texture_se 569 non-null float64
14 perimeter_se 569 non-null float64
15 area_se 569 non-null float64
16 smoothness_se 569 non-null
float64
17 compactness_se 569 non-null float64
18 concavity_se 569 non-null float64
19 concave points_se 569 non-null float64
20 symmetry_se 569 non-null
float64
21 fractal_dimension_se 569 non-null float64
22 radius_worst 569 non-null float64
23 texture_worst 569 non-null float64
24 perimeter_worst 569 non-null
float64
25 area_worst 569 non-null float64
26 smoothness_worst 569 non-null float64
27 compactness_worst 569 non-null float64
28 concavity_worst 569 non-null
float64
29 concave points_worst 569 non-null float64
30 symmetry_worst 569 non-null float64
31 fractal_dimension_worst 569 non-null float64
32 Unnamed: 32 0 non-null float64
dtypes: float64(31), int64(1), object(1) memory
usage: 146.8+ KB
In [4]: data.head()
Out[4]:
id diagnosis radius_mean texture_mean perimeter_mean area_mean smoothness_mea
1/6
0 842302 M 17.99 10.38 122.80 1001.0 0.1184
1 842517 M 20.57 17.77 132.90 1326.0 0.0847
2 84300903 M 19.69 21.25 130.00 1203.0 0.1096
3 84348301 M 11.42 20.38 77.58 386.1 0.1425
4 84358402 M 20.29 14.34 135.10 1297.0 0.1003
5 rows × 33 columns
 
In [5]: data.drop(data.columns[[-1,0]],axis=1,inplace=True)
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 569 entries, 0 to 568 Data
columns (total 31 columns):
# Column Non-Null Count Dtype -
-- ------ -------------- -----
0 diagnosis 569 non-null object
1 radius_mean 569 non-null float64
2 texture_mean 569 non-null float64
3 perimeter_mean 569 non-null float64
4 area_mean 569 non-null
float64
5 smoothness_mean 569 non-null float64
6 compactness_mean 569 non-null float64
7 concavity_mean 569 non-null float64
8 concave points_mean 569 non-null
float64
9 symmetry_mean 569 non-null float64
10 fractal_dimension_mean 569 non-null float64
11 radius_se 569 non-null float64
12 texture_se 569 non-null
float64
13 perimeter_se 569 non-null float64
14 area_se 569 non-null float64
15 smoothness_se 569 non-null float64
16 compactness_se 569 non-null
float64
17 concavity_se 569 non-null float64
18 concave points_se 569 non-null float64
19 symmetry_se 569 non-null float64
20 fractal_dimension_se 569 non-null
float64
21 radius_worst 569 non-null float64
22 texture_worst 569 non-null float64
23 perimeter_worst 569 non-null float64
24 area_worst 569 non-null
float64
25 smoothness_worst 569 non-null float64
26 compactness_worst 569 non-null float64
27 concavity_worst 569 non-null float64
28 concave points_worst 569 non-null
float64
29 symmetry_worst 569 non-null float64
30 fractal_dimension_worst 569 non-null float64
dtypes: float64(30), object(1) memory usage: 137.9+ KB
In [6]: X=data.iloc[:,2:32].values Y=data.iloc[:,1].values
2/6
In [7]: Y
array([17.99 , 20.57 , 19.69 , 11.42 , 20.29 , 12.45 , 18.25 , 13.71 ,
Out[7]:
13. , 12.46 , 16.02 , 15.78 , 19.17 , 15.85 , 13.73 , 14.54 ,
14.68 , 16.13 , 19.81 , 13.54 , 13.08 , 9.504, 15.34 , 21.16 ,
16.65 , 17.14 , 14.58 , 18.61 , 15.3 , 17.57 , 18.63 , 11.84 ,
17.02 , 19.27 , 16.13 , 16.74 , 14.25 , 13.03 , 14.99 , 13.48 ,
13.44 , 10.95 , 19.07 , 13.28 , 13.17 , 18.65 , 8.196, 13.17 ,
12.05 , 13.49 , 11.76 , 13.64 , 11.94 , 18.22 , 15.1 , 11.52 ,
19.21 , 14.71 , 13.05 , 8.618, 10.17 , 8.598, 14.25 , 9.173,
12.68 , 14.78 , 9.465, 11.31 , 9.029, 12.78 , 18.94 , 8.888, 17.2
, 13.8 , 12.31 , 16.07 , 13.53 , 18.05 , 20.18 , 12.86 , 11.45 ,
13.34 , 25.22 , 19.1 , 12. , 18.46 , 14.48 , 19.02 , 12.36 ,
14.64 , 14.62 , 15.37 , 13.27 , 13.45 , 15.06 , 20.26 , 12.18 ,
9.787, 11.6 , 14.42 , 13.61 , 6.981, 12.18 , 9.876, 10.49 , 13.11
, 11.64 , 12.36 , 22.27 , 11.34 , 9.777, 12.63 , 14.26 , 10.51 ,
8.726, 11.93 , 8.95 , 14.87 , 15.78 , 17.95 , 11.41 , 18.66 , 24.25
, 14.5 , 13.37 , 13.85 , 13.61 , 19. , 15.1 , 19.79 , 12.19 ,
15.46 , 16.16 , 15.71 , 18.45 , 12.77 , 11.71 , 11.43 , 14.95 ,
11.28 , 9.738, 16.11 , 11.43 , 12.9 , 10.75 , 11.9 , 11.8 ,
14.95 , 14.44 , 13.74 , 13. , 8.219,
9.731, 11.15 , 13.15 , 12.25 , 17.68 , 16.84 , 12.06 , 10.9 ,
11.75 , 19.19 , 19.59 , 12.34 , 23.27 , 14.97 , 10.8 , 16.78 ,
17.47 , 14.97 , 12.32 , 13.43 , 15.46 , 11.08 , 10.66 , 8.671,
9.904, 16.46 , 13.01 , 12.81 , 27.22 , 21.09 , 15.7 , 11.41 ,
15.28 , 10.08 , 18.31 , 11.71 , 11.81 , 12.3 , 14.22 , 12.77 ,
9.72 , 12.34 , 14.86 , 12.91 , 13.77 , 18.08 , 19.18 , 14.45 ,
12.23 , 17.54 , 23.29 , 13.81 , 12.47 , 15.12 , 9.876, 17.01 ,
13.11 , 15.27 , 20.58 , 11.84 , 28.11 , 17.42 , 14.19 , 13.86 ,
11.89 , 10.2 , 19.8 , 19.53 , 13.65 , 13.56 , 10.18 , 15.75 ,
13.27 , 14.34 , 10.44 , 15. , 12.62 , 12.83 , 17.05 , 11.32 ,
11.22 , 20.51 , 9.567, 14.03 , 23.21 , 20.48 , 14.22 , 17.46 ,
13.64 , 12.42 , 11.3 , 13.75 , 19.4 , 10.48 , 13.2 , 12.89 ,
10.65 , 11.52 , 20.94 , 11.5 , 19.73 , 17.3 , 19.45 , 13.96 ,
19.55 , 15.32 , 15.66 , 15.53 , 20.31 , 17.35 , 17.29 , 15.61 ,
17.19 , 20.73 , 10.6 , 13.59 , 12.87 , 10.71 , 14.29 , 11.29 ,
21.75 , 9.742, 17.93 , 11.89 , 11.33 , 18.81 , 13.59 , 13.85 ,
19.16 , 11.74 , 19.4 , 16.24 , 12.89 , 12.58 , 11.94 , 12.89 ,
11.26 , 11.37 , 14.41 , 14.96 , 12.95 , 11.85 , 12.72 , 13.77 ,
10.91 , 11.76 , 14.26 , 10.51 , 19.53 , 12.46 , 20.09 , 10.49 ,
11.46 , 11.6 , 13.2 , 9. , 13.5 , 13.05 , 11.7 , 14.61 ,
12.76 , 11.54 , 8.597, 12.49 , 12.18 , 18.22 , 9.042, 12.43 ,
10.25 , 20.16 , 12.86 , 20.34 , 12.2 , 12.67 , 14.11 , 12.03 ,
16.27 , 16.26 , 16.03 , 12.98 , 11.22 , 11.25 , 12.3 , 17.06 ,
12.99 , 18.77 , 10.05 , 23.51 , 14.42 , 9.606, 11.06 , 19.68 ,
11.71 , 10.26 , 12.06 , 14.76 , 11.47 , 11.95 , 11.66 , 15.75 ,
25.73 , 15.08 , 11.14 , 12.56 , 13.05 , 13.87 , 8.878, 9.436,
12.54 , 13.3 , 12.76 , 16.5 , 13.4 , 20.44 , 20.2 , 12.21 ,
21.71 , 22.01 , 16.35 , 15.19 , 21.37 , 20.64 , 13.69 , 16.17 ,
10.57 , 13.46 , 13.66 , 11.08 , 11.27 , 11.04 , 12.05 , 12.39 ,
13.28 , 14.6 , 12.21 , 13.88 , 11.27 , 19.55 , 10.26 , 8.734,
15.49 , 21.61 , 12.1 , 14.06 , 13.51 , 12.8 , 11.06 , 11.8 ,
17.91 , 11.93 , 12.96 , 12.94 , 12.34 , 10.94 , 16.14 , 12.85 ,
17.99 , 12.27 , 11.36 , 11.04 , 9.397, 14.99 , 15.13 , 11.89 ,
9.405, 15.5 , 12.7 , 11.16 , 11.57 , 14.69 , 11.61 , 13.66 ,
9.742, 10.03 , 10.48 , 10.8 , 11.13 , 12.72 , 14.9 , 12.4 ,
20.18 , 18.82 , 14.86 , 13.98 , 12.87 , 14.04 , 13.85 , 14.02 ,
10.97 , 17.27 , 13.78 , 10.57 , 18.03 , 11.99 , 17.75 , 14.8 ,
14.53 , 21.1 , 11.87 , 19.59 , 12. , 14.53 , 12.62 , 13.38 ,
11.63 , 13.21 , 13. , 9.755, 17.08 , 27.42 , 14.4 , 11.6 ,
13.17 , 13.24 , 13.14 , 9.668, 17.6 , 11.62 , 9.667, 12.04 ,
14.92 , 12.27 , 10.88 , 12.83 , 14.2 , 13.9 , 11.49 , 16.25 ,
12.16 , 13.9 , 13.47 , 13.7 , 15.73 , 12.45 , 14.64 , 19.44 ,
11.68 , 16.69 , 12.25 , 17.85 , 18.01 , 12.46 , 13.16 , 14.87 ,
12.65 , 12.47 , 18.49 , 20.59 , 15.04 , 13.82 , 12.54 , 23.09 ,
9.268, 9.676, 12.22 , 11.06 , 16.3 , 15.46 , 11.74 , 14.81 ,
3/6
13.4 , 14.58 , 15.05 , 11.34 , 18.31 , 19.89 , 12.88 , 12.75 ,
9.295, 24.63 , 11.26 , 13.71 , 9.847, 8.571, 13.46 , 12.34 ,
13.94 , 12.07 , 11.75 , 11.67 , 13.68 , 20.47 , 10.96 , 20.55 ,
14.27 , 11.69 , 7.729, 7.691, 11.54 , 14.47 , 14.74 , 13.21 ,
13.87 , 13.62 , 10.32 , 10.26 , 9.683, 10.82 , 10.86 , 11.13 ,
12.77 , 9.333, 12.88 , 10.29 , 10.16 , 9.423, 14.59 , 11.51 ,
14.05 , 11.2 , 15.22 , 20.92 , 21.56 , 20.13 , 16.6 , 20.6 ,
7.76 ])
In [8]: display(data.isna().sum()) data=data.drop_duplicates()

data=pd.get_dummies(data,drop_first=True)
diagnosis 0
radius_mean 0
texture_mean 0
perimeter_mean 0
area_mean 0
smoothness_mean 0
compactness_mean 0
concavity_mean 0
concave points_mean 0
symmetry_mean 0
fractal_dimension_mean 0
radius_se 0
texture_se 0
perimeter_se 0
area_se 0
smoothness_se 0
compactness_se 0
concavity_se 0
concave points_se 0
symmetry_se 0
fractal_dimension_se 0
radius_worst 0
texture_worst 0
perimeter_worst 0
area_worst 0
smoothness_worst 0
compactness_worst 0
concavity_worst 0
concave points_worst 0
symmetry_worst 0
fractal_dimension_worst 0
dtype: int64
In [9]: from sklearn.model_selection import train_test_split

from sklearn.preprocessing import StandardScaler
sc=StandardScaler() X=sc.fit_transform(X)
y=data["diagnosis_M"]
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.30,random_state=42)
In [10]: from sklearn.ensemble import RandomForestClassifier

forest=RandomForestClassifier()
forest.fit(X_train,y_train)
RandomForestClassifier()
Out[10]:
In [11]: y_pred_1=forest.predict(X_test) print('\nRandom

forest:')
print('Misclassified samples: %d' % (y_test !=y_pred_1).sum())
4/6
from sklearn.metrics import accuracy_score print('Accuracy:
%.2F' % accuracy_score(y_test,y_pred_1))
Random forest:
Misclassified samples: 6
Accuracy: 0.96
In [12]: from sklearn.preprocessing import LabelEncoder

labelencoder = LabelEncoder()
X[:,0]=labelencoder.fit_transform(X[:,0])
In [13]: y_pred=np.zeros(10000)
y_test=np.zeros(10000)
indices1=np.random.randint(0,10000,30000)
y_pred[indices1]=1 y_test[indices2]=1
y_pred[indices3]=1 y_test[indices4]=1
In [14]: np.sum(y_test)
Out[14]: 9974.0
In [15]: np.sum(y_pred)
Out[15]: 9981.0
In [16]: from sklearn.metrics import accuracy_score print('Accuracy
score:',accuracy_score(y_test,y_pred))
Accuracy score: 0.9955
In [17]: from sklearn.metrics import confusion_matrix
confusion_matrix(y_test,y_pred)
array([[ 0, 26],
Out[17]:
[ 19, 9955]], dtype=int64)
In [18]: from sklearn.metrics import classification_report

print(classification_report(y_test,y_pred))
precision recall f1-score support
0.0 0.00 0.00 0.00 26

1.0 1.00 1.00 1.00 9974
accuracy 1.00 10000

macro avg 0.50 0.50 0.50 10000
weighted avg 0.99 1.00 1.00 10000
In [19]: from sklearn.metrics import precision_recall_curve precision,

recall,thresholds =precision_recall_curve(y_test,y_pred)
In [20]: precision
array([0.9974 , 0.99739505, 1. ])
Out[20]:
In [21]: recall
array([1. , 0.99809505, 0. ])
Out[21]:
5/6
In [22]: thresholds
array([0., 1.])
Out[22]:
6/6

Breast Cancer

Uploaded by

Document Information

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Breast Cancer

Uploaded by

Copyright:

Available Formats

B2_47_Sewank_Nande _Breast_Cancer

In [1]: import pandas as pd import numpy

1 842517 M 20.57 17.77 132.90 1326.0 0.0847

2 84300903 M 19.69 21.25 130.00 1203.0 0.1096

3 84348301 M 11.42 20.38 77.58 386.1 0.1425

4 84358402 M 20.29 14.34 135.10 1297.0 0.1003

In [6]: X=data.iloc[:,2:32].values Y=data.iloc[:,1].values

In [8]: display(data.isna().sum()) data=data.drop_duplicates()

In [9]: from sklearn.model_selection import train_test_split

In [10]: from sklearn.ensemble import RandomForestClassifier

In [11]: y_pred_1=forest.predict(X_test) print('\nRandom

In [12]: from sklearn.preprocessing import LabelEncoder

Accuracy score: 0.9955

In [17]: from sklearn.metrics import confusion_matrix

In [18]: from sklearn.metrics import classification_report

precision recall f1-score support

0.0 0.00 0.00 0.00 26

accuracy 1.00 10000

In [19]: from sklearn.metrics import precision_recall_curve precision,

You might also like