You are on page 1 of 5

import pandas as pd

import numpy as np
import os
from glob import glob
import random
import matplotlib.pylab as plt
# Collect the path of every entry directly under the IDC dataset root
# (one directory per patient, judging by the recorded output).
# The pattern literal had been wrapped across two lines, which is a syntax
# error; it is rejoined here.  glob() already returns a list, so no manual
# append loop is needed, and `recursive=True` only affects patterns that
# contain `**`, so it is omitted.
mypaths = glob('../input/breast-histopathology-images/IDC_regular_ps50_idx5/*')
print(mypaths[:5])
['../input/breast-histopathology-images/IDC_regular_ps50_idx5/10295',
'../input/breast-histopathology-images/IDC_regular_ps50_idx5/10304',
'../input/breast-histopathology-images/IDC_regular_ps50_idx5/12868',
'../input/breast-histopathology-images/IDC_regular_ps50_idx5/10274',
'../input/breast-histopathology-images/IDC_regular_ps50_idx5/12818']
# Work on a fixed slice of the dataset folders (entries 60..119) rather than
# the whole dataset.
mp = mypaths[60:120]

# Gather every PNG patch located two directory levels below each chosen folder.
imagePatches = []
for folder in mp:
    imagePatches.extend(glob(folder + '/*/*.png', recursive=True))

print('total no. of images selected from total images is ' + str(len(imagePatches)))
total no. of images selected from total images is 63535
# Partition the patch paths by label using the filename suffix:
# paths ending in "class0.png" are label 0; every other path is label 1.
class0 = [path for path in imagePatches if path.endswith("class0.png")]  # 0 = no cancer
class1 = [path for path in imagePatches if not path.endswith("class0.png")]  # 1 = cancer

# The message literal had been wrapped across two lines (a syntax error);
# it is rejoined here with its text unchanged.
print('Among them, '+str(len(class0))+' is clss0 and '+str(len(class1))+' is class1')
Among them, 44738 is clss0 and 18797 is class1
# Balance the two classes by drawing the same number of paths from each.
# Using the smaller class size avoids the ValueError that random.sample
# raises when asked for more items than the population holds (the original
# assumed class0 is always at least as large as class1).
n_per_class = min(len(class0), len(class1))
sampled_class0 = random.sample(class0, n_per_class)
sampled_class1 = random.sample(class1, n_per_class)
len(sampled_class0)
18797
from matplotlib.image import imread
import cv2

def get_image_arrays(data, label):
    """Load PNG image files as 50x50 arrays paired with ``label``.

    Args:
        data: iterable of file paths; entries that do not end in ``.png``
            are ignored.
        label: value stored next to every loaded image (0 or 1 in this
            script).

    Returns:
        A list of ``[image, label]`` pairs, where ``image`` is the result of
        ``cv2.imread(path, cv2.IMREAD_COLOR)`` resized to 50x50 with linear
        interpolation and divided by 255.0.  Files that OpenCV cannot read
        are skipped.
    """
    img_arrays = []
    for path in data:
        if not path.endswith('.png'):
            continue
        img = cv2.imread(path, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread reports a missing or unreadable file by returning
            # None instead of raising; skip it rather than crash in resize.
            continue
        img_sized = cv2.resize(img, (50, 50), interpolation=cv2.INTER_LINEAR)
        img_arrays.append([img_sized / 255.0, label])
    return img_arrays
# Read both sampled path lists into memory, tagging each image with its
# class label (0 for class0, 1 for class1).
class0_array = get_image_arrays(data=sampled_class0, label=0)
class1_array = get_image_arrays(data=sampled_class1, label=1)
print('done')
done
# Report how many images were loaded per class, one count per line.
print(len(class0_array), len(class1_array), sep='\n')
18797
18797
# Preview one class-1 patch.  The images were read with cv2.imread, which
# stores colour channels in BGR order, while imshow interprets the last axis
# as RGB; reversing the channel axis shows the true colours.
plt.imshow(class1_array[10][0][..., ::-1])
<matplotlib.image.AxesImage at 0x7fbdf97b6ad0>

# Merge both classes into a single list of [image, label] pairs.
# np.concatenate would first coerce each list of pairs into an array; since
# a pair mixes a (50, 50, 3) image with a scalar label, that is a ragged
# array (VisibleDeprecationWarning on older NumPy, ValueError on
# NumPy >= 1.24).  Plain list concatenation keeps the pairs intact.
combined_data = class0_array + class1_array


#random.seed(41)
#random.shuffle(combined_data)
<string>:6: VisibleDeprecationWarning: Creating an ndarray from ragged nested
sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different
lengths or shapes) is deprecated. If you meant to do this, you must specify
'dtype=object' when creating the ndarray
# Split the [image, label] pairs into parallel feature and label lists.
X = [features for features, _ in combined_data]
y = [label for _, label in combined_data]

# Stack the images into one array of shape (N, 50, 50, 3).
X = np.array(X).reshape(-1, 50, 50, 3)
from sklearn.model_selection import train_test_split
# `keras.utils.np_utils` is a private module path that newer Keras releases
# no longer provide; the public tf.keras location exposes the same helper.
from tensorflow.keras.utils import to_categorical

# Hold out 25% of the samples; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# One-hot encode the integer labels (two columns, per the printed shapes).
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
(28195, 50, 50, 3) (9399, 50, 50, 3) (28195, 2) (9399, 2)
import tensorflow as tf
from tensorflow import keras
# CNN for 50x50x3 patches: five convolution layers, each followed by batch
# normalisation, with three max-pooling stages, then a fully connected head
# with dropout and a two-class output.
model = keras.models.Sequential([
    keras.layers.Conv2D(filters=100, kernel_size=(3,3), strides=(1,1),
                        activation='relu', input_shape=(50,50,3)),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=(2,2), strides=(2,2)),
    keras.layers.Conv2D(filters=256, kernel_size=(2,2), strides=(2,2),
                        activation='relu', padding="same"),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=(3,3), strides=(2,2)),
    keras.layers.Conv2D(filters=384, kernel_size=(2,2), strides=(1,1),
                        activation='relu', padding="same"),
    keras.layers.BatchNormalization(),
    keras.layers.Conv2D(filters=384, kernel_size=(1,1), strides=(1,1),
                        activation='relu', padding="same"),
    keras.layers.BatchNormalization(),
    keras.layers.Conv2D(filters=256, kernel_size=(1,1), strides=(1,1),
                        activation='relu', padding="same"),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=(3,3), strides=(2,2)),
    keras.layers.Flatten(),
    # No input_shape here: only the first layer of a Sequential model sets
    # the input shape, and the former hint of (12544,) did not match the
    # flattened size anyway (1024 according to the recorded model summary).
    keras.layers.Dense(9216, activation='relu'),
    keras.layers.Dense(4096, activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(4096, activation='relu'),
    keras.layers.Dropout(0.5),
    # Softmax instead of two independent sigmoids: the labels are one-hot
    # over two mutually exclusive classes, so the two outputs should form a
    # probability distribution that sums to 1.
    keras.layers.Dense(2, activation='softmax')
])
# Configure training: Adam with a very small step size, binary cross-entropy
# loss, and accuracy / recall / precision tracked per epoch.
model.compile(
    # `lr` is a deprecated alias that newer Keras optimizers reject;
    # `learning_rate` is the supported keyword.
    optimizer=tf.optimizers.Adam(learning_rate=0.000001),
    loss='binary_crossentropy',
    metrics=['accuracy', 'Recall', 'Precision'],
)
model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 48, 48, 100) 2800
_________________________________________________________________
batch_normalization (BatchNo (None, 48, 48, 100) 400
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 24, 24, 100) 0
_________________________________________________________________
conv2d_1 (Conv2D) (None, 12, 12, 256) 102656
_________________________________________________________________
batch_normalization_1 (Batch (None, 12, 12, 256) 1024
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 5, 5, 256) 0
_________________________________________________________________
conv2d_2 (Conv2D) (None, 5, 5, 384) 393600
_________________________________________________________________
batch_normalization_2 (Batch (None, 5, 5, 384) 1536
_________________________________________________________________
conv2d_3 (Conv2D) (None, 5, 5, 384) 147840
_________________________________________________________________
batch_normalization_3 (Batch (None, 5, 5, 384) 1536
_________________________________________________________________
conv2d_4 (Conv2D) (None, 5, 5, 256) 98560
_________________________________________________________________
batch_normalization_4 (Batch (None, 5, 5, 256) 1024
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 2, 2, 256) 0
_________________________________________________________________
flatten (Flatten) (None, 1024) 0
_________________________________________________________________
dense (Dense) (None, 9216) 9446400
_________________________________________________________________
dense_1 (Dense) (None, 4096) 37752832
_________________________________________________________________
dropout (Dropout) (None, 4096) 0
_________________________________________________________________
dense_2 (Dense) (None, 4096) 16781312
_________________________________________________________________
dropout_1 (Dropout) (None, 4096) 0
_________________________________________________________________
dense_3 (Dense) (None, 2) 8194
=================================================================
Total params: 64,739,714
Trainable params: 64,736,954
Non-trainable params: 2,760
_________________________________________________________________
# Train for 10 epochs, evaluating on the held-out split after each epoch;
# the returned object keeps the per-epoch metrics used for plotting.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=10,
)
Epoch 1/10
882/882 [==============================] - 34s 32ms/step - loss: 0.5803 - accuracy:
0.7263 - recall: 0.7124 - precision: 0.6990 - val_loss: 0.3933 - val_accuracy:
0.8317 - val_recall: 0.8215 - val_precision: 0.8398
Epoch 2/10
882/882 [==============================] - 27s 31ms/step - loss: 0.4179 - accuracy:
0.8272 - recall: 0.8228 - precision: 0.8213 - val_loss: 0.3665 - val_accuracy:
0.8424 - val_recall: 0.8302 - val_precision: 0.8506
Epoch 3/10
882/882 [==============================] - 27s 31ms/step - loss: 0.4048 - accuracy:
0.8338 - recall: 0.8275 - precision: 0.8275 - val_loss: 0.3594 - val_accuracy:
0.8437 - val_recall: 0.8360 - val_precision: 0.8513
Epoch 4/10
882/882 [==============================] - 27s 31ms/step - loss: 0.3836 - accuracy:
0.8390 - recall: 0.8349 - precision: 0.8369 - val_loss: 0.3518 - val_accuracy:
0.8490 - val_recall: 0.8406 - val_precision: 0.8542
Epoch 5/10
882/882 [==============================] - 27s 31ms/step - loss: 0.3745 - accuracy:
0.8478 - recall: 0.8459 - precision: 0.8421 - val_loss: 0.3468 - val_accuracy:
0.8520 - val_recall: 0.8432 - val_precision: 0.8571
Epoch 6/10
882/882 [==============================] - 27s 31ms/step - loss: 0.3636 - accuracy:
0.8481 - recall: 0.8467 - precision: 0.8479 - val_loss: 0.3427 - val_accuracy:
0.8536 - val_recall: 0.8443 - val_precision: 0.8605
Epoch 7/10
882/882 [==============================] - 27s 31ms/step - loss: 0.3530 - accuracy:
0.8562 - recall: 0.8535 - precision: 0.8526 - val_loss: 0.3385 - val_accuracy:
0.8553 - val_recall: 0.8472 - val_precision: 0.8613
Epoch 8/10
882/882 [==============================] - 27s 31ms/step - loss: 0.3550 - accuracy:
0.8516 - recall: 0.8502 - precision: 0.8496 - val_loss: 0.3394 - val_accuracy:
0.8559 - val_recall: 0.8476 - val_precision: 0.8601
Epoch 9/10
882/882 [==============================] - 27s 31ms/step - loss: 0.3488 - accuracy:
0.8564 - recall: 0.8554 - precision: 0.8538 - val_loss: 0.3362 - val_accuracy:
0.8581 - val_recall: 0.8514 - val_precision: 0.8619
Epoch 10/10
882/882 [==============================] - 27s 31ms/step - loss: 0.3430 - accuracy:
0.8579 - recall: 0.8562 - precision: 0.8552 - val_loss: 0.3334 - val_accuracy:
0.8605 - val_recall: 0.8532 - val_precision: 0.8642
# Score the trained model on the held-out split; the result is indexed
# below, with the loss in position 0.
e=model.evaluate(X_test,y_test)
294/294 [==============================] - 5s 16ms/step - loss: 0.3334 - accuracy:
0.8605 - recall: 0.8532 - precision: 0.8642
# First element of the evaluation result: the held-out loss (it matches the
# loss value shown in the recorded evaluate() output).
print(e[0])
0.33342963457107544
import matplotlib.pyplot as plt
# Plot the per-epoch training and held-out curves, first for accuracy and
# then for loss; each metric gets its own figure.
curves = (
    ('accuracy', 'val_accuracy', 'Model Accuracy'),
    ('loss', 'val_loss', 'Model Loss'),
)
for train_key, val_key, title in curves:
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(title)
    plt.ylabel(train_key)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

# Model outputs for every held-out sample (two scores per sample, as the
# recorded output below shows).
y_pred=model.predict(X_test)
# Notebook echo: inspect the scores predicted for sample 21.
y_pred[21]
array([0.60488063, 0.39073038], dtype=float32)
# Notebook echo: the one-hot ground-truth label for sample 21, for
# comparison with the predicted scores.
y_test[21]
array([0., 1.], dtype=float32)
# Collapse two-column scores / one-hot rows into integer class labels:
# label 0 only when column 0 is strictly larger, otherwise label 1
# (so ties resolve to 1).
Y_pred = [0 if scores[0] > scores[1] else 1 for scores in y_pred]
Y_test = [0 if onehot[0] > onehot[1] else 1 for onehot in y_test]
# Notebook echo: predicted integer label of the first held-out sample.
Y_pred[0]
1
# Notebook echo: true integer label of the first held-out sample.
Y_test[0]
1
from sklearn.metrics import classification_report, confusion_matrix

# Print the confusion matrix (rows = true label, columns = predicted label)
# followed by per-class precision / recall / F1.
print('Confusion Matrix', confusion_matrix(Y_test, Y_pred), sep='\n')
print(
    'Classification Report',
    classification_report(Y_test, Y_pred, target_names=['Negative','Positive']),
    sep='\n',
)
Confusion Matrix
[[4134 591]
[ 720 3954]]
Classification Report
precision recall f1-score support

Negative 0.85 0.87 0.86 4725


Positive 0.87 0.85 0.86 4674

accuracy 0.86 9399


macro avg 0.86 0.86 0.86 9399
weighted avg 0.86 0.86 0.86 9399

# Persist the trained model to disk.
# NOTE(review): '/s/' looks like an environment-specific directory — confirm
# it exists and is writable before running.
model.save('/s/modelcnn.h5')
# Keep the confusion matrix of true vs. predicted integer labels.
confusionmatrix=confusion_matrix(Y_test, Y_pred)
# Notebook echo: its shape (2x2 for the two classes, per the recorded output).
confusionmatrix.shape
(2, 2)
# Wrap the confusion matrix in a DataFrame labelled by class id on both
# axes so it can be displayed and plotted with readable tick labels.
classes = [0, 1]
con_mat_df = pd.DataFrame(
    data=confusion_matrix(Y_test, Y_pred),
    index=classes,
    columns=classes,
)
con_mat_df
0 1
0 4134 591
1 720 3954
import seaborn as sns
# Draw the confusion matrix as an annotated heatmap with integer counts.
figure = plt.figure(figsize=(6, 6))
sns.heatmap(con_mat_df, annot=True,cmap=plt.cm.cool,fmt='d')
plt.ylabel('Actual')
plt.xlabel('Predicted')
# tight_layout only accounts for artists that already exist, so it must run
# after the axis labels are set; otherwise the labels can be clipped.
plt.tight_layout()
plt.show()

You might also like