You are on page 1of 3

Untitled23 - Jupyter Notebook http://localhost:8888/notebooks/Untitled23.ipynb?

kernel_name=python3

In [1]: import pandas as pd


import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score

data = pd.read_excel(r"C:\Users\Admin\Desktop\Datasets\Datasets\Prediction_diabetes.xl

X = data[['age', 'hypertension', 'heart_disease', 'bmi', 'HbA1c_level', 'blood_glucose

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

def calculate_wcss(data):
wcss = []
for n_clusters in range(1, 11):
kmeans = KMeans(n_clusters=n_clusters, init='k-means++', random_state=
kmeans.fit(data)
wcss.append(kmeans.inertia_)
return wcss

def plot_elbow(wcss):
plt.figure(figsize=(8, 6))
plt.plot(range(1, 11), wcss, marker='o', linestyle='--')
plt.title('Elbow Method')
plt.xlabel('Number of Clusters')
plt.ylabel('WCSS')
plt.xticks(np.arange(1, 11, 1))
plt.grid(True)
plt.show()

def perform_kmeans(data, n_clusters):


kmeans = KMeans(n_clusters=n_clusters, init='k-means++', random_state=42)
kmeans.fit(data)
labels = kmeans.labels_
silhouette_avg = silhouette_score(data, labels)
print(f"Silhouette Score for {n_clusters} clusters: {silhouette_avg}")
return kmeans, labels

wcss = calculate_wcss(X_scaled)
plot_elbow(wcss)

optimal_clusters = 3

kmeans, labels = perform_kmeans(X_scaled, optimal_clusters)

1 of 5 05-04-2024, 15:46
Untitled23 - Jupyter Notebook http://localhost:8888/notebooks/Untitled23.ipynb?kernel_name=python3

data['Cluster'] = labels

print(data.head())

C:\ProgramData\anaconda3\lib\site-packages\sklearn\cluster\_kmeans.py:870: Fu
tureWarning: The default value of `n_init` will change from 10 to 'auto' in 1
.4. Set the value of `n_init` explicitly to suppress the warning
warnings.warn(
C:\ProgramData\anaconda3\lib\site-packages\sklearn\cluster\_kmeans.py:1382: U
serWarning: KMeans is known to have a memory leak on Windows with MKL, when t
here are less chunks than available threads. You can avoid it by setting the
environment variable OMP_NUM_THREADS=2.
warnings.warn(
C:\ProgramData\anaconda3\lib\site-packages\sklearn\cluster\_kmeans.py:870: Fu
tureWarning: The default value of `n_init` will change from 10 to 'auto' in 1
.4. Set the value of `n_init` explicitly to suppress the warning
warnings.warn(
C:\ProgramData\anaconda3\lib\site-packages\sklearn\cluster\_kmeans.py:1382: U
serWarning: KMeans is known to have a memory leak on Windows with MKL, when t
here are less chunks than available threads. You can avoid it by setting the
environment variable OMP_NUM_THREADS=2.
warnings.warn(
C:\ProgramData\anaconda3\lib\site-packages\sklearn\cluster\_kmeans.py:870: Fu
tureWarning: The default value of `n_init` will change from 10 to 'auto' in 1
.4. Set the value of `n_init` explicitly to suppress the warning
warnings.warn(
C:\ProgramData\anaconda3\lib\site-packages\sklearn\cluster\_kmeans.py:1382: U
serWarning: KMeans is known to have a memory leak on Windows with MKL, when t
here are less chunks than available threads. You can avoid it by setting the
environment variable OMP_NUM_THREADS=2.
warnings.warn(
C:\ProgramData\anaconda3\lib\site-packages\sklearn\cluster\_kmeans.py:870: Fu
tureWarning: The default value of `n_init` will change from 10 to 'auto' in 1
.4. Set the value of `n_init` explicitly to suppress the warning
warnings.warn(
C:\ProgramData\anaconda3\lib\site-packages\sklearn\cluster\_kmeans.py:1382: U
serWarning: KMeans is known to have a memory leak on Windows with MKL, when t
here are less chunks than available threads. You can avoid it by setting the
environment variable OMP_NUM_THREADS=2.
warnings.warn(
C:\ProgramData\anaconda3\lib\site-packages\sklearn\cluster\_kmeans.py:870: Fu
tureWarning: The default value of `n_init` will change from 10 to 'auto' in 1
.4. Set the value of `n_init` explicitly to suppress the warning
warnings.warn(
C:\ProgramData\anaconda3\lib\site-packages\sklearn\cluster\_kmeans.py:1382: U
serWarning: KMeans is known to have a memory leak on Windows with MKL, when t
here are less chunks than available threads. You can avoid it by setting the
environment variable OMP_NUM_THREADS=2.
warnings.warn(
C:\ProgramData\anaconda3\lib\site-packages\sklearn\cluster\_kmeans.py:870: Fu
tureWarning: The default value of `n_init` will change from 10 to 'auto' in 1
.4. Set the value of `n_init` explicitly to suppress the warning

2 of 5 05-04-2024, 15:46
Untitled23 - Jupyter Notebook http://localhost:8888/notebooks/Untitled23.ipynb?kernel_name=python3

C:\ProgramData\anaconda3\lib\site-packages\sklearn\cluster\_kmeans.py:870: Fu
tureWarning: The default value of `n_init` will change from 10 to 'auto' in 1
.4. Set the value of `n_init` explicitly to suppress the warning
warnings.warn(
C:\ProgramData\anaconda3\lib\site-packages\sklearn\cluster\_kmeans.py:1382: U
serWarning: KMeans is known to have a memory leak on Windows with MKL, when t
here are less chunks than available threads. You can avoid it by setting the
environment variable OMP_NUM_THREADS=2.
warnings.warn(

Silhouette Score for 3 clusters: 0.47305417442149705


gender age hypertension heart_disease smoking_history bmi \
0 Male 41.0 0 0 former 27.32
1 Male 68.0 0 0 No Info 27.32
2 Female 6.0 0 0 No Info 18.99
3 Female 80.0 0 0 former 31.31
4 Male 57.0 1 0 never 27.32

HbA1c_level blood_glucose_level diabetes Cluster


0 6.6 158 0 1
1 4.8 90 0 1
2 6.0 130 0 1
3 3.5 126 0 1
4 6.5 220 1 0

In [ ]:

4 of 5 05-04-2024, 15:46

You might also like