You are on page 1 of 18

SKlearn ‫ مكتبة‬:‫القسم العاشر‬

A. Data Preparation 12. Naïve Bayes


1. Data files from SKlearn 13. LDA , QDA
2. Data cleaning 14. Hierarchical Clusters
3. Metrics module 15. DbScan
4. Feature Selection 16. NLP
5. Data Scaling 17. Apriori
6. Data Split
C. Algorithm Evaluation :
B. ML Algorithms 1. Model Check
1. Linear Regression 2. Grid Search
2. Logistic Regression 3. Pipeline
3. Neural Network 4. Model Save
4. SVR
5. SVC D. Time Series
6. K-means
7. PCA
8. Decision Tree
9. Ensemble Regression
10. Ensemble Classifier
11. K Nearest Neighbors
1
‫‪2.14) Hierarchical Clusters‬‬
‫‪ ‬استخدام التقسيم الهرمي في عمل االقسام في ‪unsupervised ML‬‬

‫‪ ‬يتم استخدامها عبر الموديول ‪cluster.AgglomerativeClustering‬‬

‫‪ ‬الـ ‪ parameters‬المستخدمة في الموديل ‪:‬‬


‫‪‬‬ ‫‪n_clusters‬‬ ‫عدد العناقيد‬
‫‪‬‬ ‫‪affinity‬‬ ‫‪ : euclidean ,l1 ,l2 ,manhattan ,cosine ,precomputed‬طريقة حساب المسافة بين النقاط ‪ ,‬وتكون‬
‫‪‬‬ ‫‪linkage‬‬ ‫‪ : ward , complete , average , single‬اسلوب الحل و تكون بين‬
‫‪‬‬ ‫‪memory‬‬ ‫لتخزين نتائج حساب الشجرة مؤقتا بدال من اعادة حسابها‬
‫‪‬‬ ‫‪compute_full_tree‬‬ ‫لتحديد بناء الشجرة الكاملة من عدمه و تكون ‪ auto‬او ‪ True/False‬‬
(NOTE: the original listed `tol` and `n_jobs` here, but those are KMeans parameters — AgglomerativeClustering has neither; its remaining parameters are `memory`, `connectivity` and `compute_full_tree`.)

‫‪ ‬الـ ‪ methods‬المستخدمة مع الموديل ‪:‬‬

‫‪‬‬ ‫)‪fit(X‬‬ ‫لعمل الفيت‬

‫‪2‬‬
‫‪‬‬ ‫)‪fit_predict(X‬‬ ‫لعمل الفيت و التحويل معا‬

‫‪3‬‬
‫الصيغة العامة‬
#Import Libraries
from sklearn.cluster import AgglomerativeClustering
import scipy.cluster.hierarchy as sch
import matplotlib.pyplot as plt
#----------------------------------------------------

#Applying AggClusteringModel Model
# Template script: assumes X_train / X_test are already defined by earlier
# data-preparation steps (not shown here).

'''
sklearn.cluster.AgglomerativeClustering(n_clusters=2, affinity='euclidean', memory=None, connectivity=None,
                                        compute_full_tree='auto', linkage='ward', pooling_func='deprecated')
'''

# NOTE(review): `affinity` was renamed to `metric` in scikit-learn 1.2 and
# removed in 1.4 — on modern versions use metric='euclidean' instead.
AggClusteringModel = AgglomerativeClustering(n_clusters=5,
                                             affinity='euclidean',  # it can be l1, l2, manhattan, cosine, precomputed
                                             linkage='ward')        # it can be complete, average, single

# Agglomerative clustering has no predict() for unseen data, so each split
# is clustered independently with fit_predict — the two label sets are
# NOT from the same fitted model.
y_pred_train = AggClusteringModel.fit_predict(X_train)
y_pred_test = AggClusteringModel.fit_predict(X_test)

#draw the Hierarchical graph for Training set
# Only the first 30 rows are used: dendrograms become unreadable (and slow)
# for large sample counts.
dendrogram = sch.dendrogram(sch.linkage(X_train[: 30,:], method = 'ward'))# it can be complete,average,single
plt.title('Training Set')
plt.xlabel('X Values')
plt.ylabel('Distances')
plt.show()

#draw the Hierarchical graph for Test set
dendrogram = sch.dendrogram(sch.linkage(X_test[: 30,:], method = 'ward'))# it can be complete,average,single
plt.title('Test Set')
plt.xlabel('X Value')
plt.ylabel('Distances')
plt.show()

#draw the Scatter for Train set
# One scatter call per cluster label (0..4), using the first two feature
# columns as plot axes.
plt.scatter(X_train[y_pred_train == 0, 0], X_train[y_pred_train == 0, 1], s = 10, c = 'red', label = 'Cluster 1')
plt.scatter(X_train[y_pred_train == 1, 0], X_train[y_pred_train == 1, 1], s = 10, c = 'blue', label = 'Cluster 2')
plt.scatter(X_train[y_pred_train == 2, 0], X_train[y_pred_train == 2, 1], s = 10, c = 'green', label = 'Cluster 3')
plt.scatter(X_train[y_pred_train == 3, 0], X_train[y_pred_train == 3, 1], s = 10, c = 'cyan', label = 'Cluster 4')
plt.scatter(X_train[y_pred_train == 4, 0], X_train[y_pred_train == 4, 1], s = 10, c = 'magenta', label = 'Cluster 5')
plt.title('Training Set')
plt.xlabel('X Value')
plt.ylabel('y Value')
plt.legend()
plt.show()

#draw the Scatter for Test set
plt.scatter(X_test[y_pred_test == 0, 0], X_test[y_pred_test == 0, 1], s = 10, c = 'red', label = 'Cluster 1')
plt.scatter(X_test[y_pred_test == 1, 0], X_test[y_pred_test == 1, 1], s = 10, c = 'blue', label = 'Cluster 2')
plt.scatter(X_test[y_pred_test == 2, 0], X_test[y_pred_test == 2, 1], s = 10, c = 'green', label = 'Cluster 3')
plt.scatter(X_test[y_pred_test == 3, 0], X_test[y_pred_test == 3, 1], s = 10, c = 'cyan', label = 'Cluster 4')
plt.scatter(X_test[y_pred_test == 4, 0], X_test[y_pred_test == 4, 1], s = 10, c = 'magenta', label = 'Cluster 5')
plt.title('Testing Set')
plt.xlabel('X Value')
plt.ylabel('y Value')
plt.legend()
plt.show()

6
‫مثال‬
#Import Libraries
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.cluster import AgglomerativeClustering
import scipy.cluster.hierarchy as sch
import matplotlib.pyplot as plt
#----------------------------------------------------

#load breast cancer data

BreastData = load_breast_cancer()

#X Data
X = BreastData.data
#print('X Data is \n' , X[:10])
#print('X shape is ' , X.shape)
#print('X Features are \n' , BreastData.feature_names)

#y Data
# y is kept only for the train/test split below; the clustering itself is
# unsupervised and never sees these labels.
y = BreastData.target
#print('y Data is \n' , y[:10])
#print('y shape is ' , y.shape)
#print('y Columns are \n' , BreastData.target_names)

#----------------------------------------------------
#Splitting data

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=44, shuffle =True)

#Splitted Data
#print('X_train shape is ' , X_train.shape)
#print('X_test shape is ' , X_test.shape)
#print('y_train shape is ' , y_train.shape)
#print('y_test shape is ' , y_test.shape)

#----------------------------------------------------
#Applying AggClusteringModel Model
'''
#sklearn.cluster.AgglomerativeClustering(n_clusters=2, affinity='euclidean', memory=None, connectivity=None,
#                                        compute_full_tree='auto', linkage='ward', pooling_func='deprecated')
'''

# NOTE(review): `affinity` was renamed to `metric` in scikit-learn 1.2 and
# removed in 1.4 — on modern versions use metric='euclidean' instead.
AggClusteringModel = AgglomerativeClustering(n_clusters=5,
                                             affinity='euclidean',  # it can be l1, l2, manhattan, cosine, precomputed
                                             linkage='ward')        # it can be complete, average, single

# Each split is clustered independently; AgglomerativeClustering has no
# predict() for unseen data.
y_pred_train = AggClusteringModel.fit_predict(X_train)
y_pred_test = AggClusteringModel.fit_predict(X_test)

#draw the Hierarchical graph for Training set (first 30 rows only, for readability)
dendrogram = sch.dendrogram(sch.linkage(X_train[: 30,:], method = 'ward'))# it can be complete,average,single
plt.title('Training Set')
plt.xlabel('X Values')
plt.ylabel('Distances')
plt.show()

#draw the Hierarchical graph for Test set
dendrogram = sch.dendrogram(sch.linkage(X_test[: 30,:], method = 'ward'))# it can be complete,average,single
plt.title('Test Set')
plt.xlabel('X Value')
plt.ylabel('Distances')
plt.show()

#draw the Scatter for Train set
# Axes are the first two of the 30 breast-cancer features.
plt.scatter(X_train[y_pred_train == 0, 0], X_train[y_pred_train == 0, 1], s = 10, c = 'red', label = 'Cluster 1')
plt.scatter(X_train[y_pred_train == 1, 0], X_train[y_pred_train == 1, 1], s = 10, c = 'blue', label = 'Cluster 2')
plt.scatter(X_train[y_pred_train == 2, 0], X_train[y_pred_train == 2, 1], s = 10, c = 'green', label = 'Cluster 3')
plt.scatter(X_train[y_pred_train == 3, 0], X_train[y_pred_train == 3, 1], s = 10, c = 'cyan', label = 'Cluster 4')
plt.scatter(X_train[y_pred_train == 4, 0], X_train[y_pred_train == 4, 1], s = 10, c = 'magenta', label = 'Cluster 5')
plt.title('Training Set')
plt.xlabel('X Value')
plt.ylabel('y Value')
plt.legend()
plt.show()

#draw the Scatter for Test set
plt.scatter(X_test[y_pred_test == 0, 0], X_test[y_pred_test == 0, 1], s = 10, c = 'red', label = 'Cluster 1')
plt.scatter(X_test[y_pred_test == 1, 0], X_test[y_pred_test == 1, 1], s = 10, c = 'blue', label = 'Cluster 2')
plt.scatter(X_test[y_pred_test == 2, 0], X_test[y_pred_test == 2, 1], s = 10, c = 'green', label = 'Cluster 3')
plt.scatter(X_test[y_pred_test == 3, 0], X_test[y_pred_test == 3, 1], s = 10, c = 'cyan', label = 'Cluster 4')
plt.scatter(X_test[y_pred_test == 4, 0], X_test[y_pred_test == 4, 1], s = 10, c = 'magenta', label = 'Cluster 5')
plt.title('Testing Set')
plt.xlabel('X Value')
plt.ylabel('y Value')
plt.legend()
plt.show()
10
‫مثال‬
# Minimal AgglomerativeClustering example.
from sklearn.cluster import AgglomerativeClustering
import numpy as np

# Six 2-D points: three with x=1 and three with x=4.
X = np.array([[1, 2], [1, 4], [1, 0],[4, 2], [4, 4], [4, 0]])

# Default model: n_clusters=2, euclidean distance, ward linkage.
clustering = AgglomerativeClustering()
clustering.fit(X)

# The original bare expression `clustering.labels_` only displays in a
# notebook; in a script it is a no-op, so print the labels explicitly.
print(clustering.labels_)

11
‫مثال‬
import matplotlib.pyplot as plt
import pandas as pd

# Customer segmentation example; expects 'data.csv' in the working directory.
dataset = pd.read_csv('data.csv')
# Columns 3 and 4 — presumably annual income and spending score
# (see the axis labels below); verify against the CSV schema.
X = dataset.iloc[:, [3, 4]].values

import scipy.cluster.hierarchy as sch

# Dendrogram of the first 100 customers only, to keep it readable.
dendrogram = sch.dendrogram(sch.linkage(X[:100,:], method = 'ward'))
plt.title('Dendrogram')
plt.xlabel('Customers')
plt.ylabel('Euclidean distances')
plt.show()

from sklearn.cluster import AgglomerativeClustering

# NOTE(review): `affinity` was renamed to `metric` in scikit-learn 1.2 and
# removed in 1.4.
hc = AgglomerativeClustering(n_clusters = 10, affinity = 'euclidean', linkage = 'ward')
y_hc = hc.fit_predict(X)

# NOTE(review): the model produces 10 clusters but only labels 0-4 are
# plotted below — clusters 6-10 are silently omitted from the figure.
plt.scatter(X[y_hc == 0, 0], X[y_hc == 0, 1], s = 10, c = 'red', label = 'Cluster 1')
plt.scatter(X[y_hc == 1, 0], X[y_hc == 1, 1], s = 10, c = 'blue', label = 'Cluster 2')
plt.scatter(X[y_hc == 2, 0], X[y_hc == 2, 1], s = 10, c = 'green', label = 'Cluster 3')
plt.scatter(X[y_hc == 3, 0], X[y_hc == 3, 1], s = 10, c = 'cyan', label = 'Cluster 4')
plt.scatter(X[y_hc == 4, 0], X[y_hc == 4, 1], s = 10, c = 'magenta', label = 'Cluster 5')
plt.title('Clusters of customers')
plt.xlabel('Annual Income (k$)')
plt.ylabel('Spending Score (1-100)')
plt.legend()
plt.show()

13
‫مثال‬
from matplotlib import pyplot as plt
from scipy.cluster.hierarchy import dendrogram, linkage
import numpy as np

# generating two clusters: x with 10 points and y with 20:
#np.random.seed(1234)  # uncomment for a reproducible figure
x = np.random.multivariate_normal([10, 0], [[3, 1], [1, 4]], size=[10,])
y = np.random.multivariate_normal([0, 20], [[3, 1], [1, 4]], size=[20,])

# (the original's bare `x`, `y`, `X` expressions were notebook display
# leftovers — no-ops in a script — and have been removed)
X = np.concatenate((x, y),)
print(X.shape) # 30 samples with 2 dimensions (original comment wrongly said 150)
plt.scatter(X[:,0], X[:,1])
plt.show()

# generate the linkage matrix using Ward's minimum-variance criterion
Z = linkage(X, 'ward')
#print(Z)

from scipy.cluster.hierarchy import cophenet
from scipy.spatial.distance import pdist

# Cophenetic correlation distances: how faithfully the dendrogram
# preserves the pairwise distances of the original observations.
coph_dists = cophenet(Z, pdist(X))
#coph_dists

plt.figure(figsize=(10, 5))
plt.title('HCA Dendrogram')
plt.xlabel('sample index')
plt.ylabel('distance')
dendrogram(Z,leaf_rotation=90,leaf_font_size=12,)
plt.show()

15
‫مثال‬
import numpy as np

# Ten 2-D points: the first five form a low-valued group, the last five a
# high-valued one.
X = np.array([[5,3],
              [10,15],
              [15,12],
              [24,10],
              [30,30],
              [85,70],
              [71,80],
              [60,78],
              [70,55],
              [80,91],])

import matplotlib.pyplot as plt

labels = range(1, 11)

plt.figure(figsize=(6, 4))
plt.subplots_adjust(bottom=0.1)
plt.scatter(X[:,0],X[:,1], label='True Position')
# Annotate each point with its 1-based label, offset slightly so the text
# does not overlap the marker. (The loop body's indentation was lost in the
# original extraction and has been restored here.)
for label, x, y in zip(labels, X[:, 0], X[:, 1]):
    plt.annotate(
        label,
        xy=(x, y), xytext=(-3, 3),
        textcoords='offset points', ha='right', va='bottom')
plt.show()

from scipy.cluster.hierarchy import dendrogram, linkage
from matplotlib import pyplot as plt

# Single linkage: cluster distance = minimum pairwise point distance.
linked = linkage(X, 'single')

labelList = range(1, 11)

plt.figure(figsize=(6, 4))
dendrogram(linked,
           orientation='top',
           labels=labelList,
           distance_sort='descending',
           show_leaf_counts=True)
plt.show()
17
‫مثال‬
import matplotlib.pyplot as plt
import pandas as pd

# Shopping-data example; expects 'shopping_data.csv' in the working directory.
customer_data = pd.read_csv('shopping_data.csv')
# The original bare `.shape` / `.head()` expressions only display in a
# notebook; print them so the script shows the same information.
print(customer_data.shape)
print(customer_data.head())
# Columns 3 and 4 — presumably income and spending score; verify against
# the CSV schema.
data = customer_data.iloc[:, 3:5].values

import scipy.cluster.hierarchy as shc

plt.figure(figsize=(8, 6))
plt.title("Customer Dendograms")
dend = shc.dendrogram(shc.linkage(data, method='ward'))

from sklearn.cluster import AgglomerativeClustering

# NOTE(review): `affinity` was renamed to `metric` in scikit-learn 1.2 and
# removed in 1.4.
cluster = AgglomerativeClustering(n_clusters=5, affinity='euclidean', linkage='ward')

# Print the labels (the original bare expression discarded them).
print(cluster.fit_predict(data))

18

You might also like