Professional Documents
Culture Documents
2.14 Hierarchical Clusters
2.14 Hierarchical Clusters
2
fit_predict(X) لعمل الفيت و التحويل معا
3
الصيغة العامة
#Import Libraries
from sklearn.cluster import AgglomerativeClustering
import scipy.cluster.hierarchy as sch
import matplotlib.pyplot as plt
#----------------------------------------------------
# Constructor signature as given in these notes (older scikit-learn):
#   sklearn.cluster.AgglomerativeClustering(n_clusters=2, affinity='euclidean', memory=None,
#       connectivity=None, compute_full_tree='auto', linkage='ward', pooling_func='deprecated')
# Newer scikit-learn releases rename `affinity` to `metric` and no longer accept
# `pooling_func`, so the distance keyword is left at its default here (euclidean,
# which is the only metric that 'ward' linkage supports).
AggClusteringModel = AgglomerativeClustering(n_clusters=2, linkage='ward')

# Fit the hierarchy on the training data and get one cluster id per training row.
y_pred_train = AggClusteringModel.fit_predict(X_train)
# AgglomerativeClustering has no separate predict(): this call re-fits the model on
# X_test, so these cluster ids are independent of the ones found for X_train.
y_pred_test = AggClusteringModel.fit_predict(X_test)
4
#draw the Hierarchical graph for Training set
# Build the linkage matrix from the first 30 training rows only
# (presumably to keep the plot legible - confirm with the author).
training_linkage = sch.linkage(X_train[:30, :], method='ward')  # it can be complete,average,single
# Render the merge tree; the returned dict describes the drawn dendrogram.
dendrogram = sch.dendrogram(training_linkage)
plt.title('Training Set')
plt.xlabel('X Values')
plt.ylabel('Distances')
plt.show()
6
مثال
#Import Libraries
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.cluster import AgglomerativeClustering
import scipy.cluster.hierarchy as sch
import matplotlib.pyplot as plt
#----------------------------------------------------
# Load the breast-cancer dataset that ships with scikit-learn.
BreastData = load_breast_cancer()
#X Data
X = BreastData.data
#print('X Data is \n' , X[:10])
#print('X shape is ' , X.shape)
#print('X Features are \n' , BreastData.feature_names)
#y Data
y = BreastData.target
#print('y Data is \n' , y[:10])
#print('y shape is ' , y.shape)
#print('y Columns are \n' , BreastData.target_names)
#----------------------------------------------------
#Splitting data
# The split call was missing from the notes, leaving X_train / X_test undefined.
# The split ratio and seed below are illustrative choices; adjust as needed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=44, shuffle=True)
#Splitted Data
#print('X_train shape is ' , X_train.shape)
#print('X_test shape is ' , X_test.shape)
#print('y_train shape is ' , y_train.shape)
#print('y_test shape is ' , y_test.shape)
#----------------------------------------------------
#Applying AggClusteringModel Model
# Constructor signature as given in these notes (older scikit-learn):
#   sklearn.cluster.AgglomerativeClustering(n_clusters=2, affinity='euclidean', memory=None,
#       connectivity=None, compute_full_tree='auto', linkage='ward', pooling_func='deprecated')
# The distance metric can be euclidean, l1, l2, manhattan, cosine or precomputed, but
# 'ward' linkage only works with euclidean. Newer scikit-learn releases spell the
# keyword `metric` instead of `affinity`, so it is left at its euclidean default
# here to stay valid across versions.
AggClusteringModel = AgglomerativeClustering(
    n_clusters=5,
    linkage='ward',  # it can be complete,average,single
)
# Cluster ids for the training rows.
y_pred_train = AggClusteringModel.fit_predict(X_train)
# AgglomerativeClustering has no predict(): this re-fits on X_test, so the ids
# returned here are unrelated to the training cluster ids.
y_pred_test = AggClusteringModel.fit_predict(X_test)
clustering.labels_
11
مثال
import matplotlib.pyplot as plt
import pandas as pd
# Read the raw table from the CSV file in the working directory.
dataset = pd.read_csv('data.csv')
# Keep only the columns at positions 3 and 4, as a plain NumPy array.
X = dataset.iloc[:, [3, 4]].to_numpy()
13
مثال
from matplotlib import pyplot as plt
from scipy.cluster.hierarchy import dendrogram, linkage
import numpy as np

# The notes used `x`, `y` and `Z` without defining them. Two 2-D Gaussian blobs
# (100 + 50 points) are generated here so the data matches the
# "150 samples with 2 dimensions" remark below; the blob parameters are
# illustrative, and the seed only makes the figure reproducible.
np.random.seed(4711)
x = np.random.multivariate_normal([10, 0], [[3, 1], [1, 4]], size=100)
y = np.random.multivariate_normal([0, 20], [[3, 1], [1, 4]], size=50)
X = np.concatenate((x, y),)
print(X.shape) # 150 samples with 2 dimensions

# Scatter plot of the raw points.
plt.scatter(X[:,0], X[:,1])
plt.show()

# Linkage matrix describing the agglomerative merges; 'ward' is an illustrative
# choice (the method can also be complete, average or single).
Z = linkage(X, method='ward')

# Draw the full dendrogram with rotated, readable leaf labels.
plt.figure(figsize=(10, 5))
plt.title('HCA Dendrogram')
plt.xlabel('sample index')
plt.ylabel('distance')
dendrogram(Z, leaf_rotation=90, leaf_font_size=12)
plt.show()
15
مثال
import numpy as np
from matplotlib import pyplot as plt
from scipy.cluster.hierarchy import dendrogram, linkage

# Ten hand-picked 2-D points to cluster.
X = np.array([
    [5, 3],
    [10, 15],
    [15, 12],
    [24, 10],
    [30, 30],
    [85, 70],
    [71, 80],
    [60, 78],
    [70, 55],
    [80, 91],
])

# The notes used `linked` and `labelList` without defining them.
# 'single' linkage is an illustrative choice (ward, complete and average also work).
linked = linkage(X, method='single')
# Label the leaves 1..N so they are easy to match to the rows of X.
labelList = list(range(1, len(X) + 1))

plt.figure(figsize=(6, 4))
dendrogram(
    linked,
    orientation='top',
    labels=labelList,
    distance_sort='descending',
    show_leaf_counts=True,
)
plt.show()
17
مثال
import matplotlib.pyplot as plt
import pandas as pd
# `shc` was used below without ever being imported.
import scipy.cluster.hierarchy as shc

# Load the customer table from the CSV file in the working directory.
customer_data = pd.read_csv('shopping_data.csv')
# Notebook-style inspection: these bare expressions display their value in an
# interactive cell but print nothing when run as a script.
customer_data.shape
customer_data.head()
# Keep only the columns at positions 3 and 4 as a NumPy array.
data = customer_data.iloc[:, 3:5].values
plt.figure(figsize=(8, 6))
plt.title("Customer Dendograms")
# Ward linkage over the selected columns, drawn as a dendrogram.
dend = shc.dendrogram(shc.linkage(data, method='ward'))
18