Professional Documents
Culture Documents
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist
# Label both axes of the current matplotlib figure.
plt.xlabel('x')
plt.ylabel('y')
# Plot column 0 of X against column 1 using the 'bo' format (blue circle markers).
# Assumes X is a 2-D array defined earlier with at least two columns -- TODO confirm.
plt.plot(X[:, 0], X[:, 1], 'bo', markersize=5)
# NOTE(review): this argument-less plot() call adds no data; it looks redundant -- confirm before removing.
plt.plot()
# Render the figure.
plt.show()
# Boolean-mask selection: keep every column of the rows of X whose entry in
# `labels` equals k. Assumes `labels` has one entry per row of X -- TODO confirm.
Xk = X[labels == k, :]
# Draw each cluster's points and its center on one figure, then display it.
# The loop structure was restored from flattened text: the per-cluster plotting
# belongs inside the loop, the legend/show calls run once after it.
for i in range(n_cluster):
    # Rows of X currently assigned to cluster i.
    data = X[labels == i]
    # Cluster members: triangle markers in the i-th color.
    plt.plot(data[:, 0], data[:, 1], plt_colors[i] + '^', markersize=4,
             label='cluster_' + str(i))
    # Cluster center: larger circle marker in the color at offset i + 4.
    # NOTE(review): plt_colors is defined outside this fragment; the i + 4
    # index presumably requires at least n_cluster + 4 entries -- confirm.
    plt.plot(centers[i][0], centers[i][1], plt_colors[i + 4] + 'o', markersize=10,
             label='center_' + str(i))
plt.legend()
plt.show()
def kmeans(init_centes, init_labels, X, n_cluster):
    """Iterate label assignment and center updates until convergence.

    Each pass assigns labels with ``kmeans_predict_labels``, plots the
    assignment with ``kmeans_visualize``, computes candidate centers with
    ``kmeans_update_centers``, and stops as soon as
    ``kmeans_has_converged(centers, new_centers)`` is true. Otherwise the
    candidate centers are adopted, plotted, and the pass counter is advanced.

    Args:
        init_centes: Initial centers used for the first assignment step.
        init_labels: Not read; labels are recomputed from the centers at the
            start of the first pass. Kept so existing callers keep working.
        X: Data passed through to the helper functions.
        n_cluster: Number of clusters, forwarded to the helper functions.

    Returns:
        A tuple ``(centers, labels, times)`` where ``centers`` and ``labels``
        are the values in effect when convergence was detected and ``times``
        is the number of center updates that were adopted.
    """
    centers = init_centes
    times = 0
    while True:
        labels = kmeans_predict_labels(X, centers)
        kmeans_visualize(X, centers, labels, n_cluster,
                         'Assigned label for data at time = ' + str(times + 1))
        new_centers = kmeans_update_centers(X, labels, n_cluster)
        # Stop before adopting new_centers: the returned centers are the ones
        # the returned labels were computed from.
        if kmeans_has_converged(centers, new_centers):
            break
        centers = new_centers
        kmeans_visualize(X, centers, labels, n_cluster,
                         'Update center possition at time = ' + str(times + 1))
        times += 1
    return (centers, labels, times)
# Load the Iris data as a (features, target) pair.
X, y = datasets.load_iris(return_X_y=True)
# Wrap the features in a DataFrame with readable column names.
# The 'Petal length' literal was split across two lines in the original text.
X = pd.DataFrame(
    data=X,
    columns=['Sepal length', 'Sepal width', 'Petal length', 'Petal width'],
)
# Map the integer class codes 0/1/2 to species names.
y = pd.Series(y).map({0: 'Setosa', 1: 'Versicolor', 2: 'Virginica'})
# Notebook-style preview of the first rows.
X.head()
53
# Build a prince PCA configured for two components with a fixed random_state.
# The remaining keyword arguments are passed to prince.PCA as written; their
# exact meaning depends on the installed prince version -- TODO confirm.
pca = prince.PCA(
n_components=2,
n_iter=3,
rescale_with_mean=True,
rescale_with_std=True,
copy=True,
check_input=True,
engine='auto',
random_state=42
)
# Fit on X and rebind `pca` to whatever fit() returns.
pca = pca.fit(X)
# Notebook-style preview: first rows of the transformed X.
pca.transform(X).head()
27
54
22/4/2022
# Plot the rows of X on components 0 and 1 in a 6x6 figure, colored by y,
# with filled ellipses and no ellipse outline.
# NOTE(review): plot_row_coordinates may not exist in every prince release --
# confirm against the installed version.
ax = pca.plot_row_coordinates(
X,
ax=None,
figsize=(6, 6),
x_component=0,
y_component=1,
labels=None,
color_labels=y,
ellipse_outline=False,
ellipse_fill=True,
show_points=True
)
# Fetch the figure that owns the returned axes (displays it in a notebook cell).
ax.get_figure()
28
56
18/3/2022
Ghi chú:
Khi cài đặt bằng code, có thể biểu diễn nút null bằng giá trị 0.
21
18/3/2022
22
18/3/2022
23
18/3/2022
24
18/3/2022
RUN
25
50