# You are on page 1 of 1

#Store and read the data

import pandas as pd
# Load the location records from the CSV file in the working directory.
csv_path = "LogLatData.csv"
location_df = pd.read_csv(csv_path)
# Bare expression: shows the frame when it is the last line of a notebook
# cell; it has no effect when run as a plain script.
location_df

#plot the data


import numpy as np
import matplotlib.pyplot as plt
import seaborn as sn
# Scatter the raw coordinates (regression line disabled) to eyeball grouping.
# The columns are passed as x=/y= keywords and the facet size as `height`:
# current seaborn rejects positional x/y and no longer accepts `size`.
sn.lmplot(x="latitude", y="longitude", data=location_df, fit_reg=False, height=4)
plt.title("Longitude and Latitude data of locations");

#Selecting the features


# Work on an independent copy of the two coordinate columns.  A "clusterid"
# column is assigned to this frame further down; taking an explicit copy
# keeps that assignment from raising pandas' SettingWithCopyWarning.
new_location_df = location_df[["latitude", "longitude"]].copy()
new_location_df

#K-means Clustering
from sklearn.cluster import KMeans
# Partition the locations into 3 clusters.  The model is fitted on the
# coordinate columns explicitly so that re-running this step, once
# "clusterid" already exists on the frame, does not feed the previous
# labels back in as a third feature.
clusters_new = KMeans(n_clusters=3)
clusters_new.fit(new_location_df[["latitude", "longitude"]])
# Attach each row's assigned cluster index as a new column.
new_location_df["clusterid"] = clusters_new.labels_
new_location_df

#Plot the clusters


import seaborn as sn
# One marker per cluster id; the list length must match the number of
# distinct "clusterid" values (3).
markers = ['+', '^', '.']
# Same scatter as before, coloured by cluster.  x/y are passed by keyword and
# the facet size as `height`, as required by current seaborn (positional
# x/y and the old `size` argument are no longer accepted).
sn.lmplot(
    x="latitude",
    y="longitude",
    data=new_location_df,
    hue="clusterid",
    fit_reg=False,
    markers=markers,
    height=4,
);

#Centroid of the clusters


# Copy the fitted model's cluster centres into a standalone NumPy array.
fitted_centroids = clusters_new.cluster_centers_
centers = np.array(fitted_centroids)
centers

#Determining number of clusters


import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
# Elbow method: fit K-means for k = 1..9 and record the inertia
# (within-cluster sum of squared distances) for each k.
cluster_range = range(1, 10)
cluster_errors = []
# Fit on the coordinate columns only.  By this point new_location_df also
# carries the "clusterid" column, and including it would let the earlier
# cluster labels distort the inertia values.
elbow_features = new_location_df[["latitude", "longitude"]]
for num_clusters in cluster_range:
    clusters = KMeans(n_clusters=num_clusters)
    clusters.fit(elbow_features)
    cluster_errors.append(clusters.inertia_)

# Plot inertia against k; the "elbow" where the curve flattens suggests a
# reasonable number of clusters.
plt.figure(figsize=(6, 4))
plt.plot(cluster_range, cluster_errors, marker="o")
plt.title('Elbow Diagram')
plt.xlabel('Number of Clusters')
plt.ylabel('Sum of Squares Error');

#Exporting the output to Excel


# Write the frame held in new_location_df to an Excel workbook at a fixed
# location on the local Desktop.
excel_output_path = r'C:\Users\Admin\Desktop\aftercluster.xlsx'
new_location_df.to_excel(excel_output_path)

# You might also like