import java.util.ArrayList;

/**
 * Two-cluster k-means demonstration over a fixed table of seven 2-D samples.
 *
 * <p>The run has two phases. First the samples are added one at a time; each
 * new sample is assigned to its nearest centroid and both centroids are
 * recomputed as the mean of their members. Then centroids are recomputed and
 * every sample is reassigned repeatedly until a full pass changes no
 * assignment. Results are printed to standard output.
 */
public class KMeans_Ex4a {
    private static final int NUM_CLUSTERS = 2; // Total clusters.
    private static final int TOTAL_DATA = 7;   // Total data points.

    private static final double SAMPLES[][] = new double[][] {
        {1.0, 1.0},
        {1.5, 2.0},
        {3.0, 4.0},
        {5.0, 7.0},
        {3.5, 5.0},
        {4.5, 5.0},
        {3.5, 4.5}
    };

    private static ArrayList<Data> dataSet = new ArrayList<Data>();
    private static ArrayList<Centroid> centroids = new ArrayList<Centroid>();

    /**
     * Resets the shared state and seeds the two starting centroids at the
     * lowest and highest sample, printing their positions.
     */
    private static void initialize() {
        // The lists are static; clear them so a repeated run starts clean
        // instead of piling extra centroids and samples onto the old ones.
        dataSet.clear();
        centroids.clear();

        System.out.println("Centroids initialized at:");
        centroids.add(new Centroid(1.0, 1.0)); // lowest set.
        centroids.add(new Centroid(5.0, 7.0)); // highest set.
        System.out.println(" (" + centroids.get(0).X() + ", " + centroids.get(0).Y() + ")");
        System.out.println(" (" + centroids.get(1).X() + ", " + centroids.get(1).Y() + ")");
        System.out.print("\n");
    }

    /**
     * Runs both clustering phases against {@code dataSet} and
     * {@code centroids}; expects {@link #initialize()} to have been called.
     */
    private static void kMeanCluster() {
        int sampleNumber = 0;

        // Add in new data, one at a time, recalculating centroids with each new one.
        while (dataSet.size() < TOTAL_DATA) {
            Data newData = new Data(SAMPLES[sampleNumber][0], SAMPLES[sampleNumber][1]);
            dataSet.add(newData);
            newData.cluster(nearestCentroid(newData));
            recalculateCentroids();
            sampleNumber++;
        }

        // Now, keep shifting centroids until equilibrium occurs.
        boolean isStillMoving = true;
        while (isStillMoving) {
            recalculateCentroids();

            // Assign all data to the new centroids.
            isStillMoving = false;
            for (Data point : dataSet) {
                int cluster = nearestCentroid(point);
                // Compare BEFORE storing the new assignment; storing first
                // makes the comparison always false and ends after one pass.
                if (point.cluster() != cluster) {
                    point.cluster(cluster);
                    isStillMoving = true;
                }
            }
        }
    }

    /**
     * Returns the index of the centroid closest to {@code d}; on a tie the
     * lowest index wins because only a strictly smaller distance replaces
     * the current best.
     */
    private static int nearestCentroid(Data d) {
        double minimum = Double.POSITIVE_INFINITY; // The minimum value to beat.
        int cluster = 0;
        for (int i = 0; i < NUM_CLUSTERS; i++) {
            double distance = dist(d, centroids.get(i));
            if (distance < minimum) {
                minimum = distance;
                cluster = i;
            }
        }
        return cluster;
    }

    /**
     * Moves each centroid to the mean position of the samples currently
     * assigned to it. A centroid with no members is left where it is.
     */
    private static void recalculateCentroids() {
        for (int i = 0; i < NUM_CLUSTERS; i++) {
            // Accumulate in double: an int accumulator with += silently
            // truncates fractional coordinates and forces integer division.
            double totalX = 0.0;
            double totalY = 0.0;
            int totalInCluster = 0;
            for (Data point : dataSet) {
                if (point.cluster() == i) {
                    totalX += point.X();
                    totalY += point.Y();
                    totalInCluster++;
                }
            }
            if (totalInCluster > 0) {
                centroids.get(i).X(totalX / totalInCluster);
                centroids.get(i).Y(totalY / totalInCluster);
            }
        }
    }

    /**
     * Calculate Euclidean distance.
     *
     * @param d - Data object.
     * @param c - Centroid object.
     * @return - double value.
     */
    private static double dist(Data d, Centroid c) {
        return Math.sqrt(Math.pow((c.Y() - d.Y()), 2) + Math.pow((c.X() - d.X()), 2));
    }

    /** A sample point together with the index of the cluster it belongs to. */
    private static class Data {
        private double mX = 0;
        private double mY = 0;
        private int mCluster = 0;

        public Data() {
        }

        public Data(double x, double y) {
            this.X(x);
            this.Y(y);
        }

        public void X(double x) {
            this.mX = x;
        }

        public double X() {
            return this.mX;
        }

        public void Y(double y) {
            this.mY = y;
        }

        public double Y() {
            return this.mY;
        }

        public void cluster(int clusterNumber) {
            this.mCluster = clusterNumber;
        }

        public int cluster() {
            return this.mCluster;
        }
    }

    /** A mutable cluster centre. */
    private static class Centroid {
        private double mX = 0.0;
        private double mY = 0.0;

        public Centroid() {
        }

        public Centroid(double newX, double newY) {
            this.mX = newX;
            this.mY = newY;
        }

        public void X(double newX) {
            this.mX = newX;
        }

        public double X() {
            return this.mX;
        }

        public void Y(double newY) {
            this.mY = newY;
        }

        public double Y() {
            return this.mY;
        }
    }

    /**
     * Seeds the centroids, clusters the samples, then prints the members of
     * each cluster followed by the final centroid positions.
     *
     * @param args unused.
     */
    public static void main(String[] args) {
        initialize();
        kMeanCluster();

        // Print out clustering results.
        for (int i = 0; i < NUM_CLUSTERS; i++) {
            System.out.println("Cluster " + i + " includes:");
            for (Data point : dataSet) {
                if (point.cluster() == i) {
                    System.out.println(" (" + point.X() + ", " + point.Y() + ")");
                }
            }
            System.out.println();
        }

        // Print out centroid results.
        System.out.println("Centroids finalized at:");
        for (int i = 0; i < NUM_CLUSTERS; i++) {
            // Closing parenthesis included so this matches the format used
            // for the initial centroids.
            System.out.println(" (" + centroids.get(i).X() + ", " + centroids.get(i).Y() + ")");
        }
        System.out.print("\n");
    }
}