/** * Run the algorithm * * @param input a list of items to be clustered number of clusters. * @return a list of clusters */ public List<Cluster> runAlgorithm(List<ItemValued> input) { // Create a list of clusters List<Cluster> clusters = new ArrayList<Cluster>(k); // If onely 1 item if (input.size() == 1) { // create a cluster with that item ItemValued item = input.get(0); Cluster cluster = new Cluster(item); cluster.addItem(item); clusters.add(cluster); // return that cluster return clusters; } // (1) Randomly generate k empty clusters with a random average (cluster // center) // (1.1) Find the smallest value and largest value double higher = input.get(0).getId(); double lower = input.get(0).getId(); // for each item for (ItemValued item : input) { // if the largest item until now, remember it if (item.getValue() > higher) { higher = item.getValue(); } // if the smallest item until now, remember it if (item.getValue() < lower) { lower = item.getValue(); } } // If all items have the same values, we return only one // cluster. if (higher == lower) { // Create a cluster with all items and return it Cluster cluster = new Cluster(input); clusters.add(cluster); return clusters; } // (1.2) Generate the k empty clusters with a random average // between the smallest and largest values. for (int i = 0; i < k; i++) { // generate random average double average = random.nextInt((int) (higher - lower)) + lower; // create the cluster Cluster cluster = new Cluster(average); clusters.add(cluster); } // (2) Repeat the two next steps until the assignment hasn't changed boolean changed; do { changed = false; // (2.1) Assign each point to the nearest cluster center. 
// / for each item for (ItemValued item : input) { // find the nearest cluster and the cluster containing the item Cluster nearestCluster = null; Cluster containingCluster = null; double distanceToNearestCluster = Double.MAX_VALUE; // for each cluster for (Cluster cluster : clusters) { // calculate the distance to the current item double distance = averageDistance(cluster, item); // if the smallest distance until now, remember // that cluster if (distance < distanceToNearestCluster) { nearestCluster = cluster; distanceToNearestCluster = distance; } // if the cluster contains that item, // then note that this is the cluster // containing the item. if (cluster.containsItem(item)) { containingCluster = cluster; } } // if the closest cluster to the current item // is not the cluster containing the item if (containingCluster != nearestCluster) { // if the item is in a cluster if (containingCluster != null) { // remove item from the cluster removeItem(containingCluster.getItems(), item); } // add the item to the nearest cluster nearestCluster.addItem(item); changed = true; } } // (2.2) For each cluster, recompute the new cluster average for (Cluster cluster : clusters) { cluster.recomputeClusterAverage(); } } while (changed); // Computer min and max for all clusters for (Cluster cluster : clusters) { cluster.computeHigherAndLower(); } // return the set of clusters return clusters; }
/**
 * Measure how far an item's value lies from a cluster's average.
 * <p>
 * The result is the absolute difference between the two numbers, so it is
 * never negative.
 *
 * @param cluster1 the cluster whose average is used as the reference point
 * @param item the item whose value is compared against that average
 * @return the absolute difference between the cluster average and the
 *         item value, as a double
 */
private double averageDistance(Cluster cluster1, ItemValued item) {
    // Read the cluster average first, then the item value (same order as a
    // single left-to-right expression would evaluate them).
    final double center = cluster1.getaverage();
    final double value = item.getValue();
    final double gap = center - value;
    return Math.abs(gap);
}