/**
 * Recalculates the distance between every unordered pair of clusters in the given list.
 *
 * <p>For each pair the recalculation is requested in both directions (first to second, then
 * second to first), forwarding {@code mUpperThreshold} and {@code mLowerThreshold}.
 *
 * @param clusters cluster/dendogram entries whose clusters are updated pairwise
 * @param type distance type forwarded to {@code Cluster.recalculateDistanceTo}
 */
public void recalculateClusterDistances(List<ClusterDendogram> clusters, Cluster.distType type) {
  for (int firstNdx = 0; firstNdx < clusters.size(); firstNdx++) {
    // Start at firstNdx + 1 so each unordered pair is visited exactly once.
    for (int secondNdx = firstNdx + 1; secondNdx < clusters.size(); secondNdx++) {
      Cluster first = clusters.get(firstNdx).getCluster();
      Cluster second = clusters.get(secondNdx).getCluster();

      first.recalculateDistanceTo(second, type, mUpperThreshold, mLowerThreshold);
      second.recalculateDistanceTo(first, type, mUpperThreshold, mLowerThreshold);
    }
  }
}
/* * move clusters to new cluster list, when one of the clusters that will be merged * is found, merge it with the other cluster, then add to new cluster list * only do this for one cluster to be merged to avoid duplicates */ private List<ClusterDendogram> combineClusters( List<ClusterDendogram> clusters, Point minNdx, double correlation, Cluster.distType type) { // System.out.printf("minNdx for cluster combining is <%f, %f>\n", minNdx.getX(), // minNdx.getY()); ArrayList<ClusterDendogram> newClusters = new ArrayList<ClusterDendogram>(); // ArrayList<Dendogram> newDendogram = new ArrayList<Dendogram>(); for (int clusterNdx = 0; clusterNdx < clusters.size(); clusterNdx++) { if (clusterNdx != (int) minNdx.getX() && clusterNdx != (int) minNdx.getY()) { newClusters.add(clusters.get(clusterNdx)); // newDendogram.add(dendogram.get(clusterNdx)); } else if (clusterNdx == (int) minNdx.getX()) { // using minNdx for cluster one for consistency and readability // System.out.printf("minNdx X: %d minNdx Y: %d clustersLength: %d", (int) minNdx.getX(), // (int) minNdx.getY(), clusters.size()); Cluster clusterOne = clusters.get((int) minNdx.getX()).getCluster(); Cluster clusterTwo = clusters.get((int) minNdx.getY()).getCluster(); System.out.printf( "combining clusters:\n===\n\n%s\n and \n%s\nwith maxSimiliarty: %.03f\n\n===", clusterOne, clusterTwo, correlation); Cluster combinedCluster = new Cluster(clusterOne.unionWith(clusterTwo)); // using minNdx for dendogram one for consistency and readability Dendogram leftDend = clusters.get((int) minNdx.getX()).getDendogram(); Dendogram rightDend = clusters.get((int) minNdx.getY()).getDendogram(); // Dendogram newDendogram = new DendogramNode(clusterOne.actualDistance(clusterTwo, type), // leftDend, rightDend); Dendogram newDendogram = new DendogramNode(clusterOne.corrDistance(clusterTwo, type), leftDend, rightDend); newClusters.add(new ClusterDendogram(combinedCluster, newDendogram)); } } // make the new cluster set into the current 
cluster set for next iteration return newClusters; // clusterer.dendogram = newDendogram; }
// TODO check clusters vs dailyClusters, there's a problem in this method private List<ClusterDendogram> clusterToDate( List<ClusterDendogram> clusters, List<ClusterDendogram> dailyClusters, Cluster.distType type) { // System.out.printf("Clustering clusters (%d) with new day's clusters (%d)\n", clusters.size(), // dailyClusters.size()); // outer for loop loops over clusters in a day // inner for loop loops over clusters built up to the current day /* * clustering between days uses just correlations */ // System.out.printf("\n***clustering to date***\n"); for (ClusterDendogram newClusterDend : dailyClusters) { Cluster newCluster = newClusterDend.getCluster(); // double minDist = Double.MAX_VALUE; double maxSimilarity = 0; int closeClusterNdx = -1; for (int clustNdx = 0; clustNdx < clusters.size(); clustNdx++) { Cluster currClust = clusters.get(clustNdx).getCluster(); double clustDist = newCluster.corrDistance(currClust, type); /* System.out.printf("\n\nnewCluster: \n\t%s\n\ncurrClust:\n\t%s\n\n", newCluster, currClust); System.out.printf("clustDist: %.03f\n", clustDist); */ // if (clustDist < minDist && clustDist < mLowerThreshold) { // System.out.println("cluster to date ward's distance: " + clustDist); // if (clustDist < minDist && clustDist >= mUpperThreshold) { /* System.out.printf("is %.03f > %.03f? 
%s\n\n", clustDist, maxSimilarity, (clustDist > maxSimilarity)); System.out.printf("mThreshold = %.03f\n", mThresholding); */ // if (clustDist > maxSimilarity && clustDist > mThresholding) { if (mClusterPreference.equals("structure")) { if (clustDist > maxSimilarity && !newCluster.isDifferent(currClust)) { maxSimilarity = clustDist; // System.out.printf("maxSimilarity: %.03f\n", maxSimilarity); closeClusterNdx = clustNdx; } } else if (mClusterPreference.equals("similarity")) { if (clustDist > maxSimilarity && newCluster.isSimilar(currClust)) { maxSimilarity = clustDist; // System.out.printf("maxSimilarity: %.03f\n", maxSimilarity); closeClusterNdx = clustNdx; } } } // replace the cluster closest to the new Cluster with the // oldCluster U newCluster if (closeClusterNdx != -1) { Cluster closeCluster = clusters.get(closeClusterNdx).getCluster(); Dendogram newDendogram = new DendogramNode( maxSimilarity, newClusterDend.getDendogram(), clusters.get(closeClusterNdx).getDendogram()); ClusterDendogram newClustDend = new ClusterDendogram(closeCluster.unionWith(newCluster), newDendogram); clusters.set(closeClusterNdx, newClustDend); } else { clusters.add(newClusterDend); } } // System.out.printf("clusters combined into size %d\n", clusters.size()); // System.out.printf("\n***Finished clustering new day's clusters***\n"); return clusters; }
private List<ClusterDendogram> clusterGroup( List<ClusterDendogram> clusters, Cluster.distType type) { // System.out.printf("***clustering new group***\n"); Point closeClusters = new Point(-1, -1); // double minDist = Double.MAX_VALUE; double maxSimilarity = 0; boolean hasChanged; do { // System.out.printf("entering clustering loop...\n"); hasChanged = false; for (int clustOne = 0; clustOne < clusters.size(); clustOne++) { for (int clustTwo = clustOne + 1; clustTwo < clusters.size(); clustTwo++) { Cluster cluster_A = clusters.get(clustOne).getCluster(); Cluster cluster_B = clusters.get(clustTwo).getCluster(); // System.out.printf("\n\ncluster A:\n\t%s\n\ncluster B:\n\t%s\n\n", cluster_A, // cluster_B); // double clustDist = cluster_A.distance(cluster_B, type); // this will ensure that i'm only comparing based on correlations double clustDist = cluster_A.corrDistance(cluster_B, type); // System.out.printf("clustDist: %.03f\n", clustDist); /* if (clustDist > 1) { System.err.println("cluster group clustDist: " + clustDist + " between " + cluster_A + " and " + cluster_B); } */ // System.out.println("ward's distance: " + clustDist); // if (clustDist < minDist && clustDist > 99.7 ) { // if (clustDist < minDist && clustDist < .03 ) { // if (clustDist < minDist && clustDist > 99.7 ) { this // corresponds to results used in paper // TODO investigate the results for when you use '> minDist' // if (/*clustDist > minDist &&*/ clustDist > 99.7 ) { // System.out.printf("clustDist: %.03f\n", clustDist); // if (clustDist > minDist && clustDist > mThresholding ) { // System.out.printf("is %.03f > %.03f? 
%s\n\n", clustDist, maxSimilarity, (clustDist > // maxSimilarity)); // System.out.printf("mThreshold = %.03f\n", mThresholding); // if (clustDist > maxSimilarity && clustDist > mThresholding) { if (mClusterPreference.equals("structure")) { if (clustDist > maxSimilarity && !cluster_A.isDifferent(cluster_B)) { maxSimilarity = clustDist; // System.out.printf("maxSimilarity: %.03f\n", maxSimilarity); closeClusters = new Point(clustOne, clustTwo); } } else if (mClusterPreference.equals("similarity")) { if (clustDist > maxSimilarity && cluster_A.isSimilar(cluster_B)) { maxSimilarity = clustDist; // System.out.printf("maxSimilarity: %.03f\n", maxSimilarity); closeClusters = new Point(clustOne, clustTwo); } } } } /* * if newCluster list is a different sized then clearly two clusters were * combined. In this case set hasChanges to true and set the cluster list to * the new cluster list */ List<ClusterDendogram> newClusters = combineClusters(clusters, closeClusters, maxSimilarity, type); if (newClusters.size() != clusters.size()) { hasChanged = true; clusters = newClusters; System.out.println("recalculating cluster distances..."); recalculateClusterDistances(clusters, type); System.out.println("finished recalculating cluster distances..."); } // reset various variables closeClusters = new Point(-1, -1); maxSimilarity = 0; // System.out.printf("finishing clustering iteration...\n"); // continue clustering until clusters do not change } while (hasChanged); // System.out.printf("***Finished clustering group***\n"); return clusters; }