// TODO check clusters vs dailyClusters, there's a problem in this method private List<ClusterDendogram> clusterToDate( List<ClusterDendogram> clusters, List<ClusterDendogram> dailyClusters, Cluster.distType type) { // System.out.printf("Clustering clusters (%d) with new day's clusters (%d)\n", clusters.size(), // dailyClusters.size()); // outer for loop loops over clusters in a day // inner for loop loops over clusters built up to the current day /* * clustering between days uses just correlations */ // System.out.printf("\n***clustering to date***\n"); for (ClusterDendogram newClusterDend : dailyClusters) { Cluster newCluster = newClusterDend.getCluster(); // double minDist = Double.MAX_VALUE; double maxSimilarity = 0; int closeClusterNdx = -1; for (int clustNdx = 0; clustNdx < clusters.size(); clustNdx++) { Cluster currClust = clusters.get(clustNdx).getCluster(); double clustDist = newCluster.corrDistance(currClust, type); /* System.out.printf("\n\nnewCluster: \n\t%s\n\ncurrClust:\n\t%s\n\n", newCluster, currClust); System.out.printf("clustDist: %.03f\n", clustDist); */ // if (clustDist < minDist && clustDist < mLowerThreshold) { // System.out.println("cluster to date ward's distance: " + clustDist); // if (clustDist < minDist && clustDist >= mUpperThreshold) { /* System.out.printf("is %.03f > %.03f? %s\n\n", clustDist, maxSimilarity, (clustDist > maxSimilarity)); System.out.printf("mThreshold = %.03f\n", mThresholding); */ // if (clustDist > maxSimilarity && clustDist > mThresholding) { if (mClusterPreference.equals("structure")) { if (clustDist > maxSimilarity && !newCluster.isDifferent(currClust)) { maxSimilarity = clustDist; // System.out.printf("maxSimilarity: %.03f\n", maxSimilarity); closeClusterNdx = clustNdx; } } else if (mClusterPreference.equals("similarity")) { if (clustDist > maxSimilarity && newCluster.isSimilar(currClust)) { maxSimilarity = clustDist; // System.out.printf("maxSimilarity: %.03f\n", maxSimilarity); closeClusterNdx = clustNdx; } } } // replace the cluster closest to the new Cluster with the // oldCluster U newCluster if (closeClusterNdx != -1) { Cluster closeCluster = clusters.get(closeClusterNdx).getCluster(); Dendogram newDendogram = new DendogramNode( maxSimilarity, newClusterDend.getDendogram(), clusters.get(closeClusterNdx).getDendogram()); ClusterDendogram newClustDend = new ClusterDendogram(closeCluster.unionWith(newCluster), newDendogram); clusters.set(closeClusterNdx, newClustDend); } else { clusters.add(newClusterDend); } } // System.out.printf("clusters combined into size %d\n", clusters.size()); // System.out.printf("\n***Finished clustering new day's clusters***\n"); return clusters; }
private List<ClusterDendogram> clusterIsolates( Cluster.distType type, Map<Connectivity, IsolateSimilarityMatrix> isolateNetworks) { /* IsolateRegion region = settings.getRegion(); double distanceThreshold = settings.getDistanceThreshold(); double lowerThreshold = settings.getLowerThreshold(); double upperThreshold = settings.getUpperThreshold(); Cluster.distType type = settings.getDistanceType(); */ // mappings represent days to isolates Map<String, Map<Integer, List<Isolate>>> technicianIsolateMap = null; // list of all constructed clusters List<ClusterDendogram> clusters = new ArrayList<ClusterDendogram>(); List<ClusterDendogram> technicianClusters = new ArrayList<ClusterDendogram>(); /* * Marker */ // MARKER new code. get the isolateMap from the similarity matrix now // also, multiple similarity matrices are stored in the isolate networks // map so that it is possible to iterate on correlations based on their // strength instead of just going all willy nilly IsolateSimilarityMatrix similarityMatrix = isolateNetworks.get(Connectivity.STRONG); technicianIsolateMap = similarityMatrix.getIsolateMap(); for (String technician : technicianIsolateMap.keySet()) { System.out.printf("clustering technician %s's dataset...\n", technician); Map<Integer, List<Isolate>> isolateMap = technicianIsolateMap.get(technician); for (int sampleDay : isolateMap.keySet()) { System.out.printf("clustering day %d...\n", sampleDay); // Cluster the list of isolates in this day List<ClusterDendogram> currClusters = clusterIsolateList(similarityMatrix, isolateMap.get(sampleDay), type); IsolateOutputWriter.outputClustersByDay(similarityMatrix, sampleDay, currClusters); /* System.err.println("currClusters length: " + currClusters.size()); /* for (ClusterDendogram clustDend : currClusters) { System.out.println(clustDend.getDendogram().getXML()); } */ // System.err.printf("on day %d there are a total of %d clusters", sampleDay, // clusters.size()); // Cluster all previous days with this day clusters = clusterToDate(clusters, currClusters, type); } technicianClusters.addAll(clusters); clusters = new ArrayList<ClusterDendogram>(); } clusters = clusterGroup(technicianClusters, type); System.out.printf( "\n\n================\nFINISHED STAGE 1. CLUSTERING SQUISHIES\n" + "===================\n\n"); similarityMatrix = isolateNetworks.get(Connectivity.WEAK); technicianIsolateMap = similarityMatrix.getIsolateMap(); /* * this is so that when doing the second pass through all of the clusters * squishy correlations will also be known */ for (ClusterDendogram clust : clusters) { clust.getCluster().setSimilarityMatrix(similarityMatrix); } for (String technician : technicianIsolateMap.keySet()) { System.out.printf("clustering technician %s's dataset...\n", technician); Map<Integer, List<Isolate>> isolateMap = technicianIsolateMap.get(technician); for (int sampleDay : isolateMap.keySet()) { System.out.printf("clustering day %d...\n", sampleDay); for (Isolate isolate : isolateMap.get(sampleDay)) { // clusters = clusterWeakIsolates(similarityMatrix, clusters, isolate, type); if (!isolate.hasBeenClustered()) { Cluster newCluster = new Cluster(similarityMatrix, isolate); Dendogram newDendogram = new DendogramLeaf(isolate); clusters.add(new ClusterDendogram(newCluster, newDendogram)); } } } } clusters = clusterGroup(clusters, type); System.out.printf("\n\n==================\nFINISHED STAGE 2.\n======================\n\n"); return clusters; }