private List<ClusterDendogram> clusterIsolateList( IsolateSimilarityMatrix similarityMatrix, List<Isolate> isolates, Cluster.distType type) { // represent fecal samples List<ClusterDendogram> clusterB = new ArrayList<ClusterDendogram>(); // represent fecal samples List<ClusterDendogram> clusterF = new ArrayList<ClusterDendogram>(); // represent immediate (after) samples List<ClusterDendogram> clusterI = new ArrayList<ClusterDendogram>(); // represent later samples List<ClusterDendogram> clusterL = new ArrayList<ClusterDendogram>(); List<ClusterDendogram> clusterD = new ArrayList<ClusterDendogram>(); // clusters resulting from clustering the above clusters will be placed in // clusters and this will prevent me from having to refactor the rest of this // method. List<ClusterDendogram> clusters = new ArrayList<ClusterDendogram>(); for (Isolate sample : isolates) { Cluster newCluster = new Cluster(similarityMatrix, sample); Dendogram newDendogram = new DendogramLeaf(sample); switch (sample.getSampleMethod()) { case FECAL: clusterF.add(new ClusterDendogram(newCluster, newDendogram)); break; case IMM: clusterI.add(new ClusterDendogram(newCluster, newDendogram)); break; case LATER: clusterL.add(new ClusterDendogram(newCluster, newDendogram)); break; case DEEP: clusterD.add(new ClusterDendogram(newCluster, newDendogram)); break; case BEFORE: clusterB.add(new ClusterDendogram(newCluster, newDendogram)); break; default: System.err.println("serious error here"); break; } } // System.out.printf("clusterList size: %d\n", clusters.size()); // cluster within each group clusterF = clusterGroup(clusterF, type); clusterI = clusterGroup(clusterI, type); clusterL = clusterGroup(clusterL, type); clusterD = clusterGroup(clusterD, type); clusterB = clusterGroup(clusterB, type); // cluster each group together: // F and I together first since they are the closest in time // F_I and L next since they are the next closest in time // was going to use "clusterAcrossGroup" but there seemed to be a lot of // logical traps such as where to put clusters that are combined and all of // the problems that followed from that clusters.addAll(clusterF); clusters.addAll(clusterD); clusters = clusterGroup(clusters, type); clusters.addAll(clusterI); clusters = clusterGroup(clusters, type); clusters.addAll(clusterL); clusters = clusterGroup(clusters, type); clusters.addAll(clusterB); clusters = clusterGroup(clusters, type); // clusters within all the day's clusters // based on the above clusterGroup call this would likely be repetitive // clusters = clusterGroup(clusters, type); return clusters; }
private List<ClusterDendogram> clusterIsolates( Cluster.distType type, Map<Connectivity, IsolateSimilarityMatrix> isolateNetworks) { /* IsolateRegion region = settings.getRegion(); double distanceThreshold = settings.getDistanceThreshold(); double lowerThreshold = settings.getLowerThreshold(); double upperThreshold = settings.getUpperThreshold(); Cluster.distType type = settings.getDistanceType(); */ // mappings represent days to isolates Map<String, Map<Integer, List<Isolate>>> technicianIsolateMap = null; // list of all constructed clusters List<ClusterDendogram> clusters = new ArrayList<ClusterDendogram>(); List<ClusterDendogram> technicianClusters = new ArrayList<ClusterDendogram>(); /* * Marker */ // MARKER new code. get the isolateMap from the similarity matrix now // also, multiple similarity matrices are stored in the isolate networks // map so that it is possible to iterate on correlations based on their // strength instead of just going all willy nilly IsolateSimilarityMatrix similarityMatrix = isolateNetworks.get(Connectivity.STRONG); technicianIsolateMap = similarityMatrix.getIsolateMap(); for (String technician : technicianIsolateMap.keySet()) { System.out.printf("clustering technician %s's dataset...\n", technician); Map<Integer, List<Isolate>> isolateMap = technicianIsolateMap.get(technician); for (int sampleDay : isolateMap.keySet()) { System.out.printf("clustering day %d...\n", sampleDay); // Cluster the list of isolates in this day List<ClusterDendogram> currClusters = clusterIsolateList(similarityMatrix, isolateMap.get(sampleDay), type); IsolateOutputWriter.outputClustersByDay(similarityMatrix, sampleDay, currClusters); /* System.err.println("currClusters length: " + currClusters.size()); /* for (ClusterDendogram clustDend : currClusters) { System.out.println(clustDend.getDendogram().getXML()); } */ // System.err.printf("on day %d there are a total of %d clusters", sampleDay, // clusters.size()); // Cluster all previous days with this day clusters = clusterToDate(clusters, currClusters, type); } technicianClusters.addAll(clusters); clusters = new ArrayList<ClusterDendogram>(); } clusters = clusterGroup(technicianClusters, type); System.out.printf( "\n\n================\nFINISHED STAGE 1. CLUSTERING SQUISHIES\n" + "===================\n\n"); similarityMatrix = isolateNetworks.get(Connectivity.WEAK); technicianIsolateMap = similarityMatrix.getIsolateMap(); /* * this is so that when doing the second pass through all of the clusters * squishy correlations will also be known */ for (ClusterDendogram clust : clusters) { clust.getCluster().setSimilarityMatrix(similarityMatrix); } for (String technician : technicianIsolateMap.keySet()) { System.out.printf("clustering technician %s's dataset...\n", technician); Map<Integer, List<Isolate>> isolateMap = technicianIsolateMap.get(technician); for (int sampleDay : isolateMap.keySet()) { System.out.printf("clustering day %d...\n", sampleDay); for (Isolate isolate : isolateMap.get(sampleDay)) { // clusters = clusterWeakIsolates(similarityMatrix, clusters, isolate, type); if (!isolate.hasBeenClustered()) { Cluster newCluster = new Cluster(similarityMatrix, isolate); Dendogram newDendogram = new DendogramLeaf(isolate); clusters.add(new ClusterDendogram(newCluster, newDendogram)); } } } } clusters = clusterGroup(clusters, type); System.out.printf("\n\n==================\nFINISHED STAGE 2.\n======================\n\n"); return clusters; }