public boolean cluster(String[] args) { boolean success = false; // handle command line arguments; sets dataFile and threshold success = parseArgs(args); Map<String, Map<Integer, List<Isolate>>> dataMap = new HashMap<String, Map<Integer, List<Isolate>>>(); /* * Each isolate similarity matrix holds a correlation for both regions */ IsolateSimilarityMatrix partialCorrelations = new IsolateSimilarityMatrix(); Map<Connectivity, IsolateSimilarityMatrix> isolateNetworks = new HashMap<Connectivity, IsolateSimilarityMatrix>(); isolateNetworks.put(Connectivity.STRONG, new IsolateSimilarityMatrix()); isolateNetworks.put(Connectivity.WEAK, new IsolateSimilarityMatrix()); Cluster.distType type = null; String dataFileName = null; double lowerThreshold = -1, upperThreshold = -1; for (File dataFile : dataFileMap.keySet()) { FileSettings settings = dataFileMap.get(dataFile); dataFileName = dataFile.getName(); IsolateRegion region = settings.getRegion(); type = settings.getDistanceType(); lowerThreshold = settings.getLowerThreshold(); upperThreshold = settings.getUpperThreshold(); if (dataFile != null) { IsolateFileParser parser = new IsolateFileParser(dataFile, settings); // MARKER old code // isolateMap = parser.extractData(similarityMatrix); // mIsolateNetworks = parser.extractData(); parser.extractData(isolateNetworks, partialCorrelations); } System.out.println("strong Network size: " + isolateNetworks.get(Connectivity.STRONG).size()); } // each point is a cluster, and we will combine two clusters in each iteration List<ClusterDendogram> clustDends = clusterIsolates(type, isolateNetworks); // System.err.println("clustDends length: " + clustDends.size()); // if the isolates yielded NO clusters (wtf data disappear?) or // if clusterIsolates returned null, then this was *NOT* a success success = clustDends != null && !clustDends.isEmpty(); String outputFileDir = String.format("ClusterResults/%s_%.02f_%.02f", type, lowerThreshold, upperThreshold); /* String outputFileName = String.format("%s/%s", outputFileDir, dataFile.getName().substring(0, dataFile.getName().indexOf(".csv"))); */ String origFileName = HClustering.mOutputFileName.equals("") ? dataFileName.substring(0, dataFileName.indexOf(".csv")) : HClustering.mOutputFileName; System.out.println( "Writing to file " + origFileName + " even though outputFilename should be " + HClustering.mOutputFileName); String outputFileName = String.format("%s/%s", outputFileDir, origFileName); IsolateOutputWriter.outputClusters(clustDends, outputFileDir, outputFileName + ".xml"); IsolateOutputWriter.outputCytoscapeFormat(clustDends, outputFileName); IsolateOutputWriter.outputTemporalClusters(clustDends, outputFileName); IsolateOutputWriter.outputTemporalCharts(clustDends, outputFileName); return success; }
private List<ClusterDendogram> clusterIsolates( Cluster.distType type, Map<Connectivity, IsolateSimilarityMatrix> isolateNetworks) { /* IsolateRegion region = settings.getRegion(); double distanceThreshold = settings.getDistanceThreshold(); double lowerThreshold = settings.getLowerThreshold(); double upperThreshold = settings.getUpperThreshold(); Cluster.distType type = settings.getDistanceType(); */ // mappings represent days to isolates Map<String, Map<Integer, List<Isolate>>> technicianIsolateMap = null; // list of all constructed clusters List<ClusterDendogram> clusters = new ArrayList<ClusterDendogram>(); List<ClusterDendogram> technicianClusters = new ArrayList<ClusterDendogram>(); /* * Marker */ // MARKER new code. get the isolateMap from the similarity matrix now // also, multiple similarity matrices are stored in the isolate networks // map so that it is possible to iterate on correlations based on their // strength instead of just going all willy nilly IsolateSimilarityMatrix similarityMatrix = isolateNetworks.get(Connectivity.STRONG); technicianIsolateMap = similarityMatrix.getIsolateMap(); for (String technician : technicianIsolateMap.keySet()) { System.out.printf("clustering technician %s's dataset...\n", technician); Map<Integer, List<Isolate>> isolateMap = technicianIsolateMap.get(technician); for (int sampleDay : isolateMap.keySet()) { System.out.printf("clustering day %d...\n", sampleDay); // Cluster the list of isolates in this day List<ClusterDendogram> currClusters = clusterIsolateList(similarityMatrix, isolateMap.get(sampleDay), type); IsolateOutputWriter.outputClustersByDay(similarityMatrix, sampleDay, currClusters); /* System.err.println("currClusters length: " + currClusters.size()); /* for (ClusterDendogram clustDend : currClusters) { System.out.println(clustDend.getDendogram().getXML()); } */ // System.err.printf("on day %d there are a total of %d clusters", sampleDay, // clusters.size()); // Cluster all previous days with this day clusters = clusterToDate(clusters, currClusters, type); } technicianClusters.addAll(clusters); clusters = new ArrayList<ClusterDendogram>(); } clusters = clusterGroup(technicianClusters, type); System.out.printf( "\n\n================\nFINISHED STAGE 1. CLUSTERING SQUISHIES\n" + "===================\n\n"); similarityMatrix = isolateNetworks.get(Connectivity.WEAK); technicianIsolateMap = similarityMatrix.getIsolateMap(); /* * this is so that when doing the second pass through all of the clusters * squishy correlations will also be known */ for (ClusterDendogram clust : clusters) { clust.getCluster().setSimilarityMatrix(similarityMatrix); } for (String technician : technicianIsolateMap.keySet()) { System.out.printf("clustering technician %s's dataset...\n", technician); Map<Integer, List<Isolate>> isolateMap = technicianIsolateMap.get(technician); for (int sampleDay : isolateMap.keySet()) { System.out.printf("clustering day %d...\n", sampleDay); for (Isolate isolate : isolateMap.get(sampleDay)) { // clusters = clusterWeakIsolates(similarityMatrix, clusters, isolate, type); if (!isolate.hasBeenClustered()) { Cluster newCluster = new Cluster(similarityMatrix, isolate); Dendogram newDendogram = new DendogramLeaf(isolate); clusters.add(new ClusterDendogram(newCluster, newDendogram)); } } } } clusters = clusterGroup(clusters, type); System.out.printf("\n\n==================\nFINISHED STAGE 2.\n======================\n\n"); return clusters; }