public void writeMap(Cluster cluster, float yBar) { Set<Submission> subSet = new HashSet<Submission>(cluster.size()); String documents = ""; for (int i = 0; i < cluster.size(); i++) { Submission sub = submissions.elementAt(cluster.getSubmissionAt(i)); documents += sub.name + " "; subSet.add(sub); } documents = documents.trim(); String theme = ThemeGenerator.generateThemes(subSet, this.program.get_themewords(), false, this.program); mapString += "<area shape=\"rect\" coords=\"" + (cluster.x - 2) + "," + (yBar) + "," + (cluster.x + 2) + "," + (cluster.y + 2) + "\" onMouseover=\"set('" + cluster.size() + "','" + trimStringToLength(String.valueOf(cluster.getSimilarity()), 6) + "','" + trimStringToLength(documents, 50) + "','" + theme + "')\" "; // if (cluster.size() == 1) // mapString += "href=\"submission"+cluster.getSubmissionAt(0)+".html\">\n"; // else mapString += "nohref>\n"; }
/**
 * Recursively draws the dendrogram rooted at {@code cluster} and records the drawing position
 * in the cluster's {@code x} and {@code y} fields.
 *
 * <p>A cluster that is contained in the {@code clusters} list is drawn as a leaf: a small
 * rectangle at the bottom line ({@code maxY}) whose height grows with the cluster size, red
 * when it has more than one member and black otherwise. Any other cluster is an inner node:
 * both children are drawn first, then each child is connected upwards to a horizontal bar
 * whose height is derived from the node's similarity relative to {@code threshold}.
 *
 * @param cluster the (sub-)tree to draw
 */
public void drawCluster(Cluster cluster) {
  int leafIndex = clusters.indexOf(cluster);
  if (leafIndex != -1) {
    // Leaf: position depends only on its index in the list.
    cluster.y = maxY;
    cluster.x = minX + leafIndex * factor;
    g.setColor(cluster.size() > 1 ? Color.RED : Color.BLACK);
    g.drawRect(cluster.x - 1, cluster.y - cluster.size(), 2, 1 + cluster.size());
    return;
  }

  // Inner node: children first, so that their x/y are known below.
  Cluster left = cluster.getLeft();
  Cluster right = cluster.getRight();
  drawCluster(left);
  drawCluster(right);

  int yBar = minY + (int) ((maxY - minY) * (cluster.getSimilarity() / threshold));

  g.setColor(Color.DARK_GRAY);
  drawChildStem(left, yBar);
  drawChildStem(right, yBar);

  g.setColor(Color.BLACK);
  g.drawLine(left.x, yBar, right.x, yBar);

  cluster.x = (right.x + left.x) / 2;
  cluster.y = yBar;
}

/**
 * Draws the vertical line from {@code child} up to {@code yBar} and writes the matching
 * image-map entry, but only when the child lies below the bar.
 */
private void drawChildStem(Cluster child, int yBar) {
  if (child.y > yBar) {
    g.drawLine(child.x, child.y - 1, child.x, yBar);
    writeMap(child, yBar);
  }
}
/**
 * Checks that the induced graph handed out by a {@code BackedClusterImpl} reflects nodes that
 * are added to the cluster after the graph was obtained.
 */
@Test
public void testBacked() {
  Cluster<Integer, Integer> backedCluster = new BackedClusterImpl<Integer, Integer>(graph);

  // Obtained once, before any node is added; the very same instance is inspected again below.
  Graph<Integer, Integer> induced = backedCluster.getInducedGraph();

  // A fresh cluster is empty and induces no edges.
  assertEquals(0, induced.getEdgeCount());
  assertEquals(0, backedCluster.size());

  backedCluster.add(0);
  backedCluster.add(1);

  // Both nodes are now members, and the earlier graph instance reports one edge.
  assertEquals(2, backedCluster.size());
  assertEquals(1, induced.getEdgeCount());
}
/** Print it! */ public String printClusters(Cluster clustering, float threshold, HTMLFile f) { int maxSize = 0; ArrayList<Cluster> clusters = getClusters(clustering, threshold); for (Iterator<Cluster> i = clusters.iterator(); i.hasNext(); ) { Cluster cluster = i.next(); if (cluster.size() > maxSize) maxSize = cluster.size(); } TreeSet<Cluster> sorted = new TreeSet<Cluster>(clusters); clusters = null; // Now print them: return outputClustering(f, sorted, maxSize); }
/**
 * Assigns roles within {@code cluster} and then configures and starts it.
 *
 * <p>Does nothing when the cluster is empty (size 0).
 *
 * @throws Exception propagated from role assignment or cluster setup
 */
private void setupAndStartImpl() throws Exception {
  if (cluster.size() != 0) {
    cluster.assignRoles();
    setupAndStartCluster();
  }
}
/**
 * Assigns roles within {@code cluster} and then runs {@code verifyOperation()}.
 *
 * <p>Does nothing when the cluster is empty (size 0).
 *
 * @throws Exception propagated from role assignment or the verification run
 */
private void checkHealthImpl() throws Exception {
  if (cluster.size() != 0) {
    cluster.assignRoles();
    verifyOperation();
  }
}
private ArrayList<Cluster> getClusters(Cluster clustering, float threshold) { ArrayList<Cluster> clusters = new ArrayList<Cluster>(); // First determine the clusters Stack<Cluster> stack = new Stack<Cluster>(); stack.push(clustering); while (!stack.empty()) { Cluster current = stack.pop(); if (current.size() == 1) { clusters.add(current); // singleton clusters } else { if (current.getSimilarity() >= threshold) { clusters.add(current); } else { // current.size() != 1 !!! stack.push(current.getLeft()); stack.push(current.getRight()); } } } return clusters; }
/** * @param args * @throws InvalidEducationValueException */ public static void main(String[] args) throws InvalidEducationValueException { final ClusterNumberFormat cnf = new ClusterNumberFormat(); final ClusterTestData ctd = new ClusterTestData(); Cluster tempClusterI = null; Cluster tempClusterJ = null; Cluster minClusterA = null; Cluster minClusterB = null; Instance tempClusterIInstance = null; Instance tempClusterJInstance = null; Instance minInstanceA = null; Instance minInstanceB = null; double dist = 0.0; double minDist = Double.POSITIVE_INFINITY; ArrayList<Cluster> clusters = ctd.getClusters(); while (clusters.size() > 1) { for (int i = 0; i < clusters.size(); i++) { for (int j = i + 1; j < clusters.size(); j++) { tempClusterI = clusters.get(i); tempClusterJ = clusters.get(j); System.out.println( "Cluster " + tempClusterI.getName() + " has Instance(s) " + tempClusterI.getInstancesNameSet()); System.out.println( "Cluster " + tempClusterJ.getName() + " has Instance(s) " + tempClusterJ.getInstancesNameSet()); for (int k = 0; k < tempClusterI.size(); k++) { for (int l = 0; l < tempClusterJ.size(); l++) { tempClusterIInstance = tempClusterI.get(k); tempClusterJInstance = tempClusterJ.get(l); dist = ClusterCalculation.distance(tempClusterIInstance, tempClusterJInstance); System.out.println( " DIST(" + tempClusterIInstance.getName() + "," + tempClusterJInstance.getName() + ") = " + cnf.format(dist)); if (dist < minDist) { minDist = dist; minClusterA = tempClusterI; minInstanceA = tempClusterIInstance; minClusterB = tempClusterJ; minInstanceB = tempClusterJInstance; } } } } } System.out.println( "*** Minimum exists between Instance " + minInstanceA.getName() + " of Cluster " + minClusterA.getName() + " and Instance " + minInstanceB.getName() + " of Cluster " + minClusterB.getName() + " with a distance of " + cnf.format(minDist)); System.out.print( " * Merging cluster " + minClusterB.getName() + " into cluster " + minClusterA.getName() + ": "); // minClusterA and 
minClusterB are the two clusters with the closest member Instance(s). // Merge B into A and remove B from the list of Clusters. minClusterA.merge(minClusterB); clusters.remove(minClusterB); System.out.println( "Cluster " + minClusterA.getName() + " now contains instance(s) " + minClusterA.getInstancesNameSet()); minDist = Double.POSITIVE_INFINITY; } }
/** This method returns the distribution HTML code as a string */ private String outputClustering(HTMLFile f, Collection<Cluster> allClusters, int maxSize) { int[] distribution = new int[maxSize + 1]; int max = 0; for (int i = 0; i <= maxSize; i++) distribution[i] = 0; // Now output the clustering: f.println("<TABLE CELLPADDING=2 CELLSPACING=2>"); f.println( "<TR><TH ALIGN=center BGCOLOR=#8080ff>" + msg.getString("Clusters.Cluster_number") + "<TH ALIGN=center BGCOLOR=#8080ff>" + msg.getString("Clusters.Size") + "<TH ALIGN=center BGCOLOR=#8080ff>" + msg.getString("Clusters.Threshold") + "<TH ALIGN=center BGCOLOR=#8080ff>" + msg.getString("Clusters.Cluster_members") + "<TH ALIGN=center BGCOLOR=#8080ff>" + msg.getString("Clusters.Most_frequent_words") + "</TR>"); Iterator<Cluster> clusterI = allClusters.iterator(); for (int i = 1; clusterI.hasNext(); i++) { Cluster cluster = clusterI.next(); if (max < ++distribution[cluster.size()]) max = distribution[cluster.size()]; // no singleton clusters if (cluster.size() == 1) continue; f.print( "<TR><TD ALIGN=center BGCOLOR=#8080ff>" + i + "<TD ALIGN=center BGCOLOR=#c0c0ff>" + cluster.size() + "<TD ALIGN=center BGCOLOR=#c0c0ff>" + cluster.getSimilarity() + "<TD ALIGN=left BGCOLOR=#c0c0ff>"); // sort names TreeSet<Submission> sortedSubmissions = new TreeSet<Submission>(); for (int x = 0; x < cluster.size(); x++) { sortedSubmissions.add(submissions.elementAt(cluster.getSubmissionAt(x))); } for (Iterator<Submission> iter = sortedSubmissions.iterator(); iter.hasNext(); ) { Submission sub = iter.next(); int index = submissions.indexOf(sub); f.print("<A HREF=\"submission" + index + ".html\">" + sub.name + "</A>"); if (iter.hasNext()) f.print(", "); neededSubmissions.add(sub); // write files for these. 
} if (this.program.get_language() instanceof jplag.text.Language) { f.println( "<TD ALIGN=left BGCOLOR=#c0c0ff>" + ThemeGenerator.generateThemes( sortedSubmissions, this.program.get_themewords(), true, this.program)); } else { f.println("<TD ALIGN=left BGCOLOR=#c0c0ff>-"); } f.println("</TR>"); } f.println("</TABLE>\n<P>\n"); f.println("<H5>" + msg.getString("Clusters.Distribution_of_cluster_size") + ":</H5>"); String text; text = "<TABLE CELLPADDING=1 CELLSPACING=1>\n"; text += "<TR><TH ALIGN=center BGCOLOR=#8080ff>" + msg.getString("Clusters.Cluster_size") + "<TH ALIGN=center BGCOLOR=#8080ff>" + msg.getString("Clusters.Number_of_clusters") + "<TH ALIGN=center BGCOLOR=#8080ff>.</TR>\n"; for (int i = 0; i <= maxSize; i++) { if (distribution[i] == 0) continue; text += "<TR><TD ALIGN=center BGCOLOR=#c0c0ff>" + i + "<TD ALIGN=right BGCOLOR=#c0c0ff>" + distribution[i] + "<TD BGCOLOR=#c0c0ff>\n"; for (int j = (distribution[i] * barLength / max); j > 0; j--) text += ("#"); if (distribution[i] * barLength / max == 0) { if (distribution[i] == 0) text += ("."); else text += ("#"); } text += ("</TR>\n"); } text += ("</TABLE>\n"); f.print(text); return text; }
private void setupAndStartCluster() throws Exception { // form config files String masters = cluster.getMaster().getPrivateDnsName() + "\n"; Files.write(masters.getBytes(), new File(mastersFile)); List<String> slavesList = new ArrayList<String>(); for (int i = 0; i < cluster.size(); ++i) { Server server = cluster.get(i); if (server.isTaskTracker()) { slavesList.add(server.getPrivateDnsName()); } } String[] slaves = (String[]) slavesList.toArray(new String[0]); Files.write(Util.arrayToString(slaves).getBytes(), new File(slavesFile)); String coreSite = Util.readTextFile("config/" + coreSiteFile); coreSite = coreSite.replaceFirst("localhost", cluster.getMaster().getPrivateDnsName()); Files.write(coreSite.getBytes(), new File(coreSiteFile)); String mapredSite = Util.readTextFile("config/" + mapredSiteFile); mapredSite = mapredSite.replaceFirst("localhost", cluster.getJobTracker().getPrivateDnsName()); Files.write(mapredSite.getBytes(), new File(mapredSiteFile)); String cmd; String[] output; // push config files to the cluster logger.info("Configuring the Hadoop cluster"); ClusterCommand clusterCommand = new ClusterCommand(cluster); clusterCommand.runScpWaitForAll(mastersFile, mastersFile); clusterCommand.runScpWaitForAll(slavesFile, slavesFile); clusterCommand.runScpWaitForAll("config/" + hdfsSiteFile, hdfsSiteFile); clusterCommand.runScpWaitForAll(coreSiteFile, coreSiteFile); clusterCommand.runScpWaitForAll(mapredSiteFile, mapredSiteFile); // copy from home on remote to the config area clusterCommand.runCommandWaitForAll("sudo cp " + mastersFile + " /etc/hadoop/conf/"); clusterCommand.runCommandWaitForAll("sudo cp " + slavesFile + " /etc/hadoop/conf/"); clusterCommand.runCommandWaitForAll("sudo cp " + hdfsSiteFile + " /etc/hadoop/conf/"); clusterCommand.runCommandWaitForAll("sudo cp " + coreSiteFile + " /etc/hadoop/conf/"); clusterCommand.runCommandWaitForAll("sudo cp " + mapredSiteFile + " /etc/hadoop/conf/"); // create /mnt/tmp for everyone to use 
clusterCommand.runCommandWaitForAll("sudo rm -fr /mnt/tmp"); clusterCommand.runCommandWaitForAll("sudo mkdir /mnt/tmp"); clusterCommand.runCommandWaitForAll("sudo chmod 777 /mnt/tmp"); // create /mnt/tmp for hadoop tmp dir clusterCommand.runCommandWaitForAll("sudo mkdir /mnt/tmp/hadoop"); clusterCommand.runCommandWaitForAll("sudo chmod 777 /mnt/tmp/hadoop"); logger.info("Hadoop cluster configured, starting the services"); // shut down all services // clean up dfs on slaves hadoopReady = false; cmd = "for service in /etc/init.d/hadoop-0.20-*; do sudo $service stop; done"; clusterCommand.runCommandWaitForAll(cmd); cmd = "sudo rm -fr /var/lib/hadoop-0.20/cache/*"; clusterCommand.runCommandWaitForAll(cmd); SSHAgent sshAgent = new SSHAgent(); sshAgent.setUser(ParameterProcessing.CLUSTER_USER_NAME); sshAgent.setKey(ParameterProcessing.PEM_CERTIFICATE_NAME); sshAgent.setHost(cluster.getMaster().getDnsName()); cmd = "sudo -u hdfs hadoop namenode -format"; sshAgent.executeCommand(cmd); cmd = "sudo service hadoop-0.20-namenode start"; output = sshAgent.executeCommand(cmd); logger.info(Util.arrayToString(output)); // start all hdfs slaves clusterCommand = new ClusterCommand(cluster.getDataNodes()); cmd = "sudo service hadoop-0.20-datanode start"; clusterCommand.runCommandWaitForAll(cmd); // start all tasktrackers clusterCommand = new ClusterCommand(cluster.getTaskTrackers()); cmd = "sudo service hadoop-0.20-tasktracker start"; clusterCommand.runCommandWaitForAll(cmd); sshAgent.setHost(cluster.getJobTracker().getDnsName()); cmd = "sudo service hadoop-0.20-jobtracker start"; output = sshAgent.executeCommand(cmd); logger.info(Util.arrayToString(output)); logger.info("Cluster configuration and startup is complete"); cmd = "sudo rm /usr/lib/hadoop/lib/jets3t*.jar"; clusterCommand = new ClusterCommand(cluster); clusterCommand.runCommandWaitForAll(cmd); // install a fresh version of FreeEed installFreeEed(); // run a distributed grep app verifyOperation(); if (callingUI != null) { 
callingUI.refreshStatus(); } }