/**
 * Constructs the majority tree. Used to make the Network, so remember to call this first!
 */
public CTree constructMajorityTree() {
    int numOfNode = 0;
    CTree tree = new CTree();

    // Creates the clusters.
    tree.clusters = constructClusters(partitions, resPercentage);

    // Begins by constructing the star tree.
    for (TreeNode node : tree.clusters.get(0)) {
        TreeNode root = tree.getRoot();    // Retrieves the root.
        node.parent = root;                // Parent of this node -> root.
        root.children.add(node);           // Adds this node as a child of the root.
        tree.nodeList.add(node);           // Adds this node to the list of nodes.
        assert tree.nodeList.get(0).name.equals("root");
        tree.parentList.put(node.name, 0); // Adds "this node -> root" parent mapping.
    }

    // Constructs internal nodes for the rest of the majority bi-partitions and rewires them.
    for (int z = 1; z < tree.clusters.size(); z++) {
        Cluster cluster = tree.clusters.get(z);
        // Only take the majority ones!
        if (cluster.isMajority) {
            // 1. Retrieves the parent of the first node in this cluster.
            TreeNode parent = tree.nodeList.get(tree.parentList.get(cluster.get(0).name));

            // 2. Constructs a new internal node.
            String nodeName = "int" + Integer.toString(numOfNode);
            TreeNode internalNode = new TreeNode(nodeName);
            internalNode.addProperty("noOfOccurrences", cluster.noOfOccurrences);
            internalNode.edgeLength = cluster.edgeLength;
            internalNode.parent = parent;

            // 3. Inserts the new node into the node list.
            tree.nodeList.add(internalNode);
            assert tree.nodeList.get(tree.nodeList.size() - 1).name.equals(internalNode.name);
            tree.parentList.put(nodeName, tree.nodeList.size() - 1);

            // Updates the cluster's node references. Edges are stored as positions in the
            // tree's node list so they can be resolved later.
            tree.clusters.get(z).nodeRefA = tree.parentList.get(cluster.get(0).name);
            tree.clusters.get(z).nodeRefB = tree.nodeList.size() - 1;

            for (TreeNode node : cluster) {
                // 4. Makes this node a child of the new internal node.
                node.parent = internalNode;
                assert node.parent.name.equals(tree.nodeList.get(tree.nodeList.size() - 1).name);
                tree.parentList.put(node.name, tree.nodeList.size() - 1);
                internalNode.children.add(node);

                // 5. Deletes the moved node from the old parent's children.
                // TODO: optimize? probably not.
                for (int i = 0; i < parent.children.size(); i++) {
                    if (parent.children.get(i).name.equals(node.name)) {
                        parent.children.remove(i);
                        break;
                    }
                }
            }

            // Wires up the internal node.
            parent.children.add(internalNode);
            numOfNode++;
        }
    }
    return tree;
}
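/*
 * For context, a minimal sketch of the TreeNode shape that constructMajorityTree()
 * relies on: a parent pointer, a child list, named properties, and an edge length.
 * This is an assumption inferred from usage above, not the project's actual class.
 */
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

class TreeNode {
    String name;
    TreeNode parent;  // null for the root
    List<TreeNode> children = new ArrayList<TreeNode>();
    Map<String, Object> properties = new HashMap<String, Object>();
    double edgeLength;

    TreeNode(String name) {
        this.name = name;
    }

    // Stores an arbitrary named property, e.g. "noOfOccurrences".
    void addProperty(String key, Object value) {
        properties.put(key, value);
    }
}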
/**
 * @param args
 * @throws InvalidEducationValueException
 */
public static void main(String[] args) throws InvalidEducationValueException {
    final ClusterNumberFormat cnf = new ClusterNumberFormat();
    final ClusterTestData ctd = new ClusterTestData();
    Cluster tempClusterI = null;
    Cluster tempClusterJ = null;
    Cluster minClusterA = null;
    Cluster minClusterB = null;
    Instance tempClusterIInstance = null;
    Instance tempClusterJInstance = null;
    Instance minInstanceA = null;
    Instance minInstanceB = null;
    double dist = 0.0;
    double minDist = Double.POSITIVE_INFINITY;
    ArrayList<Cluster> clusters = ctd.getClusters();

    // Single-linkage agglomerative clustering: on each pass, find the pair of clusters
    // whose closest member instances are nearest, merge them, and repeat until one
    // cluster remains.
    while (clusters.size() > 1) {
        for (int i = 0; i < clusters.size(); i++) {
            for (int j = i + 1; j < clusters.size(); j++) {
                tempClusterI = clusters.get(i);
                tempClusterJ = clusters.get(j);
                System.out.println("Cluster " + tempClusterI.getName()
                        + " has Instance(s) " + tempClusterI.getInstancesNameSet());
                System.out.println("Cluster " + tempClusterJ.getName()
                        + " has Instance(s) " + tempClusterJ.getInstancesNameSet());
                for (int k = 0; k < tempClusterI.size(); k++) {
                    for (int l = 0; l < tempClusterJ.size(); l++) {
                        tempClusterIInstance = tempClusterI.get(k);
                        tempClusterJInstance = tempClusterJ.get(l);
                        dist = ClusterCalculation.distance(tempClusterIInstance, tempClusterJInstance);
                        System.out.println("    DIST(" + tempClusterIInstance.getName() + ","
                                + tempClusterJInstance.getName() + ") = " + cnf.format(dist));
                        if (dist < minDist) {
                            minDist = dist;
                            minClusterA = tempClusterI;
                            minInstanceA = tempClusterIInstance;
                            minClusterB = tempClusterJ;
                            minInstanceB = tempClusterJInstance;
                        }
                    }
                }
            }
        }
        System.out.println("*** Minimum exists between Instance " + minInstanceA.getName()
                + " of Cluster " + minClusterA.getName()
                + " and Instance " + minInstanceB.getName()
                + " of Cluster " + minClusterB.getName()
                + " with a distance of " + cnf.format(minDist));
        System.out.print("  * Merging cluster " + minClusterB.getName()
                + " into cluster " + minClusterA.getName() + ": ");

        // minClusterA and minClusterB are the two clusters with the closest member Instance(s).
        // Merge B into A and remove B from the list of Clusters.
        minClusterA.merge(minClusterB);
        clusters.remove(minClusterB);
        System.out.println("Cluster " + minClusterA.getName()
                + " now contains instance(s) " + minClusterA.getInstancesNameSet());
        minDist = Double.POSITIVE_INFINITY;
    }
}
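/*
 * The loop above treats ClusterCalculation.distance(a, b) as a black box. A common
 * choice for such a metric is plain Euclidean distance over the instances' numeric
 * attributes; the sketch below assumes a hypothetical Instance.getValues() accessor
 * returning those attributes, and is not necessarily how the project computes it.
 */
public class ClusterCalculation {
    public static double distance(Instance a, Instance b) {
        double[] x = a.getValues();  // hypothetical accessor for numeric attributes
        double[] y = b.getValues();
        double sum = 0.0;
        for (int i = 0; i < x.length; i++) {
            double d = x[i] - y[i];
            sum += d * d;
        }
        return Math.sqrt(sum);
    }
}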
private void setupAndStartCluster() throws Exception {
    // Form config files.
    String masters = cluster.getMaster().getPrivateDnsName() + "\n";
    Files.write(masters.getBytes(), new File(mastersFile));

    List<String> slavesList = new ArrayList<String>();
    for (int i = 0; i < cluster.size(); ++i) {
        Server server = cluster.get(i);
        if (server.isTaskTracker()) {
            slavesList.add(server.getPrivateDnsName());
        }
    }
    String[] slaves = slavesList.toArray(new String[0]);
    Files.write(Util.arrayToString(slaves).getBytes(), new File(slavesFile));

    String coreSite = Util.readTextFile("config/" + coreSiteFile);
    coreSite = coreSite.replaceFirst("localhost", cluster.getMaster().getPrivateDnsName());
    Files.write(coreSite.getBytes(), new File(coreSiteFile));

    String mapredSite = Util.readTextFile("config/" + mapredSiteFile);
    mapredSite = mapredSite.replaceFirst("localhost", cluster.getJobTracker().getPrivateDnsName());
    Files.write(mapredSite.getBytes(), new File(mapredSiteFile));

    String cmd;
    String[] output;

    // Push config files to the cluster.
    logger.info("Configuring the Hadoop cluster");
    ClusterCommand clusterCommand = new ClusterCommand(cluster);
    clusterCommand.runScpWaitForAll(mastersFile, mastersFile);
    clusterCommand.runScpWaitForAll(slavesFile, slavesFile);
    clusterCommand.runScpWaitForAll("config/" + hdfsSiteFile, hdfsSiteFile);
    clusterCommand.runScpWaitForAll(coreSiteFile, coreSiteFile);
    clusterCommand.runScpWaitForAll(mapredSiteFile, mapredSiteFile);

    // Copy from home on the remote machines to the config area.
    clusterCommand.runCommandWaitForAll("sudo cp " + mastersFile + " /etc/hadoop/conf/");
    clusterCommand.runCommandWaitForAll("sudo cp " + slavesFile + " /etc/hadoop/conf/");
    clusterCommand.runCommandWaitForAll("sudo cp " + hdfsSiteFile + " /etc/hadoop/conf/");
    clusterCommand.runCommandWaitForAll("sudo cp " + coreSiteFile + " /etc/hadoop/conf/");
    clusterCommand.runCommandWaitForAll("sudo cp " + mapredSiteFile + " /etc/hadoop/conf/");

    // Create /mnt/tmp for everyone to use.
    clusterCommand.runCommandWaitForAll("sudo rm -fr /mnt/tmp");
    clusterCommand.runCommandWaitForAll("sudo mkdir /mnt/tmp");
    clusterCommand.runCommandWaitForAll("sudo chmod 777 /mnt/tmp");

    // Create /mnt/tmp/hadoop for the Hadoop tmp dir.
    clusterCommand.runCommandWaitForAll("sudo mkdir /mnt/tmp/hadoop");
    clusterCommand.runCommandWaitForAll("sudo chmod 777 /mnt/tmp/hadoop");

    logger.info("Hadoop cluster configured, starting the services");

    // Shut down all services and clean up DFS on the slaves.
    hadoopReady = false;
    cmd = "for service in /etc/init.d/hadoop-0.20-*; do sudo $service stop; done";
    clusterCommand.runCommandWaitForAll(cmd);
    cmd = "sudo rm -fr /var/lib/hadoop-0.20/cache/*";
    clusterCommand.runCommandWaitForAll(cmd);

    // Format the namenode and start it on the master.
    SSHAgent sshAgent = new SSHAgent();
    sshAgent.setUser(ParameterProcessing.CLUSTER_USER_NAME);
    sshAgent.setKey(ParameterProcessing.PEM_CERTIFICATE_NAME);
    sshAgent.setHost(cluster.getMaster().getDnsName());
    cmd = "sudo -u hdfs hadoop namenode -format";
    sshAgent.executeCommand(cmd);
    cmd = "sudo service hadoop-0.20-namenode start";
    output = sshAgent.executeCommand(cmd);
    logger.info(Util.arrayToString(output));

    // Start all HDFS slaves (datanodes).
    clusterCommand = new ClusterCommand(cluster.getDataNodes());
    cmd = "sudo service hadoop-0.20-datanode start";
    clusterCommand.runCommandWaitForAll(cmd);

    // Start all tasktrackers.
    clusterCommand = new ClusterCommand(cluster.getTaskTrackers());
    cmd = "sudo service hadoop-0.20-tasktracker start";
    clusterCommand.runCommandWaitForAll(cmd);

    // Start the jobtracker.
    sshAgent.setHost(cluster.getJobTracker().getDnsName());
    cmd = "sudo service hadoop-0.20-jobtracker start";
    output = sshAgent.executeCommand(cmd);
    logger.info(Util.arrayToString(output));
    logger.info("Cluster configuration and startup is complete");

    // Remove the jets3t jars from the Hadoop lib directory.
    cmd = "sudo rm /usr/lib/hadoop/lib/jets3t*.jar";
    clusterCommand = new ClusterCommand(cluster);
    clusterCommand.runCommandWaitForAll(cmd);

    // Install a fresh version of FreeEed.
    installFreeEed();

    // Run a distributed grep app to verify operation.
    verifyOperation();

    if (callingUI != null) {
        callingUI.refreshStatus();
    }
}
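/*
 * runCommandWaitForAll is used throughout setupAndStartCluster() as "run this shell
 * command on every node and block until all of them finish." A minimal sketch of that
 * pattern, one thread per server, reusing the SSHAgent calls seen above. The
 * List<Server> servers field and the loop body are assumptions about how the class
 * might behave, not its actual source.
 */
public void runCommandWaitForAll(final String cmd) throws InterruptedException {
    List<Thread> threads = new ArrayList<Thread>();
    for (final Server server : servers) {  // assumed field holding the target nodes
        Thread t = new Thread(new Runnable() {
            public void run() {
                try {
                    SSHAgent agent = new SSHAgent();
                    agent.setUser(ParameterProcessing.CLUSTER_USER_NAME);
                    agent.setKey(ParameterProcessing.PEM_CERTIFICATE_NAME);
                    agent.setHost(server.getDnsName());
                    agent.executeCommand(cmd);
                } catch (Exception e) {
                    logger.error("Command failed on " + server.getDnsName(), e);
                }
            }
        });
        threads.add(t);
        t.start();
    }
    // Wait for every node to finish before returning.
    for (Thread t : threads) {
        t.join();
    }
}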