Пример #1
0
 /**
  * Appends one HTML image-map {@code <area>} element describing {@code cluster} to
  * {@code mapString}.
  *
  * <p>The rectangle's coordinates are {@code cluster.x - 2}, {@code yBar}, {@code cluster.x + 2}
  * and {@code cluster.y + 2}. The {@code onMouseover} handler calls {@code set(...)} with the
  * cluster size, the similarity (passed through {@code trimStringToLength(…, 6)}), the
  * space-separated member names (passed through {@code trimStringToLength(…, 50)}) and the theme
  * text produced by {@code ThemeGenerator.generateThemes}.
  *
  * @param cluster cluster whose members and position are written
  * @param yBar value emitted as the second coordinate of the rectangle
  */
 public void writeMap(Cluster cluster, float yBar) {
   // Collect the member submissions and their names in one pass.
   Set<Submission> members = new HashSet<Submission>(cluster.size());
   StringBuilder nameList = new StringBuilder();
   for (int pos = 0; pos < cluster.size(); pos++) {
     Submission member = submissions.elementAt(cluster.getSubmissionAt(pos));
     nameList.append(member.name).append(" ");
     members.add(member);
   }
   String documents = nameList.toString().trim();

   String theme =
       ThemeGenerator.generateThemes(members, this.program.get_themewords(), false, this.program);

   String coords =
       (cluster.x - 2) + "," + (yBar) + "," + (cluster.x + 2) + "," + (cluster.y + 2);
   String handlerArgs =
       "'"
           + cluster.size()
           + "','"
           + trimStringToLength(String.valueOf(cluster.getSimilarity()), 6)
           + "','"
           + trimStringToLength(documents, 50)
           + "','"
           + theme
           + "'";

   // The area is never a link: linking singleton clusters to their submission page is disabled,
   // so every entry is closed with "nohref".
   mapString +=
       "<area shape=\"rect\" coords=\""
           + coords
           + "\" onMouseover=\"set("
           + handlerArgs
           + ")\" "
           + "nohref>\n";
 }
Пример #2
0
 /**
  * Draws {@code cluster} and, recursively, its children, and stores the drawing position in
  * {@code cluster.x} / {@code cluster.y}.
  *
  * <p>A cluster contained in the {@code clusters} list is drawn as a small rectangle at the
  * bottom line {@code maxY}; its horizontal slot is derived from its index in that list. Any
  * other cluster is drawn as a horizontal bar joining its left and right child at a height
  * derived from {@code cluster.getSimilarity() / threshold}; for each child lying below the bar
  * a vertical connector is drawn and an image-map entry is written via {@code writeMap}.
  *
  * @param cluster the cluster to draw
  */
 public void drawCluster(Cluster cluster) {
   int slot = clusters.indexOf(cluster);

   if (slot != -1) {
     // Listed cluster: place it on the base line; red marks clusters with more than one member.
     cluster.y = maxY;
     cluster.x = minX + slot * factor;
     int members = cluster.size();
     g.setColor(members > 1 ? Color.RED : Color.BLACK);
     g.drawRect(cluster.x - 1, cluster.y - members, 2, 1 + members);
     return;
   }

   // Inner node: children must be drawn first so that their x/y are known.
   Cluster leftChild = cluster.getLeft();
   Cluster rightChild = cluster.getRight();
   drawCluster(leftChild);
   drawCluster(rightChild);

   int barY = minY + (int) ((maxY - minY) * (cluster.getSimilarity() / threshold));

   g.setColor(Color.DARK_GRAY);
   if (leftChild.y > barY) {
     g.drawLine(leftChild.x, leftChild.y - 1, leftChild.x, barY);
     writeMap(leftChild, barY);
   }
   if (rightChild.y > barY) {
     g.drawLine(rightChild.x, rightChild.y - 1, rightChild.x, barY);
     writeMap(rightChild, barY);
   }

   g.setColor(Color.BLACK);
   g.drawLine(leftChild.x, barY, rightChild.x, barY);

   // The node itself sits in the middle of the bar.
   cluster.x = (rightChild.x + leftChild.x) / 2;
   cluster.y = barY;
 }
 /**
  * Checks that a {@code BackedClusterImpl} starts empty and that the induced graph obtained
  * before any vertex was added reports the edge once both vertices are in the cluster.
  */
 @Test
 public void testBacked() {
   Cluster<Integer, Integer> backedCluster = new BackedClusterImpl<Integer, Integer>(graph);
   Graph<Integer, Integer> induced = backedCluster.getInducedGraph();

   // Nothing added yet: no induced edges, no members.
   assertEquals(0, induced.getEdgeCount());
   assertEquals(0, backedCluster.size());

   backedCluster.add(0);
   backedCluster.add(1);

   // The graph instance fetched earlier is queried again on purpose.
   assertEquals(2, backedCluster.size());
   assertEquals(1, induced.getEdgeCount());
 }
Пример #4
0
  /**
   * Determines the clusters of {@code clustering} for the given threshold, orders them with a
   * {@code TreeSet} and hands them to {@code outputClustering} for printing.
   *
   * @param clustering cluster passed to {@code getClusters} as the starting point
   * @param threshold similarity threshold passed to {@code getClusters}
   * @param f HTML file the output is written to
   * @return whatever {@code outputClustering} returns
   */
  public String printClusters(Cluster clustering, float threshold, HTMLFile f) {
    ArrayList<Cluster> found = getClusters(clustering, threshold);

    // Size of the biggest cluster; outputClustering needs it to size its histogram.
    int largest = 0;
    for (Cluster candidate : found) {
      largest = Math.max(largest, candidate.size());
    }

    // Now print them, in the order defined by the TreeSet:
    return outputClustering(f, new TreeSet<Cluster>(found), largest);
  }
Пример #5
0
 /**
  * Assigns roles within the cluster and then runs {@code setupAndStartCluster()}. Does nothing
  * when the cluster has size zero.
  *
  * @throws Exception propagated from role assignment or from {@code setupAndStartCluster()}
  */
 private void setupAndStartImpl() throws Exception {
   boolean hasMembers = cluster.size() != 0;
   if (hasMembers) {
     cluster.assignRoles();
     setupAndStartCluster();
   }
 }
Пример #6
0
 /**
  * Assigns roles within the cluster and then runs {@code verifyOperation()}. Does nothing when
  * the cluster has size zero.
  *
  * @throws Exception propagated from role assignment or from {@code verifyOperation()}
  */
 private void checkHealthImpl() throws Exception {
   boolean hasMembers = cluster.size() != 0;
   if (hasMembers) {
     cluster.assignRoles();
     verifyOperation();
   }
 }
Пример #7
0
  /**
   * Walks the cluster tree below {@code clustering} and collects the clusters to report.
   *
   * <p>A node is collected when it has size 1 or when its similarity is at least
   * {@code threshold}; otherwise its left and right children are examined instead.
   *
   * @param clustering node at which the traversal starts
   * @param threshold minimum similarity for a non-singleton node to be collected as a whole
   * @return the collected clusters, in traversal order
   */
  private ArrayList<Cluster> getClusters(Cluster clustering, float threshold) {
    ArrayList<Cluster> collected = new ArrayList<Cluster>();

    Stack<Cluster> pending = new Stack<Cluster>();
    pending.push(clustering);

    while (!pending.isEmpty()) {
      Cluster node = pending.pop();

      // Short-circuit: the similarity is only consulted for non-singleton nodes.
      boolean keepWhole = node.size() == 1 || node.getSimilarity() >= threshold;
      if (keepWhole) {
        collected.add(node);
        continue;
      }

      // Not similar enough and not a singleton: descend into both children.
      pending.push(node.getLeft());
      pending.push(node.getRight());
    }

    return collected;
  }
  /**
   * Demo driver: starting from the clusters supplied by {@code ClusterTestData}, repeatedly
   * finds the pair of instances from two different clusters with the smallest distance, merges
   * the second cluster into the first and removes it from the list, until at most one cluster is
   * left. Every comparison and every merge is logged to standard output.
   *
   * @param args command-line arguments; not used
   * @throws InvalidEducationValueException declared because code called here may raise it
   */
  public static void main(String[] args) throws InvalidEducationValueException {
    final ClusterNumberFormat formatter = new ClusterNumberFormat();
    final ClusterTestData testData = new ClusterTestData();

    // Closest pair found in the current round. Declared outside the loop so that they retain
    // their previous values if a round finds no distance below infinity.
    Cluster closestClusterA = null;
    Cluster closestClusterB = null;
    Instance closestInstanceA = null;
    Instance closestInstanceB = null;

    ArrayList<Cluster> clusters = testData.getClusters();

    while (clusters.size() > 1) {
      double smallest = Double.POSITIVE_INFINITY;

      // Compare every unordered pair of clusters.
      for (int i = 0; i < clusters.size(); i++) {
        Cluster first = clusters.get(i);

        for (int j = i + 1; j < clusters.size(); j++) {
          Cluster second = clusters.get(j);

          System.out.println(
              "Cluster " + first.getName() + " has Instance(s) " + first.getInstancesNameSet());
          System.out.println(
              "Cluster " + second.getName() + " has Instance(s) " + second.getInstancesNameSet());

          // Compare every instance of the first cluster with every instance of the second.
          for (int k = 0; k < first.size(); k++) {
            Instance fromFirst = first.get(k);

            for (int l = 0; l < second.size(); l++) {
              Instance fromSecond = second.get(l);
              double dist = ClusterCalculation.distance(fromFirst, fromSecond);

              System.out.println(
                  "   DIST("
                      + fromFirst.getName()
                      + ","
                      + fromSecond.getName()
                      + ") = "
                      + formatter.format(dist));

              if (dist < smallest) {
                smallest = dist;
                closestClusterA = first;
                closestInstanceA = fromFirst;
                closestClusterB = second;
                closestInstanceB = fromSecond;
              }
            }
          }
        }
      }

      System.out.println(
          "*** Minimum exists between Instance "
              + closestInstanceA.getName()
              + " of Cluster "
              + closestClusterA.getName()
              + " and Instance "
              + closestInstanceB.getName()
              + " of Cluster "
              + closestClusterB.getName()
              + " with a distance of "
              + formatter.format(smallest));
      System.out.print(
          "  * Merging cluster "
              + closestClusterB.getName()
              + " into cluster "
              + closestClusterA.getName()
              + ": ");

      // The two clusters holding the closest instances are combined: B is merged into A and
      // then dropped from the list of clusters.
      closestClusterA.merge(closestClusterB);
      clusters.remove(closestClusterB);

      System.out.println(
          "Cluster "
              + closestClusterA.getName()
              + " now contains instance(s) "
              + closestClusterA.getInstancesNameSet());
    }
  }
Пример #9
0
  /**
   * Writes the cluster table and the cluster-size distribution table to {@code f}.
   *
   * <p>Every cluster counts towards the size distribution, but clusters of size 1 get no row in
   * the cluster table. The running cluster number still advances for them, so the printed
   * numbers may have gaps. Each submission listed in the table is also added to
   * {@code neededSubmissions}.
   *
   * @param f HTML file the tables are written to
   * @param allClusters clusters to print, in the order they should appear
   * @param maxSize size of the largest cluster; must be at least the size of every cluster in
   *     {@code allClusters}, because it bounds the distribution array
   * @return the HTML of the distribution table (the same text that was printed to {@code f})
   */
  private String outputClustering(HTMLFile f, Collection<Cluster> allClusters, int maxSize) {
    // distribution[s] = number of clusters of size s. A new int[] is already zero-filled.
    int[] distribution = new int[maxSize + 1];
    // Highest count in the distribution; used to scale the bars.
    int max = 0;

    // Now output the clustering:
    f.println("<TABLE CELLPADDING=2 CELLSPACING=2>");

    f.println(
        "<TR><TH ALIGN=center BGCOLOR=#8080ff>"
            + msg.getString("Clusters.Cluster_number")
            + "<TH ALIGN=center BGCOLOR=#8080ff>"
            + msg.getString("Clusters.Size")
            + "<TH ALIGN=center BGCOLOR=#8080ff>"
            + msg.getString("Clusters.Threshold")
            + "<TH ALIGN=center BGCOLOR=#8080ff>"
            + msg.getString("Clusters.Cluster_members")
            + "<TH ALIGN=center BGCOLOR=#8080ff>"
            + msg.getString("Clusters.Most_frequent_words")
            + "</TR>");

    int clusterNumber = 0;
    for (Cluster cluster : allClusters) {
      clusterNumber++;
      int size = cluster.size();
      max = Math.max(max, ++distribution[size]);

      // no singleton clusters
      if (size == 1) {
        continue;
      }

      f.print(
          "<TR><TD ALIGN=center BGCOLOR=#8080ff>"
              + clusterNumber
              + "<TD ALIGN=center BGCOLOR=#c0c0ff>"
              + size
              + "<TD ALIGN=center BGCOLOR=#c0c0ff>"
              + cluster.getSimilarity()
              + "<TD ALIGN=left BGCOLOR=#c0c0ff>");

      // sort names
      TreeSet<Submission> sortedSubmissions = new TreeSet<Submission>();
      for (int x = 0; x < size; x++) {
        sortedSubmissions.add(submissions.elementAt(cluster.getSubmissionAt(x)));
      }

      for (Iterator<Submission> iter = sortedSubmissions.iterator(); iter.hasNext(); ) {
        Submission sub = iter.next();
        int index = submissions.indexOf(sub);
        f.print("<A HREF=\"submission" + index + ".html\">" + sub.name + "</A>");
        if (iter.hasNext()) {
          f.print(", ");
        }
        neededSubmissions.add(sub); // write files for these.
      }

      // Theme words are only generated when the program's language is jplag.text.Language.
      if (this.program.get_language() instanceof jplag.text.Language) {
        f.println(
            "<TD ALIGN=left BGCOLOR=#c0c0ff>"
                + ThemeGenerator.generateThemes(
                    sortedSubmissions, this.program.get_themewords(), true, this.program));
      } else {
        f.println("<TD ALIGN=left BGCOLOR=#c0c0ff>-");
      }

      f.println("</TR>");
    }
    f.println("</TABLE>\n<P>\n");

    f.println("<H5>" + msg.getString("Clusters.Distribution_of_cluster_size") + ":</H5>");

    StringBuilder text = new StringBuilder("<TABLE CELLPADDING=1 CELLSPACING=1>\n");
    text.append("<TR><TH ALIGN=center BGCOLOR=#8080ff>")
        .append(msg.getString("Clusters.Cluster_size"))
        .append("<TH ALIGN=center BGCOLOR=#8080ff>")
        .append(msg.getString("Clusters.Number_of_clusters"))
        .append("<TH ALIGN=center BGCOLOR=#8080ff>.</TR>\n");

    for (int i = 0; i <= maxSize; i++) {
      // Sizes that never occur get no row. This also guarantees max > 0 below.
      if (distribution[i] == 0) {
        continue;
      }
      text.append("<TR><TD ALIGN=center BGCOLOR=#c0c0ff>")
          .append(i)
          .append("<TD ALIGN=right BGCOLOR=#c0c0ff>")
          .append(distribution[i])
          .append("<TD BGCOLOR=#c0c0ff>\n");

      int barChars = distribution[i] * barLength / max;
      for (int j = barChars; j > 0; j--) {
        text.append("#");
      }
      // A row that is present always shows at least one mark, even if the scaled length
      // rounds down to zero.
      if (barChars == 0) {
        text.append("#");
      }
      text.append("</TR>\n");
    }
    text.append("</TABLE>\n");

    String distributionHtml = text.toString();
    f.print(distributionHtml);
    return distributionHtml;
  }
Пример #10
0
  /**
   * Generates the Hadoop configuration files for the current cluster, distributes them to all
   * nodes, restarts the Hadoop 0.20 services and finally installs FreeEed and verifies the
   * cluster.
   *
   * <p>Steps, in order:
   *
   * <ol>
   *   <li>write the masters and slaves files and the patched core-site / mapred-site files;
   *   <li>scp the files to every node and copy them into {@code /etc/hadoop/conf/};
   *   <li>recreate {@code /mnt/tmp} and {@code /mnt/tmp/hadoop} with mode 777;
   *   <li>stop all {@code hadoop-0.20-*} services and clear the Hadoop cache directory;
   *   <li>format and start the namenode, then start datanodes, tasktrackers and the jobtracker;
   *   <li>remove the jets3t jars, call {@code installFreeEed()} and {@code verifyOperation()},
   *       and refresh the calling UI if there is one.
   * </ol>
   *
   * <p>The statement order matters: the remote commands build on each other.
   *
   * @throws Exception propagated from file I/O or from any of the remote commands
   */
  private void setupAndStartCluster() throws Exception {
    // form config files
    // masters file: the master's private DNS name on a single line
    // NOTE(review): getBytes() without a charset uses the platform default encoding.
    String masters = cluster.getMaster().getPrivateDnsName() + "\n";
    Files.write(masters.getBytes(), new File(mastersFile));

    // slaves file: private DNS names of every server that is a task tracker
    List<String> slavesList = new ArrayList<String>();
    for (int i = 0; i < cluster.size(); ++i) {
      Server server = cluster.get(i);
      if (server.isTaskTracker()) {
        slavesList.add(server.getPrivateDnsName());
      }
    }
    String[] slaves = (String[]) slavesList.toArray(new String[0]);
    Files.write(Util.arrayToString(slaves).getBytes(), new File(slavesFile));

    // core-site: take the template from config/ and point the first "localhost" at the master
    // NOTE(review): replaceFirst treats "localhost" as a regex and gives '$' and '\' in the
    // replacement a special meaning; harmless as long as DNS names contain neither.
    String coreSite = Util.readTextFile("config/" + coreSiteFile);
    coreSite = coreSite.replaceFirst("localhost", cluster.getMaster().getPrivateDnsName());
    Files.write(coreSite.getBytes(), new File(coreSiteFile));

    // mapred-site: same approach, but the first "localhost" becomes the job tracker's name
    String mapredSite = Util.readTextFile("config/" + mapredSiteFile);
    mapredSite = mapredSite.replaceFirst("localhost", cluster.getJobTracker().getPrivateDnsName());
    Files.write(mapredSite.getBytes(), new File(mapredSiteFile));

    String cmd;

    String[] output;
    // push config files to the cluster
    // (the hdfs-site file is sent straight from config/, without modification)
    logger.info("Configuring the Hadoop cluster");
    ClusterCommand clusterCommand = new ClusterCommand(cluster);
    clusterCommand.runScpWaitForAll(mastersFile, mastersFile);
    clusterCommand.runScpWaitForAll(slavesFile, slavesFile);
    clusterCommand.runScpWaitForAll("config/" + hdfsSiteFile, hdfsSiteFile);
    clusterCommand.runScpWaitForAll(coreSiteFile, coreSiteFile);
    clusterCommand.runScpWaitForAll(mapredSiteFile, mapredSiteFile);
    // copy from home on remote to the config area
    clusterCommand.runCommandWaitForAll("sudo cp " + mastersFile + " /etc/hadoop/conf/");
    clusterCommand.runCommandWaitForAll("sudo cp " + slavesFile + " /etc/hadoop/conf/");
    clusterCommand.runCommandWaitForAll("sudo cp " + hdfsSiteFile + " /etc/hadoop/conf/");
    clusterCommand.runCommandWaitForAll("sudo cp " + coreSiteFile + " /etc/hadoop/conf/");
    clusterCommand.runCommandWaitForAll("sudo cp " + mapredSiteFile + " /etc/hadoop/conf/");
    // create /mnt/tmp for everyone to use (any existing content is deleted first)
    clusterCommand.runCommandWaitForAll("sudo rm -fr /mnt/tmp");
    clusterCommand.runCommandWaitForAll("sudo mkdir /mnt/tmp");
    clusterCommand.runCommandWaitForAll("sudo chmod 777 /mnt/tmp");
    // create /mnt/tmp/hadoop for the hadoop tmp dir
    clusterCommand.runCommandWaitForAll("sudo mkdir /mnt/tmp/hadoop");
    clusterCommand.runCommandWaitForAll("sudo chmod 777 /mnt/tmp/hadoop");

    logger.info("Hadoop cluster configured, starting the services");
    // shut down all services
    // clean up dfs on slaves
    // hadoopReady is cleared before the services are stopped
    hadoopReady = false;
    cmd = "for service in /etc/init.d/hadoop-0.20-*; do sudo $service stop; done";
    clusterCommand.runCommandWaitForAll(cmd);
    cmd = "sudo rm -fr /var/lib/hadoop-0.20/cache/*";
    clusterCommand.runCommandWaitForAll(cmd);

    // single-host SSH session, first aimed at the master
    SSHAgent sshAgent = new SSHAgent();
    sshAgent.setUser(ParameterProcessing.CLUSTER_USER_NAME);
    sshAgent.setKey(ParameterProcessing.PEM_CERTIFICATE_NAME);
    sshAgent.setHost(cluster.getMaster().getDnsName());

    // format HDFS on the master (the command's output is not logged)
    cmd = "sudo -u hdfs hadoop namenode -format";
    sshAgent.executeCommand(cmd);

    // start the namenode on the master
    cmd = "sudo service hadoop-0.20-namenode start";
    output = sshAgent.executeCommand(cmd);
    logger.info(Util.arrayToString(output));

    // start all hdfs slaves
    clusterCommand = new ClusterCommand(cluster.getDataNodes());
    cmd = "sudo service hadoop-0.20-datanode start";
    clusterCommand.runCommandWaitForAll(cmd);
    // start all tasktrackers
    clusterCommand = new ClusterCommand(cluster.getTaskTrackers());
    cmd = "sudo service hadoop-0.20-tasktracker start";
    clusterCommand.runCommandWaitForAll(cmd);

    // re-aim the SSH session at the job tracker host and start the jobtracker there
    sshAgent.setHost(cluster.getJobTracker().getDnsName());
    cmd = "sudo service hadoop-0.20-jobtracker start";
    output = sshAgent.executeCommand(cmd);
    logger.info(Util.arrayToString(output));
    logger.info("Cluster configuration and startup is complete");

    // remove the jets3t jars from the Hadoop lib directory on every node
    // NOTE(review): the reason is not visible here; presumably a library version conflict (confirm)
    cmd = "sudo rm /usr/lib/hadoop/lib/jets3t*.jar";
    clusterCommand = new ClusterCommand(cluster);
    clusterCommand.runCommandWaitForAll(cmd);
    // install a fresh version of FreeEed
    installFreeEed();
    // verify the cluster (per the original note, this runs a distributed grep app)
    verifyOperation();
    // let the UI, if any, show the new state
    if (callingUI != null) {
      callingUI.refreshStatus();
    }
  }