Beispiel #1
0
 /**
  * Appends one HTML image-map {@code <area>} entry for the given cluster to {@code mapString}.
  *
  * <p>The rectangle is written as {@code cluster.x - 2, yBar, cluster.x + 2, cluster.y + 2}. The
  * {@code onMouseover} handler calls {@code set(...)} with four arguments: the cluster size, the
  * cluster similarity cut down by {@code trimStringToLength(..., 6)}, the space-separated member
  * names cut down by {@code trimStringToLength(..., 50)}, and the theme string returned by
  * {@code ThemeGenerator.generateThemes}.
  *
  * <p>NOTE(review): names and theme are concatenated into the attribute as-is; whether they are
  * escaped anywhere upstream is not visible here - confirm before feeding untrusted names.
  *
  * @param cluster the cluster whose members, position and similarity are written
  * @param yBar value emitted as the second coordinate of the rectangle
  */
 public void writeMap(Cluster cluster, float yBar) {
   int memberCount = cluster.size();
   Set<Submission> subSet = new HashSet<Submission>(memberCount);

   // Collect the members and their names in one pass; a StringBuilder avoids re-copying the
   // whole name list on every iteration.
   StringBuilder names = new StringBuilder();
   for (int i = 0; i < memberCount; i++) {
     Submission sub = submissions.elementAt(cluster.getSubmissionAt(i));
     names.append(sub.name).append(" ");
     subSet.add(sub);
   }
   String documents = names.toString().trim();

   String theme =
       ThemeGenerator.generateThemes(subSet, this.program.get_themewords(), false, this.program);

   // The area is always emitted with "nohref"; linking singleton clusters to their
   // submission page was disabled in the original code.
   mapString +=
       "<area shape=\"rect\" coords=\""
           + (cluster.x - 2)
           + ","
           + (yBar)
           + ","
           + (cluster.x + 2)
           + ","
           + (cluster.y + 2)
           + "\" onMouseover=\"set('"
           + memberCount
           + "','"
           + trimStringToLength(String.valueOf(cluster.getSimilarity()), 6)
           + "','"
           + trimStringToLength(documents, 50)
           + "','"
           + theme
           + "')\" "
           + "nohref>\n";
 }
Beispiel #2
0
  /**
   * Builds a cluster hierarchy by repeatedly merging the two most similar clusters.
   *
   * <p>The working list starts with one {@code Cluster} per submission index. In every round all
   * pairs are compared and the pair with the strictly highest similarity (the first such pair on
   * ties) is replaced by a new {@code Cluster} built from both members and that similarity. The
   * similarity function depends on {@code program.get_clusterType()}:
   *
   * <ul>
   *   <li>{@code Options.MIN_CLUSTER}: {@code maxSimilarity}
   *   <li>{@code Options.MAX_CLUSTER}: {@code minSimilarity}
   *   <li>anything else: {@code avrSimilarity}
   * </ul>
   *
   * <p>As a side effect {@code maxMergeValue} is raised to the highest merge similarity seen.
   *
   * @return the single cluster left after all merges
   * @throws IndexOutOfBoundsException if there are no submissions, or if in some round no pair has
   *     a similarity greater than -1 (so no pair gets selected)
   */
  public Cluster minMaxAvrClustering() {
    int nrOfSubmissions = submissions.size();
    boolean minClustering = (Options.MIN_CLUSTER == this.program.get_clusterType());
    boolean maxClustering = (Options.MAX_CLUSTER == this.program.get_clusterType());
    SimilarityMatrix simMatrix = this.program.get_similarity();

    ArrayList<Cluster> clusters = new ArrayList<Cluster>(submissions.size());
    for (int i = 0; i < nrOfSubmissions; i++) {
      clusters.add(new Cluster(i, this));
    }

    while (clusters.size() > 1) {
      int indexA = -1;
      int indexB = -1;
      float maxSim = -1;
      int nrOfClusters = clusters.size();

      // Find the most similar pair (a, b) with a < b.
      for (int a = 0; a < (nrOfClusters - 1); a++) {
        Cluster cluster = clusters.get(a);
        for (int b = a + 1; b < nrOfClusters; b++) {
          Cluster other = clusters.get(b);
          float sim;
          if (minClustering) {
            sim = cluster.maxSimilarity(other, simMatrix);
          } else if (maxClustering) {
            sim = cluster.minSimilarity(other, simMatrix);
          } else {
            sim = cluster.avrSimilarity(other, simMatrix);
          }
          if (sim > maxSim) {
            maxSim = sim;
            indexA = a;
            indexB = b;
          }
        }
      }

      if (maxSim > maxMergeValue) {
        maxMergeValue = maxSim;
      }

      // Merge the pair. Remove by index instead of by object: the positions are already known,
      // and this does not depend on Cluster.equals. indexB > indexA always holds, so removing
      // the higher index first keeps indexA valid.
      Cluster clusterB = clusters.remove(indexB);
      Cluster clusterA = clusters.remove(indexA);
      clusters.add(new Cluster(clusterA, clusterB, maxSim, this));
    }
    return clusters.get(0);
  }
Beispiel #3
0
  /**
   * Writes the cluster table and the cluster-size distribution table to {@code f}.
   *
   * <p>Every cluster is counted in the size distribution, but clusters of size 1 are not listed in
   * the cluster table. The running cluster number counts all clusters, so the numbers shown can
   * have gaps. Each listed submission is linked to {@code submission<index>.html}, where the index
   * comes from {@code submissions.indexOf}, and is added to {@code neededSubmissions}. The
   * theme-word column is filled only when {@code program.get_language()} is a
   * {@code jplag.text.Language}; otherwise it contains "-".
   *
   * @param f file the HTML is written to
   * @param allClusters clusters to report, iterated in the collection's order
   * @param maxSize largest cluster size expected; a cluster larger than this causes an
   *     {@link ArrayIndexOutOfBoundsException}
   * @return the HTML of the distribution table only (the same text that was printed to {@code f})
   */
  private String outputClustering(HTMLFile f, Collection<Cluster> allClusters, int maxSize) {
    // distribution[s] = number of clusters of size s; Java zero-initialises int arrays.
    int[] distribution = new int[maxSize + 1];
    int max = 0;

    // Cluster table.
    f.println("<TABLE CELLPADDING=2 CELLSPACING=2>");

    f.println(
        "<TR><TH ALIGN=center BGCOLOR=#8080ff>"
            + msg.getString("Clusters.Cluster_number")
            + "<TH ALIGN=center BGCOLOR=#8080ff>"
            + msg.getString("Clusters.Size")
            + "<TH ALIGN=center BGCOLOR=#8080ff>"
            + msg.getString("Clusters.Threshold")
            + "<TH ALIGN=center BGCOLOR=#8080ff>"
            + msg.getString("Clusters.Cluster_members")
            + "<TH ALIGN=center BGCOLOR=#8080ff>"
            + msg.getString("Clusters.Most_frequent_words")
            + "</TR>");

    int clusterNumber = 0;
    for (Cluster cluster : allClusters) {
      clusterNumber++;
      int size = cluster.size();

      int count = ++distribution[size];
      if (max < count) {
        max = count;
      }

      // Singleton clusters are counted above but get no table row.
      if (size == 1) {
        continue;
      }

      f.print(
          "<TR><TD ALIGN=center BGCOLOR=#8080ff>"
              + clusterNumber
              + "<TD ALIGN=center BGCOLOR=#c0c0ff>"
              + size
              + "<TD ALIGN=center BGCOLOR=#c0c0ff>"
              + cluster.getSimilarity()
              + "<TD ALIGN=left BGCOLOR=#c0c0ff>");

      // Sort the members via the TreeSet's ordering of Submission.
      TreeSet<Submission> sortedSubmissions = new TreeSet<Submission>();
      for (int x = 0; x < size; x++) {
        sortedSubmissions.add(submissions.elementAt(cluster.getSubmissionAt(x)));
      }

      Iterator<Submission> iter = sortedSubmissions.iterator();
      while (iter.hasNext()) {
        Submission sub = iter.next();
        int index = submissions.indexOf(sub);
        f.print("<A HREF=\"submission" + index + ".html\">" + sub.name + "</A>");
        if (iter.hasNext()) {
          f.print(", ");
        }
        neededSubmissions.add(sub); // write files for these.
      }

      if (this.program.get_language() instanceof jplag.text.Language) {
        f.println(
            "<TD ALIGN=left BGCOLOR=#c0c0ff>"
                + ThemeGenerator.generateThemes(
                    sortedSubmissions, this.program.get_themewords(), true, this.program));
      } else {
        f.println("<TD ALIGN=left BGCOLOR=#c0c0ff>-");
      }

      f.println("</TR>");
    }
    f.println("</TABLE>\n<P>\n");

    f.println("<H5>" + msg.getString("Clusters.Distribution_of_cluster_size") + ":</H5>");

    // Distribution table: one row per cluster size that occurs, with a '#' bar scaled so that the
    // most frequent size gets barLength marks.
    StringBuilder text = new StringBuilder();
    text.append("<TABLE CELLPADDING=1 CELLSPACING=1>\n");
    text.append("<TR><TH ALIGN=center BGCOLOR=#8080ff>")
        .append(msg.getString("Clusters.Cluster_size"))
        .append("<TH ALIGN=center BGCOLOR=#8080ff>")
        .append(msg.getString("Clusters.Number_of_clusters"))
        .append("<TH ALIGN=center BGCOLOR=#8080ff>.</TR>\n");
    for (int i = 0; i <= maxSize; i++) {
      if (distribution[i] == 0) {
        continue; // also guarantees max > 0 below, so the division is safe
      }
      text.append("<TR><TD ALIGN=center BGCOLOR=#c0c0ff>")
          .append(i)
          .append("<TD ALIGN=right BGCOLOR=#c0c0ff>")
          .append(distribution[i])
          .append("<TD BGCOLOR=#c0c0ff>\n");

      int bars = distribution[i] * barLength / max;
      if (bars == 0) {
        bars = 1; // a non-empty size class always shows at least one mark
      }
      for (int j = 0; j < bars; j++) {
        text.append("#");
      }
      text.append("</TR>\n");
    }
    text.append("</TABLE>\n");

    String html = text.toString();
    f.print(html);
    return html;
  }