Esempio n. 1
0
 public void writeMap(Cluster cluster, float yBar) {
   Set<Submission> subSet = new HashSet<Submission>(cluster.size());
   String documents = "";
   for (int i = 0; i < cluster.size(); i++) {
     Submission sub = submissions.elementAt(cluster.getSubmissionAt(i));
     documents += sub.name + " ";
     subSet.add(sub);
   }
   documents = documents.trim();
   String theme =
       ThemeGenerator.generateThemes(subSet, this.program.get_themewords(), false, this.program);
   mapString +=
       "<area shape=\"rect\" coords=\""
           + (cluster.x - 2)
           + ","
           + (yBar)
           + ","
           + (cluster.x + 2)
           + ","
           + (cluster.y + 2)
           + "\" onMouseover=\"set('"
           + cluster.size()
           + "','"
           + trimStringToLength(String.valueOf(cluster.getSimilarity()), 6)
           + "','"
           + trimStringToLength(documents, 50)
           + "','"
           + theme
           + "')\" ";
   //		if (cluster.size() == 1)
   //			mapString += "href=\"submission"+cluster.getSubmissionAt(0)+".html\">\n";
   //		else
   mapString += "nohref>\n";
 }
Esempio n. 2
0
 public void drawCluster(Cluster cluster) {
   int index = clusters.indexOf(cluster);
   if (index != -1) {
     cluster.y = maxY;
     cluster.x = minX + index * factor;
     if (cluster.size() > 1) g.setColor(Color.RED);
     else g.setColor(Color.BLACK);
     g.drawRect(cluster.x - 1, cluster.y - cluster.size(), 2, 1 + cluster.size());
   } else {
     Cluster left = cluster.getLeft();
     Cluster right = cluster.getRight();
     drawCluster(left);
     drawCluster(right);
     int yBar = minY + (int) ((maxY - minY) * (cluster.getSimilarity() / threshold));
     g.setColor(Color.DARK_GRAY);
     if (left.y > yBar) {
       g.drawLine(left.x, left.y - 1, left.x, yBar);
       writeMap(left, yBar);
     }
     if (right.y > yBar) {
       g.drawLine(right.x, right.y - 1, right.x, yBar);
       writeMap(right, yBar);
     }
     g.setColor(Color.BLACK);
     g.drawLine(left.x, yBar, right.x, yBar);
     cluster.x = (right.x + left.x) / 2;
     cluster.y = yBar;
   }
 }
Esempio n. 3
0
  /** Print it! */
  public String printClusters(Cluster clustering, float threshold, HTMLFile f) {
    int maxSize = 0;

    ArrayList<Cluster> clusters = getClusters(clustering, threshold);

    for (Iterator<Cluster> i = clusters.iterator(); i.hasNext(); ) {
      Cluster cluster = i.next();
      if (cluster.size() > maxSize) maxSize = cluster.size();
    }

    TreeSet<Cluster> sorted = new TreeSet<Cluster>(clusters);
    clusters = null;

    // Now print them:
    return outputClustering(f, sorted, maxSize);
  }
Esempio n. 4
0
  /** Min clustering... */
  public Cluster minMaxAvrClustering() {
    int nrOfSubmissions = submissions.size();
    boolean minClustering = (Options.MIN_CLUSTER == this.program.get_clusterType());
    boolean maxClustering = (Options.MAX_CLUSTER == this.program.get_clusterType());
    SimilarityMatrix simMatrix = this.program.get_similarity();

    ArrayList<Cluster> clusters = new ArrayList<Cluster>(submissions.size());
    for (int i = 0; i < nrOfSubmissions; i++) clusters.add(new Cluster(i, this));

    while (clusters.size() > 1) {
      int indexA = -1, indexB = -1;
      float maxSim = -1;
      int nrOfClusters = clusters.size();

      // find similarity
      for (int a = 0; a < (nrOfClusters - 1); a++) {
        Cluster cluster = clusters.get(a);
        for (int b = a + 1; b < nrOfClusters; b++) {
          float sim;
          if (minClustering) sim = cluster.maxSimilarity(clusters.get(b), simMatrix);
          else if (maxClustering) sim = cluster.minSimilarity(clusters.get(b), simMatrix);
          else sim = cluster.avrSimilarity(clusters.get(b), simMatrix);
          if (sim > maxSim) {
            maxSim = sim;
            indexA = a;
            indexB = b;
          }
        }
      }

      if (maxSim > maxMergeValue) maxMergeValue = maxSim;

      // now merge these clusters
      Cluster clusterA = clusters.get(indexA);
      Cluster clusterB = clusters.get(indexB);
      clusters.remove(clusterA);
      clusters.remove(clusterB);
      clusters.add(new Cluster(clusterA, clusterB, maxSim, this));
    }
    return clusters.get(0);
  }
Esempio n. 5
0
  private ArrayList<Cluster> getClusters(Cluster clustering, float threshold) {
    ArrayList<Cluster> clusters = new ArrayList<Cluster>();

    // First determine the clusters
    Stack<Cluster> stack = new Stack<Cluster>();
    stack.push(clustering);
    while (!stack.empty()) {
      Cluster current = stack.pop();

      if (current.size() == 1) {
        clusters.add(current); // singleton clusters
      } else {
        if (current.getSimilarity() >= threshold) {
          clusters.add(current);
        } else {
          // current.size() != 1   !!!
          stack.push(current.getLeft());
          stack.push(current.getRight());
        }
      }
    }
    return clusters;
  }
Esempio n. 6
0
  /** This method returns the distribution HTML code as a string */
  private String outputClustering(HTMLFile f, Collection<Cluster> allClusters, int maxSize) {
    int[] distribution = new int[maxSize + 1];
    int max = 0;
    for (int i = 0; i <= maxSize; i++) distribution[i] = 0;

    // Now output the clustering:
    f.println("<TABLE CELLPADDING=2 CELLSPACING=2>");

    f.println(
        "<TR><TH ALIGN=center BGCOLOR=#8080ff>"
            + msg.getString("Clusters.Cluster_number")
            + "<TH ALIGN=center BGCOLOR=#8080ff>"
            + msg.getString("Clusters.Size")
            + "<TH ALIGN=center BGCOLOR=#8080ff>"
            + msg.getString("Clusters.Threshold")
            + "<TH ALIGN=center BGCOLOR=#8080ff>"
            + msg.getString("Clusters.Cluster_members")
            + "<TH ALIGN=center BGCOLOR=#8080ff>"
            + msg.getString("Clusters.Most_frequent_words")
            + "</TR>");
    Iterator<Cluster> clusterI = allClusters.iterator();
    for (int i = 1; clusterI.hasNext(); i++) {
      Cluster cluster = clusterI.next();
      if (max < ++distribution[cluster.size()]) max = distribution[cluster.size()];

      // no singleton clusters
      if (cluster.size() == 1) continue;

      f.print(
          "<TR><TD ALIGN=center BGCOLOR=#8080ff>"
              + i
              + "<TD ALIGN=center BGCOLOR=#c0c0ff>"
              + cluster.size()
              + "<TD ALIGN=center BGCOLOR=#c0c0ff>"
              + cluster.getSimilarity()
              + "<TD ALIGN=left BGCOLOR=#c0c0ff>");

      // sort names
      TreeSet<Submission> sortedSubmissions = new TreeSet<Submission>();
      for (int x = 0; x < cluster.size(); x++) {
        sortedSubmissions.add(submissions.elementAt(cluster.getSubmissionAt(x)));
      }

      for (Iterator<Submission> iter = sortedSubmissions.iterator(); iter.hasNext(); ) {
        Submission sub = iter.next();
        int index = submissions.indexOf(sub);
        f.print("<A HREF=\"submission" + index + ".html\">" + sub.name + "</A>");
        if (iter.hasNext()) f.print(", ");
        neededSubmissions.add(sub); // write files for these.
      }

      if (this.program.get_language() instanceof jplag.text.Language) {
        f.println(
            "<TD ALIGN=left BGCOLOR=#c0c0ff>"
                + ThemeGenerator.generateThemes(
                    sortedSubmissions, this.program.get_themewords(), true, this.program));
      } else {
        f.println("<TD ALIGN=left BGCOLOR=#c0c0ff>-");
      }

      f.println("</TR>");
    }
    f.println("</TABLE>\n<P>\n");

    f.println("<H5>" + msg.getString("Clusters.Distribution_of_cluster_size") + ":</H5>");

    String text;
    text = "<TABLE CELLPADDING=1 CELLSPACING=1>\n";
    text +=
        "<TR><TH ALIGN=center BGCOLOR=#8080ff>"
            + msg.getString("Clusters.Cluster_size")
            + "<TH ALIGN=center BGCOLOR=#8080ff>"
            + msg.getString("Clusters.Number_of_clusters")
            + "<TH ALIGN=center BGCOLOR=#8080ff>.</TR>\n";
    for (int i = 0; i <= maxSize; i++) {
      if (distribution[i] == 0) continue;
      text +=
          "<TR><TD ALIGN=center BGCOLOR=#c0c0ff>"
              + i
              + "<TD ALIGN=right BGCOLOR=#c0c0ff>"
              + distribution[i]
              + "<TD BGCOLOR=#c0c0ff>\n";
      for (int j = (distribution[i] * barLength / max); j > 0; j--) text += ("#");
      if (distribution[i] * barLength / max == 0) {
        if (distribution[i] == 0) text += (".");
        else text += ("#");
      }
      text += ("</TR>\n");
    }
    text += ("</TABLE>\n");

    f.print(text);
    return text;
  }