public void writeMap(Cluster cluster, float yBar) { Set<Submission> subSet = new HashSet<Submission>(cluster.size()); String documents = ""; for (int i = 0; i < cluster.size(); i++) { Submission sub = submissions.elementAt(cluster.getSubmissionAt(i)); documents += sub.name + " "; subSet.add(sub); } documents = documents.trim(); String theme = ThemeGenerator.generateThemes(subSet, this.program.get_themewords(), false, this.program); mapString += "<area shape=\"rect\" coords=\"" + (cluster.x - 2) + "," + (yBar) + "," + (cluster.x + 2) + "," + (cluster.y + 2) + "\" onMouseover=\"set('" + cluster.size() + "','" + trimStringToLength(String.valueOf(cluster.getSimilarity()), 6) + "','" + trimStringToLength(documents, 50) + "','" + theme + "')\" "; // if (cluster.size() == 1) // mapString += "href=\"submission"+cluster.getSubmissionAt(0)+".html\">\n"; // else mapString += "nohref>\n"; }
public void drawCluster(Cluster cluster) { int index = clusters.indexOf(cluster); if (index != -1) { cluster.y = maxY; cluster.x = minX + index * factor; if (cluster.size() > 1) g.setColor(Color.RED); else g.setColor(Color.BLACK); g.drawRect(cluster.x - 1, cluster.y - cluster.size(), 2, 1 + cluster.size()); } else { Cluster left = cluster.getLeft(); Cluster right = cluster.getRight(); drawCluster(left); drawCluster(right); int yBar = minY + (int) ((maxY - minY) * (cluster.getSimilarity() / threshold)); g.setColor(Color.DARK_GRAY); if (left.y > yBar) { g.drawLine(left.x, left.y - 1, left.x, yBar); writeMap(left, yBar); } if (right.y > yBar) { g.drawLine(right.x, right.y - 1, right.x, yBar); writeMap(right, yBar); } g.setColor(Color.BLACK); g.drawLine(left.x, yBar, right.x, yBar); cluster.x = (right.x + left.x) / 2; cluster.y = yBar; } }
/** Print it! */ public String printClusters(Cluster clustering, float threshold, HTMLFile f) { int maxSize = 0; ArrayList<Cluster> clusters = getClusters(clustering, threshold); for (Iterator<Cluster> i = clusters.iterator(); i.hasNext(); ) { Cluster cluster = i.next(); if (cluster.size() > maxSize) maxSize = cluster.size(); } TreeSet<Cluster> sorted = new TreeSet<Cluster>(clusters); clusters = null; // Now print them: return outputClustering(f, sorted, maxSize); }
/** Min clustering... */ public Cluster minMaxAvrClustering() { int nrOfSubmissions = submissions.size(); boolean minClustering = (Options.MIN_CLUSTER == this.program.get_clusterType()); boolean maxClustering = (Options.MAX_CLUSTER == this.program.get_clusterType()); SimilarityMatrix simMatrix = this.program.get_similarity(); ArrayList<Cluster> clusters = new ArrayList<Cluster>(submissions.size()); for (int i = 0; i < nrOfSubmissions; i++) clusters.add(new Cluster(i, this)); while (clusters.size() > 1) { int indexA = -1, indexB = -1; float maxSim = -1; int nrOfClusters = clusters.size(); // find similarity for (int a = 0; a < (nrOfClusters - 1); a++) { Cluster cluster = clusters.get(a); for (int b = a + 1; b < nrOfClusters; b++) { float sim; if (minClustering) sim = cluster.maxSimilarity(clusters.get(b), simMatrix); else if (maxClustering) sim = cluster.minSimilarity(clusters.get(b), simMatrix); else sim = cluster.avrSimilarity(clusters.get(b), simMatrix); if (sim > maxSim) { maxSim = sim; indexA = a; indexB = b; } } } if (maxSim > maxMergeValue) maxMergeValue = maxSim; // now merge these clusters Cluster clusterA = clusters.get(indexA); Cluster clusterB = clusters.get(indexB); clusters.remove(clusterA); clusters.remove(clusterB); clusters.add(new Cluster(clusterA, clusterB, maxSim, this)); } return clusters.get(0); }
private ArrayList<Cluster> getClusters(Cluster clustering, float threshold) { ArrayList<Cluster> clusters = new ArrayList<Cluster>(); // First determine the clusters Stack<Cluster> stack = new Stack<Cluster>(); stack.push(clustering); while (!stack.empty()) { Cluster current = stack.pop(); if (current.size() == 1) { clusters.add(current); // singleton clusters } else { if (current.getSimilarity() >= threshold) { clusters.add(current); } else { // current.size() != 1 !!! stack.push(current.getLeft()); stack.push(current.getRight()); } } } return clusters; }
/** This method returns the distribution HTML code as a string */ private String outputClustering(HTMLFile f, Collection<Cluster> allClusters, int maxSize) { int[] distribution = new int[maxSize + 1]; int max = 0; for (int i = 0; i <= maxSize; i++) distribution[i] = 0; // Now output the clustering: f.println("<TABLE CELLPADDING=2 CELLSPACING=2>"); f.println( "<TR><TH ALIGN=center BGCOLOR=#8080ff>" + msg.getString("Clusters.Cluster_number") + "<TH ALIGN=center BGCOLOR=#8080ff>" + msg.getString("Clusters.Size") + "<TH ALIGN=center BGCOLOR=#8080ff>" + msg.getString("Clusters.Threshold") + "<TH ALIGN=center BGCOLOR=#8080ff>" + msg.getString("Clusters.Cluster_members") + "<TH ALIGN=center BGCOLOR=#8080ff>" + msg.getString("Clusters.Most_frequent_words") + "</TR>"); Iterator<Cluster> clusterI = allClusters.iterator(); for (int i = 1; clusterI.hasNext(); i++) { Cluster cluster = clusterI.next(); if (max < ++distribution[cluster.size()]) max = distribution[cluster.size()]; // no singleton clusters if (cluster.size() == 1) continue; f.print( "<TR><TD ALIGN=center BGCOLOR=#8080ff>" + i + "<TD ALIGN=center BGCOLOR=#c0c0ff>" + cluster.size() + "<TD ALIGN=center BGCOLOR=#c0c0ff>" + cluster.getSimilarity() + "<TD ALIGN=left BGCOLOR=#c0c0ff>"); // sort names TreeSet<Submission> sortedSubmissions = new TreeSet<Submission>(); for (int x = 0; x < cluster.size(); x++) { sortedSubmissions.add(submissions.elementAt(cluster.getSubmissionAt(x))); } for (Iterator<Submission> iter = sortedSubmissions.iterator(); iter.hasNext(); ) { Submission sub = iter.next(); int index = submissions.indexOf(sub); f.print("<A HREF=\"submission" + index + ".html\">" + sub.name + "</A>"); if (iter.hasNext()) f.print(", "); neededSubmissions.add(sub); // write files for these. } if (this.program.get_language() instanceof jplag.text.Language) { f.println( "<TD ALIGN=left BGCOLOR=#c0c0ff>" + ThemeGenerator.generateThemes( sortedSubmissions, this.program.get_themewords(), true, this.program)); } else { f.println("<TD ALIGN=left BGCOLOR=#c0c0ff>-"); } f.println("</TR>"); } f.println("</TABLE>\n<P>\n"); f.println("<H5>" + msg.getString("Clusters.Distribution_of_cluster_size") + ":</H5>"); String text; text = "<TABLE CELLPADDING=1 CELLSPACING=1>\n"; text += "<TR><TH ALIGN=center BGCOLOR=#8080ff>" + msg.getString("Clusters.Cluster_size") + "<TH ALIGN=center BGCOLOR=#8080ff>" + msg.getString("Clusters.Number_of_clusters") + "<TH ALIGN=center BGCOLOR=#8080ff>.</TR>\n"; for (int i = 0; i <= maxSize; i++) { if (distribution[i] == 0) continue; text += "<TR><TD ALIGN=center BGCOLOR=#c0c0ff>" + i + "<TD ALIGN=right BGCOLOR=#c0c0ff>" + distribution[i] + "<TD BGCOLOR=#c0c0ff>\n"; for (int j = (distribution[i] * barLength / max); j > 0; j--) text += ("#"); if (distribution[i] * barLength / max == 0) { if (distribution[i] == 0) text += ("."); else text += ("#"); } text += ("</TR>\n"); } text += ("</TABLE>\n"); f.print(text); return text; }