public void writeMap(Cluster cluster, float yBar) { Set<Submission> subSet = new HashSet<Submission>(cluster.size()); String documents = ""; for (int i = 0; i < cluster.size(); i++) { Submission sub = submissions.elementAt(cluster.getSubmissionAt(i)); documents += sub.name + " "; subSet.add(sub); } documents = documents.trim(); String theme = ThemeGenerator.generateThemes(subSet, this.program.get_themewords(), false, this.program); mapString += "<area shape=\"rect\" coords=\"" + (cluster.x - 2) + "," + (yBar) + "," + (cluster.x + 2) + "," + (cluster.y + 2) + "\" onMouseover=\"set('" + cluster.size() + "','" + trimStringToLength(String.valueOf(cluster.getSimilarity()), 6) + "','" + trimStringToLength(documents, 50) + "','" + theme + "')\" "; // if (cluster.size() == 1) // mapString += "href=\"submission"+cluster.getSubmissionAt(0)+".html\">\n"; // else mapString += "nohref>\n"; }
/** Min clustering... */ public Cluster minMaxAvrClustering() { int nrOfSubmissions = submissions.size(); boolean minClustering = (Options.MIN_CLUSTER == this.program.get_clusterType()); boolean maxClustering = (Options.MAX_CLUSTER == this.program.get_clusterType()); SimilarityMatrix simMatrix = this.program.get_similarity(); ArrayList<Cluster> clusters = new ArrayList<Cluster>(submissions.size()); for (int i = 0; i < nrOfSubmissions; i++) clusters.add(new Cluster(i, this)); while (clusters.size() > 1) { int indexA = -1, indexB = -1; float maxSim = -1; int nrOfClusters = clusters.size(); // find similarity for (int a = 0; a < (nrOfClusters - 1); a++) { Cluster cluster = clusters.get(a); for (int b = a + 1; b < nrOfClusters; b++) { float sim; if (minClustering) sim = cluster.maxSimilarity(clusters.get(b), simMatrix); else if (maxClustering) sim = cluster.minSimilarity(clusters.get(b), simMatrix); else sim = cluster.avrSimilarity(clusters.get(b), simMatrix); if (sim > maxSim) { maxSim = sim; indexA = a; indexB = b; } } } if (maxSim > maxMergeValue) maxMergeValue = maxSim; // now merge these clusters Cluster clusterA = clusters.get(indexA); Cluster clusterB = clusters.get(indexB); clusters.remove(clusterA); clusters.remove(clusterB); clusters.add(new Cluster(clusterA, clusterB, maxSim, this)); } return clusters.get(0); }
/** This method returns the distribution HTML code as a string */ private String outputClustering(HTMLFile f, Collection<Cluster> allClusters, int maxSize) { int[] distribution = new int[maxSize + 1]; int max = 0; for (int i = 0; i <= maxSize; i++) distribution[i] = 0; // Now output the clustering: f.println("<TABLE CELLPADDING=2 CELLSPACING=2>"); f.println( "<TR><TH ALIGN=center BGCOLOR=#8080ff>" + msg.getString("Clusters.Cluster_number") + "<TH ALIGN=center BGCOLOR=#8080ff>" + msg.getString("Clusters.Size") + "<TH ALIGN=center BGCOLOR=#8080ff>" + msg.getString("Clusters.Threshold") + "<TH ALIGN=center BGCOLOR=#8080ff>" + msg.getString("Clusters.Cluster_members") + "<TH ALIGN=center BGCOLOR=#8080ff>" + msg.getString("Clusters.Most_frequent_words") + "</TR>"); Iterator<Cluster> clusterI = allClusters.iterator(); for (int i = 1; clusterI.hasNext(); i++) { Cluster cluster = clusterI.next(); if (max < ++distribution[cluster.size()]) max = distribution[cluster.size()]; // no singleton clusters if (cluster.size() == 1) continue; f.print( "<TR><TD ALIGN=center BGCOLOR=#8080ff>" + i + "<TD ALIGN=center BGCOLOR=#c0c0ff>" + cluster.size() + "<TD ALIGN=center BGCOLOR=#c0c0ff>" + cluster.getSimilarity() + "<TD ALIGN=left BGCOLOR=#c0c0ff>"); // sort names TreeSet<Submission> sortedSubmissions = new TreeSet<Submission>(); for (int x = 0; x < cluster.size(); x++) { sortedSubmissions.add(submissions.elementAt(cluster.getSubmissionAt(x))); } for (Iterator<Submission> iter = sortedSubmissions.iterator(); iter.hasNext(); ) { Submission sub = iter.next(); int index = submissions.indexOf(sub); f.print("<A HREF=\"submission" + index + ".html\">" + sub.name + "</A>"); if (iter.hasNext()) f.print(", "); neededSubmissions.add(sub); // write files for these. } if (this.program.get_language() instanceof jplag.text.Language) { f.println( "<TD ALIGN=left BGCOLOR=#c0c0ff>" + ThemeGenerator.generateThemes( sortedSubmissions, this.program.get_themewords(), true, this.program)); } else { f.println("<TD ALIGN=left BGCOLOR=#c0c0ff>-"); } f.println("</TR>"); } f.println("</TABLE>\n<P>\n"); f.println("<H5>" + msg.getString("Clusters.Distribution_of_cluster_size") + ":</H5>"); String text; text = "<TABLE CELLPADDING=1 CELLSPACING=1>\n"; text += "<TR><TH ALIGN=center BGCOLOR=#8080ff>" + msg.getString("Clusters.Cluster_size") + "<TH ALIGN=center BGCOLOR=#8080ff>" + msg.getString("Clusters.Number_of_clusters") + "<TH ALIGN=center BGCOLOR=#8080ff>.</TR>\n"; for (int i = 0; i <= maxSize; i++) { if (distribution[i] == 0) continue; text += "<TR><TD ALIGN=center BGCOLOR=#c0c0ff>" + i + "<TD ALIGN=right BGCOLOR=#c0c0ff>" + distribution[i] + "<TD BGCOLOR=#c0c0ff>\n"; for (int j = (distribution[i] * barLength / max); j > 0; j--) text += ("#"); if (distribution[i] * barLength / max == 0) { if (distribution[i] == 0) text += ("."); else text += ("#"); } text += ("</TR>\n"); } text += ("</TABLE>\n"); f.print(text); return text; }