public static void main(String[] args) throws IOException { BufferedReader in = new BufferedReader(new FileReader("Defrag_4.out")); Vector<Cluster> v = new Vector<Cluster>(); Vector<Vector<Tweet>> t = new Vector<Vector<Tweet>>(); HashMap<String, Integer> map = new HashMap<String, Integer>(); Vector<HashMap<String, Integer>> vm = new Vector<HashMap<String, Integer>>(); String s = ""; int i = 0, j = 0; Vector<Tweet> vector = new Vector<Tweet>(); vm.add(new HashMap<String, Integer>()); while ((s = in.readLine()) != null) { if (s.equals( "------------------------------------------------------------------------------")) { // System.out.println(vector.size()); t.add(vector); vector = new Vector<Tweet>(); vm.add(new HashMap<String, Integer>()); continue; } i++; String label = s.substring(0, s.indexOf(" ")); String str_tw = (s.substring(s.indexOf(" ") + 1)).trim(); vector.add(new Tweet(str_tw, label)); Integer oldCount = map.get(label); Integer oldCount_Internal = vm.get(vm.size() - 1).get(label); map.put(label, oldCount == null ? 1 : oldCount + 1); vm.get(vm.size() - 1).put(label, oldCount_Internal == null ? 
1 : oldCount_Internal + 1); } t.add(vector); // Building Clusters int index = 0; for (Vector<Tweet> vt : t) { v.add(new Cluster(vt.firstElement(), index)); for (int idx = 1; idx < vt.size(); idx++) { v.get(index).addmember(vt.get(idx)); } index++; } System.out.println(v.size()); // De-Fragmentation Vector<Cluster> vc = new Vector<Cluster>(); // for(i = 0; i < v.size(); i++) { // Cluster master = v.get(i); // TweetVector tv_master = master.getCentroid(); // for(j = i + 1; j < v.size(); j++) { // Cluster slave = v.get(j); // TweetVector tv_slave = slave.getCentroid(); // double sim = tv_master.cosineSimilarity(tv_slave); // if(sim >= 0.35) { // master.addMembers(slave.getMembers()); // v.remove(j); // } // } // vc.add(master); // } // for(i = 0; i < v.size(); i++) { // Cluster master = v.get(i); // TweetVector tv_master = master.getCentroid(); // for(j = i + 1; j < v.size(); j++) { // Cluster slave = v.get(j); // TweetVector tv_slave = slave.getCentroid(); // double sim = tv_master.cosineSimilarity(tv_slave); // if(sim >= 0.32) { // for(Tweet tweet : slave.getMembers()) { // master.addmember(tweet); // } // v.remove(j); // } // } // vc.add(master); // } System.out.println(vc.size()); Vector<Vector<Tweet>> newClusters = new Vector<Vector<Tweet>>(); for (Cluster clus : vc) { newClusters.add(clus.getMembers()); } // percision double tp; double fn; double fp; // for(String x : map.keySet()) { // System.out.println(x + " " + map.get(x)); // } /** * i عدد ال elements اللي جوه Cluster موجود دلوقتي j و عدد ال elements اللي ليها نفس ال Class k * و عدد ال elements اللي من نفس ال Class جوا نفس ال cluster * * <p>TP = max(k within the cluster) * * <p>FN = i - TP * * <p>FP = j - TP * * <p>TP = max(k in within cluster) J = number of elements in class * * <p>that has maximum K */ BufferedWriter bw = new BufferedWriter(new FileWriter("F-Measure4_1_After.out")); index = 0; double avg = 0.0; for (Vector<Tweet> tv : t) { i = tv.size(); String maxLabel = ""; int k = 0; for (String str : 
vm.get(index).keySet()) { int x = vm.get(index).get(str); if (x > k) { k = x; maxLabel = str; } } j = map.get(maxLabel); tp = k; fn = i - tp; fp = j - tp; double precision = tp / (tp + fp); double recall = tp / (tp + fn); double fmeasure = (2 * precision * recall) / (precision + recall); index++; avg += (fmeasure); bw.append("F-Measure = " + fmeasure + "\n"); } bw.append("Overall F-Measure = " + (avg / vm.size()) + "\n"); bw.close(); // System.out.println(i); // Collections.shuffle(t); // LeaderFollowers lf = new LeaderFollowers(0.35, 0); // for(Tweet tw : t) { // lf.go_tweet(tw); // } // BufferedWriter bw = new BufferedWriter(new FileWriter("Purity.out")); // i = 1; // BufferedWriter bw2 = new BufferedWriter(new FileWriter("Defrag_4.out")); //// System.out.println(lf.getClusters().size()); // for(Cluster c : vc) { // for(Tweet tw : c.getMembers()) // bw2.write(tw.getLabel() + " " + tw.getOriginal() + "\n"); // // bw2.write("------------------------------------------------------------------------------\n"); // } // bw2.close(); // double overAll = 0.0; // for(Vector<Tweet> vt : t) { // if(!vt.isEmpty()) { // double pur = purity(vt, vt.firstElement().getLabel()); // j = map.get(vt.firstElement().getLabel()); // double[] arr = tp2(vt, vt.firstElement().getLabel(), j); // tp = arr[0]; // fn = arr[1]; // fp = arr[2]; // double f = (2 * fn * fp) / (fn + fp); // overAll += ((double)vt.size() / (double)i) * pur; // bw.write(pur + " " + f + "\n"); // } // } // bw.write("Over All Purity = " + overAll + "\n"); // System.err.println(overAll); // bw.close(); }
/**
 * Index of the first matrix row that is scored; rows below it always keep a score of 0.
 * NOTE(review): presumably the leading rows hold non-variable metadata - confirm.
 */
private static final int FIRST_SCORED_ROW = 3;

/**
 * Recomputes {@code scorevar}, {@code scorevardef}, {@code scorestabdet}, {@code scorestabdesc}
 * and {@code scorestabpop} from {@code clustinit} and {@code clustHistDef}.
 *
 * <ul>
 *   <li>{@code scorevar[r]} / {@code scorevardef[r]}: mean absolute value of row {@code r} of
 *       {@code vtestsm} / {@code vtestsmdef}, times the standard deviation of the same row of
 *       {@code avgsm} / {@code avgsmdef}, divided by the mean absolute value of that row (0 when
 *       the divisor is not positive).
 *   <li>{@code scorestabdet[r]}: 1 minus the mean relative difference between each column of
 *       row {@code r} of {@code vtestsm} and the column {@code clustinit.idtickinit}.
 *   <li>{@code scorestabdesc}: sum of the non-NaN {@code scorestabdet} values divided by the
 *       number of scored rows.
 *   <li>{@code scorestabpop}: sum, over {@code clustHistDef}, of the number of components shared
 *       with {@code clustinit} divided by the larger component count, divided by the column
 *       count of {@code vtestsm}.
 * </ul>
 *
 * <p>Rows identified as id, class label or label colour, and rows whose label starts with
 * {@code CLASS_LABEL}, end up with a score of 0 in both {@code scorevar} and
 * {@code scorevardef}.
 */
public void calcscores() {
  Matrix tests = clustinit.vtestsm;

  // Fresh arrays are zero-filled, so the unscored leading rows need no explicit reset; this
  // also keeps the method from failing when the matrix has fewer than FIRST_SCORED_ROW rows.
  scorevar = new double[(int) tests.getRowCount()];
  scorestabdet = new double[(int) tests.getRowCount()];
  scorevardef = new double[(int) tests.getRowCount()];
  scorestabdesc = 0;
  scorestabpop = 0;

  fillVariableScores(tests, clustinit.avgsm, scorevar);

  // NOTE(review): scorevardef and scorestabdet are sized from vtestsm, while the loops below
  // are bounded by vtestsmdef - this assumes both matrices have the same shape; confirm.
  Matrix testsDef = clustinit.vtestsmdef;
  fillVariableScores(testsDef, clustinit.avgsmdef, scorevardef);

  long defRows = testsDef.getRowCount();
  long defColumns = testsDef.getColumnCount();

  // Descriptor stability: how close each column stays to the reference column idtickinit.
  scorestabdesc = 0;
  for (int row = FIRST_SCORED_ROW; row < defRows; row++) {
    double deviation = 0;
    for (int col = 0; col < defColumns; col++) {
      double value = clustinit.vtestsm.getAsDouble(row, col);
      double reference = clustinit.vtestsm.getAsDouble(row, clustinit.idtickinit);
      deviation +=
          Math.abs(value - reference)
              / Math.max(Math.abs(value), Math.abs(reference))
              / defColumns;
    }
    scorestabdet[row] = 1 - deviation;
    // A row whose value and reference are both 0 yields 0/0 = NaN; such rows are left out of
    // the aggregate (but still counted in the divisor).
    if (!Double.isNaN(scorestabdet[row])) {
      scorestabdesc = scorestabdesc + scorestabdet[row] / (defRows - FIRST_SCORED_ROW);
    }
  }

  // Population stability: overlap of clustinit's components with each cluster in the history.
  scorestabpop = 0;
  for (int h = 0; h < clustHistDef.size(); h++) {
    Cluster other = clustHistDef.get(h);
    scorestabpop +=
        (double) clustinit.getNumberOfCommonComponents(other)
            / (double)
                Math.max(clustinit.getComponentIds().size(), other.getComponentIds().size());
  }
  scorestabpop = (double) scorestabpop / clustinit.vtestsm.getColumnCount();

  // Identifier / class-label / colour rows are not variables: force their scores to 0.
  long idColumn = testsDef.getRowForLabel(Cluster.ID_C_NAME);
  long classLabelColumn = testsDef.getRowForLabel(Cluster.CLASS_LABEL_C_NAME);
  long labelColorColumn = testsDef.getRowForLabel("LABEL-COLOR");
  Pattern classLabelPrefix = Pattern.compile("CLASS_LABEL"); // compiled once, reused per row
  for (int row = FIRST_SCORED_ROW; row < defRows; row++) {
    Matcher m = classLabelPrefix.matcher(testsDef.getRowLabel(row));
    if (row == idColumn
        || row == classLabelColumn
        || row == labelColorColumn
        || m.lookingAt()) {
      scorevardef[row] = 0;
      scorevar[row] = 0;
    }
  }
}

/**
 * Writes into {@code scores[r]}, for every row {@code r} of {@code tests} from
 * {@link #FIRST_SCORED_ROW} on, the value
 * {@code mean(|tests row|) * std(averages row) / mean(|averages row|)}, or 0 when that divisor
 * is not strictly positive.
 */
private void fillVariableScores(Matrix tests, Matrix averages, double[] scores) {
  long rows = tests.getRowCount();
  for (int row = FIRST_SCORED_ROW; row < rows; row++) {
    double meanAbsTest = tests.selectRows(Ret.LINK, row).abs(Ret.LINK).getMeanValue();
    double spread = averages.selectRows(Ret.LINK, row).getStdValue();
    double meanAbsAverage = averages.abs(Ret.LINK).selectRows(Ret.LINK, row).getMeanValue();
    scores[row] = meanAbsAverage > 0 ? meanAbsTest * spread / meanAbsAverage : 0;
  }
}