/**
 * Evaluates a clustering result stored in {@code Defrag_4.out} and writes one F-measure per
 * cluster plus their average to {@code F-Measure4_1_After.out}.
 *
 * <p>Input format: every tweet is one line of the form {@code <label> <text>} (the label is
 * everything before the first space). A line consisting of the 78-dash separator closes the
 * current cluster. Empty clusters (for example the one implied by a trailing separator) and
 * empty lines are ignored.
 *
 * <p>For each cluster the dominant label is the label with the most members in that cluster.
 * With {@code k} members carrying the dominant label, {@code i} members in the cluster and
 * {@code j} tweets carrying that label in the whole file:
 *
 * <ul>
 *   <li>TP = k
 *   <li>FP = i - TP (cluster members that do not carry the dominant label)
 *   <li>FN = j - TP (tweets with the dominant label that landed in other clusters)
 * </ul>
 *
 * @param args ignored
 * @throws IOException if a file cannot be read or written, or if a non-empty input line
 *     contains no space between label and text
 */
public static void main(String[] args) throws IOException {
    final String separator =
        "------------------------------------------------------------------------------";

    Vector<Cluster> v = new Vector<Cluster>();
    // t.get(n) holds the tweets of cluster n; vm.get(n) holds its per-label member counts.
    Vector<Vector<Tweet>> t = new Vector<Vector<Tweet>>();
    Vector<HashMap<String, Integer>> vm = new Vector<HashMap<String, Integer>>();
    // Number of tweets per label over the whole file.
    HashMap<String, Integer> map = new HashMap<String, Integer>();

    // try-with-resources guarantees the reader is closed even when parsing fails.
    try (BufferedReader in = new BufferedReader(new FileReader("Defrag_4.out"))) {
      Vector<Tweet> vector = new Vector<Tweet>();
      HashMap<String, Integer> counts = new HashMap<String, Integer>();
      int lineNo = 0;
      String s;
      while ((s = in.readLine()) != null) {
        lineNo++;
        if (s.equals(separator)) {
          // Only close clusters that actually contain tweets; an empty cluster has no first
          // element and no dominant label, so it cannot be built or scored.
          if (!vector.isEmpty()) {
            t.add(vector);
            vm.add(counts);
            vector = new Vector<Tweet>();
            counts = new HashMap<String, Integer>();
          }
          continue;
        }
        if (s.isEmpty()) {
          continue;
        }
        int space = s.indexOf(" ");
        if (space < 0) {
          throw new IOException(
              "Defrag_4.out, line " + lineNo + ": expected \"<label> <text>\" but found: " + s);
        }
        String label = s.substring(0, space);
        String str_tw = s.substring(space + 1).trim();
        vector.add(new Tweet(str_tw, label));

        Integer oldCount = map.get(label);
        map.put(label, oldCount == null ? 1 : oldCount + 1);
        Integer oldCountInternal = counts.get(label);
        counts.put(label, oldCountInternal == null ? 1 : oldCountInternal + 1);
      }
      // The last cluster is not necessarily followed by a separator.
      if (!vector.isEmpty()) {
        t.add(vector);
        vm.add(counts);
      }
    }

    // Building clusters: the first tweet seeds the cluster, the remaining ones are added as
    // members. The second constructor argument is the cluster's position in v.
    for (Vector<Tweet> vt : t) {
      Cluster cluster = new Cluster(vt.firstElement(), v.size());
      for (int idx = 1; idx < vt.size(); idx++) {
        cluster.addmember(vt.get(idx));
      }
      v.add(cluster);
    }
    System.out.println(v.size());

    // De-fragmentation is currently disabled. Earlier revisions merged every later cluster
    // whose centroid cosine similarity to an earlier one reached a threshold (0.35, later
    // 0.32) and collected the merged clusters in vc. Nothing fills vc now, so this prints 0.
    Vector<Cluster> vc = new Vector<Cluster>();
    System.out.println(vc.size());

    // F-measure per cluster, followed by the unweighted average over all clusters.
    try (BufferedWriter bw = new BufferedWriter(new FileWriter("F-Measure4_1_After.out"))) {
      double avg = 0.0;
      for (int index = 0; index < t.size(); index++) {
        int i = t.get(index).size(); // elements inside the current cluster
        HashMap<String, Integer> counts = vm.get(index);

        // k = size of the largest same-label group inside the cluster.
        String maxLabel = "";
        int k = 0;
        for (String str : counts.keySet()) {
          int x = counts.get(str);
          if (x > k) {
            k = x;
            maxLabel = str;
          }
        }
        int j = map.get(maxLabel); // elements carrying the dominant label in the whole file

        double tp = k;
        double fp = i - tp;
        double fn = j - tp;
        double precision = tp / (tp + fp);
        double recall = tp / (tp + fn);
        double fmeasure = (2 * precision * recall) / (precision + recall);
        avg += fmeasure;
        bw.append("F-Measure = " + fmeasure + "\n");
      }
      // Without any cluster there is nothing to average; report 0 instead of NaN.
      double overall = t.isEmpty() ? 0.0 : avg / t.size();
      bw.append("Overall F-Measure = " + overall + "\n");
    }
  }
// Example #2
// 0
  /**
   * Recomputes {@code scorevar}, {@code scorevardef}, {@code scorestabdet},
   * {@code scorestabdesc} and {@code scorestabpop} from the matrices held by {@code clustinit}
   * and from the clusters in {@code clustHistDef}.
   *
   * <p>All row loops start at row 3, so entries 0 to 2 of the score arrays stay 0.
   * NOTE(review): rows 0-2 are presumably non-numeric metadata rows - confirm.
   */
  public void calcscores() {
    // Freshly allocated arrays are zero-filled by Java, so no explicit reset is needed (and
    // none is attempted on indices that may not exist when there are fewer than 3 rows).
    int rowCount = (int) clustinit.vtestsm.getRowCount();
    scorevar = new double[rowCount];
    scorestabdet = new double[rowCount];
    scorevardef = new double[rowCount];

    fillVariableScores(clustinit.vtestsm, clustinit.avgsm, scorevar);
    fillVariableScores(clustinit.vtestsmdef, clustinit.avgsmdef, scorevardef);

    // The loop bounds below come from vtestsmdef while the values are read from vtestsm.
    Matrix mbase = clustinit.vtestsmdef;

    // Per-row stability: 1 minus the mean relative deviation of every column from the
    // column clustinit.idtickinit. NaN rows (0/0 deviations) are kept in scorestabdet but
    // left out of the aggregated scorestabdesc.
    scorestabdesc = 0;
    for (int i = 3; i < mbase.getRowCount(); i++) {
      double deviation = 0;
      for (int j = 0; j < mbase.getColumnCount(); j++) {
        double value = clustinit.vtestsm.getAsDouble(i, j);
        double reference = clustinit.vtestsm.getAsDouble(i, clustinit.idtickinit);
        deviation +=
            Math.abs(value - reference)
                / Math.max(Math.abs(value), Math.abs(reference))
                / mbase.getColumnCount();
      }
      scorestabdet[i] = 1 - deviation;
      if (!Double.isNaN(scorestabdet[i])) {
        scorestabdesc = scorestabdesc + scorestabdet[i] / (mbase.getRowCount() - 3);
      }
    }

    // Population stability: share of components in common with each cluster of
    // clustHistDef, relative to the larger of the two clusters, summed and then divided by
    // the column count of vtestsm.
    scorestabpop = 0;
    for (int j = 0; j < clustHistDef.size(); j++) {
      Cluster nc = clustHistDef.get(j);
      scorestabpop +=
          (double) clustinit.getNumberOfCommonComponents(nc)
              / (double) Math.max(clustinit.getComponentIds().size(), nc.getComponentIds().size());
    }
    scorestabpop = (double) scorestabpop / clustinit.vtestsm.getColumnCount();

    // The id, class-label and label-colour rows, and every row whose label starts with
    // "CLASS_LABEL", get a variable score of 0.
    long idColumn = mbase.getRowForLabel(Cluster.ID_C_NAME);
    long classLabelColumn = mbase.getRowForLabel(Cluster.CLASS_LABEL_C_NAME);
    long labelColorColumn = mbase.getRowForLabel("LABEL-COLOR");
    Pattern classLabelPrefix = Pattern.compile("CLASS_LABEL"); // compiled once, not per row
    for (int i = 3; i < mbase.getRowCount(); i++) {
      Matcher m = classLabelPrefix.matcher(mbase.getRowLabel(i));
      if (i == idColumn || i == classLabelColumn || i == labelColorColumn || m.lookingAt()) {
        scorevardef[i] = 0;
        scorevar[i] = 0;
      }
    }
  }

  /**
   * Fills {@code scores[i]} for every row {@code i >= 3} of {@code values} with
   * {@code mean(|values row i|) * std(averages row i) / mean(|averages row i|)}, or with 0
   * when that divisor is not greater than 0.
   *
   * @param values matrix whose rows are scored
   * @param averages matrix supplying the standard deviation and the divisor for each row
   * @param scores destination array, indexed by row
   */
  private void fillVariableScores(Matrix values, Matrix averages, double[] scores) {
    for (int i = 3; i < values.getRowCount(); i++) {
      double sco = values.selectRows(Ret.LINK, i).abs(Ret.LINK).getMeanValue();
      sco = sco * averages.selectRows(Ret.LINK, i).getStdValue();
      double div = averages.abs(Ret.LINK).selectRows(Ret.LINK, i).getMeanValue();
      scores[i] = div > 0 ? sco / div : 0;
    }
  }