Exemplo n.º 1
0
  /**
   * Compares the motifs found by each method with TF.jtux using STAMP match results, then writes
   * rank and score summary tables.
   *
   * <p>Expected arguments:
   *
   * <ul>
   *   <li>{@code args[0]} - file of tab-separated {@code expt<TAB>tf} pairs
   *   <li>{@code args[1]} - file listing the motif-finding method names
   *   <li>{@code args[2]} - directory containing the {@code <expt>.<method>_match_pairs.txt} files
   *   <li>{@code args[3]} - STAMP p-value cutoff; a match counts only when its p-value is below it
   * </ul>
   *
   * <p>Output is written through {@code CommonUtils.writeFile} to {@code method_rank_matrix.txt}
   * (rank of the first significant motif per expt/method, 99 when there is none),
   * {@code method_expt_scores.txt} (per method, number of expts whose rank is within each top-r
   * cutoff) and {@code method_tf_scores.txt} (the same tally with each expt weighted by
   * 1/(number of expts sharing its tf)).
   *
   * <p>Blank or malformed lines in the expt/tf file and STAMP match lines without a p-value column
   * are skipped instead of aborting the run.
   *
   * @param args command-line style arguments as described above
   * @throws NumberFormatException if the cutoff or a STAMP p-value is not a valid number
   */
  private static void compareTFStampResults(String[] args) {
    final int TOP_MOTIF_RANK = 9; // number of cumulative rank cutoffs reported (Top0 .. Top8)
    final int NO_MATCH_RANK = 99; // rank reported when no motif passes the p-value cutoff
    double stamp_p_value = Double.parseDouble(args[3]);

    // load encode expts (expt/tf pairs), keeping only well-formed lines
    String[] exptLines = readSmallTextFile(args[0]);
    HashMap<String, String> expt2tf = new HashMap<String, String>();
    String[] parsedExpts = new String[exptLines.length];
    int exptCount = 0;
    for (int i = 0; i < exptLines.length; i++) {
      String[] fs = exptLines[i].split("\t");
      if (fs.length < 2) {
        // a line without a tf column cannot be scored; report it unless it is simply blank
        if (exptLines[i].trim().length() > 0) {
          System.err.println("Skipping malformed expt/tf line: " + exptLines[i]);
        }
        continue;
      }
      String expt = fs[0].trim();
      expt2tf.put(expt, fs[1].trim());
      parsedExpts[exptCount++] = expt;
    }
    String[] expts = new String[exptCount];
    System.arraycopy(parsedExpts, 0, expts, 0, exptCount);

    // load motif methods
    String[] methods = readSmallTextFile(args[1]);

    // load STAMP file for each expt_method pair
    HashMap<String, Integer> performances = new HashMap<String, Integer>();
    File stamp_result_dir = new File(args[2]);
    for (String expt : expts) {
      for (String method : methods) {
        String expt_method = expt + "." + method;
        File f = new File(stamp_result_dir, expt_method + "_match_pairs.txt");
        if (!f.exists()) {
          continue;
        }
        String[] sls = readSmallTextFile(f.getAbsolutePath()); // STAMP lines
        // lines are consumed in pairs: a ">" header at i and its top match at i + 1
        for (int i = 0; i < sls.length - 1; i += 2) {
          if (!sls[i].startsWith(">")) {
            continue;
          }
          // only check the top match to TF.jtux
          String[] sl_fs = sls[i + 1].split("\t");
          if (sl_fs.length < 2) {
            continue; // no p-value column on this match line
          }
          double p = Double.parseDouble(sl_fs[1]);
          if (p < stamp_p_value) {
            performances.put(expt_method, i / 2); // motif rank in this expt
            break; // the first significant motif decides the rank for this method
          }
        }
      }
    }

    // print out results
    StringBuilder sb = new StringBuilder("expt\ttf\t");
    for (String method : methods) {
      sb.append(method).append("\t");
    }
    sb.append("\n");
    HashMap<String, Integer> tf2count = new HashMap<String, Integer>();
    for (String expt : expts) {
      String tf = expt2tf.get(expt);
      Integer seen = tf2count.get(tf);
      tf2count.put(tf, seen == null ? 1 : seen.intValue() + 1);
      sb.append(expt).append("\t").append(tf).append("\t");
      for (String method : methods) {
        Integer found = performances.get(expt + "." + method);
        int rank = (found == null) ? NO_MATCH_RANK : found.intValue();
        sb.append(rank).append("\t");
      }
      sb.append("\n");
    }
    CommonUtils.writeFile("method_rank_matrix.txt", sb.toString());

    // print out result for each top rank
    HashMap<String, int[]> performanceByExpt = new HashMap<String, int[]>();
    HashMap<String, float[]> performanceByTF = new HashMap<String, float[]>();
    sb = new StringBuilder("Rank\t");
    StringBuilder sb2 = new StringBuilder("Rank\t");
    for (String method : methods) {
      sb.append(method).append("\t");
      sb2.append(method).append("\t");
      // sized from the constant so the tally loops below can never run past the arrays
      performanceByExpt.put(method, new int[TOP_MOTIF_RANK]);
      performanceByTF.put(method, new float[TOP_MOTIF_RANK]);
    }
    sb.append("\n");
    sb2.append("\n");
    for (String expt : expts) {
      String tf = expt2tf.get(expt);
      // each expt contributes 1/(number of expts sharing its tf) to the per-TF score
      double tfShare = 1.0 / tf2count.get(tf);
      for (String method : methods) {
        Integer found = performances.get(expt + "." + method);
        int rank = (found == null) ? NO_MATCH_RANK : found.intValue();
        int[] exptScores = performanceByExpt.get(method);
        float[] tfScores = performanceByTF.get(method);
        // cumulative tally: a hit at rank k counts for every cutoff r >= k
        for (int r = 0; r < TOP_MOTIF_RANK; r++) {
          if (rank <= r) {
            exptScores[r]++;
            tfScores[r] += tfShare;
          }
        }
      }
    }

    for (int r = 0; r < TOP_MOTIF_RANK; r++) {
      sb.append("Top" + r + "\t");
      sb2.append("Top" + r + "\t");
      for (String method : methods) {
        int[] scores = performanceByExpt.get(method);
        float[] scores_tf = performanceByTF.get(method);
        sb.append(scores[r]).append("\t");
        sb2.append(String.format("%.2f\t", scores_tf[r]));
      }
      sb.append("\n");
      sb2.append("\n");
    }

    CommonUtils.writeFile("method_expt_scores.txt", sb.toString());
    CommonUtils.writeFile("method_tf_scores.txt", sb2.toString());
  }
Exemplo n.º 2
0
  /**
   * Compares the motifs found by each method with known motifs in a database using STAMP match
   * results, then writes rank and score summary tables.
   *
   * <p>Expected arguments:
   *
   * <ul>
   *   <li>{@code args[0]} - tab-separated mapping file, first line a header, then
   *       {@code tf<TAB>dbEntry<TAB>dbEntry...} per line
   *   <li>{@code args[1]} - motif cluster file; only lines containing {@code "Cluster_Members:\t"}
   *       are used, with the tab-separated members following that marker
   *   <li>{@code args[2]} - file of tab-separated {@code expt<TAB>tf} pairs
   *   <li>{@code args[3]} - file listing the motif-finding method names
   *   <li>{@code args[4]} - directory containing the {@code <expt>.<method>_match_pairs.txt} files
   *   <li>{@code args[5]} - number of top STAMP matches per motif to inspect
   *   <li>{@code args[6]} - STAMP p-value cutoff; a match counts only when its p-value is below it
   * </ul>
   *
   * <p>Only expts whose tf has an entry in the mapping file are reported. Output is written
   * through {@code CommonUtils.writeFile} to {@code method_rank_matrix.txt},
   * {@code method_expt_scores.txt} and {@code method_tf_scores.txt}.
   *
   * <p>Each tf's known entries are expanded with all members of every cluster that contains one
   * of them; the originally annotated entries are always kept, even when they belong to no
   * cluster.
   *
   * @param args command-line style arguments as described above
   * @throws NumberFormatException if a numeric argument or a STAMP p-value is not a valid number
   */
  private static void compareStampResults(String[] args) {

    final int STAMP_UNIT_LINE_COUNT = 11; // lines per motif in a STAMP file: ">" header + matches
    final int TOP_MOTIF_RANK = 9; // number of cumulative rank cutoffs reported (Top0 .. Top8)
    final int NO_MATCH_RANK = 99; // rank reported when no motif passes the p-value cutoff

    int stamp_top_count = Integer.parseInt(args[5]);
    double stamp_p_value = Double.parseDouble(args[6]);

    // load the mapping file between tf and known motif db entries
    String[] lines = readSmallTextFile(args[0]);
    HashMap<String, HashSet<String>> tf2db = new HashMap<String, HashSet<String>>();
    for (int i = 1; i < lines.length; i++) { // skip line 0, header
      String[] fs = lines[i].split("\t");
      if (fs.length <= 1) {
        continue;
      }
      // trimmed so the key matches the trimmed tf names read from the expt file below
      String tf = fs[0].trim();
      HashSet<String> entries = new HashSet<String>();
      for (int j = 1; j < fs.length; j++) {
        String entry = fs[j].trim();
        if (entry.length() >= 1) {
          entries.add(entry);
        }
      }
      tf2db.put(tf, entries);
    }

    // load motif clusters
    lines = readSmallTextFile(args[1]);
    ArrayList<HashSet<String>> clusters = new ArrayList<HashSet<String>>();
    for (int i = 0; i < lines.length; i++) {
      String[] fs = lines[i].split("Cluster_Members:\t");
      if (fs.length <= 1) {
        continue;
      }
      String[] ms = fs[1].split("\t");
      HashSet<String> cluster = new HashSet<String>();
      for (String m : ms) {
        m = m.trim();
        if (m.length() >= 1) {
          cluster.add(m);
        }
      }
      clusters.add(cluster);
    }

    // expand tf to db entry mapping with the cluster mates of each annotated entry
    for (HashSet<String> annotations : tf2db.values()) {
      // collected separately because the annotation set cannot be modified while iterating it
      HashSet<String> clusterMates = new HashSet<String>();
      for (String anno : annotations) {
        for (HashSet<String> cluster : clusters) {
          if (cluster.contains(anno)) {
            clusterMates.addAll(cluster);
          }
        }
      }
      // add in place so annotated entries that are in no cluster are not lost
      annotations.addAll(clusterMates);
    }

    // load encode expts (expt/tf pairs), keeping only well-formed lines
    String[] exptLines = readSmallTextFile(args[2]);
    HashMap<String, String> expt2tf = new HashMap<String, String>();
    ArrayList<String> expts = new ArrayList<String>();
    for (int i = 0; i < exptLines.length; i++) {
      String[] fs = exptLines[i].split("\t");
      if (fs.length < 2) {
        // a line without a tf column cannot be scored; report it unless it is simply blank
        if (exptLines[i].trim().length() > 0) {
          System.err.println("Skipping malformed expt/tf line: " + exptLines[i]);
        }
        continue;
      }
      String expt = fs[0].trim();
      expt2tf.put(expt, fs[1].trim());
      expts.add(expt);
    }

    // load motif methods
    String[] methods = readSmallTextFile(args[3]);

    // load STAMP file for each expt_method pair
    HashMap<String, Integer> performances = new HashMap<String, Integer>();
    File dir = new File(args[4]);
    for (String expt : expts) {
      HashSet<String> knownEntries = tf2db.get(expt2tf.get(expt));
      if (knownEntries == null) {
        continue; // tf has no known motif annotation
      }
      each_method:
      for (String method : methods) {
        String pair = expt + "." + method;
        File f = new File(dir, pair + "_match_pairs.txt");
        if (!f.exists()) {
          continue;
        }
        String[] sls = readSmallTextFile(f.getAbsolutePath()); // STAMP lines
        for (int i = 0; i < sls.length; i += STAMP_UNIT_LINE_COUNT) {
          if (!sls[i].startsWith(">")) {
            continue;
          }
          int rank = i / STAMP_UNIT_LINE_COUNT; // motif rank in this expt
          // never read beyond this motif's own unit, nor past the end of a truncated file
          int matchesInUnit = Math.min(stamp_top_count, STAMP_UNIT_LINE_COUNT - 1);
          int lastMatchLine = Math.min(i + matchesInUnit, sls.length - 1);
          for (int k = i + 1; k <= lastMatchLine; k++) { // each top db entry in STAMP results
            String[] sl_fs = sls[k].split("\t");
            if (sl_fs.length < 2) {
              continue; // no p-value column on this match line
            }
            String entry = sl_fs[0].trim();
            if (!knownEntries.contains(entry)) {
              continue;
            }
            double p = Double.parseDouble(sl_fs[1]);
            if (p < stamp_p_value) {
              performances.put(pair, rank);
              continue each_method; // the first significant motif decides the rank
            }
          }
        }
      }
    }

    // print out results
    StringBuilder sb = new StringBuilder("expt\ttf\t");
    for (String method : methods) {
      sb.append(method).append("\t");
    }
    sb.append("\n");
    HashMap<String, Integer> tf2count = new HashMap<String, Integer>();
    for (String expt : expts) {
      String tf = expt2tf.get(expt);
      if (!tf2db.containsKey(tf)) {
        continue; // only count annotated expts/tfs with public known motif
      }
      Integer seen = tf2count.get(tf);
      tf2count.put(tf, seen == null ? 1 : seen.intValue() + 1);
      sb.append(expt).append("\t").append(tf).append("\t");
      for (String method : methods) {
        Integer found = performances.get(expt + "." + method);
        int rank = (found == null) ? NO_MATCH_RANK : found.intValue();
        sb.append(rank).append("\t");
      }
      sb.append("\n");
    }
    CommonUtils.writeFile("method_rank_matrix.txt", sb.toString());

    // print out result for each top rank
    HashMap<String, int[]> performanceByExpt = new HashMap<String, int[]>();
    HashMap<String, float[]> performanceByTF = new HashMap<String, float[]>();
    sb = new StringBuilder("Rank\t");
    StringBuilder sb2 = new StringBuilder("Rank\t");
    for (String method : methods) {
      sb.append(method).append("\t");
      sb2.append(method).append("\t");
      // sized from the constant so the tally loops below can never run past the arrays
      performanceByExpt.put(method, new int[TOP_MOTIF_RANK]);
      performanceByTF.put(method, new float[TOP_MOTIF_RANK]);
    }
    sb.append("\n");
    sb2.append("\n");
    for (String expt : expts) {
      Integer exptsForTf = tf2count.get(expt2tf.get(expt));
      if (exptsForTf == null) {
        continue; // unannotated tf: never counted above and never has a recorded rank
      }
      // each expt contributes 1/(number of expts sharing its tf) to the per-TF score
      double tfShare = 1.0 / exptsForTf.intValue();
      for (String method : methods) {
        Integer found = performances.get(expt + "." + method);
        int rank = (found == null) ? NO_MATCH_RANK : found.intValue();
        int[] exptScores = performanceByExpt.get(method);
        float[] tfScores = performanceByTF.get(method);
        // cumulative tally: a hit at rank k counts for every cutoff r >= k
        for (int r = 0; r < TOP_MOTIF_RANK; r++) {
          if (rank <= r) {
            exptScores[r]++;
            tfScores[r] += tfShare;
          }
        }
      }
    }

    for (int r = 0; r < TOP_MOTIF_RANK; r++) {
      sb.append("Top" + r + "\t");
      sb2.append("Top" + r + "\t");
      for (String method : methods) {
        int[] scores = performanceByExpt.get(method);
        float[] scores_tf = performanceByTF.get(method);
        sb.append(scores[r]).append("\t");
        sb2.append(String.format("%.2f\t", scores_tf[r]));
      }
      sb.append("\n");
      sb2.append("\n");
    }

    CommonUtils.writeFile("method_expt_scores.txt", sb.toString());
    CommonUtils.writeFile("method_tf_scores.txt", sb2.toString());
  }