/** To compare motif found by each methods with TF.jtux using STAMP */ private static void compareTFStampResults(String[] args) { final int TOP_MOTIF_RANK = 9; double stamp_p_value = Double.parseDouble(args[3]); // load the mapping file between tf and known motif db entries // String[] lines = readSmallTextFile(args[0]); // HashMap<String, HashSet<String>> tf2db = new HashMap<String, HashSet<String>>(); // for (int i=0;i<lines.length;i++){ // String[] fs = lines[i].split("\t"); // if (fs.length<=1) // continue; // String tf = fs[0]; // HashSet<String> entries = new HashSet<String>(); // for (int j=1;j<fs.length;j++){ // String entry = fs[j].trim(); // if (entry.length()>=1) // entries.add(entry); // } // tf2db.put(tf, entries); // } // load encode expts, and motif methods String[] expts = readSmallTextFile(args[0]); // read expt/tf pairs HashMap<String, String> expt2tf = new HashMap<String, String>(); for (int i = 0; i < expts.length; i++) { String[] fs = expts[i].split("\t"); expt2tf.put(fs[0].trim(), fs[1].trim()); expts[i] = fs[0].trim(); // replace with expt only } String[] methods = readSmallTextFile(args[1]); // load STAMP file for each expt_method pair HashMap<String, Integer> performances = new HashMap<String, Integer>(); File stamp_result_dir = new File(args[2]); for (String expt : expts) { // String tf = expt2tf.get(expt); // if (tf2db.containsKey(tf)){ each_method: for (String method : methods) { String expt_method = expt + "." + method; File f = new File(stamp_result_dir, expt_method + "_match_pairs.txt"); if (!f.exists()) continue; String[] sls = readSmallTextFile(f.getAbsolutePath()); // stampe lines for (int i = 0; i < sls.length - 1; i += 2) { if (sls[i].startsWith(">")) { int rank = i / 2; // motif rank in this expt // only check the top match to TF.jtux String[] sl_fs = sls[i + 1].split("\t"); // String entry = sl_fs[0].trim(); double p = Double.parseDouble(sl_fs[1]); if (p < stamp_p_value) { performances.put(expt_method, rank); continue each_method; } } } } // } // else { // System.err.println(tf + " is not in the TF_2_DB list: " + args[0]); // } } // print out results StringBuilder sb = new StringBuilder("expt\ttf\t"); for (String method : methods) { sb.append(method).append("\t"); } sb.append("\n"); HashMap<String, Integer> tf2count = new HashMap<String, Integer>(); for (String expt : expts) { String tf = expt2tf.get(expt); // if (!tf2db.containsKey(tf)) // only count annotated expts/tfs with public known // motif // continue; if (tf2count.containsKey(tf)) tf2count.put(tf, tf2count.get(tf) + 1); else tf2count.put(tf, 1); sb.append(expt).append("\t").append(tf).append("\t"); for (String method : methods) { String pair = expt + "." + method; int rank = performances.containsKey(pair) ? performances.get(pair) : 99; sb.append(rank).append("\t"); } sb.append("\n"); } CommonUtils.writeFile("method_rank_matrix.txt", sb.toString()); // print out result for each top rank HashMap<String, int[]> performanceByExpt = new HashMap<String, int[]>(); HashMap<String, float[]> performanceByTF = new HashMap<String, float[]>(); sb = new StringBuilder("Rank\t"); StringBuilder sb2 = new StringBuilder("Rank\t"); for (String method : methods) { sb.append(method).append("\t"); sb2.append(method).append("\t"); performanceByExpt.put(method, new int[9]); performanceByTF.put(method, new float[9]); } sb.append("\n"); sb2.append("\n"); for (String expt : expts) { String tf = expt2tf.get(expt); for (String method : methods) { String pair = expt + "." + method; int rank = performances.containsKey(pair) ? performances.get(pair) : 99; for (int r = 0; r < TOP_MOTIF_RANK; r++) { if (rank <= r) { performanceByExpt.get(method)[r]++; performanceByTF.get(method)[r] += 1.0 / tf2count.get(tf); } } } } for (int r = 0; r < TOP_MOTIF_RANK; r++) { sb.append("Top" + r + "\t"); sb2.append("Top" + r + "\t"); for (String method : methods) { int[] scores = performanceByExpt.get(method); float[] scores_tf = performanceByTF.get(method); sb.append(scores[r]).append("\t"); sb2.append(String.format("%.2f\t", scores_tf[r])); } sb.append("\n"); sb2.append("\n"); } CommonUtils.writeFile("method_expt_scores.txt", sb.toString()); CommonUtils.writeFile("method_tf_scores.txt", sb2.toString()); }
/** To compare motif found by each methods with known motif in database using STAMP */ private static void compareStampResults(String[] args) { final int STAMP_UNIT_LINE_COUNT = 11; int stamp_top_count = Integer.parseInt(args[5]); double stamp_p_value = Double.parseDouble(args[6]); // load the mapping file between tf and known motif db entries String[] lines = readSmallTextFile(args[0]); HashMap<String, HashSet<String>> tf2db = new HashMap<String, HashSet<String>>(); for (int i = 1; i < lines.length; i++) { // skip line 0, header String[] fs = lines[i].split("\t"); if (fs.length <= 1) continue; String tf = fs[0]; HashSet<String> entries = new HashSet<String>(); for (int j = 1; j < fs.length; j++) { String entry = fs[j].trim(); if (entry.length() >= 1) entries.add(entry); } tf2db.put(tf, entries); } // load motif clusters lines = readSmallTextFile(args[1]); ArrayList<HashSet<String>> clusters = new ArrayList<HashSet<String>>(); for (int i = 0; i < lines.length; i++) { String[] fs = lines[i].split("Cluster_Members:\t"); if (fs.length <= 1) continue; String[] ms = fs[1].split("\t"); HashSet<String> cluster = new HashSet<String>(); for (String m : ms) { m = m.trim(); if (m.length() >= 1) cluster.add(m); } clusters.add(cluster); } // expand tf to db entry mapping for (String tf : tf2db.keySet()) { HashSet<String> expandedSet = new HashSet<String>(); HashSet<String> annotations = tf2db.get(tf); for (String anno : annotations) { for (HashSet<String> cluster : clusters) { if (cluster.contains(anno)) expandedSet.addAll(cluster); } } tf2db.put(tf, expandedSet); } // load encode expts, and motif methods String[] expts = readSmallTextFile(args[2]); HashMap<String, String> expt2tf = new HashMap<String, String>(); for (int i = 0; i < expts.length; i++) { String[] fs = expts[i].split("\t"); expt2tf.put(fs[0].trim(), fs[1].trim()); expts[i] = fs[0].trim(); } String[] methods = readSmallTextFile(args[3]); // load STAMP file for each expt_method pair HashMap<String, Integer> performances = new HashMap<String, Integer>(); File dir = new File(args[4]); for (String expt : expts) { String tf = expt2tf.get(expt); if (tf2db.containsKey(tf)) { each_method: for (String method : methods) { String pair = expt + "." + method; File f = new File(dir, pair + "_match_pairs.txt"); if (!f.exists()) continue; String[] sls = readSmallTextFile(f.getAbsolutePath()); // stampe lines for (int i = 0; i < sls.length; i += STAMP_UNIT_LINE_COUNT) { if (sls[i].startsWith(">")) { int rank = i / STAMP_UNIT_LINE_COUNT; // motif rank in this expt for (int j = 1; j <= stamp_top_count; j++) { // each top db entry in STAMP match results String[] sl_fs = sls[i + j].split("\t"); String entry = sl_fs[0].trim(); if (tf2db.get(tf).contains(entry)) { double p = Double.parseDouble(sl_fs[1]); if (p < stamp_p_value) { performances.put(pair, rank); continue each_method; } } } } } } } } // print out results StringBuilder sb = new StringBuilder("expt\ttf\t"); for (String method : methods) { sb.append(method).append("\t"); } sb.append("\n"); HashMap<String, Integer> tf2count = new HashMap<String, Integer>(); for (String expt : expts) { String tf = expt2tf.get(expt); if (!tf2db.containsKey(tf)) // only count annotated expts/tfs with public known motif continue; if (tf2count.containsKey(tf)) tf2count.put(tf, tf2count.get(tf) + 1); else tf2count.put(tf, 1); sb.append(expt).append("\t").append(tf).append("\t"); for (String method : methods) { String pair = expt + "." + method; int rank = performances.containsKey(pair) ? performances.get(pair) : 99; sb.append(rank).append("\t"); } sb.append("\n"); } CommonUtils.writeFile("method_rank_matrix.txt", sb.toString()); // print out result for each top rank HashMap<String, int[]> performanceByExpt = new HashMap<String, int[]>(); HashMap<String, float[]> performanceByTF = new HashMap<String, float[]>(); sb = new StringBuilder("Rank\t"); StringBuilder sb2 = new StringBuilder("Rank\t"); for (String method : methods) { sb.append(method).append("\t"); sb2.append(method).append("\t"); performanceByExpt.put(method, new int[9]); performanceByTF.put(method, new float[9]); } sb.append("\n"); sb2.append("\n"); for (String expt : expts) { String tf = expt2tf.get(expt); for (String method : methods) { String pair = expt + "." + method; int rank = performances.containsKey(pair) ? performances.get(pair) : 99; for (int r = 0; r < 9; r++) { if (rank <= r) { performanceByExpt.get(method)[r]++; performanceByTF.get(method)[r] += 1.0 / tf2count.get(tf); } } } } for (int r = 0; r < 9; r++) { sb.append("Top" + r + "\t"); sb2.append("Top" + r + "\t"); for (String method : methods) { int[] scores = performanceByExpt.get(method); float[] scores_tf = performanceByTF.get(method); sb.append(scores[r]).append("\t"); sb2.append(String.format("%.2f\t", scores_tf[r])); } sb.append("\n"); sb2.append("\n"); } CommonUtils.writeFile("method_expt_scores.txt", sb.toString()); CommonUtils.writeFile("method_tf_scores.txt", sb2.toString()); }