Пример #1
0
 LambdaConv24(SAM<String> p) {
   assertTrue(p.m("42"));
 }
Пример #2
0
  /**
   * Splits a paired SAM file into three outputs and writes the discordant pairs in sorted order.
   *
   * <p>Expects exactly two arguments: {@code args[0]} is the SAM file to read and {@code args[1]}
   * is the configuration file passed to {@code Constants.loadConstants}. With any other argument
   * count only a usage line is printed to stderr.
   *
   * <p>Lines starting with {@code @} are collected as header text. Every other line is read as the
   * first record of a pair followed by its mate; records for which {@code isSecondary()} is true
   * are skipped on both sides. Each pair is then routed by {@code SAMPair}:
   *
   * <ul>
   *   <li>{@code isDiscordant()} pairs are buffered, sorted with {@code Collections.sort} in
   *       chunks of 200000 records, spilled to {@code args[0].part_N} files, and finally merged
   *       into {@code args[0].discordant.midsorted} (headers first); the part files are deleted
   *       afterwards.
   *   <li>Pairs that are not discordant and for which {@code isBothUnmapped()} is false go to
   *       {@code args[0].singleEndMapped}.
   *   <li>All remaining pairs go to {@code args[0].unmapped}.
   * </ul>
   *
   * <p>The two side files are created lazily, so they only exist if at least one pair was routed
   * to them. A summary of the pair counts is printed to stdout at the end.
   *
   * <p>If the input ends right after the first record of a pair, a warning is printed to stderr
   * and that record is dropped instead of handing a {@code null} line to the {@code SAM}
   * constructor.
   *
   * @param args {@code [samFile, configFile]}
   * @throws Exception if reading, writing, or any of the project helpers fails
   */
  public static void main(String[] args) throws Exception {
    if (args.length != 2) {
      System.err.println("USAGE: java -jar SAMSort.jar <BWA PAIRED SAM file> <configFile>");
      return;
    }
    System.err.println("Running: java -jar SAMSort.jar " + args[0] + " " + args[1]);
    // args[1]: conf file
    Constants.loadConstants(args[1], false);

    final String samPath = args[0];
    // Number of discordant records buffered in memory before a sorted part file is written.
    final int chunkSize = 200000;

    // Only this thread touches the header text, so the unsynchronized builder is sufficient.
    StringBuilder headers = new StringBuilder();
    ArrayList<SAM> chunk = new ArrayList<SAM>();
    int partCount = 0;

    int totalPairCount = 0;
    int discordantPairCount = 0;
    int singleEndMappedPairCount = 0;
    int unmappedPairCount = 0;

    BufferedReader br = new BufferedReader(new FileReader(samPath));
    BufferedWriter unmapped_bw = null;
    BufferedWriter se_bw = null;
    try {
      String curline;
      while ((curline = br.readLine()) != null) {
        if (curline.startsWith("@")) {
          headers.append(curline).append("\n");
          continue;
        }

        // skip secondary. Load until primary
        SAM s1 = readPrimary(br, curline);
        if (s1 == null) {
          break;
        }

        String mateLine = br.readLine();
        if (mateLine == null) {
          // Input ended between the two records of a pair; there is nothing to pair s1 with.
          System.err.println(
              "WARNING: " + samPath + " ended after an unpaired record; ignoring that record");
          break;
        }
        SAM s2 = readPrimary(br, mateLine);
        if (s2 == null) {
          break;
        }

        totalPairCount++;

        SAMPair pair = new SAMPair(s1, s2);
        if (pair.isDiscordant()) {
          // only discordant pairs are collected for sorting
          discordantPairCount++;
          chunk.add(s1);
          chunk.add(s2);
          if (chunk.size() >= chunkSize) {
            writeSortedPart(chunk, samPath + ".part_" + partCount);
            partCount++;
            chunk = new ArrayList<SAM>();
          }
        } else if (!pair.isBothUnmapped()) { // single-end mapped
          if (se_bw == null) {
            se_bw = new BufferedWriter(new FileWriter(samPath + ".singleEndMapped"));
            se_bw.write(headers.toString());
          }
          singleEndMappedPairCount++;
          se_bw.write(s1.getSamline() + "\n");
          se_bw.write(s2.getSamline() + "\n");
        } else { // both unmapped
          if (unmapped_bw == null) {
            unmapped_bw = new BufferedWriter(new FileWriter(samPath + ".unmapped"));
            unmapped_bw.write(headers.toString());
          }
          unmappedPairCount++;
          unmapped_bw.write(s1.getSamline() + "\n");
          unmapped_bw.write(s2.getSamline() + "\n");
        }
      }
    } finally {
      // Release the input and both side files even when the loop above fails.
      closeAll(br, se_bw, unmapped_bw);
    }

    if (!chunk.isEmpty()) {
      writeSortedPart(chunk, samPath + ".part_" + partCount);
      partCount++;
    }
    chunk = null; // the buffered records are no longer needed during the merge

    // Merge the sorted part files. The map holds the next unwritten record of each part; records
    // that compare equal to an existing key wait in that key's queue.
    BufferedReader[] brArr = new BufferedReader[partCount];
    try {
      TreeMap<SAM, Queue<SAM>> pending = new TreeMap<SAM, Queue<SAM>>();
      for (int j = 0; j < partCount; j++) {
        brArr[j] = new BufferedReader(new FileReader(samPath + ".part_" + j));
        enqueueNextRecord(brArr[j], j, pending);
      }

      BufferedWriter bw = new BufferedWriter(new FileWriter(samPath + ".discordant.midsorted"));
      try {
        bw.write(headers.toString());
        while (!pending.isEmpty()) {
          SAM head = pending.firstKey();
          Queue<SAM> ties = pending.remove(head);
          bw.write(head.getSamline());
          bw.write("\n");
          if (!ties.isEmpty()) {
            // Promote the oldest waiting record to be the key for the rest of its queue.
            pending.put(ties.remove(), ties);
          }
          // Refill from the part that supplied the record just written.
          int partIndex = head.getIndex();
          enqueueNextRecord(brArr[partIndex], partIndex, pending);
        }
      } finally {
        bw.close();
      }
    } finally {
      closeAll(brArr);
    }

    for (int j = 0; j < partCount; j++) {
      File part = new File(samPath + ".part_" + j);
      if (!part.delete()) {
        System.err.println("WARNING: could not delete temporary file " + part.getPath());
      }
    }

    System.out.println("------ DONE WITH REMOVAL OF CONCORDANT & UNMAPPED PAIRS ---------");
    System.out.println("------ Processed a total of\t" + totalPairCount + " read-pairs");
    System.out.println(
        "------ Retained a total of\t"
            + discordantPairCount
            + " read-pairs writtend out to "
            + args[0]
            + ".discordant.midsorted");
    System.out.println(
        "------ Single-end mapped :\t"
            + singleEndMappedPairCount
            + " read-pairs written out to "
            + args[0]
            + ".singleEndMapped");
    System.out.println(
        "------ Unmapped (both ends) : \t"
            + unmappedPairCount
            + " read-pairs written out to "
            + args[0]
            + ".unmapped");
  }

  /**
   * Returns the first record, starting at {@code firstLine}, for which {@code isSecondary()} is
   * false, reading further lines from {@code reader} as needed.
   *
   * @param reader source of follow-up lines
   * @param firstLine the line already read by the caller; must not be {@code null}
   * @return the record found, or {@code null} if the input ended first
   * @throws Exception if reading fails or the {@code SAM} constructor fails (declared broadly
   *     because the constructor's declared exceptions are not visible from here)
   */
  private static SAM readPrimary(BufferedReader reader, String firstLine) throws Exception {
    String line = firstLine;
    while (line != null) {
      SAM record = new SAM(line);
      if (!record.isSecondary()) {
        return record;
      }
      line = reader.readLine();
    }
    return null;
  }

  /**
   * Sorts {@code records} in place with {@code Collections.sort} and writes one
   * {@code getSamline()} per line to {@code path}.
   *
   * @param records records to sort and write
   * @param path destination file
   * @throws java.io.IOException if the file cannot be written
   */
  private static void writeSortedPart(ArrayList<SAM> records, String path)
      throws java.io.IOException {
    Collections.sort(records);
    BufferedWriter bw = new BufferedWriter(new FileWriter(path));
    try {
      for (SAM s : records) {
        bw.write(s.getSamline());
        bw.write("\n");
      }
    } finally {
      bw.close();
    }
  }

  /**
   * Reads the next line of one part file and registers it in the merge map: as a new key with an
   * empty queue, or appended to the queue of a key it compares equal to. Does nothing once the
   * part is exhausted.
   *
   * @param reader reader of the part file
   * @param partIndex index of that part, stored in the created record
   * @param pending merge map to update
   * @throws Exception if reading fails or the {@code SAM} constructor fails (declared broadly
   *     because the constructor's declared exceptions are not visible from here)
   */
  private static void enqueueNextRecord(
      BufferedReader reader, int partIndex, Map<SAM, Queue<SAM>> pending) throws Exception {
    String line = reader.readLine();
    if (line == null) {
      return;
    }
    SAM record = new SAM(line, partIndex);
    Queue<SAM> ties = pending.get(record);
    if (ties == null) {
      pending.put(record, new LinkedList<SAM>());
    } else {
      ties.add(record);
    }
  }

  /**
   * Closes every non-null resource, attempting all of them even if one fails, then rethrows the
   * first failure.
   *
   * @param resources resources to close; {@code null} entries are skipped
   * @throws java.io.IOException the first exception raised by a {@code close()} call
   */
  private static void closeAll(java.io.Closeable... resources) throws java.io.IOException {
    java.io.IOException first = null;
    for (java.io.Closeable resource : resources) {
      if (resource == null) {
        continue;
      }
      try {
        resource.close();
      } catch (java.io.IOException e) {
        if (first == null) {
          first = e;
        }
      }
    }
    if (first != null) {
      throw first;
    }
  }