示例#1
0
  /**
   * Convert a BAM file containing paired-end tags to the ascii "pair" format used for HiC.
   *
   * <p>Only "abnormal" pairs are kept: the alignment must be paired and mapped with a mapping
   * quality greater than 10, its mate must be present and mapped, both chromosomes must be found
   * in the set built from {@code chromosomes}, and the two ends must either lie on different
   * chromosomes or have an inferred insert size greater than 1000.
   *
   * <p>Each retained pair is written as one tab-separated line:
   * {@code readName, chr, start, strand, ".", mateChr, mateStart, mateStrand}, where a strand is
   * {@code "-"} for the negative strand and {@code "+"} otherwise.
   *
   * @param inputBam alignment file to read, passed to {@code AlignmentReaderFactory.getReader}
   * @param outputFile path of the text file to write; opened with {@link FileWriter}
   * @param chromosomes chromosomes to retain; alignments whose own or mate chromosome is not found
   *     in the set built from this list are skipped
   * @throws IOException if the output file cannot be opened, or if reading or closing the
   *     alignment source fails
   */
  public static void filterBam(String inputBam, String outputFile, List<Chromosome> chromosomes)
      throws IOException {

    // Both thresholds are exclusive lower bounds.
    final int minMappingQuality = 10;
    final int minInsertSize = 1000;

    CloseableIterator<Alignment> iter = null;
    AlignmentReader reader = null;
    PrintWriter pw = null;

    // NOTE(review): the set holds Chromosome objects but is probed below with getChr() values.
    // If getChr() returns a String (to be confirmed against Alignment/ReadMate), contains() can
    // only succeed when Chromosome.equals() accepts Strings; otherwise nothing is ever written.
    // Consider building the set from chromosome names instead.
    HashSet<Object> allChroms = new HashSet<Object>(chromosomes);

    try {
      pw = new PrintWriter(new FileWriter(outputFile));
      reader = AlignmentReaderFactory.getReader(inputBam, false);
      iter = reader.iterator();
      while (iter.hasNext()) {

        Alignment alignment = iter.next();
        ReadMate mate = alignment.getMate();

        // Filter unpaired and "normal" pairs.  Only interested in abnormals
        if (alignment.isPaired()
            && alignment.isMapped()
            && alignment.getMappingQuality() > minMappingQuality
            && mate != null
            && mate.isMapped()
            && allChroms.contains(alignment.getChr())
            && allChroms.contains(mate.getChr())
            && (!alignment.getChr().equals(mate.getChr())
                || alignment.getInferredInsertSize() > minInsertSize)) {

          // Each pair is represented twice in the file,  keep the record with the "leftmost"
          // coordinate.
          // NOTE(review): the comparison is strict, so a pair whose two start positions are equal
          // is emitted by neither record.
          if (alignment.getStart() < mate.getStart()) {
            String strand = alignment.isNegativeStrand() ? "-" : "+";
            String mateStrand = mate.isNegativeStrand() ? "-" : "+";
            pw.println(
                alignment.getReadName()
                    + "\t"
                    + alignment.getChr()
                    + "\t"
                    + alignment.getStart()
                    + "\t"
                    + strand
                    + "\t.\t"
                    + mate.getChr()
                    + "\t"
                    + mate.getStart()
                    + "\t"
                    + mateStrand);
          }
        }
      }
    } finally {
      // Any of these may still be null if an earlier step threw; closing unconditionally would
      // raise a NullPointerException that hides the original failure. The nested finally blocks
      // make sure every opened resource is closed even when an earlier close() throws.
      try {
        if (pw != null) {
          pw.close();
        }
      } finally {
        try {
          if (iter != null) {
            iter.close();
          }
        } finally {
          if (reader != null) {
            reader.close();
          }
        }
      }
    }
  }