Example #1
0
  /**
   * Round-trips the alignments of the NA12878 MUC1 SAM test file through {@code
   * SamAlignmentEncoder} and {@code AlignmentDecoder}, and checks that every decoded alignment
   * matches the alignment read directly from the file at the same position.
   */
  @Test
  public void testEncodeDecode() throws Exception {
    SAMReader reader = new SAMReader(TestUtils.DATA_DIR + "sam/NA12878.muc1.test.sam");

    // First pass over the file: keep the alignments as the expected values.
    ArrayList<PicardAlignment> expected = new ArrayList<PicardAlignment>();
    for (Iterator<PicardAlignment> source = reader.iterator(); source.hasNext(); ) {
      expected.add(source.next());
    }

    // Second pass over the file: encode everything into an in-memory buffer.
    ByteArrayOutputStream encodedBytes = new ByteArrayOutputStream();
    FeatureEncoder<PicardAlignment> encoder = new SamAlignmentEncoder();
    encoder.encodeAll(encodedBytes, reader.iterator());

    // Decode the buffer again.
    ByteArrayInputStream encodedInput = new ByteArrayInputStream(encodedBytes.toByteArray());
    FeatureDecoder<PicardAlignment> decoder = new AlignmentDecoder();
    Iterator<PicardAlignment> decoded = decoder.decodeAll(encodedInput, false);

    // Compare position by position; "compared" counts the decoded alignments checked so far.
    int compared = 0;
    for (; decoded.hasNext(); compared++) {
      Alignment actual = decoded.next();
      Alignment wanted = expected.get(compared);
      TestUtils.assertFeaturesEqual(wanted, actual);
      assertEquals(wanted.getCigarString(), actual.getCigarString());
    }

    assertEquals("Different number of alignments read in as out", expected.size(), compared);
  }
Example #2
0
  /**
   * Convert a BAM file containing paired-end tags to the ascii "pair" format used for HiC.
   *
   * <p>An alignment is kept only if it is paired and mapped with a mapping quality above 10, its
   * mate is present and mapped, both its chromosome and its mate's chromosome are found in {@code
   * chromosomes}, and the two ends either lie on different chromosomes or have an inferred insert
   * size above 1000. Of the kept alignments, only those whose start is smaller than the mate's
   * start are written. Each output line holds, tab-separated: read name, chromosome, start,
   * strand, a literal ".", mate chromosome, mate start and mate strand.
   *
   * @param inputBam location of the alignment file, passed to {@code
   *     AlignmentReaderFactory.getReader}
   * @param outputFile name of the text file to write
   * @param chromosomes the chromosomes an alignment and its mate must both belong to
   * @throws IOException if the output file cannot be opened or reading the alignments fails
   */
  public static void filterBam(String inputBam, String outputFile, List<Chromosome> chromosomes)
      throws IOException {

    CloseableIterator<Alignment> iter = null;
    AlignmentReader reader = null;
    PrintWriter pw = null;

    // NOTE(review): this set holds Chromosome objects, but it is queried below with the values
    // returned by getChr(). Confirm those compare equal to Chromosome instances; if they do not,
    // no record ever passes the filter.
    HashSet<Chromosome> allChroms = new HashSet<Chromosome>(chromosomes);

    try {
      pw = new PrintWriter(new FileWriter(outputFile));
      reader = AlignmentReaderFactory.getReader(inputBam, false);
      iter = reader.iterator();
      while (iter.hasNext()) {

        Alignment alignment = iter.next();
        ReadMate mate = alignment.getMate();

        // Filter unpaired and "normal" pairs.  Only interested in abnormals
        if (alignment.isPaired()
            && alignment.isMapped()
            && alignment.getMappingQuality() > 10
            && mate != null
            && mate.isMapped()
            && allChroms.contains(alignment.getChr())
            && allChroms.contains(mate.getChr())
            && (!alignment.getChr().equals(mate.getChr())
                || alignment.getInferredInsertSize() > 1000)) {

          // Each pair is represented twice in the file,  keep the record with the "leftmost"
          // coordinate
          if (alignment.getStart() < mate.getStart()) {
            String strand = alignment.isNegativeStrand() ? "-" : "+";
            String mateStrand = mate.isNegativeStrand() ? "-" : "+";
            pw.println(
                alignment.getReadName()
                    + "\t"
                    + alignment.getChr()
                    + "\t"
                    + alignment.getStart()
                    + "\t"
                    + strand
                    + "\t.\t"
                    + mate.getChr()
                    + "\t"
                    + mate.getStart()
                    + "\t"
                    + mateStrand);
          }
        }
      }
    } finally {
      // Any of the three resources may still be null if its creation failed. Closing
      // unconditionally would raise a NullPointerException that hides the original failure.
      if (pw != null) {
        pw.close();
      }
      try {
        if (iter != null) {
          iter.close();
        }
      } finally {
        // Close the reader even when closing the iterator throws.
        if (reader != null) {
          reader.close();
        }
      }
    }
  }