/**
 * Round-trips the alignments of a test SAM file through {@code SamAlignmentEncoder} and
 * {@code AlignmentDecoder}, then checks that every decoded alignment matches the alignment
 * read at the same position from the original file, and that the counts agree.
 */
@Test
public void testEncodeDecode() throws Exception {
    SAMReader reader = new SAMReader(TestUtils.DATA_DIR + "sam/NA12878.muc1.test.sam");

    // First pass over the file: keep the alignments as the expected values.
    ArrayList<PicardAlignment> originals = new ArrayList<PicardAlignment>();
    for (Iterator<PicardAlignment> source = reader.iterator(); source.hasNext(); ) {
        originals.add(source.next());
    }

    // Second pass: encode a fresh iteration of the same reader into memory.
    ByteArrayOutputStream encoded = new ByteArrayOutputStream();
    FeatureEncoder<PicardAlignment> encoder = new SamAlignmentEncoder();
    encoder.encodeAll(encoded, reader.iterator());

    // Decode the in-memory bytes back into alignments.
    FeatureDecoder<PicardAlignment> decoder = new AlignmentDecoder();
    ByteArrayInputStream encodedInput = new ByteArrayInputStream(encoded.toByteArray());
    Iterator<PicardAlignment> decoded = decoder.decodeAll(encodedInput, false);

    // Compare position by position; the counter doubles as the number of decoded alignments.
    int compared = 0;
    while (decoded.hasNext()) {
        Alignment actual = decoded.next();
        Alignment expected = originals.get(compared);
        compared++;

        TestUtils.assertFeaturesEqual(expected, actual);
        assertEquals(expected.getCigarString(), actual.getCigarString());
    }

    assertEquals("Different number of alignments read in as out", originals.size(), compared);
}
/** * Convert a BAM file containing paried-end tags to the ascii "pair" format used for HiC. * * @param inputBam * @param outputFile * @throws IOException */ public static void filterBam(String inputBam, String outputFile, List<Chromosome> chromosomes) throws IOException { CloseableIterator<Alignment> iter = null; AlignmentReader reader = null; PrintWriter pw = null; HashSet allChroms = new HashSet(chromosomes); try { pw = new PrintWriter(new FileWriter(outputFile)); reader = AlignmentReaderFactory.getReader(inputBam, false); iter = reader.iterator(); while (iter.hasNext()) { Alignment alignment = iter.next(); ReadMate mate = alignment.getMate(); // Filter unpaired and "normal" pairs. Only interested in abnormals if (alignment.isPaired() && alignment.isMapped() && alignment.getMappingQuality() > 10 && mate != null && mate.isMapped() && allChroms.contains(alignment.getChr()) && allChroms.contains(mate.getChr()) && (!alignment.getChr().equals(mate.getChr()) || alignment.getInferredInsertSize() > 1000)) { // Each pair is represented twice in the file, keep the record with the "leftmost" // coordinate if (alignment.getStart() < mate.getStart()) { String strand = alignment.isNegativeStrand() ? "-" : "+"; String mateStrand = mate.isNegativeStrand() ? "-" : "+"; pw.println( alignment.getReadName() + "\t" + alignment.getChr() + "\t" + alignment.getStart() + "\t" + strand + "\t.\t" + mate.getChr() + "\t" + mate.getStart() + "\t" + mateStrand); } } } } finally { pw.close(); iter.close(); reader.close(); } }