/**
 * Round-trips {@code roundtrip.sam} through a SAM writer after detaching every record from its
 * header ({@code setHeader(null)}), then checks that the written text equals the input text.
 *
 * @throws Exception if the input cannot be read or the temporary output cannot be written
 */
@Test(description = "Write SAM records with null SAMFileHeader")
public void samNullHeaderRoundTrip() throws Exception {
  final File input = new File(TEST_DATA_DIR, "roundtrip.sam");

  final File outputFile = File.createTempFile("nullheader-out", ".sam");
  outputFile.delete();
  outputFile.deleteOnExit();

  // try-with-resources releases the reader and the output stream even if a record fails to
  // write; previously the reader was never closed and the stream leaked on any exception.
  try (SamReader reader = SamReaderFactory.makeDefault().open(input);
      FileOutputStream os = new FileOutputStream(outputFile)) {
    final SAMFileWriterFactory factory = new SAMFileWriterFactory();
    final SAMFileWriter writer = factory.makeSAMWriter(reader.getFileHeader(), false, os);
    for (final SAMRecord rec : reader) {
      rec.setHeader(null);
      writer.addAlignment(rec);
    }
    // Close the writer before the stream so that everything it buffered reaches the file.
    writer.close();
  }

  final String originalsam;
  try (InputStream is = new FileInputStream(input)) {
    originalsam = IOUtil.readFully(is);
  }

  final String writtensam;
  try (InputStream is = new FileInputStream(outputFile)) {
    writtensam = IOUtil.readFully(is);
  }

  Assert.assertEquals(writtensam, originalsam);
}
@Test(dataProvider = "LIBSTest") public void testLIBS(LIBSTest params) { final int locus = 44367788; SAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read", 0, locus, params.readLength); read.setReadBases(Utils.dupBytes((byte) 'A', params.readLength)); read.setBaseQualities(Utils.dupBytes((byte) '@', params.readLength)); read.setCigarString(params.cigar); // create the iterator by state with the fake reads and fake records li = makeLTBS(Arrays.asList(read), createTestReadProperties()); final LIBS_position tester = new LIBS_position(read); while (li.hasNext()) { AlignmentContext alignmentContext = li.next(); ReadBackedPileup p = alignmentContext.getBasePileup(); Assert.assertTrue(p.getNumberOfElements() == 1); PileupElement pe = p.iterator().next(); tester.stepForwardOnGenome(); Assert.assertEquals(pe.isBeforeDeletedBase(), tester.isBeforeDeletedBase); Assert.assertEquals(pe.isBeforeDeletionStart(), tester.isBeforeDeletionStart); Assert.assertEquals(pe.isAfterDeletedBase(), tester.isAfterDeletedBase); Assert.assertEquals(pe.isAfterDeletionEnd(), tester.isAfterDeletionEnd); Assert.assertEquals(pe.isBeforeInsertion(), tester.isBeforeInsertion); Assert.assertEquals(pe.isAfterInsertion(), tester.isAfterInsertion); Assert.assertEquals(pe.isNextToSoftClip(), tester.isNextToSoftClip); Assert.assertEquals(pe.getOffset(), tester.getCurrentReadOffset()); } }
/** * Compare two records based on their duplicate scores. If the scores are equal, we break ties * based on mapping quality (added to the mate's mapping quality if paired and mapped), then * library/read name. * * <p>If true is given to assumeMateCigar, then any score that can use the mate cigar to to * compute the mate's score will return the score computed on both ends. * * <p>We allow different scoring strategies. We return <0 if rec1 has a better strategy than rec2. */ public static int compare( final SAMRecord rec1, final SAMRecord rec2, final ScoringStrategy scoringStrategy, final boolean assumeMateCigar) { int cmp; // always prefer paired over non-paired if (rec1.getReadPairedFlag() != rec2.getReadPairedFlag()) return rec1.getReadPairedFlag() ? 1 : -1; cmp = computeDuplicateScore(rec2, scoringStrategy, assumeMateCigar) - computeDuplicateScore(rec1, scoringStrategy, assumeMateCigar); /** * Finally, use library ID and read name This is important because we cannot control the order * in which reads appear for reads that are comparable up to now (i.e. cmp == 0). We want to * deterministically choose them, and so we need this. */ if (0 == cmp) cmp = SAMUtils.getCanonicalRecordName(rec1).compareTo(SAMUtils.getCanonicalRecordName(rec2)); return cmp; }
@Test public void testWholeIndelReadInIsolation() { final int firstLocus = 44367789; // create a test version of the Reads object ReadProperties readAttributes = createTestReadProperties(); SAMRecord indelOnlyRead = ArtificialSAMUtils.createArtificialRead(header, "indelOnly", 0, firstLocus, 76); indelOnlyRead.setReadBases(Utils.dupBytes((byte) 'A', 76)); indelOnlyRead.setBaseQualities(Utils.dupBytes((byte) '@', 76)); indelOnlyRead.setCigarString("76I"); List<SAMRecord> reads = Arrays.asList(indelOnlyRead); // create the iterator by state with the fake reads and fake records li = makeLTBS(reads, readAttributes); // Traditionally, reads that end with indels bleed into the pileup at the following locus. // Verify that the next pileup contains this read // and considers it to be an indel-containing read. Assert.assertTrue( li.hasNext(), "Should have found a whole-indel read in the normal base pileup without extended events enabled"); AlignmentContext alignmentContext = li.next(); Assert.assertEquals( alignmentContext.getLocation().getStart(), firstLocus, "Base pileup is at incorrect location."); ReadBackedPileup basePileup = alignmentContext.getBasePileup(); Assert.assertEquals(basePileup.getReads().size(), 1, "Pileup is of incorrect size"); Assert.assertSame(basePileup.getReads().get(0), indelOnlyRead, "Read in pileup is incorrect"); }
/**
 * Adds a record after replacing its {@code RG} attribute, when present, with the value returned
 * by the reads data source's {@code getOriginalReadGroupId} for the current value. The reader ID
 * is looked up before the attribute is rewritten.
 *
 * @param samRecord the record to add; its {@code RG} attribute may be modified in place
 */
public void addAlignment(SAMRecord samRecord) {
  final SAMReaderID readerId = toolkit.getReaderIDForRead(samRecord);

  final String currentReadGroupId = samRecord.getStringAttribute("RG");
  if (currentReadGroupId != null) {
    samRecord.setAttribute(
        "RG", toolkit.getReadsDataSource().getOriginalReadGroupId(currentReadGroupId));
  }

  addAlignment(samRecord, readerId);
}
/**
 * Returns the sort key of a record under the configured sort order, for use in error messages.
 *
 * @param rec the record to describe
 * @return {@code referenceName:alignmentStart} for coordinate order, the read name for queryname
 *     order, and {@code null} for any other order
 */
public String getSortKey(final SAMRecord rec) {
  final String key;
  switch (sortOrder) {
    case coordinate:
      key = rec.getReferenceName() + ":" + rec.getAlignmentStart();
      break;
    case queryname:
      key = rec.getReadName();
      break;
    default:
      // unsorted, and anything else, has no key
      key = null;
      break;
  }
  return key;
}
/**
 * Removes all attributes from a record, first saving the ones listed in {@code saveTags}.
 *
 * <p>For each tag in {@code saveTags} that has a non-null value on the record, the tag and its
 * value are appended to the two output lists at matching positions.
 *
 * @param rec the record whose attributes are cleared
 * @param optFieldTags receives the saved tag names
 * @param optFieldValues receives the saved values, parallel to {@code optFieldTags}
 */
private static void clearAttributes(
    SAMRecord rec, List<String> optFieldTags, List<Object> optFieldValues) {
  for (ListIterator<String> tags = saveTags.listIterator(); tags.hasNext(); ) {
    final String tag = tags.next();
    final Object value = rec.getAttribute(tag);
    if (value == null) {
      continue;
    }
    optFieldTags.add(tag);
    optFieldValues.add(value);
  }
  rec.clearAttributes();
}
/** * Record any index information for a given BAM record * * @param rec The BAM record. Requires rec.getFileSource() is non-null. */ public void processAlignment(final SAMRecord rec) { // metadata indexStats.recordMetaData(rec); if (rec.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START) { return; // do nothing for records without coordinates, but count them } // various checks final int reference = rec.getReferenceIndex(); if (reference != currentReference) { throw new SAMException( "Unexpected reference " + reference + " when constructing index for " + currentReference + " for record " + rec); } binningIndexBuilder.processFeature( new BinningIndexBuilder.FeatureToBeIndexed() { @Override public int getStart() { return rec.getAlignmentStart(); } @Override public int getEnd() { return rec.getAlignmentEnd(); } @Override public Integer getIndexingBin() { final Integer binNumber = rec.getIndexingBin(); return (binNumber == null ? rec.computeIndexingBin() : binNumber); } @Override public Chunk getChunk() { final SAMFileSource source = rec.getFileSource(); if (source == null) { throw new SAMException( "No source (virtual file offsets); needed for indexing on BAM Record " + rec); } return ((BAMFileSpan) source.getFilePointer()).getSingleChunk(); } }); }
/**
 * Compares two records by reference index and then by alignment start.
 *
 * <p>A reference index of {@code -1} orders before any other index; two records that both have
 * {@code -1} compare as equal. Otherwise the result is the difference of the reference indices
 * or, when those are equal, the difference of the alignment starts.
 *
 * @param record1 the first record
 * @param record2 the second record
 * @return a negative, zero or positive value when {@code record1} orders before, equal to or
 *     after {@code record2}
 */
private int compareCoordinates(final SAMRecord record1, final SAMRecord record2) {
  final int firstRef = record1.getReferenceIndex();
  final int secondRef = record2.getReferenceIndex();
  final boolean firstHasNoRef = firstRef == -1;
  final boolean secondHasNoRef = secondRef == -1;

  if (firstHasNoRef && secondHasNoRef) {
    return 0;
  }
  if (firstHasNoRef) {
    return -1;
  }
  if (secondHasNoRef) {
    return 1;
  }

  if (firstRef != secondRef) {
    return firstRef - secondRef;
  }
  return record1.getAlignmentStart() - record2.getAlignmentStart();
}
/**
 * Routes a record to the matching collectors.
 *
 * <p>Unpaired reads go to the unpaired collector only. Paired reads go to the first-of-pair or
 * second-of-pair collector, depending on the first-of-pair flag, and then to the pair collector.
 *
 * @param args the record together with its reference sequence
 */
public void acceptRecord(final SAMRecordAndReference args) {
  final SAMRecord record = args.getSamRecord();
  final ReferenceSequence reference = args.getReferenceSequence();

  if (!record.getReadPairedFlag()) {
    unpairedCollector.addRecord(record, reference);
    return;
  }

  if (record.getFirstOfPairFlag()) {
    firstOfPairCollector.addRecord(record, reference);
  } else {
    secondOfPairCollector.addRecord(record, reference);
  }
  pairCollector.addRecord(record, reference);
}
/**
 * Asserts that {@code passesFilter} gives the expected answer for a record and interval. On a
 * mismatch, the record and the filter are printed to standard output before the assertion fails.
 *
 * @param expected the expected filter outcome
 * @param record the record being checked
 * @param sequence the reference name of the query interval
 * @param startPos the interval start passed to the filter
 * @param endPos the interval end passed to the filter
 * @param contained true for a "contained" query, false for an "overlapping" query
 */
private void checkPassesFilter(
    final boolean expected,
    final SAMRecord record,
    final String sequence,
    final int startPos,
    final int endPos,
    final boolean contained) {
  final boolean passes = passesFilter(record, sequence, startPos, endPos, contained);
  if (passes == expected) {
    return;
  }

  final String outcome = passes ? "passed" : "failed";
  final String mode = contained ? "contained" : "overlapping";
  System.out.println("Error: Record erroneously " + outcome + " filter.");
  System.out.println(" Record: " + record.getSAMString());
  System.out.println(" Filter: " + sequence + ":" + startPos + "-" + endPos + " (" + mode + ")");

  // known to differ at this point, so this fails with the standard assertion message
  assertEquals(passes, expected);
}
/**
 * Scores a read as the sum of its base qualities that are 15 or higher; lower qualities do not
 * contribute.
 *
 * @param rec the record whose base qualities are summed
 * @return the sum of all base qualities {@code >= 15}
 */
private static int getSumOfBaseQualities(final SAMRecord rec) {
  final byte[] qualities = rec.getBaseQualities();
  int total = 0;
  for (int i = 0; i < qualities.length; i++) {
    final byte quality = qualities[i];
    if (quality >= 15) {
      total += quality;
    }
  }
  return total;
}
/** Tests that we can successfully merge two files with */ @Test public void testMerging() { File INPUT[] = { new File(TEST_DATA_DIR, "SamFileHeaderMergerTest/Chromosome1to10.bam"), new File(TEST_DATA_DIR, "SamFileHeaderMergerTest/Chromosome5to9.bam") }; final List<SAMFileReader> readers = new ArrayList<SAMFileReader>(); final List<SAMFileHeader> headers = new ArrayList<SAMFileHeader>(); for (final File inFile : INPUT) { IOUtil.assertFileIsReadable(inFile); final SAMFileReader in = new SAMFileReader(inFile); // We are now checking for zero-length reads, so suppress complaint about that. in.setValidationStringency(ValidationStringency.SILENT); readers.add(in); headers.add(in.getFileHeader()); } final MergingSamRecordIterator iterator; final SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.unsorted, headers, true); iterator = new MergingSamRecordIterator(headerMerger, readers, false); headerMerger.getMergedHeader(); // count the total reads, and record read counts for each sequence Map<Integer, Integer> seqCounts = new HashMap<Integer, Integer>(); int totalCount = 0; while (iterator.hasNext()) { SAMRecord r = iterator.next(); if (seqCounts.containsKey(r.getReferenceIndex())) { seqCounts.put(r.getReferenceIndex(), seqCounts.get(r.getReferenceIndex()) + 1); } else { seqCounts.put(r.getReferenceIndex(), 1); } ++totalCount; } assertEquals(totalCount, 1500); for (Integer i : seqCounts.keySet()) { if (i < 4 || i > 8) { // seqeunce 5 - 9 should have 200 reads (indices 4 - 8) assertEquals(seqCounts.get(i).intValue(), 100); } else { // the others should have 100 assertEquals(seqCounts.get(i).intValue(), 200); } } }
/** * Finds the adaptor boundary around the read and returns the first base inside the adaptor that * is closest to the read boundary. If the read is in the positive strand, this is the first base * after the end of the fragment (Picard calls it 'insert'), if the read is in the negative * strand, this is the first base before the beginning of the fragment. * * <p>There are two cases we need to treat here: * * <p>1) Our read is in the reverse strand : * * <p><----------------------| * |---------------------> * * <p>in these cases, the adaptor boundary is at the mate start (minus one) * * <p>2) Our read is in the forward strand : * * <p>|----------------------> * <----------------------| * * <p>in these cases the adaptor boundary is at the start of the read plus the inferred insert * size (plus one) * * @param read the read being tested for the adaptor boundary * @return the reference coordinate for the adaptor boundary (effectively the first base IN the * adaptor, closest to the read. NULL if the read is unmapped or the mate is mapped to another * contig. 
*/ public static Integer getAdaptorBoundary(final SAMRecord read) { final int MAXIMUM_ADAPTOR_LENGTH = 8; final int insertSize = Math.abs( read .getInferredInsertSize()); // the inferred insert size can be negative if the mate // is mapped before the read (so we take the absolute // value) if (insertSize == 0 || read .getReadUnmappedFlag()) // no adaptors in reads with mates in another chromosome or // unmapped pairs return null; Integer adaptorBoundary; // the reference coordinate for the adaptor boundary (effectively the first // base IN the adaptor, closest to the read) if (read.getReadNegativeStrandFlag()) adaptorBoundary = read.getMateAlignmentStart() - 1; // case 1 (see header) else adaptorBoundary = read.getAlignmentStart() + insertSize + 1; // case 2 (see header) if ((adaptorBoundary < read.getAlignmentStart() - MAXIMUM_ADAPTOR_LENGTH) || (adaptorBoundary > read.getAlignmentEnd() + MAXIMUM_ADAPTOR_LENGTH)) adaptorBoundary = null; // we are being conservative by not allowing the adaptor boundary to go beyond what // we belive is the maximum size of an adaptor return adaptorBoundary; }
/**
 * Sets attributes on a record from two parallel lists: the tag at position {@code i} of {@code
 * optFieldTags} is set to the value at position {@code i} of {@code optFieldValues}.
 *
 * @param rec the record to update
 * @param optFieldTags the tag names
 * @param optFieldValues the values, one for each tag in the same order
 */
private static void resetAttributes(
    SAMRecord rec, List<String> optFieldTags, List<Object> optFieldValues) {
  final ListIterator<Object> values = optFieldValues.listIterator();
  for (final String tag : optFieldTags) {
    rec.setAttribute(tag, values.next());
  }
}
public void addRecord(final SAMRecord record, final ReferenceSequence ref) { if (record.isSecondaryOrSupplementary()) { // only want 1 count per read so skip non primary alignments return; } collectReadData(record, ref); collectQualityData(record, ref); }
/**
 * Checks whether the platform ({@code PL}) attribute of the read's read group contains {@code
 * name}, ignoring the case of the attribute value. This is a substring match, not an equality
 * test. {@code name} must already be upper-cased.
 *
 * <p>The attribute is upper-cased with {@link java.util.Locale#ROOT} so that the result does not
 * depend on the JVM's default locale. With the default-locale {@code toUpperCase()}, a Turkish
 * locale maps {@code 'i'} to a dotted capital I, so "illumina" would not match "ILLUMINA".
 *
 * @param read the read to test
 * @param name the upper-cased platform name to look for
 * @return true if the read has a read group whose {@code PL} attribute contains {@code name};
 *     false if the read has no read group or the read group has no {@code PL} attribute
 */
public static boolean isPlatformRead(SAMRecord read, String name) {
  final SAMReadGroupRecord readGroup = read.getReadGroup();
  if (readGroup == null) {
    return false;
  }

  final Object readPlatformAttr = readGroup.getAttribute("PL");
  if (readPlatformAttr == null) {
    return false;
  }

  // Fully qualified so that this block does not depend on the file's import list.
  return readPlatformAttr.toString().toUpperCase(java.util.Locale.ROOT).contains(name);
}
protected int doWork() { IoUtil.assertFileIsReadable(INPUT); IoUtil.assertFileIsWritable(OUTPUT); final SAMFileReader in = new SAMFileReader(INPUT); // create the read group we'll be using final SAMReadGroupRecord rg = new SAMReadGroupRecord(RGID); rg.setLibrary(RGLB); rg.setPlatform(RGPL); rg.setSample(RGSM); rg.setPlatformUnit(RGPU); if (RGCN != null) rg.setSequencingCenter(RGCN); if (RGDS != null) rg.setDescription(RGDS); if (RGDT != null) rg.setRunDate(RGDT); log.info( String.format( "Created read group ID=%s PL=%s LB=%s SM=%s%n", rg.getId(), rg.getPlatform(), rg.getLibrary(), rg.getSample())); // create the new header and output file final SAMFileHeader inHeader = in.getFileHeader(); final SAMFileHeader outHeader = inHeader.clone(); outHeader.setReadGroups(Arrays.asList(rg)); if (SORT_ORDER != null) outHeader.setSortOrder(SORT_ORDER); final SAMFileWriter outWriter = new SAMFileWriterFactory() .makeSAMOrBAMWriter( outHeader, outHeader.getSortOrder() == inHeader.getSortOrder(), OUTPUT); final ProgressLogger progress = new ProgressLogger(log); for (final SAMRecord read : in) { read.setAttribute(SAMTag.RG.name(), RGID); outWriter.addAlignment(read); progress.record(read); } // cleanup in.close(); outWriter.close(); return 0; }
/**
 * Makes a copy of a read via {@code clone()}.
 *
 * <p>HACK: really, SAMRecord should provide a copy constructor or a factory method.
 *
 * @param originalRead the read to copy; may be null
 * @return the clone, or null if {@code originalRead} is null
 * @throws IllegalStateException if the record does not support cloning
 */
public static SAMRecord cloneSAMRecord(final SAMRecord originalRead) {
  try {
    return originalRead == null ? null : (SAMRecord) originalRead.clone();
  } catch (final CloneNotSupportedException cloneFailure) {
    throw new IllegalStateException(cloneFailure);
  }
}
/** * Returns the duplicate score computed from the given fragment. value should be capped by * Short.MAX_VALUE/2 since the score from two reads will be added and an overflow will be * * <p>If true is given to assumeMateCigar, then any score that can use the mate cigar to compute * the mate's score will return the score computed on both ends. */ public static short computeDuplicateScore( final SAMRecord record, final ScoringStrategy scoringStrategy, final boolean assumeMateCigar) { Short storedScore = (Short) record.getTransientAttribute(Attr.DuplicateScore); if (storedScore == null) { short score = 0; switch (scoringStrategy) { case SUM_OF_BASE_QUALITIES: // two (very) long reads worth of high-quality bases can go over Short.MAX_VALUE/2 // and risk overflow. score += (short) Math.min(getSumOfBaseQualities(record), Short.MAX_VALUE / 2); break; case TOTAL_MAPPED_REFERENCE_LENGTH: if (!record.getReadUnmappedFlag()) { // no need to remember the score since this scoring mechanism is symmetric score = (short) Math.min(record.getCigar().getReferenceLength(), Short.MAX_VALUE / 2); } if (assumeMateCigar && record.getReadPairedFlag() && !record.getMateUnmappedFlag()) { score += (short) Math.min( SAMUtils.getMateCigar(record).getReferenceLength(), Short.MAX_VALUE / 2); } break; // The RANDOM score gives the same score to both reads so that they get filtered together. // it's not critical do use the readName since the scores from both ends get added, but it // seem // to be clearer this way. case RANDOM: // start with a random number between Short.MIN_VALUE/4 and Short.MAX_VALUE/4 score += (short) (hasher.hashUnencodedChars(record.getReadName()) & 0b11_1111_1111_1111); // subtract Short.MIN_VALUE/4 from it to end up with a number between // 0 and Short.MAX_VALUE/2. This number can be then discounted in case the read is // not passing filters. 
We need to stay far from overflow so that when we add the two // scores from the two read mates we do not overflow since that could cause us to chose a // failing read-pair instead of a passing one. score -= Short.MIN_VALUE / 4; } // make sure that filter-failing records are heavily discounted. (the discount can happen // twice, once // for each mate, so need to make sure we do not subtract more than Short.MIN_VALUE overall.) score += record.getReadFailsVendorQualityCheckFlag() ? (short) (Short.MIN_VALUE / 2) : 0; storedScore = score; record.setTransientAttribute(Attr.DuplicateScore, storedScore); } return storedScore; }
private int countAlignmentsInWindow( int reference, int window, SAMFileReader reader, int expectedCount) { final int SIXTEEN_K = 1 << 14; // 1 << LinearIndex.BAM_LIDX_SHIFT final int start = window >> 14; // window * SIXTEEN_K; final int stop = ((window + 1) >> 14) - 1; // (window + 1 * SIXTEEN_K) - 1; final String chr = reader.getFileHeader().getSequence(reference).getSequenceName(); // get records for the entire linear index window SAMRecordIterator iter = reader.queryOverlapping(chr, start, stop); SAMRecord rec; int count = 0; while (iter.hasNext()) { rec = iter.next(); count++; if (expectedCount == -1) System.err.println(rec.getReadName()); } iter.close(); return count; }
/** * Record any index information for a given BAM record. If this alignment starts a new reference, * write out the old reference. Requires a non-null value for rec.getFileSource(). * * @param rec The BAM record */ public void processAlignment(final SAMRecord rec) { try { final int reference = rec.getReferenceIndex(); if (reference != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && reference != currentReference) { // process any completed references advanceToReference(reference); } indexBuilder.processAlignment(rec); } catch (final Exception e) { throw new SAMException("Exception creating BAM index for record " + rec, e); } }
private boolean passesFilter( final SAMRecord record, final String sequence, final int startPos, final int endPos, final boolean contained) { if (record == null) { return false; } if (!safeEquals(record.getReferenceName(), sequence)) { return false; } final int alignmentStart = record.getAlignmentStart(); int alignmentEnd = record.getAlignmentEnd(); if (alignmentStart <= 0) { assertTrue(record.getReadUnmappedFlag()); return false; } if (alignmentEnd <= 0) { // For indexing-only records, treat as single base alignment. assertTrue(record.getReadUnmappedFlag()); alignmentEnd = alignmentStart; } if (contained) { if (startPos != 0 && alignmentStart < startPos) { return false; } if (endPos != 0 && alignmentEnd > endPos) { return false; } } else { if (startPos != 0 && alignmentEnd < startPos) { return false; } if (endPos != 0 && alignmentStart > endPos) { return false; } } return true; }
/** * Test to make sure that reads supporting only an indel (example cigar string: 76I) are * represented properly */ @Test public void testWholeIndelReadRepresentedTest() { final int firstLocus = 44367788, secondLocus = firstLocus + 1; SAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, secondLocus, 1); read1.setReadBases(Utils.dupBytes((byte) 'A', 1)); read1.setBaseQualities(Utils.dupBytes((byte) '@', 1)); read1.setCigarString("1I"); List<SAMRecord> reads = Arrays.asList(read1); // create the iterator by state with the fake reads and fake records li = makeLTBS(reads, createTestReadProperties()); while (li.hasNext()) { AlignmentContext alignmentContext = li.next(); ReadBackedPileup p = alignmentContext.getBasePileup(); Assert.assertTrue(p.getNumberOfElements() == 1); PileupElement pe = p.iterator().next(); Assert.assertTrue(pe.isBeforeInsertion()); Assert.assertFalse(pe.isAfterInsertion()); Assert.assertEquals(pe.getEventBases(), "A"); } SAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header, "read2", 0, secondLocus, 10); read2.setReadBases(Utils.dupBytes((byte) 'A', 10)); read2.setBaseQualities(Utils.dupBytes((byte) '@', 10)); read2.setCigarString("10I"); reads = Arrays.asList(read2); // create the iterator by state with the fake reads and fake records li = makeLTBS(reads, createTestReadProperties()); while (li.hasNext()) { AlignmentContext alignmentContext = li.next(); ReadBackedPileup p = alignmentContext.getBasePileup(); Assert.assertTrue(p.getNumberOfElements() == 1); PileupElement pe = p.iterator().next(); Assert.assertTrue(pe.isBeforeInsertion()); Assert.assertFalse(pe.isAfterInsertion()); Assert.assertEquals(pe.getEventBases(), "AAAAAAAAAA"); } }
private static void removeMateInfo(SAMRecord rec) { if (rec.getReadPairedFlag()) { // Remove all information of its mate // flag rec.setProperPairFlag(false); // not paired any more rec.setMateUnmappedFlag(false); rec.setMateNegativeStrandFlag(false); // entries rec.setMateReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX); rec.setMateAlignmentStart(0); rec.setInferredInsertSize(0); // TODO: remove tags and values that are mate pair inclined. } }
public boolean filterOut(final SAMRecord read) { int alignedLength = 0; int softClipBlocks = 0; int minSoftClipBlocks = doNotRequireSoftclipsOnBothEnds ? 1 : 2; CigarOperator lastOperator = null; for (final CigarElement element : read.getCigar().getCigarElements()) { if (element.getOperator() == CigarOperator.S) { // Treat consecutive S blocks as a single one if (lastOperator != CigarOperator.S) { softClipBlocks += 1; } } else if (element .getOperator() .consumesReadBases()) { // M, I, X, and EQ (S was already accounted for above) alignedLength += element.getLength(); } lastOperator = element.getOperator(); } return (alignedLength < tooShort && softClipBlocks >= minSoftClipBlocks); }
@Test public void testIndelsInRegularPileup() { final byte[] bases = new byte[] {'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'}; final byte[] indelBases = new byte[] {'A', 'A', 'A', 'A', 'C', 'T', 'A', 'A', 'A', 'A', 'A', 'A'}; // create a test version of the Reads object ReadProperties readAttributes = createTestReadProperties(); SAMRecord before = ArtificialSAMUtils.createArtificialRead(header, "before", 0, 1, 10); before.setReadBases(bases); before.setBaseQualities(new byte[] {20, 20, 20, 20, 20, 20, 20, 20, 20, 20}); before.setCigarString("10M"); SAMRecord during = ArtificialSAMUtils.createArtificialRead(header, "during", 0, 2, 10); during.setReadBases(indelBases); during.setBaseQualities(new byte[] {20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20}); during.setCigarString("4M2I6M"); SAMRecord after = ArtificialSAMUtils.createArtificialRead(header, "after", 0, 3, 10); after.setReadBases(bases); after.setBaseQualities(new byte[] {20, 20, 20, 20, 20, 20, 20, 20, 20, 20}); after.setCigarString("10M"); List<SAMRecord> reads = Arrays.asList(before, during, after); // create the iterator by state with the fake reads and fake records li = makeLTBS(reads, readAttributes); boolean foundIndel = false; while (li.hasNext()) { AlignmentContext context = li.next(); ReadBackedPileup pileup = context.getBasePileup().getBaseFilteredPileup(10); for (PileupElement p : pileup) { if (p.isBeforeInsertion()) { foundIndel = true; Assert.assertEquals(p.getEventLength(), 2, "Wrong event length"); Assert.assertEquals(p.getEventBases(), "CT", "Inserted bases are incorrect"); break; } } } Assert.assertTrue(foundIndel, "Indel in pileup not found"); }
@Test public void testXandEQOperators() { final byte[] bases1 = new byte[] {'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'}; final byte[] bases2 = new byte[] {'A', 'A', 'A', 'C', 'A', 'A', 'A', 'A', 'A', 'C'}; // create a test version of the Reads object ReadProperties readAttributes = createTestReadProperties(); SAMRecord r1 = ArtificialSAMUtils.createArtificialRead(header, "r1", 0, 1, 10); r1.setReadBases(bases1); r1.setBaseQualities(new byte[] {20, 20, 20, 20, 20, 20, 20, 20, 20, 20}); r1.setCigarString("10M"); SAMRecord r2 = ArtificialSAMUtils.createArtificialRead(header, "r2", 0, 1, 10); r2.setReadBases(bases2); r2.setBaseQualities(new byte[] {20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20}); r2.setCigarString("3=1X5=1X"); SAMRecord r3 = ArtificialSAMUtils.createArtificialRead(header, "r3", 0, 1, 10); r3.setReadBases(bases2); r3.setBaseQualities(new byte[] {20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20}); r3.setCigarString("3=1X5M1X"); SAMRecord r4 = ArtificialSAMUtils.createArtificialRead(header, "r4", 0, 1, 10); r4.setReadBases(bases2); r4.setBaseQualities(new byte[] {20, 20, 20, 20, 20, 20, 20, 20, 20, 20}); r4.setCigarString("10M"); List<SAMRecord> reads = Arrays.asList(r1, r2, r3, r4); // create the iterator by state with the fake reads and fake records li = makeLTBS(reads, readAttributes); while (li.hasNext()) { AlignmentContext context = li.next(); ReadBackedPileup pileup = context.getBasePileup(); Assert.assertEquals(pileup.depthOfCoverage(), 4); } }
/**
 * Steps forward on the genome by one position and refreshes the public state flags.
 *
 * <p>The cursor is the pair (currentOperatorIndex, currentPositionOnOperator) into the read's
 * cigar. Each call first moves to the next cigar element if the current one is used up, then
 * acts on the element's operator:
 *
 * <ul>
 *   <li>I before any M/EQ/X has been seen: no counter changes; only the flags are recomputed.
 *   <li>I after an M/EQ/X, and S: the element's whole length is added to the read offset, the
 *       element is skipped and the method recurses onto the next element.
 *   <li>H and P: the element is skipped and the method recurses; the read offset is unchanged.
 *   <li>D and N: the position within the element advances by one; the read offset is unchanged.
 *   <li>M, EQ and X: the read offset and the position within the element both advance by one.
 * </ul>
 *
 * <p>After the step, the before/after deletion, insertion and soft-clip flags are recomputed
 * from the current cursor using the isBeforeOp / isAfterOp helpers (not shown in this block).
 *
 * @return false when done reading the read (no cigar elements remain), true otherwise
 * @throws IllegalStateException if the current cigar operator is not one of those listed above
 */
public boolean stepForwardOnGenome() {
  // already past the last cigar element
  if (currentOperatorIndex == numOperators) return false;

  CigarElement curElement = read.getCigar().getCigarElement(currentOperatorIndex);
  if (currentPositionOnOperator >= curElement.getLength()) {
    // the current element is used up: move to the next one, or stop if there is none
    if (++currentOperatorIndex == numOperators) return false;

    curElement = read.getCigar().getCigarElement(currentOperatorIndex);
    currentPositionOnOperator = 0;
  }

  // NOTE: several cases below fall through on purpose; the order of the cases matters.
  switch (curElement.getOperator()) {
    case I: // insertion w.r.t. the reference
      // before the first M/EQ/X, stay on the insertion and leave every counter unchanged
      if (!sawMop) break;
      // otherwise fall through: the inserted bases are skipped like a soft clip
    case S: // soft clip
      // the whole element is consumed from the read at once
      currentReadOffset += curElement.getLength();
      // fall through to skip the element
    case H: // hard clip
    case P: // padding
      // skip this element and take the step on the next one instead
      currentOperatorIndex++;
      return stepForwardOnGenome();

    case D: // deletion w.r.t. the reference
    case N: // reference skip (looks and gets processed just like a "deletion", just different
      // logical meaning)
      // advances within the element only; the read offset does not move
      currentPositionOnOperator++;
      break;

    case M:
    case EQ:
    case X:
      sawMop = true;
      currentReadOffset++;
      currentPositionOnOperator++;
      break;
    default:
      throw new IllegalStateException("No support for cigar op: " + curElement.getOperator());
  }

  // where the cursor sits relative to the cigar and to the current element
  final boolean isFirstOp = currentOperatorIndex == 0;
  final boolean isLastOp = currentOperatorIndex == numOperators - 1;
  final boolean isFirstBaseOfOp = currentPositionOnOperator == 1;
  final boolean isLastBaseOfOp = currentPositionOnOperator == curElement.getLength();

  isBeforeDeletionStart =
      isBeforeOp(
          read.getCigar(), currentOperatorIndex, CigarOperator.D, isLastOp, isLastBaseOfOp);
  // also true anywhere inside a deletion except on its last base
  isBeforeDeletedBase =
      isBeforeDeletionStart || (!isLastBaseOfOp && curElement.getOperator() == CigarOperator.D);
  isAfterDeletionEnd =
      isAfterOp(
          read.getCigar(), currentOperatorIndex, CigarOperator.D, isFirstOp, isFirstBaseOfOp);
  // also true anywhere inside a deletion except on its first base
  isAfterDeletedBase =
      isAfterDeletionEnd || (!isFirstBaseOfOp && curElement.getOperator() == CigarOperator.D);
  // also true while sitting on an insertion that precedes every M/EQ/X
  isBeforeInsertion =
      isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.I, isLastOp, isLastBaseOfOp)
          || (!sawMop && curElement.getOperator() == CigarOperator.I);
  isAfterInsertion =
      isAfterOp(
          read.getCigar(), currentOperatorIndex, CigarOperator.I, isFirstOp, isFirstBaseOfOp);
  // a soft clip on either side counts
  isNextToSoftClip =
      isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.S, isLastOp, isLastBaseOfOp)
          || isAfterOp(
              read.getCigar(), currentOperatorIndex, CigarOperator.S, isFirstOp, isFirstBaseOfOp);

  return true;
}
/**
 * Creates a position tracker for a read, remembering the read and the number of elements in its
 * cigar.
 *
 * @param read the read to track; its cigar is read immediately
 */
public LIBS_position(final SAMRecord read) {
  this.numOperators = read.getCigar().numCigarElements();
  this.read = read;
}