private boolean isAssemblyTriggerCandidate(SAMRecord read, CompareToReference2 c2r) { boolean isCandidate = false; // Increment candidate count for indels if (read.getCigarString().contains("I") || read.getCigarString().contains("D")) { isCandidate = true; } // Increment candidate count for substantial high quality soft clipping // TODO: Check for chimera directly? if (read.getCigarString().contains("S")) { if (c2r.numHighQualityMismatches(read, MIN_CANDIDATE_BASE_QUALITY) > (readLength / 10)) { isCandidate = true; } } // Increment candidate count if read contains at least 3 high quality mismatches if (SAMRecordUtils.getIntAttribute(read, "NM") >= 3) { if (c2r.numHighQualityMismatches(read, MIN_CANDIDATE_BASE_QUALITY) > 3) { isCandidate = true; } } return isCandidate; }
/** Note: this is the only getKey function that handles unmapped reads specially! */ public static long getKey(final SAMRecord rec) { final int refIdx = rec.getReferenceIndex(); final int start = rec.getAlignmentStart(); if (!(rec.getReadUnmappedFlag() || refIdx < 0 || start < 0)) return getKey(refIdx, start); // Put unmapped reads at the end, but don't give them all the exact same // key so that they can be distributed to different reducers. // // A random number would probably be best, but to ensure that the same // record always gets the same key we use a fast hash instead. // // We avoid using hashCode(), because it's not guaranteed to have the // same value across different processes. int hash = 0; byte[] var; if ((var = rec.getVariableBinaryRepresentation()) != null) { // Undecoded BAM record: just hash its raw data. hash = (int) MurmurHash3.murmurhash3(var, hash); } else { // Decoded BAM record or any SAM record: hash a few representative // fields together. hash = (int) MurmurHash3.murmurhash3(rec.getReadName(), hash); hash = (int) MurmurHash3.murmurhash3(rec.getReadBases(), hash); hash = (int) MurmurHash3.murmurhash3(rec.getBaseQualities(), hash); hash = (int) MurmurHash3.murmurhash3(rec.getCigarString(), hash); } return getKey0(Integer.MAX_VALUE, hash); }
/** * HACK TO CREATE GATKSAMRECORD BASED ONLY A SAMRECORD FOR TESTING PURPOSES ONLY * * @param read */ public GATKSAMRecord(final SAMRecord read) { super(read.getHeader()); super.setReferenceIndex(read.getReferenceIndex()); super.setAlignmentStart(read.getAlignmentStart()); super.setReadName(read.getReadName()); super.setMappingQuality(read.getMappingQuality()); // indexing bin done below super.setCigar(read.getCigar()); super.setFlags(read.getFlags()); super.setMateReferenceIndex(read.getMateReferenceIndex()); super.setMateAlignmentStart(read.getMateAlignmentStart()); super.setInferredInsertSize(read.getInferredInsertSize()); SAMReadGroupRecord samRG = read.getReadGroup(); SAMBinaryTagAndValue samAttr = GATKBin.getReadBinaryAttributes(read); if (samAttr == null) { clearAttributes(); } else { setAttributes(samAttr); } if (samRG != null) { GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(samRG); setReadGroup(rg); } super.setFileSource(read.getFileSource()); super.setReadName(read.getReadName()); super.setCigarString(read.getCigarString()); super.setReadBases(read.getReadBases()); super.setBaseQualities(read.getBaseQualities()); // From SAMRecord constructor: Do this after the above because setCigarString will clear it. GATKBin.setReadIndexingBin(this, GATKBin.getReadIndexingBin(read)); }
private boolean isHardClipped(SAMRecord read) { return read.getCigarString().contains("H"); }