/** Note: this is the only getKey function that handles unmapped reads specially! */ public static long getKey(final SAMRecord rec) { final int refIdx = rec.getReferenceIndex(); final int start = rec.getAlignmentStart(); if (!(rec.getReadUnmappedFlag() || refIdx < 0 || start < 0)) return getKey(refIdx, start); // Put unmapped reads at the end, but don't give them all the exact same // key so that they can be distributed to different reducers. // // A random number would probably be best, but to ensure that the same // record always gets the same key we use a fast hash instead. // // We avoid using hashCode(), because it's not guaranteed to have the // same value across different processes. int hash = 0; byte[] var; if ((var = rec.getVariableBinaryRepresentation()) != null) { // Undecoded BAM record: just hash its raw data. hash = (int) MurmurHash3.murmurhash3(var, hash); } else { // Decoded BAM record or any SAM record: hash a few representative // fields together. hash = (int) MurmurHash3.murmurhash3(rec.getReadName(), hash); hash = (int) MurmurHash3.murmurhash3(rec.getReadBases(), hash); hash = (int) MurmurHash3.murmurhash3(rec.getBaseQualities(), hash); hash = (int) MurmurHash3.murmurhash3(rec.getCigarString(), hash); } return getKey0(Integer.MAX_VALUE, hash); }
/**
 * HACK to create a GATKSAMRecord based only on a SAMRecord, FOR TESTING PURPOSES ONLY.
 *
 * <p>Copies the header, alignment fields, flags, mate information, attributes, read group,
 * file source, read name, CIGAR, bases, base qualities and indexing bin from {@code read}
 * into this new record.
 *
 * @param read the record whose fields are copied into the new instance
 */
public GATKSAMRecord(final SAMRecord read) {
    super(read.getHeader());
    super.setReferenceIndex(read.getReferenceIndex());
    super.setAlignmentStart(read.getAlignmentStart());
    super.setReadName(read.getReadName());
    super.setMappingQuality(read.getMappingQuality());
    // indexing bin done below
    super.setCigar(read.getCigar());
    super.setFlags(read.getFlags());
    super.setMateReferenceIndex(read.getMateReferenceIndex());
    super.setMateAlignmentStart(read.getMateAlignmentStart());
    super.setInferredInsertSize(read.getInferredInsertSize());
    SAMReadGroupRecord samRG = read.getReadGroup();
    SAMBinaryTagAndValue samAttr = GATKBin.getReadBinaryAttributes(read);
    // Mirror the source's attributes: clear ours when it has none, otherwise take its list.
    if (samAttr == null) {
        clearAttributes();
    } else {
        setAttributes(samAttr);
    }
    // Wrap the source's read group (if any) in the GATK-specific read group type.
    if (samRG != null) {
        GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(samRG);
        setReadGroup(rg);
    }
    super.setFileSource(read.getFileSource());
    // NOTE(review): setReadName was already called above with the same value; this second
    // call looks redundant -- confirm before removing.
    super.setReadName(read.getReadName());
    // NOTE(review): the CIGAR was already set above via setCigar; presumably the string form
    // supersedes it -- verify against SAMRecord before changing the order.
    super.setCigarString(read.getCigarString());
    super.setReadBases(read.getReadBases());
    super.setBaseQualities(read.getBaseQualities());
    // From SAMRecord constructor: Do this after the above because setCigarString will clear it.
    GATKBin.setReadIndexingBin(this, GATKBin.getReadIndexingBin(read));
}
/**
 * Checks whether {@code cur} sorts before {@code last} by (reference index, alignment start).
 *
 * @param last the previously seen read, or {@code null} if there is none
 * @param cur the read being examined
 * @return {@code false} if {@code last} is {@code null} or {@code cur} is flagged unmapped;
 *     otherwise {@code true} when {@code last} has a greater reference index than
 *     {@code cur}, or the same reference index and a greater alignment start
 * @throws UserException.MalformedBAM if either read has no reference index or no alignment
 *     start; the exception carries the read that failed the check
 */
private boolean isOutOfOrder(final SAMRecord last, final SAMRecord cur) {
    if (last == null || cur.getReadUnmappedFlag()) {
        return false;
    }

    if (last.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX
            || last.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START) {
        throw new UserException.MalformedBAM(
                last,
                String.format("read %s has inconsistent mapping information.", last.format()));
    }

    if (cur.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX
            || cur.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START) {
        // Report cur, the read that actually failed this check (previously last was passed,
        // so the exception's record did not match its message).
        throw new UserException.MalformedBAM(
                cur,
                String.format("read %s has inconsistent mapping information.", cur.format()));
    }

    return (last.getReferenceIndex() > cur.getReferenceIndex())
            || (last.getReferenceIndex().equals(cur.getReferenceIndex())
                    && last.getAlignmentStart() > cur.getAlignmentStart());
}