Ejemplo n.º 1
0
  private boolean isAssemblyTriggerCandidate(SAMRecord read, CompareToReference2 c2r) {
    boolean isCandidate = false;

    // Increment candidate count for indels
    if (read.getCigarString().contains("I") || read.getCigarString().contains("D")) {
      isCandidate = true;
    }

    // Increment candidate count for substantial high quality soft clipping
    // TODO: Check for chimera directly?
    if (read.getCigarString().contains("S")) {
      if (c2r.numHighQualityMismatches(read, MIN_CANDIDATE_BASE_QUALITY) > (readLength / 10)) {
        isCandidate = true;
      }
    }

    // Increment candidate count if read contains at least 3 high quality mismatches
    if (SAMRecordUtils.getIntAttribute(read, "NM") >= 3) {
      if (c2r.numHighQualityMismatches(read, MIN_CANDIDATE_BASE_QUALITY) > 3) {
        isCandidate = true;
      }
    }

    return isCandidate;
  }
Ejemplo n.º 2
0
  /** Note: this is the only getKey function that handles unmapped reads specially! */
  public static long getKey(final SAMRecord rec) {
    final int refIdx = rec.getReferenceIndex();
    final int start = rec.getAlignmentStart();

    if (!(rec.getReadUnmappedFlag() || refIdx < 0 || start < 0)) return getKey(refIdx, start);

    // Put unmapped reads at the end, but don't give them all the exact same
    // key so that they can be distributed to different reducers.
    //
    // A random number would probably be best, but to ensure that the same
    // record always gets the same key we use a fast hash instead.
    //
    // We avoid using hashCode(), because it's not guaranteed to have the
    // same value across different processes.

    int hash = 0;
    byte[] var;
    if ((var = rec.getVariableBinaryRepresentation()) != null) {
      // Undecoded BAM record: just hash its raw data.
      hash = (int) MurmurHash3.murmurhash3(var, hash);
    } else {
      // Decoded BAM record or any SAM record: hash a few representative
      // fields together.
      hash = (int) MurmurHash3.murmurhash3(rec.getReadName(), hash);
      hash = (int) MurmurHash3.murmurhash3(rec.getReadBases(), hash);
      hash = (int) MurmurHash3.murmurhash3(rec.getBaseQualities(), hash);
      hash = (int) MurmurHash3.murmurhash3(rec.getCigarString(), hash);
    }
    return getKey0(Integer.MAX_VALUE, hash);
  }
Ejemplo n.º 3
0
  /**
   * HACK TO CREATE GATKSAMRECORD BASED ONLY A SAMRECORD FOR TESTING PURPOSES ONLY
   *
   * @param read
   */
  public GATKSAMRecord(final SAMRecord read) {
    super(read.getHeader());
    super.setReferenceIndex(read.getReferenceIndex());
    super.setAlignmentStart(read.getAlignmentStart());
    super.setReadName(read.getReadName());
    super.setMappingQuality(read.getMappingQuality());
    // indexing bin done below
    super.setCigar(read.getCigar());
    super.setFlags(read.getFlags());
    super.setMateReferenceIndex(read.getMateReferenceIndex());
    super.setMateAlignmentStart(read.getMateAlignmentStart());
    super.setInferredInsertSize(read.getInferredInsertSize());
    SAMReadGroupRecord samRG = read.getReadGroup();
    SAMBinaryTagAndValue samAttr = GATKBin.getReadBinaryAttributes(read);
    if (samAttr == null) {
      clearAttributes();
    } else {
      setAttributes(samAttr);
    }
    if (samRG != null) {
      GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(samRG);
      setReadGroup(rg);
    }

    super.setFileSource(read.getFileSource());
    super.setReadName(read.getReadName());
    super.setCigarString(read.getCigarString());
    super.setReadBases(read.getReadBases());
    super.setBaseQualities(read.getBaseQualities());
    // From SAMRecord constructor: Do this after the above because setCigarString will clear it.
    GATKBin.setReadIndexingBin(this, GATKBin.getReadIndexingBin(read));
  }
Ejemplo n.º 4
0
 private boolean isHardClipped(SAMRecord read) {
   return read.getCigarString().contains("H");
 }