Пример #1
0
  /**
   * Computes the key for a record.
   *
   * <p>Note: this is the only getKey function that handles unmapped reads specially!
   *
   * <p>A record that is not flagged as unmapped and has a non-negative reference index and
   * alignment start is keyed via {@code getKey(int, int)}. Every other record is keyed via
   * {@code getKey0(Integer.MAX_VALUE, hash)}, where {@code hash} is a MurmurHash3 value
   * computed from the record's contents.
   *
   * @param rec the record to compute a key for
   * @return the key for {@code rec}
   */
  public static long getKey(final SAMRecord rec) {
    final int referenceIndex = rec.getReferenceIndex();
    final int alignmentStart = rec.getAlignmentStart();

    // NOTE(review): an alignment start of exactly 0 passes this check and takes the
    // mapped path -- confirm that is intended.
    final boolean hasPosition =
        !rec.getReadUnmappedFlag() && referenceIndex >= 0 && alignmentStart >= 0;
    if (hasPosition) {
      return getKey(referenceIndex, alignmentStart);
    }

    // Unmapped reads go at the end, but they must not all share one key:
    // distinct keys let them be spread over different reducers.
    //
    // Randomness would probably spread them best, but the same record must
    // always map to the same key, so a fast hash is used instead.
    //
    // hashCode() is deliberately avoided, because its value is not
    // guaranteed to be the same across different processes.

    final int seed = 0;
    final byte[] rawData = rec.getVariableBinaryRepresentation();

    int digest;
    if (rawData == null) {
      // Decoded BAM record or any SAM record: chain the hash through a few
      // representative fields, each result seeding the next.
      digest = (int) MurmurHash3.murmurhash3(rec.getReadName(), seed);
      digest = (int) MurmurHash3.murmurhash3(rec.getReadBases(), digest);
      digest = (int) MurmurHash3.murmurhash3(rec.getBaseQualities(), digest);
      digest = (int) MurmurHash3.murmurhash3(rec.getCigarString(), digest);
    } else {
      // Undecoded BAM record: hashing the raw data is enough.
      digest = (int) MurmurHash3.murmurhash3(rawData, seed);
    }

    return getKey0(Integer.MAX_VALUE, digest);
  }