예제 #1
0
  /**
   * Searches forward for an offset that plausibly starts a BAM alignment
   * record, using a chain of cheap field-sanity checks.
   *
   * <p>Offsets in brackets (e.g. {@code [4]}) are byte offsets relative to the
   * start of the candidate record. A candidate is accepted when:
   * <ol>
   *   <li>{@code [4]} looks like a reference sequence ID and {@code [8]} like
   *       a 0-based coordinate;</li>
   *   <li>{@code [24]} and {@code [28]} pass the same test (the mate's
   *       fields);</li>
   *   <li>the low byte of {@code [12]} is a name length of at least one and
   *       the byte at {@code [36 + length - 1]} is a null terminator;</li>
   *   <li>{@code [0]}, the length of the remainder of the record, is at least
   *       as large as the fixed fields, name, and the data sized by
   *       {@code [16]} and {@code [20]} require.</li>
   * </ol>
   *
   * <p>This is a heuristic: a returned offset is only a plausible record
   * start, not a verified one.
   *
   * @param cpVirt virtual offset that is OR-ed with the in-block offset for
   *     every seek (presumably the block's file position shifted left by 16;
   *     confirm against the caller)
   * @param up in-block offset from which to start searching
   * @param cSize exclusive bound on the in-block offsets examined (presumably
   *     the uncompressed block size; confirm against the caller)
   * @return the in-block offset of the first plausible record at or after
   *     {@code up}, or {@code -1} if none is found, the data cannot be read
   *     in full, or an {@link IOException} occurs
   */
  private int guessNextBAMPos(long cpVirt, int up, int cSize) {
    // What we're actually searching for is what's at offset [4], not [0]. So
    // skip ahead by 4, thus ensuring that whenever we find a valid [0] it's
    // at position up or greater.
    up += 4;

    try {
      while (up + SHORTEST_POSSIBLE_BAM_RECORD - 4 < cSize) {
        if (!seekAndReadFully(cpVirt | up, 8)) {
          return -1;
        }

        // If neither of the first two checks rejects the value, we have what
        // looks like a valid reference sequence ID. Assume we're at offset
        // [4] or [24], i.e. the ID of either this read or its mate,
        // respectively. So check the next integer ([8] or [28]) to make sure
        // it's a 0-based leftmost coordinate.
        final int id = buf.getInt(0);
        final int pos = buf.getInt(4);
        if (id < -1 || id > referenceSequenceCount || pos < -1) {
          ++up;
          continue;
        }

        // Okay, we could be at [4] or [24]. Assuming we're at [4], check
        // that [24] is valid. Assume [4] because we should hit it first:
        // the only time we expect to hit [24] is at the beginning of the
        // split, as part of the first read we should skip.
        if (!seekAndReadFully(cpVirt | (up + 20), 8)) {
          return -1;
        }

        final int nid = buf.getInt(0);
        final int npos = buf.getInt(4);
        if (nid < -1 || nid > referenceSequenceCount || npos < -1) {
          ++up;
          continue;
        }

        // So far so good: [4] and [24] seem okay. Now do something a bit
        // more involved: make sure that [36 + [12]&0xff - 1] == 0: that
        // is, the name of the read should be null terminated.

        // Move up to [0] just to make it less likely that we get confused
        // with offsets. Remember where we should continue from if we
        // reject this up.
        final int nextUP = up + 1;
        up -= 4;

        if (!seekAndReadFully(cpVirt | (up + 12), 4)) {
          return -1;
        }

        final int nameLength = buf.getInt(0) & 0xff;
        if (nameLength < 1) {
          // Names are null-terminated so length must be at least one.
          up = nextUP;
          continue;
        }

        final int nullTerminator = up + 36 + nameLength - 1;

        if (nullTerminator >= cSize) {
          // This BAM record can't fit here. But maybe there's another in
          // the remaining space, so try again.
          up = nextUP;
          continue;
        }

        if (!seekAndReadFully(cpVirt | nullTerminator, 1)) {
          return -1;
        }

        if (buf.get(0) != 0) {
          up = nextUP;
          continue;
        }

        // All of [4], [24], and [36 + [12]&0xff] look good. If [0] is also
        // sensible, that's good enough for us. "Sensible" to us means the
        // following:
        //
        // [0] >= 4*([16]&0xffff) + [20] + ([20]+1)/2 + 4*8 + ([12]&0xff)

        if (!seekAndReadFully(cpVirt | (up + 16), 8)) {
          return -1;
        }

        final int lowHalfOf16 = buf.getInt(0) & 0xffff;
        final long lengthAt20 = buf.getInt(4);
        if (lengthAt20 < 0) {
          // [20] is used as a length, so a negative value cannot belong to a
          // real record; without this check it would shrink the bound below
          // and let garbage through.
          up = nextUP;
          continue;
        }

        // Note that [0] is "length of the _remainder_ of the alignment
        // record", which is why this uses 4*8 instead of 4*9. The bound is
        // accumulated as a long so that a huge garbage [20] cannot wrap
        // around to a small or negative int.
        final long zeroMin =
            4L * 8 + nameLength + 4L * lowHalfOf16 + lengthAt20 + (lengthAt20 + 1) / 2;

        if (!seekAndReadFully(cpVirt | up, 4)) {
          return -1;
        }

        if (buf.getInt(0) < zeroMin) {
          up = nextUP;
          continue;
        }
        return up;
      }
    } catch (IOException ignored) {
      // Deliberately best-effort: any I/O failure while probing simply means
      // no candidate could be confirmed, which is reported as -1 below.
    }
    return -1;
  }

  /**
   * Seeks {@code bgzf} to {@code virtualOffset} and reads exactly
   * {@code length} bytes into the start of {@code buf}'s backing array.
   *
   * <p>A single {@code read} call may deliver fewer bytes than requested, so
   * this loops until the request is satisfied. Without that, stale bytes left
   * in {@code buf} by an earlier read could be mistaken for fresh data.
   *
   * @param virtualOffset position passed to {@code bgzf.seek}
   * @param length number of bytes required
   * @return {@code true} if all {@code length} bytes were read; {@code false}
   *     if the stream ended (or made no progress) first
   * @throws IOException if seeking or reading fails
   */
  private boolean seekAndReadFully(long virtualOffset, int length) throws IOException {
    bgzf.seek(virtualOffset);
    final byte[] dest = buf.array();
    int filled = 0;
    while (filled < length) {
      final int n = bgzf.read(dest, filled, length - filled);
      if (n <= 0) {
        // End of stream, or a zero-byte read that would otherwise spin forever.
        return false;
      }
      filled += n;
    }
    return true;
  }