private int guessNextBAMPos(long cpVirt, int up, int cSize) { // What we're actually searching for is what's at offset [4], not [0]. So // skip ahead by 4, thus ensuring that whenever we find a valid [0] it's // at position up or greater. up += 4; try { while (up + SHORTEST_POSSIBLE_BAM_RECORD - 4 < cSize) { bgzf.seek(cpVirt | up); bgzf.read(buf.array(), 0, 8); // If the first two checks fail we have what looks like a valid // reference sequence ID. Assume we're at offset [4] or [24], i.e. // the ID of either this read or its mate, respectively. So check // the next integer ([8] or [28]) to make sure it's a 0-based // leftmost coordinate. final int id = buf.getInt(0); final int pos = buf.getInt(4); if (id < -1 || id > referenceSequenceCount || pos < -1) { ++up; continue; } // Okay, we could be at [4] or [24]. Assuming we're at [4], check // that [24] is valid. Assume [4] because we should hit it first: // the only time we expect to hit [24] is at the beginning of the // split, as part of the first read we should skip. bgzf.seek(cpVirt | up + 20); bgzf.read(buf.array(), 0, 8); final int nid = buf.getInt(0); final int npos = buf.getInt(4); if (nid < -1 || nid > referenceSequenceCount || npos < -1) { ++up; continue; } // So far so good: [4] and [24] seem okay. Now do something a bit // more involved: make sure that [36 + [12]&0xff - 1] == 0: that // is, the name of the read should be null terminated. // Move up to 0 just to make it less likely that we get confused // with offsets. Remember where we should continue from if we // reject this up. final int nextUP = up + 1; up -= 4; bgzf.seek(cpVirt | up + 12); bgzf.read(buf.array(), 0, 4); final int nameLength = buf.getInt(0) & 0xff; if (nameLength < 1) { // Names are null-terminated so length must be at least one up = nextUP; continue; } final int nullTerminator = up + 36 + nameLength - 1; if (nullTerminator >= cSize) { // This BAM record can't fit here. But maybe there's another in // the remaining space, so try again. up = nextUP; continue; } bgzf.seek(cpVirt | nullTerminator); bgzf.read(buf.array(), 0, 1); if (buf.get(0) != 0) { up = nextUP; continue; } // All of [4], [24], and [36 + [12]&0xff] look good. If [0] is also // sensible, that's good enough for us. "Sensible" to us means the // following: // // [0] >= 4*([16]&0xffff) + [20] + ([20]+1)/2 + 4*8 + ([12]&0xff) // Note that [0] is "length of the _remainder_ of the alignment // record", which is why this uses 4*8 instead of 4*9. int zeroMin = 4 * 8 + nameLength; bgzf.seek(cpVirt | up + 16); bgzf.read(buf.array(), 0, 8); zeroMin += (buf.getInt(0) & 0xffff) * 4; zeroMin += buf.getInt(4) + (buf.getInt(4) + 1) / 2; bgzf.seek(cpVirt | up); bgzf.read(buf.array(), 0, 4); if (buf.getInt(0) < zeroMin) { up = nextUP; continue; } return up; } } catch (IOException e) { } return -1; }