/**
   * Reads the part of the index belonging to one reference sequence that is relevant to a region.
   *
   * <p>For every bin recorded for the reference, the bin number and chunk count are read. Bins
   * selected by {@code GenomicIndexUtil.regionToBins(startPos, endPos)} keep their chunks; the bin
   * numbered {@code GenomicIndexUtil.MAX_BINS} is read as metadata and is not stored as a
   * {@code Bin}; every other bin is stored with an empty chunk list and its chunk data is skipped.
   * Afterwards only the linear index entries covering the region are read.
   *
   * @param referenceSequence index of the reference sequence to query
   * @param startPos start of the region of interest
   * @param endPos end of the region of interest; a value {@code <= 0} extends the linear index
   *     window through its last entry
   * @return the index content for the region, or {@code null} when {@code referenceSequence} is
   *     not below the sequence count stored in the index or when no bin set can be computed for
   *     the region
   */
  protected BAMIndexContent query(
      final int referenceSequence, final int startPos, final int endPos) {
    // Reposition the reader so that the next integer read is the sequence count.
    seek(4);

    final int nReferences = readInteger();
    if (referenceSequence >= nReferences) {
      return null;
    }

    final BitSet overlappingBins = GenomicIndexUtil.regionToBins(startPos, endPos);
    if (overlappingBins == null) {
      return null;
    }

    skipToSequence(referenceSequence);

    final List<Chunk> metaChunks = new ArrayList<Chunk>();
    final int binCount = readInteger();
    final Bin[] binsByNumber = new Bin[getMaxBinNumberForReference(referenceSequence) + 1];
    boolean sawMetaData = false;

    for (int remaining = binCount; remaining > 0; remaining--) {
      final int binNumber = readInteger();
      final int chunkCount = readInteger();
      final boolean overlapsRegion = overlappingBins.get(binNumber);

      if (!overlapsRegion && binNumber == GenomicIndexUtil.MAX_BINS) {
        // Metadata pseudo-bin: collect its chunks separately and do not create a Bin for it,
        // otherwise normal queries would be off.
        for (int c = 0; c < chunkCount; c++) {
          final long begin = readLong();
          final long end = readLong();
          metaChunks.add(new Chunk(begin, end));
        }
        sawMetaData = true;
        continue;
      }

      final List<Chunk> binChunks;
      Chunk finalChunk = null;
      if (overlapsRegion) {
        binChunks = new ArrayList<Chunk>(chunkCount);
        for (int c = 0; c < chunkCount; c++) {
          final long begin = readLong();
          final long end = readLong();
          finalChunk = new Chunk(begin, end);
          binChunks.add(finalChunk);
        }
      } else {
        // Each chunk is two longs (16 bytes); step over them without materialising anything.
        skipBytes(16 * chunkCount);
        binChunks = Collections.emptyList();
      }

      final Bin bin = new Bin(referenceSequence, binNumber);
      bin.setChunkList(binChunks);
      bin.setLastChunk(finalChunk);
      binsByNumber[binNumber] = bin;
    }

    final int nLinearBins = readInteger();
    final int lastLinearBin = nLinearBins - 1;

    final int firstWindow = LinearIndex.convertToLinearIndexOffset(startPos);
    final int requestedLastWindow;
    if (endPos > 0) {
      requestedLastWindow = LinearIndex.convertToLinearIndexOffset(endPos);
    } else {
      requestedLastWindow = lastLinearBin;
    }
    final int lastWindow = Math.min(requestedLastWindow, lastLinearBin);

    final long[] windowOffsets;
    if (firstWindow < nLinearBins) {
      windowOffsets = new long[lastWindow - firstWindow + 1];
      // Each linear index entry is one long (8 bytes); jump to the first window of interest.
      skipBytes(8 * firstWindow);
      for (int k = 0; k < windowOffsets.length; k++) {
        windowOffsets[k] = readLong();
      }
    } else {
      windowOffsets = new long[0];
    }

    final LinearIndex linearIndex = new LinearIndex(referenceSequence, firstWindow, windowOffsets);

    // The metadata pseudo-bin is not a real bin, so it is excluded from the reported bin count.
    final int realBinCount = sawMetaData ? binCount - 1 : binCount;
    return new BAMIndexContent(
        referenceSequence,
        binsByNumber,
        realBinCount,
        new BAMIndexMetaData(metaChunks),
        linearIndex);
  }
Beispiel #2
0
  /**
   * Test linear index at all references and windows, comparing with existing index.
   *
   * <p>An index is generated from {@code bamFile} via {@code createIndexFile} and compared with
   * the existing index {@code bamIndexFile}: for every reference and every linear index window,
   * the value returned by {@code countAlignmentsInWindow} must be the same whichever index backs
   * the reader.
   *
   * <p>All readers and indexes opened here are closed before the method returns, and the generated
   * index file is registered for deletion as soon as it is created so that a failing assertion
   * does not leave it behind.
   *
   * @param testName label prepended to the file-existence assertion messages
   * @param bamFile path of the input BAM file
   * @param bamIndexFile path of the existing index file to compare against
   * @throws IOException declared for I/O failures raised by the helpers used here
   */
  @Test(enabled = false, dataProvider = "indexComparisonData")
  public void compareLinearIndex(String testName, String bamFile, String bamIndexFile)
      throws IOException {
    // 1. generate bai file
    final File bam = new File(bamFile);
    assertTrue(bam.exists(), testName + " input bam file doesn't exist: " + bamFile);

    final File indexFile1 = createIndexFile(bam);
    // Register cleanup up front: assertions below may abort the test before its last line.
    indexFile1.deleteOnExit();
    assertTrue(
        indexFile1.exists(), testName + " generated bam file's index doesn't exist: " + indexFile1);

    // 2. locate the existing index to compare against
    final File indexFile2 = new File(bamIndexFile);
    assertTrue(indexFile2.exists(), testName + " input index file doesn't exist: " + indexFile2);

    CachingBAMFileIndex existingIndex1 = null;
    CachingBAMFileIndex existingIndex2 = null;
    SAMFileReader reader1 = null;
    SAMFileReader reader2 = null;
    try {
      existingIndex1 = new CachingBAMFileIndex(indexFile1, null); // todo null sequence dictionary?
      existingIndex2 = new CachingBAMFileIndex(indexFile2, null);
      final int referenceCount = existingIndex1.getNumberOfReferences();
      assertEquals(referenceCount, existingIndex2.getNumberOfReferences());

      reader1 = new SAMFileReader(bam, indexFile1, false);
      reader2 = new SAMFileReader(bam, indexFile2, false);

      System.out.println(
          "Comparing " + referenceCount + " references in " + indexFile1 + " and " + indexFile2);

      for (int i = 0; i < referenceCount; i++) {
        final BAMIndexContent content1 = existingIndex1.getQueryResults(i);
        final BAMIndexContent content2 = existingIndex2.getQueryResults(i);
        if (content1 == null) {
          assertTrue(
              content2 == null,
              "No content for 1st bam index, but content for second at reference " + i);
          continue;
        }
        // Fail with a clear message rather than a NullPointerException on the next lines.
        assertTrue(
            content2 != null,
            "Content for 1st bam index, but no content for second at reference " + i);

        final LinearIndex li1 = content1.getLinearIndex();
        final LinearIndex li2 = content2.getLinearIndex();
        // Note: li1 and li2 sizes may differ, which implies 0's in the smaller index's windows,
        // so iterate over the larger of the two.
        // 3. compare the per-window alignment counts obtained through each index
        final int baiSize = Math.max(li1.size(), li2.size());
        for (int win = 0; win < baiSize; win++) {
          final int count1 = countAlignmentsInWindow(i, win, reader1, 0);
          // The first count is handed to the second call as its last argument
          // (presumably the expected count - confirm against countAlignmentsInWindow).
          final int count2 = countAlignmentsInWindow(i, win, reader2, count1);
          assertEquals(
              count2, count1, "Counts don't match for reference " + i + " window " + win);
        }
      }
    } finally {
      // Release file handles even when an assertion above fails.
      if (reader2 != null) {
        reader2.close();
      }
      if (reader1 != null) {
        reader1.close();
      }
      if (existingIndex2 != null) {
        existingIndex2.close();
      }
      if (existingIndex1 != null) {
        existingIndex1.close();
      }
    }
  }