protected BAMIndexContent query( final int referenceSequence, final int startPos, final int endPos) { seek(4); final List<Chunk> metaDataChunks = new ArrayList<Chunk>(); final int sequenceCount = readInteger(); if (referenceSequence >= sequenceCount) { return null; } final BitSet regionBins = GenomicIndexUtil.regionToBins(startPos, endPos); if (regionBins == null) { return null; } skipToSequence(referenceSequence); final int binCount = readInteger(); boolean metaDataSeen = false; final Bin[] bins = new Bin[getMaxBinNumberForReference(referenceSequence) + 1]; for (int binNumber = 0; binNumber < binCount; binNumber++) { final int indexBin = readInteger(); final int nChunks = readInteger(); List<Chunk> chunks = null; // System.out.println("# bin[" + i + "] = " + indexBin + ", nChunks = " + nChunks); Chunk lastChunk = null; if (regionBins.get(indexBin)) { chunks = new ArrayList<Chunk>(nChunks); for (int ci = 0; ci < nChunks; ci++) { final long chunkBegin = readLong(); final long chunkEnd = readLong(); lastChunk = new Chunk(chunkBegin, chunkEnd); chunks.add(lastChunk); } } else if (indexBin == GenomicIndexUtil.MAX_BINS) { // meta data - build the bin so that the count of bins is correct; // but don't attach meta chunks to the bin, or normal queries will be off for (int ci = 0; ci < nChunks; ci++) { final long chunkBegin = readLong(); final long chunkEnd = readLong(); lastChunk = new Chunk(chunkBegin, chunkEnd); metaDataChunks.add(lastChunk); } metaDataSeen = true; continue; // don't create a Bin } else { skipBytes(16 * nChunks); chunks = Collections.emptyList(); } final Bin bin = new Bin(referenceSequence, indexBin); bin.setChunkList(chunks); bin.setLastChunk(lastChunk); bins[indexBin] = bin; } final int nLinearBins = readInteger(); final int regionLinearBinStart = LinearIndex.convertToLinearIndexOffset(startPos); final int regionLinearBinStop = endPos > 0 ? LinearIndex.convertToLinearIndexOffset(endPos) : nLinearBins - 1; final int actualStop = Math.min(regionLinearBinStop, nLinearBins - 1); long[] linearIndexEntries = new long[0]; if (regionLinearBinStart < nLinearBins) { linearIndexEntries = new long[actualStop - regionLinearBinStart + 1]; skipBytes(8 * regionLinearBinStart); for (int linearBin = regionLinearBinStart; linearBin <= actualStop; linearBin++) linearIndexEntries[linearBin - regionLinearBinStart] = readLong(); } final LinearIndex linearIndex = new LinearIndex(referenceSequence, regionLinearBinStart, linearIndexEntries); return new BAMIndexContent( referenceSequence, bins, binCount - (metaDataSeen ? 1 : 0), new BAMIndexMetaData(metaDataChunks), linearIndex); }
@Test(enabled = false, dataProvider = "indexComparisonData") /** Test linear index at all references and windows, comparing with existing index */ public void compareLinearIndex(String testName, String bamFile, String bamIndexFile) throws IOException { // compare index generated from bamFile with existing bamIndex file // by testing all the references' windows and comparing the counts // 1. generate bai file // 2. count its references // 3. count bamIndex references comparing counts // 1. generate bai file File bam = new File(bamFile); assertTrue(bam.exists(), testName + " input bam file doesn't exist: " + bamFile); File indexFile1 = createIndexFile(bam); assertTrue( indexFile1.exists(), testName + " generated bam file's index doesn't exist: " + indexFile1); // 2. count its references File indexFile2 = new File(bamIndexFile); assertTrue(indexFile2.exists(), testName + " input index file doesn't exist: " + indexFile2); final CachingBAMFileIndex existingIndex1 = new CachingBAMFileIndex(indexFile1, null); // todo null sequence dictionary? final CachingBAMFileIndex existingIndex2 = new CachingBAMFileIndex(indexFile2, null); final int n_ref = existingIndex1.getNumberOfReferences(); assertEquals(n_ref, existingIndex2.getNumberOfReferences()); final SAMFileReader reader1 = new SAMFileReader(bam, indexFile1, false); final SAMFileReader reader2 = new SAMFileReader(bam, indexFile2, false); System.out.println( "Comparing " + n_ref + " references in " + indexFile1 + " and " + indexFile2); for (int i = 0; i < n_ref; i++) { final BAMIndexContent content1 = existingIndex1.getQueryResults(i); final BAMIndexContent content2 = existingIndex2.getQueryResults(i); if (content1 == null) { assertTrue( content2 == null, "No content for 1st bam index, but content for second at reference" + i); continue; } int[] counts1 = new int[LinearIndex.MAX_LINEAR_INDEX_SIZE]; int[] counts2 = new int[LinearIndex.MAX_LINEAR_INDEX_SIZE]; LinearIndex li1 = content1.getLinearIndex(); LinearIndex li2 = content2.getLinearIndex(); // todo not li1 and li2 sizes may differ. Implies 0's in the smaller index windows // 3. count bamIndex references comparing counts int baiSize = Math.max(li1.size(), li2.size()); for (int win = 0; win < baiSize; win++) { counts1[win] = countAlignmentsInWindow(i, win, reader1, 0); counts2[win] = countAlignmentsInWindow(i, win, reader2, counts1[win]); assertEquals( counts2[win], counts1[win], "Counts don't match for reference " + i + " window " + win); } } indexFile1.deleteOnExit(); }