/** * Computes an allele biased version of the given pileup * * @param pileup the original pileup * @param downsamplingFraction the fraction of total reads to remove per allele * @return allele biased pileup */ public static ReadBackedPileup createAlleleBiasedBasePileup( final ReadBackedPileup pileup, final double downsamplingFraction) { // special case removal of all or no reads if (downsamplingFraction <= 0.0) return pileup; if (downsamplingFraction >= 1.0) return new ReadBackedPileupImpl(pileup.getLocation(), new ArrayList<PileupElement>()); final PileupElementList[] alleleStratifiedElements = new PileupElementList[4]; for (int i = 0; i < 4; i++) alleleStratifiedElements[i] = new PileupElementList(); // start by stratifying the reads by the alleles they represent at this position for (final PileupElement pe : pileup) { final int baseIndex = BaseUtils.simpleBaseToBaseIndex(pe.getBase()); if (baseIndex != -1) alleleStratifiedElements[baseIndex].add(pe); } // make a listing of allele counts and calculate the total count final int[] alleleCounts = calculateAlleleCounts(alleleStratifiedElements); final int totalAlleleCount = (int) MathUtils.sum(alleleCounts); // do smart down-sampling final int numReadsToRemove = (int) (totalAlleleCount * downsamplingFraction); // floor final int[] targetAlleleCounts = runSmartDownsampling(alleleCounts, numReadsToRemove); final HashSet<PileupElement> readsToRemove = new HashSet<PileupElement>(numReadsToRemove); for (int i = 0; i < 4; i++) { final PileupElementList alleleList = alleleStratifiedElements[i]; // if we don't need to remove any reads, then don't if (alleleCounts[i] > targetAlleleCounts[i]) readsToRemove.addAll( downsampleElements( alleleList, alleleCounts[i], alleleCounts[i] - targetAlleleCounts[i])); } // we need to keep the reads sorted because the FragmentUtils code will expect them in // coordinate order and will fail otherwise final List<PileupElement> readsToKeep = new ArrayList<PileupElement>(totalAlleleCount - numReadsToRemove); for (final PileupElement pe : pileup) { if (!readsToRemove.contains(pe)) { readsToKeep.add(pe); } } return new ReadBackedPileupImpl( pileup.getLocation(), new ArrayList<PileupElement>(readsToKeep)); }