/**
 * Computes the GC content, as a percentage, of the bases in the given reference window.
 *
 * @param ref the reference context whose bases are examined
 * @return the percentage (0-100) of G/C bases among all unambiguous (A/C/G/T) bases
 */
private static double computeGCContent(ReferenceContext ref) {
    int gc = 0, at = 0;

    for (byte base : ref.getBases()) {
        int baseIndex = BaseUtils.simpleBaseToBaseIndex(base);
        if (baseIndex == BaseUtils.Base.G.ordinal() || baseIndex == BaseUtils.Base.C.ordinal())
            gc++;
        else if (baseIndex == BaseUtils.Base.A.ordinal() || baseIndex == BaseUtils.Base.T.ordinal())
            at++;
        // ambiguous bases (e.g. N) are ignored
    }

    int sum = gc + at;
    // guard against division by zero when the window contains no unambiguous bases
    return (100.0 * gc) / (sum == 0 ? 1 : sum);
}
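/*
 * A minimal, self-contained sketch of the same counting logic over a raw
 * byte[] window, handy for unit testing outside of GATK. The helper name
 * gcPercentOf is hypothetical and not part of the GATK API.
 */
private static double gcPercentOf(byte[] bases) {
    int gc = 0, at = 0;
    for (byte b : bases) {
        switch (Character.toUpperCase((char) b)) {
            case 'G': case 'C': gc++; break;
            case 'A': case 'T': at++; break;
            default: break; // ambiguous bases (e.g. 'N') are ignored
        }
    }
    int sum = gc + at;
    // e.g. gcPercentOf("GCGCAT".getBytes()) == 100.0 * 4 / 6, roughly 66.67
    return (100.0 * gc) / (sum == 0 ? 1 : sum);
}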
/**
 * Computes an allele biased version of the given pileup
 *
 * @param pileup the original pileup
 * @param downsamplingFraction the fraction of total reads to remove per allele
 * @return allele biased pileup
 */
public static ReadBackedPileup createAlleleBiasedBasePileup(
        final ReadBackedPileup pileup, final double downsamplingFraction) {
    // special case removal of all or no reads
    if (downsamplingFraction <= 0.0)
        return pileup;
    if (downsamplingFraction >= 1.0)
        return new ReadBackedPileupImpl(pileup.getLocation(), new ArrayList<PileupElement>());

    final PileupElementList[] alleleStratifiedElements = new PileupElementList[4];
    for (int i = 0; i < 4; i++)
        alleleStratifiedElements[i] = new PileupElementList();

    // start by stratifying the reads by the alleles they represent at this position
    for (final PileupElement pe : pileup) {
        final int baseIndex = BaseUtils.simpleBaseToBaseIndex(pe.getBase());
        if (baseIndex != -1)
            alleleStratifiedElements[baseIndex].add(pe);
    }

    // make a listing of allele counts and calculate the total count
    final int[] alleleCounts = calculateAlleleCounts(alleleStratifiedElements);
    final int totalAlleleCount = (int) MathUtils.sum(alleleCounts);

    // do smart down-sampling
    final int numReadsToRemove = (int) (totalAlleleCount * downsamplingFraction); // floor
    final int[] targetAlleleCounts = runSmartDownsampling(alleleCounts, numReadsToRemove);

    final HashSet<PileupElement> readsToRemove = new HashSet<PileupElement>(numReadsToRemove);
    for (int i = 0; i < 4; i++) {
        final PileupElementList alleleList = alleleStratifiedElements[i];
        // if we don't need to remove any reads, then don't
        if (alleleCounts[i] > targetAlleleCounts[i])
            readsToRemove.addAll(
                downsampleElements(
                    alleleList, alleleCounts[i], alleleCounts[i] - targetAlleleCounts[i]));
    }

    // we need to keep the reads sorted because the FragmentUtils code will expect them in
    // coordinate order and will fail otherwise
    final List<PileupElement> readsToKeep =
        new ArrayList<PileupElement>(totalAlleleCount - numReadsToRemove);
    for (final PileupElement pe : pileup) {
        if (!readsToRemove.contains(pe)) {
            readsToKeep.add(pe);
        }
    }

    return new ReadBackedPileupImpl(
        pileup.getLocation(), new ArrayList<PileupElement>(readsToKeep));
}
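/*
 * A simplified, self-contained sketch of the stratify-then-downsample pattern
 * used above, written against plain JDK collections so it can be exercised in
 * isolation. The helper name downsampleByKey and the uniform random selection
 * are assumptions for illustration; GATK's runSmartDownsampling and
 * downsampleElements pick which reads to drop with their own, more careful
 * strategy.
 */
private static <T> List<T> downsampleByKey(final List<T> items,
                                           final Function<T, Integer> keyOf,
                                           final double fractionToRemove,
                                           final Random rng) {
    // stratify the items by key (here: allele index), ignoring key -1
    final Map<Integer, List<T>> strata = new HashMap<>();
    for (final T item : items) {
        final int key = keyOf.apply(item);
        if (key != -1) {
            List<T> stratum = strata.get(key);
            if (stratum == null) {
                stratum = new ArrayList<>();
                strata.put(key, stratum);
            }
            stratum.add(item);
        }
    }

    // mark a floored share of each stratum for removal, chosen uniformly at random
    final Set<T> toRemove = Collections.newSetFromMap(new IdentityHashMap<T, Boolean>());
    for (final List<T> stratum : strata.values()) {
        final int numToRemove = (int) (stratum.size() * fractionToRemove); // floor
        final List<T> shuffled = new ArrayList<>(stratum);
        Collections.shuffle(shuffled, rng);
        toRemove.addAll(shuffled.subList(0, numToRemove));
    }

    // preserve the original (coordinate) order of the survivors, mirroring the
    // FragmentUtils requirement noted above
    final List<T> kept = new ArrayList<>();
    for (final T item : items) {
        if (!toRemove.contains(item))
            kept.add(item);
    }
    return kept;
}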
/**
 * For each site of interest, annotate based on the requested annotation types
 *
 * @param tracker the meta-data tracker
 * @param ref     the reference context for the given locus
 * @param context the alignment context for the given locus
 * @return 1 if the locus was successfully processed, 0 otherwise
 */
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    if (tracker == null)
        return 0;

    // get the variant contexts for all the variants at the location
    Collection<VariantContext> VCs =
        tracker.getValues(variantCollection.variants, context.getLocation());
    if (VCs.isEmpty())
        return 0;

    Collection<VariantContext> annotatedVCs = VCs;

    // if the reference base is not ambiguous, we can annotate
    Map<String, AlignmentContext> stratifiedContexts;
    if (BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1) {
        stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(context.getBasePileup());
        annotatedVCs = new ArrayList<>(VCs.size());
        for (VariantContext vc : VCs)
            annotatedVCs.add(engine.annotateContext(tracker, ref, stratifiedContexts, vc));
    }

    for (VariantContext annotatedVC : annotatedVCs)
        vcfWriter.add(annotatedVC);

    return 1;
}
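/*
 * A minimal sketch of the per-sample stratification step used in map() above,
 * over plain JDK types. The Read class and splitBySample helper are
 * hypothetical stand-ins for GATK's pileup types and
 * AlignmentContextUtils.splitContextBySampleName; they only illustrate the
 * grouping that happens before per-sample annotation.
 */
private static final class Read {
    final String sampleName;
    Read(final String sampleName) { this.sampleName = sampleName; }
}

private static Map<String, List<Read>> splitBySample(final Iterable<Read> pileup) {
    // LinkedHashMap keeps samples in first-seen order, matching pileup traversal
    final Map<String, List<Read>> bySample = new LinkedHashMap<>();
    for (final Read read : pileup) {
        List<Read> group = bySample.get(read.sampleName);
        if (group == null) {
            group = new ArrayList<>();
            bySample.put(read.sampleName, group);
        }
        group.add(read);
    }
    return bySample;
}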