/**
 * Computes the GC content (as a percentage) of the bases in the given reference context.
 * Bases that are neither G/C nor A/T (e.g. Ns) are ignored.
 *
 * @param ref the reference context whose bases are examined
 * @return the percentage of G/C bases among all unambiguous bases (0 if there are none)
 */
private static double computeGCContent(ReferenceContext ref) {
    int gc = 0, at = 0;

    for (byte base : ref.getBases()) {
        int baseIndex = BaseUtils.simpleBaseToBaseIndex(base);
        if (baseIndex == BaseUtils.Base.G.ordinal() || baseIndex == BaseUtils.Base.C.ordinal())
            gc++;
        else if (baseIndex == BaseUtils.Base.A.ordinal() || baseIndex == BaseUtils.Base.T.ordinal())
            at++;
        // ambiguous bases are ignored
    }

    int sum = gc + at;
    return (100.0 * gc) / (sum == 0 ? 1 : sum);
}
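// A minimal standalone sketch of the same GC computation over a raw byte[] window, with no
// GATK dependencies; useful for unit-testing the arithmetic in isolation. The name
// gcPercentOfWindow is hypothetical, not part of the GATK API.
private static double gcPercentOfWindow(byte[] bases) {
    int gc = 0, at = 0;
    for (byte b : bases) {
        switch (b) {
            case 'G': case 'C': case 'g': case 'c': gc++; break; // G/C contribute to the numerator
            case 'A': case 'T': case 'a': case 't': at++; break; // A/T only widen the denominator
            default: break;                                      // Ns and other ambiguity codes are ignored
        }
    }
    int sum = gc + at;
    return sum == 0 ? 0.0 : (100.0 * gc) / sum; // e.g. "ACGT" -> 50.0
}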
/**
 * Computes an allele-biased version of the given pileup.
 *
 * @param pileup               the original pileup
 * @param downsamplingFraction the fraction of total reads to remove per allele
 * @return allele-biased pileup
 */
public static ReadBackedPileup createAlleleBiasedBasePileup(final ReadBackedPileup pileup, final double downsamplingFraction) {
    // special case removal of all or no reads
    if (downsamplingFraction <= 0.0)
        return pileup;
    if (downsamplingFraction >= 1.0)
        return new ReadBackedPileupImpl(pileup.getLocation(), new ArrayList<PileupElement>());

    final PileupElementList[] alleleStratifiedElements = new PileupElementList[4];
    for (int i = 0; i < 4; i++)
        alleleStratifiedElements[i] = new PileupElementList();

    // start by stratifying the reads by the alleles they represent at this position
    for (final PileupElement pe : pileup) {
        final int baseIndex = BaseUtils.simpleBaseToBaseIndex(pe.getBase());
        if (baseIndex != -1)
            alleleStratifiedElements[baseIndex].add(pe);
    }

    // make a listing of allele counts and calculate the total count
    final int[] alleleCounts = calculateAlleleCounts(alleleStratifiedElements);
    final int totalAlleleCount = (int) MathUtils.sum(alleleCounts);

    // do smart down-sampling
    final int numReadsToRemove = (int) (totalAlleleCount * downsamplingFraction); // floor
    final int[] targetAlleleCounts = runSmartDownsampling(alleleCounts, numReadsToRemove);

    final HashSet<PileupElement> readsToRemove = new HashSet<PileupElement>(numReadsToRemove);
    for (int i = 0; i < 4; i++) {
        final PileupElementList alleleList = alleleStratifiedElements[i];

        // if we don't need to remove any reads, then don't
        if (alleleCounts[i] > targetAlleleCounts[i])
            readsToRemove.addAll(downsampleElements(alleleList, alleleCounts[i], alleleCounts[i] - targetAlleleCounts[i]));
    }

    // we need to keep the reads sorted because the FragmentUtils code will expect them in
    // coordinate order and will fail otherwise
    final List<PileupElement> readsToKeep = new ArrayList<PileupElement>(totalAlleleCount - numReadsToRemove);
    for (final PileupElement pe : pileup) {
        if (!readsToRemove.contains(pe)) {
            readsToKeep.add(pe);
        }
    }

    return new ReadBackedPileupImpl(pileup.getLocation(), new ArrayList<PileupElement>(readsToKeep));
}
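// A naive sketch of the per-allele target computation that runSmartDownsampling performs in
// spirit: remove reads from the most abundant alleles first so that rarer alleles are
// preserved. This is an illustrative approximation, not the actual GATK algorithm, and the
// name proportionalTargets is hypothetical.
private static int[] proportionalTargets(int[] alleleCounts, int numReadsToRemove) {
    int[] targets = alleleCounts.clone();
    while (numReadsToRemove-- > 0) {
        int maxIndex = 0;
        for (int i = 1; i < targets.length; i++)        // find the currently most abundant allele
            if (targets[i] > targets[maxIndex]) maxIndex = i;
        if (targets[maxIndex] > 0) targets[maxIndex]--; // take one read from it
    }
    return targets; // e.g. counts {10, 2, 0, 0} with 3 removals -> {7, 2, 0, 0}
}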
/**
 * Processes all variant contexts overlapping the current locus, renaming the genotype
 * sample if requested and annotating each record with its rsID before writing it out.
 *
 * @param tracker the meta-data tracker
 * @param ref     the reference context for the given locus
 * @param context the alignment context for the given locus
 * @return 1 if the locus was successfully processed, 0 otherwise
 */
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    if (tracker == null || !BaseUtils.isRegularBase(ref.getBase()))
        return 0;

    Collection<VariantContext> contexts = getVariantContexts(tracker, ref);

    for (VariantContext vc : contexts) {
        VariantContextBuilder builder = new VariantContextBuilder(vc);

        // set the appropriate sample name if necessary
        if (sampleName != null && vc.hasGenotypes() && vc.hasGenotype(variants.getName())) {
            Genotype g = new GenotypeBuilder(vc.getGenotype(variants.getName())).name(sampleName).make();
            builder.genotypes(g);
        }

        final VariantContext withID = variantOverlapAnnotator.annotateRsID(tracker, builder.make());
        writeRecord(withID, tracker, ref.getLocus());
    }

    return 1;
}
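// VariantContext and Genotype are immutable, so the rename above has to go through builder
// copies rather than setters. A minimal self-contained sketch of the same copy-and-modify
// pattern (the Sample class and its builder are hypothetical, purely illustrative):
static final class Sample {
    final String name;
    final String genotype;
    Sample(String name, String genotype) { this.name = name; this.genotype = genotype; }

    // copy-constructor-style builder: start from an existing instance, override one field
    static final class Builder {
        private String name;
        private String genotype;
        Builder(Sample s) { this.name = s.name; this.genotype = s.genotype; }
        Builder name(String newName) { this.name = newName; return this; }
        Sample make() { return new Sample(name, genotype); }
    }
}
// usage mirrors new GenotypeBuilder(vc.getGenotype(...)).name(sampleName).make():
// Sample renamed = new Sample.Builder(original).name("NA12878").make();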
@Override
public CallableBaseState map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    CalledState state;

    if (BaseUtils.isNBase(ref.getBase())) {
        state = CalledState.REF_N;
    } else {
        // count up the depths of all and QC+ bases
        int rawDepth = 0, QCDepth = 0, lowMAPQDepth = 0;

        for (PileupElement e : context.getBasePileup()) {
            rawDepth++;
            if (e.getMappingQual() <= maxLowMAPQ)
                lowMAPQDepth++;
            if (e.getMappingQual() >= minMappingQuality && (e.getQual() >= minBaseQuality || e.isDeletion())) {
                QCDepth++;
            }
        }

        // System.out.printf("%s rawdepth = %d QCDepth = %d lowMAPQ = %d%n", context.getLocation(), rawDepth, QCDepth, lowMAPQDepth);
        if (rawDepth == 0) {
            state = CalledState.NO_COVERAGE;
        } else if (rawDepth >= minDepthLowMAPQ && MathUtils.ratio(lowMAPQDepth, rawDepth) >= maxLowMAPQFraction) {
            state = CalledState.POOR_MAPPING_QUALITY;
        } else if (QCDepth < minDepth) {
            state = CalledState.LOW_COVERAGE;
        } else if (rawDepth >= maxDepth && maxDepth != -1) {
            state = CalledState.EXCESSIVE_COVERAGE;
        } else {
            state = CalledState.CALLABLE;
        }
    }

    return new CallableBaseState(getToolkit().getGenomeLocParser(), context.getLocation(), state);
}
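// The thresholding above can be viewed as a pure function of three depth counters. A
// dependency-free sketch of the same decision ladder, handy for table-driven unit tests
// (classifyDepths is a hypothetical name; CalledState mirrors the enum used above):
static CalledState classifyDepths(int rawDepth, int qcDepth, int lowMAPQDepth,
                                  int minDepth, int maxDepth, int minDepthLowMAPQ,
                                  double maxLowMAPQFraction) {
    if (rawDepth == 0)
        return CalledState.NO_COVERAGE;
    // poor mapping quality dominates: enough total depth, but mostly low-MAPQ reads
    if (rawDepth >= minDepthLowMAPQ && (double) lowMAPQDepth / rawDepth >= maxLowMAPQFraction)
        return CalledState.POOR_MAPPING_QUALITY;
    if (qcDepth < minDepth)
        return CalledState.LOW_COVERAGE;
    // maxDepth == -1 disables the excessive-coverage check
    if (maxDepth != -1 && rawDepth >= maxDepth)
        return CalledState.EXCESSIVE_COVERAGE;
    return CalledState.CALLABLE;
}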
/**
 * For each site of interest, annotate based on the requested annotation types.
 *
 * @param tracker the meta-data tracker
 * @param ref     the reference context for the given locus
 * @param context the alignment context for the given locus
 * @return 1 if the locus was successfully processed, 0 otherwise
 */
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    if (tracker == null)
        return 0;

    // get the variant contexts for all the variants at the location
    Collection<VariantContext> VCs = tracker.getValues(variantCollection.variants, context.getLocation());
    if (VCs.isEmpty())
        return 0;

    Collection<VariantContext> annotatedVCs = VCs;

    // if the reference base is not ambiguous, we can annotate
    Map<String, AlignmentContext> stratifiedContexts;
    if (BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1) {
        stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(context.getBasePileup());
        annotatedVCs = new ArrayList<>(VCs.size());
        for (VariantContext vc : VCs)
            annotatedVCs.add(engine.annotateContext(tracker, ref, stratifiedContexts, vc));
    }

    for (VariantContext annotatedVC : annotatedVCs)
        vcfWriter.add(annotatedVC);

    return 1;
}
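// splitContextBySampleName above stratifies the pileup so each annotation sees per-sample
// evidence. A dependency-free sketch of the same grouping idea, assuming a caller-supplied
// accessor for the sample name (groupBySample is a hypothetical name, not a GATK method):
private static <T> Map<String, List<T>> groupBySample(Collection<T> elements,
                                                      java.util.function.Function<T, String> sampleOf) {
    Map<String, List<T>> bySample = new HashMap<>();
    for (T e : elements)
        bySample.computeIfAbsent(sampleOf.apply(e), k -> new ArrayList<>()).add(e); // one bucket per sample
    return bySample;
}
// usage sketch, assuming the read exposes its read group's sample:
// Map<String, List<PileupElement>> m = groupBySample(elements, pe -> pe.getRead().getReadGroup().getSample());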