Esempio n. 1
0
  /**
   * Computes the GC percentage over the bases of the given reference context.
   *
   * <p>Every base is converted with {@code BaseUtils.simpleBaseToBaseIndex}. Bases whose index
   * equals the ordinal of G or C are counted as GC, bases whose index equals the ordinal of A or T
   * are counted as AT, and every other base is skipped.
   *
   * @param ref the reference context whose bases are scanned
   * @return {@code 100 * gc / (gc + at)}, or {@code 0.0} when no A, C, G or T base was counted
   */
  private static double computeGCContent(ReferenceContext ref) {
    int gcCount = 0;
    int atCount = 0;

    for (byte current : ref.getBases()) {
      final int idx = BaseUtils.simpleBaseToBaseIndex(current);

      final boolean isGorC =
          idx == BaseUtils.Base.G.ordinal() || idx == BaseUtils.Base.C.ordinal();
      if (isGorC) {
        gcCount++;
        continue;
      }

      final boolean isAorT =
          idx == BaseUtils.Base.A.ordinal() || idx == BaseUtils.Base.T.ordinal();
      if (isAorT) {
        atCount++;
      }
      // any other base is not counted at all
    }

    final int counted = gcCount + atCount;
    // With nothing counted the denominator falls back to 1 so the result is 0.0, not NaN.
    final int denominator;
    if (counted == 0) {
      denominator = 1;
    } else {
      denominator = counted;
    }
    return (100.0 * gcCount) / denominator;
  }
  /**
   * Builds an allele-biased, down-sampled version of the given pileup.
   *
   * <p>The elements are grouped by the simple base index of their base (elements whose base maps
   * to {@code -1} are left out of the grouping). The number of reads to drop is the floor of
   * {@code totalAlleleCount * downsamplingFraction}; {@code runSmartDownsampling} decides the
   * per-allele target counts and {@code downsampleElements} picks which elements of each allele to
   * drop. The surviving elements are emitted in the iteration order of the original pileup.
   *
   * @param pileup the original pileup
   * @param downsamplingFraction the fraction of total reads to remove per allele
   * @return the input pileup itself when the fraction is {@code <= 0.0}; an empty pileup at the
   *     same location when the fraction is {@code >= 1.0}; otherwise a new pileup holding the
   *     elements that were not selected for removal
   */
  public static ReadBackedPileup createAlleleBiasedBasePileup(
      final ReadBackedPileup pileup, final double downsamplingFraction) {
    // Degenerate fractions: either nothing or everything is removed.
    if (downsamplingFraction <= 0.0) {
      return pileup;
    }
    if (downsamplingFraction >= 1.0) {
      return new ReadBackedPileupImpl(pileup.getLocation(), new ArrayList<PileupElement>());
    }

    // One bucket per simple base index.
    final PileupElementList[] elementsByAllele = new PileupElementList[4];
    for (int allele = 0; allele < elementsByAllele.length; allele++) {
      elementsByAllele[allele] = new PileupElementList();
    }

    // Stratify the reads by the allele they show at this position.
    for (final PileupElement element : pileup) {
      final int alleleIndex = BaseUtils.simpleBaseToBaseIndex(element.getBase());
      if (alleleIndex == -1) {
        continue;
      }
      elementsByAllele[alleleIndex].add(element);
    }

    // Per-allele counts and their total.
    final int[] observedCounts = calculateAlleleCounts(elementsByAllele);
    final int observedTotal = (int) MathUtils.sum(observedCounts);

    // The int cast truncates, i.e. floors the (non-negative) product.
    final int removalBudget = (int) (observedTotal * downsamplingFraction);
    final int[] desiredCounts = runSmartDownsampling(observedCounts, removalBudget);

    final HashSet<PileupElement> discarded = new HashSet<PileupElement>(removalBudget);
    for (int allele = 0; allele < elementsByAllele.length; allele++) {
      final int observed = observedCounts[allele];
      final int desired = desiredCounts[allele];
      // Alleles that are already at (or below) their target are left alone.
      if (observed > desired) {
        discarded.addAll(downsampleElements(elementsByAllele[allele], observed, observed - desired));
      }
    }

    // Walk the original pileup rather than the buckets: the reads must stay sorted because the
    // FragmentUtils code expects them in coordinate order and will fail otherwise.
    final List<PileupElement> survivors =
        new ArrayList<PileupElement>(observedTotal - removalBudget);
    for (final PileupElement element : pileup) {
      if (discarded.contains(element)) {
        continue;
      }
      survivors.add(element);
    }

    return new ReadBackedPileupImpl(
        pileup.getLocation(), new ArrayList<PileupElement>(survivors));
  }
Esempio n. 3
0
  /**
   * Annotates the variants found at the current locus and writes them to the VCF writer.
   *
   * <p>The variants are annotated through {@code engine.annotateContext} only when the reference
   * base maps to a simple base index (i.e. is not {@code -1}); otherwise they are written
   * unchanged. All annotation happens before the first record is written.
   *
   * @param tracker the meta-data tracker
   * @param ref the reference context for the locus
   * @param context the alignment context for the locus
   * @return 0 when {@code tracker} is null or no variants are present at the locus; 1 once the
   *     variants have been handed to the writer
   */
  public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    if (tracker == null) {
      return 0;
    }

    // All variant contexts bound at this location.
    final Collection<VariantContext> variantsHere =
        tracker.getValues(variantCollection.variants, context.getLocation());
    if (variantsHere.isEmpty()) {
      return 0;
    }

    final boolean refIsUnambiguous = BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1;

    final Collection<VariantContext> toWrite;
    if (refIsUnambiguous) {
      // Annotation works on the pileup split by sample name.
      final Map<String, AlignmentContext> perSampleContexts =
          AlignmentContextUtils.splitContextBySampleName(context.getBasePileup());
      toWrite = new ArrayList<>(variantsHere.size());
      for (VariantContext variant : variantsHere) {
        toWrite.add(engine.annotateContext(tracker, ref, perSampleContexts, variant));
      }
    } else {
      // Ambiguous reference base: pass the variants through without annotation.
      toWrite = variantsHere;
    }

    for (VariantContext record : toWrite) {
      vcfWriter.add(record);
    }

    return 1;
  }