Beispiel #1
0
  /**
   * Computes the GC percentage of the bases exposed by a reference context.
   *
   * <p>Every byte from {@code ref.getBases()} is mapped to a base index. Bases that map to G or C
   * count towards the numerator, bases that map to A or T only towards the denominator, and any
   * other base is left out of both.
   *
   * @param ref the reference context whose bases are scanned
   * @return 100 * GC / (GC + AT), or 0 when no A, C, G or T base was seen
   */
  private static double computeGCContent(ReferenceContext ref) {
    final int aIndex = BaseUtils.Base.A.ordinal();
    final int cIndex = BaseUtils.Base.C.ordinal();
    final int gIndex = BaseUtils.Base.G.ordinal();
    final int tIndex = BaseUtils.Base.T.ordinal();

    int gcCount = 0;
    int atCount = 0;
    for (final byte refBase : ref.getBases()) {
      final int index = BaseUtils.simpleBaseToBaseIndex(refBase);
      if (index == gIndex || index == cIndex) {
        gcCount++;
      } else if (index == aIndex || index == tIndex) {
        atCount++;
      }
      // anything else is deliberately not counted
    }

    final int counted = gcCount + atCount;
    if (counted == 0) {
      // no countable bases: the numerator is 0 as well, so the result is 0
      return 0.0;
    }
    return (100.0 * gcCount) / counted;
  }
  /**
   * Builds a down-sampled version of a pileup in which reads are removed on a per-allele basis.
   *
   * <p>Pileup elements are grouped by the base index of their base. Elements whose base has no
   * index (-1) are not placed in any group, so they are not candidates for removal. The number of
   * reads to remove is the floor of (grouped element count * downsamplingFraction); the per-allele
   * target counts come from {@code runSmartDownsampling} and the elements to drop for each allele
   * are chosen by {@code downsampleElements}.
   *
   * @param pileup the original pileup
   * @param downsamplingFraction the fraction of total reads to remove per allele
   * @return the input pileup itself when the fraction is at most 0, an empty pileup at the same
   *     location when the fraction is at least 1, otherwise a new pileup holding the surviving
   *     elements in the iteration order of the input
   */
  public static ReadBackedPileup createAlleleBiasedBasePileup(
      final ReadBackedPileup pileup, final double downsamplingFraction) {
    // nothing to remove: hand the input back untouched
    if (downsamplingFraction <= 0.0) {
      return pileup;
    }
    // everything is removed: empty pileup at the same location
    if (downsamplingFraction >= 1.0) {
      return new ReadBackedPileupImpl(pileup.getLocation(), new ArrayList<PileupElement>());
    }

    // one bucket per base index
    final int numAlleles = 4;
    final PileupElementList[] elementsByAllele = new PileupElementList[numAlleles];
    for (int allele = 0; allele < numAlleles; allele++) {
      elementsByAllele[allele] = new PileupElementList();
    }

    // stratify the elements by the allele they show at this position
    for (final PileupElement element : pileup) {
      final int alleleIndex = BaseUtils.simpleBaseToBaseIndex(element.getBase());
      if (alleleIndex == -1) {
        continue;
      }
      elementsByAllele[alleleIndex].add(element);
    }

    // per-allele counts and their total
    final int[] observedCounts = calculateAlleleCounts(elementsByAllele);
    final int observedTotal = (int) MathUtils.sum(observedCounts);

    // the cast truncates, i.e. the removal budget is rounded down
    final int removalBudget = (int) (observedTotal * downsamplingFraction);
    final int[] targetCounts = runSmartDownsampling(observedCounts, removalBudget);

    final HashSet<PileupElement> discarded = new HashSet<PileupElement>(removalBudget);
    for (int allele = 0; allele < numAlleles; allele++) {
      // only alleles that are above their target lose elements
      if (observedCounts[allele] > targetCounts[allele]) {
        final int excess = observedCounts[allele] - targetCounts[allele];
        discarded.addAll(
            downsampleElements(elementsByAllele[allele], observedCounts[allele], excess));
      }
    }

    // walk the original pileup again so the survivors keep their original (coordinate) order,
    // which the FragmentUtils code expects
    final List<PileupElement> survivors =
        new ArrayList<PileupElement>(observedTotal - removalBudget);
    for (final PileupElement element : pileup) {
      if (discarded.contains(element)) {
        continue;
      }
      survivors.add(element);
    }

    return new ReadBackedPileupImpl(
        pileup.getLocation(), new ArrayList<PileupElement>(survivors));
  }
  /**
   * Writes one record for every variant context obtained for the current locus.
   *
   * <p>When {@code sampleName} is set and the variant context carries a genotype named after the
   * {@code variants} binding, that genotype is rebuilt under {@code sampleName} and replaces the
   * genotypes of the record. Each record is passed through the rsID annotator before being
   * written.
   *
   * @param tracker the meta-data tracker
   * @param ref the reference context
   * @param context the alignment context (not used here)
   * @return 0 when the tracker is null or the reference base is not a regular base, 1 otherwise
   */
  public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    final boolean skipLocus = tracker == null || !BaseUtils.isRegularBase(ref.getBase());
    if (skipLocus) {
      return 0;
    }

    for (VariantContext variant : getVariantContexts(tracker, ref)) {
      VariantContextBuilder recordBuilder = new VariantContextBuilder(variant);

      // rename the sample only when a replacement name was given and the genotype exists
      final boolean renameSample =
          sampleName != null
              && variant.hasGenotypes()
              && variant.hasGenotype(variants.getName());
      if (renameSample) {
        Genotype renamed =
            new GenotypeBuilder(variant.getGenotype(variants.getName())).name(sampleName).make();
        recordBuilder.genotypes(renamed);
      }

      final VariantContext annotated =
          variantOverlapAnnotator.annotateRsID(tracker, recordBuilder.make());
      writeRecord(annotated, tracker, ref.getLocus());
    }

    return 1;
  }
  /**
   * Classifies the current locus into a {@code CalledState}.
   *
   * <p>A locus whose reference base is N is {@code REF_N}. Otherwise the pileup is scanned to
   * count all elements, the elements with mapping quality at or below {@code maxLowMAPQ}, and the
   * elements that pass both the mapping-quality and the base-quality thresholds (deletions are
   * exempt from the base-quality threshold). The first matching rule below decides the state:
   *
   * <ol>
   *   <li>no elements at all: {@code NO_COVERAGE}
   *   <li>raw depth at least {@code minDepthLowMAPQ} and low-MAPQ ratio at least
   *       {@code maxLowMAPQFraction}: {@code POOR_MAPPING_QUALITY}
   *   <li>passing depth below {@code minDepth}: {@code LOW_COVERAGE}
   *   <li>{@code maxDepth} is not -1 and raw depth at least {@code maxDepth}:
   *       {@code EXCESSIVE_COVERAGE}
   *   <li>anything else: {@code CALLABLE}
   * </ol>
   *
   * @param tracker the meta-data tracker (not used here)
   * @param ref the reference context
   * @param context the alignment context supplying the pileup and the location
   * @return the state of this locus wrapped together with its location
   */
  @Override
  public CallableBaseState map(
      RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    final CalledState verdict;

    if (BaseUtils.isNBase(ref.getBase())) {
      verdict = CalledState.REF_N;
    } else {
      int totalDepth = 0;
      int passingDepth = 0;
      int lowMapqDepth = 0;

      for (PileupElement element : context.getBasePileup()) {
        totalDepth++;

        if (element.getMappingQual() <= maxLowMAPQ) {
          lowMapqDepth++;
        }

        if (element.getMappingQual() >= minMappingQuality) {
          // deletions pass regardless of their base quality
          if (element.getQual() >= minBaseQuality || element.isDeletion()) {
            passingDepth++;
          }
        }
      }

      if (totalDepth == 0) {
        verdict = CalledState.NO_COVERAGE;
      } else if (totalDepth >= minDepthLowMAPQ
          && MathUtils.ratio(lowMapqDepth, totalDepth) >= maxLowMAPQFraction) {
        verdict = CalledState.POOR_MAPPING_QUALITY;
      } else if (passingDepth < minDepth) {
        verdict = CalledState.LOW_COVERAGE;
      } else if (maxDepth != -1 && totalDepth >= maxDepth) {
        // -1 switches the upper depth limit off
        verdict = CalledState.EXCESSIVE_COVERAGE;
      } else {
        verdict = CalledState.CALLABLE;
      }
    }

    return new CallableBaseState(
        getToolkit().getGenomeLocParser(), context.getLocation(), verdict);
  }
Beispiel #5
0
  /**
   * Annotates the variants found at the current locus and hands them to the VCF writer.
   *
   * <p>Variants are annotated only when the reference base maps to a base index (that is, it is
   * not ambiguous); otherwise they are written out unchanged. All variants are annotated before
   * the first one is written.
   *
   * @param tracker the meta-data tracker
   * @param ref the reference base
   * @param context the context for the given locus
   * @return 0 if the tracker is null or no variants are present at the locus, 1 otherwise
   */
  public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    if (tracker == null) {
      return 0;
    }

    // all variant contexts bound at this location
    final Collection<VariantContext> siteVariants =
        tracker.getValues(variantCollection.variants, context.getLocation());
    if (siteVariants.isEmpty()) {
      return 0;
    }

    final boolean refIsUnambiguous = BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1;

    final Collection<VariantContext> toWrite;
    if (refIsUnambiguous) {
      final Map<String, AlignmentContext> contextsBySample =
          AlignmentContextUtils.splitContextBySampleName(context.getBasePileup());
      toWrite = new ArrayList<>(siteVariants.size());
      for (VariantContext variant : siteVariants) {
        toWrite.add(engine.annotateContext(tracker, ref, contextsBySample, variant));
      }
    } else {
      // ambiguous reference base: pass the variants through unannotated
      toWrite = siteVariants;
    }

    for (VariantContext outgoing : toWrite) {
      vcfWriter.add(outgoing);
    }

    return 1;
  }