Code Example #1
  public double[] computeReadHaplotypeLikelihoods(
      ReadBackedPileup pileup, HashMap<Allele, Haplotype> haplotypesInVC) {
    double[][] haplotypeLikelihoodMatrix = new double[haplotypesInVC.size()][haplotypesInVC.size()];
    double[][] readLikelihoods = new double[pileup.getReads().size()][haplotypesInVC.size()];
    int i = 0;
    for (GATKSAMRecord read : pileup.getReads()) {
      if (ReadUtils.is454Read(read)) {
        continue;
      }
      // for each read/haplotype combination, compute the likelihood, i.e. -10*log10(Pr(R | Hi))
      // = sum_j(-10*log10(Pr(R_j | Hi))), since the bases of a read are assumed to be independent
      int j = 0;
      for (Map.Entry<Allele, Haplotype> a : haplotypesInVC.entrySet()) {
        readLikelihoods[i][j] = computeReadLikelihoodGivenHaplotype(a.getValue(), read);
        if (DEBUG) {
          System.out.print(read.getReadName() + " ");

          System.out.format(
              "%d %d S:%d US:%d E:%d UE:%d C:%s %3.4f\n",
              i,
              j,
              read.getAlignmentStart(),
              read.getUnclippedStart(),
              read.getAlignmentEnd(),
              read.getUnclippedEnd(),
              read.getCigarString(),
              readLikelihoods[i][j]);
        }
        j++;
      }
      i++;
    }

    for (i = 0; i < haplotypesInVC.size(); i++) {
      for (int j = i; j < haplotypesInVC.size(); j++) {
        // combine the likelihoods of haplotypes i and j into a diploid likelihood:
        // log10 L(Hi, Hj) = sum_reads log10( Pr(R|Hi)/2 + Pr(R|Hj)/2 )
        // readLikelihoods[k][j] holds -10*log10(Pr(R_k | H_j)); divide by -10 below to recover log10
        double[] readLikelihood = new double[2]; // diploid sample
        for (int readIdx = 0; readIdx < pileup.getReads().size(); readIdx++) {
          readLikelihood[0] = -readLikelihoods[readIdx][i] / 10;
          readLikelihood[1] = -readLikelihoods[readIdx][j] / 10;

          // Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1 + 10^x2) - log10(2).
          // The first term is approximated by a Jacobian log with a table lookup;
          // the second term is the same constant for every haplotype pair, so it is ignored.
          haplotypeLikelihoodMatrix[i][j] +=
              MathUtils.approximateLog10SumLog10(readLikelihood[0], readLikelihood[1]);
        }
      }
    }

    return getHaplotypeLikelihoods(haplotypeLikelihoodMatrix);
  }
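
The pair-combination loop above relies on MathUtils.approximateLog10SumLog10 to evaluate log10(10^x1 + 10^x2) cheaply. Below is a minimal sketch of such a Jacobian-log approximation with a table lookup; the step size and cutoff here are illustrative assumptions, not the constants used by GATK's MathUtils.

  // Minimal sketch of a Jacobian-log approximation of log10(10^x1 + 10^x2),
  // i.e. max(x1, x2) + log10(1 + 10^(-|x1 - x2|)), with the correction term
  // read from a precomputed table. TABLE_STEP and MAX_DIFF are assumptions.
  static final class ApproximateLog10Sum {
    private static final double TABLE_STEP = 0.001; // assumed table resolution
    private static final double MAX_DIFF = 11.0;    // beyond this the smaller term is negligible
    private static final double[] JACOBIAN_TABLE = buildTable();

    private static double[] buildTable() {
      final double[] table = new double[(int) (MAX_DIFF / TABLE_STEP) + 1];
      for (int k = 0; k < table.length; k++)
        table[k] = Math.log10(1.0 + Math.pow(10.0, -k * TABLE_STEP));
      return table;
    }

    static double approximateLog10SumLog10(final double x1, final double x2) {
      final double big = Math.max(x1, x2);
      final double diff = Math.abs(x1 - x2);
      if (diff >= MAX_DIFF) return big; // smaller term is below the table's resolution
      return big + JACOBIAN_TABLE[(int) Math.round(diff / TABLE_STEP)];
    }
  }

For example, approximateLog10SumLog10(-3.0, -3.0) returns about -2.699, i.e. log10(2 * 10^-3), matching the exact sum.
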
Code Example #2
  /**
   * Computes an allele-biased (downsampled) version of the given pileup
   *
   * @param pileup the original pileup
   * @param downsamplingFraction the fraction of total reads to remove per allele
   * @return the allele-biased pileup
   */
  public static ReadBackedPileup createAlleleBiasedBasePileup(
      final ReadBackedPileup pileup, final double downsamplingFraction) {
    // special case removal of all or no reads
    if (downsamplingFraction <= 0.0) return pileup;
    if (downsamplingFraction >= 1.0)
      return new ReadBackedPileupImpl(pileup.getLocation(), new ArrayList<PileupElement>());

    final PileupElementList[] alleleStratifiedElements = new PileupElementList[4];
    for (int i = 0; i < 4; i++) alleleStratifiedElements[i] = new PileupElementList();

    // start by stratifying the reads by the alleles they represent at this position
    for (final PileupElement pe : pileup) {
      final int baseIndex = BaseUtils.simpleBaseToBaseIndex(pe.getBase());
      if (baseIndex != -1) alleleStratifiedElements[baseIndex].add(pe);
    }

    // make a listing of allele counts and calculate the total count
    final int[] alleleCounts = calculateAlleleCounts(alleleStratifiedElements);
    final int totalAlleleCount = (int) MathUtils.sum(alleleCounts);

    // do smart down-sampling
    final int numReadsToRemove = (int) (totalAlleleCount * downsamplingFraction); // floor
    final int[] targetAlleleCounts = runSmartDownsampling(alleleCounts, numReadsToRemove);

    final HashSet<PileupElement> readsToRemove = new HashSet<PileupElement>(numReadsToRemove);
    for (int i = 0; i < 4; i++) {
      final PileupElementList alleleList = alleleStratifiedElements[i];
      // if we don't need to remove any reads, then don't
      if (alleleCounts[i] > targetAlleleCounts[i])
        readsToRemove.addAll(
            downsampleElements(
                alleleList, alleleCounts[i], alleleCounts[i] - targetAlleleCounts[i]));
    }

    // we need to keep the reads sorted because the FragmentUtils code will expect them in
    // coordinate order and will fail otherwise
    final List<PileupElement> readsToKeep =
        new ArrayList<PileupElement>(totalAlleleCount - numReadsToRemove);
    for (final PileupElement pe : pileup) {
      if (!readsToRemove.contains(pe)) {
        readsToKeep.add(pe);
      }
    }

    return new ReadBackedPileupImpl(
        pileup.getLocation(), new ArrayList<PileupElement>(readsToKeep));
  }
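
createAlleleBiasedBasePileup delegates to helpers that are not shown here (calculateAlleleCounts, runSmartDownsampling, downsampleElements). Below is a minimal sketch of the per-allele removal step under simplifying assumptions: the allele's elements are exposed as a List<PileupElement> and removals are chosen uniformly at random. The actual GATK helper may behave differently, for example by preferring to drop whole fragments.

  // Minimal sketch (an assumption, not the downsampleElements used above):
  // choose numToRemove elements uniformly at random from one allele's list.
  // Requires java.util.{ArrayList, Collections, HashSet, List, Random, Set}.
  private static Set<PileupElement> downsampleElementsSketch(
      final List<PileupElement> alleleElements, final int numToRemove) {
    final List<PileupElement> shuffled = new ArrayList<PileupElement>(alleleElements);
    Collections.shuffle(shuffled, new Random());
    final int n = Math.min(numToRemove, shuffled.size());
    return new HashSet<PileupElement>(shuffled.subList(0, n));
  }
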
Code Example #3
  public Event map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {

    boolean hasIndel = false;
    boolean hasInsertion = false;
    boolean hasPointEvent = false;

    int furthestStopPos = -1;

    // look at the rods for indels or SNPs
    if (tracker != null) {
      for (VariantContext vc : tracker.getValues(known)) {
        switch (vc.getType()) {
          case INDEL:
            hasIndel = true;
            if (vc.isSimpleInsertion()) hasInsertion = true;
            break;
          case SNP:
            hasPointEvent = true;
            break;
          case MIXED:
            hasPointEvent = true;
            hasIndel = true;
            if (vc.isSimpleInsertion()) hasInsertion = true;
            break;
          default:
            break;
        }
        if (hasIndel) furthestStopPos = vc.getEnd();
      }
    }

    // look at the normal context to get deletions and positions with high entropy
    final ReadBackedPileup pileup = context.getBasePileup();

    int mismatchQualities = 0, totalQualities = 0;
    final byte refBase = ref.getBase();
    for (PileupElement p : pileup) {

      // check the ends of the reads to see how far they extend
      furthestStopPos = Math.max(furthestStopPos, p.getRead().getAlignmentEnd());

      // is it a deletion or insertion?
      if (p.isDeletion() || p.isBeforeInsertion()) {
        hasIndel = true;
        if (p.isBeforeInsertion()) hasInsertion = true;
      }

      // look for mismatches
      else if (lookForMismatchEntropy) {
        if (p.getBase() != refBase) mismatchQualities += p.getQual();
        totalQualities += p.getQual();
      }
    }

    // make sure we're supposed to look for high entropy
    if (lookForMismatchEntropy
        && pileup.getNumberOfElements() >= minReadsAtLocus
        && (double) mismatchQualities / (double) totalQualities >= mismatchThreshold)
      hasPointEvent = true;

    // return null if no event occurred
    if (!hasIndel && !hasPointEvent) return null;

    // return null if we didn't find any usable reads/rods associated with the event
    if (furthestStopPos == -1) return null;

    GenomeLoc eventLoc = context.getLocation();
    if (hasInsertion)
      eventLoc =
          getToolkit()
              .getGenomeLocParser()
              .createGenomeLoc(eventLoc.getContig(), eventLoc.getStart(), eventLoc.getStart() + 1);

    EVENT_TYPE eventType =
        (hasIndel
            ? (hasPointEvent ? EVENT_TYPE.BOTH : EVENT_TYPE.INDEL_EVENT)
            : EVENT_TYPE.POINT_EVENT);

    return new Event(eventLoc, furthestStopPos, eventType);
  }
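
map returns an Event built from the triggering location, the furthest stop position reached by supporting reads or rods, and the classified event type. Below is a minimal sketch of the enum and value holder that the calls above appear to assume; the field names are guesses inferred from the three-argument constructor, not the walker's actual inner classes.

  // Minimal sketch (inferred from the constructor call in map(), not the
  // walker's actual definitions).
  enum EVENT_TYPE { POINT_EVENT, INDEL_EVENT, BOTH }

  static class Event {
    public final GenomeLoc loc;        // location the event was triggered at
    public final int furthestStopPos;  // rightmost position reached by a supporting read or rod
    public final EVENT_TYPE type;      // POINT_EVENT, INDEL_EVENT, or BOTH

    public Event(final GenomeLoc loc, final int furthestStopPos, final EVENT_TYPE type) {
      this.loc = loc;
      this.furthestStopPos = furthestStopPos;
      this.type = type;
    }
  }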