  /**
   * P(somatic | D) ∝ P(somatic) * P(D | somatic) = P(somatic) * P(D | normals are ref) * P(D |
   * tumors are non-ref)
   *
   * <p>P(! somatic | D) ∝ P(! somatic) * P(D | ! somatic) = P(! somatic) * ( P(D | normals are
   * non-ref) * P(D | tumors are non-ref) [germline] + P(D | normals are ref) * P(D | tumors are
   * ref) [no variant at all] )
   *
   * @param vc the variant context to evaluate, with genotypes for the tumor and normal samples
   * @return the log10 odds (LOD) of the somatic model versus the not-somatic model
   */
  private double calcLog10pSomatic(final VariantContext vc) {
    // walk over tumors
    double log10pNonRefInTumors = log10pNonRefInSamples(vc, tumorSample);
    double log10pRefInTumors = log10pRefInSamples(vc, tumorSample);

    // walk over normals
    double log10pNonRefInNormals = log10pNonRefInSamples(vc, normalSample);
    double log10pRefInNormals = log10pRefInSamples(vc, normalSample);

    // priors
    double log10pSomaticPrior = QualityUtils.qualToErrorProbLog10(somaticPriorQ);
    double log10pNotSomaticPrior = Math.log10(1 - QualityUtils.qualToErrorProb(somaticPriorQ));

    double log10pNotSomaticGermline = log10pNonRefInNormals + log10pNonRefInTumors;
    double log10pNotSomaticNoVariant = log10pRefInNormals + log10pRefInTumors;

    double log10pNotSomatic =
        log10pNotSomaticPrior
            + MathUtils.log10sumLog10(
                new double[] {log10pNotSomaticGermline, log10pNotSomaticNoVariant});
    double log10pSomatic = log10pSomaticPrior + log10pNonRefInTumors + log10pRefInNormals;
    double lod = log10pSomatic - log10pNotSomatic;

    // if either model's log10 probability underflowed to -Infinity, the LOD is not finite;
    // return a large negative sentinel instead
    return Double.isInfinite(lod) ? -10000 : lod;
  }
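
  // Illustrative sketch, not part of the original class: the marginalization over the germline
  // and no-variant hypotheses above relies on a numerically stable sum in log10 space, in the
  // spirit of MathUtils.log10sumLog10 (whose exact GATK signature may differ). A minimal version
  // of that log-sum-exp trick in base 10:
  private static double log10SumLog10Sketch(final double[] log10Values) {
    double max = Double.NEGATIVE_INFINITY;
    for (final double v : log10Values) {
      max = Math.max(max, v);
    }
    if (Double.isInfinite(max)) {
      // all terms are -Infinity (or one is +Infinity); returning max avoids NaN arithmetic below
      return max;
    }
    double scaledSum = 0.0;
    for (final double v : log10Values) {
      scaledSum += Math.pow(10.0, v - max); // rescale by the largest term before summing
    }
    return max + Math.log10(scaledSum);
  }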
  static {
    baseMatchArray = new double[MAX_CACHED_QUAL + 1];
    baseMismatchArray = new double[MAX_CACHED_QUAL + 1];
    for (int k = 1; k <= MAX_CACHED_QUAL; k++) {
      double baseProb = QualityUtils.qualToProb((byte) k);

      baseMatchArray[k] = probToQual(baseProb);
      baseMismatchArray[k] = (double) (k);
    }
  }
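
  // Illustrative sketch, not part of the original class: the Phred relationships the cache above
  // is built from. QualityUtils.qualToProb(q) is taken here to mean 1 - 10^(-q/10) and
  // QualityUtils.qualToErrorProb(q) to mean 10^(-q/10); e.g. q = 30 corresponds to an error
  // probability of 0.001 and a correct-call probability of 0.999.
  static double qualToErrorProbSketch(final int qual) {
    return Math.pow(10.0, -qual / 10.0); // Phred definition: P(error) = 10^(-Q/10)
  }

  static double qualToProbSketch(final int qual) {
    return 1.0 - qualToErrorProbSketch(qual); // P(correct base call) = 1 - P(error)
  }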
  /**
   * Initializes the matrix that holds all the constants related to the editing distance between the
   * read and the haplotype.
   *
   * @param haplotypeBases the bases of the haplotype
   * @param readBases the bases of the read
   * @param readQuals the base quality scores of the read
   * @param startIndex where to start updating the distanceMatrix (in case this read is similar to
   *     the previous read)
   */
  public void initializePriors(
      final byte[] haplotypeBases,
      final byte[] readBases,
      final byte[] readQuals,
      final int startIndex) {

    // initialize the prior matrix for all combinations of read x haplotype bases,
    // relying on the fact that Java initializes arrays with 0.0, so the padding row
    // and column at index 0 never need to be filled in explicitly.

    for (int i = 0; i < readBases.length; i++) {
      final byte x = readBases[i];
      final byte qual = readQuals[i];
      for (int j = startIndex; j < haplotypeBases.length; j++) {
        final byte y = haplotypeBases[j];
        prior[i + 1][j + 1] =
            (x == y || x == (byte) 'N' || y == (byte) 'N'
                ? QualityUtils.qualToProb(qual)
                : (QualityUtils.qualToErrorProb(qual)
                    / (doNotUseTristateCorrection ? 1.0 : TRISTATE_CORRECTION)));
      }
    }
  }
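
  // Illustrative values, not part of the original class: for a single read base with quality 30,
  // the loop above (assuming TRISTATE_CORRECTION is 3.0, i.e. the error probability is split
  // evenly across the three possible mismatching bases) produces priors of roughly
  //   matching base, or an 'N' on either side:   ~0.999        (qualToProb(30))
  //   mismatching base, tristate correction on:  ~0.001 / 3.0  (qualToErrorProb(30) / 3.0)
  //   mismatching base, tristate correction off: ~0.001        (qualToErrorProb(30))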
  @Override
  public Map<String, Object> annotate(
      final RefMetaDataTracker tracker,
      final AnnotatorCompatible walker,
      final ReferenceContext ref,
      final Map<String, AlignmentContext> stratifiedContexts,
      final VariantContext vc,
      final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {

    final GenotypesContext genotypes = vc.getGenotypes();
    if (genotypes == null || genotypes.size() < MIN_SAMPLES) {
      if (!warningLogged) {
        logger.warn("Too few genotypes");
        warningLogged = true;
      }
      return null;
    }

    int refCount = 0;
    int hetCount = 0;
    int homCount = 0;
    for (final Genotype g : genotypes) {
      if (g.isNoCall()) continue;

      // TODO - fix me:
      // Right now we just ignore genotypes that are not confident, but this throws off
      //  our HW ratios.  More analysis is needed to determine the right thing to do when
      //  the genotyper cannot decide whether a given sample is het or hom var.
      if (g.getLog10PError() > MIN_LOG10_PERROR) continue;

      if (g.isHomRef()) refCount++;
      else if (g.isHet()) hetCount++;
      else homCount++;
    }

    if (refCount + hetCount + homCount == 0) return null;

    double pvalue = HardyWeinbergCalculation.hwCalculate(refCount, hetCount, homCount);
    // System.out.println(refCount + " " + hetCount + " " + homCount + " " + pvalue);
    Map<String, Object> map = new HashMap<>();
    map.put(getKeyNames().get(0), String.format("%.1f", QualityUtils.phredScaleErrorRate(pvalue)));
    return map;
  }
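
  // Illustrative sketch, not the HardyWeinbergCalculation.hwCalculate used above (which may
  // implement an exact test): a plain chi-square Hardy-Weinberg check over the same three
  // genotype counts. For refCount = 60, hetCount = 30, homCount = 10 the reference-allele
  // frequency is 0.75, the expected counts are 56.25 / 37.5 / 6.25, and the statistic is 4.0.
  static double hardyWeinbergChiSquareSketch(
      final int refCount, final int hetCount, final int homCount) {
    final double n = refCount + hetCount + homCount;
    final double p = (2.0 * refCount + hetCount) / (2.0 * n); // reference allele frequency
    final double q = 1.0 - p;                                 // alternate allele frequency
    final double expectedRef = p * p * n;                     // expected hom-ref genotypes
    final double expectedHet = 2.0 * p * q * n;               // expected het genotypes
    final double expectedHom = q * q * n;                     // expected hom-var genotypes
    return square(refCount - expectedRef) / expectedRef
        + square(hetCount - expectedHet) / expectedHet
        + square(homCount - expectedHom) / expectedHom;
  }

  private static double square(final double x) {
    return x * x;
  }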