示例#1
0
  private Map<String, Object> calculateIC(final VariantContext vc) {
    final GenotypesContext genotypes =
        (founderIds == null || founderIds.isEmpty())
            ? vc.getGenotypes()
            : vc.getGenotypes(founderIds);
    if (genotypes == null || genotypes.size() < MIN_SAMPLES) return null;

    int idxAA = 0, idxAB = 1, idxBB = 2;

    if (!vc.isBiallelic()) {
      // for non-bliallelic case, do test with most common alt allele.
      // Get then corresponding indeces in GL vectors to retrieve GL of AA,AB and BB.
      int[] idxVector = vc.getGLIndecesOfAlternateAllele(vc.getAltAlleleWithHighestAlleleCount());
      idxAA = idxVector[0];
      idxAB = idxVector[1];
      idxBB = idxVector[2];
    }

    double refCount = 0.0;
    double hetCount = 0.0;
    double homCount = 0.0;
    int N = 0; // number of samples that have likelihoods
    for (final Genotype g : genotypes) {
      if (g.isNoCall() || !g.hasLikelihoods()) continue;

      if (g.getPloidy() != 2) // only work for diploid samples
      continue;

      N++;
      final double[] normalizedLikelihoods =
          MathUtils.normalizeFromLog10(g.getLikelihoods().getAsVector());
      refCount += normalizedLikelihoods[idxAA];
      hetCount += normalizedLikelihoods[idxAB];
      homCount += normalizedLikelihoods[idxBB];
    }

    if (N < MIN_SAMPLES) {
      return null;
    }

    final double p =
        (2.0 * refCount + hetCount)
            / (2.0 * (refCount + hetCount + homCount)); // expected reference allele frequency
    final double q = 1.0 - p; // expected alternative allele frequency
    final double F = 1.0 - (hetCount / (2.0 * p * q * (double) N)); // inbreeding coefficient

    Map<String, Object> map = new HashMap<String, Object>();
    map.put(getKeyNames().get(0), String.format("%.4f", F));
    return map;
  }
示例#2
0
  public void writeBeagleOutput(
      VariantContext preferredVC, VariantContext otherVC, boolean isValidationSite, double prior) {
    GenomeLoc currentLoc =
        VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(), preferredVC);
    StringBuffer beagleOut = new StringBuffer();

    String marker = String.format("%s:%d ", currentLoc.getContig(), currentLoc.getStart());
    beagleOut.append(marker);
    if (markers != null)
      markers.append(marker).append("\t").append(Integer.toString(markerCounter++)).append("\t");
    for (Allele allele : preferredVC.getAlleles()) {
      String bglPrintString;
      if (allele.isNoCall() || allele.isNull()) bglPrintString = "-";
      else bglPrintString = allele.getBaseString(); // get rid of * in case of reference allele

      beagleOut.append(String.format("%s ", bglPrintString));
      if (markers != null) markers.append(bglPrintString).append("\t");
    }
    if (markers != null) markers.append("\n");

    GenotypesContext preferredGenotypes = preferredVC.getGenotypes();
    GenotypesContext otherGenotypes = goodSite(otherVC) ? otherVC.getGenotypes() : null;
    for (String sample : samples) {
      boolean isMaleOnChrX = CHECK_IS_MALE_ON_CHR_X && getSample(sample).getGender() == Gender.MALE;

      Genotype genotype;
      boolean isValidation;
      // use sample as key into genotypes structure
      if (preferredGenotypes.containsSample(sample)) {
        genotype = preferredGenotypes.get(sample);
        isValidation = isValidationSite;
      } else if (otherGenotypes != null && otherGenotypes.containsSample(sample)) {
        genotype = otherGenotypes.get(sample);
        isValidation = !isValidationSite;
      } else {
        // there is magically no genotype for this sample.
        throw new StingException(
            "Sample "
                + sample
                + " arose with no genotype in variant or validation VCF. This should never happen.");
      }

      /*
       * Use likelihoods if: is validation, prior is negative; or: is not validation, has genotype key
       */
      double[] log10Likelihoods = null;
      if ((isValidation && prior < 0.0) || genotype.hasLikelihoods()) {
        log10Likelihoods = genotype.getLikelihoods().getAsVector();

        // see if we need to randomly mask out genotype in this position.
        if (GenomeAnalysisEngine.getRandomGenerator().nextDouble() <= insertedNoCallRate) {
          // we are masking out this genotype
          log10Likelihoods =
              isMaleOnChrX ? HAPLOID_FLAT_LOG10_LIKELIHOODS : DIPLOID_FLAT_LOG10_LIKELIHOODS;
        }

        if (isMaleOnChrX) {
          log10Likelihoods[1] = -255; // todo -- warning this is dangerous for multi-allele case
        }
      }
      /** otherwise, use the prior uniformly */
      else if (!isValidation && genotype.isCalled() && !genotype.hasLikelihoods()) {
        // hack to deal with input VCFs with no genotype likelihoods.  Just assume the called
        // genotype
        // is confident.  This is useful for Hapmap and 1KG release VCFs.
        double AA = (1.0 - prior) / 2.0;
        double AB = (1.0 - prior) / 2.0;
        double BB = (1.0 - prior) / 2.0;

        if (genotype.isHomRef()) {
          AA = prior;
        } else if (genotype.isHet()) {
          AB = prior;
        } else if (genotype.isHomVar()) {
          BB = prior;
        }

        log10Likelihoods = MathUtils.toLog10(new double[] {AA, isMaleOnChrX ? 0.0 : AB, BB});
      } else {
        log10Likelihoods =
            isMaleOnChrX ? HAPLOID_FLAT_LOG10_LIKELIHOODS : DIPLOID_FLAT_LOG10_LIKELIHOODS;
      }

      writeSampleLikelihoods(beagleOut, preferredVC, log10Likelihoods);
    }

    beagleWriter.println(beagleOut.toString());
  }