コード例 #1
0
ファイル: MVLikelihoodRatio.java プロジェクト: a3a/gatk
  /**
   * Produces the "MVLR" annotation for a variant record.
   *
   * <p>On the first call the {@code mendelianViolation} calculator is created lazily; this
   * requires {@code checkAndSetSamples} to accept the walker's sample database and the walker to
   * be a {@code VariantAnnotator} (its {@code minGenotypeQualityP} is read). The annotation is
   * only emitted when the mother, father and child all have a genotype with likelihoods in
   * {@code vc}.
   *
   * @param tracker unused by this method
   * @param walker the calling walker; cast to {@code Walker} and {@code VariantAnnotator}
   * @param ref unused by this method
   * @param stratifiedContexts unused by this method
   * @param vc the variant record to annotate
   * @return a map holding the "MVLR" entry, or an empty map when any trio member lacks a genotype
   *     with likelihoods
   * @throws UserException if {@code checkAndSetSamples} rejects the walker's sample database
   */
  public Map<String, Object> annotate(
      RefMetaDataTracker tracker,
      AnnotatorCompatible walker,
      ReferenceContext ref,
      Map<String, AlignmentContext> stratifiedContexts,
      VariantContext vc) {
    // Lazy one-time initialisation of the violation calculator.
    if (mendelianViolation == null) {
      final boolean trioResolved = checkAndSetSamples(((Walker) walker).getSampleDB());
      if (!trioResolved) {
        throw new UserException(
            "Mendelian violation annotation can only be used from the Variant Annotator, and must be provided a valid PED file (-ped) from the command line containing only 1 trio.");
      }
      mendelianViolation =
          new MendelianViolation(((VariantAnnotator) walker).minGenotypeQualityP);
    }

    final Map<String, Object> annotations = new HashMap<String, Object>(1);

    // Bail out with an empty map as soon as one trio member is unusable.
    if (!vc.hasGenotype(motherId) || !vc.getGenotype(motherId).hasLikelihoods()) {
      return annotations;
    }
    if (!vc.hasGenotype(fatherId) || !vc.getGenotype(fatherId).hasLikelihoods()) {
      return annotations;
    }
    if (!vc.hasGenotype(childId) || !vc.getGenotype(childId).hasLikelihoods()) {
      return annotations;
    }

    annotations.put(
        "MVLR", mendelianViolation.violationLikelihoodRatio(vc, motherId, fatherId, childId));
    return annotations;
  }
コード例 #2
0
ファイル: SelectVariants.java プロジェクト: singerma/gatk
  /**
   * Decides whether {@code vc} is concordant with the comparison records.
   *
   * <p>When no samples were specified, the mere presence of at least one comparison record counts
   * as concordance. Otherwise every tracked sample present in {@code vc} must have, in at least
   * one comparison record, a genotype that {@code haveSameGenotypes} accepts as equal.
   *
   * @param vc the variant record being tested; {@code null} yields {@code false}
   * @param compVCs the comparison records at the same locus; {@code null} or empty yields
   *     {@code false}
   * @return {@code true} if the record is concordant under the rules above
   */
  private boolean isConcordant(VariantContext vc, Collection<VariantContext> compVCs) {
    if (vc == null || compVCs == null || compVCs.isEmpty()) {
      return false;
    }

    // if we're not looking for specific samples then the fact that we have both VCs is enough to
    // call it concordant.
    if (NO_SAMPLES_SPECIFIED) {
      return true;
    }

    // Walk the samples of this record that the user is tracking. The sample-name set returned by
    // the VariantContext is only read, never modified: calling retainAll() on it would alter (or
    // fail on) a collection this method does not own.
    for (String sample : vc.getSampleNames()) {
      if (!samples.contains(sample)) {
        continue;
      }

      // The variant-side genotype does not depend on the comparison record, so fetch it once.
      final Genotype varG = vc.getGenotype(sample);

      boolean foundSample = false;
      for (VariantContext compVC : compVCs) {
        if (haveSameGenotypes(varG, compVC.getGenotype(sample))) {
          foundSample = true;
          break;
        }
      }

      // if at least one sample doesn't have the same genotype, we don't have concordance
      if (!foundSample) {
        return false;
      }
    }
    return true;
  }
コード例 #3
0
ファイル: SelectVariants.java プロジェクト: singerma/gatk
  /**
   * Decides whether {@code vc} is discordant with respect to the comparison records.
   *
   * <p>When no samples were specified, discordance simply means there is no comparison record.
   * Otherwise the record is discordant as soon as one of the tracked samples has a variant in
   * {@code vc} (per {@code sampleHasVariant}) that is not matched by a variant for the same
   * sample in any comparison record.
   *
   * @param vc the variant record being tested; {@code null} yields {@code false}
   * @param compVCs the comparison records (discordance track) at the same locus; may be
   *     {@code null}
   * @return {@code true} if the record is discordant under the rules above
   */
  private boolean isDiscordant(VariantContext vc, Collection<VariantContext> compVCs) {
    if (vc == null) {
      return false;
    }

    // Without specific samples, the absence of any comparison record is what makes it discordant.
    if (NO_SAMPLES_SPECIFIED) {
      return (compVCs == null || compVCs.isEmpty());
    }

    final Map<String, Genotype> trackedGenotypes = vc.getGenotypes(samples);
    for (Genotype sampleGenotype : trackedGenotypes.values()) {
      // Samples without a variant call in vc cannot make the record discordant.
      if (!sampleHasVariant(sampleGenotype)) {
        continue;
      }

      // A variant here and no comparison track at all: discordant.
      if (compVCs == null) {
        return true;
      }

      // Search every record of the comparison track for a variant in the same sample.
      boolean matchedInComp = false;
      for (VariantContext compVC : compVCs) {
        matchedInComp = sampleHasVariant(compVC.getGenotype(sampleGenotype.getSampleName()));
        if (matchedInComp) {
          break;
        }
      }

      // One sample whose variant is missing from the whole comparison track is enough.
      if (!matchedInComp) {
        return true;
      }
    }

    // Every sample with a variant in vc also has one in the comparison track.
    return false;
  }
コード例 #4
0
ファイル: PhasingUtils.java プロジェクト: nh13/gatk
  /**
   * Checks that, for every sample genotype in {@code vc1}, the allele segregation relative to the
   * same sample's genotype in {@code vc2} is known (as judged by
   * {@code alleleSegregationIsKnown}).
   *
   * @param vc1 the first record; its genotypes drive the iteration
   * @param vc2 the second record; genotypes are looked up by sample name
   * @return {@code true} if no sample fails the segregation check
   */
  static boolean allSamplesAreMergeable(VariantContext vc1, VariantContext vc2) {
    for (final Genotype first : vc1.getGenotypes()) {
      final Genotype second = vc2.getGenotype(first.getSampleName());

      // Mergeable when phased, or when either genotype is homozygous (per the original note).
      final boolean segregationKnown = alleleSegregationIsKnown(first, second);
      if (!segregationKnown) {
        return false;
      }
    }

    return true;
  }
コード例 #5
0
ファイル: SelectVariants.java プロジェクト: singerma/gatk
  /**
   * Helper method to subset a VC record, modifying some metadata stored in the INFO field (i.e. AN,
   * AC, AF, DP).
   *
   * <p>The returned record contains only the genotypes of the requested samples. Chromosome
   * counts are recomputed for the subset, and "DP" is set to the sum of the per-sample "DP"
   * values over called, unfiltered genotypes. When {@code KEEP_ORIGINAL_CHR_COUNTS} is set, the
   * pre-existing AC/AF/AN values are preserved under "AC_Orig"/"AF_Orig"/"AN_Orig".
   *
   * @param vc the VariantContext record to subset
   * @param samples the samples to extract; if {@code null} or empty, {@code vc} is returned as is
   * @return the subsetted VariantContext
   */
  private VariantContext subsetRecord(VariantContext vc, Set<String> samples) {
    if (samples == null || samples.isEmpty()) {
      return vc;
    }

    ArrayList<Genotype> genotypes = new ArrayList<Genotype>();
    for (Map.Entry<String, Genotype> genotypePair : vc.getGenotypes().entrySet()) {
      if (samples.contains(genotypePair.getKey())) {
        genotypes.add(genotypePair.getValue());
      }
    }

    VariantContext sub = vc.subContextFromGenotypes(genotypes, vc.getAlleles());

    // if we have fewer alternate alleles in the selected VC than in the original VC, we need to
    // strip out the GL/PLs (because they are no longer accurate). Strip them from the SUBSET's
    // genotypes: using the original record's genotypes here would put the unselected samples
    // back into the result.
    if (vc.getAlleles().size() != sub.getAlleles().size()) {
      sub = VariantContext.modifyGenotypes(sub, VariantContextUtils.stripPLs(sub.getGenotypes()));
    }

    HashMap<String, Object> attributes = new HashMap<String, Object>(sub.getAttributes());

    // Sum the depth over called, unfiltered genotypes of the subset.
    int depth = 0;
    for (String sample : sub.getSampleNames()) {
      Genotype g = sub.getGenotype(sample);
      if (!g.isNotFiltered() || !g.isCalled()) {
        continue;
      }

      // Go through toString() rather than a String cast so that a DP stored as a number does not
      // fail with a ClassCastException.
      final Object dpValue = g.getAttribute("DP");
      if (dpValue == null) {
        continue;
      }
      final String dp = dpValue.toString();
      if (!dp.equals(VCFConstants.MISSING_DEPTH_v3) && !dp.equals(VCFConstants.MISSING_VALUE_v4)) {
        depth += Integer.parseInt(dp);
      }
    }

    // Keep the original counts before they are overwritten by the recomputation below.
    if (KEEP_ORIGINAL_CHR_COUNTS) {
      if (attributes.containsKey(VCFConstants.ALLELE_COUNT_KEY)) {
        attributes.put("AC_Orig", attributes.get(VCFConstants.ALLELE_COUNT_KEY));
      }
      if (attributes.containsKey(VCFConstants.ALLELE_FREQUENCY_KEY)) {
        attributes.put("AF_Orig", attributes.get(VCFConstants.ALLELE_FREQUENCY_KEY));
      }
      if (attributes.containsKey(VCFConstants.ALLELE_NUMBER_KEY)) {
        attributes.put("AN_Orig", attributes.get(VCFConstants.ALLELE_NUMBER_KEY));
      }
    }

    VariantContextUtils.calculateChromosomeCounts(sub, attributes, false);
    attributes.put("DP", depth);

    sub = VariantContext.modifyAttributes(sub, attributes);

    return sub;
  }
コード例 #6
0
ファイル: PhasingUtils.java プロジェクト: nh13/gatk
  /**
   * Reports whether any sample carries a non-reference allele at both sites in the same allele
   * position, i.e. the i-th allele of its genotype in {@code vc1} and the i-th allele of its
   * genotype in {@code vc2} are both non-reference.
   *
   * <p>The alleles of the two genotypes are walked in lock-step; the original author notes this
   * is safe because {@code allSamplesAreMergeable()} has been checked beforehand.
   *
   * @param vc1 the first record; its genotypes drive the iteration
   * @param vc2 the second record; genotypes are looked up by sample name
   * @return {@code true} if at least one sample has such a pair of alternate alleles
   */
  static boolean someSampleHasDoubleNonReferenceAllele(VariantContext vc1, VariantContext vc2) {
    for (final Genotype first : vc1.getGenotypes()) {
      final Genotype second = vc2.getGenotype(first.getSampleName());

      final List<Allele> firstAlleles = first.getAlleles();
      final List<Allele> secondAlleles = second.getAlleles();

      final Iterator<Allele> secondIt = secondAlleles.iterator();
      final Iterator<Allele> firstIt = firstAlleles.iterator();
      while (firstIt.hasNext()) {
        final Allele a1 = firstIt.next();
        final Allele a2 = secondIt.next(); // this is OK, since allSamplesAreMergeable()

        // corresponding alleles are both alternate
        final boolean bothAlternate = a1.isNonReference() && a2.isNonReference();
        if (bothAlternate) {
          return true;
        }
      }
    }

    return false;
  }
コード例 #7
0
ファイル: PhasingUtils.java プロジェクト: nh13/gatk
  /**
   * Checks that alleles at {@code vc1} and {@code vc2} always segregate together across all
   * samples, the reference included: each allele at one site must be paired with exactly one
   * allele at the other site, in both directions.
   *
   * <p>The alleles of each sample's two genotypes are walked in lock-step.
   *
   * @param vc1 the first record; its genotypes drive the iteration
   * @param vc2 the second record; genotypes are looked up by sample name
   * @return {@code false} as soon as an allele is seen paired with two different partners,
   *     {@code true} otherwise
   */
  static boolean doubleAllelesSegregatePerfectlyAmongSamples(
      VariantContext vc1, VariantContext vc2) {
    final Map<Allele, Allele> site1ToSite2 = new HashMap<Allele, Allele>();
    final Map<Allele, Allele> site2ToSite1 = new HashMap<Allele, Allele>();

    // Seed both maps with the pairing implied by the reference genome.
    site1ToSite2.put(vc1.getReference(), vc2.getReference());
    site2ToSite1.put(vc2.getReference(), vc1.getReference());

    // Record each sample's pairings, verifying consistency with the reference and with every
    // sample seen so far.
    for (final Genotype first : vc1.getGenotypes()) {
      final Genotype second = vc2.getGenotype(first.getSampleName());

      final List<Allele> firstAlleles = first.getAlleles();
      final List<Allele> secondAlleles = second.getAlleles();

      final Iterator<Allele> secondIt = secondAlleles.iterator();
      final Iterator<Allele> firstIt = firstAlleles.iterator();
      while (firstIt.hasNext()) {
        final Allele a1 = firstIt.next();
        final Allele a2 = secondIt.next();

        final Allele knownPartnerOfA1 = site1ToSite2.get(a1);
        if (knownPartnerOfA1 != null) {
          if (!knownPartnerOfA1.equals(a2)) {
            // a1 segregates with two different alleles at site 2
            return false;
          }
        } else {
          site1ToSite2.put(a1, a2);
        }

        final Allele knownPartnerOfA2 = site2ToSite1.get(a2);
        if (knownPartnerOfA2 != null) {
          if (!knownPartnerOfA2.equals(a1)) {
            // a2 segregates with two different alleles at site 1
            return false;
          }
        } else {
          site2ToSite1.put(a2, a1);
        }
      }
    }

    return true;
  }
コード例 #8
0
ファイル: StandardVCFWriter.java プロジェクト: alexischr/gatk
  /**
   * Writes the per-sample genotype columns of one record to {@code mWriter}.
   *
   * <p>For every sample listed in the header, a field separator is written followed by that
   * sample's genotype fields in the order given by {@code genotypeFormatKeys}. Samples without a
   * genotype in {@code vc} get {@code VCFConstants.EMPTY_GENOTYPE}. Trailing missing values are
   * dropped from each sample's output.
   *
   * @param vc the variant context
   * @param alleleMap alleles for this context
   * @param genotypeFormatKeys Genotype formatting string
   * @throws IOException for writer
   * @throws ReviewedStingException if the GT key is requested but the sample's genotype is not
   *     available
   */
  private void addGenotypeData(
      VariantContext vc, Map<Allele, String> alleleMap, List<String> genotypeFormatKeys)
      throws IOException {

    for (String sample : mHeader.getGenotypeSamples()) {
      mWriter.write(VCFConstants.FIELD_SEPARATOR);

      Genotype g = vc.getGenotype(sample);
      if (g == null) {
        // TODO -- The VariantContext needs to know what the general ploidy is of the samples
        // TODO -- We shouldn't be assuming diploid genotypes here!
        mWriter.write(VCFConstants.EMPTY_GENOTYPE);
        continue;
      }

      // Collects the string form of every non-GT field; GT itself is written straight to the
      // writer below.
      List<String> attrs = new ArrayList<String>(genotypeFormatKeys.size());
      for (String key : genotypeFormatKeys) {

        if (key.equals(VCFConstants.GENOTYPE_KEY)) {
          if (!g.isAvailable()) {
            throw new ReviewedStingException(
                "GTs cannot be missing for some samples if they are available for others in the record");
          }

          // Alleles are joined by the phased or unphased separator depending on g.isPhased().
          writeAllele(g.getAllele(0), alleleMap);
          for (int i = 1; i < g.getPloidy(); i++) {
            mWriter.write(g.isPhased() ? VCFConstants.PHASED : VCFConstants.UNPHASED);
            writeAllele(g.getAllele(i), alleleMap);
          }

          continue;
        }

        // Default: the stored attribute, or the missing-value marker when the key is absent.
        Object val = g.hasAttribute(key) ? g.getAttribute(key) : VCFConstants.MISSING_VALUE_v4;

        // some exceptions
        if (key.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) {
          // GQ comes from the genotype's error estimate, not from the attribute map: missing when
          // no error is set, otherwise the phred-scaled quality capped at MAX_GENOTYPE_QUAL.
          if (Math.abs(g.getNegLog10PError() - Genotype.NO_NEG_LOG_10PERROR) < 1e-6)
            val = VCFConstants.MISSING_VALUE_v4;
          else {
            val = getQualValue(Math.min(g.getPhredScaledQual(), VCFConstants.MAX_GENOTYPE_QUAL));
          }
        } else if (key.equals(VCFConstants.GENOTYPE_FILTER_KEY)) {
          // FT: the sorted filter names joined by ";" when filtered; otherwise either the
          // "passes filters" or the "unfiltered" marker, depending on whether filters were applied.
          val =
              g.isFiltered()
                  ? ParsingUtils.join(";", ParsingUtils.sortList(g.getFilters()))
                  : (g.filtersWereApplied()
                      ? VCFConstants.PASSES_FILTERS_v4
                      : VCFConstants.UNFILTERED);
        }

        VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(key);
        if (metaData != null) {
          int numInFormatField = metaData.getCount(vc.getAlternateAlleles().size());
          if (numInFormatField > 1 && val.equals(VCFConstants.MISSING_VALUE_v4)) {
            // If we have a missing field but multiple values are expected, we need to construct a
            // new string with all fields.
            // For example, if Number=2, the string has to be ".,."
            StringBuilder sb = new StringBuilder(VCFConstants.MISSING_VALUE_v4);
            for (int i = 1; i < numInFormatField; i++) {
              sb.append(",");
              sb.append(VCFConstants.MISSING_VALUE_v4);
            }
            val = sb.toString();
          }
        }

        // assume that if key is absent, then the given string encoding suffices
        String outputValue = formatVCFField(val);
        if (outputValue != null) attrs.add(outputValue);
      }

      // strip off trailing missing values
      for (int i = attrs.size() - 1; i >= 0; i--) {
        if (isMissingValue(attrs.get(i))) attrs.remove(i);
        else break;
      }

      // Emit the collected fields. A separator precedes every field except the first one; the
      // first one also gets a separator when GT is among the format keys, because GT has already
      // been written ahead of it.
      for (int i = 0; i < attrs.size(); i++) {
        if (i > 0 || genotypeFormatKeys.contains(VCFConstants.GENOTYPE_KEY))
          mWriter.write(VCFConstants.GENOTYPE_FIELD_SEPARATOR);
        mWriter.write(attrs.get(i));
      }
    }
  }
コード例 #9
0
ファイル: SelectVariants.java プロジェクト: singerma/gatk
  /**
   * Subset VC record if necessary and emit the modified record (provided it satisfies criteria for
   * printing)
   *
   * <p>Each record at the locus is run through the Mendelian-violation, discordance, concordance,
   * allele-count and type selections, subset to the requested samples, matched against the JEXL
   * expressions and finally written (or handed to the random selection).
   *
   * @param tracker the ROD tracker
   * @param ref reference information
   * @param context alignment info
   * @return 0 if there is no tracker, no record at the locus, a discordance/concordance test
   *     fails, or a JEXL expression does not match; 1 otherwise. Note that 1 is also returned
   *     when records were skipped by the other selections.
   */
  @Override
  public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    if (tracker == null) {
      return 0;
    }

    Collection<VariantContext> vcs =
        tracker.getValues(variantCollection.variants, context.getLocation());

    if (vcs == null || vcs.size() == 0) {
      return 0;
    }

    for (VariantContext vc : vcs) {
      if (MENDELIAN_VIOLATIONS) {
        boolean foundMV = false;
        for (MendelianViolation mv : mvSet) {
          if (mv.isViolation(vc)) {
            foundMV = true;
            if (outMVFile != null) {
              // Fetch each trio member's genotype once, so that every column of the report is
              // taken from the right sample (dadGL previously printed the mother's likelihoods).
              final Genotype momGenotype = vc.getGenotype(mv.getSampleMom());
              final Genotype dadGenotype = vc.getGenotype(mv.getSampleDad());
              final Genotype childGenotype = vc.getGenotype(mv.getSampleChild());
              outMVFileStream.format(
                  "MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, "
                      + "childG=%s childGL=%s\n",
                  vc.getChr(),
                  vc.getStart(),
                  vc.getReference().getDisplayString(),
                  vc.getAlternateAllele(0).getDisplayString(),
                  vc.getChromosomeCount(vc.getAlternateAllele(0)),
                  mv.getSampleMom(),
                  mv.getSampleDad(),
                  mv.getSampleChild(),
                  momGenotype.toBriefString(),
                  momGenotype.getLikelihoods().getAsString(),
                  dadGenotype.toBriefString(),
                  dadGenotype.getLikelihoods().getAsString(),
                  childGenotype.toBriefString(),
                  childGenotype.getLikelihoods().getAsString());
            }
          }
        }

        // NOTE(review): this stops processing the remaining records at this locus (break, not
        // continue) and still falls through to "return 1" — confirm this is intended.
        if (!foundMV) {
          break;
        }
      }
      if (DISCORDANCE_ONLY) {
        Collection<VariantContext> compVCs =
            tracker.getValues(discordanceTrack, context.getLocation());
        if (!isDiscordant(vc, compVCs)) {
          return 0;
        }
      }
      if (CONCORDANCE_ONLY) {
        Collection<VariantContext> compVCs =
            tracker.getValues(concordanceTrack, context.getLocation());
        if (!isConcordant(vc, compVCs)) {
          return 0;
        }
      }

      if (alleleRestriction.equals(NumberAlleleRestriction.BIALLELIC) && !vc.isBiallelic()) {
        continue;
      }

      if (alleleRestriction.equals(NumberAlleleRestriction.MULTIALLELIC) && vc.isBiallelic()) {
        continue;
      }

      if (!selectedTypes.contains(vc.getType())) {
        continue;
      }

      VariantContext sub = subsetRecord(vc, samples);
      if ((sub.isPolymorphic() || !EXCLUDE_NON_VARIANTS)
          && (!sub.isFiltered() || !EXCLUDE_FILTERED)) {
        // Every JEXL expression must match the subsetted record.
        for (VariantContextUtils.JexlVCMatchExp jexl : jexls) {
          if (!VariantContextUtils.match(sub, jexl)) {
            return 0;
          }
        }
        if (SELECT_RANDOM_NUMBER) {
          randomlyAddVariant(++variantNumber, sub, ref.getBase());
        } else if (!SELECT_RANDOM_FRACTION
            || (GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) {
          vcfWriter.add(sub);
        }
      }
    }

    return 1;
  }
コード例 #10
0
ファイル: PhasingUtils.java プロジェクト: nh13/gatk
  /**
   * Builds a single merged record spanning from the start of {@code vc1} to the end of
   * {@code vc2}.
   *
   * <p>Reference bases lying strictly between the two records (if any) are fetched from
   * {@code referenceFile} and upper-cased. For each sample, the alleles of its two genotypes are
   * merged pairwise in their original order. The merged record carries no filters, the combined
   * IDs of both inputs, and recomputed chromosome counts.
   *
   * @param vc1 the upstream record; its genotypes drive the per-sample iteration
   * @param vc2 the downstream record; genotypes are looked up by sample name
   * @param referenceFile source of the intermediate reference bases
   * @return the merged record
   */
  static VariantContext reallyMergeIntoMNP(
      VariantContext vc1, VariantContext vc2, ReferenceSequenceFile referenceFile) {
    final int gapStart = vc1.getEnd() + 1;
    final int gapEnd = vc2.getStart() - 1;

    // Only fetch reference bases when there actually is a gap between the two records.
    byte[] gapBases = null;
    if (gapEnd >= gapStart) {
      gapBases = referenceFile.getSubsequenceAt(vc1.getChr(), gapStart, gapEnd).getBases();
      StringUtil.toUpperCase(gapBases);
    }

    // ensures that the reference allele is added
    final MergedAllelesData mergeData = new MergedAllelesData(gapBases, vc1, vc2);

    final GenotypesContext mergedGenotypes = GenotypesContext.create();
    for (final Genotype first : vc1.getGenotypes()) {
      final Genotype second = vc2.getGenotype(first.getSampleName());

      final List<Allele> firstAlleles = first.getAlleles();
      final List<Allele> secondAlleles = second.getAlleles();

      // Merged alleles are appended in the SAME order as in the input records, which preserves
      // phase information (if any) relative to whatever precedes vc1.
      final List<Allele> sampleMergedAlleles = new LinkedList<Allele>();
      final Iterator<Allele> secondIt = secondAlleles.iterator();
      final Iterator<Allele> firstIt = firstAlleles.iterator();
      while (firstIt.hasNext()) {
        final Allele a1 = firstIt.next();
        final Allele a2 = secondIt.next(); // this is OK, since allSamplesAreMergeable()
        sampleMergedAlleles.add(mergeData.ensureMergedAllele(a1, a2));
      }

      final double sampleLog10PError = Math.max(first.getLog10PError(), second.getLog10PError());

      // Since both input genotypes were unfiltered, the merged genotype remains unfiltered.
      final Set<String> sampleFilters = new HashSet<String>();

      final Map<String, Object> sampleAttributes = new HashMap<String, Object>();
      final PhaseAndQuality phaseQual = calcPhaseForMergedGenotypes(first, second);
      if (phaseQual.PQ != null) {
        sampleAttributes.put(ReadBackedPhasingWalker.PQ_KEY, phaseQual.PQ);
      }

      mergedGenotypes.add(
          new Genotype(
              first.getSampleName(),
              sampleMergedAlleles,
              sampleLog10PError,
              sampleFilters,
              sampleAttributes,
              phaseQual.isPhased));
    }

    final String mergedName = mergeVariantContextNames(vc1.getSource(), vc2.getSource());
    final double mergedLog10PError = Math.min(vc1.getLog10PError(), vc2.getLog10PError());

    // Since vc1 and vc2 were unfiltered, the merged record remains unfiltered.
    final Set<String> mergedFilters = new HashSet<String>();
    final Map<String, Object> mergedAttribs = mergeVariantContextAttributes(vc1, vc2);

    // Combine whichever IDs are present; fall back to the empty-ID marker when there are none.
    final List<String> idParts = new ArrayList<String>();
    if (vc1.hasID()) {
      idParts.add(vc1.getID());
    }
    if (vc2.hasID()) {
      idParts.add(vc2.getID());
    }
    final String mergedID;
    if (idParts.isEmpty()) {
      mergedID = VCFConstants.EMPTY_ID_FIELD;
    } else {
      mergedID = Utils.join(VCFConstants.ID_FIELD_SEPARATOR, idParts);
    }

    final VariantContextBuilder mergedBuilder =
        new VariantContextBuilder(
                mergedName,
                vc1.getChr(),
                vc1.getStart(),
                vc2.getEnd(),
                mergeData.getAllMergedAlleles())
            .id(mergedID)
            .genotypes(mergedGenotypes)
            .log10PError(mergedLog10PError)
            .filters(mergedFilters)
            .attributes(mergedAttribs);
    VariantContextUtils.calculateChromosomeCounts(mergedBuilder, true);
    return mergedBuilder.make();
  }