예제 #1
0
  /**
   * Checks if vc has a variant call for (at least one of) the samples.
   *
   * @param vc the variant rod VariantContext. Here, the variant is the dataset you're looking for
   *     discordances to (e.g. HapMap)
   * @param compVCs the comparison VariantContext (discordance
   * @return
   */
  private boolean isDiscordant(VariantContext vc, Collection<VariantContext> compVCs) {
    if (vc == null) return false;

    // if we're not looking at specific samples then the absence of a compVC means discordance
    if (NO_SAMPLES_SPECIFIED) return (compVCs == null || compVCs.isEmpty());

    // check if we find it in the variant rod
    Map<String, Genotype> genotypes = vc.getGenotypes(samples);
    for (Genotype g : genotypes.values()) {
      if (sampleHasVariant(g)) {
        // There is a variant called (or filtered with not exclude filtered option set) that is not
        // HomRef for at least one of the samples.
        if (compVCs == null) return true;
        // Look for this sample in the all vcs of the comp ROD track.
        boolean foundVariant = false;
        for (VariantContext compVC : compVCs) {
          if (sampleHasVariant(compVC.getGenotype(g.getSampleName()))) {
            foundVariant = true;
            break;
          }
        }
        // if (at least one sample) was not found in all VCs of the comp ROD, we have discordance
        if (!foundVariant) return true;
      }
    }
    return false; // we only get here if all samples have a variant in the comp rod.
  }
예제 #2
0
  private boolean isConcordant(VariantContext vc, Collection<VariantContext> compVCs) {
    if (vc == null || compVCs == null || compVCs.isEmpty()) return false;

    // if we're not looking for specific samples then the fact that we have both VCs is enough to
    // call it concordant.
    if (NO_SAMPLES_SPECIFIED) return true;

    // make a list of all samples contained in this variant VC that are being tracked by the user
    // command line arguments.
    Set<String> variantSamples = vc.getSampleNames();
    variantSamples.retainAll(samples);

    // check if we can find all samples from the variant rod in the comp rod.
    for (String sample : variantSamples) {
      boolean foundSample = false;
      for (VariantContext compVC : compVCs) {
        Genotype varG = vc.getGenotype(sample);
        Genotype compG = compVC.getGenotype(sample);
        if (haveSameGenotypes(varG, compG)) {
          foundSample = true;
          break;
        }
      }
      // if at least one sample doesn't have the same genotype, we don't have concordance
      if (!foundSample) {
        return false;
      }
    }
    return true;
  }
예제 #3
0
  /**
   * Subset VC record if necessary and emit the modified record (provided it satisfies criteria for
   * printing)
   *
   * @param tracker the ROD tracker
   * @param ref reference information
   * @param context alignment info
   * @return 1 if the record was printed to the output file, 0 if otherwise
   */
  @Override
  public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    if (tracker == null) return 0;

    Collection<VariantContext> vcs =
        tracker.getValues(variantCollection.variants, context.getLocation());

    if (vcs == null || vcs.size() == 0) {
      return 0;
    }

    for (VariantContext vc : vcs) {
      if (MENDELIAN_VIOLATIONS) {
        boolean foundMV = false;
        for (MendelianViolation mv : mvSet) {
          if (mv.isViolation(vc)) {
            foundMV = true;
            // System.out.println(vc.toString());
            if (outMVFile != null)
              outMVFileStream.format(
                  "MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, "
                      + "childG=%s childGL=%s\n",
                  vc.getChr(),
                  vc.getStart(),
                  vc.getReference().getDisplayString(),
                  vc.getAlternateAllele(0).getDisplayString(),
                  vc.getChromosomeCount(vc.getAlternateAllele(0)),
                  mv.getSampleMom(),
                  mv.getSampleDad(),
                  mv.getSampleChild(),
                  vc.getGenotype(mv.getSampleMom()).toBriefString(),
                  vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(),
                  vc.getGenotype(mv.getSampleDad()).toBriefString(),
                  vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(),
                  vc.getGenotype(mv.getSampleChild()).toBriefString(),
                  vc.getGenotype(mv.getSampleChild()).getLikelihoods().getAsString());
          }
        }

        if (!foundMV) break;
      }
      if (DISCORDANCE_ONLY) {
        Collection<VariantContext> compVCs =
            tracker.getValues(discordanceTrack, context.getLocation());
        if (!isDiscordant(vc, compVCs)) return 0;
      }
      if (CONCORDANCE_ONLY) {
        Collection<VariantContext> compVCs =
            tracker.getValues(concordanceTrack, context.getLocation());
        if (!isConcordant(vc, compVCs)) return 0;
      }

      if (alleleRestriction.equals(NumberAlleleRestriction.BIALLELIC) && !vc.isBiallelic())
        continue;

      if (alleleRestriction.equals(NumberAlleleRestriction.MULTIALLELIC) && vc.isBiallelic())
        continue;

      if (!selectedTypes.contains(vc.getType())) continue;

      VariantContext sub = subsetRecord(vc, samples);
      if ((sub.isPolymorphic() || !EXCLUDE_NON_VARIANTS)
          && (!sub.isFiltered() || !EXCLUDE_FILTERED)) {
        for (VariantContextUtils.JexlVCMatchExp jexl : jexls) {
          if (!VariantContextUtils.match(sub, jexl)) {
            return 0;
          }
        }
        if (SELECT_RANDOM_NUMBER) {
          randomlyAddVariant(++variantNumber, sub, ref.getBase());
        } else if (!SELECT_RANDOM_FRACTION
            || (GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) {
          vcfWriter.add(sub);
        }
      }
    }

    return 1;
  }