Exemple #1
0
  private VariantContext findMatchingComp(
      final VariantContext eval, final Collection<VariantContext> comps) {
    // if no comps, return null
    if (comps == null || comps.isEmpty()) return null;

    // if no eval, return any comp
    if (eval == null) return comps.iterator().next();

    // find all of the matching comps
    VariantContext lenientMatch = null;
    for (final VariantContext comp : comps) {
      switch (doEvalAndCompMatch(comp, eval, requireStrictAlleleMatch)) {
        case STRICT:
          return comp;
        case LENIENT:
          if (lenientMatch == null) lenientMatch = comp;
          break;
        case NO_MATCH:
          // do nothing
      }
    }

    // nothing matched, just return lenientMatch, which might be null
    return lenientMatch;
  }
  /**
   * this is the function that the inheriting class can expect to have called when the command line
   * system has initialized.
   *
   * @return the return code to exit the program with
   */
  protected int execute() throws Exception {
    engine.setParser(parser);
    argumentSources.add(this);

    Walker<?, ?> walker = engine.getWalkerByName(getAnalysisName());

    try {
      // Make sure a valid GATK user key is present, if required.
      authorizeGATKRun();

      engine.setArguments(getArgumentCollection());

      // File lists can require a bit of additional expansion.  Set these explicitly by the engine.
      engine.setSAMFileIDs(
          ListFileUtils.unpackBAMFileList(getArgumentCollection().samFiles, parser));

      engine.setWalker(walker);
      walker.setToolkit(engine);

      Collection<ReadFilter> filters = engine.createFilters();
      engine.setFilters(filters);

      // load the arguments into the walker / filters.
      // TODO: The fact that this extra load call exists here when all the parsing happens at the
      // engine
      // TODO: level indicates that we're doing something wrong.  Turn this around so that the GATK
      // can drive
      // TODO: argument processing.
      loadArgumentsIntoObject(walker);
      argumentSources.add(walker);

      Collection<RMDTriplet> rodBindings =
          ListFileUtils.unpackRODBindings(parser.getRodBindings(), parser);
      engine.setReferenceMetaDataFiles(rodBindings);

      for (ReadFilter filter : filters) {
        loadArgumentsIntoObject(filter);
        argumentSources.add(filter);
      }

      engine.execute();
      generateGATKRunReport(walker);
    } catch (Exception e) {
      generateGATKRunReport(walker, e);
      throw e;
    }

    // always return 0
    return 0;
  }
Exemple #3
0
  private boolean isConcordant(VariantContext vc, Collection<VariantContext> compVCs) {
    if (vc == null || compVCs == null || compVCs.isEmpty()) return false;

    // if we're not looking for specific samples then the fact that we have both VCs is enough to
    // call it concordant.
    if (NO_SAMPLES_SPECIFIED) return true;

    // make a list of all samples contained in this variant VC that are being tracked by the user
    // command line arguments.
    Set<String> variantSamples = vc.getSampleNames();
    variantSamples.retainAll(samples);

    // check if we can find all samples from the variant rod in the comp rod.
    for (String sample : variantSamples) {
      boolean foundSample = false;
      for (VariantContext compVC : compVCs) {
        Genotype varG = vc.getGenotype(sample);
        Genotype compG = compVC.getGenotype(sample);
        if (haveSameGenotypes(varG, compG)) {
          foundSample = true;
          break;
        }
      }
      // if at least one sample doesn't have the same genotype, we don't have concordance
      if (!foundSample) {
        return false;
      }
    }
    return true;
  }
Exemple #4
0
  /**
   * Checks if vc has a variant call for (at least one of) the samples.
   *
   * @param vc the variant rod VariantContext. Here, the variant is the dataset you're looking for
   *     discordances to (e.g. HapMap)
   * @param compVCs the comparison VariantContext (discordance
   * @return
   */
  private boolean isDiscordant(VariantContext vc, Collection<VariantContext> compVCs) {
    if (vc == null) return false;

    // if we're not looking at specific samples then the absence of a compVC means discordance
    if (NO_SAMPLES_SPECIFIED) return (compVCs == null || compVCs.isEmpty());

    // check if we find it in the variant rod
    Map<String, Genotype> genotypes = vc.getGenotypes(samples);
    for (Genotype g : genotypes.values()) {
      if (sampleHasVariant(g)) {
        // There is a variant called (or filtered with not exclude filtered option set) that is not
        // HomRef for at least one of the samples.
        if (compVCs == null) return true;
        // Look for this sample in the all vcs of the comp ROD track.
        boolean foundVariant = false;
        for (VariantContext compVC : compVCs) {
          if (sampleHasVariant(compVC.getGenotype(g.getSampleName()))) {
            foundVariant = true;
            break;
          }
        }
        // if (at least one sample) was not found in all VCs of the comp ROD, we have discordance
        if (!foundVariant) return true;
      }
    }
    return false; // we only get here if all samples have a variant in the comp rod.
  }
  /**
   * GATK provides the walker as an argument source.
   *
   * @return List of walkers to load dynamically.
   */
  @Override
  protected Class[] getArgumentSources() {
    // No walker info?  No plugins.
    if (getAnalysisName() == null) return new Class[] {};

    Collection<Class> argumentSources = new ArrayList<Class>();

    Walker walker = engine.getWalkerByName(getAnalysisName());
    engine.setArguments(getArgumentCollection());
    engine.setWalker(walker);
    walker.setToolkit(engine);
    argumentSources.add(walker.getClass());

    Collection<ReadFilter> filters = engine.createFilters();
    for (ReadFilter filter : filters) argumentSources.add(filter.getClass());

    Class[] argumentSourcesAsArray = new Class[argumentSources.size()];
    return argumentSources.toArray(argumentSourcesAsArray);
  }
  /** Collect relevant information from each variant in the supplied VCFs */
  @Override
  public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    for (NewEvaluationContext nec : evaluationContexts.values()) {
      synchronized (nec) {
        nec.update0(tracker, ref, context);
      }
    }

    if (tracker != null) {
      String aastr =
          (ancestralAlignments == null)
              ? null
              : new String(
                  ancestralAlignments
                      .getSubsequenceAt(
                          ref.getLocus().getContig(),
                          ref.getLocus().getStart(),
                          ref.getLocus().getStop())
                      .getBases());

      //      --------- track ---------           sample  - VariantContexts -
      HashMap<RodBinding<VariantContext>, HashMap<String, Collection<VariantContext>>> evalVCs =
          variantEvalUtils.bindVariantContexts(
              tracker, ref, evals, byFilterIsEnabled, true, perSampleIsEnabled, mergeEvals);
      HashMap<RodBinding<VariantContext>, HashMap<String, Collection<VariantContext>>> compVCs =
          variantEvalUtils.bindVariantContexts(
              tracker, ref, comps, byFilterIsEnabled, false, false, false);

      // for each eval track
      for (final RodBinding<VariantContext> evalRod : evals) {
        final Map<String, Collection<VariantContext>> emptyEvalMap = Collections.emptyMap();
        final Map<String, Collection<VariantContext>> evalSet =
            evalVCs.containsKey(evalRod) ? evalVCs.get(evalRod) : emptyEvalMap;

        // for each sample stratifier
        for (final String sampleName : sampleNamesForStratification) {
          Collection<VariantContext> evalSetBySample = evalSet.get(sampleName);
          if (evalSetBySample == null) {
            evalSetBySample = new HashSet<VariantContext>(1);
            evalSetBySample.add(null);
          }

          // for each eval in the track
          for (VariantContext eval : evalSetBySample) {
            // deal with ancestral alleles if requested
            if (eval != null && aastr != null) {
              eval = new VariantContextBuilder(eval).attribute("ANCESTRALALLELE", aastr).make();
            }

            // for each comp track
            for (final RodBinding<VariantContext> compRod : comps) {
              // no sample stratification for comps
              final HashMap<String, Collection<VariantContext>> compSetHash = compVCs.get(compRod);
              final Collection<VariantContext> compSet =
                  (compSetHash == null || compSetHash.size() == 0)
                      ? Collections.<VariantContext>emptyList()
                      : compVCs.get(compRod).values().iterator().next();

              // find the comp
              final VariantContext comp = findMatchingComp(eval, compSet);

              HashMap<VariantStratifier, List<String>> stateMap =
                  new HashMap<VariantStratifier, List<String>>();
              for (VariantStratifier vs : stratificationObjects) {
                List<String> states =
                    vs.getRelevantStates(
                        ref, tracker, comp, compRod.getName(), eval, evalRod.getName(), sampleName);
                stateMap.put(vs, states);
              }

              ArrayList<StateKey> stateKeys = new ArrayList<StateKey>();
              variantEvalUtils.initializeStateKeys(stateMap, null, null, stateKeys);

              HashSet<StateKey> stateKeysHash = new HashSet<StateKey>(stateKeys);

              for (StateKey stateKey : stateKeysHash) {
                NewEvaluationContext nec = evaluationContexts.get(stateKey);

                // eval against the comp
                synchronized (nec) {
                  nec.apply(tracker, ref, context, comp, eval);
                }

                // eval=null against all comps of different type that aren't bound to another eval
                for (VariantContext otherComp : compSet) {
                  if (otherComp != comp && !compHasMatchingEval(otherComp, evalSetBySample)) {
                    synchronized (nec) {
                      nec.apply(tracker, ref, context, otherComp, null);
                    }
                  }
                }
              }
            }
          }
        }

        if (mergeEvals) break; // stop processing the eval tracks
      }
    }

    return null;
  }
Exemple #7
0
  /**
   * Subset VC record if necessary and emit the modified record (provided it satisfies criteria for
   * printing)
   *
   * @param tracker the ROD tracker
   * @param ref reference information
   * @param context alignment info
   * @return 1 if the record was printed to the output file, 0 if otherwise
   */
  @Override
  public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    if (tracker == null) return 0;

    Collection<VariantContext> vcs =
        tracker.getValues(variantCollection.variants, context.getLocation());

    if (vcs == null || vcs.size() == 0) {
      return 0;
    }

    for (VariantContext vc : vcs) {
      if (MENDELIAN_VIOLATIONS) {
        boolean foundMV = false;
        for (MendelianViolation mv : mvSet) {
          if (mv.isViolation(vc)) {
            foundMV = true;
            // System.out.println(vc.toString());
            if (outMVFile != null)
              outMVFileStream.format(
                  "MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, "
                      + "childG=%s childGL=%s\n",
                  vc.getChr(),
                  vc.getStart(),
                  vc.getReference().getDisplayString(),
                  vc.getAlternateAllele(0).getDisplayString(),
                  vc.getChromosomeCount(vc.getAlternateAllele(0)),
                  mv.getSampleMom(),
                  mv.getSampleDad(),
                  mv.getSampleChild(),
                  vc.getGenotype(mv.getSampleMom()).toBriefString(),
                  vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(),
                  vc.getGenotype(mv.getSampleDad()).toBriefString(),
                  vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(),
                  vc.getGenotype(mv.getSampleChild()).toBriefString(),
                  vc.getGenotype(mv.getSampleChild()).getLikelihoods().getAsString());
          }
        }

        if (!foundMV) break;
      }
      if (DISCORDANCE_ONLY) {
        Collection<VariantContext> compVCs =
            tracker.getValues(discordanceTrack, context.getLocation());
        if (!isDiscordant(vc, compVCs)) return 0;
      }
      if (CONCORDANCE_ONLY) {
        Collection<VariantContext> compVCs =
            tracker.getValues(concordanceTrack, context.getLocation());
        if (!isConcordant(vc, compVCs)) return 0;
      }

      if (alleleRestriction.equals(NumberAlleleRestriction.BIALLELIC) && !vc.isBiallelic())
        continue;

      if (alleleRestriction.equals(NumberAlleleRestriction.MULTIALLELIC) && vc.isBiallelic())
        continue;

      if (!selectedTypes.contains(vc.getType())) continue;

      VariantContext sub = subsetRecord(vc, samples);
      if ((sub.isPolymorphic() || !EXCLUDE_NON_VARIANTS)
          && (!sub.isFiltered() || !EXCLUDE_FILTERED)) {
        for (VariantContextUtils.JexlVCMatchExp jexl : jexls) {
          if (!VariantContextUtils.match(sub, jexl)) {
            return 0;
          }
        }
        if (SELECT_RANDOM_NUMBER) {
          randomlyAddVariant(++variantNumber, sub, ref.getBase());
        } else if (!SELECT_RANDOM_FRACTION
            || (GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) {
          vcfWriter.add(sub);
        }
      }
    }

    return 1;
  }
Exemple #8
0
  /** Collect relevant information from each variant in the supplied VCFs */
  @Override
  public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    // we track the processed bp and expose this for modules instead of wasting CPU power on
    // calculating
    // the same thing over and over in evals that want the processed bp
    synchronized (this) {
      nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1);
    }

    if (tracker != null) {
      String aastr =
          (ancestralAlignments == null)
              ? null
              : new String(
                  ancestralAlignments
                      .getSubsequenceAt(
                          ref.getLocus().getContig(),
                          ref.getLocus().getStart(),
                          ref.getLocus().getStop())
                      .getBases());

      //            // update the dynamic stratifications
      //            for (final VariantContext vc : tracker.getValues(evals, ref.getLocus())) {
      //                // don't worry -- DynamicStratification only work with one eval object
      //                for ( final DynamicStratification ds :  dynamicStratifications ) {
      //                    ds.update(vc);
      //                }
      //            }

      //      --------- track ---------           sample  - VariantContexts -
      HashMap<RodBinding<VariantContext>, HashMap<String, Collection<VariantContext>>> evalVCs =
          variantEvalUtils.bindVariantContexts(
              tracker, ref, evals, byFilterIsEnabled, true, perSampleIsEnabled, mergeEvals);
      HashMap<RodBinding<VariantContext>, HashMap<String, Collection<VariantContext>>> compVCs =
          variantEvalUtils.bindVariantContexts(
              tracker, ref, comps, byFilterIsEnabled, false, false, false);

      // for each eval track
      for (final RodBinding<VariantContext> evalRod : evals) {
        final Map<String, Collection<VariantContext>> emptyEvalMap = Collections.emptyMap();
        final Map<String, Collection<VariantContext>> evalSet =
            evalVCs.containsKey(evalRod) ? evalVCs.get(evalRod) : emptyEvalMap;

        // for each sample stratifier
        for (final String sampleName : sampleNamesForStratification) {
          Collection<VariantContext> evalSetBySample = evalSet.get(sampleName);
          if (evalSetBySample == null) {
            evalSetBySample = new HashSet<VariantContext>(1);
            evalSetBySample.add(null);
          }

          // for each eval in the track
          for (VariantContext eval : evalSetBySample) {
            // deal with ancestral alleles if requested
            if (eval != null && aastr != null) {
              eval = new VariantContextBuilder(eval).attribute("ANCESTRALALLELE", aastr).make();
            }

            // for each comp track
            for (final RodBinding<VariantContext> compRod : comps) {
              // no sample stratification for comps
              final HashMap<String, Collection<VariantContext>> compSetHash = compVCs.get(compRod);
              final Collection<VariantContext> compSet =
                  (compSetHash == null || compSetHash.size() == 0)
                      ? Collections.<VariantContext>emptyList()
                      : compVCs.get(compRod).values().iterator().next();

              // find the comp
              final VariantContext comp = findMatchingComp(eval, compSet);

              for (EvaluationContext nec :
                  getEvaluationContexts(
                      tracker, ref, eval, evalRod.getName(), comp, compRod.getName(), sampleName)) {

                // eval against the comp
                synchronized (nec) {
                  nec.apply(tracker, ref, context, comp, eval);
                }

                // eval=null against all comps of different type that aren't bound to another eval
                for (VariantContext otherComp : compSet) {
                  if (otherComp != comp && !compHasMatchingEval(otherComp, evalSetBySample)) {
                    synchronized (nec) {
                      nec.apply(tracker, ref, context, otherComp, null);
                    }
                  }
                }
              }
            }
          }
        }

        if (mergeEvals) break; // stop processing the eval tracks
      }
    }

    return null;
  }