Beispiel #1
0
  /**
   * Collects the relevant information from every variant present at the current locus in the
   * supplied VCFs and hands each eval/comp pairing to the matching evaluation contexts.
   *
   * @param tracker reference-ordered data at this locus; when {@code null} only the processed-loci
   *     counter is updated
   * @param ref reference context for this locus; counted as one processed base when non-null
   * @param context alignment context, used for its skipped-base count and passed on to the
   *     evaluation contexts
   * @return always {@code null}
   */
  @Override
  public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    // Maintain the processed-bp total centrally so that individual evaluation modules can read it
    // instead of each recomputing the same number.
    synchronized (this) {
      nProcessedLoci += context.getSkippedBases() + (ref != null ? 1 : 0);
    }

    // Nothing to evaluate without a tracker.
    if (tracker == null) {
      return null;
    }

    // Ancestral bases spanning this locus, or null when no ancestral alignment is loaded.
    String ancestralBases = null;
    if (ancestralAlignments != null) {
      ancestralBases =
          new String(
              ancestralAlignments
                  .getSubsequenceAt(
                      ref.getLocus().getContig(),
                      ref.getLocus().getStart(),
                      ref.getLocus().getStop())
                  .getBases());
    }

    // NOTE: the per-variant dynamic stratification update that used to sit here is disabled.

    // track -> sample -> variant contexts, first for the eval tracks and then for the comp tracks
    HashMap<RodBinding<VariantContext>, HashMap<String, Collection<VariantContext>>> evalsByTrack =
        variantEvalUtils.bindVariantContexts(
            tracker, ref, evals, byFilterIsEnabled, true, perSampleIsEnabled, mergeEvals);
    HashMap<RodBinding<VariantContext>, HashMap<String, Collection<VariantContext>>> compsByTrack =
        variantEvalUtils.bindVariantContexts(
            tracker, ref, comps, byFilterIsEnabled, false, false, false);

    for (final RodBinding<VariantContext> evalTrack : evals) {
      // Tracks with no bound variants behave like an empty sample map.
      final Map<String, Collection<VariantContext>> evalsBySample;
      if (evalsByTrack.containsKey(evalTrack)) {
        evalsBySample = evalsByTrack.get(evalTrack);
      } else {
        evalsBySample = Collections.emptyMap();
      }

      for (final String stratSample : sampleNamesForStratification) {
        Collection<VariantContext> evalsForSample = evalsBySample.get(stratSample);
        if (evalsForSample == null) {
          // A single null entry stands in for "no eval here", so the comp side is still visited.
          evalsForSample = new HashSet<VariantContext>(1);
          evalsForSample.add(null);
        }

        for (final VariantContext rawEval : evalsForSample) {
          // Attach the ancestral allele when both a variant and ancestral bases are available.
          final VariantContext evalForContexts;
          if (rawEval == null || ancestralBases == null) {
            evalForContexts = rawEval;
          } else {
            evalForContexts =
                new VariantContextBuilder(rawEval)
                    .attribute("ANCESTRALALLELE", ancestralBases)
                    .make();
          }

          for (final RodBinding<VariantContext> compTrack : comps) {
            // Comps are not stratified by sample: take the first (only) collection of the track.
            final HashMap<String, Collection<VariantContext>> compsForTrack =
                compsByTrack.get(compTrack);
            final Collection<VariantContext> compCandidates;
            if (compsForTrack != null && compsForTrack.size() != 0) {
              compCandidates = compsForTrack.values().iterator().next();
            } else {
              compCandidates = Collections.<VariantContext>emptyList();
            }

            final VariantContext matchedComp = findMatchingComp(evalForContexts, compCandidates);

            for (EvaluationContext evalContext :
                getEvaluationContexts(
                    tracker,
                    ref,
                    evalForContexts,
                    evalTrack.getName(),
                    matchedComp,
                    compTrack.getName(),
                    stratSample)) {

              // Score the eval against its matched comp.
              synchronized (evalContext) {
                evalContext.apply(tracker, ref, context, matchedComp, evalForContexts);
              }

              // Every remaining comp that no eval in this sample claims is scored against a
              // null eval.
              for (VariantContext unmatchedComp : compCandidates) {
                if (unmatchedComp == matchedComp
                    || compHasMatchingEval(unmatchedComp, evalsForSample)) {
                  continue;
                }
                synchronized (evalContext) {
                  evalContext.apply(tracker, ref, context, unmatchedComp, null);
                }
              }
            }
          }
        }
      }

      // With merged evals only the first eval track is processed.
      if (mergeEvals) {
        break;
      }
    }

    return null;
  }
Beispiel #2
0
  /**
   * Initialize the stratifications, evaluations, and evaluation contexts.
   *
   * <p>In order, this method: optionally lists the available modules, assembles the comp and known
   * track lists (adding a placeholder comp when none is bound), derives the sample sets from the
   * eval VCF headers, builds the select expressions, instantiates stratifiers and evaluators,
   * validates the interval-stratification argument, creates the stratification states, and loads
   * the optional ancestral-alignment and known-CNV inputs.
   *
   * @throws UserException.BadArgumentValue if {@code intervalsFile} is set but no {@link
   *     IntervalStratification} stratifier was selected
   * @throws ReviewedStingException if the ancestral alignments file cannot be found; the original
   *     {@link FileNotFoundException} is attached as the cause
   */
  public void initialize() {
    // Just list the modules, and exit quickly (presumably listModulesAndExit() ends the run).
    if (LIST) {
      variantEvalUtils.listModulesAndExit();
    }

    // maintain the full list of comps; a bound dbSNP track is both a comp and a known
    comps.addAll(compsProvided);
    if (dbsnp.dbsnp.isBound()) {
      comps.add(dbsnp.dbsnp);
      knowns.add(dbsnp.dbsnp);
    }

    // Add a dummy comp track if none exists, so the per-comp loop in map() always has one entry
    if (comps.size() == 0) {
      comps.add(
          new RodBinding<VariantContext>(VariantContext.class, "none", "UNBOUND", "", new Tags()));
    }

    // Set up set of additional knowns: any comp whose name is listed in KNOWN_NAMES
    for (RodBinding<VariantContext> compRod : comps) {
      if (KNOWN_NAMES.contains(compRod.getName())) {
        knowns.add(compRod);
      }
    }

    // Now that we have all the rods categorized, determine the sample list from the eval rods.
    Map<String, VCFHeader> vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), evals);
    Set<String> vcfSamples =
        SampleUtils.getSampleList(
            vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);

    // Load the sample list, using an intermediate tree set to sort the samples
    final Set<String> allSampleNames = SampleUtils.getSamplesFromCommandLineInput(vcfSamples);
    sampleNamesForEvaluation.addAll(
        new TreeSet<String>(
            SampleUtils.getSamplesFromCommandLineInput(vcfSamples, SAMPLE_EXPRESSIONS)));
    // We are subsetting whenever the evaluated samples do not cover every sample in the VCFs
    isSubsettingSamples = !sampleNamesForEvaluation.containsAll(allSampleNames);

    // Per-sample stratification is only populated when the "Sample" stratifier was requested;
    // the ALL_SAMPLE_NAME entry is always present.
    if (Arrays.asList(STRATIFICATIONS_TO_USE).contains("Sample")) {
      sampleNamesForStratification.addAll(sampleNamesForEvaluation);
    }
    sampleNamesForStratification.add(ALL_SAMPLE_NAME);

    // Initialize select expressions
    for (VariantContextUtils.JexlVCMatchExp jexl :
        VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS)) {
      SortableJexlVCMatchExp sjexl = new SortableJexlVCMatchExp(jexl.name, jexl.exp);
      jexlExpressions.add(sjexl);
    }

    // Initialize the set of stratifications and evaluations to use
    final List<VariantStratifier> stratificationObjects =
        variantEvalUtils.initializeStratificationObjects(
            NO_STANDARD_STRATIFICATIONS, STRATIFICATIONS_TO_USE);
    final Set<Class<? extends VariantEvaluator>> evaluationClasses =
        variantEvalUtils.initializeEvaluationObjects(NO_STANDARD_MODULES, MODULES_TO_USE);

    checkForIncompatibleEvaluatorsAndStratifiers(stratificationObjects, evaluationClasses);

    // Record which optional stratifiers are active; map() passes these flags on when binding
    for (VariantStratifier vs : stratificationObjects) {
      if (vs.getName().equals("Filter")) {
        byFilterIsEnabled = true;
      } else if (vs.getName().equals("Sample")) {
        perSampleIsEnabled = true;
      }
    }

    // An intervals file is only meaningful together with the IntervalStratification stratifier
    if (intervalsFile != null) {
      boolean fail = true;
      for (final VariantStratifier vs : stratificationObjects) {
        if (vs.getClass().equals(IntervalStratification.class)) {
          fail = false;
        }
      }
      if (fail) {
        throw new UserException.BadArgumentValue(
            "ST", "stratIntervals argument provided but -ST IntervalStratification not provided");
      }
    }

    // Initialize the evaluation contexts
    createStratificationStates(stratificationObjects, evaluationClasses);

    // Load ancestral alignments
    if (ancestralAlignmentsFile != null) {
      try {
        ancestralAlignments = new IndexedFastaSequenceFile(ancestralAlignmentsFile);
      } catch (FileNotFoundException e) {
        // Keep the original exception as the cause so the underlying failure is not lost.
        throw new ReviewedStingException(
            String.format(
                "The ancestral alignments file, '%s', could not be found",
                ancestralAlignmentsFile.getAbsolutePath()),
            e);
      }
    }

    // initialize CNVs
    if (knownCNVsFile != null) {
      knownCNVsByContig = createIntervalTreeByContig(knownCNVsFile);
    }
  }