Esempio n. 1
0
 /**
  * Resolves the evaluation contexts that apply to one eval/comp pairing for the given sample.
  *
  * <p>This code is not yet structured for efficiency. It currently works in two passes:
  *
  * <ol>
  *   <li>every stratifier known to the strat manager is asked for the states relevant to the
  *       eval/comp pair, and each answer is appended to a list of state lists;
  *   <li>the strat manager is then asked to look up everything associated with the combinations
  *       of those states.
  * </ol>
  *
  * <p>For example, suppose we have a single variant S and the active strats are EvalRod, CompRod,
  * and Novelty. The intermediate list then looks like:
  *
  * <p>L = [[Eval], [Comp], [All, Novel]]
  *
  * <p>Walking the strat manager tree produces the keys associated with these states:
  *
  * <p>K = [0, 1] where EVAL x COMP x ALL = 0 and EVAL x COMP x NOVEL = 1
  *
  * <p>TODO -- create an inline version that doesn't create the intermediate list of list
  *
  * @param tracker the metadata tracker passed through to each stratifier
  * @param ref the reference context passed through to each stratifier
  * @param eval the eval variant context
  * @param evalName the name associated with {@code eval}
  * @param comp the comp variant context
  * @param compName the name associated with {@code comp}
  * @param sampleName the sample name passed through to each stratifier
  * @return whatever {@code stratManager.values} yields for the collected per-stratifier states
  */
 protected Collection<EvaluationContext> getEvaluationContexts(
     final RefMetaDataTracker tracker,
     final ReferenceContext ref,
     final VariantContext eval,
     final String evalName,
     final VariantContext comp,
     final String compName,
     final String sampleName) {
   // One entry per stratifier, in the order the strat manager reports them.
   final List<List<Object>> statesPerStratifier = new LinkedList<List<Object>>();

   for (final VariantStratifier stratifier : stratManager.getStratifiers()) {
     final List<Object> relevantStates =
         stratifier.getRelevantStates(ref, tracker, comp, compName, eval, evalName, sampleName);
     statesPerStratifier.add(relevantStates);
   }

   return stratManager.values(statesPerStratifier);
 }
Esempio n. 2
0
 /**
  * Rejects any stratifier/evaluator combination that the stratifier itself declares incompatible.
  *
  * <p>Every stratifier is checked against every evaluator class; the first evaluator class found
  * in a stratifier's incompatible-evaluator collection aborts the check with an exception.
  *
  * @param stratificationObjects the stratifiers to check
  * @param evaluationClasses the evaluator classes to check against each stratifier
  * @throws UserException.BadArgumentValue if an evaluator class is listed as incompatible by one
  *     of the stratifiers
  */
 final void checkForIncompatibleEvaluatorsAndStratifiers(
     final List<VariantStratifier> stratificationObjects,
     Set<Class<? extends VariantEvaluator>> evaluationClasses) {
   for (final VariantStratifier stratifier : stratificationObjects) {
     for (final Class<? extends VariantEvaluator> evaluatorClass : evaluationClasses) {
       if (!stratifier.getIncompatibleEvaluators().contains(evaluatorClass)) {
         continue;
       }

       final String explanation =
           "The selected stratification "
               + stratifier.getName()
               + " and evaluator "
               + evaluatorClass.getSimpleName()
               + " are incompatible due to combinatorial memory requirements."
               + " Please disable one";
       throw new UserException.BadArgumentValue("ST and ET", explanation);
     }
   }
 }
  /**
   * Output the finalized report.
   *
   * <p>For every state key in {@code evaluationContexts}, each evaluator of the associated
   * context is finalized and the fields reported by {@code AnalysisModuleScanner} are copied into
   * the report. A field whose value is a {@code TableType} is expanded into its own sub-table; any
   * other value is written to the table named after the evaluator class. The report is printed
   * once all contexts have been processed.
   *
   * @param result an integer that doesn't get used for anything
   * @throws StingException if a data point field cannot be read reflectively; the original
   *     {@code IllegalAccessException} is attached as the cause
   */
  public void onTraversalDone(Integer result) {
    logger.info("Finalizing variant report");

    for (StateKey stateKey : evaluationContexts.keySet()) {
      NewEvaluationContext nec = evaluationContexts.get(stateKey);

      for (VariantEvaluator ve : nec.getEvaluationClassList().values()) {
        ve.finalizeEvaluation();

        AnalysisModuleScanner scanner = new AnalysisModuleScanner(ve);
        Map<Field, DataPoint> datamap = scanner.getData();

        for (Map.Entry<Field, DataPoint> dataPoint : datamap.entrySet()) {
          final Field field = dataPoint.getKey();

          // Read the field exactly once; only the reflective access can raise the checked
          // exception, so the try block is limited to it.
          final Object value;
          try {
            field.setAccessible(true);
            value = field.get(ve);
          } catch (IllegalAccessException e) {
            // keep the original exception as the cause so the stack trace is not lost
            throw new StingException("IllegalAccessException: " + e, e);
          }

          if (value instanceof TableType) {
            writeTableTypeField(stateKey, ve, field, dataPoint.getValue(), (TableType) value);
          } else {
            writeScalarField(stateKey, ve, field, value);
          }
        }
      }
    }

    report.print(out);
  }

  /** Writes a single non-table field value into the report table named after the evaluator. */
  private void writeScalarField(
      final StateKey stateKey,
      final VariantEvaluator ve,
      final Field field,
      final Object value) {
    final GATKReportTable table = report.getTable(ve.getClass().getSimpleName());
    final String entryKey = stateKey.toString();

    for (VariantStratifier vs : stratificationObjects) {
      final String columnName = vs.getName();
      table.set(entryKey, columnName, stateKey.get(columnName));
    }

    table.set(entryKey, field.getName(), value);
  }

  /** Expands a {@code TableType} field into rows of its "Evaluator.field" sub-table. */
  private void writeTableTypeField(
      final StateKey stateKey,
      final VariantEvaluator ve,
      final Field field,
      final DataPoint dataPointAnn,
      final TableType t) {
    final String subTableName = ve.getClass().getSimpleName() + "." + field.getName();

    // Fetch the keys once instead of on every loop iteration.
    final Object[] rowKeys = t.getRowKeys();
    final Object[] columnKeys = t.getColumnKeys();

    final GATKReportTable table = getOrCreateSubTable(subTableName, dataPointAnn, columnKeys);
    final String stateKeyName = stateKey.toString();

    for (int row = 0; row < rowKeys.length; row++) {
      // Row keys are converted the same way as column keys rather than hard-cast to String, so a
      // non-String key no longer fails with a ClassCastException. A null key stays null.
      final Object rowKey = rowKeys[row];
      final String r = (rowKey == null) ? null : rowKey.toString();
      final String entryKey = stateKeyName + r;

      for (VariantStratifier vs : stratificationObjects) {
        final String columnName = vs.getName();
        table.set(entryKey, columnName, stateKey.get(columnName));
      }

      for (int col = 0; col < columnKeys.length; col++) {
        table.set(entryKey, columnKeys[col].toString(), t.getCell(row, col));
      }

      // The row label used to be rewritten after every cell; one write per row is equivalent.
      // The guard keeps the previous behavior of not labelling rows of a column-less table.
      if (columnKeys.length > 0) {
        table.set(entryKey, "row", r);
      }
    }
  }

  /** Returns the named sub-table, creating it and its columns on first use. */
  private GATKReportTable getOrCreateSubTable(
      final String subTableName, final DataPoint dataPointAnn, final Object[] columnKeys) {
    if (report.hasTable(subTableName)) {
      return report.getTable(subTableName);
    }

    report.addTable(subTableName, dataPointAnn.description());
    final GATKReportTable table = report.getTable(subTableName);

    table.addPrimaryKey("entry", false);
    table.addColumn(subTableName, subTableName);

    for (VariantStratifier vs : stratificationObjects) {
      table.addColumn(vs.getName(), "unknown");
    }

    table.addColumn("row", "unknown");

    for (Object columnKey : columnKeys) {
      table.addColumn(columnKey.toString(), 0.0);
    }

    return table;
  }
  /**
   * Collect relevant information from each variant in the supplied VCFs.
   *
   * <p>Every evaluation context first receives an {@code update0} call. If a tracker is present,
   * each eval track is then walked once per stratification sample: every eval variant is paired
   * with the comp chosen by {@code findMatchingComp} for each comp track, the stratifiers' relevant
   * states are expanded into state keys, and the evaluation context stored under each key is
   * applied to the pair. Comps other than the chosen one that have no matching eval are also
   * applied, with a null eval.
   *
   * @param tracker the metadata tracker for this locus; when null only the {@code update0} calls
   *     are made
   * @param ref the reference context for this locus
   * @param context the alignment context for this locus
   * @return always null
   */
  @Override
  public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    for (NewEvaluationContext evaluationContext : evaluationContexts.values()) {
      synchronized (evaluationContext) {
        evaluationContext.update0(tracker, ref, context);
      }
    }

    // without a tracker there are no variants to bind
    if (tracker == null) {
      return null;
    }

    // ancestral bases spanning the current locus, or null when no alignments were loaded
    String ancestralBases = null;
    if (ancestralAlignments != null) {
      ancestralBases =
          new String(
              ancestralAlignments
                  .getSubsequenceAt(
                      ref.getLocus().getContig(),
                      ref.getLocus().getStart(),
                      ref.getLocus().getStop())
                  .getBases());
    }

    //      --------- track ---------           sample  - VariantContexts -
    final HashMap<RodBinding<VariantContext>, HashMap<String, Collection<VariantContext>>>
        evalVCs =
            variantEvalUtils.bindVariantContexts(
                tracker, ref, evals, byFilterIsEnabled, true, perSampleIsEnabled, mergeEvals);
    final HashMap<RodBinding<VariantContext>, HashMap<String, Collection<VariantContext>>>
        compVCs =
            variantEvalUtils.bindVariantContexts(
                tracker, ref, comps, byFilterIsEnabled, false, false, false);

    for (final RodBinding<VariantContext> evalRod : evals) {
      // per-sample variants of this eval track; empty when the track has nothing bound here
      final Map<String, Collection<VariantContext>> evalsBySample;
      if (evalVCs.containsKey(evalRod)) {
        evalsBySample = evalVCs.get(evalRod);
      } else {
        evalsBySample = Collections.emptyMap();
      }

      for (final String sampleName : sampleNamesForStratification) {
        Collection<VariantContext> evalSetBySample = evalsBySample.get(sampleName);
        if (evalSetBySample == null) {
          // a single null eval lets the comps still be visited for this sample
          evalSetBySample = new HashSet<VariantContext>(1);
          evalSetBySample.add(null);
        }

        for (final VariantContext boundEval : evalSetBySample) {
          // deal with ancestral alleles if requested
          final VariantContext eval =
              (boundEval != null && ancestralBases != null)
                  ? new VariantContextBuilder(boundEval)
                      .attribute("ANCESTRALALLELE", ancestralBases)
                      .make()
                  : boundEval;

          for (final RodBinding<VariantContext> compRod : comps) {
            // no sample stratification for comps: take the first bound collection, if any
            final HashMap<String, Collection<VariantContext>> compSetHash = compVCs.get(compRod);
            final Collection<VariantContext> compSet;
            if (compSetHash != null && !compSetHash.isEmpty()) {
              compSet = compSetHash.values().iterator().next();
            } else {
              compSet = Collections.<VariantContext>emptyList();
            }

            final VariantContext comp = findMatchingComp(eval, compSet);

            // relevant states of every stratifier for this eval/comp pair
            final HashMap<VariantStratifier, List<String>> stateMap =
                new HashMap<VariantStratifier, List<String>>();
            for (VariantStratifier stratifier : stratificationObjects) {
              stateMap.put(
                  stratifier,
                  stratifier.getRelevantStates(
                      ref, tracker, comp, compRod.getName(), eval, evalRod.getName(), sampleName));
            }

            final ArrayList<StateKey> stateKeys = new ArrayList<StateKey>();
            variantEvalUtils.initializeStateKeys(stateMap, null, null, stateKeys);

            // the set drops duplicate keys so each context is applied once per pair
            for (StateKey stateKey : new HashSet<StateKey>(stateKeys)) {
              final NewEvaluationContext target = evaluationContexts.get(stateKey);

              // eval against the comp
              synchronized (target) {
                target.apply(tracker, ref, context, comp, eval);
              }

              // eval=null against all comps of different type that aren't bound to another eval
              for (VariantContext otherComp : compSet) {
                if (otherComp == comp || compHasMatchingEval(otherComp, evalSetBySample)) {
                  continue;
                }
                synchronized (target) {
                  target.apply(tracker, ref, context, otherComp, null);
                }
              }
            }
          }
        }
      }

      // stop processing the eval tracks
      if (mergeEvals) {
        break;
      }
    }

    return null;
  }
  /**
   * Initialize the stratifications, evaluations, evaluation contexts, and reporting object.
   *
   * <p>In order, this method: optionally lists the modules, assembles the comp and known tracks
   * (adding a placeholder comp when none is present), derives the sample lists from the eval
   * tracks, builds the select expressions, creates the stratifiers and evaluator classes, checks
   * the interval-stratification argument, builds the evaluation contexts and the report, and
   * finally loads the optional ancestral-alignment and known-CNV resources.
   *
   * @throws UserException.BadArgumentValue if an intervals file was supplied but no
   *     {@code IntervalStratification} stratifier is active
   * @throws ReviewedStingException if the ancestral alignments file cannot be found; the
   *     underlying {@code FileNotFoundException} is attached as the cause
   */
  public void initialize() {
    // Just list the modules, and exit quickly.
    if (LIST) {
      variantEvalUtils.listModulesAndExit();
    }

    // maintain the full list of comps
    comps.addAll(compsProvided);
    if (dbsnp.dbsnp.isBound()) {
      comps.add(dbsnp.dbsnp);
      knowns.add(dbsnp.dbsnp);
    }

    // Add a dummy comp track if none exists
    if (comps.isEmpty()) {
      comps.add(
          new RodBinding<VariantContext>(VariantContext.class, "none", "UNBOUND", "", new Tags()));
    }

    // Set up set of additional knowns
    for (RodBinding<VariantContext> compRod : comps) {
      if (KNOWN_NAMES.contains(compRod.getName())) {
        knowns.add(compRod);
      }
    }

    // Now that we have all the rods categorized, determine the sample list from the eval rods.
    Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), evals);
    Set<String> vcfSamples =
        SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);

    // Load the sample list
    sampleNamesForEvaluation.addAll(
        SampleUtils.getSamplesFromCommandLineInput(vcfSamples, SAMPLE_EXPRESSIONS));
    numSamples = NUM_SAMPLES > 0 ? NUM_SAMPLES : sampleNamesForEvaluation.size();

    // Per-sample names are only stratified on when the "Sample" stratification was requested;
    // the all-samples name is always present.
    if (Arrays.asList(STRATIFICATIONS_TO_USE).contains("Sample")) {
      sampleNamesForStratification.addAll(sampleNamesForEvaluation);
    }
    sampleNamesForStratification.add(ALL_SAMPLE_NAME);

    // Initialize select expressions
    for (VariantContextUtils.JexlVCMatchExp jexl :
        VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS)) {
      SortableJexlVCMatchExp sjexl = new SortableJexlVCMatchExp(jexl.name, jexl.exp);
      jexlExpressions.add(sjexl);
    }

    // Initialize the set of stratifications and evaluations to use
    stratificationObjects =
        variantEvalUtils.initializeStratificationObjects(
            this, NO_STANDARD_STRATIFICATIONS, STRATIFICATIONS_TO_USE);
    Set<Class<? extends VariantEvaluator>> evaluationObjects =
        variantEvalUtils.initializeEvaluationObjects(NO_STANDARD_MODULES, MODULES_TO_USE);
    for (VariantStratifier vs : getStratificationObjects()) {
      if (vs.getName().equals("Filter")) {
        byFilterIsEnabled = true;
      } else if (vs.getName().equals("Sample")) {
        perSampleIsEnabled = true;
      }
    }

    // An intervals file is only meaningful together with the IntervalStratification stratifier
    if (intervalsFile != null) {
      boolean hasIntervalStratification = false;
      for (final VariantStratifier vs : stratificationObjects) {
        if (vs.getClass().equals(IntervalStratification.class)) {
          hasIntervalStratification = true;
          break; // one match is enough
        }
      }
      if (!hasIntervalStratification) {
        throw new UserException.BadArgumentValue(
            "ST", "stratIntervals argument provided but -ST IntervalStratification not provided");
      }
    }

    // Initialize the evaluation contexts
    evaluationContexts =
        variantEvalUtils.initializeEvaluationContexts(
            stratificationObjects, evaluationObjects, null, null);

    // Initialize report table
    report = variantEvalUtils.initializeGATKReport(stratificationObjects, evaluationObjects);

    // Load ancestral alignments
    if (ancestralAlignmentsFile != null) {
      try {
        ancestralAlignments = new IndexedFastaSequenceFile(ancestralAlignmentsFile);
      } catch (FileNotFoundException e) {
        // chain the original exception so the underlying I/O failure stays visible
        throw new ReviewedStingException(
            String.format(
                "The ancestral alignments file, '%s', could not be found",
                ancestralAlignmentsFile.getAbsolutePath()),
            e);
      }
    }

    // initialize CNVs
    if (knownCNVsFile != null) {
      knownCNVsByContig = createIntervalTreeByContig(knownCNVsFile);
    }
  }