Exemplo n.º 1
0
  /**
   * Checks if vc has a variant call for (at least one of) the samples.
   *
   * @param vc the variant rod VariantContext. Here, the variant is the dataset you're looking for
   *     discordances to (e.g. HapMap)
   * @param compVCs the comparison VariantContext (discordance
   * @return
   */
  private boolean isDiscordant(VariantContext vc, Collection<VariantContext> compVCs) {
    if (vc == null) return false;

    // if we're not looking at specific samples then the absence of a compVC means discordance
    if (NO_SAMPLES_SPECIFIED) return (compVCs == null || compVCs.isEmpty());

    // check if we find it in the variant rod
    Map<String, Genotype> genotypes = vc.getGenotypes(samples);
    for (Genotype g : genotypes.values()) {
      if (sampleHasVariant(g)) {
        // There is a variant called (or filtered with not exclude filtered option set) that is not
        // HomRef for at least one of the samples.
        if (compVCs == null) return true;
        // Look for this sample in the all vcs of the comp ROD track.
        boolean foundVariant = false;
        for (VariantContext compVC : compVCs) {
          if (sampleHasVariant(compVC.getGenotype(g.getSampleName()))) {
            foundVariant = true;
            break;
          }
        }
        // if (at least one sample) was not found in all VCs of the comp ROD, we have discordance
        if (!foundVariant) return true;
      }
    }
    return false; // we only get here if all samples have a variant in the comp rod.
  }
  /**
   * Compares the covariate report lists.
   *
   * @param diffs map where to annotate the difference.
   * @param other the argument collection to compare against.
   * @param thisRole the name for this argument collection that makes sense to the user.
   * @param otherRole the name for the other argument collection that makes sense to the end user.
   * @return <code>true</code> if a difference was found.
   */
  @Requires("diffs != null && other != null && thisRole != null && otherRole != null")
  private boolean compareRequestedCovariates(
      final Map<String, String> diffs,
      final RecalibrationArgumentCollection other,
      final String thisRole,
      final String otherRole) {

    final Set<String> beforeNames = new HashSet<>(this.COVARIATES.length);
    final Set<String> afterNames = new HashSet<>(other.COVARIATES.length);
    Utils.addAll(beforeNames, this.COVARIATES);
    Utils.addAll(afterNames, other.COVARIATES);
    final Set<String> intersect = new HashSet<>(Math.min(beforeNames.size(), afterNames.size()));
    intersect.addAll(beforeNames);
    intersect.retainAll(afterNames);

    String diffMessage = null;
    if (intersect.size()
        == 0) { // In practice this is not possible due to required covariates but...
      diffMessage =
          String.format(
              "There are no common covariates between '%s' and '%s'"
                  + " recalibrator reports. Covariates in '%s': {%s}. Covariates in '%s': {%s}.",
              thisRole,
              otherRole,
              thisRole,
              Utils.join(", ", this.COVARIATES),
              otherRole,
              Utils.join(",", other.COVARIATES));
    } else if (intersect.size() != beforeNames.size() || intersect.size() != afterNames.size()) {
      beforeNames.removeAll(intersect);
      afterNames.removeAll(intersect);
      diffMessage =
          String.format(
              "There are differences in the set of covariates requested in the"
                  + " '%s' and '%s' recalibrator reports. "
                  + " Exclusive to '%s': {%s}. Exclusive to '%s': {%s}.",
              thisRole,
              otherRole,
              thisRole,
              Utils.join(", ", beforeNames),
              otherRole,
              Utils.join(", ", afterNames));
    }
    if (diffMessage != null) {
      diffs.put("covariate", diffMessage);
      return true;
    } else {
      return false;
    }
  }
 /**
  * Annotates a map with any difference encountered in a simple value report argument that differs
  * between this an another {@link RecalibrationArgumentCollection} instance.
  *
  * <p>The key of the new entry would be the name of that argument in the report file. The value is
  * a message that explains the difference to the end user.
  *
  * <p>
  *
  * <p>This method should not return any exception.
  *
  * @param diffs where to annotate the differences.
  * @param name the name of the report argument to compare.
  * @param thisValue this argument collection value for that argument.
  * @param otherValue the other collection value for that argument.
  * @param thisRole the name used to refer to this RAC report that makes sense to the end user.
  * @param otherRole the name used to refer to the other RAC report that makes sense to the end
  *     user.
  * @type T the argument Object value type.
  * @return <code>true</code> if a difference has been spotted, thus <code>diff</code> has been
  *     modified.
  */
 private <T> boolean compareSimpleReportArgument(
     final Map<String, String> diffs,
     final String name,
     final T thisValue,
     final T otherValue,
     final String thisRole,
     final String otherRole) {
   if (thisValue == null && otherValue == null) {
     return false;
   } else if (thisValue != null && thisValue.equals(otherValue)) {
     return false;
   } else {
     diffs.put(
         name,
         String.format(
             "differences between '%s' {%s} and '%s' {%s}.",
             thisRole,
             thisValue == null ? "" : thisValue,
             otherRole,
             otherValue == null ? "" : otherValue));
     return true;
   }
 }
  /**
   * Output the finalized report
   *
   * @param result an integer that doesn't get used for anything
   */
  public void onTraversalDone(Integer result) {
    logger.info("Finalizing variant report");

    for (StateKey stateKey : evaluationContexts.keySet()) {
      NewEvaluationContext nec = evaluationContexts.get(stateKey);

      for (VariantEvaluator ve : nec.getEvaluationClassList().values()) {
        ve.finalizeEvaluation();

        AnalysisModuleScanner scanner = new AnalysisModuleScanner(ve);
        Map<Field, DataPoint> datamap = scanner.getData();

        for (Field field : datamap.keySet()) {
          try {
            field.setAccessible(true);

            if (field.get(ve) instanceof TableType) {
              TableType t = (TableType) field.get(ve);

              String subTableName = ve.getClass().getSimpleName() + "." + field.getName();
              final DataPoint dataPointAnn = datamap.get(field);

              GATKReportTable table;
              if (!report.hasTable(subTableName)) {
                report.addTable(subTableName, dataPointAnn.description());
                table = report.getTable(subTableName);

                table.addPrimaryKey("entry", false);
                table.addColumn(subTableName, subTableName);

                for (VariantStratifier vs : stratificationObjects) {
                  table.addColumn(vs.getName(), "unknown");
                }

                table.addColumn("row", "unknown");

                for (Object o : t.getColumnKeys()) {
                  String c;

                  if (o instanceof String) {
                    c = (String) o;
                  } else {
                    c = o.toString();
                  }

                  table.addColumn(c, 0.0);
                }
              } else {
                table = report.getTable(subTableName);
              }

              for (int row = 0; row < t.getRowKeys().length; row++) {
                String r = (String) t.getRowKeys()[row];

                for (VariantStratifier vs : stratificationObjects) {
                  final String columnName = vs.getName();
                  table.set(stateKey.toString() + r, columnName, stateKey.get(columnName));
                }

                for (int col = 0; col < t.getColumnKeys().length; col++) {
                  String c;
                  if (t.getColumnKeys()[col] instanceof String) {
                    c = (String) t.getColumnKeys()[col];
                  } else {
                    c = t.getColumnKeys()[col].toString();
                  }

                  String newStateKey = stateKey.toString() + r;
                  table.set(newStateKey, c, t.getCell(row, col));

                  table.set(newStateKey, "row", r);
                }
              }
            } else {
              GATKReportTable table = report.getTable(ve.getClass().getSimpleName());

              for (VariantStratifier vs : stratificationObjects) {
                String columnName = vs.getName();

                table.set(stateKey.toString(), columnName, stateKey.get(vs.getName()));
              }

              table.set(stateKey.toString(), field.getName(), field.get(ve));
            }
          } catch (IllegalAccessException e) {
            throw new StingException("IllegalAccessException: " + e);
          }
        }
      }
    }

    report.print(out);
  }
Exemplo n.º 5
0
  /** Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher */
  public void initialize() {
    // Get list of samples to include in the output
    List<String> rodNames = Arrays.asList(variantCollection.variants.getName());

    Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames);
    TreeSet<String> vcfSamples =
        new TreeSet<String>(
            SampleUtils.getSampleList(
                vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));

    Collection<String> samplesFromFile = SampleUtils.getSamplesFromFiles(sampleFiles);
    Collection<String> samplesFromExpressions =
        SampleUtils.matchSamplesExpressions(vcfSamples, sampleExpressions);

    // first, add any requested samples
    samples.addAll(samplesFromFile);
    samples.addAll(samplesFromExpressions);
    samples.addAll(sampleNames);

    // if none were requested, we want all of them
    if (samples.isEmpty()) {
      samples.addAll(vcfSamples);
      NO_SAMPLES_SPECIFIED = true;
    }

    // now, exclude any requested samples
    Collection<String> XLsamplesFromFile = SampleUtils.getSamplesFromFiles(XLsampleFiles);
    samples.removeAll(XLsamplesFromFile);
    samples.removeAll(XLsampleNames);

    if (samples.size() == 0 && !NO_SAMPLES_SPECIFIED)
      throw new UserException(
          "All samples requested to be included were also requested to be excluded.");

    for (String sample : samples) logger.info("Including sample '" + sample + "'");

    // if user specified types to include, add these, otherwise, add all possible variant context
    // types to list of vc types to include
    if (TYPES_TO_INCLUDE.isEmpty()) {

      for (VariantContext.Type t : VariantContext.Type.values()) selectedTypes.add(t);

    } else {
      for (VariantContext.Type t : TYPES_TO_INCLUDE) selectedTypes.add(t);
    }
    // Initialize VCF header
    Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), logger);
    headerLines.add(new VCFHeaderLine("source", "SelectVariants"));

    if (KEEP_ORIGINAL_CHR_COUNTS) {
      headerLines.add(
          new VCFFormatHeaderLine("AC_Orig", 1, VCFHeaderLineType.Integer, "Original AC"));
      headerLines.add(
          new VCFFormatHeaderLine("AF_Orig", 1, VCFHeaderLineType.Float, "Original AF"));
      headerLines.add(
          new VCFFormatHeaderLine("AN_Orig", 1, VCFHeaderLineType.Integer, "Original AN"));
    }
    vcfWriter.writeHeader(new VCFHeader(headerLines, samples));

    for (int i = 0; i < SELECT_EXPRESSIONS.size(); i++) {
      // It's not necessary that the user supply select names for the JEXL expressions, since those
      // expressions will only be needed for omitting records.  Make up the select names here.
      selectNames.add(String.format("select-%d", i));
    }

    jexls = VariantContextUtils.initializeMatchExps(selectNames, SELECT_EXPRESSIONS);

    // Look at the parameters to decide which analysis to perform
    DISCORDANCE_ONLY = discordanceTrack.isBound();
    if (DISCORDANCE_ONLY)
      logger.info(
          "Selecting only variants discordant with the track: " + discordanceTrack.getName());

    CONCORDANCE_ONLY = concordanceTrack.isBound();
    if (CONCORDANCE_ONLY)
      logger.info(
          "Selecting only variants concordant with the track: " + concordanceTrack.getName());

    if (MENDELIAN_VIOLATIONS) {
      if (FAMILY_STRUCTURE_FILE != null) {
        try {
          for (final String line : new XReadLines(FAMILY_STRUCTURE_FILE)) {
            MendelianViolation mv =
                new MendelianViolation(line, MENDELIAN_VIOLATION_QUAL_THRESHOLD);
            if (samples.contains(mv.getSampleChild())
                && samples.contains(mv.getSampleDad())
                && samples.contains(mv.getSampleMom())) mvSet.add(mv);
          }
        } catch (FileNotFoundException e) {
          throw new UserException.CouldNotReadInputFile(FAMILY_STRUCTURE_FILE, e);
        }
        if (outMVFile != null)
          try {
            outMVFileStream = new PrintStream(outMVFile);
          } catch (FileNotFoundException e) {
            throw new UserException.CouldNotCreateOutputFile(
                outMVFile, "Can't open output file", e);
          }
      } else
        mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD));
    } else if (!FAMILY_STRUCTURE.isEmpty()) {
      mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD));
      MENDELIAN_VIOLATIONS = true;
    }

    SELECT_RANDOM_NUMBER = numRandom > 0;
    if (SELECT_RANDOM_NUMBER) {
      logger.info("Selecting " + numRandom + " variants at random from the variant track");
      variantArray = new RandomVariantStructure[numRandom];
    }

    SELECT_RANDOM_FRACTION = fractionRandom > 0;
    if (SELECT_RANDOM_FRACTION)
      logger.info(
          "Selecting approximately "
              + 100.0 * fractionRandom
              + "% of the variants at random from the variant track");
  }