예제 #1
0
파일: SnpEff.java 프로젝트: wenjiany/gatk
  @Override
  public void initialize(
      AnnotatorCompatible walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines) {
    // Make sure that we actually have a valid SnpEff rod binding (just in case the user specified
    // -A SnpEff
    // without providing a SnpEff rod via --snpEffFile):
    if (!isValidRodBinding(walker.getSnpEffRodBinding())) {
      canAnnotate = false;
      return;
    }

    RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding();

    // Make sure that the SnpEff version number and command-line header lines are present in the VCF
    // header of
    // the SnpEff rod, and that the file was generated by a supported version of SnpEff:
    VCFHeader snpEffVCFHeader =
        GATKVCFUtils.getVCFHeadersFromRods(toolkit, Arrays.asList(snpEffRodBinding.getName()))
            .get(snpEffRodBinding.getName());
    VCFHeaderLine snpEffVersionLine =
        snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_VERSION_LINE_KEY);
    VCFHeaderLine snpEffCommandLine =
        snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_COMMAND_LINE_KEY);

    if (!isValidSnpEffVersionAndCommandLine(snpEffVersionLine, snpEffCommandLine)) {
      canAnnotate = false;
      return;
    }

    // If everything looks ok, add the SnpEff version number and command-line header lines to the
    // header of the VCF output file, changing the key names so that our output file won't be
    // mistaken in the future for a SnpEff output file:
    headerLines.add(
        new VCFHeaderLine(OUTPUT_VCF_HEADER_VERSION_LINE_KEY, snpEffVersionLine.getValue()));
    headerLines.add(
        new VCFHeaderLine(OUTPUT_VCF_HEADER_COMMAND_LINE_KEY, snpEffCommandLine.getValue()));

    // Can only be called from VariantAnnotator
    if (!(walker instanceof VariantAnnotator)) {
      if (walker != null)
        logger.warn(
            "Annotation will not be calculated, must be called from VariantAnnotator, not "
                + walker.getClass().getName());
      else logger.warn("Annotation will not be calculated, must be called from VariantAnnotator");
      canAnnotate = false;
      return;
    }
  }
예제 #2
0
  private void writeRecord(VariantContext vc, RefMetaDataTracker tracker, GenomeLoc loc) {
    if (!wroteHeader) {
      wroteHeader = true;

      // setup the header fields
      Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
      hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(variants.getName())));
      hInfo.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_KEY));

      allowedGenotypeFormatStrings.add(VCFConstants.GENOTYPE_KEY);
      for (VCFHeaderLine field : hInfo) {
        if (field instanceof VCFFormatHeaderLine) {
          allowedGenotypeFormatStrings.add(((VCFFormatHeaderLine) field).getID());
        }
      }

      samples = new LinkedHashSet<String>();
      if (sampleName != null) {
        samples.add(sampleName);
      } else {
        // try VCF first
        samples =
            SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getName()));

        if (samples.isEmpty()) {
          List<Feature> features = tracker.getValues(variants, loc);
          if (features.size() == 0)
            throw new IllegalStateException(
                "No rod data is present, but we just created a VariantContext");

          Feature f = features.get(0);
          if (f instanceof RawHapMapFeature)
            samples.addAll(Arrays.asList(((RawHapMapFeature) f).getSampleIDs()));
          else samples.addAll(vc.getSampleNames());
        }
      }

      vcfwriter.writeHeader(new VCFHeader(hInfo, samples));
    }

    vc = GATKVariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatStrings);
    vcfwriter.add(vc);
  }
예제 #3
0
  /**
   * Gets the header lines for the VCF writer
   *
   * @return A set of VCF header lines
   */
  private static Set<VCFHeaderLine> getHeaderInfo() {
    Set<VCFHeaderLine> headerLines = new HashSet<>();

    // INFO fields for overall data
    headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY));
    headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.AVG_INTERVAL_DP_KEY));
    headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.INTERVAL_GC_CONTENT_KEY));
    headerLines.add(
        new VCFInfoHeaderLine(
            "Diagnose Targets", 0, VCFHeaderLineType.Flag, "DiagnoseTargets mode"));

    // FORMAT fields for each genotype
    headerLines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_FILTER_KEY));
    headerLines.add(
        GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.AVG_INTERVAL_DP_BY_SAMPLE_KEY));
    headerLines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.LOW_COVERAGE_LOCI));
    headerLines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.ZERO_COVERAGE_LOCI));

    // FILTER fields
    for (CallableStatus stat : CallableStatus.values())
      headerLines.add(new VCFFilterHeaderLine(stat.name(), stat.description));

    return headerLines;
  }
예제 #4
0
  /** Prepare the output file and the list of available features. */
  public void initialize() {

    if (LIST) {
      AnnotationHelpUtils.listAnnotations();
      System.exit(0);
    }

    // get the list of all sample names from the variant VCF input rod, if applicable
    final List<String> rodName = Arrays.asList(variantCollection.variants.getName());
    final Set<String> samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), rodName);

    if (USE_ALL_ANNOTATIONS)
      engine = new VariantAnnotatorEngine(annotationsToExclude, this, getToolkit());
    else
      engine =
          new VariantAnnotatorEngine(
              annotationGroupsToUse, annotationsToUse, annotationsToExclude, this, getToolkit());
    engine.initializeExpressions(expressionsToUse);
    engine.setExpressionAlleleConcordance(expressionAlleleConcordance);

    // setup the header fields
    // note that if any of the definitions conflict with our new ones, then we want to overwrite the
    // old ones
    final Set<VCFHeaderLine> hInfo = new HashSet<>();
    hInfo.addAll(engine.getVCFAnnotationDescriptions());
    for (final VCFHeaderLine line : GATKVCFUtils.getHeaderFields(getToolkit(), rodName)) {
      if (isUniqueHeaderLine(line, hInfo)) hInfo.add(line);
    }
    // for the expressions, pull the info header line from the header of the resource rod
    for (final VariantAnnotatorEngine.VAExpression expression : engine.getRequestedExpressions()) {
      // special case the ID field
      if (expression.fieldName.equals("ID")) {
        hInfo.add(
            new VCFInfoHeaderLine(
                expression.fullName,
                1,
                VCFHeaderLineType.String,
                "ID field transferred from external VCF resource"));
        continue;
      }
      VCFInfoHeaderLine targetHeaderLine = null;
      for (final VCFHeaderLine line :
          GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(expression.binding.getName()))) {
        if (line instanceof VCFInfoHeaderLine) {
          final VCFInfoHeaderLine infoline = (VCFInfoHeaderLine) line;
          if (infoline.getID().equals(expression.fieldName)) {
            targetHeaderLine = infoline;
            break;
          }
        }
      }

      if (targetHeaderLine != null) {
        if (targetHeaderLine.getCountType() == VCFHeaderLineCount.INTEGER)
          hInfo.add(
              new VCFInfoHeaderLine(
                  expression.fullName,
                  targetHeaderLine.getCount(),
                  targetHeaderLine.getType(),
                  targetHeaderLine.getDescription()));
        else
          hInfo.add(
              new VCFInfoHeaderLine(
                  expression.fullName,
                  targetHeaderLine.getCountType(),
                  targetHeaderLine.getType(),
                  targetHeaderLine.getDescription()));
      } else {
        hInfo.add(
            new VCFInfoHeaderLine(
                expression.fullName,
                VCFHeaderLineCount.UNBOUNDED,
                VCFHeaderLineType.String,
                "Value transferred from another external VCF resource"));
      }
    }

    engine.makeHeaderInfoMap(hInfo);
    engine.invokeAnnotationInitializationMethods(hInfo);

    VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
    vcfWriter.writeHeader(vcfHeader);
  }
  /** Parse the familial relationship specification, and initialize VCF writer */
  public void initialize() {
    List<String> rodNames = new ArrayList<String>();
    rodNames.add(variantCollection.variants.getName());

    Map<String, VCFHeader> vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames);
    Set<String> vcfSamples =
        SampleUtils.getSampleList(
            vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);

    // set up tumor and normal samples
    if (!vcfSamples.contains(normalSample))
      throw new UserException.BadArgumentValue(
          "--normalSample",
          "the normal sample " + normalSample + " doesn't match any sample from the input VCF");
    if (!vcfSamples.contains(tumorSample))
      throw new UserException.BadArgumentValue(
          "--tumorSample",
          "the tumor sample " + tumorSample + " doesn't match any sample from the input VCF");

    logger.info("Normal sample: " + normalSample);
    logger.info("Tumor  sample: " + tumorSample);

    Set<VCFHeaderLine> headerLines = new HashSet<VCFHeaderLine>();
    headerLines.addAll(GATKVCFUtils.getHeaderFields(this.getToolkit()));
    headerLines.add(
        new VCFInfoHeaderLine(
            VCFConstants.SOMATIC_KEY,
            0,
            VCFHeaderLineType.Flag,
            "Is this a confidently called somatic mutation"));
    headerLines.add(
        new VCFInfoHeaderLine(
            SOMATIC_LOD_TAG_NAME,
            1,
            VCFHeaderLineType.Float,
            "log10 probability that the site is a somatic mutation"));
    headerLines.add(
        new VCFInfoHeaderLine(
            SOMATIC_AC_TAG_NAME,
            1,
            VCFHeaderLineType.Integer,
            "Allele count of samples with somatic event"));
    headerLines.add(
        new VCFInfoHeaderLine(
            SOMATIC_NONREF_TAG_NAME,
            1,
            VCFHeaderLineType.Integer,
            "Number of samples with somatic event"));

    samples.add(normalSample);
    samples.add(tumorSample);
    vcfWriter.writeHeader(new VCFHeader(headerLines, samples));
  }