示例#1
0
文件: SnpEff.java 项目: wenjiany/gatk
  @Override
  public void initialize(
      AnnotatorCompatible walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines) {
    // Make sure that we actually have a valid SnpEff rod binding (just in case the user specified
    // -A SnpEff
    // without providing a SnpEff rod via --snpEffFile):
    if (!isValidRodBinding(walker.getSnpEffRodBinding())) {
      canAnnotate = false;
      return;
    }

    RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding();

    // Make sure that the SnpEff version number and command-line header lines are present in the VCF
    // header of
    // the SnpEff rod, and that the file was generated by a supported version of SnpEff:
    VCFHeader snpEffVCFHeader =
        GATKVCFUtils.getVCFHeadersFromRods(toolkit, Arrays.asList(snpEffRodBinding.getName()))
            .get(snpEffRodBinding.getName());
    VCFHeaderLine snpEffVersionLine =
        snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_VERSION_LINE_KEY);
    VCFHeaderLine snpEffCommandLine =
        snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_COMMAND_LINE_KEY);

    if (!isValidSnpEffVersionAndCommandLine(snpEffVersionLine, snpEffCommandLine)) {
      canAnnotate = false;
      return;
    }

    // If everything looks ok, add the SnpEff version number and command-line header lines to the
    // header of the VCF output file, changing the key names so that our output file won't be
    // mistaken in the future for a SnpEff output file:
    headerLines.add(
        new VCFHeaderLine(OUTPUT_VCF_HEADER_VERSION_LINE_KEY, snpEffVersionLine.getValue()));
    headerLines.add(
        new VCFHeaderLine(OUTPUT_VCF_HEADER_COMMAND_LINE_KEY, snpEffCommandLine.getValue()));

    // Can only be called from VariantAnnotator
    if (!(walker instanceof VariantAnnotator)) {
      if (walker != null)
        logger.warn(
            "Annotation will not be calculated, must be called from VariantAnnotator, not "
                + walker.getClass().getName());
      else logger.warn("Annotation will not be calculated, must be called from VariantAnnotator");
      canAnnotate = false;
      return;
    }
  }
示例#2
0
文件: SnpEff.java 项目: wenjiany/gatk
 private boolean isValidRodBinding(RodBinding<VariantContext> snpEffRodBinding) {
   if (snpEffRodBinding == null || !snpEffRodBinding.isBound()) {
     logger.warn(
         "The SnpEff annotator requires that a SnpEff VCF output file be provided "
             + "as a rodbinding on the command line via the --snpEffFile option, but "
             + "no SnpEff rodbinding was found.");
     return false;
   }
   return true;
 }
示例#3
0
  private void writeRecord(VariantContext vc, RefMetaDataTracker tracker, GenomeLoc loc) {
    if (!wroteHeader) {
      wroteHeader = true;

      // setup the header fields
      Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
      hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(variants.getName())));
      hInfo.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_KEY));

      allowedGenotypeFormatStrings.add(VCFConstants.GENOTYPE_KEY);
      for (VCFHeaderLine field : hInfo) {
        if (field instanceof VCFFormatHeaderLine) {
          allowedGenotypeFormatStrings.add(((VCFFormatHeaderLine) field).getID());
        }
      }

      samples = new LinkedHashSet<String>();
      if (sampleName != null) {
        samples.add(sampleName);
      } else {
        // try VCF first
        samples =
            SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getName()));

        if (samples.isEmpty()) {
          List<Feature> features = tracker.getValues(variants, loc);
          if (features.size() == 0)
            throw new IllegalStateException(
                "No rod data is present, but we just created a VariantContext");

          Feature f = features.get(0);
          if (f instanceof RawHapMapFeature)
            samples.addAll(Arrays.asList(((RawHapMapFeature) f).getSampleIDs()));
          else samples.addAll(vc.getSampleNames());
        }
      }

      vcfwriter.writeHeader(new VCFHeader(hInfo, samples));
    }

    vc = GATKVariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatStrings);
    vcfwriter.add(vc);
  }
示例#4
0
  private Collection<VariantContext> getVariantContexts(
      RefMetaDataTracker tracker, ReferenceContext ref) {

    List<Feature> features = tracker.getValues(variants, ref.getLocus());
    List<VariantContext> VCs = new ArrayList<VariantContext>(features.size());

    for (Feature record : features) {
      if (VariantContextAdaptors.canBeConvertedToVariantContext(record)) {
        // we need to special case the HapMap format because indels aren't handled correctly
        if (record instanceof RawHapMapFeature) {

          // is it an indel?
          RawHapMapFeature hapmap = (RawHapMapFeature) record;
          if (hapmap.getAlleles()[0].equals(RawHapMapFeature.NULL_ALLELE_STRING)
              || hapmap.getAlleles()[1].equals(RawHapMapFeature.NULL_ALLELE_STRING)) {
            // get the dbsnp object corresponding to this record (needed to help us distinguish
            // between insertions and deletions)
            VariantContext dbsnpVC = getDbsnp(hapmap.getName());
            if (dbsnpVC == null || dbsnpVC.isMixed()) continue;

            Map<String, Allele> alleleMap = new HashMap<String, Allele>(2);
            alleleMap.put(
                RawHapMapFeature.DELETION,
                Allele.create(ref.getBase(), dbsnpVC.isSimpleInsertion()));
            alleleMap.put(
                RawHapMapFeature.INSERTION,
                Allele.create(
                    (char) ref.getBase() + ((RawHapMapFeature) record).getAlleles()[1],
                    !dbsnpVC.isSimpleInsertion()));
            hapmap.setActualAlleles(alleleMap);

            // also, use the correct positioning for insertions
            hapmap.updatePosition(dbsnpVC.getStart());

            if (hapmap.getStart() < ref.getWindow().getStart()) {
              logger.warn(
                  "Hapmap record at "
                      + ref.getLocus()
                      + " represents an indel too large to be converted; skipping...");
              continue;
            }
          }
        }

        // ok, we might actually be able to turn this record in a variant context
        VariantContext vc =
            VariantContextAdaptors.toVariantContext(variants.getName(), record, ref);

        if (vc != null) // sometimes the track has odd stuff in it that can't be converted
        VCs.add(vc);
      }
    }

    return VCs;
  }
示例#5
0
  public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    if (tracker == null || !BaseUtils.isRegularBase(ref.getBase())) return 0;

    Collection<VariantContext> contexts = getVariantContexts(tracker, ref);

    for (VariantContext vc : contexts) {
      VariantContextBuilder builder = new VariantContextBuilder(vc);

      // set the appropriate sample name if necessary
      if (sampleName != null && vc.hasGenotypes() && vc.hasGenotype(variants.getName())) {
        Genotype g =
            new GenotypeBuilder(vc.getGenotype(variants.getName())).name(sampleName).make();
        builder.genotypes(g);
      }

      final VariantContext withID = variantOverlapAnnotator.annotateRsID(tracker, builder.make());
      writeRecord(withID, tracker, ref.getLocus());
    }

    return 1;
  }
  public void initialize() {

    // Initialize VCF header
    if (vcfWriter != null) {
      Map<String, VCFHeader> header =
          GATKVCFUtils.getVCFHeadersFromRodPrefix(getToolkit(), alleles.getName());
      samples =
          SampleUtils.getSampleList(
              header, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
      Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(header.values(), true);
      headerLines.add(new VCFHeaderLine("source", "GenotypeAndValidate"));
      headerLines.add(
          GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.GENOTYPE_AND_VALIDATE_STATUS_KEY));
      vcfWriter.writeHeader(new VCFHeader(headerLines, samples));
    }

    // Filling in SNP calling arguments for UG
    UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
    uac.outputMode = OutputMode.EMIT_ALL_SITES;
    uac.alleles = alleles;

    // TODO -- if we change this tool to actually validate against the called allele, then this if
    // statement is needed;
    // TODO -- for now, though, we need to be able to validate the right allele (because we only
    // test isVariant below) [EB]
    // if (!bamIsTruth)
    uac.genotypingOutputMode = GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES;

    if (mbq >= 0) uac.MIN_BASE_QUALTY_SCORE = mbq;
    if (deletions >= 0) uac.MAX_DELETION_FRACTION = deletions;
    else uac.MAX_DELETION_FRACTION = 1.0;
    if (emitConf >= 0) uac.genotypeArgs.STANDARD_CONFIDENCE_FOR_EMITTING = emitConf;
    if (callConf >= 0) uac.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING = callConf;

    final GenomeAnalysisEngine toolkit = getToolkit();
    uac.GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP;
    snpEngine =
        new UnifiedGenotypingEngine(
            uac, FixedAFCalculatorProvider.createThreadSafeProvider(toolkit, uac, logger), toolkit);

    // Adding the INDEL calling arguments for UG
    UnifiedArgumentCollection uac_indel = uac.clone();
    uac_indel.GLmodel = GenotypeLikelihoodsCalculationModel.Model.INDEL;
    indelEngine =
        new UnifiedGenotypingEngine(
            uac_indel,
            FixedAFCalculatorProvider.createThreadSafeProvider(toolkit, uac, logger),
            toolkit);

    // make sure we have callConf set to the threshold set by the UAC so we can use it later.
    callConf = uac.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING;
  }