@Override public void initialize( AnnotatorCompatible walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines) { // Make sure that we actually have a valid SnpEff rod binding (just in case the user specified // -A SnpEff // without providing a SnpEff rod via --snpEffFile): if (!isValidRodBinding(walker.getSnpEffRodBinding())) { canAnnotate = false; return; } RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding(); // Make sure that the SnpEff version number and command-line header lines are present in the VCF // header of // the SnpEff rod, and that the file was generated by a supported version of SnpEff: VCFHeader snpEffVCFHeader = GATKVCFUtils.getVCFHeadersFromRods(toolkit, Arrays.asList(snpEffRodBinding.getName())) .get(snpEffRodBinding.getName()); VCFHeaderLine snpEffVersionLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_VERSION_LINE_KEY); VCFHeaderLine snpEffCommandLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_COMMAND_LINE_KEY); if (!isValidSnpEffVersionAndCommandLine(snpEffVersionLine, snpEffCommandLine)) { canAnnotate = false; return; } // If everything looks ok, add the SnpEff version number and command-line header lines to the // header of the VCF output file, changing the key names so that our output file won't be // mistaken in the future for a SnpEff output file: headerLines.add( new VCFHeaderLine(OUTPUT_VCF_HEADER_VERSION_LINE_KEY, snpEffVersionLine.getValue())); headerLines.add( new VCFHeaderLine(OUTPUT_VCF_HEADER_COMMAND_LINE_KEY, snpEffCommandLine.getValue())); // Can only be called from VariantAnnotator if (!(walker instanceof VariantAnnotator)) { if (walker != null) logger.warn( "Annotation will not be calculated, must be called from VariantAnnotator, not " + walker.getClass().getName()); else logger.warn("Annotation will not be calculated, must be called from VariantAnnotator"); canAnnotate = false; return; } }
private boolean isValidRodBinding(RodBinding<VariantContext> snpEffRodBinding) { if (snpEffRodBinding == null || !snpEffRodBinding.isBound()) { logger.warn( "The SnpEff annotator requires that a SnpEff VCF output file be provided " + "as a rodbinding on the command line via the --snpEffFile option, but " + "no SnpEff rodbinding was found."); return false; } return true; }
private void writeRecord(VariantContext vc, RefMetaDataTracker tracker, GenomeLoc loc) { if (!wroteHeader) { wroteHeader = true; // setup the header fields Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>(); hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(variants.getName()))); hInfo.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_KEY)); allowedGenotypeFormatStrings.add(VCFConstants.GENOTYPE_KEY); for (VCFHeaderLine field : hInfo) { if (field instanceof VCFFormatHeaderLine) { allowedGenotypeFormatStrings.add(((VCFFormatHeaderLine) field).getID()); } } samples = new LinkedHashSet<String>(); if (sampleName != null) { samples.add(sampleName); } else { // try VCF first samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getName())); if (samples.isEmpty()) { List<Feature> features = tracker.getValues(variants, loc); if (features.size() == 0) throw new IllegalStateException( "No rod data is present, but we just created a VariantContext"); Feature f = features.get(0); if (f instanceof RawHapMapFeature) samples.addAll(Arrays.asList(((RawHapMapFeature) f).getSampleIDs())); else samples.addAll(vc.getSampleNames()); } } vcfwriter.writeHeader(new VCFHeader(hInfo, samples)); } vc = GATKVariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatStrings); vcfwriter.add(vc); }
private Collection<VariantContext> getVariantContexts( RefMetaDataTracker tracker, ReferenceContext ref) { List<Feature> features = tracker.getValues(variants, ref.getLocus()); List<VariantContext> VCs = new ArrayList<VariantContext>(features.size()); for (Feature record : features) { if (VariantContextAdaptors.canBeConvertedToVariantContext(record)) { // we need to special case the HapMap format because indels aren't handled correctly if (record instanceof RawHapMapFeature) { // is it an indel? RawHapMapFeature hapmap = (RawHapMapFeature) record; if (hapmap.getAlleles()[0].equals(RawHapMapFeature.NULL_ALLELE_STRING) || hapmap.getAlleles()[1].equals(RawHapMapFeature.NULL_ALLELE_STRING)) { // get the dbsnp object corresponding to this record (needed to help us distinguish // between insertions and deletions) VariantContext dbsnpVC = getDbsnp(hapmap.getName()); if (dbsnpVC == null || dbsnpVC.isMixed()) continue; Map<String, Allele> alleleMap = new HashMap<String, Allele>(2); alleleMap.put( RawHapMapFeature.DELETION, Allele.create(ref.getBase(), dbsnpVC.isSimpleInsertion())); alleleMap.put( RawHapMapFeature.INSERTION, Allele.create( (char) ref.getBase() + ((RawHapMapFeature) record).getAlleles()[1], !dbsnpVC.isSimpleInsertion())); hapmap.setActualAlleles(alleleMap); // also, use the correct positioning for insertions hapmap.updatePosition(dbsnpVC.getStart()); if (hapmap.getStart() < ref.getWindow().getStart()) { logger.warn( "Hapmap record at " + ref.getLocus() + " represents an indel too large to be converted; skipping..."); continue; } } } // ok, we might actually be able to turn this record in a variant context VariantContext vc = VariantContextAdaptors.toVariantContext(variants.getName(), record, ref); if (vc != null) // sometimes the track has odd stuff in it that can't be converted VCs.add(vc); } } return VCs; }
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if (tracker == null || !BaseUtils.isRegularBase(ref.getBase())) return 0; Collection<VariantContext> contexts = getVariantContexts(tracker, ref); for (VariantContext vc : contexts) { VariantContextBuilder builder = new VariantContextBuilder(vc); // set the appropriate sample name if necessary if (sampleName != null && vc.hasGenotypes() && vc.hasGenotype(variants.getName())) { Genotype g = new GenotypeBuilder(vc.getGenotype(variants.getName())).name(sampleName).make(); builder.genotypes(g); } final VariantContext withID = variantOverlapAnnotator.annotateRsID(tracker, builder.make()); writeRecord(withID, tracker, ref.getLocus()); } return 1; }
public void initialize() { // Initialize VCF header if (vcfWriter != null) { Map<String, VCFHeader> header = GATKVCFUtils.getVCFHeadersFromRodPrefix(getToolkit(), alleles.getName()); samples = SampleUtils.getSampleList( header, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(header.values(), true); headerLines.add(new VCFHeaderLine("source", "GenotypeAndValidate")); headerLines.add( GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.GENOTYPE_AND_VALIDATE_STATUS_KEY)); vcfWriter.writeHeader(new VCFHeader(headerLines, samples)); } // Filling in SNP calling arguments for UG UnifiedArgumentCollection uac = new UnifiedArgumentCollection(); uac.outputMode = OutputMode.EMIT_ALL_SITES; uac.alleles = alleles; // TODO -- if we change this tool to actually validate against the called allele, then this if // statement is needed; // TODO -- for now, though, we need to be able to validate the right allele (because we only // test isVariant below) [EB] // if (!bamIsTruth) uac.genotypingOutputMode = GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES; if (mbq >= 0) uac.MIN_BASE_QUALTY_SCORE = mbq; if (deletions >= 0) uac.MAX_DELETION_FRACTION = deletions; else uac.MAX_DELETION_FRACTION = 1.0; if (emitConf >= 0) uac.genotypeArgs.STANDARD_CONFIDENCE_FOR_EMITTING = emitConf; if (callConf >= 0) uac.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING = callConf; final GenomeAnalysisEngine toolkit = getToolkit(); uac.GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP; snpEngine = new UnifiedGenotypingEngine( uac, FixedAFCalculatorProvider.createThreadSafeProvider(toolkit, uac, logger), toolkit); // Adding the INDEL calling arguments for UG UnifiedArgumentCollection uac_indel = uac.clone(); uac_indel.GLmodel = GenotypeLikelihoodsCalculationModel.Model.INDEL; indelEngine = new UnifiedGenotypingEngine( uac_indel, FixedAFCalculatorProvider.createThreadSafeProvider(toolkit, uac, logger), toolkit); // make sure we have callConf set to the threshold set by the UAC so we can use it later. callConf = uac.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING; }