@Override public void initialize( AnnotatorCompatible walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines) { // Make sure that we actually have a valid SnpEff rod binding (just in case the user specified // -A SnpEff // without providing a SnpEff rod via --snpEffFile): if (!isValidRodBinding(walker.getSnpEffRodBinding())) { canAnnotate = false; return; } RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding(); // Make sure that the SnpEff version number and command-line header lines are present in the VCF // header of // the SnpEff rod, and that the file was generated by a supported version of SnpEff: VCFHeader snpEffVCFHeader = GATKVCFUtils.getVCFHeadersFromRods(toolkit, Arrays.asList(snpEffRodBinding.getName())) .get(snpEffRodBinding.getName()); VCFHeaderLine snpEffVersionLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_VERSION_LINE_KEY); VCFHeaderLine snpEffCommandLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_COMMAND_LINE_KEY); if (!isValidSnpEffVersionAndCommandLine(snpEffVersionLine, snpEffCommandLine)) { canAnnotate = false; return; } // If everything looks ok, add the SnpEff version number and command-line header lines to the // header of the VCF output file, changing the key names so that our output file won't be // mistaken in the future for a SnpEff output file: headerLines.add( new VCFHeaderLine(OUTPUT_VCF_HEADER_VERSION_LINE_KEY, snpEffVersionLine.getValue())); headerLines.add( new VCFHeaderLine(OUTPUT_VCF_HEADER_COMMAND_LINE_KEY, snpEffCommandLine.getValue())); // Can only be called from VariantAnnotator if (!(walker instanceof VariantAnnotator)) { if (walker != null) logger.warn( "Annotation will not be calculated, must be called from VariantAnnotator, not " + walker.getClass().getName()); else logger.warn("Annotation will not be calculated, must be called from VariantAnnotator"); canAnnotate = false; return; } }
private void writeRecord(VariantContext vc, RefMetaDataTracker tracker, GenomeLoc loc) { if (!wroteHeader) { wroteHeader = true; // setup the header fields Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>(); hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(variants.getName()))); hInfo.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_KEY)); allowedGenotypeFormatStrings.add(VCFConstants.GENOTYPE_KEY); for (VCFHeaderLine field : hInfo) { if (field instanceof VCFFormatHeaderLine) { allowedGenotypeFormatStrings.add(((VCFFormatHeaderLine) field).getID()); } } samples = new LinkedHashSet<String>(); if (sampleName != null) { samples.add(sampleName); } else { // try VCF first samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getName())); if (samples.isEmpty()) { List<Feature> features = tracker.getValues(variants, loc); if (features.size() == 0) throw new IllegalStateException( "No rod data is present, but we just created a VariantContext"); Feature f = features.get(0); if (f instanceof RawHapMapFeature) samples.addAll(Arrays.asList(((RawHapMapFeature) f).getSampleIDs())); else samples.addAll(vc.getSampleNames()); } } vcfwriter.writeHeader(new VCFHeader(hInfo, samples)); } vc = GATKVariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatStrings); vcfwriter.add(vc); }
/** * Gets the header lines for the VCF writer * * @return A set of VCF header lines */ private static Set<VCFHeaderLine> getHeaderInfo() { Set<VCFHeaderLine> headerLines = new HashSet<>(); // INFO fields for overall data headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY)); headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.AVG_INTERVAL_DP_KEY)); headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.INTERVAL_GC_CONTENT_KEY)); headerLines.add( new VCFInfoHeaderLine( "Diagnose Targets", 0, VCFHeaderLineType.Flag, "DiagnoseTargets mode")); // FORMAT fields for each genotype headerLines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_FILTER_KEY)); headerLines.add( GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.AVG_INTERVAL_DP_BY_SAMPLE_KEY)); headerLines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.LOW_COVERAGE_LOCI)); headerLines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.ZERO_COVERAGE_LOCI)); // FILTER fields for (CallableStatus stat : CallableStatus.values()) headerLines.add(new VCFFilterHeaderLine(stat.name(), stat.description)); return headerLines; }
/** Prepare the output file and the list of available features. */ public void initialize() { if (LIST) { AnnotationHelpUtils.listAnnotations(); System.exit(0); } // get the list of all sample names from the variant VCF input rod, if applicable final List<String> rodName = Arrays.asList(variantCollection.variants.getName()); final Set<String> samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), rodName); if (USE_ALL_ANNOTATIONS) engine = new VariantAnnotatorEngine(annotationsToExclude, this, getToolkit()); else engine = new VariantAnnotatorEngine( annotationGroupsToUse, annotationsToUse, annotationsToExclude, this, getToolkit()); engine.initializeExpressions(expressionsToUse); engine.setExpressionAlleleConcordance(expressionAlleleConcordance); // setup the header fields // note that if any of the definitions conflict with our new ones, then we want to overwrite the // old ones final Set<VCFHeaderLine> hInfo = new HashSet<>(); hInfo.addAll(engine.getVCFAnnotationDescriptions()); for (final VCFHeaderLine line : GATKVCFUtils.getHeaderFields(getToolkit(), rodName)) { if (isUniqueHeaderLine(line, hInfo)) hInfo.add(line); } // for the expressions, pull the info header line from the header of the resource rod for (final VariantAnnotatorEngine.VAExpression expression : engine.getRequestedExpressions()) { // special case the ID field if (expression.fieldName.equals("ID")) { hInfo.add( new VCFInfoHeaderLine( expression.fullName, 1, VCFHeaderLineType.String, "ID field transferred from external VCF resource")); continue; } VCFInfoHeaderLine targetHeaderLine = null; for (final VCFHeaderLine line : GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(expression.binding.getName()))) { if (line instanceof VCFInfoHeaderLine) { final VCFInfoHeaderLine infoline = (VCFInfoHeaderLine) line; if (infoline.getID().equals(expression.fieldName)) { targetHeaderLine = infoline; break; } } } if (targetHeaderLine != null) { if (targetHeaderLine.getCountType() == VCFHeaderLineCount.INTEGER) hInfo.add( new VCFInfoHeaderLine( expression.fullName, targetHeaderLine.getCount(), targetHeaderLine.getType(), targetHeaderLine.getDescription())); else hInfo.add( new VCFInfoHeaderLine( expression.fullName, targetHeaderLine.getCountType(), targetHeaderLine.getType(), targetHeaderLine.getDescription())); } else { hInfo.add( new VCFInfoHeaderLine( expression.fullName, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Value transferred from another external VCF resource")); } } engine.makeHeaderInfoMap(hInfo); engine.invokeAnnotationInitializationMethods(hInfo); VCFHeader vcfHeader = new VCFHeader(hInfo, samples); vcfWriter.writeHeader(vcfHeader); }
/** Parse the familial relationship specification, and initialize VCF writer */ public void initialize() { List<String> rodNames = new ArrayList<String>(); rodNames.add(variantCollection.variants.getName()); Map<String, VCFHeader> vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames); Set<String> vcfSamples = SampleUtils.getSampleList( vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); // set up tumor and normal samples if (!vcfSamples.contains(normalSample)) throw new UserException.BadArgumentValue( "--normalSample", "the normal sample " + normalSample + " doesn't match any sample from the input VCF"); if (!vcfSamples.contains(tumorSample)) throw new UserException.BadArgumentValue( "--tumorSample", "the tumor sample " + tumorSample + " doesn't match any sample from the input VCF"); logger.info("Normal sample: " + normalSample); logger.info("Tumor sample: " + tumorSample); Set<VCFHeaderLine> headerLines = new HashSet<VCFHeaderLine>(); headerLines.addAll(GATKVCFUtils.getHeaderFields(this.getToolkit())); headerLines.add( new VCFInfoHeaderLine( VCFConstants.SOMATIC_KEY, 0, VCFHeaderLineType.Flag, "Is this a confidently called somatic mutation")); headerLines.add( new VCFInfoHeaderLine( SOMATIC_LOD_TAG_NAME, 1, VCFHeaderLineType.Float, "log10 probability that the site is a somatic mutation")); headerLines.add( new VCFInfoHeaderLine( SOMATIC_AC_TAG_NAME, 1, VCFHeaderLineType.Integer, "Allele count of samples with somatic event")); headerLines.add( new VCFInfoHeaderLine( SOMATIC_NONREF_TAG_NAME, 1, VCFHeaderLineType.Integer, "Number of samples with somatic event")); samples.add(normalSample); samples.add(tumorSample); vcfWriter.writeHeader(new VCFHeader(headerLines, samples)); }