private void writeRecord(VariantContext vc, RefMetaDataTracker tracker, GenomeLoc loc) { if (!wroteHeader) { wroteHeader = true; // setup the header fields Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>(); hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(variants.getName()))); hInfo.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_KEY)); allowedGenotypeFormatStrings.add(VCFConstants.GENOTYPE_KEY); for (VCFHeaderLine field : hInfo) { if (field instanceof VCFFormatHeaderLine) { allowedGenotypeFormatStrings.add(((VCFFormatHeaderLine) field).getID()); } } samples = new LinkedHashSet<String>(); if (sampleName != null) { samples.add(sampleName); } else { // try VCF first samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getName())); if (samples.isEmpty()) { List<Feature> features = tracker.getValues(variants, loc); if (features.size() == 0) throw new IllegalStateException( "No rod data is present, but we just created a VariantContext"); Feature f = features.get(0); if (f instanceof RawHapMapFeature) samples.addAll(Arrays.asList(((RawHapMapFeature) f).getSampleIDs())); else samples.addAll(vc.getSampleNames()); } } vcfwriter.writeHeader(new VCFHeader(hInfo, samples)); } vc = GATKVariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatStrings); vcfwriter.add(vc); }
/**
 * Returns the INFO key names produced by this annotation, one per {@code InfoFieldKey} constant
 * listed in the body, in the order they are listed.
 *
 * @return a fixed-size list of key names
 */
@Override
public List<String> getKeyNames() {
  final String[] keyNames = {
    InfoFieldKey.EFFECT_KEY.getKeyName(),
    InfoFieldKey.IMPACT_KEY.getKeyName(),
    InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(),
    InfoFieldKey.CODON_CHANGE_KEY.getKeyName(),
    InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(),
    InfoFieldKey.GENE_NAME_KEY.getKeyName(),
    InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(),
    InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(),
    InfoFieldKey.EXON_ID_KEY.getKeyName()
  };
  return Arrays.asList(keyNames);
}
/**
 * Builds the VCF INFO header lines describing each key this annotation emits.
 *
 * <p>Every line is declared with a count of 1 and type {@code String}. The descriptions for the
 * impact and functional-class keys have the string form of the corresponding enum's values
 * appended.
 *
 * @return a fixed-size list of INFO header lines, one per key, in the order built below
 */
@Override
public List<VCFInfoHeaderLine> getDescriptions() {
  final VCFHeaderLineType stringType = VCFHeaderLineType.String;

  final VCFInfoHeaderLine effectLine =
      new VCFInfoHeaderLine(
          InfoFieldKey.EFFECT_KEY.getKeyName(),
          1,
          stringType,
          "The highest-impact effect resulting from the current variant (or one of the highest-impact effects, if there is a tie)");

  // The two enum-backed fields list their legal values in the description.
  final String impactDescription =
      "Impact of the highest-impact effect resulting from the current variant "
          + Arrays.toString(EffectImpact.values());
  final VCFInfoHeaderLine impactLine =
      new VCFInfoHeaderLine(
          InfoFieldKey.IMPACT_KEY.getKeyName(), 1, stringType, impactDescription);

  final String functionalClassDescription =
      "Functional class of the highest-impact effect resulting from the current variant: "
          + Arrays.toString(EffectFunctionalClass.values());
  final VCFInfoHeaderLine functionalClassLine =
      new VCFInfoHeaderLine(
          InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(),
          1,
          stringType,
          functionalClassDescription);

  final VCFInfoHeaderLine[] headerLines = {
    effectLine,
    impactLine,
    functionalClassLine,
    new VCFInfoHeaderLine(
        InfoFieldKey.CODON_CHANGE_KEY.getKeyName(),
        1,
        stringType,
        "Old/New codon for the highest-impact effect resulting from the current variant"),
    new VCFInfoHeaderLine(
        InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(),
        1,
        stringType,
        "Old/New amino acid for the highest-impact effect resulting from the current variant (in HGVS style)"),
    new VCFInfoHeaderLine(
        InfoFieldKey.GENE_NAME_KEY.getKeyName(),
        1,
        stringType,
        "Gene name for the highest-impact effect resulting from the current variant"),
    new VCFInfoHeaderLine(
        InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(),
        1,
        stringType,
        "Gene biotype for the highest-impact effect resulting from the current variant"),
    new VCFInfoHeaderLine(
        InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(),
        1,
        stringType,
        "Transcript ID for the highest-impact effect resulting from the current variant"),
    new VCFInfoHeaderLine(
        InfoFieldKey.EXON_ID_KEY.getKeyName(),
        1,
        stringType,
        "Exon ID for the highest-impact effect resulting from the current variant")
  };
  return Arrays.asList(headerLines);
}
private List<SnpEffEffect> parseSnpEffRecord(VariantContext snpEffRecord) { List<SnpEffEffect> parsedEffects = new ArrayList<SnpEffEffect>(); Object effectFieldValue = snpEffRecord.getAttribute(SNPEFF_INFO_FIELD_KEY); if (effectFieldValue == null) { return parsedEffects; } // The VCF codec stores multi-valued fields as a List<String>, and single-valued fields as a // String. // We can have either in the case of SnpEff, since there may be one or more than one effect in // this record. List<String> individualEffects; if (effectFieldValue instanceof List) { individualEffects = (List<String>) effectFieldValue; } else { individualEffects = Arrays.asList((String) effectFieldValue); } for (String effectString : individualEffects) { String[] effectNameAndMetadata = effectString.split(SNPEFF_EFFECT_METADATA_DELIMITER); if (effectNameAndMetadata.length != 2) { logger.warn( String.format( "Malformed SnpEff effect field at %s:%d, skipping: %s", snpEffRecord.getChr(), snpEffRecord.getStart(), effectString)); continue; } String effectName = effectNameAndMetadata[0]; String[] effectMetadata = effectNameAndMetadata[1].split(SNPEFF_EFFECT_METADATA_SUBFIELD_DELIMITER, -1); SnpEffEffect parsedEffect = new SnpEffEffect(effectName, effectMetadata); if (parsedEffect.isWellFormed()) { parsedEffects.add(parsedEffect); } else { logger.warn( String.format( "Skipping malformed SnpEff effect field at %s:%d. Error was: \"%s\". Field was: \"%s\"", snpEffRecord.getChr(), snpEffRecord.getStart(), parsedEffect.getParseError(), effectString)); } } return parsedEffects; }
@Override public void initialize( AnnotatorCompatible walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines) { // Make sure that we actually have a valid SnpEff rod binding (just in case the user specified // -A SnpEff // without providing a SnpEff rod via --snpEffFile): if (!isValidRodBinding(walker.getSnpEffRodBinding())) { canAnnotate = false; return; } RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding(); // Make sure that the SnpEff version number and command-line header lines are present in the VCF // header of // the SnpEff rod, and that the file was generated by a supported version of SnpEff: VCFHeader snpEffVCFHeader = GATKVCFUtils.getVCFHeadersFromRods(toolkit, Arrays.asList(snpEffRodBinding.getName())) .get(snpEffRodBinding.getName()); VCFHeaderLine snpEffVersionLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_VERSION_LINE_KEY); VCFHeaderLine snpEffCommandLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_COMMAND_LINE_KEY); if (!isValidSnpEffVersionAndCommandLine(snpEffVersionLine, snpEffCommandLine)) { canAnnotate = false; return; } // If everything looks ok, add the SnpEff version number and command-line header lines to the // header of the VCF output file, changing the key names so that our output file won't be // mistaken in the future for a SnpEff output file: headerLines.add( new VCFHeaderLine(OUTPUT_VCF_HEADER_VERSION_LINE_KEY, snpEffVersionLine.getValue())); headerLines.add( new VCFHeaderLine(OUTPUT_VCF_HEADER_COMMAND_LINE_KEY, snpEffCommandLine.getValue())); // Can only be called from VariantAnnotator if (!(walker instanceof VariantAnnotator)) { if (walker != null) logger.warn( "Annotation will not be calculated, must be called from VariantAnnotator, not " + walker.getClass().getName()); else logger.warn("Annotation will not be calculated, must be called from VariantAnnotator"); canAnnotate = false; return; } }
/** Prepare the output file and the list of available features. */ public void initialize() { if (LIST) { AnnotationHelpUtils.listAnnotations(); System.exit(0); } // get the list of all sample names from the variant VCF input rod, if applicable final List<String> rodName = Arrays.asList(variantCollection.variants.getName()); final Set<String> samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), rodName); if (USE_ALL_ANNOTATIONS) engine = new VariantAnnotatorEngine(annotationsToExclude, this, getToolkit()); else engine = new VariantAnnotatorEngine( annotationGroupsToUse, annotationsToUse, annotationsToExclude, this, getToolkit()); engine.initializeExpressions(expressionsToUse); engine.setExpressionAlleleConcordance(expressionAlleleConcordance); // setup the header fields // note that if any of the definitions conflict with our new ones, then we want to overwrite the // old ones final Set<VCFHeaderLine> hInfo = new HashSet<>(); hInfo.addAll(engine.getVCFAnnotationDescriptions()); for (final VCFHeaderLine line : GATKVCFUtils.getHeaderFields(getToolkit(), rodName)) { if (isUniqueHeaderLine(line, hInfo)) hInfo.add(line); } // for the expressions, pull the info header line from the header of the resource rod for (final VariantAnnotatorEngine.VAExpression expression : engine.getRequestedExpressions()) { // special case the ID field if (expression.fieldName.equals("ID")) { hInfo.add( new VCFInfoHeaderLine( expression.fullName, 1, VCFHeaderLineType.String, "ID field transferred from external VCF resource")); continue; } VCFInfoHeaderLine targetHeaderLine = null; for (final VCFHeaderLine line : GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(expression.binding.getName()))) { if (line instanceof VCFInfoHeaderLine) { final VCFInfoHeaderLine infoline = (VCFInfoHeaderLine) line; if (infoline.getID().equals(expression.fieldName)) { targetHeaderLine = infoline; break; } } } if (targetHeaderLine != null) { if (targetHeaderLine.getCountType() == 
VCFHeaderLineCount.INTEGER) hInfo.add( new VCFInfoHeaderLine( expression.fullName, targetHeaderLine.getCount(), targetHeaderLine.getType(), targetHeaderLine.getDescription())); else hInfo.add( new VCFInfoHeaderLine( expression.fullName, targetHeaderLine.getCountType(), targetHeaderLine.getType(), targetHeaderLine.getDescription())); } else { hInfo.add( new VCFInfoHeaderLine( expression.fullName, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Value transferred from another external VCF resource")); } } engine.makeHeaderInfoMap(hInfo); engine.invokeAnnotationInitializationMethods(hInfo); VCFHeader vcfHeader = new VCFHeader(hInfo, samples); vcfWriter.writeHeader(vcfHeader); }