/**
 * Writes a single record to the VCF output, emitting the VCF header first if this is the first
 * record seen.
 *
 * <p>On the first call the header lines are gathered from the {@code variants} rod together with
 * the standard genotype FORMAT line, the IDs of all FORMAT lines are recorded as allowed genotype
 * keys, and the sample set is resolved from, in order: the explicit {@code sampleName}, the VCF
 * header of the rod, or the first feature bound to the rod at {@code loc}. Every record is passed
 * through {@code GATKVariantContextUtils.purgeUnallowedGenotypeAttributes} with the allowed keys
 * before being handed to the writer.
 *
 * @param vc the record to write; also the fallback source of sample names on the first call
 * @param tracker queried on the first call for the rod's features when no samples are known yet
 * @param loc the location passed to {@code tracker} for that lookup
 * @throws IllegalStateException if samples must be taken from the rod's features but the tracker
 *     returns none
 */
private void writeRecord(VariantContext vc, RefMetaDataTracker tracker, GenomeLoc loc) {
    if (!wroteHeader) {
      // Flag is flipped up front, so the header setup is attempted at most once.
      wroteHeader = true;

      final List<String> rodNames = Arrays.asList(variants.getName());

      // Header lines: whatever the input rod declares, plus the standard genotype FORMAT line.
      final Set<VCFHeaderLine> headerLines = new HashSet<VCFHeaderLine>();
      headerLines.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), rodNames));
      headerLines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_KEY));

      // Remember every FORMAT key that appears in the header.
      allowedGenotypeFormatStrings.add(VCFConstants.GENOTYPE_KEY);
      for (VCFHeaderLine headerLine : headerLines) {
        if (!(headerLine instanceof VCFFormatHeaderLine)) {
          continue;
        }
        allowedGenotypeFormatStrings.add(((VCFFormatHeaderLine) headerLine).getID());
      }

      samples = new LinkedHashSet<String>();
      if (sampleName == null) {
        // First choice: sample names from the rod's VCF header.
        samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), rodNames);

        if (samples.isEmpty()) {
          // Fall back to the first feature bound to the rod at this location.
          final List<Feature> rodFeatures = tracker.getValues(variants, loc);
          if (rodFeatures.isEmpty()) {
            throw new IllegalStateException(
                "No rod data is present, but we just created a VariantContext");
          }

          final Feature firstFeature = rodFeatures.get(0);
          if (firstFeature instanceof RawHapMapFeature) {
            samples.addAll(Arrays.asList(((RawHapMapFeature) firstFeature).getSampleIDs()));
          } else {
            samples.addAll(vc.getSampleNames());
          }
        }
      } else {
        samples.add(sampleName);
      }

      vcfwriter.writeHeader(new VCFHeader(headerLines, samples));
    }

    final VariantContext purged =
        GATKVariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatStrings);
    vcfwriter.add(purged);
  }
Example #2
0
 /**
  * Lists the INFO key names this annotation reports, one per {@code InfoFieldKey} constant
  * referenced below, in a fixed order.
  *
  * @return a fixed-size list of the nine key names
  */
 @Override
 public List<String> getKeyNames() {
   final String[] keyNames = {
     InfoFieldKey.EFFECT_KEY.getKeyName(),
     InfoFieldKey.IMPACT_KEY.getKeyName(),
     InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(),
     InfoFieldKey.CODON_CHANGE_KEY.getKeyName(),
     InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(),
     InfoFieldKey.GENE_NAME_KEY.getKeyName(),
     InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(),
     InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(),
     InfoFieldKey.EXON_ID_KEY.getKeyName()
   };
   return Arrays.asList(keyNames);
 }
Example #3
0
 /**
  * Builds the INFO header lines describing each key this annotation reports.
  *
  * <p>Every line is declared with a count of 1 and type {@code String}. The impact and
  * functional-class descriptions have the corresponding enum's values appended.
  *
  * @return a fixed-size list of nine header lines, one per {@code InfoFieldKey} constant
  *     referenced below
  */
 @Override
 public List<VCFInfoHeaderLine> getDescriptions() {
   final VCFInfoHeaderLine effectLine =
       new VCFInfoHeaderLine(
           InfoFieldKey.EFFECT_KEY.getKeyName(),
           1,
           VCFHeaderLineType.String,
           "The highest-impact effect resulting from the current variant (or one of the highest-impact effects, if there is a tie)");

   final VCFInfoHeaderLine impactLine =
       new VCFInfoHeaderLine(
           InfoFieldKey.IMPACT_KEY.getKeyName(),
           1,
           VCFHeaderLineType.String,
           "Impact of the highest-impact effect resulting from the current variant "
               + Arrays.toString(EffectImpact.values()));

   final VCFInfoHeaderLine functionalClassLine =
       new VCFInfoHeaderLine(
           InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(),
           1,
           VCFHeaderLineType.String,
           "Functional class of the highest-impact effect resulting from the current variant: "
               + Arrays.toString(EffectFunctionalClass.values()));

   final VCFInfoHeaderLine codonChangeLine =
       new VCFInfoHeaderLine(
           InfoFieldKey.CODON_CHANGE_KEY.getKeyName(),
           1,
           VCFHeaderLineType.String,
           "Old/New codon for the highest-impact effect resulting from the current variant");

   final VCFInfoHeaderLine aminoAcidChangeLine =
       new VCFInfoHeaderLine(
           InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(),
           1,
           VCFHeaderLineType.String,
           "Old/New amino acid for the highest-impact effect resulting from the current variant (in HGVS style)");

   final VCFInfoHeaderLine geneNameLine =
       new VCFInfoHeaderLine(
           InfoFieldKey.GENE_NAME_KEY.getKeyName(),
           1,
           VCFHeaderLineType.String,
           "Gene name for the highest-impact effect resulting from the current variant");

   final VCFInfoHeaderLine geneBiotypeLine =
       new VCFInfoHeaderLine(
           InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(),
           1,
           VCFHeaderLineType.String,
           "Gene biotype for the highest-impact effect resulting from the current variant");

   final VCFInfoHeaderLine transcriptIdLine =
       new VCFInfoHeaderLine(
           InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(),
           1,
           VCFHeaderLineType.String,
           "Transcript ID for the highest-impact effect resulting from the current variant");

   final VCFInfoHeaderLine exonIdLine =
       new VCFInfoHeaderLine(
           InfoFieldKey.EXON_ID_KEY.getKeyName(),
           1,
           VCFHeaderLineType.String,
           "Exon ID for the highest-impact effect resulting from the current variant");

   return Arrays.asList(
       effectLine,
       impactLine,
       functionalClassLine,
       codonChangeLine,
       aminoAcidChangeLine,
       geneNameLine,
       geneBiotypeLine,
       transcriptIdLine,
       exonIdLine);
 }
Example #4
0
  /**
   * Parses the SnpEff effect attribute of a record into {@link SnpEffEffect} objects.
   *
   * <p>Each effect string is split on {@code SNPEFF_EFFECT_METADATA_DELIMITER} into a name and a
   * metadata part; the metadata part is then split on
   * {@code SNPEFF_EFFECT_METADATA_SUBFIELD_DELIMITER}, keeping trailing empty fields. Effect
   * strings that do not split into exactly two parts, or whose parsed effect reports itself as
   * not well formed, are logged at warn level and skipped.
   *
   * @param snpEffRecord the record whose {@code SNPEFF_INFO_FIELD_KEY} attribute is read
   * @return the well-formed effects in the order encountered; empty if the attribute is absent
   */
  private List<SnpEffEffect> parseSnpEffRecord(VariantContext snpEffRecord) {
    final List<SnpEffEffect> wellFormedEffects = new ArrayList<SnpEffEffect>();

    final Object rawAttribute = snpEffRecord.getAttribute(SNPEFF_INFO_FIELD_KEY);
    if (rawAttribute == null) {
      return wellFormedEffects;
    }

    // The VCF codec stores a multi-valued field as a List<String> and a single-valued field as a
    // plain String; a SnpEff record may carry one effect or several, so both shapes are handled.
    final List<String> effectStrings =
        rawAttribute instanceof List
            ? (List<String>) rawAttribute
            : Arrays.asList((String) rawAttribute);

    for (String rawEffect : effectStrings) {
      final String[] nameAndMetadata = rawEffect.split(SNPEFF_EFFECT_METADATA_DELIMITER);

      if (nameAndMetadata.length == 2) {
        // Limit of -1 keeps trailing empty subfields.
        final String[] metadataFields =
            nameAndMetadata[1].split(SNPEFF_EFFECT_METADATA_SUBFIELD_DELIMITER, -1);
        final SnpEffEffect candidate = new SnpEffEffect(nameAndMetadata[0], metadataFields);

        if (!candidate.isWellFormed()) {
          logger.warn(
              String.format(
                  "Skipping malformed SnpEff effect field at %s:%d. Error was: \"%s\". Field was: \"%s\"",
                  snpEffRecord.getChr(),
                  snpEffRecord.getStart(),
                  candidate.getParseError(),
                  rawEffect));
        } else {
          wellFormedEffects.add(candidate);
        }
      } else {
        logger.warn(
            String.format(
                "Malformed SnpEff effect field at %s:%d, skipping: %s",
                snpEffRecord.getChr(), snpEffRecord.getStart(), rawEffect));
      }
    }

    return wellFormedEffects;
  }
Example #5
0
  /**
   * Checks that SnpEff annotation is possible and, if so, copies the SnpEff version and
   * command-line header lines into the output header under this tool's own key names.
   *
   * <p>{@code canAnnotate} is set to {@code false} and the method returns early when any of the
   * following holds: {@code walker} is {@code null}; the walker's SnpEff rod binding is rejected
   * by {@code isValidRodBinding}; no VCF header is available for the SnpEff rod; or the
   * version/command-line header lines are rejected by {@code isValidSnpEffVersionAndCommandLine}.
   * {@code canAnnotate} is also cleared when the walker is not a {@code VariantAnnotator}; in that
   * case the header lines have already been added.
   *
   * @param walker the calling walker, which supplies the SnpEff rod binding
   * @param toolkit the engine used to look up the VCF header of the SnpEff rod
   * @param headerLines the output header lines; two lines are added on successful validation
   */
  @Override
  public void initialize(
      AnnotatorCompatible walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines) {
    // Every step below dereferences the walker, so a missing walker has to be rejected first.
    if (walker == null) {
      logger.warn("Annotation will not be calculated, must be called from VariantAnnotator");
      canAnnotate = false;
      return;
    }

    // Make sure that we actually have a valid SnpEff rod binding (just in case the user specified
    // -A SnpEff without providing a SnpEff rod via --snpEffFile):
    final RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding();
    if (!isValidRodBinding(snpEffRodBinding)) {
      canAnnotate = false;
      return;
    }

    // Make sure that the SnpEff version number and command-line header lines are present in the
    // VCF header of the SnpEff rod, and that the file was generated by a supported version of
    // SnpEff:
    final String snpEffRodName = snpEffRodBinding.getName();
    final VCFHeader snpEffVCFHeader =
        GATKVCFUtils.getVCFHeadersFromRods(toolkit, Arrays.asList(snpEffRodName))
            .get(snpEffRodName);

    // The lookup is a map get, which yields null when there is no header entry for the rod.
    if (snpEffVCFHeader == null) {
      logger.warn(
          "Annotation will not be calculated, no VCF header was found for the SnpEff rod "
              + snpEffRodName);
      canAnnotate = false;
      return;
    }

    final VCFHeaderLine snpEffVersionLine =
        snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_VERSION_LINE_KEY);
    final VCFHeaderLine snpEffCommandLine =
        snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_COMMAND_LINE_KEY);

    if (!isValidSnpEffVersionAndCommandLine(snpEffVersionLine, snpEffCommandLine)) {
      canAnnotate = false;
      return;
    }

    // If everything looks ok, add the SnpEff version number and command-line header lines to the
    // header of the VCF output file, changing the key names so that our output file won't be
    // mistaken in the future for a SnpEff output file:
    headerLines.add(
        new VCFHeaderLine(OUTPUT_VCF_HEADER_VERSION_LINE_KEY, snpEffVersionLine.getValue()));
    headerLines.add(
        new VCFHeaderLine(OUTPUT_VCF_HEADER_COMMAND_LINE_KEY, snpEffCommandLine.getValue()));

    // Can only be called from VariantAnnotator
    if (!(walker instanceof VariantAnnotator)) {
      logger.warn(
          "Annotation will not be calculated, must be called from VariantAnnotator, not "
              + walker.getClass().getName());
      canAnnotate = false;
    }
  }
Example #6
0
  /**
   * Prepare the output file and the list of available features.
   *
   * <p>If {@code LIST} is set, the available annotations are listed and the JVM exits. Otherwise
   * the annotation engine is created and configured, the output header lines are assembled from
   * the engine's annotation descriptions, the input rod's header and one INFO line per requested
   * expression, and the resulting header is written to {@code vcfWriter}.
   */
  public void initialize() {

    if (LIST) {
      AnnotationHelpUtils.listAnnotations();
      System.exit(0);
    }

    // Sample names come from the variant VCF input rod, if applicable.
    final List<String> rodName = Arrays.asList(variantCollection.variants.getName());
    final Set<String> samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), rodName);

    engine =
        USE_ALL_ANNOTATIONS
            ? new VariantAnnotatorEngine(annotationsToExclude, this, getToolkit())
            : new VariantAnnotatorEngine(
                annotationGroupsToUse, annotationsToUse, annotationsToExclude, this, getToolkit());
    engine.initializeExpressions(expressionsToUse);
    engine.setExpressionAlleleConcordance(expressionAlleleConcordance);

    // Header assembly: the engine's own annotation descriptions go in first; lines from the input
    // rod are added afterwards only when isUniqueHeaderLine accepts them, so that on a conflict
    // the new definitions take precedence over the old ones.
    final Set<VCFHeaderLine> headerLines = new HashSet<>();
    headerLines.addAll(engine.getVCFAnnotationDescriptions());
    for (final VCFHeaderLine inputLine : GATKVCFUtils.getHeaderFields(getToolkit(), rodName)) {
      if (isUniqueHeaderLine(inputLine, headerLines)) {
        headerLines.add(inputLine);
      }
    }

    // One INFO line per requested expression, modelled on the resource rod's own header line.
    for (final VariantAnnotatorEngine.VAExpression expression : engine.getRequestedExpressions()) {
      final VCFInfoHeaderLine expressionLine;

      if (expression.fieldName.equals("ID")) {
        // The ID field is special-cased: it gets a fixed single-String definition.
        expressionLine =
            new VCFInfoHeaderLine(
                expression.fullName,
                1,
                VCFHeaderLineType.String,
                "ID field transferred from external VCF resource");
      } else {
        // Find the first INFO line in the resource rod's header whose ID matches the field.
        VCFInfoHeaderLine resourceLine = null;
        for (final VCFHeaderLine candidate :
            GATKVCFUtils.getHeaderFields(
                getToolkit(), Arrays.asList(expression.binding.getName()))) {
          if (candidate instanceof VCFInfoHeaderLine
              && ((VCFInfoHeaderLine) candidate).getID().equals(expression.fieldName)) {
            resourceLine = (VCFInfoHeaderLine) candidate;
            break;
          }
        }

        if (resourceLine == null) {
          // No definition in the resource: fall back to an unbounded String field.
          expressionLine =
              new VCFInfoHeaderLine(
                  expression.fullName,
                  VCFHeaderLineCount.UNBOUNDED,
                  VCFHeaderLineType.String,
                  "Value transferred from another external VCF resource");
        } else if (resourceLine.getCountType() == VCFHeaderLineCount.INTEGER) {
          // Fixed integer count: copy the count itself.
          expressionLine =
              new VCFInfoHeaderLine(
                  expression.fullName,
                  resourceLine.getCount(),
                  resourceLine.getType(),
                  resourceLine.getDescription());
        } else {
          // Any other count type: copy the count type.
          expressionLine =
              new VCFInfoHeaderLine(
                  expression.fullName,
                  resourceLine.getCountType(),
                  resourceLine.getType(),
                  resourceLine.getDescription());
        }
      }

      headerLines.add(expressionLine);
    }

    engine.makeHeaderInfoMap(headerLines);
    engine.invokeAnnotationInitializationMethods(headerLines);

    final VCFHeader outputHeader = new VCFHeader(headerLines, samples);
    vcfWriter.writeHeader(outputHeader);
  }