예제 #1
0
  /**
   * Adds one variant record to the VCF writer, writing the VCF header first if it has not been
   * written yet (tracked by {@code wroteHeader}).
   *
   * <p>When the header is written, the sample set is chosen as follows: {@code sampleName} if it
   * is non-null; otherwise the samples reported for the input rod's VCF header; and if that set is
   * empty, the samples taken from the first feature found at {@code loc} (its sample IDs for a
   * {@link RawHapMapFeature}, else the sample names of {@code vc}).
   *
   * @param vc the variant context to write; its genotype attributes are filtered against {@code
   *     allowedGenotypeFormatStrings} before writing
   * @param tracker source of the rod features looked up when no sample names are otherwise known
   * @param loc location passed to the tracker when looking up features
   * @throws IllegalStateException if sample names must be read from the rod but the tracker
   *     returns no features
   */
  private void writeRecord(VariantContext vc, RefMetaDataTracker tracker, GenomeLoc loc) {
    if (!wroteHeader) {
      // flip the flag up front, before any of the header work below
      wroteHeader = true;

      final List<String> rodNames = Arrays.asList(variants.getName());

      // collect the header lines: everything from the input rod plus the standard GT line
      Set<VCFHeaderLine> headerLines = new HashSet<VCFHeaderLine>();
      headerLines.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), rodNames));
      headerLines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_KEY));

      // every FORMAT line ID in the header is an allowed genotype attribute
      allowedGenotypeFormatStrings.add(VCFConstants.GENOTYPE_KEY);
      for (VCFHeaderLine line : headerLines) {
        if (!(line instanceof VCFFormatHeaderLine)) {
          continue;
        }
        allowedGenotypeFormatStrings.add(((VCFFormatHeaderLine) line).getID());
      }

      samples = new LinkedHashSet<String>();
      if (sampleName == null) {
        // no explicit sample name: try the VCF header first
        samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), rodNames);

        if (samples.isEmpty()) {
          // fall back to the first feature present at this location
          List<Feature> rodFeatures = tracker.getValues(variants, loc);
          if (rodFeatures.isEmpty()) {
            throw new IllegalStateException(
                "No rod data is present, but we just created a VariantContext");
          }

          Feature firstFeature = rodFeatures.get(0);
          if (firstFeature instanceof RawHapMapFeature) {
            samples.addAll(Arrays.asList(((RawHapMapFeature) firstFeature).getSampleIDs()));
          } else {
            samples.addAll(vc.getSampleNames());
          }
        }
      } else {
        samples.add(sampleName);
      }

      vcfwriter.writeHeader(new VCFHeader(headerLines, samples));
    }

    // strip genotype attributes that are not in the allowed set before writing the record
    vcfwriter.add(
        GATKVariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatStrings));
  }
  /**
   * Checks that the configured normal and tumor samples occur in the input VCF, then writes the
   * output VCF header.
   *
   * <p>The header is built from the toolkit's header fields plus four INFO lines (the somatic
   * flag, the somatic LOD, the somatic allele count and the somatic non-ref sample count). The
   * normal and tumor samples are added to {@code samples}, which is passed as the header's sample
   * set.
   *
   * @throws UserException.BadArgumentValue if the normal sample, or else the tumor sample, is not
   *     among the samples of the input VCF
   */
  public void initialize() {
    final List<String> inputRodNames = new ArrayList<String>();
    inputRodNames.add(variantCollection.variants.getName());

    final Map<String, VCFHeader> headersByRod =
        GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), inputRodNames);
    final Set<String> inputSamples =
        SampleUtils.getSampleList(
            headersByRod, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);

    // both requested samples must be present in the input; normal is checked first
    if (!inputSamples.contains(normalSample)) {
      throw new UserException.BadArgumentValue(
          "--normalSample",
          "the normal sample " + normalSample + " doesn't match any sample from the input VCF");
    }
    if (!inputSamples.contains(tumorSample)) {
      throw new UserException.BadArgumentValue(
          "--tumorSample",
          "the tumor sample " + tumorSample + " doesn't match any sample from the input VCF");
    }

    logger.info("Normal sample: " + normalSample);
    logger.info("Tumor  sample: " + tumorSample);

    // INFO lines this walker contributes on top of the toolkit's header fields
    final VCFInfoHeaderLine somaticFlagLine =
        new VCFInfoHeaderLine(
            VCFConstants.SOMATIC_KEY,
            0,
            VCFHeaderLineType.Flag,
            "Is this a confidently called somatic mutation");
    final VCFInfoHeaderLine somaticLodLine =
        new VCFInfoHeaderLine(
            SOMATIC_LOD_TAG_NAME,
            1,
            VCFHeaderLineType.Float,
            "log10 probability that the site is a somatic mutation");
    final VCFInfoHeaderLine somaticAcLine =
        new VCFInfoHeaderLine(
            SOMATIC_AC_TAG_NAME,
            1,
            VCFHeaderLineType.Integer,
            "Allele count of samples with somatic event");
    final VCFInfoHeaderLine somaticNonRefLine =
        new VCFInfoHeaderLine(
            SOMATIC_NONREF_TAG_NAME,
            1,
            VCFHeaderLineType.Integer,
            "Number of samples with somatic event");

    final Set<VCFHeaderLine> outputHeaderLines = new HashSet<VCFHeaderLine>();
    outputHeaderLines.addAll(GATKVCFUtils.getHeaderFields(getToolkit()));
    outputHeaderLines.add(somaticFlagLine);
    outputHeaderLines.add(somaticLodLine);
    outputHeaderLines.add(somaticAcLine);
    outputHeaderLines.add(somaticNonRefLine);

    samples.add(normalSample);
    samples.add(tumorSample);
    vcfWriter.writeHeader(new VCFHeader(outputHeaderLines, samples));
  }
예제 #3
0
  /**
   * Sets up the annotation engine, assembles the output header lines and writes the VCF header.
   *
   * <p>If {@code LIST} is set, this calls {@code AnnotationHelpUtils.listAnnotations()} and then
   * terminates the JVM via {@code System.exit(0)} without doing anything else.
   *
   * <p>Header lines come from three places, added in this order: the engine's annotation
   * descriptions, the input rod's header fields that pass {@code isUniqueHeaderLine}, and one INFO
   * line per requested expression.
   */
  public void initialize() {

    if (LIST) {
      AnnotationHelpUtils.listAnnotations();
      System.exit(0);
    }

    // sample names come from the variant input rod
    final List<String> variantRodNames = Arrays.asList(variantCollection.variants.getName());
    final Set<String> samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), variantRodNames);

    if (USE_ALL_ANNOTATIONS) {
      engine = new VariantAnnotatorEngine(annotationsToExclude, this, getToolkit());
    } else {
      engine =
          new VariantAnnotatorEngine(
              annotationGroupsToUse, annotationsToUse, annotationsToExclude, this, getToolkit());
    }
    engine.initializeExpressions(expressionsToUse);
    engine.setExpressionAlleleConcordance(expressionAlleleConcordance);

    // The engine's own descriptions go in first; a line from the input rod is only added when
    // isUniqueHeaderLine accepts it, so that our new definitions win over conflicting old ones.
    final Set<VCFHeaderLine> hInfo = new HashSet<>();
    hInfo.addAll(engine.getVCFAnnotationDescriptions());
    for (final VCFHeaderLine rodLine : GATKVCFUtils.getHeaderFields(getToolkit(), variantRodNames)) {
      if (isUniqueHeaderLine(rodLine, hInfo)) {
        hInfo.add(rodLine);
      }
    }

    // one INFO line per requested expression, modelled on the resource rod's header when possible
    for (final VariantAnnotatorEngine.VAExpression expression : engine.getRequestedExpressions()) {
      final VCFInfoHeaderLine expressionLine;

      if (expression.fieldName.equals("ID")) {
        // the ID field is special-cased: no lookup in the resource header
        expressionLine =
            new VCFInfoHeaderLine(
                expression.fullName,
                1,
                VCFHeaderLineType.String,
                "ID field transferred from external VCF resource");
      } else {
        // find the INFO line in the resource rod's header whose ID matches the requested field
        VCFInfoHeaderLine resourceLine = null;
        for (final VCFHeaderLine candidate :
            GATKVCFUtils.getHeaderFields(
                getToolkit(), Arrays.asList(expression.binding.getName()))) {
          if (!(candidate instanceof VCFInfoHeaderLine)) {
            continue;
          }
          final VCFInfoHeaderLine infoCandidate = (VCFInfoHeaderLine) candidate;
          if (infoCandidate.getID().equals(expression.fieldName)) {
            resourceLine = infoCandidate;
            break;
          }
        }

        if (resourceLine == null) {
          // nothing to copy from: fall back to an unbounded String line
          expressionLine =
              new VCFInfoHeaderLine(
                  expression.fullName,
                  VCFHeaderLineCount.UNBOUNDED,
                  VCFHeaderLineType.String,
                  "Value transferred from another external VCF resource");
        } else if (resourceLine.getCountType() == VCFHeaderLineCount.INTEGER) {
          // fixed integer count: copy the numeric count itself
          expressionLine =
              new VCFInfoHeaderLine(
                  expression.fullName,
                  resourceLine.getCount(),
                  resourceLine.getType(),
                  resourceLine.getDescription());
        } else {
          // any other count type: copy the count type
          expressionLine =
              new VCFInfoHeaderLine(
                  expression.fullName,
                  resourceLine.getCountType(),
                  resourceLine.getType(),
                  resourceLine.getDescription());
        }
      }

      hInfo.add(expressionLine);
    }

    engine.makeHeaderInfoMap(hInfo);
    engine.invokeAnnotationInitializationMethods(hInfo);

    vcfWriter.writeHeader(new VCFHeader(hInfo, samples));
  }