Ejemplo n.º 1
0
  private static List<String> calcVCFGenotypeKeys(VariantContext vc) {
    Set<String> keys = new HashSet<String>();

    boolean sawGoodGT = false;
    boolean sawGoodQual = false;
    boolean sawGenotypeFilter = false;
    for (Genotype g : vc.getGenotypes().values()) {
      keys.addAll(g.getAttributes().keySet());
      if (g.isAvailable()) sawGoodGT = true;
      if (g.hasNegLog10PError()) sawGoodQual = true;
      if (g.isFiltered() && g.isCalled()) sawGenotypeFilter = true;
    }

    if (sawGoodQual) keys.add(VCFConstants.GENOTYPE_QUALITY_KEY);

    if (sawGenotypeFilter) keys.add(VCFConstants.GENOTYPE_FILTER_KEY);

    List<String> sortedList = ParsingUtils.sortList(new ArrayList<String>(keys));

    // make sure the GT is first
    if (sawGoodGT) {
      List<String> newList = new ArrayList<String>(sortedList.size() + 1);
      newList.add(VCFConstants.GENOTYPE_KEY);
      newList.addAll(sortedList);
      sortedList = newList;
    }

    return sortedList;
  }
Ejemplo n.º 2
0
  /**
   * add the genotype data
   *
   * @param vc the variant context
   * @param genotypeFormatKeys Genotype formatting string
   * @param alleleMap alleles for this context
   * @throws IOException for writer
   */
  private void addGenotypeData(
      VariantContext vc, Map<Allele, String> alleleMap, List<String> genotypeFormatKeys)
      throws IOException {

    for (String sample : mHeader.getGenotypeSamples()) {
      mWriter.write(VCFConstants.FIELD_SEPARATOR);

      Genotype g = vc.getGenotype(sample);
      if (g == null) {
        // TODO -- The VariantContext needs to know what the general ploidy is of the samples
        // TODO -- We shouldn't be assuming diploid genotypes here!
        mWriter.write(VCFConstants.EMPTY_GENOTYPE);
        continue;
      }

      List<String> attrs = new ArrayList<String>(genotypeFormatKeys.size());
      for (String key : genotypeFormatKeys) {

        if (key.equals(VCFConstants.GENOTYPE_KEY)) {
          if (!g.isAvailable()) {
            throw new ReviewedStingException(
                "GTs cannot be missing for some samples if they are available for others in the record");
          }

          writeAllele(g.getAllele(0), alleleMap);
          for (int i = 1; i < g.getPloidy(); i++) {
            mWriter.write(g.isPhased() ? VCFConstants.PHASED : VCFConstants.UNPHASED);
            writeAllele(g.getAllele(i), alleleMap);
          }

          continue;
        }

        Object val = g.hasAttribute(key) ? g.getAttribute(key) : VCFConstants.MISSING_VALUE_v4;

        // some exceptions
        if (key.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) {
          if (Math.abs(g.getNegLog10PError() - Genotype.NO_NEG_LOG_10PERROR) < 1e-6)
            val = VCFConstants.MISSING_VALUE_v4;
          else {
            val = getQualValue(Math.min(g.getPhredScaledQual(), VCFConstants.MAX_GENOTYPE_QUAL));
          }
        } else if (key.equals(VCFConstants.GENOTYPE_FILTER_KEY)) {
          val =
              g.isFiltered()
                  ? ParsingUtils.join(";", ParsingUtils.sortList(g.getFilters()))
                  : (g.filtersWereApplied()
                      ? VCFConstants.PASSES_FILTERS_v4
                      : VCFConstants.UNFILTERED);
        }

        VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(key);
        if (metaData != null) {
          int numInFormatField = metaData.getCount(vc.getAlternateAlleles().size());
          if (numInFormatField > 1 && val.equals(VCFConstants.MISSING_VALUE_v4)) {
            // If we have a missing field but multiple values are expected, we need to construct a
            // new string with all fields.
            // For example, if Number=2, the string has to be ".,."
            StringBuilder sb = new StringBuilder(VCFConstants.MISSING_VALUE_v4);
            for (int i = 1; i < numInFormatField; i++) {
              sb.append(",");
              sb.append(VCFConstants.MISSING_VALUE_v4);
            }
            val = sb.toString();
          }
        }

        // assume that if key is absent, then the given string encoding suffices
        String outputValue = formatVCFField(val);
        if (outputValue != null) attrs.add(outputValue);
      }

      // strip off trailing missing values
      for (int i = attrs.size() - 1; i >= 0; i--) {
        if (isMissingValue(attrs.get(i))) attrs.remove(i);
        else break;
      }

      for (int i = 0; i < attrs.size(); i++) {
        if (i > 0 || genotypeFormatKeys.contains(VCFConstants.GENOTYPE_KEY))
          mWriter.write(VCFConstants.GENOTYPE_FIELD_SEPARATOR);
        mWriter.write(attrs.get(i));
      }
    }
  }