Esempio n. 1
0
 /**
  * Tallies one site into the comp-status by eval-status table.
  *
  * <p>Sites with no comp record are ignored. When identical alleles are required and eval is
  * present with different alternate alleles than comp, only the mismatch counter is bumped.
  * Otherwise the site status of eval and of comp (restricted to the evaluation samples when comp
  * carries genotypes for them) are computed and the matching cell of {@code counts} is
  * incremented.
  *
  * @param eval the evaluation record; may be null
  * @param comp the comparison record; may be null, in which case nothing is recorded
  * @param tracker not used by this method
  * @param ref not used by this method
  * @param context not used by this method
  */
 @Override
 public void update2(
     VariantContext eval,
     VariantContext comp,
     RefMetaDataTracker tracker,
     ReferenceContext ref,
     AlignmentContext context) {
   // only sites present in comp are of interest
   if (comp == null) {
     return;
   }

   final boolean altAllelesDisagree =
       REQUIRE_IDENTICAL_ALLELES && eval != null && haveDifferentAltAlleles(eval, comp);
   if (altAllelesDisagree) {
     nDifferentAlleleSites++;
     return;
   }

   final SiteStatus statusOfEval = calcSiteStatus(eval);
   final Set<String> samplesUnderEvaluation = getWalker().getSampleNamesForEvaluation();

   // when comp has genotypes for the evaluated samples, restrict comp to just those samples
   VariantContext compForStatus = comp;
   final boolean canSubsetComp =
       comp.hasGenotypes()
           && !samplesUnderEvaluation.isEmpty()
           && comp.hasGenotypes(samplesUnderEvaluation);
   if (canSubsetComp) {
     compForStatus = comp.subContextFromSamples(samplesUnderEvaluation, false);
   }

   final SiteStatus statusOfComp = calcSiteStatus(compForStatus);
   counts[statusOfComp.ordinal()][statusOfEval.ordinal()]++;
 }
  /**
   * Update the attributes map given the VariantContext to reflect the proper chromosome-based VCF
   * tags (allele number, allele count and allele frequency).
   *
   * <p>If the called chromosome count is zero and {@code removeStaleValues} is true, the three
   * tags are removed from the map and nothing else is written. Otherwise, when the context has
   * genotypes, the allele number is stored and:
   *
   * <ul>
   *   <li>with alternate alleles, the per-allele counts and formatted frequencies are stored, as
   *       a single value when there is exactly one alternate allele and as a list otherwise;
   *   <li>without alternate alleles, the count is stored as {@code 0} and the frequency as
   *       {@code 0.0}.
   * </ul>
   *
   * @param vc the VariantContext
   * @param attributes the attributes map to populate; must not be null; may contain old values
   * @param removeStaleValues should we remove stale values from the mapping?
   * @return the attributes map provided as input, returned for programming convenience
   */
  public static Map<String, Object> calculateChromosomeCounts(
      VariantContext vc, Map<String, Object> attributes, boolean removeStaleValues) {
    final int AN = vc.getCalledChrCount();

    // if everyone is a no-call, remove the old attributes if requested
    if (AN == 0 && removeStaleValues) {
      final String[] chromosomeCountKeys = {
        VCFConstants.ALLELE_COUNT_KEY,
        VCFConstants.ALLELE_FREQUENCY_KEY,
        VCFConstants.ALLELE_NUMBER_KEY
      };
      for (final String staleKey : chromosomeCountKeys) {
        if (attributes.containsKey(staleKey)) attributes.remove(staleKey);
      }
      return attributes;
    }

    if (vc.hasGenotypes()) {
      attributes.put(VCFConstants.ALLELE_NUMBER_KEY, AN);

      // if there are alternate alleles, record the relevant tags
      if (vc.getAlternateAlleles().size() > 0) {
        final ArrayList<String> alleleFreqs = new ArrayList<String>();
        final ArrayList<Integer> alleleCounts = new ArrayList<Integer>();

        // The precision format depends only on AN, so build it once instead of once per allele.
        // It is not needed (and not computed) when AN == 0, where every frequency is "0.0".
        // NOTE(review): String.format below uses the default locale -- confirm that is intended
        // for values written to a VCF.
        final boolean noCalledChromosomes = AN == 0;
        final String freqFormat =
            noCalledChromosomes ? null : makePrecisionFormatStringFromDenominatorValue((double) AN);

        for (Allele allele : vc.getAlternateAlleles()) {
          final int altChromosomes = vc.getCalledChrCount(allele);
          alleleCounts.add(altChromosomes);
          if (noCalledChromosomes) {
            alleleFreqs.add("0.0");
          } else {
            alleleFreqs.add(String.format(freqFormat, ((double) altChromosomes / (double) AN)));
          }
        }

        // a single alternate allele is stored as a scalar, several as a list
        attributes.put(
            VCFConstants.ALLELE_COUNT_KEY,
            alleleCounts.size() == 1 ? alleleCounts.get(0) : alleleCounts);
        attributes.put(
            VCFConstants.ALLELE_FREQUENCY_KEY,
            alleleFreqs.size() == 1 ? alleleFreqs.get(0) : alleleFreqs);
      } else {
        attributes.put(VCFConstants.ALLELE_COUNT_KEY, 0);
        attributes.put(VCFConstants.ALLELE_FREQUENCY_KEY, 0.0);
      }
    }

    return attributes;
  }
Esempio n. 3
0
  //
  // helper routines
  //
  /**
   * Classifies a record as NO_CALL, FILTERED, MONO or POLY.
   *
   * <p>A null record is NO_CALL and a filtered one is FILTERED. A record that is monomorphic in
   * its samples is MONO; any other record with genotypes is POLY. Without genotypes the allele
   * count attribute decides: more than two alleles, or a positive count, means POLY, otherwise
   * MONO. If that attribute is absent the result is POLY or NO_CALL depending on
   * {@code TREAT_ALL_SITES_IN_EVAL_VCF_AS_CALLED}.
   *
   * @param vc the record to classify; may be null
   * @return the site status of {@code vc}
   */
  private SiteStatus calcSiteStatus(VariantContext vc) {
    if (vc == null) {
      return SiteStatus.NO_CALL;
    }
    if (vc.isFiltered()) {
      return SiteStatus.FILTERED;
    }
    if (vc.isMonomorphicInSamples()) {
      return SiteStatus.MONO;
    }
    if (vc.hasGenotypes()) {
      // not monomorphic in samples and has genotypes, so it must be polymorphic
      return SiteStatus.POLY;
    }

    if (!vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY)) {
      // no genotypes and no allele count: we can't figure out what to do
      if (TREAT_ALL_SITES_IN_EVAL_VCF_AS_CALLED) {
        return SiteStatus.POLY;
      }
      return SiteStatus.NO_CALL;
    }

    if (vc.getNAlleles() > 2) {
      return SiteStatus.POLY;
    }

    final int alleleCount = vc.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY, 0);
    if (alleleCount > 0) {
      return SiteStatus.POLY;
    }
    return SiteStatus.MONO;
  }
Esempio n. 4
0
 /**
  * Tells whether a record qualifies for Beagle output.
  *
  * @param v the record to check; may be null
  * @return true only if {@code v} is non-null, not filtered, biallelic and has genotypes
  */
 public static boolean canBeOutputToBeagle(VariantContext v) {
   if (v == null || v.isFiltered()) {
     return false;
   }
   return v.isBiallelic() && v.hasGenotypes();
 }
Esempio n. 5
0
  /**
   * Add a record to the file.
   *
   * <p>Writes the CHROM, POS, ID, REF, ALT, QUAL, FILTER and INFO columns and, when genotype
   * information applies, the FORMAT column followed by the genotype data. The line is terminated
   * with a newline and the writer is flushed after every record.
   *
   * @param vc the Variant Context object
   * @param refBase the ref base used for indels
   * @param refBaseShouldBeAppliedToEndOfAlleles *** THIS SHOULD BE FALSE EXCEPT FOR AN INDEL AT THE
   *     EXTREME BEGINNING OF A CONTIG (WHERE THERE IS NO PREVIOUS BASE, SO WE USE THE BASE AFTER
   *     THE EVENT INSTEAD)
   * @throws IllegalStateException if the VCF header has not been written yet
   * @throws RuntimeException if writing the record fails with an {@link IOException}; the
   *     original exception is attached as the cause
   */
  public void add(VariantContext vc, byte refBase, boolean refBaseShouldBeAppliedToEndOfAlleles) {
    if (mHeader == null)
      throw new IllegalStateException(
          "The VCF Header must be written before records can be added: " + locationString());

    // strip genotypes up front when this writer is configured not to emit them
    if (doNotWriteGenotypes) vc = VariantContext.modifyGenotypes(vc, null);

    try {
      vc =
          VariantContext.createVariantContextWithPaddedAlleles(
              vc, refBase, refBaseShouldBeAppliedToEndOfAlleles);

      // if we are doing on the fly indexing, add the record ***before*** we write any bytes
      if (indexer != null) indexer.addFeature(vc, positionalStream.getPosition());

      // maps each allele to the index string used when genotype data is written
      Map<Allele, String> alleleMap = new HashMap<Allele, String>(vc.getAlleles().size());
      alleleMap.put(Allele.NO_CALL, VCFConstants.EMPTY_ALLELE); // convenience for lookup

      // CHROM
      mWriter.write(vc.getChr());
      mWriter.write(VCFConstants.FIELD_SEPARATOR);

      // POS
      mWriter.write(String.valueOf(vc.getStart()));
      mWriter.write(VCFConstants.FIELD_SEPARATOR);

      // ID
      String ID = vc.hasID() ? vc.getID() : VCFConstants.EMPTY_ID_FIELD;
      mWriter.write(ID);
      mWriter.write(VCFConstants.FIELD_SEPARATOR);

      // REF (always allele index 0)
      alleleMap.put(vc.getReference(), "0");
      String refString = vc.getReference().getDisplayString();
      mWriter.write(refString);
      mWriter.write(VCFConstants.FIELD_SEPARATOR);

      // ALT (comma-separated; the i-th alternate allele gets index i + 1)
      if (vc.isVariant()) {
        Allele altAllele = vc.getAlternateAllele(0);
        alleleMap.put(altAllele, "1");
        String alt = altAllele.getDisplayString();
        mWriter.write(alt);

        for (int i = 1; i < vc.getAlternateAlleles().size(); i++) {
          altAllele = vc.getAlternateAllele(i);
          alleleMap.put(altAllele, String.valueOf(i + 1));
          alt = altAllele.getDisplayString();
          mWriter.write(",");
          mWriter.write(alt);
        }
      } else {
        mWriter.write(VCFConstants.EMPTY_ALTERNATE_ALLELE_FIELD);
      }
      mWriter.write(VCFConstants.FIELD_SEPARATOR);

      // QUAL
      if (!vc.hasNegLog10PError()) mWriter.write(VCFConstants.MISSING_VALUE_v4);
      else mWriter.write(getQualValue(vc.getPhredScaledQual()));
      mWriter.write(VCFConstants.FIELD_SEPARATOR);

      // FILTER: the sorted filter names if filtered, otherwise PASS or the unfiltered marker
      // depending on whether filters were applied at all
      String filters =
          vc.isFiltered()
              ? ParsingUtils.join(";", ParsingUtils.sortList(vc.getFilters()))
              : (filtersWereAppliedToContext || vc.filtersWereApplied()
                  ? VCFConstants.PASSES_FILTERS_v4
                  : VCFConstants.UNFILTERED);
      mWriter.write(filters);
      mWriter.write(VCFConstants.FIELD_SEPARATOR);

      // INFO (TreeMap keeps the keys sorted; internal bookkeeping keys are skipped)
      Map<String, String> infoFields = new TreeMap<String, String>();
      for (Map.Entry<String, Object> field : vc.getAttributes().entrySet()) {
        String key = field.getKey();
        if (key.equals(VariantContext.ID_KEY)
            || key.equals(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)
            || key.equals(VariantContext.UNPARSED_GENOTYPE_MAP_KEY)
            || key.equals(VariantContext.UNPARSED_GENOTYPE_PARSER_KEY)) continue;

        String outputValue = formatVCFField(field.getValue());
        if (outputValue != null) infoFields.put(key, outputValue);
      }
      writeInfoString(infoFields);

      // FORMAT
      if (vc.hasAttribute(VariantContext.UNPARSED_GENOTYPE_MAP_KEY)) {
        // the unparsed genotype text is written out as stored
        mWriter.write(VCFConstants.FIELD_SEPARATOR);
        mWriter.write(vc.getAttributeAsString(VariantContext.UNPARSED_GENOTYPE_MAP_KEY, ""));
      } else {
        List<String> genotypeAttributeKeys = new ArrayList<String>();
        if (vc.hasGenotypes()) {
          genotypeAttributeKeys.addAll(calcVCFGenotypeKeys(vc));
        } else if (mHeader.hasGenotypingData()) {
          // this needs to be done in case all samples are no-calls
          genotypeAttributeKeys.add(VCFConstants.GENOTYPE_KEY);
        }

        if (genotypeAttributeKeys.size() > 0) {
          String genotypeFormatString =
              ParsingUtils.join(VCFConstants.GENOTYPE_FIELD_SEPARATOR, genotypeAttributeKeys);
          mWriter.write(VCFConstants.FIELD_SEPARATOR);
          mWriter.write(genotypeFormatString);

          addGenotypeData(vc, alleleMap, genotypeAttributeKeys);
        }
      }

      mWriter.write("\n");
      mWriter.flush(); // necessary so that writing to an output stream will work
    } catch (IOException e) {
      // keep the original exception as the cause so the underlying I/O failure is not lost
      throw new RuntimeException("Unable to write the VCF object to " + locationString(), e);
    }
  }