public static BaseUtils.BaseSubstitutionType getSNPSubstitutionType(VariantContext context) {
   if (!context.isSNP() || !context.isBiallelic())
     throw new IllegalStateException(
         "Requested SNP substitution type for bialleic non-SNP " + context);
   return BaseUtils.SNPSubstitutionType(
       context.getReference().getBases()[0], context.getAlternateAllele(0).getBases()[0]);
 }
  @Requires({"eval != null", "comp != null"})
  private EvalCompMatchType doEvalAndCompMatch(
      final VariantContext eval, final VariantContext comp, boolean requireStrictAlleleMatch) {
    // find all of the matching comps
    if (comp.getType() != eval.getType()) return EvalCompMatchType.NO_MATCH;

    // find the comp which matches both the reference allele and alternate allele from eval
    final Allele altEval =
        eval.getAlternateAlleles().size() == 0 ? null : eval.getAlternateAllele(0);
    final Allele altComp =
        comp.getAlternateAlleles().size() == 0 ? null : comp.getAlternateAllele(0);
    if ((altEval == null && altComp == null)
        || (altEval != null
            && altEval.equals(altComp)
            && eval.getReference().equals(comp.getReference()))) return EvalCompMatchType.STRICT;
    else return requireStrictAlleleMatch ? EvalCompMatchType.NO_MATCH : EvalCompMatchType.LENIENT;
  }
  private Map<String, Object> annotateSNP(AlignmentContext stratifiedContext, VariantContext vc) {

    if (!stratifiedContext.hasBasePileup()) return null;

    HashMap<Byte, Integer> alleleCounts = new HashMap<Byte, Integer>();
    for (Allele allele : vc.getAlternateAlleles()) alleleCounts.put(allele.getBases()[0], 0);

    ReadBackedPileup pileup = stratifiedContext.getBasePileup();
    int totalDepth = pileup.size();

    Map<String, Object> map = new HashMap<String, Object>();
    map.put(getKeyNames().get(0), totalDepth); // put total depth in right away

    if (totalDepth == 0) return map; // done, can not compute FA at 0 coverage!!

    int mq0 = 0; // number of "ref" reads that are acually mq0
    for (PileupElement p : pileup) {
      if (p.getMappingQual() == 0) {
        mq0++;
        continue;
      }
      if (alleleCounts.containsKey(p.getBase())) // non-mq0 read and it's an alt
      alleleCounts.put(p.getBase(), alleleCounts.get(p.getBase()) + 1);
    }

    if (mq0 == totalDepth) return map; // if all reads are mq0, there is nothing left to do

    // we need to add counts in the correct order
    String[] fracs = new String[alleleCounts.size()];
    for (int i = 0; i < vc.getAlternateAlleles().size(); i++) {
      fracs[i] =
          String.format(
              "%.3f",
              ((float) alleleCounts.get(vc.getAlternateAllele(i).getBases()[0]))
                  / (totalDepth - mq0));
    }

    map.put(getKeyNames().get(1), fracs);
    return map;
  }
Example #4
0
  public String update1(
      VariantContext vc1,
      RefMetaDataTracker tracker,
      ReferenceContext ref,
      AlignmentContext context) {
    nCalledLoci++;

    // Note from Eric:
    // This is really not correct.  What we really want here is a polymorphic vs. monomorphic count
    // (i.e. on the Genotypes).
    // So in order to maintain consistency with the previous implementation (and the intention of
    // the original author), I've
    // added in a proxy check for monomorphic status here.
    // Protect against case when vc only as no-calls too - can happen if we strafity by sample and
    // sample as a single no-call.
    if (vc1.isMonomorphicInSamples()) {
      nRefLoci++;
    } else {
      switch (vc1.getType()) {
        case NO_VARIATION:
          // shouldn't get here
          break;
        case SNP:
          nVariantLoci++;
          nSNPs++;
          if (vc1.getAttributeAsBoolean("ISSINGLETON", false)) nSingletons++;
          break;
        case MNP:
          nVariantLoci++;
          nMNPs++;
          if (vc1.getAttributeAsBoolean("ISSINGLETON", false)) nSingletons++;
          break;
        case INDEL:
          nVariantLoci++;
          if (vc1.isSimpleInsertion()) nInsertions++;
          else if (vc1.isSimpleDeletion()) nDeletions++;
          else nComplex++;
          break;
        case MIXED:
          nVariantLoci++;
          nMixed++;
          break;
        case SYMBOLIC:
          nSymbolic++;
          break;
        default:
          throw new ReviewedStingException("Unexpected VariantContext type " + vc1.getType());
      }
    }

    String refStr = vc1.getReference().getBaseString().toUpperCase();

    String aaStr =
        vc1.hasAttribute("ANCESTRALALLELE")
            ? vc1.getAttributeAsString("ANCESTRALALLELE", null).toUpperCase()
            : null;
    //        if (aaStr.equals(".")) {
    //            aaStr = refStr;
    //        }

    // ref  aa  alt  class
    // A    C   A    der homozygote
    // A    C   C    anc homozygote

    // A    A   A    ref homozygote
    // A    A   C
    // A    C   A
    // A    C   C

    for (final Genotype g : vc1.getGenotypes()) {
      final String altStr =
          vc1.getAlternateAlleles().size() > 0
              ? vc1.getAlternateAllele(0).getBaseString().toUpperCase()
              : null;

      switch (g.getType()) {
        case NO_CALL:
          nNoCalls++;
          break;
        case HOM_REF:
          nHomRef++;

          if (aaStr != null && altStr != null && !refStr.equalsIgnoreCase(aaStr)) {
            nHomDerived++;
          }

          break;
        case HET:
          nHets++;
          break;
        case HOM_VAR:
          nHomVar++;

          if (aaStr != null && altStr != null && !altStr.equalsIgnoreCase(aaStr)) {
            nHomDerived++;
          }

          break;
        case MIXED:
          break;
        default:
          throw new ReviewedStingException("BUG: Unexpected genotype type: " + g);
      }
    }

    return null; // we don't capture any interesting sites
  }
Example #5
0
  /**
   * add a record to the file
   *
   * @param vc the Variant Context object
   * @param refBase the ref base used for indels
   * @param refBaseShouldBeAppliedToEndOfAlleles *** THIS SHOULD BE FALSE EXCEPT FOR AN INDEL AT THE
   *     EXTREME BEGINNING OF A CONTIG (WHERE THERE IS NO PREVIOUS BASE, SO WE USE THE BASE AFTER
   *     THE EVENT INSTEAD)
   */
  public void add(VariantContext vc, byte refBase, boolean refBaseShouldBeAppliedToEndOfAlleles) {
    if (mHeader == null)
      throw new IllegalStateException(
          "The VCF Header must be written before records can be added: " + locationString());

    if (doNotWriteGenotypes) vc = VariantContext.modifyGenotypes(vc, null);

    try {
      vc =
          VariantContext.createVariantContextWithPaddedAlleles(
              vc, refBase, refBaseShouldBeAppliedToEndOfAlleles);

      // if we are doing on the fly indexing, add the record ***before*** we write any bytes
      if (indexer != null) indexer.addFeature(vc, positionalStream.getPosition());

      Map<Allele, String> alleleMap = new HashMap<Allele, String>(vc.getAlleles().size());
      alleleMap.put(Allele.NO_CALL, VCFConstants.EMPTY_ALLELE); // convenience for lookup

      // CHROM
      mWriter.write(vc.getChr());
      mWriter.write(VCFConstants.FIELD_SEPARATOR);

      // POS
      mWriter.write(String.valueOf(vc.getStart()));
      mWriter.write(VCFConstants.FIELD_SEPARATOR);

      // ID
      String ID = vc.hasID() ? vc.getID() : VCFConstants.EMPTY_ID_FIELD;
      mWriter.write(ID);
      mWriter.write(VCFConstants.FIELD_SEPARATOR);

      // REF
      alleleMap.put(vc.getReference(), "0");
      String refString = vc.getReference().getDisplayString();
      mWriter.write(refString);
      mWriter.write(VCFConstants.FIELD_SEPARATOR);

      // ALT
      if (vc.isVariant()) {
        Allele altAllele = vc.getAlternateAllele(0);
        alleleMap.put(altAllele, "1");
        String alt = altAllele.getDisplayString();
        mWriter.write(alt);

        for (int i = 1; i < vc.getAlternateAlleles().size(); i++) {
          altAllele = vc.getAlternateAllele(i);
          alleleMap.put(altAllele, String.valueOf(i + 1));
          alt = altAllele.getDisplayString();
          mWriter.write(",");
          mWriter.write(alt);
        }
      } else {
        mWriter.write(VCFConstants.EMPTY_ALTERNATE_ALLELE_FIELD);
      }
      mWriter.write(VCFConstants.FIELD_SEPARATOR);

      // QUAL
      if (!vc.hasNegLog10PError()) mWriter.write(VCFConstants.MISSING_VALUE_v4);
      else mWriter.write(getQualValue(vc.getPhredScaledQual()));
      mWriter.write(VCFConstants.FIELD_SEPARATOR);

      // FILTER
      String filters =
          vc.isFiltered()
              ? ParsingUtils.join(";", ParsingUtils.sortList(vc.getFilters()))
              : (filtersWereAppliedToContext || vc.filtersWereApplied()
                  ? VCFConstants.PASSES_FILTERS_v4
                  : VCFConstants.UNFILTERED);
      mWriter.write(filters);
      mWriter.write(VCFConstants.FIELD_SEPARATOR);

      // INFO
      Map<String, String> infoFields = new TreeMap<String, String>();
      for (Map.Entry<String, Object> field : vc.getAttributes().entrySet()) {
        String key = field.getKey();
        if (key.equals(VariantContext.ID_KEY)
            || key.equals(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)
            || key.equals(VariantContext.UNPARSED_GENOTYPE_MAP_KEY)
            || key.equals(VariantContext.UNPARSED_GENOTYPE_PARSER_KEY)) continue;

        String outputValue = formatVCFField(field.getValue());
        if (outputValue != null) infoFields.put(key, outputValue);
      }
      writeInfoString(infoFields);

      // FORMAT
      if (vc.hasAttribute(VariantContext.UNPARSED_GENOTYPE_MAP_KEY)) {
        mWriter.write(VCFConstants.FIELD_SEPARATOR);
        mWriter.write(vc.getAttributeAsString(VariantContext.UNPARSED_GENOTYPE_MAP_KEY, ""));
      } else {
        List<String> genotypeAttributeKeys = new ArrayList<String>();
        if (vc.hasGenotypes()) {
          genotypeAttributeKeys.addAll(calcVCFGenotypeKeys(vc));
        } else if (mHeader.hasGenotypingData()) {
          // this needs to be done in case all samples are no-calls
          genotypeAttributeKeys.add(VCFConstants.GENOTYPE_KEY);
        }

        if (genotypeAttributeKeys.size() > 0) {
          String genotypeFormatString =
              ParsingUtils.join(VCFConstants.GENOTYPE_FIELD_SEPARATOR, genotypeAttributeKeys);
          mWriter.write(VCFConstants.FIELD_SEPARATOR);
          mWriter.write(genotypeFormatString);

          addGenotypeData(vc, alleleMap, genotypeAttributeKeys);
        }
      }

      mWriter.write("\n");
      mWriter.flush(); // necessary so that writing to an output stream will work
    } catch (IOException e) {
      throw new RuntimeException("Unable to write the VCF object to " + locationString());
    }
  }
Example #6
0
  /**
   * Subset VC record if necessary and emit the modified record (provided it satisfies criteria for
   * printing)
   *
   * @param tracker the ROD tracker
   * @param ref reference information
   * @param context alignment info
   * @return 1 if the record was printed to the output file, 0 if otherwise
   */
  @Override
  public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    if (tracker == null) return 0;

    Collection<VariantContext> vcs =
        tracker.getValues(variantCollection.variants, context.getLocation());

    if (vcs == null || vcs.size() == 0) {
      return 0;
    }

    for (VariantContext vc : vcs) {
      if (MENDELIAN_VIOLATIONS) {
        boolean foundMV = false;
        for (MendelianViolation mv : mvSet) {
          if (mv.isViolation(vc)) {
            foundMV = true;
            // System.out.println(vc.toString());
            if (outMVFile != null)
              outMVFileStream.format(
                  "MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, "
                      + "childG=%s childGL=%s\n",
                  vc.getChr(),
                  vc.getStart(),
                  vc.getReference().getDisplayString(),
                  vc.getAlternateAllele(0).getDisplayString(),
                  vc.getChromosomeCount(vc.getAlternateAllele(0)),
                  mv.getSampleMom(),
                  mv.getSampleDad(),
                  mv.getSampleChild(),
                  vc.getGenotype(mv.getSampleMom()).toBriefString(),
                  vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(),
                  vc.getGenotype(mv.getSampleDad()).toBriefString(),
                  vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(),
                  vc.getGenotype(mv.getSampleChild()).toBriefString(),
                  vc.getGenotype(mv.getSampleChild()).getLikelihoods().getAsString());
          }
        }

        if (!foundMV) break;
      }
      if (DISCORDANCE_ONLY) {
        Collection<VariantContext> compVCs =
            tracker.getValues(discordanceTrack, context.getLocation());
        if (!isDiscordant(vc, compVCs)) return 0;
      }
      if (CONCORDANCE_ONLY) {
        Collection<VariantContext> compVCs =
            tracker.getValues(concordanceTrack, context.getLocation());
        if (!isConcordant(vc, compVCs)) return 0;
      }

      if (alleleRestriction.equals(NumberAlleleRestriction.BIALLELIC) && !vc.isBiallelic())
        continue;

      if (alleleRestriction.equals(NumberAlleleRestriction.MULTIALLELIC) && vc.isBiallelic())
        continue;

      if (!selectedTypes.contains(vc.getType())) continue;

      VariantContext sub = subsetRecord(vc, samples);
      if ((sub.isPolymorphic() || !EXCLUDE_NON_VARIANTS)
          && (!sub.isFiltered() || !EXCLUDE_FILTERED)) {
        for (VariantContextUtils.JexlVCMatchExp jexl : jexls) {
          if (!VariantContextUtils.match(sub, jexl)) {
            return 0;
          }
        }
        if (SELECT_RANDOM_NUMBER) {
          randomlyAddVariant(++variantNumber, sub, ref.getBase());
        } else if (!SELECT_RANDOM_FRACTION
            || (GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) {
          vcfWriter.add(sub);
        }
      }
    }

    return 1;
  }
  private Map<String, Object> annotateIndel(AlignmentContext stratifiedContext, VariantContext vc) {

    if (!stratifiedContext.hasExtendedEventPileup()) {
      return null;
    }

    ReadBackedExtendedEventPileup pileup = stratifiedContext.getExtendedEventPileup();
    if (pileup == null) return null;
    int totalDepth = pileup.size();

    Map<String, Object> map = new HashMap<String, Object>();
    map.put(getKeyNames().get(0), totalDepth); // put total depth in right away

    if (totalDepth == 0) return map;
    int mq0 = 0; // number of "ref" reads that are acually mq0

    HashMap<String, Integer> alleleCounts = new HashMap<String, Integer>();
    Allele refAllele = vc.getReference();

    for (Allele allele : vc.getAlternateAlleles()) {

      if (allele.isNoCall()) {
        continue; // this does not look so good, should we die???
      }

      alleleCounts.put(getAlleleRepresentation(allele), 0);
    }

    for (ExtendedEventPileupElement e : pileup.toExtendedIterable()) {

      if (e.getMappingQual() == 0) {
        mq0++;
        continue;
      }

      if (e.isInsertion()) {

        final String b = e.getEventBases();
        if (alleleCounts.containsKey(b)) {
          alleleCounts.put(b, alleleCounts.get(b) + 1);
        }

      } else {
        if (e.isDeletion()) {
          if (e.getEventLength() == refAllele.length()) {
            // this is indeed the deletion allele recorded in VC
            final String b = DEL;
            if (alleleCounts.containsKey(b)) {
              alleleCounts.put(b, alleleCounts.get(b) + 1);
            }
          }
          //                    else {
          //                        System.out.print("   deletion of WRONG length found");
          //                    }
        }
      }
    }

    if (mq0 == totalDepth) return map;

    String[] fracs = new String[alleleCounts.size()];
    for (int i = 0; i < vc.getAlternateAlleles().size(); i++)
      fracs[i] =
          String.format(
              "%.3f",
              ((float) alleleCounts.get(getAlleleRepresentation(vc.getAlternateAllele(i))))
                  / (totalDepth - mq0));

    map.put(getKeyNames().get(1), fracs);

    // map.put(getKeyNames().get(0), counts);
    return map;
  }