Example #1
0
  // protected basic manipulation routines
  /**
   * Copies the supplied alleles into a fresh list in which the reference allele sits at index 0
   * and the remaining alleles keep their encounter order.
   *
   * @param alleles the alleles to copy
   * @return a new list with the reference allele first
   * @throws IllegalArgumentException if an allele is duplicated, if more than one reference allele
   *     is present, if the collection is empty, or if no reference allele is present
   */
  private static List<Allele> makeAlleles(Collection<Allele> alleles) {
    final List<Allele> ordered = new ArrayList<Allele>(alleles.size());
    boolean referenceSeen = false;

    for (final Allele candidate : alleles) {
      // reject anything equal to an allele that was already collected
      for (final Allele existing : ordered) {
        if (candidate.equals(existing, true)) {
          throw new IllegalArgumentException(
              "Duplicate allele added to VariantContext: " + candidate);
        }
      }

      if (!candidate.isReference()) {
        ordered.add(candidate);
        continue;
      }

      // the reference allele may appear anywhere in the input; it is always moved to the front
      if (referenceSeen) {
        throw new IllegalArgumentException(
            "Alleles for a VariantContext must contain at most one reference allele: " + alleles);
      }
      ordered.add(0, candidate);
      referenceSeen = true;
    }

    if (ordered.isEmpty()) {
      throw new IllegalArgumentException(
          "Cannot create a VariantContext with an empty allele list");
    }

    if (ordered.get(0).isNonReference()) {
      throw new IllegalArgumentException(
          "Alleles for a VariantContext must contain at least one reference allele: " + alleles);
    }

    return ordered;
  }
Example #2
0
  /**
   * Checks that LiftoverVcf.fixGenotypes rewrites every genotype allele according to the supplied
   * original-to-replacement allele map (A/C genotypes become T/G genotypes).
   */
  @Test
  public void testFixReverseComplementedGenotypes() {
    // alleles before and after the substitution
    final Allele refA = Allele.create("A", true);
    final Allele altC = Allele.create("C", false);
    final Allele refT = Allele.create("T", true);
    final Allele altG = Allele.create("G", false);

    final Map<Allele, Allele> reverseComplementAlleleMap = new HashMap<Allele, Allele>(2);
    reverseComplementAlleleMap.put(refA, refT);
    reverseComplementAlleleMap.put(altC, altG);

    // input genotypes expressed in the original alleles
    final GenotypesContext input = GenotypesContext.create(3);
    input.add(new GenotypeBuilder("homref").alleles(Arrays.asList(refA, refA)).make());
    input.add(new GenotypeBuilder("het").alleles(Arrays.asList(refA, altC)).make());
    input.add(new GenotypeBuilder("homvar").alleles(Arrays.asList(altC, altC)).make());

    // the same genotypes expressed in the replacement alleles
    final GenotypesContext wanted = GenotypesContext.create(3);
    wanted.add(new GenotypeBuilder("homref").alleles(Arrays.asList(refT, refT)).make());
    wanted.add(new GenotypeBuilder("het").alleles(Arrays.asList(refT, altG)).make());
    wanted.add(new GenotypeBuilder("homvar").alleles(Arrays.asList(altG, altG)).make());

    final GenotypesContext produced = LiftoverVcf.fixGenotypes(input, reverseComplementAlleleMap);

    final String[] sampleNames = {"homref", "het", "homvar"};
    for (final String sampleName : sampleNames) {
      final List<Allele> wantedAlleles = wanted.get(sampleName).getAlleles();
      final List<Allele> producedAlleles = produced.get(sampleName).getAlleles();
      for (int position = 0; position < 2; position++) {
        Assert.assertEquals(wantedAlleles.get(position), producedAlleles.get(position));
      }
    }
  }
Example #3
0
  /**
   * Classifies a single alternate allele against the reference allele.
   *
   * @param ref the reference allele; must not be symbolic
   * @param allele the alternate allele to classify
   * @return SYMBOLIC for a symbolic alternate, SNP or MNP when both alleles have the same length
   *     (length 1 gives SNP), and INDEL otherwise
   * @throws IllegalStateException if the reference allele is symbolic
   */
  private static Type typeOfBiallelicVariant(Allele ref, Allele allele) {
    if (ref.isSymbolic()) {
      throw new IllegalStateException(
          "Unexpected error: encountered a record with a symbolic reference allele");
    }

    if (allele.isSymbolic()) {
      return Type.SYMBOLIC;
    }

    final int alternateLength = allele.length();
    if (ref.length() != alternateLength) {
      // An earlier version required one allele to be a prefix of the other and returned MIXED
      // when that failed. That was wrong: with REF = CTTA and ALT = C,CT,CA the site is an INDEL
      // but was reported as MIXED. This method only compares one alternate allele with the
      // reference, while MIXED is reserved for sites whose alternate alleles have different
      // types, so MIXED can never be the right answer here. Anything that is not symbolic, a SNP
      // or an MNP must be an INDEL.
      return Type.INDEL;
    }

    return alternateLength == 1 ? Type.SNP : Type.MNP;
  }
  /**
   * Writes one tab-separated "AFINFO" line per allele count from 0 to N to the verbose writer,
   * followed by the P(f>0) value, the Q score and a blank line.
   *
   * @param pos position text printed on every AFINFO line
   * @param vc context whose alleles are reported; if it holds several non-reference alleles, the
   *     last one iterated is the one printed
   * @param PofF value printed after "P(f>0) = "
   * @param phredScaledConfidence value printed after "Qscore = "
   * @param model model passed to getAlleleFrequencyPriors to look up the prior for each count
   */
  protected void printVerboseData(
      String pos,
      VariantContext vc,
      double PofF,
      double phredScaledConfidence,
      final GenotypeLikelihoodsCalculationModel.Model model) {
    Allele refAllele = null;
    Allele altAllele = null;
    for (Allele candidate : vc.getAlleles()) {
      if (candidate.isReference()) {
        refAllele = candidate;
      } else {
        altAllele = candidate;
      }
    }

    for (int count = 0; count <= N; count++) {
      final StringBuilder row = new StringBuilder("AFINFO\t");
      row.append(pos).append("\t");
      row.append(refAllele).append("\t");
      if (altAllele == null) {
        row.append("N/A");
      } else {
        row.append(altAllele);
      }
      row.append("\t");
      row.append(count + "/" + N + "\t");
      row.append(String.format("%.2f\t", ((float) count) / N));
      row.append(String.format("%.8f\t", getAlleleFrequencyPriors(model)[count]));
      verboseWriter.println(row.toString());
    }

    verboseWriter.println("P(f>0) = " + PofF);
    verboseWriter.println("Qscore = " + phredScaledConfidence);
    verboseWriter.println();
  }
Example #5
0
  /**
   * Emits the details of one call through printCallElement and then flushes the call report.
   * Elements are written in this order: type, alleles, per-sample PLs, priors, runtime, the two
   * posteriors, then the MLE count and per-allele posterior for each non-reference allele.
   *
   * @param vc the context being reported
   * @param log10AlleleFrequencyPriors priors, written one element per array index
   * @param runtimeNano value written as the "runtime.nano" element
   * @param result calculation result supplying the posteriors and per-allele values
   */
  protected final void printCallInfo(
      final VariantContext vc,
      final double[] log10AlleleFrequencyPriors,
      final long runtimeNano,
      final AFCalcResult result) {
    printCallElement(vc, "type", "ignore", vc.getType());

    int alleleIndex = 0;
    for (final Allele siteAllele : vc.getAlleles()) {
      printCallElement(vc, "allele", alleleIndex, siteAllele.getDisplayString());
      alleleIndex++;
    }

    for (final Genotype sampleGenotype : vc.getGenotypes()) {
      printCallElement(
          vc, "PL", sampleGenotype.getSampleName(), sampleGenotype.getLikelihoodsString());
    }

    int priorIndex = 0;
    for (final double prior : log10AlleleFrequencyPriors) {
      printCallElement(vc, "priorI", priorIndex, prior);
      priorIndex++;
    }

    printCallElement(vc, "runtime.nano", "ignore", runtimeNano);
    printCallElement(vc, "log10PosteriorOfAFEq0", "ignore", result.getLog10PosteriorOfAFEq0());
    printCallElement(vc, "log10PosteriorOfAFGt0", "ignore", result.getLog10PosteriorOfAFGT0());

    for (final Allele used : result.getAllelesUsedInGenotyping()) {
      if (!used.isNonReference()) {
        continue; // per-allele values are only reported for non-reference alleles
      }
      printCallElement(vc, "MLE", used, result.getAlleleCountAtMLE(used));
      printCallElement(
          vc, "pNonRefByAllele", used, result.getLog10PosteriorOfAFGt0ForAllele(used));
    }

    callReport.flush();
  }
Example #6
0
 /**
  * Reports whether any allele returned by getAlleles() is symbolic.
  *
  * @return true if at least one allele is symbolic, false otherwise
  */
 public boolean hasSymbolicAlleles() {
   boolean symbolicFound = false;
   for (final Allele candidate : getAlleles()) {
     if (candidate.isSymbolic()) {
       symbolicFound = true;
       break;
     }
   }
   return symbolicFound;
 }
Example #7
0
  /**
   * Counts the alleles across all genotypes that are not no-calls, i.e. the number of called
   * chromosomes.
   *
   * @return the number of genotype alleles for which isNoCall() is false
   */
  public int getCalledChrCount() {
    int calledChromosomes = 0;

    for (final Genotype genotype : getGenotypes()) {
      for (final Allele allele : genotype.getAlleles()) {
        if (!allele.isNoCall()) {
          calledChromosomes++;
        }
      }
    }

    return calledChromosomes;
  }
Example #8
0
  /**
   * Tests whether the given allele is one of this context's alleles.
   *
   * @param allele the allele to look for
   * @param ignoreRefState forwarded as the second argument of Allele.equals for each comparison
   * @return true if the allele is the cached REF or ALT instance, or equals one of the alleles
   *     returned by getAlleles()
   */
  public boolean hasAllele(Allele allele, boolean ignoreRefState) {
    // optimization for cached cases: an identity match needs no scan
    final boolean isCachedInstance = allele == REF || allele == ALT;
    if (!isCachedInstance) {
      for (Allele candidate : getAlleles()) {
        if (candidate.equals(allele, ignoreRefState)) {
          return true;
        }
      }
      return false;
    }

    return true;
  }
Example #9
0
  /**
   * Looks up the PL indices for the genotypes formed from allele number 0 and the given alternate
   * allele.
   *
   * <p>Alternate alleles are numbered from 1 in the order returned by getAlternateAlleles().
   * Allele number 0 is presumably the reference allele; confirm against
   * GenotypeLikelihoods.getPLIndecesOfAlleles.
   *
   * @param targetAllele the alternate allele to look up
   * @return the result of GenotypeLikelihoods.getPLIndecesOfAlleles(0, n), where n is the number
   *     of the target allele
   * @throws IllegalArgumentException if the target is not one of the alternate alleles. The
   *     previous code fell through and asked for indices of an allele number one past the last
   *     alternate.
   */
  public int[] getGLIndecesOfAlternateAllele(Allele targetAllele) {
    int alleleNumber = 1;
    for (final Allele alternate : getAlternateAlleles()) {
      if (alternate.equals(targetAllele)) {
        return GenotypeLikelihoods.getPLIndecesOfAlleles(0, alleleNumber);
      }
      alleleNumber++;
    }

    throw new IllegalArgumentException(
        "Allele " + targetAllele + " is not an alternate allele of this VariantContext");
  }
Example #10
0
  /**
   * Converts the features bound to {@code variants} at the current locus into VariantContexts.
   *
   * <p>Features that cannot be converted are skipped. Raw HapMap indel records (one of the first
   * two alleles equals the null-allele string) are first given real alleles and a corrected
   * position using the matching dbSNP record. They are skipped when no usable dbSNP record exists
   * or when the corrected start falls before the reference window.
   *
   * @param tracker source of the features at this locus
   * @param ref reference context supplying the locus, the reference base and the window
   * @return the successfully converted contexts, possibly empty
   */
  private Collection<VariantContext> getVariantContexts(
      RefMetaDataTracker tracker, ReferenceContext ref) {

    final List<Feature> records = tracker.getValues(variants, ref.getLocus());
    final List<VariantContext> converted = new ArrayList<VariantContext>(records.size());

    for (final Feature record : records) {
      if (!VariantContextAdaptors.canBeConvertedToVariantContext(record)) {
        continue;
      }

      // the HapMap format is special-cased because its indels aren't handled correctly
      if (record instanceof RawHapMapFeature) {
        final RawHapMapFeature hapmap = (RawHapMapFeature) record;

        final boolean isIndel =
            hapmap.getAlleles()[0].equals(RawHapMapFeature.NULL_ALLELE_STRING)
                || hapmap.getAlleles()[1].equals(RawHapMapFeature.NULL_ALLELE_STRING);
        if (isIndel) {
          // the dbSNP record tells insertions and deletions apart
          final VariantContext dbsnpVC = getDbsnp(hapmap.getName());
          if (dbsnpVC == null || dbsnpVC.isMixed()) {
            continue;
          }

          final Allele deletionAllele = Allele.create(ref.getBase(), dbsnpVC.isSimpleInsertion());
          final Allele insertionAllele =
              Allele.create(
                  (char) ref.getBase() + hapmap.getAlleles()[1], !dbsnpVC.isSimpleInsertion());

          final Map<String, Allele> actualAlleles = new HashMap<String, Allele>(2);
          actualAlleles.put(RawHapMapFeature.DELETION, deletionAllele);
          actualAlleles.put(RawHapMapFeature.INSERTION, insertionAllele);
          hapmap.setActualAlleles(actualAlleles);

          // also, use the correct positioning for insertions
          hapmap.updatePosition(dbsnpVC.getStart());

          if (hapmap.getStart() < ref.getWindow().getStart()) {
            logger.warn(
                "Hapmap record at "
                    + ref.getLocus()
                    + " represents an indel too large to be converted; skipping...");
            continue;
          }
        }
      }

      // the record might still fail to convert: the track sometimes holds odd entries
      final VariantContext vc =
          VariantContextAdaptors.toVariantContext(variants.getName(), record, ref);
      if (vc != null) {
        converted.add(vc);
      }
    }

    return converted;
  }
Example #11
0
  /**
   * Validates this context's alleles and their consistency with its start/stop span.
   *
   * @throws IllegalArgumentException if there is more than one reference allele, any no-call
   *     allele, more than one null allele, or no reference allele at all
   * @throws IllegalStateException if the span length does not fit the reference allele: a null
   *     reference needs a span of exactly 1, and a non-null reference may be at most one base
   *     shorter than the span
   */
  private void validateAlleles() {
    boolean referenceSeen = false;
    boolean nullSeen = false;

    for (final Allele current : alleles) {
      // at most one reference allele
      if (current.isReference()) {
        if (referenceSeen) {
          throw new IllegalArgumentException(
              "BUG: Received two reference tagged alleles in VariantContext "
                  + alleles
                  + " this="
                  + this);
        }
        referenceSeen = true;
      }

      // no-call alleles never belong to a context
      if (current.isNoCall()) {
        throw new IllegalArgumentException(
            "BUG: Cannot add a no call allele to a variant context " + alleles + " this=" + this);
      }

      // at most one null allele
      if (current.isNull()) {
        if (nullSeen) {
          throw new IllegalArgumentException(
              "BUG: Received two null alleles in VariantContext " + alleles + " this=" + this);
        }
        nullSeen = true;
      }
    }

    // exactly one reference allele is required
    if (!referenceSeen) {
      throw new IllegalArgumentException("No reference allele found in VariantContext");
    }

    final long span = (stop - start) + 1;
    final boolean nullReferenceWithWrongSpan = getReference().isNull() && span != 1;
    final boolean referenceTooShortForSpan =
        getReference().isNonNull() && (span - getReference().length() > 1);
    if (nullReferenceWithWrongSpan || referenceTooShortForSpan) {
      throw new IllegalStateException(
          "BUG: GenomeLoc "
              + contig
              + ":"
              + start
              + "-"
              + stop
              + " has a size == "
              + span
              + " but the variation reference allele has length "
              + getReference().length()
              + " this = "
              + this);
    }
  }
Example #12
0
  /**
   * Computes, for every alternate allele, its length minus the reference allele length. A
   * positive value means the alternate is longer than the reference, a negative value shorter.
   *
   * @return one length difference per alternate allele, in getAlternateAlleles() order, or null
   *     when this context's type is neither INDEL nor MIXED
   */
  public List<Integer> getIndelLengths() {
    final Type type = getType();
    if (!(type == Type.INDEL || type == Type.MIXED)) {
      return null;
    }

    final int referenceLength = getReference().length();
    final List<Integer> lengthDifferences = new ArrayList<Integer>();
    for (Allele alternate : getAlternateAlleles()) {
      lengthDifferences.add(alternate.length() - referenceLength);
    }

    return lengthDifferences;
  }
Example #13
0
  /**
   * Verifies that every called allele of every available genotype is one of this context's
   * alleles.
   *
   * @throws IllegalStateException if the genotypes field is null, or if a called genotype allele
   *     is not present in this context
   */
  private void validateGenotypes() {
    if (this.genotypes == null) {
      throw new IllegalStateException("Genotypes is null");
    }

    for (final Genotype genotype : this.genotypes) {
      if (!genotype.isAvailable()) {
        continue;
      }

      for (Allele genotypeAllele : genotype.getAlleles()) {
        final boolean missingFromContext = !hasAllele(genotypeAllele);
        if (missingFromContext && genotypeAllele.isCalled()) {
          throw new IllegalStateException(
              "Allele in genotype " + genotypeAllele + " not in the variant context " + alleles);
        }
      }
    }
  }
Example #14
0
  /**
   * Helper for sub-contexts: gathers the distinct called alleles used by the given genotypes and
   * guarantees that a reference allele is included.
   *
   * @param genotypes the genotypes to scan
   * @return the called alleles found, plus this context's reference allele when none of the
   *     genotype alleles was a reference allele
   */
  private final Set<Allele> allelesOfGenotypes(Collection<Genotype> genotypes) {
    final Set<Allele> collected = new HashSet<Allele>();
    boolean referenceSeen = false;

    for (final Genotype genotype : genotypes) {
      for (final Allele allele : genotype.getAlleles()) {
        if (allele.isReference()) {
          referenceSeen = true;
        }
        if (allele.isCalled()) {
          collected.add(allele);
        }
      }
    }

    if (!referenceSeen) {
      collected.add(getReference());
    }

    return collected;
  }
  /**
   * Registers test contexts covering every genotype that can be built from 2 or 3 site alleles
   * (plus no-call) at ploidies 1 to 3. For each allele-count/ploidy pair it adds:
   *
   * <ul>
   *   <li>one context per possible genotype, holding a single sample,
   *   <li>one context holding one sample for every possible genotype,
   *   <li>single-sample contexts whose genotype is truncated to each lower ploidy.
   * </ul>
   */
  public static void addComplexGenotypesTest() {
    final List<Allele> allAlleles =
        Arrays.asList(
            Allele.create("A", true), Allele.create("C", false), Allele.create("G", false));

    for (final int nAlleles : new int[] {2, 3}) {
      for (final int highestPloidy : new int[] {1, 2, 3}) {
        // alleles present at the site
        final List<Allele> siteAlleles = allAlleles.subList(0, nAlleles);

        // genotypes may additionally contain no-calls
        final List<Allele> genotypeAlphabet = new ArrayList<Allele>(siteAlleles);
        genotypeAlphabet.add(Allele.NO_CALL);

        // there are n^ploidy possible genotypes
        final List<List<Allele>> possibleGenotypes =
            makeAllGenotypes(genotypeAlphabet, highestPloidy);
        final int nPossibleGenotypes = possibleGenotypes.size();

        final VariantContextBuilder vb =
            new VariantContextBuilder("unittest", "1", 1, 1, siteAlleles);

        // first test -- one single-sample context per genotype
        for (int g = 0; g < nPossibleGenotypes; g++) {
          final List<Genotype> single = new ArrayList<Genotype>(3);
          single.add(GenotypeBuilder.create("sample" + g, possibleGenotypes.get(g)));
          add(vb.genotypes(single));
        }

        // second test -- one context containing a sample for each genotype
        final List<Genotype> everyGenotype = new ArrayList<Genotype>(nPossibleGenotypes);
        for (int g = 0; g < nPossibleGenotypes; g++) {
          everyGenotype.add(GenotypeBuilder.create("sample" + g, possibleGenotypes.get(g)));
        }
        add(vb.genotypes(everyGenotype));

        // third test -- mixed ploidy: truncate each genotype to every lower ploidy
        for (int g = 0; g < nPossibleGenotypes; g++) {
          for (int ploidy = 1; ploidy < highestPloidy; ploidy++) {
            final List<Allele> truncated = possibleGenotypes.get(g).subList(0, ploidy);
            final List<Genotype> single = new ArrayList<Genotype>(highestPloidy);
            single.add(GenotypeBuilder.create("sample" + g, truncated));
            add(vb.genotypes(single));
          }
        }
      }
    }
  }
 /**
  * Builds one reverse-clipping test case.
  *
  * @param expectedClip the clip value the test expects
  * @param ref the reference string for the case
  * @param alleles base strings, each turned into an Allele and appended to this case's alleles
  */
 private ReverseClippingPositionTestProvider(
     final int expectedClip, final String ref, final String... alleles) {
   super(ReverseClippingPositionTestProvider.class);
   this.expectedClip = expectedClip;
   this.ref = ref;
   for (final String alleleBases : alleles) {
     this.alleles.add(Allele.create(alleleBases));
   }
 }
  /**
   * Builds a call context for a site without a confident call.
   *
   * <p>In GENOTYPE_GIVEN_ALLELES mode the context copies the start, end and alleles of the input
   * alleles record. Otherwise it is a single-base context holding only the reference allele. If
   * an annotation engine is configured, the context is annotated using per-sample contexts split
   * from the raw pileup.
   *
   * @param tracker meta-data tracker for the locus
   * @param ref reference context for the locus
   * @param stratifiedContexts ignored on input; replaced by contexts derived from rawContext
   *     when annotating
   * @param rawContext the unfiltered alignment context
   * @return the new call context, created with {@code false} as its second constructor argument,
   *     or null when there is no input alleles record (given-alleles mode) or the reference base
   *     is not an acceptable allele base
   */
  private VariantCallContext generateEmptyContext(
      RefMetaDataTracker tracker,
      ReferenceContext ref,
      Map<String, AlignmentContext> stratifiedContexts,
      AlignmentContext rawContext) {
    final boolean genotypingGivenAlleles =
        UAC.GenotypingMode
            == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;

    VariantContext vc;
    if (!genotypingGivenAlleles) {
      // deal with bad/non-standard reference bases
      if (!Allele.acceptableAlleleBases(new byte[] {ref.getBase()})) {
        return null;
      }

      final Set<Allele> referenceOnly = new HashSet<Allele>();
      referenceOnly.add(Allele.create(ref.getBase(), true));
      vc =
          new VariantContextBuilder(
                  "UG_call",
                  ref.getLocus().getContig(),
                  ref.getLocus().getStart(),
                  ref.getLocus().getStart(),
                  referenceOnly)
              .make();
    } else {
      final VariantContext givenAlleles =
          UnifiedGenotyperEngine.getVCFromAllelesRod(
              tracker, ref, rawContext.getLocation(), false, logger, UAC.alleles);
      if (givenAlleles == null) {
        return null;
      }
      vc =
          new VariantContextBuilder(
                  "UG_call",
                  ref.getLocus().getContig(),
                  givenAlleles.getStart(),
                  givenAlleles.getEnd(),
                  givenAlleles.getAlleles())
              .make();
    }

    if (annotationEngine != null) {
      // annotations use the *unfiltered* and *unBAQed* context
      final ReadBackedPileup rawPileup = rawContext.getBasePileup();
      stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(rawPileup);
      vc = annotationEngine.annotateContext(tracker, ref, stratifiedContexts, vc);
    }

    return new VariantCallContext(vc, false);
  }
Example #18
0
 /**
  * Returns the first base of an allele as a character.
  *
  * @param allele the allele to inspect
  * @return the first byte of the allele's bases cast to char, or '.' when the allele has no bases
  */
 public char getFirstBase(Allele allele) {
   final byte[] alleleBases = allele.getBases();
   if (alleleBases.length == 0) {
     return '.';
   }
   return (char) alleleBases[0];
 }
  /**
   * Decides whether two allele collections differ when walked in iteration order.
   *
   * @param alleleSet1 first collection
   * @param alleleSet2 second collection
   * @return true if the collections differ at some position or have different sizes; false when
   *     they contain equal alleles in the same order
   */
  private static final boolean hasPLIncompatibleAlleles(
      final Collection<Allele> alleleSet1, final Collection<Allele> alleleSet2) {
    final Iterator<Allele> first = alleleSet1.iterator();
    final Iterator<Allele> second = alleleSet2.iterator();

    // walk both collections in lock step until either runs out
    while (first.hasNext() && second.hasNext()) {
      final Allele fromFirst = first.next();
      final Allele fromSecond = second.next();
      if (!fromFirst.equals(fromSecond)) {
        return true;
      }
    }

    // every compared pair matched, so the collections are compatible unless one has leftovers
    final boolean leftoverInFirst = first.hasNext();
    return leftoverInFirst || second.hasNext();
  }
Example #20
0
  /**
   * Checks whether any sample carries non-reference alleles at the same genotype position in
   * both contexts.
   *
   * <p>Relies on the caller having established (via allSamplesAreMergeable()) that every sample
   * in vc1 has a genotype in vc2 with at least as many alleles.
   *
   * @param vc1 first context; its samples drive the iteration
   * @param vc2 second context, looked up by sample name
   * @return true if, for some sample and allele position, both corresponding alleles are
   *     non-reference
   */
  static boolean someSampleHasDoubleNonReferenceAllele(VariantContext vc1, VariantContext vc2) {
    for (final Genotype firstGenotype : vc1.getGenotypes()) {
      final Genotype secondGenotype = vc2.getGenotype(firstGenotype.getSampleName());

      final Iterator<Allele> firstAlleles = firstGenotype.getAlleles().iterator();
      final Iterator<Allele> secondAlleles = secondGenotype.getAlleles().iterator();

      while (firstAlleles.hasNext()) {
        final Allele fromFirst = firstAlleles.next();
        final Allele fromSecond = secondAlleles.next(); // OK, since allSamplesAreMergeable()

        // corresponding alleles are both alternate
        if (fromFirst.isNonReference() && fromSecond.isNonReference()) {
          return true;
        }
      }
    }

    return false;
  }
    /**
     * Builds one repeat-detection test case.
     *
     * @param isTrueRepeat the outcome the test expects
     * @param ref reference sequence; stored with a leading dummy "N" base
     * @param refAlleleString bases of the reference allele
     * @param altAlleleStrings bases of each alternate allele
     */
    private RepeatDetectorTest(
        boolean isTrueRepeat, String ref, String refAlleleString, String... altAlleleStrings) {
      super(RepeatDetectorTest.class);
      this.isTrueRepeat = isTrueRepeat;
      this.ref = "N" + ref; // add a dummy base for the event here

      final Allele referenceAllele = Allele.create(refAlleleString, true);
      final List<Allele> siteAlleles = new LinkedList<Allele>();
      siteAlleles.add(referenceAllele);
      for (final String altBases : altAlleleStrings) {
        siteAlleles.add(Allele.create(altBases, false));
      }

      final int stop = 1 + referenceAllele.length();
      this.vc = new VariantContextBuilder("test", "chr1", 1, stop, siteAlleles).make();
    }
Example #22
0
    /**
     * Returns the merged allele for a pair of alleles, creating and caching it on first use.
     *
     * <p>The merged bases are all1's bases, then intermediateLength intermediate positions
     * (copied from intermediateBases when that is non-null, otherwise left as zero bytes), then
     * all2's bases.
     *
     * @param all1 allele contributing the leading bases
     * @param all2 allele contributing the trailing bases
     * @param creatingReferenceForFirstTime reference flag used when a new allele is created; it
     *     has no effect when the pair is already cached
     * @return the cached or newly created merged allele
     */
    private Allele ensureMergedAllele(
        Allele all1, Allele all2, boolean creatingReferenceForFirstTime) {
      final AlleleOneAndTwo pairKey = new AlleleOneAndTwo(all1, all2);

      final Allele cached = mergedAlleles.get(pairKey);
      if (cached != null) {
        return cached;
      }

      final byte[] leading = all1.getBases();
      final byte[] trailing = all2.getBases();
      final int intermediateOffset = leading.length;
      final int trailingOffset = leading.length + intermediateLength;

      final byte[] merged = new byte[leading.length + intermediateLength + trailing.length];
      System.arraycopy(leading, 0, merged, 0, leading.length);
      if (intermediateBases != null) {
        System.arraycopy(intermediateBases, 0, merged, intermediateOffset, intermediateLength);
      }
      System.arraycopy(trailing, 0, merged, trailingOffset, trailing.length);

      final Allele created = Allele.create(merged, creatingReferenceForFirstTime);
      mergedAlleles.put(pairKey, created);
      return created;
    }
  /**
   * Returns a context identical to the input but with its REF and ALT alleles reverse
   * complemented. No-call and null alleles are carried over unchanged.
   *
   * <p>The new site alleles keep the order of {@code vc.getAlleles()}. They were previously taken
   * from {@code HashMap.values()}, whose iteration order is unspecified, so the alternate alleles
   * could come out in a different order than in the input.
   *
   * @param vc variant context
   * @return a new context with reverse-complemented alleles and genotypes rewritten to use them.
   *     A genotype allele that is not one of the context's alleles becomes a no-call.
   */
  public static VariantContext reverseComplement(VariantContext vc) {
    // map each original allele to its reverse complement and record the new alleles in order
    final HashMap<Allele, Allele> alleleMap =
        new HashMap<Allele, Allele>(vc.getAlleles().size());
    final List<Allele> orderedNewAlleles = new ArrayList<Allele>(vc.getAlleles().size());
    for (final Allele originalAllele : vc.getAlleles()) {
      final Allele newAllele;
      if (originalAllele.isNoCall() || originalAllele.isNull()) {
        newAllele = originalAllele;
      } else {
        newAllele =
            Allele.create(
                BaseUtils.simpleReverseComplement(originalAllele.getBases()),
                originalAllele.isReference());
      }
      alleleMap.put(originalAllele, newAllele);
      orderedNewAlleles.add(newAllele);
    }

    // create new Genotype objects that reference the new alleles
    final GenotypesContext newGenotypes = GenotypesContext.create(vc.getNSamples());
    for (final Genotype genotype : vc.getGenotypes()) {
      final List<Allele> newAlleles = new ArrayList<Allele>();
      for (final Allele allele : genotype.getAlleles()) {
        Allele newAllele = alleleMap.get(allele);
        if (newAllele == null) {
          newAllele = Allele.NO_CALL;
        }
        newAlleles.add(newAllele);
      }
      newGenotypes.add(Genotype.modifyAlleles(genotype, newAlleles));
    }

    return new VariantContextBuilder(vc)
        .alleles(orderedNewAlleles)
        .genotypes(newGenotypes)
        .make();
  }
Example #24
0
  /**
   * Checks this record's reference allele, and its indel padding base if present, against the
   * bases read from the reference sequence.
   *
   * @param reference the allele built from the reference sequence; when null, nothing is
   *     validated
   * @param paddedRefBase the padding base from the reference sequence. A null value counts as a
   *     mismatch when this record has a padding base.
   * @throws TribbleException.InternalCodecException if the REF allele or the padding base does not
   *     match the reference sequence
   */
  public void validateReferenceBases(Allele reference, Byte paddedRefBase) {
    if (reference == null) return;

    // don't validate if we're a complex event
    if (!isComplexIndel() && !reference.isNull() && !reference.basesMatch(getReference())) {
      throw new TribbleException.InternalCodecException(
          String.format(
              "the REF allele is incorrect for the record at position %s:%d, fasta says %s vs. VCF says %s",
              getChr(), getStart(), reference.getBaseString(), getReference().getBaseString()));
    }

    // we also need to validate the padding base for simple indels
    if (hasReferenceBaseForIndel() && !getReferenceBaseForIndel().equals(paddedRefBase)) {
      // paddedRefBase can be null here because equals(null) is false. Unboxing it for the message
      // used to throw a NullPointerException instead of the intended codec exception.
      final String fastaBase =
          paddedRefBase == null ? "null" : String.valueOf((char) paddedRefBase.byteValue());
      throw new TribbleException.InternalCodecException(
          String.format(
              "the padded REF base is incorrect for the record at position %s:%d, fasta says %s vs. VCF says %s",
              getChr(),
              getStart(),
              fastaBase,
              (char) getReferenceBaseForIndel().byteValue()));
    }
  }
Example #25
0
  /**
   * Checks that alleles at the two sites always segregate together: across the reference pairing
   * and every sample's genotype, each site-1 allele is seen with exactly one site-2 allele and
   * vice versa.
   *
   * <p>Each vc1 sample is expected to have a genotype in vc2 with at least as many alleles.
   *
   * @param vc1 first site; its samples drive the iteration
   * @param vc2 second site, looked up by sample name
   * @return true if the pairing is one-to-one in both directions, false as soon as an allele is
   *     seen with two different partners
   */
  static boolean doubleAllelesSegregatePerfectlyAmongSamples(
      VariantContext vc1, VariantContext vc2) {
    final Map<Allele, Allele> site1ToSite2 = new HashMap<Allele, Allele>();
    final Map<Allele, Allele> site2ToSite1 = new HashMap<Allele, Allele>();

    // the reference genome pairs the two reference alleles
    site1ToSite2.put(vc1.getReference(), vc2.getReference());
    site2ToSite1.put(vc2.getReference(), vc1.getReference());

    // every sample's pairing must agree with the reference and with all earlier samples
    for (final Genotype firstGenotype : vc1.getGenotypes()) {
      final Genotype secondGenotype = vc2.getGenotype(firstGenotype.getSampleName());

      final Iterator<Allele> firstAlleles = firstGenotype.getAlleles().iterator();
      final Iterator<Allele> secondAlleles = secondGenotype.getAlleles().iterator();

      while (firstAlleles.hasNext()) {
        final Allele atSite1 = firstAlleles.next();
        final Allele atSite2 = secondAlleles.next();

        // site-1 allele segregating with two different site-2 alleles
        if (!recordSegregation(site1ToSite2, atSite1, atSite2)) {
          return false;
        }
        // site-2 allele segregating with two different site-1 alleles
        if (!recordSegregation(site2ToSite1, atSite2, atSite1)) {
          return false;
        }
      }
    }

    return true;
  }

  /** Stores the partner of an allele on first sight; otherwise checks it is the same partner. */
  private static boolean recordSegregation(
      Map<Allele, Allele> partners, Allele allele, Allele partner) {
    final Allele knownPartner = partners.get(allele);
    if (knownPartner == null) {
      partners.put(allele, partner);
      return true;
    }
    return knownPartner.equals(partner);
  }
 /**
  * Writes out and then discards every interval in the interval map that lies before the current
  * reference locus.
  *
  * @param refLocus the current reference locus
  * @param refBase the reference base, used to build the reference allele passed to the VCF output
  */
 private void outputFinishedIntervals(final GenomeLoc refLocus, final byte refBase) {
   // removal is deferred so the map is not modified while its key set is being iterated
   final List<GenomeLoc> finished = new LinkedList<>();

   for (GenomeLoc interval : intervalMap.keySet()) {
     if (!interval.isBefore(refLocus)) {
       continue;
     }

     final IntervalStratification stats = intervalMap.get(interval);
     outputStatsToVCF(stats, Allele.create(refBase, true));
     if (hasMissingLoci(stats)) {
       outputMissingInterval(stats);
     }
     finished.add(interval);
   }

   for (GenomeLoc interval : finished) {
     intervalMap.remove(interval);
   }
 }
  /**
   * Picks the longest reference allele among the given contexts.
   *
   * @param VCs the contexts to inspect
   * @return the longest reference allele (the first one seen among equal lengths), or null when
   *     the list is empty
   * @throws UserException.BadInput if a context's reference allele has the same length as the
   *     current longest one but is not equal to it
   */
  private static Allele determineReferenceAllele(List<VariantContext> VCs) {
    Allele longest = null;

    for (VariantContext vc : VCs) {
      final Allele candidate = vc.getReference();

      if (longest == null || longest.length() < candidate.length()) {
        longest = candidate;
        continue;
      }

      if (longest.length() == candidate.length() && !longest.equals(candidate)) {
        throw new UserException.BadInput(
            String.format(
                "The provided variant file(s) have inconsistent references for the same position(s) at %s:%d, %s vs. %s",
                vc.getChr(), vc.getStart(), longest, candidate));
      }
    }

    return longest;
  }
  /**
   * Suite-wide fixture: creates the genome-loc parser from the b37 reference file and the
   * alleles shared by the tests.
   *
   * @throws UserException.CouldNotReadInputFile if the reference file cannot be found
   */
  @BeforeSuite
  public void setup() {
    final File fastaFile = new File(b37KGReference);
    try {
      final IndexedFastaSequenceFile fasta = new CachingIndexedFastaSequenceFile(fastaFile);
      genomeLocParser = new GenomeLocParser(fasta);
    } catch (FileNotFoundException notFound) {
      throw new UserException.CouldNotReadInputFile(fastaFile, notFound);
    }

    // reference alleles
    Aref = Allele.create("A", true);
    Cref = Allele.create("C", true);

    // alleles created without a reference flag
    T = Allele.create("T");
    C = Allele.create("C");
    ATC = Allele.create("ATC");
    ATCATC = Allele.create("ATCATC");
  }
Example #29
0
  /**
   * Reads a list of ExactCall objects from reader, keeping only those whose start position is in
   * startsToKeep, or all of them if startsToKeep is empty.
   *
   * <p>Each input line is tab-separated: location, variable name, key, value. The first two lines
   * (the header and the first "type" line) are skipped. The lines of a record are accumulated
   * until the next "type" line is read, and that line triggers construction of the ExactCall for
   * the accumulated data.
   *
   * <p>NOTE(review): because a record is only emitted when the following "type" line arrives, the
   * lines accumulated before end-of-input are returned without being turned into a call. Confirm
   * that dropping the final record is intended.
   *
   * @param reader a just-opened reader sitting at the start of the file
   * @param startsToKeep a list of start position of the calls to keep, or empty if all calls should
   *     be kept
   * @param parser a genome loc parser to create genome locs
   * @return a list of ExactCall objects in reader
   * @throws IOException if reading a line from reader fails
   * @throws IllegalArgumentException if any argument is null
   */
  public static List<ExactCall> readExactLog(
      final BufferedReader reader, final List<Integer> startsToKeep, GenomeLocParser parser)
      throws IOException {
    if (reader == null) throw new IllegalArgumentException("reader cannot be null");
    if (startsToKeep == null) throw new IllegalArgumentException("startsToKeep cannot be null");
    if (parser == null) throw new IllegalArgumentException("GenomeLocParser cannot be null");

    List<ExactCall> calls = new LinkedList<ExactCall>();

    // skip the header line
    reader.readLine();

    // skip the first "type" line
    reader.readLine();

    // outer loop: one iteration per record; all per-record accumulators are created fresh here
    while (true) {
      final VariantContextBuilder builder = new VariantContextBuilder();
      final List<Allele> alleles = new ArrayList<Allele>();
      final List<Genotype> genotypes = new ArrayList<Genotype>();
      final double[] posteriors = new double[2];
      // the priors are not read from the log; the same fixed two-element vector is used for every
      // call
      final double[] priors = MathUtils.normalizeFromLog10(new double[] {0.5, 0.5}, true);
      final List<Integer> mle = new ArrayList<Integer>();
      final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>();
      long runtimeNano = -1;

      GenomeLoc currentLoc = null;
      // inner loop: one iteration per line of the current record
      while (true) {
        final String line = reader.readLine();
        // end of input: return what has been built so far (the pending record is not emitted)
        if (line == null) return calls;

        // the line is assumed to have at least four tab-separated fields; a shorter line fails
        // with an ArrayIndexOutOfBoundsException
        final String[] parts = line.split("\t");
        final GenomeLoc lineLoc = parser.parseGenomeLoc(parts[0]);
        final String variable = parts[1];
        final String key = parts[2];
        final String value = parts[3];

        // the record's location is taken from the first line read for it
        if (currentLoc == null) currentLoc = lineLoc;

        if (variable.equals("type")) {
          // a "type" line marks the start of the next record, so finish the current one
          if (startsToKeep.isEmpty() || startsToKeep.contains(currentLoc.getStart())) {
            builder.alleles(alleles);
            // the stop position is derived from the length of the first allele read
            final int stop = currentLoc.getStart() + alleles.get(0).length() - 1;
            builder.chr(currentLoc.getContig()).start(currentLoc.getStart()).stop(stop);
            builder.genotypes(genotypes);
            final int[] mleInts = ArrayUtils.toPrimitive(mle.toArray(new Integer[] {}));
            final AFCalcResult result =
                new AFCalcResult(mleInts, 1, alleles, posteriors, priors, log10pNonRefByAllele);
            calls.add(new ExactCall(builder.make(), runtimeNano, result));
          }
          break;
        } else if (variable.equals("allele")) {
          // the allele with key "0" is the reference allele
          final boolean isRef = key.equals("0");
          alleles.add(Allele.create(value, isRef));
        } else if (variable.equals("PL")) {
          // key is the sample name, value the PL field
          final GenotypeBuilder gb = new GenotypeBuilder(key);
          gb.PL(GenotypeLikelihoods.fromPLField(value).getAsPLs());
          genotypes.add(gb.make());
        } else if (variable.equals("log10PosteriorOfAFEq0")) {
          posteriors[0] = Double.valueOf(value);
        } else if (variable.equals("log10PosteriorOfAFGt0")) {
          posteriors[1] = Double.valueOf(value);
        } else if (variable.equals("MLE")) {
          mle.add(Integer.valueOf(value));
        } else if (variable.equals("pNonRefByAllele")) {
          // key holds the allele the value belongs to
          final Allele a = Allele.create(key);
          log10pNonRefByAllele.put(a, Double.valueOf(value));
        } else if (variable.equals("runtime.nano")) {
          runtimeNano = Long.valueOf(value);
        } else {
          // nothing to do: unrecognized variables are ignored
        }
      }
    }
  }
Example #30
0
  /**
   * Writes one line of Beagle input for a site: the marker, the site's alleles, and the
   * likelihoods for every sample in {@code samples}. When the markers writer is non-null, the
   * marker, a running counter and the alleles are appended to it as well.
   *
   * @param preferredVC the context whose location, alleles and genotypes are used first
   * @param otherVC fallback context; its genotypes are consulted only when goodSite(otherVC) is
   *     true and the sample is absent from preferredVC
   * @param isValidationSite whether preferredVC is the validation record; a genotype taken from
   *     otherVC is treated with the opposite flag
   * @param prior prior used to build likelihoods for called genotypes that carry none; a negative
   *     value forces use of the genotype's own likelihoods at validation sites
   * @throws StingException if a sample has a genotype in neither context
   */
  public void writeBeagleOutput(
      VariantContext preferredVC, VariantContext otherVC, boolean isValidationSite, double prior) {
    GenomeLoc currentLoc =
        VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(), preferredVC);
    StringBuffer beagleOut = new StringBuffer();

    // the marker is "contig:start" followed by a space
    String marker = String.format("%s:%d ", currentLoc.getContig(), currentLoc.getStart());
    beagleOut.append(marker);
    if (markers != null)
      markers.append(marker).append("\t").append(Integer.toString(markerCounter++)).append("\t");
    for (Allele allele : preferredVC.getAlleles()) {
      String bglPrintString;
      // no-call and null alleles are printed as "-"
      if (allele.isNoCall() || allele.isNull()) bglPrintString = "-";
      else bglPrintString = allele.getBaseString(); // get rid of * in case of reference allele

      beagleOut.append(String.format("%s ", bglPrintString));
      if (markers != null) markers.append(bglPrintString).append("\t");
    }
    if (markers != null) markers.append("\n");

    GenotypesContext preferredGenotypes = preferredVC.getGenotypes();
    GenotypesContext otherGenotypes = goodSite(otherVC) ? otherVC.getGenotypes() : null;
    for (String sample : samples) {
      boolean isMaleOnChrX = CHECK_IS_MALE_ON_CHR_X && getSample(sample).getGender() == Gender.MALE;

      Genotype genotype;
      boolean isValidation;
      // use sample as key into genotypes structure
      if (preferredGenotypes.containsSample(sample)) {
        genotype = preferredGenotypes.get(sample);
        isValidation = isValidationSite;
      } else if (otherGenotypes != null && otherGenotypes.containsSample(sample)) {
        // the genotype comes from the other record, so the validation flag is flipped
        genotype = otherGenotypes.get(sample);
        isValidation = !isValidationSite;
      } else {
        // there is magically no genotype for this sample.
        throw new StingException(
            "Sample "
                + sample
                + " arose with no genotype in variant or validation VCF. This should never happen.");
      }

      /*
       * Use likelihoods if: is validation, prior is negative; or: is not validation, has genotype key
       */
      double[] log10Likelihoods = null;
      // NOTE(review): the first half of this condition does not check hasLikelihoods(), so
      // getLikelihoods() may return null here -- confirm that cannot happen
      if ((isValidation && prior < 0.0) || genotype.hasLikelihoods()) {
        log10Likelihoods = genotype.getLikelihoods().getAsVector();

        // see if we need to randomly mask out genotype in this position.
        if (GenomeAnalysisEngine.getRandomGenerator().nextDouble() <= insertedNoCallRate) {
          // we are masking out this genotype
          log10Likelihoods =
              isMaleOnChrX ? HAPLOID_FLAT_LOG10_LIKELIHOODS : DIPLOID_FLAT_LOG10_LIKELIHOODS;
        }

        if (isMaleOnChrX) {
          // NOTE(review): after masking, log10Likelihoods refers to
          // HAPLOID_FLAT_LOG10_LIKELIHOODS, so this write modifies that array in place. Confirm it
          // is safe if the array is shared.
          log10Likelihoods[1] = -255; // todo -- warning this is dangerous for multi-allele case
        }
      }
      // otherwise, build likelihoods from the prior and the called genotype
      else if (!isValidation && genotype.isCalled() && !genotype.hasLikelihoods()) {
        // hack to deal with input VCFs with no genotype likelihoods.  Just assume the called
        // genotype
        // is confident.  This is useful for Hapmap and 1KG release VCFs.
        // every genotype class starts at (1 - prior) / 2; the called class is then set to prior
        double AA = (1.0 - prior) / 2.0;
        double AB = (1.0 - prior) / 2.0;
        double BB = (1.0 - prior) / 2.0;

        if (genotype.isHomRef()) {
          AA = prior;
        } else if (genotype.isHet()) {
          AB = prior;
        } else if (genotype.isHomVar()) {
          BB = prior;
        }

        // the heterozygous entry is forced to probability 0 for males on chrX
        log10Likelihoods = MathUtils.toLog10(new double[] {AA, isMaleOnChrX ? 0.0 : AB, BB});
      } else {
        // no usable information: fall back to the flat likelihoods
        log10Likelihoods =
            isMaleOnChrX ? HAPLOID_FLAT_LOG10_LIKELIHOODS : DIPLOID_FLAT_LOG10_LIKELIHOODS;
      }

      writeSampleLikelihoods(beagleOut, preferredVC, log10Likelihoods);
    }

    beagleWriter.println(beagleOut.toString());
  }