Esempio n. 1
0
  /**
   * Classifies a single alternate allele against the reference allele.
   *
   * <p>The comparison is strictly pairwise, so the result is one of SYMBOLIC, SNP, MNP or INDEL;
   * MIXED is never produced here because that type is reserved for records carrying several
   * alternate alleles of different types.
   *
   * @param ref the reference allele; must not be symbolic
   * @param allele the alternate allele to classify
   * @return the type of the ref/alt pair
   * @throws IllegalStateException if {@code ref} is symbolic
   */
  private static Type typeOfBiallelicVariant(Allele ref, Allele allele) {
    if (ref.isSymbolic()) {
      throw new IllegalStateException(
          "Unexpected error: encountered a record with a symbolic reference allele");
    }

    if (allele.isSymbolic()) {
      return Type.SYMBOLIC;
    }

    // Any non-symbolic pair whose lengths differ is an INDEL.  An earlier version also required
    // one allele to be a prefix of the other and returned MIXED otherwise; that was wrong, e.g.
    // REF = CTTA with ALT = C,CT,CA must be an INDEL but the prefix test marked it as MIXED.
    if (ref.length() != allele.length()) {
      return Type.INDEL;
    }

    // Equal lengths: a single base is a SNP, anything longer is an MNP.
    return allele.length() == 1 ? Type.SNP : Type.MNP;
  }
  /**
   * Picks the longest reference allele among the given variant contexts.
   *
   * <p>Two reference alleles of the same length must be equal; otherwise the inputs disagree
   * about the reference and the call fails.
   *
   * @param VCs the variant contexts to inspect
   * @return the longest reference allele seen, or {@code null} if {@code VCs} is empty
   * @throws UserException.BadInput if two same-length reference alleles differ
   */
  private static Allele determineReferenceAllele(List<VariantContext> VCs) {
    Allele longest = null;

    for (final VariantContext context : VCs) {
      final Allele candidate = context.getReference();

      // First allele seen, or a strictly longer one: it becomes the current best.
      if (longest == null || longest.length() < candidate.length()) {
        longest = candidate;
        continue;
      }

      // Shorter candidates are simply ignored; same-length candidates must match exactly.
      final boolean sameLength = longest.length() == candidate.length();
      if (sameLength && !longest.equals(candidate)) {
        throw new UserException.BadInput(
            String.format(
                "The provided variant file(s) have inconsistent references for the same position(s) at %s:%d, %s vs. %s",
                context.getChr(), context.getStart(), longest, candidate));
      }
    }

    return longest;
  }
  /**
   * Builds an allele mapper that re-expresses the alleles of {@code vc} against {@code refAllele}.
   *
   * <p>When {@code vc} already uses {@code refAllele} as its reference, the mapper simply wraps
   * {@code vc}. Otherwise {@code refAllele} is expected to be longer than the reference of
   * {@code vc}, and every allele is padded with the trailing bases that {@code vc}'s reference
   * lacks. For example:
   *
   * <pre>
   * refAllele: ACGTGA
   * myRef:     ACGT
   * myAlt:     -
   * </pre>
   *
   * is remapped so that myRef =&gt; refAllele and myAlt =&gt; GA.
   *
   * @param refAllele the longest reference allele seen at this start site
   * @param vc the variant context whose alleles may need extending
   * @param allAlleles alleles already collected; an extended allele equal to one of these is
   *     replaced by that existing instance
   * @return a mapper from the alleles of {@code vc} to their remapped counterparts
   * @throws ReviewedStingException if {@code refAllele} is not strictly longer than the reference
   *     of {@code vc} while also being unequal to it
   */
  private static AlleleMapper resolveIncompatibleAlleles(
      Allele refAllele, VariantContext vc, Set<Allele> allAlleles) {
    // Nothing to remap when the references already agree.
    if (refAllele.equals(vc.getReference())) {
      return new AlleleMapper(vc);
    }

    final Allele myRef = vc.getReference();
    // NOTE(review): the guard also fires when the two lengths are equal, although the message
    // only mentions "longer".
    if (refAllele.length() <= myRef.length()) {
      throw new ReviewedStingException(
          "BUG: myRef=" + myRef + " is longer than refAllele=" + refAllele);
    }

    // The bases of refAllele that extend past the end of myRef.
    final byte[] trailingBases =
        Arrays.copyOfRange(refAllele.getBases(), myRef.length(), refAllele.length());

    final Map<Allele, Allele> remapped = new HashMap<Allele, Allele>();
    for (final Allele original : vc.getAlleles()) {
      Allele replacement;
      if (original.isReference()) {
        replacement = refAllele;
      } else {
        replacement = Allele.extend(original, trailingBases);
        // Prefer an already-known instance that is equal to the freshly extended allele.
        for (final Allele known : allAlleles) {
          if (replacement.equals(known)) {
            replacement = known;
          }
        }
      }
      remapped.put(original, replacement);
    }

    return new AlleleMapper(remapped);
  }
Esempio n. 4
0
  /**
   * Computes, for every alternate allele, its length relative to the reference allele
   * (alternate length minus reference length).
   *
   * <p>Positive values therefore correspond to alternates longer than the reference and negative
   * values to alternates shorter than it.
   *
   * @return one length difference per alternate allele, or {@code null} if this record's type is
   *     neither INDEL nor MIXED
   */
  public List<Integer> getIndelLengths() {
    final boolean indelLike = getType() == Type.INDEL || getType() == Type.MIXED;
    if (!indelLike) {
      return null;
    }

    final List<Integer> sizeDeltas = new ArrayList<Integer>();
    for (final Allele alternate : getAlternateAlleles()) {
      final int delta = alternate.length() - getReference().length();
      sizeDeltas.add(delta);
    }
    return sizeDeltas;
  }
    /**
     * Creates a repeat-detector test case.
     *
     * <p>The stored reference string is {@code ref} prefixed with a dummy {@code N} base, and the
     * variant context is built on "chr1" starting at position 1 with a stop of
     * {@code 1 + refAllele.length()}.
     *
     * @param isTrueRepeat the expected outcome recorded for this case
     * @param ref the reference bases, without the dummy leading base
     * @param refAlleleString bases of the reference allele
     * @param altAlleleStrings bases of each alternate allele, in order
     */
    private RepeatDetectorTest(
        boolean isTrueRepeat, String ref, String refAlleleString, String... altAlleleStrings) {
      super(RepeatDetectorTest.class);
      this.isTrueRepeat = isTrueRepeat;
      // Prepend a dummy base for the event.
      this.ref = "N" + ref;

      final List<Allele> allAlleles = new LinkedList<Allele>();
      final Allele reference = Allele.create(refAlleleString, true);
      allAlleles.add(reference);
      for (int i = 0; i < altAlleleStrings.length; i++) {
        allAlleles.add(Allele.create(altAlleleStrings[i], false));
      }

      final int stop = 1 + reference.length();
      this.vc = new VariantContextBuilder("test", "chr1", 1, stop, allAlleles).make();
    }
  /**
   * Returns a variant context in which the first base of every non-symbolic allele has been
   * removed, when such trimming is applicable; otherwise returns {@code inputVC} itself.
   *
   * <p>The input is returned unchanged when any of the following holds:
   *
   * <ul>
   *   <li>the record is not variant;
   *   <li>its reference allele is the null allele;
   *   <li>{@code AbstractVCFCodec.computeForwardClipping} reports no forward clipping for the
   *       alternate alleles against the reference display string;
   *   <li>trimming would not produce any null allele (e.g. mixed records such as {TA*,TGA,TG}),
   *       in which case the original representation is kept.
   * </ul>
   *
   * <p>When trimming is performed, symbolic alleles are carried over untouched, genotypes are
   * rebuilt with the trimmed alleles (uncalled alleles become {@code Allele.NO_CALL}), and the
   * first base of the original reference allele is recorded via
   * {@code referenceBaseForIndel}.
   *
   * @param inputVC the variant context to trim
   * @return a new trimmed variant context, or {@code inputVC} if no trimming applies
   */
  public static VariantContext createVariantContextWithTrimmedAlleles(VariantContext inputVC) {
    final Allele refAllele = inputVC.getReference();

    // Monomorphic records and records whose reference is already null need no trimming.
    if (!inputVC.isVariant() || refAllele.isNull()) {
      return inputVC;
    }

    // Only trim when a leading reference base is reported as common to the alternate alleles.
    final boolean sharesLeadingBase =
        AbstractVCFCodec.computeForwardClipping(
                new ArrayList<Allele>(inputVC.getAlternateAlleles()),
                refAllele.getDisplayString())
            > 0;
    if (!sharesLeadingBase) {
      return inputVC;
    }

    final List<Allele> alleles = new ArrayList<Allele>();
    final Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();
    boolean hasNullAlleles = false;

    for (final Allele a : inputVC.getAlleles()) {
      final Allele trimmedAllele;
      if (a.isSymbolic()) {
        // Symbolic alleles carry no bases to trim; keep them as they are.
        trimmedAllele = a;
      } else {
        // Drop the first base and keep the reference flag of the original allele.
        final byte[] newBases = Arrays.copyOfRange(a.getBases(), 1, a.length());
        trimmedAllele = Allele.create(newBases, a.isReference());
      }
      alleles.add(trimmedAllele);
      originalToTrimmedAlleleMap.put(a, trimmedAllele);
      if (trimmedAllele.isNull()) {
        hasNullAlleles = true;
      }
    }

    // Detect the case where we're trimming bases but the resulting vc doesn't have any null
    // allele. In that case, we keep the original representation.
    // Example: mixed records such as {TA*,TGA,TG}
    if (!hasNullAlleles) {
      return inputVC;
    }

    // Now we can recreate the genotypes with trimmed alleles.
    final GenotypesContext genotypes = GenotypesContext.create();
    for (final Genotype genotype : inputVC.getGenotypes()) {
      final List<Allele> trimmedAlleles = new ArrayList<Allele>();
      for (final Allele a : genotype.getAlleles()) {
        if (a.isCalled()) {
          trimmedAlleles.add(originalToTrimmedAlleleMap.get(a));
        } else {
          trimmedAlleles.add(Allele.NO_CALL);
        }
      }
      genotypes.add(Genotype.modifyAlleles(genotype, trimmedAlleles));
    }

    // Attributes are passed through as a key-sorted copy of the input's attributes.
    final Map<String, Object> attributes = new TreeMap<String, Object>(inputVC.getAttributes());

    return new VariantContextBuilder(inputVC)
        .alleles(alleles)
        .genotypes(genotypes)
        .attributes(attributes)
        .referenceBaseForIndel(Byte.valueOf(refAllele.getBases()[0]))
        .make();
  }