Exemple #1
0
  private static Type typeOfBiallelicVariant(Allele ref, Allele allele) {
    if (ref.isSymbolic())
      throw new IllegalStateException(
          "Unexpected error: encountered a record with a symbolic reference allele");

    if (allele.isSymbolic()) return Type.SYMBOLIC;

    if (ref.length() == allele.length()) {
      if (allele.length() == 1) return Type.SNP;
      else return Type.MNP;
    }

    // Important note: previously we were checking that one allele is the prefix of the other.
    // However, that's not an
    // appropriate check as can be seen from the following example:
    // REF = CTTA and ALT = C,CT,CA
    // This should be assigned the INDEL type but was being marked as a MIXED type because of the
    // prefix check.
    // In truth, it should be absolutely impossible to return a MIXED type from this method because
    // it simply
    // performs a pairwise comparison of a single alternate allele against the reference allele
    // (whereas the MIXED type
    // is reserved for cases of multiple alternate alleles of different types).  Therefore, if we've
    // reached this point
    // in the code (so we're not a SNP, MNP, or symbolic allele), we absolutely must be an INDEL.
    return Type.INDEL;

    // old incorrect logic:
    // if (oneIsPrefixOfOther(ref, allele))
    //     return Type.INDEL;
    // else
    //     return Type.MIXED;
  }
Exemple #2
0
 public boolean hasSymbolicAlleles() {
   for (final Allele a : getAlleles()) {
     if (a.isSymbolic()) {
       return true;
     }
   }
   return false;
 }
  public static VariantContext createVariantContextWithTrimmedAlleles(VariantContext inputVC) {
    // see if we need to trim common reference base from all alleles
    boolean trimVC;

    // We need to trim common reference base from all alleles in all genotypes if a ref base is
    // common to all alleles
    Allele refAllele = inputVC.getReference();
    if (!inputVC.isVariant()) trimVC = false;
    else if (refAllele.isNull()) trimVC = false;
    else {
      trimVC =
          (AbstractVCFCodec.computeForwardClipping(
                  new ArrayList<Allele>(inputVC.getAlternateAlleles()),
                  inputVC.getReference().getDisplayString())
              > 0);
    }

    // nothing to do if we don't need to trim bases
    if (trimVC) {
      List<Allele> alleles = new ArrayList<Allele>();
      GenotypesContext genotypes = GenotypesContext.create();

      // set the reference base for indels in the attributes
      Map<String, Object> attributes = new TreeMap<String, Object>(inputVC.getAttributes());

      Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();

      for (Allele a : inputVC.getAlleles()) {
        if (a.isSymbolic()) {
          alleles.add(a);
          originalToTrimmedAlleleMap.put(a, a);
        } else {
          // get bases for current allele and create a new one with trimmed bases
          byte[] newBases = Arrays.copyOfRange(a.getBases(), 1, a.length());
          Allele trimmedAllele = Allele.create(newBases, a.isReference());
          alleles.add(trimmedAllele);
          originalToTrimmedAlleleMap.put(a, trimmedAllele);
        }
      }

      // detect case where we're trimming bases but resulting vc doesn't have any null allele. In
      // that case, we keep original representation
      // example: mixed records such as {TA*,TGA,TG}
      boolean hasNullAlleles = false;

      for (Allele a : originalToTrimmedAlleleMap.values()) {
        if (a.isNull()) hasNullAlleles = true;
        if (a.isReference()) refAllele = a;
      }

      if (!hasNullAlleles) return inputVC;
      // now we can recreate new genotypes with trimmed alleles
      for (final Genotype genotype : inputVC.getGenotypes()) {

        List<Allele> originalAlleles = genotype.getAlleles();
        List<Allele> trimmedAlleles = new ArrayList<Allele>();
        for (Allele a : originalAlleles) {
          if (a.isCalled()) trimmedAlleles.add(originalToTrimmedAlleleMap.get(a));
          else trimmedAlleles.add(Allele.NO_CALL);
        }
        genotypes.add(Genotype.modifyAlleles(genotype, trimmedAlleles));
      }

      final VariantContextBuilder builder = new VariantContextBuilder(inputVC);
      return builder
          .alleles(alleles)
          .genotypes(genotypes)
          .attributes(attributes)
          .referenceBaseForIndel(new Byte(inputVC.getReference().getBases()[0]))
          .make();
    }

    return inputVC;
  }
  public static VariantContext createVariantContextWithPaddedAlleles(
      VariantContext inputVC, boolean refBaseShouldBeAppliedToEndOfAlleles) {
    // see if we need to pad common reference base from all alleles
    boolean padVC;

    // We need to pad a VC with a common base if the length of the reference allele is less than the
    // length of the VariantContext.
    // This happens because the position of e.g. an indel is always one before the actual event (as
    // per VCF convention).
    long locLength = (inputVC.getEnd() - inputVC.getStart()) + 1;
    if (inputVC.hasSymbolicAlleles()) padVC = true;
    else if (inputVC.getReference().length() == locLength) padVC = false;
    else if (inputVC.getReference().length() == locLength - 1) padVC = true;
    else
      throw new IllegalArgumentException(
          "Badly formed variant context at location "
              + String.valueOf(inputVC.getStart())
              + " in contig "
              + inputVC.getChr()
              + ". Reference length must be at most one base shorter than location size");

    // nothing to do if we don't need to pad bases
    if (padVC) {
      if (!inputVC.hasReferenceBaseForIndel())
        throw new ReviewedStingException(
            "Badly formed variant context at location "
                + inputVC.getChr()
                + ":"
                + inputVC.getStart()
                + "; no padded reference base is available.");

      Byte refByte = inputVC.getReferenceBaseForIndel();

      List<Allele> alleles = new ArrayList<Allele>();

      for (Allele a : inputVC.getAlleles()) {
        // get bases for current allele and create a new one with trimmed bases
        if (a.isSymbolic()) {
          alleles.add(a);
        } else {
          String newBases;
          if (refBaseShouldBeAppliedToEndOfAlleles)
            newBases = a.getBaseString() + new String(new byte[] {refByte});
          else newBases = new String(new byte[] {refByte}) + a.getBaseString();
          alleles.add(Allele.create(newBases, a.isReference()));
        }
      }

      // now we can recreate new genotypes with trimmed alleles
      GenotypesContext genotypes = GenotypesContext.create(inputVC.getNSamples());
      for (final Genotype g : inputVC.getGenotypes()) {
        List<Allele> inAlleles = g.getAlleles();
        List<Allele> newGenotypeAlleles = new ArrayList<Allele>(g.getAlleles().size());
        for (Allele a : inAlleles) {
          if (a.isCalled()) {
            if (a.isSymbolic()) {
              newGenotypeAlleles.add(a);
            } else {
              String newBases;
              if (refBaseShouldBeAppliedToEndOfAlleles)
                newBases = a.getBaseString() + new String(new byte[] {refByte});
              else newBases = new String(new byte[] {refByte}) + a.getBaseString();
              newGenotypeAlleles.add(Allele.create(newBases, a.isReference()));
            }
          } else {
            // add no-call allele
            newGenotypeAlleles.add(Allele.NO_CALL);
          }
        }
        genotypes.add(
            new Genotype(
                g.getSampleName(),
                newGenotypeAlleles,
                g.getLog10PError(),
                g.getFilters(),
                g.getAttributes(),
                g.isPhased()));
      }

      return new VariantContextBuilder(inputVC).alleles(alleles).genotypes(genotypes).make();
    } else return inputVC;
  }