コード例 #1
0
  /**
   * Builds the protein-level annotation string for an in-frame duplication whose first nucleotide
   * does not sit on the first position of a codon.
   *
   * <p>Depending on the surrounding sequence, such a duplication appears on the amino-acid level
   * either as a plain duplication or as a deletion/insertion. Example: another {@code gga} is
   * added to {@code ggaggaggaggaggaggagga} while the reading frame is {@code gag-gag-gag-...}
   * (GAG = Glu/E). The effect is one additional E in the amino-acid sequence, and {@code frame_s}
   * is 2 because {@code gga} starts at nucleotide 2 (zero-based) of the GAG codon.
   *
   * <p>Procedure: the {@code frame_s} nucleotides in front of the cDNA window {@code [endpos -
   * var.length(), endpos)} are joined with that window to form the wild-type stretch; the mutated
   * stretch is the same with {@code var} placed between the leading nucleotides and the window.
   * Both stretches are translated. The result is written as {@code dup} if the mutated translation
   * begins with the wild-type translation and the wild-type translation is also found at a
   * positive index when searching from offset {@code var.length() / 3}; otherwise it is written as
   * {@code delins}.
   *
   * @param trmdl The affected transcript
   * @param exonNumber One-based number of the exon
   * @param cDNAanno Annotation of the cDNA, e.g., c.769_771dupTTC
   * @param var the duplicated sequence
   * @param endpos the end position of the duplication (zero-based)
   * @param aaVarStartPos amino-acid position that is written into the protein part of the
   *     annotation
   * @param frame_s the location within the frame (0,1,2) in which mutation occurs
   * @return annotation of the form {@code name:exonN:cDNAanno:p.<wt><pos>dup} or {@code
   *     name:exonN:cDNAanno:p.<wt><pos>delins<mut>}
   * @throws AnnotationException if the length of {@code var} is not a multiple of three
   */
  private static String shiftedInFrameDuplication(
      TranscriptModel trmdl,
      int exonNumber,
      String cDNAanno,
      String var,
      int endpos,
      int aaVarStartPos,
      int frame_s)
      throws AnnotationException {
    final Translator codonTable = Translator.getTranslator(); /* Singleton */
    final int dupLength = var.length();
    if (dupLength % 3 != 0) {
      throw new AnnotationException(
          String.format(
              "[ERROR] DuplicationAnnotation:shiftedInFrameDuplication - "
                  + "variant length not a multiple of 3: %s (len=%d)",
              var, dupLength));
    }
    /* number of amino acids encoded by the duplicated nucleotides */
    final int dupCodons = dupLength / 3;

    final String cdna = trmdl.getCdnaSequence();
    final int windowStart = endpos - dupLength;
    /* nucleotides between the start of the codon and the start of the window */
    final String leadIn = cdna.substring(windowStart - frame_s, windowStart);
    final String window = cdna.substring(windowStart, windowStart + dupLength);

    final String wildtypeAa = codonTable.translateDNA(leadIn + window);
    final String mutatedAa = codonTable.translateDNA(leadIn + var + window);

    final boolean wildtypeRepeated =
        mutatedAa.startsWith(wildtypeAa) && mutatedAa.indexOf(wildtypeAa, dupCodons) > 0;
    if (!wildtypeRepeated) {
      /* the translation changed in a way that is not a pure repeat: report as delins */
      return String.format(
          "%s:exon%d:%s:p.%s%ddelins%s",
          trmdl.getName(), exonNumber, cDNAanno, wildtypeAa, aaVarStartPos, mutatedAa);
    }
    return String.format(
        "%s:exon%d:%s:p.%s%ddup", trmdl.getName(), exonNumber, cDNAanno, wildtypeAa, aaVarStartPos);
  }
コード例 #2
0
  /**
   * Annotates an insertion variant that is a duplication.
   *
   * <p>The methods of this class are called from {@link jannovar.annotation.InsertionAnnotation
   * InsertionAnnotation} if that class determines that the insertion is equal to the preceding
   * nucleotides in the reference sequence. That is, in addition to the conditions for an insertion
   * variant, the duplication variant requires a similar sequence to the insertion (before or)
   * after the insertion.
   *
   * <p>A duplication may or may not cause a frameshift. E.g., inserting an additional 'C' in the
   * sequence 'ACC,GAG' at position 2 would cause a frameshift, whereas insertion of 'CCG' at
   * position 2 just inserts an additional triple: 'ACC GCC GAG'.
   *
   * <p>The start position is first passed through {@code shiftToThreePrime}; if that returns a
   * different position, the end position is recomputed from it. The variant is then classified:
   *
   * <ul>
   *   <li>Length of {@code var} is a multiple of three (ORF conserving) and the duplication begins
   *       on the first nucleotide of a codon: simple duplication of one or of several codons.
   *   <li>Length of {@code var} is a multiple of three, the duplication does not begin on the
   *       first nucleotide of a codon, and the wildtype codon translates to "*": the mutation
   *       affects the wildtype stop codon and is written as an extension (no "*" in the variant
   *       translation) or as a delins up to and including the first "*".
   *   <li>Length of {@code var} is a multiple of three, off-frame start, any other wildtype codon:
   *       delegated to {@code shiftedInFrameDuplication}.
   *   <li>Any other length: frameshift, written in the short form, e.g., p.(Arg97fs) denotes a
   *       frame shifting change with Arginine-97 as the first affected amino acid.
   * </ul>
   *
   * @param trmdl The transcriptmodel / gene in which the current mutation is contained
   * @param frame_s the location within the frame (0,1,2) in which mutation occurs
   * @param wtnt3 The three nucleotides of codon affected by start of mutation
   * @param var alternate nucleotide sequence (the duplication)
   * @param startpos The startposition of the duplication (zero based)
   * @param endpos The endposition of the duplication (zero based)
   * @param exonNumber Number (one-based) of affected exon.
   * @return an {@link jannovar.annotation.Annotation Annotation} object representing the current
   *     variant; its type is {@code FS_DUPLICATION} for frameshifts and {@code NON_FS_DUPLICATION}
   *     otherwise
   * @throws AnnotationException propagated from the helper methods that build the annotation
   */
  public static Annotation getAnnotation(
      TranscriptModel trmdl,
      int frame_s,
      String wtnt3,
      String var,
      int startpos,
      int endpos,
      int exonNumber)
      throws AnnotationException {
    final Translator codonTable = Translator.getTranslator(); /* Singleton */
    final int cdsOffset = trmdl.getRefCDSStart();

    /* Use the position returned by shiftToThreePrime; the end only moves if the start moved. */
    final int dupStart = shiftToThreePrime(trmdl, var, startpos, endpos);
    final int dupLength = var.length();
    final int dupEnd = (dupStart == startpos) ? endpos : dupStart + dupLength - 1;

    final int cdsEndPos = dupEnd - cdsOffset + 1;
    final int cdsStartPos = cdsEndPos - dupLength + 1;

    /*
     * aaVarStartPos is the FIRST position (one-based) of the amino-acid sequence that was
     * duplicated: the integer quotient, plus one unless cdsStartPos is a multiple of three.
     */
    int aaVarStartPos = cdsStartPos / 3;
    if (cdsStartPos % 3 != 0) {
      aaVarStartPos++;
    }

    /* coding DNA HGVS string: a single position for one nucleotide, a range otherwise */
    final String canno =
        (dupLength == 1)
            ? String.format("c.%ddup%s", cdsStartPos, var)
            : String.format("c.%d_%ddup%s", cdsStartPos, cdsEndPos, var);

    /*
     * Now the protein HGVS string. Generate the in-frame snippet for translation; on the
     * '-'-strand the wildtype nucleotides are re-adjusted first.
     */
    final String wtCodon =
        trmdl.isMinusStrand()
            ? trmdl.getWTCodonNucleotides(dupStart - 1 + ((3 - (dupLength % 3)) % 3), frame_s)
            : wtnt3;
    final String varCodon = getVarNt3(trmdl, wtCodon, var, frame_s);

    final String wtaa = codonTable.translateDNA(wtCodon);
    final String varaa = codonTable.translateDNA(varCodon);

    if (dupLength % 3 != 0) {
      /*
       * FRAMESHIFT: short form, e.g., p.(Arg97fs) denotes a frame shifting change with
       * Arginine-97 as the first affected amino acid.
       */
      final String frameshift =
          String.format(
              "%s:exon%d:%s:p.%s%dfs", trmdl.getName(), exonNumber, canno, wtaa, aaVarStartPos);
      return new Annotation(trmdl, frameshift, VariantType.FS_DUPLICATION, cdsStartPos);
    }

    /* From here on the duplication is ORF conserving. */
    final String annot;
    if ((cdsStartPos - 1) % 3 == 0) {
      /*
       * SIMPLE DUPLICATION OF CODONS, e.g., nucleotide position 4 starts a codon, and
       * (4-1)%3==0.
       */
      final String firstDupAa = codonTable.translateDNA(var.substring(0, 3));
      final String lastDupAa = codonTable.translateDNA(var.substring(dupLength - 3));
      if (dupLength == 3) {
        /* Three nucleotides affected, inframe, single aminoacid duplication. */
        annot =
            singleAminoAcidInframeDuplication(
                trmdl.getName(), exonNumber, canno, firstDupAa, aaVarStartPos);
      } else {
        /* last amino acid of duplicated WT seq. */
        final int aaEndPos = aaVarStartPos + (dupLength / 3) - 1;
        annot =
            multipleAminoAcidInframeDuplication(
                trmdl.getName(),
                exonNumber,
                canno,
                firstDupAa,
                aaVarStartPos,
                lastDupAa,
                aaEndPos);
      }
    } else if (!wtaa.equals("*")) {
      /* substitution starts not on frame */
      annot =
          shiftedInFrameDuplication(trmdl, exonNumber, canno, var, dupEnd, aaVarStartPos, frame_s);
    } else {
      /* Mutation affects the wildtype stop codon */
      final int stopIdx = varaa.indexOf("*");
      if (stopIdx < 0) {
        /* no stop codon left in the variant translation */
        annot =
            String.format(
                "%s:exon%d:%s:p.*%d%sext*?",
                trmdl.getName(), exonNumber, canno, aaVarStartPos, varaa);
      } else {
        /* keep the variant translation up to and including its first stop codon */
        annot =
            String.format(
                "%s:exon%d:%s:p.*%ddelins%s",
                trmdl.getName(), exonNumber, canno, aaVarStartPos, varaa.substring(0, stopIdx + 1));
      }
    }
    return new Annotation(trmdl, annot, VariantType.NON_FS_DUPLICATION, cdsStartPos);
  }