/** * This function figures out the consequence of a DNA duplication that does not begin at position * zero of a codon, i.e., that is not right in the frame. This can result in a delins or in a * simple dup depending on the surrounding sequence. For example, imagine we have the following * duplication. ggaggaggaggaggaggagga (add another gga). but the frame is gag-gag-gag-...with * GAG=Glu/E. Then the effect of the duplication is to add another E to the aminoacid sequence. * The variable frame_s is 2 in this case, because gga starts at nucleotide 2 (zero-based) of the * GAG codon. * * @param trmdl The affected transcript * @param exonNumber One-based number of the exon * @param cDNAannot Annotation of the cDNA, e.g., c.769_771dupTTC * @param var the duplicated sequence * @param endpos the end position of the duplication (zero-based) * @param frame_s the location within the frame (0,1,2) in which mutation occurs */ private static String shiftedInFrameDuplication( TranscriptModel trmdl, int exonNumber, String cDNAanno, String var, int endpos, int aaVarStartPos, int frame_s) throws AnnotationException { Translator translator = Translator.getTranslator(); /* Singleton */ int len = var.length(); if ((len % 3) != 0) { String s = String.format( "[ERROR] DuplicationAnnotation:shiftedInFrameDuplication - " + "variant length not a multiple of 3: %s (len=%d)", var, len); throw new AnnotationException(s); } int aalen = len / 3; String dna = trmdl.getCdnaSequence(); int start = endpos - var.length(); String prefix = dna.substring(start - frame_s, start); // System.out.println("prefix = " + prefix + ", frame_s="+frame_s + ", var="+var + " endpos=" + // endpos); // System.out.println(cDNAanno); String rest = dna.substring(start, start + len); String wt = prefix + rest; String mut = prefix + var + rest; String wtaa = translator.translateDNA(wt); String mutaa = translator.translateDNA(mut); String annot; if (mutaa.startsWith(wtaa) && (mutaa.indexOf(wtaa, aalen)) > 0) { annot = String.format( "%s:exon%d:%s:p.%s%ddup", trmdl.getName(), exonNumber, cDNAanno, wtaa, aaVarStartPos); } else { annot = String.format( "%s:exon%d:%s:p.%s%ddelins%s", trmdl.getName(), exonNumber, cDNAanno, wtaa, aaVarStartPos, mutaa); } return annot; }
/** * Annotates an insertion variant that is an duplication. The methods of this class are called * from {@link jannovar.annotation.InsertionAnnotation InsertionAnnotation} if that class * determines that the insertion is equal to the preceding nucleotides in the reference sequence. * That is, in addition to the conditions for a insertion variant, the duplication variant * requires a similar sequence to the insertion (before or) after the insertion. * * <p>There are two possible duplication insertions with or without frameshift causation. e.g. * inserting an additional 'C' in the sequence 'ACC,GAG' at position 2 would cause a frameshift, * whereas insertion of 'CCG' at position 2 just inserts an additional triple 'ACC GCC GAG'. * * <p>if (var.length() % 3 == 0) { /* ORF CONSERVING if(startPosMutationInCDS.length() % 3 == 0){ * /* SIMPLE DUPLICATION OF CODONS } else { /* substitution from original AA to AAs * if(wtaa.equals("*")) { /* Mutation affects the wildtype stop codon int idx = * varaa.indexOf("*"); if (idx < 0) { } /* Substitution } }else { /* FRAMESHIFT * short * p.(Arg97fs)) denotes a frame shifting change with Arginine-97 as the first affected amino acid * } * * @param trmdl The transcriptmodel / gene in which the current mutation is contained * @param frame_s the location within the frame (0,1,2) in which mutation occurs * @param wtnt3 The three nucleotides of codon affected by start of mutation * @param var alternate nucleotide sequence (the duplication) * @param startpos The startposition of the duplication (zero based) * @param endpos The endposition of the duplication (zero based) * @param exonNumber Number (one-based) of affected exon. * @return an {@link jannovar.annotation.Annotation Annotation} object representing the current * variant * @throws AnnotationException */ public static Annotation getAnnotation( TranscriptModel trmdl, int frame_s, String wtnt3, String var, int startpos, int endpos, int exonNumber) throws AnnotationException { String annot; Annotation ann; Translator translator = Translator.getTranslator(); /* Singleton */ int refcdsstart = trmdl.getRefCDSStart(); /**/ int newpos = shiftToThreePrime(trmdl, var, startpos, endpos); if (newpos != startpos) { startpos = newpos; endpos = startpos + var.length() - 1; } int cdsEndPos = endpos - refcdsstart + 1; int cdsStartPos = cdsEndPos - var.length() + 1; /** * aavarpos is now the FIRST position (one-based) of the amino-acid sequence that was * duplicated. */ int aaVarStartPos = cdsStartPos % 3 == 0 ? (int) Math.floor(cdsStartPos / 3) : (int) Math.floor(cdsStartPos / 3) + 1; // debugDuplication(trmdl,frame_s, wtnt3, var, startpos, exonNumber,aaVarStartPos); /* get coding DNA HGVS string */ String canno; if (var.length() == 1) canno = String.format("c.%ddup%s", cdsStartPos, var); else canno = String.format("c.%d_%ddup%s", cdsStartPos, cdsEndPos, var); /* now create the protein HGVS string */ /* generate in-frame snippet for translation and correct for '-'-strand */ if (trmdl.isMinusStrand()) { /* Re-adjust the wildtype nucleotides for minus strand */ wtnt3 = trmdl.getWTCodonNucleotides(startpos - 1 + ((3 - (var.length() % 3)) % 3), frame_s); } String varnt3 = getVarNt3(trmdl, wtnt3, var, frame_s); String wtaa = translator.translateDNA(wtnt3); String varaa = translator.translateDNA(varnt3); if (var.length() % 3 == 0) { /* ORF CONSERVING */ if ((cdsStartPos - 1) % 3 == 0) { /* SIMPLE DUPLICATION OF CODONS, e.g., nucleotide position 4 starts a codon, and (4-1)%3==0.*/ String wtaaDupStart = translator.translateDNA(var.substring(0, 3)); String wtaaDupEnd = translator.translateDNA(var.substring(var.length() - 3)); if (var.length() == 3) { // Three nucleotides affected, inframe, single aminoacid duplication. annot = singleAminoAcidInframeDuplication( trmdl.getName(), exonNumber, canno, wtaaDupStart, aaVarStartPos); } else { int aaEndPos = aaVarStartPos + (var.length() / 3) - 1; /* last amino acid of duplicated WT seq. */ annot = multipleAminoAcidInframeDuplication( trmdl.getName(), exonNumber, canno, wtaaDupStart, aaVarStartPos, wtaaDupEnd, aaEndPos); } ann = new Annotation(trmdl, annot, VariantType.NON_FS_DUPLICATION, cdsStartPos); } else { /* substitution from original AA to AAs */ if (wtaa.equals("*")) { /* Mutation affects the wildtype stop codon */ int idx = varaa.indexOf("*"); if (idx < 0) { annot = String.format( "%s:exon%d:%s:p.*%d%sext*?", trmdl.getName(), exonNumber, canno, aaVarStartPos, varaa); } else { annot = String.format( "%s:exon%d:%s:p.*%ddelins%s", trmdl.getName(), exonNumber, canno, aaVarStartPos, varaa.substring(0, idx + 1)); } } else { /* substitution starts not on frame */ annot = shiftedInFrameDuplication( trmdl, exonNumber, canno, var, endpos, aaVarStartPos, frame_s); } /* Substitution */ ann = new Annotation(trmdl, annot, VariantType.NON_FS_DUPLICATION, cdsStartPos); } } else { /* FRAMESHIFT * short p.(Arg97fs)) denotes a frame shifting change with Arginine-97 as the first affected amino acid */ annot = String.format( "%s:exon%d:%s:p.%s%dfs", trmdl.getName(), exonNumber, canno, wtaa, aaVarStartPos); // System.out.println("FS wtaa="+wtaa); // System.out.println(annot); ann = new Annotation(trmdl, annot, VariantType.FS_DUPLICATION, cdsStartPos); } return ann; }