/** Check proteins using all possible codon tables */ void checkCodonTables() { if (verbose) Timer.showStdErr("Comparing Proteins..."); createTrByChromo(); // Create lists of transcripts by chromosome // For each chromosome... for (Chromosome chromo : genome) { String chr = chromo.getId(); // Check against each codon table for (CodonTable codonTable : CodonTables.getInstance()) { setCodonTable(chromo, codonTable); proteinCompare(chr, false, false); } } if (verbose) Timer.showStdErr("done"); }
/** Set codon table for a given chromosome */ void setCodonTable(Chromosome chromo, CodonTable codonTable) { CodonTables.getInstance().set(genome, chromo, codonTable); // Set codon tables // Reset all protein translations for this chromosome for (Transcript tr : trByChromo.get(chromo.getId())) tr.resetCache(); }
/** Compare list of proteins */ double proteinCompare(String chr, boolean addTotals, boolean updateTranscriptAaCheck) { List<Transcript> trList = null; // No chromosome name specified? => Use all transcripts if (chr == null) { trList = new ArrayList<Transcript>(); for (Gene g : genome.getGenes()) for (Transcript tr : g) trList.add(tr); } else trList = trByChromo.get(chr); // No transcripts in the list? We are done if (trList.isEmpty()) return 0; int i = 1; if (verbose) { // Show labels System.err.println("\tLabels:"); System.err.println("\t\t'+' : OK"); System.err.println("\t\t'.' : Missing"); System.err.println("\t\t'*' : Error"); System.out.print((chr != null ? chr : "") + "\t"); } // Check each transcript int countNotFound = 0, countOk = 0, countErrors = 0; for (Transcript tr : trList) { char status = ' '; String protein = tr.protein(); String proteinReference = proteinByTrId.get(tr.getId()); if (proteinReference == null) { if (tr.isProteinCoding()) { status = '.'; if (debug) System.err.println("\nWARNING:Cannot find Protein for transcript " + tr.getId()); } } else if (equals(protein, proteinReference)) { status = '+'; } else { status = '*'; if (debug || storeAlignments || onlyOneError) { protein = proteinFormat(protein); proteinReference = proteinFormat(proteinReference); SmithWaterman sw = new SmithWaterman(protein, proteinReference); if (Math.max(protein.length(), proteinReference.length()) < SnpEffCmdCds.MAX_ALIGN_LENGTH) sw.align(); if (storeAlignments) alignmentByTrId.put(tr.getId(), sw); int maxScore = Math.min(protein.length(), proteinReference.length()); int score = sw.getAlignmentScore(); if (debug || onlyOneError) { System.err.println( "\nERROR: Proteins do not match for transcript " + tr.getId() // + "\tStrand:" + (tr.isStrandPlus() ? "+" : "-") // + "\tExons: " + tr.numChilds() // + "\n" // + String.format( "\tSnpEff protein (%6d) : '%s'\n", protein.length(), protein) // + String.format( "\tReference protein (%6d) : '%s'\n", proteinReference.length(), proteinReference) // + "\tAlignment (Snpeff protein vs Reference protein)." // + "\tScore: " + score // + "\tMax. possible score: " + maxScore // + "\tDiff: " + (maxScore - score) // + "\n" + sw // ); System.err.println("Transcript details:\n" + tr); } } if (onlyOneError) { System.err.println("Transcript details:\n" + tr); throw new RuntimeException("DIE"); } } // Update counters boolean ok = false; switch (status) { case '.': countNotFound++; break; case '+': countOk++; ok = true; break; case '*': countErrors++; break; case ' ': break; default: throw new RuntimeException("Unknown status '" + status + "'"); } // Update transcript status if (ok && updateTranscriptAaCheck) tr.setAaCheck(true); // Show a mark if (verbose && (status != ' ')) { System.out.print(status); i++; if (i % 100 == 0) System.out.print("\n\t"); } } // Relative error rate double errorRate = ((double) countErrors) / ((double) (countErrors + countOk)); if (verbose) System.out.println("\n"); System.out.println( "\tProtein check:" // + "\t" + genome.getVersion() // + (chr != null ? "\tChromosome: " + chr : "") // + (chr != null ? "\tCodon table: " + CodonTables.getInstance().getTable(genome, chr).getName() : "") // + "\tOK: " + countOk // + "\tNot found: " + countNotFound // + "\tErrors: " + countErrors // + "\tError percentage: " + (100 * errorRate) + "%" // ); // Add to totals if (addTotals) { totalNotFound += countNotFound; totalOk += countOk; totalErrors += countErrors; } return errorRate; }