Ejemplo n.º 1
0
  /**
   * Run the protein comparison for every chromosome in the genome under every available codon
   * table.
   *
   * <p>For each (chromosome, codon table) pair the table is assigned to the chromosome and the
   * comparison is executed for that chromosome only; totals are not accumulated and transcript
   * check flags are not updated.
   */
  void checkCodonTables() {
    if (verbose) {
      Timer.showStdErr("Comparing Proteins...");
    }

    // Build the per-chromosome transcript lists used by the calls below
    createTrByChromo();

    for (Chromosome chromosome : genome) {
      final String chromosomeId = chromosome.getId();

      // Try every known codon table on this chromosome
      for (CodonTable table : CodonTables.getInstance()) {
        setCodonTable(chromosome, table);
        proteinCompare(chromosomeId, false, false);
      }
    }

    if (verbose) {
      Timer.showStdErr("done");
    }
  }
Ejemplo n.º 2
0
  /**
   * Set the codon table for a given chromosome and reset the cache of its transcripts.
   *
   * <p>The codon table is always registered; the cache reset is skipped when no transcript list
   * is available for the chromosome.
   *
   * @param chromo chromosome whose codon table is being set
   * @param codonTable codon table to register for {@code chromo} in this genome
   */
  void setCodonTable(Chromosome chromo, CodonTable codonTable) {
    CodonTables.getInstance().set(genome, chromo, codonTable); // Set codon tables

    // The lookup yields null when trByChromo holds no list for this chromosome id
    // (presumably a chromosome without transcripts -- confirm against createTrByChromo)
    Iterable<Transcript> transcripts = trByChromo.get(chromo.getId());
    if (transcripts == null) return;

    // Reset all protein translations for this chromosome
    for (Transcript tr : transcripts) tr.resetCache();
  }
Ejemplo n.º 3
0
  /**
   * Compare the protein of each transcript against its reference protein.
   *
   * <p>Every transcript gets one status:
   *
   * <ul>
   *   <li>{@code '+'}: a reference protein exists and matches
   *   <li>{@code '*'}: a reference protein exists and does not match
   *   <li>{@code '.'}: no reference protein, and the transcript is protein coding
   *   <li>{@code ' '}: no reference protein, and the transcript is not protein coding (not
   *       counted)
   * </ul>
   *
   * <p>Unless there are no transcripts to check, a one-line summary is printed to stdout.
   *
   * @param chr chromosome name used to look up the transcript list; {@code null} means all
   *     transcripts of all genes in the genome
   * @param addTotals if true, add this run's counters to the running totals
   * @param updateTranscriptAaCheck if true, call {@code setAaCheck(true)} on every transcript
   *     whose protein matches
   * @return errors / (errors + matches); 0 when there are no transcripts or when no transcript
   *     had a reference protein to compare against
   * @throws RuntimeException on the first mismatch when {@code onlyOneError} is set
   */
  double proteinCompare(String chr, boolean addTotals, boolean updateTranscriptAaCheck) {
    List<Transcript> trList = null;

    // No chromosome name specified? => Use all transcripts
    if (chr == null) {
      trList = new ArrayList<Transcript>();
      for (Gene g : genome.getGenes()) for (Transcript tr : g) trList.add(tr);
    } else trList = trByChromo.get(chr);

    // No transcripts in the list (or no list at all for this chromosome)? We are done
    if (trList == null || trList.isEmpty()) return 0;

    int i = 1;

    if (verbose) {
      // Show labels
      System.err.println("\tLabels:");
      System.err.println("\t\t'+' : OK");
      System.err.println("\t\t'.' : Missing");
      System.err.println("\t\t'*' : Error");
      System.out.print((chr != null ? chr : "") + "\t");
    }

    // Check each transcript
    int countNotFound = 0, countOk = 0, countErrors = 0;
    for (Transcript tr : trList) {
      char status = ' ';
      String protein = tr.protein();
      String proteinReference = proteinByTrId.get(tr.getId());

      if (proteinReference == null) {
        // A missing reference only counts for protein coding transcripts
        if (tr.isProteinCoding()) {
          status = '.';
          if (debug)
            System.err.println("\nWARNING:Cannot find Protein for transcript " + tr.getId());
        }
      } else if (equals(protein, proteinReference)) {
        status = '+';
      } else {
        status = '*';

        // The alignment is only built when somebody is going to look at it
        if (debug || storeAlignments || onlyOneError) {
          protein = proteinFormat(protein);
          proteinReference = proteinFormat(proteinReference);

          // Only align when both sequences are shorter than MAX_ALIGN_LENGTH
          SmithWaterman sw = new SmithWaterman(protein, proteinReference);
          if (Math.max(protein.length(), proteinReference.length()) < SnpEffCmdCds.MAX_ALIGN_LENGTH)
            sw.align();

          if (storeAlignments) alignmentByTrId.put(tr.getId(), sw);

          // Reported as the maximum possible score: length of the shorter sequence
          int maxScore = Math.min(protein.length(), proteinReference.length());
          int score = sw.getAlignmentScore();

          if (debug || onlyOneError) {
            System.err.println(
                "\nERROR: Proteins do not match for transcript "
                    + tr.getId() //
                    + "\tStrand:"
                    + (tr.isStrandPlus() ? "+" : "-") //
                    + "\tExons: "
                    + tr.numChilds() //
                    + "\n" //
                    + String.format(
                        "\tSnpEff protein     (%6d) : '%s'\n", protein.length(), protein) //
                    + String.format(
                        "\tReference protein  (%6d) : '%s'\n",
                        proteinReference.length(), proteinReference) //
                    + "\tAlignment (Snpeff protein vs Reference protein)." //
                    + "\tScore: "
                    + score //
                    + "\tMax. possible score: "
                    + maxScore //
                    + "\tDiff: "
                    + (maxScore - score) //
                    + "\n"
                    + sw //
                );
            System.err.println("Transcript details:\n" + tr);
          }
        }

        // Stop at the first mismatch; the transcript details were already printed above
        if (onlyOneError) throw new RuntimeException("DIE");
      }

      // Update counters
      boolean ok = false;
      switch (status) {
        case '.':
          countNotFound++;
          break;

        case '+':
          countOk++;
          ok = true;
          break;

        case '*':
          countErrors++;
          break;

        case ' ':
          break;

        default:
          throw new RuntimeException("Unknown status '" + status + "'");
      }

      // Update transcript status
      if (ok && updateTranscriptAaCheck) tr.setAaCheck(true);

      // Show a mark
      if (verbose && (status != ' ')) {
        System.out.print(status);
        i++;
        if (i % 100 == 0) System.out.print("\n\t");
      }
    }

    // Relative error rate; defined as 0 when nothing was compared (avoids 0.0 / 0.0 = NaN)
    int countCompared = countErrors + countOk;
    double errorRate = (countCompared > 0) ? ((double) countErrors) / ((double) countCompared) : 0.0;
    if (verbose) System.out.println("\n");
    System.out.println(
        "\tProtein check:" //
            + "\t"
            + genome.getVersion() //
            + (chr != null ? "\tChromosome: " + chr : "") //
            + (chr != null
                ? "\tCodon table: " + CodonTables.getInstance().getTable(genome, chr).getName()
                : "") //
            + "\tOK: "
            + countOk //
            + "\tNot found: "
            + countNotFound //
            + "\tErrors: "
            + countErrors //
            + "\tError percentage: "
            + (100 * errorRate)
            + "%" //
        );

    // Add to totals
    if (addTotals) {
      totalNotFound += countNotFound;
      totalOk += countOk;
      totalErrors += countErrors;
    }

    return errorRate;
  }