/**
   * Gather data from a transcript and cross-check it against an externally supplied protein.
   *
   * <p>The transcript is looked up through {@code trIdByUniqueName} and {@code trById}. Its protein
   * (without a trailing stop codon {@code '*'}) must equal {@code sequence}; if so, the amino acid
   * range {@code [aaStart, aaEnd]} is mapped to chromosome coordinates and the codons of that range
   * are translated and compared to {@code subSeq}.
   *
   * <p>Side effects: adds the transcript ID to {@code proteinOk} when the proteins match, or to
   * {@code proteinDifferences} when they do not (a warning is shown only the first time an ID is
   * seen there).
   *
   * @param isoformRef unique isoform name, used as key in {@code trIdByUniqueName}
   * @param aaStart zero-based index of the first amino acid in the range (inclusive)
   * @param aaEnd zero-based index of the last amino acid in the range (inclusive)
   * @param sequence full protein sequence expected for this isoform
   * @param subSeq amino acids expected for the range {@code [aaStart, aaEnd]}
   * @return a new {@code TranscriptData}; its {@code ok} flag is set only when every check passed.
   *     Fields are filled in as far as processing got (e.g. {@code tr} is set as soon as the
   *     transcript is found).
   */
  TranscriptData transcriptData(
      String isoformRef, int aaStart, int aaEnd, String sequence, String subSeq) {
    String trId = trIdByUniqueName.get(isoformRef);
    TranscriptData trData = new TranscriptData();
    if (trId == null) return trData; // Unknown isoform

    Transcript tr = trById.get(trId);
    if (tr == null) return trData; // Transcript not found
    trData.tr = tr;

    // Remove trailing stop codon ('*')
    String protein = tr.protein();
    if (!protein.isEmpty() && (protein.charAt(protein.length() - 1) == '*'))
      protein = protein.substring(0, protein.length() - 1);

    // Sanity check: Do protein sequences match?
    if (!protein.equals(sequence)) {
      // Warn only once per transcript
      if (!proteinDifferences.contains(trId))
        Timer.showStdErr(
            "WARNING: Protein sequences differ: " //
                + "\tUniqueName"
                + isoformRef //
                + "\tEnsembl ID: "
                + trId //
                + "\n\tEnsembl  ("
                + protein.length()
                + "): "
                + protein //
                + "\n\tNextProt ("
                + sequence.length()
                + "): "
                + sequence //
                + "\n");
      proteinDifferences.add(trId);
      return trData;
    }
    proteinOk.add(trId);

    // Nothing to map if the amino acid range is invalid
    if ((aaStart < 0) || (aaEnd < aaStart)) return trData;

    // CDS base range covered by the amino acids: three bases per codon, both ends inclusive
    int[] cdsBase2Pos = tr.cdsBaseNumber2ChrPos();
    int codonStart = aaStart * 3;
    int codonEnd = (aaEnd + 1) * 3 - 1;

    // The range must lie within the CDS, otherwise the array / substring accesses below would
    // throw an index-out-of-bounds exception
    String cds = tr.cds();
    if ((codonEnd >= cdsBase2Pos.length) || (codonEnd >= cds.length())) {
      Timer.showStdErr(
          "WARNING: AA range outside CDS: " //
              + "\tUniqueName: "
              + isoformRef //
              + "\tEnsembl ID: "
              + trId //
              + "\tAA range: ["
              + aaStart
              + ", "
              + aaEnd
              + "]" //
              + "\tCDS length: "
              + cds.length() //
              + "\n");
      return trData;
    }

    // Try to map to chromosome position (start and end swap on the minus strand)
    if (tr.isStrandPlus()) {
      trData.chrPosStart = cdsBase2Pos[codonStart];
      trData.chrPosEnd = cdsBase2Pos[codonEnd];
    } else {
      trData.chrPosStart = cdsBase2Pos[codonEnd];
      trData.chrPosEnd = cdsBase2Pos[codonStart];
    }

    trData.chrName = tr.getChromosomeName();

    // More sanity checks: translate the codons and compare with the expected amino acids
    trData.codon = cds.substring(codonStart, codonEnd + 1);
    trData.aa = CodonTables.getInstance().aa(trData.codon, genome, trData.chrName);
    if (!subSeq.equals(trData.aa))
      Timer.showStdErr(
          "WARNING: AA differ: " //
              + "\tUniqueName"
              + isoformRef //
              + "\tEnsembl ID: "
              + trId //
              + "\tEnsembl  AA: "
              + trData.aa //
              + "\tNextProt AA:"
              + subSeq //
              + "\n");
    else trData.ok = true; // All sanity checks passed

    return trData;
  }
Esempio n. 2
0
  /**
   * Compare the CDS of every transcript in the genome against the reference sequences stored in
   * {@code cdsByTrId}.
   *
   * <p>A transcript counts as OK when the reference equals its CDS or its mRNA, or when one of
   * (mRNA, reference) is the other one followed only by a poly-A tail. Transcripts without a
   * reference or with an empty CDS count as "not found"; everything else counts as an error. For
   * exact CDS matches, start and stop codons are also checked and mismatches count as warnings.
   *
   * <p>Updates the counters {@code totalOk}, {@code totalErrors}, {@code totalNotFound} and {@code
   * totalWarnings}, prints progress marks when {@code verbose} is set and a summary line at the
   * end.
   *
   * @return error ratio {@code totalErrors / (totalErrors + totalOk)}; this is {@code NaN} when
   *     both counters are zero
   * @throws RuntimeException on the first mismatch when {@code onlyOneError} is set
   */
  double cdsCompare() {
    int count = 1;

    if (verbose) {
      // Show labels
      System.err.println("\tLabels:");
      System.err.println("\t\t'+' : OK");
      System.err.println("\t\t'.' : Missing");
      System.err.println("\t\t'*' : Error");
      System.err.print("\t");
    }

    // Compare all genes
    for (Gene gint : config.getGenome().getGenes()) {
      for (Transcript tint : gint) {
        String cds = tint.cds().toUpperCase();
        String mRna = tint.mRna().toUpperCase();
        String cdsReference = cdsByTrId.get(tint.getId());

        if (cdsReference != null) cdsReference = cdsReference.toUpperCase();

        if (cdsReference == null) {
          if (debug)
            System.err.println(
                "\nWARNING:Cannot find reference CDS for transcript '" + tint.getId() + "'");
          else if (verbose) System.out.print('.');
          totalNotFound++;
        } else if (cds.isEmpty()) {
          if (debug)
            System.err.println("\nWARNING:Empty CDS for transcript '" + tint.getId() + "'");
          else if (verbose) System.out.print('.');
          totalNotFound++;
        } else if (cds.equals(cdsReference)) {
          totalOk++;
          if (verbose) System.out.print('+');

          // Sanity check: Start and stop codons
          if (cds.length() >= 3) {
            CodonTable ctable =
                CodonTables.getInstance().getTable(config.getGenome(), tint.getChromosomeName());

            // Check start codon
            String startCodon = cds.substring(0, 3);
            if (!ctable.isStart(startCodon)) {
              if (debug)
                System.err.println(
                    "\nWARNING: CDS for transcript '"
                        + tint.getId()
                        + "' does not start with a start codon:\t"
                        + startCodon
                        + "\t"
                        + cds);
              totalWarnings++;
            }

            // Check stop codon
            String stopCodon = cds.substring(cds.length() - 3, cds.length());
            if (!ctable.isStop(stopCodon)) {
              if (debug)
                System.err.println(
                    "\nWARNING: CDS for transcript '"
                        + tint.getId()
                        + "' does not end with a stop codon:\t"
                        + stopCodon
                        + "\t"
                        + cds);
              totalWarnings++;
            }
          }
        } else if (mRna.equals(cdsReference)) { // May be the file has mRNA instead of CDS?
          totalOk++;
          if (verbose) System.out.print('+');
        } else if (isPrefixPlusPolyA(cdsReference, mRna)) {
          // Reference longer than mRNA: it is actually an mRNA + poly-A tail (instead of a CDS)
          totalOk++;
          if (verbose) System.out.print('+');
        } else if (isPrefixPlusPolyA(mRna, cdsReference)) {
          // mRNA longer than reference: the mRNA is the reference + poly-A tail.
          // (The leading part of the mRNA must be compared with the reference, not with the
          // mRNA itself, otherwise this case can never match.)
          totalOk++;
          if (verbose) System.out.print('+');
        } else {
          if (debug || onlyOneError) {
            // Create a string indicating differences
            String diffMrna = SnpEffCmdProtein.diffStr(mRna, cdsReference);
            int diffMrnaCount = SnpEffCmdProtein.diffCount(mRna, cdsReference);

            String diffCds = SnpEffCmdProtein.diffStr(cds, cdsReference);
            int diffCdsCount = SnpEffCmdProtein.diffCount(cds, cdsReference);

            System.err.println(
                "\nERROR:CDS do not match for transcript "
                    + tint.getId()
                    + "\tStrand:"
                    + tint.getStrand()
                    + "\tExons: "
                    + tint.numChilds());

            // Show whichever sequence (mRNA or CDS) is closer to the reference
            if (diffMrnaCount < diffCdsCount) {
              System.err.println(
                  String.format("\tsnpEff mRNA (%6d) : '%s'", mRna.length(), mRna.toLowerCase()));
              System.err.println(
                  String.format("\tdiff        (%6d) : '%s'", diffMrnaCount, diffMrna));
            } else {
              System.err.println(
                  String.format("\tsnpEff CDS  (%6d) : '%s'", cds.length(), cds.toLowerCase()));
              System.err.println(
                  String.format("\tdiff        (%6d) : '%s'", diffCdsCount, diffCds));
            }

            System.err.println(
                String.format(
                    "\tReference   (%6d) : '%s'",
                    cdsReference.length(), cdsReference.toLowerCase()));
            System.err.println("Transcript details:\n" + tint);

            // Transcript details were already shown above; just abort on the first error
            if (onlyOneError) throw new RuntimeException("DIE");

          } else if (verbose) System.out.print('*');

          totalErrors++;
        }

        // Show a mark: line break every 100 transcripts
        if (verbose && (count % 100 == 0)) System.out.print("\n\t");
        count++;
      }
    }

    // Floating point division: yields NaN (no exception) if nothing was counted
    double perc = ((double) totalErrors) / ((double) (totalErrors + totalOk));
    System.out.println(
        "\n\tCDS check:\t"
            + config.getGenome().getVersion()
            + "\tOK: "
            + totalOk
            + "\tWarnings: "
            + totalWarnings
            + "\tNot found: "
            + totalNotFound
            + "\tErrors: "
            + totalErrors
            + "\tError percentage: "
            + (100 * perc)
            + "%");
    return perc;
  }

  /**
   * Is {@code full} equal to {@code prefix} followed by a non-empty tail made only of 'A' (i.e. a
   * poly-A tail)? Whitespace in the tail is tolerated, since the tail is trimmed after replacing
   * every 'A' by a space.
   */
  private boolean isPrefixPlusPolyA(String full, String prefix) {
    return (prefix.length() < full.length())
        && full.startsWith(prefix)
        && full.substring(prefix.length()).replace('A', ' ').trim().isEmpty();
  }