示例#1
0
  /**
   * Predicts the secondary structure of a sequence with the Knudsen-Hein (KH) grammar by running
   * the CYK algorithm ({@code kh_CYK}) and, if a parse with positive probability exists, the
   * traceback ({@code kh_trace_back}).
   *
   * <p>The parameters should be a BigDouble[3] including the following probabilities:
   *
   * <p>params[0] = p(S->L) params[1] = p(L->s) params[2] = p(F->LS)
   *
   * @param seq - The sequence to predict from.
   * @param params - The Knudsen-Hein Grammar parameters as described above.
   * @param verbose - Currently not consulted by this method; messages are always sent to {@code
   *     output}.
   * @return The predicted structure, or {@code null} when the sequence is empty or the best parse
   *     does not have a probability greater than zero.
   */
  public static String predictKH(String seq, BigDouble[] params, boolean verbose) {
    int size = seq.length();
    String pred = null;

    if (size < 1) {
      output.out("Invalid sequence provided: \n\t[" + seq + "]");
      return pred;
    }

    output.out("Predicting secondary structure for \n\t\t[" + seq + "]");

    BigDouble[][][] parr = new BigDouble[size][size][3];
    int[][][] tau = new int[size][size][3];

    // kh_CYK compares against and multiplies existing cells before it writes them, so every
    // probability cell must start as an explicit zero; a freshly allocated object array holds
    // only nulls and would cause a NullPointerException.
    for (int i = 0; i < size; i++) {
      for (int j = 0; j < size; j++) {
        for (int k = 0; k < 3; k++) {
          tau[i][j][k] = -1;
          parr[i][j][k] = new BigDouble(0);
        }
      }
    }

    // Fill Array
    kh_CYK(seq, params, parr, tau);
    BigDouble prob = parr[0][size - 1][0];

    output.out("\t-KH Maximum Probability for sequence is " + prob);

    // Trace Back
    if (prob.compareTo(0) > 0) {
      pred = kh_trace_back(tau);

      output.out("\t-KH most likely structure for sequence is\n\t\t[ " + pred + " ]");
    }

    return pred;
  }
示例#2
0
  /**
   * Prepares the probability and traceback tables for a 23S prediction and reports the highest
   * probability parse of the sequence.
   *
   * <p>Probabilities are kept in a map keyed by {@code "start:end:symbol"}; the probability of the
   * whole sequence is read from {@code "0:(n-1):0"}. The CYK fill step is currently commented out,
   * so the table keeps its zero initialisation and no traceback is expected to run until that step
   * is restored.
   *
   * @param seq - The sequence to predict from.
   * @param nat - The native structure to compare the prediction against; may be {@code null}, in
   *     which case no F-measure is reported.
   * @param grammar - The grammar supplying the parameters (only inspected for being a {@code
   *     PfoldGrammar}).
   * @param verbose - Currently not consulted by this method; messages are always sent to {@code
   *     output}.
   * @return The predicted structure, or {@code null} when the sequence is empty or the best parse
   *     does not have a probability greater than zero.
   */
  public static String predictFromGrammar23S(
      String seq, String nat, Grammar grammar, boolean verbose) {
    int size = seq.length();
    Map<String, BigDouble> parr = new HashMap<String, BigDouble>();
    int[][][] tau = new int[size][size][3];

    output.out("\t\tTau allocated");
    String pred = null;

    if (size < 1) {
      output.out("\nInvalid sequence provided: \n\t[ " + seq + " ]");
      return pred;
    }

    output.out("\nPredicting secondary structure for \n\t\t[ " + seq + " ]");

    long start = System.currentTimeMillis();
    for (int i = 0; i < size; i++) {
      // Progress Bar
      CommandLine.DisplayBar(size, i, (System.currentTimeMillis() - start) / 1000);
      for (int j = i; j < size; j++) {
        // j is already the END index of the span (it starts at i), so the key is start:end.
        // Using i + j here would skip cells such as "1:1" and create keys past the sequence end.
        String ij = i + ":" + j;
        for (int k = 0; k < 3; k++) {
          tau[i][j][k] = -1;
          parr.put(ij + ":" + k, new BigDouble(0));
        }
      }
    }
    CommandLine.DisplayBarFinish();

    // Fill Array
    // NOTE: the fill call below is disabled, so parr still holds only zeros after this block.
    if (grammar instanceof PfoldGrammar) {
      PfoldGrammar pfold = (PfoldGrammar) grammar;
      //			_23S_CYK(seq, pfold.getKH_params(), pfold
      //					.getPfold_paramsUnmatched(), pfold
      //					.getPfold_paramsBasePairs(), parr, tau);
    }
    BigDouble prob = parr.get("0:" + (size - 1) + ":" + 0);

    output.out("\t-Probability of highest probability parse for sequence is \n\t\t" + prob);

    if (prob != null) {
      //			grammar.recordProbability(prob);

      // Trace Back
      if (prob.compareTo(0) > 0) {
        pred = kh_trace_back(tau);
        output.out("\t-Highest probability parse generates structure\n\t\t[ " + pred + " ]");
        if (nat != null) {
          output.out(
              "\t-FMeasure for predicted structure is\n\t\t" + Compare.getFMeasureBD(nat, pred));
        }
      }
    }

    return pred;
  }
示例#3
0
 /**
  * Fills the probability and traceback tables for the Knudsen-Hein grammar with the CYK
  * recursion, processing spans from the shortest to the longest.
  *
  * <p>Both tables are indexed as {@code [start][end][symbol]} with symbol 0 = S, 1 = L, 2 = F and
  * are accessed up to index {@code n - 1} in the first two dimensions, where "n" is the length of
  * the sequence. Cells of {@code arr} are read before they are written, so every cell must hold a
  * non-null value on entry.
  *
  * <p>Traceback values are packed as {@code (split << 16) | (first << 8) | second}; for the rules
  * S->L, L->dFd and F->dFd the low byte is {@code 0xff} and no split is stored. Single-character
  * 's' cells get a traceback value of 0.
  *
  * <p>The maximum probability for the provided sequence can be found at index arr[0][n-1][0]
  *
  * <p>NOTE(review): S->LS, L->dFd and F->dFd are scored with unchanged copies of p[0], p[1] and
  * p[2], i.e. the same values used for S->L, L->s and F->LS. Confirm whether the complementary
  * probabilities were intended.
  *
  * @param seq - The sequence to calculate the maximum probability.
  * @param p - The Knudsen-Hein grammar probabilities.
  * @param arr - The probability array to fill for the algorithm.
  * @param tau - The traceback array to fill for the algorithm.
  */
 private static void kh_CYK(String seq, BigDouble[] p, BigDouble[][][] arr, int[][][] tau) {
   final int symS = 0;
   final int symL = 1;
   final int symF = 2;

   BigDouble probSLS = new BigDouble(p[symS]);
   BigDouble probLdFd = new BigDouble(p[symL]);
   BigDouble probFdFd = new BigDouble(p[symF]);

   final int n = seq.length();

   // Base case: a single unpaired character can only be produced by L->s.
   for (int pos = 0; pos < n; pos++) {
     if (seq.charAt(pos) != 's') {
       continue;
     }
     arr[pos][pos][symL] = p[symL];
     tau[pos][pos][symS] = 0;
     tau[pos][pos][symL] = 0;
     tau[pos][pos][symF] = 0;
   }

   for (int span = 0; span < n; span++) {
     for (int start = 0; start + span < n; start++) {
       final int end = start + span;

       // Pairing rules need 'd' at both ends and more than two positions between them.
       boolean pairable = span > 2 && seq.charAt(start) == 'd' && seq.charAt(end) == 'd';
       if (pairable) {
         BigDouble inner = arr[start + 1][end - 1][symF];
         int pairTrace = (symF << 8) | 0xff;
         // L->dFd
         kh_keepBest(arr, tau, start, end, symL, probLdFd.mult(inner), pairTrace);
         // F->dFd
         kh_keepBest(arr, tau, start, end, symF, probFdFd.mult(inner), pairTrace);
       }

       // S->L (must follow the L update above so it sees the final L value for this span)
       kh_keepBest(
           arr, tau, start, end, symS, p[symS].mult(arr[start][end][symL]), (symL << 8) | 0xff);

       for (int split = start; split < end; split++) {
         BigDouble leftRight = arr[start][split][symL].mult(arr[split + 1][end][symS]);
         int splitTrace = (split << 16) | (symL << 8) | (symS);
         // S->LS
         kh_keepBest(arr, tau, start, end, symS, probSLS.mult(leftRight), splitTrace);
         // F->LS
         kh_keepBest(arr, tau, start, end, symF, p[symF].mult(leftRight), splitTrace);
       }
     }
   }
 }

 /**
  * Stores the candidate probability and its traceback value in cell [i][j][sym] unless the value
  * already held there is strictly greater than the candidate.
  */
 private static void kh_keepBest(
     BigDouble[][][] arr, int[][][] tau, int i, int j, int sym, BigDouble candidate, int trace) {
   if (arr[i][j][sym].compareTo(candidate) <= 0) {
     arr[i][j][sym] = candidate;
     tau[i][j][sym] = trace;
   }
 }