/** * Fills the structure (str) string with the predicted structure according to running the CYK * algorithm and traceback on the provided sequence string (seq). The parameters should be * double[3] including the following probabilities: * * <p>params[0] = p(S->L) params[1] = p(L->s) params[2] = p(F->LS) * * @param seq - The sequence to predict from. * @param str - The string to fill with the predicted value. * @param params - The Knudson-Hein Grammar parameters as described above. * @param verbose - Display output to command line if true. */ public static String predictKH(String seq, BigDouble[] params, boolean verbose) { int size = seq.length(); BigDouble[][][] parr = new BigDouble[size][size][3]; int[][][] tau = new int[size][size][3]; String pred = null; if (size < 1) { output.out("Invalid sequence provided: \n\t[" + seq + "]"); return pred; } output.out("Predicting secondary structure for \n\t\t[" + seq + "]"); for (int i = 0; i < size; i++) for (int j = 0; j < size; j++) for (int k = 0; k < 3; k++) tau[i][j][k] = -1; // Fill Array kh_CYK(seq, params, parr, tau); BigDouble prob = parr[0][size - 1][0]; output.out("\t-KH Maximum Probability for sequence is " + prob); // Trace Back if (prob.compareTo(0) > 0) { pred = kh_trace_back(tau); output.out("\t-KH most likely structure for sequence is\n\t\t[ " + pred + " ]"); } return pred; }
public static String predictFromGrammar23S( String seq, String nat, Grammar grammar, boolean verbose) { int size = seq.length(); Map<String, BigDouble> parr = new HashMap<String, BigDouble>(); int[][][] tau = new int[size][size][3]; output.out("\t\tTau allocated"); String pred = null; if (size < 1) { output.out("\nInvalid sequence provided: \n\t[ " + seq + " ]"); return pred; } output.out("\nPredicting secondary structure for \n\t\t[ " + seq + " ]"); long start = System.currentTimeMillis(); for (int i = 0; i < size; i++) { // Progress Bar CommandLine.DisplayBar(seq.length(), i, ((long) (System.currentTimeMillis() - start) / 1000)); for (int j = i; j < size; j++) { String ij = i + ":" + (i + j); for (int k = 0; k < 3; k++) { tau[i][j][k] = -1; parr.put(ij + ":" + k, new BigDouble(0)); } } } CommandLine.DisplayBarFinish(); // Fill Array if (grammar instanceof PfoldGrammar) { PfoldGrammar pfold = (PfoldGrammar) grammar; // _23S_CYK(seq, pfold.getKH_params(), pfold // .getPfold_paramsUnmatched(), pfold // .getPfold_paramsBasePairs(), parr, tau); } BigDouble prob = parr.get("0:" + (size - 1) + ":" + 0); output.out("\t-Probability of highest probability parse for sequence is \n\t\t" + prob); if (prob != null) { // grammar.recordProbability(prob); // Trace Back if (prob.compareTo(0) > 0) { pred = kh_trace_back(tau); { output.out("\t-Highest probability parse generates structure\n\t\t[ " + pred + " ]"); if (nat != null) output.out( "\t-FMeasure for predicted structure is\n\t\t" + Compare.getFMeasureBD(nat, pred)); } } } return pred; }
/** * Runs the CYK algorithm on the provided sequence with the provided parameters. The int array * should be array size int[n-1][n-1][3] where "n" is the length of the sequence. * * <p>The maximum probability for the provided sequence can be found at index arr[0][n-1][0] * * @param seq - The sequence to calculate the maximum probability. * @param params - The Knudson-Hein grammar probabilities. * @param arr - The probability array to fill for the algorithm. * @param arr - The traceback array to fill for the algorithm. */ private static void kh_CYK(String seq, BigDouble[] p, BigDouble[][][] arr, int[][][] tau) { int S = 0, L = 1, F = 2; BigDouble temp, tempProd; BigDouble S_LS, L_dFd, F_dFd; S_LS = new BigDouble(p[S]); L_dFd = new BigDouble(p[L]); F_dFd = new BigDouble(p[F]); for (int i = 0; i < seq.length(); i++) { if (seq.charAt(i) == 's') { arr[i][i][L] = p[L]; tau[i][i][S] = 0; tau[i][i][L] = 0; tau[i][i][F] = 0; } } for (int j = 0; j < seq.length(); j++) { for (int i = 0; i + j < seq.length(); i++) { int ij = i + j; if (seq.charAt(i) == 'd' && seq.charAt(ij) == 'd' && j > 2) { // /////// // L->dFd temp = L_dFd.mult(arr[i + 1][ij - 1][F]); if (arr[i][ij][L].compareTo(temp) <= 0) { arr[i][ij][L] = temp; tau[i][ij][L] = (F << 8) | 0xff; } // /////// // F->dFd temp = F_dFd.mult(arr[i + 1][ij - 1][F]); if (arr[i][ij][F].compareTo(temp) <= 0) { arr[i][ij][F] = temp; tau[i][ij][F] = (F << 8) | 0xff; } } // ///// // S->L temp = p[S].mult(arr[i][ij][L]); if (arr[i][ij][S].compareTo(temp) <= 0) { arr[i][ij][S] = temp; tau[i][ij][S] = (L << 8) | 0xff; } for (int k = i; k < ij; k++) { tempProd = arr[i][k][L].mult(arr[k + 1][ij][S]); // ////// // S->LS temp = S_LS.mult(tempProd); if (arr[i][ij][S].compareTo(temp) <= 0) { arr[i][ij][S] = temp; tau[i][ij][S] = (k << 16) | (L << 8) | (S); } // ////// // F->LS temp = p[F].mult(tempProd); if (arr[i][ij][F].compareTo(temp) <= 0) { arr[i][ij][F] = temp; tau[i][ij][F] = (k << 16) | (L << 8) | (S); } } } } }