/** * Fills the structure (str) string with the predicted structure according to running the CYK * algorithm and traceback on the provided sequence string (seq). The parameters should be * double[3] including the following probabilities: * * <p>params[0] = p(S->L) params[1] = p(L->s) params[2] = p(F->LS) * * @param seq - The sequence to predict from. * @param str - The string to fill with the predicted value. * @param params - The Knudson-Hein Grammar parameters as described above. * @param verbose - Display output to command line if true. */ public static String predictFromGrammar(String seq, Grammar grammar) { int size = seq.length(); BigDecimal[][][] parr = new BigDecimal[size][size][3]; int[][][] tau = new int[size][size][3]; String pred = null; if (size < 1) { return pred; } for (int i = 0; i < size; i++) { for (int j = 0; j < size; j++) { for (int k = 0; k < 3; k++) { tau[i][j][k] = -1; parr[i][j][k] = new BigDecimal(0); } } } if (grammar instanceof PfoldGrammar) { PfoldGrammar pfold = (PfoldGrammar) grammar; // BigDouble[] kh_params = new BigDouble[pfold.get_kh_params().length]; // for(int i=0;i<kh_params.length;i++) // kh_params[i] = new BigDouble(pfold.get_kh_params()[i].doubleValue()); // BigDouble[] Pfold_paramsUnmatched = new // BigDouble[pfold.getPfold_paramsUnmatched().length]; // for(int i=0;i<Pfold_paramsUnmatched.length;i++) // Pfold_paramsUnmatched[i] = new // BigDouble(pfold.getPfold_paramsUnmatched()[i].doubleValue()); // BigDouble[][] Pfold_paramsBasePairs = new // BigDouble[pfold.getPfold_paramsBasePairs().length][pfold.getPfold_paramsBasePairs()[0].length]; // for(int i=0;i<Pfold_paramsBasePairs.length;i++) // for(int j=0;j<Pfold_paramsBasePairs[i].length;j++) // Pfold_paramsBasePairs[i][j] = new // BigDouble(pfold.getPfold_paramsBasePairs()[i][j].doubleValue()); BigDecimal[] kh_params = new BigDecimal[pfold.get_kh_params().length]; for (int i = 0; i < kh_params.length; i++) kh_params[i] = new BigDecimal(pfold.get_kh_params()[i].doubleValue()); BigDecimal[] Pfold_paramsUnmatched = new BigDecimal[pfold.getPfold_paramsUnmatched().length]; for (int i = 0; i < Pfold_paramsUnmatched.length; i++) Pfold_paramsUnmatched[i] = new BigDecimal(pfold.getPfold_paramsUnmatched()[i].doubleValue()); BigDecimal[][] Pfold_paramsBasePairs = new BigDecimal[pfold.getPfold_paramsBasePairs().length] [pfold.getPfold_paramsBasePairs()[0].length]; for (int i = 0; i < Pfold_paramsBasePairs.length; i++) for (int j = 0; j < Pfold_paramsBasePairs[i].length; j++) Pfold_paramsBasePairs[i][j] = new BigDecimal(pfold.getPfold_paramsBasePairs()[i][j].doubleValue()); Pfold_CYK(seq, kh_params, Pfold_paramsUnmatched, Pfold_paramsBasePairs, parr, tau); } // BigDouble prob = parr[0][size - 1][0]; BigDecimal prob = parr[0][size - 1][0]; int compVal = prob.compareTo(BigDecimal.ZERO); if (compVal > 0) pred = kh_trace_back(tau); else output.out( "Probability of highest probability parse is <= 0. (" + prob.toString() + " :: " + compVal + ")"); return pred; }
// private static void Pfold_CYK(String seq, BigDouble[] p, BigDouble[] p_u, // BigDouble[][] p_bp, BigDouble[][][] arr, int[][][] tau) { private static void Pfold_CYK( String seq, BigDecimal[] p, BigDecimal[] p_u, BigDecimal[][] p_bp, BigDecimal[][][] arr, int[][][] tau) { int S = 0, L = 1, F = 2; int[] seq2 = PfoldGrammar.getNucleotideIndexArray(seq); BigDecimal temp, tempProd, one = new BigDecimal(1); BigDecimal S_LS, L_dFd, F_dFd; S_LS = one.subtract(p[S]); L_dFd = one.subtract(p[L]); F_dFd = one.subtract(p[F]); MathContext precision = MathContext.DECIMAL128; for (int i = 0; i < seq.length(); i++) { temp = p[L].multiply(p_u[seq2[i]]); arr[i][i][L] = p[L].multiply(p_u[seq2[i]]); tau[i][i][S] = 0; tau[i][i][L] = 0; tau[i][i][F] = 0; } System.out.println(); long start = System.currentTimeMillis(); for (int j = 0; j < seq.length(); j++) { // Progress Bar CommandLine.DisplayBar(seq.length(), j, ((long) (System.currentTimeMillis() - start) / 1000)); for (int i = 0; i + j < seq.length(); i++) { int ij = i + j; if (j > 2) { tempProd = p_bp[seq2[i]][seq2[ij]].multiply(arr[i + 1][ij - 1][F], precision); // /////// // L->dFd temp = L_dFd.multiply(tempProd, precision); if (arr[i][ij][L].compareTo(temp) <= 0) { arr[i][ij][L] = new BigDecimal(temp.toPlainString()); tau[i][ij][L] = (F << 8) | 0xff; } // /////// // F->dFd temp = F_dFd.multiply(tempProd, precision); if (arr[i][ij][F].compareTo(temp) <= 0) { arr[i][ij][F] = new BigDecimal(temp.toPlainString()); tau[i][ij][F] = (F << 8) | 0xff; } } // ///// // S->L temp = p[S].multiply(arr[i][ij][L], precision); if (arr[i][ij][S].compareTo(temp) <= 0) { arr[i][ij][S] = new BigDecimal(temp.toPlainString()); tau[i][ij][S] = (L << 8) | 0xff; } for (int k = i; k < ij; k++) { tempProd = arr[i][k][L].multiply(arr[k + 1][ij][S], precision); // ////// // S->LS temp = S_LS.multiply(tempProd, precision); if (arr[i][ij][S].compareTo(temp) <= 0) { arr[i][ij][S] = new BigDecimal(temp.toPlainString()); tau[i][ij][S] = (k << 16) | (L << 8) | (S); } // ////// // F->LS temp = p[F].multiply(tempProd, precision); if (arr[i][ij][F].compareTo(temp) <= 0) { arr[i][ij][F] = new BigDecimal(temp.toPlainString()); tau[i][ij][F] = (k << 16) | (L << 8) | (S); } } } } CommandLine.DisplayBarFinish(); }