/** * Fills the structure (str) string with the predicted structure according to running the CYK * algorithm and traceback on the provided sequence string (seq). The parameters should be * double[3] including the following probabilities: * * <p>params[0] = p(S->L) params[1] = p(L->s) params[2] = p(F->LS) * * @param seq - The sequence to predict from. * @param str - The string to fill with the predicted value. * @param params - The Knudson-Hein Grammar parameters as described above. * @param verbose - Display output to command line if true. */ public static String predictKH(String seq, BigDouble[] params, boolean verbose) { int size = seq.length(); BigDouble[][][] parr = new BigDouble[size][size][3]; int[][][] tau = new int[size][size][3]; String pred = null; if (size < 1) { output.out("Invalid sequence provided: \n\t[" + seq + "]"); return pred; } output.out("Predicting secondary structure for \n\t\t[" + seq + "]"); for (int i = 0; i < size; i++) for (int j = 0; j < size; j++) for (int k = 0; k < 3; k++) tau[i][j][k] = -1; // Fill Array kh_CYK(seq, params, parr, tau); BigDouble prob = parr[0][size - 1][0]; output.out("\t-KH Maximum Probability for sequence is " + prob); // Trace Back if (prob.compareTo(0) > 0) { pred = kh_trace_back(tau); output.out("\t-KH most likely structure for sequence is\n\t\t[ " + pred + " ]"); } return pred; }
public static String predictFromGrammar23S( String seq, String nat, Grammar grammar, boolean verbose) { int size = seq.length(); Map<String, BigDouble> parr = new HashMap<String, BigDouble>(); int[][][] tau = new int[size][size][3]; output.out("\t\tTau allocated"); String pred = null; if (size < 1) { output.out("\nInvalid sequence provided: \n\t[ " + seq + " ]"); return pred; } output.out("\nPredicting secondary structure for \n\t\t[ " + seq + " ]"); long start = System.currentTimeMillis(); for (int i = 0; i < size; i++) { // Progress Bar CommandLine.DisplayBar(seq.length(), i, ((long) (System.currentTimeMillis() - start) / 1000)); for (int j = i; j < size; j++) { String ij = i + ":" + (i + j); for (int k = 0; k < 3; k++) { tau[i][j][k] = -1; parr.put(ij + ":" + k, new BigDouble(0)); } } } CommandLine.DisplayBarFinish(); // Fill Array if (grammar instanceof PfoldGrammar) { PfoldGrammar pfold = (PfoldGrammar) grammar; // _23S_CYK(seq, pfold.getKH_params(), pfold // .getPfold_paramsUnmatched(), pfold // .getPfold_paramsBasePairs(), parr, tau); } BigDouble prob = parr.get("0:" + (size - 1) + ":" + 0); output.out("\t-Probability of highest probability parse for sequence is \n\t\t" + prob); if (prob != null) { // grammar.recordProbability(prob); // Trace Back if (prob.compareTo(0) > 0) { pred = kh_trace_back(tau); { output.out("\t-Highest probability parse generates structure\n\t\t[ " + pred + " ]"); if (nat != null) output.out( "\t-FMeasure for predicted structure is\n\t\t" + Compare.getFMeasureBD(nat, pred)); } } } return pred; }
/** * Fills the structure (str) string with the predicted structure according to running the CYK * algorithm and traceback on the provided sequence string (seq). The parameters should be * double[3] including the following probabilities: * * <p>params[0] = p(S->L) params[1] = p(L->s) params[2] = p(F->LS) * * @param seq - The sequence to predict from. * @param str - The string to fill with the predicted value. * @param params - The Knudson-Hein Grammar parameters as described above. * @param verbose - Display output to command line if true. */ public static String predictFromGrammar(String seq, Grammar grammar) { int size = seq.length(); BigDecimal[][][] parr = new BigDecimal[size][size][3]; int[][][] tau = new int[size][size][3]; String pred = null; if (size < 1) { return pred; } for (int i = 0; i < size; i++) { for (int j = 0; j < size; j++) { for (int k = 0; k < 3; k++) { tau[i][j][k] = -1; parr[i][j][k] = new BigDecimal(0); } } } if (grammar instanceof PfoldGrammar) { PfoldGrammar pfold = (PfoldGrammar) grammar; // BigDouble[] kh_params = new BigDouble[pfold.get_kh_params().length]; // for(int i=0;i<kh_params.length;i++) // kh_params[i] = new BigDouble(pfold.get_kh_params()[i].doubleValue()); // BigDouble[] Pfold_paramsUnmatched = new // BigDouble[pfold.getPfold_paramsUnmatched().length]; // for(int i=0;i<Pfold_paramsUnmatched.length;i++) // Pfold_paramsUnmatched[i] = new // BigDouble(pfold.getPfold_paramsUnmatched()[i].doubleValue()); // BigDouble[][] Pfold_paramsBasePairs = new // BigDouble[pfold.getPfold_paramsBasePairs().length][pfold.getPfold_paramsBasePairs()[0].length]; // for(int i=0;i<Pfold_paramsBasePairs.length;i++) // for(int j=0;j<Pfold_paramsBasePairs[i].length;j++) // Pfold_paramsBasePairs[i][j] = new // BigDouble(pfold.getPfold_paramsBasePairs()[i][j].doubleValue()); BigDecimal[] kh_params = new BigDecimal[pfold.get_kh_params().length]; for (int i = 0; i < kh_params.length; i++) kh_params[i] = new BigDecimal(pfold.get_kh_params()[i].doubleValue()); BigDecimal[] Pfold_paramsUnmatched = new BigDecimal[pfold.getPfold_paramsUnmatched().length]; for (int i = 0; i < Pfold_paramsUnmatched.length; i++) Pfold_paramsUnmatched[i] = new BigDecimal(pfold.getPfold_paramsUnmatched()[i].doubleValue()); BigDecimal[][] Pfold_paramsBasePairs = new BigDecimal[pfold.getPfold_paramsBasePairs().length] [pfold.getPfold_paramsBasePairs()[0].length]; for (int i = 0; i < Pfold_paramsBasePairs.length; i++) for (int j = 0; j < Pfold_paramsBasePairs[i].length; j++) Pfold_paramsBasePairs[i][j] = new BigDecimal(pfold.getPfold_paramsBasePairs()[i][j].doubleValue()); Pfold_CYK(seq, kh_params, Pfold_paramsUnmatched, Pfold_paramsBasePairs, parr, tau); } // BigDouble prob = parr[0][size - 1][0]; BigDecimal prob = parr[0][size - 1][0]; int compVal = prob.compareTo(BigDecimal.ZERO); if (compVal > 0) pred = kh_trace_back(tau); else output.out( "Probability of highest probability parse is <= 0. (" + prob.toString() + " :: " + compVal + ")"); return pred; }