/**
 * Builds the SVM training problem from every miRNA of every training entry.
 *
 * <p>For each entry the pri-miRNA is taken from the entry and prepared with
 * {@code setPriPara()}; then, for each miRNA of that entry, the pri-miRNA is
 * processed at the miRNA's start/size/strand, a {@link ParaSummary} is built
 * from it, and one row is produced by {@code svm.predictMatrix} from the
 * miRNA's label and the summary's parameter matrix.
 *
 * <p>Side effect: the {@code pri} and {@code ps} fields are overwritten while
 * iterating and keep the values of the last miRNA processed.
 *
 * @param in   training entries whose miRNAs are converted to rows
 * @param taxo value forwarded unchanged to {@code ParaSummary.paraMatrix}
 * @return the result of {@code svm.trainMatrix} applied to all rows
 */
private svm_problem parsePara(ArrayList<TrainingEntry> in, String taxo) {
    // First pass: the total number of miRNAs fixes the number of rows.
    int rowCount = 0;
    for (TrainingEntry entry : in) {
        rowCount += entry.NumOfMi();
    }

    svm_node[][] rows = new svm_node[rowCount][];
    int next = 0;

    // Second pass: one row per miRNA, in entry order.
    for (TrainingEntry entry : in) {
        pri = entry.getPriRNA();
        pri.setPriPara();

        int k = 0;
        while (k < entry.NumOfMi()) {
            pri.process(entry.getStart(k), entry.getSize(k), entry.getStrand(k));
            ps = new ParaSummary(pri, pri.getPreRNA(), pri.getMiRNA());

            svm_node[] row = svm.predictMatrix(entry.getLabel(k), ps.paraMatrix(taxo));
            rows[next] = row;
            next++;
            k++;
        }
    }

    return svm.trainMatrix(rows);
}
/**
 * Collects positive training entries from miRBase entries.
 *
 * <p>Entries are visited in the order returned by
 * {@code randomization(size, size)}. Each pri-miRNA is folded with
 * {@link MfeFold}; its structure and energy are stored on the pri-miRNA, which
 * is attached to a new {@link TrainingEntry}. A mature miRNA is added as a
 * positive example only when all of the following hold:
 * <ul>
 *   <li>its evidence string equals {@code "experimental"};</li>
 *   <li>the last {@code "("} of the structure comes before the first
 *       {@code ")"} (otherwise the structure has more than one loop);</li>
 *   <li>it lies entirely on the 5' arm (ends at or before the last
 *       {@code "("}) or on the 3' arm (starts at or after the first
 *       {@code ")"}), i.e. it does not reach into the loop.</li>
 * </ul>
 *
 * <p>Side effects: progress and a summary are printed to {@code System.out},
 * and the {@code pri} field is overwritten for every visited entry.
 *
 * @param milist miRBase entries to draw from
 * @param subnum when greater than zero, iteration stops before the next entry
 *               once at least this many positive miRNAs were collected; the
 *               limit is checked only between entries, so the final count can
 *               exceed it. A value of zero or less means no limit.
 * @return training entries that received at least one positive miRNA
 */
public ArrayList<TrainingEntry> positiveData(ArrayList<MiRBaseEntry> milist, int subnum) {
    System.out.print("Reads miRBase data ");

    ArrayList<TrainingEntry> positives = new ArrayList<TrainingEntry>();
    int total = milist.size();
    // Visiting order over the miRBase entries (shuffled by randomization()).
    ArrayList<Integer> order = randomization(total, total);

    int experimentalCount = 0; // miRNAs with "experimental" evidence
    int otherCount = 0;        // miRNAs with any other evidence
    int positiveCount = 0;     // miRNAs accepted as positive examples
    int visited = 0;           // entries processed so far (for the progress line)

    for (Integer index : order) {
        // Stop once enough positive miRNAs were gathered.
        if (subnum > 0 && positiveCount >= subnum) {
            break;
        }

        MiRBaseEntry entry = milist.get(index);
        TrainingEntry candidate = new TrainingEntry();
        pri = new PriMiRNA(entry.getPriAccession(), entry.getPriSequence());

        // Fold the pri-miRNA and keep structure and energy on it.
        MfeFold folder = new MfeFold(pri.getSeq());
        folder.cal();
        pri.setStr(folder.getStructure());
        pri.setEnergy(folder.getEnergy());
        candidate.setPriRNA(pri);

        // 1-based hairpin boundaries: end of the 5' arm, start of the 3' arm.
        int end5 = pri.getStr().lastIndexOf("(") + 1;
        int start3 = pri.getStr().indexOf(")") + 1;

        for (int k = 0; k < entry.getMatNum(); k++) {
            // Only experimentally verified miRNAs qualify.
            if (!entry.getEvidence(k).equals("experimental")) {
                otherCount += 1;
                continue;
            }
            experimentalCount += 1;

            // An opening bracket after a closing one means two or more loops.
            if (end5 >= start3) {
                continue;
            }

            int strand;
            if (entry.getMatEnd(k) <= end5) {
                strand = 5; // lies on the 5' arm
            } else if (entry.getMatStart(k) >= start3) {
                strand = 3; // lies on the 3' arm
            } else {
                continue;   // overlaps the loop area
            }

            // Arguments: 0-based start, length, strand, and the constant 1
            // (presumably the positive class label; confirm in TrainingEntry).
            candidate.addMiEntry(
                    entry.getMatStart(k) - 1,
                    entry.getMatEnd(k) - entry.getMatStart(k) + 1,
                    strand,
                    1);
            positiveCount++;
        }

        // Keep only entries that contributed at least one positive miRNA.
        if (candidate.NumOfMi() > 0) {
            positives.add(candidate);
        }

        // Progress line, followed by whatever backspace() returns for it.
        visited++;
        String progress = visited + ":" + entry.getPriID();
        System.out.print(progress + backspace(progress));
    }
    System.out.println();

    // Summary report.
    System.out.println("Loads " + (experimentalCount + otherCount) + " Entries");
    System.out.println("Experimental verified Entries: " + experimentalCount);
    System.out.println("Non-experimental verified Entries: " + otherCount);
    System.out.println("Positive Entries: " + positiveCount);
    return positives;
}