/**
 * Converts every miRNA candidate of every training entry into one SVM row
 * and hands the collected rows to {@code svm.trainMatrix}.
 *
 * <p>As a side effect this method overwrites {@code pri} and {@code ps},
 * which are declared outside this method: {@code pri} ends up holding the
 * pri-RNA of the last entry visited and {@code ps} the summary of the last
 * candidate processed.
 *
 * @param in   training entries whose candidates are converted, in list order
 * @param taxo passed through unchanged to {@code ParaSummary.paraMatrix}
 * @return whatever {@code svm.trainMatrix} returns for the collected rows
 */
private svm_problem parsePara(ArrayList<TrainingEntry> in, String taxo) {
    // First pass: count the candidates so the row array can be sized exactly.
    int totalRows = 0;
    for (TrainingEntry entry : in) {
        totalRows += entry.NumOfMi();
    }

    svm_node[][] rows = new svm_node[totalRows][];
    int nextRow = 0;

    // Second pass: one row per candidate, entries and candidates in order.
    for (TrainingEntry entry : in) {
        pri = entry.getPriRNA();
        pri.setPriPara();

        int k = 0;
        while (k < entry.NumOfMi()) {
            // Position the pri-RNA on this candidate before summarising it.
            pri.process(entry.getStart(k), entry.getSize(k), entry.getStrand(k));
            ps = new ParaSummary(pri, pri.getPreRNA(), pri.getMiRNA());
            rows[nextRow] = svm.predictMatrix(entry.getLabel(k), ps.paraMatrix(taxo));
            nextRow++;
            k++;
        }
    }

    return svm.trainMatrix(rows);
}
public ArrayList<TrainingEntry> negativeData(ArrayList<TrainingEntry> pos, int level) { Random r = new Random(); // random number producer int rS = 0; ArrayList<TrainingEntry> nes = new ArrayList<TrainingEntry>(); TrainingEntry ne; int strand = 0; int negative = 0; for (TrainingEntry po : pos) { ne = new TrainingEntry(); ne.setPriRNA(po.getPriRNA()); int end5 = po.getPriRNA().getStr().lastIndexOf("("); // from 0 int start3 = po.getPriRNA().getStr().indexOf(")"); // from 0 int endP5 = end5 + (start3 - end5 - 1) / 2; // 5' endpoint of hairpin,count from 0 int endP3 = start3 - (start3 - end5 - 1) / 2; // 3' endpoint of hairpin, count from 0 int num = po.NumOfMi(); // the number of postive mi int size = po.getPriRNA().getLength(); // the size of pri in the entry for (int n = 0; n < level; n++) { // each level for (int i = 0; i < num; i++) { // each miRNA in the entry int miL = po.getSize(i); // the positive mi size HashMap bases = new HashMap(); int flag = 0; while (bases.size() < size) { // the negative mi start rS = r.nextInt(size); // get a random start if (bases.containsKey(rS)) continue; bases.put(rS, null); // store the positions which have been used // the random start is at least 5-bp from the true start positions on the entry int j; for (j = 0; j < num; j++) { if (Math.abs(po.getStart(i) - rS) < 5) break; } if (j < num) continue; // if the random start at 5' strand if ((rS < endP5 && rS + miL - 1 <= endP5)) { strand = 5; flag = 1; break; } // if the random start at 3' strand else if (rS >= endP3 && rS + miL < size) { strand = 3; flag = 1; break; } // else continue; } if (flag == 1) { ne.addMiEntry(rS, miL, strand, 0); // for the fasta format mirna data // ne.setMiId(po.getMiId(i)+"n"+n); // n_fas.put(ne.getMiId(i), ne.getMiSeq(i)); negative++; } else System.out.println("no negative data found on " + po.getPriRNA().getId()); } } nes.add(ne); } System.out.println("Negative Entries: " + negative); return nes; }