示例#1
0
  /**
   * Builds an SVM problem from every miRNA of every training entry.
   *
   * <p>For each entry the pri-RNA is fetched and prepared once via {@code setPriPara()}; then, for
   * each miRNA of that entry, the pri-RNA is re-processed with that miRNA's start, size and strand,
   * a {@code ParaSummary} is built from it, and one row is produced by
   * {@code svm.predictMatrix(label, ps.paraMatrix(taxo))}.
   *
   * <p>Note: {@code pri} and {@code ps} are not declared in this method (presumably instance
   * fields), so this method overwrites them as a side effect.
   *
   * @param in training entries whose miRNAs are turned into rows
   * @param taxo value forwarded unchanged to {@code ParaSummary.paraMatrix}
   * @return the result of {@code svm.trainMatrix} applied to all collected rows
   */
  private svm_problem parsePara(ArrayList<TrainingEntry> in, String taxo) {
    // First pass: count the miRNAs so the row array can be sized up front.
    int total = 0;
    for (TrainingEntry entry : in) {
      total += entry.NumOfMi();
    }

    // Second pass: one row per miRNA, in entry order.
    svm_node[][] rows = new svm_node[total][];
    int row = 0;
    for (TrainingEntry entry : in) {
      pri = entry.getPriRNA();
      pri.setPriPara();

      int k = 0;
      while (k < entry.NumOfMi()) {
        pri.process(entry.getStart(k), entry.getSize(k), entry.getStrand(k));
        ps = new ParaSummary(pri, pri.getPreRNA(), pri.getMiRNA());
        rows[row] = svm.predictMatrix(entry.getLabel(k), ps.paraMatrix(taxo));
        row++;
        k++;
      }
    }

    return svm.trainMatrix(rows);
  }
示例#2
0
  /**
   * Generates negative (label 0) training entries by sampling random miRNA start positions on the
   * pri-RNA of each positive entry.
   *
   * <p>For every positive entry a new {@code TrainingEntry} sharing the same pri-RNA is created.
   * For each of the {@code level} rounds and each positive miRNA, a random start is drawn (without
   * repetition) until one is found that
   *
   * <ul>
   *   <li>is at least 5 positions away from <em>every</em> true miRNA start on the entry, and
   *   <li>lets a miRNA of the same size as the positive one fit either on the 5' arm (ending at or
   *       before the 5' hairpin endpoint) or on the 3' arm (starting at or after the 3' hairpin
   *       endpoint and ending before the end of the sequence).
   * </ul>
   *
   * If every position has been tried without success, a message is printed and no negative miRNA
   * is added for that round.
   *
   * @param pos positive training entries
   * @param level number of negative miRNAs to attempt per positive miRNA
   * @return one negative entry per positive entry, in the same order; an entry may hold fewer than
   *     {@code level * NumOfMi()} miRNAs if no valid position was found
   */
  public ArrayList<TrainingEntry> negativeData(ArrayList<TrainingEntry> pos, int level) {
    Random r = new Random(); // random number producer
    ArrayList<TrainingEntry> nes = new ArrayList<TrainingEntry>();
    int negative = 0;

    for (TrainingEntry po : pos) {
      TrainingEntry ne = new TrainingEntry();
      ne.setPriRNA(po.getPriRNA());

      // Hairpin geometry derived from the structure string; all indices count from 0.
      String structure = po.getPriRNA().getStr();
      int end5 = structure.lastIndexOf("(");
      int start3 = structure.indexOf(")");
      int halfLoop = (start3 - end5 - 1) / 2;
      int endP5 = end5 + halfLoop; // 5' endpoint of hairpin
      int endP3 = start3 - halfLoop; // 3' endpoint of hairpin

      int num = po.NumOfMi(); // the number of positive mi
      int size = po.getPriRNA().getLength(); // the size of pri in the entry

      for (int n = 0; n < level; n++) { // each level
        for (int i = 0; i < num; i++) { // each miRNA in the entry
          int miL = po.getSize(i); // the positive mi size

          // Positions already drawn for this miRNA, so each is tried at most once.
          boolean[] tried = new boolean[Math.max(size, 0)];
          int triedCount = 0;

          int rS = 0;
          int strand = 0;
          boolean found = false;

          while (triedCount < size) {
            int candidate = r.nextInt(size); // get a random start
            if (tried[candidate]) {
              continue;
            }
            tried[candidate] = true;
            triedCount++;

            // The random start must be at least 5-bp from ALL true start positions on the entry
            // (the index here must be j, not i, otherwise only one start is ever checked).
            boolean tooClose = false;
            for (int j = 0; j < num; j++) {
              if (Math.abs(po.getStart(j) - candidate) < 5) {
                tooClose = true;
                break;
              }
            }
            if (tooClose) {
              continue;
            }

            if (candidate < endP5 && candidate + miL - 1 <= endP5) {
              // the random miRNA lies entirely on the 5' strand
              rS = candidate;
              strand = 5;
              found = true;
              break;
            } else if (candidate >= endP3 && candidate + miL < size) {
              // the random miRNA lies on the 3' strand
              // NOTE(review): this bound rejects a miRNA ending exactly on the last base
              // (candidate + miL == size); kept as in the original — confirm it is intended.
              rS = candidate;
              strand = 3;
              found = true;
              break;
            }
          }

          if (found) {
            ne.addMiEntry(rS, miL, strand, 0);
            negative++;
          } else {
            System.out.println("no negative data found on " + po.getPriRNA().getId());
          }
        }
      }
      nes.add(ne);
    }

    System.out.println("Negative Entries: " + negative);
    return nes;
  }