Example #1
0
  /**
   * Builds the SVM training problem from every miRNA candidate held by the given entries.
   *
   * <p>For each entry the pri-miRNA is fetched and {@code setPriPara()} is invoked once; then,
   * for each candidate of that entry, the pri-miRNA is re-processed at the candidate's
   * start/size/strand, a {@code ParaSummary} is built from it, and one row is produced via
   * {@code svm.predictMatrix} from the candidate's label and the summary's parameter matrix.
   * The {@code pri} and {@code ps} members of the enclosing class are overwritten as a side
   * effect.
   *
   * @param in training entries whose candidates are turned into rows
   * @param taxo value forwarded unchanged to {@code ParaSummary.paraMatrix}
   * @return the result of {@code svm.trainMatrix} applied to all collected rows
   */
  private svm_problem parsePara(ArrayList<TrainingEntry> in, String taxo) {
    // First pass: count candidates so the row array can be sized exactly.
    int total = 0;
    for (TrainingEntry entry : in) {
      total += entry.NumOfMi();
    }

    svm_node[][] rows = new svm_node[total][];
    int row = 0;

    // Second pass: one row per candidate, in entry order.
    for (TrainingEntry entry : in) {
      pri = entry.getPriRNA();
      pri.setPriPara();

      int k = 0;
      while (k < entry.NumOfMi()) {
        pri.process(entry.getStart(k), entry.getSize(k), entry.getStrand(k));
        ps = new ParaSummary(pri, pri.getPreRNA(), pri.getMiRNA());
        rows[row] = svm.predictMatrix(entry.getLabel(k), ps.paraMatrix(taxo));
        row++;
        k++;
      }
    }

    return svm.trainMatrix(rows);
  }
Example #2
0
  /**
   * Collects positive training entries from miRBase records, visiting them in the order
   * returned by {@code randomization(size, size)}.
   *
   * <p>Each visited record's pri-miRNA sequence is folded with {@code MfeFold}; the resulting
   * structure and energy are stored on a new {@code PriMiRNA}, which is also assigned to the
   * {@code pri} member of the enclosing class. A mature miRNA is accepted only if its evidence
   * equals {@code "experimental"}, the structure's last {@code '('} comes before its first
   * {@code ')'}, and the miRNA lies entirely on one side of that gap. Progress and a final
   * summary are printed to standard output.
   *
   * @param milist miRBase records to draw from
   * @param subnum if positive, no further records are visited once at least this many miRNAs
   *     have been accepted; the check happens once per record, so the final count may exceed
   *     it. If zero or negative, all records are visited.
   * @return training entries that contain at least one accepted miRNA
   */
  public ArrayList<TrainingEntry> positiveData(ArrayList<MiRBaseEntry> milist, int subnum) {
    System.out.print("Reads miRBase data ");

    ArrayList<TrainingEntry> accepted = new ArrayList<TrainingEntry>();

    int total = milist.size();
    // visiting order over the miRBase records, produced by randomization()
    ArrayList<Integer> order = randomization(total, total);

    int verified = 0; // mature miRNAs whose evidence is "experimental"
    int unverified = 0; // mature miRNAs with any other evidence
    int positives = 0; // mature miRNAs actually added as positive samples
    int visited = 0; // records processed so far (used for the progress display)

    for (Integer index : order) {
      // Stop visiting records once the requested number of positives is reached.
      if (subnum > 0 && positives >= subnum) {
        break;
      }

      MiRBaseEntry record = milist.get(index);
      TrainingEntry entry = new TrainingEntry();

      // Fold the pri-miRNA and keep its structure and energy.
      pri = new PriMiRNA(record.getPriAccession(), record.getPriSequence());
      MfeFold folder = new MfeFold(pri.getSeq());
      folder.cal();
      pri.setStr(folder.getStructure());
      pri.setEnergy(folder.getEnergy());
      entry.setPriRNA(pri);

      // 1-based position of the last '(' (5' arm end) and of the first ')' (3' arm start).
      int end5 = pri.getStr().lastIndexOf("(") + 1;
      int start3 = pri.getStr().indexOf(")") + 1;

      for (int k = 0; k < record.getMatNum(); k++) {
        // Only experimentally verified miRNAs are candidates.
        if (!record.getEvidence(k).equals("experimental")) {
          unverified += 1;
          continue;
        }
        verified += 1;

        // A '(' after the first ')' means more than one loop: reject.
        if (end5 >= start3) {
          continue;
        }

        int strand;
        if (record.getMatEnd(k) <= end5) {
          strand = 5; // entirely on the 5' arm
        } else if (record.getMatStart(k) >= start3) {
          strand = 3; // entirely on the 3' arm
        } else {
          continue; // overlaps the loop region: reject
        }

        // Arguments: start shifted down by one, length, strand, and the constant 1
        // (presumably the positive label; confirm against TrainingEntry.addMiEntry).
        entry.addMiEntry(
            record.getMatStart(k) - 1,
            record.getMatEnd(k) - record.getMatStart(k) + 1,
            strand,
            1);
        // for the fasta format mirna data
        //                    te.setMiId(mi.getMatID(i));
        //                    p_fas.put(te.getMiId(te.NumOfMi()-1), te.getMiSeq(te.NumOfMi()-1));

        positives++;
      }

      // Keep the entry only if at least one miRNA was accepted for it.
      if (entry.NumOfMi() > 0) {
        accepted.add(entry);
      }

      visited++;
      System.out.print(
          visited + ":" + record.getPriID() + backspace(visited + ":" + record.getPriID()));
    }

    System.out.println();
    // report
    System.out.println("Loads " + (verified + unverified) + " Entries");
    System.out.println("Experimental verified Entries: " + verified);
    System.out.println("Non-experimental verified Entries: " + unverified);
    System.out.println("Positive Entries: " + positives);
    return accepted;
  }