Exemplo n.º 1
0
  /**
   * Builds the positive training set from a list of miRBase entries.
   *
   * <p>Entries are visited in the order produced by {@code randomization(size, size)}. For each
   * visited entry the precursor sequence is folded with {@code MfeFold}, and every mature miRNA
   * whose evidence is {@code "experimental"} and that lies entirely on the 5' side or entirely on
   * the 3' side of the hairpin is recorded as a positive example. Progress and a short summary
   * are written to standard output.
   *
   * <p>Side effect: {@code pri} is not declared inside this method and is reassigned on every
   * iteration (presumably an instance field; confirm against the enclosing class).
   *
   * @param milist the miRBase entries to draw from
   * @param subnum when greater than zero, no further entries are visited once at least this many
   *     positive miRNAs have been collected; the check happens once per entry, so the final
   *     count may exceed {@code subnum}
   * @return one {@code TrainingEntry} per visited precursor that yielded at least one positive
   *     miRNA
   */
  public ArrayList<TrainingEntry> positiveData(ArrayList<MiRBaseEntry> milist, int subnum) {
    System.out.print("Reads miRBase data ");

    ArrayList<TrainingEntry> collected = new ArrayList<TrainingEntry>();

    // Visiting order of the miRBase entries, as supplied by randomization().
    int total = milist.size();
    ArrayList<Integer> order = randomization(total, total);

    int verifiedCount = 0; // mature miRNAs with "experimental" evidence
    int unverifiedCount = 0; // mature miRNAs with any other evidence
    int positive = 0; // mature miRNAs accepted as positive examples
    int visited = 0; // entries processed so far, used for the progress display

    for (Integer entryIndex : order) {
      boolean quotaReached = subnum > 0 && positive >= subnum;
      if (quotaReached) {
        break;
      }

      MiRBaseEntry entry = milist.get(entryIndex);
      TrainingEntry candidate = new TrainingEntry();

      // Fold the pri-miRNA and keep its structure and energy alongside the sequence.
      pri = new PriMiRNA(entry.getPriAccession(), entry.getPriSequence());
      MfeFold folder = new MfeFold(pri.getSeq());
      folder.cal();
      pri.setStr(folder.getStructure());
      pri.setEnergy(folder.getEnergy());
      candidate.setPriRNA(pri);

      // 1-based positions: last "(" of the structure and first ")" of the structure.
      int lastOpen = pri.getStr().lastIndexOf("(") + 1;
      int firstClose = pri.getStr().indexOf(")") + 1;

      for (int k = 0; k < entry.getMatNum(); k++) {
        // Only experimentally verified miRNAs are eligible.
        if (!entry.getEvidence(k).equals("experimental")) {
          unverifiedCount += 1;
          continue;
        }
        verifiedCount += 1;

        // A ")" before the last "(" means the structure has two or more loops: reject.
        if (lastOpen >= firstClose) {
          continue;
        }

        int strand;
        if (entry.getMatEnd(k) <= lastOpen) {
          strand = 5; // entirely on the 5' side
        } else if (entry.getMatStart(k) >= firstClose) {
          strand = 3; // entirely on the 3' side
        } else {
          continue; // overlaps the loop area
        }

        candidate.addMiEntry(
            entry.getMatStart(k) - 1, entry.getMatEnd(k) - entry.getMatStart(k) + 1, strand, 1);
        // for the fasta format mirna data (disabled)
        //                    te.setMiId(mi.getMatID(i));
        //                    p_fas.put(te.getMiId(te.NumOfMi()-1), te.getMiSeq(te.NumOfMi()-1));

        positive++;
      }

      // Keep the entry only if it contributed at least one positive miRNA.
      if (candidate.NumOfMi() > 0) {
        collected.add(candidate);
      }

      visited++;
      String progress = visited + ":" + entry.getPriID();
      System.out.print(progress + backspace(progress));
    }

    System.out.println();
    // Summary report.
    System.out.println("Loads " + (verifiedCount + unverifiedCount) + " Entries");
    System.out.println("Experimental verified Entries: " + verifiedCount);
    System.out.println("Non-experimental verified Entries: " + unverifiedCount);
    System.out.println("Positive Entries: " + positive);
    return collected;
  }
Exemplo n.º 2
0
  /**
   * Generates negative (decoy) miRNA positions for each positive training entry.
   *
   * <p>For every entry in {@code pos} a new {@code TrainingEntry} sharing the same pri-miRNA is
   * created. Then, {@code level} times per positive miRNA, a random start position is drawn
   * (without repetition) until one is found that
   *
   * <ul>
   *   <li>is at least 5 bases away from the start of <em>every</em> positive miRNA on the entry,
   *       and
   *   <li>lets a window of the positive miRNA's length fit on the 5' side or on the 3' side of
   *       the hairpin midpoint.
   * </ul>
   *
   * If every position has been tried without success, a message is printed and that decoy is
   * skipped. The total number of decoys produced is printed at the end.
   *
   * @param pos positive training entries, as produced by {@code positiveData}
   * @param level number of decoys to attempt per positive miRNA
   * @return one entry per element of {@code pos}, in the same order; an entry may hold fewer
   *     than {@code level * NumOfMi()} decoys (possibly none) when no valid position exists
   */
  public ArrayList<TrainingEntry> negativeData(ArrayList<TrainingEntry> pos, int level) {
    Random r = new Random(); // random number producer
    ArrayList<TrainingEntry> nes = new ArrayList<TrainingEntry>();
    int negative = 0;

    for (TrainingEntry po : pos) {
      TrainingEntry ne = new TrainingEntry();
      ne.setPriRNA(po.getPriRNA());

      int end5 = po.getPriRNA().getStr().lastIndexOf("("); // last "(", count from 0
      int start3 = po.getPriRNA().getStr().indexOf(")"); // first ")", count from 0
      int halfLoop = (start3 - end5 - 1) / 2;
      int endP5 = end5 + halfLoop; // 5' endpoint of hairpin, count from 0
      int endP3 = start3 - halfLoop; // 3' endpoint of hairpin, count from 0

      int num = po.NumOfMi(); // the number of positive miRNAs on this entry
      int size = po.getPriRNA().getLength(); // the size of the pri-miRNA

      for (int n = 0; n < level; n++) { // each level
        for (int i = 0; i < num; i++) { // each miRNA in the entry
          int miL = po.getSize(i); // the decoy has the same length as the positive miRNA

          // Positions already drawn for this decoy, so each one is examined at most once.
          boolean[] tried = new boolean[Math.max(size, 0)];
          int triedCount = 0;

          boolean found = false;
          int rS = 0;
          int strand = 0;

          while (triedCount < size) {
            rS = r.nextInt(size); // get a random start
            if (tried[rS]) {
              continue;
            }
            tried[rS] = true;
            triedCount++;

            // The random start must be at least 5 bases from the start of EVERY positive
            // miRNA on the entry, not only the one currently being mirrored.
            // Assumes getStart() is 0-based like rS; positiveData stores "matStart - 1".
            boolean tooClose = false;
            for (int j = 0; j < num; j++) {
              if (Math.abs(po.getStart(j) - rS) < 5) {
                tooClose = true;
                break;
              }
            }
            if (tooClose) {
              continue;
            }

            if (rS < endP5 && rS + miL - 1 <= endP5) {
              // the whole window lies on the 5' strand
              strand = 5;
              found = true;
              break;
            }
            // NOTE(review): "rS + miL < size" rejects a window ending exactly on the last base;
            // kept as in the original. Confirm whether "<=" was intended.
            if (rS >= endP3 && rS + miL < size) {
              // the window starts on the 3' strand and fits inside the sequence
              strand = 3;
              found = true;
              break;
            }
          }

          if (found) {
            ne.addMiEntry(rS, miL, strand, 0);
            // for the fasta format mirna data (disabled)
            //                        ne.setMiId(po.getMiId(i)+"n"+n);
            //                        n_fas.put(ne.getMiId(i), ne.getMiSeq(i));

            negative++;
          } else {
            System.out.println("no negative data found on " + po.getPriRNA().getId());
          }
        }
      }
      nes.add(ne);
    }
    System.out.println("Negative Entries: " + negative);
    return nes;
  }