예제 #1
0
  // Derives QFunctions for the given value function and simulates the
  // greedy policy for the given number of trials and steps per trial.
  // Returns final value of every trial.
  public ArrayList simulate(int trials, int steps, long rand_seed) {
    ArrayList values = new ArrayList();
    _r = new Random(rand_seed);

    for (int trial = 1; trial <= trials; trial++) {

      System.out.println("\n -----------\n   Trial " + trial + "\n -----------");

      // Initialize state
      _state = new ArrayList();
      _nVars = _mdp._alVars.size();
      for (int c = 0; c < (_nVars << 1); c++) {
        _state.add("-");
      }
      Iterator i = _mdp._alVars.iterator();
      _vars = new TreeSet();
      while (i.hasNext()) {
        String s = (String) i.next();
        if (!s.endsWith("\'")) {
          Integer gid = (Integer) _mdp._tmVar2ID.get(s);
          _vars.add(gid);

          // Note: assign level (level is gid-1 b/c gids in order)
          _state.set(gid.intValue() - 1, _r.nextBoolean() ? TRUE : FALSE);
        }
      }
      // System.out.println(_mdp._context.printNode(_mdp._valueDD) + "\n" + _state);
      double reward = _mdp._context.evaluate(_mdp._rewardDD, _state);
      System.out.print(" " + PrintState(_state) + "  " + MDP._df.format(reward));

      // Run steps
      for (int step = 1; step <= steps; step++) {

        // Get action
        Action a;
        if (_bUseBasis) {
          a = getBasisAction();
        } else {
          a = getAction();
        }

        // Execute action
        executeAction(a);

        // Update reward
        reward =
            (_mdp._bdDiscount.doubleValue() * reward)
                + _mdp._context.evaluate(_mdp._rewardDD, _state);

        System.out.println(", a=" + a._sName);
        System.out.print(
            " " + PrintState(_state) + "  " + MDP._df.format(reward) + ": " + "Step " + step);
      }
      values.add(new Double(reward));
      System.out.println();
    }

    return values;
  }
예제 #2
0
  // Executes the given action
  public void executeAction(Action a) {

    // For each state variable, retrieve the DD for it, evaluate it
    // w.r.t. the current state and build a new state.
    ArrayList new_state = new ArrayList();
    int c;
    for (c = 0; c < (_nVars << 1); c++) {
      new_state.add("-");
    }
    for (c = 0; c < (_nVars << 1); c++) {
      Object cur_assign = _state.get(c);
      if (cur_assign instanceof Boolean) {
        // System.out.println(a._tmID2DD);
        int nonprime_id = c + 1;
        int prime_id = nonprime_id - _nVars;
        Object DD = a._tmID2DD.get(new Integer(prime_id));
        _state.set(prime_id - 1, TRUE);
        double p_true = _mdp._context.evaluate(DD, _state);
        // System.out.println("ID: " + nonprime_id + ", " + prime_id + ": " +
        //		   _mdp._context.printNode(DD) + " -> " +
        //		   _mdp._df.format(p_true));
        new_state.set(c, (_r.nextFloat() < p_true) ? TRUE : FALSE);
      }
    }

    _state = new_state;
  }