Example #1
  // Build the MDP and load the value function, then derive the per-action
  // Q-functions needed to execute the greedy policy.
  public MDPSim(String prob_file, String vfun_file) {
    _mdp = new MDP(prob_file, DD.TYPE_ADD);
    _bUseBasis = vfun_file.equalsIgnoreCase("basis");
    if (_bUseBasis) {
      // Basis-function mode: no precomputed value function (or Q-functions) to load.
      _mdp._valueDD = null;
    } else {
      _mdp._valueDD =
          _mdp._context.buildDDFromUnorderedTree(
              MDPConverter.ADDFileToTree(vfun_file), _mdp._tmVar2ID);

      _qfuns = new HashMap(); // maps action name -> Q-function DD
      Iterator i = _mdp._hmName2Action.entrySet().iterator();
      while (i.hasNext()) {

        Map.Entry me = (Map.Entry) i.next();
        Action a = (Action) me.getValue();

        //////////////////////////////////////////////////////////////
        // Regress the (primed) value function through each action to
        // obtain that action's Q-function (a one-step Bellman backup)
        //////////////////////////////////////////////////////////////
        System.out.println("Calculating Q-function for action: " + a._sName);
        Object qfun =
            _mdp.regress(_mdp._context.remapGIDsInt(_mdp._valueDD, _mdp._hmPrimeRemap), a);
        // System.out.println(_mdp._context.printNode(qfun) + "\n");

        _qfuns.put(a._sName, qfun);
      }
    }
    System.out.println(_mdp);
  }
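
To see the quantity this constructor computes symbolically, here is a minimal tabular sketch of the same one-step backup over plain Java maps. The two-state, two-action MDP, its rewards, transitions, and the discount factor are all invented for illustration; the original code produces the analogous Q-functions over ADDs via _mdp.regress.

import java.util.HashMap;
import java.util.Map;

// Tabular sketch of the backup the constructor performs with ADDs:
// for each action a, Q_a(s) = R(s,a) + gamma * sum_{s'} P(s'|s,a) * V(s').
// The toy MDP below (2 states, 2 actions) is invented for illustration.
public class TabularQBackup {
  public static void main(String[] args) {
    double gamma = 0.9;                 // assumed discount factor
    double[] value = {10.0, 0.0};       // V(s) for states 0 and 1

    Map<String, double[]> reward = new HashMap<>();     // R(s, a)
    reward.put("stay", new double[] {1.0, 0.0});
    reward.put("move", new double[] {0.0, 0.5});

    Map<String, double[][]> trans = new HashMap<>();    // P(s' | s, a)
    trans.put("stay", new double[][] {{0.9, 0.1}, {0.1, 0.9}});
    trans.put("move", new double[][] {{0.2, 0.8}, {0.8, 0.2}});

    Map<String, double[]> qfuns = new HashMap<>();
    for (String a : reward.keySet()) {
      double[] q = new double[value.length];
      for (int s = 0; s < value.length; s++) {
        double expNext = 0.0;           // expected next-state value
        for (int sp = 0; sp < value.length; sp++) {
          expNext += trans.get(a)[s][sp] * value[sp];
        }
        q[s] = reward.get(a)[s] + gamma * expNext;
        System.out.println("Q(" + s + ", " + a + ") = " + q[s]);
      }
      qfuns.put(a, q);
    }
  }
}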
Example #2
  // Determines the best action, given the Q-functions
  public Action getAction() {

    Iterator i = _qfuns.entrySet().iterator();
    Action best_act = null;
    // Start at negative infinity so actions whose Q-values are all below -1
    // are still considered (the original -1d seed would skip them).
    double best_val = Double.NEGATIVE_INFINITY;
    while (i.hasNext()) {

      Map.Entry me = (Map.Entry) i.next();
      String act_name = (String) me.getKey();
      Object qfun = me.getValue();
      Action a = (Action) _mdp._hmName2Action.get(act_name);
      double val = _mdp._context.evaluate(qfun, _state);

      // System.out.println(a._sName + " -> " +_mdp._context.printNode(qfun));
      // System.out.println("  " + a._sName + " -> " + _mdp._df.format(val));
      if (val > best_val) {
        best_act = a;
        best_val = val;
      }
    }
    // System.out.println("  Chose: " + best_act._sName);

    return best_act; // null only if no Q-functions were loaded
  }
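
For reference, here is a self-contained sketch of the same argmax loop over a plain map of Q-values; the state index and the Q-values are invented. It also illustrates why seeding the best value with Double.NEGATIVE_INFINITY matters: with a -1d seed, a problem whose Q-values all fall below -1 would return null.

import java.util.HashMap;
import java.util.Map;

// Tabular sketch of the greedy selection in getAction(): evaluate every
// Q-function at the current state and keep the argmax. The Q-values here
// are all below -1, which is exactly the case the NEGATIVE_INFINITY
// initialization handles correctly.
public class GreedyPick {
  public static void main(String[] args) {
    int state = 0;                                    // hypothetical current state
    Map<String, double[]> qfuns = new HashMap<>();
    qfuns.put("stay", new double[] {-3.0, -1.5});
    qfuns.put("move", new double[] {-2.0, -4.0});

    String bestAct = null;
    double bestVal = Double.NEGATIVE_INFINITY;
    for (Map.Entry<String, double[]> e : qfuns.entrySet()) {
      double val = e.getValue()[state];
      if (val > bestVal) {
        bestAct = e.getKey();
        bestVal = val;
      }
    }
    System.out.println("Chose: " + bestAct + " (Q = " + bestVal + ")");
  }
}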