示例#1
0
  /**
   * Follows this policy from the given state until the terminal function reports a terminal
   * state, recording every visited state, action, and reward along the way.
   *
   * <p>There is no step limit here: the loop ends only when {@code tf} reports a terminal state.
   *
   * @param s the state from which to start following the policy
   * @param rf the reward function used to score each transition
   * @param tf the terminal function that decides when to stop
   * @return the recorded episode, beginning with {@code s}
   */
  public EpisodeAnalysis evaluateBehavior(State s, RewardFunction rf, TerminalFunction tf) {
    EpisodeAnalysis episode = new EpisodeAnalysis();
    episode.addState(s); // the episode always starts with the initial state

    for (State current = s; !tf.isTerminal(current); ) {
      current = this.followAndRecordPolicy(episode, current, rf);
    }

    return episode;
  }
示例#2
0
  /**
   * Follows this policy from the given state until the recorded episode reports at least
   * {@code numSteps} time steps.
   *
   * @param s the state from which to start following the policy
   * @param rf the reward function used to score each transition
   * @param numSteps the time-step count, as reported by the episode, at which to stop
   * @return the recorded episode, beginning with {@code s}
   */
  public EpisodeAnalysis evaluateBehavior(State s, RewardFunction rf, int numSteps) {
    EpisodeAnalysis episode = new EpisodeAnalysis();
    episode.addState(s);

    State current = s;
    // The counter is re-read from the episode instead of being incremented, because a single
    // policy step may record several transitions when an option is decomposed.
    for (int recorded = 0; recorded < numSteps; recorded = episode.numTimeSteps()) {
      current = this.followAndRecordPolicy(episode, current, rf);
    }

    return episode;
  }
示例#3
0
  /**
   * Follows this policy from the given state until either the terminal function reports a
   * terminal state or the recorded episode reports at least {@code maxSteps} time steps.
   *
   * @param s the state from which to start following the policy
   * @param rf the reward function used to score each transition
   * @param tf the terminal function that decides when to stop
   * @param maxSteps the time-step count, as reported by the episode, at which to stop
   * @return the recorded episode, beginning with {@code s}
   */
  public EpisodeAnalysis evaluateBehavior(
      State s, RewardFunction rf, TerminalFunction tf, int maxSteps) {
    EpisodeAnalysis episode = new EpisodeAnalysis();
    episode.addState(s); // the episode always starts with the initial state

    State current = s;
    // The terminal check is evaluated first; the step budget is consulted only for
    // non-terminal states. The counter is refreshed from the episode because one policy step
    // may record several transitions when an option is decomposed.
    for (int recorded = 0;
        !(tf.isTerminal(current) || recorded >= maxSteps);
        recorded = episode.numTimeSteps()) {
      current = this.followAndRecordPolicy(episode, current, rf);
    }

    return episode;
  }
示例#4
0
  /**
   * Selects the policy's action for {@code cur}, executes it, and records the resulting
   * transition(s) in {@code ea}.
   *
   * <p>A primitive action, or any action when {@code evaluateDecomposesOptions} is false, is
   * executed and recorded as a single transition. Otherwise the action is treated as an
   * {@link Option} and run one step at a time until it no longer continues, with each step
   * recorded separately. When {@code annotateOptionDecomposition} is set, each recorded step is
   * labelled with the option name, the step index, and the underlying action name.
   *
   * @param ea the episode to which transitions are appended
   * @param cur the state in which the policy is queried
   * @param rf the reward function used to score each transition
   * @return the state reached after the action (or the whole option) has finished
   */
  private State followAndRecordPolicy(EpisodeAnalysis ea, State cur, RewardFunction rf) {

    final GroundedAction selected = this.getAction(cur);

    // Single-transition case: execute once, record once, done.
    if (selected.action.isPrimitive() || !this.evaluateDecomposesOptions) {
      final State outcome = selected.executeIn(cur);
      ea.recordTransitionTo(outcome, selected, rf.reward(cur, selected, outcome));
      return outcome;
    }

    // Option case: step through the option and record each step it takes.
    final Option option = (Option) selected.action;
    option.initiateInState(cur, selected.params);

    State from = cur;
    State outcome;
    int stepIndex = 0;
    do {
      final GroundedAction step = option.oneStepActionSelection(from, selected.params);
      outcome = step.executeIn(from);
      final double reward = rf.reward(from, step, outcome);

      // By default the step's own action is recorded; with annotation enabled a null action
      // carrying the option name, step index, and action name is recorded in its place.
      GroundedAction recorded = step;
      if (annotateOptionDecomposition) {
        final NullAction label =
            new NullAction(option.getName() + "(" + stepIndex + ")-" + step.action.getName());
        recorded = new GroundedAction(label, step.params);
      }
      ea.recordTransitionTo(outcome, recorded, reward);

      from = outcome;
      stepIndex++;
    } while (option.continueFromState(from, selected.params));

    return outcome;
  }