public EpisodeAnalysis evaluateBehavior(State s, RewardFunction rf, TerminalFunction tf) { EpisodeAnalysis res = new EpisodeAnalysis(); res.addState(s); // add initial state State cur = s; while (!tf.isTerminal(cur)) { cur = this.followAndRecordPolicy(res, cur, rf); } return res; }
public EpisodeAnalysis evaluateBehavior( State s, RewardFunction rf, TerminalFunction tf, int maxSteps) { EpisodeAnalysis res = new EpisodeAnalysis(); res.addState(s); // add initial state State cur = s; int nSteps = 0; while (!tf.isTerminal(cur) && nSteps < maxSteps) { cur = this.followAndRecordPolicy(res, cur, rf); nSteps = res.numTimeSteps(); } return res; }