public double update(RealVector x_t, Action a_t, RealVector x_tp1, Action a_tp1, double r_tp1) {
  // A null state marks the beginning of a new episode.
  if (x_t == null)
    return initEpisode();
  // Greedy action in the next state x_{t+1}.
  Action atp1_star = greedy.decide(x_tp1);
  // Feature vector of the state-action pair actually taken at time t.
  RealVector phi_sa_t = toStateAction.stateAction(x_t, a_t);
  // Q-learning TD error: delta = r_{t+1} + gamma * max_a Q(x_{t+1}, a) - Q(x_t, a_t).
  delta = r_tp1 + gamma * greedy.bestActionValue() - theta.dotProduct(phi_sa_t);
  // Watkins's Q(lambda): at_star holds the greedy action of the previous step,
  // i.e. the greedy action in x_t. Decay the traces only if the agent followed
  // the greedy policy; otherwise cut them before adding the current features.
  if (a_t == at_star)
    e.update(gamma * lambda, phi_sa_t);
  else {
    e.clear();
    e.update(0, phi_sa_t);
  }
  // Update the weight vector along the eligibility traces.
  theta.addToSelf(alpha * delta, e.vect());
  at_star = atp1_star;
  return delta;
}
public QLearning(Action[] actions, double alpha, double gamma, double lambda,
    StateToStateAction toStateAction, int nbFeatures, Traces prototype) {
  this.alpha = alpha;
  this.gamma = gamma;
  this.lambda = lambda;
  this.toStateAction = toStateAction;
  // Greedy policy over the learned action values, used for the max in the TD error.
  greedy = new Greedy(this, actions, toStateAction);
  // Weight vector over state-action features.
  theta = new PVector(nbFeatures);
  // Eligibility traces instantiated from the given prototype.
  e = prototype.newTraces(nbFeatures);
}
private double initEpisode() {
  if (e != null)
    e.clear();
  delta = 0.0;
  at_star = null;
  return delta;
}
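// A minimal, hypothetical wiring sketch showing how the constructor and update()
// above could be driven in an agent loop. Only QLearning's own constructor and
// update() signatures come from the listing; `actions`, `stateActionMap`,
// `tracesPrototype`, `nbFeatures`, `nbSteps`, the step-size/discount/trace values,
// and the observeState/observeReward/chooseAction helpers are placeholders for
// whatever the surrounding codebase provides.
//
// QLearning qlearning =
//     new QLearning(actions, 0.1, 0.99, 0.3, stateActionMap, nbFeatures, tracesPrototype);
//
// RealVector x_t = null; // null marks the start of an episode (see initEpisode())
// Action a_t = null;
// for (int t = 0; t < nbSteps; t++) {
//   RealVector x_tp1 = observeState();  // placeholder: feature vector of the new state
//   double r_tp1 = observeReward();     // placeholder: reward for the transition into x_tp1
//   Action a_tp1 = chooseAction(x_tp1); // placeholder: behaviour policy, e.g. epsilon-greedy
//   qlearning.update(x_t, a_t, x_tp1, a_tp1, r_tp1);
//   x_t = x_tp1;
//   a_t = a_tp1;
// }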