/**
 * Performs one learning step on the weight vector {@code v} for the
 * transition from {@code x_t} to {@code x_tp1}.
 *
 * <p>When {@code x_t} is {@code null} no update is made and the result of
 * {@code initEpisode()} is returned instead. Otherwise the fields
 * {@code v_t} and {@code delta_t} are overwritten with the values computed
 * for this step.
 *
 * @param x_t       feature vector of the current step, or {@code null}
 * @param x_tp1     feature vector of the next step
 * @param r_tp1     reward observed on the transition
 * @param gamma_tp1 discount applied to the value of {@code x_tp1}
 * @return the error {@code r_tp1 + gamma_tp1 * v.x_tp1 - v.x_t}, or whatever
 *         {@code initEpisode()} returns when {@code x_t} is {@code null}
 */
public double update(RealVector x_t, RealVector x_tp1, double r_tp1, double gamma_tp1) {
    if (x_t == null) {
        return initEpisode();
    }

    // Value of the current features under the weights *before* they change.
    v_t = v.dotProduct(x_t);

    // Discounted value of the next features, also under the old weights.
    final double discountedNextValue = gamma_tp1 * v.dotProduct(x_tp1);
    delta_t = r_tp1 + discountedNextValue - v_t;

    // NOTE(review): addToSelf(factor, vector) presumably adds factor * vector
    // into v in place -- confirm against the vector implementation.
    final double scaledError = alpha_v * delta_t;
    v.addToSelf(scaledError, x_t);

    return delta_t;
}
/**
 * Performs one learning step on the weight vector {@code theta} using the
 * trace {@code e}, for the transition {@code (x_t, a_t) -> x_tp1}.
 *
 * <p>When {@code x_t} is {@code null} no update is made and the result of
 * {@code initEpisode()} is returned instead. Otherwise the fields
 * {@code delta} and {@code at_star} are overwritten.
 *
 * @param x_t   feature vector of the current step, or {@code null}
 * @param a_t   action taken at the current step
 * @param x_tp1 feature vector of the next step
 * @param a_tp1 action chosen for the next step; not read by this method
 * @param r_tp1 reward observed on the transition
 * @return the error stored in {@code delta}, or whatever
 *         {@code initEpisode()} returns when {@code x_t} is {@code null}
 */
public double update(RealVector x_t, Action a_t, RealVector x_tp1, Action a_tp1, double r_tp1) {
    if (x_t == null) {
        return initEpisode();
    }

    // decide() must run before bestActionValue() is read below.
    // NOTE(review): presumably bestActionValue() reports the value found by
    // the most recent decide() call -- confirm against the greedy policy.
    final Action nextGreedyAction = greedy.decide(x_tp1);
    final RealVector features = toStateAction.stateAction(x_t, a_t);

    final double target = r_tp1 + gamma * greedy.bestActionValue();
    delta = target - theta.dotProduct(features);

    // Reference comparison against the action remembered from the previous
    // call: a mismatch discards the accumulated trace before adding the
    // current features with a zero decay factor.
    if (a_t != at_star) {
        e.clear();
        e.update(0, features);
    } else {
        e.update(gamma * lambda, features);
    }

    theta.addToSelf(alpha * delta, e.vect());

    // Remember the greedy choice for x_tp1 so the next call can compare it
    // with the action that was actually taken.
    at_star = nextGreedyAction;
    return delta;
}
/**
 * Returns the dot product of the weight vector {@code v} with {@code phi}.
 *
 * @param phi feature vector to evaluate
 * @return {@code v.dotProduct(phi)}
 */
@Override
public double predict(RealVector phi) {
    final double estimate = v.dotProduct(phi);
    return estimate;
}
/**
 * Sets the entry of {@code theta} at the given position to zero.
 *
 * @param index position in {@code theta} to overwrite
 */
@Override
public void resetWeight(int index) {
    theta.setEntry(index, 0);
}
/**
 * Returns the dot product of the weight vector {@code theta} with
 * {@code phi_sa}.
 *
 * @param phi_sa feature vector to evaluate
 * @return {@code theta.dotProduct(phi_sa)}
 */
@Override
public double predict(RealVector phi_sa) {
    final double estimate = theta.dotProduct(phi_sa);
    return estimate;
}