示例#1
0
文件: TD.java 项目: pilarski/rlpark
 /**
  * Applies one temporal-difference step to the weight vector {@code v}.
  *
  * <p>The method stores {@code v.dotProduct(x_t)} in the field {@code v_t}, sets the field
  * {@code delta_t} to {@code r_tp1 + gamma_tp1 * v.dotProduct(x_tp1) - v_t}, and then calls
  * {@code v.addToSelf(alpha_v * delta_t, x_t)}.
  *
  * @param x_t feature vector before the transition; when {@code null} the method returns the
  *        result of {@code initEpisode()} without touching {@code v}
  * @param x_tp1 feature vector after the transition (NOTE(review): passed straight to
  *        {@code dotProduct}; whether {@code null} is tolerated there is not visible here)
  * @param r_tp1 reward term added to the target
  * @param gamma_tp1 factor multiplying the value estimate of {@code x_tp1}
  * @return the value assigned to {@code delta_t}, or the result of {@code initEpisode()} when
  *         {@code x_t} is {@code null}
  */
 public double update(RealVector x_t, RealVector x_tp1, double r_tp1, double gamma_tp1) {
   if (x_t == null) {
     return initEpisode();
   }
   v_t = v.dotProduct(x_t);
   final double bootstrappedTarget = r_tp1 + gamma_tp1 * v.dotProduct(x_tp1);
   delta_t = bootstrappedTarget - v_t;
   final double scaledError = alpha_v * delta_t;
   v.addToSelf(scaledError, x_t);
   return delta_t;
 }
示例#2
0
 /**
  * Performs one update of {@code theta} from the transition
  * ({@code x_t}, {@code a_t}) to {@code x_tp1} with reward {@code r_tp1}.
  *
  * <p>The error stored in the field {@code delta} is
  * {@code r_tp1 + gamma * greedy.bestActionValue() - theta.dotProduct(phi)}, where {@code phi}
  * is {@code toStateAction.stateAction(x_t, a_t)}. The traces {@code e} are updated with
  * {@code gamma * lambda} when {@code a_t} is the same object as the remembered
  * {@code at_star}; otherwise they are cleared and updated with a factor of {@code 0}.
  * Finally {@code theta.addToSelf(alpha * delta, e.vect())} is called and {@code at_star} is
  * replaced by the action returned from {@code greedy.decide(x_tp1)}.
  *
  * @param x_t feature vector before the transition; when {@code null} the method returns the
  *        result of {@code initEpisode()} immediately
  * @param a_t action paired with {@code x_t}
  * @param x_tp1 feature vector after the transition, handed to {@code greedy.decide}
  * @param a_tp1 not read by this method
  * @param r_tp1 reward term of the target
  * @return the value assigned to {@code delta}, or the result of {@code initEpisode()} when
  *         {@code x_t} is {@code null}
  */
 public double update(RealVector x_t, Action a_t, RealVector x_tp1, Action a_tp1, double r_tp1) {
   if (x_t == null) {
     return initEpisode();
   }
   // Call order is kept exactly as before: decide(), then stateAction(), then
   // bestActionValue(). Presumably decide() refreshes the value that bestActionValue()
   // reports -- TODO confirm against the greedy policy implementation.
   final Action nextGreedyAction = greedy.decide(x_tp1);
   final RealVector stateActionFeatures = toStateAction.stateAction(x_t, a_t);
   final double target = r_tp1 + gamma * greedy.bestActionValue();
   delta = target - theta.dotProduct(stateActionFeatures);
   // Reference comparison: the taken action must be the very object remembered in at_star.
   if (a_t != at_star) {
     e.clear();
     e.update(0, stateActionFeatures);
   } else {
     e.update(gamma * lambda, stateActionFeatures);
   }
   theta.addToSelf(alpha * delta, e.vect());
   at_star = nextGreedyAction;
   return delta;
 }
示例#3
0
文件: TD.java 项目: pilarski/rlpark
 /**
  * Evaluates the current weights {@code v} on a feature vector.
  *
  * @param phi feature vector to evaluate
  * @return the result of {@code v.dotProduct(phi)}
  */
 @Override
 public double predict(RealVector phi) {
   final double prediction = v.dotProduct(phi);
   return prediction;
 }
示例#4
0
 /**
  * Sets a single entry of the weight vector {@code theta} to zero.
  *
  * @param index position in {@code theta} to overwrite with {@code 0}
  */
 @Override
 public void resetWeight(int index) {
   theta.setEntry(index, 0);
 }
示例#5
0
 /**
  * Evaluates the current weights {@code theta} on a feature vector.
  *
  * @param phi_sa feature vector to evaluate (by its name a state-action encoding; that is
  *        not enforced here)
  * @return the result of {@code theta.dotProduct(phi_sa)}
  */
 @Override
 public double predict(RealVector phi_sa) {
   final double actionValue = theta.dotProduct(phi_sa);
   return actionValue;
 }