Ejemplo n.º 1
0
  /**
   * Advances the environment by one step using the supplied action.
   *
   * <p>If the task tracker already reports a terminal state, the current state is refreshed from
   * the simulator and a zero-reward observation is returned without applying the action.
   * Otherwise the action is applied to the arm base (entries 0 and 1) and to each compartment
   * (three consecutive entries per compartment, starting at index 2), the simulator's ODE state
   * is advanced through the solver, the task tracker is updated and every registered observer is
   * notified.
   *
   * <p>The returned reward is the tracker's reward minus the arithmetic mean of the action
   * entries. Observers receive the tracker's reward without that penalty.
   *
   * @param theAction the action to apply; {@code intArray[0]} is read when discrete actions are
   *     enabled, otherwise {@code doubleArray} is used directly
   * @return the reward/observation/terminal triple built by {@code makeRewardObservation}
   * @throws IllegalArgumentException if discrete actions are disabled and {@code doubleArray} is
   *     {@code null} or holds fewer than {@code 2 + 3 * compartments} entries
   */
  public Reward_observation_terminal env_step(Action theAction) {
    // Bail if we're terminal
    boolean terminal = taskTracker.isTerminal();
    if (terminal) {
      currentState = envSimulator.getODEState();
      return makeRewardObservation(0.0, terminal);
    }

    List<Compartment> compartments = arm.getCompartments();
    // Two entries for the base plus three per compartment are consumed below.
    int requiredLength = 2 + compartments.size() * 3;

    double[] action;
    if (useDiscreteActions) {
      int theDiscreteAction = theAction.intArray[0];
      action = new double[requiredLength];
      handleIntAction(theDiscreteAction, compartments.size(), action);
    } else {
      action = theAction.doubleArray;
      // Guard against missing or undersized action arrays before any arm state is modified;
      // otherwise the failure would surface midway through applying the action.
      // Oversized arrays are still accepted, as before.
      if (action == null || action.length < requiredLength) {
        throw new IllegalArgumentException(
            "Action must provide at least "
                + requiredLength
                + " double values but had "
                + (action == null ? "none" : String.valueOf(action.length)));
      }
    }

    double actionSum = 0.0d;
    for (int i = 0; i < action.length; i++) {
      actionSum += action[i];
    }
    ArmBase base = arm.getBase();
    base.setAction(action[0], action[1]);

    for (int i = 0; i < compartments.size(); i++) {
      compartments
          .get(i)
          .setAction(action[2 + 3 * i], action[2 + 3 * i + 1], action[2 + 3 * i + 2]);
    }

    // Advance the simulation and publish the resulting state.
    // NOTE(review): 0 and 5 look like the integration interval bounds — confirm against solver.
    ODEState odeState = envSimulator.getODEState();
    double timeStep = .2;
    odeState = solver.solve(envSimulator.asEquation(), odeState, 0, 5, timeStep);
    envSimulator.setODEState(odeState);
    currentState = odeState;

    taskTracker.update();

    terminal = taskTracker.isTerminal();
    for (EnvironmentObserver o : observers) {
      o.stateChanged(taskTracker.getReward());
      if (terminal) {
        o.episodeFinished();
      }
    }
    // Small penalty for actions: subtract the mean action value from the task reward.
    double reward = taskTracker.getReward() - actionSum / (double) action.length;

    return makeRewardObservation(reward, terminal);
  }
Ejemplo n.º 2
0
      /**
       * Checks the currently active sub-objective and advances the sequence when it is
       * accomplished.
       *
       * <p>When the active sub-objective reports a hit and is accomplished, it is unhighlighted
       * and the next one (if any) is highlighted. When no sub-objective remains, the whole
       * sequence is marked as accomplished.
       *
       * @return the result of the active sub-objective's {@code check()}, or {@code false} when
       *     there is no active sub-objective left to check
       */
      public boolean check() {
        // After the final sub-objective completes, current equals the list size; without this
        // guard a further call would index past the end of the list. The same applies to an
        // empty sequence.
        if (current >= subObjectives.size()) {
          return false;
        }

        boolean hit = subObjectives.get(current).check();
        if (hit && subObjectives.get(current).isAccomplished()) {
          subObjectives.get(current).unhighlight();
          current++;

          if (current < subObjectives.size()) {
            subObjectives.get(current).highlight();
          } else {
            accomplished = true;
          }
        }
        return hit;
      }
Ejemplo n.º 3
0
 /**
  * Builds a tracker holding one sub-objective tracker per objective in the given specification,
  * in the order the specification supplies them.
  *
  * <p>The tracker starts at the first sub-objective and is not accomplished.
  *
  * @param spec the sequence specification whose objectives are turned into sub-objective trackers
  */
 public SequenceTracker(SequenceSpec spec) {
   this.subObjectives = new ArrayList<ObjectiveTracker>();
   for (ObjectiveSpec objectiveSpec : spec.getObjective()) {
     ObjectiveTracker tracker = makeObjectiveTracker(objectiveSpec);
     this.subObjectives.add(tracker);
   }
   this.current = 0;
   this.accomplished = false;
 }
Ejemplo n.º 4
0
 /**
  * Highlights the first sub-objective and unhighlights every other one.
  *
  * <p>Does nothing when the sequence has no sub-objectives.
  */
 public void highlight() {
   // An empty sequence has nothing to highlight; get(0) would otherwise throw.
   if (subObjectives.isEmpty()) {
     return;
   }
   subObjectives.get(0).highlight();
   for (int i = 1; i < subObjectives.size(); i++) {
     subObjectives.get(i).unhighlight();
   }
 }
Ejemplo n.º 5
0
 /**
  * Registers an observer with this environment (observation support, needed for display).
  *
  * @param o the observer to add to the observer collection
  */
 public void addObserver(EnvironmentObserver o) {
   this.observers.add(o);
 }