Beispiel #1
0
  /**
   * Advances the environment by one step using the supplied action.
   *
   * <p>If the task tracker already reports a terminal state, no simulation is run: the current
   * state is refreshed from the simulator and a zero reward is returned. Otherwise the action is
   * applied to the arm base and to every compartment, the ODE solver advances the simulator
   * state, the task tracker is updated and all observers are notified.
   *
   * <p>The action vector must hold two entries for the base followed by three entries per
   * compartment. In discrete mode the vector is built from {@code theAction.intArray[0]} via
   * {@code handleIntAction}; otherwise {@code theAction.doubleArray} is used. Entries beyond the
   * expected length are ignored: they are neither applied nor counted in the action penalty.
   *
   * @param theAction the agent's action; {@code intArray[0]} is read in discrete mode,
   *     {@code doubleArray} otherwise
   * @return the reward/observation/terminal triple produced by {@code makeRewardObservation}
   * @throws IllegalArgumentException if, in continuous mode, {@code theAction.doubleArray} is
   *     {@code null} or shorter than {@code 2 + 3 * compartments.size()}
   */
  public Reward_observation_terminal env_step(Action theAction) {
    // Bail if we're terminal
    boolean terminal = taskTracker.isTerminal();
    if (terminal) {
      currentState = envSimulator.getODEState();
      return makeRewardObservation(0.0, terminal);
    }

    List<Compartment> compartments = arm.getCompartments();
    // Two entries drive the base, three drive each compartment (see the setAction calls below).
    int expectedLength = 2 + compartments.size() * 3;

    double[] action;
    if (useDiscreteActions) {
      int theDiscreteAction = theAction.intArray[0];
      action = new double[expectedLength];
      handleIntAction(theDiscreteAction, compartments.size(), action);
    } else {
      double[] supplied = theAction.doubleArray;
      /* guard against oversized or undersized action arrays */
      if (supplied == null || supplied.length < expectedLength) {
        // Fail before touching the arm so a bad action cannot leave it half-updated.
        throw new IllegalArgumentException(
            "Action must contain at least "
                + expectedLength
                + " doubles but had "
                + (supplied == null ? "none (null array)" : String.valueOf(supplied.length)));
      }
      if (supplied.length > expectedLength) {
        // Drop the surplus so the penalty below only reflects entries that are applied.
        action = new double[expectedLength];
        System.arraycopy(supplied, 0, action, 0, expectedLength);
      } else {
        action = supplied;
      }
    }

    double actionSum = 0.0d;
    for (int i = 0; i < action.length; i++) {
      actionSum += action[i];
    }

    ArmBase base = arm.getBase();
    base.setAction(action[0], action[1]);

    for (int i = 0; i < compartments.size(); i++) {
      compartments
          .get(i)
          .setAction(action[2 + 3 * i], action[2 + 3 * i + 1], action[2 + 3 * i + 2]);
    }

    // Advance the simulation and store the result as the new state.
    // NOTE(review): 0 and 5 look like the start/end of the integration interval and timeStep
    // the solver step size -- confirm against the solver's signature.
    ODEState odeState = envSimulator.getODEState();
    double timeStep = .2;
    odeState = solver.solve(envSimulator.asEquation(), odeState, 0, 5, timeStep);
    envSimulator.setODEState(odeState);
    currentState = odeState;

    taskTracker.update();

    terminal = taskTracker.isTerminal();
    for (EnvironmentObserver o : observers) {
      // Observers receive the raw task reward, without the action penalty.
      o.stateChanged(taskTracker.getReward());
      if (terminal) {
        o.episodeFinished();
      }
    }

    // Small penalty for actions: subtract the mean of the applied action entries.
    double reward = taskTracker.getReward() - actionSum / (double) action.length;

    return makeRewardObservation(reward, terminal);
  }