/**
 * Begins a new episode.
 *
 * <p>Resets the task tracker, restores the simulator to the initial ODE
 * state, notifies every registered observer that an episode has started,
 * and returns the first observation of the episode.
 *
 * @return the observation for the initial state
 */
public Observation env_start() {
    taskTracker.reset();
    envSimulator.setODEState(initialState);
    currentState = initialState;
    for (EnvironmentObserver observer : observers) {
        observer.episodeStarted();
    }
    return makeObservation();
}
public Reward_observation_terminal env_step(Action theAction) { // Bail if we're terminal boolean terminal = taskTracker.isTerminal(); if (terminal) { currentState = envSimulator.getODEState(); return makeRewardObservation(0.0, terminal); } // BT: This code is expecting a double array called action, I'll give it to them double[] action = theAction.doubleArray; List<Compartment> compartments = arm.getCompartments(); /* guard against oversized or undersized action arrays */ if (useDiscreteActions) { int theDiscreteAction = theAction.intArray[0]; action = new double[2 + compartments.size() * 3]; handleIntAction(theDiscreteAction, compartments.size(), action); } double actionSum = 0.0d; for (int i = 0; i < action.length; i++) { actionSum += action[i]; } ArmBase base = arm.getBase(); base.setAction(action[0], action[1]); for (int i = 0; i < compartments.size(); i++) { compartments .get(i) .setAction(action[2 + 3 * i], action[2 + 3 * i + 1], action[2 + 3 * i + 2]); } ODEState odeState = envSimulator.getODEState(); double timeStep = .2; odeState = solver.solve(envSimulator.asEquation(), odeState, 0, 5, timeStep); envSimulator.setODEState(odeState); currentState = odeState; taskTracker.update(); terminal = taskTracker.isTerminal(); for (EnvironmentObserver o : observers) { o.stateChanged(taskTracker.getReward()); if (terminal) { o.episodeFinished(); } } double reward = taskTracker.getReward() - actionSum / (double) action.length; // want to add a small penality for actions; return makeRewardObservation(reward, terminal); }