public Reward_observation_terminal env_step(Action theAction) { // Bail if we're terminal boolean terminal = taskTracker.isTerminal(); if (terminal) { currentState = envSimulator.getODEState(); return makeRewardObservation(0.0, terminal); } // BT: This code is expecting a double array called action, I'll give it to them double[] action = theAction.doubleArray; List<Compartment> compartments = arm.getCompartments(); /* guard against oversized or undersized action arrays */ if (useDiscreteActions) { int theDiscreteAction = theAction.intArray[0]; action = new double[2 + compartments.size() * 3]; handleIntAction(theDiscreteAction, compartments.size(), action); } double actionSum = 0.0d; for (int i = 0; i < action.length; i++) { actionSum += action[i]; } ArmBase base = arm.getBase(); base.setAction(action[0], action[1]); for (int i = 0; i < compartments.size(); i++) { compartments .get(i) .setAction(action[2 + 3 * i], action[2 + 3 * i + 1], action[2 + 3 * i + 2]); } ODEState odeState = envSimulator.getODEState(); double timeStep = .2; odeState = solver.solve(envSimulator.asEquation(), odeState, 0, 5, timeStep); envSimulator.setODEState(odeState); currentState = odeState; taskTracker.update(); terminal = taskTracker.isTerminal(); for (EnvironmentObserver o : observers) { o.stateChanged(taskTracker.getReward()); if (terminal) { o.episodeFinished(); } } double reward = taskTracker.getReward() - actionSum / (double) action.length; // want to add a small penality for actions; return makeRewardObservation(reward, terminal); }
/**
 * Checks the sub-objective at the current position of the sequence.
 *
 * <p>When the current sub-objective reports a hit and is accomplished, it is unhighlighted
 * and the sequence advances by one. The next sub-objective is then highlighted, or, if
 * there is none left, the whole sequence is marked as accomplished.
 *
 * <p>If no sub-objective remains (the sequence has already been completed, or it contains
 * no sub-objectives at all), nothing is checked and {@code false} is returned instead of
 * failing with an {@link IndexOutOfBoundsException}.
 *
 * @return the result of the current sub-objective's {@code check()}, or {@code false} when
 *     there is no current sub-objective
 */
public boolean check() {
    // After the final sub-objective is accomplished, current equals the list size; indexing
    // with it would throw, so report "no hit" instead.
    if (current >= subObjectives.size()) {
        return false;
    }

    boolean hit = subObjectives.get(current).check();
    if (hit && subObjectives.get(current).isAccomplished()) {
        subObjectives.get(current).unhighlight();
        current++;
        if (current < subObjectives.size()) {
            subObjectives.get(current).highlight();
        } else {
            // Ran off the end of the sequence: every sub-objective has been accomplished.
            accomplished = true;
        }
    }
    return hit;
}
/**
 * Creates a tracker for a sequence of objectives.
 *
 * <p>One sub-tracker is built through {@code makeObjectiveTracker} for each objective
 * returned by {@code spec.getObjective()}, in iteration order. The tracker starts at
 * position 0 and is not accomplished.
 *
 * @param spec the sequence specification whose objectives are tracked
 */
public SequenceTracker(SequenceSpec spec) {
    this.subObjectives = new ArrayList<ObjectiveTracker>();
    for (ObjectiveSpec objective : spec.getObjective()) {
        ObjectiveTracker tracker = makeObjectiveTracker(objective);
        this.subObjectives.add(tracker);
    }
    this.current = 0;
    this.accomplished = false;
}
/**
 * Highlights the first sub-objective and unhighlights every other one.
 *
 * <p>The first element is accessed unconditionally, so the list of sub-objectives must not
 * be empty.
 */
public void highlight() {
    subObjectives.get(0).highlight();

    // Everything after the first entry is switched off.
    final int count = subObjectives.size();
    int index = 1;
    while (index < count) {
        subObjectives.get(index).unhighlight();
        index++;
    }
}
/* observation support (needed for display) */

/**
 * Registers an observer by adding it to this environment's observer collection.
 *
 * @param o the observer to add
 */
public void addObserver(EnvironmentObserver o) {
    this.observers.add(o);
}