public Reward_observation_terminal env_step(Action theAction) { // Bail if we're terminal boolean terminal = taskTracker.isTerminal(); if (terminal) { currentState = envSimulator.getODEState(); return makeRewardObservation(0.0, terminal); } // BT: This code is expecting a double array called action, I'll give it to them double[] action = theAction.doubleArray; List<Compartment> compartments = arm.getCompartments(); /* guard against oversized or undersized action arrays */ if (useDiscreteActions) { int theDiscreteAction = theAction.intArray[0]; action = new double[2 + compartments.size() * 3]; handleIntAction(theDiscreteAction, compartments.size(), action); } double actionSum = 0.0d; for (int i = 0; i < action.length; i++) { actionSum += action[i]; } ArmBase base = arm.getBase(); base.setAction(action[0], action[1]); for (int i = 0; i < compartments.size(); i++) { compartments .get(i) .setAction(action[2 + 3 * i], action[2 + 3 * i + 1], action[2 + 3 * i + 2]); } ODEState odeState = envSimulator.getODEState(); double timeStep = .2; odeState = solver.solve(envSimulator.asEquation(), odeState, 0, 5, timeStep); envSimulator.setODEState(odeState); currentState = odeState; taskTracker.update(); terminal = taskTracker.isTerminal(); for (EnvironmentObserver o : observers) { o.stateChanged(taskTracker.getReward()); if (terminal) { o.episodeFinished(); } } double reward = taskTracker.getReward() - actionSum / (double) action.length; // want to add a small penality for actions; return makeRewardObservation(reward, terminal); }
/**
 * Describes the dimensionality of this environment's observations and actions.
 *
 * <p>The state size is {@code 2 + 4 * (2 * compartments + food)}. The action size is
 * {@code 2 + 3 * compartments}: two values for the arm base plus three per compartment, which is
 * exactly the layout {@code env_step} reads from the action array (and the size it allocates in
 * discrete-action mode).
 *
 * @return a {@code TaskDescription} holding the number of state and action variables
 */
public TaskDescription getTaskSpec() {
    int numCompartments = arm.getCompartments().size();
    int numFood = allFood.size();
    int numStateVars = 2 + 4 * (2 * numCompartments + numFood);
    // Must include the 2 base actions; advertising only 3 * numCompartments makes agents send an
    // array that env_step then indexes out of bounds.
    int numActionVars = 2 + 3 * numCompartments;
    return new TaskDescription(numStateVars, numActionVars);
}
private String makeTaskSpec() { double minXPos = -12.0d; double maxXPos = 12.0d; double minYPos = -12.0d; double maxYPos = 6.0d; double minVel = -1.0d; double maxVel = 1.0d; double minAction = 0.0d; double maxAction = 1.0d; TaskDescription ts = getTaskSpec(); TaskSpecVRLGLUE3 theTaskSpecObject = new TaskSpecVRLGLUE3(); theTaskSpecObject.setEpisodic(); theTaskSpecObject.setDiscountFactor(1.0d); // Angle of the arm theTaskSpecObject.addContinuousObservation(new DoubleRange(-4.0d * Math.PI, 4.0d * Math.PI)); // Angular Velocity theTaskSpecObject.addContinuousObservation(new DoubleRange(-1.0d, 1.0d)); // Now we want to add the variable for all the compartments for (int i = 0; i < 4 * (2 * arm.getCompartments().size() + allFood.size()); i++) { if (i % 4 == 0) { theTaskSpecObject.addContinuousObservation(new DoubleRange(minXPos, maxXPos)); } if (i % 4 == 1) { theTaskSpecObject.addContinuousObservation(new DoubleRange(minYPos, maxYPos)); } if (i % 4 == 2 || i % 4 == 3) { theTaskSpecObject.addContinuousObservation(new DoubleRange(minVel, maxVel)); } } int NDA = 8; if (useDiscreteActions) { theTaskSpecObject.addDiscreteAction(new IntRange(0, NDA - 1)); } else { theTaskSpecObject.addContinuousAction( (new DoubleRange(minAction, maxAction, ts.getNumActionVariables()))); } theTaskSpecObject.setRewardRange(new DoubleRange(-1, 20)); String newTaskSpecString = theTaskSpecObject.toTaskSpec(); TaskSpec.checkTaskSpec(newTaskSpecString); return newTaskSpecString; }
public String env_message(String theMessage) { EnvironmentMessages theMessageObject; try { theMessageObject = EnvironmentMessageParser.parseMessage(theMessage); } catch (NotAnRLVizMessageException e) { System.err.println("Someone sent Octopus a message that wasn't RL-Viz compatible"); return "I only respond to RL-Viz messages!"; } if (theMessageObject.canHandleAutomatically(this)) { String theResponseString = theMessageObject.handleAutomatically(this); return theResponseString; } // If it wasn't handled automatically, maybe its a custom Mountain Car Message if (theMessageObject.getTheMessageType() == { String theCustomType = theMessageObject.getPayLoad(); AbstractResponse theResponse = null; if (theCustomType.equals("GETOCTSTATE")) { theResponse = new OctopusStateResponse(arm.getCompartments()); } if (theCustomType.equals("GETOCTCOREDATA")) { theResponse = new OctopusCoreDataResponse(targets, Constants.get().getSurfaceLevel()); } if (theResponse != null) { return theResponse.makeStringResponse(); } } System.err.println( "We need some code written in Env Message for Octopus.. unknown request received: " + theMessage); Thread.dumpStack(); return null; }