예제 #1
0
  /**
   * Advances the environment by one agent step.
   *
   * <p>If the task tracker already reports a terminal state, the current ODE state is re-read and
   * a zero-reward terminal observation is returned without applying any action. Otherwise the
   * action is applied to the arm base (entries 0 and 1) and to each compartment (three consecutive
   * entries per compartment, starting at index 2), the ODE state is advanced by the solver, the
   * task tracker is updated and every registered observer is notified.
   *
   * <p>The returned reward is the task tracker's reward minus the mean of all action entries.
   * Observers are notified with the tracker's reward, without that penalty.
   *
   * @param theAction the agent's action; {@code intArray[0]} is read when discrete actions are
   *     enabled, otherwise {@code doubleArray} must hold at least {@code 2 + 3 * compartments}
   *     entries (extra trailing entries are not applied to the arm but still count toward the
   *     action penalty)
   * @return the reward/observation/terminal result built by {@code makeRewardObservation}
   * @throws IllegalArgumentException if the array required by the current action mode is missing
   *     or too short
   */
  public Reward_observation_terminal env_step(Action theAction) {
    // Bail if we're terminal
    boolean terminal = taskTracker.isTerminal();
    if (terminal) {
      currentState = envSimulator.getODEState();
      return makeRewardObservation(0.0, terminal);
    }

    List<Compartment> compartments = arm.getCompartments();
    int compartmentCount = compartments.size();
    // Two entries drive the arm base, followed by three entries per compartment.
    int requiredLength = 2 + 3 * compartmentCount;

    double[] action;
    if (useDiscreteActions) {
      if (theAction.intArray == null || theAction.intArray.length == 0) {
        throw new IllegalArgumentException(
            "Octopus is using discrete actions but the action carries no integer value");
      }
      // The discrete action is expanded into the continuous layout used below.
      action = new double[requiredLength];
      handleIntAction(theAction.intArray[0], compartmentCount, action);
    } else {
      action = theAction.doubleArray;
      // Guard against missing or undersized action arrays before anything is applied to the arm,
      // so a bad action cannot leave the arm with a partially applied command.
      if (action == null || action.length < requiredLength) {
        throw new IllegalArgumentException(
            "Octopus expects at least "
                + requiredLength
                + " continuous action values but received "
                + (action == null ? "none" : String.valueOf(action.length)));
      }
    }

    double actionSum = 0.0d;
    for (int i = 0; i < action.length; i++) {
      actionSum += action[i];
    }

    ArmBase base = arm.getBase();
    base.setAction(action[0], action[1]);

    for (int i = 0; i < compartmentCount; i++) {
      int offset = 2 + 3 * i;
      compartments.get(i).setAction(action[offset], action[offset + 1], action[offset + 2]);
    }

    ODEState odeState = envSimulator.getODEState();
    double timeStep = .2;
    // NOTE(review): 0 and 5 are presumably the integration start and end times - confirm against
    // the solver's signature.
    odeState = solver.solve(envSimulator.asEquation(), odeState, 0, 5, timeStep);
    envSimulator.setODEState(odeState);
    currentState = odeState;

    taskTracker.update();

    terminal = taskTracker.isTerminal();
    for (EnvironmentObserver o : observers) {
      o.stateChanged(taskTracker.getReward());
      if (terminal) {
        o.episodeFinished();
      }
    }

    // Apply a small penalty for actions: subtract the mean action value from the task reward.
    double reward = taskTracker.getReward() - actionSum / (double) action.length;

    return makeRewardObservation(reward, terminal);
  }
예제 #2
0
 /**
  * Describes the sizes of the observation and action vectors for the current arm and food set.
  *
  * <p>The state size is {@code 2 + 4 * (2 * compartments + food)}. The action size is
  * {@code 2 + 3 * compartments}: two entries for the arm base followed by three per compartment,
  * which is the layout {@code env_step} indexes into when it applies an action.
  *
  * @return a {@code TaskDescription} holding the number of state and action variables
  */
 public TaskDescription getTaskSpec() {
   int numCompartments = arm.getCompartments().size();
   int numFood = allFood.size();
   int numStateVars = 2 + 4 * (2 * numCompartments + numFood);
   // Two base entries plus three per compartment; omitting the base entries would advertise an
   // action vector shorter than the one the step function reads.
   int numActionVars = 2 + 3 * numCompartments;
   return new TaskDescription(numStateVars, numActionVars);
 }
예제 #3
0
  /**
   * Assembles the task spec string for this environment.
   *
   * <p>The spec is episodic with a discount factor of 1. Its observations are two leading
   * entries (commented in the code as the arm angle and angular velocity) followed by four
   * entries (x, y and two velocity-ranged values) for each of the
   * {@code 2 * compartments + food} tracked points. The action is either a single discrete
   * value in {@code [0, 7]} or a continuous vector sized from {@code getTaskSpec()}, depending on
   * {@code useDiscreteActions}.
   *
   * @return the task spec string, after it has been passed through {@code TaskSpec.checkTaskSpec}
   */
  private String makeTaskSpec() {
    final double xLow = -12.0d;
    final double xHigh = 12.0d;
    final double yLow = -12.0d;
    final double yHigh = 6.0d;
    final double velocityLow = -1.0d;
    final double velocityHigh = 1.0d;
    final double actionLow = 0.0d;
    final double actionHigh = 1.0d;
    final int discreteActionCount = 8;

    final TaskDescription description = getTaskSpec();

    final TaskSpecVRLGLUE3 spec = new TaskSpecVRLGLUE3();
    spec.setEpisodic();
    spec.setDiscountFactor(1.0d);

    // Angle of the arm, then its angular velocity.
    spec.addContinuousObservation(new DoubleRange(-4.0d * Math.PI, 4.0d * Math.PI));
    spec.addContinuousObservation(new DoubleRange(-1.0d, 1.0d));

    // Each tracked point contributes four observations in a fixed order:
    // x position, y position, and two velocity-ranged values.
    final int trackedPoints = 2 * arm.getCompartments().size() + allFood.size();
    for (int point = 0; point < trackedPoints; point++) {
      spec.addContinuousObservation(new DoubleRange(xLow, xHigh));
      spec.addContinuousObservation(new DoubleRange(yLow, yHigh));
      spec.addContinuousObservation(new DoubleRange(velocityLow, velocityHigh));
      spec.addContinuousObservation(new DoubleRange(velocityLow, velocityHigh));
    }

    if (!useDiscreteActions) {
      spec.addContinuousAction(
          new DoubleRange(actionLow, actionHigh, description.getNumActionVariables()));
    } else {
      spec.addDiscreteAction(new IntRange(0, discreteActionCount - 1));
    }
    spec.setRewardRange(new DoubleRange(-1, 20));

    final String serialized = spec.toTaskSpec();
    TaskSpec.checkTaskSpec(serialized);
    return serialized;
  }
예제 #4
0
  /**
   * Responds to an out-of-band message sent to the environment.
   *
   * <p>The message is parsed as an RL-Viz environment message. Messages that the parsed object
   * can handle automatically are delegated to it. Custom messages with payload
   * {@code GETOCTSTATE} or {@code GETOCTCOREDATA} are answered with the corresponding Octopus
   * response. Anything else is logged to standard error together with a stack dump.
   *
   * @param theMessage the raw message text
   * @return the response string; a fixed refusal text when the message is not RL-Viz compatible;
   *     {@code null} when the request is not recognised
   */
  public String env_message(String theMessage) {
    EnvironmentMessages theMessageObject;
    try {
      theMessageObject = EnvironmentMessageParser.parseMessage(theMessage);
    } catch (NotAnRLVizMessageException e) {
      System.err.println("Someone sent Octopus a message that wasn't RL-Viz compatible");
      return "I only respond to RL-Viz messages!";
    }

    if (theMessageObject.canHandleAutomatically(this)) {
      return theMessageObject.handleAutomatically(this);
    }

    // If it wasn't handled automatically, maybe it's a custom Octopus message
    if (theMessageObject.getTheMessageType()
        == rlVizLib.messaging.environment.EnvMessageType.kEnvCustom.id()) {

      String theCustomType = theMessageObject.getPayLoad();
      AbstractResponse theResponse = null;
      // Constant-first equals: a missing payload falls through to the "unknown request" path
      // below instead of raising a NullPointerException here.
      if ("GETOCTSTATE".equals(theCustomType)) {
        theResponse = new OctopusStateResponse(arm.getCompartments());
      } else if ("GETOCTCOREDATA".equals(theCustomType)) {
        theResponse = new OctopusCoreDataResponse(targets, Constants.get().getSurfaceLevel());
      }
      if (theResponse != null) {
        return theResponse.makeStringResponse();
      }
    }

    System.err.println(
        "We need some code written in Env Message for Octopus.. unknown request received: "
            + theMessage);
    Thread.dumpStack();
    // Unrecognised requests keep returning null, as before; callers may rely on that.
    return null;
  }