Beispiel #1
0
 /**
  * Evaluates one genome over a single RL-Glue episode and records the result on the genome.
  *
  * <p>The string form of the genome's phenotype is sent to the agent as a message, RL-Glue is
  * initialised, and one episode is run with {@code steps} passed to {@code RL_episode}
  * (presumably the step limit — confirm against the RL-Glue codec). The episode return is
  * converted with {@code rewardToFitness}, added to the genome's fitness, and the genome's
  * episode counter is incremented before RL-Glue is cleaned up.
  *
  * @param genome the genome to evaluate; its fitness and episode count are updated in place
  */
 protected void runEpisode(MetaNEATGenome genome) {
   // The agent receives the phenotype before the run is initialised.
   String phenotypeMessage = genome.toPhenotype().toString();
   RLGlue.RL_agent_message(phenotypeMessage);

   RLGlue.RL_init();
   RLGlue.RL_episode(steps);

   // Convert the episode return into a fitness contribution and book-keep the episode.
   genome.addFitness(rewardToFitness(RLGlue.RL_return()));
   genome.incrEpisodes();

   RLGlue.RL_cleanup();
 }
  /**
   * Starts the RL-Glue, environment and agent threads and then trains on Tetris MDPs 0..19.
   *
   * <p>For each MDP the problem is loaded, RL-Glue is initialised, and episodes are run until a
   * budget of 50000 steps is used up. Each episode is given the remaining budget as its step
   * argument. After each MDP a summary line with the episode count and accumulated return is
   * printed and RL-Glue is cleaned up. The JVM is terminated with status 0 at the end.
   *
   * @param args command-line arguments (not used)
   */
  public static void main(String[] args) {

    // Start order is kept as-is: RL-Glue thread, then environment, then agent.
    Thread glueThread = new RLGlueThread();
    glueThread.start();

    EnvThread environmentThread = new EnvThread();
    environmentThread.start();

    AgentThread agentWorker = new AgentThread();
    agentWorker.start();

    for (int mdpIndex = 0; mdpIndex < 20; mdpIndex++) {
      consoleTrainerHelper.loadTetris(mdpIndex); // the MDP index should be in [0,19]
      RLGlue.RL_init();

      int stepBudget = 50000;
      int episodeCount = 0;
      double accumulatedReturn = 0.0d;

      // The budget starts positive, so at least one episode always runs.
      do {
        RLGlue.RL_episode(stepBudget);

        // Charge the steps the episode actually took against the budget.
        stepBudget -= RLGlue.RL_num_steps();

        accumulatedReturn += RLGlue.RL_return();
        episodeCount++;
      } while (stepBudget > 0);

      String summary =
          "MDP "
              + mdpIndex
              + " completed with "
              + episodeCount
              + " episodes, got "
              + accumulatedReturn
              + " reward";
      System.out.println(summary);

      // release RL-Glue before loading the next MDP (or exiting after the last one)
      RLGlue.RL_cleanup();
    }
    System.exit(0);
  }
  /**
   * Runs a Teachingbox SARSA experiment against an RL-Glue environment.
   *
   * <p>Steps, in order:
   *
   * <ol>
   *   <li>start the RL-Glue agent adapter in its own thread and call {@code RL_init} to obtain
   *       the task specification;
   *   <li>build a normalized tile-coding network (5 tilings over two dimensions in [0, 1]) and a
   *       Q-function on top of it;
   *   <li>create an epsilon-greedy policy (epsilon 0.1) and a gradient-descent SARSA learner
   *       (alpha 0.5, gamma 1.0, lambda 0.9) and hand the resulting agent to the adapter;
   *   <li>run the experiment with a value-function surface plotter attached as an observer;
   *   <li>clean up RL-Glue and terminate the JVM with status 0.
   * </ol>
   *
   * @param args command-line arguments (not used)
   */
  public static void main(String[] args) {

    /* Log level */
    Logger.getRootLogger().setLevel(Level.INFO);

    /* ---------------- Setup Agent ---------------- */
    // create the RL-Glue agent; it supplies the task specification and the action set that the
    // learner's Q-function approximator needs
    RlGlueAgent rlGlueAgent = new RlGlueAgent();
    // create an agentLoader that will start the agent when its run method is called
    AgentLoader agentLoader = new AgentLoader(rlGlueAgent);
    // run the agent in its own thread so that agent and environment can run asynchronously
    Thread agentThread = new Thread(agentLoader);
    agentThread.start();

    String taskSpec = RLGlue.RL_init();
    System.out.println("Task-Specification: " + taskSpec);

    /* ---------------- Configure tile-coding approximation ---------------- */
    // the number of tilings/layers
    int nTilings = 5;

    // the configuration, one row per dimension: {from, to, number of discs}
    double[][] config =
        new double[][] {
          {0, 1.0, 5},
          {0, 1.0, 5},
        };

    // create square tilings
    Network net = new Network();
    net.setIsNormalized(true);
    // NOTE(review): clone() on a double[][] is shallow, so optimizationConfig shares its row
    // arrays with config. Confirm whether GridHashFeatureGenerator modifies the rows; if it
    // does, a per-row copy is needed here.
    double[][] optimizationConfig = config.clone();
    net.setFeatureGenerator(
        new GridHashFeatureGenerator(optimizationConfig, new TileAndIndexBoundingBoxCalculator()));
    net.add(TileCodingFactory.createTilings(config, nTilings));

    // setup Q-Function
    QFeatureFunction Q = new QFeatureFunction(net, rlGlueAgent.getTeachingboxActionSet());

    /* ---------------- Setup policy, learner and the Teachingbox agent ---------------- */
    // the ActionSet for the policy is read from the rlGlueAgent (RL_init must have been called
    // before!)
    EpsilonGreedyPolicy pi = new EpsilonGreedyPolicy(Q, rlGlueAgent.getTeachingboxActionSet(), 0.1);
    System.out.println("POLICY-LEARNER ActionSet: " + rlGlueAgent.getTeachingboxActionSet());
    GradientDescentSarsaLearner learner =
        new GradientDescentSarsaLearner(Q, net, rlGlueAgent.getTeachingboxActionSet());
    learner.setAlpha(0.5);
    learner.setGamma(1.0);
    learner.setLambda(0.9);
    Agent tbAgent = new Agent(pi);
    tbAgent.addObserver(learner);

    /* set the Teachingbox agent in the RL-Glue agent adapter */
    rlGlueAgent.setTeachingBoxAgent(tbAgent);

    /* ---------------- Setup experiment and plotting ---------------- */
    RLGlueRemoteEnvironment rlEnv = new RLGlueRemoteEnvironment();
    Experiment experiment = new Experiment(tbAgent, rlEnv, 100, 1000);

    // 3D PLOTTING
    // draw the maximum value of the QFunction
    // to plot the corresponding VFunction we just have to pass in the policy
    // as well as the actionSet
    ValueFunctionEQ V = new ValueFunctionEQ(Q);
    V.costfunction = true;
    Plotter surfacePlotter =
        new ValueFunctionSurfacePlotter(V, "[0:0.02:1.0]", "[0:0.02:1.0]", "PuddleWorld");
    // wrap it in a runtime plotter that triggers the surface plotter every 10th episode;
    // keeping the wrapper in its own typed variable avoids a downcast when registering it
    RuntimePlotter runtimePlotter =
        new RuntimePlotter(surfacePlotter, RuntimePlotter.Mode.EPISODE, 10, net);
    // add the plotter as an observer to the experiment
    experiment.addObserver(runtimePlotter);

    // RUN THE EXPERIMENT
    experiment.run();

    // cleanup rl-glue at the end
    RLGlue.RL_cleanup();

    // Exit explicitly so the JVM stops even though the agent thread is still alive. Status 0
    // reports success; the previous non-zero status made a normal run look like a failure.
    System.exit(0);
  }
  /**
   * Runs the sample experiment against RL-Glue.
   *
   * <p>Initialises RL-Glue and prints the task specification, exchanges two sample messages with
   * the agent and with the environment (printing each reply), runs one episode via {@code
   * runEpisode(20000)} followed by one via {@code runEpisode(1)}, and cleans up RL-Glue. The
   * final "Stepping through an episode" banner is still printed, but the step-by-step
   * walkthrough that used to follow it is disabled and kept only as a comment.
   */
  public void runExperiment() {
    System.out.println("\n\nExperiment starting up!");
    final String environmentTaskSpec = RLGlue.RL_init();
    System.out.println("RL_init called, the environment sent task spec: " + environmentTaskSpec);

    System.out.println("\n\n----------Sending some sample messages----------");

    // The same two messages are sent first to the agent and then to the environment.
    final String nameQuery = "what is your name?";
    final String proverb = "If at first you don't succeed; call it version 1.0";

    final String agentNameReply = RLGlue.RL_agent_message(nameQuery);
    System.out.println("Agent responded to \"what is your name?\" with: " + agentNameReply);

    final String agentProverbReply = RLGlue.RL_agent_message(proverb);
    System.out.println(
        "Agent responded to \"If at first you don't succeed; call it version 1.0  \" with: "
            + agentProverbReply
            + "\n");

    final String envNameReply = RLGlue.RL_env_message(nameQuery);
    System.out.println("Environment responded to \"what is your name?\" with: " + envNameReply);

    final String envProverbReply = RLGlue.RL_env_message(proverb);
    System.out.println(
        "Environment responded to \"If at first you don't succeed; call it version 1.0  \" with: "
            + envProverbReply);

    System.out.println("\n\n----------Running a few episodes----------");
    final int longEpisodeCount = 1;
    int episodesRun = 0;
    while (episodesRun < longEpisodeCount) {
      runEpisode(20000);
      episodesRun++;
    }

    runEpisode(1);
    /* Remember that stepLimit of 0 means there is no limit at all! */
    // runEpisode(0);
    RLGlue.RL_cleanup();

    System.out.println("\n\n----------Stepping through an episode----------");

    // Disabled: step-by-step walkthrough of an episode, kept for reference.
    //
    // taskSpec = RLGlue.RL_init();
    // for(int i = 0; i < 2; i++) {
    // /*We could also start over and do another experiment */
    // /*We could run one step at a time instead of one episode at a time */
    // /*Start the episode */
    // Observation_action startResponse = RLGlue.RL_start();
    // int firstObservation = startResponse.o.intArray[0];
    // int firstAction = startResponse.a.intArray[0];
    // System.out.println("First observation and action were: " +
    // firstObservation + " and: " + firstAction);
    //
    // /*Run one step */
    // Reward_observation_action_terminal stepResponse = RLGlue.RL_step();
    //
    // /*Run until the episode ends*/
    // while (stepResponse.terminal != 1) {
    // stepResponse = RLGlue.RL_step();
    // if (stepResponse.terminal != 1) {
    // /*Could optionally print state,action pairs */
    // System.out.println(
    // ("(state,action)=(" +
    // stepResponse.o.intArray[0] +
    // "," +
    // stepResponse.a.intArray[0] + ")"));
    // }
    // }
    //
    // System.out.println("\n\n----------Summary----------");
    //
    // int totalSteps = RLGlue.RL_num_steps();
    // double totalReward = RLGlue.RL_return();
    // System.out.println("It ran for " + totalSteps +
    // " steps, total reward was: " + totalReward);
    // }
    // RLGlue.RL_cleanup();

  }