示例#1
0
 /**
  * Evaluates {@code genome} for one RL-Glue episode and records the outcome on it.
  *
  * <p>The genome's phenotype is sent to the agent as a string message, a fresh RL-Glue session is
  * initialised, one episode of at most {@code steps} steps is run, and the episode return is
  * converted with {@code rewardToFitness} and added to the genome's fitness. The genome's episode
  * counter is incremented before the session is cleaned up.
  *
  * @param genome the genome to evaluate; its fitness and episode count are updated
  */
 protected void runEpisode(MetaNEATGenome genome) {
   final String phenotypeMessage = genome.toPhenotype().toString();
   RLGlue.RL_agent_message(phenotypeMessage);

   RLGlue.RL_init();
   RLGlue.RL_episode(steps);
   final double episodeReturn = RLGlue.RL_return();

   genome.addFitness(rewardToFitness(episodeReturn));
   genome.incrEpisodes();

   RLGlue.RL_cleanup();
 }
  /**
   * Starts the RL-Glue, environment and agent threads, then trains on Tetris MDPs 0 through 19.
   *
   * <p>Each MDP is given a budget of 50000 steps; episodes are run back to back until the budget
   * is used up, and a one-line summary (episode count and summed return) is printed per MDP.
   * The JVM is terminated with exit status 0 once all MDPs have been processed.
   *
   * @param args command-line arguments (unused)
   */
  public static void main(String[] args) {

    Thread glueWorker = new RLGlueThread();
    glueWorker.start();

    EnvThread environmentWorker = new EnvThread();
    environmentWorker.start();

    AgentThread agentWorker = new AgentThread();
    agentWorker.start();

    // The loop index doubles as the id of the MDP to load; valid ids are in [0,19].
    for (int mdp = 0; mdp < 20; mdp++) {
      consoleTrainerHelper.loadTetris(mdp);
      RLGlue.RL_init();

      int budget = 50000;
      int episodeCount = 0;
      double accumulatedReturn = 0.0d;

      // The budget starts positive, so at least one episode always runs.
      do {
        RLGlue.RL_episode(budget);
        budget -= RLGlue.RL_num_steps();
        accumulatedReturn += RLGlue.RL_return();
        episodeCount++;
      } while (budget > 0);

      String summary =
          "MDP " + mdp + " completed with " + episodeCount + " episodes, got " + accumulatedReturn
              + " reward";
      System.out.println(summary);

      // Release the RL-Glue session before moving on to the next MDP.
      RLGlue.RL_cleanup();
    }
    System.exit(0);
  }
示例#3
0
  /*
   * Run a single episode capped at stepLimit steps, print a one-line summary
   * (episode index, step count, return, and the value returned by RL_episode),
   * then advance the episode counter.
   */
  private void runEpisode(int stepLimit) {
    final int naturalEnd = RLGlue.RL_episode(stepLimit);
    final int stepsTaken = RLGlue.RL_num_steps();
    final double episodeReturn = RLGlue.RL_return();

    StringBuilder report = new StringBuilder("Episode ");
    report.append(whichEpisode).append("\t ");
    report.append(stepsTaken).append(" steps \t");
    report.append(episodeReturn).append(" total reward\t ");
    report.append(naturalEnd).append(" natural end");
    System.out.println(report.toString());

    whichEpisode++;
  }
示例#4
0
  /**
   * Sends a custom string-valued environment message with the "GETTETRLAISWORLD" payload through
   * RL-Glue and parses the reply.
   *
   * @return the parsed response, or {@code null} if the reply could not be parsed as an RL-Viz
   *     message (the failure is reported on standard output)
   */
  public static synchronized TetrisWorldResponse Execute() {
    final String request =
        AbstractMessage.makeMessage(
            MessageUser.kEnv.id(),
            MessageUser.kBenchmark.id(),
            EnvMessageType.kEnvCustom.id(),
            MessageValueType.kString.id(),
            "GETTETRLAISWORLD");

    final String rawReply = RLGlue.RL_env_message(request);

    try {
      return new TetrisWorldResponse(rawReply);
    } catch (NotAnRLVizMessageException ex) {
      System.out.println("Not a valid RL Viz Message in Tetrlais World Response" + ex);
      return null;
    }
  }
示例#5
0
 /**
  * Runs the evolutionary experiment for {@code runs} independent runs.
  *
  * <p>For each run a population is created reflectively from the class named by {@code algorithm}
  * (using its single-{@code String} constructor with {@code agentParamsFile}). For each of the
  * {@code generations} generations the method:
  *
  * <ol>
  *   <li>resets every genome's fitness and episode count;
  *   <li>optionally enables learning/exploration on the agent;
  *   <li>evaluates genomes round-robin for {@code episodes} episodes each, feeding the agent's
  *       learned weights back to the evaluated genome;
  *   <li>averages fitness per genome, prints population statistics and picks the generation
  *       champion;
  *   <li>scores the champion with {@code generalizationPerformance} and keeps the best champion
  *       seen so far;
  *   <li>evolves the next generation.
  * </ol>
  *
  * <p>Any exception is caught and its stack trace printed; the method then returns normally.
  */
 public void runExperiment() {
   System.out.println("Experiment starts!");
   try {
     for (int i = 1; i <= runs; i++) {
       System.out.println("Run " + i);
       // Instantiate the evolvable population from the configured class name.
       Class<?> neAlg = Class.forName(algorithm);
       Constructor<?> ctor = neAlg.getDeclaredConstructor(String.class);
       MetaNEATEvolvable population = (MetaNEATEvolvable) ctor.newInstance(agentParamsFile);
       MetaNEATGenome championOfChampions = null;
       // Start from negative infinity instead of a finite magic number: with a finite sentinel,
       // a generation whose scores are all at or below it would leave the champion null and
       // crash on clone() further down.
       double championOfChampionsFitness = Double.NEGATIVE_INFINITY;
       for (int gen = 1; gen <= generations; gen++) {
         MetaNEATGenome champion = null;
         double championFitness = Double.NEGATIVE_INFINITY;
         // Reset the per-generation accumulators on every genome.
         for (int pop = 0; pop < population.getPopulationSize(); pop++) {
           MetaNEATGenome genome = population.getGenome(pop);
           genome.setFitness(0);
           genome.setEpisodes(0);
         }
         if (enableLearning) {
           RLGlue.RL_agent_message("enable-learning:0.00001");
           RLGlue.RL_agent_message("enable-exploration:0.01");
         }
         // Evaluate the population: genomes are visited round-robin, so each one receives
         // exactly `episodes` evaluations.
         for (int episode = 0; episode < episodes * population.getPopulationSize(); episode++) {
           int selection = (episode % population.getPopulationSize());
           MetaNEATGenome genome = population.getGenome(selection);
           runEpisode(genome);
           // Hand the weights reported by the agent back to the genome that was just evaluated.
           String weights = RLGlue.RL_agent_message("get-learned-weights");
           genome.message(weights);
         }
         double Dmean = 0;
         double Nmean = 0;
         double rhomean = 0;
         // Turn accumulated fitness into a per-episode average and collect statistics.
         double[] populationPerformance = new double[population.getPopulationSize()];
         for (int pop = 0; pop < population.getPopulationSize(); pop++) {
           MetaNEATGenome genome = population.getGenome(pop);
           if (genome.getEpisodes() > 0) {
             genome.setFitness(genome.getFitness() / genome.getEpisodes());
           } else {
             genome.setFitness(0);
           }
           // NOTE(review): assumes every genome in the population is a NEARGenome; a different
           // genome type would raise ClassCastException here.
           Dmean += ((NEARGenome) genome).getSparseness();
           Nmean += ((NEARGenome) genome).getNInternalUnits();
           rhomean += ((NEARGenome) genome).getSpectralRadius();
           populationPerformance[pop] = fitnessToReward(genome.getFitness());
           if (championFitness < populationPerformance[pop]) {
             championFitness = populationPerformance[pop];
             champion = genome.clone();
           }
         }
         System.out.println("Mean N: " + gen + " " + (Nmean / population.getPopulationSize()));
         System.out.println("Mean D: " + gen + " " + (Dmean / population.getPopulationSize()));
         System.out.println("Mean rho: " + gen + " " + (rhomean / population.getPopulationSize()));
         // Learning and exploration are switched off before the champion is scored.
         RLGlue.RL_agent_message("disable-learning");
         RLGlue.RL_agent_message("disable-exploration");
         double championGeneralizationPerf = generalizationPerformance(1000, champion);
         System.out.println(
             "Generation Champion: " + gen + " " + StatUtils.max(populationPerformance));
         System.out.println(
             "Generalization Performance: " + gen + " " + championGeneralizationPerf);
         System.out.println(
             "Avg Performance: " + gen + " " + StatUtils.mean(populationPerformance));
         System.out.println(champion);
         // Keep a copy of the best champion across generations, ranked by generalization score.
         if (championOfChampionsFitness < championGeneralizationPerf) {
           championOfChampionsFitness = championGeneralizationPerf;
           championOfChampions = champion.clone();
         }
         population.evolveNextGeneration();
         System.out.println("===*** " + gen + " ***===");
       }
       RLGlue.RL_agent_message("disable-learning");
       RLGlue.RL_agent_message("disable-exploration");
       System.out.println("Champion Of Generations Performace: " + championOfChampionsFitness);
       double cocGenPerf = generalizationPerformance(1000, championOfChampions);
       System.out.println("Champion Of Generations Generalization Performace: " + cocGenPerf);
       System.out.print("ChampNet: ");
       System.out.println(championOfChampions);
     }
   } catch (Exception e) {
     e.printStackTrace();
   }
 }
  /**
   * Wires a Teachingbox SARSA agent with tile-coding function approximation to RL-Glue, runs the
   * experiment with a live value-function plot, and exits.
   *
   * <p>The RL-Glue agent adapter is started on its own thread, {@code RL_init} is called to obtain
   * the task specification, then the Q-function, policy and learner are built and attached to the
   * adapter before the experiment is run. On normal completion the JVM exits with status 0.
   *
   * @param args command-line arguments (unused)
   */
  public static void main(String[] args) {

    /* Log level*/
    Logger.getRootLogger().setLevel(Level.INFO);

    // ---------------- Setup Agent ----------------
    /* create RLGlue Agent to get task specification (needed by the learners Q-function approximator) */
    RlGlueAgent rlGlueAgent = new RlGlueAgent();
    // create an agentLoader that will start the agent when its run method is called
    AgentLoader agentLoader = new AgentLoader(rlGlueAgent);
    // create thread so that the agent and environment can run asynchronously
    Thread agentThread = new Thread(agentLoader);
    // start the thread
    agentThread.start();

    String taskSpec = RLGlue.RL_init();
    System.out.println("Task-Specification: " + taskSpec);

    // ---------------- Configure Tile-Coding approximation ----------------
    // BEGIN TILE CODING APPROXIMATION
    // the number of tilings/layers
    int nTilings = 5;

    // the configuration. {from, to, number of discs}
    double[][] config =
        new double[][] {
          {0, 1.0, 5},
          {0, 1.0, 5},
        };

    // create square tilings
    Network net = new Network();
    net.setIsNormalized(true);
    // NOTE: clone() on a 2-D array is shallow; the inner rows are shared with `config`.
    double[][] optimizationConfig = config.clone();
    net.setFeatureGenerator(
        new GridHashFeatureGenerator(optimizationConfig, new TileAndIndexBoundingBoxCalculator()));
    net.add(TileCodingFactory.createTilings(config, nTilings));

    // setup Q-Function
    QFeatureFunction Q = new QFeatureFunction(net, rlGlueAgent.getTeachingboxActionSet());
    // END TILE-CODING

    // ---------------- Setup policy, learner & the TB's agent ----------------
    // the ActionSet for the policy is read from the rlGlueAgent (RL_init must have been called
    // before!)
    EpsilonGreedyPolicy pi = new EpsilonGreedyPolicy(Q, rlGlueAgent.getTeachingboxActionSet(), 0.1);
    System.out.println("POLICY-LEARNER ActionSet: " + rlGlueAgent.getTeachingboxActionSet());
    GradientDescentSarsaLearner learner =
        new GradientDescentSarsaLearner(Q, net, rlGlueAgent.getTeachingboxActionSet());
    learner.setAlpha(0.5);
    learner.setGamma(1.0);
    learner.setLambda(0.9);
    Agent tbAgent = new Agent(pi);
    tbAgent.addObserver(learner);

    /* SET THE TEACHINGBOX-AGENT IN THE RL-GLUE-AGENT-ADAPTER */
    rlGlueAgent.setTeachingBoxAgent(tbAgent);

    // ---------------- Setup Experiment and Plotting ----------------
    RLGlueRemoteEnvironment rlEnv = new RLGlueRemoteEnvironment();
    Experiment experiment = new Experiment(tbAgent, rlEnv, 100, 1000);

    // 3D PLOTTING
    // draw the maximum value of the QFunction
    // to plot the corresponding VFunction we just have to pass in the policy
    // as well as the actionSet
    ValueFunctionEQ V = new ValueFunctionEQ(Q);
    V.costfunction = true;
    Plotter Vplotter =
        new ValueFunctionSurfacePlotter(V, "[0:0.02:1.0]", "[0:0.02:1.0]", "PuddleWorld");
    // use action runtime plotter, that calls the ValueFunctionPlotter every 10th episode
    Vplotter = new RuntimePlotter(Vplotter, RuntimePlotter.Mode.EPISODE, 10, net);
    // add the plotter as an observer to the experiment
    experiment.addObserver((RuntimePlotter) Vplotter);

    // RUN THE EXPERIMENT
    experiment.run();

    // cleanup rl-glue at the end
    RLGlue.RL_cleanup();

    // The run finished normally, so report success (status 0) rather than a failure code.
    System.exit(0);
  }
示例#7
0
  /**
   * Runs a small demonstration experiment against RL-Glue.
   *
   * <p>The method initialises RL-Glue and prints the task specification, exchanges two sample
   * messages with the agent and two with the environment, runs one episode limited to 20000 steps
   * followed by one limited to a single step, and then cleans up. The final "Stepping through an
   * episode" banner is still printed, but the step-by-step demo that used to follow it is
   * disabled (kept below as commented-out reference code).
   */
  public void runExperiment() {
    final String nameQuery = "what is your name?";
    final String quip = "If at first you don't succeed; call it version 1.0";

    System.out.println("\n\nExperiment starting up!");
    final String taskSpec = RLGlue.RL_init();
    System.out.println("RL_init called, the environment sent task spec: " + taskSpec);

    System.out.println("\n\n----------Sending some sample messages----------");

    /* Talk to the agent a bit... */
    final String agentNameReply = RLGlue.RL_agent_message(nameQuery);
    System.out.println("Agent responded to \"what is your name?\" with: " + agentNameReply);

    final String agentQuipReply = RLGlue.RL_agent_message(quip);
    System.out.println(
        "Agent responded to \"If at first you don't succeed; call it version 1.0  \" with: "
            + agentQuipReply
            + "\n");

    /* ...and then to the environment. */
    final String envNameReply = RLGlue.RL_env_message(nameQuery);
    System.out.println("Environment responded to \"what is your name?\" with: " + envNameReply);

    final String envQuipReply = RLGlue.RL_env_message(quip);
    System.out.println(
        "Environment responded to \"If at first you don't succeed; call it version 1.0  \" with: "
            + envQuipReply);

    System.out.println("\n\n----------Running a few episodes----------");
    runEpisode(20000);
    runEpisode(1);
    /* Remember that stepLimit of 0 means there is no limit at all! */
    // runEpisode(0);
    RLGlue.RL_cleanup();

    System.out.println("\n\n----------Stepping through an episode----------");

    // Disabled step-by-step demo, kept for reference:
    //
    // taskSpec = RLGlue.RL_init();
    // for (int i = 0; i < 2; i++) {
    //   /* We could also start over and do another experiment */
    //   /* We could run one step at a time instead of one episode at a time */
    //   /* Start the episode */
    //   Observation_action startResponse = RLGlue.RL_start();
    //   int firstObservation = startResponse.o.intArray[0];
    //   int firstAction = startResponse.a.intArray[0];
    //   System.out.println("First observation and action were: " +
    //       firstObservation + " and: " + firstAction);
    //
    //   /* Run one step */
    //   Reward_observation_action_terminal stepResponse = RLGlue.RL_step();
    //
    //   /* Run until the episode ends */
    //   while (stepResponse.terminal != 1) {
    //     stepResponse = RLGlue.RL_step();
    //     if (stepResponse.terminal != 1) {
    //       /* Could optionally print state,action pairs */
    //       System.out.println(
    //           ("(state,action)=(" +
    //               stepResponse.o.intArray[0] +
    //               "," +
    //               stepResponse.a.intArray[0] + ")"));
    //     }
    //   }
    //
    //   System.out.println("\n\n----------Summary----------");
    //
    //   int totalSteps = RLGlue.RL_num_steps();
    //   double totalReward = RLGlue.RL_return();
    //   System.out.println("It ran for " + totalSteps +
    //       " steps, total reward was: " + totalReward);
    // }
    // RLGlue.RL_cleanup();

  }