示例#1
0
 /**
  * Evaluates a genome for one RL-Glue episode and records the result on the genome.
  *
  * <p>The string form of the genome's phenotype is sent to the agent as a message, after which an
  * RL-Glue session is initialized, {@code RL_episode(steps)} is run, and the session is cleaned
  * up. The episode return is converted with {@code rewardToFitness} and handed to
  * {@code genome.addFitness}; the genome's episode counter is then incremented.
  *
  * @param genome the genome to evaluate; its fitness and episode count are updated in place
  */
 protected void runEpisode(MetaNEATGenome genome) {
   // The agent receives the phenotype description before the session is initialized.
   final String phenotypeDescription = genome.toPhenotype().toString();
   RLGlue.RL_agent_message(phenotypeDescription);

   RLGlue.RL_init();
   RLGlue.RL_episode(steps);

   // The return must be read before cleanup; it is folded into the genome's fitness.
   genome.addFitness(rewardToFitness(RLGlue.RL_return()));
   genome.incrEpisodes();

   RLGlue.RL_cleanup();
 }
示例#2
0
 /**
  * Runs the whole evolutionary experiment: {@code runs} independent runs, each evolving a freshly
  * constructed population for {@code generations} generations.
  *
  * <p>Within a generation the method
  *
  * <ol>
  *   <li>resets every genome's fitness and episode count,
  *   <li>optionally switches on agent-side learning and exploration ({@code enableLearning}),
  *   <li>evaluates the genomes in round-robin order, feeding the agent's reported weights back
  *       into the evaluated genome after every episode,
  *   <li>averages each genome's fitness over the episodes it actually ran (0 if it ran none),
  *   <li>clones the best-performing genome as the generation champion and scores it with
  *       {@code generalizationPerformance(1000, champion)} with learning and exploration
  *       switched off,
  *   <li>prints per-generation statistics and evolves the population.
  * </ol>
  *
  * <p>The champion with the highest generalization score over all generations of a run is scored
  * once more and printed when the run ends.
  *
  * <p>Every exception, including reflection failures while instantiating {@code algorithm}, is
  * caught here: its stack trace is printed, the remaining runs are abandoned, and nothing
  * propagates to the caller.
  */
 public void runExperiment() {
   // Progress is reported on stdout; the logger calls that used to mirror these prints were
   // disabled.
   System.out.println("Experiment starts!");
   try {
     for (int i = 1; i <= runs; i++) {
       System.out.println("Run " + i);

       // Build the population reflectively: `algorithm` names a class that declares a
       // (String) constructor, which is invoked with `agentParamsFile`.
       Class<?> neAlg = Class.forName(algorithm);
       Constructor<?> ctor = neAlg.getDeclaredConstructor(String.class);
       MetaNEATEvolvable population = (MetaNEATEvolvable) ctor.newInstance(agentParamsFile);

       // NEGATIVE_INFINITY instead of a finite sentinel: any real score beats it, so a champion
       // is always selected even when every score is extremely low.
       MetaNEATGenome championOfChampions = null;
       double championOfChampionsFitness = Double.NEGATIVE_INFINITY;

       for (int gen = 1; gen <= generations; gen++) {
         MetaNEATGenome champion = null;
         double championFitness = Double.NEGATIVE_INFINITY;

         // Start every genome from a clean slate for this generation.
         for (int pop = 0; pop < population.getPopulationSize(); pop++) {
           MetaNEATGenome genome = population.getGenome(pop);
           genome.setFitness(0);
           genome.setEpisodes(0);
         }

         if (enableLearning) {
           // The numeric suffixes are interpreted by the agent (presumably the learning rate
           // and the exploration rate - confirm against the agent implementation).
           RLGlue.RL_agent_message("enable-learning:0.00001");
           RLGlue.RL_agent_message("enable-exploration:0.01");
         }

         // Evaluate the population. Selection is round-robin (not random), so each genome is
         // evaluated `episodes` times as long as the population size stays constant here.
         for (int episode = 0; episode < episodes * population.getPopulationSize(); episode++) {
           int selection = (episode % population.getPopulationSize());
           MetaNEATGenome genome = population.getGenome(selection);
           runEpisode(genome);
           // Hand whatever weights the agent reports back to the genome that was just run.
           String weights = RLGlue.RL_agent_message("get-learned-weights");
           genome.message(weights);
         }

         // Running sums of the per-genome structural parameters; printed as means below.
         double Dmean = 0;
         double Nmean = 0;
         double rhomean = 0;

         // Calculate fitness and pick the generation champion.
         double[] populationPerformance = new double[population.getPopulationSize()];
         for (int pop = 0; pop < population.getPopulationSize(); pop++) {
           MetaNEATGenome genome = population.getGenome(pop);
           // Fitness was accumulated per episode; turn it into a per-episode average.
           if (genome.getEpisodes() > 0) {
             genome.setFitness(genome.getFitness() / genome.getEpisodes());
           } else {
             genome.setFitness(0);
           }

           // The statistics below require NEAR genomes; any other type fails the cast.
           NEARGenome nearGenome = (NEARGenome) genome;
           Dmean += nearGenome.getSparseness();
           Nmean += nearGenome.getNInternalUnits();
           rhomean += nearGenome.getSpectralRadius();

           populationPerformance[pop] = fitnessToReward(genome.getFitness());
           if (championFitness < populationPerformance[pop]) {
             championFitness = populationPerformance[pop];
             // Clone so the champion is unaffected by the evolution step further down.
             champion = genome.clone();
           }
         }

         System.out.println("Mean N: " + gen + " " + (Nmean / population.getPopulationSize()));
         System.out.println("Mean D: " + gen + " " + (Dmean / population.getPopulationSize()));
         System.out.println("Mean rho: " + gen + " " + (rhomean / population.getPopulationSize()));

         // Score the champion with learning and exploration switched off.
         RLGlue.RL_agent_message("disable-learning");
         RLGlue.RL_agent_message("disable-exploration");
         double championGeneralizationPerf = generalizationPerformance(1000, champion);

         System.out.println(
             "Generation Champion: " + gen + " " + StatUtils.max(populationPerformance));
         System.out.println(
             "Generalization Performance: " + gen + " " + championGeneralizationPerf);
         System.out.println(
             "Avg Performance: " + gen + " " + StatUtils.mean(populationPerformance));
         System.out.println(champion);

         // The overall champion is chosen by generalization score, not by training performance.
         if (championOfChampionsFitness < championGeneralizationPerf) {
           championOfChampionsFitness = championGeneralizationPerf;
           championOfChampions = champion.clone();
         }

         population.evolveNextGeneration();
         System.out.println("===*** " + gen + " ***===");
       }

       RLGlue.RL_agent_message("disable-learning");
       RLGlue.RL_agent_message("disable-exploration");

       // NOTE(review): "Performace" is misspelled in the two labels below; the text is left
       // unchanged in case result-parsing scripts match on it.
       System.out.println("Champion Of Generations Performace: " + championOfChampionsFitness);
       double cocGenPerf = generalizationPerformance(1000, championOfChampions);
       System.out.println("Champion Of Generations Generalization Performace: " + cocGenPerf);
       System.out.print("ChampNet: ");
       System.out.println(championOfChampions);
     }
   } catch (Exception e) {
     // Best-effort by design: report the failure and return instead of propagating it.
     e.printStackTrace();
   }
 }
示例#3
0
  /**
   * Runs the sample RL-Glue experiment.
   *
   * <p>The session is initialized and the task spec returned by {@code RL_init} is printed. Two
   * sample messages are then sent to the agent and to the environment, with each response
   * printed. After that {@code runEpisode(20000)} and {@code runEpisode(1)} are each called once
   * and the session is cleaned up.
   *
   * <p>NOTE: the "Stepping through an episode" banner is still printed at the end, but the
   * step-by-step walkthrough it announces is currently commented out.
   */
  public void runExperiment() {
    final String nameQuestion = "what is your name?";
    final String versionQuip = "If at first you don't succeed; call it version 1.0";

    System.out.println("\n\nExperiment starting up!");
    String taskSpec = RLGlue.RL_init();
    System.out.println("RL_init called, the environment sent task spec: " + taskSpec);

    System.out.println("\n\n----------Sending some sample messages----------");

    /* Talk to the agent first... */
    String agentReply = RLGlue.RL_agent_message(nameQuestion);
    System.out.println("Agent responded to \"what is your name?\" with: " + agentReply);

    agentReply = RLGlue.RL_agent_message(versionQuip);
    System.out.println(
        "Agent responded to \"If at first you don't succeed; call it version 1.0  \" with: "
            + agentReply
            + "\n");

    /* ...then send the same two messages to the environment. */
    String envReply = RLGlue.RL_env_message(nameQuestion);
    System.out.println("Environment responded to \"what is your name?\" with: " + envReply);

    envReply = RLGlue.RL_env_message(versionQuip);
    System.out.println(
        "Environment responded to \"If at first you don't succeed; call it version 1.0  \" with: "
            + envReply);

    System.out.println("\n\n----------Running a few episodes----------");
    // Number of episodes run with the 20000 argument; currently a single one.
    final int longEpisodeCount = 1;
    for (int done = 0; done < longEpisodeCount; done++) {
      runEpisode(20000);
    }

    runEpisode(1);
    /* Remember that stepLimit of 0 means there is no limit at all! */
    // runEpisode(0);
    RLGlue.RL_cleanup();

    System.out.println("\n\n----------Stepping through an episode----------");

    // Disabled step-by-step walkthrough, kept for reference:
    //
    // taskSpec = RLGlue.RL_init();
    // for (int i = 0; i < 2; i++) {
    //   /* We could also start over and do another experiment */
    //   /* We could run one step at a time instead of one episode at a time */
    //   /* Start the episode */
    //   Observation_action startResponse = RLGlue.RL_start();
    //   int firstObservation = startResponse.o.intArray[0];
    //   int firstAction = startResponse.a.intArray[0];
    //   System.out.println("First observation and action were: " +
    //       firstObservation + " and: " + firstAction);
    //
    //   /* Run one step */
    //   Reward_observation_action_terminal stepResponse = RLGlue.RL_step();
    //
    //   /* Run until the episode ends */
    //   while (stepResponse.terminal != 1) {
    //     stepResponse = RLGlue.RL_step();
    //     if (stepResponse.terminal != 1) {
    //       /* Could optionally print state,action pairs */
    //       System.out.println(
    //           ("(state,action)=(" +
    //           stepResponse.o.intArray[0] +
    //           "," +
    //           stepResponse.a.intArray[0] + ")"));
    //     }
    //   }
    //
    //   System.out.println("\n\n----------Summary----------");
    //
    //   int totalSteps = RLGlue.RL_num_steps();
    //   double totalReward = RLGlue.RL_return();
    //   System.out.println("It ran for " + totalSteps +
    //       " steps, total reward was: " + totalReward);
    // }
    // RLGlue.RL_cleanup();

  }