/** Runs one evaluation episode for the given genome and accumulates its fitness. */
protected void runEpisode(MetaNEATGenome genome) {
    // Send the genome's phenotype description to the agent before (re)initializing.
    RLGlue.RL_agent_message(genome.toPhenotype().toString());
    RLGlue.RL_init();
    RLGlue.RL_episode(steps);
    double totalReward = RLGlue.RL_return();
    genome.addFitness(rewardToFitness(totalReward));
    genome.incrEpisodes();
    RLGlue.RL_cleanup();
}
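// `rewardToFitness` and its inverse `fitnessToReward` (used in runExperiment below) are
// assumed to be defined elsewhere in this class. A minimal sketch, assuming the mapping
// is a simple affine shift that keeps fitness non-negative for the evolutionary
// algorithm; the offset is hypothetical and task-dependent, and the actual mapping may
// differ:
private static final double FITNESS_OFFSET = 0; // hypothetical; depends on the task's reward range

protected double rewardToFitness(double reward) {
    // Shift the episode return so fitness stays >= 0 (illustrative only).
    return reward + FITNESS_OFFSET;
}

protected double fitnessToReward(double fitness) {
    // Inverse of the shift above, recovering the raw episode return.
    return fitness - FITNESS_OFFSET;
}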
public void runExperiment() {
    System.out.println("Experiment starts!");
    try {
        for (int i = 1; i <= runs; i++) {
            System.out.println("Run " + i);

            // Create the population (the neuroevolution algorithm) via reflection
            // from the configured algorithm class name.
            Class<?> neAlg = Class.forName(algorithm);
            Constructor<?> ctor = neAlg.getDeclaredConstructor(String.class);
            MetaNEATEvolvable population = (MetaNEATEvolvable) ctor.newInstance(agentParamsFile);

            MetaNEATGenome championOfChampions = null;
            double championOfChampionsFitness = -1000000;

            for (int gen = 1; gen <= generations; gen++) {
                MetaNEATGenome champion = null;
                double championFitness = -1000000;

                // Reset per-generation fitness statistics.
                for (int pop = 0; pop < population.getPopulationSize(); pop++) {
                    MetaNEATGenome genome = population.getGenome(pop);
                    genome.setFitness(0);
                    genome.setEpisodes(0);
                }

                if (enableLearning) {
                    RLGlue.RL_agent_message("enable-learning:0.00001");
                    RLGlue.RL_agent_message("enable-exploration:0.01");
                }

                // Evaluate the population: each genome gets `episodes` evaluations,
                // assigned round-robin (a random pick via Utils.rand.nextInt(...) would also work).
                for (int episode = 0; episode < episodes * population.getPopulationSize(); episode++) {
                    int selection = episode % population.getPopulationSize();
                    MetaNEATGenome genome = population.getGenome(selection);
                    runEpisode(genome);
                    // Feed any weights learned during the episode back into the genome.
                    String weights = RLGlue.RL_agent_message("get-learned-weights");
                    genome.message(weights);
                }

                double dMean = 0;
                double nMean = 0;
                double rhoMean = 0;

                // Average accumulated fitness over episodes and find the generation champion.
                double[] populationPerformance = new double[population.getPopulationSize()];
                for (int pop = 0; pop < population.getPopulationSize(); pop++) {
                    MetaNEATGenome genome = population.getGenome(pop);
                    if (genome.getEpisodes() > 0) {
                        genome.setFitness(genome.getFitness() / genome.getEpisodes());
                    } else {
                        genome.setFitness(0);
                    }
                    dMean += ((NEARGenome) genome).getSparseness();
                    nMean += ((NEARGenome) genome).getNInternalUnits();
                    rhoMean += ((NEARGenome) genome).getSpectralRadius();
                    populationPerformance[pop] = fitnessToReward(genome.getFitness());
                    if (championFitness < populationPerformance[pop]) {
                        championFitness = populationPerformance[pop];
                        champion = genome.clone();
                    }
                }

                System.out.println("Mean N: " + gen + " " + (nMean / population.getPopulationSize()));
                System.out.println("Mean D: " + gen + " " + (dMean / population.getPopulationSize()));
                System.out.println("Mean rho: " + gen + " " + (rhoMean / population.getPopulationSize()));

                // Measure the champion's generalization with learning and exploration off.
                RLGlue.RL_agent_message("disable-learning");
                RLGlue.RL_agent_message("disable-exploration");
                double championGeneralizationPerf = generalizationPerformance(1000, champion);

                System.out.println("Generation Champion: " + gen + " " + StatUtils.max(populationPerformance));
                System.out.println("Generalization Performance: " + gen + " " + championGeneralizationPerf);
                System.out.println("Avg Performance: " + gen + " " + StatUtils.mean(populationPerformance));
                System.out.println(champion);

                // Track the best champion across generations by generalization performance.
                if (championOfChampionsFitness < championGeneralizationPerf) {
                    championOfChampionsFitness = championGeneralizationPerf;
                    championOfChampions = champion.clone();
                }

                population.evolveNextGeneration();
                System.out.println("===*** " + gen + " ***===");
            }

            RLGlue.RL_agent_message("disable-learning");
            RLGlue.RL_agent_message("disable-exploration");
            System.out.println("Champion Of Generations Performance: " + championOfChampionsFitness);
            double cocGenPerf = generalizationPerformance(1000, championOfChampions);
            System.out.println("Champion Of Generations Generalization Performance: " + cocGenPerf);
            System.out.print("ChampNet: ");
            System.out.println(championOfChampions);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
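// `generalizationPerformance(n, genome)` is called above but not shown. A plausible
// sketch, assuming it averages the raw episode return of the given genome over n
// evaluation episodes (the caller has already disabled learning and exploration).
// This mirrors the runEpisode pattern above and is an illustration, not necessarily
// the actual implementation:
protected double generalizationPerformance(int nEpisodes, MetaNEATGenome genome) {
    double sum = 0;
    for (int i = 0; i < nEpisodes; i++) {
        RLGlue.RL_agent_message(genome.toPhenotype().toString());
        RLGlue.RL_init();
        RLGlue.RL_episode(steps);
        sum += RLGlue.RL_return();
        RLGlue.RL_cleanup();
    }
    return sum / nEpisodes;
}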
public void runExperiment() {
    System.out.println("\n\nExperiment starting up!");
    String taskSpec = RLGlue.RL_init();
    System.out.println("RL_init called, the environment sent task spec: " + taskSpec);

    System.out.println("\n\n----------Sending some sample messages----------");

    /* Talk to the agent and environment a bit... */
    String responseMessage = RLGlue.RL_agent_message("what is your name?");
    System.out.println("Agent responded to \"what is your name?\" with: " + responseMessage);

    responseMessage = RLGlue.RL_agent_message("If at first you don't succeed; call it version 1.0");
    System.out.println("Agent responded to \"If at first you don't succeed; call it version 1.0\" with: "
            + responseMessage + "\n");

    responseMessage = RLGlue.RL_env_message("what is your name?");
    System.out.println("Environment responded to \"what is your name?\" with: " + responseMessage);

    responseMessage = RLGlue.RL_env_message("If at first you don't succeed; call it version 1.0");
    System.out.println("Environment responded to \"If at first you don't succeed; call it version 1.0\" with: "
            + responseMessage);

    System.out.println("\n\n----------Running a few episodes----------");
    for (int i = 0; i < 1; i++) {
        runEpisode(20000);
    }
    runEpisode(1);
    /* Remember that a stepLimit of 0 means there is no limit at all! */
    // runEpisode(0);
    RLGlue.RL_cleanup();

    System.out.println("\n\n----------Stepping through an episode----------");
    // The step-by-step walkthrough below is currently disabled:
    // taskSpec = RLGlue.RL_init();
    // for (int i = 0; i < 2; i++) {
    //     /* We could also start over and do another experiment, */
    //     /* running one step at a time instead of one episode at a time. */
    //     /* Start the episode. */
    //     Observation_action startResponse = RLGlue.RL_start();
    //     int firstObservation = startResponse.o.intArray[0];
    //     int firstAction = startResponse.a.intArray[0];
    //     System.out.println("First observation and action were: " + firstObservation
    //             + " and: " + firstAction);
    //
    //     /* Run one step. */
    //     Reward_observation_action_terminal stepResponse = RLGlue.RL_step();
    //
    //     /* Run until the episode ends. */
    //     while (stepResponse.terminal != 1) {
    //         stepResponse = RLGlue.RL_step();
    //         if (stepResponse.terminal != 1) {
    //             /* Could optionally print (state, action) pairs. */
    //             System.out.println("(state,action)=(" + stepResponse.o.intArray[0] + ","
    //                     + stepResponse.a.intArray[0] + ")");
    //         }
    //     }
    //
    //     System.out.println("\n\n----------Summary----------");
    //     int totalSteps = RLGlue.RL_num_steps();
    //     double totalReward = RLGlue.RL_return();
    //     System.out.println("It ran for " + totalSteps + " steps, total reward was: " + totalReward);
    // }
    // RLGlue.RL_cleanup();
}
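// This second runExperiment calls a runEpisode(int) overload that is not shown in this
// excerpt. In the standard RL-Glue Java codec skeleton experiment it looks roughly like
// the sketch below; RL_episode returns 1 if the episode reached a terminal state before
// hitting the step limit (0 means no limit):
private void runEpisode(int stepLimit) {
    // Run at most stepLimit steps, then query the episode's statistics.
    int terminal = RLGlue.RL_episode(stepLimit);
    int totalSteps = RLGlue.RL_num_steps();
    double totalReward = RLGlue.RL_return();
    System.out.println("Episode ended after " + totalSteps + " steps, total reward was: "
            + totalReward + ", natural end: " + terminal);
}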