protected void runEpisode(MetaNEATGenome genome) {
  RLGlue.RL_agent_message(genome.toPhenotype().toString());
  RLGlue.RL_init();
  RLGlue.RL_episode(steps);
  double totalReward = RLGlue.RL_return();
  genome.addFitness(rewardToFitness(totalReward));
  genome.incrEpisodes();
  RLGlue.RL_cleanup();
}
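/*
 * rewardToFitness() (used above) and its inverse fitnessToReward() (used in
 * runExperiment() further below) are not part of this listing. A minimal sketch,
 * assuming the only requirement is that fitness be non-negative for the evolutionary
 * algorithm; the offset value is an assumption, not taken from the original code.
 */
private static final double FITNESS_OFFSET = 1000.0; // assumed bound on the worst episode return

protected double rewardToFitness(double reward) {
  // Shift the episode return so the resulting fitness is never negative.
  return Math.max(0.0, reward + FITNESS_OFFSET);
}

protected double fitnessToReward(double fitness) {
  // Undo the shift to report performance on the original reward scale.
  return fitness - FITNESS_OFFSET;
}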
public static void main(String[] args) {
  Thread rlgluethread = new RLGlueThread();
  rlgluethread.start();
  EnvThread envthread = new EnvThread();
  envthread.start();
  AgentThread agentthread = new AgentThread();
  agentthread.start();

  int whichTrainingMDP = 0; // select the MDP to load; must be in [0,19]
  for (int it = 0; it < 20; it++) {
    whichTrainingMDP = it;
    consoleTrainerHelper.loadTetris(whichTrainingMDP);
    RLGlue.RL_init();

    int stepsRemaining = 50000;
    int totalEpisodes = 0;
    double returnThisMDP = 0.0d;
    while (stepsRemaining > 0) {
      RLGlue.RL_episode(stepsRemaining);
      int thisStepCount = RLGlue.RL_num_steps();
      stepsRemaining -= thisStepCount;
      returnThisMDP += RLGlue.RL_return();
      totalEpisodes++;
    }
    System.out.println("MDP " + it + " completed with " + totalEpisodes
        + " episodes, got " + returnThisMDP + " reward");

    // clean up the environment before loading the next MDP
    RLGlue.RL_cleanup();
  }
  System.exit(0);
}
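/*
 * The RLGlueThread, EnvThread and AgentThread classes used in main() are not shown in
 * this listing. A minimal sketch of the agent and environment threads, assuming the
 * standard RL-Glue Java codec loaders; TetrisAgent and TetrisEnvironment are
 * hypothetical placeholders for the actual classes. RLGlueThread, which starts the
 * RL-Glue core itself, is omitted because it depends on how the glue is launched.
 */
import org.rlcommunity.rlglue.codec.util.AgentLoader;
import org.rlcommunity.rlglue.codec.util.EnvironmentLoader;

class AgentThread extends Thread {
  @Override
  public void run() {
    // Connect the agent to RL-Glue; run() blocks until the experiment shuts down.
    new AgentLoader(new TetrisAgent()).run();
  }
}

class EnvThread extends Thread {
  @Override
  public void run() {
    // Connect the environment to RL-Glue.
    new EnvironmentLoader(new TetrisEnvironment()).run();
  }
}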
/* Run one episode with a maximum length of stepLimit steps. */
private void runEpisode(int stepLimit) {
  int terminal = RLGlue.RL_episode(stepLimit);
  int totalSteps = RLGlue.RL_num_steps();
  double totalReward = RLGlue.RL_return();
  System.out.println("Episode " + whichEpisode + "\t " + totalSteps + " steps \t"
      + totalReward + " total reward\t " + terminal + " natural end");
  whichEpisode++;
}
public static synchronized TetrisWorldResponse Execute() {
  String theRequest = AbstractMessage.makeMessage(
      MessageUser.kEnv.id(),
      MessageUser.kBenchmark.id(),
      EnvMessageType.kEnvCustom.id(),
      MessageValueType.kString.id(),
      "GETTETRLAISWORLD");

  String responseMessage = RLGlue.RL_env_message(theRequest);

  TetrisWorldResponse theResponse;
  try {
    theResponse = new TetrisWorldResponse(responseMessage);
  } catch (NotAnRLVizMessageException ex) {
    System.out.println("Not a valid RL Viz Message in Tetrlais World Response: " + ex);
    return null;
  }
  return theResponse;
}
public void runExperiment() {
  // logger.info("Experiment starts!");
  System.out.println("Experiment starts!");
  try {
    for (int i = 1; i <= runs; i++) {
      // logger.info("Run " + i);
      System.out.println("Run " + i);

      // Create the agent and its population via reflection on the configured algorithm
      Class<?> neAlg = Class.forName(algorithm);
      Constructor<?> ctor = neAlg.getDeclaredConstructor(String.class);
      MetaNEATEvolvable population = (MetaNEATEvolvable) ctor.newInstance(agentParamsFile);

      MetaNEATGenome championOfChampions = null;
      double championOfChampionsFitness = -1000000;

      for (int gen = 1; gen <= generations; gen++) {
        MetaNEATGenome champion = null;
        double championFitness = -1000000;

        // Reset fitness and episode counters
        for (int pop = 0; pop < population.getPopulationSize(); pop++) {
          MetaNEATGenome genome = population.getGenome(pop);
          genome.setFitness(0);
          genome.setEpisodes(0);
        }

        if (enableLearning) {
          RLGlue.RL_agent_message("enable-learning:0.00001");
          RLGlue.RL_agent_message("enable-exploration:0.01");
        }

        // Evaluate the population
        for (int episode = 0; episode < episodes * population.getPopulationSize(); episode++) {
          // Select the genome to be evaluated (round-robin over the population)
          // int selection = Utils.rand.nextInt(population.getPopulationSize());
          int selection = (episode % population.getPopulationSize());
          MetaNEATGenome genome = population.getGenome(selection);
          runEpisode(genome);
          String weights = RLGlue.RL_agent_message("get-learned-weights");
          genome.message(weights);
        }

        double Dmean = 0;
        double Nmean = 0;
        double rhomean = 0;

        // Calculate fitness
        double[] populationPerformance = new double[population.getPopulationSize()];
        for (int pop = 0; pop < population.getPopulationSize(); pop++) {
          MetaNEATGenome genome = population.getGenome(pop);
          if (genome.getEpisodes() > 0) {
            genome.setFitness(genome.getFitness() / genome.getEpisodes());
          } else {
            genome.setFitness(0);
          }
          Dmean += ((NEARGenome) genome).getSparseness();
          Nmean += ((NEARGenome) genome).getNInternalUnits();
          rhomean += ((NEARGenome) genome).getSpectralRadius();

          populationPerformance[pop] = fitnessToReward(genome.getFitness());
          if (championFitness < populationPerformance[pop]) {
            championFitness = populationPerformance[pop];
            champion = genome.clone();
          }
        }

        System.out.println("Mean N: " + gen + " " + (Nmean / population.getPopulationSize()));
        System.out.println("Mean D: " + gen + " " + (Dmean / population.getPopulationSize()));
        System.out.println("Mean rho: " + gen + " " + (rhomean / population.getPopulationSize()));

        RLGlue.RL_agent_message("disable-learning");
        RLGlue.RL_agent_message("disable-exploration");

        double championGeneralizationPerf = generalizationPerformance(1000, champion);

        // logger.info("Generation Champion: " + gen + " " + StatUtils.max(populationPerformance));
        // logger.info("Generalization Performance: " + gen + " " + championGeneralizationPerf);
        // logger.info("Avg Performance: " + gen + " " + StatUtils.mean(populationPerformance));
        System.out.println(
            "Generation Champion: " + gen + " " + StatUtils.max(populationPerformance));
        System.out.println(
            "Generalization Performance: " + gen + " " + championGeneralizationPerf);
        System.out.println(
            "Avg Performance: " + gen + " " + StatUtils.mean(populationPerformance));
        System.out.println(champion);

        if (championOfChampionsFitness < championGeneralizationPerf) {
          championOfChampionsFitness = championGeneralizationPerf;
          championOfChampions = champion.clone();
        }

        population.evolveNextGeneration();
        // logger.info("===*** " + gen + " ***===");
        System.out.println("===*** " + gen + " ***===");
      }

      RLGlue.RL_agent_message("disable-learning");
      RLGlue.RL_agent_message("disable-exploration");

      // logger.info("Champion Of Generations Performance: " + championOfChampionsFitness);
      System.out.println("Champion Of Generations Performance: " + championOfChampionsFitness);

      double cocGenPerf = generalizationPerformance(1000, championOfChampions);
      // logger.info("Champion Of Generations Generalization Performance: " + cocGenPerf);
      // logger.info(championOfChampions);
      System.out.println("Champion Of Generations Generalization Performance: " + cocGenPerf);
      System.out.print("ChampNet: ");
      System.out.println(championOfChampions);
    }
  } catch (Exception e) {
    e.printStackTrace();
  }
}
public static void main(String[] args) {
  /* Log level */
  Logger.getRootLogger().setLevel(Level.INFO);

  /* ************* Setup Agent ************* */
  /* Create the RL-Glue agent adapter to get the task specification
   * (needed by the learner's Q-function approximator). */
  RlGlueAgent rlGlueAgent = new RlGlueAgent();
  // create an AgentLoader that will start the agent when its run method is called
  AgentLoader agentLoader = new AgentLoader(rlGlueAgent);
  // create a thread so that the agent and environment can run asynchronously
  Thread agentThread = new Thread(agentLoader);
  // start the thread
  agentThread.start();

  String taskSpec = RLGlue.RL_init();
  System.out.println("Task-Specification: " + taskSpec);

  /* ************* Configure tile-coding approximation ************* */
  // the number of tilings/layers
  int nTilings = 5;
  // the configuration: {from, to, number of discs}
  double[][] config = new double[][] {
      {0, 1.0, 5},
      {0, 1.0, 5},
  };

  // create square tilings
  Network net = new Network();
  net.setIsNormalized(true);
  double[][] optimizationConfig = config.clone();
  net.setFeatureGenerator(
      new GridHashFeatureGenerator(optimizationConfig, new TileAndIndexBoundingBoxCalculator()));
  net.add(TileCodingFactory.createTilings(config, nTilings));

  // setup Q-function
  QFeatureFunction Q = new QFeatureFunction(net, rlGlueAgent.getTeachingboxActionSet());

  /* ************* Setup policy, learner & the Teachingbox agent ************* */
  // the ActionSet for the policy is read from the rlGlueAgent
  // (RL_init must have been called before!)
  EpsilonGreedyPolicy pi = new EpsilonGreedyPolicy(Q, rlGlueAgent.getTeachingboxActionSet(), 0.1);
  System.out.println("POLICY-LEARNER ActionSet: " + rlGlueAgent.getTeachingboxActionSet());

  GradientDescentSarsaLearner learner =
      new GradientDescentSarsaLearner(Q, net, rlGlueAgent.getTeachingboxActionSet());
  learner.setAlpha(0.5);
  learner.setGamma(1.0);
  learner.setLambda(0.9);

  Agent tbAgent = new Agent(pi);
  tbAgent.addObserver(learner);

  /* Set the Teachingbox agent in the RL-Glue agent adapter */
  rlGlueAgent.setTeachingBoxAgent(tbAgent);

  /* ************* Setup experiment and plotting ************* */
  RLGlueRemoteEnvironment rlEnv = new RLGlueRemoteEnvironment();
  Experiment experiment = new Experiment(tbAgent, rlEnv, 100, 1000);

  // 3D plotting: draw the maximum value of the Q-function.
  // To plot the corresponding V-function we just have to pass in the policy
  // as well as the actionSet.
  ValueFunctionEQ V = new ValueFunctionEQ(Q);
  V.costfunction = true;
  Plotter Vplotter =
      new ValueFunctionSurfacePlotter(V, "[0:0.02:1.0]", "[0:0.02:1.0]", "PuddleWorld");
  // use a RuntimePlotter that calls the value-function plotter every 10th episode
  Vplotter = new RuntimePlotter(Vplotter, RuntimePlotter.Mode.EPISODE, 10, net);
  // add the plotter as an observer to the experiment
  experiment.addObserver((RuntimePlotter) Vplotter);

  // run the experiment
  experiment.run();

  // clean up RL-Glue at the end
  RLGlue.RL_cleanup();
  System.exit(1);
}
public void runExperiment() {
  System.out.println("\n\nExperiment starting up!");
  String taskSpec = RLGlue.RL_init();
  System.out.println("RL_init called, the environment sent task spec: " + taskSpec);

  System.out.println("\n\n----------Sending some sample messages----------");
  /* Talk to the agent and environment a bit... */
  String responseMessage = RLGlue.RL_agent_message("what is your name?");
  System.out.println("Agent responded to \"what is your name?\" with: " + responseMessage);

  responseMessage = RLGlue.RL_agent_message("If at first you don't succeed; call it version 1.0");
  System.out.println("Agent responded to \"If at first you don't succeed; call it version 1.0\" with: "
      + responseMessage + "\n");

  responseMessage = RLGlue.RL_env_message("what is your name?");
  System.out.println("Environment responded to \"what is your name?\" with: " + responseMessage);

  responseMessage = RLGlue.RL_env_message("If at first you don't succeed; call it version 1.0");
  System.out.println("Environment responded to \"If at first you don't succeed; call it version 1.0\" with: "
      + responseMessage);

  System.out.println("\n\n----------Running a few episodes----------");
  for (int i = 0; i < 1; i++) {
    runEpisode(20000);
  }
  runEpisode(1);
  /* Remember that a stepLimit of 0 means there is no limit at all! */
  // runEpisode(0);
  RLGlue.RL_cleanup();

  System.out.println("\n\n----------Stepping through an episode----------");
  // taskSpec = RLGlue.RL_init();
  // for (int i = 0; i < 2; i++) {
  //   /* We could also start over and do another experiment. */
  //   /* We could run one step at a time instead of one episode at a time. */
  //   /* Start the episode. */
  //   Observation_action startResponse = RLGlue.RL_start();
  //   int firstObservation = startResponse.o.intArray[0];
  //   int firstAction = startResponse.a.intArray[0];
  //   System.out.println("First observation and action were: "
  //       + firstObservation + " and: " + firstAction);
  //
  //   /* Run one step. */
  //   Reward_observation_action_terminal stepResponse = RLGlue.RL_step();
  //
  //   /* Run until the episode ends. */
  //   while (stepResponse.terminal != 1) {
  //     stepResponse = RLGlue.RL_step();
  //     if (stepResponse.terminal != 1) {
  //       /* Could optionally print (state, action) pairs. */
  //       System.out.println("(state,action)=(" + stepResponse.o.intArray[0] + ","
  //           + stepResponse.a.intArray[0] + ")");
  //     }
  //   }
  //
  //   System.out.println("\n\n----------Summary----------");
  //   int totalSteps = RLGlue.RL_num_steps();
  //   double totalReward = RLGlue.RL_return();
  //   System.out.println("It ran for " + totalSteps + " steps, total reward was: " + totalReward);
  // }
  // RLGlue.RL_cleanup();
}