protected void runEpisode(MetaNEATGenome genome) {
    RLGlue.RL_agent_message(genome.toPhenotype().toString());
    RLGlue.RL_init();
    RLGlue.RL_episode(steps);
    double totalReward = RLGlue.RL_return();
    genome.addFitness(rewardToFitness(totalReward));
    genome.incrEpisodes();
    RLGlue.RL_cleanup();
}
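The rewardToFitness mapping used above is not shown in this snippet. A minimal sketch, assuming NEAT-style selection expects non-negative fitness and that an environment-specific offset bounds the worst possible episode return (both the method body and the REWARD_OFFSET constant are hypothetical, not taken from the original class):

// Hypothetical helper: shifts the episode return by an assumed lower bound
// so the resulting fitness value is non-negative.
protected double rewardToFitness(double totalReward) {
    final double REWARD_OFFSET = 1000.0; // assumed worst-case magnitude of an episode return
    return Math.max(0.0, totalReward + REWARD_OFFSET);
}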
public static void main(String[] args) {
    Thread rlgluethread = new RLGlueThread();
    rlgluethread.start();
    EnvThread envthread = new EnvThread();
    envthread.start();
    AgentThread agentthread = new AgentThread();
    agentthread.start();

    // Select and run each of the 20 training MDPs in turn.
    int whichTrainingMDP = 0;
    for (int it = 0; it < 20; it++) {
        whichTrainingMDP = it;
        consoleTrainerHelper.loadTetris(whichTrainingMDP); // whichTrainingMDP should be in [0,19]
        RLGlue.RL_init();

        int stepsRemaining = 50000;
        int totalEpisodes = 0;
        double returnThisMDP = 0.0d;

        // Run episodes until the step budget for this MDP is exhausted.
        while (stepsRemaining > 0) {
            RLGlue.RL_episode(stepsRemaining);
            int thisStepCount = RLGlue.RL_num_steps();
            stepsRemaining -= thisStepCount;
            returnThisMDP += RLGlue.RL_return();
            totalEpisodes++;
        }

        System.out.println("MDP " + it + " completed with " + totalEpisodes
            + " episodes, got " + returnThisMDP + " reward");

        // Clean up this environment before loading the next MDP.
        RLGlue.RL_cleanup();
    }
    System.exit(0);
}
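The RLGlueThread, EnvThread and AgentThread wrappers are not shown here. As a rough sketch of the pattern, each wrapper just runs one RL-Glue component on its own thread so that agent, environment and trainer can live in the same JVM; the SkeletonAgent class below is an assumption standing in for whatever agent the trainer actually loads:

import org.rlcommunity.rlglue.codec.util.AgentLoader;

// Sketch of one helper thread: starts an RL-Glue agent on its own thread.
class AgentThread extends Thread {
    @Override
    public void run() {
        AgentLoader loader = new AgentLoader(new SkeletonAgent()); // assumed agent implementation
        loader.run(); // connects to rl_glue and serves the agent until shutdown
    }
}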
public static void main(String[] args) {
    /* Log level */
    Logger.getRootLogger().setLevel(Level.INFO);

    /* ************* Setup Agent ************* */
    /* create the RL-Glue agent to get the task specification (needed by the learner's Q-function approximator) */
    RlGlueAgent rlGlueAgent = new RlGlueAgent();
    // create an AgentLoader that will start the agent when its run method is called
    AgentLoader agentLoader = new AgentLoader(rlGlueAgent);
    // create a thread so that the agent and environment can run asynchronously
    Thread agentThread = new Thread(agentLoader);
    // start the thread
    agentThread.start();

    String taskSpec = RLGlue.RL_init();
    System.out.println("Task-Specification: " + taskSpec);

    /* ************* Configure tile-coding approximation ************* */
    // the number of tilings/layers
    int nTilings = 5;
    // the configuration: {from, to, number of discs}
    double[][] config = new double[][] {
        {0, 1.0, 5},
        {0, 1.0, 5},
    };

    // create square tilings
    Network net = new Network();
    net.setIsNormalized(true);
    double[][] optimizationConfig = config.clone();
    net.setFeatureGenerator(
        new GridHashFeatureGenerator(optimizationConfig, new TileAndIndexBoundingBoxCalculator()));
    net.add(TileCodingFactory.createTilings(config, nTilings));

    // setup Q-function
    QFeatureFunction Q = new QFeatureFunction(net, rlGlueAgent.getTeachingboxActionSet());

    /* ************* Setup policy, learner & the TB's agent ************* */
    // the ActionSet for the policy is read from the rlGlueAgent (RL_init must have been called before!)
    EpsilonGreedyPolicy pi = new EpsilonGreedyPolicy(Q, rlGlueAgent.getTeachingboxActionSet(), 0.1);
    System.out.println("POLICY-LEARNER ActionSet: " + rlGlueAgent.getTeachingboxActionSet());

    GradientDescentSarsaLearner learner =
        new GradientDescentSarsaLearner(Q, net, rlGlueAgent.getTeachingboxActionSet());
    learner.setAlpha(0.5);
    learner.setGamma(1.0);
    learner.setLambda(0.9);

    Agent tbAgent = new Agent(pi);
    tbAgent.addObserver(learner);

    /* set the Teachingbox agent in the RL-Glue agent adapter */
    rlGlueAgent.setTeachingBoxAgent(tbAgent);

    /* ************* Setup experiment and plotting ************* */
    RLGlueRemoteEnvironment rlEnv = new RLGlueRemoteEnvironment();
    Experiment experiment = new Experiment(tbAgent, rlEnv, 100, 1000);

    // 3D plotting: draw the maximum value of the Q-function.
    // To plot the corresponding V-function we just have to pass in the policy as well as the actionSet.
    ValueFunctionEQ V = new ValueFunctionEQ(Q);
    V.costfunction = true;
    Plotter Vplotter =
        new ValueFunctionSurfacePlotter(V, "[0:0.02:1.0]", "[0:0.02:1.0]", "PuddleWorld");
    // use a runtime plotter that calls the ValueFunctionSurfacePlotter every 10th episode
    Vplotter = new RuntimePlotter(Vplotter, RuntimePlotter.Mode.EPISODE, 10, net);
    // add the plotter as an observer to the experiment
    experiment.addObserver((RuntimePlotter) Vplotter);

    // run the experiment
    experiment.run();

    // clean up RL-Glue at the end and exit normally
    RLGlue.RL_cleanup();
    System.exit(0);
}
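As a rough sanity check of the tile-coding configuration above, assuming each of the nTilings layers simply splits both normalized input dimensions into the configured number of discs (the hashing done by GridHashFeatureGenerator may change the exact feature count):

// Back-of-the-envelope resolution check for the configuration above.
public static void printTilingResolution() {
    int nTilings = 5;                               // as configured above
    int discsPerDim = 5;                            // {0, 1.0, 5} per dimension
    double tileWidth = (1.0 - 0.0) / discsPerDim;   // 0.2 of the normalized input range
    int tilesPerTiling = discsPerDim * discsPerDim; // 25 tiles per tiling
    int totalTiles = nTilings * tilesPerTiling;     // 125 tiles, i.e. weights per action
    System.out.println("tile width: " + tileWidth
        + ", tiles per tiling: " + tilesPerTiling
        + ", total tiles: " + totalTiles);
}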
public void runExperiment() {
    System.out.println("\n\nExperiment starting up!");
    String taskSpec = RLGlue.RL_init();
    System.out.println("RL_init called, the environment sent task spec: " + taskSpec);

    System.out.println("\n\n----------Sending some sample messages----------");

    /* Talk to the agent and environment a bit... */
    String responseMessage = RLGlue.RL_agent_message("what is your name?");
    System.out.println("Agent responded to \"what is your name?\" with: " + responseMessage);

    responseMessage = RLGlue.RL_agent_message("If at first you don't succeed; call it version 1.0");
    System.out.println("Agent responded to \"If at first you don't succeed; call it version 1.0\" with: "
        + responseMessage + "\n");

    responseMessage = RLGlue.RL_env_message("what is your name?");
    System.out.println("Environment responded to \"what is your name?\" with: " + responseMessage);

    responseMessage = RLGlue.RL_env_message("If at first you don't succeed; call it version 1.0");
    System.out.println("Environment responded to \"If at first you don't succeed; call it version 1.0\" with: "
        + responseMessage);

    System.out.println("\n\n----------Running a few episodes----------");
    for (int i = 0; i < 1; i++) {
        runEpisode(20000);
    }
    runEpisode(1);
    /* Remember that a stepLimit of 0 means there is no limit at all! */
    // runEpisode(0);
    RLGlue.RL_cleanup();

    System.out.println("\n\n----------Stepping through an episode----------");
    // The block below shows how to run one step at a time instead of one
    // episode at a time; it is left commented out.
    // taskSpec = RLGlue.RL_init();
    // for (int i = 0; i < 2; i++) {
    //     /* We could also start over and do another experiment */
    //     /* Start the episode */
    //     Observation_action startResponse = RLGlue.RL_start();
    //     int firstObservation = startResponse.o.intArray[0];
    //     int firstAction = startResponse.a.intArray[0];
    //     System.out.println("First observation and action were: " + firstObservation + " and: " + firstAction);
    //
    //     /* Run one step */
    //     Reward_observation_action_terminal stepResponse = RLGlue.RL_step();
    //
    //     /* Run until the episode ends */
    //     while (stepResponse.terminal != 1) {
    //         stepResponse = RLGlue.RL_step();
    //         if (stepResponse.terminal != 1) {
    //             /* Could optionally print (state, action) pairs */
    //             System.out.println("(state,action)=(" + stepResponse.o.intArray[0] + ","
    //                 + stepResponse.a.intArray[0] + ")");
    //         }
    //     }
    //
    //     System.out.println("\n\n----------Summary----------");
    //
    //     int totalSteps = RLGlue.RL_num_steps();
    //     double totalReward = RLGlue.RL_return();
    //     System.out.println("It ran for " + totalSteps + " steps, total reward was: " + totalReward);
    // }
    // RLGlue.RL_cleanup();
}
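The runEpisode helper called above is not part of this snippet. A minimal sketch in the spirit of the standard RL-Glue skeleton experiment (the exact wording of the printout is an assumption):

// Sketch of the runEpisode(int) helper used above: runs one episode with the
// given step limit (0 means no limit) and prints the episode statistics.
private void runEpisode(int stepLimit) {
    int terminal = RLGlue.RL_episode(stepLimit); // nonzero if the episode ended naturally
    int totalSteps = RLGlue.RL_num_steps();
    double totalReward = RLGlue.RL_return();
    System.out.println("Episode finished: " + totalSteps + " steps, total reward: " + totalReward
        + ", natural end: " + terminal);
}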