Code Example #1: PuddleWorld via RL-Glue with tile-coding approximation (gradient-descent SARSA(λ))
  public static void main(String[] args) {

    /* Log level */
    Logger.getRootLogger().setLevel(Level.INFO);

    /* ************* Setup Agent ************* */
    /* create RLGlue Agent to get task specification (needed by the learner's Q-function approximator) */
    RlGlueAgent rlGlueAgent = new RlGlueAgent();
    // create an agentLoader that will start the agent when its run method is called
    AgentLoader agentLoader = new AgentLoader(rlGlueAgent);
    // create thread so that the agent and environment can run asynchronously
    Thread agentThread = new Thread(agentLoader);
    // start the thread
    agentThread.start();

    String taskSpec = RLGlue.RL_init();
    System.out.println("Task-Specification: " + taskSpec);

    /* ****************************************
     * Configure Tile-Coding approximation
     * **************************************** */
    // BEGIN TILE CODING APPROXIMATION
    // the number of tilings/layers
    int nTilings = 5;

    // the configuration: {from, to, number of discs}
    double[][] config =
        new double[][] {
          {0, 1.0, 5},
          {0, 1.0, 5},
        };

    // create square tilings
    Network net = new Network();
    net.setIsNormalized(true);
    double[][] optimizationConfig = config.clone();
    net.setFeatureGenerator(
        new GridHashFeatureGenerator(optimizationConfig, new TileAndIndexBoundingBoxCalculator()));
    net.add(TileCodingFactory.createTilings(config, nTilings));

    // setup Q-Function
    QFeatureFunction Q = new QFeatureFunction(net, rlGlueAgent.getTeachingboxActionSet());
    // END TILE-CODING

    /* ***************************************
     * Setup policy, learner & the Teachingbox agent
     * *************************************** */
    // the ActionSet for the policy is read from the rlGlueAgent (RL_init must have been called
    // before!)
    EpsilonGreedyPolicy pi = new EpsilonGreedyPolicy(Q, rlGlueAgent.getTeachingboxActionSet(), 0.1);
    System.out.println("POLICY-LEARNER ActionSet: " + rlGlueAgent.getTeachingboxActionSet());
    GradientDescentSarsaLearner learner =
        new GradientDescentSarsaLearner(Q, net, rlGlueAgent.getTeachingboxActionSet());
    learner.setAlpha(0.5); // learning rate
    learner.setGamma(1.0); // discount factor
    learner.setLambda(0.9); // eligibility-trace decay
    Agent tbAgent = new Agent(pi);
    tbAgent.addObserver(learner);

    /* SET THE TEACHINGBOX-AGENT IN THE RL-GLUE-AGENT-ADAPTER */
    rlGlueAgent.setTeachingBoxAgent(tbAgent);

    /* *******************************
     * Setup Experiment and Plotting
     * ******************************* */
    RLGlueRemoteEnvironment rlEnv = new RLGlueRemoteEnvironment();
    // run 100 episodes with at most 1000 steps each
    Experiment experiment = new Experiment(tbAgent, rlEnv, 100, 1000);

    // 3D PLOTTING
    // plot the maximum value of the Q-function;
    // to plot the V-function of a policy instead, pass the policy and the actionSet
    // to ValueFunctionEQ as well
    ValueFunctionEQ V = new ValueFunctionEQ(Q);
    V.costfunction = true;
    Plotter Vplotter =
        new ValueFunctionSurfacePlotter(V, "[0:0.02:1.0]", "[0:0.02:1.0]", "PuddleWorld");
    // use a RuntimePlotter that calls the ValueFunctionPlotter every 10th episode
    Vplotter = new RuntimePlotter(Vplotter, RuntimePlotter.Mode.EPISODE, 10, net);
    // add the plotter as an observer to the experiment
    experiment.addObserver((RuntimePlotter) Vplotter);

    // RUN THE EXPERIMENT
    experiment.run();

    // cleanup rl-glue at the end
    RLGlue.RL_cleanup();

    // exit explicitly so that the agent thread is terminated as well
    System.exit(0);
  }
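
The tile-coding setup above lays 5 tilings of 5×5 tiles over the unit square [0, 1] × [0, 1] (the PuddleWorld state space), so every state activates exactly one tile per tiling. The following standalone sketch is not part of the Teachingbox API; it only illustrates, under the assumption that each tiling is shifted by a fraction of the tile width (the real offsets are chosen by TileCodingFactory), how a 2-D state could be mapped to one active feature index per tiling. QFeatureFunction then represents Q(s, a) as a linear function of such sparse binary features, which is what the gradient-descent learner updates.

// Standalone sketch: how tile coding maps a 2-D state to one active feature per tiling.
// This is an illustration only, not the Teachingbox API.
public class TileCodingSketch {

  public static void main(String[] args) {
    int nTilings = 5; // number of tilings/layers, as in the example
    int tilesPerDim = 5; // "number of discs" per dimension
    double lo = 0.0, hi = 1.0; // {from, to} of each dimension
    double tileWidth = (hi - lo) / tilesPerDim;

    double[] state = {0.37, 0.82}; // an arbitrary state in [0,1] x [0,1]

    for (int t = 0; t < nTilings; t++) {
      // assumption: each tiling is shifted by a different fraction of one tile width
      double offset = tileWidth * t / nTilings;
      int ix = (int) Math.min(tilesPerDim - 1, Math.floor((state[0] - lo + offset) / tileWidth));
      int iy = (int) Math.min(tilesPerDim - 1, Math.floor((state[1] - lo + offset) / tileWidth));
      int feature = t * tilesPerDim * tilesPerDim + ix * tilesPerDim + iy;
      System.out.println("tiling " + t + ": active tile (" + ix + "," + iy + ") -> feature " + feature);
    }
  }
}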
Code Example #2: Mountain-Car with an adaptive normalized RBF network (gradient-descent Q-learning)
  /**
   * @param args
   * @throws Exception
   */
  public static void main(String[] args) throws Exception {
    Logger.getRootLogger().setLevel(Level.DEBUG);

    // setup environment
    MountainCarEnv env = new MountainCarEnv();

    // choose sigma (RBF widths: 1/50 of each state dimension's range)
    final double POS_STEP = (env.MAX_POS - env.MIN_POS) / 50;
    final double VEL_STEP = (env.MAX_VEL - env.MIN_VEL) / 50;
    final double[] sigma = new double[] {POS_STEP, VEL_STEP};

    // create an adaptive network that adds a new RBF whenever no node is near the current state
    Network net =
        new Network(
            new NoNodeNearby(new RadialBasisFunction(sigma, sigma), new RBFDistanceCalculator()));
    net.setIsNormalized(true);

    // setup Q-Function: load a previously serialized Q-function
    // (use the commented-out constructor instead to start learning from scratch)
    // QFeatureFunction Q = new QFeatureFunction(net, MountainCarEnv.ACTION_SET);
    QFeatureFunction Q = ObjectSerializer.load("MC_Q_NRBF.ser");

    // setup policy
    GreedyPolicy pi = new GreedyPolicy(Q, MountainCarEnv.ACTION_SET);

    // create agent
    Agent agent = new Agent(pi);

    // experiment setup
    final int MAX_EPISODES = 1000;
    final int MAX_STEPS = 5000;
    final double alpha = 0.4; // learning rate
    final double gamma = 1; // discount factor
    final double lambda = 0.9; // eligibility-trace decay

    // setup experiment
    Experiment experiment = new Experiment(agent, env, MAX_EPISODES, MAX_STEPS);

    // setup learner
    GradientDescentQLearner learner =
        new GradientDescentQLearner(Q, net, MountainCarEnv.ACTION_SET);
    learner.setAlpha(alpha);
    learner.setGamma(gamma);
    learner.setLambda(lambda);

    // attach learner to agent
    agent.addObserver(learner);

    // Mountain-Car boundaries
    double[] bound = new double[] {env.MIN_POS, env.MAX_POS, env.MIN_VEL, env.MAX_VEL};

    // grid discretization
    int[] isosamples = new int[] {60, 60};

    // Initialize Policy Plotter
    PolicyPlotter3D policyPlotter = new PolicyPlotter3D(pi, bound, isosamples);
    policyPlotter.setLabels("Position", "Velocity", "Action");
    policyPlotter.setFilename("mc-policy.gnuplot");
    policyPlotter.setTitle("Mountain-Car Policy");
    policyPlotter.setView(0.01, 0.01);

    // alternative Q-function visualization (kept for reference):
    //        ValueFunctionEQ V = new ValueFunctionEQ(Q);
    //        V.costfunction = true;
    //        ValueFunctionSurfacePlotter qp = new ValueFunctionSurfacePlotter(
    //                V, new double[] {env.MIN_POS, isosamples[0], env.MAX_POS},
    //                new double[] {env.MIN_VEL, isosamples[1], env.MAX_VEL},
    //                "Mountain-Car Q-Function");
    //        qp.getPlotter().setView(50, 19);
    //        qp.getPlotter().getAxis("x").setLabel("Position");
    //        qp.getPlotter().getAxis("y").setLabel("Velocity");
    //        qp.getPlotter().getAxis("z").setLabel("Costs");
    //        // qp.setFilename("mc-Q.gnuplot");

    // Initialize Q-function Plotter
    QFunctionPlotter3D Qplotter = new QFunctionPlotter3D(Q, bound, isosamples);
    Qplotter.setLabels("Position", "Velocity", "Costs");
    Qplotter.setFilename("mc-Q.gnuplot");
    Qplotter.setTitle("Mountain-Car Q-Function");
    Qplotter.setCosts(true);
    Qplotter.setView(50, 19);

    // generate a plot every 20 episodes
    experiment.addObserver(new RuntimePlotter(policyPlotter, Mode.EPISODE, 20, net));
    experiment.addObserver(new RuntimePlotter(Qplotter, Mode.EPISODE, 20, net));

    // run experiment
    experiment.run();

    // save the learned Q-function so it can be reloaded on the next run
    ObjectSerializer.save("MC_Q_NRBF.ser", Q);
  }
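
In this example the Network is built from Gaussian radial basis functions whose widths sigma are 1/50 of each state dimension's range, and setIsNormalized(true) makes the feature activations sum to one; NoNodeNearby grows the network by inserting a new RBF when no existing node is close to the visited state. The following standalone sketch is not part of the Teachingbox API; the centers, widths, and query state are illustrative assumptions, and it only shows how such a normalized RBF feature vector could be computed for a Mountain-Car state (position, velocity):

// Standalone sketch: normalized Gaussian RBF features for a Mountain-Car state.
// Illustration only, not the Teachingbox API; centers, widths and the query state
// are assumed values.
public class NormalizedRbfSketch {

  public static void main(String[] args) {
    // a few illustrative RBF centers on the (position, velocity) plane
    double[][] centers = {
      {-0.37, 0.0}, {-0.33, 0.0}, {-0.29, 0.0}, {-0.25, 0.0}
    };
    // widths roughly 1/50 of each dimension's range, as in the example
    double[] sigma = {0.036, 0.0028};
    // query state (position, velocity)
    double[] state = {-0.30, 0.001};

    double[] phi = new double[centers.length];
    double sum = 0.0;
    for (int i = 0; i < centers.length; i++) {
      double exponent = 0.0;
      for (int d = 0; d < state.length; d++) {
        double diff = state[d] - centers[i][d];
        exponent += (diff * diff) / (2.0 * sigma[d] * sigma[d]);
      }
      phi[i] = Math.exp(-exponent); // Gaussian activation of node i
      sum += phi[i];
    }
    // normalize so the features sum to 1 (the role of setIsNormalized(true) in the example)
    for (int i = 0; i < phi.length; i++) {
      phi[i] /= sum;
      System.out.printf("phi[%d] = %.4f%n", i, phi[i]);
    }
  }
}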