コード例 #1
0
  public static void main(String[] args) {

    GridWorldDomain gw = new GridWorldDomain(11, 11); // 11x11 grid world
    gw.setMapToFourRooms(); // four rooms layout
    gw.setProbSucceedTransitionDynamics(0.8); // stochastic transitions with
    // 0.8 success rate
    final Domain domain = gw.generateDomain(); // generate the grid world
    // domain

    // setup initial state
    State s = GridWorldDomain.getOneAgentOneLocationState(domain);
    GridWorldDomain.setAgent(s, 0, 0);
    GridWorldDomain.setLocation(s, 0, 10, 10);

    // ends when the agent reaches a location
    final TerminalFunction tf =
        new SinglePFTF(domain.getPropFunction(GridWorldDomain.PFATLOCATION));

    // reward function definition
    final RewardFunction rf = new GoalBasedRF(new TFGoalCondition(tf), 5., -0.1);

    // initial state generator
    final ConstantStateGenerator sg = new ConstantStateGenerator(s);

    // set up the state hashing system for looking up states
    final DiscreteStateHashFactory hashingFactory = new DiscreteStateHashFactory();

    /** Create factory for Q-learning agent */
    LearningAgentFactory qLearningFactory =
        new LearningAgentFactory() {

          @Override
          public String getAgentName() {
            return "Q-learning";
          }

          @Override
          public LearningAgent generateAgent() {
            return new QLearning(domain, rf, tf, 0.99, hashingFactory, 0.3, 0.1);
          }
        };

    // define experiment
    LearningAlgorithmExperimenter exp =
        new LearningAlgorithmExperimenter((SADomain) domain, rf, sg, 10, 100, qLearningFactory);

    exp.setUpPlottingConfiguration(
        500,
        250,
        2,
        1000,
        TrialMode.MOSTRECENTANDAVERAGE,
        PerformanceMetric.CUMULATIVESTEPSPEREPISODE,
        PerformanceMetric.AVERAGEEPISODEREWARD);

    // start experiment
    exp.startExperiment();
  }