Exemple #1
0
 /**
  * Drives {@code learner1} through 100 act/reward rounds with experimentation
  * set to 0.99, rewarding 1.0 whenever the chosen action equals
  * {@code CORRECT_ACTION} and 0 otherwise. After every round the learner is
  * passed to {@code checkProbabilities}. The final learner state, the number
  * of correct choices and the collected action statistics are printed to
  * standard output; the method itself makes no direct assertions.
  */
 public void testBasic() {
   learner1.setExperimentation(0.99);
   System.out.println("testBasic()");
   SummaryStats actionStats = new SummaryStats("action");
   int hits = 0;
   int round = 0;
   while (round < 100) {
     int chosen = learner1.act();
     actionStats.newData(chosen);
     if (chosen != CORRECT_ACTION) {
       learner1.reward(0);
     } else {
       learner1.reward(1.0);
       hits++;
     }
     checkProbabilities(learner1);
     round++;
   }
   System.out.println("final state of learner1 = " + learner1);
   // With exactly 100 rounds the raw hit count doubles as a percentage.
   System.out.println("learner1 score = " + hits + "%");
   System.out.println(actionStats);
 }
Exemple #2
0
 /**
  * Checks how often action 0 is selected when the learner's propensities are
  * fixed to 55 for action 0 and 5 for each of the other nine actions.
  *
  * <p>For each of 10000 repetitions a fresh {@code NPTRothErevLearner} is
  * created, given those propensities, and asked to act 100 times without any
  * reward being applied. Every choice contributes a 1 (action 0) or a 0 (any
  * other action) to {@code action1Data}; the test passes when the resulting
  * mean lies within [0.53, 0.57].
  */
 public void testDistribution() {
   System.out.println("\ntestDistribution()");
   double[] propensities = {55, 5, 5, 5, 5, 5, 5, 5, 5, 5};
   SummaryStats action1Data = new SummaryStats("action1");
   for (int run = 0; run < 10000; run++) {
     learner1 = new NPTRothErevLearner(10, 0.2, 0.2, 1, prng);
     learner1.setPropensities(propensities);
     // Per-run record of raw choices; it is filled here but never read in this method.
     SummaryStats choiceData = new SummaryStats("choice");
     for (int step = 0; step < 100; step++) {
       int picked = learner1.act();
       choiceData.newData(picked);
       // Indicator sample: 1 when action 0 was picked, otherwise 0.
       int action1Chosen = (picked == 0) ? 1 : 0;
       action1Data.newData(action1Chosen);
     }
   }
   System.out.println(action1Data);
   double observedMean = action1Data.getMean();
   assertTrue(observedMean <= 0.57 && observedMean >= 0.53);
 }