/**
 * Drives {@code learner1} through 100 act/reward rounds with its
 * experimentation parameter set to 0.99.
 *
 * <p>Each round the learner picks an action; picking {@code CORRECT_ACTION}
 * earns a reward of 1.0, anything else earns 0. After every reward the
 * learner is handed to {@code checkProbabilities}, which is the only
 * verification performed here. The method itself makes no assertion about
 * how often the correct action was picked; it only prints the tally.
 */
public void testBasic() {
    learner1.setExperimentation(0.99);
    System.out.println("testBasic()");

    // With exactly 100 rounds the hit count can be printed directly as a percentage.
    final int rounds = 100;
    SummaryStats actionStats = new SummaryStats("action");
    int hits = 0;

    int round = 0;
    while (round < rounds) {
        int chosen = learner1.act();
        actionStats.newData(chosen);

        if (chosen != CORRECT_ACTION) {
            learner1.reward(0);
        } else {
            learner1.reward(1.0);
            hits++;
        }

        // Re-validate the learner after every single update.
        checkProbabilities(learner1);
        round++;
    }

    String finalState = "final state of learner1 = " + learner1;
    String score = "learner1 score = " + hits + "%";
    System.out.println(finalState);
    System.out.println(score);
    System.out.println(actionStats);
}
/**
 * Checks how often action 0 is selected when the learner starts from a
 * fixed set of propensities and is never rewarded.
 *
 * <p>Action 0 is given propensity 55 and each of the other nine actions 5,
 * so action 0 holds 55 of the 100 total. Over 10000 freshly constructed
 * learners, each asked to act 100 times, a 0/1 indicator of "action 0 was
 * chosen" is accumulated; the test passes when the indicator's mean lies
 * within [0.53, 0.57].
 */
public void testDistribution() {
    System.out.println("\ntestDistribution()");

    double[] initialPropensities = {55, 5, 5, 5, 5, 5, 5, 5, 5, 5};
    SummaryStats action1Data = new SummaryStats("action1");

    int run = 0;
    while (run < 10000) {
        // A brand-new learner per run, reset to the same starting propensities.
        learner1 = new NPTRothErevLearner(10, 0.2, 0.2, 1, prng);
        learner1.setPropensities(initialPropensities);

        // Per-run record of raw choices; collected but not reported by this test.
        SummaryStats choiceData = new SummaryStats("choice");

        for (int trial = 0; trial < 100; trial++) {
            int picked = learner1.act();
            choiceData.newData(picked);

            int pickedFirstAction = (picked == 0) ? 1 : 0;
            action1Data.newData(pickedFirstAction);
        }
        run++;
    }

    System.out.println(action1Data);

    double observedMean = action1Data.getMean();
    assertTrue(observedMean >= 0.53 && observedMean <= 0.57);
}