/** When the state has widely varying Q-values. */ public boolean giveAdvice(BasicRLPacMan student, MOVE _choice, MOVE _advice) { double[] qvalues = student.getQValues(); double gap = Stats.max(qvalues) - Stats.min(qvalues); // System.out.println(gap); boolean uncertain = (gap < threshold); if (uncertain) { left--; lastStudentActionCorrect = false; return true; } else lastStudentActionCorrect = true; return false; }
/**
 * Decides whether to advise based on the teacher's Q-value gap and the student's choice.
 *
 * <p>Advice is given only when all of the following hold:
 * <ul>
 *   <li>the difference between the teacher's largest and smallest Q-value is strictly
 *       greater than {@code threshold};</li>
 *   <li>{@code choice} is not the same reference as {@code advice};</li>
 *   <li>{@code step >= m} (presumably a minimum number of calls between two pieces of
 *       advice; confirm against the enclosing class).</li>
 * </ul>
 *
 * <p>Side effects: when advice is given, {@code left} is decremented and {@code step}
 * is reset to 0; in every other case {@code step} is incremented.
 *
 * @param teacher agent whose Q-values for the current state are inspected
 * @param choice  the action the student picked
 * @param advice  the action that would be advised
 * @param game    not read by this method
 * @return {@code true} if advice should be given, {@code false} otherwise
 */
public boolean giveAdvice(BasicRLPacMan teacher, MOVE choice, MOVE advice, Game game) {
    double[] q = teacher.getQValues();
    double spread = Stats.max(q) - Stats.min(q);

    boolean adviseNow = spread > threshold && choice != advice && step >= m;
    if (!adviseNow) {
        step++;
        return false;
    }

    left--;
    step = 0;
    return true;
}