public static double nearestValue(double energy, double distance, double action) { // if(distance>0.43&&energy>0.5) // System.out.print("test"); // scale energy to nearest 0.1 energy *= 10; energy = Math.round(energy); energy /= 10; double closestDist = 100; Transition trans; double bestValue = 1; int bestIndex = 1; for (int index = 0; index < transitionList.size(); index += 2) { trans = transitionList.get(index); if (Math.abs(trans.getCurrentState().getOgreEnergy() - energy) < 0.001 && Math.abs(trans.getCurrentState().getEnemyDistance() - distance) < closestDist) { closestDist = Math.abs(trans.getCurrentState().getEnemyDistance() - distance); bestIndex = index; } } // value of evade bestValue = Q_table[bestIndex][0]; // if attack then use that value if (bestIndex + 1 < transitionList.size()) if (action < 0.5) bestValue = Q_table[bestIndex + 1][0]; return bestValue; }
/* * input transition exp loop through transition experiences * * for each transition t use Q update rule oldState and action comes from t * new state comes from t * * for Q update fill up Q_table (output) */ public void Qsweep(List<Transition> transitions) { if (firstTimeQ) initTable(transitions); for (int index = 0; index < transitions.size(); index++) { Transition t = transitions.get(index); // get reward (if any) float reward = -0.08f; // small penalty for each timestep used if (t.getNextState().getEnemyHealth() < 0.001) { reward = 5; } else if (t.getNextState().getOgreHealth() < 0.001) { reward = -10; } else if ( // t.getNextState().getEnemyHealth() < // t.getCurrentState() // .getEnemyHealth()&& t.getNextState().getOgreEnergy() < t.getCurrentState().getOgreEnergy()) { // if we just got him with the bow reward = 5; // System.out.println("got reward"); } Q_table[index][0] += alpha * (reward + gamma * queryBestState(t) - Q_table[index][0]); // original version if (Q_table[index][0] > 10) Q_table[index][0] = 10; if (Q_table[index][0] < -10) Q_table[index][0] = -10; // System.out.println("Q: "+index+": "+Q_tableNN[index][0]); } firstTimeQ = false; }
/**
 * Prints a transition to standard output on a single line without a trailing
 * newline: the prefix {@code trans:}, then the current state's ogre health,
 * ogre energy, enemy health and enemy distance, the action, and the same four
 * fields of the next state, separated by single spaces.
 *
 * @param t the transition to print
 */
private void printTrans(Transition t) {
    // Fields of the state the transition started from.
    final String before = "trans:" + t.getCurrentState().getOgreHealth()
            + " " + t.getCurrentState().getOgreEnergy()
            + " " + t.getCurrentState().getEnemyHealth()
            + " " + t.getCurrentState().getEnemyDistance();

    // The action taken, followed by the fields of the resulting state.
    final String after = " " + t.getAction()
            + " " + t.getNextState().getOgreHealth()
            + " " + t.getNextState().getOgreEnergy()
            + " " + t.getNextState().getEnemyHealth()
            + " " + t.getNextState().getEnemyDistance();

    System.out.print(before + after);
}