/**
 * Runs a random Monte-Carlo playout from the given node until a terminal state is reached.
 *
 * <p>A terminal node short-circuits the simulation and yields its stored goal values. Otherwise
 * random joint moves are applied to the node's state until the state machine reports a terminal
 * state or the deadline passes.
 *
 * @param node Node to start at
 * @param timeout Time limit, compared against {@link System#currentTimeMillis()}
 * @return Scores for all players, or {@code null} if the deadline passed before a terminal state
 *     was reached or the state machine raised a goal/transition/move definition error
 */
private int[] playout(Node node, long timeout) {
    if (node instanceof TerminalNode) {
        TerminalNode leaf = (TerminalNode) node;
        return leaf.goal;
    }

    MachineState current = ((NonTerminalNode) node).state;
    while (true) {
        // Out of time: give up without a result.
        if (System.currentTimeMillis() >= timeout) {
            return null;
        }

        if (!theMachine.isTerminal(current)) {
            // Not finished yet: advance the simulation by one random joint move.
            try {
                current = theMachine.getNextState(current, theMachine.getRandomJointMove(current));
            } catch (TransitionDefinitionException e) {
                System.err.println("Could not perform state update.");
                return null;
            } catch (MoveDefinitionException e) {
                System.err.println("Could not compute legal moves.");
                return null;
            }
            continue;
        }

        // Terminal state reached: unbox the goal values into a plain array.
        List<Integer> goals;
        try {
            goals = theMachine.getGoals(current);
        } catch (GoalDefinitionException e) {
            System.err.println("Could not compute goal values.");
            return null;
        }
        int[] result = new int[goals.size()];
        int slot = 0;
        for (Integer goal : goals) {
            result[slot++] = goal;
        }
        return result;
    }
}
/** * Performs Monte-Carlo tree search. * * @param timeout Time limit * @return Best move in the root state */ private Move mcts(long timeout) { counter = 0; // Move bestMove = null; // double bestScore = Double.NEGATIVE_INFINITY; while (System.currentTimeMillis() < timeout) { mctsStep(timeout); /* int next = mctsStep(timeout); if(next == -1){ continue; } double score = root.q_action[role][next] - uct(root.n, root.n_action[role][next]); if(score > bestScore){ bestScore = score; bestMove = root.legal.get(role).get(next); } */ } System.out.println("Did " + counter + " MCTS steps."); int best = -1; int bestScore = Integer.MIN_VALUE; confidence = 0; for (int i = 0; i < root.n_action[role].length; i++) { if (root.n_action[role][i] > bestScore) { bestScore = root.n_action[role][i]; best = i; confidence = (int) (root.q_action[role][i] - uct(root.n, root.n_action[role][i])); } } // System.out.println("MonteCarlo: Computed confidence value of " + confidence + " for move with // score " + bestScore + "."); if (confidence < MIN_SCORE) { confidence = MIN_SCORE; } return (best == -1) ? null : root.legal.get(role).get(best); }
/** * Performs one MCTS step. * * @param timeout Time limit * @return Index of the move performed by the player in the root state * @throws TimeoutException Time limit reached */ private int mctsStep(long timeout) { List<NonTerminalNode> path = new ArrayList<NonTerminalNode>(); int[] scores = null; Node current = root; NonTerminalNode c = (NonTerminalNode) root; // int rootMove = -1; // Selection List<List<Integer>> select = new ArrayList<List<Integer>>(); List<Move> selectM = new ArrayList<Move>(); while (current != null) { if (System.currentTimeMillis() > timeout) { return -1; } if (current instanceof TerminalNode) { scores = ((TerminalNode) current).goal; break; } c = (NonTerminalNode) current; path.add(c); List<Integer> selectEntry = new ArrayList<Integer>(); selectM = new ArrayList<Move>(); for (int p = 0; p < theMachine.getRoles().size(); p++) { List<Move> moves = c.legal.get(p); double bestval = Double.NEGATIVE_INFINITY; Move bestmove; int bestindex; if (c.n == 0) { bestmove = moves.get(0); bestindex = 0; } else { bestmove = null; bestindex = -1; for (int i = 0; i < moves.size(); i++) { Move m = moves.get(i); double value = c.q_action[p][i] + uct(c.n, c.n_action[p][i]); // System.out.println("Found a move with score " + value); if (value > bestval) { bestval = value; bestmove = m; bestindex = i; } } } selectEntry.add(bestindex); selectM.add(bestmove); } select.add(selectEntry); /* if(current == root){ rootMove = select.get(select.size() - 1)[role]; } */ current = c.successors.get(selectEntry); } // Expansion & Playout if (!(current instanceof TerminalNode)) { MachineState next; try { next = theMachine.getNextState(c.state, selectM); } catch (TransitionDefinitionException e) { System.err.println("Could not perform state update."); return -1; } if (theMachine.isTerminal(next)) { current = new TerminalNode(next); } else { current = new NonTerminalNode(next); } c.successors.put(select.get(select.size() - 1), current); scores = playout(current, timeout); } if 
(scores == null) { return -1; } // Backpropagation for (int i = 0; i < path.size(); i++) { NonTerminalNode node = path.get(i); node.n++; for (int j = 0; j < theMachine.getRoles().size(); j++) { node.n_action[j][select.get(i).get(j)]++; node.q_action[j][select.get(i).get(j)] = (node.q_action[j][select.get(i).get(j)] * node.n_action[j][select.get(i).get(j)] + scores[j]) / (node.n_action[j][select.get(i).get(j)] + 1); } } counter++; return select.get(0).get(role); }