/**
 * Plays uniformly random joint moves, starting from the given node, until a
 * terminal state is reached and its goal values can be reported.
 *
 * @param node Node to start at
 * @param timeout Time limit, compared against System.currentTimeMillis()
 * @return Scores for all players, or null when the time limit passes before a
 *         terminal state is found or when the state machine reports an error
 */
private int[] playout(Node node, long timeout) {
    // A terminal node already carries its goal values; nothing to simulate.
    if (node instanceof TerminalNode) {
        TerminalNode leaf = (TerminalNode) node;
        return leaf.goal;
    }

    MachineState position = ((NonTerminalNode) node).state;
    while (System.currentTimeMillis() < timeout) {
        if (!theMachine.isTerminal(position)) {
            // Not finished yet: advance by one random joint move and re-check.
            try {
                position = theMachine.getNextState(position, theMachine.getRandomJointMove(position));
            } catch (TransitionDefinitionException e) {
                System.err.println("Could not perform state update.");
                return null;
            } catch (MoveDefinitionException e) {
                System.err.println("Could not compute legal moves.");
                return null;
            }
            continue;
        }

        // Terminal state reached: unbox the goal list into a primitive array.
        List<Integer> goals;
        try {
            goals = theMachine.getGoals(position);
        } catch (GoalDefinitionException e) {
            System.err.println("Could not compute goal values.");
            return null;
        }
        int[] result = new int[goals.size()];
        int slot = 0;
        while (slot < result.length) {
            result[slot] = goals.get(slot);
            slot++;
        }
        return result;
    }

    // Ran out of time before reaching a terminal state.
    return null;
}
/**
 * Drives one match from start to finish: optionally sends analyze requests,
 * sends start requests, then repeatedly collects the players' moves and
 * advances the state until the state machine reports a terminal state, and
 * finally marks the match completed and sends stop requests. Observers are
 * notified at each stage.
 *
 * An InterruptedException ends the run silently when the match is flagged as
 * aborted; any other failure only has its stack trace printed.
 */
@Override
public void run() {
    try {
        // Analysis phase is skipped when the analysis clock is negative.
        if (match.getAnalysisClock() >= 0) {
            sendAnalyzeRequests();
        }

        notifyObservers(new ServerNewMatchEvent(stateMachine.getRoles(), currentState));
        // presumably converts a clock in seconds to milliseconds — TODO confirm units
        notifyObservers(new ServerTimeEvent(match.getStartClock() * 1000));
        sendStartRequests();
        appendErrorsToMatchDescription();

        // Main play loop: one iteration per joint move until the game ends.
        while (!stateMachine.isTerminal(currentState)) {
            publishWhenNecessary();
            saveWhenNecessary();
            notifyObservers(new ServerNewGameStateEvent(currentState));
            notifyObservers(new ServerTimeEvent(match.getPlayClock() * 1000));
            notifyObservers(new ServerMatchUpdatedEvent(match, spectatorServerKey, saveToFilename));

            // Collect this step's moves, then advance the state and record both in the match.
            previousMoves = sendPlayRequests();
            notifyObservers(new ServerNewMovesEvent(previousMoves));
            currentState = stateMachine.getNextState(currentState, previousMoves);
            match.appendMoves2(previousMoves);
            match.appendState(currentState.getContents());
            appendErrorsToMatchDescription();
        }

        // Terminal state reached: record the goal values and announce completion.
        match.markCompleted(stateMachine.getGoals(currentState));
        publishWhenNecessary();
        saveWhenNecessary();
        notifyObservers(new ServerNewGameStateEvent(currentState));
        notifyObservers(new ServerCompletedMatchEvent(getGoals()));
        notifyObservers(new ServerMatchUpdatedEvent(match, spectatorServerKey, saveToFilename));
        sendStopRequests(previousMoves);
    } catch (InterruptedException ie) {
        // An interrupt on an aborted match is treated as expected and ends the run quietly.
        // NOTE(review): the interrupt status is not restored here
        // (Thread.currentThread().interrupt()) — confirm no caller relies on it.
        if (match.isAborted()) {
            return;
        } else {
            ie.printStackTrace();
        }
    } catch (Exception e) {
        // Any other failure is only reported; nothing is rethrown to the caller.
        e.printStackTrace();
    }
}
/** * Performs one MCTS step. * * @param timeout Time limit * @return Index of the move performed by the player in the root state * @throws TimeoutException Time limit reached */ private int mctsStep(long timeout) { List<NonTerminalNode> path = new ArrayList<NonTerminalNode>(); int[] scores = null; Node current = root; NonTerminalNode c = (NonTerminalNode) root; // int rootMove = -1; // Selection List<List<Integer>> select = new ArrayList<List<Integer>>(); List<Move> selectM = new ArrayList<Move>(); while (current != null) { if (System.currentTimeMillis() > timeout) { return -1; } if (current instanceof TerminalNode) { scores = ((TerminalNode) current).goal; break; } c = (NonTerminalNode) current; path.add(c); List<Integer> selectEntry = new ArrayList<Integer>(); selectM = new ArrayList<Move>(); for (int p = 0; p < theMachine.getRoles().size(); p++) { List<Move> moves = c.legal.get(p); double bestval = Double.NEGATIVE_INFINITY; Move bestmove; int bestindex; if (c.n == 0) { bestmove = moves.get(0); bestindex = 0; } else { bestmove = null; bestindex = -1; for (int i = 0; i < moves.size(); i++) { Move m = moves.get(i); double value = c.q_action[p][i] + uct(c.n, c.n_action[p][i]); // System.out.println("Found a move with score " + value); if (value > bestval) { bestval = value; bestmove = m; bestindex = i; } } } selectEntry.add(bestindex); selectM.add(bestmove); } select.add(selectEntry); /* if(current == root){ rootMove = select.get(select.size() - 1)[role]; } */ current = c.successors.get(selectEntry); } // Expansion & Playout if (!(current instanceof TerminalNode)) { MachineState next; try { next = theMachine.getNextState(c.state, selectM); } catch (TransitionDefinitionException e) { System.err.println("Could not perform state update."); return -1; } if (theMachine.isTerminal(next)) { current = new TerminalNode(next); } else { current = new NonTerminalNode(next); } c.successors.put(select.get(select.size() - 1), current); scores = playout(current, timeout); } if 
(scores == null) { return -1; } // Backpropagation for (int i = 0; i < path.size(); i++) { NonTerminalNode node = path.get(i); node.n++; for (int j = 0; j < theMachine.getRoles().size(); j++) { node.n_action[j][select.get(i).get(j)]++; node.q_action[j][select.get(i).get(j)] = (node.q_action[j][select.get(i).get(j)] * node.n_action[j][select.get(i).get(j)] + scores[j]) / (node.n_action[j][select.get(i).get(j)] + 1); } } counter++; return select.get(0).get(role); }
@Override public void stateMachineMetaGame(long timeout) throws TransitionDefinitionException, MoveDefinitionException, GoalDefinitionException { System.out.println("Metagame. Preparing to test the state machine"); StateMachine stateMachine = getStateMachine(); ProverStateMachine psm = (ProverStateMachine) stateMachine; List gdlDescription = psm.gdlDescription; // The only line you have to adapt in this file StateMachine stateMachineX = new CachedStateMachine(new PropNetStateMachine()); stateMachineX.initialize(gdlDescription); MachineState rootState = stateMachine.getInitialState(); MachineState rootStateX = stateMachineX.getInitialState(); if (!compare(rootState, rootStateX)) { System.out.println("Initial states are different"); System.out.println(rootState); System.out.println(rootStateX); return; } long finishBy = timeout - 1000; int nbExpansion = 0; boolean abort = false; while (System.currentTimeMillis() < finishBy && !abort) { MachineState state = rootState; while (true) { boolean isTerminal = stateMachine.isTerminal(state); boolean isTerminalX = stateMachineX.isTerminal(state); if (!compare(isTerminal, isTerminalX)) { System.out.println("DISCREPANCY between isTerminal values"); System.out.println("State : " + state); abort = true; break; } if (isTerminal) { List goal = stateMachine.getGoals(state); List goalX = stateMachineX.getGoals(state); if (!compare(goal, goalX)) { System.out.println("DISCREPANCY between goal values"); System.out.println(goal); System.out.println(goalX); abort = true; break; } break; } for (Role role : stateMachine.getRoles()) { List moves = stateMachine.getLegalMoves(state, role); List movesX = stateMachineX.getLegalMoves(state, role); if (!compare(moves, movesX, role)) { System.out.println("DISCREPANCY between legal moves for role " + role); System.out.println(moves); System.out.println(movesX); abort = true; break; } } List jointMove = stateMachine.getRandomJointMove(state); MachineState nextState = stateMachine.getNextState(state, 
jointMove); MachineState nextStateX = stateMachineX.getNextState(state, jointMove); if (!compare(nextState, nextStateX)) { System.out.println("DISCREPANCY between next states"); System.out.println("Previous state : " + state); System.out.println("Joint move : " + jointMove); System.out.println("New state : " + nextState); System.out.println("New stateX : " + nextStateX); abort = true; break; } state = nextState; nbExpansion++; } } System.out.println("Metagaming finished"); System.out.println("Nb expansion : " + nbExpansion); }