Example No. 1
0
 /**
  * Runs a random Monte-Carlo playout starting from the given node.
  *
  * @param node Node to start at
  * @param timeout Time limit
  * @return Scores for all players, or {@code null} if the time limit was reached or a
  *     state-machine error occurred
  */
 private int[] playout(Node node, long timeout) {
   // A terminal node already carries its goal values; no simulation needed.
   if (node instanceof TerminalNode) {
     return ((TerminalNode) node).goal;
   }
   MachineState current = ((NonTerminalNode) node).state;
   while (System.currentTimeMillis() < timeout) {
     if (theMachine.isTerminal(current)) {
       List<Integer> goalValues;
       try {
         goalValues = theMachine.getGoals(current);
       } catch (GoalDefinitionException e) {
         System.err.println("Could not compute goal values.");
         return null;
       }
       // Unbox the goal values into a primitive array.
       int[] result = new int[goalValues.size()];
       int i = 0;
       for (int value : goalValues) {
         result[i++] = value;
       }
       return result;
     }
     // Advance the simulation by one random joint move.
     try {
       current = theMachine.getNextState(current, theMachine.getRandomJointMove(current));
     } catch (TransitionDefinitionException e) {
       System.err.println("Could not perform state update.");
       return null;
     } catch (MoveDefinitionException e) {
       System.err.println("Could not compute legal moves.");
       return null;
     }
   }
   // Time ran out before a terminal state was reached.
   return null;
 }
Example No. 2
0
 /**
  * Performs Monte-Carlo tree search until the time limit is reached.
  *
  * <p>Repeatedly runs MCTS steps, then picks the most-visited move of our role in the
  * root state (robust child selection). As a side effect, {@code confidence} is set to a
  * pessimistic estimate of the chosen move's value (mean value minus the UCT exploration
  * term), clamped below at {@code MIN_SCORE}.
  *
  * @param timeout Time limit
  * @return Best move in the root state, or {@code null} if no action statistics exist
  */
 private Move mcts(long timeout) {
   counter = 0;
   while (System.currentTimeMillis() < timeout) {
     mctsStep(timeout);
   }
   System.out.println("Did " + counter + " MCTS steps.");
   // Robust child selection: choose the action of our role with the highest visit count.
   int best = -1;
   int bestScore = Integer.MIN_VALUE;
   confidence = 0;
   for (int i = 0; i < root.n_action[role].length; i++) {
     if (root.n_action[role][i] > bestScore) {
       bestScore = root.n_action[role][i];
       best = i;
       // Lower-confidence-bound estimate for the currently best move.
       confidence = (int) (root.q_action[role][i] - uct(root.n, root.n_action[role][i]));
     }
   }
   if (confidence < MIN_SCORE) {
     confidence = MIN_SCORE;
   }
   return (best == -1) ? null : root.legal.get(role).get(best);
 }
Example No. 3
0
  /**
   * Performs one MCTS step: selection via UCT, expansion of a single node, a random
   * playout, and backpropagation of the resulting scores along the selected path.
   *
   * @param timeout Time limit
   * @return Index of the move performed by the player in the root state, or -1 if the
   *     time limit was reached or a state-machine error occurred
   */
  private int mctsStep(long timeout) {
    List<NonTerminalNode> path = new ArrayList<NonTerminalNode>();
    int[] scores = null;
    Node current = root;
    NonTerminalNode c = (NonTerminalNode) root;

    // Selection: descend the tree, picking for every role the move that maximizes the
    // UCT value, until we fall off the tree or hit a terminal node.
    List<List<Integer>> select = new ArrayList<List<Integer>>();
    List<Move> selectM = new ArrayList<Move>();
    while (current != null) {
      if (System.currentTimeMillis() > timeout) {
        return -1;
      }
      if (current instanceof TerminalNode) {
        // Terminal nodes carry their goal values; skip expansion and playout.
        scores = ((TerminalNode) current).goal;
        break;
      }
      c = (NonTerminalNode) current;
      path.add(c);
      List<Integer> selectEntry = new ArrayList<Integer>();
      selectM = new ArrayList<Move>();
      for (int p = 0; p < theMachine.getRoles().size(); p++) {
        List<Move> moves = c.legal.get(p);
        double bestval = Double.NEGATIVE_INFINITY;
        Move bestmove;
        int bestindex;
        if (c.n == 0) {
          // Unvisited node: no statistics yet, take the first legal move.
          bestmove = moves.get(0);
          bestindex = 0;
        } else {
          bestmove = null;
          bestindex = -1;
          for (int i = 0; i < moves.size(); i++) {
            Move m = moves.get(i);
            double value = c.q_action[p][i] + uct(c.n, c.n_action[p][i]);
            if (value > bestval) {
              bestval = value;
              bestmove = m;
              bestindex = i;
            }
          }
        }
        selectEntry.add(bestindex);
        selectM.add(bestmove);
      }
      select.add(selectEntry);
      current = c.successors.get(selectEntry);
    }

    // Expansion & playout: create the successor reached by the selected joint move and
    // estimate its value with one random playout.
    if (!(current instanceof TerminalNode)) {
      MachineState next;
      try {
        next = theMachine.getNextState(c.state, selectM);
      } catch (TransitionDefinitionException e) {
        System.err.println("Could not perform state update.");
        return -1;
      }
      if (theMachine.isTerminal(next)) {
        current = new TerminalNode(next);
      } else {
        current = new NonTerminalNode(next);
      }
      c.successors.put(select.get(select.size() - 1), current);
      scores = playout(current, timeout);
    }

    if (scores == null) {
      return -1;
    }

    // Backpropagation: update visit counts and running means along the selected path.
    for (int i = 0; i < path.size(); i++) {
      NonTerminalNode node = path.get(i);
      node.n++;
      for (int j = 0; j < theMachine.getRoles().size(); j++) {
        int a = select.get(i).get(j);
        int n = ++node.n_action[j][a];
        // Incremental mean: q_new = q_old + (score - q_old) / n. The previous formula,
        // (q_old * n + score) / (n + 1) with n already incremented, over-weighted the
        // old mean by one visit (e.g. the first sample produced score/2 instead of
        // score), systematically biasing action values toward their initial value.
        node.q_action[j][a] += (scores[j] - node.q_action[j][a]) / n;
      }
    }

    counter++;
    return select.get(0).get(role);
  }