コード例 #1
0
  public DownsamplingUCTNode(
      int maxplayer,
      int minplayer,
      GameState a_gs,
      DownsamplingUCTNode a_parent,
      long MAXACTIONS,
      float bound)
      throws Exception {
    parent = a_parent;
    gs = a_gs;
    if (parent == null) depth = 0;
    else depth = parent.depth + 1;
    evaluation_bound = bound;

    while (gs.winner() == -1
        && !gs.gameover()
        && !gs.canExecuteAnyAction(maxplayer)
        && !gs.canExecuteAnyAction(minplayer)) gs.cycle();
    if (gs.winner() != -1 || gs.gameover()) {
      type = -1;
    } else if (gs.canExecuteAnyAction(maxplayer)) {
      type = 0;
      moveGenerator = new PlayerActionGenerator(a_gs, maxplayer);
      moveGenerator.randomizeOrder();
    } else if (gs.canExecuteAnyAction(minplayer)) {
      type = 1;
      moveGenerator = new PlayerActionGenerator(a_gs, minplayer);
      moveGenerator.randomizeOrder();
    } else {
      type = -1;
      System.err.println("RTMCTSNode: This should not have happened...");
    }
  }
コード例 #2
0
  /**
   * Descends the tree from this node and returns the node to evaluate next.
   *
   * <p>Steps, in order:
   *
   * <ol>
   *   <li>If this node's depth has reached {@code max_depth}, this node is returned.
   *   <li>On the first visit of a node that has a move generator, the candidate action list is
   *       built and capped at {@code MAXACTIONS} entries: either by drawing {@code MAXACTIONS}
   *       random actions from the generator (when it reports more than {@code 2 * MAXACTIONS}),
   *       or by enumerating up to {@code 2 * MAXACTIONS} actions and randomly discarding the
   *       surplus.
   *   <li>While unexpanded candidate actions remain, one new child is created per call and
   *       returned.
   *   <li>Otherwise the child with the highest {@code C * exploitation + exploration} score is
   *       chosen and the search recurses into it.
   * </ol>
   *
   * @param maxplayer id of the maximizing player
   * @param minplayer id of the minimizing player
   * @param MAXACTIONS maximum number of candidate actions kept per node
   * @param cutOffTime passed through to the move generator when enumerating actions
   * @param max_depth depth at which the descent stops
   * @return the selected node; this node itself when the depth limit is reached, when there is
   *     no move generator to expand from, or when there is no child to descend into
   * @throws Exception declared by the signature; propagated from the calls made here
   */
  public DownsamplingUCTNode UCTSelectLeaf(
      int maxplayer, int minplayer, long MAXACTIONS, long cutOffTime, int max_depth)
      throws Exception {
    // Depth cut-off for the tree policy.
    if (depth >= max_depth) {
      return this;
    }

    // Lazily build the (downsampled) list of candidate actions.
    if (moveGenerator != null && actions == null) {
      actions = new ArrayList<PlayerAction>();
      children = new ArrayList<DownsamplingUCTNode>();
      final long enumerationCap = 2 * MAXACTIONS;
      if (moveGenerator.getSize() > enumerationCap) {
        // Too many actions to enumerate: draw random samples instead.
        int drawn = 0;
        while (drawn < MAXACTIONS) {
          actions.add(moveGenerator.getRandom());
          drawn++;
        }
      } else {
        // Enumerate; the explicit cap is needed since the generator's reported size can overflow.
        long collected = 0;
        for (PlayerAction next = moveGenerator.getNextAction(cutOffTime);
            next != null;
            next = moveGenerator.getNextAction(cutOffTime)) {
          actions.add(next);
          collected++;
          if (collected >= enumerationCap) {
            break;
          }
        }
        // Randomly drop entries until at most MAXACTIONS remain.
        while (actions.size() > MAXACTIONS) {
          int victim = r.nextInt(actions.size());
          actions.remove(victim);
        }
      }
    }

    // Expansion: create one child for the next untried action, if there is one.
    if (hasMoreActions) {
      if (moveGenerator == null) {
        return this;
      }
      final int expandedSoFar = children.size();
      if (expandedSoFar < actions.size()) {
        PlayerAction untried = actions.get(expandedSoFar);
        GameState successor = gs.cloneIssue(untried);
        DownsamplingUCTNode fresh =
            new DownsamplingUCTNode(
                maxplayer, minplayer, successor.clone(), this, MAXACTIONS, evaluation_bound);
        children.add(fresh);
        return fresh;
      }
      hasMoreActions = false;
    }

    // Bandit policy over the existing children.
    final double logParentVisits = Math.log((double) visit_count);
    final double span = 2 * evaluation_bound;
    DownsamplingUCTNode best = null;
    double best_score = 0;
    for (DownsamplingUCTNode child : children) {
      double mean = ((double) child.accum_evaluation) / child.visit_count;
      double exploration = Math.sqrt(logParentVisits / child.visit_count);
      // Rescale the mean by the evaluation bound; non-max nodes use the negated, shifted form.
      double exploitation =
          (type == 0) ? (mean + evaluation_bound) / span : -(mean - evaluation_bound) / span;
      double score = C * exploitation + exploration;
      if (best == null || score > best_score) {
        best_score = score;
        best = child;
      }
    }

    return (best == null)
        ? this
        : best.UCTSelectLeaf(maxplayer, minplayer, MAXACTIONS, cutOffTime, max_depth);
  }