Example #1
0
  /**
   * Computes the Q-value using the uncached transition dynamics produced by the Action object
   * methods. This computation *is* compatible with {@link
   * burlap.behavior.singleagent.options.Option} objects.
   *
   * @param sh the given state
   * @param ga the given action
   * @return the double value of a Q-value for the given state-action pair.
   */
  protected double computeQ(StateHashTuple sh, GroundedAction ga) {

    double qValue = 0.;

    if (ga.action instanceof Option) {

      // Options report their own expected cumulative reward up front.
      Option option = (Option) ga.action;
      qValue += option.getExpectedRewards(sh.s, ga.params);

      for (TransitionProbability transition : option.getTransitions(sh.s, ga.params)) {
        // For options, transition.p is already the *discounted* probability of
        // reaching s', so no additional discount factor is applied here.
        qValue += transition.p * this.value(transition.s);
      }

    } else {

      // Primitive action: standard one-step Bellman backup with discount gamma.
      for (TransitionProbability transition : ga.action.getTransitions(sh.s, ga.params)) {
        double nextStateValue = this.value(transition.s);
        double reward = rf.reward(sh.s, ga, transition.s);
        qValue += transition.p * (reward + this.gamma * nextStateValue);
      }
    }

    return qValue;
  }
Example #2
0
  /**
   * Returns the Q-value for a given state and the possible transitions from it for a given
   * action. This computation *is* compatible with {@link
   * burlap.behavior.singleagent.options.Option} objects.
   *
   * @param s the given state
   * @param trans the given action transitions
   * @return the double value of a Q-value
   */
  protected double computeQ(State s, ActionTransitions trans) {

    double qValue = 0.;

    if (trans.ga.action instanceof Option) {

      // Options report their own expected cumulative reward up front.
      Option option = (Option) trans.ga.action;
      qValue += option.getExpectedRewards(s, trans.ga.params);

      for (HashedTransitionProbability transition : trans.transitions) {
        // For options, transition.p is already the *discounted* probability of
        // reaching s', so no additional discount factor is applied here.
        qValue += transition.p * this.value(transition.sh);
      }

    } else {

      // Primitive action: standard one-step Bellman backup with discount gamma.
      for (HashedTransitionProbability transition : trans.transitions) {
        double nextStateValue = this.value(transition.sh);
        double reward = rf.reward(s, trans.ga, transition.sh.s);
        qValue += transition.p * (reward + this.gamma * nextStateValue);
      }
    }

    return qValue;
  }