@Override
public EpisodeAnalysis runLearningEpisodeFrom(State initialState, int maxSteps) {
    // Roll out a single learning episode from initialState, capped at maxSteps
    // actions, recording each transition while simultaneously updating the
    // learned model and refining the planner's value estimates.
    EpisodeAnalysis episode = new EpisodeAnalysis(initialState);
    State s = initialState;
    int stepCount = 0;

    while (!this.tf.isTerminal(s) && stepCount < maxSteps) {
        // Act according to the current policy and observe the outcome.
        GroundedAction ga = (GroundedAction) policy.getAction(s);
        State sPrime = ga.executeIn(s);
        double reward = this.rf.reward(s, ga, sPrime);
        episode.recordTransitionTo(sPrime, ga, reward);

        // Fold the observed transition into the learned model, then perform a
        // Bellman backup on the state we just left so planning stays current.
        this.model.updateModel(s, ga, sPrime, reward, this.tf.isTerminal(sPrime));
        this.modelPlanner.performBellmanUpdateOn(s);

        s = sPrime;
        stepCount++;
    }

    return episode;
}
@Override public EpisodeAnalysis runLearningEpisodeFrom(State initialState, int maxSteps) { this.toggleShouldAnnotateOptionDecomposition(shouldAnnotateOptions); EpisodeAnalysis ea = new EpisodeAnalysis(initialState); StateHashTuple curState = this.stateHash(initialState); eStepCounter = 0; maxQChangeInLastEpisode = 0.; while (!tf.isTerminal(curState.s) && eStepCounter < maxSteps) { GroundedAction action = (GroundedAction) learningPolicy.getAction(curState.s); QValue curQ = this.getQ(curState, action); StateHashTuple nextState = this.stateHash(action.executeIn(curState.s)); double maxQ = 0.; if (!tf.isTerminal(nextState.s)) { maxQ = this.getMaxQ(nextState); } // manage option specifics double r = 0.; double discount = this.gamma; if (action.action.isPrimitive()) { r = rf.reward(curState.s, action, nextState.s); eStepCounter++; ea.recordTransitionTo(nextState.s, action, r); } else { Option o = (Option) action.action; r = o.getLastCumulativeReward(); int n = o.getLastNumSteps(); discount = Math.pow(this.gamma, n); eStepCounter += n; if (this.shouldDecomposeOptions) { ea.appendAndMergeEpisodeAnalysis(o.getLastExecutionResults()); } else { ea.recordTransitionTo(nextState.s, action, r); } } double oldQ = curQ.q; // update Q-value curQ.q = curQ.q + this.learningRate.pollLearningRate(curState.s, action) * (r + (discount * maxQ) - curQ.q); double deltaQ = Math.abs(oldQ - curQ.q); if (deltaQ > maxQChangeInLastEpisode) { maxQChangeInLastEpisode = deltaQ; } // move on curState = nextState; } if (episodeHistory.size() >= numEpisodesToStore) { episodeHistory.poll(); } episodeHistory.offer(ea); return ea; }