/**
 * Runs maximum-likelihood IRL (MLIRL) on the puddle-map grid world using a
 * jointly learned differentiable reward function + leaf-value initialization
 * ({@code LinearDiffRFVInit}), then evaluates the greedy policy derived from
 * the learned model on three start/goal configurations, writing each episode
 * to {@code trainedDir} and finally launching an episode visualizer.
 *
 * <p>Reads instance state: {@code puddleMap}, {@code expertDir}, {@code domain},
 * {@code sp}, {@code initialState}, {@code trainedDir}, {@code v}.
 * Side effects: mutates {@code this.puddleMap} (moves the goal cell), writes
 * episode files to disk, and opens a GUI window.
 */
public void runVFRFIRL() {

    // Feature generators: exact features for the reward function, and
    // distance-only features (relative to goal (20,20)) for the value-function
    // initialization. The "5" is the feature dimension used throughout.
    PuddleMapExactFV fvgen = new PuddleMapExactFV(this.puddleMap, 5);
    PuddleMapDistOnlyFV vfFvGen = new PuddleMapDistOnlyFV(this.puddleMap, 5, 20, 20);
    GridWorldTerminalFunction tf = new GridWorldTerminalFunction(20, 20);

    // Hand-set "true" objective reward used only for *evaluating* rollouts
    // below (goal reward 1, puddle penalties -10); it is not what IRL learns.
    LinearStateDifferentiableRF objectiveRF = new LinearStateDifferentiableRF(fvgen, fvgen.getDim());
    objectiveRF.setParameters(new double[] {1., -10, -10, 0, -10});

    // Earlier experiment variant kept for reference: separate diff. VF + RF.
    // LinearStateDiffVF vinit = new LinearStateDiffVF(vfFvGen, 5);
    // DiffVFRF rf = new DiffVFRF(objectiveRF, vinit);

    // Combined differentiable reward-function / value-initialization object
    // that MLIRL optimizes (5 RF params + 5 VF params).
    LinearDiffRFVInit rfvf = new LinearDiffRFVInit(fvgen, vfFvGen, 5, 5);

    // Expert demonstrations parsed from disk.
    java.util.List<EpisodeAnalysis> eas = EpisodeAnalysis.parseFilesIntoEAList(this.expertDir, domain, this.sp);

    int depth = 4;    // sparse-sampling planning horizon
    double beta = 10; // Boltzmann temperature (shared by planner and MLIRL)

    // Differentiable planner for IRL. NullTermination + depth-limited search:
    // episode ends are handled by the leaf-value initialization (rfvf), not a
    // terminal function. -1 = full (non-sampled) transition expansion.
    DifferentiableSparseSampling dss = new DifferentiableSparseSampling(
            domain, rfvf, new NullTermination(), 0.99,
            new NameDependentStateHashFactory(), depth, -1, beta);
    dss.setValueForLeafNodes(rfvf);
    dss.toggleDebugPrinting(false);

    MLIRLRequest request = new MLIRLRequest(domain, dss, eas, rfvf);
    request.setBoltzmannBeta(beta);

    // Learning rate 0.001, convergence threshold 0.01, max 10 iterations.
    MLIRL irl = new MLIRL(request, 0.001, 0.01, 10);
    // use this for only the given features
    // MLIRL irl = new MLIRL(request, 0.00001, 0.01, 10);
    // MLIRL irl = new MLIRL(request, 0.0001, 0.01, 10);
    irl.performIRL();

    // System.out.println(this.getFVAndShapeString(rf.getParameters()));

    String baseName = "SSRFVFD3"; // tag embedded in the output episode filenames

    // Non-differentiable planner over the *learned* rfvf, used to derive the
    // evaluation policy (mirrors dss's configuration).
    SparseSampling ss = new SparseSampling(
            domain, rfvf, new NullTermination(), 0.99,
            new NameDependentStateHashFactory(), depth, -1);
    ss.toggleDebugPrinting(false);
    ss.setValueForLeafNodes(rfvf);

    // Policy p = new GreedyQPolicy(dss);
    Policy p = new GreedyQPolicy(ss);

    // Rollout 1: easy start position (18,0); scored with objectiveRF, capped
    // at 200 steps, terminating at the original goal tf.
    State simple = this.initialState.copy();
    GridWorldDomain.setAgent(simple, 18, 0);
    EpisodeAnalysis trainedEp1 = p.evaluateBehavior(simple, objectiveRF, tf, 200);
    trainedEp1.writeToFile(trainedDir + "/IRL" + baseName + "EpSimple", this.sp);

    // Rollout 2: harder start position (0,9).
    State hardAgent = this.initialState.copy();
    GridWorldDomain.setAgent(hardAgent, 0, 9);
    EpisodeAnalysis trainedEp2 =
            p.evaluateBehavior(hardAgent, objectiveRF, tf, 200);
    trainedEp2.writeToFile(trainedDir + "/IRL" + baseName + "EpHardAgent", this.sp);

    // Clear cached planning results before changing the task.
    dss.resetPlannerResults();
    ss.resetPlannerResults();

    // Move the goal from (20,20) to (12,14): new terminal function, mark the
    // new goal cell in the puddle map, unmark the old one, and repoint the
    // distance features at the new goal.
    // NOTE(review): this assumes fvgen/vfFvGen read this.puddleMap by
    // reference (so the in-place edits are visible to them) — confirm against
    // PuddleMapExactFV/PuddleMapDistOnlyFV constructors.
    int ngx = 12;
    int ngy = 14;
    tf = new GridWorldTerminalFunction(ngx, ngy);
    // dss.setTf(tf);
    this.puddleMap[ngx][ngy] = 1;
    this.puddleMap[20][20] = 0;
    vfFvGen.setGoal(ngx, ngy);

    // Rollout 3: original start (0,0) toward the relocated goal — tests
    // whether the learned reward/value generalizes to a new goal location.
    State hardGoal = this.initialState.copy();
    GridWorldDomain.setAgent(hardGoal, 0, 0);
    EpisodeAnalysis trainedEp3 = p.evaluateBehavior(hardGoal, objectiveRF, tf, 200);
    trainedEp3.writeToFile(trainedDir + "/IRL" + baseName + "EpHardGoal", this.sp);

    // Launch GUI browser over all episode files in trainedDir (including the
    // three written above).
    new EpisodeSequenceVisualizer(this.v, this.domain, this.sp, this.trainedDir);
}