/**
   * Builds the on-policy reward monitor matching the given experiment parameters.
   *
   * <p>A {@code RewardMonitorAverage} (constructed with the maximum number of time steps per
   * episode) is returned when the run has exactly one episode or when
   * {@code RLParameters.OnPolicyTimeStepsEvaluationFlag} is set on {@code parameters}. In every
   * other case a {@code RewardMonitorEpisode} (constructed with the number of episodes) is
   * returned.
   *
   * @param prefix forwarded unchanged to the monitor constructor
   * @param nbBins forwarded unchanged to the monitor constructor
   * @param parameters source of the episode count, the per-episode time-step limit and the flag
   * @return the newly created monitor
   */
private OnPolicyRewardMonitor createRewardMonitor(
     String prefix, int nbBins, Parameters parameters) {
    // Both values are read up front, before the branch, whichever monitor ends up being built.
    final int episodeCount = parameters.nbEpisode();
    final int stepsPerEpisode = parameters.maxEpisodeTimeSteps();
    // The flag is only consulted when there is more than one episode (short-circuit ||).
    final boolean monitorPerTimeStep =
        episodeCount == 1 || parameters.hasFlag(RLParameters.OnPolicyTimeStepsEvaluationFlag);
    if (!monitorPerTimeStep) {
      return new RewardMonitorEpisode(prefix, nbBins, episodeCount);
    }
    return new RewardMonitorAverage(prefix, nbBins, stepsPerEpisode);
  }
  // Example #2
  // 0
 public static List<Parameters> combine(List<Parameters> existing, String label, double[] values) {
   assert existing.size() > 0;
   List<Parameters> combination = new ArrayList<Parameters>();
   for (Parameters parameters : existing) {
     for (double value : values) {
       Parameters combinedParameters = new Parameters(parameters);
       combinedParameters.putSweepParam(label, value);
       combination.add(combinedParameters);
     }
   }
   return combination;
 }
  // Example #3
  // 0
 public static List<Parameters> filter(List<Parameters> parameters, String... filters) {
   Map<String, Double> filterMap = new LinkedHashMap<String, Double>();
   for (String filterString : filters) {
     int equalIndex = filterString.indexOf('=');
     filterMap.put(
         filterString.substring(0, equalIndex),
         Double.parseDouble(filterString.substring(equalIndex + 1)));
   }
   List<Parameters> result = new ArrayList<Parameters>();
   for (Parameters parameter : parameters) {
     boolean satisfy = true;
     for (Map.Entry<String, Double> entry : filterMap.entrySet()) {
       if (!parameter.hasKey(entry.getKey())) continue;
       double parameterValue = parameter.get(entry.getKey());
       if (parameterValue != entry.getValue()) {
         satisfy = false;
         break;
       }
     }
     if (satisfy) result.add(parameter);
   }
   return result;
 }