/**
 * Builds the reward monitor matching the evaluation mode described by {@code parameters}.
 *
 * <p>A {@link RewardMonitorAverage} sized by {@code parameters.maxEpisodeTimeSteps()} is returned
 * when the run has exactly one episode or when
 * {@code RLParameters.OnPolicyTimeStepsEvaluationFlag} is set; otherwise a
 * {@link RewardMonitorEpisode} sized by {@code parameters.nbEpisode()} is returned.
 *
 * @param prefix passed through unchanged to the monitor constructor
 * @param nbBins passed through unchanged to the monitor constructor
 * @param parameters source of the episode count, the per-episode time-step limit and the flag
 * @return the monitor selected by the rule above
 */
private OnPolicyRewardMonitor createRewardMonitor(String prefix, int nbBins, Parameters parameters) {
  final int episodeCount = parameters.nbEpisode();
  final int stepsPerEpisode = parameters.maxEpisodeTimeSteps();
  // The flag is only consulted when there is more than one episode (short-circuit).
  final boolean monitorTimeSteps =
      episodeCount == 1 || parameters.hasFlag(RLParameters.OnPolicyTimeStepsEvaluationFlag);
  if (!monitorTimeSteps) {
    return new RewardMonitorEpisode(prefix, nbBins, episodeCount);
  }
  return new RewardMonitorAverage(prefix, nbBins, stepsPerEpisode);
}
/**
 * Expands every entry of {@code existing} into one copy per element of {@code values}.
 *
 * <p>Each copy is created with the {@code Parameters} copy constructor and then receives
 * {@code label} as a sweep parameter set to the corresponding value. The result holds
 * {@code existing.size() * values.length} entries, grouped by source entry and, within a group,
 * ordered like {@code values}. The input list itself is not modified.
 *
 * @param existing parameter sets to expand; asserted to be non-empty (checked only when
 *     assertions are enabled)
 * @param label name of the sweep parameter to set on every copy
 * @param values values to assign to {@code label}, one copy per value
 * @return a new list containing the expanded parameter sets
 */
public static List<Parameters> combine(List<Parameters> existing, String label, double[] values) {
  assert !existing.isEmpty();
  List<Parameters> expanded = new ArrayList<Parameters>();
  for (Parameters base : existing) {
    for (int i = 0; i < values.length; i++) {
      Parameters variant = new Parameters(base);
      variant.putSweepParam(label, values[i]);
      expanded.add(variant);
    }
  }
  return expanded;
}
/**
 * Selects the parameter sets that agree with every given {@code key=value} filter.
 *
 * <p>Each filter string is split at its first {@code '='}; the text before it is the key and the
 * text after it is parsed with {@link Double#parseDouble(String)}. When the same key appears in
 * several filters, the last one wins. A parameter set is kept when, for every filter key it
 * actually contains, its value is exactly equal (double {@code ==}) to the filter value. Filter
 * keys that a parameter set does not contain are ignored for that set, so they never exclude it.
 *
 * @param parameters parameter sets to examine; the list is not modified
 * @param filters filter expressions of the form {@code key=value}
 * @return a new list with the matching parameter sets, in their original order
 * @throws IllegalArgumentException if a filter contains no {@code '='}
 * @throws NumberFormatException if the value part of a filter is not a parsable double
 */
public static List<Parameters> filter(List<Parameters> parameters, String... filters) {
  Map<String, Double> filterMap = new LinkedHashMap<String, Double>();
  for (String filterString : filters) {
    int equalIndex = filterString.indexOf('=');
    if (equalIndex < 0) {
      // Without this check substring(0, -1) fails with an index error that hides the real cause.
      throw new IllegalArgumentException(
          "Filter must have the form key=value but was: " + filterString);
    }
    String key = filterString.substring(0, equalIndex);
    double value = Double.parseDouble(filterString.substring(equalIndex + 1));
    filterMap.put(key, value);
  }
  List<Parameters> result = new ArrayList<Parameters>();
  for (Parameters parameter : parameters) {
    boolean satisfy = true;
    for (Map.Entry<String, Double> entry : filterMap.entrySet()) {
      // A key missing from this parameter set does not count against it.
      if (!parameter.hasKey(entry.getKey())) {
        continue;
      }
      double parameterValue = parameter.get(entry.getKey());
      // Compare as primitives so this is a numeric comparison, never a reference comparison.
      if (parameterValue != entry.getValue().doubleValue()) {
        satisfy = false;
        break;
      }
    }
    if (satisfy) {
      result.add(parameter);
    }
  }
  return result;
}