/** * Add the map-related parameters into the space, except the ones in the excluded set * * @param space the parameter space * @param conf the configuration * @param exclude the exclusion set */ private static void addEffectMapParameters( ParameterSpace space, Configuration conf, Set<String> exclude) { // Get the maximum memory long taskMem = ProfileUtils.getTaskMemory(conf); long maxMem = (long) (MAX_MEM_RATIO * taskMem); long minMem = (long) (MIN_MEM_RATIO * taskMem); // Add parameters that effect the map tasks if (!exclude.contains(HadoopParameter.SORT_MB.toString())) space.addParameterDescriptor( new IntegerParamDescriptor( HadoopParameter.SORT_MB, ParamTaskEffect.EFFECT_MAP, (int) (minMem >> 20), (int) (maxMem >> 20))); if (!exclude.contains(HadoopParameter.SPILL_PERC.toString())) space.addParameterDescriptor( new DoubleParamDescriptor( HadoopParameter.SPILL_PERC, ParamTaskEffect.EFFECT_MAP, 0.2, 0.9)); if (!exclude.contains(HadoopParameter.SORT_REC_PERC.toString())) space.addParameterDescriptor( new DoubleParamDescriptor( HadoopParameter.SORT_REC_PERC, ParamTaskEffect.EFFECT_MAP, 0.01, 0.5)); if (conf.get(MR_COMBINE_CLASS) != null && !exclude.contains(HadoopParameter.NUM_SPILLS_COMBINE.toString())) { space.addParameterDescriptor( new ListParamDescriptor( HadoopParameter.NUM_SPILLS_COMBINE, ParamTaskEffect.EFFECT_MAP, "3", "9999")); } }
/**
 * Returns a parameter space with all the parameter descriptors for a map-only job.
 *
 * @param conf the job configuration
 * @param exclude the exclusion set
 * @return the parameter space
 */
private static ParameterSpace getMapOnlyParamSpace(Configuration conf, Set<String> exclude) {
  ParameterSpace space = new ParameterSpace();

  // Output compression is the only tunable for a map-only job
  boolean compressOutExcluded = exclude.contains(HadoopParameter.COMPRESS_OUT.toString());
  if (!compressOutExcluded) {
    space.addParameterDescriptor(
        new BooleanParamDescriptor(HadoopParameter.COMPRESS_OUT, ParamTaskEffect.EFFECT_MAP));
  }

  return space;
}
/**
 * Returns a parameter space with all the parameter descriptors that can affect the execution of
 * the next MapReduce job (currently, the number of reducers and output compression).
 *
 * @param conf the job configuration
 * @return the parameter space
 */
public static ParameterSpace getParamSpaceForNextJob(Configuration conf) {
  Set<String> exclude = buildParamExclusionSet(conf);
  ParameterSpace space = new ParameterSpace();

  // Only tune the reducer count when the job actually has reducers
  boolean hasReducers = conf.getInt(MR_RED_TASKS, 1) != 0;
  if (hasReducers && !exclude.contains(HadoopParameter.RED_TASKS.toString())) {
    space.addParameterDescriptor(
        new IntegerParamDescriptor(
            HadoopParameter.RED_TASKS, ParamTaskEffect.EFFECT_REDUCE, 1, 100));
  }

  if (!exclude.contains(HadoopParameter.COMPRESS_MAP_OUT.toString())) {
    space.addParameterDescriptor(
        new BooleanParamDescriptor(
            HadoopParameter.COMPRESS_MAP_OUT, ParamTaskEffect.EFFECT_BOTH));
  }

  return space;
}
/** * Add the parameters that effect both map and reducers into the space, except the ones in the * excluded set * * @param space the parameter space * @param conf the configuration * @param exclude the exclusion set */ private static void addEffectBothParameters( ParameterSpace space, Configuration conf, Set<String> exclude) { // Add parameters the effect both map and reduce tasks if (!exclude.contains(HadoopParameter.SORT_FACTOR.toString())) space.addParameterDescriptor( new IntegerParamDescriptor( HadoopParameter.SORT_FACTOR, ParamTaskEffect.EFFECT_BOTH, 10, 100)); if (!exclude.contains(HadoopParameter.COMPRESS_MAP_OUT.toString())) space.addParameterDescriptor( new BooleanParamDescriptor( HadoopParameter.COMPRESS_MAP_OUT, ParamTaskEffect.EFFECT_BOTH)); if (conf.get(MR_COMBINE_CLASS) != null && !exclude.contains(HadoopParameter.COMBINE.toString())) { space.addParameterDescriptor( new BooleanParamDescriptor(HadoopParameter.COMBINE, ParamTaskEffect.EFFECT_BOTH)); } }
/** * Add the reduce-related parameters into the space, except the ones in the excluded set * * @param space the parameter space * @param conf the configuration * @param exclude the exclusion set */ private static void addEffectReduceParameters( ParameterSpace space, Configuration conf, Set<String> exclude) { // Add parameters the effect the reduce tasks if (!exclude.contains(HadoopParameter.RED_TASKS.toString())) space.addParameterDescriptor( new IntegerParamDescriptor( HadoopParameter.RED_TASKS, ParamTaskEffect.EFFECT_REDUCE, 1, 100)); if (!exclude.contains(HadoopParameter.INMEM_MERGE.toString())) space.addParameterDescriptor( new IntegerParamDescriptor( HadoopParameter.INMEM_MERGE, ParamTaskEffect.EFFECT_REDUCE, 10, 1000)); if (!exclude.contains(HadoopParameter.SHUFFLE_IN_BUFF_PERC.toString())) space.addParameterDescriptor( new DoubleParamDescriptor( HadoopParameter.SHUFFLE_IN_BUFF_PERC, ParamTaskEffect.EFFECT_REDUCE, 0.2, 0.9)); if (!exclude.contains(HadoopParameter.SHUFFLE_MERGE_PERC.toString())) space.addParameterDescriptor( new DoubleParamDescriptor( HadoopParameter.SHUFFLE_MERGE_PERC, ParamTaskEffect.EFFECT_REDUCE, 0.2, 0.9)); if (!exclude.contains(HadoopParameter.RED_IN_BUFF_PERC.toString())) space.addParameterDescriptor( new DoubleParamDescriptor( HadoopParameter.RED_IN_BUFF_PERC, ParamTaskEffect.EFFECT_REDUCE, 0, 0.8)); if (!exclude.contains(HadoopParameter.COMPRESS_OUT.toString())) space.addParameterDescriptor( new BooleanParamDescriptor(HadoopParameter.COMPRESS_OUT, ParamTaskEffect.EFFECT_REDUCE)); }
/** * Adjusts the domain of some parameter descriptors based on information from the cluster, the * configuration, and the virtual job profile. Currently, the parameters adjusted are: * * <ul> * <li>io.sort.mb * <li>mapred.job.reduce.input.buffer.percent * <li>mapred.reduce.tasks * </ul> * * @param space the parameter space * @param cluster the cluster * @param conf the configuration * @param jobProfile the virtual job profile */ public static void adjustParameterDescriptors( ParameterSpace space, ClusterConfiguration cluster, Configuration conf, MRJobProfile jobProfile) { long taskMemory = ProfileUtils.getTaskMemory(conf); // Adjust the max value of io.sort.mb if (space.containsParamDescriptor(HadoopParameter.SORT_MB)) { adjustParamDescrSortMB( (IntegerParamDescriptor) space.getParameterDescriptor(HadoopParameter.SORT_MB), jobProfile, taskMemory); } // Adjust the max value of mapred.job.reduce.input.buffer.percent if (space.containsParamDescriptor(HadoopParameter.RED_IN_BUFF_PERC)) { adjustParamDescrRedInBufferPerc( (DoubleParamDescriptor) space.getParameterDescriptor(HadoopParameter.RED_IN_BUFF_PERC), jobProfile, taskMemory); } // Adjust the min and max number of mapred.reduce.tasks if (space.containsParamDescriptor(HadoopParameter.RED_TASKS)) { adjustParamDescrRedTasks( (IntegerParamDescriptor) space.getParameterDescriptor(HadoopParameter.RED_TASKS), jobProfile, taskMemory, cluster.getTotalReduceSlots()); } }
/* does it */ public void doit() throws Exception { ParameterPoint pp = (ParameterPoint) pullInput(0); ExampleTable varET = (ExampleTable) pullInput(1); ParameterSpace ps = (new VariogramParamSpace()).getDefaultSpace(); // not including the last param, PowerExponent int numParams = 9; // whether the parameter at index i is // coefficient (true) vs range (false) boolean[] cvr = new boolean[numParams]; cvr[0] = true; cvr[1] = true; cvr[2] = false; cvr[3] = true; cvr[4] = false; cvr[5] = true; cvr[6] = false; cvr[7] = true; cvr[8] = true; double[] range = TableUtilities.getMinMax(varET, varET.getInputFeatures()[0]); double rMax = range[1]; range = TableUtilities.getMinMax(varET, varET.getOutputFeatures()[0]); double cMax = range[1]; if (debug) { System.out.println(this.getAlias() + ": Range Max:" + rMax + ", Coefficient Max:" + cMax); } double val = Double.NEGATIVE_INFINITY; double max = val; for (int i = 0; i < numParams; i++) { val = pp.getValue(i); if (cvr[i]) { max = cMax; } else { max = rMax; } if (Double.isNaN(val)) { ps.setMinValue(i, 0.0); ps.setMaxValue(i, max); ps.setResolution(i, this.allParametersResolution); } else { ps.setMinValue(i, val); ps.setMaxValue(i, val); ps.setResolution(i, 1); } } // the special case for the exponent if (Double.isNaN(val)) { int expIdx = 8; // leave the min at zero, change the max to 3 ps.setMaxValue(expIdx, 3); ps.setResolution(expIdx, 30); } pushOutput(ps, 0); }