/** * Add the map-related parameters into the space, except the ones in the excluded set * * @param space the parameter space * @param conf the configuration * @param exclude the exclusion set */ private static void addEffectMapParameters( ParameterSpace space, Configuration conf, Set<String> exclude) { // Get the maximum memory long taskMem = ProfileUtils.getTaskMemory(conf); long maxMem = (long) (MAX_MEM_RATIO * taskMem); long minMem = (long) (MIN_MEM_RATIO * taskMem); // Add parameters that effect the map tasks if (!exclude.contains(HadoopParameter.SORT_MB.toString())) space.addParameterDescriptor( new IntegerParamDescriptor( HadoopParameter.SORT_MB, ParamTaskEffect.EFFECT_MAP, (int) (minMem >> 20), (int) (maxMem >> 20))); if (!exclude.contains(HadoopParameter.SPILL_PERC.toString())) space.addParameterDescriptor( new DoubleParamDescriptor( HadoopParameter.SPILL_PERC, ParamTaskEffect.EFFECT_MAP, 0.2, 0.9)); if (!exclude.contains(HadoopParameter.SORT_REC_PERC.toString())) space.addParameterDescriptor( new DoubleParamDescriptor( HadoopParameter.SORT_REC_PERC, ParamTaskEffect.EFFECT_MAP, 0.01, 0.5)); if (conf.get(MR_COMBINE_CLASS) != null && !exclude.contains(HadoopParameter.NUM_SPILLS_COMBINE.toString())) { space.addParameterDescriptor( new ListParamDescriptor( HadoopParameter.NUM_SPILLS_COMBINE, ParamTaskEffect.EFFECT_MAP, "3", "9999")); } }
/**
 * Builds the parameter space for a map-only job. The only descriptor it can contain is the
 * output-compression flag, and only when that parameter is not excluded.
 *
 * @param conf the job configuration (not consulted by this method)
 * @param exclude names of the parameters that must not be added
 * @return a new parameter space, possibly empty
 */
private static ParameterSpace getMapOnlyParamSpace(Configuration conf, Set<String> exclude) {
  final ParameterSpace mapOnlySpace = new ParameterSpace();

  final String compressOutName = HadoopParameter.COMPRESS_OUT.toString();
  if (!exclude.contains(compressOutName)) {
    // With no reduce phase, output compression is applied by the map tasks
    BooleanParamDescriptor compressOut =
        new BooleanParamDescriptor(HadoopParameter.COMPRESS_OUT, ParamTaskEffect.EFFECT_MAP);
    mapOnlySpace.addParameterDescriptor(compressOut);
  }

  return mapOnlySpace;
}
/**
 * Returns a parameter space with the parameter descriptors that can affect the execution of the
 * next MapReduce job: currently the number of reducers and the compression of the job output.
 *
 * <p>The exclusion set is built from the configuration via {@code buildParamExclusionSet(conf)};
 * excluded parameters are never added. The number of reducers is only added when {@code
 * MR_RED_TASKS} is not configured as 0 (presumably a map-only job).
 *
 * <p>The compression descriptor refers to the <em>final</em> job output ({@code COMPRESS_OUT}),
 * because that is what the next job reads. Compression of the intermediate map output is
 * consumed within the same job and therefore is not part of this space. The task effect follows
 * the convention used by the other space builders in this class: the map tasks write the output
 * when there are no reducers, the reduce tasks otherwise.
 *
 * @param conf the job configuration
 * @return the parameter space
 */
public static ParameterSpace getParamSpaceForNextJob(Configuration conf) {
  Set<String> exclude = buildParamExclusionSet(conf);
  ParameterSpace space = new ParameterSpace();

  // A reducer count of 0 means there is no reduce phase to tune
  boolean hasReducers = conf.getInt(MR_RED_TASKS, 1) != 0;

  if (hasReducers && !exclude.contains(HadoopParameter.RED_TASKS.toString())) {
    space.addParameterDescriptor(
        new IntegerParamDescriptor(
            HadoopParameter.RED_TASKS, ParamTaskEffect.EFFECT_REDUCE, 1, 100));
  }

  if (!exclude.contains(HadoopParameter.COMPRESS_OUT.toString())) {
    space.addParameterDescriptor(
        new BooleanParamDescriptor(
            HadoopParameter.COMPRESS_OUT,
            hasReducers ? ParamTaskEffect.EFFECT_REDUCE : ParamTaskEffect.EFFECT_MAP));
  }

  return space;
}
/** * Add the parameters that effect both map and reducers into the space, except the ones in the * excluded set * * @param space the parameter space * @param conf the configuration * @param exclude the exclusion set */ private static void addEffectBothParameters( ParameterSpace space, Configuration conf, Set<String> exclude) { // Add parameters the effect both map and reduce tasks if (!exclude.contains(HadoopParameter.SORT_FACTOR.toString())) space.addParameterDescriptor( new IntegerParamDescriptor( HadoopParameter.SORT_FACTOR, ParamTaskEffect.EFFECT_BOTH, 10, 100)); if (!exclude.contains(HadoopParameter.COMPRESS_MAP_OUT.toString())) space.addParameterDescriptor( new BooleanParamDescriptor( HadoopParameter.COMPRESS_MAP_OUT, ParamTaskEffect.EFFECT_BOTH)); if (conf.get(MR_COMBINE_CLASS) != null && !exclude.contains(HadoopParameter.COMBINE.toString())) { space.addParameterDescriptor( new BooleanParamDescriptor(HadoopParameter.COMBINE, ParamTaskEffect.EFFECT_BOTH)); } }
/** * Add the reduce-related parameters into the space, except the ones in the excluded set * * @param space the parameter space * @param conf the configuration * @param exclude the exclusion set */ private static void addEffectReduceParameters( ParameterSpace space, Configuration conf, Set<String> exclude) { // Add parameters the effect the reduce tasks if (!exclude.contains(HadoopParameter.RED_TASKS.toString())) space.addParameterDescriptor( new IntegerParamDescriptor( HadoopParameter.RED_TASKS, ParamTaskEffect.EFFECT_REDUCE, 1, 100)); if (!exclude.contains(HadoopParameter.INMEM_MERGE.toString())) space.addParameterDescriptor( new IntegerParamDescriptor( HadoopParameter.INMEM_MERGE, ParamTaskEffect.EFFECT_REDUCE, 10, 1000)); if (!exclude.contains(HadoopParameter.SHUFFLE_IN_BUFF_PERC.toString())) space.addParameterDescriptor( new DoubleParamDescriptor( HadoopParameter.SHUFFLE_IN_BUFF_PERC, ParamTaskEffect.EFFECT_REDUCE, 0.2, 0.9)); if (!exclude.contains(HadoopParameter.SHUFFLE_MERGE_PERC.toString())) space.addParameterDescriptor( new DoubleParamDescriptor( HadoopParameter.SHUFFLE_MERGE_PERC, ParamTaskEffect.EFFECT_REDUCE, 0.2, 0.9)); if (!exclude.contains(HadoopParameter.RED_IN_BUFF_PERC.toString())) space.addParameterDescriptor( new DoubleParamDescriptor( HadoopParameter.RED_IN_BUFF_PERC, ParamTaskEffect.EFFECT_REDUCE, 0, 0.8)); if (!exclude.contains(HadoopParameter.COMPRESS_OUT.toString())) space.addParameterDescriptor( new BooleanParamDescriptor(HadoopParameter.COMPRESS_OUT, ParamTaskEffect.EFFECT_REDUCE)); }