/** * Add the map-related parameters into the space, except the ones in the excluded set * * @param space the parameter space * @param conf the configuration * @param exclude the exclusion set */ private static void addEffectMapParameters( ParameterSpace space, Configuration conf, Set<String> exclude) { // Get the maximum memory long taskMem = ProfileUtils.getTaskMemory(conf); long maxMem = (long) (MAX_MEM_RATIO * taskMem); long minMem = (long) (MIN_MEM_RATIO * taskMem); // Add parameters that effect the map tasks if (!exclude.contains(HadoopParameter.SORT_MB.toString())) space.addParameterDescriptor( new IntegerParamDescriptor( HadoopParameter.SORT_MB, ParamTaskEffect.EFFECT_MAP, (int) (minMem >> 20), (int) (maxMem >> 20))); if (!exclude.contains(HadoopParameter.SPILL_PERC.toString())) space.addParameterDescriptor( new DoubleParamDescriptor( HadoopParameter.SPILL_PERC, ParamTaskEffect.EFFECT_MAP, 0.2, 0.9)); if (!exclude.contains(HadoopParameter.SORT_REC_PERC.toString())) space.addParameterDescriptor( new DoubleParamDescriptor( HadoopParameter.SORT_REC_PERC, ParamTaskEffect.EFFECT_MAP, 0.01, 0.5)); if (conf.get(MR_COMBINE_CLASS) != null && !exclude.contains(HadoopParameter.NUM_SPILLS_COMBINE.toString())) { space.addParameterDescriptor( new ListParamDescriptor( HadoopParameter.NUM_SPILLS_COMBINE, ParamTaskEffect.EFFECT_MAP, "3", "9999")); } }
/** * Generate and return a virtual job profile representing how the job will behave under the * provided configuration settings. * * @param conf the configuration settings * @param dataModel the data model that can reason about the data * @return a virtual job profile */ public MRJobProfile whatif(Configuration conf, DataSetModel dataModel) { this.virtualProf = new MRJobProfile(VIRTUAL + sourceProf.getJobId()); this.conf = conf; // Set the cluster name and job inputs virtualProf.setClusterName(sourceProf.getClusterName()); virtualProf.setJobInputs(ProfileUtils.getInputDirs(conf)); // Get the input specs List<MapInputSpecs> inputSpecs = dataModel.generateMapInputSpecs(this.conf); // Predict the map execution int numMappers = 0; for (MapInputSpecs inputSpec : inputSpecs) { MRMapProfile mapProf = mapOracles.get(inputSpec.getInputIndex()).whatif(conf, inputSpec); numMappers += inputSpec.getNumSplits(); virtualProf.addMapProfile(mapProf); } // Predict the reduce execution int numReducers = conf.getInt(MR_RED_TASKS, 1); if (numReducers > 0 && !ignoreReducers) { // Get the shuffle specs List<ReduceShuffleSpecs> shuffleSpecs = dataModel.generateReduceShuffleSpecs(conf, virtualProf.getMapProfiles()); for (ReduceShuffleSpecs shuffleSpec : shuffleSpecs) { MRReduceProfile redProf = redOracle.whatif(conf, shuffleSpec); virtualProf.addReduceProfile(redProf); } } // Update the averaged task profiles virtualProf.updateProfile(); virtualProf.addCounter(MRCounter.MAP_TASKS, (long) numMappers); virtualProf.addCounter(MRCounter.REDUCE_TASKS, (long) numReducers); return virtualProf; }
/** * Adjusts the domain of some parameter descriptors based on information from the cluster, the * configuration, and the virtual job profile. Currently, the parameters adjusted are: * * <ul> * <li>io.sort.mb * <li>mapred.job.reduce.input.buffer.percent * <li>mapred.reduce.tasks * </ul> * * @param space the parameter space * @param cluster the cluster * @param conf the configuration * @param jobProfile the virtual job profile */ public static void adjustParameterDescriptors( ParameterSpace space, ClusterConfiguration cluster, Configuration conf, MRJobProfile jobProfile) { long taskMemory = ProfileUtils.getTaskMemory(conf); // Adjust the max value of io.sort.mb if (space.containsParamDescriptor(HadoopParameter.SORT_MB)) { adjustParamDescrSortMB( (IntegerParamDescriptor) space.getParameterDescriptor(HadoopParameter.SORT_MB), jobProfile, taskMemory); } // Adjust the max value of mapred.job.reduce.input.buffer.percent if (space.containsParamDescriptor(HadoopParameter.RED_IN_BUFF_PERC)) { adjustParamDescrRedInBufferPerc( (DoubleParamDescriptor) space.getParameterDescriptor(HadoopParameter.RED_IN_BUFF_PERC), jobProfile, taskMemory); } // Adjust the min and max number of mapred.reduce.tasks if (space.containsParamDescriptor(HadoopParameter.RED_TASKS)) { adjustParamDescrRedTasks( (IntegerParamDescriptor) space.getParameterDescriptor(HadoopParameter.RED_TASKS), jobProfile, taskMemory, cluster.getTotalReduceSlots()); } }