/** * Generate and return a virtual job profile representing how the job will behave under the * provided configuration settings. * * @param conf the configuration settings * @param dataModel the data model that can reason about the data * @return a virtual job profile */ public MRJobProfile whatif(Configuration conf, DataSetModel dataModel) { this.virtualProf = new MRJobProfile(VIRTUAL + sourceProf.getJobId()); this.conf = conf; // Set the cluster name and job inputs virtualProf.setClusterName(sourceProf.getClusterName()); virtualProf.setJobInputs(ProfileUtils.getInputDirs(conf)); // Get the input specs List<MapInputSpecs> inputSpecs = dataModel.generateMapInputSpecs(this.conf); // Predict the map execution int numMappers = 0; for (MapInputSpecs inputSpec : inputSpecs) { MRMapProfile mapProf = mapOracles.get(inputSpec.getInputIndex()).whatif(conf, inputSpec); numMappers += inputSpec.getNumSplits(); virtualProf.addMapProfile(mapProf); } // Predict the reduce execution int numReducers = conf.getInt(MR_RED_TASKS, 1); if (numReducers > 0 && !ignoreReducers) { // Get the shuffle specs List<ReduceShuffleSpecs> shuffleSpecs = dataModel.generateReduceShuffleSpecs(conf, virtualProf.getMapProfiles()); for (ReduceShuffleSpecs shuffleSpec : shuffleSpecs) { MRReduceProfile redProf = redOracle.whatif(conf, shuffleSpec); virtualProf.addReduceProfile(redProf); } } // Update the averaged task profiles virtualProf.updateProfile(); virtualProf.addCounter(MRCounter.MAP_TASKS, (long) numMappers); virtualProf.addCounter(MRCounter.REDUCE_TASKS, (long) numReducers); return virtualProf; }