Example #1
0
  /**
   * Registers the tunable parameters that influence map tasks in the given parameter space,
   * skipping every parameter whose name appears in the exclusion set.
   *
   * <p>Descriptors are considered in this order: {@code SORT_MB}, {@code SPILL_PERC}, {@code
   * SORT_REC_PERC} and {@code NUM_SPILLS_COMBINE}. The last one is only considered when the
   * configuration holds a value for {@code MR_COMBINE_CLASS}.
   *
   * @param space the parameter space that receives the descriptors
   * @param conf the configuration used to look up the task memory and the combiner setting
   * @param exclude names of the parameters that must not be added
   */
  private static void addEffectMapParameters(
      ParameterSpace space, Configuration conf, Set<String> exclude) {

    // Bounds for the sort buffer are fixed fractions of the task memory
    long taskMem = ProfileUtils.getTaskMemory(conf);
    long upperMem = (long) (MAX_MEM_RATIO * taskMem);
    long lowerMem = (long) (MIN_MEM_RATIO * taskMem);

    // Sort buffer size: the shift by 20 divides by 2^20
    // (presumably bytes -> MB; confirm against ProfileUtils.getTaskMemory)
    boolean sortMbExcluded = exclude.contains(HadoopParameter.SORT_MB.toString());
    if (!sortMbExcluded) {
      int sortMbMin = (int) (lowerMem >> 20);
      int sortMbMax = (int) (upperMem >> 20);
      IntegerParamDescriptor sortMb =
          new IntegerParamDescriptor(
              HadoopParameter.SORT_MB, ParamTaskEffect.EFFECT_MAP, sortMbMin, sortMbMax);
      space.addParameterDescriptor(sortMb);
    }

    // Spill percentage, ranging over [0.2, 0.9]
    boolean spillPercExcluded = exclude.contains(HadoopParameter.SPILL_PERC.toString());
    if (!spillPercExcluded) {
      DoubleParamDescriptor spillPerc =
          new DoubleParamDescriptor(
              HadoopParameter.SPILL_PERC, ParamTaskEffect.EFFECT_MAP, 0.2, 0.9);
      space.addParameterDescriptor(spillPerc);
    }

    // Sort record percentage, ranging over [0.01, 0.5]
    boolean sortRecPercExcluded = exclude.contains(HadoopParameter.SORT_REC_PERC.toString());
    if (!sortRecPercExcluded) {
      DoubleParamDescriptor sortRecPerc =
          new DoubleParamDescriptor(
              HadoopParameter.SORT_REC_PERC, ParamTaskEffect.EFFECT_MAP, 0.01, 0.5);
      space.addParameterDescriptor(sortRecPerc);
    }

    // The combine-related parameter only applies when a combiner class is configured
    if (conf.get(MR_COMBINE_CLASS) == null) {
      return;
    }
    if (exclude.contains(HadoopParameter.NUM_SPILLS_COMBINE.toString())) {
      return;
    }
    ListParamDescriptor numSpillsCombine =
        new ListParamDescriptor(
            HadoopParameter.NUM_SPILLS_COMBINE, ParamTaskEffect.EFFECT_MAP, "3", "9999");
    space.addParameterDescriptor(numSpillsCombine);
  }
  /**
   * Builds a virtual job profile predicting how the job will behave under the supplied
   * configuration settings.
   *
   * <p>The new profile is stored in this object's {@code virtualProf} field and the configuration
   * in its {@code conf} field before any prediction is made.
   *
   * @param conf the configuration settings
   * @param dataModel the data model used to generate the map input and reduce shuffle specs
   * @return the virtual job profile
   */
  public MRJobProfile whatif(Configuration conf, DataSetModel dataModel) {
    this.virtualProf = new MRJobProfile(VIRTUAL + sourceProf.getJobId());
    this.conf = conf;

    // Carry over the cluster name and take the job inputs from the configuration
    virtualProf.setClusterName(sourceProf.getClusterName());
    virtualProf.setJobInputs(ProfileUtils.getInputDirs(conf));

    // Map side: one predicted profile per input spec, counting splits as we go
    int mapperCount = 0;
    for (MapInputSpecs mapSpecs : dataModel.generateMapInputSpecs(this.conf)) {
      MRMapProfile predictedMap =
          mapOracles.get(mapSpecs.getInputIndex()).whatif(conf, mapSpecs);
      mapperCount += mapSpecs.getNumSplits();
      virtualProf.addMapProfile(predictedMap);
    }

    // Reduce side: only when reducers are configured and not being ignored
    int reducerCount = conf.getInt(MR_RED_TASKS, 1);
    boolean predictReducers = reducerCount > 0 && !ignoreReducers;
    if (predictReducers) {
      // Shuffle specs are derived from the map profiles predicted above
      for (ReduceShuffleSpecs reduceSpecs :
          dataModel.generateReduceShuffleSpecs(conf, virtualProf.getMapProfiles())) {
        MRReduceProfile predictedReduce = redOracle.whatif(conf, reduceSpecs);
        virtualProf.addReduceProfile(predictedReduce);
      }
    }

    // Refresh the profile first, then record the task counts
    virtualProf.updateProfile();
    virtualProf.addCounter(MRCounter.MAP_TASKS, (long) mapperCount);
    virtualProf.addCounter(MRCounter.REDUCE_TASKS, (long) reducerCount);

    return virtualProf;
  }
Example #3
0
  /**
   * Refines the domain of selected parameter descriptors using the cluster, the configuration,
   * and the virtual job profile. A descriptor is only touched when the parameter space contains
   * it. The parameters currently handled are:
   *
   * <ul>
   *   <li>io.sort.mb
   *   <li>mapred.job.reduce.input.buffer.percent
   *   <li>mapred.reduce.tasks
   * </ul>
   *
   * @param space the parameter space holding the descriptors
   * @param cluster the cluster, queried for its total number of reduce slots
   * @param conf the configuration, used to look up the task memory
   * @param jobProfile the virtual job profile
   */
  public static void adjustParameterDescriptors(
      ParameterSpace space,
      ClusterConfiguration cluster,
      Configuration conf,
      MRJobProfile jobProfile) {

    // The task memory is shared by all three adjustments
    long taskMemory = ProfileUtils.getTaskMemory(conf);

    // io.sort.mb
    if (space.containsParamDescriptor(HadoopParameter.SORT_MB)) {
      IntegerParamDescriptor sortMbDescr =
          (IntegerParamDescriptor) space.getParameterDescriptor(HadoopParameter.SORT_MB);
      adjustParamDescrSortMB(sortMbDescr, jobProfile, taskMemory);
    }

    // mapred.job.reduce.input.buffer.percent
    if (space.containsParamDescriptor(HadoopParameter.RED_IN_BUFF_PERC)) {
      DoubleParamDescriptor redInBuffDescr =
          (DoubleParamDescriptor) space.getParameterDescriptor(HadoopParameter.RED_IN_BUFF_PERC);
      adjustParamDescrRedInBufferPerc(redInBuffDescr, jobProfile, taskMemory);
    }

    // mapred.reduce.tasks, which additionally depends on the cluster's reduce slots
    if (space.containsParamDescriptor(HadoopParameter.RED_TASKS)) {
      IntegerParamDescriptor redTasksDescr =
          (IntegerParamDescriptor) space.getParameterDescriptor(HadoopParameter.RED_TASKS);
      adjustParamDescrRedTasks(
          redTasksDescr, jobProfile, taskMemory, cluster.getTotalReduceSlots());
    }
  }