Ejemplo n.º 1
  /**
   * Adjusts the domain of the <tt>mapred.job.reduce.input.buffer.percent</tt> parameter descriptor
   * based on the virtual job profile.
   *
   * <ul>
   *   <li>mapred.job.reduce.input.buffer.percent
   *   <li>mapred.reduce.tasks
   * </ul>
   *
   * @param paramDescr the current parameter descriptor
   * @param jobProfile the job profile
   * @param taskMemory the task memory (in bytes)
   */
  public static void adjustParamDescrRedInBufferPerc(
      DoubleParamDescriptor paramDescr, MRJobProfile jobProfile, long taskMemory) {

    // Nothing to adjust when the job has no (or an empty) average reduce profile
    MRReduceProfile redProfile = jobProfile.getAvgReduceProfile();
    if (redProfile == null || redProfile.isEmpty()) return;

    // Find the memory required by the reduce tasks
    long redMemory = WhatIfUtils.getReduceMemoryRequired(redProfile);

    // Calculate the percent of memory to be used to buffer input: the fraction of the task
    // memory that remains after subtracting the reducers' own requirement, clamped to [0, 0.8].
    // NOTE(review): when taskMemory is 0 the floating-point division yields NaN or -Infinity;
    // NaN fails both comparisons below and would pass through unclamped -- confirm callers never
    // pass 0.
    double percent = (taskMemory - redMemory) / (double) taskMemory;
    if (percent < 0.0) percent = 0;
    else if (percent > 0.8) percent = 0.8;
    // NOTE(review): the listing is truncated here; the statement that applies `percent` to
    // paramDescr and the closing brace of the method are not visible.

Ejemplo n.º 2
  /**
   * Adjusts the domain of the <tt>mapred.reduce.tasks</tt> parameter descriptor based on the
   * virtual job profile.
   *
   * @param paramDescr the current parameter descriptor
   * @param jobProfile the job profile
   * @param taskMemory the task memory (in bytes)
   * @param numRedSlots the total number of reduce slots in the cluster
   */
  public static void adjustParamDescrRedTasks(
      IntegerParamDescriptor paramDescr,
      MRJobProfile jobProfile,
      long taskMemory,
      int numRedSlots) {

    // Get the reduce profile; the descriptor is left untouched when it is null or empty
    MRReduceProfile redProfile = jobProfile.getAvgReduceProfile();
    if (redProfile == null || redProfile.isEmpty()) return;

    // Calculate the (uncompressed) reduce input size: the REDUCE_SHUFFLE_BYTES counter divided
    // by the INTERM_COMPRESS_RATIO statistic (the 1d argument is presumably the fallback used
    // when the statistic is absent -- confirm against getStatistic).
    // NOTE(review): the left operand of the multiplication below is missing from this listing,
    // so the expression is incomplete as shown.
    double shuffleSize =
            * redProfile.getCounter(MRCounter.REDUCE_SHUFFLE_BYTES)
            / redProfile.getStatistic(MRStatistics.INTERM_COMPRESS_RATIO, 1d);

    // Calculate the number of reduce groups: the task count multiplied by the
    // REDUCE_INPUT_GROUPS counter
    long numGroups =
        redProfile.getNumTasks() * redProfile.getCounter(MRCounter.REDUCE_INPUT_GROUPS, 1l);

    // Calculate the min and max number of reducers:
    //   min = ceil(shuffleSize / (2 * taskMemory))
    //   max = ceil(4 * shuffleSize / taskMemory), capped at numGroups, then raised to at least
    //         numRedSlots, and finally raised to at least min.
    // NOTE(review): the numRedSlots floor is applied after the numGroups cap, so the resulting
    // upper bound can exceed numGroups -- confirm this ordering is intended.
    double min = Math.ceil(shuffleSize / (2 * taskMemory));
    double max = Math.ceil(4 * shuffleSize / taskMemory);
    max = Math.min(max, numGroups);
    max = Math.max(max, numRedSlots);
    if (max < min) max = min;

    // Set the min and max number of reducers (the double bounds are narrowed to int here)
    paramDescr.setMinMaxValue((int) min, (int) max);
    // NOTE(review): the closing brace of the method is not visible in this listing.
Ejemplo n.º 3
  /**
   * Builds the XML representation of a job profile using the supplied DOM document as the
   * element factory.
   *
   * <p>NOTE(review): this listing is truncated -- several blocks are opened but never closed and
   * the statements that attach the created elements are not visible, so only the visible steps
   * are described here.
   *
   * @param jobProfile the job profile to export
   * @param doc the DOM document used to create the elements
   */
  protected void exportXML(MRJobProfile jobProfile, Document doc) {

    // Create the job profile element
    Element job = doc.createElement(JOB_PROFILE);

    // Add the job properties: job id, mapper/reducer counts (0 when the counter is missing) and,
    // when one is set, the cluster name
    job.setAttribute(ID, jobProfile.getJobId());
    job.setAttribute(NUM_MAPPERS, jobProfile.getCounter(MRCounter.MAP_TASKS, 0l).toString());
    job.setAttribute(NUM_REDUCERS, jobProfile.getCounter(MRCounter.REDUCE_TASKS, 0l).toString());
    // NOTE(review): the closing brace of this `if` is missing from the listing
    if (jobProfile.getClusterName() != null) {
      job.setAttribute(CLUSTER_NAME, jobProfile.getClusterName());

    // Add the job inputs: one INPUT element is created per job input
    // NOTE(review): the rest of the loop body and its closing brace are missing from the listing
    Element inputs = doc.createElement(INPUTS);
    for (String jobInput : jobProfile.getJobInputs()) {
      Element input = doc.createElement(INPUT);

    // Add the map elements: one MAP_PROFILE element per average map profile, tagged with the
    // index of the input it belongs to
    // NOTE(review): the rest of the loop body and its closing brace are missing from the listing
    for (MRMapProfile mapProfile : jobProfile.getAvgMapProfiles()) {
      Element map = buildTaskProfileElement(mapProfile, doc, MAP_PROFILE);
      map.setAttribute(INPUT_INDEX, Integer.toString(mapProfile.getInputIndex()));

    // Add the reduce element, skipped when the average reduce profile is empty
    // NOTE(review): redProfile is dereferenced without a null check here -- confirm that
    // getAvgReduceProfile() cannot return null on this path.
    MRReduceProfile redProfile = jobProfile.getAvgReduceProfile();
    if (!redProfile.isEmpty()) {
      Element reducer = buildTaskProfileElement(redProfile, doc, REDUCE_PROFILE);
      // NOTE(review): the listing ends here; the remainder of the method is not visible.
Ejemplo n.º 4
  /**
   * Reads a job profile from a DOM document whose root element is a JOB_PROFILE element.
   *
   * <p>NOTE(review): this listing is truncated -- several loop and {@code if} bodies are missing,
   * so only the visible steps are described here.
   *
   * @param doc the DOM document to read
   * @return the job profile populated from the document
   * @throws RuntimeException if the root element's tag name is not JOB_PROFILE
   */
  protected MRJobProfile importXML(Document doc) {

    // Get the root element and reject documents with an unexpected top-level tag
    Element root = doc.getDocumentElement();
    if (!JOB_PROFILE.equals(root.getTagName()))
      throw new RuntimeException("ERROR: Bad XML File: top-level element not <job_profile>");

    // Get the profile attributes: the job id plus the mapper/reducer counts
    // NOTE(review): Long.parseLong throws NumberFormatException when an attribute is absent
    // (DOM returns "" for a missing attribute) or not numeric.
    MRJobProfile jobProfile = new MRJobProfile(root.getAttribute(ID));
    jobProfile.addCounter(MRCounter.MAP_TASKS, Long.parseLong(root.getAttribute(NUM_MAPPERS)));
    jobProfile.addCounter(MRCounter.REDUCE_TASKS, Long.parseLong(root.getAttribute(NUM_REDUCERS)));

    // The cluster name is optional and is only used when non-empty
    // NOTE(review): the body and closing brace of this `if` are missing from the listing
    String clusterName = root.getAttribute(CLUSTER_NAME);
    if (clusterName != null && !clusterName.equals("")) {

    // Get the profile inputs from the children of the first INPUTS element
    // NOTE(review): item(0) returns null when there is no INPUTS element, which would make the
    // getChildNodes() call fail -- confirm the element is always present. The body of the loop
    // below (what gets added to inputList) is missing from the listing.
    NodeList inputs = root.getElementsByTagName(INPUTS).item(0).getChildNodes();
    ArrayList<String> inputList = new ArrayList<String>(1);
    for (int i = 0; i < inputs.getLength(); ++i) {
      if (inputs.item(i) instanceof Element) {
    jobProfile.setJobInputs(inputList.toArray(new String[0]));

    // Get the map profiles: every MAP_PROFILE element found under the root
    NodeList maps = root.getElementsByTagName(MAP_PROFILE);
    for (int i = 0; i < maps.getLength(); ++i) {
      if (maps.item(i) instanceof Element) {
        Element map = (Element) maps.item(i);

        // Get the map profile attributes
        MRMapProfile mapProf = new MRMapProfile(map.getAttribute(ID));

        // Get the enum maps (counters, statistics, cost factors, timings, auxiliary counters)
        loadTaskProfileCounters(mapProf, map);
        loadTaskProfileStatistics(mapProf, map);
        loadTaskProfileCostFactors(mapProf, map);
        loadTaskProfileTimings(mapProf, map);
        loadTaskProfileAuxCounters(mapProf, map);
        // NOTE(review): the statement that hands mapProf to jobProfile and the closing braces
        // of this loop are missing from the listing


    // Get the reduce profiles: every REDUCE_PROFILE element found under the root
    NodeList reducers = root.getElementsByTagName(REDUCE_PROFILE);
    for (int i = 0; i < reducers.getLength(); ++i) {
      if (reducers.item(i) instanceof Element) {
        Element reducer = (Element) reducers.item(i);

        // Get the reducer profile attributes
        MRReduceProfile redProf = new MRReduceProfile(reducer.getAttribute(ID));

        // Get the enum maps (counters, statistics, cost factors, timings, auxiliary counters)
        loadTaskProfileCounters(redProf, reducer);
        loadTaskProfileStatistics(redProf, reducer);
        loadTaskProfileCostFactors(redProf, reducer);
        loadTaskProfileTimings(redProf, reducer);
        loadTaskProfileAuxCounters(redProf, reducer);
        // NOTE(review): the statement that hands redProf to jobProfile and the closing braces
        // of this loop are missing from the listing


    // Update the profile to calculate the average task profiles
    // NOTE(review): the update call announced above is missing from the listing
    return jobProfile;