/** * Adjusts the domain of the <tt>mapred.job.reduce.input.buffer.percent</tt> parameter descriptor * based on the virtual job profile. * <li>mapred.job.reduce.input.buffer.percent * <li>mapred.reduce.tasks * * @param paramDescr the current parameter descriptor * @param jobProfile the job profile * @param taskMemory the task memory (in bytes) */ public static void adjustParamDescrRedInBufferPerc( DoubleParamDescriptor paramDescr, MRJobProfile jobProfile, long taskMemory) { MRReduceProfile redProfile = jobProfile.getAvgReduceProfile(); if (redProfile == null || redProfile.isEmpty()) return; // Find the memory required by the reduce tasks long redMemory = WhatIfUtils.getReduceMemoryRequired(redProfile); // Calculate the percent of memory to be used to buffer input double percent = (taskMemory - redMemory) / (double) taskMemory; if (percent < 0.0) percent = 0; else if (percent > 0.8) percent = 0.8; paramDescr.setMaxValue(percent); }
/** * Adjusts the domain of the <tt>mapred.reduce.tasks</tt> parameter descriptor based on the * virtual job profile. * * @param paramDescr the current parameter descriptor * @param jobProfile the job profile * @param taskMemory the task memory (in bytes) * @param numRedSlots the total number of reduce slots in the cluster */ public static void adjustParamDescrRedTasks( IntegerParamDescriptor paramDescr, MRJobProfile jobProfile, long taskMemory, int numRedSlots) { // Get the reduce profile MRReduceProfile redProfile = jobProfile.getAvgReduceProfile(); if (redProfile == null || redProfile.isEmpty()) return; // Calculate the (uncompressed) reduce input size double shuffleSize = redProfile.getNumTasks() * redProfile.getCounter(MRCounter.REDUCE_SHUFFLE_BYTES) / redProfile.getStatistic(MRStatistics.INTERM_COMPRESS_RATIO, 1d); // Calculate the number of reduce groups long numGroups = redProfile.getNumTasks() * redProfile.getCounter(MRCounter.REDUCE_INPUT_GROUPS, 1l); // Calculate the min and max number of reducers double min = Math.ceil(shuffleSize / (2 * taskMemory)); double max = Math.ceil(4 * shuffleSize / taskMemory); max = Math.min(max, numGroups); max = Math.max(max, numRedSlots); if (max < min) max = min; // Set the min and max number of reducers paramDescr.setMinMaxValue((int) min, (int) max); }
@Override protected void exportXML(MRJobProfile jobProfile, Document doc) { // Create the job profile element Element job = doc.createElement(JOB_PROFILE); doc.appendChild(job); // Add the job properties job.setAttribute(ID, jobProfile.getJobId()); job.setAttribute(NUM_MAPPERS, jobProfile.getCounter(MRCounter.MAP_TASKS, 0l).toString()); job.setAttribute(NUM_REDUCERS, jobProfile.getCounter(MRCounter.REDUCE_TASKS, 0l).toString()); if (jobProfile.getClusterName() != null) { job.setAttribute(CLUSTER_NAME, jobProfile.getClusterName()); } // Add the job inputs Element inputs = doc.createElement(INPUTS); job.appendChild(inputs); for (String jobInput : jobProfile.getJobInputs()) { Element input = doc.createElement(INPUT); inputs.appendChild(input); input.appendChild(doc.createTextNode(jobInput)); } // Add the map elements for (MRMapProfile mapProfile : jobProfile.getAvgMapProfiles()) { Element map = buildTaskProfileElement(mapProfile, doc, MAP_PROFILE); map.setAttribute(INPUT_INDEX, Integer.toString(mapProfile.getInputIndex())); job.appendChild(map); } // Add the reduce element MRReduceProfile redProfile = jobProfile.getAvgReduceProfile(); if (!redProfile.isEmpty()) { Element reducer = buildTaskProfileElement(redProfile, doc, REDUCE_PROFILE); job.appendChild(reducer); } }
@Override protected MRJobProfile importXML(Document doc) { // Get the root element Element root = doc.getDocumentElement(); if (!JOB_PROFILE.equals(root.getTagName())) throw new RuntimeException("ERROR: Bad XML File: top-level element not <job_profile>"); // Get the profile attributes MRJobProfile jobProfile = new MRJobProfile(root.getAttribute(ID)); jobProfile.addCounter(MRCounter.MAP_TASKS, Long.parseLong(root.getAttribute(NUM_MAPPERS))); jobProfile.addCounter(MRCounter.REDUCE_TASKS, Long.parseLong(root.getAttribute(NUM_REDUCERS))); String clusterName = root.getAttribute(CLUSTER_NAME); if (clusterName != null && !clusterName.equals("")) { jobProfile.setClusterName(clusterName); } // Get the profile inputs NodeList inputs = root.getElementsByTagName(INPUTS).item(0).getChildNodes(); ArrayList<String> inputList = new ArrayList<String>(1); for (int i = 0; i < inputs.getLength(); ++i) { if (inputs.item(i) instanceof Element) { inputList.add(inputs.item(i).getTextContent()); } } jobProfile.setJobInputs(inputList.toArray(new String[0])); // Get the map profiles NodeList maps = root.getElementsByTagName(MAP_PROFILE); for (int i = 0; i < maps.getLength(); ++i) { if (maps.item(i) instanceof Element) { Element map = (Element) maps.item(i); // Get the map profile attributes MRMapProfile mapProf = new MRMapProfile(map.getAttribute(ID)); mapProf.setInputIndex(Integer.parseInt(map.getAttribute(INPUT_INDEX))); mapProf.setNumTasks(Integer.parseInt(map.getAttribute(NUM_TASKS))); // Get the enum maps loadTaskProfileCounters(mapProf, map); loadTaskProfileStatistics(mapProf, map); loadTaskProfileCostFactors(mapProf, map); loadTaskProfileTimings(mapProf, map); loadTaskProfileAuxCounters(mapProf, map); jobProfile.addMapProfile(mapProf); } } // Get the reduce profiles NodeList reducers = root.getElementsByTagName(REDUCE_PROFILE); for (int i = 0; i < reducers.getLength(); ++i) { if (reducers.item(i) instanceof Element) { Element reducer = (Element) reducers.item(i); // Get the reducer profile attributes MRReduceProfile redProf = new MRReduceProfile(reducer.getAttribute(ID)); redProf.setNumTasks(Integer.parseInt(reducer.getAttribute(NUM_TASKS))); // Get the enum maps loadTaskProfileCounters(redProf, reducer); loadTaskProfileStatistics(redProf, reducer); loadTaskProfileCostFactors(redProf, reducer); loadTaskProfileTimings(redProf, reducer); loadTaskProfileAuxCounters(redProf, reducer); jobProfile.addReduceProfile(redProf); } } // Update the profile to calculate the average task profiles jobProfile.updateProfile(); return jobProfile; }