/**
 * Calculates how many map tasks to run and stores the result on the job.
 *
 * <p>The desired count is the cumulative size of the copy divided by the bytes-per-map setting
 * (distcp.bytes.per.map, default BYTES_PER_MAP, or -m on the command line). It is capped at the
 * max-maps setting (distcp.max.map.tasks, default MAX_MAPS_PER_NODE * task trackers reported by
 * the cluster) and is never lower than one.
 *
 * <p>The cap is applied in {@code long} arithmetic before narrowing to {@code int}, so a very
 * large {@code totalBytes} cannot wrap around during the cast and end up as a tiny map count.
 *
 * @param totalBytes Count of total bytes for job
 * @param job The job to configure; its number of map tasks is set as a side effect
 * @throws IOException if querying the cluster through {@code JobClient} fails
 */
private static void setMapCount(long totalBytes, JobConf job) throws IOException {
  final long bytesPerMap = job.getLong(BYTES_PER_MAP_LABEL, BYTES_PER_MAP);
  final long desiredMaps = totalBytes / bytesPerMap;

  // The default argument is evaluated eagerly, so the cluster is queried even when
  // MAX_MAPS_LABEL is explicitly configured (same as before this change).
  final int maxMaps =
      job.getInt(
          MAX_MAPS_LABEL,
          MAX_MAPS_PER_NODE * new JobClient(job).getClusterStatus().getTaskTrackers());

  // Clamp while still a long: the result is <= maxMaps, so the narrowing cast cannot wrap on
  // the high side.
  final int numMaps = (int) Math.min(desiredMaps, (long) maxMaps);

  // Always schedule at least one map, even for an empty or tiny copy.
  job.setNumMapTasks(Math.max(numMaps, 1));
}
/**
 * Emits, for the given key, the largest of the supplied values.
 *
 * <p>The running maximum starts at {@link Integer#MIN_VALUE}, so that is the value emitted if
 * the iterator yields nothing.
 *
 * @param key the key whose values are being reduced; passed through unchanged to the output
 * @param values the values associated with {@code key}
 * @param output collector that receives the single (key, maximum) pair
 * @param reporter not used by this method
 * @throws IOException if the collector fails to accept the output pair
 */
@Override
public void reduce(
    Text key,
    Iterator<IntWritable> values,
    OutputCollector<Text, IntWritable> output,
    Reporter reporter)
    throws IOException {
  int highest = Integer.MIN_VALUE;
  while (values.hasNext()) {
    final int candidate = values.next().get();
    if (candidate > highest) {
      highest = candidate;
    }
  }
  output.collect(key, new IntWritable(highest));
}