/**
 * Chains the <code>reduce(...)</code> method of the Reducer with the <code>map(...)</code>
 * methods of the Mappers in the chain.
 *
 * <p>The Reducer is looked up on the chain for every call. When the chain has no Reducer the
 * call is a no-op. Otherwise the Reducer is invoked with the collector returned by
 * <code>chain.getReducerCollector(output, reporter)</code> instead of with <code>output</code>
 * directly.
 *
 * @param key the key of the group being reduced; handed to the Reducer unchanged.
 * @param values the values for <code>key</code>; handed to the Reducer unchanged.
 * @param output the collector supplied by the caller; used to obtain the Reducer's collector.
 * @param reporter the reporter handed to both the chain and the Reducer.
 * @throws IOException if the Reducer or the chain's collector fails.
 */
@SuppressWarnings({"unchecked"})
public void reduce(Object key, Iterator values, OutputCollector output, Reporter reporter)
    throws IOException {
  Reducer chainedReducer = chain.getReducer();
  if (chainedReducer == null) {
    // Nothing to run: the chain has no Reducer.
    return;
  }
  OutputCollector reducerCollector = chain.getReducerCollector(output, reporter);
  chainedReducer.reduce(key, values, reducerCollector, reporter);
}
/**
 * Adds a Mapper class to the chain job's JobConf.
 *
 * <p>The caller must state how keys and values are handed from one element of the chain to the
 * next: by value or by reference. If a Mapper relies on its keys and values not being modified
 * by the collector, 'by value' must be used. If it does not rely on that, 'by reference' can be
 * used as an optimization that avoids serialization and deserialization.
 *
 * <p>The configuration given for the added Mapper, <code>mapperConf</code>, takes precedence
 * over the job's JobConf. This precedence is in effect when the task is running.
 *
 * <p>IMPORTANT: There is no need to set the job's output key/value classes separately. This
 * method sets them on <code>job</code> from <code>outputKeyClass</code> and
 * <code>outputValueClass</code>, so the most recently added element determines them.
 *
 * @param job chain job's JobConf to add the Mapper class to.
 * @param klass the Mapper class to add.
 * @param inputKeyClass mapper input key class.
 * @param inputValueClass mapper input value class.
 * @param outputKeyClass mapper output key class.
 * @param outputValueClass mapper output value class.
 * @param byValue indicates if key/values should be passed by value to the next Mapper in the
 *     chain, if any.
 * @param mapperConf a JobConf with the configuration for the Mapper class. It is recommended to
 *     use a JobConf without default values, created with the
 *     <code>JobConf(boolean loadDefaults)</code> constructor with FALSE.
 */
public static <K1, V1, K2, V2> void addMapper(
    JobConf job,
    Class<? extends Mapper<K1, V1, K2, V2>> klass,
    Class<? extends K1> inputKeyClass,
    Class<? extends V1> inputValueClass,
    Class<? extends K2> outputKeyClass,
    Class<? extends V2> outputValueClass,
    boolean byValue,
    JobConf mapperConf) {
  // The element added last defines what the job as a whole emits.
  job.setOutputKeyClass(outputKeyClass);
  job.setOutputValueClass(outputValueClass);

  // NOTE(review): the leading 'false' presumably marks this as not a map-side chain; confirm
  // against Chain.addMapper.
  Chain.addMapper(
      false,
      job,
      klass,
      inputKeyClass,
      inputValueClass,
      outputKeyClass,
      outputValueClass,
      byValue,
      mapperConf);
}
/**
 * Sets the Reducer class on the chain job's JobConf.
 *
 * <p>The caller must state how keys and values are handed from one element of the chain to the
 * next: by value or by reference. If a Reducer relies on its keys and values not being modified
 * by the collector, 'by value' must be used. If it does not rely on that, 'by reference' can be
 * used as an optimization that avoids serialization and deserialization.
 *
 * <p>The configuration given for the Reducer, <code>reducerConf</code>, takes precedence over
 * the job's JobConf. This precedence is in effect when the task is running.
 *
 * <p>IMPORTANT: There is no need to specify the output key/value classes for the ChainReducer.
 * This method registers <code>ChainReducer</code> as the job's reducer class and sets the job's
 * output key/value classes from <code>outputKeyClass</code> and <code>outputValueClass</code>.
 *
 * @param job job's JobConf to add the Reducer class to.
 * @param klass the Reducer class to add.
 * @param inputKeyClass reducer input key class.
 * @param inputValueClass reducer input value class.
 * @param outputKeyClass reducer output key class.
 * @param outputValueClass reducer output value class.
 * @param byValue indicates if key/values should be passed by value to the next Mapper in the
 *     chain, if any.
 * @param reducerConf a JobConf with the configuration for the Reducer class. It is recommended
 *     to use a JobConf without default values, created with the
 *     <code>JobConf(boolean loadDefaults)</code> constructor with FALSE.
 */
public static <K1, V1, K2, V2> void setReducer(
    JobConf job,
    Class<? extends Reducer<K1, V1, K2, V2>> klass,
    Class<? extends K1> inputKeyClass,
    Class<? extends V1> inputValueClass,
    Class<? extends K2> outputKeyClass,
    Class<? extends V2> outputValueClass,
    boolean byValue,
    JobConf reducerConf) {
  // The job always runs ChainReducer; the user's Reducer is recorded through Chain below.
  job.setReducerClass(ChainReducer.class);

  // Until a Mapper is appended after it, the Reducer's output types are the job's output types.
  job.setOutputKeyClass(outputKeyClass);
  job.setOutputValueClass(outputValueClass);

  Chain.setReducer(
      job,
      klass,
      inputKeyClass,
      inputValueClass,
      outputKeyClass,
      outputValueClass,
      byValue,
      reducerConf);
}
/**
 * Closes the ChainReducer, the Reducer and all the Mappers in the chain by delegating to the
 * chain's own <code>close()</code>.
 *
 * <p>If this method is overridden, <code>super.close()</code> should be invoked at the end of
 * the overriding method.
 *
 * @throws IOException if closing the chain fails.
 */
public void close() throws IOException {
  this.chain.close();
}
/**
 * Configures the ChainReducer, the Reducer and all the Mappers in the chain by handing the
 * job configuration to the chain.
 *
 * <p>If this method is overridden, <code>super.configure(...)</code> should be invoked at the
 * beginning of the overriding method.
 *
 * @param job the job configuration passed on to the chain.
 */
public void configure(JobConf job) {
  this.chain.configure(job);
}