private static void compactASinglePartition( String existingInputPath, String deltaInputPath, String primaryKeyList, String maxColumns, String outputPath, String numberOfReducers) throws IOException { JobConf conf = new JobConf(new Configuration(), PartitionCompactor.class); // hadoop conf.setJobName("PartitionCompactor"); conf.setOutputKeyClass(LongWritable.class); conf.setOutputValueClass(BytesRefArrayWritable.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(BytesRefArrayWritable.class); conf.setMapperClass(CustomMapper.class); conf.setReducerClass(CustomReducer.class); conf.setPartitionerClass(CustomPartitioner.class); conf.setInputFormat(RCFileInputFormat.class); conf.setNumReduceTasks(Integer.parseInt(numberOfReducers)); conf.set(EXISTING_FILE_PATH_CONF, existingInputPath); conf.set(DELTA_FILE_PATH_CONF, deltaInputPath); conf.set(PRIMARY_KEYS_CONF, primaryKeyList); conf.set(RCFile.COLUMN_NUMBER_CONF_STR, maxColumns); RCFileInputFormat.addInputPath(conf, new Path(existingInputPath)); RCFileInputFormat.addInputPath(conf, new Path(deltaInputPath)); conf.setOutputFormat(RCFileOutputFormat.class); RCFileOutputFormat.setOutputPath(conf, new Path(outputPath)); RunningJob job = JobClient.runJob(conf); job.waitForCompletion(); }