/**
 * The main driver for the sort program. Invoke this method to submit the map/reduce job.
 *
 * <p>The job uses {@code IdentityMapper} and {@code IdentityReducer} with a single reduce task,
 * a map output key schema of {@code key:bigint} and a map output value schema of
 * {@code value:bigint}. The start time, end time and elapsed whole seconds are printed to
 * standard output around the call to {@code JobClient.runJob}.
 *
 * @param args {@code args[0]} is the input table name, {@code args[1]} is the output table
 *     name; any further arguments are ignored
 * @throws IllegalArgumentException if fewer than two arguments are supplied
 * @throws Exception if configuring or running the job fails
 */
public static void main(String[] args) throws Exception {
  // Fail fast with a readable message instead of an ArrayIndexOutOfBoundsException
  // raised half-way through job configuration.
  if (args == null || args.length < 2) {
    throw new IllegalArgumentException(
        "Expected two arguments: <input_table> <output_table>");
  }
  String inputTableName = args[0];
  String outputTableName = args[1];

  JobConf jobConf = new JobConf();
  jobConf.setMapperClass(IdentityMapper.class);
  jobConf.setReducerClass(IdentityReducer.class);

  // All records are routed to one reducer.
  jobConf.setNumReduceTasks(1);

  jobConf.setMapOutputKeySchema(SchemaUtils.fromString("key:bigint"));
  jobConf.setMapOutputValueSchema(SchemaUtils.fromString("value:bigint"));

  InputUtils.addTable(TableInfo.builder().tableName(inputTableName).build(), jobConf);
  OutputUtils.addTable(TableInfo.builder().tableName(outputTableName).build(), jobConf);

  Date startTime = new Date();
  System.out.println("Job started: " + startTime);

  JobClient.runJob(jobConf);

  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);

  // Integer division: the elapsed time is reported in whole seconds.
  long elapsedSeconds = (endTime.getTime() - startTime.getTime()) / 1000;
  System.out.println("The job took " + elapsedSeconds + " seconds.");
}
/** * 通过配置和运行时参数构建JobConf * * @param conf 程序配置 * @param dateYmd 基准时间 * @return JobConf对象 * @throws Exception */ @SuppressWarnings("unchecked") public static JobConf makeMapreduceJobConf(MapreduceConfigInfo conf, Date dateYmd) throws Exception { JobConf job = new JobConf(); if (conf == null) { throw new Exception("Parse base.mapred.xml failed!"); } if (conf.getMapper() == null || conf.getMapper().isEmpty()) { throw new Exception("No mapper class specified"); } // 设置mapper String mapperClassName = conf.getMapper(); Class<Mapper> mapperClz = (Class<Mapper>) Class.forName(mapperClassName); job.setMapperClass(mapperClz); // 增加可选的reducer if (conf.getReducer() != null && !conf.getReducer().isEmpty()) { String reducerClassName = conf.getReducer(); Class<Reducer> reducerClz = (Class<Reducer>) Class.forName(reducerClassName); job.setReducerClass(reducerClz); } // 增加可选的combiner if (conf.getCombiner() != null && !conf.getCombiner().isEmpty()) { String combinerClassName = conf.getCombiner(); Class<Reducer> combinerClz = (Class<Reducer>) Class.forName(combinerClassName); job.setCombinerClass(combinerClz); } // 设置task if (conf.getMapOutputKey() == null || conf.getMapOutputKey().isEmpty()) { throw new Exception("No mapOutputValue specified"); } if (conf.getMapOutputValue() == null || conf.getMapOutputValue().isEmpty()) { throw new Exception("No mapOutputValue specified"); } job.setMapOutputKeySchema(SchemaUtils.fromString(conf.getMapOutputKey())); job.setMapOutputValueSchema(SchemaUtils.fromString(conf.getMapOutputValue())); if (conf.getPartitionColumns() != null && !conf.getPartitionColumns().isEmpty()) { job.setPartitionColumns(conf.getPartitionColumns().split(",")); } if (conf.getOutputKeySortColumns() != null && !conf.getOutputKeySortColumns().isEmpty()) { job.setOutputKeySortColumns(conf.getOutputKeySortColumns().split(",")); } if (conf.getOutputKeySortOrders() != null && !conf.getOutputKeySortOrders().isEmpty()) { String[] orders = conf.getOutputKeySortOrders().split(","); 
SortOrder[] sortOrders = new SortOrder[orders.length]; for (int i = 0; i < orders.length; i++) { String order = orders[i].trim().toLowerCase(); sortOrders[i] = order.equals("desc") ? SortOrder.DESC : SortOrder.ASC; } job.setOutputKeySortOrder(sortOrders); } if (conf.getOutputGroupingColumns() != null && !conf.getOutputGroupingColumns().isEmpty()) { job.setOutputGroupingColumns(conf.getOutputGroupingColumns().split(",")); } if (conf.getNumReduceTask() > 0) { job.setNumReduceTasks(conf.getNumReduceTask()); } if (conf.getMemoryForMapTask() > 0) { job.setMemoryForMapTask(conf.getMemoryForMapTask()); } if (conf.getMemoryForReduceTask() > 0) { job.setMemoryForReduceTask(conf.getMemoryForReduceTask()); } // 设置输入表 for (OdpsTableInfo it : conf.getInputTables()) { if (it.getPartitions() == null || it.getPartitions().size() == 0) { InputUtils.addTable(TableInfo.builder().tableName(it.getName()).build(), job); } else { for (String p : it.getPartitions()) { InputUtils.addTable( TableInfo.builder() .tableName(it.getName()) .partSpec(expandMacroDateYmd(p, dateYmd)) .build(), job); } } } // 设置输出表 if (conf.getOutputTable() == null) { throw new Exception("No output table specified"); } if (conf.getOutputTable().getPartitions() != null && conf.getOutputTable().getPartitions().size() > 1) { throw new Exception("Output table can not has multiple partitions"); } if (conf.getOutputTable().getPartitions() == null || conf.getOutputTable().getPartitions().size() == 0) { OutputUtils.addTable( TableInfo.builder().tableName(conf.getOutputTable().getName()).build(), job); } else { OutputUtils.addTable( TableInfo.builder() .tableName(conf.getOutputTable().getName()) .partSpec(expandMacroDateYmd(conf.getOutputTable().getPartitions().get(0), dateYmd)) .build(), job); } return job; }