예제 #1
0
  /**
   * Main driver for the sort program: configures an identity map/reduce job and submits it.
   *
   * <p>The job uses {@code IdentityMapper} and {@code IdentityReducer} with one reduce task, maps
   * to a {@code key:bigint} / {@code value:bigint} intermediate schema, reads from the table named
   * by {@code args[0]} and writes to the table named by {@code args[1]}. The start time, end time
   * and elapsed whole seconds are printed to standard output.
   *
   * @param args {@code args[0]} is the input table name and {@code args[1]} the output table name;
   *     any further arguments are ignored
   * @throws IllegalArgumentException if fewer than two arguments are supplied
   * @throws Exception if configuring or running the job fails
   */
  public static void main(String[] args) throws Exception {
    // Fail with a usage hint before any job setup, rather than with a bare
    // ArrayIndexOutOfBoundsException once the table names are read.
    if (args == null || args.length < 2) {
      throw new IllegalArgumentException(
          "Expected 2 arguments: <input_table> <output_table>, but got "
              + (args == null ? 0 : args.length));
    }

    JobConf jobConf = new JobConf();

    jobConf.setMapperClass(IdentityMapper.class);
    jobConf.setReducerClass(IdentityReducer.class);

    // NOTE(review): a single reducer presumably yields one globally ordered output — confirm.
    jobConf.setNumReduceTasks(1);

    jobConf.setMapOutputKeySchema(SchemaUtils.fromString("key:bigint"));
    jobConf.setMapOutputValueSchema(SchemaUtils.fromString("value:bigint"));

    InputUtils.addTable(TableInfo.builder().tableName(args[0]).build(), jobConf);
    OutputUtils.addTable(TableInfo.builder().tableName(args[1]).build(), jobConf);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);

    JobClient.runJob(jobConf);

    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    // Integer division: the elapsed time is reported in whole seconds.
    System.out.println(
        "The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");
  }
예제 #2
0
  /**
   * Builds a {@code JobConf} from the parsed job configuration and a runtime base date.
   *
   * <p>In order, this method sets the mapper (required), the optional reducer and combiner, the
   * map output key/value schemas (required), the optional partition / sort / grouping columns and
   * sort orders, the optional reduce-task count and per-task memory settings (applied only when
   * positive), the input tables, and finally the single output table (required). Partition specs
   * of input and output tables are passed through {@code expandMacroDateYmd} together with
   * {@code dateYmd} before being applied.
   *
   * @param conf parsed job configuration; must not be {@code null}
   * @param dateYmd base date handed to {@code expandMacroDateYmd} when expanding partition specs
   * @return the populated {@code JobConf}
   * @throws ClassCastException if a configured mapper, reducer or combiner class does not extend or
   *     implement the expected {@code Mapper} / {@code Reducer} type
   * @throws Exception if {@code conf} is {@code null}, the mapper, map output key schema, map
   *     output value schema or output table is missing, the output table declares more than one
   *     partition, or a configured class cannot be loaded
   */
  public static JobConf makeMapreduceJobConf(MapreduceConfigInfo conf, Date dateYmd)
      throws Exception {
    // Reject a missing configuration before allocating anything.
    if (conf == null) {
      throw new Exception("Parse base.mapred.xml failed!");
    }
    if (conf.getMapper() == null || conf.getMapper().isEmpty()) {
      throw new Exception("No mapper class specified");
    }

    JobConf job = new JobConf();

    // Mapper (required). asSubclass verifies the loaded class really is a Mapper instead of
    // relying on an unchecked cast that would accept any class.
    Class<? extends Mapper> mapperClz = Class.forName(conf.getMapper()).asSubclass(Mapper.class);
    job.setMapperClass(mapperClz);

    // Optional reducer.
    if (conf.getReducer() != null && !conf.getReducer().isEmpty()) {
      Class<? extends Reducer> reducerClz =
          Class.forName(conf.getReducer()).asSubclass(Reducer.class);
      job.setReducerClass(reducerClz);
    }
    // Optional combiner (a Reducer as well).
    if (conf.getCombiner() != null && !conf.getCombiner().isEmpty()) {
      Class<? extends Reducer> combinerClz =
          Class.forName(conf.getCombiner()).asSubclass(Reducer.class);
      job.setCombinerClass(combinerClz);
    }

    // Map output schemas (both required).
    if (conf.getMapOutputKey() == null || conf.getMapOutputKey().isEmpty()) {
      throw new Exception("No mapOutputKey specified");
    }
    if (conf.getMapOutputValue() == null || conf.getMapOutputValue().isEmpty()) {
      throw new Exception("No mapOutputValue specified");
    }
    job.setMapOutputKeySchema(SchemaUtils.fromString(conf.getMapOutputKey()));
    job.setMapOutputValueSchema(SchemaUtils.fromString(conf.getMapOutputValue()));

    // Optional comma-separated column lists; entries are trimmed so "a, b" works like "a,b".
    if (conf.getPartitionColumns() != null && !conf.getPartitionColumns().isEmpty()) {
      job.setPartitionColumns(splitCommaList(conf.getPartitionColumns()));
    }
    if (conf.getOutputKeySortColumns() != null && !conf.getOutputKeySortColumns().isEmpty()) {
      job.setOutputKeySortColumns(splitCommaList(conf.getOutputKeySortColumns()));
    }
    if (conf.getOutputKeySortOrders() != null && !conf.getOutputKeySortOrders().isEmpty()) {
      String[] orders = splitCommaList(conf.getOutputKeySortOrders());
      SortOrder[] sortOrders = new SortOrder[orders.length];
      for (int i = 0; i < orders.length; i++) {
        // Anything other than "desc" (case-insensitive, locale-independent) means ascending.
        sortOrders[i] = "desc".equalsIgnoreCase(orders[i]) ? SortOrder.DESC : SortOrder.ASC;
      }
      job.setOutputKeySortOrder(sortOrders);
    }
    if (conf.getOutputGroupingColumns() != null && !conf.getOutputGroupingColumns().isEmpty()) {
      job.setOutputGroupingColumns(splitCommaList(conf.getOutputGroupingColumns()));
    }

    // Task settings: a non-positive value leaves the JobConf default untouched.
    if (conf.getNumReduceTask() > 0) {
      job.setNumReduceTasks(conf.getNumReduceTask());
    }
    if (conf.getMemoryForMapTask() > 0) {
      job.setMemoryForMapTask(conf.getMemoryForMapTask());
    }
    if (conf.getMemoryForReduceTask() > 0) {
      job.setMemoryForReduceTask(conf.getMemoryForReduceTask());
    }

    // Input tables: a table without partitions is added once as a whole; otherwise one input
    // is added per (macro-expanded) partition spec.
    for (OdpsTableInfo it : conf.getInputTables()) {
      if (it.getPartitions() == null || it.getPartitions().size() == 0) {
        InputUtils.addTable(TableInfo.builder().tableName(it.getName()).build(), job);
      } else {
        for (String p : it.getPartitions()) {
          InputUtils.addTable(
              TableInfo.builder()
                  .tableName(it.getName())
                  .partSpec(expandMacroDateYmd(p, dateYmd))
                  .build(),
              job);
        }
      }
    }

    // Output table: required, with at most one partition.
    if (conf.getOutputTable() == null) {
      throw new Exception("No output table specified");
    }
    if (conf.getOutputTable().getPartitions() != null
        && conf.getOutputTable().getPartitions().size() > 1) {
      throw new Exception("Output table can not has multiple partitions");
    }
    if (conf.getOutputTable().getPartitions() == null
        || conf.getOutputTable().getPartitions().size() == 0) {
      OutputUtils.addTable(
          TableInfo.builder().tableName(conf.getOutputTable().getName()).build(), job);
    } else {
      OutputUtils.addTable(
          TableInfo.builder()
              .tableName(conf.getOutputTable().getName())
              .partSpec(expandMacroDateYmd(conf.getOutputTable().getPartitions().get(0), dateYmd))
              .build(),
          job);
    }

    return job;
  }

  /** Splits a comma-separated list and trims surrounding whitespace from every entry. */
  private static String[] splitCommaList(String csv) {
    String[] parts = csv.split(",");
    for (int i = 0; i < parts.length; i++) {
      parts[i] = parts[i].trim();
    }
    return parts;
  }