Ejemplo n.º 1
0
  /**
   * Builds a {@code JobConf} from the parsed configuration and the runtime base date.
   *
   * <p>The mapper, the map output key/value schemas and the output table are mandatory. The
   * reducer, combiner, partition/sort/grouping columns, reduce-task count and memory settings are
   * applied only when they are configured (non-empty strings, positive numbers). Every partition
   * spec of the input tables and of the output table is passed through {@code expandMacroDateYmd}
   * together with {@code dateYmd} before it is registered.
   *
   * @param conf program configuration; must not be {@code null}
   * @param dateYmd base date handed to {@code expandMacroDateYmd} for each partition spec
   * @return the populated {@code JobConf}
   * @throws Exception if {@code conf} is {@code null}, a mandatory setting is missing, the output
   *     table declares more than one partition, or a configured class cannot be loaded
   */
  @SuppressWarnings("unchecked")
  public static JobConf makeMapreduceJobConf(MapreduceConfigInfo conf, Date dateYmd)
      throws Exception {
    if (conf == null) {
      throw new Exception("Parse base.mapred.xml failed!");
    }
    if (conf.getMapper() == null || conf.getMapper().isEmpty()) {
      throw new Exception("No mapper class specified");
    }

    JobConf job = new JobConf();

    // Mapper (mandatory).
    Class<Mapper> mapperClz = (Class<Mapper>) Class.forName(conf.getMapper());
    job.setMapperClass(mapperClz);

    // Optional reducer.
    if (conf.getReducer() != null && !conf.getReducer().isEmpty()) {
      Class<Reducer> reducerClz = (Class<Reducer>) Class.forName(conf.getReducer());
      job.setReducerClass(reducerClz);
    }
    // Optional combiner.
    if (conf.getCombiner() != null && !conf.getCombiner().isEmpty()) {
      Class<Reducer> combinerClz = (Class<Reducer>) Class.forName(conf.getCombiner());
      job.setCombinerClass(combinerClz);
    }

    // Task settings: both map output schemas are mandatory.
    if (conf.getMapOutputKey() == null || conf.getMapOutputKey().isEmpty()) {
      // Previously reported "mapOutputValue" here, pointing users at the wrong setting.
      throw new Exception("No mapOutputKey specified");
    }
    if (conf.getMapOutputValue() == null || conf.getMapOutputValue().isEmpty()) {
      throw new Exception("No mapOutputValue specified");
    }
    job.setMapOutputKeySchema(SchemaUtils.fromString(conf.getMapOutputKey()));
    job.setMapOutputValueSchema(SchemaUtils.fromString(conf.getMapOutputValue()));

    if (conf.getPartitionColumns() != null && !conf.getPartitionColumns().isEmpty()) {
      job.setPartitionColumns(conf.getPartitionColumns().split(","));
    }
    if (conf.getOutputKeySortColumns() != null && !conf.getOutputKeySortColumns().isEmpty()) {
      job.setOutputKeySortColumns(conf.getOutputKeySortColumns().split(","));
    }
    if (conf.getOutputKeySortOrders() != null && !conf.getOutputKeySortOrders().isEmpty()) {
      String[] orders = conf.getOutputKeySortOrders().split(",");
      SortOrder[] sortOrders = new SortOrder[orders.length];
      for (int i = 0; i < orders.length; i++) {
        // Anything other than "desc" (case-insensitive, surrounding blanks ignored) means ASC.
        sortOrders[i] = "desc".equalsIgnoreCase(orders[i].trim()) ? SortOrder.DESC : SortOrder.ASC;
      }
      job.setOutputKeySortOrder(sortOrders);
    }
    if (conf.getOutputGroupingColumns() != null && !conf.getOutputGroupingColumns().isEmpty()) {
      job.setOutputGroupingColumns(conf.getOutputGroupingColumns().split(","));
    }
    if (conf.getNumReduceTask() > 0) {
      job.setNumReduceTasks(conf.getNumReduceTask());
    }
    if (conf.getMemoryForMapTask() > 0) {
      job.setMemoryForMapTask(conf.getMemoryForMapTask());
    }
    if (conf.getMemoryForReduceTask() > 0) {
      job.setMemoryForReduceTask(conf.getMemoryForReduceTask());
    }

    // Input tables: a table without partitions is added whole, otherwise once per partition.
    // A null list is treated as "no input tables" instead of failing with a NullPointerException.
    if (conf.getInputTables() != null) {
      for (OdpsTableInfo it : conf.getInputTables()) {
        if (it.getPartitions() == null || it.getPartitions().isEmpty()) {
          InputUtils.addTable(TableInfo.builder().tableName(it.getName()).build(), job);
        } else {
          for (String p : it.getPartitions()) {
            InputUtils.addTable(
                TableInfo.builder()
                    .tableName(it.getName())
                    .partSpec(expandMacroDateYmd(p, dateYmd))
                    .build(),
                job);
          }
        }
      }
    }

    // Output table: mandatory, with at most one partition.
    OdpsTableInfo outputTable = conf.getOutputTable();
    if (outputTable == null) {
      throw new Exception("No output table specified");
    }
    if (outputTable.getPartitions() != null && outputTable.getPartitions().size() > 1) {
      throw new Exception("Output table can not has multiple partitions");
    }
    if (outputTable.getPartitions() == null || outputTable.getPartitions().isEmpty()) {
      OutputUtils.addTable(TableInfo.builder().tableName(outputTable.getName()).build(), job);
    } else {
      OutputUtils.addTable(
          TableInfo.builder()
              .tableName(outputTable.getName())
              .partSpec(expandMacroDateYmd(outputTable.getPartitions().get(0), dateYmd))
              .build(),
          job);
    }

    return job;
  }
Ejemplo n.º 2
0
  /**
   * Parses {@code META-INF/base.mapred.xml}, looked up through the system class loader, into a
   * {@code MapreduceConfigInfo} object and merges extra partitions into the matching tables.
   *
   * <p>{@code extraPartitions} is a {@code ':'}-separated list of {@code tableName/partition}
   * entries (split at the first {@code '/'}). Each partition is added to every input table whose
   * name equals {@code tableName}, and to the output table when its name matches. A {@code null}
   * or empty string means no extra partitions.
   *
   * @param extraPartitions extra partitions to merge in, may be {@code null} or empty
   * @return the parsed configuration, or {@code null} if the resource cannot be read or parsed or
   *     an {@code extraPartitions} entry contains no {@code '/'}
   */
  public static MapreduceConfigInfo parseConfig(String extraPartitions) {
    Digester digester = new Digester();
    digester.setValidating(false);

    // Scalar properties of the root <mapred> element.
    digester.addObjectCreate("mapred", MapreduceConfigInfo.class);
    digester.addBeanPropertySetter("mapred/baseId");
    digester.addBeanPropertySetter("mapred/projectId");
    digester.addBeanPropertySetter("mapred/resourceName");
    digester.addBeanPropertySetter("mapred/idePath");

    digester.addBeanPropertySetter("mapred/mapOutputKey");
    digester.addBeanPropertySetter("mapred/mapOutputValue");
    digester.addBeanPropertySetter("mapred/partitionColumns");
    digester.addBeanPropertySetter("mapred/outputKeySortColumns");
    digester.addBeanPropertySetter("mapred/outputKeySortOrders");
    digester.addBeanPropertySetter("mapred/outputGroupingColumns");
    digester.addBeanPropertySetter("mapred/numReduceTask");
    digester.addBeanPropertySetter("mapred/memoryForMapTask");
    digester.addBeanPropertySetter("mapred/memoryForReduceTask");

    digester.addBeanPropertySetter("mapred/jobLauncher");
    digester.addBeanPropertySetter("mapred/mapper");
    digester.addBeanPropertySetter("mapred/reducer");
    digester.addBeanPropertySetter("mapred/combiner");

    // Each <inputTables>/<table> becomes an OdpsTableInfo passed to addInputTable.
    digester.addObjectCreate("mapred/inputTables/table", OdpsTableInfo.class);
    digester.addBeanPropertySetter("mapred/inputTables/table/name");
    digester.addCallMethod("mapred/inputTables/table/partitions/partition", "addPartition", 1);
    digester.addCallParam("mapred/inputTables/table/partitions/partition", 0);
    digester.addSetNext("mapred/inputTables/table", "addInputTable");

    // The single <outputTable> becomes an OdpsTableInfo passed to setOutputTable.
    digester.addObjectCreate("mapred/outputTable", OdpsTableInfo.class);
    digester.addBeanPropertySetter("mapred/outputTable/name");
    digester.addCallMethod("mapred/outputTable/partition", "addPartition", 1);
    digester.addCallParam("mapred/outputTable/partition", 0);
    digester.addSetNext("mapred/outputTable", "setOutputTable");

    // try-with-resources closes the stream on every path; a null resource (file not on the
    // classpath) is tolerated here and makes digester.parse fail, which yields null below.
    try (InputStream is = ClassLoader.getSystemResourceAsStream("META-INF/base.mapred.xml")) {
      MapreduceConfigInfo conf = digester.parse(is);

      // Nothing to merge: no parsed root object, or no extra partitions requested.
      if (conf == null || extraPartitions == null || extraPartitions.isEmpty()) {
        return conf;
      }

      // Merge the extra partitions into the input tables and the output table.
      OdpsTableInfo outputTable = conf.getOutputTable();
      for (String ep : extraPartitions.split(":")) {
        int pos = ep.indexOf('/');
        if (pos < 0) {
          // Malformed entry without "table/partition" separator: report failure as before.
          return null;
        }
        String tableName = ep.substring(0, pos);
        String partition = ep.substring(pos + 1);

        if (conf.getInputTables() != null) {
          for (OdpsTableInfo t : conf.getInputTables()) {
            if (tableName.equals(t.getName())) {
              t.addPartition(partition);
            }
          }
        }

        // A missing output table is left for the caller to report; it must not abort parsing.
        if (outputTable != null && tableName.equals(outputTable.getName())) {
          outputTable.addPartition(partition);
        }
      }

      return conf;
    } catch (Exception e) {
      // Callers treat a null result as "parsing failed", so the failure is reported that way.
      return null;
    }
  }