Пример #1
0
  /**
   * Builds a conditional task that wraps a move task and a merge task, and attaches it as a
   * dependent of the current leaf task.
   *
   * <p>The candidates are registered in the order (move, merge), both in the
   * {@link ConditionalWork} and in the task list handed to the conditional task and to the
   * {@link ConditionalResolverMergeFilesCtx}.
   *
   * @param conf HiveConf used when instantiating the tasks
   * @param currTask current leaf task; the conditional task becomes its dependent
   * @param mvWork MoveWork backing the move task
   * @param mergeWork MapredWork backing the merge task
   * @param inputPath the input directory of the merge/move task
   * @return the newly created conditional task
   */
  private ConditionalTask createCondTask(
      HiveConf conf,
      Task<? extends Serializable> currTask,
      MoveWork mvWork,
      MapredWork mergeWork,
      String inputPath) {

    // Works wrapped by the conditional work: move first, then merge.
    List<Serializable> candidateWorks = new ArrayList<Serializable>();
    candidateWorks.add(mvWork);
    candidateWorks.add(mergeWork);

    // The factory calls stay in the sequence merge, move, conditional, in case the factory's
    // behavior depends on call order.
    Task<? extends Serializable> merger = TaskFactory.get(mergeWork, conf);
    Task<? extends Serializable> mover = TaskFactory.get(mvWork, conf);

    // Candidate tasks, in the same (move, merge) order as the works above.
    List<Task<? extends Serializable>> candidates = new ArrayList<Task<? extends Serializable>>();
    candidates.add(mover);
    candidates.add(merger);

    ConditionalTask conditional =
        (ConditionalTask) TaskFactory.get(new ConditionalWork(candidateWorks), conf);
    conditional.setListTasks(candidates);

    // Install the merge-files resolver together with its context (candidate tasks + input path).
    conditional.setResolver(new ConditionalResolverMergeFiles());
    conditional.setResolverCtx(new ConditionalResolverMergeFilesCtx(candidates, inputPath));

    // The conditional task runs as a child of the current leaf task.
    currTask.addDependentTask(conditional);

    return conditional;
  }
Пример #2
0
  /**
   * Builds a conditional task with three candidate branches (move only, merge only, and merge
   * followed by move) and attaches it as a dependent of the current leaf task.
   *
   * <p>The three options for the conditional task are:
   * <ol>
   *   <li>merge the partitions;</li>
   *   <li>move the partitions (i.e. do not merge them);</li>
   *   <li>merge some partitions and move others; here the merge is done first, followed by the
   *       move, to prevent conflicts.</li>
   * </ol>
   *
   * @param conf HiveConf used when instantiating the tasks
   * @param currTask current leaf task; the conditional task becomes its dependent
   * @param mvWork MoveWork backing the move tasks
   * @param mergeWork MapredWork backing the merge tasks
   * @param inputPath the input directory of the merge/move task
   * @return the newly created conditional task
   */
  private ConditionalTask createCondTask(
      HiveConf conf,
      Task<? extends Serializable> currTask,
      MoveWork mvWork,
      MapredWork mergeWork,
      String inputPath) {

    // Works wrapped by the conditional work: move first, then merge.
    List<Serializable> candidateWorks = new ArrayList<Serializable>();
    candidateWorks.add(mvWork);
    candidateWorks.add(mergeWork);

    // Stand-alone branches. The factory calls stay in their original sequence, in case the
    // factory's behavior depends on call order.
    Task<? extends Serializable> mergeOnly = TaskFactory.get(mergeWork, conf);
    Task<? extends Serializable> moveOnly = TaskFactory.get(mvWork, conf);

    // Combined branch: a separate merge task with its own move task chained behind it.
    Task<? extends Serializable> mergeThenMoveHead = TaskFactory.get(mergeWork, conf);
    Task<? extends Serializable> mergeThenMoveTail = TaskFactory.get(mvWork, conf);

    // NOTE: the merge task must be the parent of the move task, and not the other way around,
    // for the proper execution of the execute method of ConditionalTask.
    mergeThenMoveHead.addDependentTask(mergeThenMoveTail);

    // Candidate tasks in the order: move only, merge only, merge-then-move.
    List<Task<? extends Serializable>> candidates = new ArrayList<Task<? extends Serializable>>();
    candidates.add(moveOnly);
    candidates.add(mergeOnly);
    candidates.add(mergeThenMoveHead);

    ConditionalTask conditional =
        (ConditionalTask) TaskFactory.get(new ConditionalWork(candidateWorks), conf);
    conditional.setListTasks(candidates);

    // Install the merge-files resolver together with its context (candidate tasks + input path).
    conditional.setResolver(new ConditionalResolverMergeFiles());
    conditional.setResolverCtx(new ConditionalResolverMergeFilesCtx(candidates, inputPath));

    // The conditional task runs as a child of the current leaf task.
    currTask.addDependentTask(conditional);

    return conditional;
  }