Esempio n. 1
0
  /**
   * Handles a file sink operator reached while walking the operator tree.
   *
   * <p>The current task is flagged as the final map-reduce task. If the context already holds
   * a map of linked file sink descriptors, the task looked up for this sink's descriptor is
   * handed to {@code processLinkedFileDesc} and nothing else is done. Otherwise the method
   * decides whether the sink's output should be merged, runs {@code processFS}, creates the
   * merge work when merging was chosen, and finally records the current task's only child
   * task for every descriptor linked to this sink.
   *
   * @param nd the file sink operator encountered
   * @param stack forwarded unchanged to {@code processFS}
   * @param opProcCtx context; cast to {@code GenMRProcContext}
   * @param nodeOutputs not read by this method
   * @return always {@code null}
   * @throws SemanticException declared by the signature; this method has no explicit throw
   */
  public Object process(
      Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs)
      throws SemanticException {
    GenMRProcContext genCtx = (GenMRProcContext) opProcCtx;
    ParseContext parseContext = genCtx.getParseCtx();
    Task<? extends Serializable> activeTask = genCtx.getCurrTask();
    FileSinkOperator sinkOp = (FileSinkOperator) nd;

    // The sink writes into a named table and the query is an insert-to-table
    // (the INSERT OVERWRITE TABLE case).
    boolean insertsIntoTable = false;
    if (sinkOp.getConf().getTableInfo().getTableName() != null) {
      insertsIntoTable = parseContext.getQB().getParseInfo().isInsertToTable();
    }
    HiveConf conf = parseContext.getConf();

    // This is the final map-reduce task; an optional merge task added later is not counted.
    MapredWork finalWork = (MapredWork) activeTask.getWork();
    finalWork.setFinalMapRed(true);

    // A linked file sink descriptor may already have caused this descriptor to be
    // processed; in that case continue with the task recorded for it.
    Map<FileSinkDesc, Task<? extends Serializable>> linkedTasks =
        genCtx.getLinkedFileDescTasks();
    if (linkedTasks != null) {
      processLinkedFileDesc(genCtx, linkedTasks.get(sinkOp.getConf()));
      return null;
    }

    // Merging is only considered when the context carries at least one move task.
    boolean mergeNeeded = false;
    List<Task<MoveWork>> moveTasks = genCtx.getMvTask();
    if (moveTasks != null && !moveTasks.isEmpty()) {
      // With unions or map-joins the same sink can be visited more than once;
      // a sink that was already seen is not considered for merging again.
      boolean alreadySeen =
          genCtx.getSeenFileSinkOps() != null && genCtx.getSeenFileSinkOps().contains(nd);

      if (!alreadySeen) {
        MoveTask moveTask = (MoveTask) findMoveTask(moveTasks, sinkOp);

        if (insertsIntoTable && conf.getBoolVar(ConfVars.HIVESTATSAUTOGATHER)) {
          addStatsTask(sinkOp, moveTask, activeTask, parseContext.getConf());
        }

        // A move to the local file system never needs a merge.
        boolean mergeCandidate =
            moveTask != null && !moveTask.isLocal() && sinkOp.getConf().canBeMerged();

        if (mergeCandidate) {
          if (sinkOp.getConf().isLinkedFileSink()) {
            // HIVEMERGEMAPREDFILES=false assumes few reducers and therefore few files,
            // but linked sinks can multiply the file count considerably, so merge as
            // soon as either merge flag is enabled.
            mergeNeeded =
                conf.getBoolVar(ConfVars.HIVEMERGEMAPFILES)
                    || conf.getBoolVar(ConfVars.HIVEMERGEMAPREDFILES);
          } else {
            // Map-only jobs and map-reduce jobs are governed by separate settings.
            MapredWork work = (MapredWork) activeTask.getWork();
            boolean mapOnlyMerge =
                conf.getBoolVar(ConfVars.HIVEMERGEMAPFILES) && work.getReducer() == null;
            boolean mapRedMerge =
                conf.getBoolVar(ConfVars.HIVEMERGEMAPREDFILES) && work.getReducer() != null;
            mergeNeeded = mapOnlyMerge || mapRedMerge;
          }
        }
      }
    }

    String mergedName = processFS(sinkOp, stack, opProcCtx, mergeNeeded);

    if (mergeNeeded) {
      // Merge the files in the destination table/partitions with a map-only merge job.
      // For RCFile or OrcFile data a BlockMerge task would be created.
      LOG.info("using CombineHiveInputformat for the merge job");
      createMRWorkForMergingFiles(sinkOp, genCtx, mergedName);
    }

    FileSinkDesc sinkDesc = sinkOp.getConf();
    if (!sinkDesc.isLinkedFileSink()) {
      return null;
    }

    // Fetch the map again (it was null above) and create it on first use.
    Map<FileSinkDesc, Task<? extends Serializable>> taskByLinkedDesc =
        genCtx.getLinkedFileDescTasks();
    if (taskByLinkedDesc == null) {
      taskByLinkedDesc = new HashMap<FileSinkDesc, Task<? extends Serializable>>();
      genCtx.setLinkedFileDescTasks(taskByLinkedDesc);
    }

    // The child tasks may be null in case of a select; only a single child is recorded.
    if (activeTask.getChildTasks() != null && activeTask.getChildTasks().size() == 1) {
      Task<? extends Serializable> onlyChild = activeTask.getChildTasks().get(0);
      for (FileSinkDesc linkedDesc : sinkDesc.getLinkedFileSinkDesc()) {
        taskByLinkedDesc.put(linkedDesc, onlyChild);
      }
    }

    return null;
  }
Esempio n. 2
0
  /**
   * Recursively prints a task, and the tasks reachable from it, to {@code out}.
   *
   * <p>For each task a {@code "Stage: <id>"} header is printed, followed by a dump of the
   * task's work via {@code output(...)}:
   * <ul>
   *   <li>{@code FetchWork}: its source;</li>
   *   <li>{@code MapredLocalWork}: the source of every aliased fetch work, then every
   *       aliased operator;</li>
   *   <li>{@code MapredWork}: only the first operator returned by {@code getAllOperators()};</li>
   *   <li>anything else: the work object itself.</li>
   * </ul>
   * Afterwards the tasks listed by a {@code ConditionalTask} and the task's children are
   * printed the same way. A task whose work is {@code null} ends the recursion for that
   * branch right after its header.
   *
   * @param task root of the plan fragment to print; {@code null} is ignored
   */
  private void outputPlan(Task<? extends Serializable> task) {
    if (task == null) {
      return;
    }

    // The format string needs a conversion for the id; without one the argument is dropped.
    out.printf("Stage: %s\n", task.getId());

    // real output
    Serializable work = task.getWork();
    if (work == null) {
      // Nothing to dump; conditional/child tasks of this task are not visited either.
      return;
    }

    if (work instanceof FetchWork) {
      out.println("Fetch");
      output(((FetchWork) work).getSource());
    } else if (work instanceof MapredLocalWork) {
      out.println("MapredLocalWork");
      // fetch
      try {
        out.println("Fetch Part");
        Collection<FetchWork> fetchWorkCollect =
            ((MapredLocalWork) work).getAliasToFetchWork().values();
        for (FetchWork f : fetchWorkCollect) {
          output(f.getSource());
        }
      } catch (Exception e) {
        // Best effort: a failure in this section must not abort the rest of the dump.
        out.println("Exception 1");
      }

      // others
      try {
        out.println("Other Parts");
        Collection<Operator<? extends OperatorDesc>> collect =
            ((MapredLocalWork) work).getAliasToWork().values();

        for (Operator<? extends OperatorDesc> c : collect) {
          output(c);
        }
      } catch (Exception e) {
        // Best effort, as above.
        out.println("Exception 2");
      }
    } else if (work instanceof MapredWork) {
      out.println("MapredWork");
      try {
        Collection<Operator<? extends OperatorDesc>> collect =
            ((MapredWork) work).getAllOperators();

        for (Operator<? extends OperatorDesc> c : collect) {
          output(c);
          // Only the first operator is dumped; per the original author it yields all the
          // information (presumably output(...) walks on from it -- not visible here).
          break;
        }
      } catch (Exception e) {
        // Best effort, as above.
        out.println("Exception 3");
      }
    } else {
      output(work);
    }

    // Tasks listed by a conditional task are printed as well.
    if (task instanceof ConditionalTask && ((ConditionalTask) task).getListTasks() != null) {
      for (Task<? extends Serializable> con : ((ConditionalTask) task).getListTasks()) {
        outputPlan(con);
      }
    }

    // Finally descend into the child tasks.
    if (task.getChildTasks() != null) {
      for (Task<? extends Serializable> child : task.getChildTasks()) {
        outputPlan(child);
      }
    }
  }