Example #1
 // push the feed to its subscribers
 protected void pushFeed(FeedType feedType, Object feedValue) {
   if (feedSubscribers != null) {
     for (Task<? extends Serializable> s : feedSubscribers) {
       s.receiveFeed(feedType, feedValue);
     }
   }
 }
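A minimal publisher-side sketch (hypothetical, not taken from the Hive source) of how a task could forward an execution-time value to its subscribers through this method; the FeedType.DYNAMIC_PARTITIONS constant and the helper computing the partition list are assumptions for illustration:

 // inside the publishing task, once the dynamic partition specs are known at execution time
 List<String> dynamicPartitions = getDynamicPartitionSpecs(); // hypothetical helper
 // every subscriber registered in feedSubscribers receives the (type, value) pair
 pushFeed(FeedType.DYNAMIC_PARTITIONS, dynamicPartitions);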
Example #2
  /**
   * Add the StatsTask as a dependent task of the MoveTask, because the StatsTask will change the
   * Table/Partition metadata. For atomicity, the metadata should not be changed before the data
   * has actually been put in place by the MoveTask.
   *
   * @param nd the FileSinkOperator whose results are taken care of by the MoveTask.
   * @param mvTask The MoveTask that moves the FileSinkOperator's results.
   * @param currTask The MapRedTask that the FileSinkOperator belongs to.
   * @param hconf HiveConf
   */
  private void addStatsTask(
      FileSinkOperator nd, MoveTask mvTask, Task<? extends Serializable> currTask, HiveConf hconf) {

    MoveWork mvWork = mvTask.getWork();
    StatsWork statsWork = null;
    if (mvWork.getLoadTableWork() != null) {
      statsWork = new StatsWork(mvWork.getLoadTableWork());
    } else if (mvWork.getLoadFileWork() != null) {
      statsWork = new StatsWork(mvWork.getLoadFileWork());
    }
    assert statsWork != null : "Error when generating StatsTask";
    statsWork.setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE));
    MapredWork mrWork = (MapredWork) currTask.getWork();

    // AggKey in StatsWork is used for stats aggregation while StatsAggPrefix
    // in FileSinkDesc is used for stats publishing. They should be consistent.
    statsWork.setAggKey(nd.getConf().getStatsAggPrefix());
    Task<? extends Serializable> statsTask = TaskFactory.get(statsWork, hconf);

    // mark the MapredWork and FileSinkOperator for gathering stats
    nd.getConf().setGatherStats(true);
    mrWork.setGatheringStats(true);
    nd.getConf().setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE));
    nd.getConf()
        .setMaxStatsKeyPrefixLength(hconf.getIntVar(ConfVars.HIVE_STATS_KEY_PREFIX_MAX_LENGTH));
    // mrWork.addDestinationTable(nd.getConf().getTableInfo().getTableName());

    // subscribe to feeds from the MoveTask so that the MoveTask can forward the list
    // of dynamic partitions to the StatsTask
    mvTask.addDependentTask(statsTask);
    statsTask.subscribeFeed(mvTask);
  }
Example #3
    @Override
    public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs)
        throws SemanticException {
      Task<? extends Serializable> task = (Task<? extends Serializable>) nd;

      if (!task.isMapRedTask()
          || task instanceof ConditionalTask
          || ((MapredWork) task.getWork()).getReducer() == null) {
        return null;
      }

      SkewJoinProcCtx skewJoinProcContext =
          new SkewJoinProcCtx(task, physicalContext.getParseContext());

      Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
      opRules.put(
          new RuleRegExp("R1", CommonJoinOperator.getOperatorName() + "%"),
          SkewJoinProcFactory.getJoinProc());

      // The dispatcher fires the processor corresponding to the closest
      // matching rule and passes the context along
      Dispatcher disp =
          new DefaultRuleDispatcher(
              SkewJoinProcFactory.getDefaultProc(), opRules, skewJoinProcContext);
      GraphWalker ogw = new DefaultGraphWalker(disp);

      // iterate over the reducer operator tree
      ArrayList<Node> topNodes = new ArrayList<Node>();
      topNodes.add(((MapredWork) task.getWork()).getReducer());
      ogw.startWalking(topNodes, null);
      return null;
    }
Example #4
  /**
   * handle partial scan command.
   *
   * <p>It is composed of PartialScanTask followed by StatsTask.
   */
  private void handlePartialScanCommand(
      TableScanOperator tableScan,
      ParseContext parseContext,
      StatsWork statsWork,
      GenTezProcContext context,
      Task<StatsWork> statsTask)
      throws SemanticException {

    String aggregationKey = tableScan.getConf().getStatsAggPrefix();
    StringBuilder aggregationKeyBuffer = new StringBuilder(aggregationKey);
    List<Path> inputPaths =
        GenMapRedUtils.getInputPathsForPartialScan(tableScan, aggregationKeyBuffer);
    aggregationKey = aggregationKeyBuffer.toString();

    // scan work
    PartialScanWork scanWork = new PartialScanWork(inputPaths);
    scanWork.setMapperCannotSpanPartns(true);
    scanWork.setAggKey(aggregationKey);
    scanWork.setStatsTmpDir(tableScan.getConf().getTmpStatsDir(), parseContext.getConf());

    // stats work
    statsWork.setPartialScanAnalyzeCommand(true);

    // partial scan task
    DriverContext driverCxt = new DriverContext();
    Task<PartialScanWork> partialScanTask = TaskFactory.get(scanWork, parseContext.getConf());
    partialScanTask.initialize(parseContext.getConf(), null, driverCxt);
    partialScanTask.setWork(scanWork);
    statsWork.setSourceTask(partialScanTask);

    // task dependency
    context.rootTasks.remove(context.currentTask);
    context.rootTasks.add(partialScanTask);
    partialScanTask.addDependentTask(statsTask);
  }
  /**
   * handle partial scan command. It is composed of a PartialScanTask followed by a StatsTask.
   *
   * @param op the TableScanOperator being analyzed
   * @param ctx processing context
   * @param parseCtx parse context
   * @param currTask the current root task, replaced as root by the partial scan task
   * @param statsWork the StatsWork to configure
   * @param statsTask the StatsTask that will depend on the partial scan task
   * @throws SemanticException
   */
  private void handlePartialScanCommand(
      TableScanOperator op,
      GenMRProcContext ctx,
      ParseContext parseCtx,
      Task<? extends Serializable> currTask,
      StatsWork statsWork,
      Task<StatsWork> statsTask)
      throws SemanticException {
    String aggregationKey = op.getConf().getStatsAggPrefix();
    StringBuffer aggregationKeyBuffer = new StringBuffer(aggregationKey);
    List<Path> inputPaths = GenMapRedUtils.getInputPathsForPartialScan(op, aggregationKeyBuffer);
    aggregationKey = aggregationKeyBuffer.toString();

    // scan work
    PartialScanWork scanWork = new PartialScanWork(inputPaths);
    scanWork.setMapperCannotSpanPartns(true);
    scanWork.setAggKey(aggregationKey);

    // stats work
    statsWork.setPartialScanAnalyzeCommand(true);

    // partial scan task
    DriverContext driverCxt = new DriverContext();
    Task<PartialScanWork> psTask = TaskFactory.get(scanWork, parseCtx.getConf());
    psTask.initialize(parseCtx.getConf(), null, driverCxt);
    psTask.setWork(scanWork);

    // task dependency
    ctx.getRootTasks().remove(currTask);
    ctx.getRootTasks().add(psTask);
    psTask.addDependentTask(statsTask);
    List<Task<? extends Serializable>> parentTasks = new ArrayList<Task<? extends Serializable>>();
    parentTasks.add(psTask);
    statsTask.setParentTasks(parentTasks);
  }
Example #6
  /*
   * Multiple file sink descriptors are linked.
   * Use the task created by the first linked file descriptor
   */
  private void processLinkedFileDesc(GenMRProcContext ctx, Task<? extends Serializable> childTask)
      throws SemanticException {
    Operator<? extends OperatorDesc> currTopOp = ctx.getCurrTopOp();
    String currAliasId = ctx.getCurrAliasId();
    List<Operator<? extends OperatorDesc>> seenOps = ctx.getSeenOps();
    List<Task<? extends Serializable>> rootTasks = ctx.getRootTasks();
    Task<? extends Serializable> currTask = ctx.getCurrTask();

    if (currTopOp != null) {
      if (!seenOps.contains(currTopOp)) {
        seenOps.add(currTopOp);
        GenMapRedUtils.setTaskPlan(
            currAliasId, currTopOp, (MapredWork) currTask.getWork(), false, ctx);
      }

      if (!rootTasks.contains(currTask)
          && (currTask.getParentTasks() == null || currTask.getParentTasks().isEmpty())) {
        rootTasks.add(currTask);
      }
    }

    if (childTask != null) {
      currTask.addDependentTask(childTask);
    }
  }
Example #7
 /**
  * Subscribe to the feed of the publisher. To prevent cycles, a task can only subscribe to one of
  * its ancestors. A feed is a generic form of execution-time feedback: a (type, value) pair passed
  * from one task to another. Examples include dynamic partitions (which are only available at
  * execution time). The MoveTask may pass the list of dynamic partitions to the StatsTask, since
  * after the MoveTask runs that list is lost (the MoveTask moves the partitions into the table's
  * destination directory, where they are mixed with old partitions).
  *
  * @param publisher the feed provider.
  */
 public void subscribeFeed(Task<? extends Serializable> publisher) {
   if (publisher != this && publisher.ancestorOrSelf(this)) {
     if (publisher.getFeedSubscribers() == null) {
       publisher.setFeedSubscribers(new LinkedList<Task<? extends Serializable>>());
     }
     publisher.getFeedSubscribers().add(this);
   }
 }
 public boolean addRootIfPossible(Task<? extends Serializable> task) {
   if (task.getParentTasks() == null || task.getParentTasks().isEmpty()) {
     if (!rootTasks.contains(task)) {
       return rootTasks.add(task);
     }
   }
   return false;
 }
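A rough end-to-end sketch of how these pieces fit together, using the same wiring as Example #2 (hedged; the execution-time part is a simplification, not the actual MoveTask code): the StatsTask subscribes to its ancestor MoveTask at compile time, and when the MoveTask later learns which dynamic partitions it moved, it pushes them to all subscribers via pushFeed from Example #1:

 // compile time: statsTask is a descendant of mvTask, so the subscription check passes
 mvTask.addDependentTask(statsTask);
 statsTask.subscribeFeed(mvTask);

 // execution time, inside the MoveTask (movedPartitionSpecs is a hypothetical value):
 // pushFeed iterates over feedSubscribers and calls receiveFeed on each of them
 pushFeed(FeedType.DYNAMIC_PARTITIONS, movedPartitionSpecs);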
Example #9
 /**
  * Remove the dependent task.
  *
  * @param dependent the task to remove
  */
 public void removeDependentTask(Task<? extends Serializable> dependent) {
   if ((getChildTasks() != null) && (getChildTasks().contains(dependent))) {
     getChildTasks().remove(dependent);
     if ((dependent.getParentTasks() != null) && (dependent.getParentTasks().contains(this))) {
       dependent.getParentTasks().remove(this);
     }
   }
 }
Example #10
  /**
   * Compile the query and extract metadata
   *
   * @param sqlOperationConf
   * @throws HiveSQLException
   */
  public void prepare(HiveConf sqlOperationConf) throws HiveSQLException {
    setState(OperationState.RUNNING);

    try {
      driver = new Driver(sqlOperationConf, getParentSession().getUserName());

      // set the operation handle information in Driver, so that thrift API users
      // can use the operation handle they receive, to lookup query information in
      // Yarn ATS
      String guid64 =
          Base64.encodeBase64URLSafeString(
                  getHandle().getHandleIdentifier().toTHandleIdentifier().getGuid())
              .trim();
      driver.setOperationId(guid64);

      // In Hive server mode, we are not able to retry in the FetchTask
      // case, when calling fetch queries since execute() has returned.
      // For now, we disable the test attempts.
      driver.setTryCount(Integer.MAX_VALUE);

      response = driver.compileAndRespond(statement);
      if (0 != response.getResponseCode()) {
        throw toSQLException("Error while compiling statement", response);
      }

      mResultSchema = driver.getSchema();

      // hasResultSet should be true only if the query has a FetchTask
      // "explain" is an exception for now
      if (driver.getPlan().getFetchTask() != null) {
        // Schema has to be set
        if (mResultSchema == null || !mResultSchema.isSetFieldSchemas()) {
          throw new HiveSQLException(
              "Error compiling query: Schema and FieldSchema "
                  + "should be set when query plan has a FetchTask");
        }
        resultSchema = new TableSchema(mResultSchema);
        setHasResultSet(true);
      } else {
        setHasResultSet(false);
      }
      // Set hasResultSet true if the plan has ExplainTask
      // TODO explain should use a FetchTask for reading
      for (Task<? extends Serializable> task : driver.getPlan().getRootTasks()) {
        if (task.getClass() == ExplainTask.class) {
          resultSchema = new TableSchema(mResultSchema);
          setHasResultSet(true);
          break;
        }
      }
    } catch (HiveSQLException e) {
      setState(OperationState.ERROR);
      throw e;
    } catch (Throwable e) {
      setState(OperationState.ERROR);
      throw new HiveSQLException("Error running query: " + e.toString(), e);
    }
  }
Example #11
  /**
   * File Sink Operator encountered.
   *
   * @param nd the file sink operator encountered
   * @param opProcCtx context
   */
  public Object process(
      Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs)
      throws SemanticException {
    GenMRProcContext ctx = (GenMRProcContext) opProcCtx;
    ParseContext parseCtx = ctx.getParseCtx();
    boolean chDir = false;
    Task<? extends Serializable> currTask = ctx.getCurrTask();
    FileSinkOperator fsOp = (FileSinkOperator) nd;
    boolean isInsertTable = // is INSERT OVERWRITE TABLE
        fsOp.getConf().getTableInfo().getTableName() != null
            && parseCtx.getQB().getParseInfo().isInsertToTable();
    HiveConf hconf = parseCtx.getConf();

    // Has the user enabled merging of files for map-only jobs or for all jobs
    if ((ctx.getMvTask() != null) && (!ctx.getMvTask().isEmpty())) {
      List<Task<? extends Serializable>> mvTasks = ctx.getMvTask();

      // In case of unions or map-joins, it is possible that the file has
      // already been seen.
      // So, no need to attempt to merge the files again.
      if ((ctx.getSeenFileSinkOps() == null) || (!ctx.getSeenFileSinkOps().contains(nd))) {

        // no need for merging if the move is to a local file system
        MoveTask mvTask = (MoveTask) findMoveTask(mvTasks, fsOp);

        if (isInsertTable && hconf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
          addStatsTask(fsOp, mvTask, currTask, parseCtx.getConf());
        }

        if ((mvTask != null) && !mvTask.isLocal()) {
          // There are separate configuration parameters to control whether to
          // merge for a map-only job
          // or for a map-reduce job
          MapredWork currWork = (MapredWork) currTask.getWork();
          boolean mergeMapOnly =
              hconf.getBoolVar(HiveConf.ConfVars.HIVEMERGEMAPFILES)
                  && currWork.getReducer() == null;
          boolean mergeMapRed =
              hconf.getBoolVar(HiveConf.ConfVars.HIVEMERGEMAPREDFILES)
                  && currWork.getReducer() != null;
          if (mergeMapOnly || mergeMapRed) {
            chDir = true;
          }
        }
      }
    }

    String finalName = processFS(nd, stack, opProcCtx, chDir);

    // need to merge the files in the destination table/partitions
    if (chDir && (finalName != null)) {
      createMergeJob((FileSinkOperator) nd, ctx, finalName);
    }

    return null;
  }
Example #12
  private void LinkMoveTask(
      GenMRProcContext ctx, FileSinkOperator newOutput, ConditionalTask cndTsk) {

    List<Task<? extends Serializable>> mvTasks = ctx.getMvTask();
    Task<? extends Serializable> mvTask = findMoveTask(mvTasks, newOutput);

    if (mvTask != null) {
      for (Task<? extends Serializable> tsk : cndTsk.getListTasks()) {
        tsk.addDependentTask(mvTask);
      }
    }
  }
Example #13
 public boolean isRunnable() {
   boolean isrunnable = true;
   if (parentTasks != null) {
     for (Task<? extends Serializable> parent : parentTasks) {
       if (!parent.done()) {
         isrunnable = false;
         break;
       }
     }
   }
   return isrunnable;
 }
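A simplified, hypothetical scheduler loop (not the actual Driver code) showing how isRunnable is typically consulted: a task is launched only after all of its parent tasks have finished. The remainingTasks collection and launchTask helper are invented for illustration:

 // hypothetical polling loop over the tasks of a plan that have not run yet
 for (Task<? extends Serializable> tsk : remainingTasks) {
   if (!tsk.isRunnable()) {
     continue; // at least one parent task is not done yet
   }
   launchTask(tsk); // hypothetical helper that actually executes the task
 }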
Example #14
  /**
   * Follows the task tree down from task and makes all leaves parents of mvTask
   *
   * @param ctx
   * @param mvTask
   * @param task
   */
  private void linkMoveTask(
      GenMRProcContext ctx, Task<MoveWork> mvTask, Task<? extends Serializable> task) {

    if (task.getDependentTasks() == null || task.getDependentTasks().isEmpty()) {
      // If it's a leaf, add the move task as a child
      addDependentMoveTasks(ctx, mvTask, task);
    } else {
      // Otherwise, for each child run this method recursively
      for (Task<? extends Serializable> childTask : task.getDependentTasks()) {
        linkMoveTask(ctx, mvTask, childTask);
      }
    }
  }
Example #15
  public Task<? extends Serializable> getAndInitBackupTask() {
    if (backupTask != null) {
      // first reconnect the backup task with its children tasks.
      if (backupChildrenTasks != null) {
        for (Task<? extends Serializable> backupChild : backupChildrenTasks) {
          backupChild.getParentTasks().add(backupTask);
        }
      }

      // recursively remove this task from its children if it no longer has any parent task
      this.removeFromChildrenTasks();
    }
    return backupTask;
  }
Example #16
 // return true if this task is the same as, or an ancestor of, the parameter desc
 private boolean ancestorOrSelf(Task<? extends Serializable> desc) {
   if (this == desc) {
     return true;
   }
   List<Task<? extends Serializable>> deps = getDependentTasks();
   if (deps != null) {
     for (Task<? extends Serializable> d : deps) {
       if (d.ancestorOrSelf(desc)) {
         return true;
       }
     }
   }
   return false;
 }
Example #17
  /**
   * Construct a conditional task given the current leaf task, the MoveWork and the MapredWork.
   *
   * @param conf HiveConf
   * @param currTask current leaf task
   * @param mvWork MoveWork for the move task
   * @param mergeWork MapredWork for the merge task.
   * @param inputPath the input directory of the merge/move task
   * @return The conditional task
   */
  private ConditionalTask createCondTask(
      HiveConf conf,
      Task<? extends Serializable> currTask,
      MoveWork mvWork,
      MapredWork mergeWork,
      String inputPath) {

    // There are 3 options for this ConditionalTask:
    // 1) Merge the partitions
    // 2) Move the partitions (i.e. don't merge the partitions)
    // 3) Merge some partitions and move the others (i.e. merge some partitions and don't merge
    //    the rest); in this case the merge is done first, followed by the move, to prevent
    //    conflicts.
    Task<? extends Serializable> mergeOnlyMergeTask = TaskFactory.get(mergeWork, conf);
    Task<? extends Serializable> moveOnlyMoveTask = TaskFactory.get(mvWork, conf);
    Task<? extends Serializable> mergeAndMoveMergeTask = TaskFactory.get(mergeWork, conf);
    Task<? extends Serializable> mergeAndMoveMoveTask = TaskFactory.get(mvWork, conf);

    // NOTE! It is necessary that the merge task is the parent of the move task, and not
    // the other way around, for the proper execution of ConditionalTask's execute method.
    mergeAndMoveMergeTask.addDependentTask(mergeAndMoveMoveTask);

    List<Serializable> listWorks = new ArrayList<Serializable>();
    listWorks.add(mvWork);
    listWorks.add(mergeWork);

    ConditionalWork cndWork = new ConditionalWork(listWorks);

    List<Task<? extends Serializable>> listTasks = new ArrayList<Task<? extends Serializable>>();
    listTasks.add(moveOnlyMoveTask);
    listTasks.add(mergeOnlyMergeTask);
    listTasks.add(mergeAndMoveMergeTask);

    ConditionalTask cndTsk = (ConditionalTask) TaskFactory.get(cndWork, conf);
    cndTsk.setListTasks(listTasks);

    // create resolver
    cndTsk.setResolver(new ConditionalResolverMergeFiles());
    ConditionalResolverMergeFilesCtx mrCtx =
        new ConditionalResolverMergeFilesCtx(listTasks, inputPath);
    cndTsk.setResolverCtx(mrCtx);

    // make the conditional task a child of the current leaf task
    currTask.addDependentTask(cndTsk);

    return cndTsk;
  }
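For context, a hedged sketch of the resolution step that happens when this conditional task later runs (simplified; ConditionalTask drives this internally, and the getTasks(HiveConf, Object) entry point on the resolver is an assumption here): the resolver inspects the input directory and returns the subset of listTasks that should actually execute, e.g. move only, merge only, or merge followed by move:

    // hypothetical sketch of what the conditional task does at execution time
    ConditionalResolver resolver = cndTsk.getResolver(); // the ConditionalResolverMergeFiles set above
    List<Task<? extends Serializable>> selected =
        resolver.getTasks(conf, cndTsk.getResolverCtx()); // assumed resolver API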
Example #18
  /** Initialization when invoked from QL. */
  @Override
  public void initialize(HiveConf conf, QueryPlan queryPlan, DriverContext driverContext) {
    super.initialize(conf, queryPlan, driverContext);

    job = new JobConf(conf, ExecDriver.class);

    // NOTE: initialize is only called if it is in non-local mode.
    // In case it's in non-local mode, we need to move the SessionState files
    // and jars to jobConf.
    // In case it's in local mode, MapRedTask will set the jobConf.
    //
    // "tmpfiles" and "tmpjars" are set by the method ExecDriver.execute(),
    // which will be called by both local and NON-local mode.
    String addedFiles = Utilities.getResourceFiles(job, SessionState.ResourceType.FILE);
    if (StringUtils.isNotBlank(addedFiles)) {
      HiveConf.setVar(job, ConfVars.HIVEADDEDFILES, addedFiles);
    }
    String addedJars = Utilities.getResourceFiles(job, SessionState.ResourceType.JAR);
    if (StringUtils.isNotBlank(addedJars)) {
      HiveConf.setVar(job, ConfVars.HIVEADDEDJARS, addedJars);
    }
    String addedArchives = Utilities.getResourceFiles(job, SessionState.ResourceType.ARCHIVE);
    if (StringUtils.isNotBlank(addedArchives)) {
      HiveConf.setVar(job, ConfVars.HIVEADDEDARCHIVES, addedArchives);
    }
    conf.stripHiddenConfigurations(job);
    this.jobExecHelper = new HadoopJobExecHelper(job, console, this, this);
  }
Example #19
  @Override
  public void initialize(
      HiveConf conf, QueryPlan queryPlan, DriverContext ctx, CompilationOpContext opContext) {
    super.initialize(conf, queryPlan, ctx, opContext);
    work.initializeForFetch(opContext);

    try {
      // Create a file system handle
      JobConf job = new JobConf(conf);

      Operator<?> source = work.getSource();
      if (source instanceof TableScanOperator) {
        TableScanOperator ts = (TableScanOperator) source;
        // push down projections
        ColumnProjectionUtils.appendReadColumns(
            job, ts.getNeededColumnIDs(), ts.getNeededColumns());
        // push down filters
        HiveInputFormat.pushFilters(job, ts);
      }
      sink = work.getSink();
      fetch = new FetchOperator(work, job, source, getVirtualColumns(source));
      source.initialize(conf, new ObjectInspector[] {fetch.getOutputObjectInspector()});
      totalRows = 0;
      ExecMapper.setDone(false);

    } catch (Exception e) {
      // Bail out ungracefully - we should never hit
      // this here - but would have hit it in SemanticAnalyzer
      LOG.error(StringUtils.stringifyException(e));
      throw new RuntimeException(e);
    }
  }
Example #20
  /**
   * Construct a conditional task given the current leaf task, the MoveWork and the MapredWork.
   *
   * @param conf HiveConf
   * @param currTask current leaf task
   * @param mvWork MoveWork for the move task
   * @param mergeWork MapredWork for the merge task.
   * @param inputPath the input directory of the merge/move task
   * @return The conditional task
   */
  private ConditionalTask createCondTask(
      HiveConf conf,
      Task<? extends Serializable> currTask,
      MoveWork mvWork,
      MapredWork mergeWork,
      String inputPath) {

    Task<? extends Serializable> mergeTask = TaskFactory.get(mergeWork, conf);
    Task<? extends Serializable> moveTask = TaskFactory.get(mvWork, conf);
    List<Serializable> listWorks = new ArrayList<Serializable>();
    listWorks.add(mvWork);
    listWorks.add(mergeWork);

    ConditionalWork cndWork = new ConditionalWork(listWorks);

    List<Task<? extends Serializable>> listTasks = new ArrayList<Task<? extends Serializable>>();
    listTasks.add(moveTask);
    listTasks.add(mergeTask);

    ConditionalTask cndTsk = (ConditionalTask) TaskFactory.get(cndWork, conf);
    cndTsk.setListTasks(listTasks);

    // create resolver
    cndTsk.setResolver(new ConditionalResolverMergeFiles());
    ConditionalResolverMergeFilesCtx mrCtx =
        new ConditionalResolverMergeFilesCtx(listTasks, inputPath);
    cndTsk.setResolverCtx(mrCtx);

    // make the conditional task a child of the current leaf task
    currTask.addDependentTask(cndTsk);

    return cndTsk;
  }
Example #21
  /*
   * It is an idempotent function to add various intermediate files as the source
   * for the union. The plan has already been created.
   */
  public static void initUnionPlan(
      GenMRProcContext opProcCtx, Task<? extends Serializable> currTask, boolean local) {
    MapredWork plan = (MapredWork) currTask.getWork();
    UnionOperator currUnionOp = opProcCtx.getCurrUnionOp();
    assert currUnionOp != null;
    GenMRUnionCtx uCtx = opProcCtx.getUnionTask(currUnionOp);
    assert uCtx != null;

    List<String> taskTmpDirLst = uCtx.getTaskTmpDir();
    List<TableDesc> tt_descLst = uCtx.getTTDesc();
    assert !taskTmpDirLst.isEmpty() && !tt_descLst.isEmpty();
    assert taskTmpDirLst.size() == tt_descLst.size();
    int size = taskTmpDirLst.size();
    assert local == false;

    for (int pos = 0; pos < size; pos++) {
      String taskTmpDir = taskTmpDirLst.get(pos);
      TableDesc tt_desc = tt_descLst.get(pos);
      if (plan.getPathToAliases().get(taskTmpDir) == null) {
        plan.getPathToAliases().put(taskTmpDir, new ArrayList<String>());
        plan.getPathToAliases().get(taskTmpDir).add(taskTmpDir);
        plan.getPathToPartitionInfo().put(taskTmpDir, new PartitionDesc(tt_desc, null));
        plan.getAliasToWork().put(taskTmpDir, currUnionOp);
      }
    }
  }
Example #22
 @Override
 public void shutdown() {
   super.shutdown();
   if (executor != null) {
     executor.destroy();
     executor = null;
   }
 }
Example #23
 /**
  * Add a dependent task to the current task. Returns whether the dependency already existed or is
  * a new one.
  *
  * @return true if the task was added, false if the dependency already existed
  */
 public boolean addDependentTask(Task<? extends Serializable> dependent) {
   boolean ret = false;
   if (getChildTasks() == null) {
     setChildTasks(new ArrayList<Task<? extends Serializable>>());
   }
   if (!getChildTasks().contains(dependent)) {
     ret = true;
     getChildTasks().add(dependent);
     if (dependent.getParentTasks() == null) {
       dependent.setParentTasks(new ArrayList<Task<? extends Serializable>>());
     }
     if (!dependent.getParentTasks().contains(this)) {
       dependent.getParentTasks().add(this);
     }
   }
   return ret;
 }
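A small hypothetical usage sketch (task names invented for illustration) showing how addDependentTask and removeDependentTask from Example #9 maintain both sides of the parent/child relationship:

 // build a two-task chain: moveTask becomes a child of mapRedTask, and
 // mapRedTask is added to moveTask's parent list at the same time
 boolean added = mapRedTask.addDependentTask(moveTask);      // true: new dependency
 boolean addedAgain = mapRedTask.addDependentTask(moveTask); // false: already a child

 // undo the edge; both the child list and the parent list are cleaned up
 mapRedTask.removeDependentTask(moveTask);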
Example #24
  private Task<? extends Serializable> findMoveTask(
      List<Task<? extends Serializable>> mvTasks, FileSinkOperator fsOp) {
    // find the move task
    for (Task<? extends Serializable> mvTsk : mvTasks) {
      MoveWork mvWork = (MoveWork) mvTsk.getWork();
      String srcDir = null;
      if (mvWork.getLoadFileWork() != null) {
        srcDir = mvWork.getLoadFileWork().getSourceDir();
      } else if (mvWork.getLoadTableWork() != null) {
        srcDir = mvWork.getLoadTableWork().getSourceDir();
      }

      if ((srcDir != null) && (srcDir.equalsIgnoreCase(fsOp.getConf().getDirName()))) {
        return mvTsk;
      }
    }
    return null;
  }
Example #25
  public void removeFromChildrenTasks() {

    List<Task<? extends Serializable>> childrenTasks = this.getChildTasks();
    if (childrenTasks == null) {
      return;
    }

    for (Task<? extends Serializable> childTsk : childrenTasks) {
      // remove this task from the child's list of parents
      childTsk.getParentTasks().remove(this);

      // if the child no longer has any parent task, recursively detach it from its own children
      List<Task<? extends Serializable>> siblingTasks = childTsk.getParentTasks();
      if (siblingTasks == null || siblingTasks.size() == 0) {
        childTsk.removeFromChildrenTasks();
      }
    }
  }
Example #26
  private Task<MoveWork> findMoveTask(List<Task<MoveWork>> mvTasks, FileSinkOperator fsOp) {
    // find the move task
    for (Task<MoveWork> mvTsk : mvTasks) {
      MoveWork mvWork = mvTsk.getWork();
      String srcDir = null;
      if (mvWork.getLoadFileWork() != null) {
        srcDir = mvWork.getLoadFileWork().getSourceDir();
      } else if (mvWork.getLoadTableWork() != null) {
        srcDir = mvWork.getLoadTableWork().getSourceDir();
      }

      String fsOpDirName = fsOp.getConf().getFinalDirName();
      if ((srcDir != null) && (srcDir.equalsIgnoreCase(fsOpDirName))) {
        return mvTsk;
      }
    }
    return null;
  }
Example #27
 public synchronized <T extends Serializable> void updateStatus(Task<T> tTask) {
   this.taskState = tTask.getTaskState();
   if (externalHandle == null && tTask.getExternalHandle() != null) {
     this.externalHandle = tTask.getExternalHandle();
   }
   setStatusMessage(tTask.getStatusMessage());
   switch (taskState) {
     case RUNNING:
       if (beginTime == null) {
         beginTime = System.currentTimeMillis();
       }
       break;
     case FINISHED:
       if (endTime == null) {
         endTime = System.currentTimeMillis();
       }
       break;
   }
 }
Example #28
  /**
   * Initialize the current plan by adding it to root tasks.
   *
   * @param op the reduce sink operator encountered
   * @param opProcCtx processing context
   */
  public static void initPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx)
      throws SemanticException {
    Operator<? extends Serializable> reducer = op.getChildOperators().get(0);
    Map<Operator<? extends Serializable>, GenMapRedCtx> mapCurrCtx = opProcCtx.getMapCurrCtx();
    GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0));
    Task<? extends Serializable> currTask = mapredCtx.getCurrTask();
    MapredWork plan = (MapredWork) currTask.getWork();
    HashMap<Operator<? extends Serializable>, Task<? extends Serializable>> opTaskMap =
        opProcCtx.getOpTaskMap();
    Operator<? extends Serializable> currTopOp = opProcCtx.getCurrTopOp();

    opTaskMap.put(reducer, currTask);
    plan.setReducer(reducer);
    ReduceSinkDesc desc = op.getConf();

    plan.setNumReduceTasks(desc.getNumReducers());

    List<Task<? extends Serializable>> rootTasks = opProcCtx.getRootTasks();

    if (!rootTasks.contains(currTask)) {
      rootTasks.add(currTask);
    }
    if (reducer.getClass() == JoinOperator.class) {
      plan.setNeedsTagging(true);
    }

    assert currTopOp != null;
    List<Operator<? extends Serializable>> seenOps = opProcCtx.getSeenOps();
    String currAliasId = opProcCtx.getCurrAliasId();

    if (!seenOps.contains(currTopOp)) {
      seenOps.add(currTopOp);
      setTaskPlan(currAliasId, currTopOp, plan, false, opProcCtx);
    }

    currTopOp = null;
    currAliasId = null;

    opProcCtx.setCurrTask(currTask);
    opProcCtx.setCurrTopOp(currTopOp);
    opProcCtx.setCurrAliasId(currAliasId);
  }
Example #29
  /**
   * If HIVE_MULTI_INSERT_MOVE_TASKS_SHARE_DEPENDENCIES is set, adds the
   * dependencyTaskForMultiInsert in ctx as a dependent of parentTask; mvTask is then added as a
   * dependent of that dependency task if it is a load-table move, and as a dependent of parentTask
   * otherwise. If the flag is not set, mvTask is simply added as a dependent of parentTask.
   *
   * @param ctx
   * @param mvTask
   * @param parentTask
   */
  private void addDependentMoveTasks(
      GenMRProcContext ctx, Task<MoveWork> mvTask, Task<? extends Serializable> parentTask) {

    if (mvTask != null) {
      if (ctx.getConf().getBoolVar(ConfVars.HIVE_MULTI_INSERT_MOVE_TASKS_SHARE_DEPENDENCIES)) {
        DependencyCollectionTask dependencyTask = ctx.getDependencyTaskForMultiInsert();
        parentTask.addDependentTask(dependencyTask);
        if (mvTask.getWork().getLoadTableWork() != null) {
          // Moving tables/partitions depend on the dependencyTask
          dependencyTask.addDependentTask(mvTask);
        } else {
          // Moving files depends on the parentTask (we still want the dependencyTask to depend
          // on the parentTask)
          parentTask.addDependentTask(mvTask);
        }
      } else {
        parentTask.addDependentTask(mvTask);
      }
    }
  }
Example #30
 @Override
 public void initialize(
     HiveConf conf,
     QueryPlan queryPlan,
     DriverContext driverContext,
     CompilationOpContext opContext) {
   super.initialize(conf, queryPlan, driverContext, opContext);
   job = new JobConf(conf, ExecDriver.class);
   execContext = new ExecMapperContext(job);
   // we don't use the HadoopJobExecHooks for local tasks
   this.jobExecHelper = new HadoopJobExecHelper(job, console, this, null);
 }