コード例 #1
0
  /**
   * Decides whether the task-thread for the given attempt should be closed, either because the
   * containing JobInProgress has finished or because the attempt was killed by the user.
   *
   * <p>An attempt that was already reported closed is closed again only while it is still listed
   * in {@code tasksToKill}. Otherwise the attempt is recorded in {@code tasksReportedClosed} and
   * closed when any of the following holds:
   *
   * <ul>
   *   <li>a status is known for the attempt and either this TIP is marked failed or the job is
   *       neither RUNNING nor PREP;
   *   <li>{@code completes > 0}, unless this is a regular (non-setup, non-cleanup) map attempt
   *       that is itself the attempt which completed the TIP;
   *   <li>the attempt is commit-pending but should not commit.
   * </ul>
   *
   * <p>When none of these applies, the attempt is closed only if it is listed in {@code
   * tasksToKill}; in that case it is not recorded as reported closed.
   *
   * @param taskid the attempt to check
   * @return {@code true} if the attempt should be closed
   */
  public boolean shouldClose(TaskAttemptID taskid) {
    // Already reported closed once: only an outstanding kill request closes it again.
    if (tasksReportedClosed.contains(taskid)) {
      return tasksToKill.containsKey(taskid);
    }

    TaskStatus attemptStatus = taskStatuses.get(taskid);

    // The three closing conditions are evaluated in order and short-circuit, so
    // isComplete() (which is synchronized!) is only consulted for regular map attempts
    // of a TIP that already has a completed attempt.
    if ((attemptStatus != null
            && (this.failed
                || (job.getStatus().getRunState() != JobStatus.RUNNING
                    && job.getStatus().getRunState() != JobStatus.PREP)))
        || (completes > 0 && (!isMapTask() || jobSetup || jobCleanup || !isComplete(taskid)))
        || (isCommitPending(taskid) && !shouldCommit(taskid))) {
      tasksReportedClosed.add(taskid);
      return true;
    }

    // Nothing forces a close; honour a pending kill request if there is one.
    return tasksToKill.containsKey(taskid);
  }
コード例 #2
0
  /**
   * Creates a new attempt of this TIP that can be sent to a TaskTracker for execution.
   *
   * <p>Killed attempts are not counted against the attempt limit. The dispatch time of the new
   * attempt is recorded, and {@code execStartTime} is initialised on the first dispatch.
   *
   * @param taskTracker name of the tracker the attempt will be handed to
   * @return the task to run, or {@code null} if the attempt limit for this TIP is exhausted
   */
  public Task getTaskToRun(String taskTracker) {
    // Refuse to hand out more attempts once the limit (extended by the killed ones) is reached.
    if (nextTaskId >= (MAX_TASK_EXECS + maxTaskAttempts + numKilledTasks)) {
      LOG.warn(
          "Exceeded limit of "
              + (MAX_TASK_EXECS + maxTaskAttempts)
              + " (plus "
              + numKilledTasks
              + " killed) attempts for the tip '"
              + getTIPId()
              + "'");
      return null;
    }

    // Offset the attempt number by the restart count so ids stay unique across restarts.
    final int attemptNumber = job.getNumRestarts() * NUM_ATTEMPTS_PER_RESTART + nextTaskId;
    final TaskAttemptID taskid = new TaskAttemptID(id, attemptNumber);
    nextTaskId++;

    // Remember when this attempt was dispatched; the progress rate of the TIP is
    // computed relative to this time.
    setDispatchTime(taskid, JobTracker.getClock().getTime());

    // First dispatch for this TIP: assume the task starts running now.
    if (execStartTime == 0) {
      execStartTime = JobTracker.getClock().getTime();
    }

    return addRunningTask(taskid, taskTracker);
  }
コード例 #3
0
  /**
   * Recomputes the progress rate of this TIP and feeds the change into the job statistics.
   *
   * <p>The rate of an attempt is its progress divided by the time elapsed since it was dispatched
   * (at least 1, to avoid dividing by zero). Only attempts that are RUNNING, SUCCEEDED or
   * COMMIT_PENDING are considered, and the TIP's rate is the best rate among them (0 if there is
   * none).
   *
   * <p>The method is deliberately unsynchronized: the assumption is that the JIP lock is held on
   * entry. Currently the only places it's called from are TIP.updateStatus and
   * JIP.refreshCandidate*
   *
   * @param currentTime the time against which the attempts' dispatch times are measured
   */
  public void updateProgressRate(long currentTime) {
    double fastestRate = 0;

    for (TaskStatus attempt : taskStatuses.values()) {
      TaskStatus.State state = attempt.getRunState();
      boolean counted =
          state == TaskStatus.State.RUNNING
              || state == TaskStatus.State.SUCCEEDED
              || state == TaskStatus.State.COMMIT_PENDING;
      if (!counted) {
        continue;
      }

      double rate =
          attempt.getProgress()
              / Math.max(1, currentTime - getDispatchTime(attempt.getTaskID()));
      if (rate > fastestRate) {
        fastestRate = rate;
      }
    }

    // Replace this TIP's previous rate by the new one in the job-wide statistics.
    job.getRunningTaskStatistics(isMapTask()).updateStatistics(progressRate, fastestRate);
    progressRate = fastestRate;
  }
コード例 #4
0
  /**
   * Decides whether a speculative attempt may be started for this task. This requires that the
   * task isn't done or almost done and that it isn't already being speculatively executed.
   *
   * <p>The checks are applied in this order:
   *
   * <ol>
   *   <li>never speculate when skipping, not runnable, not running, already completed, only
   *       commit-pending, or when more than {@code MAX_TASK_EXECS} attempts are active;
   *   <li>always speculate when speculation is forced;
   *   <li>never speculate within {@code speculativeLag} of the last dispatch;
   *   <li>never speculate when the progress rate exceeds a positive {@code
   *       maxProgressRateForSpeculation};
   *   <li>always speculate when the job wants all remaining maps/reduces speculated;
   *   <li>otherwise speculate when the task's progress rate is further below the mean rate of
   *       the job's running tasks than the allowed difference.
   * </ol>
   *
   * <p>Added for use by queue scheduling algorithms.
   *
   * @param currentTime the current time, compared against the last dispatch time of this TIP
   * @return {@code true} if the task may be speculated
   */
  boolean canBeSpeculated(long currentTime) {
    boolean ineligible =
        skipping
            || !isRunnable()
            || !isRunning()
            || completes != 0
            || isOnlyCommitPending()
            || activeTasks.size() > MAX_TASK_EXECS;
    if (ineligible) {
      return false;
    }

    if (isSpeculativeForced()) {
      return true;
    }

    // no speculation for first few seconds
    boolean withinLag = currentTime - lastDispatchTime < speculativeLag;
    if (withinLag) {
      return false;
    }

    DataStatistics taskStats = job.getRunningTaskStatistics(isMapTask());

    if (LOG.isDebugEnabled()) {
      LOG.debug(
          "activeTasks.size(): "
              + activeTasks.size()
              + " "
              + activeTasks.firstKey()
              + " task's progressrate: "
              + progressRate
              + " taskStats : "
              + taskStats);
    }

    // A task progressing fast enough to finish within the acceptable duration
    // allowed for each task is not speculated.
    boolean fastEnough =
        maxProgressRateForSpeculation > 0 && progressRate > maxProgressRateForSpeculation;
    if (fastEnough) {
      return false;
    }

    boolean speculateAllRemaining =
        isMapTask()
            ? job.shouldSpeculateAllRemainingMaps()
            : job.shouldSpeculateAllRemainingReduces();
    if (speculateAllRemaining) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Speculate " + getTIPId() + " because the job is almost finished");
      }
      return true;
    }

    // Standard-deviation based decision: compute the largest difference allowed
    // between this task's progress rate and the mean progress rate of its siblings.
    double allowedGap;
    if (taskStats.std() == 0) {
      allowedGap = taskStats.mean() / 3;
    } else {
      allowedGap = job.getSlowTaskThreshold() * taskStats.std();
    }

    // if stddev > mean - we are stuck. cap the allowed difference at a
    // more meaningful number.
    double gapCap = taskStats.mean() * job.getStddevMeanRatioMax();
    allowedGap = Math.min(allowedGap, gapCap);

    double lagBehindMean = taskStats.mean() - progressRate;
    return lagBehindMean > allowedGap;
  }
コード例 #5
0
  /**
   * Indicate that one of the taskids in this TaskInProgress has failed.
   *
   * <p>The method settles the final run state of the attempt (honouring a pending user
   * kill/fail request from {@code tasksToKill}), fills in a missing finish time, removes the
   * attempt from the active tasks, undoes the completion of a map TIP if this attempt was the one
   * that completed it, updates the failure/kill counters and finally kills the TIP once {@code
   * numTaskFailures} reaches {@code maxTaskAttempts}.
   *
   * <p>If no status is known for the attempt it is treated as FAILED. In that case the failure
   * is still counted, but no failure host and no skip range are recorded because neither is
   * available without a status.
   *
   * @param taskid the attempt that failed
   * @param jobStatus status of the containing job; the completion count is left untouched when
   *     the job has already SUCCEEDED
   */
  public void incompleteSubTask(TaskAttemptID taskid, JobStatus jobStatus) {
    //
    // Note the failure and its location
    //
    TaskStatus status = taskStatuses.get(taskid);
    String trackerName;
    String trackerHostName = null;
    TaskStatus.State taskState = TaskStatus.State.FAILED;
    if (status != null) {
      trackerName = status.getTaskTracker();
      trackerHostName = JobInProgressTraits.convertTrackerNameToHostName(trackerName);
      // Check if the user manually KILLED/FAILED this task-attempt...
      Boolean shouldFail = tasksToKill.remove(taskid);
      if (shouldFail != null) {
        // An attempt that already reached FAILED/KILLED keeps a "clean" final state;
        // anything else is moved to the corresponding *_UNCLEAN state.
        if (status.getRunState() == TaskStatus.State.FAILED
            || status.getRunState() == TaskStatus.State.KILLED) {
          taskState = (shouldFail) ? TaskStatus.State.FAILED : TaskStatus.State.KILLED;
        } else {
          taskState =
              (shouldFail) ? TaskStatus.State.FAILED_UNCLEAN : TaskStatus.State.KILLED_UNCLEAN;
        }
        status.setRunState(taskState);
        addDiagnosticInfo(taskid, "Task has been " + taskState + " by the user");
      }

      // Any state other than the four failed/killed ones is forced to FAILED.
      taskState = status.getRunState();
      if (taskState != TaskStatus.State.FAILED
          && taskState != TaskStatus.State.KILLED
          && taskState != TaskStatus.State.FAILED_UNCLEAN
          && taskState != TaskStatus.State.KILLED_UNCLEAN) {
        LOG.info(
            "Task '"
                + taskid
                + "' running on '"
                + trackerName
                + "' in state: '"
                + taskState
                + "' being failed!");
        status.setRunState(TaskStatus.State.FAILED);
        taskState = TaskStatus.State.FAILED;
      }

      // tasktracker went down and failed time was not reported.
      if (0 == status.getFinishTime()) {
        status.setFinishTime(JobTracker.getClock().getTime());
      }
    }

    this.activeTasks.remove(taskid);

    // Since we do not fail completed reduces (whose outputs go to hdfs), we
    // should note this failure only for completed maps, only if this taskid
    // completed this map. However, if the job is done, there is no need to
    // manipulate completed maps.
    if (this.isMapTask()
        && !jobSetup
        && !jobCleanup
        && isComplete(taskid)
        && jobStatus.getRunState() != JobStatus.SUCCEEDED) {
      this.completes--;

      // Reset the successfulTaskId since we don't have a SUCCESSFUL task now
      resetSuccessfulTaskid();
    }

    // Note that there can be failures of tasks that are hosted on a machine
    // that has not yet registered with restarted jobtracker
    // recalculate the counts only if its a genuine failure
    if (tasks.contains(taskid)) {
      if (taskState == TaskStatus.State.FAILED) {
        numTaskFailures++;
        // Host and record-range bookkeeping needs the attempt's status. Without one
        // there is no host to record (trackerHostName is still null) and asking the
        // status for its next record range would throw a NullPointerException.
        if (status != null) {
          machinesWhereFailed.add(trackerHostName);
          if (maxSkipRecords > 0) {
            // skipping feature enabled
            LOG.debug("TaskInProgress adding" + status.getNextRecordRange());
            failedRanges.add(status.getNextRecordRange());
            skipping = startSkipping();
          }
        }

      } else if (taskState == TaskStatus.State.KILLED) {
        numKilledTasks++;
      }
    }

    if (numTaskFailures >= maxTaskAttempts) {
      LOG.info("TaskInProgress " + getTIPId() + " has failed " + numTaskFailures + " times.");
      kill();
    }
  }
コード例 #6
0
  /**
   * Handles a status message for a single attempt of this TIP, which might result in an overall
   * TaskInProgress status update.
   *
   * <p>Diagnostic information carried by the status is logged and recorded, and skip-range state
   * is updated while skipping. If a previous status exists for the attempt, the update is
   * rejected (returning {@code false}) when it
   *
   * <ul>
   *   <li>repeats a state other than RUNNING, COMMIT_PENDING, FAILED_UNCLEAN, KILLED_UNCLEAN or
   *       UNASSIGNED;
   *   <li>would move a failed, killed, succeeded or commit-pending attempt back to RUNNING or
   *       UNASSIGNED; or
   *   <li>arrives after the attempt was marked FAILED or KILLED, in which case the attempt is
   *       additionally put into {@code tasksToKill}.
   * </ul>
   *
   * <p>Accepted updates replace the stored status, except for cleanup attempts where only
   * selected fields of the stored status are updated. Progress is recomputed afterwards.
   *
   * @param status the reported status
   * @return has the task changed its state noticeably? {@code true} when no previous status
   *     existed or the run state differs from the previous one
   */
  synchronized boolean updateStatus(TaskStatus status) {
    final TaskAttemptID taskid = status.getTaskID();
    final String taskTracker = status.getTaskTracker();
    final String diagInfo = status.getDiagnosticInfo();
    final TaskStatus previous = taskStatuses.get(taskid);

    if (diagInfo != null && !diagInfo.isEmpty()) {
      long runTime = status.getRunTime();
      LOG.info(
          "Error from "
              + taskid
              + " on "
              + taskTracker
              + " runTime(msec) "
              + runTime
              + ": "
              + diagInfo);
      addDiagnosticInfo(taskid, diagInfo);
    }

    if (skipping) {
      failedRanges.updateState(status);
    }

    boolean changed = true;
    if (previous != null) {
      final TaskStatus.State oldState = previous.getRunState();
      final TaskStatus.State newState = status.getRunState();

      // We should never receive a duplicate success/failure/killed status update
      // for the same taskid! This is a safety check, and is addressed better at
      // the TaskTracker to ensure this.
      // @see {@link TaskTracker.transmitHeartbeat()}
      boolean repeatableState =
          newState == TaskStatus.State.RUNNING
              || newState == TaskStatus.State.COMMIT_PENDING
              || newState == TaskStatus.State.FAILED_UNCLEAN
              || newState == TaskStatus.State.KILLED_UNCLEAN
              || newState == TaskStatus.State.UNASSIGNED;
      if (!repeatableState && oldState == newState) {
        LOG.warn(
            "Recieved duplicate status update of '"
                + newState
                + "' for '"
                + taskid
                + "' of TIP '"
                + getTIPId()
                + "'oldTT="
                + previous.getTaskTracker()
                + " while newTT="
                + status.getTaskTracker());
        return false;
      }

      // The task is not allowed to move from completed back to running.
      // Out of order status messages moving tasks from complete to running have
      // been observed. This is a spot fix, but it should be addressed more globally.
      boolean movesBackwards =
          newState == TaskStatus.State.RUNNING || newState == TaskStatus.State.UNASSIGNED;
      boolean alreadyFinishedOrCommitting =
          oldState == TaskStatus.State.FAILED
              || oldState == TaskStatus.State.KILLED
              || oldState == TaskStatus.State.FAILED_UNCLEAN
              || oldState == TaskStatus.State.KILLED_UNCLEAN
              || oldState == TaskStatus.State.SUCCEEDED
              || oldState == TaskStatus.State.COMMIT_PENDING;
      if (movesBackwards && alreadyFinishedOrCommitting) {
        return false;
      }

      // Do not accept any status once the task is marked FAILED/KILLED.
      // This is to handle the case of the JobTracker timing out a task due to
      // launch delay, but the TT comes back with any state or TT got expired.
      if (oldState == TaskStatus.State.FAILED || oldState == TaskStatus.State.KILLED) {
        tasksToKill.put(taskid, true);
        return false;
      }

      changed = oldState != newState;
    }

    if (isCleanupAttempt(taskid)) {
      // For a cleanup attempt the complete status is not replaced; only specific
      // fields are updated. For example, startTime should not be updated, but
      // finishTime has to be updated.
      taskStatuses
          .get(taskid)
          .statusUpdate(
              status.getRunState(),
              status.getProgress(),
              status.getStateString(),
              status.getPhase(),
              status.getFinishTime());
    } else {
      taskStatuses.put(taskid, status);
      // we don't want to include setup tasks in the task execution stats
      boolean regularTask = !isJobSetupTask() && !isJobCleanupTask();
      if (regularTask
          && (isMapTask() ? job.hasSpeculativeMaps() : job.hasSpeculativeReduces())) {
        updateProgressRate(JobTracker.getClock().getTime());
      }
    }

    // Recompute progress
    recomputeProgress();
    return changed;
  }
コード例 #7
0
  /**
   * Builds the {@link Task} object for the given attempt and registers the attempt with this
   * tip. According to the original documentation this is also how a previously running task is
   * re-added in case of jobtracker restarts.
   *
   * <p>A map task is created for map TIPs (with an empty split for job setup/cleanup TIPs) and a
   * reduce task otherwise. The task is flagged as job-cleanup, job-setup and/or task-cleanup as
   * applicable and receives the configuration and the current skip settings. The attempt is then
   * added to {@code activeTasks} and {@code tasks}; it becomes the speculative attempt if another
   * attempt was already active, and the first attempt if none was recorded before.
   *
   * @param taskid the attempt to add
   * @param taskTracker name of the tracker the attempt runs on
   * @param taskCleanup whether this is a task-cleanup attempt; such an attempt needs only one
   *     slot, takes over the run state stored for {@code taskid} and is recorded in {@code
   *     cleanupTasks}
   * @return the created task
   */
  public Task addRunningTask(TaskAttemptID taskid, String taskTracker, boolean taskCleanup) {
    // 1 slot is enough for taskCleanup task
    final int slots = taskCleanup ? 1 : numSlotsRequired;

    // create the task
    final Task task;
    if (!isMapTask()) {
      task = new ReduceTask(jobFile, taskid, partition, numMaps, slots, job.getUser());
    } else {
      LOG.debug(
          "attempt "
              + numTaskFailures
              + " sending skippedRecords "
              + failedRanges.getIndicesCount());
      // Job setup/cleanup tasks have no input split of their own.
      final boolean hasSplit = !jobSetup && !jobCleanup;
      final String splitClass = hasSplit ? rawSplit.getClassName() : null;
      final BytesWritable split = hasSplit ? rawSplit.getBytes() : new BytesWritable();
      task = new MapTask(jobFile, taskid, partition, splitClass, split, slots, job.getUser());
    }

    if (jobCleanup) {
      task.setJobCleanupTask();
    }
    if (jobSetup) {
      task.setJobSetupTask();
    }
    if (taskCleanup) {
      task.setTaskCleanupTask();
      task.setState(taskStatuses.get(taskid).getRunState());
      cleanupTasks.put(taskid, taskTracker);
    }

    task.setConf(conf);
    LOG.debug("Launching task with skipRanges:" + failedRanges.getSkipRanges());
    task.setSkipRanges(failedRanges.getSkipRanges());
    task.setSkipping(skipping);
    if (failedRanges.isTestAttempt()) {
      task.setWriteSkipRecs(false);
    }

    // An attempt started while another one is still active is the speculative one.
    speculativeTaskId = activeTasks.isEmpty() ? null : taskid;
    activeTasks.put(taskid, taskTracker);
    tasks.add(taskid);

    // check and set the first attempt
    if (firstTaskId == null) {
      firstTaskId = taskid;
    }
    return task;
  }