示例#1
0
  /**
   * Indicate that one of the taskids in this already-completed TaskInProgress has successfully
   * completed; hence we mark this taskid as {@link TaskStatus.State.KILLED}.
   */
  void alreadyCompletedTask(TaskAttemptID taskid) {
    // 'KILL' the task
    completedTask(taskid, TaskStatus.State.KILLED);

    // Note the reason for the task being 'KILLED'
    addDiagnosticInfo(taskid, "Already completed TIP");

    LOG.info("Already complete TIP " + getTIPId() + " has completed task " + taskid);
  }
示例#2
0
 /** Kill the given task */
 boolean killTask(TaskAttemptID taskId, boolean shouldFail, String diagnosticInfo) {
   TaskStatus st = taskStatuses.get(taskId);
   if (st != null
       && (st.getRunState() == TaskStatus.State.RUNNING
           || st.getRunState() == TaskStatus.State.COMMIT_PENDING
           || st.inTaskCleanupPhase()
           || st.getRunState() == TaskStatus.State.UNASSIGNED)
       && tasksToKill.put(taskId, shouldFail) == null) {
     addDiagnosticInfo(taskId, diagnosticInfo);
     LOG.info(diagnosticInfo);
     return true;
   }
   return false;
 }
示例#3
0
  /** Indicate that one of the taskids in this TaskInProgress has failed. */
  public void incompleteSubTask(TaskAttemptID taskid, JobStatus jobStatus) {
    //
    // Note the failure and its location
    //
    TaskStatus status = taskStatuses.get(taskid);
    String trackerName;
    String trackerHostName = null;
    TaskStatus.State taskState = TaskStatus.State.FAILED;
    if (status != null) {
      trackerName = status.getTaskTracker();
      trackerHostName = JobInProgressTraits.convertTrackerNameToHostName(trackerName);
      // Check if the user manually KILLED/FAILED this task-attempt...
      Boolean shouldFail = tasksToKill.remove(taskid);
      if (shouldFail != null) {
        if (status.getRunState() == TaskStatus.State.FAILED
            || status.getRunState() == TaskStatus.State.KILLED) {
          taskState = (shouldFail) ? TaskStatus.State.FAILED : TaskStatus.State.KILLED;
        } else {
          taskState =
              (shouldFail) ? TaskStatus.State.FAILED_UNCLEAN : TaskStatus.State.KILLED_UNCLEAN;
        }
        status.setRunState(taskState);
        addDiagnosticInfo(taskid, "Task has been " + taskState + " by the user");
      }

      taskState = status.getRunState();
      if (taskState != TaskStatus.State.FAILED
          && taskState != TaskStatus.State.KILLED
          && taskState != TaskStatus.State.FAILED_UNCLEAN
          && taskState != TaskStatus.State.KILLED_UNCLEAN) {
        LOG.info(
            "Task '"
                + taskid
                + "' running on '"
                + trackerName
                + "' in state: '"
                + taskState
                + "' being failed!");
        status.setRunState(TaskStatus.State.FAILED);
        taskState = TaskStatus.State.FAILED;
      }

      // tasktracker went down and failed time was not reported.
      if (0 == status.getFinishTime()) {
        status.setFinishTime(JobTracker.getClock().getTime());
      }
    }

    this.activeTasks.remove(taskid);

    // Since we do not fail completed reduces (whose outputs go to hdfs), we
    // should note this failure only for completed maps, only if this taskid;
    // completed this map. however if the job is done, there is no need to
    // manipulate completed maps
    if (this.isMapTask()
        && !jobSetup
        && !jobCleanup
        && isComplete(taskid)
        && jobStatus.getRunState() != JobStatus.SUCCEEDED) {
      this.completes--;

      // Reset the successfulTaskId since we don't have a SUCCESSFUL task now
      resetSuccessfulTaskid();
    }

    // Note that there can be failures of tasks that are hosted on a machine
    // that has not yet registered with restarted jobtracker
    // recalculate the counts only if its a genuine failure
    if (tasks.contains(taskid)) {
      if (taskState == TaskStatus.State.FAILED) {
        numTaskFailures++;
        machinesWhereFailed.add(trackerHostName);
        if (maxSkipRecords > 0) {
          // skipping feature enabled
          LOG.debug("TaskInProgress adding" + status.getNextRecordRange());
          failedRanges.add(status.getNextRecordRange());
          skipping = startSkipping();
        }

      } else if (taskState == TaskStatus.State.KILLED) {
        numKilledTasks++;
      }
    }

    if (numTaskFailures >= maxTaskAttempts) {
      LOG.info("TaskInProgress " + getTIPId() + " has failed " + numTaskFailures + " times.");
      kill();
    }
  }
示例#4
0
  /**
   * A status message from a client has arrived. It updates the status of a single
   * component-thread-task, which might result in an overall TaskInProgress status update.
   *
   * @return has the task changed its state noticeably?
   */
  synchronized boolean updateStatus(TaskStatus status) {
    TaskAttemptID taskid = status.getTaskID();
    String taskTracker = status.getTaskTracker();
    String diagInfo = status.getDiagnosticInfo();
    TaskStatus oldStatus = taskStatuses.get(taskid);
    boolean changed = true;
    if (diagInfo != null && diagInfo.length() > 0) {
      long runTime = status.getRunTime();
      LOG.info(
          "Error from "
              + taskid
              + " on "
              + taskTracker
              + " runTime(msec) "
              + runTime
              + ": "
              + diagInfo);
      addDiagnosticInfo(taskid, diagInfo);
    }

    if (skipping) {
      failedRanges.updateState(status);
    }

    if (oldStatus != null) {
      TaskStatus.State oldState = oldStatus.getRunState();
      TaskStatus.State newState = status.getRunState();

      // We should never recieve a duplicate success/failure/killed
      // status update for the same taskid! This is a safety check,
      // and is addressed better at the TaskTracker to ensure this.
      // @see {@link TaskTracker.transmitHeartbeat()}
      if ((newState != TaskStatus.State.RUNNING
              && newState != TaskStatus.State.COMMIT_PENDING
              && newState != TaskStatus.State.FAILED_UNCLEAN
              && newState != TaskStatus.State.KILLED_UNCLEAN
              && newState != TaskStatus.State.UNASSIGNED)
          && (oldState == newState)) {
        LOG.warn(
            "Recieved duplicate status update of '"
                + newState
                + "' for '"
                + taskid
                + "' of TIP '"
                + getTIPId()
                + "'"
                + "oldTT="
                + oldStatus.getTaskTracker()
                + " while newTT="
                + status.getTaskTracker());
        return false;
      }

      // The task is not allowed to move from completed back to running.
      // We have seen out of order status messagesmoving tasks from complete
      // to running. This is a spot fix, but it should be addressed more
      // globally.
      if ((newState == TaskStatus.State.RUNNING || newState == TaskStatus.State.UNASSIGNED)
          && (oldState == TaskStatus.State.FAILED
              || oldState == TaskStatus.State.KILLED
              || oldState == TaskStatus.State.FAILED_UNCLEAN
              || oldState == TaskStatus.State.KILLED_UNCLEAN
              || oldState == TaskStatus.State.SUCCEEDED
              || oldState == TaskStatus.State.COMMIT_PENDING)) {
        return false;
      }

      // Do not accept any status once the task is marked FAILED/KILLED
      // This is to handle the case of the JobTracker timing out a task
      // due to launch delay, but the TT comes back with any state or
      // TT got expired
      if (oldState == TaskStatus.State.FAILED || oldState == TaskStatus.State.KILLED) {
        tasksToKill.put(taskid, true);
        return false;
      }

      changed = oldState != newState;
    }
    // if task is a cleanup attempt, do not replace the complete status,
    // update only specific fields.
    // For example, startTime should not be updated,
    // but finishTime has to be updated.
    if (!isCleanupAttempt(taskid)) {
      taskStatuses.put(taskid, status);
      // we don't want to include setup tasks in the task execution stats
      if (!isJobSetupTask()
          && !isJobCleanupTask()
          && ((isMapTask() && job.hasSpeculativeMaps())
              || (!isMapTask() && job.hasSpeculativeReduces()))) {
        updateProgressRate(JobTracker.getClock().getTime());
      }
    } else {
      taskStatuses
          .get(taskid)
          .statusUpdate(
              status.getRunState(),
              status.getProgress(),
              status.getStateString(),
              status.getPhase(),
              status.getFinishTime());
    }

    // Recompute progress
    recomputeProgress();
    return changed;
  }