@Override
  public void launchTask(final ExecutorDriver executorDriver, final Protos.TaskInfo taskInfo) {
    LOGGER.info("Launching task in PinUserProfileExecutor ...");
    Protos.TaskStatus taskStatus =
        Protos.TaskStatus.newBuilder()
            .setTaskId(taskInfo.getTaskId())
            .setState(Protos.TaskState.TASK_RUNNING)
            .build();
    executorDriver.sendStatusUpdate(taskStatus);
    String url = taskInfo.getData().toStringUtf8();
    byte[] message = new byte[0];

    try {
      message = ("userprofile :" + getUserProfileInfo(url)).getBytes();
    } catch (IOException e) {
      LOGGER.error("Error parsing the Pinterest URL :" + e.getMessage());
    }

    LOGGER.info("Sending framework message and marking task finished." + getClass().getName());
    executorDriver.sendFrameworkMessage(message);

    taskStatus =
        Protos.TaskStatus.newBuilder()
            .setTaskId(taskInfo.getTaskId())
            .setState(Protos.TaskState.TASK_FINISHED)
            .build();
    executorDriver.sendStatusUpdate(taskStatus);
  }
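
A custom executor like this is normally handed to a MesosExecutorDriver in a main method, which blocks until the driver stops. The snippet below is only a sketch of that wiring, assuming the class above is named PinUserProfileExecutor; the ExecutorMain class name is made up for illustration and is not part of the original example.

import org.apache.mesos.MesosExecutorDriver;
import org.apache.mesos.Protos;

public class ExecutorMain {
  public static void main(String[] args) {
    // Hand the executor to the driver and block until the driver stops or aborts.
    MesosExecutorDriver driver = new MesosExecutorDriver(new PinUserProfileExecutor());
    Protos.Status status = driver.run();
    System.exit(status == Protos.Status.DRIVER_STOPPED ? 0 : 1);
  }
}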
Example #2
  @Override
  public void statusUpdate(SchedulerDriver schedulerDriver, Protos.TaskStatus taskStatus) {
    LOGGER.info(
        "Status update : Task ID "
            + taskStatus.getTaskId().getValue()
            + "in state : "
            + taskStatus.getState().getValueDescriptor().getName());
    if (taskStatus.getState() == Protos.TaskState.TASK_FINISHED) {
      finishedTasks++;
      LOGGER.info("Finished tasks : " + finishedTasks);
      if (finishedTasks == totalTasks) {
        schedulerDriver.stop();
      }
    }

    if (taskStatus.getState() == Protos.TaskState.TASK_FAILED
        || taskStatus.getState() == Protos.TaskState.TASK_KILLED
        || taskStatus.getState() == Protos.TaskState.TASK_LOST) {
      LOGGER.error(
          "Aborting because the task "
              + taskStatus.getTaskId().getValue()
              + " is in unexpected state : "
              + taskStatus.getState().getValueDescriptor().getName()
              + "with reason : "
              + taskStatus.getReason().getValueDescriptor().getName()
              + " from source : "
              + taskStatus.getSource().getValueDescriptor().getName()
              + " with message : "
              + taskStatus.getMessage());
      schedulerDriver.abort();
    }
  }
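
The framework message sent by the executor above is delivered to the scheduler through the frameworkMessage callback. A minimal sketch of such a handler is shown below; the logger and the UTF-8 decoding mirror the executor side and are assumptions, not part of the original example.

  @Override
  public void frameworkMessage(
      SchedulerDriver schedulerDriver,
      Protos.ExecutorID executorId,
      Protos.SlaveID slaveId,
      byte[] data) {
    // The executor encoded the payload as UTF-8 bytes, so decode it the same way.
    LOGGER.info(
        "Framework message from executor "
            + executorId.getValue()
            + " : "
            + new String(data, java.nio.charset.StandardCharsets.UTF_8));
  }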
Example #3
  @Override
  public void statusUpdate(SchedulerDriver driver, Protos.TaskStatus status) {
    String taskId = status.getTaskId().getValue();
    Protos.TaskState state = status.getState();
    LOG.info("Task {} is in state {}", taskId, state);
    // TODO(jiri): Handle the case when an Alluxio master and/or worker task fails.
    // In particular, we should enable support for the fault tolerant mode of Alluxio to account
    // for Alluxio master process failures and keep track of the running number of Alluxio
    // masters.

    switch (state) {
      case TASK_FAILED: // intentional fall-through
      case TASK_LOST: // intentional fall-through
      case TASK_ERROR:
        if (taskId.equals(String.valueOf(mMasterTaskId))) {
          mMasterCount--;
        }
        break;
      case TASK_RUNNING:
        if (taskId.equals(String.valueOf(mMasterTaskId))) {
          mMasterLaunched = true;
        }
        break;
      default:
        break;
    }
  }
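
The three fall-through cases above all mean the same thing: the task ended without succeeding. If that check is needed in more than one place, it can be pulled into a small helper; the class and method names below are made up for illustration and are not part of the original code.

import java.util.EnumSet;
import org.apache.mesos.Protos;

final class TaskStates {
  private static final EnumSet<Protos.TaskState> FAILED_TERMINAL =
      EnumSet.of(
          Protos.TaskState.TASK_FAILED,
          Protos.TaskState.TASK_LOST,
          Protos.TaskState.TASK_ERROR);

  /** Returns true if the state means the task terminated without succeeding. */
  static boolean isFailedTerminal(Protos.TaskState state) {
    return FAILED_TERMINAL.contains(state);
  }
}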
  /** Invoked when a Mesos task reaches a terminal status. */
  private void taskTerminated(Protos.TaskID taskID, Protos.TaskStatus status) {
    // this callback occurs for failed containers and for released containers alike

    final ResourceID id = extractResourceID(taskID);

    boolean existed;
    try {
      existed = workerStore.removeWorker(taskID);
    } catch (Exception ex) {
      fatalError("unable to remove worker", ex);
      return;
    }

    if (!existed) {
      LOG.info("Received a termination notice for an unrecognized worker: {}", id);
      return;
    }

    // check if this is a failed task or a released task
    if (workersBeingReturned.remove(id) != null) {
      // regular finished worker that we released
      LOG.info("Worker {} finished successfully with diagnostics: {}", id, status.getMessage());
    } else {
      // failed worker, either at startup, or running
      final MesosWorkerStore.Worker launched = workersInLaunch.remove(id);
      if (launched != null) {
        LOG.info(
            "Mesos task {} failed, with a TaskManager in launch or registration. "
                + "State: {} Reason: {} ({})",
            id,
            status.getState(),
            status.getReason(),
            status.getMessage());
        // we will trigger re-acquiring new workers at the end
      } else {
        // failed registered worker
        LOG.info(
            "Mesos task {} failed, with a registered TaskManager. " + "State: {} Reason: {} ({})",
            id,
            status.getState(),
            status.getReason(),
            status.getMessage());

        // notify the generic logic, which notifies the JobManager, etc.
        notifyWorkerFailed(id, "Mesos task " + id + " failed.  State: " + status.getState());
      }

      // general failure logging
      failedTasksSoFar++;

      String diagMessage =
          String.format(
              "Diagnostics for task %s in state %s: reason=%s message=%s",
              id, status.getState(), status.getReason(), status.getMessage());
      sendInfoMessage(diagMessage);

      LOG.info(diagMessage);
      LOG.info("Total number of failed tasks so far: {}", failedTasksSoFar);

      // maxFailedTasks == -1 means an unlimited number of retries.
      if (maxFailedTasks >= 0 && failedTasksSoFar > maxFailedTasks) {
        String msg =
            "Stopping Mesos session because the number of failed tasks ("
                + failedTasksSoFar
                + ") exceeded the maximum failed tasks ("
                + maxFailedTasks
                + "). This number is controlled by the '"
                + ConfigConstants.MESOS_MAX_FAILED_TASKS
                + "' configuration setting. "
                + "By default its the number of requested tasks.";

        LOG.error(msg);
        self()
            .tell(
                decorateMessage(new StopCluster(ApplicationStatus.FAILED, msg)),
                ActorRef.noSender());

        // no need to do anything else
        return;
      }
    }

    // in case failed containers were among the finished containers, make
    // sure we re-examine and request new ones
    triggerCheckWorkers();
  }
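
The extractResourceID call near the top of taskTerminated is not shown in this excerpt. A plausible sketch, assuming the Mesos task ID value is reused verbatim as the worker's ResourceID, would be:

import org.apache.flink.runtime.clusterframework.types.ResourceID;
import org.apache.mesos.Protos;

class ResourceIDs {
  /** Assumed one-to-one mapping from a Mesos task ID to a Flink ResourceID. */
  static ResourceID extractResourceID(Protos.TaskID taskId) {
    return new ResourceID(taskId.getValue());
  }
}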