@Override
public void launchTask(final ExecutorDriver executorDriver, final Protos.TaskInfo taskInfo) {
    LOGGER.info("Launching task in PinUserProfileExecutor ...");

    // Report the task as running before doing any work.
    Protos.TaskStatus taskStatus = Protos.TaskStatus.newBuilder()
        .setTaskId(taskInfo.getTaskId())
        .setState(Protos.TaskState.TASK_RUNNING)
        .build();
    executorDriver.sendStatusUpdate(taskStatus);

    // The scheduler passed the Pinterest profile URL in the task's data field.
    String url = taskInfo.getData().toStringUtf8();

    byte[] message = new byte[0];
    try {
        message = ("userprofile :" + getUserProfileInfo(url)).getBytes();
    } catch (IOException e) {
        LOGGER.error("Error parsing the Pinterest URL : " + e.getMessage());
    }

    LOGGER.info("Sending framework message and marking task finished. " + getClass().getName());
    executorDriver.sendFrameworkMessage(message);

    // Report the task as finished so the scheduler can release its resources.
    taskStatus = Protos.TaskStatus.newBuilder()
        .setTaskId(taskInfo.getTaskId())
        .setState(Protos.TaskState.TASK_FINISHED)
        .build();
    executorDriver.sendStatusUpdate(taskStatus);
}
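The byte array passed to sendFrameworkMessage above is delivered to the scheduler through its frameworkMessage callback (the signature comes from org.apache.mesos.Scheduler). A minimal sketch of the receiving side, assuming the scheduler has a LOGGER field like the executor does:

@Override
public void frameworkMessage(SchedulerDriver schedulerDriver, Protos.ExecutorID executorId,
        Protos.SlaveID slaveId, byte[] data) {
    // The executor encoded the message as plain bytes ("userprofile :" + profile info),
    // so decode it back to a string and log it.
    LOGGER.info("Received framework message from executor " + executorId.getValue()
        + " : " + new String(data));
}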
@Override
public void statusUpdate(SchedulerDriver schedulerDriver, Protos.TaskStatus taskStatus) {
    LOGGER.info("Status update : Task ID " + taskStatus.getTaskId().getValue()
        + " in state : " + taskStatus.getState().getValueDescriptor().getName());

    if (taskStatus.getState() == Protos.TaskState.TASK_FINISHED) {
        finishedTasks++;
        LOGGER.info("Finished tasks : " + finishedTasks);
        // Once every task has finished, stop the driver so the framework can exit.
        if (finishedTasks == totalTasks) {
            schedulerDriver.stop();
        }
    }

    if (taskStatus.getState() == Protos.TaskState.TASK_FAILED
        || taskStatus.getState() == Protos.TaskState.TASK_KILLED
        || taskStatus.getState() == Protos.TaskState.TASK_LOST) {
        LOGGER.error("Aborting because the task " + taskStatus.getTaskId().getValue()
            + " is in unexpected state : " + taskStatus.getState().getValueDescriptor().getName()
            + " with reason : " + taskStatus.getReason().getValueDescriptor().getName()
            + " from source : " + taskStatus.getSource().getValueDescriptor().getName()
            + " with message : " + taskStatus.getMessage());
        schedulerDriver.abort();
    }
}
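The stop() and abort() calls above matter because the driver's blocking run() call in the framework's main method only returns after one of them is invoked. A minimal sketch of that wiring, assuming a scheduler class named PinUserProfileScheduler with a hypothetical totalTasks constructor argument, and frameworkInfo and mesosMasterUrl built elsewhere (MesosSchedulerDriver, run(), and Protos.Status are part of the standard Mesos Java bindings):

Scheduler scheduler = new PinUserProfileScheduler(totalTasks);  // hypothetical constructor
MesosSchedulerDriver driver =
    new MesosSchedulerDriver(scheduler, frameworkInfo, mesosMasterUrl);
// run() blocks until the scheduler calls stop() (all tasks finished)
// or abort() (a task ended in an unexpected state).
Protos.Status status = driver.run();
System.exit(status == Protos.Status.DRIVER_STOPPED ? 0 : 1);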
@Override
public void statusUpdate(SchedulerDriver driver, Protos.TaskStatus status) {
    String taskId = status.getTaskId().getValue();
    Protos.TaskState state = status.getState();
    LOG.info("Task {} is in state {}", taskId, state);
    // TODO(jiri): Handle the case when an Alluxio master and/or worker task fails.
    // In particular, we should enable support for the fault tolerant mode of Alluxio to account
    // for Alluxio master process failures and keep track of the running number of Alluxio
    // masters.
    switch (state) {
        case TASK_FAILED:  // intend to fall through
        case TASK_LOST:    // intend to fall through
        case TASK_ERROR:
            if (taskId.equals(String.valueOf(mMasterTaskId))) {
                mMasterCount--;
            }
            break;
        case TASK_RUNNING:
            if (taskId.equals(String.valueOf(mMasterTaskId))) {
                mMasterLaunched = true;
            }
            break;
        default:
            break;
    }
}
/** Invoked when a Mesos task reaches a terminal status. */
private void taskTerminated(Protos.TaskID taskID, Protos.TaskStatus status) {
    // this callback occurs for failed containers and for released containers alike
    final ResourceID id = extractResourceID(taskID);

    boolean existed;
    try {
        existed = workerStore.removeWorker(taskID);
    } catch (Exception ex) {
        fatalError("unable to remove worker", ex);
        return;
    }

    if (!existed) {
        LOG.info("Received a termination notice for an unrecognized worker: {}", id);
        return;
    }

    // check if this is a failed task or a released task
    if (workersBeingReturned.remove(id) != null) {
        // regular finished worker that we released
        LOG.info("Worker {} finished successfully with diagnostics: {}", id, status.getMessage());
    } else {
        // failed worker, either at startup, or running
        final MesosWorkerStore.Worker launched = workersInLaunch.remove(id);
        if (launched != null) {
            LOG.info(
                "Mesos task {} failed, with a TaskManager in launch or registration. "
                    + "State: {} Reason: {} ({})",
                id, status.getState(), status.getReason(), status.getMessage());
            // we will trigger re-acquiring new workers at the end
        } else {
            // failed registered worker
            LOG.info(
                "Mesos task {} failed, with a registered TaskManager. "
                    + "State: {} Reason: {} ({})",
                id, status.getState(), status.getReason(), status.getMessage());
            // notify the generic logic, which notifies the JobManager, etc.
            notifyWorkerFailed(id, "Mesos task " + id + " failed. State: " + status.getState());
        }

        // general failure logging
        failedTasksSoFar++;
        String diagMessage = String.format(
            "Diagnostics for task %s in state %s : reason=%s message=%s",
            id, status.getState(), status.getReason(), status.getMessage());
        sendInfoMessage(diagMessage);

        LOG.info(diagMessage);
        LOG.info("Total number of failed tasks so far: {}", failedTasksSoFar);

        // maxFailedTasks == -1 means an unlimited number of retries.
        if (maxFailedTasks >= 0 && failedTasksSoFar > maxFailedTasks) {
            String msg = "Stopping Mesos session because the number of failed tasks ("
                + failedTasksSoFar + ") exceeded the maximum failed tasks ("
                + maxFailedTasks + "). This number is controlled by the '"
                + ConfigConstants.MESOS_MAX_FAILED_TASKS + "' configuration setting. "
                + "By default it is the number of requested tasks.";
            LOG.error(msg);
            self().tell(
                decorateMessage(new StopCluster(ApplicationStatus.FAILED, msg)),
                ActorRef.noSender());
            // no need to do anything else
            return;
        }
    }

    // in case failed containers were among the finished containers, make
    // sure we re-examine and request new ones
    triggerCheckWorkers();
}