private void sendFailIntermediateResultPartitionsRpcCall() { final SimpleSlot slot = this.assignedResource; if (slot != null) { final Instance instance = slot.getInstance(); if (instance.isAlive()) { final ActorGateway gateway = instance.getActorGateway(); // TODO For some tests this could be a problem when querying too early if all resources were // released gateway.tell(new FailIntermediateResultPartitions(attemptId)); } } }
/** * Sends an UpdatePartitionInfo message to the instance of the consumerSlot. * * @param consumerSlot Slot to whose instance the message will be sent * @param updatePartitionInfo UpdatePartitionInfo message */ private void sendUpdatePartitionInfoRpcCall( final SimpleSlot consumerSlot, final UpdatePartitionInfo updatePartitionInfo) { if (consumerSlot != null) { final Instance instance = consumerSlot.getInstance(); final ActorGateway gateway = instance.getActorGateway(); Future<Object> futureUpdate = gateway.ask(updatePartitionInfo, timeout); futureUpdate.onFailure( new OnFailure() { @Override public void onFailure(Throwable failure) throws Throwable { fail( new IllegalStateException( "Update task on instance " + instance + " failed due to:", failure)); } }, executionContext); } }
@Override public String handleJsonRequest( Map<String, String> pathParams, Map<String, String> queryParams, ActorGateway jobManager) throws Exception { try { if (jobManager != null) { // whether one task manager's metrics are requested, or all task manager, we // return them in an array. This avoids unnecessary code complexity. // If only one task manager is requested, we only fetch one task manager metrics. final List<Instance> instances = new ArrayList<>(); if (pathParams.containsKey(TASK_MANAGER_ID_KEY)) { try { InstanceID instanceID = new InstanceID(StringUtils.hexStringToByte(pathParams.get(TASK_MANAGER_ID_KEY))); Future<Object> future = jobManager.ask( new JobManagerMessages.RequestTaskManagerInstance(instanceID), timeout); TaskManagerInstance instance = (TaskManagerInstance) Await.result(future, timeout); if (instance.instance().nonEmpty()) { instances.add(instance.instance().get()); } } // this means the id string was invalid. Keep the list empty. catch (IllegalArgumentException e) { // do nothing. } } else { Future<Object> future = jobManager.ask(JobManagerMessages.getRequestRegisteredTaskManagers(), timeout); RegisteredTaskManagers taskManagers = (RegisteredTaskManagers) Await.result(future, timeout); instances.addAll(taskManagers.asJavaCollection()); } StringWriter writer = new StringWriter(); JsonGenerator gen = JsonFactory.jacksonFactory.createGenerator(writer); gen.writeStartObject(); gen.writeArrayFieldStart("taskmanagers"); for (Instance instance : instances) { gen.writeStartObject(); gen.writeStringField("id", instance.getId().toString()); gen.writeStringField("path", instance.getActorGateway().path()); gen.writeNumberField("dataPort", instance.getTaskManagerLocation().dataPort()); gen.writeNumberField("timeSinceLastHeartbeat", instance.getLastHeartBeat()); gen.writeNumberField("slotsNumber", instance.getTotalNumberOfSlots()); gen.writeNumberField("freeSlots", instance.getNumberOfAvailableSlots()); gen.writeNumberField("cpuCores", instance.getResources().getNumberOfCPUCores()); gen.writeNumberField("physicalMemory", instance.getResources().getSizeOfPhysicalMemory()); gen.writeNumberField("freeMemory", instance.getResources().getSizeOfJvmHeap()); gen.writeNumberField("managedMemory", instance.getResources().getSizeOfManagedMemory()); // only send metrics when only one task manager requests them. if (pathParams.containsKey(TASK_MANAGER_ID_KEY)) { byte[] report = instance.getLastMetricsReport(); if (report != null) { gen.writeFieldName("metrics"); gen.writeRawValue(new String(report, "utf-8")); } } gen.writeEndObject(); } gen.writeEndArray(); gen.writeEndObject(); gen.close(); return writer.toString(); } else { throw new Exception("No connection to the leading JobManager."); } } catch (Exception e) { throw new RuntimeException("Failed to fetch list of all task managers: " + e.getMessage(), e); } }
public void deployToSlot(final SimpleSlot slot) throws JobException { // sanity checks if (slot == null) { throw new NullPointerException(); } if (!slot.isAlive()) { throw new JobException("Target slot for deployment is not alive."); } // make sure exactly one deployment call happens from the correct state // note: the transition from CREATED to DEPLOYING is for testing purposes only ExecutionState previous = this.state; if (previous == SCHEDULED || previous == CREATED) { if (!transitionState(previous, DEPLOYING)) { // race condition, someone else beat us to the deploying call. // this should actually not happen and indicates a race somewhere else throw new IllegalStateException("Cannot deploy task: Concurrent deployment call race."); } } else { // vertex may have been cancelled, or it was already scheduled throw new IllegalStateException( "The vertex must be in CREATED or SCHEDULED state to be deployed. Found state " + previous); } try { // good, we are allowed to deploy if (!slot.setExecutedVertex(this)) { throw new JobException("Could not assign the ExecutionVertex to the slot " + slot); } this.assignedResource = slot; this.assignedResourceLocation = slot.getInstance().getInstanceConnectionInfo(); // race double check, did we fail/cancel and do we need to release the slot? if (this.state != DEPLOYING) { slot.releaseSlot(); return; } if (LOG.isInfoEnabled()) { LOG.info( String.format( "Deploying %s (attempt #%d) to %s", vertex.getSimpleName(), attemptNumber, slot.getInstance().getInstanceConnectionInfo().getHostname())); } final TaskDeploymentDescriptor deployment = vertex.createDeploymentDescriptor( attemptId, slot, operatorState, recoveryTimestamp, attemptNumber); // register this execution at the execution graph, to receive call backs vertex.getExecutionGraph().registerExecution(this); final Instance instance = slot.getInstance(); final ActorGateway gateway = instance.getActorGateway(); final Future<Object> deployAction = gateway.ask(new SubmitTask(deployment), timeout); deployAction.onComplete( new OnComplete<Object>() { @Override public void onComplete(Throwable failure, Object success) throws Throwable { if (failure != null) { if (failure instanceof TimeoutException) { String taskname = deployment.getTaskInfo().getTaskNameWithSubtasks() + " (" + attemptId + ')'; markFailed( new Exception( "Cannot deploy task " + taskname + " - TaskManager (" + instance + ") not responding after a timeout of " + timeout, failure)); } else { markFailed(failure); } } else { if (!(success.equals(Messages.getAcknowledge()))) { markFailed( new Exception( "Failed to deploy the task to slot " + slot + ": Response was not of type Acknowledge")); } } } }, executionContext); } catch (Throwable t) { markFailed(t); ExceptionUtils.rethrow(t); } }