コード例 #1
0
ファイル: Execution.java プロジェクト: f-sander/flink
  /**
   * Sends a {@code FailIntermediateResultPartitions} message for this execution attempt to the
   * instance that owns the currently assigned slot.
   *
   * <p>Nothing is sent when no slot is assigned or when the slot's instance reports that it is
   * not alive. The message is handed to the gateway via {@code tell}; this method does not look
   * at any result of that call.
   */
  private void sendFailIntermediateResultPartitionsRpcCall() {
    // work on a local copy of the field so the null check and the use refer to the same slot
    final SimpleSlot assignedSlot = this.assignedResource;
    if (assignedSlot == null) {
      return;
    }

    final Instance owner = assignedSlot.getInstance();
    if (!owner.isAlive()) {
      return;
    }

    // TODO For some tests this could be a problem when querying too early if all resources were
    // released
    owner.getActorGateway().tell(new FailIntermediateResultPartitions(attemptId));
  }
コード例 #2
0
ファイル: Execution.java プロジェクト: f-sander/flink
  /**
   * Sends an UpdatePartitionInfo message to the instance of the consumerSlot.
   *
   * <p>The message is sent with {@code ask} using this execution's timeout. If the resulting
   * future fails, {@code fail} is called with an {@link IllegalStateException} that names the
   * instance and carries the original failure as its cause. The callback is registered on
   * {@code executionContext}.
   *
   * @param consumerSlot Slot to whose instance the message will be sent; if {@code null}, the
   *     method returns without doing anything
   * @param updatePartitionInfo UpdatePartitionInfo message
   */
  private void sendUpdatePartitionInfoRpcCall(
      final SimpleSlot consumerSlot, final UpdatePartitionInfo updatePartitionInfo) {

    if (consumerSlot == null) {
      return;
    }

    final Instance target = consumerSlot.getInstance();
    final Future<Object> pendingUpdate =
        target.getActorGateway().ask(updatePartitionInfo, timeout);

    // translate a failed update into a failure of this execution
    final OnFailure failThisExecution =
        new OnFailure() {
          @Override
          public void onFailure(Throwable cause) throws Throwable {
            final String message = "Update task on instance " + target + " failed due to:";
            fail(new IllegalStateException(message, cause));
          }
        };

    pendingUpdate.onFailure(failThisExecution, executionContext);
  }
コード例 #3
0
  /**
   * Builds a JSON document describing task managers known to the JobManager.
   *
   * <p>If {@code pathParams} contains {@code TASK_MANAGER_ID_KEY}, only that task manager is
   * requested and, when it has a last metrics report, the report is embedded under
   * {@code "metrics"}. Otherwise all registered task managers are requested and no metrics are
   * written. In both cases the result is an object with a single {@code "taskmanagers"} array,
   * so callers do not have to distinguish the two shapes. An id string that triggers an
   * {@link IllegalArgumentException} while being resolved results in an empty array.
   *
   * @param pathParams path parameters of the request; checked for {@code TASK_MANAGER_ID_KEY}
   * @param queryParams query parameters of the request; not read by this method
   * @param jobManager gateway used to query the JobManager; {@code null} is reported as a
   *     failure
   * @return the generated JSON as a string
   * @throws Exception declared by the overridden signature; every exception raised inside this
   *     method (including the missing-JobManager case) is rethrown wrapped in a
   *     {@link RuntimeException}
   */
  @Override
  public String handleJsonRequest(
      Map<String, String> pathParams, Map<String, String> queryParams, ActorGateway jobManager)
      throws Exception {
    try {
      if (jobManager == null) {
        throw new Exception("No connection to the leading JobManager.");
      }

      // Whether one task manager or all of them are requested, the result is returned as an
      // array. The flag is computed once because it also decides whether metrics are written.
      final boolean singleTaskManager = pathParams.containsKey(TASK_MANAGER_ID_KEY);

      final List<Instance> instances = new ArrayList<>();
      if (singleTaskManager) {
        try {
          InstanceID instanceID =
              new InstanceID(StringUtils.hexStringToByte(pathParams.get(TASK_MANAGER_ID_KEY)));
          Future<Object> future =
              jobManager.ask(
                  new JobManagerMessages.RequestTaskManagerInstance(instanceID), timeout);
          TaskManagerInstance instance = (TaskManagerInstance) Await.result(future, timeout);
          if (instance.instance().nonEmpty()) {
            instances.add(instance.instance().get());
          }
        } catch (IllegalArgumentException ignored) {
          // this means the id string was invalid. Keep the list empty.
        }
      } else {
        Future<Object> future =
            jobManager.ask(JobManagerMessages.getRequestRegisteredTaskManagers(), timeout);
        RegisteredTaskManagers taskManagers =
            (RegisteredTaskManagers) Await.result(future, timeout);
        instances.addAll(taskManagers.asJavaCollection());
      }

      final StringWriter writer = new StringWriter();

      // try-with-resources closes the generator on the failure path as well; previously it was
      // only closed when every write succeeded
      try (JsonGenerator gen = JsonFactory.jacksonFactory.createGenerator(writer)) {
        gen.writeStartObject();
        gen.writeArrayFieldStart("taskmanagers");

        for (Instance instance : instances) {
          gen.writeStartObject();
          gen.writeStringField("id", instance.getId().toString());
          gen.writeStringField("path", instance.getActorGateway().path());
          gen.writeNumberField("dataPort", instance.getTaskManagerLocation().dataPort());
          gen.writeNumberField("timeSinceLastHeartbeat", instance.getLastHeartBeat());
          gen.writeNumberField("slotsNumber", instance.getTotalNumberOfSlots());
          gen.writeNumberField("freeSlots", instance.getNumberOfAvailableSlots());
          gen.writeNumberField("cpuCores", instance.getResources().getNumberOfCPUCores());
          gen.writeNumberField("physicalMemory", instance.getResources().getSizeOfPhysicalMemory());
          gen.writeNumberField("freeMemory", instance.getResources().getSizeOfJvmHeap());
          gen.writeNumberField("managedMemory", instance.getResources().getSizeOfManagedMemory());

          // metrics are only included when a single task manager was requested
          if (singleTaskManager) {
            byte[] report = instance.getLastMetricsReport();
            if (report != null) {
              gen.writeFieldName("metrics");
              gen.writeRawValue(new String(report, "utf-8"));
            }
          }

          gen.writeEndObject();
        }

        gen.writeEndArray();
        gen.writeEndObject();
      }

      // the generator has been closed at this point, as in the original success path
      return writer.toString();
    } catch (Exception e) {
      throw new RuntimeException("Failed to fetch list of all task managers: " + e.getMessage(), e);
    }
  }
コード例 #4
0
ファイル: Execution.java プロジェクト: f-sander/flink
  /**
   * Deploys this execution to the given slot by submitting a task deployment descriptor to the
   * slot's instance.
   *
   * <p>The method first moves the execution from CREATED or SCHEDULED to DEPLOYING, then assigns
   * the slot, re-checks the state (releasing the slot and returning if it is no longer
   * DEPLOYING), registers the execution at the execution graph and finally sends a
   * {@code SubmitTask} message via {@code ask}. The outcome of that request is handled in a
   * callback registered on {@code executionContext}: a failed future or a reply that does not
   * equal the acknowledge message leads to {@code markFailed}.
   *
   * @param slot the slot to deploy to
   * @throws NullPointerException if {@code slot} is {@code null}
   * @throws JobException if the slot is not alive; a failed vertex-to-slot assignment also
   *     raises a JobException, which is passed to {@code markFailed} and then to
   *     {@code ExceptionUtils.rethrow} like any other failure in the deployment section
   * @throws IllegalStateException if the execution is not in CREATED or SCHEDULED state, or if
   *     the transition to DEPLOYING does not succeed
   */
  public void deployToSlot(final SimpleSlot slot) throws JobException {
    // sanity checks
    if (slot == null) {
      throw new NullPointerException();
    }
    if (!slot.isAlive()) {
      throw new JobException("Target slot for deployment is not alive.");
    }

    // make sure exactly one deployment call happens from the correct state
    // note: the transition from CREATED to DEPLOYING is for testing purposes only
    final ExecutionState observedState = this.state;
    final boolean deployable = observedState == SCHEDULED || observedState == CREATED;
    if (!deployable) {
      // vertex may have been cancelled, or it was already scheduled
      throw new IllegalStateException(
          "The vertex must be in CREATED or SCHEDULED state to be deployed. Found state "
              + observedState);
    }
    if (!transitionState(observedState, DEPLOYING)) {
      // race condition, someone else beat us to the deploying call.
      // this should actually not happen and indicates a race somewhere else
      throw new IllegalStateException("Cannot deploy task: Concurrent deployment call race.");
    }

    try {
      // good, we are allowed to deploy
      final boolean vertexAssigned = slot.setExecutedVertex(this);
      if (!vertexAssigned) {
        throw new JobException("Could not assign the ExecutionVertex to the slot " + slot);
      }
      this.assignedResource = slot;
      this.assignedResourceLocation = slot.getInstance().getInstanceConnectionInfo();

      // race double check, did we fail/cancel and do we need to release the slot?
      if (this.state != DEPLOYING) {
        slot.releaseSlot();
        return;
      }

      if (LOG.isInfoEnabled()) {
        final String deployMessage =
            String.format(
                "Deploying %s (attempt #%d) to %s",
                vertex.getSimpleName(),
                attemptNumber,
                slot.getInstance().getInstanceConnectionInfo().getHostname());
        LOG.info(deployMessage);
      }

      final TaskDeploymentDescriptor deployment =
          vertex.createDeploymentDescriptor(
              attemptId, slot, operatorState, recoveryTimestamp, attemptNumber);

      // register this execution at the execution graph, to receive call backs
      vertex.getExecutionGraph().registerExecution(this);

      final Instance targetInstance = slot.getInstance();
      final ActorGateway targetGateway = targetInstance.getActorGateway();

      final Future<Object> submission = targetGateway.ask(new SubmitTask(deployment), timeout);

      submission.onComplete(
          new OnComplete<Object>() {

            @Override
            public void onComplete(Throwable failure, Object success) throws Throwable {
              // the request completed normally: only an acknowledge counts as success
              if (failure == null) {
                if (!success.equals(Messages.getAcknowledge())) {
                  markFailed(
                      new Exception(
                          "Failed to deploy the task to slot "
                              + slot
                              + ": Response was not of type Acknowledge"));
                }
                return;
              }

              // any failure other than a timeout is reported unchanged
              if (!(failure instanceof TimeoutException)) {
                markFailed(failure);
                return;
              }

              // a timeout is wrapped with details about the task and the unresponsive instance
              final String taskname =
                  deployment.getTaskInfo().getTaskNameWithSubtasks() + " (" + attemptId + ')';
              final String reason =
                  "Cannot deploy task "
                      + taskname
                      + " - TaskManager ("
                      + targetInstance
                      + ") not responding after a timeout of "
                      + timeout;
              markFailed(new Exception(reason, failure));
            }
          },
          executionContext);
    } catch (Throwable t) {
      markFailed(t);
      ExceptionUtils.rethrow(t);
    }
  }