Exemplo n.º 1
0
  /**
   * Handles the notification that the task belonging to this execution has finished.
   *
   * <p>The current state is read in a loop. From {@code RUNNING} or {@code DEPLOYING} the
   * execution tries to move to {@code FINISHED}; if that transition does not succeed, the state is
   * read again and the decision is repeated. After a successful transition, the consumers of all
   * partitions of every intermediate result for which a blocking partition was finished are
   * scheduled or updated, the given accumulators are stored, the slot is released and the
   * execution is deregistered from the execution graph. The vertex is notified through
   * {@code executionFinished()} even if one of those steps throws.
   *
   * <p>In state {@code CANCELING} the cancellation is completed instead. In {@code CANCELED} or
   * {@code FAILED} the message is only logged. Any other state is treated as an error and fails
   * the execution.
   *
   * @param flinkAccumulators accumulators to store as this execution's Flink accumulators
   * @param userAccumulators accumulators to store as this execution's user accumulators
   */
  void markFinished(
      Map<AccumulatorRegistry.Metric, Accumulator<?, ?>> flinkAccumulators,
      Map<String, Accumulator<?, ?>> userAccumulators) {

    // this call usually comes during RUNNING, but may also come while still in deploying (very fast
    // tasks!)
    while (true) {
      // Snapshot of the state; every decision and every message below is based on this value so
      // that what gets reported is exactly the state that selected the branch.
      final ExecutionState current = this.state;

      if (current == RUNNING || current == DEPLOYING) {

        // If the transition does not succeed we fall through to the next loop iteration and
        // decide again based on a freshly read state.
        if (transitionState(current, FINISHED)) {
          try {
            for (IntermediateResultPartition finishedPartition :
                getVertex().finishAllBlockingPartitions()) {

              IntermediateResultPartition[] allPartitions =
                  finishedPartition.getIntermediateResult().getPartitions();

              // notify the consumers of every partition of the affected intermediate result
              for (IntermediateResultPartition partition : allPartitions) {
                scheduleOrUpdateConsumers(partition.getConsumers());
              }
            }

            synchronized (accumulatorLock) {
              this.flinkAccumulators = flinkAccumulators;
              this.userAccumulators = userAccumulators;
            }

            assignedResource.releaseSlot();
            vertex.getExecutionGraph().deregisterExecution(this);
          } finally {
            // the vertex must learn about the finished execution even if the steps above failed
            vertex.executionFinished();
          }
          return;
        }
      } else if (current == CANCELING) {
        // we sent a cancel call, and the task manager finished before it arrived. We
        // will never get a CANCELED call back from the job manager
        cancelingComplete();
        return;
      } else if (current == CANCELED || current == FAILED) {
        if (LOG.isDebugEnabled()) {
          // report the snapshot, not a re-read of the field, which may have changed meanwhile
          LOG.debug("Task FINISHED, but concurrently went to state " + current);
        }
        return;
      } else {
        // this should not happen, we need to fail this
        markFailed(
            new Exception("Vertex received FINISHED message while being in state " + current));
        return;
      }
    }
  }
Exemplo n.º 2
0
  /**
   * Schedules or updates the consumers of a produced partition, depending on the state of each
   * consumer's current execution attempt.
   *
   * <ul>
   *   <li>{@code CREATED}: the partition info is cached at the consumer vertex and the vertex is
   *       scheduled asynchronously.
   *   <li>{@code RUNNING}: an update message with the partition's location is sent right away.
   *   <li>{@code SCHEDULED} or {@code DEPLOYING}: the partition info is cached so it can be sent
   *       later.
   * </ul>
   *
   * <p>Consumers in any other state are skipped. Only the first consumer group is processed; more
   * than one group is reported through {@code fail(...)}.
   *
   * @param allConsumers the consumer groups of the partition, each group being a list of edges
   */
  void scheduleOrUpdateConsumers(List<List<ExecutionEdge>> allConsumers) {
    final int groupCount = allConsumers.size();

    if (groupCount == 0) {
      // nobody consumes this partition
      return;
    }

    if (groupCount > 1) {
      // NOTE(review): after reporting the failure, the first group is still processed below;
      // confirm that this fall-through is intended.
      fail(
          new IllegalStateException(
              "Currently, only a single consumer group per partition is supported."));
    }

    for (ExecutionEdge consumerEdge : allConsumers.get(0)) {
      final ExecutionVertex consumerVertex = consumerEdge.getTarget();
      final Execution consumerAttempt = consumerVertex.getCurrentExecutionAttempt();
      final ExecutionState stateOfConsumer = consumerAttempt.getState();
      final IntermediateResultPartition producedPartition = consumerEdge.getSource();

      if (stateOfConsumer == CREATED) {
        // Consumer not yet scheduled: cache the input channel information (in case of a
        // deployment race) and trigger scheduling of the consumer.
        final Execution producerAttempt =
            producedPartition.getProducer().getCurrentExecutionAttempt();

        consumerVertex.cachePartitionInfo(
            PartialInputChannelDeploymentDescriptor.fromEdge(producedPartition, producerAttempt));

        // The deployment descriptor of a consuming task holds whatever deployment information
        // is available when the task is deployed. Partitions that do not exist yet at that
        // point are announced at runtime via update messages.
        //
        // TODO This may send many update messages although the consumer was already deployed
        // with everything it needs. Check whether that is a problem and fix it if so.
        final Callable<Boolean> scheduleConsumer =
            new Callable<Boolean>() {
              @Override
              public Boolean call() throws Exception {
                try {
                  consumerVertex.scheduleForExecution(
                      consumerVertex.getExecutionGraph().getScheduler(),
                      consumerVertex.getExecutionGraph().isQueuedSchedulingAllowed());
                } catch (Throwable t) {
                  fail(
                      new IllegalStateException(
                          "Could not schedule consumer vertex " + consumerVertex, t));
                }

                return Boolean.TRUE;
              }
            };

        future(scheduleConsumer, executionContext);

        // re-check: the consumer may have started running in the meantime
        if (consumerVertex.getExecutionState() == RUNNING) {
          consumerVertex.sendPartitionInfos();
        }
      } else if (stateOfConsumer == RUNNING) {
        // Consumer already runs: tell it immediately where the partition is located.
        final SimpleSlot consumerSlot = consumerAttempt.getAssignedResource();

        if (consumerSlot == null) {
          // The consumer has been reset concurrently
          continue;
        }

        final Instance consumerInstance = consumerSlot.getInstance();

        final ResultPartitionID partitionId =
            new ResultPartitionID(producedPartition.getPartitionId(), attemptId);

        final Instance producerInstance =
            producedPartition.getProducer().getCurrentAssignedResource().getInstance();

        // same instance => local location, otherwise a remote location with connection details
        final ResultPartitionLocation partitionLocation =
            consumerInstance.equals(producerInstance)
                ? ResultPartitionLocation.createLocal()
                : ResultPartitionLocation.createRemote(
                    new ConnectionID(
                        producerInstance.getInstanceConnectionInfo(),
                        producedPartition.getIntermediateResult().getConnectionIndex()));

        final InputChannelDeploymentDescriptor channelDescriptor =
            new InputChannelDeploymentDescriptor(partitionId, partitionLocation);

        final UpdatePartitionInfo updateMessage =
            new UpdateTaskSinglePartitionInfo(
                consumerAttempt.getAttemptId(),
                producedPartition.getIntermediateResult().getId(),
                channelDescriptor);

        sendUpdatePartitionInfoRpcCall(consumerSlot, updateMessage);
      } else if (stateOfConsumer == SCHEDULED || stateOfConsumer == DEPLOYING) {
        // Consumer is on its way: cache the input channel information and send it later.
        final Execution producerAttempt =
            producedPartition.getProducer().getCurrentExecutionAttempt();

        consumerVertex.cachePartitionInfo(
            PartialInputChannelDeploymentDescriptor.fromEdge(producedPartition, producerAttempt));

        // re-check: the consumer may have started running in the meantime
        if (consumerVertex.getExecutionState() == RUNNING) {
          consumerVertex.sendPartitionInfos();
        }
      }
    }
  }