Example #1
  @Override
  public void releaseSlot() {

    // try to transition to the CANCELED state; that state marks
    // that a release is in progress
    if (markCancelled()) {

      // kill all tasks currently running in this slot
      Execution exec = this.executedTask;
      if (exec != null && !exec.isFinished()) {
        exec.fail(
            new Exception(
                "The slot in which the task was executed has been released. Probably loss of TaskManager "
                    + getInstance()));
      }

      // release directly (if we are directly allocated),
      // otherwise release through the parent shared slot
      if (getParent() == null) {
        // we have to give back the slot to the owning instance
        getInstance().returnAllocatedSlot(this);
      } else {
        // we have to ask our parent to dispose us
        getParent().releaseChild(this);
      }
    }
  }

  @Test
  public void testRegistrationOfExecutionsFailingFinalize() {
    try {

      final JobVertexID jid1 = new JobVertexID();
      final JobVertexID jid2 = new JobVertexID();

      JobVertex v1 = new FailingFinalizeJobVertex("v1", jid1);
      JobVertex v2 = new JobVertex("v2", jid2);

      Map<ExecutionAttemptID, Execution> executions = setupExecution(v1, 6, v2, 4);
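      // 6 subtasks for the failing vertex v1 and 4 for v2, i.e. 10 executions in total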

      List<Execution> execList = new ArrayList<Execution>();
      execList.addAll(executions.values());
      // sort executions by job vertex, with the failing job vertex first
      Collections.sort(
          execList,
          new Comparator<Execution>() {
            @Override
            public int compare(Execution o1, Execution o2) {
              return o1.getVertex().getSimpleName().compareTo(o2.getVertex().getSimpleName());
            }
          });

      int cnt = 0;
      for (Execution e : execList) {
        cnt++;
        e.markFinished();
        if (cnt <= 6) {
          // the first 6 executions belong to the failing vertex v1; the last of
          // them triggers the failing finalize hook, which fails the job
          assertEquals(ExecutionState.FINISHED, e.getState());
        } else {
          // all following executions (belonging to v2) should be canceled
          assertEquals(ExecutionState.CANCELED, e.getState());
        }
      }

      assertEquals(0, executions.size());
    } catch (Exception e) {
      e.printStackTrace();
      fail(e.getMessage());
    }
  }

  @Test
  public void testRegistrationOfExecutionsFinishing() {
    try {
      final JobVertexID jid1 = new JobVertexID();
      final JobVertexID jid2 = new JobVertexID();

      JobVertex v1 = new JobVertex("v1", jid1);
      JobVertex v2 = new JobVertex("v2", jid2);

      Map<ExecutionAttemptID, Execution> executions = setupExecution(v1, 7650, v2, 2350);
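      // 7650 + 2350 = 10,000 executions are initially registered in the map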

      for (Execution e : executions.values()) {
        e.markFinished();
      }

      assertEquals(0, executions.size());
    } catch (Exception e) {
      e.printStackTrace();
      fail(e.getMessage());
    }
  }

  @Test
  public void testRegistrationOfExecutionsFailedExternally() {
    try {

      final JobVertexID jid1 = new JobVertexID();
      final JobVertexID jid2 = new JobVertexID();

      JobVertex v1 = new JobVertex("v1", jid1);
      JobVertex v2 = new JobVertex("v2", jid2);

      Map<ExecutionAttemptID, Execution> executions = setupExecution(v1, 7, v2, 6);

      for (Execution e : executions.values()) {
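        // failing an execution (even with a null cause) moves it to a terminal
        // state, which should de-register it from the map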
        e.fail(null);
      }

      assertEquals(0, executions.size());
    } catch (Exception e) {
      e.printStackTrace();
      fail(e.getMessage());
    }
  }

  @Test
  public void testRegistrationOfExecutionsCanceled() {
    try {

      final JobVertexID jid1 = new JobVertexID();
      final JobVertexID jid2 = new JobVertexID();

      JobVertex v1 = new JobVertex("v1", jid1);
      JobVertex v2 = new JobVertex("v2", jid2);

      Map<ExecutionAttemptID, Execution> executions = setupExecution(v1, 19, v2, 37);

      for (Execution e : executions.values()) {
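        // cancel() moves the execution into CANCELING; cancelingComplete()
        // finishes the transition to the terminal CANCELED state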
        e.cancel();
        e.cancelingComplete();
      }

      assertEquals(0, executions.size());
    } catch (Exception e) {
      e.printStackTrace();
      fail(e.getMessage());
    }
  }
Example #6
  void scheduleOrUpdateConsumers(List<List<ExecutionEdge>> allConsumers) {
    final int numConsumers = allConsumers.size();

    if (numConsumers > 1) {
      fail(
          new IllegalStateException(
              "Currently, only a single consumer group per partition is supported."));
    } else if (numConsumers == 0) {
      return;
    }

    for (ExecutionEdge edge : allConsumers.get(0)) {
      final ExecutionVertex consumerVertex = edge.getTarget();

      final Execution consumer = consumerVertex.getCurrentExecutionAttempt();
      final ExecutionState consumerState = consumer.getState();

      final IntermediateResultPartition partition = edge.getSource();

      // ----------------------------------------------------------------
      // Consumer is created => try to deploy and cache input channel
      // descriptors if there is a deployment race
      // ----------------------------------------------------------------
      if (consumerState == CREATED) {
        final Execution partitionExecution = partition.getProducer().getCurrentExecutionAttempt();

        consumerVertex.cachePartitionInfo(
            PartialInputChannelDeploymentDescriptor.fromEdge(partition, partitionExecution));

        // When deploying a consuming task, its task deployment descriptor will contain all
        // deployment information available at the respective time. It is possible that some
        // of the partitions to be consumed have not been created yet. These are updated
        // at runtime via update messages.
        //
        // TODO The current approach may send many update messages even though the consuming
        // task has already been deployed with all necessary information. We have to check
        // whether this is a problem and fix it, if it is.
        future(
            new Callable<Boolean>() {
              @Override
              public Boolean call() throws Exception {
                try {
                  consumerVertex.scheduleForExecution(
                      consumerVertex.getExecutionGraph().getScheduler(),
                      consumerVertex.getExecutionGraph().isQueuedSchedulingAllowed());
                } catch (Throwable t) {
                  fail(
                      new IllegalStateException(
                          "Could not schedule consumer vertex " + consumerVertex, t));
                }

                return true;
              }
            },
            executionContext);

        // double check to resolve race conditions
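        // (the consumer may have reached RUNNING while the partition info was being cached)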
        if (consumerVertex.getExecutionState() == RUNNING) {
          consumerVertex.sendPartitionInfos();
        }
      }
      // ----------------------------------------------------------------
      // Consumer is running => send update message now
      // ----------------------------------------------------------------
      else {
        if (consumerState == RUNNING) {
          final SimpleSlot consumerSlot = consumer.getAssignedResource();

          if (consumerSlot == null) {
            // The consumer has been reset concurrently
            continue;
          }

          final Instance consumerInstance = consumerSlot.getInstance();

          // 'attemptId' is the enclosing (producer) execution's attempt ID
          final ResultPartitionID partitionId =
              new ResultPartitionID(partition.getPartitionId(), attemptId);

          final Instance partitionInstance =
              partition.getProducer().getCurrentAssignedResource().getInstance();

          final ResultPartitionLocation partitionLocation;

          if (consumerInstance.equals(partitionInstance)) {
            // Consuming task is deployed to the same instance as the partition => local
            partitionLocation = ResultPartitionLocation.createLocal();
          } else {
            // Different instances => remote
            final ConnectionID connectionId =
                new ConnectionID(
                    partitionInstance.getInstanceConnectionInfo(),
                    partition.getIntermediateResult().getConnectionIndex());

            partitionLocation = ResultPartitionLocation.createRemote(connectionId);
          }

          final InputChannelDeploymentDescriptor descriptor =
              new InputChannelDeploymentDescriptor(partitionId, partitionLocation);

          final UpdatePartitionInfo updateTaskMessage =
              new UpdateTaskSinglePartitionInfo(
                  consumer.getAttemptId(), partition.getIntermediateResult().getId(), descriptor);

          sendUpdatePartitionInfoRpcCall(consumerSlot, updateTaskMessage);
        }
        // ----------------------------------------------------------------
        // Consumer is scheduled or deploying => cache input channel
        // deployment descriptors and send update message later
        // ----------------------------------------------------------------
        else if (consumerState == SCHEDULED || consumerState == DEPLOYING) {
          final Execution partitionExecution = partition.getProducer().getCurrentExecutionAttempt();

          consumerVertex.cachePartitionInfo(
              PartialInputChannelDeploymentDescriptor.fromEdge(partition, partitionExecution));

          // double check to resolve race conditions
          if (consumerVertex.getExecutionState() == RUNNING) {
            consumerVertex.sendPartitionInfos();
          }
        }
      }
    }
  }
Example #7
  public boolean restoreLatestCheckpointedState(
      Map<JobVertexID, ExecutionJobVertex> tasks,
      boolean errorIfNoCheckpoint,
      boolean allOrNothingState)
      throws Exception {

    synchronized (lock) {
      if (shutdown) {
        throw new IllegalStateException("CheckpointCoordinator is shut down");
      }

      // Recover the checkpoints
      completedCheckpointStore.recover();

      // restore from the latest checkpoint
      CompletedCheckpoint latest = completedCheckpointStore.getLatestCheckpoint();

      if (latest == null) {
        if (errorIfNoCheckpoint) {
          throw new IllegalStateException("No completed checkpoint available");
        } else {
          return false;
        }
      }

      long recoveryTimestamp = System.currentTimeMillis();

      for (Map.Entry<JobVertexID, TaskState> taskGroupStateEntry :
          latest.getTaskStates().entrySet()) {
        TaskState taskState = taskGroupStateEntry.getValue();
        ExecutionJobVertex executionJobVertex = tasks.get(taskGroupStateEntry.getKey());

        if (executionJobVertex != null) {
          // check that we only restore the state if the parallelism has not been changed
          if (taskState.getParallelism() != executionJobVertex.getParallelism()) {
            throw new RuntimeException(
                "Cannot restore the latest checkpoint because "
                    + "the parallelism changed. The operator"
                    + executionJobVertex.getJobVertexId()
                    + " has parallelism "
                    + executionJobVertex.getParallelism()
                    + " whereas the corresponding"
                    + "state object has a parallelism of "
                    + taskState.getParallelism());
          }

          int counter = 0;

          // split the key groups across the vertex's parallel subtasks
          List<Set<Integer>> keyGroupPartitions =
              createKeyGroupPartitions(numberKeyGroups, executionJobVertex.getParallelism());

          for (int i = 0; i < executionJobVertex.getParallelism(); i++) {
            SubtaskState subtaskState = taskState.getState(i);
            SerializedValue<StateHandle<?>> state = null;

            if (subtaskState != null) {
              // count the number of executions for which we set a state
              counter++;
              state = subtaskState.getState();
            }

            Map<Integer, SerializedValue<StateHandle<?>>> kvStateForTaskMap =
                taskState.getUnwrappedKvStates(keyGroupPartitions.get(i));

            Execution currentExecutionAttempt =
                executionJobVertex.getTaskVertices()[i].getCurrentExecutionAttempt();
            currentExecutionAttempt.setInitialState(state, kvStateForTaskMap, recoveryTimestamp);
          }

          if (allOrNothingState && counter > 0 && counter < executionJobVertex.getParallelism()) {
            throw new IllegalStateException(
                "The checkpoint contained state only for "
                    + "a subset of tasks for vertex "
                    + executionJobVertex);
          }
        } else {
          throw new IllegalStateException(
              "There is no execution job vertex for the job"
                  + " vertex ID "
                  + taskGroupStateEntry.getKey());
        }
      }

      return true;
    }
  }
Example #8
  /**
   * Receives an AcknowledgeCheckpoint message and returns whether the message was associated with a
   * pending checkpoint.
   *
   * @param message Checkpoint ack from the task manager
   * @return Flag indicating whether the ack'd checkpoint was associated with a pending checkpoint.
   * @throws Exception If the checkpoint cannot be added to the completed checkpoint store.
   */
  public boolean receiveAcknowledgeMessage(AcknowledgeCheckpoint message) throws Exception {
    if (shutdown || message == null) {
      return false;
    }
    if (!job.equals(message.getJob())) {
      LOG.error("Received AcknowledgeCheckpoint message for wrong job: {}", message);
      return false;
    }

    final long checkpointId = message.getCheckpointId();

    CompletedCheckpoint completed = null;
    PendingCheckpoint checkpoint;

    // Flag indicating whether the ack message was for a known pending
    // checkpoint.
    boolean isPendingCheckpoint;

    synchronized (lock) {
      // we need to check inside the lock for being shutdown as well, otherwise we
      // get races and invalid error log messages
      if (shutdown) {
        return false;
      }

      checkpoint = pendingCheckpoints.get(checkpointId);

      if (checkpoint != null && !checkpoint.isDiscarded()) {
        isPendingCheckpoint = true;

        if (checkpoint.acknowledgeTask(
            message.getTaskExecutionId(),
            message.getState(),
            message.getStateSize(),
            null)) { // TODO: Give KV-state to the acknowledgeTask method
          if (checkpoint.isFullyAcknowledged()) {
            completed = checkpoint.toCompletedCheckpoint();

            completedCheckpointStore.addCheckpoint(completed);

            LOG.info(
                "Completed checkpoint "
                    + checkpointId
                    + " (in "
                    + completed.getDuration()
                    + " ms)");

            if (LOG.isDebugEnabled()) {
              StringBuilder builder = new StringBuilder();
              for (Map.Entry<JobVertexID, TaskState> entry : completed.getTaskStates().entrySet()) {
                builder
                    .append("JobVertexID: ")
                    .append(entry.getKey())
                    .append(" {")
                    .append(entry.getValue())
                    .append("}");
              }

              LOG.debug(builder.toString());
            }

            pendingCheckpoints.remove(checkpointId);
            rememberRecentCheckpointId(checkpointId);

            dropSubsumedCheckpoints(completed.getTimestamp());
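            // (pending checkpoints older than this completed one are dropped as subsumed)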

            onFullyAcknowledgedCheckpoint(completed);

            triggerQueuedRequests();
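            // completing a checkpoint frees capacity, so a queued trigger request may run now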
          }
        } else {
          // checkpoint did not accept message
          LOG.error(
              "Received duplicate or invalid acknowledge message for checkpoint "
                  + checkpointId
                  + " , task "
                  + message.getTaskExecutionId());
        }
      } else if (checkpoint != null) {
        // this should not happen
        throw new IllegalStateException(
            "Received message for discarded but non-removed checkpoint " + checkpointId);
      } else {
        // message is for an unknown checkpoint, or comes too late (checkpoint disposed)
        if (recentPendingCheckpoints.contains(checkpointId)) {
          isPendingCheckpoint = true;
          LOG.warn("Received late message for now expired checkpoint attempt " + checkpointId);
        } else {
          isPendingCheckpoint = false;
        }
      }
    }

    // send the confirmation messages to the necessary targets. we do this here
    // to be outside the lock scope
    if (completed != null) {
      final long timestamp = completed.getTimestamp();

      for (ExecutionVertex ev : tasksToCommitTo) {
        Execution ee = ev.getCurrentExecutionAttempt();
        if (ee != null) {
          ExecutionAttemptID attemptId = ee.getAttemptId();
          NotifyCheckpointComplete notifyMessage =
              new NotifyCheckpointComplete(job, attemptId, checkpointId, timestamp);
          ev.sendMessageToCurrentExecution(notifyMessage, attemptId);
        }
      }

      statsTracker.onCompletedCheckpoint(completed);
    }

    return isPendingCheckpoint;
  }
Example #9
  /**
   * Triggers a new checkpoint and uses the given timestamp as the checkpoint timestamp.
   *
   * @param timestamp The timestamp for the checkpoint.
   * @param nextCheckpointId The checkpoint ID to use for this checkpoint or <code>-1</code> if the
   *     checkpoint ID counter should be queried.
   * @return <code>true</code> if the checkpoint was successfully triggered, <code>false</code>
   *     otherwise.
   */
  public boolean triggerCheckpoint(long timestamp, long nextCheckpointId) throws Exception {
    // make some eager pre-checks
    synchronized (lock) {
      // abort if the coordinator has been shutdown in the meantime
      if (shutdown) {
        return false;
      }

      // sanity check: there should never be more than one trigger request queued
      if (triggerRequestQueued) {
        LOG.warn("Trying to trigger another checkpoint while one was queued already");
        return false;
      }

      // if too many checkpoints are currently in progress, we need to mark that a request is queued
      if (pendingCheckpoints.size() >= maxConcurrentCheckpointAttempts) {
        triggerRequestQueued = true;
        if (currentPeriodicTrigger != null) {
          currentPeriodicTrigger.cancel();
          currentPeriodicTrigger = null;
        }
        return false;
      }

      // make sure the minimum interval between checkpoints has passed
      if (lastTriggeredCheckpoint + minPauseBetweenCheckpoints > timestamp) {
        if (currentPeriodicTrigger != null) {
          currentPeriodicTrigger.cancel();
          currentPeriodicTrigger = null;
        }
        ScheduledTrigger trigger = new ScheduledTrigger();
        timer.scheduleAtFixedRate(trigger, minPauseBetweenCheckpoints, baseInterval);
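        // (re-arm the periodic trigger to fire once the minimum pause has elapsed)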
        return false;
      }
    }

    // first check if all tasks that we need to trigger are running.
    // if not, abort the checkpoint
    ExecutionAttemptID[] triggerIDs = new ExecutionAttemptID[tasksToTrigger.length];
    for (int i = 0; i < tasksToTrigger.length; i++) {
      Execution ee = tasksToTrigger[i].getCurrentExecutionAttempt();
      if (ee != null && ee.getState() == ExecutionState.RUNNING) {
        triggerIDs[i] = ee.getAttemptId();
      } else {
        LOG.info(
            "Checkpoint triggering task {} is not being executed at the moment. Aborting checkpoint.",
            tasksToTrigger[i].getSimpleName());
        return false;
      }
    }

    // next, check if all tasks that need to acknowledge the checkpoint are running.
    // if not, abort the checkpoint
    Map<ExecutionAttemptID, ExecutionVertex> ackTasks = new HashMap<>(tasksToWaitFor.length);

    for (ExecutionVertex ev : tasksToWaitFor) {
      Execution ee = ev.getCurrentExecutionAttempt();
      if (ee != null) {
        ackTasks.put(ee.getAttemptId(), ev);
      } else {
        LOG.info(
            "Checkpoint acknowledging task {} is not being executed at the moment. Aborting checkpoint.",
            ev.getSimpleName());
        return false;
      }
    }

    // we will actually trigger this checkpoint!

    lastTriggeredCheckpoint = timestamp;
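    // (the minimum-pause check at the top of this method compares against this timestamp)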
    final long checkpointID;
    if (nextCheckpointId < 0) {
      try {
        // this must happen outside the locked scope, because it communicates
        // with external services (in HA mode) and may block for a while.
        checkpointID = checkpointIdCounter.getAndIncrement();
      } catch (Throwable t) {
        int numUnsuccessful = ++numUnsuccessfulCheckpointsTriggers;
        LOG.warn(
            "Failed to trigger checkpoint ("
                + numUnsuccessful
                + " consecutive failed attempts so far)",
            t);
        return false;
      }
    } else {
      checkpointID = nextCheckpointId;
    }

    LOG.info("Triggering checkpoint " + checkpointID + " @ " + timestamp);

    final PendingCheckpoint checkpoint =
        new PendingCheckpoint(job, checkpointID, timestamp, ackTasks);

    // schedule the timer that will clean up the expired checkpoints
    TimerTask canceller =
        new TimerTask() {
          @Override
          public void run() {
            try {
              synchronized (lock) {
                // only do the work if the checkpoint is not discarded anyways
                // note that checkpoint completion discards the pending checkpoint object
                if (!checkpoint.isDiscarded()) {
                  LOG.info("Checkpoint " + checkpointID + " expired before completing.");

                  checkpoint.discard(userClassLoader);
                  pendingCheckpoints.remove(checkpointID);
                  rememberRecentCheckpointId(checkpointID);

                  onCancelCheckpoint(checkpointID);

                  triggerQueuedRequests();
                }
              }
            } catch (Throwable t) {
              LOG.error("Exception while handling checkpoint timeout", t);
            }
          }
        };

    try {
      // re-acquire the lock
      synchronized (lock) {
        // since we released the lock in the meantime, we need to re-check
        // that the conditions still hold. this is clumsy, but it allows us to
        // release the lock in the meantime while calls to external services are
        // blocking progress, and still gives us early checks that skip work
        // if no checkpoint can happen anyways
        if (shutdown) {
          return false;
        } else if (triggerRequestQueued) {
          LOG.warn("Trying to trigger another checkpoint while one was queued already");
          return false;
        } else if (pendingCheckpoints.size() >= maxConcurrentCheckpointAttempts) {
          triggerRequestQueued = true;
          if (currentPeriodicTrigger != null) {
            currentPeriodicTrigger.cancel();
            currentPeriodicTrigger = null;
          }
          return false;
        }

        pendingCheckpoints.put(checkpointID, checkpoint);
        timer.schedule(canceller, checkpointTimeout);
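        // the canceller expires the checkpoint if it has not completed within the timeout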
      }
      // end of lock scope

      // send the messages to the tasks that trigger their checkpoint
      for (int i = 0; i < tasksToTrigger.length; i++) {
        ExecutionAttemptID id = triggerIDs[i];
        TriggerCheckpoint message = new TriggerCheckpoint(job, id, checkpointID, timestamp);
        tasksToTrigger[i].sendMessageToCurrentExecution(message, id);
      }

      numUnsuccessfulCheckpointsTriggers = 0;
      return true;
    } catch (Throwable t) {
      // guard the map against concurrent modifications
      synchronized (lock) {
        pendingCheckpoints.remove(checkpointID);
      }

      int numUnsuccessful = ++numUnsuccessfulCheckpointsTriggers;
      LOG.warn(
          "Failed to trigger checkpoint ("
              + numUnsuccessful
              + " consecutive failed attempts so far)",
          t);
      if (!checkpoint.isDiscarded()) {
        checkpoint.discard(userClassLoader);
      }
      return false;
    }
  }
Example #10
  public void restoreLatestCheckpointedState(
      Map<JobVertexID, ExecutionJobVertex> tasks,
      boolean errorIfNoCheckpoint,
      boolean allOrNothingState)
      throws Exception {

    synchronized (lock) {
      if (shutdown) {
        throw new IllegalStateException("CheckpointCoordinator is shut down");
      }

      // Recover the checkpoints
      completedCheckpointStore.recover();

      // restore from the latest checkpoint
      CompletedCheckpoint latest = completedCheckpointStore.getLatestCheckpoint();

      if (latest == null) {
        if (errorIfNoCheckpoint) {
          throw new IllegalStateException("No completed checkpoint available");
        } else {
          return;
        }
      }

      long recoveryTimestamp = System.currentTimeMillis();

      if (allOrNothingState) {
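        // count, per vertex, how many subtasks received state so that we can
        // verify below that state was restored either for all subtasks or for none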
        Map<ExecutionJobVertex, Integer> stateCounts = new HashMap<ExecutionJobVertex, Integer>();

        for (StateForTask state : latest.getStates()) {
          ExecutionJobVertex vertex = tasks.get(state.getOperatorId());
          Execution exec =
              vertex.getTaskVertices()[state.getSubtask()].getCurrentExecutionAttempt();
          exec.setInitialState(state.getState(), recoveryTimestamp);

          Integer count = stateCounts.get(vertex);
          if (count != null) {
            stateCounts.put(vertex, count + 1);
          } else {
            stateCounts.put(vertex, 1);
          }
        }

        // validate that either all task vertices have state, or none
        for (Map.Entry<ExecutionJobVertex, Integer> entry : stateCounts.entrySet()) {
          ExecutionJobVertex vertex = entry.getKey();
          if (entry.getValue() != vertex.getParallelism()) {
            throw new IllegalStateException(
                "The checkpoint contained state only for a subset of tasks for vertex " + vertex);
          }
        }
      } else {
        for (StateForTask state : latest.getStates()) {
          ExecutionJobVertex vertex = tasks.get(state.getOperatorId());
          Execution exec =
              vertex.getTaskVertices()[state.getSubtask()].getCurrentExecutionAttempt();
          exec.setInitialState(state.getState(), recoveryTimestamp);
        }
      }
    }
  }