示例#1
0
  /**
   * Creates the scheduler for one query: builds the stage tree for {@code plan}, attaches the
   * root output buffers, and registers listeners that move {@code queryStateMachine} in response
   * to stage state changes.
   *
   * @param queryStateMachine state machine that is transitioned as stages change state
   * @param locationFactory forwarded to {@code createStages} to create stage locations
   * @param plan root stage execution plan; its sub-stages are turned into child stages
   * @param nodePartitioningManager resolves the node partition map for a partitioning handle; each
   *     handle is resolved at most once and the result is reused for the rest of construction
   * @param nodeScheduler forwarded to {@code createStages}
   * @param remoteTaskFactory forwarded to {@code createStages}
   * @param session session used for stage creation and partition map lookup
   * @param summarizeTaskInfo stored on this scheduler and passed to each created stage
   * @param splitBatchSize forwarded to {@code createStages}
   * @param executor forwarded to {@code createStages} and stored on this scheduler
   * @param rootOutputBuffers output buffers set on the root stage
   * @param nodeTaskMap forwarded to {@code createStages}
   * @param executionPolicy stored on this scheduler
   * @throws NullPointerException if {@code queryStateMachine} or {@code executionPolicy} is null
   */
  public SqlQueryScheduler(
      QueryStateMachine queryStateMachine,
      LocationFactory locationFactory,
      StageExecutionPlan plan,
      NodePartitioningManager nodePartitioningManager,
      NodeScheduler nodeScheduler,
      RemoteTaskFactory remoteTaskFactory,
      Session session,
      boolean summarizeTaskInfo,
      int splitBatchSize,
      ExecutorService executor,
      OutputBuffers rootOutputBuffers,
      NodeTaskMap nodeTaskMap,
      ExecutionPolicy executionPolicy) {
    this.queryStateMachine = requireNonNull(queryStateMachine, "queryStateMachine is null");
    // The message names the actual parameter so a failure points at the right argument.
    this.executionPolicy = requireNonNull(executionPolicy, "executionPolicy is null");
    // Must be assigned before createStages runs: createStages reads this field.
    this.summarizeTaskInfo = summarizeTaskInfo;

    // todo come up with a better way to build this, or eliminate this map
    ImmutableMap.Builder<StageId, StageScheduler> stageSchedulers = ImmutableMap.builder();
    ImmutableMap.Builder<StageId, StageLinkage> stageLinkages = ImmutableMap.builder();

    // Only fetch a distribution once per query to assure all stages see the same machine
    // assignments
    Map<PartitioningHandle, NodePartitionMap> partitioningCache = new HashMap<>();

    // The root plan is given a single-entry bucket-to-partition array; createStages fills the
    // two builders above as a side effect while it walks the plan tree.
    List<SqlStageExecution> stages =
        createStages(
            Optional.empty(),
            new AtomicInteger(),
            locationFactory,
            plan.withBucketToPartition(Optional.of(new int[1])),
            nodeScheduler,
            remoteTaskFactory,
            session,
            splitBatchSize,
            partitioningHandle ->
                partitioningCache.computeIfAbsent(
                    partitioningHandle,
                    handle -> nodePartitioningManager.getNodePartitioningMap(session, handle)),
            executor,
            nodeTaskMap,
            stageSchedulers,
            stageLinkages);

    // createStages adds the stage for the plan it was given before any sub-stage, so index 0 is
    // the root stage.
    SqlStageExecution rootStage = stages.get(0);
    rootStage.setOutputBuffers(rootOutputBuffers);
    this.rootStageId = rootStage.getStageId();

    this.stages = stages.stream().collect(toImmutableMap(SqlStageExecution::getStageId));

    this.stageSchedulers = stageSchedulers.build();
    this.stageLinkages = stageLinkages.build();

    this.executor = executor;

    // The root stage decides normal completion and user cancellation of the whole query.
    rootStage.addStateChangeListener(
        state -> {
          if (state == FINISHED) {
            queryStateMachine.transitionToFinishing();
          } else if (state == CANCELED) {
            // output stage was canceled
            queryStateMachine.transitionToFailed(
                new PrestoException(USER_CANCELED, "Query was canceled"));
          }
        });

    // Every stage (including the root) can fail the query or move it from STARTING to running.
    for (SqlStageExecution stage : stages) {
      stage.addStateChangeListener(
          state -> {
            if (queryStateMachine.isDone()) {
              return;
            }
            if (state == FAILED) {
              queryStateMachine.transitionToFailed(
                  stage.getStageInfo().getFailureCause().toException());
            } else if (state == ABORTED) {
              // this should never happen, since abort can only be triggered in query clean up after
              // the query is finished
              queryStateMachine.transitionToFailed(
                  new PrestoException(INTERNAL_ERROR, "Query stage was aborted"));
            } else if (queryStateMachine.getQueryState() == QueryState.STARTING) {
              // if the stage has at least one task, we are running
              if (stage.hasTasks()) {
                queryStateMachine.transitionToRunning();
              }
            }
          });
    }
  }
示例#2
0
  /**
   * Recursively creates the stage for {@code plan} and the stages for all of its sub-stage plans.
   *
   * <p>For each stage created, a scheduler is registered in {@code stageSchedulers} and a linkage
   * entry (fragment id, parent, direct children) is registered in {@code stageLinkages}. A
   * listener is also added so that when a stage reaches a done state its direct children are
   * canceled.
   *
   * @param parent the stage that consumes this stage, or empty for the top of the tree
   * @param nextStageId shared counter used to allocate stage ids; incremented once per stage
   * @param locationFactory creates the location for each stage
   * @param plan plan for the stage to create, including its sub-stage plans
   * @param nodeScheduler source of node selectors for the schedulers
   * @param remoteTaskFactory passed to each created stage
   * @param session passed to each created stage
   * @param splitBatchSize passed to the source-partitioned schedulers
   * @param partitioningCache lookup from partitioning handle to node partition map
   * @param executor passed to each created stage
   * @param nodeTaskMap passed to each created stage
   * @param stageSchedulers builder receiving one scheduler per created stage
   * @param stageLinkages builder receiving one linkage per created stage
   * @return the created stages: this stage first, then each sub-tree in sub-stage order
   */
  private List<SqlStageExecution> createStages(
      Optional<SqlStageExecution> parent,
      AtomicInteger nextStageId,
      LocationFactory locationFactory,
      StageExecutionPlan plan,
      NodeScheduler nodeScheduler,
      RemoteTaskFactory remoteTaskFactory,
      Session session,
      int splitBatchSize,
      Function<PartitioningHandle, NodePartitionMap> partitioningCache,
      ExecutorService executor,
      NodeTaskMap nodeTaskMap,
      ImmutableMap.Builder<StageId, StageScheduler> stageSchedulers,
      ImmutableMap.Builder<StageId, StageLinkage> stageLinkages) {
    // Allocate this stage's id before recursing so parents get lower ids than their children.
    StageId currentStageId =
        new StageId(queryStateMachine.getQueryId(), String.valueOf(nextStageId.getAndIncrement()));
    SqlStageExecution currentStage =
        new SqlStageExecution(
            currentStageId,
            locationFactory.createStageLocation(currentStageId),
            plan.getFragment(),
            remoteTaskFactory,
            session,
            summarizeTaskInfo,
            nodeTaskMap,
            executor);

    ImmutableList.Builder<SqlStageExecution> stageTree = ImmutableList.builder();
    stageTree.add(currentStage);

    // Pick the scheduler for this stage and work out the bucket mapping handed to sub-stages.
    PartitioningHandle handle = plan.getFragment().getPartitioning();
    Optional<int[]> childBucketToPartition;
    if (!handle.equals(SOURCE_DISTRIBUTION)) {
      // nodes are pre determined by the nodePartitionMap
      NodePartitionMap partitionMap = partitioningCache.apply(handle);

      if (!plan.getDataSource().isPresent()) {
        Map<Integer, Node> nodesByPartition = partitionMap.getPartitionToNode();
        // todo this should asynchronously wait a standard timeout period before failing
        checkCondition(
            !nodesByPartition.isEmpty(), NO_NODES_AVAILABLE, "No worker nodes available");
        stageSchedulers.put(
            currentStageId, new FixedCountScheduler(currentStage, nodesByPartition));
      } else {
        stageSchedulers.put(
            currentStageId,
            new FixedSourcePartitionedScheduler(
                currentStage,
                plan.getDataSource().get(),
                partitionMap,
                splitBatchSize,
                nodeScheduler.createNodeSelector(null)));
      }
      // Both fixed-partitioning cases pass the partition map's bucket mapping to sub-stages.
      childBucketToPartition = Optional.of(partitionMap.getBucketToPartition());
    } else {
      // nodes are selected dynamically based on the constraints of the splits and the system load
      SplitSource splits = plan.getDataSource().get();
      NodeSelector selector = nodeScheduler.createNodeSelector(splits.getDataSourceName());
      SplitPlacementPolicy policy =
          new DynamicSplitPlacementPolicy(selector, currentStage::getAllTasks);
      stageSchedulers.put(
          currentStageId,
          new SourcePartitionedScheduler(currentStage, splits, policy, splitBatchSize));
      childBucketToPartition = Optional.of(new int[1]);
    }

    // Build each sub-tree; the first element of a sub-tree is the direct child of this stage.
    ImmutableSet.Builder<SqlStageExecution> directChildren = ImmutableSet.builder();
    for (StageExecutionPlan childPlan : plan.getSubStages()) {
      List<SqlStageExecution> childTree =
          createStages(
              Optional.of(currentStage),
              nextStageId,
              locationFactory,
              childPlan.withBucketToPartition(childBucketToPartition),
              nodeScheduler,
              remoteTaskFactory,
              session,
              splitBatchSize,
              partitioningCache,
              executor,
              nodeTaskMap,
              stageSchedulers,
              stageLinkages);
      stageTree.addAll(childTree);
      directChildren.add(childTree.get(0));
    }
    Set<SqlStageExecution> childStages = directChildren.build();

    // Once this stage is done, cancel its direct children.
    currentStage.addStateChangeListener(
        newState -> {
          if (!newState.isDone()) {
            return;
          }
          for (SqlStageExecution child : childStages) {
            child.cancel();
          }
        });

    stageLinkages.put(
        currentStageId, new StageLinkage(plan.getFragment().getId(), parent, childStages));

    return stageTree.build();
  }