예제 #1
0
    /**
     * Starts the next asynchronous task-info GET request.
     *
     * <p>Returns without sending anything when the fetcher is not running, the task is already
     * done, or a previous request is still outstanding. When the error tracker's permit future is
     * not yet complete, this method registers itself as a listener on that future and returns, so
     * the attempt is repeated once the permit becomes available.
     */
    private synchronized void scheduleNextRequest() {
      TaskInfo latest = HttpRemoteTask.this.taskInfo.get();

      // stopped?
      if (!running) {
        return;
      }
      // task already in a terminal state?
      if (latest.getState().isDone()) {
        return;
      }

      // an in-flight request is not expected at this point; log it and bail out
      boolean requestOutstanding = future != null && !future.isDone();
      if (requestOutstanding) {
        log.error("Can not reschedule update because an update is already running");
        return;
      }

      // throttled because of earlier errors: come back once the permit future completes
      ListenableFuture<?> permit = getErrorTracker.acquireRequestPermit();
      if (!permit.isDone()) {
        permit.addListener(this::scheduleNextRequest, executor);
        return;
      }

      // GET against the task's own URI, carrying the state we currently know about
      Request request =
          prepareGet()
              .setUri(uriBuilderFrom(latest.getSelf()).addParameter("summarize").build())
              .setHeader(HttpHeaders.CONTENT_TYPE, MediaType.JSON_UTF_8.toString())
              .setHeader(PrestoHeaders.PRESTO_CURRENT_STATE, latest.getState().toString())
              .setHeader(PrestoHeaders.PRESTO_MAX_WAIT, refreshMaxWait.toString())
              .build();

      getErrorTracker.startRequest();

      // remember the in-flight request, then route its outcome back to this object
      future = httpClient.executeAsync(request, createFullJsonResponseHandler(taskInfoCodec));
      Futures.addCallback(
          future, new SimpleHttpResponseHandler<>(this, request.getUri()), executor);
    }
예제 #2
0
 /**
  * Builds an in-memory record set over {@code TASK_TABLE} containing one row for every task
  * returned by the task manager, and returns a cursor over it.
  */
 @Override
 public RecordCursor cursor() {
   Builder rows = InMemoryRecordSet.builder(TASK_TABLE);
   for (TaskInfo info : taskManager.getAllTaskInfo()) {
     TaskStats taskStats = info.getStats();
     rows.addRow(
         // identity and state
         nodeId,
         info.getTaskId().toString(),
         info.getTaskId().getStageId().toString(),
         info.getTaskId().getQueryId().toString(),
         info.getState().toString(),
         // driver counts, widened to long
         (long) taskStats.getTotalDrivers(),
         (long) taskStats.getQueuedDrivers(),
         (long) taskStats.getRunningDrivers(),
         (long) taskStats.getCompletedDrivers(),
         // timings
         toMillis(taskStats.getTotalScheduledTime()),
         toMillis(taskStats.getTotalCpuTime()),
         toMillis(taskStats.getTotalUserTime()),
         toMillis(taskStats.getTotalBlockedTime()),
         // data sizes and positions
         toBytes(taskStats.getRawInputDataSize()),
         taskStats.getRawInputPositions(),
         toBytes(taskStats.getProcessedInputDataSize()),
         taskStats.getProcessedInputPositions(),
         toBytes(taskStats.getOutputDataSize()),
         taskStats.getOutputPositions(),
         // timestamps
         toTimeStamp(taskStats.getCreateTime()),
         toTimeStamp(taskStats.getFirstStartTime()),
         toTimeStamp(info.getLastHeartbeat()),
         toTimeStamp(taskStats.getEndTime()));
   }
   return rows.build().cursor();
 }
예제 #3
0
    /**
     * Handles a failed task-info request: clears the stored future, reports the failure to the
     * error tracker if the task is not yet done, and always schedules the next request.
     *
     * <p>If reporting the failure itself throws, the task is failed and aborted; an {@link Error}
     * is rethrown afterwards, a {@link RuntimeException} is not.
     *
     * @param cause the reason the request failed
     */
    @Override
    public void failed(Throwable cause) {
      try (SetThreadName ignored = new SetThreadName("ContinuousTaskInfoFetcher-%s", taskId)) {
        // the request is over, so drop the reference to its future
        synchronized (this) {
          future = null;
        }

        try {
          // only record the error while the task has not reached a done state
          if (!getTaskInfo().getState().isDone()) {
            getErrorTracker.requestFailed(cause);
          }
        } catch (Error | RuntimeException e) {
          failTask(e);
          abort();
          // errors keep propagating; runtime exceptions stop here
          if (e instanceof Error) {
            throw (Error) e;
          }
        } finally {
          // No back off is applied here, so a server that is spinning down can produce many
          // error messages; this usually passes quickly because the queries get canceled.
          scheduleNextRequest();
        }
      }
    }
예제 #4
0
    /**
     * Handles a failed update request: clears the current request, marks the task as needing
     * another update, reports the failure to the update error tracker if the task is not yet
     * done, and always schedules another update.
     *
     * <p>If any of that throws, the task is failed and aborted; an {@link Error} is rethrown
     * afterwards, a {@link RuntimeException} is not.
     *
     * @param cause the reason the request failed
     */
    @Override
    public void failed(Throwable cause) {
      try (SetThreadName ignored = new SetThreadName("UpdateResponseHandler-%s", taskId)) {
        try {
          // the request has ended, so forget it (guarded by the enclosing task's monitor)
          synchronized (HttpRemoteTask.this) {
            currentRequest = null;
          }

          // a failed update has to be sent again
          needsUpdate.set(true);

          // only record the error while the task has not reached a done state
          if (!getTaskInfo().getState().isDone()) {
            updateErrorTracker.requestFailed(cause);
          }
        } catch (Error | RuntimeException e) {
          failTask(e);
          abort();
          // errors keep propagating; runtime exceptions stop here
          if (e instanceof Error) {
            throw (Error) e;
          }
        } finally {
          scheduleUpdate();
        }
      }
    }
예제 #5
0
 /**
  * Moves the task directly to the {@code FAILED} state.
  *
  * <p>A new {@code TaskInfo} is built from the current one, with the version replaced by
  * {@code TaskInfo.MAX_VERSION}, the state replaced by {@code TaskState.FAILED}, and the failure
  * list replaced by a single entry derived from {@code cause}. The result is handed to
  * {@code updateTaskInfo}. The failure is logged at debug level only when the current state is
  * not already done.
  *
  * @param cause the throwable recorded as the task's failure
  */
 private void failTask(Throwable cause) {
   TaskInfo current = getTaskInfo();

   boolean alreadyDone = current.getState().isDone();
   if (!alreadyDone) {
     log.debug(cause, "Remote task failed: %s", current.getSelf());
   }

   // copy everything from the current snapshot except version, state and failures
   TaskInfo failedInfo =
       new TaskInfo(
           current.getTaskId(),
           current.getNodeInstanceId(),
           TaskInfo.MAX_VERSION,
           TaskState.FAILED,
           current.getSelf(),
           current.getLastHeartbeat(),
           current.getOutputBuffers(),
           current.getNoMoreSplits(),
           current.getStats(),
           ImmutableList.of(toFailure(cause)));
   updateTaskInfo(failedInfo);
 }
예제 #6
0
  /**
   * Applies a newly received {@code TaskInfo} to this task and reconciles the pending-split
   * bookkeeping.
   *
   * <p>In order, this method: drops all pending splits if {@code newValue} is in a done state;
   * conditionally replaces the held task info (see the predicate below); fails and aborts the
   * task if the predicate detected a changed node instance id; removes the splits listed in
   * {@code sources} from the pending set; and, if the task info was replaced, fires a split count
   * change notification.
   *
   * <p>The method is {@code synchronized} on this object.
   *
   * @param newValue the candidate task info
   * @param sources task sources whose splits are removed from {@code pendingSplits}
   */
  private synchronized void updateTaskInfo(TaskInfo newValue, List<TaskSource> sources) {
    if (newValue.getState().isDone()) {
      // splits can be huge so clear the list
      pendingSplits.clear();
      // report the pending source splits as gone before zeroing the counter
      fireSplitCountChanged(-pendingSourceSplitCount);
      pendingSourceSplitCount = 0;
    }

    // captured after the clearing above, so it reflects any reset that just happened
    int oldPartitionedSplitCount = getPartitionedSplitCount();

    // change to new value if old value is not changed and new value has a newer version
    // (an AtomicBoolean is used as a mutable holder because the lambda cannot assign a local)
    AtomicBoolean workerRestarted = new AtomicBoolean();
    // NOTE(review): setIf presumably returns true only when the value was replaced — confirm
    boolean updated =
        taskInfo.setIf(
            newValue,
            oldValue -> {
              // did the worker restart
              // (old value carries a node instance id and the new value's differs from it)
              if (oldValue.getNodeInstanceId().isPresent()
                  && !oldValue.getNodeInstanceId().equals(newValue.getNodeInstanceId())) {
                workerRestarted.set(true);
                return false;
              }

              if (oldValue.getState().isDone()) {
                // never update if the task has reached a terminal state
                return false;
              }
              if (newValue.getVersion() < oldValue.getVersion()) {
                // don't update to an older version (same version is ok)
                return false;
              }
              return true;
            });

    if (workerRestarted.get()) {
      // fail the task with a WORKER_RESTARTED error naming the new value's URI, then abort;
      // there is no return here, so the split cleanup below still runs
      PrestoException exception =
          new PrestoException(
              WORKER_RESTARTED, format("%s (%s)", WORKER_RESTARTED_ERROR, newValue.getSelf()));
      failTask(exception);
      abort();
    }

    // remove acknowledged splits, which frees memory
    for (TaskSource source : sources) {
      PlanNodeId planNodeId = source.getPlanNodeId();
      // number of this source's splits that were actually present in pendingSplits
      int removed = 0;
      for (ScheduledSplit split : source.getSplits()) {
        if (pendingSplits.remove(planNodeId, split)) {
          removed++;
        }
      }
      // only splits of the fragment's partitioned source count toward pendingSourceSplitCount
      if (planNodeId.equals(planFragment.getPartitionedSource())) {
        pendingSourceSplitCount -= removed;
      }
    }

    if (updated) {
      if (getTaskInfo().getState().isDone()) {
        // task is done: report the whole previously captured count as removed
        fireSplitCountChanged(-oldPartitionedSplitCount);
      } else {
        // task still active: report the difference between the current and captured counts
        fireSplitCountChanged(getPartitionedSplitCount() - oldPartitionedSplitCount);
      }
    }
  }