@Override
    // Failure callback of the task info fetcher: clears the outstanding future, records the
    // failure with the error tracker while the task is not done, and always tries to schedule
    // the next request afterwards.
    public void failed(Throwable cause) {
      try (SetThreadName ignored = new SetThreadName("ContinuousTaskInfoFetcher-%s", taskId)) {
        // forget the future of the request that just failed
        synchronized (this) {
          future = null;
        }

        try {
          // if task not already done, record error
          boolean taskFinished = getTaskInfo().getState().isDone();
          if (!taskFinished) {
            getErrorTracker.requestFailed(cause);
          }
        } catch (Error | RuntimeException e) {
          // a problem while recording the failure fails and aborts the task
          failTask(e);
          abort();
          // runtime exceptions stop here; errors keep propagating to the caller
          if (e instanceof Error) {
            throw (Error) e;
          }
        } finally {
          // there is no back off here so we can get a lot of error messages when a server spins
          // down, but it typically goes away quickly because the queries get canceled
          scheduleNextRequest();
        }
      }
    }
  /**
   * Drives a query through the HTTP API and reads its output buffer batch by batch.
   *
   * <p>Steps: POST the query, look up the first task of the output stage, fetch results three
   * times (advancing the sequence id by the number of pages received), then DELETE the query and
   * check that it remains FINISHED. The test is currently disabled.
   */
  @Test(enabled = false)
  public void testQuery() throws Exception {
    // submit the query
    URI submitUri = uriFor("/v1/query");
    URI location =
        client.execute(
            preparePost()
                .setUri(submitUri)
                .setBodyGenerator(createStaticBodyGenerator("query", UTF_8))
                .build(),
            new CreatedResponseHandler());
    assertQueryStatus(location, QueryState.RUNNING);

    // locate the output buffer of the first task in the output stage
    QueryInfo queryInfo =
        client.execute(
            prepareGet().setUri(location).build(),
            createJsonResponseHandler(jsonCodec(QueryInfo.class)));
    TaskInfo firstOutputTask = queryInfo.getOutputStage().getTasks().get(0);
    URI outputLocation = uriFor("/v1/task/" + firstOutputTask.getTaskId() + "/results/out");

    // first batch
    long sequenceId = 0;
    URI firstBatchUri =
        uriBuilderFrom(outputLocation).appendPath(Long.toString(sequenceId)).build();
    PagesResponse response =
        client.execute(prepareGet().setUri(firstBatchUri).build(), new PageResponseHandler());
    List<Page> pages = response.getPages();
    assertEquals(countPositions(pages), 220);
    assertQueryStatus(location, QueryState.RUNNING);

    // second batch
    sequenceId = sequenceId + pages.size();
    URI secondBatchUri =
        uriBuilderFrom(outputLocation).appendPath(Long.toString(sequenceId)).build();
    response =
        client.execute(prepareGet().setUri(secondBatchUri).build(), new PageResponseHandler());
    pages = response.getPages();
    assertEquals(countPositions(pages), 44 + 48);

    // third batch is expected to be empty
    sequenceId = sequenceId + pages.size();
    URI thirdBatchUri =
        uriBuilderFrom(outputLocation).appendPath(Long.toString(sequenceId)).build();
    response =
        client.execute(prepareGet().setUri(thirdBatchUri).build(), new PageResponseHandler());
    pages = response.getPages();
    assertEquals(countPositions(pages), 0);

    assertQueryStatus(location, QueryState.FINISHED);

    // cancel the query
    StatusResponse cancelResponse =
        client.execute(prepareDelete().setUri(location).build(), createStatusResponseHandler());
    assertQueryStatus(location, QueryState.FINISHED);
    assertEquals(cancelResponse.getStatusCode(), HttpStatus.NO_CONTENT.code());
  }
    /**
     * Starts the next asynchronous task info request, unless the fetcher is not running, the
     * task is done, a request is still outstanding, or the error tracker has not yet granted a
     * request permit (in which case this method is re-invoked once the permit future completes).
     */
    private synchronized void scheduleNextRequest() {
      TaskInfo latestTaskInfo = HttpRemoteTask.this.taskInfo.get();

      // stopped?
      if (!running) {
        return;
      }
      // done?
      if (latestTaskInfo.getState().isDone()) {
        return;
      }

      // outstanding request?
      boolean requestOutstanding = future != null && !future.isDone();
      if (requestOutstanding) {
        // this should never happen
        log.error("Can not reschedule update because an update is already running");
        return;
      }

      // if throttled due to error, asynchronously wait for timeout and try again
      ListenableFuture<?> requestPermit = getErrorTracker.acquireRequestPermit();
      if (!requestPermit.isDone()) {
        requestPermit.addListener(this::scheduleNextRequest, executor);
        return;
      }

      // the request targets the task's own URI and carries the last known state and max wait
      URI summaryUri = uriBuilderFrom(latestTaskInfo.getSelf()).addParameter("summarize").build();
      String currentState = latestTaskInfo.getState().toString();
      Request request =
          prepareGet()
              .setUri(summaryUri)
              .setHeader(HttpHeaders.CONTENT_TYPE, MediaType.JSON_UTF_8.toString())
              .setHeader(PrestoHeaders.PRESTO_CURRENT_STATE, currentState)
              .setHeader(PrestoHeaders.PRESTO_MAX_WAIT, refreshMaxWait.toString())
              .build();

      getErrorTracker.startRequest();

      // remember the in-flight request and route its outcome back to this fetcher
      future = httpClient.executeAsync(request, createFullJsonResponseHandler(taskInfoCodec));
      Futures.addCallback(
          future, new SimpleHttpResponseHandler<>(this, request.getUri()), executor);
    }
    /**
     * Failure callback of the update request: clears the current request, flags that another
     * update is needed, records the failure with the error tracker while the task is not done,
     * and always schedules a further update.
     *
     * @param cause the reason the update request failed
     */
    @Override
    public void failed(Throwable cause) {
      try (SetThreadName ignored = new SetThreadName("UpdateResponseHandler-%s", taskId)) {
        try {
          // forget the request that just failed
          synchronized (HttpRemoteTask.this) {
            currentRequest = null;
          }

          // on failure assume we need to update again
          needsUpdate.set(true);

          // if task not already done, record error
          boolean taskFinished = getTaskInfo().getState().isDone();
          if (!taskFinished) {
            updateErrorTracker.requestFailed(cause);
          }
        } catch (Error | RuntimeException e) {
          // a problem while handling the failure fails and aborts the task
          failTask(e);
          abort();
          // runtime exceptions stop here; errors keep propagating to the caller
          if (e instanceof Error) {
            throw (Error) e;
          }
        } finally {
          scheduleUpdate();
        }
      }
    }
 /**
  * Moves the task directly to the failed state.
  *
  * <p>Builds a copy of the current task info with state {@code FAILED}, version {@code
  * TaskInfo.MAX_VERSION} and the given cause as its only failure, then hands it to {@code
  * updateTaskInfo}. The cause is logged at debug level only when the current state is not
  * already done.
  *
  * @param cause the reason the task is being failed
  */
 private void failTask(Throwable cause) {
   TaskInfo current = getTaskInfo();

   boolean alreadyDone = current.getState().isDone();
   if (!alreadyDone) {
     log.debug(cause, "Remote task failed: %s", current.getSelf());
   }

   TaskInfo failedInfo =
       new TaskInfo(
           current.getTaskId(),
           current.getNodeInstanceId(),
           TaskInfo.MAX_VERSION,
           TaskState.FAILED,
           current.getSelf(),
           current.getLastHeartbeat(),
           current.getOutputBuffers(),
           current.getNoMoreSplits(),
           current.getStats(),
           ImmutableList.of(toFailure(cause)));
   updateTaskInfo(failedInfo);
 }
Beispiel #6
0
 /**
  * Builds an in-memory record set with one row per task reported by the task manager and
  * returns a cursor over it.
  *
  * @return a cursor over the rows built for {@code TASK_TABLE}
  */
 @Override
 public RecordCursor cursor() {
   Builder rows = InMemoryRecordSet.builder(TASK_TABLE);

   for (TaskInfo task : taskManager.getAllTaskInfo()) {
     TaskStats taskStats = task.getStats();

     rows.addRow(
         // identity and state
         nodeId,
         task.getTaskId().toString(),
         task.getTaskId().getStageId().toString(),
         task.getTaskId().getQueryId().toString(),
         task.getState().toString(),
         // driver counts, widened to long
         (long) taskStats.getTotalDrivers(),
         (long) taskStats.getQueuedDrivers(),
         (long) taskStats.getRunningDrivers(),
         (long) taskStats.getCompletedDrivers(),
         // timings
         toMillis(taskStats.getTotalScheduledTime()),
         toMillis(taskStats.getTotalCpuTime()),
         toMillis(taskStats.getTotalUserTime()),
         toMillis(taskStats.getTotalBlockedTime()),
         // input and output volumes
         toBytes(taskStats.getRawInputDataSize()),
         taskStats.getRawInputPositions(),
         toBytes(taskStats.getProcessedInputDataSize()),
         taskStats.getProcessedInputPositions(),
         toBytes(taskStats.getOutputDataSize()),
         taskStats.getOutputPositions(),
         // timestamps
         toTimeStamp(taskStats.getCreateTime()),
         toTimeStamp(taskStats.getFirstStartTime()),
         toTimeStamp(task.getLastHeartbeat()),
         toTimeStamp(taskStats.getEndTime()));
   }

   return rows.build().cursor();
 }
  /**
   * Aborts this remote task.
   *
   * <p>In order: drops all pending splits and reports the reduced split count, cancels the
   * current request if there is one, passes an {@code ABORTED} copy of the current task info to
   * {@code updateTaskInfo}, and schedules an asynchronous DELETE against the task's URI.
   */
  @Override
  public synchronized void abort() {
    try (SetThreadName ignored = new SetThreadName("HttpRemoteTask-%s", taskId)) {
      // clear pending splits to free memory
      fireSplitCountChanged(-pendingSourceSplitCount);
      pendingSplits.clear();
      pendingSourceSplitCount = 0;

      // cancel pending request
      if (currentRequest != null) {
        currentRequest.cancel(true);
        currentRequest = null;
        currentRequestStartNanos = 0;
      }

      // mark task as canceled (if not already done)
      TaskInfo current = getTaskInfo();
      URI taskUri = current.getSelf();
      TaskInfo abortedInfo =
          new TaskInfo(
              current.getTaskId(),
              current.getNodeInstanceId(),
              TaskInfo.MAX_VERSION,
              TaskState.ABORTED,
              taskUri,
              current.getLastHeartbeat(),
              current.getOutputBuffers(),
              current.getNoMoreSplits(),
              current.getStats(),
              ImmutableList.<ExecutionFailureInfo>of());
      updateTaskInfo(abortedInfo);

      // send abort to task and ignore response
      URI abortUri = uriBuilderFrom(taskUri).addParameter("summarize").build();
      Request abortRequest = prepareDelete().setUri(abortUri).build();
      scheduleAsyncCleanupRequest(new Backoff(MAX_CLEANUP_RETRY_TIME), abortRequest, "abort");
    }
  }
Beispiel #8
0
  /**
   * Logs a single "TIMELINE" line describing how a query's elapsed time splits into phases.
   *
   * <p>The phases are computed from the query's create and end times, its total planning time
   * (treated as zero when absent) and the task times of leaf stages, i.e. stages without sub
   * stages:
   *
   * <ul>
   *   <li>elapsed: query end - query create
   *   <li>scheduling: earliest task first-start - query create - planning
   *   <li>running: latest task end - earliest task first-start
   *   <li>finishing: query end - latest task end
   * </ul>
   *
   * <p>Nothing is logged when the query has no create time or no end time. Any exception raised
   * while computing or logging the timeline is caught and reported as an error log entry, so
   * this method does not propagate exceptions.
   *
   * @param queryInfo the query to summarize
   */
  private void logQueryTimeline(QueryInfo queryInfo) {
    try {
      QueryStats queryStats = queryInfo.getQueryStats();
      DateTime queryStartTime = queryStats.getCreateTime();
      DateTime queryEndTime = queryStats.getEndTime();

      // query didn't finish cleanly
      if (queryStartTime == null || queryEndTime == null) {
        return;
      }

      // planning duration -- start to end of planning
      Duration planning = queryStats.getTotalPlanningTime();
      if (planning == null) {
        planning = new Duration(0, MILLISECONDS);
      }

      List<StageInfo> stages = StageInfo.getAllStages(queryInfo.getOutputStage());

      // defaults used when no leaf task reports a time: the first start falls back to the query
      // end, the last end falls back to the end of planning
      long firstTaskStartTime = queryEndTime.getMillis();
      long lastTaskEndTime = queryStartTime.getMillis() + planning.toMillis();
      for (StageInfo stage : stages) {
        // only consider leaf stages
        if (!stage.getSubStages().isEmpty()) {
          continue;
        }

        for (TaskInfo taskInfo : stage.getTasks()) {
          TaskStats taskStats = taskInfo.getStats();

          DateTime firstStartTime = taskStats.getFirstStartTime();
          if (firstStartTime != null) {
            firstTaskStartTime = Math.min(firstStartTime.getMillis(), firstTaskStartTime);
          }

          DateTime endTime = taskStats.getEndTime();
          if (endTime != null) {
            lastTaskEndTime = Math.max(endTime.getMillis(), lastTaskEndTime);
          }
        }
      }

      Duration elapsed = millis(queryEndTime.getMillis() - queryStartTime.getMillis());

      Duration scheduling =
          millis(firstTaskStartTime - queryStartTime.getMillis() - planning.toMillis());

      Duration running = millis(lastTaskEndTime - firstTaskStartTime);

      Duration finishing = millis(queryEndTime.getMillis() - lastTaskEndTime);

      log.info(
          "TIMELINE: Query %s :: elapsed %s :: planning %s :: scheduling %s :: running %s :: finishing %s :: begin %s :: end %s",
          queryInfo.getQueryId(),
          elapsed,
          planning,
          scheduling,
          running,
          finishing,
          queryStartTime,
          queryEndTime);
    } catch (Exception e) {
      log.error(e, "Error logging query timeline");
    }
  }
  /**
   * Offers a new task info value to this task and removes the splits listed in {@code sources}
   * from the pending set.
   *
   * <p>The new value is rejected by the predicate passed to {@code taskInfo.setIf} when the
   * previously stored node instance id is present and differs from the new one (treated as a
   * worker restart), when the stored state is already done, or when the new version is lower
   * than the stored one. On a detected worker restart the task is failed with a {@code
   * WORKER_RESTARTED} error and aborted.
   *
   * <p>Split-count listeners are notified through {@code fireSplitCountChanged} whenever the
   * pending splits are cleared or the stored value was updated.
   *
   * @param newValue the task info to store if it passes the checks above
   * @param sources task sources whose splits are removed from {@code pendingSplits}
   */
  private synchronized void updateTaskInfo(TaskInfo newValue, List<TaskSource> sources) {
    if (newValue.getState().isDone()) {
      // splits can be huge so clear the list
      pendingSplits.clear();
      fireSplitCountChanged(-pendingSourceSplitCount);
      pendingSourceSplitCount = 0;
    }

    // captured after the clearing above, so it no longer includes splits dropped there
    int oldPartitionedSplitCount = getPartitionedSplitCount();

    // change to new value if old value is not changed and new value has a newer version
    // workerRestarted is an AtomicBoolean only so the predicate lambda below can write to it
    AtomicBoolean workerRestarted = new AtomicBoolean();
    // NOTE(review): setIf presumably stores newValue only when the predicate returns true and
    // reports whether it did -- confirm against its definition
    boolean updated =
        taskInfo.setIf(
            newValue,
            oldValue -> {
              // did the worker restart
              if (oldValue.getNodeInstanceId().isPresent()
                  && !oldValue.getNodeInstanceId().equals(newValue.getNodeInstanceId())) {
                workerRestarted.set(true);
                return false;
              }

              if (oldValue.getState().isDone()) {
                // never update if the task has reached a terminal state
                return false;
              }
              if (newValue.getVersion() < oldValue.getVersion()) {
                // don't update to an older version (same version is ok)
                return false;
              }
              return true;
            });

    // a different node instance id means the task cannot continue: fail it, then abort
    if (workerRestarted.get()) {
      PrestoException exception =
          new PrestoException(
              WORKER_RESTARTED, format("%s (%s)", WORKER_RESTARTED_ERROR, newValue.getSelf()));
      failTask(exception);
      abort();
    }

    // remove acknowledged splits, which frees memory
    for (TaskSource source : sources) {
      PlanNodeId planNodeId = source.getPlanNodeId();
      int removed = 0;
      for (ScheduledSplit split : source.getSplits()) {
        if (pendingSplits.remove(planNodeId, split)) {
          removed++;
        }
      }
      // only splits of the fragment's partitioned source are tracked in the pending counter
      if (planNodeId.equals(planFragment.getPartitionedSource())) {
        pendingSourceSplitCount -= removed;
      }
    }

    // report the change in partitioned split count only when the stored value was replaced
    if (updated) {
      if (getTaskInfo().getState().isDone()) {
        // a done task gives back everything counted before the update
        fireSplitCountChanged(-oldPartitionedSplitCount);
      } else {
        fireSplitCountChanged(getPartitionedSplitCount() - oldPartitionedSplitCount);
      }
    }
  }