Example #1
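  /**
   * On scheduler startup, indexes the active tasks and their history updates, then
   * for every task that is not yet DONE enqueues a new-task check (unless a deploy
   * is still pending for its deploy key) and, for every RUNNING task, attempts to
   * enqueue a healthcheck.
   */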
  private void enqueueHealthAndNewTaskChecks() {
    final long start = System.currentTimeMillis();

    final List<SingularityTask> activeTasks = taskManager.getActiveTasks();
    final Map<SingularityTaskId, SingularityTask> activeTaskMap =
        Maps.uniqueIndex(activeTasks, SingularityTaskIdHolder.getTaskIdFunction());

    final Map<SingularityTaskId, List<SingularityTaskHistoryUpdate>> taskUpdates =
        taskManager.getTaskHistoryUpdates(activeTaskMap.keySet());

    final Map<SingularityDeployKey, SingularityPendingDeploy> pendingDeploys =
        Maps.uniqueIndex(
            deployManager.getPendingDeploys(), SingularityDeployKey.FROM_PENDING_TO_DEPLOY_KEY);
    final Map<String, SingularityRequestWithState> idToRequest =
        Maps.uniqueIndex(
            requestManager.getRequests(), SingularityRequestWithState.REQUEST_STATE_TO_REQUEST_ID);

    int enqueuedNewTaskChecks = 0;
    int enqueuedHealthchecks = 0;

    for (Map.Entry<SingularityTaskId, SingularityTask> entry : activeTaskMap.entrySet()) {
      SingularityTaskId taskId = entry.getKey();
      SingularityTask task = entry.getValue();
      SimplifiedTaskState simplifiedTaskState =
          SingularityTaskHistoryUpdate.getCurrentState(taskUpdates.get(taskId));

      if (simplifiedTaskState != SimplifiedTaskState.DONE) {
        SingularityDeployKey deployKey =
            new SingularityDeployKey(taskId.getRequestId(), taskId.getDeployId());
        Optional<SingularityPendingDeploy> pendingDeploy =
            Optional.fromNullable(pendingDeploys.get(deployKey));
        Optional<SingularityRequestWithState> request =
            Optional.fromNullable(idToRequest.get(taskId.getRequestId()));

        if (!pendingDeploy.isPresent()) {
          newTaskChecker.enqueueNewTaskCheck(task, request, healthchecker);
          enqueuedNewTaskChecks++;
        }
        if (simplifiedTaskState == SimplifiedTaskState.RUNNING) {
          if (healthchecker.enqueueHealthcheck(task, pendingDeploy, request)) {
            enqueuedHealthchecks++;
          }
        }
      }
    }

    LOG.info(
        "Enqueued {} health checks and {} new task checks (out of {} active tasks) in {}",
        enqueuedHealthchecks,
        enqueuedNewTaskChecks,
        activeTasks.size(),
        JavaUtils.duration(start));
  }
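  /**
   * Fetches the children of each parent path using non-blocking Curator calls
   * fanned out behind a CountDownLatch, transcoding each child node name into an
   * id. Curator delivers the background callbacks on its event thread, and
   * checkLatch (not shown in this listing) is expected to await the latch before
   * the results are read.
   */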
  private <T extends SingularityId> List<T> getChildrenAsIdsForParentsThrows(
      final String pathNameForLogs,
      final Collection<String> parents,
      final IdTranscoder<T> idTranscoder)
      throws Exception {
    if (parents.isEmpty()) {
      return Collections.emptyList();
    }

    final List<T> objects = Lists.newArrayListWithExpectedSize(parents.size());

    final CountDownLatch latch = new CountDownLatch(parents.size());
    final AtomicInteger missing = new AtomicInteger();

    final BackgroundCallback callback =
        new BackgroundCallback() {

          @Override
          public void processResult(CuratorFramework client, CuratorEvent event) throws Exception {
            if (event.getChildren() == null || event.getChildren().isEmpty()) {
              LOG.trace("Expected children for node {} - but found none", event.getPath());

              missing.incrementAndGet();
              latch.countDown();

              return;
            }

            objects.addAll(Lists.transform(event.getChildren(), idTranscoder));

            latch.countDown();
          }
        };

    final long start = System.currentTimeMillis();

    for (String parent : parents) {
      curator.getChildren().inBackground(callback).forPath(parent);
    }

    checkLatch(latch, pathNameForLogs);

    LOG.trace(
        "Fetched {} objects from {} (missing {}) in {}",
        objects.size(),
        pathNameForLogs,
        missing.intValue(),
        JavaUtils.duration(start));

    return objects;
  }
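  /**
   * The same async fan-out pattern as above, but fetching node data rather than
   * children: one background getData() per path, where each callback either
   * transcodes the payload or counts the node as missing, and the latch gates the
   * return.
   */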
  private <T> List<T> getAsyncThrows(
      final String pathNameForLogs, final Collection<String> paths, final Transcoder<T> transcoder)
      throws Exception {
    final List<T> objects = Lists.newArrayListWithCapacity(paths.size());

    if (paths.isEmpty()) {
      return objects;
    }

    final CountDownLatch latch = new CountDownLatch(paths.size());
    final AtomicInteger missing = new AtomicInteger();

    final BackgroundCallback callback =
        new BackgroundCallback() {

          @Override
          public void processResult(CuratorFramework client, CuratorEvent event) throws Exception {
            if (event.getData() == null || event.getData().length == 0) {
              LOG.trace("Expected active node {} but it wasn't there", event.getPath());

              missing.incrementAndGet();
              latch.countDown();

              return;
            }

            objects.add(transcoder.transcode(event.getData()));

            latch.countDown();
          }
        };

    final long start = System.currentTimeMillis();

    for (String path : paths) {
      curator.getData().inBackground(callback).forPath(path);
    }

    checkLatch(latch, pathNameForLogs);

    LOG.trace(
        "Fetched {} objects from {} (missing {}) in {}",
        objects.size(),
        pathNameForLogs,
        missing.intValue(),
        JavaUtils.duration(start));

    return objects;
  }
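  /**
   * The cheapest variant of the pattern: checkExists() stat calls only. A node
   * that exists contributes its name (transcoded into an id); a missing node
   * simply counts down the latch without being recorded.
   */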
  private <T extends SingularityId> List<T> existsThrows(
      final String pathNameForLogs,
      final Collection<String> paths,
      final IdTranscoder<T> idTranscoder)
      throws Exception {
    if (paths.isEmpty()) {
      return Collections.emptyList();
    }

    final List<T> objects = Lists.newArrayListWithCapacity(paths.size());

    final CountDownLatch latch = new CountDownLatch(paths.size());

    final BackgroundCallback callback =
        new BackgroundCallback() {

          @Override
          public void processResult(CuratorFramework client, CuratorEvent event) throws Exception {
            if (event.getStat() == null) {
              latch.countDown();

              return;
            }

            objects.add(idTranscoder.apply(ZKPaths.getNodeFromPath(event.getPath())));

            latch.countDown();
          }
        };

    final long start = System.currentTimeMillis();

    for (String path : paths) {
      curator.checkExists().inBackground(callback).forPath(path);
    }

    checkLatch(latch, pathNameForLogs);

    LOG.trace(
        "Found {} objects out of {} from {} in {}",
        objects.size(),
        paths.size(),
        pathNameForLogs,
        JavaUtils.duration(start));

    return objects;
  }
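  // checkLatch() itself does not appear in this listing. A minimal sketch of what
  // it presumably does, assuming a configured zkAsyncTimeout field (the timeout
  // name and the exception choice are assumptions, not confirmed here):
  //
  //   private void checkLatch(CountDownLatch latch, String path) throws InterruptedException {
  //     if (!latch.await(zkAsyncTimeout, TimeUnit.MILLISECONDS)) {
  //       throw new IllegalStateException(
  //           String.format("Timed out waiting %s ms for %s", zkAsyncTimeout, path));
  //     }
  //   }

  /**
   * A thin timing/logging wrapper around downloadThrows(): rethrows any failure,
   * but always logs the outcome and the elapsed time from the finally block.
   */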
  public void download(S3Artifact s3Artifact, Path downloadTo) {
    final long start = System.currentTimeMillis();
    boolean success = false;

    try {
      downloadThrows(s3Artifact, downloadTo);
      success = true;
    } catch (Throwable t) {
      throw Throwables.propagate(t);
    } finally {
      log.info(
          "S3 Download {}/{} finished {} after {}",
          s3Artifact.getS3Bucket(),
          s3Artifact.getS3ObjectKey(),
          success ? "successfully" : "with error",
          JavaUtils.duration(start));
    }
  }
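  /**
   * Appends a downloaded chunk file onto the target file with a channel-to-channel
   * transfer, deleting the chunk when its channel closes (DELETE_ON_CLOSE).
   */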
  private void combineChunk(Path downloadTo, Path path) throws Exception {
    final long start = System.currentTimeMillis();
    long bytes = 0;

    log.info("Writing {} to {}", path, downloadTo);

    try (WritableByteChannel writeChannel =
        Files.newByteChannel(
            downloadTo, EnumSet.of(StandardOpenOption.APPEND, StandardOpenOption.WRITE))) {
      try (FileChannel readChannel =
          FileChannel.open(
              path, EnumSet.of(StandardOpenOption.READ, StandardOpenOption.DELETE_ON_CLOSE))) {
        bytes = readChannel.size();
        // transferTo() may transfer fewer bytes than requested, so loop until the
        // whole chunk has been appended
        long transferred = 0;
        while (transferred < bytes) {
          transferred += readChannel.transferTo(transferred, bytes - transferred, writeChannel);
        }
      }
    }
    }

    log.info("Finished writing {} bytes in {}", bytes, JavaUtils.duration(start));
  }
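  /**
   * Scans the configured S3 metadata directory once at startup, handing every file
   * with the expected metadata suffix to handleNewOrModifiedS3Metadata().
   */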
  private void readInitialFiles() throws IOException {
    final long start = System.currentTimeMillis();
    LOG.info(
        "Scanning for metadata files (*{}) in {}",
        configuration.getS3MetadataSuffix(),
        configuration.getS3MetadataDirectory());

    int foundFiles = 0;

    for (Path file : JavaUtils.iterable(configuration.getS3MetadataDirectory())) {
      if (!isS3MetadataFile(file)) {
        continue;
      }

      if (handleNewOrModifiedS3Metadata(file)) {
        foundFiles++;
      }
    }

    LOG.info("Found {} file(s) in {}", foundFiles, JavaUtils.duration(start));
  }
Example #8
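  /**
   * The scheduler startup sequence: run ZK data migrations, fetch the master state
   * from Mesos, load slaves and racks from it, repair any inconsistent scheduler
   * state, enqueue health and new-task checks, and kick off task reconciliation.
   */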
  public void startup(MasterInfo masterInfo, SchedulerDriver driver) throws Exception {
    final long start = System.currentTimeMillis();

    final String uri = mesosClient.getMasterUri(MesosUtils.getMasterHostAndPort(masterInfo));

    LOG.info("Starting up... fetching state data from: " + uri);

    zkDataMigrationRunner.checkMigrations();

    MesosMasterStateObject state = mesosClient.getMasterState(uri);

    slaveAndRackManager.loadSlavesAndRacksFromMaster(state);

    checkSchedulerForInconsistentState();

    enqueueHealthAndNewTaskChecks();

    taskReconciliation.startReconciliation();

    LOG.info("Finished startup after {}", JavaUtils.duration(start));
  }
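  /**
   * Gracefully shuts down the S3Uploader: stops under the run lock (bailing out if
   * a shutdown is already in progress), cancels the polling future without
   * interrupting an in-flight run, then shuts down both executors.
   */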
  @Override
  public void shutdown() {
    final long start = System.currentTimeMillis();
    LOG.info("Gracefully shutting down S3Uploader, this may take a few moments...");

    runLock.lock();
    try {
      if (!super.stop()) {
        LOG.info("Already shutting down, ignoring request");
        return;
      }
    } finally {
      runLock.unlock();
    }

    future.cancel(false);

    scheduler.shutdown();
    executorService.shutdown();

    LOG.info("Shut down in {}", JavaUtils.duration(start));
  }
Example #10
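  /**
   * Waits up to the remaining download budget for a single chunk future. Chunks
   * after the first are appended to the destination via combineChunk(); chunk 0 is
   * presumably written straight to the destination elsewhere. A timeout cancels
   * the future, and both timeouts and other failures are reported to the exception
   * notifier.
   */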
  private boolean handleChunk(
      S3Artifact s3Artifact,
      Future<Path> future,
      Path downloadTo,
      int chunk,
      long start,
      long remainingMillis) {
    if (remainingMillis <= 0) {
      remainingMillis = 1;
    }

    try {
      Path path = future.get(remainingMillis, TimeUnit.MILLISECONDS);

      if (chunk > 0) {
        combineChunk(downloadTo, path);
      }

      return true;
    } catch (TimeoutException te) {
      log.error(
          "Chunk {} for {} timed out after {} - had {} remaining",
          chunk,
          s3Artifact.getFilename(),
          JavaUtils.duration(start),
          JavaUtils.durationFromMillis(remainingMillis));
      future.cancel(true);
      exceptionNotifier.notify(
          te,
          ImmutableMap.of("filename", s3Artifact.getFilename(), "chunk", Integer.toString(chunk)));
    } catch (Throwable t) {
      log.error("Error while handling chunk {} for {}", chunk, s3Artifact.getFilename(), t);
      exceptionNotifier.notify(
          t,
          ImmutableMap.of("filename", s3Artifact.getFilename(), "chunk", Integer.toString(chunk)));
    }

    return false;
  }
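  /**
   * Drains the pending-request queue: for each request that should still be
   * scheduled, computes the matching task ids and deploy statistics, checks
   * cooldown, and schedules tasks. A scheduled (cron-style) request whose
   * NEW_DEPLOY produced no tasks while an active task still exists is held in the
   * queue; every other pending request is deleted once handled.
   */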
  public void drainPendingQueue(final SingularitySchedulerStateCache stateCache) {
    final long start = System.currentTimeMillis();

    final ImmutableList<SingularityPendingRequest> pendingRequests =
        ImmutableList.copyOf(requestManager.getPendingRequests());

    if (pendingRequests.isEmpty()) {
      LOG.trace("Pending queue was empty");
      return;
    }

    LOG.info("Pending queue had {} requests", pendingRequests.size());

    int totalNewScheduledTasks = 0;
    int heldForScheduledActiveTask = 0;
    int obsoleteRequests = 0;

    for (SingularityPendingRequest pendingRequest : pendingRequests) {
      Optional<SingularityRequestWithState> maybeRequest =
          requestManager.getRequest(pendingRequest.getRequestId());

      if (shouldScheduleTasks(pendingRequest, maybeRequest)) {
        final List<SingularityTaskId> matchingTaskIds =
            getMatchingTaskIds(stateCache, maybeRequest.get().getRequest(), pendingRequest);
        final SingularityDeployStatistics deployStatistics =
            getDeployStatistics(pendingRequest.getRequestId(), pendingRequest.getDeployId());

        final RequestState requestState = checkCooldown(maybeRequest.get(), deployStatistics);

        int numScheduledTasks =
            scheduleTasks(
                stateCache,
                maybeRequest.get().getRequest(),
                requestState,
                deployStatistics,
                pendingRequest,
                matchingTaskIds);

        if (numScheduledTasks == 0
            && !matchingTaskIds.isEmpty()
            && maybeRequest.get().getRequest().isScheduled()
            && pendingRequest.getPendingType() == PendingType.NEW_DEPLOY) {
          LOG.trace(
              "Holding pending request {} because it is scheduled and has an active task",
              pendingRequest);
          heldForScheduledActiveTask++;
          continue;
        }

        LOG.debug(
            "Pending request {} resulted in {} new scheduled tasks",
            pendingRequest,
            numScheduledTasks);

        totalNewScheduledTasks += numScheduledTasks;
      } else {
        LOG.debug(
            "Pending request {} was obsolete (request {})",
            pendingRequest,
            SingularityRequestWithState.getRequestState(maybeRequest));

        obsoleteRequests++;
      }

      requestManager.deletePendingRequest(pendingRequest);
    }

    LOG.info(
        "Scheduled {} new tasks ({} obsolete requests, {} held) in {}",
        totalNewScheduledTasks,
        obsoleteRequests,
        heldForScheduledActiveTask,
        JavaUtils.duration(start));
  }
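  /**
   * For every slave and rack in STARTING_DECOMMISSION, schedules cleanup for the
   * tasks still running there; machines with no remaining tasks are moved straight
   * to DECOMMISSIONED. Each affected request is re-queued against its in-use
   * deploy, or skipped with a warning when no deploy is active.
   */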
  public void checkForDecomissions(SingularitySchedulerStateCache stateCache) {
    final long start = System.currentTimeMillis();

    final Set<String> requestIdsToReschedule = Sets.newHashSet();
    final Set<SingularityTaskId> matchingTaskIds = Sets.newHashSet();

    final Collection<SingularityTaskId> activeTaskIds = stateCache.getActiveTaskIds();

    final Map<SingularitySlave, MachineState> slaves =
        getDefaultMap(slaveManager.getObjectsFiltered(MachineState.STARTING_DECOMMISSION));

    for (SingularitySlave slave : slaves.keySet()) {
      boolean foundTask = false;

      for (SingularityTask activeTask : taskManager.getTasksOnSlave(activeTaskIds, slave)) {
        cleanupTaskDueToDecomission(requestIdsToReschedule, matchingTaskIds, activeTask, slave);
        foundTask = true;
      }

      if (!foundTask) {
        slaves.put(slave, MachineState.DECOMMISSIONED);
      }
    }

    final Map<SingularityRack, MachineState> racks =
        getDefaultMap(rackManager.getObjectsFiltered(MachineState.STARTING_DECOMMISSION));

    for (SingularityRack rack : racks.keySet()) {
      boolean foundTask = false;

      for (SingularityTaskId activeTaskId : activeTaskIds) {
        if (!rack.getId().equals(activeTaskId.getRackId())) {
          continue;
        }

        foundTask = true;

        // already scheduled for cleanup by the slave pass above
        if (matchingTaskIds.contains(activeTaskId)) {
          continue;
        }

        Optional<SingularityTask> maybeTask = taskManager.getTask(activeTaskId);
        cleanupTaskDueToDecomission(
            requestIdsToReschedule, matchingTaskIds, maybeTask.get(), rack);
      }

      if (!foundTask) {
        racks.put(rack, MachineState.DECOMMISSIONED);
      }
    }

    for (String requestId : requestIdsToReschedule) {
      LOG.trace("Rescheduling request {} due to decomissions", requestId);

      Optional<String> maybeDeployId = deployManager.getInUseDeployId(requestId);

      if (maybeDeployId.isPresent()) {
        requestManager.addToPendingQueue(
            new SingularityPendingRequest(
                requestId, maybeDeployId.get(), start, PendingType.DECOMISSIONED_SLAVE_OR_RACK));
      } else {
        LOG.warn("Not rescheduling a request ({}) because of no active deploy", requestId);
      }
    }

    changeState(slaves, slaveManager);
    changeState(racks, rackManager);

    if (slaves.isEmpty()
        && racks.isEmpty()
        && requestIdsToReschedule.isEmpty()
        && matchingTaskIds.isEmpty()) {
      LOG.trace("Decomission check found nothing");
    } else {
      LOG.info(
          "Found {} decomissioning slaves, {} decomissioning racks, rescheduling {} requests and scheduling {} tasks for cleanup in {}",
          slaves.size(),
          racks.size(),
          requestIdsToReschedule.size(),
          matchingTaskIds.size(),
          JavaUtils.duration(start));
    }
  }