Ejemplo n.º 1
0
  /**
   * Initializes task tracking when the vertex starts, replays any completions that were
   * reported before the start, and then attempts an initial scheduling pass.
   *
   * @param completions task ids that have already completed, grouped by the key that is
   *     forwarded to {@code onSourceTaskCompleted} (presumably the source vertex name —
   *     confirm against the caller); may be {@code null}
   */
  @Override
  public void onVertexStarted(Map<String, List<Integer>> completions) {
    // size the pending list for every task of this vertex, then fill in the bookkeeping
    final int expectedTaskCount = context.getVertexNumTasks(context.getVertexName());
    pendingTasks = Lists.newArrayListWithCapacity(expectedTaskCount);
    updatePendingTasks();
    updateSourceTaskCount();

    final String startupMessage =
        "OnVertexStarted vertex: "
            + context.getVertexName()
            + " with "
            + numSourceTasks
            + " source tasks and "
            + totalTasksToSchedule
            + " pending tasks";
    LOG.info(startupMessage);

    // replay completions that arrived before this vertex was started
    if (completions != null) {
      for (Map.Entry<String, List<Integer>> completedBySource : completions.entrySet()) {
        final String sourceKey = completedBySource.getKey();
        for (Integer completedTaskId : completedBySource.getValue()) {
          onSourceTaskCompleted(sourceKey, completedTaskId);
        }
      }
    }

    // covers the special case where the source has 0 tasks or min fraction == 0
    schedulePendingTasks();
  }
Ejemplo n.º 2
0
 /**
  * Rebuilds {@code pendingTasks} so that it holds one entry per task index of this vertex
  * (0 up to, but excluding, the vertex's current task count) and records the resulting size
  * in {@code totalTasksToSchedule}.
  */
 void updatePendingTasks() {
   pendingTasks.clear();
   // the task count does not depend on the loop variable, so query the context only once
   final int numTasks = context.getVertexNumTasks(context.getVertexName());
   for (int i = 0; i < numTasks; ++i) {
     // Integer.valueOf replaces the deprecated Integer(int) constructor
     pendingTasks.add(Integer.valueOf(i));
   }
   totalTasksToSchedule = pendingTasks.size();
 }
Ejemplo n.º 3
0
  /**
   * Decides how many of the still-pending tasks to schedule now.
   *
   * <p>If every source task has completed, all pending tasks are scheduled. Otherwise the
   * number of tasks that should be scheduled grows linearly with the fraction of completed
   * source tasks: nothing below {@code slowStartMinSrcCompletionFraction}, everything at
   * {@code slowStartMaxSrcCompletionFraction}. When min and max are equal, everything is
   * scheduled as soon as the min fraction is reached.
   */
  void schedulePendingTasks() {
    final int remaining = pendingTasks.size();
    if (remaining == 0) {
      return;
    }

    // from here on there is at least one pending task
    if (numSourceTasksCompleted == numSourceTasks) {
      LOG.info(
          "All source tasks assigned. "
              + "Ramping up "
              + remaining
              + " remaining tasks for vertex: "
              + context.getVertexName());
      schedulePendingTasks(remaining);
      return;
    }

    // a vertex with 0 source tasks is treated as fully completed
    final float completedSourceTaskFraction =
        (numSourceTasks == 0) ? 1f : (float) numSourceTasksCompleted / numSourceTasks;

    // position of the completed fraction inside the [min, max] slow-start window
    final float window = slowStartMaxSrcCompletionFraction - slowStartMinSrcCompletionFraction;
    float rampFraction;
    if (window > 0) {
      rampFraction = (completedSourceTaskFraction - slowStartMinSrcCompletionFraction) / window;
    } else if (completedSourceTaskFraction < slowStartMinSrcCompletionFraction) {
      // min and max are equal and min has not been reached yet
      rampFraction = 0;
    } else {
      // min and max are equal: schedule 100% on reaching min
      rampFraction = 1;
    }

    // clamp to [0, 1]
    if (rampFraction < 0) {
      rampFraction = 0;
    } else if (rampFraction > 1) {
      rampFraction = 1;
    }

    final int alreadyScheduled = totalTasksToSchedule - remaining;
    final int targetScheduled = (int) (rampFraction * totalTasksToSchedule);
    final int numTasksToSchedule = targetScheduled - alreadyScheduled;

    // numTasksToSchedule can be negative if numSourceTasksCompleted does not
    // increase monotonically; in that case there is nothing to schedule
    if (numTasksToSchedule <= 0) {
      return;
    }

    LOG.info(
        "Scheduling "
            + numTasksToSchedule
            + " tasks for vertex: "
            + context.getVertexName()
            + " with totalTasks: "
            + totalTasksToSchedule
            + ". "
            + numSourceTasksCompleted
            + " source tasks completed out of "
            + numSourceTasks
            + ". SourceTaskCompletedFraction: "
            + completedSourceTaskFraction
            + " min: "
            + slowStartMinSrcCompletionFraction
            + " max: "
            + slowStartMaxSrcCompletionFraction);
    schedulePendingTasks(numTasksToSchedule);
  }
Ejemplo n.º 4
0
  /**
   * Estimates the total output of the source tasks and, when fewer tasks than are currently
   * pending would suffice to consume it, lowers the vertex parallelism accordingly.
   *
   * <p>Does nothing until at least one source task has completed and at least one vertex
   * manager event has been received. The estimate extrapolates the output size observed so
   * far, averaged per received event, to all {@code numSourceTasks}. The desired task count is
   * that estimate divided by {@code desiredTaskInputDataSize} (rounded up), bounded below by
   * {@code minTaskParallelism} and by 1.
   *
   * <p>When the parallelism is reduced, a {@code CustomShuffleEdgeManager} descriptor is
   * installed for every entry of {@code bipartiteSources} and the pending task list is rebuilt.
   *
   * <p>NOTE(review): assumes {@code desiredTaskInputDataSize} is positive; a value of 0 would
   * still fail in the ceiling division below — confirm where it is initialized.
   */
  void determineParallelismAndApply() {
    if (numSourceTasksCompleted == 0) {
      return;
    }

    if (numVertexManagerEventsReceived == 0) {
      return;
    }

    int currentParallelism = pendingTasks.size();
    // extrapolate the average output per received event to all source tasks
    long expectedTotalSourceTasksOutputSize =
        (numSourceTasks * completedSourceTasksOutputSize) / numVertexManagerEventsReceived;
    // ceiling division; kept as long so that a very large estimate cannot wrap around
    // when narrowed to int and be mistaken for a small desired parallelism
    long desiredParallelism =
        (expectedTotalSourceTasksOutputSize + desiredTaskInputDataSize - 1)
            / desiredTaskInputDataSize;
    if (desiredParallelism < minTaskParallelism) {
      desiredParallelism = minTaskParallelism;
    }
    if (desiredParallelism < 1) {
      // at least one task is always needed; this also keeps the division below from
      // dividing by zero when the estimate is 0 and minTaskParallelism is not positive
      desiredParallelism = 1;
    }

    if (desiredParallelism >= currentParallelism) {
      return;
    }

    // safe narrowing: 1 <= desiredParallelism < currentParallelism
    int desiredTaskParallelism = (int) desiredParallelism;

    // most shufflers will be assigned this range
    int basePartitionRange = currentParallelism / desiredTaskParallelism;

    if (basePartitionRange <= 1) {
      // nothing to do if range is equal 1 partition. shuffler does it by default
      return;
    }

    int numShufflersWithBaseRange = currentParallelism / basePartitionRange;
    int remainderRangeForLastShuffler = currentParallelism % basePartitionRange;

    // a non-empty remainder needs one extra shuffler with a shorter range
    int finalTaskParallelism =
        (remainderRangeForLastShuffler > 0)
            ? (numShufflersWithBaseRange + 1)
            : (numShufflersWithBaseRange);

    LOG.info(
        "Reduce auto parallelism for vertex: "
            + context.getVertexName()
            + " to "
            + finalTaskParallelism
            + " from "
            + pendingTasks.size()
            + " . Expected output: "
            + expectedTotalSourceTasksOutputSize
            + " based on actual output: "
            + completedSourceTasksOutputSize
            + " from "
            + numVertexManagerEventsReceived
            + " vertex manager events. "
            + " desiredTaskInputSize: "
            + desiredTaskInputDataSize);

    if (finalTaskParallelism < currentParallelism) {
      // final parallelism is less than actual parallelism
      Map<String, EdgeManagerDescriptor> edgeManagers =
          new HashMap<String, EdgeManagerDescriptor>(bipartiteSources.size());
      for (String vertex : bipartiteSources.keySet()) {
        // use currentParallelism for numSourceTasks to maintain original state
        // for the source tasks
        CustomShuffleEdgeManagerConfig edgeManagerConfig =
            new CustomShuffleEdgeManagerConfig(
                currentParallelism,
                finalTaskParallelism,
                numSourceTasks,
                basePartitionRange,
                ((remainderRangeForLastShuffler > 0)
                    ? remainderRangeForLastShuffler
                    : basePartitionRange));
        EdgeManagerDescriptor edgeManagerDescriptor =
            new EdgeManagerDescriptor(CustomShuffleEdgeManager.class.getName());
        edgeManagerDescriptor.setUserPayload(edgeManagerConfig.toUserPayload());
        edgeManagers.put(vertex, edgeManagerDescriptor);
      }

      context.setVertexParallelism(finalTaskParallelism, null, edgeManagers, null);
      // the vertex now has fewer tasks, so rebuild the pending task list
      updatePendingTasks();
    }
  }