コード例 #1
0
 /**
  * Loads this edge manager's routing parameters from the user payload attached to the context.
  *
  * <p>The payload is decoded with {@code CustomShuffleEdgeManagerConfig.fromUserPayload}. The
  * number of source tasks is taken from the context, not from the payload, and
  * {@code Preconditions.checkState} is used to verify that the destination task count carried in
  * the payload equals the one reported by the context.
  *
  * @throws RuntimeException if the payload is absent, empty, or cannot be decoded
  */
 @Override
 public void initialize() {
   final String failureMessage =
       "Could not initialize CustomShuffleEdgeManager" + " from provided user payload";

   UserPayload payload = getContext().getUserPayload();
   boolean payloadMissing =
       payload == null || payload.getPayload() == null || payload.getPayload().limit() == 0;
   if (payloadMissing) {
     throw new RuntimeException(failureMessage);
   }

   CustomShuffleEdgeManagerConfig parsed;
   try {
     parsed = CustomShuffleEdgeManagerConfig.fromUserPayload(payload);
   } catch (InvalidProtocolBufferException e) {
     // Keep the decoding failure as the cause so the original stack trace is not lost.
     throw new RuntimeException(failureMessage, e);
   }

   // Values carried in the payload.
   numSourceTaskOutputs = parsed.numSourceTaskOutputs;
   numDestinationTasks = parsed.numDestinationTasks;
   basePartitionRange = parsed.basePartitionRange;
   remainderRangeForLastShuffler = parsed.remainderRangeForLastShuffler;

   // Values obtained from the context; the destination count must agree with the payload.
   numSourceTasks = getContext().getSourceVertexNumTasks();
   int destinationTasksInContext = getContext().getDestinationVertexNumTasks();
   Preconditions.checkState(numDestinationTasks == destinationTasksInContext);
 }
コード例 #2
0
 /**
  * Loads this edge manager's routing parameters from the user payload supplied by the given
  * context.
  *
  * <p>The payload bytes are decoded with {@code CustomShuffleEdgeManagerConfig.fromUserPayload}
  * and every field, including the number of source tasks, is copied from the decoded config.
  *
  * @param edgeManagerContext context providing the serialized configuration
  * @throws RuntimeException if the payload is absent, empty, or cannot be decoded
  */
 @Override
 public void initialize(EdgeManagerContext edgeManagerContext) {
   final String failureMessage =
       "Could not initialize CustomShuffleEdgeManager" + " from provided user payload";

   byte[] payloadBytes = edgeManagerContext.getUserPayload();
   boolean payloadMissing = payloadBytes == null || payloadBytes.length == 0;
   if (payloadMissing) {
     throw new RuntimeException(failureMessage);
   }

   CustomShuffleEdgeManagerConfig parsed;
   try {
     parsed = CustomShuffleEdgeManagerConfig.fromUserPayload(payloadBytes);
   } catch (InvalidProtocolBufferException e) {
     // Keep the decoding failure as the cause so the original stack trace is not lost.
     throw new RuntimeException(failureMessage, e);
   }

   numSourceTaskOutputs = parsed.numSourceTaskOutputs;
   numDestinationTasks = parsed.numDestinationTasks;
   basePartitionRange = parsed.basePartitionRange;
   remainderRangeForLastShuffler = parsed.remainderRangeForLastShuffler;
   numSourceTasks = parsed.numSourceTasks;
 }
コード例 #3
0
  /**
   * Estimates how many tasks this vertex needs from the output reported so far by its source
   * tasks and, when fewer tasks than are currently pending would suffice, shrinks the vertex.
   *
   * <p>Nothing happens until at least one source task has completed and at least one vertex
   * manager event has been received. The total source output is extrapolated from the events
   * received so far, divided (rounding up) by the desired input size per task, and raised to
   * {@code minTaskParallelism} (never below 1) when lower. If that yields fewer tasks than are
   * pending, each remaining task is assigned a range of {@code basePartitionRange} original
   * partitions (the last task may get a shorter remainder range), a
   * {@code CustomShuffleEdgeManager} descriptor is installed for every bipartite source vertex,
   * and {@code updatePendingTasks()} is called.
   */
  void determineParallelismAndApply() {
    if (numSourceTasksCompleted == 0) {
      return;
    }

    if (numVertexManagerEventsReceived == 0) {
      return;
    }

    int currentParallelism = pendingTasks.size();
    // Extrapolate the output seen so far to all source tasks.
    long expectedTotalSourceTasksOutputSize =
        (numSourceTasks * completedSourceTasksOutputSize) / numVertexManagerEventsReceived;

    // Rounded-up division. Clamp before narrowing so that a very large estimate cannot wrap
    // around into a small or negative task count.
    int desiredTaskParallelism =
        (int)
            Math.min(
                (long) Integer.MAX_VALUE,
                (expectedTotalSourceTasksOutputSize + desiredTaskInputDataSize - 1)
                    / desiredTaskInputDataSize);
    // Honor the configured minimum, but never drop below one task: a zero estimate combined with
    // a non-positive minimum would otherwise cause a division by zero below.
    desiredTaskParallelism = Math.max(desiredTaskParallelism, Math.max(1, minTaskParallelism));

    if (desiredTaskParallelism >= currentParallelism) {
      return;
    }

    // most shufflers will be assigned this range
    int basePartitionRange = currentParallelism / desiredTaskParallelism;

    if (basePartitionRange <= 1) {
      // nothing to do if range is equal 1 partition. shuffler does it by default
      return;
    }

    int numShufflersWithBaseRange = currentParallelism / basePartitionRange;
    int remainderRangeForLastShuffler = currentParallelism % basePartitionRange;

    // A non-zero remainder needs one extra shuffler to pick up the leftover partitions.
    int finalTaskParallelism =
        (remainderRangeForLastShuffler > 0)
            ? (numShufflersWithBaseRange + 1)
            : (numShufflersWithBaseRange);

    LOG.info(
        "Reduce auto parallelism for vertex: "
            + context.getVertexName()
            + " to "
            + finalTaskParallelism
            + " from "
            + currentParallelism
            + " . Expected output: "
            + expectedTotalSourceTasksOutputSize
            + " based on actual output: "
            + completedSourceTasksOutputSize
            + " from "
            + numVertexManagerEventsReceived
            + " vertex manager events. "
            + " desiredTaskInputSize: "
            + desiredTaskInputDataSize);

    if (finalTaskParallelism < currentParallelism) {
      // final parallelism is less than actual parallelism

      // When the partitions divide evenly, the last shuffler simply gets a full base range.
      int lastShufflerRange =
          (remainderRangeForLastShuffler > 0) ? remainderRangeForLastShuffler : basePartitionRange;

      Map<String, EdgeManagerDescriptor> edgeManagers =
          new HashMap<String, EdgeManagerDescriptor>(bipartiteSources.size());
      for (String vertex : bipartiteSources.keySet()) {
        // The first argument is the original parallelism (currentParallelism), so the source
        // tasks keep their original state.
        CustomShuffleEdgeManagerConfig edgeManagerConfig =
            new CustomShuffleEdgeManagerConfig(
                currentParallelism,
                finalTaskParallelism,
                numSourceTasks,
                basePartitionRange,
                lastShufflerRange);
        EdgeManagerDescriptor edgeManagerDescriptor =
            new EdgeManagerDescriptor(CustomShuffleEdgeManager.class.getName());
        edgeManagerDescriptor.setUserPayload(edgeManagerConfig.toUserPayload());
        edgeManagers.put(vertex, edgeManagerDescriptor);
      }

      context.setVertexParallelism(finalTaskParallelism, null, edgeManagers, null);
      updatePendingTasks();
    }
  }
コード例 #4
0
  /**
   * Estimates the parallelism this vertex needs from the output reported so far by its bipartite
   * source tasks and, when fewer tasks than are currently pending would suffice, reconfigures the
   * vertex accordingly.
   *
   * <p>The total source output is extrapolated from the vertex manager events received so far,
   * divided (rounding up) by the desired input size per task, and raised to
   * {@code minTaskParallelism} (never below 1) when lower. If that yields fewer tasks than are
   * pending, each remaining task is assigned a range of {@code basePartitionRange} original
   * partitions (the last task may get a shorter remainder range), every bipartite source edge is
   * given a {@code CustomShuffleEdgeManager} descriptor, the vertex is reconfigured, and the
   * pending tasks and target mapping are updated.
   *
   * @return {@code false} only when the decision is deferred because too little source output
   *     has been seen and the slow-start maximum completion fraction has not been reached yet;
   *     {@code true} in every other case, including when no source task has completed, no vertex
   *     manager event has arrived, or no reduction is needed
   */
  @VisibleForTesting
  boolean determineParallelismAndApply() {
    if (numBipartiteSourceTasksCompleted == 0) {
      return true;
    }

    if (numVertexManagerEventsReceived == 0) {
      return true;
    }

    int currentParallelism = pendingTasks.size();
    // When overall completed output size is not even equal to desiredTaskInputSize, we can wait
    // for some more data to be available to determine better parallelism until max.fraction is
    // reached. min.fraction is just a hint to the framework and need not be honored strictly in
    // this case.
    boolean canDetermineParallelismLater =
        (completedSourceTasksOutputSize < desiredTaskInputDataSize)
            && (numBipartiteSourceTasksCompleted
                < (totalNumBipartiteSourceTasks * slowStartMaxSrcCompletionFraction));
    if (canDetermineParallelismLater) {
      LOG.info(
          "Defer scheduling tasks; vertex="
              + getContext().getVertexName()
              + ", totalNumBipartiteSourceTasks="
              + totalNumBipartiteSourceTasks
              + ", completedSourceTasksOutputSize="
              + completedSourceTasksOutputSize
              + ", numVertexManagerEventsReceived="
              + numVertexManagerEventsReceived
              + ", numBipartiteSourceTasksCompleted="
              + numBipartiteSourceTasksCompleted
              + ", maxThreshold="
              + (totalNumBipartiteSourceTasks * slowStartMaxSrcCompletionFraction));
      return false;
    }

    // Extrapolate the output seen so far to all bipartite source tasks.
    long expectedTotalSourceTasksOutputSize =
        (totalNumBipartiteSourceTasks * completedSourceTasksOutputSize)
            / numVertexManagerEventsReceived;

    // Rounded-up division. Clamp before narrowing so that a very large estimate cannot wrap
    // around into a small or negative task count.
    int desiredTaskParallelism =
        (int)
            Math.min(
                (long) Integer.MAX_VALUE,
                (expectedTotalSourceTasksOutputSize + desiredTaskInputDataSize - 1)
                    / desiredTaskInputDataSize);
    // Honor the configured minimum, but never drop below one task: a zero estimate combined with
    // a non-positive minimum would otherwise cause a division by zero below.
    desiredTaskParallelism = Math.max(desiredTaskParallelism, Math.max(1, minTaskParallelism));

    if (desiredTaskParallelism >= currentParallelism) {
      return true;
    }

    // most shufflers will be assigned this range
    basePartitionRange = currentParallelism / desiredTaskParallelism;

    if (basePartitionRange <= 1) {
      // nothing to do if range is equal 1 partition. shuffler does it by default
      return true;
    }

    int numShufflersWithBaseRange = currentParallelism / basePartitionRange;
    remainderRangeForLastShuffler = currentParallelism % basePartitionRange;

    // A non-zero remainder needs one extra shuffler to pick up the leftover partitions.
    int finalTaskParallelism =
        (remainderRangeForLastShuffler > 0)
            ? (numShufflersWithBaseRange + 1)
            : (numShufflersWithBaseRange);

    LOG.info(
        "Reduce auto parallelism for vertex: "
            + getContext().getVertexName()
            + " to "
            + finalTaskParallelism
            + " from "
            + currentParallelism
            + " . Expected output: "
            + expectedTotalSourceTasksOutputSize
            + " based on actual output: "
            + completedSourceTasksOutputSize
            + " from "
            + numVertexManagerEventsReceived
            + " vertex manager events. "
            + " desiredTaskInputSize: "
            + desiredTaskInputDataSize
            + " max slow start tasks:"
            + (totalNumBipartiteSourceTasks * slowStartMaxSrcCompletionFraction)
            + " num sources completed:"
            + numBipartiteSourceTasksCompleted);

    if (finalTaskParallelism < currentParallelism) {
      // final parallelism is less than actual parallelism
      Map<String, EdgeProperty> edgeProperties =
          new HashMap<String, EdgeProperty>(bipartiteSources);
      Iterable<Map.Entry<String, SourceVertexInfo>> bipartiteItr = getBipartiteInfo();
      for (Map.Entry<String, SourceVertexInfo> entry : bipartiteItr) {
        String vertex = entry.getKey();
        EdgeProperty oldEdgeProp = entry.getValue().edgeProperty;
        // The first argument is the original parallelism (currentParallelism), so the source
        // tasks keep their original state. When the partitions divide evenly, the last shuffler
        // simply gets a full base range.
        CustomShuffleEdgeManagerConfig edgeManagerConfig =
            new CustomShuffleEdgeManagerConfig(
                currentParallelism,
                finalTaskParallelism,
                basePartitionRange,
                ((remainderRangeForLastShuffler > 0)
                    ? remainderRangeForLastShuffler
                    : basePartitionRange));
        EdgeManagerPluginDescriptor edgeManagerDescriptor =
            EdgeManagerPluginDescriptor.create(CustomShuffleEdgeManager.class.getName());
        edgeManagerDescriptor.setUserPayload(edgeManagerConfig.toUserPayload());
        // Only the edge manager changes; all other edge attributes are carried over.
        EdgeProperty newEdgeProp =
            EdgeProperty.create(
                edgeManagerDescriptor,
                oldEdgeProp.getDataSourceType(),
                oldEdgeProp.getSchedulingType(),
                oldEdgeProp.getEdgeSource(),
                oldEdgeProp.getEdgeDestination());
        edgeProperties.put(vertex, newEdgeProp);
      }

      getContext().reconfigureVertex(finalTaskParallelism, null, edgeProperties);
      updatePendingTasks();
      configureTargetMapping(finalTaskParallelism);
    }
    return true;
  }