コード例 #1
0
ファイル: DAGUtils.java プロジェクト: willzxd/HiveMRvsHiveTez
  /**
   * Flattens an {@link EdgeProperty} into a string-keyed map.
   *
   * <p>The movement, data-source and scheduling types are stored via their {@code name()}, and
   * the source and destination descriptors via their class names. History text is optional: the
   * source and destination texts are added whenever they are non-null, while the edge manager's
   * text is added only when it is also non-empty. Edge manager entries are skipped altogether
   * when the edge has no edge manager descriptor.
   *
   * @param edge the edge property to describe; it is dereferenced without a null check
   * @return a new mutable map holding the entries described above
   */
  public static Map<String, Object> convertEdgeProperty(EdgeProperty edge) {
    final Map<String, Object> result = new HashMap<String, Object>();

    // Mandatory attributes: always present in the result.
    result.put(DATA_MOVEMENT_TYPE_KEY, edge.getDataMovementType().name());
    result.put(DATA_SOURCE_TYPE_KEY, edge.getDataSourceType().name());
    result.put(SCHEDULING_TYPE_KEY, edge.getSchedulingType().name());
    result.put(EDGE_SOURCE_CLASS_KEY, edge.getEdgeSource().getClassName());
    result.put(EDGE_DESTINATION_CLASS_KEY, edge.getEdgeDestination().getClassName());

    // Optional history text of the edge source (recorded under the "output" key).
    final String outputPayloadText = edge.getEdgeSource().getHistoryText();
    if (outputPayloadText != null) {
      result.put(OUTPUT_USER_PAYLOAD_AS_TEXT, outputPayloadText);
    }

    // Optional history text of the edge destination (recorded under the "input" key).
    final String inputPayloadText = edge.getEdgeDestination().getHistoryText();
    if (inputPayloadText != null) {
      result.put(INPUT_USER_PAYLOAD_AS_TEXT, inputPayloadText);
    }

    final EdgeManagerPluginDescriptor edgeManager = edge.getEdgeManagerDescriptor();
    if (edgeManager == null) {
      // No edge manager configured: nothing more to record.
      return result;
    }

    result.put(EDGE_MANAGER_CLASS_KEY, edgeManager.getClassName());
    // Unlike the source/destination texts, an empty edge manager text is not recorded.
    final boolean hasManagerText =
        edgeManager.getHistoryText() != null && !edgeManager.getHistoryText().isEmpty();
    if (hasManagerText) {
      result.put(USER_PAYLOAD_AS_TEXT, edgeManager.getHistoryText());
    }
    return result;
  }
コード例 #2
0
  /**
   * Estimates the parallelism this vertex needs from the source output reported so far and, when
   * that is lower than the number of pending tasks, reconfigures the vertex accordingly.
   *
   * @return {@code false} when the decision is deferred until more source output is available;
   *     {@code true} in every other case, whether or not the vertex was actually reconfigured
   */
  @VisibleForTesting
  boolean determineParallelismAndApply() {
    // Without a completed source task or a vertex manager event there is nothing to base an
    // estimate on; report "determined" and leave the parallelism untouched.
    if (numBipartiteSourceTasksCompleted == 0 || numVertexManagerEventsReceived == 0) {
      return true;
    }

    final int currentParallelism = pendingTasks.size();

    // While the completed output is still smaller than desiredTaskInputDataSize, wait for more
    // data to get a better estimate, but only until the max completion fraction of source tasks
    // is reached. The min fraction is only a hint to the framework and need not be honored
    // strictly in this case.
    if (completedSourceTasksOutputSize < desiredTaskInputDataSize
        && numBipartiteSourceTasksCompleted
            < (totalNumBipartiteSourceTasks * slowStartMaxSrcCompletionFraction)) {
      final String deferMessage =
          "Defer scheduling tasks; vertex="
              + getContext().getVertexName()
              + ", totalNumBipartiteSourceTasks="
              + totalNumBipartiteSourceTasks
              + ", completedSourceTasksOutputSize="
              + completedSourceTasksOutputSize
              + ", numVertexManagerEventsReceived="
              + numVertexManagerEventsReceived
              + ", numBipartiteSourceTasksCompleted="
              + numBipartiteSourceTasksCompleted
              + ", maxThreshold="
              + (totalNumBipartiteSourceTasks * slowStartMaxSrcCompletionFraction);
      LOG.info(deferMessage);
      return false;
    }

    // Extrapolate the output observed so far to all bipartite source tasks.
    final long projectedTotalOutputSize =
        (totalNumBipartiteSourceTasks * completedSourceTasksOutputSize)
            / numVertexManagerEventsReceived;

    // Tasks needed to consume the projected output at desiredTaskInputDataSize per task
    // (rounded up), but never fewer than minTaskParallelism.
    final int desiredTaskParallelism =
        Math.max(
            minTaskParallelism,
            (int)
                ((projectedTotalOutputSize + desiredTaskInputDataSize - 1)
                    / desiredTaskInputDataSize));

    if (desiredTaskParallelism >= currentParallelism) {
      // The estimate does not call for fewer tasks than are already pending.
      return true;
    }

    // Partition range that most shufflers will be assigned.
    basePartitionRange = currentParallelism / desiredTaskParallelism;
    if (basePartitionRange <= 1) {
      // A range of one partition per shuffler is the default; nothing to change.
      return true;
    }

    // Any leftover partitions go to one additional, final shuffler.
    remainderRangeForLastShuffler = currentParallelism % basePartitionRange;
    int finalTaskParallelism = currentParallelism / basePartitionRange;
    if (remainderRangeForLastShuffler > 0) {
      finalTaskParallelism++;
    }

    final String reduceMessage =
        "Reduce auto parallelism for vertex: "
            + getContext().getVertexName()
            + " to "
            + finalTaskParallelism
            + " from "
            + pendingTasks.size()
            + " . Expected output: "
            + projectedTotalOutputSize
            + " based on actual output: "
            + completedSourceTasksOutputSize
            + " from "
            + numVertexManagerEventsReceived
            + " vertex manager events. "
            + " desiredTaskInputSize: "
            + desiredTaskInputDataSize
            + " max slow start tasks:"
            + (totalNumBipartiteSourceTasks * slowStartMaxSrcCompletionFraction)
            + " num sources completed:"
            + numBipartiteSourceTasksCompleted;
    LOG.info(reduceMessage);

    if (finalTaskParallelism >= currentParallelism) {
      // The computed parallelism is not a reduction; keep the current configuration.
      return true;
    }

    // Replace the edge property of every bipartite source with one that uses a custom shuffle
    // edge manager configured for the reduced parallelism.
    final Map<String, EdgeProperty> edgeProperties =
        new HashMap<String, EdgeProperty>(bipartiteSources);
    for (Map.Entry<String, SourceVertexInfo> source : getBipartiteInfo()) {
      final EdgeProperty previousEdge = source.getValue().edgeProperty;

      // currentParallelism is passed as numSourceTasks to maintain the original state for the
      // source tasks. The last argument is the range of the final shuffler: the remainder when
      // there is one, otherwise a full base range.
      final CustomShuffleEdgeManagerConfig shuffleConfig =
          new CustomShuffleEdgeManagerConfig(
              currentParallelism,
              finalTaskParallelism,
              basePartitionRange,
              ((remainderRangeForLastShuffler > 0)
                  ? remainderRangeForLastShuffler
                  : basePartitionRange));

      final EdgeManagerPluginDescriptor managerDescriptor =
          EdgeManagerPluginDescriptor.create(CustomShuffleEdgeManager.class.getName());
      managerDescriptor.setUserPayload(shuffleConfig.toUserPayload());

      // Everything except the edge manager is carried over from the previous edge property.
      edgeProperties.put(
          source.getKey(),
          EdgeProperty.create(
              managerDescriptor,
              previousEdge.getDataSourceType(),
              previousEdge.getSchedulingType(),
              previousEdge.getEdgeSource(),
              previousEdge.getEdgeDestination()));
    }

    getContext().reconfigureVertex(finalTaskParallelism, null, edgeProperties);
    updatePendingTasks();
    configureTargetMapping(finalTaskParallelism);
    return true;
  }