Beispiel #1
0
  /*
   * Helper function to create an edge property from an edge type.
   */
  private EdgeProperty createEdgeProperty(TezEdgeProperty edgeProp) throws IOException {
    DataMovementType dataMovementType;
    Class logicalInputClass;
    Class logicalOutputClass;

    EdgeProperty edgeProperty = null;
    EdgeType edgeType = edgeProp.getEdgeType();
    switch (edgeType) {
      case BROADCAST_EDGE:
        dataMovementType = DataMovementType.BROADCAST;
        logicalOutputClass = OnFileUnorderedKVOutput.class;
        logicalInputClass = ShuffledUnorderedKVInput.class;
        break;

      case CUSTOM_EDGE:
        dataMovementType = DataMovementType.CUSTOM;
        logicalOutputClass = OnFileUnorderedPartitionedKVOutput.class;
        logicalInputClass = ShuffledUnorderedKVInput.class;
        EdgeManagerDescriptor edgeDesc =
            new EdgeManagerDescriptor(CustomPartitionEdge.class.getName());
        CustomEdgeConfiguration edgeConf =
            new CustomEdgeConfiguration(edgeProp.getNumBuckets(), null);
        DataOutputBuffer dob = new DataOutputBuffer();
        edgeConf.write(dob);
        byte[] userPayload = dob.getData();
        edgeDesc.setUserPayload(userPayload);
        edgeProperty =
            new EdgeProperty(
                edgeDesc,
                DataSourceType.PERSISTED,
                SchedulingType.SEQUENTIAL,
                new OutputDescriptor(logicalOutputClass.getName()),
                new InputDescriptor(logicalInputClass.getName()));
        break;

      case CUSTOM_SIMPLE_EDGE:
        dataMovementType = DataMovementType.SCATTER_GATHER;
        logicalOutputClass = OnFileUnorderedPartitionedKVOutput.class;
        logicalInputClass = ShuffledUnorderedKVInput.class;
        break;

      case SIMPLE_EDGE:
      default:
        dataMovementType = DataMovementType.SCATTER_GATHER;
        logicalOutputClass = OnFileSortedOutput.class;
        logicalInputClass = ShuffledMergedInputLegacy.class;
        break;
    }

    if (edgeProperty == null) {
      edgeProperty =
          new EdgeProperty(
              dataMovementType,
              DataSourceType.PERSISTED,
              SchedulingType.SEQUENTIAL,
              new OutputDescriptor(logicalOutputClass.getName()),
              new InputDescriptor(logicalInputClass.getName()));
    }

    return edgeProperty;
  }
  void determineParallelismAndApply() {
    if (numSourceTasksCompleted == 0) {
      return;
    }

    if (numVertexManagerEventsReceived == 0) {
      return;
    }

    int currentParallelism = pendingTasks.size();
    long expectedTotalSourceTasksOutputSize =
        (numSourceTasks * completedSourceTasksOutputSize) / numVertexManagerEventsReceived;
    int desiredTaskParallelism =
        (int)
            ((expectedTotalSourceTasksOutputSize + desiredTaskInputDataSize - 1)
                / desiredTaskInputDataSize);
    if (desiredTaskParallelism < minTaskParallelism) {
      desiredTaskParallelism = minTaskParallelism;
    }

    if (desiredTaskParallelism >= currentParallelism) {
      return;
    }

    // most shufflers will be assigned this range
    int basePartitionRange = currentParallelism / desiredTaskParallelism;

    if (basePartitionRange <= 1) {
      // nothing to do if range is equal 1 partition. shuffler does it by default
      return;
    }

    int numShufflersWithBaseRange = currentParallelism / basePartitionRange;
    int remainderRangeForLastShuffler = currentParallelism % basePartitionRange;

    int finalTaskParallelism =
        (remainderRangeForLastShuffler > 0)
            ? (numShufflersWithBaseRange + 1)
            : (numShufflersWithBaseRange);

    LOG.info(
        "Reduce auto parallelism for vertex: "
            + context.getVertexName()
            + " to "
            + finalTaskParallelism
            + " from "
            + pendingTasks.size()
            + " . Expected output: "
            + expectedTotalSourceTasksOutputSize
            + " based on actual output: "
            + completedSourceTasksOutputSize
            + " from "
            + numVertexManagerEventsReceived
            + " vertex manager events. "
            + " desiredTaskInputSize: "
            + desiredTaskInputDataSize);

    if (finalTaskParallelism < currentParallelism) {
      // final parallelism is less than actual parallelism
      Map<String, EdgeManagerDescriptor> edgeManagers =
          new HashMap<String, EdgeManagerDescriptor>(bipartiteSources.size());
      for (String vertex : bipartiteSources.keySet()) {
        // use currentParallelism for numSourceTasks to maintain original state
        // for the source tasks
        CustomShuffleEdgeManagerConfig edgeManagerConfig =
            new CustomShuffleEdgeManagerConfig(
                currentParallelism,
                finalTaskParallelism,
                numSourceTasks,
                basePartitionRange,
                ((remainderRangeForLastShuffler > 0)
                    ? remainderRangeForLastShuffler
                    : basePartitionRange));
        EdgeManagerDescriptor edgeManagerDescriptor =
            new EdgeManagerDescriptor(CustomShuffleEdgeManager.class.getName());
        edgeManagerDescriptor.setUserPayload(edgeManagerConfig.toUserPayload());
        edgeManagers.put(vertex, edgeManagerDescriptor);
      }

      context.setVertexParallelism(finalTaskParallelism, null, edgeManagers, null);
      updatePendingTasks();
    }
  }