Example #1
0
  /**
   * Flattens an {@link EdgeProperty} into a map of descriptor entries.
   *
   * <p>The movement, source and scheduling type names plus the source and destination class names
   * are always written. History texts are optional: the source and destination texts are added
   * when non-null, the edge manager text only when it is non-null and non-empty.
   *
   * @param edge the edge property to convert
   * @return a new mutable map holding the descriptor entries
   */
  public static Map<String, Object> convertEdgeProperty(EdgeProperty edge) {
    final Map<String, Object> result = new HashMap<String, Object>();

    // Entries that are written unconditionally.
    result.put(DATA_MOVEMENT_TYPE_KEY, edge.getDataMovementType().name());
    result.put(DATA_SOURCE_TYPE_KEY, edge.getDataSourceType().name());
    result.put(SCHEDULING_TYPE_KEY, edge.getSchedulingType().name());
    result.put(EDGE_SOURCE_CLASS_KEY, edge.getEdgeSource().getClassName());
    result.put(EDGE_DESTINATION_CLASS_KEY, edge.getEdgeDestination().getClassName());

    // History text of the edge source, if any.
    final String outputHistory = edge.getEdgeSource().getHistoryText();
    if (outputHistory != null) {
      result.put(OUTPUT_USER_PAYLOAD_AS_TEXT, outputHistory);
    }

    // History text of the edge destination, if any.
    final String inputHistory = edge.getEdgeDestination().getHistoryText();
    if (inputHistory != null) {
      result.put(INPUT_USER_PAYLOAD_AS_TEXT, inputHistory);
    }

    final EdgeManagerPluginDescriptor managerDescriptor = edge.getEdgeManagerDescriptor();
    if (managerDescriptor == null) {
      // No edge manager descriptor: nothing more to record.
      return result;
    }

    result.put(EDGE_MANAGER_CLASS_KEY, managerDescriptor.getClassName());
    final String managerHistory = managerDescriptor.getHistoryText();
    if (managerHistory != null && !managerHistory.isEmpty()) {
      result.put(USER_PAYLOAD_AS_TEXT, managerHistory);
    }
    return result;
  }
 /**
  * Builds the {@link InputSpec} for one task of the destination side of this edge.
  *
  * <p>The spec is made of the source vertex name, the edge's destination descriptor and the
  * number of inputs the edge manager reports for the given destination task.
  *
  * @param destinationTaskIndex index of the destination task
  * @return a newly created input spec
  */
 public InputSpec getDestinationSpec(int destinationTaskIndex) {
   final String sourceVertexName = sourceVertex.getName();
   return new InputSpec(
       sourceVertexName,
       edgeProperty.getEdgeDestination(),
       edgeManager.getNumDestinationTaskInputs(
           sourceVertex.getTotalTasks(), destinationTaskIndex));
 }
 /**
  * Creates an edge and picks the edge manager that matches the edge's data movement type.
  *
  * @param edgeProperty properties of the edge; its data movement type selects the edge manager
  * @param eventHandler event handler stored on this edge
  * @throws TezUncheckedException if the data movement type is not ONE_TO_ONE, BROADCAST or
  *     SCATTER_GATHER
  */
 @SuppressWarnings("rawtypes")
 public Edge(EdgeProperty edgeProperty, EventHandler eventHandler) {
   this.edgeProperty = edgeProperty;
   this.eventHandler = eventHandler;

   // One built-in edge manager per supported movement type.
   switch (edgeProperty.getDataMovementType()) {
     case ONE_TO_ONE:
       this.edgeManager = new OneToOneEdgeManager();
       break;
     case BROADCAST:
       this.edgeManager = new BroadcastEdgeManager();
       break;
     case SCATTER_GATHER:
       this.edgeManager = new ScatterGatherEdgeManager();
       break;
     default:
       // Any other movement type has no edge manager here.
       throw new TezUncheckedException(
           "Unknown edge data movement type: " + edgeProperty.getDataMovementType());
   }
 }
Example #4
0
  /**
   * Verifies that speculation can be switched off for a single vertex through its vertex conf.
   *
   * <p>Vertex A has {@code TEZ_AM_SPECULATION_ENABLED} set to {@code "false"}; vertex B keeps the
   * session setting. After the DAG succeeds, B must report at least one speculation and A none.
   */
  @Test(timeout = 10000)
  public void testBasicSpeculationPerVertexConf() throws Exception {
    DAG dag = DAG.create("test");
    String vNameNoSpec = "A";
    String vNameSpec = "B";
    Vertex vA = Vertex.create(vNameNoSpec, ProcessorDescriptor.create("Proc.class"), 5);
    Vertex vB = Vertex.create(vNameSpec, ProcessorDescriptor.create("Proc.class"), 5);
    // speculation is disabled for vA only
    vA.setConf(TezConfiguration.TEZ_AM_SPECULATION_ENABLED, "false");
    dag.addVertex(vA);
    dag.addVertex(vB);
    // min/max src fraction is set to 1. So vertices will run sequentially
    dag.addEdge(
        Edge.create(
            vA,
            vB,
            EdgeProperty.create(
                DataMovementType.SCATTER_GATHER,
                DataSourceType.PERSISTED,
                SchedulingType.SEQUENTIAL,
                OutputDescriptor.create("O"),
                InputDescriptor.create("I"))));

    MockTezClient tezClient = createTezSession();
    try {
      DAGClient dagClient = tezClient.submitDAG(dag);
      DAGImpl dagImpl = (DAGImpl) mockApp.getContext().getCurrentDAG();
      TezVertexID vertexId = dagImpl.getVertex(vNameSpec).getVertexId();
      TezVertexID vertexIdNoSpec = dagImpl.getVertex(vNameNoSpec).getVertexId();
      // first attempt (attempt 0) of task 0 in each vertex
      TezTaskAttemptID killedTaId =
          TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, 0), 0);
      TezTaskAttemptID noSpecTaId =
          TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexIdNoSpec, 0), 0);

      // cause speculation trigger for both
      mockLauncher.setStatusUpdatesForTask(killedTaId, 100);
      mockLauncher.setStatusUpdatesForTask(noSpecTaId, 100);

      mockLauncher.startScheduling(true);
      dagClient.waitForCompletion();
      Assert.assertEquals(DAGStatus.State.SUCCEEDED, dagClient.getDAGStatus(null).getState());
      org.apache.tez.dag.app.dag.Vertex vSpec = dagImpl.getVertex(vertexId);
      org.apache.tez.dag.app.dag.Vertex vNoSpec = dagImpl.getVertex(vertexIdNoSpec);
      // speculation for vB but not for vA (disabled through its vertex conf)
      Assert.assertTrue(
          vSpec.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS).getValue() > 0);
      Assert.assertEquals(
          0, vNoSpec.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS).getValue());
    } finally {
      // always stop the session, even when an assertion or wait above fails
      tezClient.stop();
    }
  }
Example #5
0
  /**
   * Compute optimal parallelism needed for the job and, when it is lower than the current
   * parallelism, reconfigure the vertex with custom shuffle edge managers.
   *
   * <p>The method returns {@code true} without reconfiguring the vertex when no bipartite source
   * task has completed, when no vertex manager event has been received, when the desired
   * parallelism is not lower than the current one, or when the computed partition range per task
   * is at most 1.
   *
   * @return false only when the decision is deferred (completed output is still below the desired
   *     task input size and the max source-completion threshold has not been reached); true in
   *     every other case
   */
  @VisibleForTesting
  boolean determineParallelismAndApply() {
    // Without any completed source task there is no data to extrapolate from.
    if (numBipartiteSourceTasksCompleted == 0) {
      return true;
    }

    // Also guards the division by numVertexManagerEventsReceived further down.
    if (numVertexManagerEventsReceived == 0) {
      return true;
    }

    // Current parallelism is taken to be the number of pending tasks.
    int currentParallelism = pendingTasks.size();
    /*
     * When overall completed output size is not even equal to desiredTaskInputSize, we can wait for
     * some more data to be available to determine better parallelism until max.fraction is reached.
     * min.fraction is just a hint to the framework and need not be honored strictly in this case.
     */
    boolean canDetermineParallelismLater =
        (completedSourceTasksOutputSize < desiredTaskInputDataSize)
            && (numBipartiteSourceTasksCompleted
                < (totalNumBipartiteSourceTasks * slowStartMaxSrcCompletionFraction));
    if (canDetermineParallelismLater) {
      LOG.info(
          "Defer scheduling tasks; vertex="
              + getContext().getVertexName()
              + ", totalNumBipartiteSourceTasks="
              + totalNumBipartiteSourceTasks
              + ", completedSourceTasksOutputSize="
              + completedSourceTasksOutputSize
              + ", numVertexManagerEventsReceived="
              + numVertexManagerEventsReceived
              + ", numBipartiteSourceTasksCompleted="
              + numBipartiteSourceTasksCompleted
              + ", maxThreshold="
              + (totalNumBipartiteSourceTasks * slowStartMaxSrcCompletionFraction));
      return false;
    }

    // Extrapolate the output size seen so far to all bipartite source tasks, scaling by
    // (total source tasks / vertex manager events received).
    long expectedTotalSourceTasksOutputSize =
        (totalNumBipartiteSourceTasks * completedSourceTasksOutputSize)
            / numVertexManagerEventsReceived;

    // Ceiling division: number of tasks needed so each gets at most desiredTaskInputDataSize.
    int desiredTaskParallelism =
        (int)
            ((expectedTotalSourceTasksOutputSize + desiredTaskInputDataSize - 1)
                / desiredTaskInputDataSize);
    // Never go below the configured minimum.
    if (desiredTaskParallelism < minTaskParallelism) {
      desiredTaskParallelism = minTaskParallelism;
    }

    // Only a reduction is handled here; nothing to do if the desired value is not smaller.
    if (desiredTaskParallelism >= currentParallelism) {
      return true;
    }

    // most shufflers will be assigned this range
    // NOTE(review): divides by desiredTaskParallelism; assumes minTaskParallelism >= 1 so it is
    // never 0 here -- confirm where minTaskParallelism is initialized.
    basePartitionRange = currentParallelism / desiredTaskParallelism;

    if (basePartitionRange <= 1) {
      // nothing to do if range is equal 1 partition. shuffler does it by default
      return true;
    }

    // Tasks that get a full base range; any leftover partitions go to one extra, last task.
    int numShufflersWithBaseRange = currentParallelism / basePartitionRange;
    remainderRangeForLastShuffler = currentParallelism % basePartitionRange;

    int finalTaskParallelism =
        (remainderRangeForLastShuffler > 0)
            ? (numShufflersWithBaseRange + 1)
            : (numShufflersWithBaseRange);

    LOG.info(
        "Reduce auto parallelism for vertex: "
            + getContext().getVertexName()
            + " to "
            + finalTaskParallelism
            + " from "
            + pendingTasks.size()
            + " . Expected output: "
            + expectedTotalSourceTasksOutputSize
            + " based on actual output: "
            + completedSourceTasksOutputSize
            + " from "
            + numVertexManagerEventsReceived
            + " vertex manager events. "
            + " desiredTaskInputSize: "
            + desiredTaskInputDataSize
            + " max slow start tasks:"
            + (totalNumBipartiteSourceTasks * slowStartMaxSrcCompletionFraction)
            + " num sources completed:"
            + numBipartiteSourceTasksCompleted);

    if (finalTaskParallelism < currentParallelism) {
      // final parallelism is less than actual parallelism
      // Start from a copy of bipartiteSources and replace the entry of every source returned by
      // getBipartiteInfo() with an edge property that uses the custom shuffle edge manager.
      Map<String, EdgeProperty> edgeProperties =
          new HashMap<String, EdgeProperty>(bipartiteSources);
      Iterable<Map.Entry<String, SourceVertexInfo>> bipartiteItr = getBipartiteInfo();
      for (Map.Entry<String, SourceVertexInfo> entry : bipartiteItr) {
        String vertex = entry.getKey();
        EdgeProperty oldEdgeProp = entry.getValue().edgeProperty;
        // use currentParallelism for numSourceTasks to maintain original state
        // for the source tasks
        // The last argument is the range of the last task: the remainder when there is one,
        // otherwise a full base range.
        CustomShuffleEdgeManagerConfig edgeManagerConfig =
            new CustomShuffleEdgeManagerConfig(
                currentParallelism,
                finalTaskParallelism,
                basePartitionRange,
                ((remainderRangeForLastShuffler > 0)
                    ? remainderRangeForLastShuffler
                    : basePartitionRange));
        EdgeManagerPluginDescriptor edgeManagerDescriptor =
            EdgeManagerPluginDescriptor.create(CustomShuffleEdgeManager.class.getName());
        edgeManagerDescriptor.setUserPayload(edgeManagerConfig.toUserPayload());
        // Keep the old data source type, scheduling type, source and destination; only the edge
        // manager changes.
        EdgeProperty newEdgeProp =
            EdgeProperty.create(
                edgeManagerDescriptor,
                oldEdgeProp.getDataSourceType(),
                oldEdgeProp.getSchedulingType(),
                oldEdgeProp.getEdgeSource(),
                oldEdgeProp.getEdgeDestination());
        edgeProperties.put(vertex, newEdgeProp);
      }

      // Apply the new parallelism and edge properties (second argument is passed as null), then
      // refresh the pending tasks and the target mapping for the new task count.
      getContext().reconfigureVertex(finalTaskParallelism, null, edgeProperties);
      updatePendingTasks();
      configureTargetMapping(finalTaskParallelism);
    }
    return true;
  }
Example #6
0
 SourceVertexInfo(EdgeProperty edgeProperty) {
   this.edgeProperty = edgeProperty;
   if (edgeProperty.getDataMovementType() == DataMovementType.SCATTER_GATHER) {
     finishedTaskSet = new BitSet();
   }
 }
 /**
  * Builds the {@link OutputSpec} for one task of the source side of this edge.
  *
  * <p>The spec is made of the destination vertex name, the edge's source descriptor and the
  * number of outputs the edge manager reports for the given source task.
  *
  * @param sourceTaskIndex index of the source task
  * @return a newly created output spec
  */
 public OutputSpec getSourceSpec(int sourceTaskIndex) {
   final String destinationVertexName = destinationVertex.getName();
   return new OutputSpec(
       destinationVertexName,
       edgeProperty.getEdgeSource(),
       edgeManager.getNumSourceTaskOutputs(
           destinationVertex.getTotalTasks(), sourceTaskIndex));
 }