/**
 * Builds a map describing the given edge property.
 *
 * <p>The names of the data movement, data source and scheduling types, together with the class
 * names of the edge source and edge destination, are always recorded. The history text of the
 * source and of the destination is recorded whenever it is non-null. When the edge carries an
 * edge manager descriptor, its class name is recorded, and its history text is recorded only
 * when it is neither null nor empty.
 *
 * @param edge the edge property to describe
 * @return a newly created map holding the collected entries
 */
public static Map<String, Object> convertEdgeProperty(EdgeProperty edge) {
  Map<String, Object> result = new HashMap<String, Object>();

  // Entries that are always present.
  result.put(DATA_MOVEMENT_TYPE_KEY, edge.getDataMovementType().name());
  result.put(DATA_SOURCE_TYPE_KEY, edge.getDataSourceType().name());
  result.put(SCHEDULING_TYPE_KEY, edge.getSchedulingType().name());
  result.put(EDGE_SOURCE_CLASS_KEY, edge.getEdgeSource().getClassName());
  result.put(EDGE_DESTINATION_CLASS_KEY, edge.getEdgeDestination().getClassName());

  // History text of the two endpoints; an empty (but non-null) text is still recorded.
  String sourceHistory = edge.getEdgeSource().getHistoryText();
  if (sourceHistory != null) {
    result.put(OUTPUT_USER_PAYLOAD_AS_TEXT, sourceHistory);
  }
  String destinationHistory = edge.getEdgeDestination().getHistoryText();
  if (destinationHistory != null) {
    result.put(INPUT_USER_PAYLOAD_AS_TEXT, destinationHistory);
  }

  // Edge manager details exist only when a descriptor has been set on the edge.
  EdgeManagerPluginDescriptor managerDescriptor = edge.getEdgeManagerDescriptor();
  if (managerDescriptor == null) {
    return result;
  }
  result.put(EDGE_MANAGER_CLASS_KEY, managerDescriptor.getClassName());

  // Unlike the endpoints above, an empty history text is skipped here.
  String managerHistory = managerDescriptor.getHistoryText();
  if (managerHistory != null && !managerHistory.isEmpty()) {
    result.put(USER_PAYLOAD_AS_TEXT, managerHistory);
  }
  return result;
}
/**
 * Creates the input specification for one destination task of this edge.
 *
 * <p>The specification is built from the source vertex name, the edge's destination descriptor
 * and the input count the edge manager reports for the given task, based on the total number of
 * tasks in the source vertex.
 *
 * @param destinationTaskIndex index of the destination task
 * @return a new {@code InputSpec} for that task
 */
public InputSpec getDestinationSpec(int destinationTaskIndex) {
  String sourceVertexName = sourceVertex.getName();
  int sourceTaskCount = sourceVertex.getTotalTasks();
  int inputCount = edgeManager.getNumDestinationTaskInputs(sourceTaskCount, destinationTaskIndex);
  return new InputSpec(sourceVertexName, edgeProperty.getEdgeDestination(), inputCount);
}
/**
 * Creates an edge and selects its edge manager from the property's data movement type.
 *
 * <p>{@code ONE_TO_ONE}, {@code BROADCAST} and {@code SCATTER_GATHER} each map to their own edge
 * manager implementation; any other type is rejected.
 *
 * @param edgeProperty property describing this edge; its data movement type picks the manager
 * @param eventHandler event handler stored on this edge
 * @throws TezUncheckedException if the data movement type is not one of the three handled types
 */
@SuppressWarnings("rawtypes")
public Edge(EdgeProperty edgeProperty, EventHandler eventHandler) {
  this.eventHandler = eventHandler;
  this.edgeProperty = edgeProperty;

  switch (edgeProperty.getDataMovementType()) {
    case SCATTER_GATHER:
      this.edgeManager = new ScatterGatherEdgeManager();
      break;
    case BROADCAST:
      this.edgeManager = new BroadcastEdgeManager();
      break;
    case ONE_TO_ONE:
      this.edgeManager = new OneToOneEdgeManager();
      break;
    default:
      throw new TezUncheckedException(
          "Unknown edge data movement type: " + edgeProperty.getDataMovementType());
  }
}
/**
 * Checks that speculation can be switched off for a single vertex through vertex-level
 * configuration.
 *
 * <p>Vertex "A" has {@code TEZ_AM_SPECULATION_ENABLED} set to "false"; vertex "B" keeps the
 * session settings. After the DAG succeeds, "B" must report at least one speculation and "A"
 * must report none.
 */
@Test(timeout = 10000)
public void testBasicSpeculationPerVertexConf() throws Exception {
  String noSpecVertexName = "A";
  String specVertexName = "B";

  DAG dag = DAG.create("test");
  Vertex noSpecVertex =
      Vertex.create(noSpecVertexName, ProcessorDescriptor.create("Proc.class"), 5);
  Vertex specVertex = Vertex.create(specVertexName, ProcessorDescriptor.create("Proc.class"), 5);
  // Speculation is disabled for vertex A only.
  noSpecVertex.setConf(TezConfiguration.TEZ_AM_SPECULATION_ENABLED, "false");
  dag.addVertex(noSpecVertex);
  dag.addVertex(specVertex);

  // min/max src fraction is set to 1. So vertices will run sequentially
  EdgeProperty scatterGatherProperty =
      EdgeProperty.create(
          DataMovementType.SCATTER_GATHER,
          DataSourceType.PERSISTED,
          SchedulingType.SEQUENTIAL,
          OutputDescriptor.create("O"),
          InputDescriptor.create("I"));
  dag.addEdge(Edge.create(noSpecVertex, specVertex, scatterGatherProperty));

  MockTezClient tezClient = createTezSession();
  DAGClient dagClient = tezClient.submitDAG(dag);
  DAGImpl dagImpl = (DAGImpl) mockApp.getContext().getCurrentDAG();
  TezVertexID specVertexId = dagImpl.getVertex(specVertexName).getVertexId();
  TezVertexID noSpecVertexId = dagImpl.getVertex(noSpecVertexName).getVertexId();

  // First attempt (attempt 0) of task 0 in each vertex.
  // For the speculating vertex: original attempt is killed and speculative one is successful
  TezTaskID specTaskId = TezTaskID.getInstance(specVertexId, 0);
  TezTaskAttemptID killedTaId = TezTaskAttemptID.getInstance(specTaskId, 0);
  TezTaskID noSpecTaskId = TezTaskID.getInstance(noSpecVertexId, 0);
  TezTaskAttemptID noSpecTaId = TezTaskAttemptID.getInstance(noSpecTaskId, 0);

  // cause speculation trigger for both
  mockLauncher.setStatusUpdatesForTask(killedTaId, 100);
  mockLauncher.setStatusUpdatesForTask(noSpecTaId, 100);

  mockLauncher.startScheduling(true);
  dagClient.waitForCompletion();
  Assert.assertEquals(DAGStatus.State.SUCCEEDED, dagClient.getDAGStatus(null).getState());

  org.apache.tez.dag.app.dag.Vertex specDagVertex = dagImpl.getVertex(specVertexId);
  org.apache.tez.dag.app.dag.Vertex noSpecDagVertex = dagImpl.getVertex(noSpecVertexId);

  // Speculation must have happened for vertex B (enabled) but not for vertex A (disabled).
  Assert.assertTrue(
      specDagVertex.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS).getValue() > 0);
  Assert.assertEquals(
      0, noSpecDagVertex.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS).getValue());

  tezClient.stop();
}
/** * Compute optimal parallelism needed for the job * * @return true (if parallelism is determined), false otherwise */ @VisibleForTesting boolean determineParallelismAndApply() { if (numBipartiteSourceTasksCompleted == 0) { return true; } if (numVertexManagerEventsReceived == 0) { return true; } int currentParallelism = pendingTasks.size(); /** * When overall completed output size is not even equal to desiredTaskInputSize, we can wait for * some more data to be available to determine better parallelism until max.fraction is reached. * min.fraction is just a hint to the framework and need not be honored strictly in this case. */ boolean canDetermineParallelismLater = (completedSourceTasksOutputSize < desiredTaskInputDataSize) && (numBipartiteSourceTasksCompleted < (totalNumBipartiteSourceTasks * slowStartMaxSrcCompletionFraction)); if (canDetermineParallelismLater) { LOG.info( "Defer scheduling tasks; vertex=" + getContext().getVertexName() + ", totalNumBipartiteSourceTasks=" + totalNumBipartiteSourceTasks + ", completedSourceTasksOutputSize=" + completedSourceTasksOutputSize + ", numVertexManagerEventsReceived=" + numVertexManagerEventsReceived + ", numBipartiteSourceTasksCompleted=" + numBipartiteSourceTasksCompleted + ", maxThreshold=" + (totalNumBipartiteSourceTasks * slowStartMaxSrcCompletionFraction)); return false; } long expectedTotalSourceTasksOutputSize = (totalNumBipartiteSourceTasks * completedSourceTasksOutputSize) / numVertexManagerEventsReceived; int desiredTaskParallelism = (int) ((expectedTotalSourceTasksOutputSize + desiredTaskInputDataSize - 1) / desiredTaskInputDataSize); if (desiredTaskParallelism < minTaskParallelism) { desiredTaskParallelism = minTaskParallelism; } if (desiredTaskParallelism >= currentParallelism) { return true; } // most shufflers will be assigned this range basePartitionRange = currentParallelism / desiredTaskParallelism; if (basePartitionRange <= 1) { // nothing to do if range is equal 1 partition. 
shuffler does it by default return true; } int numShufflersWithBaseRange = currentParallelism / basePartitionRange; remainderRangeForLastShuffler = currentParallelism % basePartitionRange; int finalTaskParallelism = (remainderRangeForLastShuffler > 0) ? (numShufflersWithBaseRange + 1) : (numShufflersWithBaseRange); LOG.info( "Reduce auto parallelism for vertex: " + getContext().getVertexName() + " to " + finalTaskParallelism + " from " + pendingTasks.size() + " . Expected output: " + expectedTotalSourceTasksOutputSize + " based on actual output: " + completedSourceTasksOutputSize + " from " + numVertexManagerEventsReceived + " vertex manager events. " + " desiredTaskInputSize: " + desiredTaskInputDataSize + " max slow start tasks:" + (totalNumBipartiteSourceTasks * slowStartMaxSrcCompletionFraction) + " num sources completed:" + numBipartiteSourceTasksCompleted); if (finalTaskParallelism < currentParallelism) { // final parallelism is less than actual parallelism Map<String, EdgeProperty> edgeProperties = new HashMap<String, EdgeProperty>(bipartiteSources); Iterable<Map.Entry<String, SourceVertexInfo>> bipartiteItr = getBipartiteInfo(); for (Map.Entry<String, SourceVertexInfo> entry : bipartiteItr) { String vertex = entry.getKey(); EdgeProperty oldEdgeProp = entry.getValue().edgeProperty; // use currentParallelism for numSourceTasks to maintain original state // for the source tasks CustomShuffleEdgeManagerConfig edgeManagerConfig = new CustomShuffleEdgeManagerConfig( currentParallelism, finalTaskParallelism, basePartitionRange, ((remainderRangeForLastShuffler > 0) ? 
remainderRangeForLastShuffler : basePartitionRange)); EdgeManagerPluginDescriptor edgeManagerDescriptor = EdgeManagerPluginDescriptor.create(CustomShuffleEdgeManager.class.getName()); edgeManagerDescriptor.setUserPayload(edgeManagerConfig.toUserPayload()); EdgeProperty newEdgeProp = EdgeProperty.create( edgeManagerDescriptor, oldEdgeProp.getDataSourceType(), oldEdgeProp.getSchedulingType(), oldEdgeProp.getEdgeSource(), oldEdgeProp.getEdgeDestination()); edgeProperties.put(vertex, newEdgeProp); } getContext().reconfigureVertex(finalTaskParallelism, null, edgeProperties); updatePendingTasks(); configureTargetMapping(finalTaskParallelism); } return true; }
SourceVertexInfo(EdgeProperty edgeProperty) { this.edgeProperty = edgeProperty; if (edgeProperty.getDataMovementType() == DataMovementType.SCATTER_GATHER) { finishedTaskSet = new BitSet(); } }
/**
 * Creates the output specification for one source task of this edge.
 *
 * <p>The specification is built from the destination vertex name, the edge's source descriptor
 * and the output count the edge manager reports for the given task, based on the total number of
 * tasks in the destination vertex.
 *
 * @param sourceTaskIndex index of the source task
 * @return a new {@code OutputSpec} for that task
 */
public OutputSpec getSourceSpec(int sourceTaskIndex) {
  String destinationVertexName = destinationVertex.getName();
  int destinationTaskCount = destinationVertex.getTotalTasks();
  int outputCount = edgeManager.getNumSourceTaskOutputs(destinationTaskCount, sourceTaskIndex);
  return new OutputSpec(destinationVertexName, edgeProperty.getEdgeSource(), outputCount);
}