/**
 * Helper function to create an edge property from an edge type.
 *
 * <p>The edge type of {@code edgeProp} selects the data movement type and the
 * logical output/input classes. All edges are created with
 * {@code DataSourceType.PERSISTED} and {@code SchedulingType.SEQUENTIAL}.
 * A {@code CUSTOM_EDGE} additionally gets an {@code EdgeManagerDescriptor} for
 * {@code CustomPartitionEdge} whose user payload is the serialized
 * {@code CustomEdgeConfiguration} built from the edge's bucket count.
 * Unrecognized edge types are treated like {@code SIMPLE_EDGE}.
 *
 * @param edgeProp description of the edge; supplies the edge type and, for
 *        custom edges, the number of buckets
 * @return the edge property matching the edge type
 * @throws IOException if writing the custom edge configuration fails
 */
private EdgeProperty createEdgeProperty(TezEdgeProperty edgeProp) throws IOException {
  DataMovementType dataMovementType;
  Class<?> logicalInputClass;
  Class<?> logicalOutputClass;

  EdgeType edgeType = edgeProp.getEdgeType();
  switch (edgeType) {
    case BROADCAST_EDGE:
      dataMovementType = DataMovementType.BROADCAST;
      logicalOutputClass = OnFileUnorderedKVOutput.class;
      logicalInputClass = ShuffledUnorderedKVInput.class;
      break;

    case CUSTOM_EDGE: {
      logicalOutputClass = OnFileUnorderedPartitionedKVOutput.class;
      logicalInputClass = ShuffledUnorderedKVInput.class;

      EdgeManagerDescriptor edgeDesc =
          new EdgeManagerDescriptor(CustomPartitionEdge.class.getName());
      CustomEdgeConfiguration edgeConf =
          new CustomEdgeConfiguration(edgeProp.getNumBuckets(), null);
      DataOutputBuffer dob = new DataOutputBuffer();
      edgeConf.write(dob);
      // getData() exposes the whole backing array; only the first getLength()
      // bytes were actually written, so trim the payload to the valid region.
      byte[] userPayload = java.util.Arrays.copyOf(dob.getData(), dob.getLength());
      edgeDesc.setUserPayload(userPayload);

      // Custom edges are described by their edge manager instead of a data
      // movement type, so build and return the property right here.
      return new EdgeProperty(
          edgeDesc,
          DataSourceType.PERSISTED,
          SchedulingType.SEQUENTIAL,
          new OutputDescriptor(logicalOutputClass.getName()),
          new InputDescriptor(logicalInputClass.getName()));
    }

    case CUSTOM_SIMPLE_EDGE:
      dataMovementType = DataMovementType.SCATTER_GATHER;
      logicalOutputClass = OnFileUnorderedPartitionedKVOutput.class;
      logicalInputClass = ShuffledUnorderedKVInput.class;
      break;

    case SIMPLE_EDGE:
    default:
      dataMovementType = DataMovementType.SCATTER_GATHER;
      logicalOutputClass = OnFileSortedOutput.class;
      logicalInputClass = ShuffledMergedInputLegacy.class;
      break;
  }

  return new EdgeProperty(
      dataMovementType,
      DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      new OutputDescriptor(logicalOutputClass.getName()),
      new InputDescriptor(logicalInputClass.getName()));
}
void determineParallelismAndApply() { if (numSourceTasksCompleted == 0) { return; } if (numVertexManagerEventsReceived == 0) { return; } int currentParallelism = pendingTasks.size(); long expectedTotalSourceTasksOutputSize = (numSourceTasks * completedSourceTasksOutputSize) / numVertexManagerEventsReceived; int desiredTaskParallelism = (int) ((expectedTotalSourceTasksOutputSize + desiredTaskInputDataSize - 1) / desiredTaskInputDataSize); if (desiredTaskParallelism < minTaskParallelism) { desiredTaskParallelism = minTaskParallelism; } if (desiredTaskParallelism >= currentParallelism) { return; } // most shufflers will be assigned this range int basePartitionRange = currentParallelism / desiredTaskParallelism; if (basePartitionRange <= 1) { // nothing to do if range is equal 1 partition. shuffler does it by default return; } int numShufflersWithBaseRange = currentParallelism / basePartitionRange; int remainderRangeForLastShuffler = currentParallelism % basePartitionRange; int finalTaskParallelism = (remainderRangeForLastShuffler > 0) ? (numShufflersWithBaseRange + 1) : (numShufflersWithBaseRange); LOG.info( "Reduce auto parallelism for vertex: " + context.getVertexName() + " to " + finalTaskParallelism + " from " + pendingTasks.size() + " . Expected output: " + expectedTotalSourceTasksOutputSize + " based on actual output: " + completedSourceTasksOutputSize + " from " + numVertexManagerEventsReceived + " vertex manager events. 
" + " desiredTaskInputSize: " + desiredTaskInputDataSize); if (finalTaskParallelism < currentParallelism) { // final parallelism is less than actual parallelism Map<String, EdgeManagerDescriptor> edgeManagers = new HashMap<String, EdgeManagerDescriptor>(bipartiteSources.size()); for (String vertex : bipartiteSources.keySet()) { // use currentParallelism for numSourceTasks to maintain original state // for the source tasks CustomShuffleEdgeManagerConfig edgeManagerConfig = new CustomShuffleEdgeManagerConfig( currentParallelism, finalTaskParallelism, numSourceTasks, basePartitionRange, ((remainderRangeForLastShuffler > 0) ? remainderRangeForLastShuffler : basePartitionRange)); EdgeManagerDescriptor edgeManagerDescriptor = new EdgeManagerDescriptor(CustomShuffleEdgeManager.class.getName()); edgeManagerDescriptor.setUserPayload(edgeManagerConfig.toUserPayload()); edgeManagers.put(vertex, edgeManagerDescriptor); } context.setVertexParallelism(finalTaskParallelism, null, edgeManagers, null); updatePendingTasks(); } }