public int run(String[] args) throws Exception {
  if (args.length < 1) {
    System.err.println(
        "MRRSleepJob [-m numMapper] [-r numReducer]"
            + " [-ir numIntermediateReducer]"
            + " [-irs numIntermediateReducerStages]"
            + " [-mt mapSleepTime (msec)] [-rt reduceSleepTime (msec)]"
            + " [-irt intermediateReduceSleepTime]"
            + " [-recordt recordSleepTime (msec)]"
            + " [-generateSplitsInAM (false)/true]"
            + " [-writeSplitsToDfs (false)/true]");
    ToolRunner.printGenericCommandUsage(System.err);
    return 2;
  }

  int numMapper = 1, numReducer = 1, numIReducer = 1;
  long mapSleepTime = 100, reduceSleepTime = 100, recSleepTime = 100, iReduceSleepTime = 1;
  int mapSleepCount = 1, reduceSleepCount = 1, iReduceSleepCount = 1;
  int iReduceStagesCount = 1;
  boolean writeSplitsToDfs = false;
  boolean generateSplitsInAM = false;
  boolean splitsOptionFound = false;

  // Parse command-line options.
  for (int i = 0; i < args.length; i++) {
    if (args[i].equals("-m")) {
      numMapper = Integer.parseInt(args[++i]);
    } else if (args[i].equals("-r")) {
      numReducer = Integer.parseInt(args[++i]);
    } else if (args[i].equals("-ir")) {
      numIReducer = Integer.parseInt(args[++i]);
    } else if (args[i].equals("-mt")) {
      mapSleepTime = Long.parseLong(args[++i]);
    } else if (args[i].equals("-rt")) {
      reduceSleepTime = Long.parseLong(args[++i]);
    } else if (args[i].equals("-irt")) {
      iReduceSleepTime = Long.parseLong(args[++i]);
    } else if (args[i].equals("-irs")) {
      iReduceStagesCount = Integer.parseInt(args[++i]);
    } else if (args[i].equals("-recordt")) {
      recSleepTime = Long.parseLong(args[++i]);
    } else if (args[i].equals("-generateSplitsInAM")) {
      if (splitsOptionFound) {
        throw new RuntimeException(
            "Cannot use both -generateSplitsInAM and -writeSplitsToDfs together");
      }
      splitsOptionFound = true;
      generateSplitsInAM = Boolean.parseBoolean(args[++i]);
    } else if (args[i].equals("-writeSplitsToDfs")) {
      if (splitsOptionFound) {
        throw new RuntimeException(
            "Cannot use both -generateSplitsInAM and -writeSplitsToDfs together");
      }
      splitsOptionFound = true;
      writeSplitsToDfs = Boolean.parseBoolean(args[++i]);
    }
  }

  if (numIReducer > 0 && numReducer <= 0) {
    throw new RuntimeException("Cannot have intermediate reduces without"
        + " a final reduce");
  }

  // Each task sleeps for *SleepTime in total, recSleepTime per record,
  // so derive the number of records per task.
  mapSleepCount = (int) Math.ceil(mapSleepTime / ((double) recSleepTime));
  reduceSleepCount = (int) Math.ceil(reduceSleepTime / ((double) recSleepTime));
  iReduceSleepCount = (int) Math.ceil(iReduceSleepTime / ((double) recSleepTime));

  TezConfiguration conf = new TezConfiguration(getConf());
  FileSystem remoteFs = FileSystem.get(conf);

  // Resolve and create the staging directory used by the Tez AM.
  conf.set(
      TezConfiguration.TEZ_AM_STAGING_DIR,
      conf.get(TezConfiguration.TEZ_AM_STAGING_DIR,
          TezConfiguration.TEZ_AM_STAGING_DIR_DEFAULT));
  Path remoteStagingDir = remoteFs.makeQualified(
      new Path(
          conf.get(TezConfiguration.TEZ_AM_STAGING_DIR,
              TezConfiguration.TEZ_AM_STAGING_DIR_DEFAULT),
          Long.toString(System.currentTimeMillis())));
  TezClientUtils.ensureStagingDirExists(conf, remoteStagingDir);

  // Build the sleep DAG and submit it through a non-session TezClient.
  DAG dag = createDAG(
      remoteFs, conf, remoteStagingDir,
      numMapper, numReducer, iReduceStagesCount, numIReducer,
      mapSleepTime, mapSleepCount,
      reduceSleepTime, reduceSleepCount,
      iReduceSleepTime, iReduceSleepCount,
      writeSplitsToDfs, generateSplitsInAM);

  TezClient tezSession = new TezClient("MRRSleep", conf, false, null, credentials);
  tezSession.start();

  DAGClient dagClient = tezSession.submitDAG(dag);

  // Poll the DAG status until it completes.
  while (true) {
    DAGStatus status = dagClient.getDAGStatus(null);
    LOG.info("DAG Status: " + status);
    if (status.isCompleted()) {
      break;
    }
    try {
      Thread.sleep(1000);
    } catch (InterruptedException e) {
      // do nothing
    }
  }

  tezSession.stop();

  return dagClient.getDAGStatus(null).getState().equals(DAGStatus.State.SUCCEEDED) ? 0 : 1;
}
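// A minimal sketch of how this run() method could be driven through Hadoop's ToolRunner,
// assuming the enclosing MRRSleepJob class extends Configured and implements Tool.
// This main method is illustrative only and is not taken from the original source.
public static void main(String[] args) throws Exception {
  // ToolRunner parses the generic Hadoop options and passes the remainder to run().
  int res = ToolRunner.run(new Configuration(), new MRRSleepJob(), args);
  System.exit(res);
}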
@SuppressWarnings("unchecked")
@Before
public void setUp() throws Exception {
  utils = mock(DagUtils.class);
  fs = mock(FileSystem.class);
  path = mock(Path.class);
  when(path.getFileSystem(any(Configuration.class))).thenReturn(fs);
  when(utils.getTezDir(any(Path.class))).thenReturn(path);

  // Stub vertex creation: return a simple Vertex named after the BaseWork argument.
  when(utils.createVertex(
          any(JobConf.class), any(BaseWork.class), any(Path.class),
          any(LocalResource.class), any(List.class), any(FileSystem.class),
          any(Context.class), anyBoolean(), any(TezWork.class), any(VertexType.class)))
      .thenAnswer(
          new Answer<Vertex>() {
            @Override
            public Vertex answer(InvocationOnMock invocation) throws Throwable {
              Object[] args = invocation.getArguments();
              return Vertex.create(
                  ((BaseWork) args[1]).getName(),
                  mock(ProcessorDescriptor.class), 0, mock(Resource.class));
            }
          });

  // Stub edge creation: connect the two Vertex arguments with a mocked EdgeProperty.
  when(utils.createEdge(
          any(JobConf.class), any(Vertex.class), any(Vertex.class),
          any(TezEdgeProperty.class), any(VertexType.class)))
      .thenAnswer(
          new Answer<Edge>() {
            @Override
            public Edge answer(InvocationOnMock invocation) throws Throwable {
              Object[] args = invocation.getArguments();
              return Edge.create((Vertex) args[1], (Vertex) args[2], mock(EdgeProperty.class));
            }
          });

  // Build a small TezWork graph: two map works feeding one reduce work,
  // which in turn feeds a second reduce work.
  work = new TezWork("");
  mws = new MapWork[] {new MapWork(), new MapWork()};
  rws = new ReduceWork[] {new ReduceWork(), new ReduceWork()};
  work.addAll(mws);
  work.addAll(rws);
  int i = 0;
  for (BaseWork w : work.getAllWork()) {
    w.setName("Work " + (++i));
  }

  op = mock(Operator.class);

  LinkedHashMap<String, Operator<? extends OperatorDesc>> map =
      new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
  map.put("foo", op);
  mws[0].setAliasToWork(map);
  mws[1].setAliasToWork(map);

  LinkedHashMap<String, ArrayList<String>> pathMap =
      new LinkedHashMap<String, ArrayList<String>>();
  ArrayList<String> aliasList = new ArrayList<String>();
  aliasList.add("foo");
  pathMap.put("foo", aliasList);
  mws[0].setPathToAliases(pathMap);
  mws[1].setPathToAliases(pathMap);

  rws[0].setReducer(op);
  rws[1].setReducer(op);

  TezEdgeProperty edgeProp = new TezEdgeProperty(EdgeType.SIMPLE_EDGE);
  work.connect(mws[0], rws[0], edgeProp);
  work.connect(mws[1], rws[0], edgeProp);
  work.connect(rws[0], rws[1], edgeProp);

  task = new TezTask(utils);
  task.setWork(work);
  task.setConsole(mock(LogHelper.class));
  conf = new JobConf();
  appLr = mock(LocalResource.class);

  SessionState.start(new HiveConf());
  session = mock(TezClient.class);
  sessionState = mock(TezSessionState.class);
  when(sessionState.getSession()).thenReturn(session);
  // First DAG submission is stubbed to fail with SessionNotRunning; the second succeeds.
  when(session.submitDAG(any(DAG.class)))
      .thenThrow(new SessionNotRunning(""))
      .thenReturn(mock(DAGClient.class));
}
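// A minimal sketch of a test that could follow this setUp, exercising only the consecutive
// stubbing configured above. The test name and assertions are illustrative, not taken from
// the original suite; JUnit's @Test, fail, and assertNotNull static imports are assumed.
@Test
public void testSubmitDAGThrowsThenSucceeds() throws Exception {
  DAG dag = mock(DAG.class);
  try {
    session.submitDAG(dag); // first call is stubbed to throw SessionNotRunning
    fail("expected SessionNotRunning on first submission");
  } catch (SessionNotRunning expected) {
    // expected on the first submission
  }
  // second call returns the mocked DAGClient
  assertNotNull(session.submitDAG(dag));
}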
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();

  // Configure intermediate reduces
  conf.setInt(MRJobConfig.MRR_INTERMEDIATE_STAGES, 1);

  // Set reducer class for intermediate reduce
  conf.setClass(
      MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(1, "mapreduce.job.reduce.class"),
      MyGroupByReducer.class, Reducer.class);
  // Set reducer output key class
  conf.setClass(
      MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(
          1, "mapreduce.map.output.key.class"),
      IntWritable.class, Object.class);
  // Set reducer output value class
  conf.setClass(
      MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(
          1, "mapreduce.map.output.value.class"),
      Text.class, Object.class);
  conf.setInt(
      MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(1, "mapreduce.job.reduces"), 2);

  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length != 2) {
    System.err.println("Usage: groupbyorderbymrrtest <in> <out>");
    System.exit(2);
  }

  @SuppressWarnings("deprecation")
  Job job = new Job(conf, "groupbyorderbymrrtest");
  job.setJarByClass(GroupByOrderByMRRTest.class);

  // Configure map
  job.setMapperClass(MyMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(IntWritable.class);

  // Configure reduce
  job.setReducerClass(MyOrderByNoOpReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  job.setNumReduceTasks(1);

  FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
  FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

  TezClient tezClient = new TezClient(new TezConfiguration(conf));

  job.submit();
  JobID jobId = job.getJobID();
  ApplicationId appId = TypeConverter.toYarn(jobId).getAppId();

  DAGClient dagClient = tezClient.getDAGClient(appId);
  DAGStatus dagStatus = null;

  // Wait until the DAG reaches a reportable state.
  while (true) {
    dagStatus = dagClient.getDAGStatus();
    if (dagStatus.getState() == DAGStatus.State.RUNNING
        || dagStatus.getState() == DAGStatus.State.SUCCEEDED
        || dagStatus.getState() == DAGStatus.State.FAILED
        || dagStatus.getState() == DAGStatus.State.KILLED
        || dagStatus.getState() == DAGStatus.State.ERROR) {
      break;
    }
    try {
      Thread.sleep(500);
    } catch (InterruptedException e) {
      // continue;
    }
  }

  // Report progress while the DAG is running.
  while (dagStatus.getState() == DAGStatus.State.RUNNING) {
    try {
      ExampleDriver.printMRRDAGStatus(dagStatus);
      try {
        Thread.sleep(1000);
      } catch (InterruptedException e) {
        // continue;
      }
      dagStatus = dagClient.getDAGStatus();
    } catch (TezException e) {
      LOG.fatal("Failed to get application progress. Exiting");
      System.exit(-1);
    }
  }

  ExampleDriver.printMRRDAGStatus(dagStatus);
  LOG.info("Application completed. " + "FinalState=" + dagStatus.getState());
  System.exit(dagStatus.getState() == DAGStatus.State.SUCCEEDED ? 0 : 1);
}
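// Illustrative invocation of the job above; the jar name and paths are assumptions,
// not taken from the original source:
//
//   hadoop jar tez-mapreduce-examples.jar groupbyorderbymrrtest <in> <out>
//
// GenericOptionsParser handles the standard -D/-files/-libjars options before the two
// positional <in> and <out> arguments checked in the usage message above.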