public int run(String[] args) throws Exception {

    if (args.length < 1) {
      System.err.println(
          "MRRSleepJob [-m numMapper] [-r numReducer]"
              + " [-ir numIntermediateReducer]"
              + " [-irs numIntermediateReducerStages]"
              + " [-mt mapSleepTime (msec)] [-rt reduceSleepTime (msec)]"
              + " [-irt intermediateReduceSleepTime]"
              + " [-recordt recordSleepTime (msec)]"
              + " [-generateSplitsInAM (false)/true]"
              + " [-writeSplitsToDfs (false)/true]");
      ToolRunner.printGenericCommandUsage(System.err);
      return 2;
    }

    int numMapper = 1, numReducer = 1, numIReducer = 1;
    long mapSleepTime = 100, reduceSleepTime = 100, recSleepTime = 100, iReduceSleepTime = 1;
    int mapSleepCount = 1, reduceSleepCount = 1, iReduceSleepCount = 1;
    int iReduceStagesCount = 1;
    boolean writeSplitsToDfs = false;
    boolean generateSplitsInAM = false;
    boolean splitsOptionFound = false;

    for (int i = 0; i < args.length; i++) {
      if (args[i].equals("-m")) {
        numMapper = Integer.parseInt(args[++i]);
      } else if (args[i].equals("-r")) {
        numReducer = Integer.parseInt(args[++i]);
      } else if (args[i].equals("-ir")) {
        numIReducer = Integer.parseInt(args[++i]);
      } else if (args[i].equals("-mt")) {
        mapSleepTime = Long.parseLong(args[++i]);
      } else if (args[i].equals("-rt")) {
        reduceSleepTime = Long.parseLong(args[++i]);
      } else if (args[i].equals("-irt")) {
        iReduceSleepTime = Long.parseLong(args[++i]);
      } else if (args[i].equals("-irs")) {
        iReduceStagesCount = Integer.parseInt(args[++i]);
      } else if (args[i].equals("-recordt")) {
        recSleepTime = Long.parseLong(args[++i]);
      } else if (args[i].equals("-generateSplitsInAM")) {
        if (splitsOptionFound) {
          throw new RuntimeException(
              "Cannot use both -generateSplitsInAm and -writeSplitsToDfs together");
        }
        splitsOptionFound = true;
        generateSplitsInAM = Boolean.parseBoolean(args[++i]);

      } else if (args[i].equals("-writeSplitsToDfs")) {
        if (splitsOptionFound) {
          throw new RuntimeException(
              "Cannot use both -generateSplitsInAm and -writeSplitsToDfs together");
        }
        splitsOptionFound = true;
        writeSplitsToDfs = Boolean.parseBoolean(args[++i]);
      }
    }

    if (numIReducer > 0 && numReducer <= 0) {
      throw new RuntimeException("Cannot have intermediate reduces without" + " a final reduce");
    }

    // sleep for *SleepTime duration in Task by recSleepTime per record
    mapSleepCount = (int) Math.ceil(mapSleepTime / ((double) recSleepTime));
    reduceSleepCount = (int) Math.ceil(reduceSleepTime / ((double) recSleepTime));
    iReduceSleepCount = (int) Math.ceil(iReduceSleepTime / ((double) recSleepTime));

    TezConfiguration conf = new TezConfiguration(getConf());
    FileSystem remoteFs = FileSystem.get(conf);

    conf.set(
        TezConfiguration.TEZ_AM_STAGING_DIR,
        conf.get(TezConfiguration.TEZ_AM_STAGING_DIR, TezConfiguration.TEZ_AM_STAGING_DIR_DEFAULT));

    Path remoteStagingDir =
        remoteFs.makeQualified(
            new Path(
                conf.get(
                    TezConfiguration.TEZ_AM_STAGING_DIR,
                    TezConfiguration.TEZ_AM_STAGING_DIR_DEFAULT),
                Long.toString(System.currentTimeMillis())));
    TezClientUtils.ensureStagingDirExists(conf, remoteStagingDir);

    DAG dag =
        createDAG(
            remoteFs,
            conf,
            remoteStagingDir,
            numMapper,
            numReducer,
            iReduceStagesCount,
            numIReducer,
            mapSleepTime,
            mapSleepCount,
            reduceSleepTime,
            reduceSleepCount,
            iReduceSleepTime,
            iReduceSleepCount,
            writeSplitsToDfs,
            generateSplitsInAM);

    TezClient tezSession = new TezClient("MRRSleep", conf, false, null, credentials);
    tezSession.start();
    DAGClient dagClient = tezSession.submitDAG(dag);

    while (true) {
      DAGStatus status = dagClient.getDAGStatus(null);
      LOG.info("DAG Status: " + status);
      if (status.isCompleted()) {
        break;
      }
      try {
        Thread.sleep(1000);
      } catch (InterruptedException e) {
        // do nothing
      }
    }
    tezSession.stop();

    return dagClient.getDAGStatus(null).getState().equals(DAGStatus.State.SUCCEEDED) ? 0 : 1;
  }
Exemple #2
0
  @SuppressWarnings("unchecked")
  @Before
  public void setUp() throws Exception {
    utils = mock(DagUtils.class);
    fs = mock(FileSystem.class);
    path = mock(Path.class);
    when(path.getFileSystem(any(Configuration.class))).thenReturn(fs);
    when(utils.getTezDir(any(Path.class))).thenReturn(path);
    when(utils.createVertex(
            any(JobConf.class),
            any(BaseWork.class),
            any(Path.class),
            any(LocalResource.class),
            any(List.class),
            any(FileSystem.class),
            any(Context.class),
            anyBoolean(),
            any(TezWork.class),
            any(VertexType.class)))
        .thenAnswer(
            new Answer<Vertex>() {

              @Override
              public Vertex answer(InvocationOnMock invocation) throws Throwable {
                Object[] args = invocation.getArguments();
                return Vertex.create(
                    ((BaseWork) args[1]).getName(),
                    mock(ProcessorDescriptor.class),
                    0,
                    mock(Resource.class));
              }
            });

    when(utils.createEdge(
            any(JobConf.class),
            any(Vertex.class),
            any(Vertex.class),
            any(TezEdgeProperty.class),
            any(VertexType.class)))
        .thenAnswer(
            new Answer<Edge>() {

              @Override
              public Edge answer(InvocationOnMock invocation) throws Throwable {
                Object[] args = invocation.getArguments();
                return Edge.create((Vertex) args[1], (Vertex) args[2], mock(EdgeProperty.class));
              }
            });

    work = new TezWork("");

    mws = new MapWork[] {new MapWork(), new MapWork()};
    rws = new ReduceWork[] {new ReduceWork(), new ReduceWork()};

    work.addAll(mws);
    work.addAll(rws);

    int i = 0;
    for (BaseWork w : work.getAllWork()) {
      w.setName("Work " + (++i));
    }

    op = mock(Operator.class);

    LinkedHashMap<String, Operator<? extends OperatorDesc>> map =
        new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
    map.put("foo", op);
    mws[0].setAliasToWork(map);
    mws[1].setAliasToWork(map);

    LinkedHashMap<String, ArrayList<String>> pathMap =
        new LinkedHashMap<String, ArrayList<String>>();
    ArrayList<String> aliasList = new ArrayList<String>();
    aliasList.add("foo");
    pathMap.put("foo", aliasList);

    mws[0].setPathToAliases(pathMap);
    mws[1].setPathToAliases(pathMap);

    rws[0].setReducer(op);
    rws[1].setReducer(op);

    TezEdgeProperty edgeProp = new TezEdgeProperty(EdgeType.SIMPLE_EDGE);
    work.connect(mws[0], rws[0], edgeProp);
    work.connect(mws[1], rws[0], edgeProp);
    work.connect(rws[0], rws[1], edgeProp);

    task = new TezTask(utils);
    task.setWork(work);
    task.setConsole(mock(LogHelper.class));

    conf = new JobConf();
    appLr = mock(LocalResource.class);

    SessionState.start(new HiveConf());
    session = mock(TezClient.class);
    sessionState = mock(TezSessionState.class);
    when(sessionState.getSession()).thenReturn(session);
    when(session.submitDAG(any(DAG.class)))
        .thenThrow(new SessionNotRunning(""))
        .thenReturn(mock(DAGClient.class));
  }
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    // Configure intermediate reduces
    conf.setInt(MRJobConfig.MRR_INTERMEDIATE_STAGES, 1);

    // Set reducer class for intermediate reduce
    conf.setClass(
        MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(1, "mapreduce.job.reduce.class"),
        MyGroupByReducer.class,
        Reducer.class);
    // Set reducer output key class
    conf.setClass(
        MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(
            1, "mapreduce.map.output.key.class"),
        IntWritable.class,
        Object.class);
    // Set reducer output value class
    conf.setClass(
        MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(
            1, "mapreduce.map.output.value.class"),
        Text.class,
        Object.class);
    conf.setInt(
        MultiStageMRConfigUtil.getPropertyNameForIntermediateStage(1, "mapreduce.job.reduces"), 2);

    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
      System.err.println("Usage: groupbyorderbymrrtest <in> <out>");
      System.exit(2);
    }

    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "groupbyorderbymrrtest");

    job.setJarByClass(GroupByOrderByMRRTest.class);

    // Configure map
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    // Configure reduce
    job.setReducerClass(MyOrderByNoOpReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(1);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    TezClient tezClient = new TezClient(new TezConfiguration(conf));

    job.submit();
    JobID jobId = job.getJobID();
    ApplicationId appId = TypeConverter.toYarn(jobId).getAppId();

    DAGClient dagClient = tezClient.getDAGClient(appId);
    DAGStatus dagStatus = null;
    while (true) {
      dagStatus = dagClient.getDAGStatus();
      if (dagStatus.getState() == DAGStatus.State.RUNNING
          || dagStatus.getState() == DAGStatus.State.SUCCEEDED
          || dagStatus.getState() == DAGStatus.State.FAILED
          || dagStatus.getState() == DAGStatus.State.KILLED
          || dagStatus.getState() == DAGStatus.State.ERROR) {
        break;
      }
      try {
        Thread.sleep(500);
      } catch (InterruptedException e) {
        // continue;
      }
    }

    while (dagStatus.getState() == DAGStatus.State.RUNNING) {
      try {
        ExampleDriver.printMRRDAGStatus(dagStatus);
        try {
          Thread.sleep(1000);
        } catch (InterruptedException e) {
          // continue;
        }
        dagStatus = dagClient.getDAGStatus();
      } catch (TezException e) {
        LOG.fatal("Failed to get application progress. Exiting");
        System.exit(-1);
      }
    }

    ExampleDriver.printMRRDAGStatus(dagStatus);
    LOG.info("Application completed. " + "FinalState=" + dagStatus.getState());
    System.exit(dagStatus.getState() == DAGStatus.State.SUCCEEDED ? 0 : 1);
  }