public static void runSortJob(String... args) throws Exception {

    Path input = new Path(args[0]);
    Path output = new Path(args[1]);

    JobConf job = new JobConf();

    job.setNumReduceTasks(2);

    job.setInputFormat(KeyValueTextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);

    job.setJarByClass(SampleJob.class);

    output.getFileSystem(job).delete(output, true);

    JobClient jc = new JobClient(job);
    JobClient.setTaskOutputFilter(job, JobClient.TaskStatusFilter.ALL);
    RunningJob rj = jc.submitJob(job);
    try {
      if (!jc.monitorAndPrintJob(job, rj)) {
        System.out.println("Job Failed: " + rj.getFailureInfo());
        throw new IOException("Job failed!");
      }
    } catch (InterruptedException ie) {
      Thread.currentThread().interrupt();
    }
  }
  public static void runJob(String[] args) {
    JobConf conf = new JobConf(CassandraBulkLoader.class);

    if (args.length >= 4) {
      conf.setNumReduceTasks(new Integer(args[3]));
    }

    try {
      // We store the cassandra storage-conf.xml on the HDFS cluster
      DistributedCache.addCacheFile(new URI("/cassandra/storage-conf.xml#storage-conf.xml"), conf);
    } catch (URISyntaxException e) {
      throw new RuntimeException(e);
    }
    conf.setInputFormat(KeyValueTextInputFormat.class);
    conf.setJobName("CassandraBulkLoader_v2");
    conf.setMapperClass(Map.class);
    conf.setReducerClass(Reduce.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(conf, new Path(args[1]));
    FileOutputFormat.setOutputPath(conf, new Path(args[2]));
    try {
      JobClient.runJob(conf);
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }
 /** Configure a job given argv. */
 public static boolean parseArgs(String[] argv, JobConf job) throws IOException {
   if (argv.length < 1) {
     return 0 == printUsage();
   }
   for (int i = 0; i < argv.length; ++i) {
     if (argv.length == i + 1) {
       System.out.println("ERROR: Required parameter missing from " + argv[i]);
       return 0 == printUsage();
     }
     try {
       if ("-m".equals(argv[i])) {
         job.setNumMapTasks(Integer.parseInt(argv[++i]));
       } else if ("-r".equals(argv[i])) {
         job.setNumReduceTasks(Integer.parseInt(argv[++i]));
       } else if ("-inFormat".equals(argv[i])) {
         job.setInputFormat(Class.forName(argv[++i]).asSubclass(InputFormat.class));
       } else if ("-outFormat".equals(argv[i])) {
         job.setOutputFormat(Class.forName(argv[++i]).asSubclass(OutputFormat.class));
       } else if ("-outKey".equals(argv[i])) {
         job.setOutputKeyClass(Class.forName(argv[++i]).asSubclass(WritableComparable.class));
       } else if ("-outValue".equals(argv[i])) {
         job.setOutputValueClass(Class.forName(argv[++i]).asSubclass(Writable.class));
       } else if ("-keepmap".equals(argv[i])) {
         job.set(
             org.apache.hadoop.mapreduce.GenericMRLoadGenerator.MAP_PRESERVE_PERCENT, argv[++i]);
       } else if ("-keepred".equals(argv[i])) {
         job.set(
             org.apache.hadoop.mapreduce.GenericMRLoadGenerator.REDUCE_PRESERVE_PERCENT,
             argv[++i]);
       } else if ("-outdir".equals(argv[i])) {
         FileOutputFormat.setOutputPath(job, new Path(argv[++i]));
       } else if ("-indir".equals(argv[i])) {
         FileInputFormat.addInputPaths(job, argv[++i]);
       } else if ("-inFormatIndirect".equals(argv[i])) {
         job.setClass(
             org.apache.hadoop.mapreduce.GenericMRLoadGenerator.INDIRECT_INPUT_FORMAT,
             Class.forName(argv[++i]).asSubclass(InputFormat.class),
             InputFormat.class);
         job.setInputFormat(IndirectInputFormat.class);
       } else {
         System.out.println("Unexpected argument: " + argv[i]);
         return 0 == printUsage();
       }
     } catch (NumberFormatException except) {
       System.out.println("ERROR: Integer expected instead of " + argv[i]);
       return 0 == printUsage();
     } catch (Exception e) {
       throw (IOException) new IOException().initCause(e);
     }
   }
   return true;
 }
Exemple #4
0
  private static void runIOTest(Class<? extends Mapper> mapperClass, Path outputDir)
      throws IOException {
    JobConf job = new JobConf(fsConfig, DFSCIOTest.class);

    FileInputFormat.setInputPaths(job, CONTROL_DIR);
    job.setInputFormat(SequenceFileInputFormat.class);

    job.setMapperClass(mapperClass);
    job.setReducerClass(AccumulatingReducer.class);

    FileOutputFormat.setOutputPath(job, outputDir);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1);
    JobClient.runJob(job);
  }
Exemple #5
0
  // Job configuration
  private static JobConf createJobConf(Configuration conf) {
    JobConf jobconf = new JobConf(conf, DistCp.class);
    jobconf.setJobName(NAME);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    jobconf.setMapSpeculativeExecution(false);

    jobconf.setInputFormat(CopyInputFormat.class);
    jobconf.setOutputKeyClass(Text.class);
    jobconf.setOutputValueClass(Text.class);

    jobconf.setMapperClass(CopyFilesMapper.class);
    jobconf.setNumReduceTasks(0);
    return jobconf;
  }
  public static void seekTest(FileSystem fs, boolean fastCheck) throws Exception {

    fs.delete(READ_DIR, true);

    JobConf job = new JobConf(conf, TestFileSystem.class);
    job.setBoolean("fs.test.fastCheck", fastCheck);

    FileInputFormat.setInputPaths(job, CONTROL_DIR);
    job.setInputFormat(SequenceFileInputFormat.class);

    job.setMapperClass(SeekMapper.class);
    job.setReducerClass(LongSumReducer.class);

    FileOutputFormat.setOutputPath(job, READ_DIR);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setNumReduceTasks(1);
    JobClient.runJob(job);
  }
Exemple #7
0
  public int run(String[] args) throws Exception {
    if (args.length < 4) {
      System.out.println("ERROR: Please Enter args : input output type(text|seq) splitChar(9=\t)");
      return JobClient.SUCCESS;
    }
    String input = args[0];
    String output = args[1];
    String type = args[2];
    String splitChar = args[3];

    JobConf config = new JobConf(getConf(), getClass());
    config.set("user.split", splitChar);

    config.setJobName("File Filter -" + System.currentTimeMillis());
    config.setNumReduceTasks(10);
    config.setReducerClass(IdentityReducer.class);
    config.setMapperClass(FileTestMapper.class);
    if ("text".equals(type)) {
      config.setInputFormat(TextInputFormat.class);
      TextInputFormat.addInputPath(config, new Path(input));
    } else {
      config.setInputFormat(SequenceFileInputFormat.class);
      SequenceFileInputFormat.addInputPath(config, new Path(input));
    }
    config.setMapOutputKeyClass(Text.class);
    config.setMapOutputValueClass(Text.class);

    config.setOutputKeyClass(Text.class);
    config.setOutputValueClass(Text.class);

    // if output path exists then return
    FileSystem fs = FileSystem.get(config);
    Path outputPath = new Path(output);
    FileOutputFormat.setOutputPath(config, outputPath);

    if (!fs.exists(outputPath)) {
      JobClient.runJob(config);
    } else {
      System.out.println("You has finished this job today ! " + outputPath);
    }

    return JobClient.SUCCESS;
  }
  public int run(String[] args) throws Exception {
    if (args.length < 1) {
      args = new String[] {DateStringUtils.now()};
      System.out.println(
          "ERROR: Please Enter Date , eg. 20101010 ! now use default => " + DateStringUtils.now());
    }

    JobConf config = new JobConf(getConf(), getClass());
    config.set("user.args", Utils.asString(args));

    config.setJobName(getClass() + "-" + System.currentTimeMillis());
    config.setNumReduceTasks(100);
    config.setMapperClass(getClass());
    config.setReducerClass(getClass());
    config.setInputFormat(getInputFormat());
    config.setMapOutputKeyClass(Text.class);
    config.setMapOutputValueClass(Text.class);

    // add input paths
    for (String path : getInputPath(args)) {
      if (TextInputFormat.class.equals(getInputFormat())) {
        TextInputFormat.addInputPath(config, new Path(path));
      } else if (SequenceFileInputFormat.class.equals(getInputFormat())) {
        SequenceFileInputFormat.addInputPath(config, new Path(path));
      }
    }

    config.setOutputKeyClass(Text.class);
    config.setOutputValueClass(Text.class);

    // if output path exists then return
    FileSystem fs = FileSystem.get(config);
    Path outputPath = new Path(getOutputPath(args));
    FileOutputFormat.setOutputPath(config, outputPath);

    if (!fs.exists(outputPath)) {
      JobClient.runJob(config);
    } else {
      System.out.println("You has finished this job today ! " + outputPath);
    }

    return JobClient.SUCCESS;
  }
  public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), XiangLi1_exercise3.class);
    conf.setJobName("xiangli1_exercise3");
    conf.setNumReduceTasks(0);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(Map.class);
    // conf.setCombinerClass(Reduce.class);
    // conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
    return 0;
  }
  public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(Main.class);
    conf.setJobName("feels-analysis");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(TheOutputClass.class);
    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setNumReduceTasks(1);
    conf.setInputFormat(CSVTextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    // TODO: determine whether we need extra output
    MultipleOutputs.addMultiNamedOutput(
        conf, SECOND_OUTPUT, TextOutputFormat.class, Text.class, TheOutputClass.class);
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
  }
Exemple #11
0
  /**
   * Validates map phase progress after each record is processed by map task using custom task
   * reporter.
   */
  public void testMapProgress() throws Exception {
    JobConf job = new JobConf();
    fs = FileSystem.getLocal(job);
    Path rootDir = new Path(TEST_ROOT_DIR);
    createInputFile(rootDir);

    job.setNumReduceTasks(0);
    TaskAttemptID taskId = TaskAttemptID.forName("attempt_200907082313_0424_m_000000_0");
    job.setClass("mapreduce.job.outputformat.class", NullOutputFormat.class, OutputFormat.class);
    job.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR, TEST_ROOT_DIR);
    jobId = taskId.getJobID();

    JobContext jContext = new JobContextImpl(job, jobId);
    InputFormat<?, ?> input = ReflectionUtils.newInstance(jContext.getInputFormatClass(), job);

    List<InputSplit> splits = input.getSplits(jContext);
    JobSplitWriter.createSplitFiles(
        new Path(TEST_ROOT_DIR), job, new Path(TEST_ROOT_DIR).getFileSystem(job), splits);
    TaskSplitMetaInfo[] splitMetaInfo =
        SplitMetaInfoReader.readSplitMetaInfo(jobId, fs, job, new Path(TEST_ROOT_DIR));
    job.setUseNewMapper(true); // use new api
    for (int i = 0; i < splitMetaInfo.length; i++) { // rawSplits.length is 1
      map =
          new TestMapTask(
              job.get(JTConfig.JT_SYSTEM_DIR, "/tmp/hadoop/mapred/system") + jobId + "job.xml",
              taskId,
              i,
              splitMetaInfo[i].getSplitIndex(),
              1);

      JobConf localConf = new JobConf(job);
      map.localizeConfiguration(localConf);
      map.setConf(localConf);
      map.run(localConf, fakeUmbilical);
    }
    // clean up
    fs.delete(rootDir, true);
  }
  public void testLostTracker() throws IOException {
    // Tracker 0 contacts JT
    FakeObjectUtilities.establishFirstContact(jobTracker, trackers[0]);

    TaskAttemptID[] tid = new TaskAttemptID[2];
    JobConf conf = new JobConf();
    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(1);
    FakeJobInProgress job = new FakeJobInProgress(conf, jobTracker);
    job.initTasks();

    // Tracker 0 gets the map task
    tid[0] = job.findMapTask(trackers[0]);

    job.finishTask(tid[0]);

    // Advance clock. Tracker 0 would have got lost
    clock.advance(8 * 1000);

    jobTracker.checkExpiredTrackers();

    // Tracker 1 establishes contact with JT
    FakeObjectUtilities.establishFirstContact(jobTracker, trackers[1]);

    // Tracker1 should get assigned the lost map task
    tid[1] = job.findMapTask(trackers[1]);

    assertNotNull("Map Task from Lost Tracker did not get reassigned", tid[1]);

    assertEquals(
        "Task ID of reassigned map task does not match",
        tid[0].getTaskID().toString(),
        tid[1].getTaskID().toString());

    job.finishTask(tid[1]);
  }
  /** Test whether the tracker gets lost after its blacklisted. */
  public void testLostTrackerAfterBlacklisting() throws Exception {
    FakeObjectUtilities.establishFirstContact(jobTracker, trackers[0]);
    clock.advance(600);
    TaskAttemptID[] tid = new TaskAttemptID[2];
    JobConf conf = new JobConf();
    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(0);
    conf.set(MRJobConfig.MAX_TASK_FAILURES_PER_TRACKER, "1");
    conf.set(MRJobConfig.SETUP_CLEANUP_NEEDED, "false");
    FakeJobInProgress job = new FakeJobInProgress(conf, jobTracker);
    job.initTasks();
    job.setClusterSize(4);

    // check if the tracker count is correct
    assertEquals("Active tracker count mismatch", 1, jobTracker.taskTrackers().size());

    // Tracker 0 gets the map task
    tid[0] = job.findMapTask(trackers[0]);
    // Fail the task
    job.failTask(tid[0]);

    // Tracker 1 establishes contact with JT
    FakeObjectUtilities.establishFirstContact(jobTracker, trackers[1]);
    // check if the tracker count is correct
    assertEquals("Active tracker count mismatch", 2, jobTracker.taskTrackers().size());

    // Tracker 1 gets the map task
    tid[1] = job.findMapTask(trackers[1]);
    // Finish the task and also the job
    job.finishTask(tid[1]);

    // check if job is successful
    assertEquals("Job not successful", JobStatus.SUCCEEDED, job.getStatus().getRunState());

    // check if the trackers 1 got blacklisted
    assertTrue(
        "Tracker 0 not blacklisted",
        jobTracker.getBlacklistedTrackers()[0].getTaskTrackerName().equals(trackers[0]));
    // check if the tracker count is correct
    assertEquals("Active tracker count mismatch", 2, jobTracker.taskTrackers().size());
    // validate blacklisted count
    assertEquals(
        "Blacklisted tracker count mismatch",
        1,
        jobTracker.getClusterStatus(false).getBlacklistedTrackers());

    // Advance clock. Tracker 0 should be lost
    clock.advance(500);
    jobTracker.checkExpiredTrackers();

    // check if the task tracker is lost
    assertFalse(
        "Tracker 0 not lost",
        jobTracker.getClusterStatus(false).getActiveTrackerNames().contains(trackers[0]));

    // check if the lost tracker has removed from the jobtracker
    assertEquals("Active tracker count mismatch", 1, jobTracker.taskTrackers().size());
    // validate blacklisted count
    assertEquals(
        "Blacklisted tracker count mismatch",
        0,
        jobTracker.getClusterStatus(false).getBlacklistedTrackers());
  }
    @SuppressWarnings("unchecked")
    @Override
    public void run() {
      JobID jobId = profile.getJobID();
      JobContext jContext = new JobContextImpl(conf, jobId);
      org.apache.hadoop.mapreduce.OutputCommitter outputCommitter = null;
      try {
        outputCommitter = createOutputCommitter(conf.getUseNewMapper(), jobId, conf);
      } catch (Exception e) {
        LOG.info("Failed to createOutputCommitter", e);
        return;
      }

      try {
        TaskSplitMetaInfo[] taskSplitMetaInfos =
            SplitMetaInfoReader.readSplitMetaInfo(jobId, localFs, conf, systemJobDir);
        int numReduceTasks = job.getNumReduceTasks();
        if (numReduceTasks > 1 || numReduceTasks < 0) {
          // we only allow 0 or 1 reducer in local mode
          numReduceTasks = 1;
          job.setNumReduceTasks(1);
        }
        outputCommitter.setupJob(jContext);
        status.setSetupProgress(1.0f);

        Map<TaskAttemptID, MapOutputFile> mapOutputFiles =
            Collections.synchronizedMap(new HashMap<TaskAttemptID, MapOutputFile>());

        List<MapTaskRunnable> taskRunnables =
            getMapTaskRunnables(taskSplitMetaInfos, jobId, mapOutputFiles);

        ExecutorService mapService = createMapExecutor(taskRunnables.size());
        // Start populating the executor with work units.
        // They may begin running immediately (in other threads).
        for (Runnable r : taskRunnables) {
          mapService.submit(r);
        }

        try {
          mapService.shutdown(); // Instructs queue to drain.

          // Wait for tasks to finish; do not use a time-based timeout.
          // (See http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6179024)
          LOG.info("Waiting for map tasks");
          mapService.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
        } catch (InterruptedException ie) {
          // Cancel all threads.
          mapService.shutdownNow();
          throw ie;
        }

        LOG.info("Map task executor complete.");

        // After waiting for the map tasks to complete, if any of these
        // have thrown an exception, rethrow it now in the main thread context.
        for (MapTaskRunnable r : taskRunnables) {
          if (r.storedException != null) {
            throw new Exception(r.storedException);
          }
        }

        TaskAttemptID reduceId = new TaskAttemptID(new TaskID(jobId, false, 0), 0);
        try {
          if (numReduceTasks > 0) {
            ReduceTask reduce =
                new ReduceTask(systemJobFile.toString(), reduceId, 0, mapIds.size(), 1);
            reduce.setUser(UserGroupInformation.getCurrentUser().getShortUserName());
            JobConf localConf = new JobConf(job);
            localConf.set("mapreduce.jobtracker.address", "local");
            TaskRunner.setupChildMapredLocalDirs(reduce, localConf);
            // move map output to reduce input
            for (int i = 0; i < mapIds.size(); i++) {
              if (!this.isInterrupted()) {
                TaskAttemptID mapId = mapIds.get(i);
                Path mapOut = mapOutputFiles.get(mapId).getOutputFile();
                MapOutputFile localOutputFile = new MapOutputFile();
                localOutputFile.setConf(localConf);
                Path reduceIn =
                    localOutputFile.getInputFileForWrite(
                        mapId.getTaskID(), localFs.getFileStatus(mapOut).getLen());
                if (!localFs.mkdirs(reduceIn.getParent())) {
                  throw new IOException(
                      "Mkdirs failed to create " + reduceIn.getParent().toString());
                }
                if (!localFs.rename(mapOut, reduceIn))
                  throw new IOException("Couldn't rename " + mapOut);
              } else {
                throw new InterruptedException();
              }
            }
            if (!this.isInterrupted()) {
              reduce.setJobFile(localJobFile.toString());
              localConf.setUser(reduce.getUser());
              reduce.localizeConfiguration(localConf);
              reduce.setConf(localConf);
              reduce_tasks += 1;
              myMetrics.launchReduce(reduce.getTaskID());
              reduce.run(localConf, this);
              myMetrics.completeReduce(reduce.getTaskID());
              reduce_tasks -= 1;
            } else {
              throw new InterruptedException();
            }
          }
        } finally {
          for (MapOutputFile output : mapOutputFiles.values()) {
            output.removeAll();
          }
        }
        // delete the temporary directory in output directory
        outputCommitter.commitJob(jContext);
        status.setCleanupProgress(1.0f);

        if (killed) {
          this.status.setRunState(JobStatus.KILLED);
        } else {
          this.status.setRunState(JobStatus.SUCCEEDED);
        }

        JobEndNotifier.localRunnerNotification(job, status);

      } catch (Throwable t) {
        try {
          outputCommitter.abortJob(jContext, org.apache.hadoop.mapreduce.JobStatus.State.FAILED);
        } catch (IOException ioe) {
          LOG.info("Error cleaning up job:" + id);
        }
        status.setCleanupProgress(1.0f);
        if (killed) {
          this.status.setRunState(JobStatus.KILLED);
        } else {
          this.status.setRunState(JobStatus.FAILED);
        }
        LOG.warn(id, t);

        JobEndNotifier.localRunnerNotification(job, status);

      } finally {
        try {
          fs.delete(systemJobFile.getParent(), true); // delete submit dir
          localFs.delete(localJobFile, true); // delete local copy
          // Cleanup distributed cache
          taskDistributedCacheManager.release();
          trackerDistributedCacheManager.purgeCache();
        } catch (IOException e) {
          LOG.warn("Error cleaning up " + id + ": " + e);
        }
      }
    }
Exemple #15
0
  /**
   * The main driver for word count map/reduce program. Invoke this method to submit the map/reduce
   * job.
   *
   * @throws IOException When there is communication problems with the job tracker.
   */
  public void testOldJobWithMapAndReducers() throws Exception {
    JobConf conf = new JobConf(TestJobCounters.class);
    conf.setJobName("wordcount-map-reducers");

    // the keys are words (strings)
    conf.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(WordCount.MapClass.class);
    conf.setCombinerClass(WordCount.Reduce.class);
    conf.setReducerClass(WordCount.Reduce.class);

    conf.setNumMapTasks(3);
    conf.setNumReduceTasks(1);
    conf.setInt("io.sort.mb", 1);
    conf.setInt("io.sort.factor", 2);
    conf.set("io.sort.record.percent", "0.05");
    conf.set("io.sort.spill.percent", "0.80");

    FileSystem fs = FileSystem.get(conf);
    Path testDir = new Path(TEST_ROOT_DIR, "countertest");
    conf.set("test.build.data", testDir.toString());
    try {
      if (fs.exists(testDir)) {
        fs.delete(testDir, true);
      }
      if (!fs.mkdirs(testDir)) {
        throw new IOException("Mkdirs failed to create " + testDir.toString());
      }

      String inDir = testDir + File.separator + "genins" + File.separator;
      String outDir = testDir + File.separator;
      Path wordsIns = new Path(inDir);
      if (!fs.mkdirs(wordsIns)) {
        throw new IOException("Mkdirs failed to create " + wordsIns.toString());
      }

      long inputSize = 0;
      // create 3 input files each with 5*2k words
      File inpFile = new File(inDir + "input5_2k_1");
      createWordsFile(inpFile);
      inputSize += inpFile.length();
      inpFile = new File(inDir + "input5_2k_2");
      createWordsFile(inpFile);
      inputSize += inpFile.length();
      inpFile = new File(inDir + "input5_2k_3");
      createWordsFile(inpFile);
      inputSize += inpFile.length();

      FileInputFormat.setInputPaths(conf, inDir);
      Path outputPath1 = new Path(outDir, "output5_2k_3");
      FileOutputFormat.setOutputPath(conf, outputPath1);

      RunningJob myJob = JobClient.runJob(conf);
      Counters c1 = myJob.getCounters();
      // 3maps & in each map, 4 first level spills --- So total 12.
      // spilled records count:
      // Each Map: 1st level:2k+2k+2k+2k=8k;2ndlevel=4k+4k=8k;
      //           3rd level=2k(4k from 1st level & 4k from 2nd level & combineAndSpill)
      //           So total 8k+8k+2k=18k
      // For 3 Maps, total = 3*18=54k
      // Reduce: each of the 3 map o/p's(2k each) will be spilled in shuffleToDisk()
      //         So 3*2k=6k in 1st level; 2nd level:4k(2k+2k);
      //         3rd level directly given to reduce(4k+2k --- combineAndSpill => 2k.
      //         So 0 records spilled to disk in 3rd level)
      //         So total of 6k+4k=10k
      // Total job counter will be 54k+10k = 64k

      // 3 maps and 2.5k lines --- So total 7.5k map input records
      // 3 maps and 10k words in each --- So total of 30k map output recs
      validateMapredCounters(c1, 64000, 7500, 30000);
      validateMapredFileCounters(c1, inputSize, inputSize, 0, 0, 0);

      // create 4th input file each with 5*2k words and test with 4 maps
      inpFile = new File(inDir + "input5_2k_4");
      createWordsFile(inpFile);
      inputSize += inpFile.length();
      conf.setNumMapTasks(4);
      Path outputPath2 = new Path(outDir, "output5_2k_4");
      FileOutputFormat.setOutputPath(conf, outputPath2);

      myJob = JobClient.runJob(conf);
      c1 = myJob.getCounters();
      // 4maps & in each map 4 first level spills --- So total 16.
      // spilled records count:
      // Each Map: 1st level:2k+2k+2k+2k=8k;2ndlevel=4k+4k=8k;
      //           3rd level=2k(4k from 1st level & 4k from 2nd level & combineAndSpill)
      //           So total 8k+8k+2k=18k
      // For 3 Maps, total = 4*18=72k
      // Reduce: each of the 4 map o/p's(2k each) will be spilled in shuffleToDisk()
      //         So 4*2k=8k in 1st level; 2nd level:4k+4k=8k;
      //         3rd level directly given to reduce(4k+4k --- combineAndSpill => 2k.
      //         So 0 records spilled to disk in 3rd level)
      //         So total of 8k+8k=16k
      // Total job counter will be 72k+16k = 88k

      // 4 maps and 2.5k words in each --- So 10k map input records
      // 4 maps and 10k unique words --- So 40k map output records
      validateMapredCounters(c1, 88000, 10000, 40000);
      validateMapredFileCounters(c1, inputSize, inputSize, 0, 0, 0);

      // check for a map only job
      conf.setNumReduceTasks(0);
      Path outputPath3 = new Path(outDir, "output5_2k_5");
      FileOutputFormat.setOutputPath(conf, outputPath3);

      myJob = JobClient.runJob(conf);
      c1 = myJob.getCounters();
      // 4 maps and 2.5k words in each --- So 10k map input records
      // 4 maps and 10k unique words --- So 40k map output records
      validateMapredCounters(c1, 0, 10000, 40000);
      validateMapredFileCounters(c1, inputSize, inputSize, 0, -1, -1);
    } finally {
      // clean up the input and output files
      if (fs.exists(testDir)) {
        fs.delete(testDir, true);
      }
    }
  }
Exemple #16
0
    static void checkRecords(
        Configuration defaults, int noMaps, int noReduces, Path sortInput, Path sortOutput)
        throws IOException {
      JobConf jobConf = new JobConf(defaults, RecordChecker.class);
      jobConf.setJobName("sortvalidate-record-checker");

      jobConf.setInputFormat(SequenceFileInputFormat.class);
      jobConf.setOutputFormat(SequenceFileOutputFormat.class);

      jobConf.setOutputKeyClass(BytesWritable.class);
      jobConf.setOutputValueClass(IntWritable.class);

      jobConf.setMapperClass(Map.class);
      jobConf.setReducerClass(Reduce.class);

      JobClient client = new JobClient(jobConf);
      ClusterStatus cluster = client.getClusterStatus();
      if (noMaps == -1) {
        noMaps = cluster.getTaskTrackers() * jobConf.getInt("test.sortvalidate.maps_per_host", 10);
      }
      if (noReduces == -1) {
        noReduces = (int) (cluster.getMaxReduceTasks() * 0.9);
        String sortReduces = jobConf.get("test.sortvalidate.reduces_per_host");
        if (sortReduces != null) {
          noReduces = cluster.getTaskTrackers() * Integer.parseInt(sortReduces);
        }
      }
      jobConf.setNumMapTasks(noMaps);
      jobConf.setNumReduceTasks(noReduces);

      FileInputFormat.setInputPaths(jobConf, sortInput);
      FileInputFormat.addInputPath(jobConf, sortOutput);
      Path outputPath = new Path("/tmp/sortvalidate/recordchecker");
      FileSystem fs = FileSystem.get(defaults);
      if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
      }
      FileOutputFormat.setOutputPath(jobConf, outputPath);

      // Uncomment to run locally in a single process
      // job_conf.set("mapred.job.tracker", "local");
      Path[] inputPaths = FileInputFormat.getInputPaths(jobConf);
      System.out.println(
          "\nSortValidator.RecordChecker: Running on "
              + cluster.getTaskTrackers()
              + " nodes to validate sort from "
              + inputPaths[0]
              + ", "
              + inputPaths[1]
              + " into "
              + FileOutputFormat.getOutputPath(jobConf)
              + " with "
              + noReduces
              + " reduces.");
      Date startTime = new Date();
      System.out.println("Job started: " + startTime);
      JobClient.runJob(jobConf);
      Date end_time = new Date();
      System.out.println("Job ended: " + end_time);
      System.out.println(
          "The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    }
  /** Check refreshNodes for decommissioning blacklisted nodes. */
  public void testBlacklistedNodeDecommissioning() throws Exception {
    LOG.info("Testing blacklisted node decommissioning");
    MiniMRCluster mr = null;
    JobTracker jt = null;

    try {
      // start mini mr
      JobConf jtConf = new JobConf();
      jtConf.set("mapred.max.tracker.blacklists", "1");
      mr = new MiniMRCluster(0, 0, 2, "file:///", 1, null, null, null, jtConf);
      jt = mr.getJobTrackerRunner().getJobTracker();

      assertEquals("Trackers not up", 2, jt.taskTrackers().size());
      // validate the total tracker count
      assertEquals(
          "Active tracker count mismatch", 2, jt.getClusterStatus(false).getTaskTrackers());
      // validate blacklisted count
      assertEquals(
          "Blacklisted tracker count mismatch",
          0,
          jt.getClusterStatus(false).getBlacklistedTrackers());

      // run a failing job to blacklist the tracker
      JobConf jConf = mr.createJobConf();
      jConf.set("mapred.max.tracker.failures", "1");
      jConf.setJobName("test-job-fail-once");
      jConf.setMapperClass(FailOnceMapper.class);
      jConf.setReducerClass(IdentityReducer.class);
      jConf.setNumMapTasks(1);
      jConf.setNumReduceTasks(0);

      RunningJob job =
          UtilsForTests.runJob(jConf, new Path(TEST_DIR, "in"), new Path(TEST_DIR, "out"));
      job.waitForCompletion();

      // validate the total tracker count
      assertEquals(
          "Active tracker count mismatch", 1, jt.getClusterStatus(false).getTaskTrackers());
      // validate blacklisted count
      assertEquals(
          "Blacklisted tracker count mismatch",
          1,
          jt.getClusterStatus(false).getBlacklistedTrackers());

      // find the blacklisted tracker
      String trackerName = null;
      for (TaskTrackerStatus status : jt.taskTrackers()) {
        if (jt.isBlacklisted(status.getTrackerName())) {
          trackerName = status.getTrackerName();
          break;
        }
      }
      // get the hostname
      String hostToDecommission = JobInProgress.convertTrackerNameToHostName(trackerName);
      LOG.info("Decommissioning tracker " + hostToDecommission);

      // decommission the node
      HashSet<String> decom = new HashSet<String>(1);
      decom.add(hostToDecommission);
      jt.decommissionNodes(decom);

      // validate
      // check the cluster status and tracker size
      assertEquals(
          "Tracker is not lost upon host decommissioning",
          1,
          jt.getClusterStatus(false).getTaskTrackers());
      assertEquals(
          "Blacklisted tracker count incorrect in cluster status " + "after decommissioning",
          0,
          jt.getClusterStatus(false).getBlacklistedTrackers());
      assertEquals("Tracker is not lost upon host decommissioning", 1, jt.taskTrackers().size());
    } finally {
      if (mr != null) {
        mr.shutdown();
        mr = null;
        jt = null;
        FileUtil.fullyDelete(new File(TEST_DIR.toString()));
      }
    }
  }
  /** Test whether the tracker gets blacklisted after its lost. */
  public void testLostTrackerBeforeBlacklisting() throws Exception {
    FakeObjectUtilities.establishFirstContact(jobTracker, trackers[0]);
    TaskAttemptID[] tid = new TaskAttemptID[3];
    JobConf conf = new JobConf();
    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(1);
    conf.set(MRJobConfig.MAX_TASK_FAILURES_PER_TRACKER, "1");
    conf.set(MRJobConfig.SETUP_CLEANUP_NEEDED, "false");
    FakeJobInProgress job = new FakeJobInProgress(conf, jobTracker);
    job.initTasks();
    job.setClusterSize(4);

    // Tracker 0 gets the map task
    tid[0] = job.findMapTask(trackers[0]);

    job.finishTask(tid[0]);

    // validate the total tracker count
    assertEquals(
        "Active tracker count mismatch", 1, jobTracker.getClusterStatus(false).getTaskTrackers());

    // lose the tracker
    clock.advance(1100);
    jobTracker.checkExpiredTrackers();
    assertFalse(
        "Tracker 0 not lost",
        jobTracker.getClusterStatus(false).getActiveTrackerNames().contains(trackers[0]));

    // validate the total tracker count
    assertEquals(
        "Active tracker count mismatch", 0, jobTracker.getClusterStatus(false).getTaskTrackers());

    // Tracker 1 establishes contact with JT
    FakeObjectUtilities.establishFirstContact(jobTracker, trackers[1]);

    // Tracker1 should get assigned the lost map task
    tid[1] = job.findMapTask(trackers[1]);

    assertNotNull("Map Task from Lost Tracker did not get reassigned", tid[1]);

    assertEquals(
        "Task ID of reassigned map task does not match",
        tid[0].getTaskID().toString(),
        tid[1].getTaskID().toString());

    // finish the map task
    job.finishTask(tid[1]);

    // finish the reduce task
    tid[2] = job.findReduceTask(trackers[1]);
    job.finishTask(tid[2]);

    // check if job is successful
    assertEquals("Job not successful", JobStatus.SUCCEEDED, job.getStatus().getRunState());

    // check if the tracker is lost
    // validate the total tracker count
    assertEquals(
        "Active tracker count mismatch", 1, jobTracker.getClusterStatus(false).getTaskTrackers());
    // validate blacklisted count .. since we lost one blacklisted tracker
    assertEquals(
        "Blacklisted tracker count mismatch",
        0,
        jobTracker.getClusterStatus(false).getBlacklistedTrackers());
  }
Exemple #19
0
    static void checkRecords(Configuration defaults, Path sortInput, Path sortOutput)
        throws IOException {
      FileSystem inputfs = sortInput.getFileSystem(defaults);
      FileSystem outputfs = sortOutput.getFileSystem(defaults);
      FileSystem defaultfs = FileSystem.get(defaults);
      JobConf jobConf = new JobConf(defaults, RecordStatsChecker.class);
      jobConf.setJobName("sortvalidate-recordstats-checker");

      int noSortReduceTasks = outputfs.listStatus(sortOutput, sortPathsFilter).length;
      jobConf.setInt("sortvalidate.sort.reduce.tasks", noSortReduceTasks);
      int noSortInputpaths = inputfs.listStatus(sortInput).length;

      jobConf.setInputFormat(NonSplitableSequenceFileInputFormat.class);
      jobConf.setOutputFormat(SequenceFileOutputFormat.class);

      jobConf.setOutputKeyClass(IntWritable.class);
      jobConf.setOutputValueClass(RecordStatsChecker.RecordStatsWritable.class);

      jobConf.setMapperClass(Map.class);
      jobConf.setCombinerClass(Reduce.class);
      jobConf.setReducerClass(Reduce.class);

      jobConf.setNumMapTasks(noSortReduceTasks);
      jobConf.setNumReduceTasks(1);

      FileInputFormat.setInputPaths(jobConf, sortInput);
      FileInputFormat.addInputPath(jobConf, sortOutput);
      Path outputPath = new Path("/tmp/sortvalidate/recordstatschecker");
      if (defaultfs.exists(outputPath)) {
        defaultfs.delete(outputPath, true);
      }
      FileOutputFormat.setOutputPath(jobConf, outputPath);

      // Uncomment to run locally in a single process
      // job_conf.set("mapred.job.tracker", "local");
      Path[] inputPaths = FileInputFormat.getInputPaths(jobConf);
      System.out.println(
          "\nSortValidator.RecordStatsChecker: Validate sort "
              + "from "
              + inputPaths[0]
              + " ("
              + noSortInputpaths
              + " files), "
              + inputPaths[1]
              + " ("
              + noSortReduceTasks
              + " files) into "
              + FileOutputFormat.getOutputPath(jobConf)
              + " with 1 reducer.");
      Date startTime = new Date();
      System.out.println("Job started: " + startTime);
      JobClient.runJob(jobConf);
      Date end_time = new Date();
      System.out.println("Job ended: " + end_time);
      System.out.println(
          "The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");

      // Check to ensure that the statistics of the
      // framework's sort-input and sort-output match
      SequenceFile.Reader stats =
          new SequenceFile.Reader(defaultfs, new Path(outputPath, "part-00000"), defaults);
      IntWritable k1 = new IntWritable();
      IntWritable k2 = new IntWritable();
      RecordStatsWritable v1 = new RecordStatsWritable();
      RecordStatsWritable v2 = new RecordStatsWritable();
      if (!stats.next(k1, v1)) {
        throw new IOException("Failed to read record #1 from reduce's output");
      }
      if (!stats.next(k2, v2)) {
        throw new IOException("Failed to read record #2 from reduce's output");
      }

      if ((v1.getBytes() != v2.getBytes())
          || (v1.getRecords() != v2.getRecords())
          || v1.getChecksum() != v2.getChecksum()) {
        throw new IOException(
            "("
                + v1.getBytes()
                + ", "
                + v1.getRecords()
                + ", "
                + v1.getChecksum()
                + ") v/s ("
                + v2.getBytes()
                + ", "
                + v2.getRecords()
                + ", "
                + v2.getChecksum()
                + ")");
      }
    }