Пример #1
0
  public static void getData(CloudataConf conf, Path keyPath) throws IOException {
    JobConf jobConf = new JobConf(TeraReadJob.class);
    jobConf.set("user.name", conf.getUserId());
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    Path tempOutputPath = new Path("ManyTableJob_Get_" + System.currentTimeMillis());

    jobConf.setJobName("ManyTableJob_Get_" + "(" + new Date() + ")");

    TextOutputFormat.setOutputPath(jobConf, tempOutputPath);
    // <MAP>
    jobConf.setMapperClass(ManyTableGetMap.class);
    jobConf.setInputFormat(TextInputFormat.class);
    TextInputFormat.addInputPath(jobConf, keyPath);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    // </MAP>

    // <REDUCE>
    jobConf.setNumReduceTasks(0);
    // </REDUCE>

    try {
      // Run Job
      JobClient.runJob(jobConf);
    } finally {
      // delete temp output path
      FileSystem fs = FileSystem.get(jobConf);
      FileUtil.delete(fs, tempOutputPath, true);
      CloudataMapReduceUtil.clearMapReduce(libDir);
    }
  }
  /** Runs this tool. */
  public int run(String[] args) throws Exception {
    if (args.length != 3) {
      printUsage();
      return -1;
    }

    String inputPath = args[0];
    String outputPath = args[1];
    int n = Integer.parseInt(args[2]);

    sLogger.info("Tool name: BuildPageRankRecords");
    sLogger.info(" - inputDir: " + inputPath);
    sLogger.info(" - outputDir: " + outputPath);
    sLogger.info(" - numNodes: " + n);

    JobConf conf = new JobConf(BuildPageRankRecords.class);
    conf.setJobName("PackageLinkGraph");

    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(0);

    conf.setInt("NodeCnt", n);
    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);

    TextInputFormat.addInputPath(conf, new Path(inputPath));
    SequenceFileOutputFormat.setOutputPath(conf, new Path(outputPath));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(PageRankNode.class);

    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(PageRankNode.class);

    conf.setMapperClass(MyMapper.class);
    conf.setReducerClass(IdentityReducer.class);

    // delete the output directory if it exists already
    FileSystem.get(conf).delete(new Path(outputPath), true);

    JobClient.runJob(conf);

    return 0;
  }
Пример #3
0
  public int run(String[] args) throws Exception {
    if (args.length < 4) {
      System.out.println("ERROR: Please Enter args : input output type(text|seq) splitChar(9=\t)");
      return JobClient.SUCCESS;
    }
    String input = args[0];
    String output = args[1];
    String type = args[2];
    String splitChar = args[3];

    JobConf config = new JobConf(getConf(), getClass());
    config.set("user.split", splitChar);

    config.setJobName("File Filter -" + System.currentTimeMillis());
    config.setNumReduceTasks(10);
    config.setReducerClass(IdentityReducer.class);
    config.setMapperClass(FileTestMapper.class);
    if ("text".equals(type)) {
      config.setInputFormat(TextInputFormat.class);
      TextInputFormat.addInputPath(config, new Path(input));
    } else {
      config.setInputFormat(SequenceFileInputFormat.class);
      SequenceFileInputFormat.addInputPath(config, new Path(input));
    }
    config.setMapOutputKeyClass(Text.class);
    config.setMapOutputValueClass(Text.class);

    config.setOutputKeyClass(Text.class);
    config.setOutputValueClass(Text.class);

    // if output path exists then return
    FileSystem fs = FileSystem.get(config);
    Path outputPath = new Path(output);
    FileOutputFormat.setOutputPath(config, outputPath);

    if (!fs.exists(outputPath)) {
      JobClient.runJob(config);
    } else {
      System.out.println("You has finished this job today ! " + outputPath);
    }

    return JobClient.SUCCESS;
  }
  public int run(String[] args) throws Exception {
    if (args.length < 1) {
      args = new String[] {DateStringUtils.now()};
      System.out.println(
          "ERROR: Please Enter Date , eg. 20101010 ! now use default => " + DateStringUtils.now());
    }

    JobConf config = new JobConf(getConf(), getClass());
    config.set("user.args", Utils.asString(args));

    config.setJobName(getClass() + "-" + System.currentTimeMillis());
    config.setNumReduceTasks(100);
    config.setMapperClass(getClass());
    config.setReducerClass(getClass());
    config.setInputFormat(getInputFormat());
    config.setMapOutputKeyClass(Text.class);
    config.setMapOutputValueClass(Text.class);

    // add input paths
    for (String path : getInputPath(args)) {
      if (TextInputFormat.class.equals(getInputFormat())) {
        TextInputFormat.addInputPath(config, new Path(path));
      } else if (SequenceFileInputFormat.class.equals(getInputFormat())) {
        SequenceFileInputFormat.addInputPath(config, new Path(path));
      }
    }

    config.setOutputKeyClass(Text.class);
    config.setOutputValueClass(Text.class);

    // if output path exists then return
    FileSystem fs = FileSystem.get(config);
    Path outputPath = new Path(getOutputPath(args));
    FileOutputFormat.setOutputPath(config, outputPath);

    if (!fs.exists(outputPath)) {
      JobClient.runJob(config);
    } else {
      System.out.println("You has finished this job today ! " + outputPath);
    }

    return JobClient.SUCCESS;
  }
Пример #5
0
  public int run(String[] args) throws Exception {

    if (args.length < 1) {
      args = new String[] {TaobaoPath.now()};
      System.out.println(
          "ERROR: Please Enter Date , eg. 20100507  now use default!" + TaobaoPath.now());
    }

    JobConf conf = new JobConf(getConf(), NewItemDailyFrom.class);
    conf.setJobName("NewItemDailyFrom-" + System.currentTimeMillis());

    String date = args[0];
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(TaobaoPath.getOutput("new_item_daily_from", date))) {
      System.out.println(
          "ERROR: You has finish this job at this day :  "
              + date
              + " [ "
              + TaobaoPath.getOutput("new_item_daily_from", date)
              + " ] ");
      return -1;
    }

    conf.set("user.date", date);
    conf.setNumReduceTasks(1);
    conf.setMapperClass(MapClass.class);
    conf.setReducerClass(LongSumReducer.class);
    conf.setInputFormat(TextInputFormat.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(LongWritable.class);
    TextInputFormat.addInputPath(conf, TaobaoPath.hiveAuctionAuctions(date));

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(LongWritable.class);

    FileOutputFormat.setOutputPath(conf, TaobaoPath.getOutput("new_item_daily_from", date));

    JobClient.runJob(conf);

    return JobClient.SUCCESS;
  }