Пример #1
0
  public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    GenericOptionsParser gop = new GenericOptionsParser(conf, args);
    conf = gop.getConfiguration();

    Job job = new Job(conf, conf.get("job_name"));
    FileInputFormat.addInputPaths(job, conf.get("input_dir"));
    Path output = new Path(conf.get("output_dir"));
    FileOutputFormat.setOutputPath(job, output);
    output.getFileSystem(conf).delete(output, true);

    job.setJarByClass(BrowerLogFormatMR.class);
    job.setMapperClass(BrowerLogFormatMapper.class);
    job.setReducerClass(BrowerLogFormatReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1);

    int code = job.waitForCompletion(true) ? 0 : 1;
    return code;
  }
Пример #2
0
  public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    GenericOptionsParser optionparser = new GenericOptionsParser(conf, args);
    conf = optionparser.getConfiguration();

    Job job = new Job(conf, "Update History Mac ");
    job.setJarByClass(UpdateHistoryMac.class);

    FileInputFormat.addInputPaths(job, conf.get("input_dir"));
    Path outputPath = new Path(conf.get("output_dir"));
    FileOutputFormat.setOutputPath(job, outputPath);
    outputPath.getFileSystem(conf).delete(outputPath, true);

    job.setMapperClass(UpdateHistoryMacMapper.class);
    job.setReducerClass(UpdateHistoryMacReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    //      TextOutputFormat.setCompressOutput(job, true);
    //      TextOutputFormat.setOutputCompressorClass(job, LzopCodec.class);
    job.setNumReduceTasks(5);

    int code = job.waitForCompletion(true) ? 0 : 1;
    //      LzoIndexer lzoIndexer = new LzoIndexer(conf);
    //      lzoIndexer.index(outputPath);

    return code;
  }
Пример #3
0
  @Override
  public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String jobName = "Rating predictor  MR";
    job.setJobName(jobName);

    job.setJarByClass(UtilityPredictor.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(UtilityPredictor.PredictionMapper.class);
    job.setReducerClass(UtilityPredictor.PredictorReducer.class);

    job.setMapOutputKeyClass(TextInt.class);
    job.setMapOutputValueClass(Tuple.class);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setGroupingComparatorClass(ItemIdGroupComprator.class);
    job.setPartitionerClass(ItemIdPartitioner.class);

    Utility.setConfiguration(job.getConfiguration());
    int numReducer = job.getConfiguration().getInt("utp.num.reducer", -1);
    numReducer = -1 == numReducer ? job.getConfiguration().getInt("num.reducer", 1) : numReducer;
    job.setNumReduceTasks(numReducer);

    int status = job.waitForCompletion(true) ? 0 : 1;
    return status;
  }
Пример #4
0
  public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    GenericOptionsParser optionparser = new GenericOptionsParser(conf, args);
    conf = optionparser.getConfiguration();

    Job job = new Job(conf, conf.get("job_name"));
    job.setJarByClass(DeliverFormatForUVMR.class);
    FileInputFormat.addInputPaths(job, conf.get("input_dir"));
    String outputDir = conf.get("output_dir");
    String tmpDir = outputDir + "_tmp";
    Path tmpOut = new Path(tmpDir);
    FileOutputFormat.setOutputPath(job, tmpOut);
    tmpOut.getFileSystem(conf).delete(tmpOut, true);

    job.setMapperClass(DeliverFormatForUVMapper.class);
    job.setCombinerClass(DeliverFormatForUVCombiner.class);
    job.setReducerClass(DeliverFormatForUVReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setNumReduceTasks(conf.getInt("reduce_num", 20));

    int code = job.waitForCompletion(true) ? 0 : 1;

    if (code == 0) {

      // this job is for combining  small files into one
      Job combineJob = new Job(conf, "CombineTmpData");
      combineJob.setJarByClass(DeliverFormatForUVMR.class);

      FileInputFormat.addInputPath(combineJob, new Path(tmpDir));
      FileOutputFormat.setOutputPath(combineJob, new Path(outputDir));
      combineJob.setMapperClass(IdentityMapper.class);
      combineJob.setReducerClass(IdentityReducer.class);

      combineJob.setInputFormatClass(KeyValueTextInputFormat.class);
      combineJob.setOutputFormatClass(TextOutputFormat.class);

      combineJob.setOutputKeyClass(Text.class);
      combineJob.setOutputValueClass(Text.class);

      TextOutputFormat.setCompressOutput(combineJob, true);
      TextOutputFormat.setOutputCompressorClass(combineJob, LzopCodec.class);

      combineJob.setNumReduceTasks(1);
      code = combineJob.waitForCompletion(true) ? 0 : 1;
    }

    FileSystem.get(conf).delete(tmpOut, true);
    LzoIndexer lzoIndexer = new LzoIndexer(conf);
    lzoIndexer.index(new Path(outputDir));
    System.exit(code);
    return code;
  }
Пример #5
0
  public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    GenericOptionsParser gop = new GenericOptionsParser(conf, args);
    conf = gop.getConfiguration();

    Job job = new Job(conf, "ClientUserInstallMR");
    job.setJarByClass(ClientUserInstallMR.class);
    FileInputFormat.addInputPaths(job, conf.get("input_dir"));
    String outputDir = conf.get("output_dir");

    String tmpDir = outputDir + "_tmp";
    Path tmpOutput = new Path(tmpDir);
    FileOutputFormat.setOutputPath(job, tmpOutput);
    tmpOutput.getFileSystem(conf).delete(tmpOutput, true);

    job.setMapperClass(ClientUserInstallFirstMapper.class);
    job.setReducerClass(ClientUserInstallFirstReduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setNumReduceTasks(30);

    int code = job.waitForCompletion(true) ? 0 : 1;

    if (code == 0) {
      Job secondJob = new Job(conf, "ClientUserInstallResult");
      secondJob.setJarByClass(ClientUserInstallMR.class);
      conf.set("stat_date", conf.get("stat_date"));

      FileInputFormat.addInputPath(secondJob, new Path(tmpDir));
      Path output = new Path(outputDir);
      FileOutputFormat.setOutputPath(secondJob, output);
      output.getFileSystem(conf).delete(output, true);

      secondJob.setMapperClass(ClientUserInstallSecondMapper.class);
      secondJob.setReducerClass(ClientUserInstallSecondReduce.class);

      secondJob.setInputFormatClass(KeyValueTextInputFormat.class);
      secondJob.setOutputFormatClass(TextOutputFormat.class);
      secondJob.setOutputKeyClass(Text.class);
      secondJob.setOutputValueClass(Text.class);

      secondJob.setNumReduceTasks(1);

      code = secondJob.waitForCompletion(true) ? 0 : 1;
    }
    FileSystem.get(conf).delete(tmpOutput, true);
    System.exit(code);
    return code;
  }
Пример #6
0
  public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    GenericOptionsParser gop = new GenericOptionsParser(conf, args);
    conf = gop.getConfiguration();

    Job job = new Job(conf, conf.get("job_name"));
    String statDate = conf.get("stat_date");
    conf.set("stat_date", statDate);
    FileInputFormat.addInputPaths(job, conf.get("input_dir"));
    Path output = new Path(conf.get("output_dir"));
    FileOutputFormat.setOutputPath(job, output);
    output.getFileSystem(conf).delete(output, true);

    job.setJarByClass(UninstallLogFormatMR.class);
    job.setMapperClass(ClientUninstallFormatMapper.class);
    job.setInputFormatClass(LzoTextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Text.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
    return 0;
  }
Пример #7
0
  @Override
  public int run(String[] strings) throws Exception {
    String input_1 = strings[0];
    String input_2 = strings[1];
    String output = strings[2];

    Configuration conf = new Configuration();

    // 删除已有结果集
    FileSystem fs = FileSystem.get(conf);
    Path out = new Path(output);
    if (fs.exists(out)) {
      fs.delete(out, true);
    }

    Job job = Job.getInstance();

    job.setJobName("itemCF-Aggregate-Matrix-yxl");
    job.setJarByClass(getClass());

    job.setMapperClass(S4_Mapper.class);
    //        job.setCombinerClass(S4_Reducer.class); // 这里不能用 combiner 如果本地直接先做一次 reduce 则输出为空
    job.setReducerClass(S4_Reducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setNumReduceTasks(1); // 数据量小 设置一个 reduce 方便查看

    FileInputFormat.addInputPaths(job, input_1 + "," + input_2);
    FileOutputFormat.setOutputPath(job, new Path(output));

    return job.waitForCompletion(true) ? 0 : 1;
  }
  public static void main(String[] args)
      throws IOException, InterruptedException, ClassNotFoundException {

    Path inputPath = new Path(args[0]);
    Path outputDir = new Path(args[1]);

    // Create configuration
    Configuration conf = new Configuration(true);

    // Create job
    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "CountryIncomeConf");
    job.setJarByClass(CountryIncomeConf.class);

    // Decompressing .gz file Ex. foo.csv.gz to foo.csv

    String uri = args[0];
    FileSystem fs = FileSystem.get(URI.create(uri), conf);

    CompressionCodecFactory factory = new CompressionCodecFactory(conf);
    CompressionCodec codec = factory.getCodec(inputPath);
    if (codec == null) {
      System.err.println("No codec found for " + uri);
      System.exit(1);
    }
    String outputUri = CompressionCodecFactory.removeSuffix(uri, codec.getDefaultExtension());
    InputStream in = null;
    OutputStream out = null;
    try {
      in = codec.createInputStream(fs.open(inputPath));
      out = fs.create(new Path(outputUri));
      IOUtils.copyBytes(in, out, conf);
    } finally {
      IOUtils.closeStream(in);
      IOUtils.closeStream(out);
    }

    // Setup MapReduce
    job.setMapperClass(CountryIncomeMapper.class);
    job.setReducerClass(CountryIncomeReducer.class);
    job.setNumReduceTasks(1);

    // Specify key / value
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // Input
    // FileInputFormat.addInputPath(job, inputPath);
    FileInputFormat.addInputPaths(job, outputUri);
    job.setInputFormatClass(TextInputFormat.class);

    // Output
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Delete output if exists
    FileSystem hdfs = FileSystem.get(conf);
    if (hdfs.exists(outputDir)) hdfs.delete(outputDir, true);

    // Execute job
    int code = job.waitForCompletion(true) ? 0 : 1;

    // Counter finding and displaying
    Counters counters = job.getCounters();

    // Displaying counters
    System.out.printf(
        "Missing Fields: %d, Error Count: %d\n",
        counters.findCounter(COUNTERS.MISSING_FIELDS_RECORD_COUNT).getValue(),
        counters.findCounter(COUNTERS.NULL_OR_EMPTY).getValue());

    System.exit(code);
  }