示例#1
0
  /**
   * Configures and runs a map-only job named "Avro Writer".
   *
   * <p>The job reads {@code weblog_entries.txt} through {@link TextInputFormat}, maps it with
   * {@code WeblogMapper_Ex_5}, and writes to the {@code output} directory. The output key schema
   * is derived by reflection from {@code WeblogRecord}.
   *
   * @param args command-line arguments; not consulted by this method
   * @return 0 if the job completed successfully, 1 otherwise
   * @throws Exception if job setup or execution fails
   */
  public int run(String[] args) throws Exception {
    final Job job = Job.getInstance(getConf());
    job.setJarByClass(getClass());
    job.setJobName("Avro Writer");

    // Input side: plain text lines from a fixed path.
    job.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, new Path("weblog_entries.txt"));

    // Map-only pipeline: zero reducers, so mapper output is the job output.
    job.setMapperClass(WeblogMapper_Ex_5.class);
    job.setMapOutputKeyClass(AvroWrapper.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(0);

    // Output side: key schema reflected from WeblogRecord, fixed output directory.
    AvroJob.setOutputKeySchema(job, ReflectData.get().getSchema(WeblogRecord.class));
    FileOutputFormat.setOutputPath(job, new Path("output"));

    return job.waitForCompletion(true) ? 0 : 1;
  }
示例#2
0
  /**
   * Configures and runs the "PasmJoin" MapReduce job.
   *
   * <p>Expects exactly two arguments: a comma-separated list of input paths and an output path.
   * Any existing content at the output path is deleted recursively before the job is launched.
   * Input is read with {@code AvroKeyInputFormat} and written with {@code AvroKeyOutputFormat},
   * using {@code outputSchema} both for the map output values and for the final output keys.
   *
   * @param args {@code args[0]} = comma-separated input paths, {@code args[1]} = output path
   * @return -1 if the argument count is wrong, 0 if the job succeeded, 1 if the job failed
   * @throws Exception if job setup, output cleanup, or job execution fails
   */
  public int run(String[] args) throws Exception {
    // Validate arguments before allocating any Hadoop objects.
    if (args.length != 2) {
      System.err.printf(
          "Usage: %s <comma separated paths> <output path>\n", this.getClass().getName());
      return -1;
    }

    // NOTE(review): if the enclosing class is a Hadoop Tool/Configured, getConf() would also
    // honour generic command-line options — confirm and switch if so.
    Configuration conf = new Configuration();

    // Build the job from the same configuration that is used for filesystem access below.
    Job job = Job.getInstance(conf);
    job.setJobName("PasmJoin");
    job.setJarByClass(PsamXY.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(AvroValue.class);
    job.setOutputKeyClass(AvroKey.class);
    job.setOutputValueClass(NullWritable.class);

    job.setMapperClass(PsamXYMapper.class);
    job.setReducerClass(PsamXYReducer.class);

    job.setInputFormatClass(AvroKeyInputFormat.class);
    job.setOutputFormatClass(AvroKeyOutputFormat.class);

    FileInputFormat.setInputPaths(job, args[0]);
    Path output = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, output);

    // Resolve the filesystem from the output path itself so that a fully-qualified URI on a
    // non-default filesystem is handled, then clear any previous output.
    FileSystem fs = output.getFileSystem(job.getConfiguration());
    fs.delete(output, true);

    AvroJob.setOutputKeySchema(job, outputSchema);
    AvroJob.setMapOutputValueSchema(job, outputSchema);

    // DistributedCache.addCacheFile(new Path("BM_TERM_TYPE_DMT.avro").toUri(),
    // job.getConfiguration());

    job.setNumReduceTasks(1);

    // waitForCompletion submits the job itself; propagate failure through the exit code.
    return job.waitForCompletion(true) ? 0 : 1;
  }