예제 #1
0
  /**
   * Configures and runs the word-count job, then terminates the JVM with the job's status.
   *
   * <p>Arguments:
   *
   * <ol>
   *   <li>{@code args[0]} - base path prefix; input is read from
   *       {@code args[0] + "data/plot_summaries.txt"}
   *   <li>{@code args[1]} - output location, appended to {@code args[0]}; any existing
   *       directory at that location is deleted recursively before the job starts
   *   <li>{@code args[2]} (optional) - number of reduce tasks; defaults to 1
   * </ol>
   *
   * <p>The process exits with 0 when the job succeeds, 1 when it fails, and 2 when too few
   * arguments are supplied.
   *
   * @param args command-line arguments as described above
   * @throws Exception if the job cannot be configured, submitted, or monitored
   */
  public static void main(String[] args) throws Exception {
    if (args.length < 2) {
      System.err.println("Usage: WordCount <base-path> <output-path> [num-reduce-tasks]");
      System.exit(2);
    }

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    job.setJarByClass(WordCount.class);

    job.setMapperClass(CountMapper.class);
    job.setReducerClass(CountReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(NullWritable.class);

    // Default to a single reducer; an explicit third argument takes precedence.
    // (Previously a later unconditional setNumReduceTasks(1) silently discarded it.)
    int numReduceTasks = args.length > 2 ? Integer.parseInt(args[2]) : 1;
    job.setNumReduceTasks(numReduceTasks);

    FileInputFormat.addInputPath(job, new Path(args[0] + "data/plot_summaries.txt"));

    // Remove a previous run's output, otherwise the job refuses to start.
    // Resolve the file system from the path itself so that a fully-qualified base path
    // on a non-default file system is handled correctly.
    Path outputDestination = new Path(args[0] + args[1]);
    FileSystem fs = outputDestination.getFileSystem(conf);
    if (fs.exists(outputDestination)) {
      fs.delete(outputDestination, true);
    }

    FileOutputFormat.setOutputPath(job, outputDestination);

    // Propagate success/failure to the caller through the process exit code.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
  /**
   * Configures and runs the index-construction job.
   *
   * <p>The working directory is read from the configuration key {@code org.joy.crawler.dir}
   * (default {@code "crawler/"}). Input is read as sequence files from
   * {@code <workdir>content/} and the result is written as sequence files to
   * {@code <workdir>indexOutput/}. Any existing output directory is deleted recursively
   * before the job starts.
   *
   * @param arg0 command-line arguments; not used by this method
   * @return 0 if the job completed successfully, 1 otherwise
   * @throws Exception if the job cannot be configured, submitted, or monitored
   */
  @Override
  public int run(String[] arg0) throws Exception {
    Configuration conf = getConf();

    // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
    Job job = Job.getInstance(conf, "Index construction..");
    job.setJarByClass(IndexConstructorDriver.class);
    job.setMapperClass(IndexConstructorMapper.class);
    job.setReducerClass(IndexConstructorReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(InvertedListWritable.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    // The working directory can be overridden through the configuration.
    String workdir = conf.get("org.joy.crawler.dir", "crawler/");
    Path inputPath = new Path(workdir + "content/");
    Path outputPath = new Path(workdir + "indexOutput/");

    // Clear output left over from a previous run; the job will not start if it exists.
    FileSystem fs = FileSystem.get(conf);
    fs.delete(outputPath, true);

    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    System.out.println(
        "indexer starts to work, it begins to construct the index, please wait ...\n");
    return job.waitForCompletion(true) ? 0 : 1;
  }
  /**
   * Configures and runs the "Wordcount" job, then terminates the JVM with the job's status.
   *
   * <p>Exactly two arguments are required: {@code args[0]} is the input path and
   * {@code args[1]} is the output path. The process exits with 0 when the job succeeds,
   * 1 when it fails, and -1 when the argument count is wrong.
   *
   * @param args input path followed by output path
   * @throws Exception if the job cannot be configured, submitted, or monitored
   */
  public static void main(String[] args) throws Exception {
    // Validate before doing any setup work; report usage problems on stderr.
    if (args.length != 2) {
      System.err.println("provide sufficient arguments");
      System.exit(-1);
    }

    Configuration c = new Configuration();
    Job job = Job.getInstance(c, "Wordcount");
    job.setJarByClass(Wordcount.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Block until the job finishes and surface failure through the exit code;
    // previously the result was discarded and the process always exited 0.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
  /**
   * Runs a MapReduce job that reads rows from the {@code stocks} table and writes them to the
   * {@code stocks_export} table.
   *
   * <p>The job is configured with {@link DBInputFormat} / {@link DBOutputFormat}, four map
   * tasks (via {@code mapred.map.tasks}) and four reduce tasks. The given output path is
   * registered with {@code FileOutputFormat} and any existing content at that path is deleted
   * recursively before the job is submitted.
   *
   * @param mysqlJar location of the MySQL JDBC driver JAR, passed to
   *     {@code JobHelper.addJarForJob}
   * @param output file-system output path registered for the job
   * @throws IllegalStateException if the job runs to completion but reports failure
   * @throws Exception if the job cannot be configured, submitted, or monitored
   */
  public static void runJob(String mysqlJar, String output) throws Exception {
    Configuration conf = new Configuration();

    JobHelper.addJarForJob(conf, mysqlJar);

    // NOTE(review): database credentials are hard-coded in the JDBC URL and will be stored
    // in the job configuration. Consider supplying them externally - TODO confirm with owner.
    DBConfiguration.configureDB(
        conf,
        "com.mysql.jdbc.Driver",
        "jdbc:mysql://localhost/sqoop_test" + "?user=hip_sqoop_user&password=password");

    // Job.getInstance replaces the deprecated Job(Configuration) constructor.
    Job job = Job.getInstance(conf);
    job.setJarByClass(DBImportExportMapReduce.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(DBInputFormat.class);
    job.setOutputFormatClass(DBOutputFormat.class);

    job.setMapOutputKeyClass(StockRecord.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setOutputKeyClass(StockRecord.class);
    job.setOutputValueClass(NullWritable.class);

    job.getConfiguration().setInt("mapred.map.tasks", 4);
    job.setNumReduceTasks(4);

    // Second query supplies the row count that the input format uses alongside the data query.
    DBInputFormat.setInput(
        job, StockRecord.class, "select * from stocks", "SELECT COUNT(id) FROM stocks");

    DBOutputFormat.setOutput(job, "stocks_export", StockRecord.fields);

    Path outputPath = new Path(output);
    FileOutputFormat.setOutputPath(job, outputPath);

    // Clear leftovers from a previous run.
    outputPath.getFileSystem(conf).delete(outputPath, true);

    // Do not let a failed job pass silently: the method has no return value, so signal
    // failure with an exception.
    if (!job.waitForCompletion(true)) {
      throw new IllegalStateException("Job failed: " + job.getJobName());
    }
  }
예제 #5
0
  /**
   * Configures and runs the "knownKeysMRv2" job.
   *
   * <p>Expected arguments:
   *
   * <ol>
   *   <li>{@code args[0]} - locator host, stored under {@code GFOutputFormat.LOCATOR_HOST}
   *   <li>{@code args[1]} - locator port (integer), stored under
   *       {@code GFOutputFormat.LOCATOR_PORT}
   *   <li>{@code args[2]} - HDFS home directory, stored under {@code GFInputFormat.HOME_DIR}
   * </ol>
   *
   * <p>The job reads from the region named {@code "partitionedRegion"} and writes to the
   * region named {@code "validationRegion"}.
   *
   * @param args command-line arguments as described above
   * @return 0 if the job completed successfully, 1 if it failed, 2 if too few arguments
   *     were supplied
   * @throws NumberFormatException if {@code args[1]} is not a valid integer
   * @throws Exception if the job cannot be configured, submitted, or monitored
   */
  @Override
  public int run(String[] args) throws Exception {
    // Fail with a usage message rather than an ArrayIndexOutOfBoundsException.
    if (args == null || args.length < 3) {
      System.err.println("Usage: KnownKeysMRv2 <locatorHost> <locatorPort> <hdfsHomeDir>");
      return 2;
    }

    String locatorHost = args[0];
    int locatorPort = Integer.parseInt(args[1]);
    String hdfsHomeDir = args[2];

    System.out.println(
        "KnownKeysMRv2 invoked with args (locatorHost = "
            + locatorHost
            + " locatorPort = "
            + locatorPort
            + " hdfsHomeDir = "
            + hdfsHomeDir
            + ")");

    Configuration conf = getConf();
    conf.set(GFInputFormat.INPUT_REGION, "partitionedRegion");
    conf.set(GFInputFormat.HOME_DIR, hdfsHomeDir);
    conf.setBoolean(GFInputFormat.CHECKPOINT, false);
    conf.set(GFOutputFormat.REGION, "validationRegion");
    conf.set(GFOutputFormat.LOCATOR_HOST, locatorHost);
    conf.setInt(GFOutputFormat.LOCATOR_PORT, locatorPort);

    Job job = Job.getInstance(conf, "knownKeysMRv2");
    job.setInputFormatClass(GFInputFormat.class);
    job.setOutputFormatClass(GFOutputFormat.class);

    job.setMapperClass(KnownKeysMRv2Mapper.class);
    job.setMapOutputKeyClass(GFKey.class);
    job.setMapOutputValueClass(PEIWritable.class);

    job.setReducerClass(KnownKeysMRv2Reducer.class);

    // verbose=false: wait for completion without printing progress to the console.
    return job.waitForCompletion(false) ? 0 : 1;
  }