Пример #1
0
  public static void main(String[] args) throws Exception {
    String inputDirectory = "/home/cs246/Desktop/HW2/input";
    String outputDirectory = "/home/cs246/Desktop/HW2/output";
    String centroidDirectory = "/home/cs246/Desktop/HW2/config";

    int iterations = 20;

    for (int i = 1; i <= iterations; i++) {
      Configuration conf = new Configuration();

      String cFile = centroidDirectory + "/c" + i + ".txt";
      String nextCFile = centroidDirectory + "/c" + (i + 1) + ".txt";
      conf.set("CFILE", cFile);
      conf.set("NEXTCFILE", nextCFile);

      String cFile = centroidDirectory + "/c" + i + ".txt";
      String nextCFile = centroidDirectory + "/c" + (i + 1) + ".txt";
      conf.set("CFILE", cFile);
      conf.set("NEXTCFILE", nextCFile);

      Job job = new Job(conf, "HW2_Q4." + i);
      job.setJarByClass(HW2_Q4.class);
      job.setOutputKeyClass(IntWritable.class);
      job.setOutputValueClass(Text.class);
      job.setMapperClass(Map1.class);
      job.setReducerClass(Reduce1.class);
      job.setInputFormatClass(TextInputFormat.class);
      job.setOutputFormatClass(TextOutputFormat.class);

      FileInputFormat.addInputPath(job, new Path(inputDirectory));
      FileOutputFormat.setOutputPath(job, new Path(outputDirectory + "/output" + i));

      job.waitForCompletion(true);
    }
  }
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] remainArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    if (remainArgs.length != 2) {
      System.err.println("Usage: wordcount <input> <output>");
      System.exit(1);
    }

    Job job = new Job(conf, "wordcount");
    job.setJarByClass(WordCount.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);

    job.setNumReduceTasks(4);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileSystem.get(conf).delete(new Path(remainArgs[1]), true);

    FileInputFormat.setInputPaths(job, new Path(remainArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(remainArgs[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
  @Override
  public int run(String[] arg0) throws Exception {
    // config a job and start it
    Configuration conf = getConf();
    Job job = new Job(conf, "Index construction..");
    job.setJarByClass(IndexConstructorDriver.class);
    job.setMapperClass(IndexConstructorMapper.class);
    job.setReducerClass(IndexConstructorReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(InvertedListWritable.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    // can add the dir by the config
    FileSystem fs = FileSystem.get(conf);
    String workdir = conf.get("org.joy.crawler.dir", "crawler/");
    fs.delete(new Path(workdir + "indexOutput/"), true);
    FileInputFormat.addInputPath(job, new Path(workdir + "content/"));
    FileOutputFormat.setOutputPath(job, new Path(workdir + "indexOutput/"));
    System.out.println(
        "indexer starts to work, it begins to construct the index, please wait ...\n");
    return job.waitForCompletion(true) ? 0 : 1;
  }
  public static void main(String args[]) throws Exception {
    Configuration c = new Configuration();
    if (args.length != 2) {
      System.out.println("provide sufficient arguments");
      System.exit(-1);
    }
    Job job = Job.getInstance(c, "Wordcount");
    job.setJarByClass(Wordcount.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    //	job.submit();
    job.waitForCompletion(true);
  }
Пример #5
0
 private static Job getJobForClient() throws IOException {
   Job job = Job.getInstance(new Configuration());
   job.getConfiguration().set("mapred.job.tracker", "localhost:" + PORT);
   job.setInputFormatClass(NullInputFormat.class);
   job.setOutputFormatClass(NullOutputFormat.class);
   job.setNumReduceTasks(0);
   return job;
 }
Пример #6
0
  public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJarByClass(HadoopNBFilter.class);
    job.setJobName("hadoopnbfilter");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setNumReduceTasks(Integer.parseInt(args[2]));
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    boolean jobCompleted = job.waitForCompletion(true);
    return jobCompleted ? 0 : 1;
  }
Пример #7
0
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    Job job = new Job(conf, "wordcount");

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(TopMapper.class);
    job.setReducerClass(TopReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setNumReduceTasks(1);
    job.setJarByClass(WordCount_e3.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.waitForCompletion(true);
  }
  @Override
  public int run(String[] args) throws Exception {

    Job job = new Job(getConf());
    job.setJarByClass(ElimiateRepeat.class);
    job.setJobName("ElimiateRepeat");

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path("file0*"));
    FileOutputFormat.setOutputPath(job, new Path("elimiateRepeat"));

    boolean success = job.waitForCompletion(true);

    return success ? 0 : 1;
  }
Пример #9
0
  @Override
  public int run(String[] args) throws Exception {

    String locatorHost = args[0];
    int locatorPort = Integer.parseInt(args[1]);
    String hdfsHomeDir = args[2];

    System.out.println(
        "KnownKeysMRv2 invoked with args (locatorHost = "
            + locatorHost
            + " locatorPort = "
            + locatorPort
            + " hdfsHomeDir = "
            + hdfsHomeDir);

    Configuration conf = getConf();
    conf.set(GFInputFormat.INPUT_REGION, "partitionedRegion");
    conf.set(GFInputFormat.HOME_DIR, hdfsHomeDir);
    conf.setBoolean(GFInputFormat.CHECKPOINT, false);
    conf.set(GFOutputFormat.REGION, "validationRegion");
    conf.set(GFOutputFormat.LOCATOR_HOST, locatorHost);
    conf.setInt(GFOutputFormat.LOCATOR_PORT, locatorPort);

    Job job = Job.getInstance(conf, "knownKeysMRv2");
    job.setInputFormatClass(GFInputFormat.class);
    job.setOutputFormatClass(GFOutputFormat.class);

    job.setMapperClass(KnownKeysMRv2Mapper.class);
    job.setMapOutputKeyClass(GFKey.class);
    job.setMapOutputValueClass(PEIWritable.class);

    job.setReducerClass(KnownKeysMRv2Reducer.class);
    // job.setOutputKeyClass(String.class);
    // job.setOutputValueClass(ValueHolder.class);

    return job.waitForCompletion(false) ? 0 : 1;
  }
  public int run(String args[]) throws Exception {
    Configuration conf = getConf();

    /* creates a new job with given job name */
    Job job = Job.getInstance(conf, "Mat_vect_mul");
    job.setJarByClass(Mat_vect_mul.class);

    /* adds input-vector file to the cache */
    DistributedCache.addCacheFile(new Path(args[1]).toUri(), job.getConfiguration());

    /* set the key class and value class for the job output */
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    /* set mapper and reducer for the job */
    job.setMapperClass(Map1.class);
    job.setReducerClass(Reduce1.class);

    /* set the input foramt and output format for the job */
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    /* add path to the list of inputs for the map-reduce job */
    FileInputFormat.addInputPath(job, new Path(args[0]));

    /* set path of the output diretcory for the map-reduce job*/
    FileOutputFormat.setOutputPath(job, new Path(args[2]));

    /* Submit the job, then poll for progress until the job is complete */
    boolean succ = job.waitForCompletion(true);
    if (!succ) {
      System.out.println("Job failed, exiting");
      return -1;
    }

    return 0;
  }