Пример #1
0
  public static void main(String[] args) throws Exception {
    String inputDirectory = "/home/cs246/Desktop/HW2/input";
    String outputDirectory = "/home/cs246/Desktop/HW2/output";
    String centroidDirectory = "/home/cs246/Desktop/HW2/config";

    int iterations = 20;

    for (int i = 1; i <= iterations; i++) {
      Configuration conf = new Configuration();

      String cFile = centroidDirectory + "/c" + i + ".txt";
      String nextCFile = centroidDirectory + "/c" + (i + 1) + ".txt";
      conf.set("CFILE", cFile);
      conf.set("NEXTCFILE", nextCFile);

      String cFile = centroidDirectory + "/c" + i + ".txt";
      String nextCFile = centroidDirectory + "/c" + (i + 1) + ".txt";
      conf.set("CFILE", cFile);
      conf.set("NEXTCFILE", nextCFile);

      Job job = new Job(conf, "HW2_Q4." + i);
      job.setJarByClass(HW2_Q4.class);
      job.setOutputKeyClass(IntWritable.class);
      job.setOutputValueClass(Text.class);
      job.setMapperClass(Map1.class);
      job.setReducerClass(Reduce1.class);
      job.setInputFormatClass(TextInputFormat.class);
      job.setOutputFormatClass(TextOutputFormat.class);

      FileInputFormat.addInputPath(job, new Path(inputDirectory));
      FileOutputFormat.setOutputPath(job, new Path(outputDirectory + "/output" + i));

      job.waitForCompletion(true);
    }
  }
  void checkFormat(Job job) throws Exception {
    TaskAttemptContext attemptContext =
        new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID("123", 0, false, 1, 2));

    MyClassMessagePackBase64LineInputFormat format = new MyClassMessagePackBase64LineInputFormat();
    FileInputFormat.setInputPaths(job, workDir);

    List<InputSplit> splits = format.getSplits(job);
    for (int j = 0; j < splits.size(); j++) {
      RecordReader<LongWritable, MyClassWritable> reader =
          format.createRecordReader(splits.get(j), attemptContext);
      reader.initialize(splits.get(j), attemptContext);

      int count = 0;
      try {
        while (reader.nextKeyValue()) {
          LongWritable key = reader.getCurrentKey();
          MyClassWritable val = reader.getCurrentValue();
          MyClass mc = val.get();
          assertEquals(mc.v, count);
          assertEquals(mc.s, Integer.toString(count));
          count++;
        }
      } finally {
        reader.close();
      }
    }
  }
Пример #3
0
  public int run(String[] args) throws Exception {
    Path tempDir = new Path("/user/akhfa/temp");

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(AuthorCounter.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, tempDir);
    System.exit(job.waitForCompletion(true) ? 0 : 1);

    return 0;
  }
Пример #4
0
  public static void main(String[] args) throws Exception {
    Job job = new Job();
    job.setJarByClass(Sort.class);
    job.setJobName("Sort");

    FileInputFormat.addInputPath(job, new Path("hdfs://localhost:9000/input/"));
    FileOutputFormat.setOutputPath(job, new Path("hdfs://localhost:9000/output/"));
    job.setMapperClass(Map.class);
    // job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(2);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
Пример #5
0
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    Job job = new Job(conf, "wordcount");

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(TopMapper.class);
    job.setReducerClass(TopReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setNumReduceTasks(1);
    job.setJarByClass(WordCount_e3.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.waitForCompletion(true);
  }
  /**
   * @param args
   * @throws IOException
   * @throws ClassNotFoundException
   * @throws InterruptedException
   */
  public static void main(String[] args)
      throws IOException, InterruptedException, ClassNotFoundException {
    // TODO Auto-generated method stub

    Configuration conf = new Configuration();
    // conf.set("inParameter", args[0]);
    Job job = new Job(conf, "MovieGenreIdentifier");

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setJarByClass(UserIdForAge.class);
    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[1]));
    FileOutputFormat.setOutputPath(job, new Path(args[2]));

    job.waitForCompletion(true);
  }
  @Override
  public int run(String[] args) throws Exception {

    Job job = new Job(getConf());
    job.setJarByClass(ElimiateRepeat.class);
    job.setJobName("ElimiateRepeat");

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path("file0*"));
    FileOutputFormat.setOutputPath(job, new Path("elimiateRepeat"));

    boolean success = job.waitForCompletion(true);

    return success ? 0 : 1;
  }
Пример #8
0
  @Override
  public int run(String[] args) throws Exception {

    String locatorHost = args[0];
    int locatorPort = Integer.parseInt(args[1]);
    String hdfsHomeDir = args[2];

    System.out.println(
        "KnownKeysMRv2 invoked with args (locatorHost = "
            + locatorHost
            + " locatorPort = "
            + locatorPort
            + " hdfsHomeDir = "
            + hdfsHomeDir);

    Configuration conf = getConf();
    conf.set(GFInputFormat.INPUT_REGION, "partitionedRegion");
    conf.set(GFInputFormat.HOME_DIR, hdfsHomeDir);
    conf.setBoolean(GFInputFormat.CHECKPOINT, false);
    conf.set(GFOutputFormat.REGION, "validationRegion");
    conf.set(GFOutputFormat.LOCATOR_HOST, locatorHost);
    conf.setInt(GFOutputFormat.LOCATOR_PORT, locatorPort);

    Job job = Job.getInstance(conf, "knownKeysMRv2");
    job.setInputFormatClass(GFInputFormat.class);
    job.setOutputFormatClass(GFOutputFormat.class);

    job.setMapperClass(KnownKeysMRv2Mapper.class);
    job.setMapOutputKeyClass(GFKey.class);
    job.setMapOutputValueClass(PEIWritable.class);

    job.setReducerClass(KnownKeysMRv2Reducer.class);
    // job.setOutputKeyClass(String.class);
    // job.setOutputValueClass(ValueHolder.class);

    return job.waitForCompletion(false) ? 0 : 1;
  }
Пример #9
0
  public static void main(String[] args)
      throws IOException, ClassNotFoundException, InterruptedException {

    Configuration conf = new Configuration();
    Job job = new Job(conf, "job");

    job.setJarByClass(PVidConvert.class);
    job.setMapperClass(Map1.class);
    job.setReducerClass(Reduce1.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path("/tmp/temporary_execution/"));
    job.waitForCompletion(true);

    Configuration conf1 = new Configuration();
    Job job1 = new Job(conf1, "job1");

    job1.setJarByClass(PVidConvert.class);
    job1.setMapperClass(Map2.class);
    job1.setReducerClass(Reduce2.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job1, new Path("/tmp/temporary_execution/"));
    FileOutputFormat.setOutputPath(job1, new Path(args[1]));
    job1.waitForCompletion(true);
  }
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] remainArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    if (remainArgs.length != 2) {
      System.err.println("Usage: wordcount <input> <output>");
      System.exit(1);
    }

    Job job = new Job(conf, "wordcount");
    job.setJarByClass(WordCount.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);

    job.setNumReduceTasks(4);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileSystem.get(conf).delete(new Path(remainArgs[1]), true);

    FileInputFormat.setInputPaths(job, new Path(remainArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(remainArgs[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
  public static void runJob(String mysqlJar, String output) throws Exception {
    Configuration conf = new Configuration();

    JobHelper.addJarForJob(conf, mysqlJar);

    DBConfiguration.configureDB(
        conf,
        "com.mysql.jdbc.Driver",
        "jdbc:mysql://localhost/sqoop_test" + "?user=hip_sqoop_user&password=password");

    Job job = new Job(conf);
    job.setJarByClass(DBImportExportMapReduce.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(DBInputFormat.class);
    job.setOutputFormatClass(DBOutputFormat.class);

    job.setMapOutputKeyClass(StockRecord.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setOutputKeyClass(StockRecord.class);
    job.setOutputValueClass(NullWritable.class);

    job.getConfiguration().setInt("mapred.map.tasks", 4);
    job.setNumReduceTasks(4);

    DBInputFormat.setInput(
        job, StockRecord.class, "select * from stocks", "SELECT COUNT(id) FROM stocks");

    DBOutputFormat.setOutput(job, "stocks_export", StockRecord.fields);

    Path outputPath = new Path(output);

    FileOutputFormat.setOutputPath(job, outputPath);

    outputPath.getFileSystem(conf).delete(outputPath, true);

    job.waitForCompletion(true);
  }
Пример #12
0
  public static void main(String args[]) throws Exception {
    Configuration c = new Configuration();
    if (args.length != 2) {
      System.out.println("provide sufficient arguments");
      System.exit(-1);
    }
    Job job = Job.getInstance(c, "Wordcount");
    job.setJarByClass(Wordcount.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    //	job.submit();
    job.waitForCompletion(true);
  }
Пример #13
0
  public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJarByClass(HadoopNBFilter.class);
    job.setJobName("hadoopnbfilter");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setNumReduceTasks(Integer.parseInt(args[2]));
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    boolean jobCompleted = job.waitForCompletion(true);
    return jobCompleted ? 0 : 1;
  }
Пример #14
0
  public X(String input, String output, Configuration config, FileSystem hdfs) throws Exception {

    Job job = new Job(config);
    job.setJarByClass(codesquare.badges.badge_21_22_23.Pass2.class);
    job.setJobName("");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setNumReduceTasks(1);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path("/user/interns/Extras/bossList.txt"));
    FileOutputFormat.setOutputPath(job, new Path(output));
    try {
      job.waitForCompletion(true);
    } catch (IOException e) {
      System.out.println("No Input Paths to run this MapReduce on!");
    }
  }
  @Override
  public int run(String[] arg0) throws Exception {
    // config a job and start it
    Configuration conf = getConf();
    Job job = new Job(conf, "Index construction..");
    job.setJarByClass(IndexConstructorDriver.class);
    job.setMapperClass(IndexConstructorMapper.class);
    job.setReducerClass(IndexConstructorReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(InvertedListWritable.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    // can add the dir by the config
    FileSystem fs = FileSystem.get(conf);
    String workdir = conf.get("org.joy.crawler.dir", "crawler/");
    fs.delete(new Path(workdir + "indexOutput/"), true);
    FileInputFormat.addInputPath(job, new Path(workdir + "content/"));
    FileOutputFormat.setOutputPath(job, new Path(workdir + "indexOutput/"));
    System.out.println(
        "indexer starts to work, it begins to construct the index, please wait ...\n");
    return job.waitForCompletion(true) ? 0 : 1;
  }
  public int run(String args[]) throws Exception {
    Configuration conf = getConf();

    /* creates a new job with given job name */
    Job job = Job.getInstance(conf, "Mat_vect_mul");
    job.setJarByClass(Mat_vect_mul.class);

    /* adds input-vector file to the cache */
    DistributedCache.addCacheFile(new Path(args[1]).toUri(), job.getConfiguration());

    /* set the key class and value class for the job output */
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    /* set mapper and reducer for the job */
    job.setMapperClass(Map1.class);
    job.setReducerClass(Reduce1.class);

    /* set the input foramt and output format for the job */
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    /* add path to the list of inputs for the map-reduce job */
    FileInputFormat.addInputPath(job, new Path(args[0]));

    /* set path of the output diretcory for the map-reduce job*/
    FileOutputFormat.setOutputPath(job, new Path(args[2]));

    /* Submit the job, then poll for progress until the job is complete */
    boolean succ = job.waitForCompletion(true);
    if (!succ) {
      System.out.println("Job failed, exiting");
      return -1;
    }

    return 0;
  }
Пример #17
0
  public static void main(String[] args) throws Exception {

    final String NAME_NODE = "hdfs://sandbox.hortonworks.com:8020";
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    job.setJarByClass(WordCount.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(NullWritable.class);
    if (args.length > 2) {
      job.setNumReduceTasks(Integer.parseInt(args[2]));
    }

    job.setMapperClass(CountMapper.class);
    job.setReducerClass(CountReducer.class);
    job.setJarByClass(WordCount.class);
    job.setNumReduceTasks(1);

    FileInputFormat.addInputPath(job, new Path(args[0] + "data/plot_summaries.txt"));
    FileSystem fs = FileSystem.get(conf);
    // handle (e.g. delete) existing output path
    Path outputDestination = new Path(args[0] + args[1]);
    if (fs.exists(outputDestination)) {
      fs.delete(outputDestination, true);
    }

    // set output path & start job1
    FileOutputFormat.setOutputPath(job, outputDestination);
    int jobCompletionStatus = job.waitForCompletion(true) ? 0 : 1;
  }