/**
 * Configures and runs a map-only job that joins two inputs through {@code CompositeInputFormat}.
 *
 * <p>Both inputs are read with {@code KeyValueTextInputFormat}, with {@code ","} configured as
 * the key/value separator, and are combined by an {@code "inner"} join expression. The JVM exits
 * with status 0 when the job succeeds and 1 when it fails.
 *
 * @param args {@code args[0]} and {@code args[1]} are the two input paths to join; {@code
 *     args[2]} is the output path
 * @throws IOException propagated from job setup or submission
 * @throws ClassNotFoundException propagated from job submission
 * @throws InterruptedException propagated while waiting for the job to finish
 */
public static void main(String[] args)
    throws IOException, ClassNotFoundException, InterruptedException {
  // Fail with a readable message instead of an ArrayIndexOutOfBoundsException further down.
  if (args.length < 3) {
    System.err.println("Usage: MapsideJoinDriver <input1> <input2> <output>");
    System.exit(2);
  }

  Configuration conf = new Configuration();
  conf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", ",");

  Job job = Job.getInstance(conf);
  job.setJarByClass(MapsideJoinDriver.class);
  job.setMapperClass(MapsideJoinMapper.class);
  job.setInputFormatClass(CompositeInputFormat.class);

  // Swap "inner" for "outer" here to build an outer-join expression instead.
  String expr =
      CompositeInputFormat.compose(
          "inner", KeyValueTextInputFormat.class, new Path(args[0]), new Path(args[1]));
  job.getConfiguration().set("mapreduce.join.expr", expr);

  // Map-only job: no reducers are scheduled.
  job.setNumReduceTasks(0);
  TextOutputFormat.setOutputPath(job, new Path(args[2]));

  // Report the job outcome to the caller through the process exit status.
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Esempio n. 2
0
  /**
   * Configures and runs the "Rank Vector Normalize" job over the join of two rank-vector inputs.
   *
   * <p>The two inputs are read as sequence files and combined with an {@code "inner"} {@code
   * CompositeInputFormat} join expression. The current values of {@code rightHandSideSum} and
   * {@code rankVectorSum} are passed to the tasks through the job configuration.
   *
   * @param oldRankVectorInput path of the first input of the join
   * @param newRankVectorInput path of the second input of the join
   * @param output output path of the job
   * @param l1NormOutput name of the additional named output registered with {@code
   *     MultipleOutputs} (written as {@code NullWritable}/{@code DoubleWritable} sequence file)
   * @return 0 if the job completed successfully, 1 otherwise
   * @throws Exception if job setup or execution fails
   */
  public int run(
      String oldRankVectorInput, String newRankVectorInput, String output, String l1NormOutput)
      throws Exception {
    Configuration conf = new Configuration();
    conf.set("rightHandSideSum", rightHandSideSum + "");
    conf.set("rankVectorSum", rankVectorSum + "");

    Job job = new Job(conf, "Rank Vector Normalize");
    job.setJarByClass(RankVectorNormalize.class);

    job.setMapperClass(RankVectorNormalize.RankVectorNormalizeMapper.class);
    job.setReducerClass(RankVectorNormalize.RankVectorNormalizeReducer.class);

    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(BlockVectorElement.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(BlockVectorElement.class);

    job.setInputFormatClass(CompositeInputFormat.class);
    String joinStatement =
        CompositeInputFormat.compose(
            "inner", SequenceFileInputFormat.class, oldRankVectorInput, newRankVectorInput);
    // The Job works on its own copy of the Configuration taken when it was constructed, so the
    // join expression has to be written to the job's configuration; setting it on conf at this
    // point would never reach the submitted job.
    job.getConfiguration().set("mapred.join.expr", joinStatement);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    FileOutputFormat.setOutputPath(job, new Path(output));

    MultipleOutputs.addNamedOutput(
        job,
        l1NormOutput,
        SequenceFileOutputFormat.class,
        NullWritable.class,
        DoubleWritable.class);

    return job.waitForCompletion(true) ? 0 : 1;
  }
Esempio n. 3
0
  /**
   * The main driver for the join program. Invoke this method to submit the map/reduce job.
   *
   * <p>Recognized options are {@code -r}, {@code -inFormat}, {@code -outFormat}, {@code -outKey},
   * {@code -outValue} and {@code -joinOp}; each one consumes the argument that follows it. Of the
   * remaining arguments, the last one is the output path and all preceding ones are input paths
   * to join, so at least two such arguments are required.
   *
   * @param args command-line options and paths as described above
   * @return 0 if the job completed successfully, 1 if it failed, or the result of {@code
   *     printUsage()} when the arguments are invalid
   * @throws Exception When there is communication problems with the job tracker.
   */
  public int run(String[] args) throws Exception {
    Configuration conf = getConf();

    // Derive the default reducer count from the cluster, then release the client: it is only
    // needed for this status query and was previously never closed.
    int numReduces;
    JobClient client = new JobClient(conf);
    try {
      ClusterStatus cluster = client.getClusterStatus();
      numReduces = (int) (cluster.getMaxReduceTasks() * 0.9);
      String joinReduces = conf.get(REDUCES_PER_HOST);
      if (joinReduces != null) {
        numReduces = cluster.getTaskTrackers() * Integer.parseInt(joinReduces);
      }
    } finally {
      client.close();
    }

    Job job = new Job(conf);
    job.setJobName("join");
    job.setJarByClass(Sort.class);

    job.setMapperClass(Mapper.class);
    job.setReducerClass(Reducer.class);

    // Defaults, each of which can be overridden from the command line below.
    Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class;
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
    Class<? extends Writable> outputValueClass = TupleWritable.class;
    String op = "inner";
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
      try {
        if ("-r".equals(args[i])) {
          numReduces = Integer.parseInt(args[++i]);
        } else if ("-inFormat".equals(args[i])) {
          inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
        } else if ("-outFormat".equals(args[i])) {
          outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
        } else if ("-outKey".equals(args[i])) {
          outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
        } else if ("-outValue".equals(args[i])) {
          outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
        } else if ("-joinOp".equals(args[i])) {
          op = args[++i];
        } else {
          otherArgs.add(args[i]);
        }
      } catch (NumberFormatException except) {
        // i already points at the value that failed to parse.
        System.out.println("ERROR: Integer expected instead of " + args[i]);
        return printUsage();
      } catch (ArrayIndexOutOfBoundsException except) {
        // args[++i] ran past the end, so i - 1 is the option that is missing its value.
        System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
        return printUsage(); // exits
      }
    }

    // Set user-supplied (possibly default) job configs
    job.setNumReduceTasks(numReduces);

    if (otherArgs.size() < 2) {
      System.out.println("ERROR: Wrong number of parameters: ");
      return printUsage();
    }

    // The last positional argument is the output path; everything before it is an input.
    FileOutputFormat.setOutputPath(job, new Path(otherArgs.remove(otherArgs.size() - 1)));
    List<Path> plist = new ArrayList<Path>(otherArgs.size());
    for (String s : otherArgs) {
      plist.add(new Path(s));
    }

    job.setInputFormatClass(CompositeInputFormat.class);
    job.getConfiguration()
        .set(
            CompositeInputFormat.JOIN_EXPR,
            CompositeInputFormat.compose(op, inputFormatClass, plist.toArray(new Path[0])));
    job.setOutputFormatClass(outputFormatClass);

    job.setOutputKeyClass(outputKeyClass);
    job.setOutputValueClass(outputValueClass);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println(
        "The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return ret;
  }