Пример #1
1
  private void configureMapperTypesIfPossible(Job j, Class<? extends Mapper> mapper) {
    // Find mapper
    Class<?> targetClass = mapper;
    Type targetType = mapper;

    do {
      targetType = targetClass.getGenericSuperclass();
      targetClass = targetClass.getSuperclass();
    } while (targetClass != null
        && targetClass != Object.class
        && !Mapper.class.equals(targetClass));

    if (targetType instanceof ParameterizedType) {
      Type[] params = ((ParameterizedType) targetType).getActualTypeArguments();
      if (params.length == 4) {
        // set each param (if possible);
        if (params[2] instanceof Class) {
          Class<?> clz = (Class<?>) params[2];
          if (!clz.isInterface()) j.setMapOutputKeyClass(clz);
        }

        // set each param (if possible);
        if (params[3] instanceof Class) {
          Class<?> clz = (Class<?>) params[3];
          if (!clz.isInterface()) {
            j.setMapOutputValueClass(clz);
          }
        }
      }
    }
  }
Пример #2
0
  @Override
  public int run(String[] args) throws Exception {

    // TODO Auto-generated method stub

    Configuration conf = getConf();
    // conf.set("stat_date", dateString);

    Job job = new Job(conf, "DayhslogUserDateNewMac");
    job.setJarByClass(DayhslogUserDate.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileInputFormat.addInputPath(job, new Path(args[1]));
    FileOutputFormat.setOutputPath(job, new Path(args[2] + "tmp"));
    FileSystem.get(conf).delete(new Path(args[2] + "tmp"), true);
    job.setMapperClass(DayhslogUserDateNewMacMapper.class);
    job.setReducerClass(DayhslogUserDateNewMacReducer.class);

    // job.setInputFormatClass(LzoTextInputFormat.class);

    job.setOutputFormatClass(TextOutputFormat.class);

    job.setNumReduceTasks(10);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    int code = job.waitForCompletion(true) ? 0 : 1;

    if (code == 0) {
      Job resultJob = new Job(conf, "DayhslogUserDate");

      resultJob.setJarByClass(DayhslogUserDate.class);

      FileInputFormat.addInputPath(resultJob, new Path(args[0]));
      FileInputFormat.addInputPath(resultJob, new Path(args[2] + "tmp"));
      FileOutputFormat.setOutputPath(resultJob, new Path(args[2]));
      FileSystem.get(conf).delete(new Path(args[2]), true);

      resultJob.setMapperClass(DayhslogUserDateMapper.class);
      resultJob.setReducerClass(DayhslogUserDateReducer.class);

      resultJob.setNumReduceTasks(10);

      resultJob.setMapOutputKeyClass(Text.class);
      resultJob.setMapOutputValueClass(Text.class);

      resultJob.setOutputKeyClass(Text.class);
      resultJob.setOutputValueClass(Text.class);

      code = resultJob.waitForCompletion(true) ? 0 : 1;
    }
    Path tmpPath = new Path(args[2] + "tmp");
    FileSystem.get(conf).delete(tmpPath, true);
    System.exit(code);

    return code;
  }
  public static void main(String[] args) throws Exception {

    Configuration conf = new Configuration();
    conf.set("mapred.textoutputformat.separator", ",");
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    //	Logger log  = Logger.getLogger("sds");
    Job job = new Job(conf, "Max ");

    job.setMapOutputKeyClass(CompositeKey.class);

    job.setPartitionerClass(ActualKeyPartitioner.class);
    job.setGroupingComparatorClass(ActualKeyGroupingComparator.class);
    job.setSortComparatorClass(CompositeKeyComparator.class);

    job.setJarByClass(map_reduce.class);
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    job.setNumReduceTasks(27);

    job.setMapOutputKeyClass(CompositeKey.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
Пример #4
0
  @Override
  public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
      System.err.println("Usage: topreviews <in> [<in>...] <out>");
      System.exit(2);
    }

    Job job = Job.getInstance(conf, "Top Five Reviews");
    job.setJarByClass(TopFive.class);
    job.setPartitionerClass(NaturalKeyPartitioner.class);
    job.setGroupingComparatorClass(NaturalKeyGroupingComparator.class);
    job.setSortComparatorClass(CompositeKeyComparator.class);

    job.setMapperClass(TopFiveMapper.class);
    job.setReducerClass(TopFiveReducer.class);

    job.setMapOutputKeyClass(TextPair.class);
    job.setMapOutputValueClass(TextPair.class);

    job.setOutputKeyClass(TextPair.class);
    job.setOutputValueClass(TextPair.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
    return 0;
  }
Пример #5
0
  /**
   * Create a map-only Hadoop Job out of the passed in parameters.  Does not set the
   * Job name.
   *
   * @see #getCustomJobName(String, org.apache.hadoop.mapreduce.JobContext, Class, Class)
   */
  @SuppressWarnings("rawtypes")
	public static Job prepareJob(Path inputPath,
                                 Path outputPath,
                                 Class<? extends InputFormat> inputFormat,
                                 Class<? extends Mapper> mapper,
                                 Class<? extends Writable> mapperKey,
                                 Class<? extends Writable> mapperValue,
                                 Class<? extends OutputFormat> outputFormat, Configuration conf) throws IOException {

    //Job job = new Job(new Configuration(conf));
  	Job job = Job.getInstance(conf);
    Configuration jobConf = job.getConfiguration();

    if (mapper.equals(Mapper.class)) {
        throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
    }
    job.setJarByClass(mapper);

    job.setInputFormatClass(inputFormat);
    jobConf.set("mapred.input.dir", inputPath.toString());

    job.setMapperClass(mapper);
    job.setMapOutputKeyClass(mapperKey);
    job.setMapOutputValueClass(mapperValue);
    job.setOutputKeyClass(mapperKey);
    job.setOutputValueClass(mapperValue);
    jobConf.setBoolean("mapred.compress.map.output", true);
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(outputFormat);
    jobConf.set("mapred.output.dir", outputPath.toString());

    return job;
  }
Пример #6
0
  public static void main(String[] args)
      throws IOException, InterruptedException, ClassNotFoundException {

    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (args.length < 1) {
      System.out.println("USAGE: RFDSReasoner [pool path] [options]");
      return;
    }

    Job job = new Job(conf, "reasoner");
    job.setJarByClass(TCMReasoner.class);
    System.out.println(args[0]);

    job.setMapperClass(TCMMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Triple.class);

    job.setReducerClass(TCMReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Triple.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    job.waitForCompletion(true);
    Counter derivedTriples =
        job.getCounters()
            .findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS");
    System.out.println(derivedTriples.getValue());

    return;
  }
  public static void main(String[] args)
      throws IOException, InterruptedException, ClassNotFoundException {

    if (args.length != 2) throw new IllegalArgumentException(args.length + " usage: ... ");

    String bitvectorpath = args[0], outputPath = args[1];

    Configuration conf = new Configuration();
    Job job = Job.getInstance(new Cluster(conf), conf);
    job.setJobName("int key replace phase1");
    job.setJarByClass(OutlinkGrowthAnalysis.class);

    job.setMapperClass(BVIdentitiyMapper.class);
    job.setReducerClass(AnaylseOLGrowthReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(TabSeperatedTextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    FileInputFormat.setInputPaths(job, new Path(bitvectorpath));

    job.setNumReduceTasks(1);
    job.waitForCompletion(true);
  }
Пример #8
0
  public int run(String[] args) throws Exception {
    // Check input arguments
    if (args.length != 2) {
      System.out.println("Usage: firstprog <input HIB> <output directory>");
      System.exit(0);
    }

    // Initialize and configure MapReduce job
    Job job = Job.getInstance();
    // Set input format class which parses the input HIB and spawns map tasks
    //        job.setInputFormatClass(ImageBundleInputFormat.class);
    job.setInputFormatClass(HibInputFormat.class);
    // Set the driver, mapper, and reducer classes which express the computation
    job.setJarByClass(SampleProgram.class);
    job.setMapperClass(SampleProgramMapper.class);
    job.setReducerClass(SampleProgramReducer.class);
    // Set the types for the key/value pairs passed to/from map and reduce layers
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(FloatImage.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    // Set the input and output paths on the HDFS
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Execute the MapReduce job and block until it complets
    boolean success = job.waitForCompletion(true);

    // Return success or failure
    return success ? 0 : 1;
  }
Пример #9
0
  @Override
  public int run(String[] args) throws Exception {
    final int ret = parseArgs(args);
    if (ret < 0) {
      return ret;
    }

    Job job = Job.getInstance(getConf());
    job.setJarByClass(GreeDiFirst.class);
    job.setJobName(String.format("Coverage-GreeDiFirst[%s %s]", partitionCount, selectCount));

    job.getConfiguration().setInt(Fields.PARTITION_COUNT.get(), partitionCount);
    job.getConfiguration().setInt(Fields.SELECT_COUNT.get(), selectCount);

    job.setNumReduceTasks(partitionCount);

    SetupHelper.getInstance().setSequenceInput(job, inputPath).setTextOutput(job, outputPath);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(DocumentWithVectorWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(GreeDiReducer.class);

    // Delete the output directory if it exists already.
    FileSystem.get(getConf()).delete(new Path(outputPath), true);

    job.waitForCompletion(true);

    return 0;
  }
Пример #10
0
  public static void main(String[] args) throws Exception {

    /// Create JOB 1 to convert all the flight data in to NODE : Graph Structure.
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
      System.err.println("Usage: hubsandspokesload <in> <out> <finalout>");
      System.exit(2);
    }
    Job job = new Job(conf, "hubsandspokesload");
    job.setJarByClass(HubsAndSpokes.class);
    job.setMapperClass(HubSpokeLoadMapper.class);
    job.setReducerClass(HubSpokeLoadReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(NodeWritable.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    boolean b = job.waitForCompletion(true);
    if (!b) {
      System.exit(2);
    }

    /// Call Job 2 where we perform HITS Algorithm to calculate Hub and Spoke
    /// Value at each Node in the graph iteratively.

    dijkstra(otherArgs[1], otherArgs[2]);

    // dijkstra("output1", "finaloutput");

  }
Пример #11
0
  public Job getJob(Configuration conf) throws IOException {
    Job job = new Job(conf, "pivoting");

    job.setJarByClass(PivotingReducer.class);

    job.setMapperClass(Mapper.class);
    job.setReducerClass(PivotingReducer.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapOutputKeyClass(RuleWritable.class);
    job.setMapOutputValueClass(MapWritable.class);
    job.setOutputKeyClass(RuleWritable.class);
    job.setOutputValueClass(MapWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setPartitionerClass(RuleWritable.SourcePartitioner.class);

    FileInputFormat.setInputPaths(job, new Path(conf.get("thrax.work-dir") + "collected"));
    int maxSplitSize = conf.getInt("thrax.max-split-size", 0);
    if (maxSplitSize != 0) FileInputFormat.setMaxInputSplitSize(job, maxSplitSize);

    int numReducers = conf.getInt("thrax.reducers", 4);
    job.setNumReduceTasks(numReducers);

    FileOutputFormat.setOutputPath(job, new Path(conf.get("thrax.work-dir") + "pivoted"));
    FileOutputFormat.setCompressOutput(job, true);

    return job;
  }
Пример #12
0
  /**
   * Run a local map reduce job to read records from HCatalog table and verify if the count is as
   * expected
   *
   * @param readCount
   * @param filter
   * @return
   * @throws Exception
   */
  List<HCatRecord> runMRRead(int readCount, String filter) throws Exception {
    MapRead.readCount = 0;
    readRecords.clear();

    Configuration conf = new Configuration();
    conf.set(HiveConf.ConfVars.METASTORE_INTEGER_JDO_PUSHDOWN.varname, "true");
    Job job = new Job(conf, "hcat mapreduce read test");
    job.setJarByClass(this.getClass());
    job.setMapperClass(HCatMapReduceTest.MapRead.class);

    // input/output settings
    job.setInputFormatClass(HCatInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    HCatInputFormat.setInput(job, dbName, tableName, filter);

    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setNumReduceTasks(0);

    Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceOutput");
    if (fs.exists(path)) {
      fs.delete(path, true);
    }

    TextOutputFormat.setOutputPath(job, path);

    job.waitForCompletion(true);
    Assert.assertEquals(readCount, MapRead.readCount);

    return readRecords;
  }
Пример #13
0
  public int run(String[] args) throws Exception {
    // TODO Auto-generated method stub

    if (args.length < 1) {
      System.out.println("missing input ");
      System.exit(1);
    }

    Configuration conf = new Configuration();
    Job job = new Job(new JobConf(conf));
    job.setJarByClass(InstallApp.class);
    job.setMapperClass(InstallMapper.class);
    job.setReducerClass(InstalleReduce.class);
    job.setJobName("installtaotaosou");
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileSystem fs = FileSystem.get(URI.create(args[0]), conf);
    FileStatus fileList[] = fs.listStatus(new Path(args[0]));

    int length = fileList.length;

    for (int i = 0; i < length; i++) {

      FileInputFormat.addInputPath(job, fileList[i].getPath());
    }

    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setNumReduceTasks(1);
    job.waitForCompletion(true);
    return 0;
  }
Пример #14
0
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "simple feature writer");

    job.setJarByClass(FeatureWriterJob.class);
    job.setMapperClass(MyMapper.class);
    job.setInputFormatClass(GeoMesaInputFormat.class);
    job.setOutputFormatClass(GeoMesaOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(ScalaSimpleFeature.class);
    job.setNumReduceTasks(0);

    Map<String, String> params = new HashMap<String, String>();
    params.put("instanceId", "myinstance");
    params.put("zookeepers", "zoo1,zoo2,zoo3");
    params.put("user", "myuser");
    params.put("password", "mypassword");
    params.put("tableName", "mycatalog");

    Query query = new Query("myfeature", ECQL.toFilter("BBOX(geom, -165,5,-50,75)"));

    GeoMesaInputFormat.configure(job, params, query);

    Map<String, String> outParams = new HashMap<String, String>();
    outParams.put("instanceId", "myinstance");
    outParams.put("zookeepers", "zoo1,zoo2,zoo3");
    outParams.put("user", "myuser");
    outParams.put("password", "mypassword");
    outParams.put("tableName", "mycatalog_2");

    GeoMesaOutputFormat.configureDataStore(job, outParams);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
Пример #15
0
  @Override
  public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, AnalysisMapReduce.class.getName());
    job.setJarByClass(AnalysisMapReduce.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    job.setInputFormatClass(TextInputFormat.class);

    job.setMapperClass(AnalysicMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    //		job.setNumReduceTasks(0);

    job.setNumReduceTasks(1);

    job.setReducerClass(AnalysicReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(NullWritable.class);

    Path out = new Path(args[1]);
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(out)) {
      fs.delete(out, true);
    }
    FileOutputFormat.setOutputPath(job, out);

    return job.waitForCompletion(true) ? 0 : 1;
  }
Пример #16
0
  @Override
  public int run(String[] args) throws Exception {
    if (args.length != 2) {
      System.err.println("Usage: WhiteHouseVisitorDriver <input path> <output path>");
      ToolRunner.printGenericCommandUsage(System.err);
      return -1;
    }

    Configuration conf = new Configuration();
    Job job = new Job(conf);
    job.setJarByClass(WhiteHouseVisitorDriver.class);

    // input file format
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // map/combine/reduce class definition
    job.setMapperClass(WhiteHouseVisitorMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);

    // Key, Value set type definition
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    return job.waitForCompletion(true) ? 0 : 1;
  }
Пример #17
0
  public static void main(String[] args) throws Exception {
    {
      Configuration conf = new Configuration();
      conf.set("mapreduce.job.jar", "wcwin.jar"); // 此处代码,一定放在Job任务前面,否则会报类找不到的异常
      Job job = Job.getInstance(conf, "winjob");
      getConf(conf);
      job.setJarByClass(WordCount.class);
      job.setMapperClass(WMapper.class);
      job.setReducerClass(WReducer.class);
      job.setInputFormatClass(TextInputFormat.class);
      job.setOutputFormatClass(TextOutputFormat.class);

      job.setMapOutputKeyClass(Text.class);
      job.setMapOutputValueClass(IntWritable.class);
      job.setOutputKeyClass(Text.class);
      job.setOutputValueClass(Text.class);

      String path = "/wc/output";
      //			FileSystem fs = FileSystem.get(conf);
      Path p = new Path(path);
      //			if (fs.exists(p)) {
      //				fs.delete(p, true);
      //				System.out.println("输出路径存在,已删除!");
      //			}

      FileInputFormat.setInputPaths(job, "/wc/srcdata");
      FileOutputFormat.setOutputPath(job, p);
      printEnv(job);
      System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
  }
 private void configureMapTasks(String arg, Job job) throws IOException {
   job.setMapperClass(IdentityMapper.class);
   job.setMapOutputKeyClass(keyClass);
   job.setMapOutputValueClass(valueClass);
   job.setInputFormatClass(SequenceFileInputFormat.class);
   SequenceFileInputFormat.setInputPaths(job, arg);
 }
Пример #19
0
  public static void dijkstra(String input, String output) throws Exception {

    String temp = output;

    ///  Run HITS Algorithm JOB:2 For 32 Times
    /// Setting the Value of k-> 32

    for (int i = 0; i < 32; i++) {
      Configuration conf = new Configuration();
      Job job = new Job(conf, "hubsandspokes");
      job.setJarByClass(HubsAndSpokes.class);
      job.setMapperClass(HubSpokeMapper.class);
      job.setReducerClass(HubSpokeReducer.class);
      job.setMapOutputKeyClass(Text.class);
      job.setMapOutputValueClass(NodeWritable.class);
      job.setOutputKeyClass(NodeWritable.class);
      job.setOutputValueClass(Text.class);
      job.setNumReduceTasks(1);
      FileInputFormat.addInputPath(job, new Path(input));
      FileOutputFormat.setOutputPath(job, new Path(output));

      // Toggle the value of Input and Output variable
      // For Next iteration
      input = output;
      output = temp + Integer.toString(i);

      // Wait for completing the JOB
      boolean b = job.waitForCompletion(true);
      if (!b) System.exit(2);
      // System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
  }
Пример #20
0
  /** RUN */
  @Override
  public int run(String[] args) throws Exception {

    if (args.length != 3) {
      System.err.println("Usage: wordcount <input_dir> <output_dir> <reducers>");
      return -1;
    }

    Job job = new Job(getConf(), "PigMix L13");
    job.setJarByClass(L13.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(ReadInput.class);
    job.setReducerClass(Join.class);

    Properties props = System.getProperties();
    Configuration conf = job.getConfiguration();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
      conf.set((String) entry.getKey(), (String) entry.getValue());
    }

    FileInputFormat.addInputPath(job, new Path(args[0] + "/pigmix_page_views"));
    FileInputFormat.addInputPath(job, new Path(args[0] + "/pigmix_power_users_samples"));
    FileOutputFormat.setOutputPath(job, new Path(args[1] + "/L13out"));
    job.setNumReduceTasks(Integer.parseInt(args[2]));

    return job.waitForCompletion(true) ? 0 : -1;
  }
Пример #21
0
  @Override
  public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String jobName = "Rating predictor  MR";
    job.setJobName(jobName);

    job.setJarByClass(UtilityPredictor.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(UtilityPredictor.PredictionMapper.class);
    job.setReducerClass(UtilityPredictor.PredictorReducer.class);

    job.setMapOutputKeyClass(TextInt.class);
    job.setMapOutputValueClass(Tuple.class);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setGroupingComparatorClass(ItemIdGroupComprator.class);
    job.setPartitionerClass(ItemIdPartitioner.class);

    Utility.setConfiguration(job.getConfiguration());
    int numReducer = job.getConfiguration().getInt("utp.num.reducer", -1);
    numReducer = -1 == numReducer ? job.getConfiguration().getInt("num.reducer", 1) : numReducer;
    job.setNumReduceTasks(numReducer);

    int status = job.waitForCompletion(true) ? 0 : 1;
    return status;
  }
Пример #22
0
  @Override
  public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String jobName = "Implicit rating estimator MR";
    job.setJobName(jobName);

    job.setJarByClass(ImplicitRatingEstimator.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(ImplicitRatingEstimator.RatingEstimatorMapper.class);
    job.setReducerClass(ImplicitRatingEstimator.RatingEstimatorReducer.class);

    job.setMapOutputKeyClass(Tuple.class);
    job.setMapOutputValueClass(Tuple.class);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setGroupingComparatorClass(SecondarySort.TuplePairGroupComprator.class);
    job.setPartitionerClass(SecondarySort.TuplePairPartitioner.class);

    Utility.setConfiguration(job.getConfiguration());
    job.setNumReduceTasks(job.getConfiguration().getInt("num.reducer", 1));
    int status = job.waitForCompletion(true) ? 0 : 1;
    return status;
  }
  public int run(String[] args) throws Exception {
    // TODO Auto-generated method stub

    Job job =
        Job.getInstance(
            getConf(),
            "Import vessel locations from files in "
                + args[0]
                + " into table cdb_vessel:vessel_location"); // co

    FileInputFormat.addInputPath(job, new Path(args[0]));

    job.setJarByClass(ImportVTLocationFromFileWithReducer.class);
    job.setJobName("Vessel_location_injection");
    job.setInputFormatClass(VTVesselLocationFileInputFormat.class);
    job.setMapOutputKeyClass(Key_IMOAndRecordTime.class);
    job.setMapOutputValueClass(TextArrayWritable.class);

    job.setPartitionerClass(Partitioner_IMO.class);
    job.setGroupingComparatorClass(GroupComparator_IMO.class);

    job.setReducerClass(ImportReducer.class);
    job.setNumReduceTasks(Integer.parseInt(args[1]));

    job.setOutputFormatClass(NullOutputFormat.class);

    return job.waitForCompletion(true) ? 0 : 1;
  }
Пример #24
0
  /**
   * Driver for InputSampler from the command line. Configures a JobConf instance and calls {@link
   * #writePartitionFile}.
   */
  public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    ArrayList<String> otherArgs = new ArrayList<String>();
    Sampler<K, V> sampler = null;
    for (int i = 0; i < args.length; ++i) {
      try {
        if ("-r".equals(args[i])) {
          job.setNumReduceTasks(Integer.parseInt(args[++i]));
        } else if ("-inFormat".equals(args[i])) {
          job.setInputFormatClass(Class.forName(args[++i]).asSubclass(InputFormat.class));
        } else if ("-keyClass".equals(args[i])) {
          job.setMapOutputKeyClass(Class.forName(args[++i]).asSubclass(WritableComparable.class));
        } else if ("-splitSample".equals(args[i])) {
          int numSamples = Integer.parseInt(args[++i]);
          int maxSplits = Integer.parseInt(args[++i]);
          if (0 >= maxSplits) maxSplits = Integer.MAX_VALUE;
          sampler = new SplitSampler<K, V>(numSamples, maxSplits);
        } else if ("-splitRandom".equals(args[i])) {
          double pcnt = Double.parseDouble(args[++i]);
          int numSamples = Integer.parseInt(args[++i]);
          int maxSplits = Integer.parseInt(args[++i]);
          if (0 >= maxSplits) maxSplits = Integer.MAX_VALUE;
          sampler = new RandomSampler<K, V>(pcnt, numSamples, maxSplits);
        } else if ("-splitInterval".equals(args[i])) {
          double pcnt = Double.parseDouble(args[++i]);
          int maxSplits = Integer.parseInt(args[++i]);
          if (0 >= maxSplits) maxSplits = Integer.MAX_VALUE;
          sampler = new IntervalSampler<K, V>(pcnt, maxSplits);
        } else {
          otherArgs.add(args[i]);
        }
      } catch (NumberFormatException except) {
        System.out.println("ERROR: Integer expected instead of " + args[i]);
        return printUsage();
      } catch (ArrayIndexOutOfBoundsException except) {
        System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
        return printUsage();
      }
    }
    if (job.getNumReduceTasks() <= 1) {
      System.err.println("Sampler requires more than one reducer");
      return printUsage();
    }
    if (otherArgs.size() < 2) {
      System.out.println("ERROR: Wrong number of parameters: ");
      return printUsage();
    }
    if (null == sampler) {
      sampler = new RandomSampler<K, V>(0.1, 10000, 10);
    }

    Path outf = new Path(otherArgs.remove(otherArgs.size() - 1));
    TotalOrderPartitioner.setPartitionFile(getConf(), outf);
    for (String s : otherArgs) {
      FileInputFormat.addInputPath(job, new Path(s));
    }
    InputSampler.<K, V>writePartitionFile(job, sampler);

    return 0;
  }
Пример #25
0
  private void doMapReduce() {
    try {
      Job job = Job.getInstance();

      job.getConfiguration().set(OutputFormat.NAMESPACE, "/");
      job.getConfiguration().set(OutputFormat.TABLE, "LoadTest");
      job.getConfiguration().setInt(OutputFormat.MUTATOR_FLAGS, MutatorFlag.NO_LOG_SYNC.getValue());
      job.getConfiguration().setInt(OutputFormat.MUTATOR_FLUSH_INTERVAL, 0);
      job.getConfiguration().setInt("LoadSplit.TOTAL_ROWS", this.totalRows);
      job.getConfiguration().setInt("LoadSplit.CLIENTS", this.clients);
      job.setJarByClass(LoadTest.class);
      job.setJobName("Hypertable MapReduce connector LoadTest");
      job.setInputFormatClass(LoadInputFormat.class);
      job.setOutputFormatClass(OutputFormat.class);
      job.setMapOutputKeyClass(KeyWritable.class);
      job.setMapOutputValueClass(BytesWritable.class);
      job.setMapperClass(LoadMapper.class);
      job.setReducerClass(LoadReducer.class);
      job.setNumReduceTasks(this.clients);

      job.waitForCompletion(true);
    } catch (Exception e) {
      e.printStackTrace();
    }
  }
Пример #26
0
  @Override
  public int run(String[] args) throws Exception {
    Job job = new Job(conf);
    job.setJarByClass(StatisticsGenerator.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path(args[0]));
    SequenceFileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapperClass(StatisticsMap.class);
    job.setReducerClass(StatisticsReduce.class);
    job.setMapOutputKeyClass(SortedMapWritableComparable.class);
    job.setMapOutputValueClass(BytesWritable.class);
    job.setNumReduceTasks(16);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BytesWritable.class);

    long startTime =
        ManagementFactory.getThreadMXBean().getThreadCpuTime(Thread.currentThread().getId());
    boolean success = job.waitForCompletion(true);
    long endTime =
        ManagementFactory.getThreadMXBean().getThreadCpuTime(Thread.currentThread().getId());
    double duration = (endTime - startTime) / Math.pow(10, 9);
    logger.info(
        "=== Job Finished in " + duration + " seconds " + (success ? "(success)" : "(failure)"));
    return success ? 0 : 1;
  }
Пример #27
0
  private boolean runJob(Configuration conf)
      throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(conf, "WordCount");
    job.setJarByClass(WordCount.class);

    // Configure input format and files
    job.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.addInputPath(job, new Path(inputDir));

    // Configure output format and files
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(outputDir));

    // set up mapper, combiner and reducer
    job.setMapperClass(WordCountMapper.class);
    job.setReducerClass(WordCountReducer.class);
    job.setCombinerClass(WordCountReducer.class);

    // set sorting, grouping and partitioning
    // set key and value types
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    return job.waitForCompletion(true);
  }
Пример #28
0
  public static void main(String[] args)
      throws IOException, InterruptedException, ClassNotFoundException {
    // TODO Auto-generated method stub
    JobConf conf = new JobConf();
    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(5);

    FileSystem fs = FileSystem.get(conf);
    Path dir = new Path(args[0]);
    FileStatus[] stats = fs.listStatus(dir);
    numFiles = stats.length;

    Job job = new Job(conf);
    job.setJarByClass(FileCombiner.class);
    job.setJobName("File Combiner");

    job.setMapperClass(FileCombinerMapper.class);
    job.setReducerClass(FileCombinerReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    // LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.waitForCompletion(true);
  }
Пример #29
0
  @Override
  public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String jobName = "Running aggregates  for numerical attributes";
    job.setJobName(jobName);

    job.setJarByClass(RunningAggregator.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    Utility.setConfiguration(job.getConfiguration(), "chombo");
    job.setMapperClass(RunningAggregator.AggrMapper.class);
    job.setReducerClass(RunningAggregator.AggrReducer.class);

    job.setMapOutputKeyClass(Tuple.class);
    job.setMapOutputValueClass(Tuple.class);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setNumReduceTasks(job.getConfiguration().getInt("num.reducer", 1));

    int status = job.waitForCompletion(true) ? 0 : 1;
    return status;
  }
Пример #30
0
  @Override
  public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJobName("TeraSortCloud");
    job.setJarByClass(this.getClass());
    Opts opts = new Opts();
    opts.parseArgs(TeraSortIngest.class.getName(), args);

    job.setInputFormatClass(RangeInputFormat.class);
    job.setMapperClass(SortGenMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Mutation.class);

    job.setNumReduceTasks(0);

    job.setOutputFormatClass(AccumuloOutputFormat.class);
    opts.setAccumuloConfigs(job);
    BatchWriterConfig bwConfig = new BatchWriterConfig().setMaxMemory(10L * 1000 * 1000);
    AccumuloOutputFormat.setBatchWriterOptions(job, bwConfig);

    Configuration conf = job.getConfiguration();
    conf.setLong(NUMROWS, opts.numRows);
    conf.setInt("cloudgen.minkeylength", opts.minKeyLength);
    conf.setInt("cloudgen.maxkeylength", opts.maxKeyLength);
    conf.setInt("cloudgen.minvaluelength", opts.minValueLength);
    conf.setInt("cloudgen.maxvaluelength", opts.maxValueLength);
    conf.set("cloudgen.tablename", opts.getTableName());

    if (args.length > 10) conf.setInt(NUMSPLITS, opts.splits);

    job.waitForCompletion(true);
    return job.isSuccessful() ? 0 : 1;
  }