Java Job.setReducerClass示例，org.apache.hadoop.mapreduce.Job.setReducerClass Java示例

示例#1

0

显示文件

文件： HashCount1.java 项目： Zeldon/BigData_Class

  /**
   * Runs the two-stage pipeline: "Stage 1: Frequency Count" followed by "Stage 2: Sort", which
   * consumes the output directory written by stage 1. All input and output locations are
   * hard-coded HDFS paths.
   *
   * <p>The process exits with status 0 only when both stages succeed.
   *
   * @param args command-line arguments (unused)
   * @throws Exception if a job cannot be configured or submitted
   */
  public static void main(String[] args) throws Exception {

    Configuration conf = new Configuration();
    // conf.addResource(new Path("/home/hadoop/hadoop/hadoop-1.2.1/conf/core-site.xml"));
    // conf.addResource(new Path("/home/hadoop/hadoop/hadoop-1.2.1/conf/hdfs-site.xml"));

    // ===== Stage 1 =====
    Job job1 = new Job(conf, "Stage 1: Frequency Count");
    job1.setJarByClass(HashCount1.class);
    job1.setMapperClass(Mapper1.class);
    // job1.setCombinerClass(Combine1.class);
    job1.setReducerClass(Reducer1.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(IntWritable.class);
    job1.setNumReduceTasks(1);

    FileInputFormat.addInputPath(job1, new Path("/TwitterInFiles/twitter_10k.tar.gz"));
    FileOutputFormat.setOutputPath(job1, new Path("/twitterOuts/output1"));

    // Stage 2 reads stage 1's output, so running it after a failed stage 1 is pointless.
    if (!job1.waitForCompletion(true)) {
      System.exit(1);
    }

    // ===== Stage 2 =====
    Job job2 = new Job(conf, "Stage 2: Sort");
    job2.setJarByClass(HashCount1.class);
    job2.setMapperClass(Mapper2.class);
    job2.setReducerClass(Reducer2.class);
    job2.setOutputKeyClass(IntWritable.class);
    job2.setOutputValueClass(Text.class);
    job2.setNumReduceTasks(1);
    FileInputFormat.addInputPath(job2, new Path("/twitterOuts/output1"));
    FileOutputFormat.setOutputPath(job2, new Path("/twitterOuts/output2"));

    // Wait exactly once; the result determines the exit status.
    System.exit(job2.waitForCompletion(true) ? 0 : 1);
  }

示例#2

0

显示文件

文件： PVidConvert.java 项目： ksadhu/video_conversion

  /**
   * Runs two chained jobs. The first reads {@code args[0]} and writes to the fixed intermediate
   * directory {@code /tmp/temporary_execution/}; the second reads that directory and writes to
   * {@code args[1]}.
   *
   * <p>The process exits with status 1 as soon as either job fails, so the second job is never
   * run against the output of a failed first job.
   *
   * @param args {@code args[0]} is the input path, {@code args[1]} the final output path
   * @throws IOException if job setup or submission fails
   * @throws ClassNotFoundException if a job class cannot be resolved
   * @throws InterruptedException if waiting for a job is interrupted
   */
  public static void main(String[] args)
      throws IOException, ClassNotFoundException, InterruptedException {

    Configuration conf = new Configuration();
    Job job = new Job(conf, "job");

    job.setJarByClass(PVidConvert.class);
    job.setMapperClass(Map1.class);
    job.setReducerClass(Reduce1.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path("/tmp/temporary_execution/"));
    if (!job.waitForCompletion(true)) {
      System.exit(1);
    }

    Configuration conf1 = new Configuration();
    Job job1 = new Job(conf1, "job1");

    job1.setJarByClass(PVidConvert.class);
    job1.setMapperClass(Map2.class);
    job1.setReducerClass(Reduce2.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job1, new Path("/tmp/temporary_execution/"));
    FileOutputFormat.setOutputPath(job1, new Path(args[1]));
    if (!job1.waitForCompletion(true)) {
      System.exit(1);
    }
  }

示例#3

0

显示文件

文件： DayhslogUserDate.java 项目： wisgood/mobile-core

  /**
   * Runs two chained jobs. The first ("DayhslogUserDateNewMac") reads {@code args[0]} and {@code
   * args[1]} and writes to the intermediate directory {@code args[2] + "tmp"}. If it succeeds,
   * the second ("DayhslogUserDate") reads {@code args[0]} plus the intermediate directory and
   * writes to {@code args[2]}. The intermediate directory is deleted afterwards in all cases.
   *
   * <p>NOTE(review): this method terminates the JVM via {@code System.exit}, so the {@code
   * return} is never reached; kept as-is because callers may rely on the exit status.
   *
   * @param args {@code args[0]}, {@code args[1]}: input paths; {@code args[2]}: output path
   * @return 0 on success, 1 on failure (unreachable in practice, see note)
   * @throws Exception if job setup, submission or file-system access fails
   */
  @Override
  public int run(String[] args) throws Exception {

    Configuration conf = getConf();
    // conf.set("stat_date", dateString);

    Path outputPath = new Path(args[2]);
    Path tmpPath = new Path(args[2] + "tmp");

    Job job = new Job(conf, "DayhslogUserDateNewMac");
    job.setJarByClass(DayhslogUserDate.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileInputFormat.addInputPath(job, new Path(args[1]));
    FileOutputFormat.setOutputPath(job, tmpPath);
    // Resolve the file system from the path itself so that a fully qualified URI pointing at a
    // non-default file system is handled correctly.
    tmpPath.getFileSystem(conf).delete(tmpPath, true);
    job.setMapperClass(DayhslogUserDateNewMacMapper.class);
    job.setReducerClass(DayhslogUserDateNewMacReducer.class);

    // job.setInputFormatClass(LzoTextInputFormat.class);

    job.setOutputFormatClass(TextOutputFormat.class);

    job.setNumReduceTasks(10);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    int code = job.waitForCompletion(true) ? 0 : 1;

    if (code == 0) {
      Job resultJob = new Job(conf, "DayhslogUserDate");

      resultJob.setJarByClass(DayhslogUserDate.class);

      FileInputFormat.addInputPath(resultJob, new Path(args[0]));
      FileInputFormat.addInputPath(resultJob, tmpPath);
      FileOutputFormat.setOutputPath(resultJob, outputPath);
      outputPath.getFileSystem(conf).delete(outputPath, true);

      resultJob.setMapperClass(DayhslogUserDateMapper.class);
      resultJob.setReducerClass(DayhslogUserDateReducer.class);

      resultJob.setNumReduceTasks(10);

      resultJob.setMapOutputKeyClass(Text.class);
      resultJob.setMapOutputValueClass(Text.class);

      resultJob.setOutputKeyClass(Text.class);
      resultJob.setOutputValueClass(Text.class);

      code = resultJob.waitForCompletion(true) ? 0 : 1;
    }

    // Always remove the intermediate directory, whether or not the jobs succeeded.
    tmpPath.getFileSystem(conf).delete(tmpPath, true);
    System.exit(code);

    return code;
  }

示例#4

0

显示文件

文件： VoteCount.java 项目： sharayumungel/Hadoop-MapReduce

  /**
   * Runs two chained jobs: "combine votes" reads {@code args[0]} and writes to {@code args[1] +
   * "-tmp"}; "votes count" reads that intermediate directory and writes to {@code args[1]}.
   *
   * <p>The process exits with status 0 only when both jobs succeed.
   *
   * @param args {@code args[0]} is the input path, {@code args[1]} the final output path
   * @throws Exception if a job cannot be configured or submitted
   */
  public static void main(String[] args) throws Exception {

    Configuration conf = new Configuration();
    Path intermediate = new Path(args[1] + "-tmp");

    Job job1 = new Job(conf, "combine votes");
    job1.setJarByClass(VoteCount.class);
    job1.setMapperClass(MergeFilesMapper.class);
    job1.setCombinerClass(MergedFilesReducer.class);
    job1.setReducerClass(MergedFilesReducer.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job1, new Path(args[0]));
    FileOutputFormat.setOutputPath(job1, intermediate);

    // The second job consumes the first job's output; stop here if that output is incomplete.
    if (!job1.waitForCompletion(true)) {
      System.exit(1);
    }

    Job job2 = new Job(conf, "votes count");
    job2.setJarByClass(VoteCount.class);
    job2.setMapperClass(CalculateVotesMapper.class);
    job2.setCombinerClass(CalculateVotesReducer.class);
    job2.setReducerClass(CalculateVotesReducer.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job2, intermediate);
    FileOutputFormat.setOutputPath(job2, new Path(args[1]));

    System.exit(job2.waitForCompletion(true) ? 0 : 1);
  }

示例#5

0

显示文件

文件： DeliverFormatForUVMR.java 项目： wisgood/mobile-core

  /**
   * Runs the formatting job and, if it succeeds, a second single-reducer job ("CombineTmpData")
   * that rewrites the intermediate output into {@code output_dir} with LZO-compressed text
   * output. The intermediate directory {@code output_dir + "_tmp"} is removed afterwards, and the
   * final output is LZO-indexed when both jobs succeeded.
   *
   * <p>Job name, input directories, output directory and reducer count are read from the
   * configuration keys {@code job_name}, {@code input_dir}, {@code output_dir} and {@code
   * reduce_num} (default 20).
   *
   * <p>NOTE(review): this method terminates the JVM via {@code System.exit}, so the {@code
   * return} is never reached; kept as-is because callers may rely on the exit status.
   *
   * @param args command-line arguments, parsed with {@code GenericOptionsParser}
   * @return 0 on success, 1 on failure (unreachable in practice, see note)
   * @throws Exception if job setup, submission or file-system access fails
   */
  public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    GenericOptionsParser optionparser = new GenericOptionsParser(conf, args);
    conf = optionparser.getConfiguration();

    Job job = new Job(conf, conf.get("job_name"));
    job.setJarByClass(DeliverFormatForUVMR.class);
    FileInputFormat.addInputPaths(job, conf.get("input_dir"));
    String outputDir = conf.get("output_dir");
    String tmpDir = outputDir + "_tmp";
    Path tmpOut = new Path(tmpDir);
    FileOutputFormat.setOutputPath(job, tmpOut);
    tmpOut.getFileSystem(conf).delete(tmpOut, true);

    job.setMapperClass(DeliverFormatForUVMapper.class);
    job.setCombinerClass(DeliverFormatForUVCombiner.class);
    job.setReducerClass(DeliverFormatForUVReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setNumReduceTasks(conf.getInt("reduce_num", 20));

    int code = job.waitForCompletion(true) ? 0 : 1;

    if (code == 0) {

      // this job is for combining small files into one
      Job combineJob = new Job(conf, "CombineTmpData");
      combineJob.setJarByClass(DeliverFormatForUVMR.class);

      FileInputFormat.addInputPath(combineJob, new Path(tmpDir));
      FileOutputFormat.setOutputPath(combineJob, new Path(outputDir));
      combineJob.setMapperClass(IdentityMapper.class);
      combineJob.setReducerClass(IdentityReducer.class);

      combineJob.setInputFormatClass(KeyValueTextInputFormat.class);
      combineJob.setOutputFormatClass(TextOutputFormat.class);

      combineJob.setOutputKeyClass(Text.class);
      combineJob.setOutputValueClass(Text.class);

      TextOutputFormat.setCompressOutput(combineJob, true);
      TextOutputFormat.setOutputCompressorClass(combineJob, LzopCodec.class);

      combineJob.setNumReduceTasks(1);
      code = combineJob.waitForCompletion(true) ? 0 : 1;
    }

    // Use the path's own file system, matching the delete performed before the first job.
    tmpOut.getFileSystem(conf).delete(tmpOut, true);

    // Only index output that was actually produced by a successful run.
    if (code == 0) {
      LzoIndexer lzoIndexer = new LzoIndexer(conf);
      lzoIndexer.index(new Path(outputDir));
    }
    System.exit(code);
    return code;
  }

示例#6

0

显示文件

文件： HFileOutputFormat2.java 项目： mringg/hbase

  /**
   * Prepares {@code job} for an incremental load: sets the output key/value/format classes,
   * picks a sorting reducer from the job's map output value class, registers the extra
   * serializations, sets one reduce task per region start key, and applies the partitioner,
   * compression, bloom type, block size and data block encoding settings.
   *
   * @param job the job to configure
   * @param tableDescriptor descriptor passed to the per-table configuration helpers
   * @param regionLocator source of the table name and region start keys
   * @param cls output format class to install on the job
   * @throws IOException if a helper called here fails with an I/O error
   * @throws UnsupportedEncodingException declared by the original signature
   */
  static void configureIncrementalLoad(
      Job job,
      HTableDescriptor tableDescriptor,
      RegionLocator regionLocator,
      Class<? extends OutputFormat<?, ?>> cls)
      throws IOException, UnsupportedEncodingException {
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setOutputFormatClass(cls);

    // Based on the configured map output class, set the correct reducer to properly
    // sort the incoming values.
    // TODO it would be nice to pick one or the other of these formats.
    final Class<?> mapOutputValueClass = job.getMapOutputValueClass();
    if (KeyValue.class.equals(mapOutputValueClass)) {
      job.setReducerClass(KeyValueSortReducer.class);
    } else if (Put.class.equals(mapOutputValueClass)) {
      job.setReducerClass(PutSortReducer.class);
    } else if (Text.class.equals(mapOutputValueClass)) {
      job.setReducerClass(TextSortReducer.class);
    } else {
      LOG.warn("Unknown map output value type:" + mapOutputValueClass);
    }

    final Configuration conf = job.getConfiguration();
    final String existingSerializations = conf.get("io.serializations");
    conf.setStrings(
        "io.serializations",
        existingSerializations,
        MutationSerialization.class.getName(),
        ResultSerialization.class.getName(),
        KeyValueSerialization.class.getName());

    final boolean localitySensitive =
        conf.getBoolean(LOCALITY_SENSITIVE_CONF_KEY, DEFAULT_LOCALITY_SENSITIVE);
    if (localitySensitive) {
      // record this table name for creating writer by favored nodes
      LOG.info("bulkload locality sensitive enabled");
      conf.set(OUTPUT_TABLE_NAME_CONF_KEY, regionLocator.getName().getNameAsString());
    }

    // Use table's region boundaries for TOP split points.
    LOG.info("Looking up current regions for table " + regionLocator.getName());
    List<ImmutableBytesWritable> startKeys = getRegionStartKeys(regionLocator);
    final int partitionCount = startKeys.size();
    LOG.info(
        "Configuring " + partitionCount + " reduce partitions to match current region count");
    job.setNumReduceTasks(partitionCount);

    configurePartitioner(job, startKeys);
    // Set compression algorithms based on column families
    configureCompression(conf, tableDescriptor);
    configureBloomType(tableDescriptor, conf);
    configureBlockSize(tableDescriptor, conf);
    configureDataBlockEncoding(tableDescriptor, conf);

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    LOG.info("Incremental table " + regionLocator.getName() + " output configured.");
  }

示例#7

0

显示文件

文件： ClientUserInstallMR.java 项目： wisgood/mobile-core

  /**
   * Runs two chained jobs. "ClientUserInstallMR" (30 reducers) reads the {@code input_dir}
   * configuration value and writes to {@code output_dir + "_tmp"}; if it succeeds,
   * "ClientUserInstallResult" (1 reducer) reads that intermediate directory and writes to {@code
   * output_dir}. The intermediate directory is deleted afterwards in all cases.
   *
   * <p>NOTE(review): this method terminates the JVM via {@code System.exit}, so the {@code
   * return} is never reached; kept as-is because callers may rely on the exit status.
   *
   * @param args command-line arguments, parsed with {@code GenericOptionsParser}
   * @return 0 on success, 1 on failure (unreachable in practice, see note)
   * @throws Exception if job setup, submission or file-system access fails
   */
  public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    GenericOptionsParser gop = new GenericOptionsParser(conf, args);
    conf = gop.getConfiguration();

    Job job = new Job(conf, "ClientUserInstallMR");
    job.setJarByClass(ClientUserInstallMR.class);
    FileInputFormat.addInputPaths(job, conf.get("input_dir"));
    String outputDir = conf.get("output_dir");

    String tmpDir = outputDir + "_tmp";
    Path tmpOutput = new Path(tmpDir);
    FileOutputFormat.setOutputPath(job, tmpOutput);
    tmpOutput.getFileSystem(conf).delete(tmpOutput, true);

    job.setMapperClass(ClientUserInstallFirstMapper.class);
    job.setReducerClass(ClientUserInstallFirstReduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setNumReduceTasks(30);

    int code = job.waitForCompletion(true) ? 0 : 1;

    if (code == 0) {
      // The former conf.set("stat_date", conf.get("stat_date")) was dropped: it wrote the value
      // back unchanged and threw when the key was not set.
      Job secondJob = new Job(conf, "ClientUserInstallResult");
      secondJob.setJarByClass(ClientUserInstallMR.class);

      FileInputFormat.addInputPath(secondJob, new Path(tmpDir));
      Path output = new Path(outputDir);
      FileOutputFormat.setOutputPath(secondJob, output);
      output.getFileSystem(conf).delete(output, true);

      secondJob.setMapperClass(ClientUserInstallSecondMapper.class);
      secondJob.setReducerClass(ClientUserInstallSecondReduce.class);

      secondJob.setInputFormatClass(KeyValueTextInputFormat.class);
      secondJob.setOutputFormatClass(TextOutputFormat.class);
      secondJob.setOutputKeyClass(Text.class);
      secondJob.setOutputValueClass(Text.class);

      secondJob.setNumReduceTasks(1);

      code = secondJob.waitForCompletion(true) ? 0 : 1;
    }

    // Use the path's own file system, matching the delete performed before the first job.
    tmpOutput.getFileSystem(conf).delete(tmpOutput, true);
    System.exit(code);
    return code;
  }

示例#8

0

显示文件

文件： LoadTest.java 项目： hypertable/hypertable

  /**
   * Configures and runs the load-test job against the "LoadTest" table in namespace "/", using
   * {@code this.totalRows} and {@code this.clients} for the split settings and {@code
   * this.clients} reduce tasks.
   *
   * <p>Failures do not propagate: exceptions are printed, and an unsuccessful job is reported on
   * standard error.
   */
  private void doMapReduce() {
    try {
      Job job = Job.getInstance();

      job.getConfiguration().set(OutputFormat.NAMESPACE, "/");
      job.getConfiguration().set(OutputFormat.TABLE, "LoadTest");
      job.getConfiguration().setInt(OutputFormat.MUTATOR_FLAGS, MutatorFlag.NO_LOG_SYNC.getValue());
      job.getConfiguration().setInt(OutputFormat.MUTATOR_FLUSH_INTERVAL, 0);
      job.getConfiguration().setInt("LoadSplit.TOTAL_ROWS", this.totalRows);
      job.getConfiguration().setInt("LoadSplit.CLIENTS", this.clients);
      job.setJarByClass(LoadTest.class);
      job.setJobName("Hypertable MapReduce connector LoadTest");
      job.setInputFormatClass(LoadInputFormat.class);
      job.setOutputFormatClass(OutputFormat.class);
      job.setMapOutputKeyClass(KeyWritable.class);
      job.setMapOutputValueClass(BytesWritable.class);
      job.setMapperClass(LoadMapper.class);
      job.setReducerClass(LoadReducer.class);
      job.setNumReduceTasks(this.clients);

      // A job can finish unsuccessfully without throwing; make that visible.
      if (!job.waitForCompletion(true)) {
        System.err.println("LoadTest MapReduce job did not complete successfully");
      }
    } catch (InterruptedException e) {
      // Restore the interrupt flag so callers further up can still observe the interruption.
      Thread.currentThread().interrupt();
      e.printStackTrace();
    } catch (Exception e) {
      e.printStackTrace();
    }
  }

示例#9

0

显示文件

文件： GreeDiFirst.java 项目： uvictor/wikimining

  /**
   * Parses the arguments, configures the job with {@code partitionCount} reduce tasks, deletes
   * any existing output directory and runs the job.
   *
   * @param args command-line arguments, handed to {@code parseArgs}
   * @return the negative value from {@code parseArgs} if argument parsing fails; otherwise 0 if
   *     the job succeeds and 1 if it fails
   * @throws Exception if job setup, submission or file-system access fails
   */
  @Override
  public int run(String[] args) throws Exception {
    final int ret = parseArgs(args);
    if (ret < 0) {
      return ret;
    }

    Job job = Job.getInstance(getConf());
    job.setJarByClass(GreeDiFirst.class);
    job.setJobName(String.format("Coverage-GreeDiFirst[%s %s]", partitionCount, selectCount));

    job.getConfiguration().setInt(Fields.PARTITION_COUNT.get(), partitionCount);
    job.getConfiguration().setInt(Fields.SELECT_COUNT.get(), selectCount);

    job.setNumReduceTasks(partitionCount);

    SetupHelper.getInstance().setSequenceInput(job, inputPath).setTextOutput(job, outputPath);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(DocumentWithVectorWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(GreeDiReducer.class);

    // Delete the output directory if it exists already.
    FileSystem.get(getConf()).delete(new Path(outputPath), true);

    // Report the job's outcome instead of unconditionally returning success.
    return job.waitForCompletion(true) ? 0 : 1;
  }

示例#10

0

显示文件

文件： BrowerLogFormatMR.java 项目： wisgood/mobile-core

  /**
   * Configures and runs a single job whose name, input directories and output directory come
   * from the configuration keys {@code job_name}, {@code input_dir} and {@code output_dir}. Any
   * existing output directory is deleted before the job starts.
   *
   * @param args command-line arguments, parsed with {@code GenericOptionsParser}
   * @return 0 if the job succeeds, 1 otherwise
   * @throws Exception if job setup, submission or file-system access fails
   */
  public int run(String[] args) throws Exception {
    Configuration conf = new GenericOptionsParser(getConf(), args).getConfiguration();

    Job job = new Job(conf, conf.get("job_name"));

    // Classes that make up the job.
    job.setJarByClass(BrowerLogFormatMR.class);
    job.setMapperClass(BrowerLogFormatMapper.class);
    job.setReducerClass(BrowerLogFormatReducer.class);

    // Input/output formats and record types.
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1);

    // Input and output locations; a previous output directory is removed first.
    FileInputFormat.addInputPaths(job, conf.get("input_dir"));
    Path outputDir = new Path(conf.get("output_dir"));
    FileOutputFormat.setOutputPath(job, outputDir);
    outputDir.getFileSystem(conf).delete(outputDir, true);

    boolean succeeded = job.waitForCompletion(true);
    if (succeeded) {
      return 0;
    }
    return 1;
  }

示例#11

0

显示文件

文件： Apriori.java 项目： UCLAScAi/StreamMill

  /**
   * Runs the same job up to {@code numOfIterations} times (only once when {@code
   * onlyLocalIteration} is set), writing iteration {@code i} to {@code <out> + i}, then prints
   * the elapsed time and the {@code Apriori$TotalSum / STARTS_WITH_DIGIT} counter of the last
   * job.
   *
   * @param args generic Hadoop options followed by {@code <in> <out>}
   * @throws Exception if a job cannot be configured or submitted
   */
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    int simpleCount = 0;
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
      System.err.println("Usage: wordcount <in> <out>");
      System.exit(2);
    }
    double startTime = System.currentTimeMillis();
    Job job = null;
    while (simpleCount < numOfIterations) {
      job = new Job(conf, "Fixed Iteration Experiment");
      FileOutputFormat.setOutputPath(job, new Path(otherArgs[1] + simpleCount));
      job.setJarByClass(Apriori.class);
      job.setMapperClass(TokenizerMapper.class);
      job.setCombinerClass(IntSumReducer.class);
      job.setReducerClass(IntSumReducer.class);
      job.setOutputKeyClass(Text.class);
      job.setOutputValueClass(IntWritable.class);
      FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
      job.waitForCompletion(true);
      simpleCount++;
      if (onlyLocalIteration) {
        break;
      }
    }

    // With a non-positive iteration count the loop never runs and there is no job to query.
    if (job == null) {
      System.err.println("No iteration was run; no counters to report.");
      System.exit(1);
    }

    org.apache.hadoop.mapreduce.Counter c =
        job.getCounters().findCounter("Apriori$TotalSum", "STARTS_WITH_DIGIT");
    if (onlyLocalIteration) {
      System.out.print("Hybrid Iteration: ");
    }
    System.out.println(
        "Total time: "
            + (System.currentTimeMillis() - startTime)
            + "ms"
            + " my count: "
            + c.getValue());
  }

示例#12

0

显示文件

文件： RunningAggregator.java 项目： vbajaria/chombo

  /**
   * Configures and runs the running-aggregate job, reading from {@code args[0]} and writing to
   * {@code args[1]}. The reducer count comes from the {@code num.reducer} configuration value
   * (default 1).
   *
   * @param args {@code args[0]} is the input path, {@code args[1]} the output path
   * @return 0 if the job succeeds, 1 otherwise
   * @throws Exception if job setup or submission fails
   */
  @Override
  public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJobName("Running aggregates  for numerical attributes");
    job.setJarByClass(RunningAggregator.class);

    Path inputPath = new Path(args[0]);
    Path outputPath = new Path(args[1]);
    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    Utility.setConfiguration(job.getConfiguration(), "chombo");

    // Mapper side.
    job.setMapperClass(RunningAggregator.AggrMapper.class);
    job.setMapOutputKeyClass(Tuple.class);
    job.setMapOutputValueClass(Tuple.class);

    // Reducer side.
    job.setReducerClass(RunningAggregator.AggrReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    int reducerCount = job.getConfiguration().getInt("num.reducer", 1);
    job.setNumReduceTasks(reducerCount);

    if (job.waitForCompletion(true)) {
      return 0;
    }
    return 1;
  }

示例#13

0

显示文件

文件： OutlinkGrowthAnalysis.java 项目： yavcular/WebGraphConstruction

  /**
   * Configures and runs the single-reducer job "int key replace phase1", reading from {@code
   * args[0]} and writing sequence-file output to {@code args[1]}.
   *
   * <p>The process exits with status 1 when the job fails; on success the method returns
   * normally.
   *
   * @param args exactly two values: the input path and the output path
   * @throws IllegalArgumentException if {@code args} does not contain exactly two values
   * @throws IOException if job setup or submission fails
   * @throws InterruptedException if waiting for the job is interrupted
   * @throws ClassNotFoundException if a job class cannot be resolved
   */
  public static void main(String[] args)
      throws IOException, InterruptedException, ClassNotFoundException {

    if (args.length != 2) {
      throw new IllegalArgumentException(args.length + " usage: ... ");
    }

    String bitvectorpath = args[0];
    String outputPath = args[1];

    Configuration conf = new Configuration();
    Job job = Job.getInstance(new Cluster(conf), conf);
    job.setJobName("int key replace phase1");
    job.setJarByClass(OutlinkGrowthAnalysis.class);

    job.setMapperClass(BVIdentitiyMapper.class);
    job.setReducerClass(AnaylseOLGrowthReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(TabSeperatedTextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    FileInputFormat.setInputPaths(job, new Path(bitvectorpath));

    job.setNumReduceTasks(1);

    // Surface a failed job through the exit status instead of silently returning.
    if (!job.waitForCompletion(true)) {
      System.exit(1);
    }
  }

示例#14

0

显示文件

文件： WordCount.java 项目： dimajix/hadoop-training

  /**
   * Builds and runs the "WordCount" job, reading text from {@code inputDir} and writing text to
   * {@code outputDir}. The reducer class is also installed as the combiner.
   *
   * @param conf configuration the job is created from
   * @return the value returned by {@code Job.waitForCompletion(true)}
   * @throws IOException if job setup or submission fails
   * @throws InterruptedException if waiting for the job is interrupted
   * @throws ClassNotFoundException if a job class cannot be resolved
   */
  private boolean runJob(Configuration conf)
      throws IOException, InterruptedException, ClassNotFoundException {
    final Job wordCount = Job.getInstance(conf, "WordCount");
    wordCount.setJarByClass(WordCount.class);

    // Processing classes: mapper, combiner and reducer.
    wordCount.setMapperClass(WordCountMapper.class);
    wordCount.setReducerClass(WordCountReducer.class);
    wordCount.setCombinerClass(WordCountReducer.class);

    // Key and value types for the map output and the final output.
    wordCount.setMapOutputKeyClass(Text.class);
    wordCount.setMapOutputValueClass(IntWritable.class);
    wordCount.setOutputKeyClass(Text.class);
    wordCount.setOutputValueClass(IntWritable.class);

    // Input: text files below inputDir.
    wordCount.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.addInputPath(wordCount, new Path(inputDir));

    // Output: text files below outputDir.
    wordCount.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(wordCount, new Path(outputDir));

    boolean succeeded = wordCount.waitForCompletion(true);
    return succeeded;
  }

示例#15

0

显示文件

文件： UtilityPredictor.java 项目： pranab/sifarish

  /**
   * Configures and runs the rating predictor job, reading from the path list in {@code args[0]}
   * and writing to {@code args[1]}. The reducer count is taken from {@code utp.num.reducer},
   * falling back to {@code num.reducer} (default 1) when the former reads as -1.
   *
   * @param args {@code args[0]} is the input path list, {@code args[1]} the output path
   * @return 0 if the job succeeds, 1 otherwise
   * @throws Exception if job setup or submission fails
   */
  @Override
  public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJobName("Rating predictor  MR");
    job.setJarByClass(UtilityPredictor.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Mapper side.
    job.setMapperClass(UtilityPredictor.PredictionMapper.class);
    job.setMapOutputKeyClass(TextInt.class);
    job.setMapOutputValueClass(Tuple.class);

    // Reducer side.
    job.setReducerClass(UtilityPredictor.PredictorReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    // Shuffle behaviour.
    job.setGroupingComparatorClass(ItemIdGroupComprator.class);
    job.setPartitionerClass(ItemIdPartitioner.class);

    Utility.setConfiguration(job.getConfiguration());

    // The job-specific reducer count wins; -1 means "not set", so use the generic one.
    int reducerCount = job.getConfiguration().getInt("utp.num.reducer", -1);
    if (reducerCount == -1) {
      reducerCount = job.getConfiguration().getInt("num.reducer", 1);
    }
    job.setNumReduceTasks(reducerCount);

    if (job.waitForCompletion(true)) {
      return 0;
    }
    return 1;
  }

示例#16

0

显示文件

文件： ParaphrasePivotingJob.java 项目： fone4u/thrax

  /**
   * Builds (without running) the "pivoting" job. It reads sequence files from {@code
   * thrax.work-dir + "collected"} and writes compressed sequence files to {@code thrax.work-dir +
   * "pivoted"}. The reducer count comes from {@code thrax.reducers} (default 4); a non-zero
   * {@code thrax.max-split-size} is applied as the maximum input split size.
   *
   * @param conf configuration the job is created from and the settings are read from
   * @return the configured job
   * @throws IOException if the job cannot be created or its paths cannot be set
   */
  public Job getJob(Configuration conf) throws IOException {
    final String workDir = conf.get("thrax.work-dir");

    Job pivoting = new Job(conf, "pivoting");
    pivoting.setJarByClass(PivotingReducer.class);

    // Processing classes.
    pivoting.setMapperClass(Mapper.class);
    pivoting.setReducerClass(PivotingReducer.class);
    pivoting.setPartitionerClass(RuleWritable.SourcePartitioner.class);

    // Record types.
    pivoting.setMapOutputKeyClass(RuleWritable.class);
    pivoting.setMapOutputValueClass(MapWritable.class);
    pivoting.setOutputKeyClass(RuleWritable.class);
    pivoting.setOutputValueClass(MapWritable.class);

    // Input side.
    pivoting.setInputFormatClass(SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(pivoting, new Path(workDir + "collected"));
    int maxSplitSize = conf.getInt("thrax.max-split-size", 0);
    if (maxSplitSize != 0) {
      FileInputFormat.setMaxInputSplitSize(pivoting, maxSplitSize);
    }

    pivoting.setNumReduceTasks(conf.getInt("thrax.reducers", 4));

    // Output side.
    pivoting.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(pivoting, new Path(workDir + "pivoted"));
    FileOutputFormat.setCompressOutput(pivoting, true);

    return pivoting;
  }

示例#17

0

显示文件

文件： HubsAndSpokes.java 项目： kidaak/Hadoop-MapReduce-1

  /**
   * Runs the "hubsandspokes" job 32 times in a row, feeding each round's output directory into
   * the next round. Round 0 reads {@code input} and writes to {@code output}; round {@code i}
   * (for {@code i >= 1}) reads the previous round's output and writes to {@code output + (i -
   * 1)}. The process exits with status 2 as soon as a round fails.
   *
   * @param input directory read by the first round
   * @param output directory written by the first round, and name prefix for later rounds
   * @throws Exception if a job cannot be configured or submitted
   */
  public static void dijkstra(String input, String output) throws Exception {

    final String outputBase = output;
    String roundInput = input;
    String roundOutput = output;

    // Fixed number of rounds (k = 32).
    for (int round = 0; round < 32; round++) {
      Job job = new Job(new Configuration(), "hubsandspokes");
      job.setJarByClass(HubsAndSpokes.class);
      job.setMapperClass(HubSpokeMapper.class);
      job.setReducerClass(HubSpokeReducer.class);
      job.setMapOutputKeyClass(Text.class);
      job.setMapOutputValueClass(NodeWritable.class);
      job.setOutputKeyClass(NodeWritable.class);
      job.setOutputValueClass(Text.class);
      job.setNumReduceTasks(1);
      FileInputFormat.addInputPath(job, new Path(roundInput));
      FileOutputFormat.setOutputPath(job, new Path(roundOutput));

      // Block until this round is done; abort the whole run on failure.
      if (!job.waitForCompletion(true)) {
        System.exit(2);
      }

      // This round's output becomes the next round's input.
      roundInput = roundOutput;
      roundOutput = outputBase + round;
    }
  }

示例#18

0

显示文件

文件： HubsAndSpokes.java 项目： kidaak/Hadoop-MapReduce-1

  /**
   * Runs the "hubsandspokesload" job from {@code <in>} to {@code <out>} and then hands {@code
   * <out>} and {@code <finalout>} to {@code dijkstra} for the iterative phase. The process exits
   * with status 2 on a usage error or when the load job fails.
   *
   * @param args generic Hadoop options followed by {@code <in> <out> <finalout>}
   * @throws Exception if a job cannot be configured or submitted
   */
  public static void main(String[] args) throws Exception {

    Configuration conf = new Configuration();
    String[] remaining = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (remaining.length != 3) {
      System.err.println("Usage: hubsandspokesload <in> <out> <finalout>");
      System.exit(2);
    }
    final String loadInput = remaining[0];
    final String loadOutput = remaining[1];
    final String finalOutput = remaining[2];

    // Job 1: load the input into the NodeWritable/Text form consumed by the iterative phase.
    Job loadJob = new Job(conf, "hubsandspokesload");
    loadJob.setJarByClass(HubsAndSpokes.class);
    loadJob.setMapperClass(HubSpokeLoadMapper.class);
    loadJob.setReducerClass(HubSpokeLoadReducer.class);
    loadJob.setMapOutputKeyClass(Text.class);
    loadJob.setMapOutputValueClass(Text.class);
    loadJob.setOutputKeyClass(NodeWritable.class);
    loadJob.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(loadJob, new Path(loadInput));
    FileOutputFormat.setOutputPath(loadJob, new Path(loadOutput));

    boolean loaded = loadJob.waitForCompletion(true);
    if (!loaded) {
      System.exit(2);
    }

    // Job 2: iterative phase over the loaded data.
    dijkstra(loadOutput, finalOutput);
  }

示例#19

0

显示文件

文件： OSMGrid.java 项目： dzuongld/SP-GiST

 /**
  * Configures and runs the "OSM-Gridding" job. The bounding box ({@code args[2]}..{@code
  * args[5]}) and grid size ({@code args[6]}) are passed to the tasks through the job
  * configuration, and the job uses {@code grid * grid} reduce tasks.
  *
  * <p>NOTE(review): the job is built from a fresh {@code Configuration} rather than one supplied
  * by the caller — confirm whether that is intended.
  *
  * @param args {@code <in> <out> <minlat> <minlon> <maxlat> <maxlon> <grid>}
  * @return 0 if the job succeeds, 1 otherwise
  * @throws NumberFormatException if {@code args[6]} is not an integer
  * @throws Exception if job setup or submission fails
  */
 @Override
 public int run(String[] args) throws Exception {
   if (args.length < 7) {
     // Explain the failure instead of exiting silently.
     System.err.println("Usage: <in> <out> <minlat> <minlon> <maxlat> <maxlon> <grid>");
     System.exit(-1);
   }
   // Parse once; the value is needed both as text (configuration) and as a number.
   int grid = Integer.parseInt(args[6]);

   Configuration conf = new Configuration();
   Job job = Job.getInstance(conf, "OSM-Gridding");
   job.setJarByClass(OSMGrid.class);
   job.setOutputKeyClass(WritablePoint.class);
   job.setOutputValueClass(LongWritable.class);
   job.setMapperClass(OSMMapper.class);
   job.setPartitionerClass(GridPartitioner.class);
   job.setReducerClass(OSMReducer.class);
   job.setInputFormatClass(TextInputFormat.class);
   job.setOutputFormatClass(TextOutputFormat.class);
   FileInputFormat.addInputPath(job, new Path(args[0]));
   FileOutputFormat.setOutputPath(job, new Path(args[1]));
   job.getConfiguration().set(OSMMapper.MINLAT, args[2]);
   job.getConfiguration().set(OSMMapper.MINLON, args[3]);
   job.getConfiguration().set(OSMMapper.MAXLAT, args[4]);
   job.getConfiguration().set(OSMMapper.MAXLON, args[5]);
   job.getConfiguration().set(OSMReducer.GRID, args[6]);
   job.setNumReduceTasks(grid * grid);
   boolean succ = job.waitForCompletion(true);
   return succ ? 0 : 1;
 }

示例#20

0

显示文件

文件： LevNestDissectJob.java 项目： swapster/hadoop

  /**
   * Configures and runs the "Levelized Nested Dissection Starting" job. It reads sequence files
   * from {@code out/outPath_count}, writes sequence files to {@code out/outPath_start} (removing
   * that directory first if it exists), then bumps {@code depth} from 0 to 1 and sets {@code
   * wasStart}.
   *
   * <p>All state ({@code conf}, {@code fs}, {@code job}, {@code in}, {@code out_start}, {@code
   * depth}, {@code wasStart}) lives in static fields of the enclosing class.
   *
   * @throws IOException if job setup, submission or file-system access fails
   * @throws InterruptedException if waiting for the job is interrupted
   * @throws ClassNotFoundException if a job class cannot be resolved
   */
  private static void StartingJob()
      throws IOException, InterruptedException, ClassNotFoundException {

    conf = new Configuration();
    fs = FileSystem.get(conf);
    conf.setLong("my.vertex.num", num);
    job = Job.getInstance(conf, "Levelized Nested Dissection Starting");

    // Processing classes.
    job.setJarByClass(LevNestDissectJob.class);
    job.setMapperClass(StartVertexMapper.class);
    job.setReducerClass(StartVertexReducer.class);

    // Formats and record types.
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(VertexWritable.class);
    job.setMapOutputValueClass(Text.class);

    // Input location.
    in = out.suffix("/" + outPath_count);
    FileInputFormat.addInputPath(job, in);

    // Output location; clear a previous run's directory first.
    out_start = out.suffix("/" + outPath_start);
    if (fs.exists(out_start)) {
      fs.delete(out_start, true);
    }
    FileOutputFormat.setOutputPath(job, out_start);

    job.waitForCompletion(true);

    if (depth == 0) {
      depth = depth + 1;
    }
    wasStart = true;
  }

示例#21

0

显示文件

文件： MatrixMultiplyJob.java 项目： steveash/decomposer

  /**
   * Configures and runs the "matrix multiply" job. A random dense vector of size 100000 is added
   * to the cache under the name "inputVector"; input and output locations come from the job
   * properties {@code sparse.vector.output.path} and {@code dense.vector.output.path}, the
   * latter suffixed with a timestamp.
   *
   * @param args command-line arguments (unused)
   * @return 0 if the job succeeds (the conventional success status), -1 otherwise
   * @throws Exception if job setup or submission fails
   */
  @Override
  public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    conf.set("job.name", System.currentTimeMillis() + "/");
    Properties configProps = loadJobProperties();

    CacheUtils.addSerializableToCache(conf, randomDenseMapVector(100000), "inputVector");

    Job job = new Job(conf, "matrix multiply");
    job.setJarByClass(MatrixMultiplyJob.class);
    job.setMapperClass(MatrixMultiplyMapper.class);
    job.setReducerClass(MatrixMultiplyReducer.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(MapVectorWritableComparable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    // Spaces and colons are replaced so the timestamp can be used inside a path name.
    String timestamp = new Date().toString().replace(' ', '_').replace(':', '_');

    FileInputFormat.addInputPath(
        job, new Path(configProps.getProperty("sparse.vector.output.path")));
    FileOutputFormat.setOutputPath(
        job, new Path(configProps.getProperty("dense.vector.output.path") + timestamp));

    // Success must map to 0: a run() result is normally used as the process exit status, and
    // the previous value of 1 made a successful run look like a failure.
    return job.waitForCompletion(true) ? 0 : -1;
  }

示例#22

0

显示文件

文件： TCMReasoner.java 项目： hualichenxi/bio-tcm-cloud

  /**
   * Runs the "reasoner" job from the first remaining argument (input) to the second (output) and
   * prints the value of the {@code REDUCE_OUTPUT_RECORDS} task counter.
   *
   * @param args generic Hadoop options followed by the input path and the output path
   * @throws IOException if job setup or submission fails
   * @throws InterruptedException if waiting for the job is interrupted
   * @throws ClassNotFoundException if a job class cannot be resolved
   */
  public static void main(String[] args)
      throws IOException, InterruptedException, ClassNotFoundException {

    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // Both otherArgs[0] and otherArgs[1] are used below, so validate the parsed arguments rather
    // than the raw ones (which may consist solely of generic options).
    if (otherArgs.length < 2) {
      System.out.println("USAGE: RFDSReasoner [pool path] [options]");
      return;
    }

    Job job = new Job(conf, "reasoner");
    job.setJarByClass(TCMReasoner.class);
    System.out.println(args[0]);

    job.setMapperClass(TCMMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Triple.class);

    job.setReducerClass(TCMReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Triple.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    job.waitForCompletion(true);
    Counter derivedTriples =
        job.getCounters()
            .findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS");
    System.out.println(derivedTriples.getValue());
  }

示例#23

0

显示文件

文件： RemoveDup.java 项目： LurieMrZhang/hadoop

  /**
   * Runs the "RemoveDup" job. Every remaining argument except the last is added as an input
   * path; the last one is the output path. The process exits with status 2 on a usage error,
   * otherwise with 0 or 1 depending on whether the job succeeds.
   *
   * @param args generic Hadoop options followed by {@code <in> [<in>...] <out>}
   * @throws Exception if the job cannot be configured or submitted
   */
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] remaining = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (remaining.length < 2) {
      System.err.println("Usage: RemoveDup <in> [<in>...] <out>");
      System.exit(2);
    }
    final int outputIndex = remaining.length - 1;

    // Delete the output directory (optional; avoids the "output directory already exists" error
    // when the job is run repeatedly).
    // HDFSUtil.deleteFile(conf, otherArgs[otherArgs.length - 1]);

    Job dedup = Job.getInstance(conf, "RemoveDup");
    dedup.setJarByClass(RemoveDup.class);
    dedup.setMapperClass(RemoveDupMapper.class);
    dedup.setCombinerClass(RemoveDupReducer.class);
    dedup.setReducerClass(RemoveDupReducer.class);
    dedup.setOutputKeyClass(Text.class);
    dedup.setOutputValueClass(NullWritable.class);

    int inputIndex = 0;
    while (inputIndex < outputIndex) {
      FileInputFormat.addInputPath(dedup, new Path(remaining[inputIndex]));
      inputIndex++;
    }
    FileOutputFormat.setOutputPath(dedup, new Path(remaining[outputIndex]));

    boolean succeeded = dedup.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
  }

示例#24

0

显示文件

文件： SampleProgram.java 项目： yangboz/verdant-octo-woof

  /**
   * Configures and runs the sample job, reading the input HIB from {@code args[0]} and writing
   * to the directory {@code args[1]}.
   *
   * @param args exactly two values: the input HIB and the output directory
   * @return 0 if the job succeeds, 1 otherwise
   * @throws Exception if job setup or submission fails
   */
  public int run(String[] args) throws Exception {
    // Check input arguments
    if (args.length != 2) {
      System.out.println("Usage: firstprog <input HIB> <output directory>");
      // A usage error is a failure; exiting with 0 would report success to the caller.
      System.exit(1);
    }

    // Initialize and configure MapReduce job
    Job job = Job.getInstance();
    // Set input format class which parses the input HIB and spawns map tasks
    //        job.setInputFormatClass(ImageBundleInputFormat.class);
    job.setInputFormatClass(HibInputFormat.class);
    // Set the driver, mapper, and reducer classes which express the computation
    job.setJarByClass(SampleProgram.class);
    job.setMapperClass(SampleProgramMapper.class);
    job.setReducerClass(SampleProgramReducer.class);
    // Set the types for the key/value pairs passed to/from map and reduce layers
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(FloatImage.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    // Set the input and output paths on the HDFS
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Execute the MapReduce job and block until it completes
    boolean success = job.waitForCompletion(true);

    // Return success or failure
    return success ? 0 : 1;
  }

示例#25

0

显示文件

文件： PageRank.java 项目： xuyang06/dataAnalysis

  /**
   * Runs three chained "PageRank" MapReduce jobs.
   *
   * <p>The first job reads {@code /user/cloudera/00}; job {@code i} writes to
   * {@code /user/cloudera/00<i>}, and that output becomes the input of the next job. If any job
   * reports failure, the chain is aborted and the JVM exits with status 1, because the following
   * iteration would otherwise read the output of a failed job.
   *
   * @param args unused
   * @throws Exception if configuring or running a job fails
   */
  public static void main(String[] args) throws Exception {

    String basePath = "/user/cloudera/00";
    String inputPath = basePath;

    for (int i = 1; i <= 3; i++) {
      System.out.println("Now exectuing the " + i + "-th job!");
      String outputPath = basePath + i;

      Job job = new Job();
      job.setJarByClass(PageRank.class);
      job.setJobName("PageRank");
      FileInputFormat.addInputPath(job, new Path(inputPath));
      FileOutputFormat.setOutputPath(job, new Path(outputPath));

      job.setMapperClass(PageRankMapper.class);
      job.setReducerClass(PageRankReducer.class);

      job.setOutputKeyClass(Text.class);
      job.setOutputValueClass(Text.class);

      // Stop as soon as one iteration fails instead of chaining onto its output.
      if (!job.waitForCompletion(true)) {
        System.err.println("PageRank job " + i + " failed; aborting remaining iterations.");
        System.exit(1);
      }

      // The output of this iteration is the input of the next one.
      inputPath = outputPath;
    }
  }

示例#26

0

显示文件

文件： ImportVTLocationFromFileWithReducer.java 项目： sihanwang/vesselmovemnt

  /**
   * Configures and runs the vessel-location import job.
   *
   * <p>The job reads from the path in {@code args[0]}, uses the number of reduce tasks parsed from
   * {@code args[1]}, and is configured with {@code NullOutputFormat} as its output format. The job
   * name passed to {@code Job.getInstance} is subsequently replaced by
   * {@code "Vessel_location_injection"} via {@code setJobName}.
   *
   * @param args {@code args[0]} is the input path, {@code args[1]} is the number of reduce tasks
   * @return 0 if {@code waitForCompletion} returns true, 1 otherwise
   * @throws Exception if configuring or running the job fails
   */
  public int run(String[] args) throws Exception {
    Job job =
        Job.getInstance(
            getConf(),
            "Import vessel locations from files in "
                + args[0]
                + " into table cdb_vessel:vessel_location");

    // Input side: where the files come from and how they are read.
    Path inputDir = new Path(args[0]);
    FileInputFormat.addInputPath(job, inputDir);
    job.setInputFormatClass(VTVesselLocationFileInputFormat.class);

    job.setJarByClass(ImportVTLocationFromFileWithReducer.class);
    job.setJobName("Vessel_location_injection");

    // Intermediate key/value types plus the partitioning and grouping applied to them.
    job.setMapOutputKeyClass(Key_IMOAndRecordTime.class);
    job.setMapOutputValueClass(TextArrayWritable.class);
    job.setPartitionerClass(Partitioner_IMO.class);
    job.setGroupingComparatorClass(GroupComparator_IMO.class);

    // Reduce side.
    job.setReducerClass(ImportReducer.class);
    int reducerCount = Integer.parseInt(args[1]);
    job.setNumReduceTasks(reducerCount);

    job.setOutputFormatClass(NullOutputFormat.class);

    if (job.waitForCompletion(true)) {
      return 0;
    }
    return 1;
  }

示例#27

0

显示文件

文件： HandsomeSearch.java 项目： JiCaiCai/cloudproject

  /**
   * Entry point for the "similar photo" MapReduce job backed by MongoDB.
   *
   * <p>Stores the hard-coded source image path in {@code sourcePhoto} and the result of
   * {@code SimilarImageSearch.produceFingerPrint} in {@code sourceFingerprint}, points the
   * configuration at the {@code photo.fingerprint} input URI and the {@code photo.handsomeOut}
   * output URI, prints the configuration, then runs the job and exits with 0 or 1 depending on
   * the value returned by {@code waitForCompletion}.
   *
   * @param args unused
   * @throws Exception if fingerprinting, job configuration, or job execution fails
   */
  public static void main(String[] args) throws Exception {
    sourcePhoto = "/home/hduser/workspace/images/source.jpg";
    sourceFingerprint = SimilarImageSearch.produceFingerPrint(sourcePhoto);

    final Configuration mongoConf = new Configuration();
    MongoConfigUtil.setInputURI(mongoConf, "mongodb://localhost/photo.fingerprint");
    MongoConfigUtil.setOutputURI(mongoConf, "mongodb://localhost/photo.handsomeOut");
    System.out.println("Conf: " + mongoConf);

    final Job searchJob = new Job(mongoConf, "similar photo");

    // NOTE(review): the jar is located via MdbSimilarPhoto rather than this snippet's own class -
    // confirm this is intentional.
    searchJob.setJarByClass(MdbSimilarPhoto.class);

    // Input and output formats.
    searchJob.setInputFormatClass(MongoInputFormat.class);
    searchJob.setOutputFormatClass(MongoOutputFormat.class);

    // Mapper, plus one class registered as both combiner and reducer.
    searchJob.setMapperClass(PhotoMapper.class);
    searchJob.setCombinerClass(SimilarityReducer.class);
    searchJob.setReducerClass(SimilarityReducer.class);

    // Output key/value types.
    searchJob.setOutputKeyClass(Text.class);
    searchJob.setOutputValueClass(DoubleWritable.class);

    boolean succeeded = searchJob.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
  }

示例#28

0

显示文件

文件： MaxTemperatureWithCounters.java 项目： sihanwang/hadoopforvessel

  /**
   * Prints every configuration entry to standard output, then configures and runs the job.
   *
   * <p>The job is obtained from {@code JobBuilder.parseInputAndOutput}; when that returns
   * {@code null} this method returns -1 without running anything.
   *
   * @param args arguments forwarded to {@code JobBuilder.parseInputAndOutput}
   * @return -1 if no job could be built, 0 if {@code waitForCompletion} returns true, 1 otherwise
   * @throws Exception if configuring or running the job fails
   */
  @Override
  public int run(String[] args) throws Exception {

    // Dump the effective configuration, one key=value pair per line.
    System.out.println("-------------Printing configuration-------------------");
    Configuration conf = getConf();
    for (Entry<String, String> property : conf) {
      System.out.printf("%s=%s\n", property.getKey(), property.getValue());
    }
    System.out.println("-------------Printing configuration done--------------");

    Job job = JobBuilder.parseInputAndOutput(this, getConf(), args);
    if (job == null) {
      return -1;
    }

    job.setMapperClass(MaxTemperatureMapperWithCounters.class);
    job.setCombinerClass(MaxTemperatureReducer.class);
    job.setReducerClass(MaxTemperatureReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    boolean succeeded = job.waitForCompletion(true);
    if (succeeded) {
      return 0;
    }
    return 1;
  }

示例#29

0

显示文件

文件： WordCount.java 项目： y-tag/java-Hadoop-MapReduceSample

  /**
   * Command-line entry point for the "wordcount" MapReduce job.
   *
   * <p>After generic Hadoop options are stripped, exactly two arguments must remain: the input
   * path and the output path. The output path is deleted recursively before the job is
   * submitted. The job uses four reduce tasks and registers {@code Reduce} as both combiner and
   * reducer. The JVM exits with 1 on a usage error, otherwise with 0 or 1 depending on the value
   * returned by {@code waitForCompletion}.
   *
   * @param args generic Hadoop options followed by {@code <input> <output>}
   * @throws Exception if option parsing, filesystem access, or job execution fails
   */
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] remainArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    if (remainArgs.length != 2) {
      System.err.println("Usage: wordcount <input> <output>");
      System.exit(1);
    }

    Path inputPath = new Path(remainArgs[0]);
    Path outputPath = new Path(remainArgs[1]);

    Job job = new Job(conf, "wordcount");
    job.setJarByClass(WordCount.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);

    job.setNumReduceTasks(4);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Remove any previous output. The filesystem is resolved from the path itself rather than
    // from the default filesystem, so a fully qualified output URI on another filesystem is
    // handled instead of being rejected as belonging to the wrong filesystem.
    outputPath.getFileSystem(conf).delete(outputPath, true);

    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }

示例#30

0

显示文件

文件： FileCombiner.java 项目： hfausta/thesis-file-combiner

  /**
   * Command-line entry point for the "File Combiner" MapReduce job.
   *
   * <p>Records the number of entries listed under the input directory in {@code numFiles}, then
   * runs a job configured with one map task and five reduce tasks that reads text from
   * {@code args[0]} and writes text to {@code args[1]}. If the job reports failure, the JVM exits
   * with status 1 so that callers can detect it; on success the method returns normally.
   *
   * @param args {@code args[0]} is the input directory, {@code args[1]} is the output directory
   * @throws IOException if filesystem access or job submission fails
   * @throws InterruptedException if waiting for the job is interrupted
   * @throws ClassNotFoundException if a job class cannot be resolved
   */
  public static void main(String[] args)
      throws IOException, InterruptedException, ClassNotFoundException {
    JobConf conf = new JobConf();
    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(5);

    Path inputDir = new Path(args[0]);
    Path outputDir = new Path(args[1]);

    // Resolve the filesystem from the input path so that a fully qualified URI on a non-default
    // filesystem is listed correctly.
    FileSystem fs = inputDir.getFileSystem(conf);
    FileStatus[] stats = fs.listStatus(inputDir);
    numFiles = stats.length;

    Job job = new Job(conf);
    job.setJarByClass(FileCombiner.class);
    job.setJobName("File Combiner");

    job.setMapperClass(FileCombinerMapper.class);
    job.setReducerClass(FileCombinerReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    // LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);

    // Surface a failed job through the process exit status instead of silently returning.
    if (!job.waitForCompletion(true)) {
      System.exit(1);
    }
  }