Java FileOutputFormat.setOutputPathの例、org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath Javaの例

コード例 #1

0

ファイルを表示

ファイル: PVidConvert.java プロジェクト: ksadhu/video_conversion

  /**
   * Runs the two chained MapReduce jobs of the video conversion pipeline.
   *
   * <p>The first job reads {@code args[0]} and writes to a fixed intermediate directory; the
   * second job reads that directory and writes to {@code args[1]}. The second job is started only
   * when the first one reports success, and the JVM exits with status 1 if either job fails.
   *
   * @param args {@code args[0]} is the input path, {@code args[1]} the final output path
   * @throws IOException if job setup or submission fails
   * @throws ClassNotFoundException if a job class cannot be resolved
   * @throws InterruptedException if waiting for a job is interrupted
   */
  public static void main(String[] args)
      throws IOException, ClassNotFoundException, InterruptedException {

    // Hand-off location shared by both stages.
    Path intermediate = new Path("/tmp/temporary_execution/");

    Configuration conf = new Configuration();
    Job job = new Job(conf, "job");

    job.setJarByClass(PVidConvert.class);
    job.setMapperClass(Map1.class);
    job.setReducerClass(Reduce1.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, intermediate);
    // Do not feed the output of a failed first stage into the second stage.
    if (!job.waitForCompletion(true)) {
      System.exit(1);
    }

    Configuration conf1 = new Configuration();
    Job job1 = new Job(conf1, "job1");

    job1.setJarByClass(PVidConvert.class);
    job1.setMapperClass(Map2.class);
    job1.setReducerClass(Reduce2.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job1, intermediate);
    FileOutputFormat.setOutputPath(job1, new Path(args[1]));
    if (!job1.waitForCompletion(true)) {
      System.exit(1);
    }
  }

コード例 #2

0

ファイルを表示

ファイル: HashCount1.java プロジェクト: Zeldon/BigData_Class

  /**
   * Runs a two-stage pipeline: stage 1 ("Frequency Count") followed by stage 2 ("Sort").
   *
   * <p>Stage 2 reads the output directory of stage 1, so it is started only when stage 1
   * succeeded. The JVM exit status is 0 when both stages succeed and 1 otherwise.
   *
   * @param args unused; all paths are hard-coded
   * @throws Exception if job setup, submission or monitoring fails
   */
  public static void main(String[] args) throws Exception {

    Configuration conf = new Configuration();
    // conf.addResource(new Path("/home/hadoop/hadoop/hadoop-1.2.1/conf/core-site.xml"));
    // conf.addResource(new Path("/home/hadoop/hadoop/hadoop-1.2.1/conf/hdfs-site.xml"));

    Path stage1Output = new Path("/twitterOuts/output1");

    // ===== Stage 1 =====
    Job job1 = new Job(conf, "Stage 1: Frequency Count");
    job1.setJarByClass(HashCount1.class);
    job1.setMapperClass(Mapper1.class);
    // job1.setCombinerClass(Combine1.class);
    job1.setReducerClass(Reducer1.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(IntWritable.class);
    job1.setNumReduceTasks(1);

    FileInputFormat.addInputPath(job1, new Path("/TwitterInFiles/twitter_10k.tar.gz"));
    FileOutputFormat.setOutputPath(job1, stage1Output);
    // Stage 2 consumes stage 1's output; stop here if that output is not valid.
    if (!job1.waitForCompletion(true)) {
      System.exit(1);
    }

    // ===== Stage 2 =====
    Job job2 = new Job(conf, "Stage 2: Sort");
    job2.setJarByClass(HashCount1.class);
    job2.setMapperClass(Mapper2.class);
    job2.setReducerClass(Reducer2.class);
    job2.setOutputKeyClass(IntWritable.class);
    job2.setOutputValueClass(Text.class);
    job2.setNumReduceTasks(1);
    FileInputFormat.addInputPath(job2, stage1Output);
    FileOutputFormat.setOutputPath(job2, new Path("/twitterOuts/output2"));

    // Wait exactly once; the result determines the process exit status.
    System.exit(job2.waitForCompletion(true) ? 0 : 1);
  }

コード例 #3

0

ファイルを表示

ファイル: DayhslogUserDate.java プロジェクト: wisgood/mobile-core

  /**
   * Runs the "DayhslogUserDateNewMac" job into a temporary directory and, if it succeeds, the
   * "DayhslogUserDate" job that combines {@code args[0]} with that temporary output.
   *
   * <p>The temporary directory ({@code args[2] + "tmp"}) is removed in a {@code finally} block so
   * it does not linger when a job throws.
   *
   * @param args {@code args[0]} and {@code args[1]} are input paths, {@code args[2]} is the final
   *     output path
   * @return 0 if every job that ran succeeded, 1 otherwise
   * @throws Exception if job setup, submission or filesystem access fails
   */
  @Override
  public int run(String[] args) throws Exception {

    Configuration conf = getConf();
    Path tmpPath = new Path(args[2] + "tmp");

    int code;
    try {
      Job job = new Job(conf, "DayhslogUserDateNewMac");
      job.setJarByClass(DayhslogUserDate.class);
      FileInputFormat.addInputPath(job, new Path(args[0]));
      FileInputFormat.addInputPath(job, new Path(args[1]));
      FileOutputFormat.setOutputPath(job, tmpPath);
      // Clear leftovers from a previous run before the job is submitted.
      FileSystem.get(conf).delete(tmpPath, true);
      job.setMapperClass(DayhslogUserDateNewMacMapper.class);
      job.setReducerClass(DayhslogUserDateNewMacReducer.class);

      job.setOutputFormatClass(TextOutputFormat.class);

      job.setNumReduceTasks(10);

      job.setMapOutputKeyClass(Text.class);
      job.setMapOutputValueClass(Text.class);

      job.setOutputKeyClass(Text.class);
      job.setOutputValueClass(Text.class);

      code = job.waitForCompletion(true) ? 0 : 1;

      if (code == 0) {
        Job resultJob = new Job(conf, "DayhslogUserDate");

        resultJob.setJarByClass(DayhslogUserDate.class);

        FileInputFormat.addInputPath(resultJob, new Path(args[0]));
        FileInputFormat.addInputPath(resultJob, tmpPath);
        Path resultPath = new Path(args[2]);
        FileOutputFormat.setOutputPath(resultJob, resultPath);
        FileSystem.get(conf).delete(resultPath, true);

        resultJob.setMapperClass(DayhslogUserDateMapper.class);
        resultJob.setReducerClass(DayhslogUserDateReducer.class);

        resultJob.setNumReduceTasks(10);

        resultJob.setMapOutputKeyClass(Text.class);
        resultJob.setMapOutputValueClass(Text.class);

        resultJob.setOutputKeyClass(Text.class);
        resultJob.setOutputValueClass(Text.class);

        code = resultJob.waitForCompletion(true) ? 0 : 1;
      }
    } finally {
      // Always drop the intermediate data, including when a job threw.
      FileSystem.get(conf).delete(tmpPath, true);
    }

    // NOTE(review): exiting here means a ToolRunner caller never sees the return value; kept
    // for backward compatibility with existing launch scripts — confirm before removing.
    System.exit(code);

    return code;
  }

コード例 #4

0

ファイルを表示

ファイル: VoteCount.java プロジェクト: sharayumungel/Hadoop-MapReduce

  /**
   * Runs the "combine votes" job followed by the "votes count" job.
   *
   * <p>The first job writes to {@code args[1] + "-tmp"}, which the second job reads. The second
   * job is started only when the first one succeeded; the JVM exits with 0 on overall success and
   * 1 otherwise.
   *
   * @param args {@code args[0]} is the input path, {@code args[1]} the final output path
   * @throws Exception if job setup, submission or monitoring fails
   */
  public static void main(String[] args) throws Exception {

    Configuration conf = new Configuration();
    Path mergedVotes = new Path(args[1] + "-tmp");

    Job job1 = new Job(conf, "combine votes");
    job1.setJarByClass(VoteCount.class);
    job1.setMapperClass(MergeFilesMapper.class);
    job1.setCombinerClass(MergedFilesReducer.class);
    job1.setReducerClass(MergedFilesReducer.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job1, new Path(args[0]));
    FileOutputFormat.setOutputPath(job1, mergedVotes);

    // Counting votes from a failed merge would produce wrong results; stop instead.
    if (!job1.waitForCompletion(true)) {
      System.exit(1);
    }

    Job job2 = new Job(conf, "votes count");
    job2.setJarByClass(VoteCount.class);
    job2.setMapperClass(CalculateVotesMapper.class);
    job2.setCombinerClass(CalculateVotesReducer.class);
    job2.setReducerClass(CalculateVotesReducer.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job2, mergedVotes);
    FileOutputFormat.setOutputPath(job2, new Path(args[1]));

    System.exit(job2.waitForCompletion(true) ? 0 : 1);
  }

コード例 #5

0

ファイルを表示

ファイル: DeliverFormatForUVMR.java プロジェクト: wisgood/mobile-core

  /**
   * Runs the formatting job into a temporary directory and, on success, a single-reducer
   * "CombineTmpData" job that writes LZO-compressed text to the configured output directory.
   *
   * <p>Settings are read from the configuration after generic options are parsed: {@code
   * job_name}, {@code input_dir}, {@code output_dir} and {@code reduce_num} (default 20). The
   * temporary directory ({@code output_dir + "_tmp"}) is removed in a {@code finally} block, and
   * the output is LZO-indexed only when the jobs succeeded.
   *
   * @param args command-line arguments handed to {@link GenericOptionsParser}
   * @return 0 if every job that ran succeeded, 1 otherwise
   * @throws Exception if job setup, submission, filesystem access or indexing fails
   */
  public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    GenericOptionsParser optionparser = new GenericOptionsParser(conf, args);
    conf = optionparser.getConfiguration();

    String outputDir = conf.get("output_dir");
    String tmpDir = outputDir + "_tmp";
    Path tmpOut = new Path(tmpDir);
    // Resolve the filesystem from the path itself so both deletes hit the same filesystem,
    // even if the temp path is not on the default one.
    FileSystem tmpFs = tmpOut.getFileSystem(conf);

    int code;
    try {
      Job job = new Job(conf, conf.get("job_name"));
      job.setJarByClass(DeliverFormatForUVMR.class);
      FileInputFormat.addInputPaths(job, conf.get("input_dir"));
      FileOutputFormat.setOutputPath(job, tmpOut);
      tmpFs.delete(tmpOut, true);

      job.setMapperClass(DeliverFormatForUVMapper.class);
      job.setCombinerClass(DeliverFormatForUVCombiner.class);
      job.setReducerClass(DeliverFormatForUVReducer.class);

      job.setInputFormatClass(TextInputFormat.class);
      job.setOutputFormatClass(TextOutputFormat.class);

      job.setOutputKeyClass(Text.class);
      job.setOutputValueClass(Text.class);

      job.setNumReduceTasks(conf.getInt("reduce_num", 20));

      code = job.waitForCompletion(true) ? 0 : 1;

      if (code == 0) {

        // this job is for combining small files into one
        Job combineJob = new Job(conf, "CombineTmpData");
        combineJob.setJarByClass(DeliverFormatForUVMR.class);

        FileInputFormat.addInputPath(combineJob, tmpOut);
        FileOutputFormat.setOutputPath(combineJob, new Path(outputDir));
        combineJob.setMapperClass(IdentityMapper.class);
        combineJob.setReducerClass(IdentityReducer.class);

        combineJob.setInputFormatClass(KeyValueTextInputFormat.class);
        combineJob.setOutputFormatClass(TextOutputFormat.class);

        combineJob.setOutputKeyClass(Text.class);
        combineJob.setOutputValueClass(Text.class);

        TextOutputFormat.setCompressOutput(combineJob, true);
        TextOutputFormat.setOutputCompressorClass(combineJob, LzopCodec.class);

        combineJob.setNumReduceTasks(1);
        code = combineJob.waitForCompletion(true) ? 0 : 1;
      }
    } finally {
      // Always drop the intermediate data, including when a job threw.
      tmpFs.delete(tmpOut, true);
    }

    // Index only output that was actually produced by a successful run.
    if (code == 0) {
      LzoIndexer lzoIndexer = new LzoIndexer(conf);
      lzoIndexer.index(new Path(outputDir));
    }

    // NOTE(review): exiting here means a ToolRunner caller never sees the return value; kept
    // for backward compatibility with existing launch scripts — confirm before removing.
    System.exit(code);
    return code;
  }

コード例 #6

0

ファイルを表示

ファイル: ClientUserInstallMR.java プロジェクト: wisgood/mobile-core

  /**
   * Runs the "ClientUserInstallMR" job into a temporary directory and, on success, the
   * single-reducer "ClientUserInstallResult" job that writes to the configured output directory.
   *
   * <p>Settings are read from the configuration after generic options are parsed: {@code
   * input_dir} and {@code output_dir}. The temporary directory ({@code output_dir + "_tmp"}) is
   * removed in a {@code finally} block so it does not linger when a job throws.
   *
   * @param args command-line arguments handed to {@link GenericOptionsParser}
   * @return 0 if every job that ran succeeded, 1 otherwise
   * @throws Exception if job setup, submission or filesystem access fails
   */
  public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    GenericOptionsParser gop = new GenericOptionsParser(conf, args);
    conf = gop.getConfiguration();

    String outputDir = conf.get("output_dir");
    String tmpDir = outputDir + "_tmp";
    Path tmpOutput = new Path(tmpDir);
    // Resolve the filesystem from the path itself so both deletes hit the same filesystem,
    // even if the temp path is not on the default one.
    FileSystem tmpFs = tmpOutput.getFileSystem(conf);

    int code;
    try {
      Job job = new Job(conf, "ClientUserInstallMR");
      job.setJarByClass(ClientUserInstallMR.class);
      FileInputFormat.addInputPaths(job, conf.get("input_dir"));
      FileOutputFormat.setOutputPath(job, tmpOutput);
      tmpFs.delete(tmpOutput, true);

      job.setMapperClass(ClientUserInstallFirstMapper.class);
      job.setReducerClass(ClientUserInstallFirstReduce.class);

      job.setInputFormatClass(TextInputFormat.class);
      job.setOutputFormatClass(TextOutputFormat.class);
      job.setOutputKeyClass(Text.class);
      job.setOutputValueClass(Text.class);

      job.setNumReduceTasks(30);

      code = job.waitForCompletion(true) ? 0 : 1;

      if (code == 0) {
        Job secondJob = new Job(conf, "ClientUserInstallResult");
        secondJob.setJarByClass(ClientUserInstallMR.class);
        // NOTE(review): re-sets "stat_date" to its current value after the job was created;
        // looks like a no-op apart from rejecting a missing value — confirm intent.
        conf.set("stat_date", conf.get("stat_date"));

        FileInputFormat.addInputPath(secondJob, tmpOutput);
        Path output = new Path(outputDir);
        FileOutputFormat.setOutputPath(secondJob, output);
        output.getFileSystem(conf).delete(output, true);

        secondJob.setMapperClass(ClientUserInstallSecondMapper.class);
        secondJob.setReducerClass(ClientUserInstallSecondReduce.class);

        secondJob.setInputFormatClass(KeyValueTextInputFormat.class);
        secondJob.setOutputFormatClass(TextOutputFormat.class);
        secondJob.setOutputKeyClass(Text.class);
        secondJob.setOutputValueClass(Text.class);

        secondJob.setNumReduceTasks(1);

        code = secondJob.waitForCompletion(true) ? 0 : 1;
      }
    } finally {
      // Always drop the intermediate data, including when a job threw.
      tmpFs.delete(tmpOutput, true);
    }

    // NOTE(review): exiting here means a ToolRunner caller never sees the return value; kept
    // for backward compatibility with existing launch scripts — confirm before removing.
    System.exit(code);
    return code;
  }

コード例 #7

0

ファイルを表示

ファイル: TestHDFSWrite.java プロジェクト: nfergu/parquet-test

  /**
   * Runs a map-only job that reads text from {@code args[0]} and writes text to a fresh output
   * path derived from {@code args[1]}.
   *
   * <p>{@link System#nanoTime()} is appended to the output argument so each run writes to a new
   * location.
   *
   * @param args {@code args[0]} is the input path, {@code args[1]} the output path prefix
   * @return 1 if fewer than two arguments were given or the job failed, 0 if the job succeeded
   * @throws Exception if job setup, submission or monitoring fails
   */
  public int run(String[] args) throws Exception {

    if (args.length < 2) {
      LOG.error("Usage: " + getClass().getName() + " inputFileHDFS outputFileHDFS");
      return 1;
    }

    String inputFile = args[0];
    String outputFile = args[1] + System.nanoTime();

    Configuration configuration = getConf();
    Job job = new Job(configuration);
    job.setJarByClass(getClass());
    job.setJobName(getClass().getName());
    job.setMapperClass(ReadRequestMap.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(inputFile));
    FileOutputFormat.setOutputPath(job, new Path(outputFile));

    // Report the job outcome instead of unconditionally claiming success.
    return job.waitForCompletion(true) ? 0 : 1;
  }

コード例 #8

0

ファイルを表示

ファイル: OutlinkGrowthAnalysis.java プロジェクト: yavcular/WebGraphConstruction

  /**
   * Configures and runs the "int key replace phase1" job with a single reducer.
   *
   * @param args exactly two entries: the bit-vector input path and the output path
   * @throws IllegalArgumentException if the argument count is not two
   * @throws IOException if job setup or submission fails
   * @throws InterruptedException if waiting for the job is interrupted
   * @throws ClassNotFoundException if a job class cannot be resolved
   */
  public static void main(String[] args)
      throws IOException, InterruptedException, ClassNotFoundException {

    if (args.length != 2) {
      throw new IllegalArgumentException(args.length + " usage: ... ");
    }

    final String inputLocation = args[0];
    final String outputLocation = args[1];

    Configuration configuration = new Configuration();
    Job growthJob = Job.getInstance(new Cluster(configuration), configuration);
    growthJob.setJobName("int key replace phase1");
    growthJob.setJarByClass(OutlinkGrowthAnalysis.class);

    // Processing classes.
    growthJob.setMapperClass(BVIdentitiyMapper.class);
    growthJob.setReducerClass(AnaylseOLGrowthReducer.class);

    // Input/output formats.
    growthJob.setInputFormatClass(TabSeperatedTextInputFormat.class);
    growthJob.setOutputFormatClass(SequenceFileOutputFormat.class);

    // Key/value types for the map output and the final output.
    growthJob.setMapOutputKeyClass(Text.class);
    growthJob.setMapOutputValueClass(Text.class);
    growthJob.setOutputKeyClass(Text.class);
    growthJob.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(growthJob, new Path(outputLocation));
    FileInputFormat.setInputPaths(growthJob, new Path(inputLocation));

    growthJob.setNumReduceTasks(1);
    growthJob.waitForCompletion(true);
  }

コード例 #9

0

ファイルを表示

ファイル: WordCount.java プロジェクト: dimajix/hadoop-training

  /**
   * Builds the "WordCount" job from the given configuration and blocks until it finishes.
   *
   * @param conf configuration the job is created from
   * @return the value returned by {@code Job.waitForCompletion(true)}
   * @throws IOException if job setup or submission fails
   * @throws InterruptedException if waiting for the job is interrupted
   * @throws ClassNotFoundException if a job class cannot be resolved
   */
  private boolean runJob(Configuration conf)
      throws IOException, InterruptedException, ClassNotFoundException {
    final Job wordCount = Job.getInstance(conf, "WordCount");
    wordCount.setJarByClass(WordCount.class);

    // Processing classes: the reducer doubles as the combiner.
    wordCount.setMapperClass(WordCountMapper.class);
    wordCount.setCombinerClass(WordCountReducer.class);
    wordCount.setReducerClass(WordCountReducer.class);

    // Key/value types for the intermediate and the final output.
    wordCount.setMapOutputKeyClass(Text.class);
    wordCount.setMapOutputValueClass(IntWritable.class);
    wordCount.setOutputKeyClass(Text.class);
    wordCount.setOutputValueClass(IntWritable.class);

    // Text in, read from inputDir.
    wordCount.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.addInputPath(wordCount, new Path(inputDir));

    // Text out, written to outputDir.
    wordCount.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(wordCount, new Path(outputDir));

    boolean succeeded = wordCount.waitForCompletion(true);
    return succeeded;
  }

コード例 #10

0

ファイルを表示

ファイル: TCMReasoner.java プロジェクト: hualichenxi/bio-tcm-cloud

  /**
   * Runs the "reasoner" job and prints the value of the reduce-output-records counter.
   *
   * <p>Generic Hadoop options are stripped first; the remaining arguments must contain the input
   * path followed by the output path. The usage message is printed and the method returns when
   * fewer than two such arguments remain.
   *
   * @param args generic options followed by the input path and the output path
   * @throws IOException if job setup or submission fails
   * @throws InterruptedException if waiting for the job is interrupted
   * @throws ClassNotFoundException if a job class cannot be resolved
   */
  public static void main(String[] args)
      throws IOException, InterruptedException, ClassNotFoundException {

    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // Validate the array that is actually indexed below (two positional paths are needed).
    if (otherArgs.length < 2) {
      System.out.println("USAGE: RFDSReasoner [pool path] [options]");
      return;
    }

    Job job = new Job(conf, "reasoner");
    job.setJarByClass(TCMReasoner.class);
    System.out.println(args[0]);

    job.setMapperClass(TCMMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Triple.class);

    job.setReducerClass(TCMReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Triple.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    job.waitForCompletion(true);
    Counter derivedTriples =
        job.getCounters()
            .findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS");
    System.out.println(derivedTriples.getValue());
  }

コード例 #11

0

ファイルを表示

ファイル: SampleProgram.java プロジェクト: yangboz/verdant-octo-woof

  /**
   * Configures and runs the sample job over a HIB input.
   *
   * @param args exactly two entries: the input HIB path and the output directory
   * @return 0 when the job succeeded, 1 otherwise
   * @throws Exception if job setup, submission or monitoring fails
   */
  public int run(String[] args) throws Exception {
    // Exactly two arguments are expected; otherwise print the usage line and terminate.
    boolean wrongArgumentCount = args.length != 2;
    if (wrongArgumentCount) {
      System.out.println("Usage: firstprog <input HIB> <output directory>");
      System.exit(0);
    }

    Job sampleJob = Job.getInstance();

    // Input format used to read the HIB.
    sampleJob.setInputFormatClass(HibInputFormat.class);

    // Driver, mapper and reducer classes.
    sampleJob.setJarByClass(SampleProgram.class);
    sampleJob.setMapperClass(SampleProgramMapper.class);
    sampleJob.setReducerClass(SampleProgramReducer.class);

    // Key/value types emitted by the map phase and by the job as a whole.
    sampleJob.setMapOutputKeyClass(IntWritable.class);
    sampleJob.setMapOutputValueClass(FloatImage.class);
    sampleJob.setOutputKeyClass(IntWritable.class);
    sampleJob.setOutputValueClass(Text.class);

    // Input and output locations.
    FileInputFormat.setInputPaths(sampleJob, new Path(args[0]));
    FileOutputFormat.setOutputPath(sampleJob, new Path(args[1]));

    // Block until the job completes and translate the outcome into a status code.
    if (sampleJob.waitForCompletion(true)) {
      return 0;
    }
    return 1;
  }

コード例 #12

0

ファイルを表示

ファイル: ESIndexCreator.java プロジェクト: morpheus-lab/java-bigdata-201506

  /**
   * Configures and runs the map-only "ESIndexCreator" job.
   *
   * <p>Program arguments:
   *
   * <ul>
   *   <li>{@code args[0]}: input file path
   *   <li>{@code args[1]}: output file path
   *   <li>{@code args[2]}: elastic search server's host name, stored under the {@code "host"}
   *       configuration key
   * </ul>
   *
   * @param args the three program arguments described above
   * @throws Exception if job setup, submission or monitoring fails
   */
  public static void main(String[] args) throws Exception {

    Configuration configuration = new Configuration();
    Job indexJob = new Job(configuration, "ESIndexCreator");
    indexJob.setJarByClass(ESIndexCreator.class);

    // Map-only job: no reduce tasks are scheduled.
    indexJob.setMapperClass(ESIndexCreator.MyMapper.class);
    indexJob.setNumReduceTasks(0);

    indexJob.setOutputKeyClass(Text.class);
    indexJob.setOutputValueClass(Text.class);

    indexJob.setInputFormatClass(KeyValueTextInputFormat.class);
    indexJob.setOutputFormatClass(TextOutputFormat.class);

    Path inputPath = new Path(args[0]);
    FileInputFormat.addInputPath(indexJob, inputPath);
    Path outputPath = new Path(args[1]);
    FileOutputFormat.setOutputPath(indexJob, outputPath);

    String hostName = args[2];
    indexJob.getConfiguration().set("host", hostName);

    indexJob.waitForCompletion(true);
  }

コード例 #13

0

ファイルを表示

ファイル: Apriori.java プロジェクト: UCLAScAi/StreamMill

  /**
   * Runs the "Fixed Iteration Experiment" job up to {@code numOfIterations} times and prints the
   * elapsed time together with the {@code STARTS_WITH_DIGIT} counter of the last job.
   *
   * <p>Each iteration writes to {@code <out><iteration index>}. When {@code onlyLocalIteration}
   * is set, only one iteration is run. If no iteration runs at all there is no job to read the
   * counter from, so a message is printed and the method returns.
   *
   * @param args generic options followed by the input path and the output path prefix
   * @throws Exception if job setup, submission or monitoring fails
   */
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    int simpleCount = 0;
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
      System.err.println("Usage: wordcount <in> <out>");
      System.exit(2);
    }
    // Kept as double so the printed elapsed time keeps its existing format.
    double startTime = System.currentTimeMillis();
    Job job = null;
    while (simpleCount < numOfIterations) {
      job = new Job(conf, "Fixed Iteration Experiment");
      FileOutputFormat.setOutputPath(job, new Path(otherArgs[1] + simpleCount));
      job.setJarByClass(Apriori.class);
      job.setMapperClass(TokenizerMapper.class);
      job.setCombinerClass(IntSumReducer.class);
      job.setReducerClass(IntSumReducer.class);
      job.setOutputKeyClass(Text.class);
      job.setOutputValueClass(IntWritable.class);
      FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
      job.waitForCompletion(true);
      simpleCount++;
      if (onlyLocalIteration) {
        break;
      }
    }

    // With a non-positive iteration count the loop never ran and there is no job to query.
    if (job == null) {
      System.err.println("No iteration was executed; nothing to report.");
      return;
    }

    org.apache.hadoop.mapreduce.Counter c =
        job.getCounters().findCounter("Apriori$TotalSum", "STARTS_WITH_DIGIT");
    if (onlyLocalIteration) {
      System.out.print("Hybrid Iteration: ");
    }
    System.out.println(
        "Total time: "
            + (System.currentTimeMillis() - startTime)
            + "ms"
            + " my count: "
            + c.getValue());
  }

コード例 #14

0

ファイルを表示

ファイル: FileCombiner.java プロジェクト: hfausta/thesis-file-combiner

  /**
   * Records how many entries the input directory lists and runs the "File Combiner" job.
   *
   * @param args {@code args[0]} is the input directory, {@code args[1]} the output path
   * @throws IOException if filesystem access, job setup or submission fails
   * @throws InterruptedException if waiting for the job is interrupted
   * @throws ClassNotFoundException if a job class cannot be resolved
   */
  public static void main(String[] args)
      throws IOException, InterruptedException, ClassNotFoundException {
    // Task counts are set on the JobConf before the Job is created from it.
    JobConf jobConf = new JobConf();
    jobConf.setNumMapTasks(1);
    jobConf.setNumReduceTasks(5);

    // Store the number of entries listed under the input directory in numFiles.
    FileSystem fileSystem = FileSystem.get(jobConf);
    Path inputDir = new Path(args[0]);
    numFiles = fileSystem.listStatus(inputDir).length;

    Job combinerJob = new Job(jobConf);
    combinerJob.setJobName("File Combiner");
    combinerJob.setJarByClass(FileCombiner.class);

    combinerJob.setMapperClass(FileCombinerMapper.class);
    combinerJob.setReducerClass(FileCombinerReducer.class);

    // Key/value types for the intermediate and the final output.
    combinerJob.setMapOutputKeyClass(IntWritable.class);
    combinerJob.setMapOutputValueClass(Text.class);
    combinerJob.setOutputKeyClass(IntWritable.class);
    combinerJob.setOutputValueClass(Text.class);

    // Plain text in and out.
    combinerJob.setInputFormatClass(TextInputFormat.class);
    combinerJob.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.setInputPaths(combinerJob, inputDir);
    FileOutputFormat.setOutputPath(combinerJob, new Path(args[1]));

    combinerJob.waitForCompletion(true);
  }

コード例 #15

0

ファイルを表示

ファイル: Population.java プロジェクト: terminiter/HDP-Java-Class-Labs

  /**
   * Configures and runs "PopulationJob" with five reducers and a {@code TotalOrderPartitioner}.
   *
   * <p>Input is read from the relative path {@code "populations"} and output is written to the
   * relative path {@code "totalorder"}, which is deleted recursively before the job is run. A
   * partition file is written from a random sample of the input and added to the job's cache
   * files before submission.
   *
   * @param arg0 command-line arguments; not referenced by this method
   * @return 0 if {@code waitForCompletion} reports success, 1 otherwise
   * @throws Exception if job setup, sampling, filesystem access or submission fails
   */
  @Override
  public int run(String[] arg0) throws Exception {
    Job job = Job.getInstance(getConf(), "PopulationJob");
    // Use the job's own configuration object for everything below.
    Configuration conf = job.getConfiguration();
    job.setJarByClass(Population.class);

    Path out = new Path("totalorder");
    FileInputFormat.setInputPaths(job, "populations");
    FileOutputFormat.setOutputPath(job, out);
    // Recursively delete the output path before the job runs.
    out.getFileSystem(conf).delete(out, true);

    job.setMapperClass(PopulationMapper.class);
    job.setReducerClass(PopulationReducer.class);
    job.setInputFormatClass(KeyValueTextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setNumReduceTasks(5);

    // Configure the TotalOrderPartitioner here...
    job.setPartitionerClass(TotalOrderPartitioner.class);
    // NOTE(review): the RandomSampler arguments are presumably (sampling frequency, number of
    // samples, max splits sampled) — confirm against the InputSampler.RandomSampler API.
    InputSampler.Sampler<Text, Text> sampler =
        new InputSampler.RandomSampler<Text, Text>(0.1, 200, 3);
    // Sampling happens here, after the input paths, input format, key types and reducer count
    // have been set on the job.
    InputSampler.writePartitionFile(job, sampler);
    String partitionFile = TotalOrderPartitioner.getPartitionFile(conf);
    // The URI fragment after '#' is TotalOrderPartitioner.DEFAULT_PATH.
    URI partitionURI = new URI(partitionFile + "#" + TotalOrderPartitioner.DEFAULT_PATH);
    job.addCacheFile(partitionURI);

    return job.waitForCompletion(true) ? 0 : 1;
  }

コード例 #16

0

ファイルを表示

ファイル: ParaphrasePivotingJob.java プロジェクト: fone4u/thrax

  /**
   * Builds (but does not submit) the "pivoting" job.
   *
   * <p>Configuration keys read: {@code thrax.work-dir} (prefix for the {@code collected} input
   * and {@code pivoted} output paths), {@code thrax.max-split-size} (applied only when non-zero)
   * and {@code thrax.reducers} (default 4).
   *
   * @param conf configuration the job is created from
   * @return the configured job
   * @throws IOException if the job cannot be created or configured
   */
  public Job getJob(Configuration conf) throws IOException {
    final String workDir = conf.get("thrax.work-dir");

    Job pivotJob = new Job(conf, "pivoting");
    pivotJob.setJarByClass(PivotingReducer.class);

    // Processing classes, including the source-side partitioner.
    pivotJob.setMapperClass(Mapper.class);
    pivotJob.setReducerClass(PivotingReducer.class);
    pivotJob.setPartitionerClass(RuleWritable.SourcePartitioner.class);

    // Sequence files in and out, with identical intermediate and final key/value types.
    pivotJob.setInputFormatClass(SequenceFileInputFormat.class);
    pivotJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    pivotJob.setMapOutputKeyClass(RuleWritable.class);
    pivotJob.setMapOutputValueClass(MapWritable.class);
    pivotJob.setOutputKeyClass(RuleWritable.class);
    pivotJob.setOutputValueClass(MapWritable.class);

    Path collected = new Path(workDir + "collected");
    FileInputFormat.setInputPaths(pivotJob, collected);

    // A split-size limit of zero means no limit is applied.
    int splitLimit = conf.getInt("thrax.max-split-size", 0);
    if (splitLimit != 0) {
      FileInputFormat.setMaxInputSplitSize(pivotJob, splitLimit);
    }

    pivotJob.setNumReduceTasks(conf.getInt("thrax.reducers", 4));

    Path pivoted = new Path(workDir + "pivoted");
    FileOutputFormat.setOutputPath(pivotJob, pivoted);
    FileOutputFormat.setCompressOutput(pivotJob, true);

    return pivotJob;
  }

コード例 #17

0

ファイルを表示

ファイル: UtilityPredictor.java プロジェクト: pranab/sifarish

  /**
   * Configures and runs the rating predictor job.
   *
   * <p>The reducer count comes from {@code utp.num.reducer}; when that key is absent the value
   * of {@code num.reducer} (default 1) is used instead.
   *
   * @param args {@code args[0]} holds the input paths, {@code args[1]} the output path
   * @return 0 when the job succeeded, 1 otherwise
   * @throws Exception if job setup, submission or monitoring fails
   */
  @Override
  public int run(String[] args) throws Exception {
    Job predictorJob = new Job(getConf());
    predictorJob.setJobName("Rating predictor  MR");
    predictorJob.setJarByClass(UtilityPredictor.class);

    FileInputFormat.addInputPaths(predictorJob, args[0]);
    FileOutputFormat.setOutputPath(predictorJob, new Path(args[1]));

    // Processing classes plus the grouping and partitioning used for the shuffle.
    predictorJob.setMapperClass(UtilityPredictor.PredictionMapper.class);
    predictorJob.setReducerClass(UtilityPredictor.PredictorReducer.class);
    predictorJob.setGroupingComparatorClass(ItemIdGroupComprator.class);
    predictorJob.setPartitionerClass(ItemIdPartitioner.class);

    // Key/value types for the intermediate and the final output.
    predictorJob.setMapOutputKeyClass(TextInt.class);
    predictorJob.setMapOutputValueClass(Tuple.class);
    predictorJob.setOutputKeyClass(NullWritable.class);
    predictorJob.setOutputValueClass(Text.class);

    // The shared configuration step runs first so the reducer settings below can see it.
    Utility.setConfiguration(predictorJob.getConfiguration());
    int reducerCount = predictorJob.getConfiguration().getInt("utp.num.reducer", -1);
    if (reducerCount == -1) {
      reducerCount = predictorJob.getConfiguration().getInt("num.reducer", 1);
    }
    predictorJob.setNumReduceTasks(reducerCount);

    if (predictorJob.waitForCompletion(true)) {
      return 0;
    }
    return 1;
  }

コード例 #18

0

ファイルを表示

ファイル: MatrixMultiplyJob.java プロジェクト: steveash/decomposer

  /**
   * Configures and runs the "matrix multiply" job.
   *
   * <p>The input path comes from the {@code sparse.vector.output.path} job property; the output
   * path is the {@code dense.vector.output.path} property with a timestamp appended.
   *
   * @param args command-line arguments; not referenced by this method
   * @return 0 when the job succeeded, 1 otherwise (the usual {@code Tool} convention, so a
   *     launcher that forwards the value to {@code System.exit} reports success as 0)
   * @throws Exception if job setup, submission or monitoring fails
   */
  @Override
  public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    conf.set("job.name", System.currentTimeMillis() + "/");
    Properties configProps = loadJobProperties();

    CacheUtils.addSerializableToCache(conf, randomDenseMapVector(100000), "inputVector");

    Job job = new Job(conf, "matrix multiply");
    job.setJarByClass(MatrixMultiplyJob.class);
    job.setMapperClass(MatrixMultiplyMapper.class);
    job.setReducerClass(MatrixMultiplyReducer.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(MapVectorWritableComparable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    // Spaces and colons are replaced so the timestamp can be used inside a path.
    String timestamp = new Date().toString().replace(' ', '_').replace(':', '_');

    FileInputFormat.addInputPath(
        job, new Path(configProps.getProperty("sparse.vector.output.path")));
    FileOutputFormat.setOutputPath(
        job, new Path(configProps.getProperty("dense.vector.output.path") + timestamp));

    // Previously returned 1 on success and -1 on failure, which reads as failure to a shell.
    return job.waitForCompletion(true) ? 0 : 1;
  }

コード例 #19

0

ファイルを表示

ファイル: WordCount.java プロジェクト: y-tag/java-Hadoop-MapReduceSample

  /**
   * Configures and runs the "wordcount" job with four reducers, then exits with 0 on success and
   * 1 on failure.
   *
   * <p>The output path is deleted recursively before the job is submitted.
   *
   * @param args generic options followed by exactly two arguments: input path and output path
   * @throws Exception if job setup, filesystem access, submission or monitoring fails
   */
  public static void main(String[] args) throws Exception {
    Configuration configuration = new Configuration();
    String[] positional = new GenericOptionsParser(configuration, args).getRemainingArgs();

    boolean wrongArgumentCount = positional.length != 2;
    if (wrongArgumentCount) {
      System.err.println("Usage: wordcount <input> <output>");
      System.exit(1);
    }

    Job countJob = new Job(configuration, "wordcount");
    countJob.setJarByClass(WordCount.class);

    // The reducer doubles as the combiner.
    countJob.setMapperClass(Map.class);
    countJob.setCombinerClass(Reduce.class);
    countJob.setReducerClass(Reduce.class);
    countJob.setNumReduceTasks(4);

    countJob.setOutputKeyClass(Text.class);
    countJob.setOutputValueClass(IntWritable.class);

    countJob.setInputFormatClass(TextInputFormat.class);
    countJob.setOutputFormatClass(TextOutputFormat.class);

    // Remove output left over from an earlier run.
    FileSystem.get(configuration).delete(new Path(positional[1]), true);

    FileInputFormat.setInputPaths(countJob, new Path(positional[0]));
    FileOutputFormat.setOutputPath(countJob, new Path(positional[1]));

    int exitStatus = countJob.waitForCompletion(true) ? 0 : 1;
    System.exit(exitStatus);
  }

コード例 #20

0

ファイルを表示

ファイル: LevNestDissectJob.java プロジェクト: swapster/hadoop

  /**
   * Runs the "Levelized Nested Dissection Starting" job.
   *
   * <p>Reads from {@code out/<outPath_count>}, writes to {@code out/<outPath_start>} (deleting
   * that directory first when it exists), then bumps {@code depth} from 0 to 1 and sets {@code
   * wasStart}.
   *
   * @throws IOException if filesystem access, job setup or submission fails
   * @throws InterruptedException if waiting for the job is interrupted
   * @throws ClassNotFoundException if a job class cannot be resolved
   */
  private static void StartingJob()
      throws IOException, InterruptedException, ClassNotFoundException {

    // The vertex count is set on the configuration before the job is created from it.
    conf = new Configuration();
    fs = FileSystem.get(conf);
    conf.setLong("my.vertex.num", num);
    job = Job.getInstance(conf, "Levelized Nested Dissection Starting");

    in = out.suffix("/" + outPath_count);
    FileInputFormat.addInputPath(job, in);

    // Clear a stale output directory before pointing the job at it.
    out_start = out.suffix("/" + outPath_start);
    boolean staleOutput = fs.exists(out_start);
    if (staleOutput) {
      fs.delete(out_start, true);
    }
    FileOutputFormat.setOutputPath(job, out_start);

    job.setJarByClass(LevNestDissectJob.class);
    job.setMapperClass(StartVertexMapper.class);
    job.setReducerClass(StartVertexReducer.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(VertexWritable.class);

    job.waitForCompletion(true);

    // A depth of zero advances to one; any other depth is left untouched.
    if (depth == 0) {
      depth++;
    }
    wasStart = true;
  }

コード例 #21

0

ファイルを表示

ファイル: BrowerLogFormatMR.java プロジェクト: wisgood/mobile-core

  /**
   * Configures and runs the log-formatting job with a single reducer.
   *
   * <p>Settings are read from the configuration after generic options are parsed: {@code
   * job_name}, {@code input_dir} and {@code output_dir}. The output directory is deleted
   * recursively before the job is submitted.
   *
   * @param args command-line arguments handed to {@link GenericOptionsParser}
   * @return 0 when the job succeeded, 1 otherwise
   * @throws Exception if job setup, filesystem access, submission or monitoring fails
   */
  public int run(String[] args) throws Exception {
    Configuration configuration = new GenericOptionsParser(getConf(), args).getConfiguration();

    Job formatJob = new Job(configuration, configuration.get("job_name"));

    FileInputFormat.addInputPaths(formatJob, configuration.get("input_dir"));

    // Point the job at the output directory, then clear whatever is already there.
    Path outputDir = new Path(configuration.get("output_dir"));
    FileOutputFormat.setOutputPath(formatJob, outputDir);
    outputDir.getFileSystem(configuration).delete(outputDir, true);

    formatJob.setJarByClass(BrowerLogFormatMR.class);
    formatJob.setMapperClass(BrowerLogFormatMapper.class);
    formatJob.setReducerClass(BrowerLogFormatReducer.class);
    formatJob.setNumReduceTasks(1);

    formatJob.setInputFormatClass(TextInputFormat.class);
    formatJob.setOutputFormatClass(TextOutputFormat.class);

    formatJob.setOutputKeyClass(NullWritable.class);
    formatJob.setOutputValueClass(Text.class);

    if (formatJob.waitForCompletion(true)) {
      return 0;
    }
    return 1;
  }

コード例 #22

0

ファイルを表示

ファイル: TestHFileOutputFormat2.java プロジェクト: mringg/hbase

  /**
   * Test helper that runs the "testLocalMRIncrementalLoad" job writing to {@code outDir}.
   *
   * <p>Before submitting, it asserts that {@code outDir} does not exist on the test filesystem
   * and that the job's reduce-task count equals the number of region locations reported by
   * {@code regionLocator}; it then asserts that the job completes successfully.
   *
   * @param conf configuration the job is created from
   * @param tableDescriptor table descriptor passed to {@code configureIncrementalLoad}
   * @param regionLocator region locator passed to {@code configureIncrementalLoad} and used for
   *     the reduce-task count check
   * @param outDir output directory for the job; must not exist yet
   * @throws IOException if job setup or filesystem access fails
   * @throws UnsupportedEncodingException declared by the original signature
   * @throws InterruptedException if waiting for the job is interrupted
   * @throws ClassNotFoundException if a job class cannot be resolved
   */
  private void runIncrementalPELoad(
      Configuration conf,
      HTableDescriptor tableDescriptor,
      RegionLocator regionLocator,
      Path outDir)
      throws IOException, UnsupportedEncodingException, InterruptedException,
          ClassNotFoundException {
    Job job = new Job(conf, "testLocalMRIncrementalLoad");
    job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
    // Keep the serializations already configured and append the three listed below.
    job.getConfiguration()
        .setStrings(
            "io.serializations",
            conf.get("io.serializations"),
            MutationSerialization.class.getName(),
            ResultSerialization.class.getName(),
            KeyValueSerialization.class.getName());
    setupRandomGeneratorMapper(job);
    HFileOutputFormat2.configureIncrementalLoad(job, tableDescriptor, regionLocator);
    FileOutputFormat.setOutputPath(job, outDir);

    // Precondition: the output directory must not exist before the job runs.
    assertFalse(util.getTestFileSystem().exists(outDir));

    // The job is expected to have one reduce task per region location.
    assertEquals(regionLocator.getAllRegionLocations().size(), job.getNumReduceTasks());

    assertTrue(job.waitForCompletion(true));
  }

コード例 #23

0

ファイルを表示

ファイル: RemoveDup.java プロジェクト: LurieMrZhang/hadoop

  /**
   * Configures and runs the "RemoveDup" job, then exits with 0 on success and 1 on failure.
   *
   * <p>After generic options are stripped, every remaining argument except the last is added as
   * an input path and the last one is used as the output path.
   *
   * @param args generic options followed by one or more input paths and one output path
   * @throws Exception if job setup, submission or monitoring fails
   */
  public static void main(String[] args) throws Exception {
    Configuration configuration = new Configuration();
    String[] positional = new GenericOptionsParser(configuration, args).getRemainingArgs();
    if (positional.length < 2) {
      System.err.println("Usage: RemoveDup <in> [<in>...] <out>");
      System.exit(2);
    }

    // Delete the output directory (optional; avoids the "output directory already exists"
    // error when the job is run repeatedly).
    // HDFSUtil.deleteFile(conf, otherArgs[otherArgs.length - 1]);

    Job dedupJob = Job.getInstance(configuration, "RemoveDup");
    dedupJob.setJarByClass(RemoveDup.class);

    // The reducer doubles as the combiner.
    dedupJob.setMapperClass(RemoveDupMapper.class);
    dedupJob.setCombinerClass(RemoveDupReducer.class);
    dedupJob.setReducerClass(RemoveDupReducer.class);

    dedupJob.setOutputKeyClass(Text.class);
    dedupJob.setOutputValueClass(NullWritable.class);

    // The last positional argument is the output; everything before it is input.
    final int outputIndex = positional.length - 1;
    int inputIndex = 0;
    while (inputIndex < outputIndex) {
      FileInputFormat.addInputPath(dedupJob, new Path(positional[inputIndex]));
      inputIndex++;
    }
    FileOutputFormat.setOutputPath(dedupJob, new Path(positional[outputIndex]));

    int exitStatus = dedupJob.waitForCompletion(true) ? 0 : 1;
    System.exit(exitStatus);
  }

コード例 #24

0

ファイルを表示

ファイル: OSMGrid.java プロジェクト: dzuongld/SP-GiST

 /**
  * Configures and runs the "OSM-Gridding" job.
  *
  * <p>The reduce-task count is the square of the integer parsed from {@code args[6]}.
  *
  * @param args at least seven entries: input path, output path, the values stored under {@code
  *     OSMMapper.MINLAT}, {@code MINLON}, {@code MAXLAT}, {@code MAXLON}, and the value stored
  *     under {@code OSMReducer.GRID}
  * @return 0 when the job succeeded, 1 otherwise
  * @throws Exception if job setup, submission or monitoring fails
  */
 @Override
 public int run(String[] args) throws Exception {
   // Fewer than seven arguments terminates the JVM with status -1.
   if (args.length < 7) {
     System.exit(-1);
   }

   Job gridJob = Job.getInstance(new Configuration(), "OSM-Gridding");
   gridJob.setJarByClass(OSMGrid.class);

   gridJob.setMapperClass(OSMMapper.class);
   gridJob.setPartitionerClass(GridPartitioner.class);
   gridJob.setReducerClass(OSMReducer.class);

   gridJob.setOutputKeyClass(WritablePoint.class);
   gridJob.setOutputValueClass(LongWritable.class);

   gridJob.setInputFormatClass(TextInputFormat.class);
   gridJob.setOutputFormatClass(TextOutputFormat.class);

   FileInputFormat.addInputPath(gridJob, new Path(args[0]));
   FileOutputFormat.setOutputPath(gridJob, new Path(args[1]));

   // Bounding box and grid setting are passed on through the job configuration.
   Configuration jobSettings = gridJob.getConfiguration();
   jobSettings.set(OSMMapper.MINLAT, args[2]);
   jobSettings.set(OSMMapper.MINLON, args[3]);
   jobSettings.set(OSMMapper.MAXLAT, args[4]);
   jobSettings.set(OSMMapper.MAXLON, args[5]);
   jobSettings.set(OSMReducer.GRID, args[6]);

   int gridSide = Integer.parseInt(args[6]);
   gridJob.setNumReduceTasks(gridSide * gridSide);

   if (gridJob.waitForCompletion(true)) {
     return 0;
   }
   return 1;
 }

コード例 #25

0

ファイルを表示

ファイル: PageRank.java プロジェクト: xuyang06/dataAnalysis

  /**
   * Runs three chained "PageRank" iterations.
   *
   * <p>Iteration {@code i} reads the output of the previous iteration (the base path for the
   * first one) and writes to the base path with {@code i} appended. Because each iteration
   * depends on the previous output, the chain is aborted with exit status 1 as soon as one
   * iteration fails.
   *
   * @param args unused; all paths are hard-coded
   * @throws Exception if job setup, submission or monitoring fails
   */
  public static void main(String[] args) throws Exception {

    String paths = "/user/cloudera/00";
    String path1 = paths;

    for (int i = 1; i <= 3; i++) {
      System.out.println("Now exectuing the " + i + "-th job!");
      Job job = new Job();
      String path2 = paths + i;
      job.setJarByClass(PageRank.class);
      job.setJobName("PageRank");
      FileInputFormat.addInputPath(job, new Path(path1));
      FileOutputFormat.setOutputPath(job, new Path(path2));

      job.setMapperClass(PageRankMapper.class);
      job.setReducerClass(PageRankReducer.class);

      job.setOutputKeyClass(Text.class);
      job.setOutputValueClass(Text.class);

      // The next iteration would read this iteration's output; stop if it is not valid.
      if (!job.waitForCompletion(true)) {
        System.err.println("PageRank iteration " + i + " failed; aborting.");
        System.exit(1);
      }
      path1 = path2;
    }
  }

コード例 #26

0

ファイルを表示

ファイル: TruthyIndexFeatureCounter.java プロジェクト: salsaproj/IndexedHBase

  /**
   * Builds a map-only job (zero reduce tasks) that scans one column family of a table with
   * {@code TfcMapper} and writes {@code Text}/{@code Text} pairs to a file output path.
   *
   * @param conf configuration that receives the "row.key.type" and "table.name" entries and the
   *     speculative-execution switches, and from which the job is created
   * @param args positional arguments: [0] table name, [1] column family, [2] output path,
   *     [3] row key type
   * @return the configured job; it is not submitted here
   * @throws IOException if job creation or configuration fails
   */
  public static Job configureJob(Configuration conf, String[] args) throws IOException {
    final String table = args[0];
    final String family = args[1];
    final String outputDir = args[2];
    final String keyType = args[3];

    // All entries are written to conf before the job is created from it.
    conf.set("row.key.type", keyType);
    conf.set("table.name", table);
    conf.set("mapred.map.tasks.speculative.execution", "false");
    conf.set("mapred.reduce.tasks.speculative.execution", "false");

    // Restrict the scan to the requested column family.
    final Scan familyScan = new Scan();
    familyScan.addFamily(Bytes.toBytes(family));
    familyScan.setBatch(ConstantsTruthy.TRUTHY_TABLE_SCAN_BATCH);

    final String jobName = "Count the column count and indexRecordSize for each row in " + table;
    final Job counterJob = Job.getInstance(conf, jobName);
    counterJob.setJarByClass(TruthyIndexFeatureCounter.class);
    TableMapReduceUtil.initTableMapperJob(
        table, familyScan, TfcMapper.class, Text.class, Text.class, counterJob, true);

    // No reducers: mapper output goes straight to the output path.
    counterJob.setNumReduceTasks(0);
    counterJob.setOutputKeyClass(Text.class);
    counterJob.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(counterJob, new Path(outputDir));
    TableMapReduceUtil.addDependencyJars(counterJob);
    return counterJob;
  }

コード例 #27

0

ファイルを表示

ファイル: RunningAggregator.java プロジェクト: vbajaria/chombo

  /**
   * Configures and runs the running-aggregates job.
   *
   * <p>Reads from {@code args[0]}, writes to {@code args[1]}, and takes the reducer count from the
   * "num.reducer" configuration entry (default 1).
   *
   * @param args [0] input path, [1] output path
   * @return 0 if the job completed successfully, 1 otherwise
   * @throws Exception if job setup or execution fails
   */
  @Override
  public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJobName("Running aggregates  for numerical attributes");
    job.setJarByClass(RunningAggregator.class);

    // Input and output locations.
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Applied before "num.reducer" is read from the job configuration below.
    Utility.setConfiguration(job.getConfiguration(), "chombo");

    // Map side.
    job.setMapperClass(RunningAggregator.AggrMapper.class);
    job.setMapOutputKeyClass(Tuple.class);
    job.setMapOutputValueClass(Tuple.class);

    // Reduce side.
    job.setReducerClass(RunningAggregator.AggrReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(job.getConfiguration().getInt("num.reducer", 1));

    if (job.waitForCompletion(true)) {
      return 0;
    }
    return 1;
  }

コード例 #28

0

ファイルを表示

ファイル: HubsAndSpokes.java プロジェクト: kidaak/Hadoop-MapReduce-1

  /**
   * Entry point: runs the "hubsandspokesload" job and then hands over to
   * {@code dijkstra} for the iterative stage.
   *
   * <p>Expects exactly three non-generic arguments: {@code <in> <out> <finalout>}. The process
   * exits with status 2 on a usage error or if the load job fails.
   *
   * @param args command-line arguments, including any generic Hadoop options
   * @throws Exception if job setup or execution fails
   */
  public static void main(String[] args) throws Exception {
    Configuration configuration = new Configuration();
    String[] remaining = new GenericOptionsParser(configuration, args).getRemainingArgs();
    if (remaining.length != 3) {
      System.err.println("Usage: hubsandspokesload <in> <out> <finalout>");
      System.exit(2);
    }
    final String flightData = remaining[0];
    final String graphDir = remaining[1];
    final String finalDir = remaining[2];

    // Job 1: convert the flight data into the node/graph structure.
    Job loadJob = new Job(configuration, "hubsandspokesload");
    loadJob.setJarByClass(HubsAndSpokes.class);
    loadJob.setMapperClass(HubSpokeLoadMapper.class);
    loadJob.setMapOutputKeyClass(Text.class);
    loadJob.setMapOutputValueClass(Text.class);
    loadJob.setReducerClass(HubSpokeLoadReducer.class);
    loadJob.setOutputKeyClass(NodeWritable.class);
    loadJob.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(loadJob, new Path(flightData));
    FileOutputFormat.setOutputPath(loadJob, new Path(graphDir));

    if (!loadJob.waitForCompletion(true)) {
      System.exit(2);
    }

    // Job 2: the iterative hub/spoke computation, starting from job 1's output.
    dijkstra(graphDir, finalDir);
  }

コード例 #29

0

ファイルを表示

ファイル: PartialBuilder.java プロジェクト: hmcl/mahout

  /**
   * Sets up the map-only job: text input, sequence-file output, {@code Step1Mapper} as mapper,
   * and {@code TreeID}/{@code MapredOutput} as output key/value classes.
   *
   * <p>It also computes the input splits and stores their count in "mapred.map.tasks"; if no
   * splits can be computed a warning is logged and the setting is left untouched.
   *
   * @param job the job to configure
   * @throws IOException if computing paths or input splits fails
   */
  @Override
  protected void configureJob(Job job) throws IOException {
    Configuration conf = job.getConfiguration();

    job.setJarByClass(PartialBuilder.class);

    // Input and output locations.
    FileInputFormat.setInputPaths(job, getDataPath());
    FileOutputFormat.setOutputPath(job, getOutputPath(conf));

    // Formats and key/value types.
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(TreeID.class);
    job.setOutputValueClass(MapredOutput.class);

    // Mapper only, no reduce phase.
    job.setMapperClass(Step1Mapper.class);
    job.setNumReduceTasks(0);

    // This implementation relies on mapred.map.tasks holding the actual number of mappers
    // Hadoop will use, so derive it from the computed input splits.
    List<?> splits = new TextInputFormat().getSplits(job);
    boolean haveSplits = splits != null && !splits.isEmpty();
    if (haveSplits) {
      int numSplits = splits.size();
      log.info("Setting mapred.map.tasks = {}", numSplits);
      conf.setInt("mapred.map.tasks", numSplits);
    } else {
      log.warn("Unable to compute number of splits?");
    }
  }

コード例 #30

0

ファイルを表示

ファイル: HubsAndSpokes.java プロジェクト: kidaak/Hadoop-MapReduce-1

  /**
   * Runs the "hubsandspokes" job 32 times, feeding each round's output into the next round.
   *
   * <p>Round 0 reads {@code input} and writes to {@code output}; every later round {@code i}
   * reads the previous round's output and writes to {@code output} followed by {@code i - 1},
   * so the last round writes to {@code output + "30"}. The process exits with status 2 as soon
   * as a round fails.
   *
   * @param input directory read by the first round
   * @param output directory written by the first round and prefix for later rounds' directories
   * @throws Exception if job setup or execution fails
   */
  public static void dijkstra(String input, String output) throws Exception {
    final String outputBase = output;
    String roundInput = input;
    String roundOutput = output;

    // Number of rounds (k) is fixed at 32.
    for (int round = 0; round < 32; round++) {
      Job job = new Job(new Configuration(), "hubsandspokes");
      job.setJarByClass(HubsAndSpokes.class);
      job.setMapperClass(HubSpokeMapper.class);
      job.setMapOutputKeyClass(Text.class);
      job.setMapOutputValueClass(NodeWritable.class);
      job.setReducerClass(HubSpokeReducer.class);
      job.setOutputKeyClass(NodeWritable.class);
      job.setOutputValueClass(Text.class);
      job.setNumReduceTasks(1);
      FileInputFormat.addInputPath(job, new Path(roundInput));
      FileOutputFormat.setOutputPath(job, new Path(roundOutput));

      // Abort the whole chain if this round fails.
      if (!job.waitForCompletion(true)) {
        System.exit(2);
      }

      // The next round reads what this round wrote and writes to a new numbered directory.
      roundInput = roundOutput;
      roundOutput = outputBase + round;
    }
  }