public Job getJob(Configuration conf) throws IOException {
    Job job = new Job(conf, "pivoting");
    job.setJarByClass(PivotingReducer.class);
    job.setMapperClass(Mapper.class);
    job.setReducerClass(PivotingReducer.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapOutputKeyClass(RuleWritable.class);
    job.setMapOutputValueClass(MapWritable.class);
    job.setOutputKeyClass(RuleWritable.class);
    job.setOutputValueClass(MapWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setPartitionerClass(RuleWritable.SourcePartitioner.class);
    FileInputFormat.setInputPaths(job, new Path(conf.get("thrax.work-dir") + "collected"));
    int maxSplitSize = conf.getInt("thrax.max-split-size", 0);
    if (maxSplitSize != 0)
        FileInputFormat.setMaxInputSplitSize(job, maxSplitSize);
    int numReducers = conf.getInt("thrax.reducers", 4);
    job.setNumReduceTasks(numReducers);
    FileOutputFormat.setOutputPath(job, new Path(conf.get("thrax.work-dir") + "pivoted"));
    FileOutputFormat.setCompressOutput(job, true);
    return job;
}
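// A hedged sketch, not the actual Thrax source, of what the
// RuleWritable.SourcePartitioner referenced above typically looks like.
// Partitioning on the rule's source side sends every rule that shares a
// source to the same reducer, which the pivoting reducer relies on.
// The public 'source' Text field is an assumption about RuleWritable.
public static class SourcePartitioner extends Partitioner<RuleWritable, MapWritable> {
    @Override
    public int getPartition(RuleWritable key, MapWritable value, int numPartitions) {
        return (key.source.hashCode() & Integer.MAX_VALUE) % numPartitions;
    }
}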
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job1 = new Job(conf, "combine votes");
    job1.setJarByClass(VoteCount.class);
    job1.setMapperClass(MergeFilesMapper.class);
    job1.setCombinerClass(MergedFilesReducer.class);
    job1.setReducerClass(MergedFilesReducer.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job1, new Path(args[0]));
    FileOutputFormat.setOutputPath(job1, new Path(args[1] + "-tmp"));
    // Abort early if the first stage fails; its output feeds the second stage.
    if (!job1.waitForCompletion(true)) {
        System.exit(1);
    }
    Job job2 = new Job(conf, "votes count");
    job2.setJarByClass(VoteCount.class);
    job2.setMapperClass(CalculateVotesMapper.class);
    job2.setCombinerClass(CalculateVotesReducer.class);
    job2.setReducerClass(CalculateVotesReducer.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job2, new Path(args[1] + "-tmp"));
    FileOutputFormat.setOutputPath(job2, new Path(args[1]));
    System.exit(job2.waitForCompletion(true) ? 0 : 1);
}
/** RUN */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: wordcount <input_dir> <output_dir> <reducers>");
        return -1;
    }
    Job job = new Job(getConf(), "PigMix L13");
    job.setJarByClass(L13.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(ReadInput.class);
    job.setReducerClass(Join.class);
    Properties props = System.getProperties();
    Configuration conf = job.getConfiguration();
    for (Map.Entry<Object, Object> entry : props.entrySet()) {
        conf.set((String) entry.getKey(), (String) entry.getValue());
    }
    FileInputFormat.addInputPath(job, new Path(args[0] + "/pigmix_page_views"));
    FileInputFormat.addInputPath(job, new Path(args[0] + "/pigmix_power_users_samples"));
    FileOutputFormat.setOutputPath(job, new Path(args[1] + "/L13out"));
    job.setNumReduceTasks(Integer.parseInt(args[2]));
    return job.waitForCompletion(true) ? 0 : -1;
}
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // if (otherArgs.length != 3) {
    //     System.err.println("Usage: <tradeTableDir> <payTableDir> <output>");
    //     System.exit(2);
    // }
    // String tradeTableDir = args[0];
    // String payTableDir = args[1];
    // String joinTableDir = args[2];
    Job job = new Job(conf, "Join");
    job.setJarByClass(JoinMain.class);
    job.setMapperClass(PreMapper.class);
    job.setMapOutputKeyClass(TextPair.class);
    job.setPartitionerClass(KeyPartition.class);
    job.setGroupingComparatorClass(FirstComparator.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setReducerClass(CommonReduce.class);
    FileInputFormat.addInputPath(job, new Path("/user/hadoop/input/load3/action.txt"));
    FileInputFormat.addInputPath(job, new Path("/user/hadoop/input/load3/alipay.txt"));
    FileOutputFormat.setOutputPath(job, new Path("/user/hadoop/output3/"));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
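// A minimal sketch, assuming a TextPair with a getFirst() accessor, of the
// secondary-sort helpers the join driver above references: KeyPartition
// partitions on the first half of the composite key and FirstComparator
// groups reducer input the same way, so rows from both tables that share a
// join key reach a single reduce() call together. Not the original classes.
public static class KeyPartition extends Partitioner<TextPair, Text> {
    @Override
    public int getPartition(TextPair key, Text value, int numPartitions) {
        return (key.getFirst().hashCode() & Integer.MAX_VALUE) % numPartitions;
    }
}

public static class FirstComparator extends WritableComparator {
    protected FirstComparator() {
        super(TextPair.class, true);
    }

    @Override
    @SuppressWarnings("rawtypes")
    public int compare(WritableComparable a, WritableComparable b) {
        return ((TextPair) a).getFirst().compareTo(((TextPair) b).getFirst());
    }
}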
public static void runDailyTrendEstimationJob(String inputPath) throws Exception {
    Configuration conf = new Configuration();
    Job job;
    try {
        conf.set("mongo.output.uri", "mongodb://52.33.93.221:27017/mongo_hadoop.page_trends");
        job = Job.getInstance(conf, "dataclean");
        job.setJarByClass(DataCleanJob.class);
        job.setMapperClass(DataCleanMapper.class);
        job.setReducerClass(DataCleanReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(PageDataValue.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(MongoUpdateWritable.class);
        FileInputFormat.setInputDirRecursive(job, true);
        FileInputFormat.addInputPath(job, new Path(inputPath));
        MongoConfig mongoConfig = new MongoConfig(conf);
        mongoConfig.setOutputFormat(MongoOutputFormat.class);
        job.setOutputFormatClass(MongoOutputFormat.class);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // conf.addResource(new Path("/home/hadoop/hadoop/hadoop-1.2.1/conf/core-site.xml"));
    // conf.addResource(new Path("/home/hadoop/hadoop/hadoop-1.2.1/conf/hdfs-site.xml"));
    // ===== Stage 1 =====
    Job job1 = new Job(conf, "Stage 1: Frequency Count");
    job1.setJarByClass(HashCount1.class);
    job1.setMapperClass(Mapper1.class);
    // job1.setCombinerClass(Combine1.class);
    job1.setReducerClass(Reducer1.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(IntWritable.class);
    job1.setNumReduceTasks(1);
    FileInputFormat.addInputPath(job1, new Path("/TwitterInFiles/twitter_10k.tar.gz"));
    FileOutputFormat.setOutputPath(job1, new Path("/twitterOuts/output1"));
    job1.waitForCompletion(true);
    // ===== Stage 2 =====
    Job job2 = new Job(conf, "Stage 2: Sort");
    job2.setJarByClass(HashCount1.class);
    job2.setMapperClass(Mapper2.class);
    // job1.setCombinerClass(IntSumReducer.class);
    job2.setReducerClass(Reducer2.class);
    job2.setOutputKeyClass(IntWritable.class);
    job2.setOutputValueClass(Text.class);
    job2.setNumReduceTasks(1);
    FileInputFormat.addInputPath(job2, new Path("/twitterOuts/output1"));
    FileOutputFormat.setOutputPath(job2, new Path("/twitterOuts/output2"));
    // A job can only be submitted once, so wait once and exit on its status.
    System.exit(job2.waitForCompletion(true) ? 0 : 1);
}
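// Hedged sketch (assumed, not the original) of the Stage 2 mapper wired in
// above: it inverts each "word<TAB>count" line produced by Stage 1 into a
// (count, word) pair, so the shuffle sorts records by frequency (ascending
// by default) before Reducer2 writes them back out.
public static class Mapper2 extends Mapper<LongWritable, Text, IntWritable, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] fields = value.toString().split("\t");
        if (fields.length == 2) {
            context.write(new IntWritable(Integer.parseInt(fields[1])), new Text(fields[0]));
        }
    }
}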
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    // conf.set("stat_date", dateString);
    Job job = new Job(conf, "DayhslogUserDateNewMac");
    job.setJarByClass(DayhslogUserDate.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileInputFormat.addInputPath(job, new Path(args[1]));
    FileOutputFormat.setOutputPath(job, new Path(args[2] + "tmp"));
    FileSystem.get(conf).delete(new Path(args[2] + "tmp"), true);
    job.setMapperClass(DayhslogUserDateNewMacMapper.class);
    job.setReducerClass(DayhslogUserDateNewMacReducer.class);
    // job.setInputFormatClass(LzoTextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setNumReduceTasks(10);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    int code = job.waitForCompletion(true) ? 0 : 1;
    if (code == 0) {
        // Second pass: join the original input with the intermediate output.
        Job resultJob = new Job(conf, "DayhslogUserDate");
        resultJob.setJarByClass(DayhslogUserDate.class);
        FileInputFormat.addInputPath(resultJob, new Path(args[0]));
        FileInputFormat.addInputPath(resultJob, new Path(args[2] + "tmp"));
        FileOutputFormat.setOutputPath(resultJob, new Path(args[2]));
        FileSystem.get(conf).delete(new Path(args[2]), true);
        resultJob.setMapperClass(DayhslogUserDateMapper.class);
        resultJob.setReducerClass(DayhslogUserDateReducer.class);
        resultJob.setNumReduceTasks(10);
        resultJob.setMapOutputKeyClass(Text.class);
        resultJob.setMapOutputValueClass(Text.class);
        resultJob.setOutputKeyClass(Text.class);
        resultJob.setOutputValueClass(Text.class);
        code = resultJob.waitForCompletion(true) ? 0 : 1;
    }
    // Clean up the intermediate directory before exiting.
    Path tmpPath = new Path(args[2] + "tmp");
    FileSystem.get(conf).delete(tmpPath, true);
    System.exit(code);
    return code;
}
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    // 10-hour task timeout, in milliseconds.
    conf.setLong("mapreduce.task.timeout", 10000 * 60 * 60);
    Path train_file = new Path(args[0]);
    Path test_file = new Path(args[1]);
    conf.set("train_file", train_file.getParent().getName());
    Job job = new Job(conf, "MapTestID");
    job.setJarByClass(MapTestID.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(MapTestID.MapTestIDMap.class);
    job.setReducerClass(MapTestID.MapTestIDReduce.class);
    job.setNumReduceTasks(300);
    FileInputFormat.addInputPath(job, train_file);
    FileInputFormat.addInputPath(job, test_file);
    FileOutputFormat.setOutputPath(job, new Path(args[2]));
    return job.waitForCompletion(true) ? 0 : 1;
}
public static void main(String[] args)
        throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "job");
    job.setJarByClass(PVidConvert.class);
    job.setMapperClass(Map1.class);
    job.setReducerClass(Reduce1.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path("/tmp/temporary_execution/"));
    // Stop if the first stage fails; the second stage reads its output.
    if (!job.waitForCompletion(true)) {
        System.exit(1);
    }
    Configuration conf1 = new Configuration();
    Job job1 = new Job(conf1, "job1");
    job1.setJarByClass(PVidConvert.class);
    job1.setMapperClass(Map2.class);
    job1.setReducerClass(Reduce2.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job1, new Path("/tmp/temporary_execution/"));
    FileOutputFormat.setOutputPath(job1, new Path(args[1]));
    System.exit(job1.waitForCompletion(true) ? 0 : 1);
}
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    GenericOptionsParser optionparser = new GenericOptionsParser(conf, args);
    conf = optionparser.getConfiguration();
    Job job = new Job(conf, conf.get("job_name"));
    job.setJarByClass(DeliverFormatForUVMR.class);
    FileInputFormat.addInputPaths(job, conf.get("input_dir"));
    String outputDir = conf.get("output_dir");
    String tmpDir = outputDir + "_tmp";
    Path tmpOut = new Path(tmpDir);
    FileOutputFormat.setOutputPath(job, tmpOut);
    tmpOut.getFileSystem(conf).delete(tmpOut, true);
    job.setMapperClass(DeliverFormatForUVMapper.class);
    job.setCombinerClass(DeliverFormatForUVCombiner.class);
    job.setReducerClass(DeliverFormatForUVReducer.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(conf.getInt("reduce_num", 20));
    int code = job.waitForCompletion(true) ? 0 : 1;
    if (code == 0) {
        // This job combines the small intermediate files into one output file.
        Job combineJob = new Job(conf, "CombineTmpData");
        combineJob.setJarByClass(DeliverFormatForUVMR.class);
        FileInputFormat.addInputPath(combineJob, new Path(tmpDir));
        FileOutputFormat.setOutputPath(combineJob, new Path(outputDir));
        combineJob.setMapperClass(IdentityMapper.class);
        combineJob.setReducerClass(IdentityReducer.class);
        combineJob.setInputFormatClass(KeyValueTextInputFormat.class);
        combineJob.setOutputFormatClass(TextOutputFormat.class);
        combineJob.setOutputKeyClass(Text.class);
        combineJob.setOutputValueClass(Text.class);
        TextOutputFormat.setCompressOutput(combineJob, true);
        TextOutputFormat.setOutputCompressorClass(combineJob, LzopCodec.class);
        combineJob.setNumReduceTasks(1);
        code = combineJob.waitForCompletion(true) ? 0 : 1;
    }
    FileSystem.get(conf).delete(tmpOut, true);
    // Build LZO indexes so the compressed output stays splittable.
    LzoIndexer lzoIndexer = new LzoIndexer(conf);
    lzoIndexer.index(new Path(outputDir));
    System.exit(code);
    return code;
}
/** Adds the given path and all of its subdirectories as input paths on the job. */
public static boolean addInputPaths(String pathString, Job job) throws IOException {
    HashSet<Path> paths = new HashSet<Path>();
    if (getAllSubDirs(URI.create(pathString), job, paths)) {
        // Keep any input paths that were already configured on the job.
        paths.addAll(Arrays.asList(FileInputFormat.getInputPaths(job)));
        FileInputFormat.setInputPaths(job, paths.toArray(new Path[0]));
        return true;
    }
    return false;
}
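// Hypothetical sketch of the getAllSubDirs helper the method above depends
// on (its real implementation is not shown here): recursively collect the
// directory and all of its subdirectories into 'paths', returning false when
// the root does not exist.
private static boolean getAllSubDirs(URI uri, Job job, Set<Path> paths) throws IOException {
    FileSystem fs = FileSystem.get(uri, job.getConfiguration());
    Path root = new Path(uri);
    if (!fs.exists(root)) {
        return false;
    }
    paths.add(root);
    for (FileStatus status : fs.listStatus(root)) {
        if (status.isDirectory()) {
            getAllSubDirs(status.getPath().toUri(), job, paths);
        }
    }
    return true;
}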
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    GenericOptionsParser gop = new GenericOptionsParser(conf, args);
    conf = gop.getConfiguration();
    Job job = new Job(conf, "ClientUserInstallMR");
    job.setJarByClass(ClientUserInstallMR.class);
    FileInputFormat.addInputPaths(job, conf.get("input_dir"));
    String outputDir = conf.get("output_dir");
    String tmpDir = outputDir + "_tmp";
    Path tmpOutput = new Path(tmpDir);
    FileOutputFormat.setOutputPath(job, tmpOutput);
    tmpOutput.getFileSystem(conf).delete(tmpOutput, true);
    job.setMapperClass(ClientUserInstallFirstMapper.class);
    job.setReducerClass(ClientUserInstallFirstReduce.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(30);
    int code = job.waitForCompletion(true) ? 0 : 1;
    if (code == 0) {
        Job secondJob = new Job(conf, "ClientUserInstallResult");
        secondJob.setJarByClass(ClientUserInstallMR.class);
        FileInputFormat.addInputPath(secondJob, new Path(tmpDir));
        Path output = new Path(outputDir);
        FileOutputFormat.setOutputPath(secondJob, output);
        output.getFileSystem(conf).delete(output, true);
        secondJob.setMapperClass(ClientUserInstallSecondMapper.class);
        secondJob.setReducerClass(ClientUserInstallSecondReduce.class);
        secondJob.setInputFormatClass(KeyValueTextInputFormat.class);
        secondJob.setOutputFormatClass(TextOutputFormat.class);
        secondJob.setOutputKeyClass(Text.class);
        secondJob.setOutputValueClass(Text.class);
        secondJob.setNumReduceTasks(1);
        code = secondJob.waitForCompletion(true) ? 0 : 1;
    }
    FileSystem.get(conf).delete(tmpOutput, true);
    System.exit(code);
    return code;
}
public static void main(String[] args) throws Exception {
    String paths = "/user/cloudera/00";
    String path1 = paths;
    String path2 = "";
    for (int i = 1; i <= 3; i++) {
        System.out.println("Now executing the " + i + "-th job!");
        Job job = new Job();
        job.setJarByClass(PageRank.class);
        job.setJobName("PageRank");
        path2 = paths + i;
        FileInputFormat.addInputPath(job, new Path(path1));
        FileOutputFormat.setOutputPath(job, new Path(path2));
        job.setMapperClass(PageRankMapper.class);
        job.setReducerClass(PageRankReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        // Feed this iteration's output into the next iteration.
        path1 = path2;
        job.waitForCompletion(true);
    }
    // System.exit(job.waitForCompletion(true) ? 0 : 1);
}
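// A hedged sketch of the PageRankMapper/PageRankReducer pair the loop above
// chains. Assumptions (not from the source): input lines look like
// "page<TAB>rank<TAB>outlink1,outlink2,...", and the damping factor is 0.85.
// The mapper spreads each page's rank over its outlinks and re-emits the
// link list; the reducer sums the contributions and applies damping, writing
// lines in the same format so the next iteration can consume them.
public static class PageRankMapper extends Mapper<LongWritable, Text, Text, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] parts = value.toString().split("\t");
        String page = parts[0];
        double rank = Double.parseDouble(parts[1]);
        String[] outlinks = parts.length > 2 ? parts[2].split(",") : new String[0];
        for (String target : outlinks) {
            context.write(new Text(target), new Text(Double.toString(rank / outlinks.length)));
        }
        // Re-emit the link structure so the reducer can reproduce it.
        context.write(new Text(page), new Text("links\t" + (parts.length > 2 ? parts[2] : "")));
    }
}

public static class PageRankReducer extends Reducer<Text, Text, Text, Text> {
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        double sum = 0.0;
        String links = "";
        for (Text v : values) {
            String s = v.toString();
            if (s.startsWith("links\t")) {
                links = s.substring("links\t".length());
            } else {
                sum += Double.parseDouble(s);
            }
        }
        context.write(key, new Text((0.15 + 0.85 * sum) + "\t" + links));
    }
}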
private boolean runJob(Configuration conf)
        throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(conf, "WordCount");
    job.setJarByClass(WordCount.class);
    // Configure input format and files
    job.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.addInputPath(job, new Path(inputDir));
    // Configure output format and files
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(outputDir));
    // Set up mapper, combiner and reducer
    job.setMapperClass(WordCountMapper.class);
    job.setReducerClass(WordCountReducer.class);
    job.setCombinerClass(WordCountReducer.class);
    // Set key and value types
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    return job.waitForCompletion(true);
}
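// A minimal sketch (assumed, not from the source) of the mapper and reducer
// the WordCount driver above wires in. The reducer doubles as the combiner,
// which is safe because summing partial counts is associative and
// commutative.
public static class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        for (String token : value.toString().split("\\s+")) {
            if (!token.isEmpty()) {
                word.set(token);
                context.write(word, ONE);
            }
        }
    }
}

public static class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable v : values) {
            sum += v.get();
        }
        context.write(key, new IntWritable(sum));
    }
}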
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: RemoveDup <in> [<in>...] <out>");
        System.exit(2);
    }
    // Optionally delete the output directory first, so repeated runs don't
    // fail with "output directory already exists".
    // HDFSUtil.deleteFile(conf, otherArgs[otherArgs.length - 1]);
    Job job = Job.getInstance(conf, "RemoveDup");
    job.setJarByClass(RemoveDup.class);
    job.setMapperClass(RemoveDupMapper.class);
    job.setCombinerClass(RemoveDupReducer.class);
    job.setReducerClass(RemoveDupReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
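// Hedged sketch of the RemoveDup mapper and reducer named above (assumed
// implementations): emitting each input line as a key with a NullWritable
// value lets the shuffle collapse duplicates, so the reducer, which also
// serves as the combiner, writes every distinct line exactly once.
public static class RemoveDupMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        context.write(value, NullWritable.get());
    }
}

public static class RemoveDupReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
    @Override
    protected void reduce(Text key, Iterable<NullWritable> values, Context context)
            throws IOException, InterruptedException {
        context.write(key, NullWritable.get());
    }
}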
public static void main(String[] args)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.out.println("USAGE: RFDSReasoner [pool path] [options]");
        return;
    }
    Job job = new Job(conf, "reasoner");
    job.setJarByClass(TCMReasoner.class);
    System.out.println(otherArgs[0]);
    job.setMapperClass(TCMMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Triple.class);
    job.setReducerClass(TCMReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Triple.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    job.waitForCompletion(true);
    // Report how many triples the reasoner derived.
    Counter derivedTriples = job.getCounters()
            .findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS");
    System.out.println(derivedTriples.getValue());
}
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    GenericOptionsParser gop = new GenericOptionsParser(conf, args);
    conf = gop.getConfiguration();
    Job job = new Job(conf, conf.get("job_name"));
    FileInputFormat.addInputPaths(job, conf.get("input_dir"));
    Path output = new Path(conf.get("output_dir"));
    FileOutputFormat.setOutputPath(job, output);
    output.getFileSystem(conf).delete(output, true);
    job.setJarByClass(BrowerLogFormatMR.class);
    job.setMapperClass(BrowerLogFormatMapper.class);
    job.setReducerClass(BrowerLogFormatReducer.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1);
    int code = job.waitForCompletion(true) ? 0 : 1;
    return code;
}
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(
            getConf(),
            "Import vessel locations from files in " + args[0]
                    + " into table cdb_vessel:vessel_location");
    FileInputFormat.addInputPath(job, new Path(args[0]));
    job.setJarByClass(ImportVTLocationFromFileWithReducer.class);
    job.setJobName("Vessel_location_injection");
    job.setInputFormatClass(VTVesselLocationFileInputFormat.class);
    job.setMapOutputKeyClass(Key_IMOAndRecordTime.class);
    job.setMapOutputValueClass(TextArrayWritable.class);
    // Secondary sort: partition and group by IMO number so each vessel's
    // records arrive at one reducer together, ordered by record time.
    job.setPartitionerClass(Partitioner_IMO.class);
    job.setGroupingComparatorClass(GroupComparator_IMO.class);
    job.setReducerClass(ImportReducer.class);
    job.setNumReduceTasks(Integer.parseInt(args[1]));
    job.setOutputFormatClass(NullOutputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}
public static void main(String[] args) throws Exception {
    // Job 1: convert all the flight data into a NodeWritable graph structure.
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: hubsandspokesload <in> <out> <finalout>");
        System.exit(2);
    }
    Job job = new Job(conf, "hubsandspokesload");
    job.setJarByClass(HubsAndSpokes.class);
    job.setMapperClass(HubSpokeLoadMapper.class);
    job.setReducerClass(HubSpokeLoadReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(NodeWritable.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    boolean b = job.waitForCompletion(true);
    if (!b) {
        System.exit(2);
    }
    // Job 2: run the HITS algorithm to calculate the hub and spoke value
    // at each node of the graph iteratively.
    dijkstra(otherArgs[1], otherArgs[2]);
    // dijkstra("output1", "finaloutput");
}
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] remainArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (remainArgs.length != 2) {
        System.err.println("Usage: wordcount <input> <output>");
        System.exit(1);
    }
    Job job = new Job(conf, "wordcount");
    job.setJarByClass(WordCount.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);
    job.setNumReduceTasks(4);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    // Remove any previous output so the job can rerun cleanly.
    FileSystem.get(conf).delete(new Path(remainArgs[1]), true);
    FileInputFormat.setInputPaths(job, new Path(remainArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(remainArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "ESIndexCreator");
    job.setJarByClass(ESIndexCreator.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(ESIndexCreator.MyMapper.class);
    job.setNumReduceTasks(0); // map-only job; skip the reduce phase
    job.setInputFormatClass(KeyValueTextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    // Program arguments:
    // 0: input file path
    // 1: output file path
    // 2: Elasticsearch server's host name
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.getConfiguration().set("host", args[2]);
    job.waitForCompletion(true);
}
public Map<String, Object> run(Map<String, Object> args) throws Exception {
    getConf().setLong("injector.current.time", System.currentTimeMillis());
    Path input;
    Object path = args.get(Nutch.ARG_SEEDDIR);
    if (path instanceof Path) {
        input = (Path) path;
    } else {
        input = new Path(path.toString());
    }
    numJobs = 1;
    currentJobNum = 0;
    currentJob = new NutchJob(getConf(), "inject " + input);
    FileInputFormat.addInputPath(currentJob, input);
    currentJob.setMapperClass(UrlMapper.class);
    currentJob.setMapOutputKeyClass(String.class);
    currentJob.setMapOutputValueClass(WebPage.class);
    currentJob.setOutputFormatClass(GoraOutputFormat.class);
    DataStore<String, WebPage> store = StorageUtils.createWebStore(
            currentJob.getConfiguration(), String.class, WebPage.class);
    GoraOutputFormat.setOutput(currentJob, store, true);
    // Injection is map-only; the identity reducer is never run.
    currentJob.setReducerClass(Reducer.class);
    currentJob.setNumReduceTasks(0);
    currentJob.waitForCompletion(true);
    ToolUtil.recordJobStatus(null, currentJob, results);
    return results;
}
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        LOG.error("Usage: " + getClass().getName() + " inputFileHDFS outputFileHDFS");
        return 1;
    }
    String inputFile = args[0];
    // Suffix the output path with a timestamp so reruns don't collide.
    String outputFile = args[1] + System.nanoTime();
    Configuration configuration = getConf();
    Job job = new Job(configuration);
    job.setJarByClass(getClass());
    job.setJobName(getClass().getName());
    job.setMapperClass(ReadRequestMap.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.setInputPaths(job, new Path(inputFile));
    FileOutputFormat.setOutputPath(job, new Path(outputFile));
    return job.waitForCompletion(true) ? 0 : 1;
}
public static void dijkstra(String input, String output) throws Exception {
    String temp = output;
    // Run the HITS algorithm (Job 2) for k = 32 iterations.
    for (int i = 0; i < 32; i++) {
        Configuration conf = new Configuration();
        Job job = new Job(conf, "hubsandspokes");
        job.setJarByClass(HubsAndSpokes.class);
        job.setMapperClass(HubSpokeMapper.class);
        job.setReducerClass(HubSpokeReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NodeWritable.class);
        job.setOutputKeyClass(NodeWritable.class);
        job.setOutputValueClass(Text.class);
        job.setNumReduceTasks(1);
        FileInputFormat.addInputPath(job, new Path(input));
        FileOutputFormat.setOutputPath(job, new Path(output));
        // Swap the input and output paths for the next iteration.
        input = output;
        output = temp + Integer.toString(i);
        // Block until the job completes.
        boolean b = job.waitForCompletion(true);
        if (!b) {
            System.exit(2);
        }
        // System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
public static void main(String[] args)
        throws IOException, InterruptedException, ClassNotFoundException {
    if (args.length != 2)
        throw new IllegalArgumentException(args.length + " usage: ... ");
    String bitvectorpath = args[0], outputPath = args[1];
    Configuration conf = new Configuration();
    Job job = Job.getInstance(new Cluster(conf), conf);
    job.setJobName("int key replace phase1");
    job.setJarByClass(OutlinkGrowthAnalysis.class);
    job.setMapperClass(BVIdentitiyMapper.class);
    job.setReducerClass(AnaylseOLGrowthReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(TabSeperatedTextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    FileInputFormat.setInputPaths(job, new Path(bitvectorpath));
    job.setNumReduceTasks(1);
    job.waitForCompletion(true);
}
public int run(String[] args) throws Exception {
    // Check input arguments
    if (args.length != 2) {
        System.out.println("Usage: firstprog <input HIB> <output directory>");
        System.exit(0);
    }
    // Initialize and configure MapReduce job
    Job job = Job.getInstance();
    // Set input format class which parses the input HIB and spawns map tasks
    // job.setInputFormatClass(ImageBundleInputFormat.class);
    job.setInputFormatClass(HibInputFormat.class);
    // Set the driver, mapper, and reducer classes which express the computation
    job.setJarByClass(SampleProgram.class);
    job.setMapperClass(SampleProgramMapper.class);
    job.setReducerClass(SampleProgramReducer.class);
    // Set the types for the key/value pairs passed to/from map and reduce layers
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(FloatImage.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    // Set the input and output paths on the HDFS
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    // Execute the MapReduce job and block until it completes
    boolean success = job.waitForCompletion(true);
    // Return success or failure
    return success ? 0 : 1;
}
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String jobName = "Running aggregates for numerical attributes";
    job.setJobName(jobName);
    job.setJarByClass(RunningAggregator.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    Utility.setConfiguration(job.getConfiguration(), "chombo");
    job.setMapperClass(RunningAggregator.AggrMapper.class);
    job.setReducerClass(RunningAggregator.AggrReducer.class);
    job.setMapOutputKeyClass(Tuple.class);
    job.setMapOutputValueClass(Tuple.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(job.getConfiguration().getInt("num.reducer", 1));
    int status = job.waitForCompletion(true) ? 0 : 1;
    return status;
}
@Override
public int run(String[] args) throws Exception {
    if (args.length < 7) {
        System.err.println("Usage: OSMGrid <input> <output> <minLat> <minLon> <maxLat> <maxLon> <grid>");
        System.exit(-1);
    }
    Configuration conf = getConf();
    Job job = Job.getInstance(conf, "OSM-Gridding");
    job.setJarByClass(OSMGrid.class);
    job.setOutputKeyClass(WritablePoint.class);
    job.setOutputValueClass(LongWritable.class);
    job.setMapperClass(OSMMapper.class);
    job.setPartitionerClass(GridPartitioner.class);
    job.setReducerClass(OSMReducer.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    // Bounding box of the area to grid, passed through the job configuration.
    job.getConfiguration().set(OSMMapper.MINLAT, args[2]);
    job.getConfiguration().set(OSMMapper.MINLON, args[3]);
    job.getConfiguration().set(OSMMapper.MAXLAT, args[4]);
    job.getConfiguration().set(OSMMapper.MAXLON, args[5]);
    job.getConfiguration().set(OSMReducer.GRID, args[6]);
    // One reducer per grid cell.
    job.setNumReduceTasks(Integer.parseInt(args[6]) * Integer.parseInt(args[6]));
    boolean succ = job.waitForCompletion(true);
    return succ ? 0 : 1;
}
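// Hypothetical sketch of the GridPartitioner configured above. WritablePoint's
// getX()/getY() accessors and coordinates normalized to [0,1) are assumptions;
// the real class may differ. The idea matches the driver's grid*grid reduce
// tasks: route each point to the reducer that owns its grid cell.
public static class GridPartitioner extends Partitioner<WritablePoint, LongWritable> {
    @Override
    public int getPartition(WritablePoint key, LongWritable value, int numPartitions) {
        int grid = (int) Math.sqrt(numPartitions);
        int col = Math.min((int) (key.getX() * grid), grid - 1);
        int row = Math.min((int) (key.getY() * grid), grid - 1);
        return row * grid + col;
    }
}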
@Override
protected void configureJob(Job job) throws IOException {
    Configuration conf = job.getConfiguration();
    job.setJarByClass(PartialBuilder.class);
    FileInputFormat.setInputPaths(job, getDataPath());
    FileOutputFormat.setOutputPath(job, getOutputPath(conf));
    job.setOutputKeyClass(TreeID.class);
    job.setOutputValueClass(MapredOutput.class);
    job.setMapperClass(Step1Mapper.class);
    job.setNumReduceTasks(0); // no reducers
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    // For this implementation to work, mapred.map.tasks needs to be set to
    // the actual number of mappers Hadoop will use:
    TextInputFormat inputFormat = new TextInputFormat();
    List<?> splits = inputFormat.getSplits(job);
    if (splits == null || splits.isEmpty()) {
        log.warn("Unable to compute number of splits?");
    } else {
        int numSplits = splits.size();
        log.info("Setting mapred.map.tasks = {}", numSplits);
        conf.setInt("mapred.map.tasks", numSplits);
    }
}
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String jobName = "Rating predictor MR";
    job.setJobName(jobName);
    job.setJarByClass(UtilityPredictor.class);
    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapperClass(UtilityPredictor.PredictionMapper.class);
    job.setReducerClass(UtilityPredictor.PredictorReducer.class);
    job.setMapOutputKeyClass(TextInt.class);
    job.setMapOutputValueClass(Tuple.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setGroupingComparatorClass(ItemIdGroupComprator.class);
    job.setPartitionerClass(ItemIdPartitioner.class);
    Utility.setConfiguration(job.getConfiguration());
    // Prefer the job-specific reducer count; fall back to the global setting.
    int numReducer = job.getConfiguration().getInt("utp.num.reducer", -1);
    numReducer = -1 == numReducer ? job.getConfiguration().getInt("num.reducer", 1) : numReducer;
    job.setNumReduceTasks(numReducer);
    int status = job.waitForCompletion(true) ? 0 : 1;
    return status;
}