Example #1
0
 public Recommend recommend(long userId, Collection<Long> shopIds) throws IOException {
   Recommend recommend = new Recommend(client);
   recommend.setUserId(userId);
   recommend.setIds(shopIds);
   client.initialize(recommend);
   return recommend;
 }
Example #2
0
  public static void run(Map<String, String> path) throws IOException {
    JobConf conf = Recommend.config();

    String input = path.get("Step2Input");
    String output = path.get("Step2Output");

    HdfsDAO hdfs = new HdfsDAO(Recommend.HDFS, conf);
    hdfs.rmr(output);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Step2_UserVectorToCooccurrenceMapper.class);
    //        conf.setCombinerClass(Step2_UserVectorToConoccurrenceReducer.class);
    //        conf.setReducerClass(Step2_UserVectorToConoccurrenceReducer.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(input));
    FileOutputFormat.setOutputPath(conf, new Path(output));

    RunningJob job = JobClient.runJob(conf);
    while (!job.isComplete()) {
      job.waitForCompletion();
    }
  }
  public static void run(Map<String, String> path) throws IOException {
    JobConf conf = Recommend.config();
    conf.setJarByClass(Step1.class);

    String input = "hdfs://192.168.201.11:9000/user/hdfs/recommend/data1"; // 提交的处理训练集的目录
    String input1 = path.get("Step1Input");
    String output = path.get("Step1Output");

    HdfsDAO hdfs = new HdfsDAO(Recommend.HDFS, conf);
    hdfs.rmr(output);
    hdfs.rmr(input1);
    hdfs.mkdirs(input1);
    //       hdfs.copyFile(path.get("data"), input);

    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(Step1_ToUserPreMapper.class);
    conf.setCombinerClass(Step1_ToItemVectorReducer.class);
    conf.setReducerClass(Step1_ToItemVectorReducer.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    //        int maxCurrentReduceTasks = conf.getInt("mapred.tasktracker.reduce.tasks.maximum", 1);
    //        int ReduceTasks = (int) (2 * maxCurrentReduceTasks * 0.95);
    //        conf.setNumReduceTasks(ReduceTasks);
    /*
     * reduce的数目最好等于0.95或1.75乘以工作节点数,再乘以mapred.tasktracker.reduce.tasks.maximum(一般是每个节点的CPU数)
     * 因子为0.95时会让所有的reduce任务立即装载,并当map任务结束时复制它们的输出结果。当因子为1.75时,一些reduce任务会立即
     * 被装载,而其他一些则会等待。更早的节点会较早地完成第一轮reduce 任务并开始第二轮。最慢的节点第二轮不需要处理任何的reduce
     * 任务,这样可以带来更好的负载平衡。
     */
    //        conf.setNumReduceTasks(3);  //这里是在虚拟机下测试,就直接设成节点数

    FileInputFormat.setInputPaths(conf, new Path(input));
    FileOutputFormat.setOutputPath(conf, new Path(output));

    RunningJob job = JobClient.runJob(conf);
    while (!job.isComplete()) {
      job.waitForCompletion();
    }
  }