예제 #1
0
  /**
   * Configures and runs the map-only job that produces the PageRank link (edge) data.
   *
   * <p>The job reads the dummy file through {@code NLineInputFormat}, maps it with
   * {@code DummyToPageRankLinksMapper}, and writes {@code LongWritable}/{@code Text} records to
   * the {@code EDGES_DIR_NAME} directory under the configured result path. The output is a
   * sequence file when {@code options.isSequenceOut()} is true and plain text otherwise; if a
   * codec class is configured, output compression is enabled with that codec.
   *
   * @throws IOException if job setup or {@code JobClient.runJob} fails
   * @throws URISyntaxException declared by the signature; nothing visible in this method throws
   *     it directly (presumably it originates in {@code setPageRankLinksOptions} - TODO confirm)
   */
  private void createPageRankLinksDirectly() throws IOException, URISyntaxException {

    log.info("Creating PageRank links", null);

    final String jobName = "Create pagerank links";
    final JobConf jobConf = new JobConf(PagerankData.class);
    final Path edgesPath = new Path(options.getResultPath(), EDGES_DIR_NAME);

    jobConf.setJobName(jobName);
    setPageRankLinksOptions(jobConf);

    // Input side: the dummy file, read via NLineInputFormat.
    jobConf.setInputFormat(NLineInputFormat.class);
    FileInputFormat.setInputPaths(jobConf, dummy.getPath());

    // Map-only job: zero reduce tasks, so there is no reduce phase.
    jobConf.setMapperClass(DummyToPageRankLinksMapper.class);
    jobConf.setNumReduceTasks(0);

    // Output side: record types, container format, optional compression, destination.
    jobConf.setOutputKeyClass(LongWritable.class);
    jobConf.setOutputValueClass(Text.class);

    if (!options.isSequenceOut()) {
      jobConf.setOutputFormat(TextOutputFormat.class);
    } else {
      jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    }

    if (options.getCodecClass() != null) {
      FileOutputFormat.setCompressOutput(jobConf, true);
      FileOutputFormat.setOutputCompressorClass(jobConf, options.getCodecClass());
      // BLOCK compression type is set under both the legacy (mapred.*) and the
      // newer (mapreduce.*) property names.
      jobConf.set("mapred.output.compression.type", "BLOCK");
      jobConf.set("mapreduce.output.fileoutputformat.compress.type", "BLOCK");
    }

    FileOutputFormat.setOutputPath(jobConf, edgesPath);

    log.info("Running Job: " + jobName);
    log.info("Dummy file " + dummy.getPath() + " as input");
    log.info("Edges file " + edgesPath + " as output");
    JobClient.runJob(jobConf);
    log.info("Finished Running Job: " + jobName);
  }
예제 #2
0
  /**
   * Configures and runs the job that produces the PageRank node (vertex) data.
   *
   * <p>The job reads the dummy file through {@code NLineInputFormat} and writes
   * {@code LongWritable}/{@code Text} records to the {@code VERTICALS_DIR_NAME} directory under
   * the configured result path. Two modes are selected by the {@code balance} field:
   *
   * <ul>
   *   <li>balanced: {@code BalancedLinkNodesMapper} plus {@code BalancedLinkNodesReducer}, using
   *       {@code options.getNumReds()} reduce tasks when that value is positive and
   *       {@code Utils.getMaxNumReds()} otherwise;
   *   <li>not balanced: a map-only job using {@code DummyToNodesMapper}.
   * </ul>
   *
   * <p>The output is a sequence file when {@code options.isSequenceOut()} is true and plain text
   * otherwise; if a codec class is configured, output compression is enabled with that codec.
   *
   * @throws IOException if job setup or {@code JobClient.runJob} fails
   */
  private void createPageRankNodesDirectly() throws IOException {

    log.info("Creating PageRank nodes...", null);

    final String jobName = "Create pagerank nodes";
    final Path verticesPath = new Path(options.getResultPath(), VERTICALS_DIR_NAME);
    final JobConf jobConf = new JobConf(PagerankData.class);

    jobConf.setJobName(jobName);
    setPageRankNodesOptions(jobConf);

    // Input side: the dummy file, read via NLineInputFormat.
    jobConf.setInputFormat(NLineInputFormat.class);
    FileInputFormat.setInputPaths(jobConf, dummy.getPath());

    jobConf.setOutputKeyClass(LongWritable.class);
    jobConf.setOutputValueClass(Text.class);

    if (!balance) {
      // Map-only variant: zero reduce tasks, so there is no reduce phase.
      // NOTE(review): the map output key is Text while the job output key is LongWritable;
      // kept exactly as in the original - confirm against DummyToNodesMapper.
      jobConf.setMapperClass(DummyToNodesMapper.class);
      jobConf.setMapOutputKeyClass(Text.class);
      jobConf.setNumReduceTasks(0);
    } else {
      // Balance the output order of the nodes so that the PageRank benchmark run is
      // protected from potential data skew.
      jobConf.setMapperClass(BalancedLinkNodesMapper.class);
      jobConf.setReducerClass(BalancedLinkNodesReducer.class);
      jobConf.setMapOutputKeyClass(LongWritable.class);
      jobConf.setMapOutputValueClass(NullWritable.class);

      // Use the configured reducer count when positive, otherwise the maximum reported by Utils.
      jobConf.setNumReduceTasks(
          options.getNumReds() > 0 ? options.getNumReds() : Utils.getMaxNumReds());
    }

    if (!options.isSequenceOut()) {
      jobConf.setOutputFormat(TextOutputFormat.class);
    } else {
      jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    }

    if (options.getCodecClass() != null) {
      FileOutputFormat.setCompressOutput(jobConf, true);
      FileOutputFormat.setOutputCompressorClass(jobConf, options.getCodecClass());
      // BLOCK compression type is set under both the legacy (mapred.*) and the
      // newer (mapreduce.*) property names.
      jobConf.set("mapred.output.compression.type", "BLOCK");
      jobConf.set("mapreduce.output.fileoutputformat.compress.type", "BLOCK");
    }

    FileOutputFormat.setOutputPath(jobConf, verticesPath);

    log.info("Running Job: " + jobName);
    log.info("Dummy file " + dummy.getPath() + " as input");
    log.info("Vertices file " + verticesPath + " as output");
    JobClient.runJob(jobConf);
    log.info("Finished Running Job: " + jobName);
  }