Example #1
  private void createPageRankLinksDirectly() throws IOException, URISyntaxException {

    log.info("Creating PageRank links", null);

    JobConf job = new JobConf(PagerankData.class);
    String jobname = "Create pagerank links";

    Path fout = new Path(options.getResultPath(), EDGES_DIR_NAME);

    job.setJobName(jobname);
    setPageRankLinksOptions(job);

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    // job.setMapOutputKeyClass(LongWritable.class);
    // job.setMapOutputValueClass(Text.class);

    job.setNumReduceTasks(0);

    FileInputFormat.setInputPaths(job, dummy.getPath());
    job.setInputFormat(NLineInputFormat.class);

    job.setMapperClass(DummyToPageRankLinksMapper.class);

    if (options.isSequenceOut()) {
      job.setOutputFormat(SequenceFileOutputFormat.class);
    } else {
      job.setOutputFormat(TextOutputFormat.class);
    }

    if (null != options.getCodecClass()) {
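      // BLOCK compression is requested under both the deprecated (mapred.*) and the
      // current (mapreduce.*) property names so either generation of Hadoop honors it.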
      job.set("mapred.output.compression.type", "BLOCK");
      job.set("mapreduce.output.fileoutputformat.compress.type", "BLOCK");
      FileOutputFormat.setCompressOutput(job, true);
      FileOutputFormat.setOutputCompressorClass(job, options.getCodecClass());
    }

    FileOutputFormat.setOutputPath(job, fout);

    log.info("Running Job: " + jobname);
    log.info("Dummy file " + dummy.getPath() + " as input");
    log.info("Edges file " + fout + " as output");
    JobClient.runJob(job);
    log.info("Finished Running Job: " + jobname);
  }
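
For contrast with the JobConf-based setup above, here is a minimal sketch of the same BLOCK-compressed SequenceFile output configuration written against the newer org.apache.hadoop.mapreduce API. The class name, helper method, and the GzipCodec choice are illustrative assumptions, not taken from the original sources:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

public class CompressedOutputSketch {
  // Hypothetical helper: configure a job whose SequenceFile output is BLOCK-compressed.
  public static Job configure(Configuration conf, Path out) throws IOException {
    Job job = Job.getInstance(conf, "Create pagerank links (sketch)");
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    // Typed setters replace the raw "...compress.type" property strings used above.
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
    FileOutputFormat.setOutputPath(job, out);
    return job;
  }
}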
Example #2
  /**
   * Tests that {@link FileQueue} can identify a compressed input file and provide
   * readers that extract only the uncompressed data when input-compression
   * emulation is enabled.
   */
  @Test
  public void testFileQueueDecompression() throws IOException {
    JobConf conf = new JobConf();
    FileSystem lfs = FileSystem.getLocal(conf);
    String inputLine = "Hi Hello!";

    CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
    CompressionEmulationUtil.setInputCompressionEmulationEnabled(conf, true);
    org.apache.hadoop.mapred.FileOutputFormat.setCompressOutput(conf, true);
    org.apache.hadoop.mapred.FileOutputFormat.setOutputCompressorClass(conf, GzipCodec.class);

    // define the test's root temp directory
    Path rootTempDir =
        new Path(System.getProperty("test.build.data", "/tmp"))
            .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());

    Path tempDir = new Path(rootTempDir, "TestFileQueueDecompression");
    lfs.delete(tempDir, true);

    // create a compressed file
    Path compressedFile = new Path(tempDir, "test");
    OutputStream out =
        CompressionEmulationUtil.getPossiblyCompressedOutputStream(compressedFile, conf);
    BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out));
    writer.write(inputLine);
    writer.close();

    compressedFile = compressedFile.suffix(".gz");
    // now read back the data from the compressed stream using FileQueue
    long fileSize = lfs.listStatus(compressedFile)[0].getLen();
    CombineFileSplit split =
        new CombineFileSplit(new Path[] {compressedFile}, new long[] {fileSize});
    FileQueue queue = new FileQueue(split, conf);
    byte[] bytes = new byte[inputLine.getBytes().length];
    queue.read(bytes);
    queue.close();
    String readLine = new String(bytes);
    assertEquals("Compression/Decompression error", inputLine, readLine);
  }
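
The assertion above ultimately exercises Hadoop's codec streams. Below is a standalone sketch of the same gzip round-trip without the Gridmix helpers; the file path is a hypothetical assumption, while the rest uses the stock org.apache.hadoop.io.compress API:

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Writer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class GzipRoundTripSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem lfs = FileSystem.getLocal(conf);
    // ReflectionUtils injects the Configuration the codec needs.
    GzipCodec codec = ReflectionUtils.newInstance(GzipCodec.class, conf);
    Path file = new Path("/tmp/gzip-roundtrip/test.gz"); // hypothetical location

    // Write through the codec: the bytes on disk are gzip-compressed.
    try (Writer writer = new OutputStreamWriter(codec.createOutputStream(lfs.create(file)))) {
      writer.write("Hi Hello!");
    }
    // Read through the codec: the stream hands back the uncompressed data.
    try (BufferedReader reader =
        new BufferedReader(new InputStreamReader(codec.createInputStream(lfs.open(file))))) {
      System.out.println(reader.readLine()); // prints "Hi Hello!"
    }
  }
}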
Example #3
 public static RecordWriter getHiveRecordWriter(
     JobConf jc,
     TableDesc tableInfo,
     Class<? extends Writable> outputClass,
     FileSinkDesc conf,
     Path outPath)
     throws HiveException {
   try {
     HiveOutputFormat<?, ?> hiveOutputFormat = tableInfo.getOutputFileFormatClass().newInstance();
     boolean isCompressed = conf.getCompressed();
     JobConf jc_output = jc;
     if (isCompressed) {
       jc_output = new JobConf(jc);
       String codecStr = conf.getCompressCodec();
       if (codecStr != null && !codecStr.trim().equals("")) {
         Class<? extends CompressionCodec> codec =
             (Class<? extends CompressionCodec>) Class.forName(codecStr);
         FileOutputFormat.setOutputCompressorClass(jc_output, codec);
       }
       String type = conf.getCompressType();
       if (type != null && !type.trim().equals("")) {
         CompressionType style = CompressionType.valueOf(type);
          // set the type on the cloned conf (jc_output), not on the caller's jc
          SequenceFileOutputFormat.setOutputCompressionType(jc_output, style);
       }
     }
     return getRecordWriter(
         jc_output,
         hiveOutputFormat,
         outputClass,
         isCompressed,
         tableInfo.getProperties(),
         outPath);
   } catch (Exception e) {
     throw new HiveException(e);
   }
 }
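
A design note on the snippet above: the incoming JobConf is cloned (jc_output = new JobConf(jc)) so that per-writer compression settings do not leak into the caller's configuration, which is why the sequence-file compression type is set on the clone as well.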
Example #4
  private void createPageRankNodesDirectly() throws IOException {

    log.info("Creating PageRank nodes...", null);

    Path fout = new Path(options.getResultPath(), VERTICALS_DIR_NAME);

    JobConf job = new JobConf(PagerankData.class);
    String jobname = "Create pagerank nodes";

    job.setJobName(jobname);
    setPageRankNodesOptions(job);

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(job, dummy.getPath());
    job.setInputFormat(NLineInputFormat.class);

    if (balance) {
      /*
       * Balance the output order of nodes to avoid potential data skew
       * when the PageRank benchmark runs.
       */
      job.setMapOutputKeyClass(LongWritable.class);
      job.setMapOutputValueClass(NullWritable.class);

      job.setMapperClass(BalancedLinkNodesMapper.class);
      job.setReducerClass(BalancedLinkNodesReducer.class);
      // job.setPartitionerClass(ModulusPartitioner.class);

      if (options.getNumReds() > 0) {
        job.setNumReduceTasks(options.getNumReds());
      } else {
        job.setNumReduceTasks(Utils.getMaxNumReds());
      }
    } else {
      job.setMapOutputKeyClass(Text.class);
      job.setMapperClass(DummyToNodesMapper.class);
      job.setNumReduceTasks(0);
    }

    if (options.isSequenceOut()) {
      job.setOutputFormat(SequenceFileOutputFormat.class);
    } else {
      job.setOutputFormat(TextOutputFormat.class);
    }

    if (null != options.getCodecClass()) {
      job.set("mapred.output.compression.type", "BLOCK");
      job.set("mapreduce.output.fileoutputformat.compress.type", "BLOCK");
      FileOutputFormat.setCompressOutput(job, true);
      FileOutputFormat.setOutputCompressorClass(job, options.getCodecClass());
    }

    FileOutputFormat.setOutputPath(job, fout);

    log.info("Running Job: " + jobname);
    log.info("Dummy file " + dummy.getPath() + " as input");
    log.info("Vertices file " + fout + " as output");
    JobClient.runJob(job);
    log.info("Finished Running Job: " + jobname);
  }
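
A closing note on the balance branch: the intermediate map output types (LongWritable/NullWritable) differ from the job's final output types (LongWritable/Text), so they must be declared explicitly with setMapOutputKeyClass/setMapOutputValueClass; otherwise Hadoop assumes the map output types match the job's output key/value classes.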