private void createPageRankLinksDirectly() throws IOException, URISyntaxException {
  log.info("Creating PageRank links", null);

  JobConf job = new JobConf(PagerankData.class);
  String jobname = "Create pagerank links";
  Path fout = new Path(options.getResultPath(), EDGES_DIR_NAME);

  job.setJobName(jobname);
  setPageRankLinksOptions(job);

  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);
  // job.setMapOutputKeyClass(LongWritable.class);
  // job.setMapOutputValueClass(Text.class);
  job.setNumReduceTasks(0);

  FileInputFormat.setInputPaths(job, dummy.getPath());
  job.setInputFormat(NLineInputFormat.class);
  job.setMapperClass(DummyToPageRankLinksMapper.class);

  if (options.isSequenceOut()) {
    job.setOutputFormat(SequenceFileOutputFormat.class);
  } else {
    job.setOutputFormat(TextOutputFormat.class);
  }

  if (null != options.getCodecClass()) {
    job.set("mapred.output.compression.type", "BLOCK");
    job.set("mapreduce.output.fileoutputformat.compress.type", "BLOCK");
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, options.getCodecClass());
  }

  FileOutputFormat.setOutputPath(job, fout);

  log.info("Running Job: " + jobname);
  log.info("Dummy file " + dummy.getPath() + " as input");
  log.info("Edges file " + fout + " as output");
  JobClient.runJob(job);
  log.info("Finished Running Job: " + jobname);
}
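/*
 * A minimal, standalone sketch (not part of PagerankData) of the output-compression
 * pattern used above: block-compressed SequenceFile output on a mapred-era JobConf.
 * The class name, helper name, codec choice, and output path are hypothetical
 * placeholders; only the Hadoop API calls mirror the method above.
 */
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;

class CompressedOutputSketch {
  static JobConf configureBlockCompressedOutput(Path out) {
    JobConf job = new JobConf(CompressedOutputSketch.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    // Compress whole blocks of records rather than each value separately;
    // SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK)
    // is the typed equivalent of setting this property.
    job.set("mapred.output.compression.type", "BLOCK");
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    FileOutputFormat.setOutputPath(job, out);
    return job;
  }
}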
/**
 * Test that {@link FileQueue} can identify a compressed file and provide readers
 * that extract the uncompressed data, but only when input-compression emulation
 * is enabled.
 */
@Test
public void testFileQueueDecompression() throws IOException {
  JobConf conf = new JobConf();
  FileSystem lfs = FileSystem.getLocal(conf);
  String inputLine = "Hi Hello!";

  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  CompressionEmulationUtil.setInputCompressionEmulationEnabled(conf, true);
  org.apache.hadoop.mapred.FileOutputFormat.setCompressOutput(conf, true);
  org.apache.hadoop.mapred.FileOutputFormat.setOutputCompressorClass(conf, GzipCodec.class);

  // define the test's root temp directory
  Path rootTempDir =
      new Path(System.getProperty("test.build.data", "/tmp"))
          .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());

  Path tempDir = new Path(rootTempDir, "TestFileQueueDecompression");
  lfs.delete(tempDir, true);

  // create a compressed file
  Path compressedFile = new Path(tempDir, "test");
  OutputStream out =
      CompressionEmulationUtil.getPossiblyCompressedOutputStream(compressedFile, conf);
  BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out));
  writer.write(inputLine);
  writer.close();
  compressedFile = compressedFile.suffix(".gz");

  // now read back the data from the compressed stream using FileQueue
  long fileSize = lfs.listStatus(compressedFile)[0].getLen();
  CombineFileSplit split =
      new CombineFileSplit(new Path[] {compressedFile}, new long[] {fileSize});
  FileQueue queue = new FileQueue(split, conf);
  byte[] bytes = new byte[inputLine.getBytes().length];
  queue.read(bytes);
  queue.close();
  String readLine = new String(bytes);
  assertEquals("Compression/Decompression error", inputLine, readLine);
}
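/*
 * A hedged, standalone sketch (not part of the Gridmix test, and not a claim about
 * how FileQueue is implemented) of the same round trip without FileQueue: resolve
 * the codec from the ".gz" extension with Hadoop's CompressionCodecFactory and read
 * the decompressed data back. The class and method names are hypothetical; only the
 * Hadoop codec APIs are real.
 */
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

class CodecReadBackSketch {
  static String readFirstLine(Path compressedFile, Configuration conf) throws IOException {
    FileSystem fs = FileSystem.getLocal(conf);
    // Pick the codec (GzipCodec for ".gz") from the file name, the same way Hadoop
    // input formats normally resolve compressed input paths.
    CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(compressedFile);
    InputStream in = codec.createInputStream(fs.open(compressedFile));
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(in))) {
      return reader.readLine();
    }
  }
}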
public static RecordWriter getHiveRecordWriter(JobConf jc, TableDesc tableInfo,
    Class<? extends Writable> outputClass, FileSinkDesc conf, Path outPath)
    throws HiveException {
  try {
    HiveOutputFormat<?, ?> hiveOutputFormat = tableInfo.getOutputFileFormatClass().newInstance();
    boolean isCompressed = conf.getCompressed();
    JobConf jc_output = jc;
    if (isCompressed) {
      jc_output = new JobConf(jc);
      String codecStr = conf.getCompressCodec();
      if (codecStr != null && !codecStr.trim().equals("")) {
        Class<? extends CompressionCodec> codec =
            (Class<? extends CompressionCodec>) Class.forName(codecStr);
        FileOutputFormat.setOutputCompressorClass(jc_output, codec);
      }
      String type = conf.getCompressType();
      if (type != null && !type.trim().equals("")) {
        CompressionType style = CompressionType.valueOf(type);
        // apply the compression type to the copied conf so the caller's JobConf is not mutated
        SequenceFileOutputFormat.setOutputCompressionType(jc_output, style);
      }
    }
    return getRecordWriter(jc_output, hiveOutputFormat, outputClass, isCompressed,
        tableInfo.getProperties(), outPath);
  } catch (Exception e) {
    throw new HiveException(e);
  }
}
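/*
 * A minimal sketch (assumed, not Hive's API) of the compression branch above:
 * resolve a codec class by name and apply it, together with a SequenceFile
 * compression type, to a copy of the JobConf so the caller's conf stays untouched.
 * The class and helper names are hypothetical; the Hadoop calls are the same ones
 * used in the method above.
 */
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;

class CompressedConfSketch {
  @SuppressWarnings("unchecked")
  static JobConf withCompression(JobConf jc, String codecClassName, String compressionType)
      throws ClassNotFoundException {
    // Work on a copy so compression settings do not leak into the shared conf.
    JobConf copy = new JobConf(jc);
    if (codecClassName != null && !codecClassName.trim().isEmpty()) {
      Class<? extends CompressionCodec> codec =
          (Class<? extends CompressionCodec>) Class.forName(codecClassName);
      FileOutputFormat.setCompressOutput(copy, true);
      FileOutputFormat.setOutputCompressorClass(copy, codec);
    }
    if (compressionType != null && !compressionType.trim().isEmpty()) {
      // e.g. "BLOCK" or "RECORD"
      SequenceFileOutputFormat.setOutputCompressionType(copy, CompressionType.valueOf(compressionType));
    }
    return copy;
  }
}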
private void createPageRankNodesDirectly() throws IOException {
  log.info("Creating PageRank nodes...", null);

  Path fout = new Path(options.getResultPath(), VERTICALS_DIR_NAME);

  JobConf job = new JobConf(PagerankData.class);
  String jobname = "Create pagerank nodes";

  job.setJobName(jobname);
  setPageRankNodesOptions(job);

  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);

  FileInputFormat.setInputPaths(job, dummy.getPath());
  job.setInputFormat(NLineInputFormat.class);

  if (balance) {
    /*
     * Balance the output order of nodes to prevent potential data skew
     * when running the pagerank benchmark.
     */
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setMapperClass(BalancedLinkNodesMapper.class);
    job.setReducerClass(BalancedLinkNodesReducer.class);
    // job.setPartitionerClass(ModulusPartitioner.class);

    if (options.getNumReds() > 0) {
      job.setNumReduceTasks(options.getNumReds());
    } else {
      job.setNumReduceTasks(Utils.getMaxNumReds());
    }
  } else {
    job.setMapOutputKeyClass(Text.class);
    job.setMapperClass(DummyToNodesMapper.class);
    job.setNumReduceTasks(0);
  }

  if (options.isSequenceOut()) {
    job.setOutputFormat(SequenceFileOutputFormat.class);
  } else {
    job.setOutputFormat(TextOutputFormat.class);
  }

  if (null != options.getCodecClass()) {
    job.set("mapred.output.compression.type", "BLOCK");
    job.set("mapreduce.output.fileoutputformat.compress.type", "BLOCK");
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, options.getCodecClass());
  }

  FileOutputFormat.setOutputPath(job, fout);

  log.info("Running Job: " + jobname);
  log.info("Dummy file " + dummy.getPath() + " as input");
  log.info("Vertices file " + fout + " as output");
  JobClient.runJob(job);
  log.info("Finished Running Job: " + jobname);
}
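/*
 * A hypothetical sketch (not HiBench's BalancedLinkNodesMapper/Reducer) of the
 * old-API mapper/reducer shape the balanced branch above wires in: the map side
 * keys each node id as a LongWritable so the shuffle spreads nodes across reduce
 * tasks, and the reduce side writes one (LongWritable, Text) record per id. The
 * class names and the assumption that each input line carries a node id are
 * illustrative only.
 */
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

class ShuffleNodesSketch {
  static class NodeMapper extends MapReduceBase
      implements Mapper<LongWritable, Text, LongWritable, NullWritable> {
    @Override
    public void map(LongWritable key, Text value,
        OutputCollector<LongWritable, NullWritable> out, Reporter reporter) throws IOException {
      // Key by the numeric node id so the partitioner distributes nodes evenly.
      out.collect(new LongWritable(Long.parseLong(value.toString().trim())), NullWritable.get());
    }
  }

  static class NodeReducer extends MapReduceBase
      implements Reducer<LongWritable, NullWritable, LongWritable, Text> {
    @Override
    public void reduce(LongWritable key, Iterator<NullWritable> values,
        OutputCollector<LongWritable, Text> out, Reporter reporter) throws IOException {
      // Emit one record per distinct node id, matching the job's output types above.
      out.collect(key, new Text(key.toString()));
    }
  }
}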