/**
 * Generate random data, compress it, index and md5 hash the data. Then read it all back
 * and md5 that too, to verify that it all went ok.
 *
 * @param testWithIndex Should we index or not?
 * @param charsToOutput How many characters of random data should we output.
 * @throws IOException
 * @throws NoSuchAlgorithmException
 * @throws InterruptedException
 */
private void runTest(boolean testWithIndex, int charsToOutput)
    throws IOException, NoSuchAlgorithmException, InterruptedException {

  Configuration conf = new Configuration();
  conf.setLong("fs.local.block.size", charsToOutput / 2); // reducing block size to force a split of the tiny file
  conf.set("io.compression.codecs", LzopCodec.class.getName());

  Assume.assumeTrue(CoreTestUtil.okToRunLzoTests(conf));

  FileSystem.getLocal(conf).close(); // remove cached filesystem (if any)
  FileSystem localFs = FileSystem.getLocal(conf);
  localFs.delete(outputDir_, true);
  localFs.mkdirs(outputDir_);

  Job job = new Job(conf);
  TextOutputFormat.setCompressOutput(job, true);
  TextOutputFormat.setOutputCompressorClass(job, LzopCodec.class);
  TextOutputFormat.setOutputPath(job, outputDir_);

  TaskAttemptContext attemptContext =
      new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID("123", 0, false, 1, 2));

  // create some input data
  byte[] expectedMd5 = createTestInput(outputDir_, localFs, attemptContext, charsToOutput);

  if (testWithIndex) {
    Path lzoFile = new Path(outputDir_, lzoFileName_);
    LzoIndex.createIndex(localFs, lzoFile);
  }

  LzoTextInputFormat inputFormat = new LzoTextInputFormat();
  TextInputFormat.setInputPaths(job, outputDir_);

  List<InputSplit> is = inputFormat.getSplits(job);
  // verify we have the right number of lzo chunks
  if (testWithIndex && OUTPUT_BIG == charsToOutput) {
    assertEquals(3, is.size());
  } else {
    assertEquals(1, is.size());
  }

  // let's read it all and calculate the md5 hash
  for (InputSplit inputSplit : is) {
    RecordReader<LongWritable, Text> rr =
        inputFormat.createRecordReader(inputSplit, attemptContext);
    rr.initialize(inputSplit, attemptContext);

    while (rr.nextKeyValue()) {
      Text value = rr.getCurrentValue();
      md5_.update(value.getBytes(), 0, value.getLength());
    }

    rr.close();
  }

  localFs.close();
  assertTrue(Arrays.equals(expectedMd5, md5_.digest()));
}
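// A hypothetical sketch of the createTestInput helper used above, assuming it writes
// charsToOutput characters of random text through the job's compressed TextOutputFormat
// and returns the MD5 digest of the values it wrote; the fixed seed and the line format
// are assumptions for illustration, not taken from the original test.
private byte[] createTestInput(
    Path outputDir, FileSystem fs, TaskAttemptContext attemptContext, int charsToOutput)
    throws IOException, InterruptedException, NoSuchAlgorithmException {
  TextOutputFormat<Text, Text> output = new TextOutputFormat<Text, Text>();
  RecordWriter<Text, Text> writer = output.getRecordWriter(attemptContext);
  MessageDigest md5 = MessageDigest.getInstance("MD5");
  Random random = new Random(12345); // fixed seed keeps the generated data repeatable
  int written = 0;
  try {
    while (written < charsToOutput) {
      Text value = new Text(Long.toString(random.nextLong()));
      writer.write(null, value); // null key: TextOutputFormat writes only the value plus a newline
      md5.update(value.getBytes(), 0, value.getLength());
      written += value.getLength();
    }
  } finally {
    writer.close(attemptContext);
  }
  return md5.digest();
}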
/**
 * Run a local map reduce job to read records from an HCatalog table and verify that the
 * count is as expected.
 *
 * @param readCount expected number of records read back from the table
 * @param filter partition filter pushed down to HCatInputFormat
 * @return the records collected by the map tasks
 * @throws Exception
 */
List<HCatRecord> runMRRead(int readCount, String filter) throws Exception {
  MapRead.readCount = 0;
  readRecords.clear();

  Configuration conf = new Configuration();
  conf.set(HiveConf.ConfVars.METASTORE_INTEGER_JDO_PUSHDOWN.varname, "true");
  Job job = new Job(conf, "hcat mapreduce read test");
  job.setJarByClass(this.getClass());
  job.setMapperClass(HCatMapReduceTest.MapRead.class);

  // input/output settings
  job.setInputFormatClass(HCatInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);

  HCatInputFormat.setInput(job, dbName, tableName, filter);

  job.setMapOutputKeyClass(BytesWritable.class);
  job.setMapOutputValueClass(Text.class);

  job.setNumReduceTasks(0);

  Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceOutput");
  if (fs.exists(path)) {
    fs.delete(path, true);
  }

  TextOutputFormat.setOutputPath(job, path);

  job.waitForCompletion(true);

  Assert.assertEquals(readCount, MapRead.readCount);
  return readRecords;
}
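// A hedged sketch of the HCatMapReduceTest.MapRead mapper referenced above: it is assumed to
// count every HCatRecord it receives and stash it in the shared readRecords list so the test
// can assert on both the count and the contents; the class body is illustrative, not the
// original implementation.
public static class MapRead extends Mapper<WritableComparable, HCatRecord, BytesWritable, Text> {

  static int readCount = 0; // reset by runMRRead before each job

  @Override
  protected void map(WritableComparable key, HCatRecord value, Context context)
      throws IOException, InterruptedException {
    readCount++;
    readRecords.add(value);
  }
}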
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.out.println("usage : need <input path> <output path>");
    return 1;
  }

  Path inputPath = new Path(args[0]);
  Path outputPath = new Path(args[1]);

  Configuration conf = getConf();
  Job job = new Job(conf, getClass().getName() + "--answer");
  job.setJarByClass(CombinerAnswer.class);

  job.setMapperClass(MyMapper.class);
  job.setReducerClass(MyReducer.class);
  // job.setCombinerClass(MyReducer.class);

  job.setMapOutputValueClass(IntWritable.class);
  job.setMapOutputKeyClass(Text.class);

  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);

  TextInputFormat.setInputPaths(job, inputPath);
  TextOutputFormat.setOutputPath(job, outputPath);

  return job.waitForCompletion(true) ? 0 : 1;
}
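// A minimal, assumed word-count style mapper and reducer matching the driver above. The sum
// reducer is associative and commutative, which is exactly what makes it safe to re-enable the
// commented-out job.setCombinerClass(MyReducer.class) line; these class bodies are a sketch,
// not the original implementation.
public static class MyMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

  private static final IntWritable ONE = new IntWritable(1);
  private final Text word = new Text();

  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    // emit (token, 1) for every whitespace-separated token in the line
    for (String token : value.toString().split("\\s+")) {
      if (!token.isEmpty()) {
        word.set(token);
        context.write(word, ONE);
      }
    }
  }
}

public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

  @Override
  protected void reduce(Text key, Iterable<IntWritable> values, Context context)
      throws IOException, InterruptedException {
    // sum the partial counts; works identically as a combiner or a reducer
    int sum = 0;
    for (IntWritable value : values) {
      sum += value.get();
    }
    context.write(key, new IntWritable(sum));
  }
}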
@SuppressWarnings({"unchecked", "rawtypes"})
public void start(
    Path outputDir, int numReducers, boolean concurrent, String accessKey, String secretKey)
    throws GoraException, IOException, Exception {
  LOG.info("Running Verify with outputDir=" + outputDir + ", numReducers=" + numReducers);

  // DataStore<Long,CINode> store =
  //     DataStoreFactory.getDataStore(Long.class, CINode.class, new Configuration());
  auth = new BasicAWSCredentials(accessKey, secretKey);
  DataStore<Long, cidynamonode> store =
      WSDataStoreFactory.createDataStore(
          DynamoDBStore.class, DynamoDBKey.class, cidynamonode.class, auth);

  job = new Job(getConf());

  if (!job.getConfiguration()
      .get("io.serializations")
      .contains("org.apache.hadoop.io.serializer.JavaSerialization")) {
    job.getConfiguration()
        .set(
            "io.serializations",
            job.getConfiguration().get("io.serializations")
                + ",org.apache.hadoop.io.serializer.JavaSerialization");
  }

  job.setJobName("Link Verifier");
  job.setNumReduceTasks(numReducers);
  job.setJarByClass(getClass());

  Query query = store.newQuery();
  // if (!concurrent) {
  //   // no concurrency filtering, only need prev field
  //   query.setFields("prev");
  // } else {
  //   readFlushed(job.getConfiguration());
  // }

  GoraMapper.initMapperJob(
      job, query, store, DynamoDBKey.class, VLongWritable.class, VerifyMapper.class, true);
  job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);

  job.setReducerClass(VerifyReducer.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, outputDir);

  store.close();

  job.submit();
}
public static void main(String[] args)
    throws IOException, ClassNotFoundException, InterruptedException {
  Configuration conf = new Configuration();
  conf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", ",");

  Job job = Job.getInstance(conf);
  job.setJarByClass(MapsideJoinDriver.class);
  job.setMapperClass(MapsideJoinMapper.class);

  job.setInputFormatClass(CompositeInputFormat.class);
  String expr =
      CompositeInputFormat.compose(
          "inner", KeyValueTextInputFormat.class, new Path(args[0]), new Path(args[1]));
  // String expr = CompositeInputFormat.compose(
  //     "outer", KeyValueTextInputFormat.class, new Path(args[0]), new Path(args[1]));
  job.getConfiguration().set("mapreduce.join.expr", expr);

  job.setNumReduceTasks(0);
  TextOutputFormat.setOutputPath(job, new Path(args[2]));

  job.waitForCompletion(true);
}
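// A hypothetical sketch of MapsideJoinMapper: with CompositeInputFormat each map input is the
// shared join key plus a TupleWritable holding one record per joined source, so the mapper only
// needs to flatten the tuple back into text; the output formatting here is an assumption.
public static class MapsideJoinMapper extends Mapper<Text, TupleWritable, Text, Text> {

  @Override
  protected void map(Text key, TupleWritable value, Context context)
      throws IOException, InterruptedException {
    // value.get(0) comes from the first path in the compose() call, value.get(1) from the second
    context.write(key, new Text(value.get(0) + "," + value.get(1)));
  }
}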
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length != 4) {
    System.err.println(
        "Usage: ReplicatedJoin <user data> <comment data> <out> [inner|leftouter]");
    System.exit(1);
  }

  String joinType = otherArgs[3];
  if (!(joinType.equalsIgnoreCase("inner") || joinType.equalsIgnoreCase("leftouter"))) {
    System.err.println("Join type not set to inner or leftouter");
    System.exit(2);
  }

  // Configure the join type
  Job job = new Job(conf, "Replicated Join");
  job.getConfiguration().set("join.type", joinType);

  job.setJarByClass(ReplicatedJoinDriver.class);
  job.setMapperClass(ReplicatedJoinMapper.class);
  job.setNumReduceTasks(0);

  TextInputFormat.setInputPaths(job, new Path(otherArgs[1]));
  TextOutputFormat.setOutputPath(job, new Path(otherArgs[2]));

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  // Configure the DistributedCache
  DistributedCache.addCacheFile(new Path(otherArgs[0]).toUri(), job.getConfiguration());
  DistributedCache.setLocalFiles(job.getConfiguration(), otherArgs[0]);

  System.exit(job.waitForCompletion(true) ? 0 : 3);
}
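// A hedged sketch of ReplicatedJoinMapper: it loads the cached user file into an in-memory map
// during setup() and streams the comment records through map(), emitting joined pairs; the comma
// delimiter and the join key being the first field are assumptions for illustration only.
public static class ReplicatedJoinMapper extends Mapper<Object, Text, Text, Text> {

  private final Map<String, String> userIdToRecord = new HashMap<String, String>();
  private String joinType;

  @Override
  protected void setup(Context context) throws IOException {
    joinType = context.getConfiguration().get("join.type", "inner");
    Path[] cacheFiles = DistributedCache.getLocalCacheFiles(context.getConfiguration());
    BufferedReader reader = new BufferedReader(new FileReader(cacheFiles[0].toString()));
    try {
      String line;
      while ((line = reader.readLine()) != null) {
        String userId = line.split(",", 2)[0]; // assumed: the user id is the first field
        userIdToRecord.put(userId, line);
      }
    } finally {
      reader.close();
    }
  }

  @Override
  protected void map(Object key, Text value, Context context)
      throws IOException, InterruptedException {
    String comment = value.toString();
    String userId = comment.split(",", 2)[0]; // assumed: comments are keyed by user id
    String user = userIdToRecord.get(userId);
    if (user != null) {
      context.write(new Text(user), new Text(comment));
    } else if ("leftouter".equalsIgnoreCase(joinType)) {
      context.write(new Text(""), new Text(comment)); // keep unmatched comments for a left outer join
    }
  }
}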