@Override public StoreFuncInterface createStoreFunc(POStore store) throws IOException { Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties()); StoreFuncInterface storeFunc = store.getStoreFunc(); JobContext jc = HadoopShims.createJobContext(conf, new JobID()); OutputFormat<?, ?> outputFormat = storeFunc.getOutputFormat(); PigOutputFormat.setLocation(jc, store); context = HadoopShims.createTaskAttemptContext(conf, HadoopShims.getNewTaskAttemptID()); PigOutputFormat.setLocation(context, store); try { outputFormat.checkOutputSpecs(jc); } catch (InterruptedException e) { throw new IOException(e); } try { outputCommitter = outputFormat.getOutputCommitter(context); outputCommitter.setupJob(jc); outputCommitter.setupTask(context); writer = outputFormat.getRecordWriter(context); } catch (InterruptedException e) { throw new IOException(e); } storeFunc.prepareToWrite(writer); return storeFunc; }
public void testBinary() throws IOException, InterruptedException { Configuration conf = new Configuration(); Job job = new Job(conf); Path outdir = new Path(System.getProperty("test.build.data", "/tmp"), "outseq"); Random r = new Random(); long seed = r.nextLong(); r.setSeed(seed); FileOutputFormat.setOutputPath(job, outdir); SequenceFileAsBinaryOutputFormat.setSequenceFileOutputKeyClass(job, IntWritable.class); SequenceFileAsBinaryOutputFormat.setSequenceFileOutputValueClass(job, DoubleWritable.class); SequenceFileAsBinaryOutputFormat.setCompressOutput(job, true); SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK); BytesWritable bkey = new BytesWritable(); BytesWritable bval = new BytesWritable(); TaskAttemptContext context = MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration()); OutputFormat<BytesWritable, BytesWritable> outputFormat = new SequenceFileAsBinaryOutputFormat(); OutputCommitter committer = outputFormat.getOutputCommitter(context); committer.setupJob(job); RecordWriter<BytesWritable, BytesWritable> writer = outputFormat.getRecordWriter(context); IntWritable iwritable = new IntWritable(); DoubleWritable dwritable = new DoubleWritable(); DataOutputBuffer outbuf = new DataOutputBuffer(); LOG.info("Creating data by SequenceFileAsBinaryOutputFormat"); try { for (int i = 0; i < RECORDS; ++i) { iwritable = new IntWritable(r.nextInt()); iwritable.write(outbuf); bkey.set(outbuf.getData(), 0, outbuf.getLength()); outbuf.reset(); dwritable = new DoubleWritable(r.nextDouble()); dwritable.write(outbuf); bval.set(outbuf.getData(), 0, outbuf.getLength()); outbuf.reset(); writer.write(bkey, bval); } } finally { writer.close(context); } committer.commitTask(context); committer.commitJob(job); InputFormat<IntWritable, DoubleWritable> iformat = new SequenceFileInputFormat<IntWritable, DoubleWritable>(); int count = 0; r.setSeed(seed); SequenceFileInputFormat.setInputPaths(job, outdir); LOG.info("Reading data by SequenceFileInputFormat"); for (InputSplit split : iformat.getSplits(job)) { RecordReader<IntWritable, DoubleWritable> reader = iformat.createRecordReader(split, context); MapContext<IntWritable, DoubleWritable, BytesWritable, BytesWritable> mcontext = new MapContextImpl<IntWritable, DoubleWritable, BytesWritable, BytesWritable>( job.getConfiguration(), context.getTaskAttemptID(), reader, null, null, MapReduceTestUtil.createDummyReporter(), split); reader.initialize(split, mcontext); try { int sourceInt; double sourceDouble; while (reader.nextKeyValue()) { sourceInt = r.nextInt(); sourceDouble = r.nextDouble(); iwritable = reader.getCurrentKey(); dwritable = reader.getCurrentValue(); assertEquals( "Keys don't match: " + "*" + iwritable.get() + ":" + sourceInt + "*", sourceInt, iwritable.get()); assertTrue( "Vals don't match: " + "*" + dwritable.get() + ":" + sourceDouble + "*", Double.compare(dwritable.get(), sourceDouble) == 0); ++count; } } finally { reader.close(); } } assertEquals("Some records not found", RECORDS, count); }
@SuppressWarnings("unchecked") @Override public void run() { JobID jobId = profile.getJobID(); JobContext jContext = new JobContextImpl(conf, jobId); org.apache.hadoop.mapreduce.OutputCommitter outputCommitter = null; try { outputCommitter = createOutputCommitter(conf.getUseNewMapper(), jobId, conf); } catch (Exception e) { LOG.info("Failed to createOutputCommitter", e); return; } try { TaskSplitMetaInfo[] taskSplitMetaInfos = SplitMetaInfoReader.readSplitMetaInfo(jobId, localFs, conf, systemJobDir); int numReduceTasks = job.getNumReduceTasks(); if (numReduceTasks > 1 || numReduceTasks < 0) { // we only allow 0 or 1 reducer in local mode numReduceTasks = 1; job.setNumReduceTasks(1); } outputCommitter.setupJob(jContext); status.setSetupProgress(1.0f); Map<TaskAttemptID, MapOutputFile> mapOutputFiles = Collections.synchronizedMap(new HashMap<TaskAttemptID, MapOutputFile>()); List<MapTaskRunnable> taskRunnables = getMapTaskRunnables(taskSplitMetaInfos, jobId, mapOutputFiles); ExecutorService mapService = createMapExecutor(taskRunnables.size()); // Start populating the executor with work units. // They may begin running immediately (in other threads). for (Runnable r : taskRunnables) { mapService.submit(r); } try { mapService.shutdown(); // Instructs queue to drain. // Wait for tasks to finish; do not use a time-based timeout. // (See http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6179024) LOG.info("Waiting for map tasks"); mapService.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS); } catch (InterruptedException ie) { // Cancel all threads. mapService.shutdownNow(); throw ie; } LOG.info("Map task executor complete."); // After waiting for the map tasks to complete, if any of these // have thrown an exception, rethrow it now in the main thread context. for (MapTaskRunnable r : taskRunnables) { if (r.storedException != null) { throw new Exception(r.storedException); } } TaskAttemptID reduceId = new TaskAttemptID(new TaskID(jobId, false, 0), 0); try { if (numReduceTasks > 0) { ReduceTask reduce = new ReduceTask(systemJobFile.toString(), reduceId, 0, mapIds.size(), 1); reduce.setUser(UserGroupInformation.getCurrentUser().getShortUserName()); JobConf localConf = new JobConf(job); localConf.set("mapreduce.jobtracker.address", "local"); TaskRunner.setupChildMapredLocalDirs(reduce, localConf); // move map output to reduce input for (int i = 0; i < mapIds.size(); i++) { if (!this.isInterrupted()) { TaskAttemptID mapId = mapIds.get(i); Path mapOut = mapOutputFiles.get(mapId).getOutputFile(); MapOutputFile localOutputFile = new MapOutputFile(); localOutputFile.setConf(localConf); Path reduceIn = localOutputFile.getInputFileForWrite( mapId.getTaskID(), localFs.getFileStatus(mapOut).getLen()); if (!localFs.mkdirs(reduceIn.getParent())) { throw new IOException( "Mkdirs failed to create " + reduceIn.getParent().toString()); } if (!localFs.rename(mapOut, reduceIn)) throw new IOException("Couldn't rename " + mapOut); } else { throw new InterruptedException(); } } if (!this.isInterrupted()) { reduce.setJobFile(localJobFile.toString()); localConf.setUser(reduce.getUser()); reduce.localizeConfiguration(localConf); reduce.setConf(localConf); reduce_tasks += 1; myMetrics.launchReduce(reduce.getTaskID()); reduce.run(localConf, this); myMetrics.completeReduce(reduce.getTaskID()); reduce_tasks -= 1; } else { throw new InterruptedException(); } } } finally { for (MapOutputFile output : mapOutputFiles.values()) { output.removeAll(); } } // delete the temporary directory in output directory outputCommitter.commitJob(jContext); status.setCleanupProgress(1.0f); if (killed) { this.status.setRunState(JobStatus.KILLED); } else { this.status.setRunState(JobStatus.SUCCEEDED); } JobEndNotifier.localRunnerNotification(job, status); } catch (Throwable t) { try { outputCommitter.abortJob(jContext, org.apache.hadoop.mapreduce.JobStatus.State.FAILED); } catch (IOException ioe) { LOG.info("Error cleaning up job:" + id); } status.setCleanupProgress(1.0f); if (killed) { this.status.setRunState(JobStatus.KILLED); } else { this.status.setRunState(JobStatus.FAILED); } LOG.warn(id, t); JobEndNotifier.localRunnerNotification(job, status); } finally { try { fs.delete(systemJobFile.getParent(), true); // delete submit dir localFs.delete(localJobFile, true); // delete local copy // Cleanup distributed cache taskDistributedCacheManager.release(); trackerDistributedCacheManager.purgeCache(); } catch (IOException e) { LOG.warn("Error cleaning up " + id + ": " + e); } } }