void checkFormat(Job job) throws Exception { TaskAttemptContext attemptContext = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID("123", 0, false, 1, 2)); MyClassMessagePackBase64LineInputFormat format = new MyClassMessagePackBase64LineInputFormat(); FileInputFormat.setInputPaths(job, workDir); List<InputSplit> splits = format.getSplits(job); for (int j = 0; j < splits.size(); j++) { RecordReader<LongWritable, MyClassWritable> reader = format.createRecordReader(splits.get(j), attemptContext); reader.initialize(splits.get(j), attemptContext); int count = 0; try { while (reader.nextKeyValue()) { LongWritable key = reader.getCurrentKey(); MyClassWritable val = reader.getCurrentValue(); MyClass mc = val.get(); assertEquals(mc.v, count); assertEquals(mc.s, Integer.toString(count)); count++; } } finally { reader.close(); } } }
/** Get the record reader for the next chunk in this CombineFileSplit. */ protected boolean initNextRecordReader() throws IOException { if (curReader != null) { curReader.close(); curReader = null; if (idx > 0) { progress += split.getLength(idx - 1); // done processing so far } } // if all chunks have been processed, nothing more to do. if (idx == split.getNumPaths()) { return false; } // get a record reader for the idx-th chunk try { Configuration conf = context.getConfiguration(); // setup some helper config variables. conf.set(MRJobConfig.MAP_INPUT_FILE, split.getPath(idx).toString()); conf.setLong(MRJobConfig.MAP_INPUT_START, split.getOffset(idx)); conf.setLong(MRJobConfig.MAP_INPUT_PATH, split.getLength(idx)); curReader = rrConstructor.newInstance(new Object[] {split, context, Integer.valueOf(idx)}); if (idx > 0) { // initialize() for the first RecordReader will be called by MapTask; // we're responsible for initializing subsequent RecordReaders. curReader.initialize(split, context); } } catch (Exception e) { throw new RuntimeException(e); } idx++; return true; }
public void testFormat() throws Exception { JobConf job = new JobConf(conf); FileSystem fs = FileSystem.getLocal(conf); Path dir = new Path(System.getProperty("test.build.data", ".") + "/mapred"); Path file = new Path(dir, "test.seq"); Reporter reporter = Reporter.NULL; int seed = new Random().nextInt(); // LOG.info("seed = "+seed); Random random = new Random(seed); fs.delete(dir, true); FileInputFormat.setInputPaths(job, dir); // for a variety of lengths for (int length = 0; length < MAX_LENGTH; length += random.nextInt(MAX_LENGTH / 10) + 1) { // LOG.info("creating; entries = " + length); // create a file with length entries SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class, BytesWritable.class); try { for (int i = 0; i < length; i++) { IntWritable key = new IntWritable(i); byte[] data = new byte[random.nextInt(10)]; random.nextBytes(data); BytesWritable value = new BytesWritable(data); writer.append(key, value); } } finally { writer.close(); } // try splitting the file in a variety of sizes InputFormat<IntWritable, BytesWritable> format = new SequenceFileInputFormat<IntWritable, BytesWritable>(); IntWritable key = new IntWritable(); BytesWritable value = new BytesWritable(); for (int i = 0; i < 3; i++) { int numSplits = random.nextInt(MAX_LENGTH / (SequenceFile.SYNC_INTERVAL / 20)) + 1; // LOG.info("splitting: requesting = " + numSplits); InputSplit[] splits = format.getSplits(job, numSplits); // LOG.info("splitting: got = " + splits.length); // check each split BitSet bits = new BitSet(length); for (int j = 0; j < splits.length; j++) { RecordReader<IntWritable, BytesWritable> reader = format.getRecordReader(splits[j], job, reporter); try { int count = 0; while (reader.next(key, value)) { // if (bits.get(key.get())) { // LOG.info("splits["+j+"]="+splits[j]+" : " + // key.get()); // LOG.info("@"+reader.getPos()); // } assertFalse("Key in multiple partitions.", bits.get(key.get())); bits.set(key.get()); count++; } // LOG.info("splits["+j+"]="+splits[j]+" count=" + // count); } finally { reader.close(); } } assertEquals("Some keys in no partition.", length, bits.cardinality()); } } }
public void close() throws IOException { if (curReader != null) { curReader.close(); curReader = null; } }