public void testStep0Mapper() throws Exception { Random rng = RandomUtils.getRandom(); // create a dataset large enough to be split up String descriptor = Utils.randomDescriptor(rng, numAttributes); double[][] source = Utils.randomDoubles(rng, descriptor, numInstances); String[] sData = Utils.double2String(source); // write the data to a file Path dataPath = Utils.writeDataToTestFile(sData); JobConf job = new JobConf(); job.setNumMapTasks(numMaps); FileInputFormat.setInputPaths(job, dataPath); // retrieve the splits TextInputFormat input = (TextInputFormat) job.getInputFormat(); InputSplit[] splits = input.getSplits(job, numMaps); InputSplit[] sorted = Arrays.copyOf(splits, splits.length); Builder.sortSplits(sorted); Step0OutputCollector collector = new Step0OutputCollector(numMaps); Reporter reporter = Reporter.NULL; for (int p = 0; p < numMaps; p++) { InputSplit split = sorted[p]; RecordReader<LongWritable, Text> reader = input.getRecordReader(split, job, reporter); LongWritable key = reader.createKey(); Text value = reader.createValue(); Step0Mapper mapper = new Step0Mapper(); mapper.configure(p); Long firstKey = null; int size = 0; while (reader.next(key, value)) { if (firstKey == null) { firstKey = key.get(); } mapper.map(key, value, collector, reporter); size++; } mapper.close(); // validate the mapper's output assertEquals(p, collector.keys[p]); assertEquals(firstKey.longValue(), collector.values[p].getFirstId()); assertEquals(size, collector.values[p].getSize()); } }
public void testProcessOutput() throws Exception { Random rng = RandomUtils.getRandom(); // create a dataset large enough to be split up String descriptor = Utils.randomDescriptor(rng, numAttributes); double[][] source = Utils.randomDoubles(rng, descriptor, numInstances); // each instance label is its index in the dataset int labelId = Utils.findLabel(descriptor); for (int index = 0; index < numInstances; index++) { source[index][labelId] = index; } String[] sData = Utils.double2String(source); // write the data to a file Path dataPath = Utils.writeDataToTestFile(sData); // prepare a data converter Dataset dataset = DataLoader.generateDataset(descriptor, sData); DataConverter converter = new DataConverter(dataset); JobConf job = new JobConf(); job.setNumMapTasks(numMaps); FileInputFormat.setInputPaths(job, dataPath); // retrieve the splits TextInputFormat input = (TextInputFormat) job.getInputFormat(); InputSplit[] splits = input.getSplits(job, numMaps); InputSplit[] sorted = Arrays.copyOf(splits, splits.length); Builder.sortSplits(sorted); Reporter reporter = Reporter.NULL; int[] keys = new int[numMaps]; Step0Output[] values = new Step0Output[numMaps]; int[] expectedIds = new int[numMaps]; for (int p = 0; p < numMaps; p++) { InputSplit split = sorted[p]; RecordReader<LongWritable, Text> reader = input.getRecordReader(split, job, reporter); LongWritable key = reader.createKey(); Text value = reader.createValue(); Long firstKey = null; int size = 0; while (reader.next(key, value)) { if (firstKey == null) { firstKey = key.get(); expectedIds[p] = converter.convert(0, value.toString()).label; } size++; } keys[p] = p; values[p] = new Step0Output(firstKey, size); } Step0Output[] partitions = Step0Job.processOutput(keys, values); int[] actualIds = Step0Output.extractFirstIds(partitions); assertTrue( "Expected: " + Arrays.toString(expectedIds) + " But was: " + Arrays.toString(actualIds), Arrays.equals(expectedIds, actualIds)); }