/**
 * Cross-validation of the sort and group comparators over one schema.
 *
 * <p>For a number of randomly permuted schemas, builds a random {@code OrderBy}
 * and a random group-by prefix, fills two tuples, and then — for every suffix
 * of fields — checks that:
 * <ul>
 *   <li>{@code assertSameComparison}: the binary (serialized) comparison and the
 *       object comparison agree, for both SortComparator and GroupComparator;</li>
 *   <li>{@code assertOppositeOrEqualsComparison}: compare(a, b) and compare(b, a)
 *       have opposite signs (or both are zero).</li>
 * </ul>
 */
@Test
public void testCrossValidationOneSchema() throws TupleMRException, IOException {
  Configuration conf = getConf();
  int maxIndex = SCHEMA.getFields().size() - 1;
  for (int randomSchema = 0; randomSchema < MAX_RANDOM_SCHEMAS; randomSchema++) {
    Schema schema = permuteSchema(SCHEMA);
    OrderBy sortCriteria = createRandomSortCriteria(schema, maxIndex + 1);
    // TODO could we get empty group fields ??
    // NOTE(review): nextInt(size - 1) throws if the criteria has a single
    // element — presumably createRandomSortCriteria always yields >= 2; verify.
    String[] groupFields = getFirstFields(sortCriteria,
        1 + random.nextInt(sortCriteria.getElements().size() - 1));
    ITuple[] tuples = new ITuple[] {new Tuple(schema), new Tuple(schema)};
    // Fill every field of both tuples once up front.
    for (ITuple tuple : tuples) {
      fillTuple(false, tuple, 0, maxIndex);
    }
    // Shrink the refilled range from the right, re-checking comparators each time.
    for (int minIndex = maxIndex; minIndex >= 0; minIndex--) {
      /* trick for speeding up the tests */
      DCUtils.cleanupTemporaryInstanceCache(conf, "comparator.dat");
      TupleMRConfigBuilder builder = new TupleMRConfigBuilder();
      builder.addIntermediateSchema(schema);
      builder.setGroupByFields(groupFields);
      builder.setOrderBy(sortCriteria);
      TupleMRConfig tupleMRConf = builder.buildConf();
      TupleMRConfig.set(tupleMRConf, conf);
      // tupleMRConf has changed -> we need a new Serialization object
      ser = new HadoopSerialization(conf);
      SortComparator sortComparator = new SortComparator();
      GroupComparator groupComparator = new GroupComparator();
      sortComparator.setConf(conf);
      groupComparator.setConf(conf);
      // Re-randomize only the [minIndex, maxIndex] suffix of each tuple.
      for (ITuple tuple : tuples) {
        fillTuple(true, tuple, minIndex, maxIndex);
      }
      // Pairwise-check every distinct tuple pair (including (t, t) symmetry
      // via the opposite-or-equals assertion inside the helpers).
      for (int indexTuple1 = 0; indexTuple1 < tuples.length; indexTuple1++) {
        for (int indexTuple2 = indexTuple1 + 1; indexTuple2 < tuples.length; indexTuple2++) {
          ITuple tuple1 = tuples[indexTuple1];
          ITuple tuple2 = tuples[indexTuple2];
          assertSameComparison("Sort comparator", sortComparator, tuple1, tuple2);
          assertOppositeOrEqualsComparison(sortComparator, tuple1, tuple2);
          assertSameComparison("Group comparator", groupComparator, tuple1, tuple2);
          assertOppositeOrEqualsComparison(groupComparator, tuple1, tuple2);
        }
      }
    }
  }
}
/** * Constructs the DelegatingRecordReader. * * @param split TaggegInputSplit object * @param context TaskAttemptContext object * @throws IOException * @throws InterruptedException */ @SuppressWarnings("unchecked") public DelegatingRecordReader(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException { // Find the InputFormat and then the RecordReader from the // TaggedInputSplit. TaggedInputSplit taggedInputSplit = (TaggedInputSplit) split; InputFormat<K, V> inputFormat = (InputFormat<K, V>) DCUtils.loadSerializedObjectInDC( context.getConfiguration(), InputFormat.class, taggedInputSplit.getInputFormatFile(), true); originalRR = inputFormat.createRecordReader(taggedInputSplit.getInputSplit(), context); }
/**
 * Loads the user handler from the distributed cache and wires up the
 * collector and the {@code TupleMRContext} before invoking the handler's
 * {@code setup} callback.
 *
 * @param context the Hadoop reduce context of the current task
 * @throws IOException if the serialized handler cannot be loaded
 * @throws InterruptedException if setup is interrupted
 * @throws TupleMRException if the handler's setup fails
 */
@SuppressWarnings("unchecked")
private void initHandlerContextAndCollector(Context context)
    throws IOException, InterruptedException, TupleMRException {
  Configuration conf = context.getConfiguration();
  String serializedHandlerFile = conf.get(SimpleReducer.CONF_REDUCER_HANDLER);
  handler = DCUtils.loadSerializedObjectInDC(conf, TupleRollupReducer.class,
      serializedHandlerFile, true);
  // Cast once and reuse: the same reduce context backs both the collector
  // and the TupleMRContext.
  ReduceContext<DatumWrapper<ITuple>, NullWritable, Object, Object> reduceContext =
      (ReduceContext<DatumWrapper<ITuple>, NullWritable, Object, Object>) context;
  // Collector is a non-static inner class, so it is bound to the handler instance.
  collector = handler.new Collector(reduceContext);
  this.context = new TupleMRContext(reduceContext, tupleMRConfig);
  handler.setup(this.context, collector);
}