Example #1
  @Test
  public void testCrossValidationOneSchema() throws TupleMRException, IOException {
    Configuration conf = getConf();

    int maxIndex = SCHEMA.getFields().size() - 1;

    for (int randomSchema = 0; randomSchema < MAX_RANDOM_SCHEMAS; randomSchema++) {
      Schema schema = permuteSchema(SCHEMA);
      OrderBy sortCriteria = createRandomSortCriteria(schema, maxIndex + 1);
      // TODO could we get empty group fields ??
      String[] groupFields =
          getFirstFields(sortCriteria, 1 + random.nextInt(sortCriteria.getElements().size() - 1));
      ITuple[] tuples = new ITuple[] {new Tuple(schema), new Tuple(schema)};
      for (ITuple tuple : tuples) {
        fillTuple(false, tuple, 0, maxIndex);
      }

      for (int minIndex = maxIndex; minIndex >= 0; minIndex--) {
        /* trick for speeding up the tests */
        DCUtils.cleanupTemporaryInstanceCache(conf, "comparator.dat");
        TupleMRConfigBuilder builder = new TupleMRConfigBuilder();
        builder.addIntermediateSchema(schema);
        builder.setGroupByFields(groupFields);
        builder.setOrderBy(sortCriteria);

        TupleMRConfig tupleMRConf = builder.buildConf();
        TupleMRConfig.set(tupleMRConf, conf);

        // tupleMRConf has changed -> we need a new Serialization object
        ser = new HadoopSerialization(conf);

        SortComparator sortComparator = new SortComparator();
        GroupComparator groupComparator = new GroupComparator();

        sortComparator.setConf(conf);
        groupComparator.setConf(conf);

        for (ITuple tuple : tuples) {
          fillTuple(true, tuple, minIndex, maxIndex);
        }
        for (int indexTuple1 = 0; indexTuple1 < tuples.length; indexTuple1++) {
          for (int indexTuple2 = indexTuple1 + 1; indexTuple2 < tuples.length; indexTuple2++) {
            ITuple tuple1 = tuples[indexTuple1];
            ITuple tuple2 = tuples[indexTuple2];
            assertSameComparison("Sort comparator", sortComparator, tuple1, tuple2);
            assertOppositeOrEqualsComparison(sortComparator, tuple1, tuple2);
            assertSameComparison("Group comparator", groupComparator, tuple1, tuple2);
            assertOppositeOrEqualsComparison(groupComparator, tuple1, tuple2);
          }
        }
      }
    }
  }
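The assertion helpers used above (assertSameComparison, assertOppositeOrEqualsComparison) belong to the test class and are not shown in this example. As a rough, self-contained sketch of the kind of comparator contract the second helper appears to verify, the following uses a plain java.util.Comparator; the class and method names are invented for illustration and are not Pangool code.

import java.util.Comparator;

public class ComparatorContractSketch {

  /** Checks antisymmetry of the Comparator contract: sgn(compare(a, b)) == -sgn(compare(b, a)). */
  static <T> void assertOppositeOrEquals(Comparator<T> cmp, T a, T b) {
    int ab = Integer.signum(cmp.compare(a, b));
    int ba = Integer.signum(cmp.compare(b, a));
    if (ab != -ba) {
      throw new AssertionError("compare(a,b)=" + ab + " but compare(b,a)=" + ba);
    }
  }

  public static void main(String[] args) {
    Comparator<String> byLength = Comparator.comparingInt(String::length);
    assertOppositeOrEquals(byLength, "tuple1", "a-longer-value");
    assertOppositeOrEquals(byLength, "same", "size"); // equal lengths: both comparisons return 0
    System.out.println("contract holds for the sampled pairs");
  }
}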
Example #2
 /**
  * Constructs the DelegatingRecordReader.
  *
  * @param split TaggedInputSplit object
  * @param context TaskAttemptContext object
  * @throws IOException
  * @throws InterruptedException
  */
 @SuppressWarnings("unchecked")
 public DelegatingRecordReader(InputSplit split, TaskAttemptContext context)
     throws IOException, InterruptedException {
   // Find the InputFormat and then the RecordReader from the
   // TaggedInputSplit.
   TaggedInputSplit taggedInputSplit = (TaggedInputSplit) split;
   InputFormat<K, V> inputFormat =
       (InputFormat<K, V>)
           DCUtils.loadSerializedObjectInDC(
               context.getConfiguration(),
               InputFormat.class,
               taggedInputSplit.getInputFormatFile(),
               true);
   originalRR = inputFormat.createRecordReader(taggedInputSplit.getInputSplit(), context);
 }
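For context, the file name returned by taggedInputSplit.getInputFormatFile() points at an InputFormat instance that was Java-serialized and shipped through Hadoop's DistributedCache when the job was configured; DCUtils.loadSerializedObjectInDC reads it back on the task side. Below is a minimal sketch of that general ship-an-instance pattern using only standard JDK and Hadoop APIs; the helper name, temporary path, and file naming are hypothetical and are not the Pangool DCUtils API.

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.io.Serializable;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;

/** Hypothetical helper: serializes an instance and registers it in the DistributedCache. */
public class InstanceCacheSketch {

  public static void shipInstance(Serializable instance, String fileName, Job job)
      throws IOException {
    // 1. Java-serialize the instance to a local temporary file.
    File local = File.createTempFile("instance", ".ser");
    try (ObjectOutputStream out = new ObjectOutputStream(new FileOutputStream(local))) {
      out.writeObject(instance);
    }

    // 2. Copy the serialized file to the job's file system (e.g. HDFS).
    Configuration conf = job.getConfiguration();
    FileSystem fs = FileSystem.get(conf);
    Path remote = new Path("/tmp/" + fileName); // hypothetical location
    fs.copyFromLocalFile(true, true, new Path(local.getAbsolutePath()), remote);

    // 3. Register it in the DistributedCache so each task can read a local copy,
    //    which is what loadSerializedObjectInDC does on the task side.
    job.addCacheFile(remote.toUri());
  }
}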
Example #3
 @SuppressWarnings("unchecked")
 private void initHandlerContextAndCollector(Context context)
     throws IOException, InterruptedException, TupleMRException {
   // Recover the user-provided handler instance that was serialized into the
   // DistributedCache under the file name stored in the job configuration.
   String fileName = context.getConfiguration().get(SimpleReducer.CONF_REDUCER_HANDLER);
   handler =
       DCUtils.loadSerializedObjectInDC(
           context.getConfiguration(), TupleRollupReducer.class, fileName, true);
   // Wrap the Hadoop reduce context in the handler's Collector and in a TupleMRContext,
   // then give the handler a chance to initialize itself.
   collector =
       handler
       .new Collector((ReduceContext<DatumWrapper<ITuple>, NullWritable, Object, Object>) context);
   this.context =
       new TupleMRContext(
           (ReduceContext<DatumWrapper<ITuple>, NullWritable, Object, Object>) context,
           tupleMRConfig);
   handler.setup(this.context, collector);
 }
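Example #3 also relies on Java's qualified inner-class instantiation, handler.new Collector(...), which creates a Collector bound to that particular handler instance. A minimal standalone illustration of the syntax (the class names are invented for the illustration and unrelated to Pangool):

public class Outer {
  private final String name;

  public Outer(String name) {
    this.name = name;
  }

  /** Non-static inner class: every instance is tied to an enclosing Outer instance. */
  public class Inner {
    public String describe() {
      return "inner of " + name; // can read the enclosing instance's state
    }
  }

  public static void main(String[] args) {
    Outer outer = new Outer("handler");
    // Same idiom as "handler.new Collector(context)" in the example above.
    Outer.Inner inner = outer.new Inner();
    System.out.println(inner.describe()); // prints: inner of handler
  }
}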