Пример #1
0
 /**
  * Expects a {@link TupleMRException} when the group-by field "my_object" is declared in two
  * intermediate schemas, once without an explicit serialization and once with
  * {@link AvroFieldSerialization}.
  */
 @Test(expected = TupleMRException.class)
 public void testGroupByObjectDifferentSerialization() throws TupleMRException {
   TupleMRConfigBuilder configBuilder = new TupleMRConfigBuilder();
   List<Field> schemaFields = new ArrayList<Field>();

   // First schema: "my_object" is declared with no serialization set on the field.
   schemaFields.add(Field.createObject("my_object", A.class));
   Schema firstSchema = new Schema("schema1", schemaFields);
   configBuilder.addIntermediateSchema(firstSchema);

   // Second schema: the same field name, this time with Avro serialization set explicitly.
   // The same list instance is emptied and reused, exactly as the schemas are built in sequence.
   schemaFields.clear();
   Field avroBackedField = Field.createObject("my_object", Object.class);
   avroBackedField.setObjectSerialization(AvroFieldSerialization.class);
   schemaFields.add(avroBackedField);
   Schema secondSchema = new Schema("schema2", schemaFields);
   configBuilder.addIntermediateSchema(secondSchema);

   configBuilder.setGroupByFields("my_object");
   configBuilder.buildConf();
 }
Пример #2
0
  /**
   * Wraps a tuple with 30 string fields in a {@link NullableTuple} and checks that every field can
   * be read back, set to null, and restored to its previous value.
   */
  @Test
  public void testMoreThanEightProps() {
    final int fieldCount = 30;

    List<Field> stringFields = new ArrayList<Field>(fieldCount);
    for (int pos = 0; pos < fieldCount; pos++) {
      stringFields.add(Field.create("field" + pos, Type.STRING));
    }
    Schema wideSchema = new Schema("bigmammaschema", stringFields);

    // Give every field a distinct, predictable value before wrapping the tuple.
    ITuple wrapped = new Tuple(wideSchema);
    for (int pos = 0; pos < fieldCount; pos++) {
      wrapped.set("field" + pos, "defaultValue" + pos);
    }

    // The wrapper must expose the values that were already present in the wrapped tuple.
    NullableTuple nullable = new NullableTuple(wrapped);
    for (int pos = 0; pos < fieldCount; pos++) {
      assertEquals("defaultValue" + pos, nullable.get("field" + pos));
    }

    // Null out one field at a time, verify it, then put the original value back and verify again.
    for (int pos = 0; pos < fieldCount; pos++) {
      String fieldName = "field" + pos;
      String originalValue = "defaultValue" + pos;

      nullable.set(fieldName, null);
      assertEquals(null, nullable.getNullable(fieldName));

      nullable.set(fieldName, originalValue);
      assertEquals(originalValue, nullable.getNullable(fieldName));
    }
  }
Пример #3
0
 /**
  * Creates an instance of the object class declared by {@code field} and stores it in
  * {@code tuple} at position {@code index}.
  *
  * <p>When the instance is an {@code A}, its id and url are populated first: with random numbers
  * rendered as strings if {@code isRandom} is true, otherwise with empty strings. Instances of any
  * other class are stored exactly as created.
  *
  * @param isRandom whether the id and url of an {@code A} instance get random content
  * @param tuple the tuple that receives the new instance
  * @param field the field whose object class is instantiated
  * @param index the tuple position to write to
  */
 protected static void fillObject(boolean isRandom, ITuple tuple, Field field, int index) {
   Object created = ReflectionUtils.newInstance(field.getObjectClass(), null);
   if (created instanceof A) {
     String id = "";
     String url = "";
     if (isRandom) {
       // Draw order is kept: an int for the id first, then a long for the url.
       id = random.nextInt() + "";
       url = random.nextLong() + "";
     }
     A target = (A) created;
     target.setId(id);
     target.setUrl(url);
   }
   tuple.set(index, created);
 }
Пример #4
0
 // Initializes SCHEMA once, when the class is loaded, with one field for each of the types
 // INT, STRING, LONG, FLOAT, DOUBLE and BOOLEAN, followed by an enum field and an object field.
 static {
   List<Field> schemaFields = new ArrayList<Field>();

   // Primitive and string fields.
   schemaFields.add(Field.create("int_field", Type.INT));
   schemaFields.add(Field.create("string_field", Type.STRING));
   schemaFields.add(Field.create("long_field", Type.LONG));
   schemaFields.add(Field.create("float_field", Type.FLOAT));
   schemaFields.add(Field.create("double_field", Type.DOUBLE));
   schemaFields.add(Field.create("boolean_field", Type.BOOLEAN));

   // Fields that carry a class: an enum (Order) and an arbitrary object (A).
   schemaFields.add(Field.createEnum("enum_field", Order.class));
   schemaFields.add(Field.createObject("thrift_field", A.class));

   SCHEMA = new Schema("schema", schemaFields);
 }
Пример #5
0
 /**
  * Builds a configuration that groups by the object field "my_object" when both intermediate
  * schemas are created from the very same field list; the test passes if no exception is thrown.
  */
 @Test
 public void testGroupByObjectSameSerialization() throws TupleMRException {
   TupleMRConfigBuilder configBuilder = new TupleMRConfigBuilder();

   List<Field> sharedFields = new ArrayList<Field>();
   sharedFields.add(Field.createObject("my_object", A.class));

   // Register two schemas that differ only in their name.
   for (String schemaName : new String[] {"schema1", "schema2"}) {
     configBuilder.addIntermediateSchema(new Schema(schemaName, sharedFields));
   }

   configBuilder.setGroupByFields("my_object");
   configBuilder.buildConf();
 }
Пример #6
0
 /**
  * Builds a random sort criteria over the first {@code numFields} fields of the given schema.
  *
  * <p>For every field, one random boolean decides whether a {@link ReverseEqualsComparator} is
  * attached to the sort element, and a second random boolean picks between {@code Order.ASC} and
  * {@code Order.DESC}.
  *
  * @param schema the schema whose fields are sorted on, taken in positional order
  * @param numFields how many leading fields of the schema take part in the criteria
  * @return an {@link OrderBy} holding one sort element per selected field
  * @throws TupleMRException declared by the signature; NOTE(review): presumably raised while
  *     building the sort elements or the OrderBy - confirm against those constructors
  */
 protected static OrderBy createRandomSortCriteria(Schema schema, int numFields)
     throws TupleMRException {
   List<SortElement> elements = new ArrayList<SortElement>();
   for (int pos = 0; pos < numFields; pos++) {
     Field field = schema.getField(pos);

     // Keep the draw order stable: comparator choice first, sort direction second.
     boolean useCustomComparator = random.nextBoolean();
     Order direction = random.nextBoolean() ? Order.ASC : Order.DESC;

     SortElement element;
     if (useCustomComparator) {
       ReverseEqualsComparator comparator =
           new ReverseEqualsComparator(field.getType(), field.getObjectClass());
       element = new SortElement(field.getName(), direction, comparator);
     } else {
       element = new SortElement(field.getName(), direction);
     }
     elements.add(element);
   }
   return new OrderBy(elements);
 }
  /**
   * Creates the "Pangool WordCount" job: tuples of (word:string, count:int) grouped by "word".
   *
   * <p>Any existing content at {@code output} is deleted recursively before the job is configured.
   *
   * @param conf the Hadoop configuration used for filesystem access and for the job
   * @param input path of the text input, read through {@code TextInputFormat}
   * @param output path of the job output, written through {@code TextOutputFormat} with
   *     {@code Text} keys and values
   * @return the configured job, not yet submitted by this method
   * @throws TupleMRException declared by the signature; NOTE(review): presumably raised by the
   *     builder when the job definition is invalid - confirm
   * @throws IOException if the output location cannot be accessed or deleted
   */
  public Job getJob(Configuration conf, String input, String output)
      throws TupleMRException, IOException {
    Path outputPath = new Path(output);
    // Resolve the filesystem from the output path itself instead of FileSystem.get(conf): the
    // latter returns the default filesystem and rejects paths that live on another scheme.
    FileSystem fs = outputPath.getFileSystem(conf);
    fs.delete(outputPath, true);

    List<Field> fields = new ArrayList<Field>();
    fields.add(Field.create("word", Type.STRING));
    fields.add(Field.create("count", Type.INT));
    Schema schema = new Schema("schema", fields);

    TupleMRBuilder cg = new TupleMRBuilder(conf, "Pangool WordCount");
    cg.addIntermediateSchema(schema);
    cg.setGroupByFields("word");
    cg.setJarByClass(PangoolWordCount.class);
    cg.addInput(new Path(input), new HadoopInputFormat(TextInputFormat.class), new Split());
    cg.setOutput(outputPath, new HadoopOutputFormat(TextOutputFormat.class), Text.class, Text.class);
    cg.setTupleReducer(new Count());
    cg.setTupleCombiner(new CountCombiner());

    return cg.createJob();
  }
Пример #8
0
 /**
  * Populates the tuple positions {@code minIndex} through {@code maxIndex} (both inclusive),
  * choosing each value according to the type of the schema field at that position.
  *
  * <p>With {@code isRandom} set, numeric and boolean fields receive values drawn from the shared
  * random source; otherwise they receive {@code 0}, {@code 0L}, {@code false}, {@code 0.0} and
  * {@code 0f} respectively. String, enum and object fields are delegated to {@code fillString},
  * {@code fillEnum} and {@code fillObject}.
  *
  * @param isRandom whether to write random values instead of fixed defaults
  * @param tuple the tuple to populate; its schema determines the type of every position
  * @param minIndex first position to fill
  * @param maxIndex last position to fill
  * @throws RuntimeException wrapping any exception raised while filling, including the
  *     {@link IllegalArgumentException} thrown for an unsupported field type
  */
 protected static void fillTuple(boolean isRandom, ITuple tuple, int minIndex, int maxIndex) {
   try {
     for (int pos = minIndex; pos <= maxIndex; pos++) {
       Field current = tuple.getSchema().getField(pos);
       switch (current.getType()) {
         case INT:
           int intValue = isRandom ? random.nextInt() : 0;
           tuple.set(pos, intValue);
           break;
         case LONG:
           long longValue = isRandom ? random.nextLong() : 0L;
           tuple.set(pos, longValue);
           break;
         case BOOLEAN:
           boolean booleanValue = isRandom ? random.nextBoolean() : false;
           tuple.set(pos, booleanValue);
           break;
         case DOUBLE:
           double doubleValue = isRandom ? random.nextDouble() : 0.0;
           tuple.set(pos, doubleValue);
           break;
         case FLOAT:
           float floatValue = isRandom ? random.nextFloat() : 0f;
           tuple.set(pos, floatValue);
           break;
         case STRING:
           fillString(isRandom, tuple, pos);
           break;
         case ENUM:
           fillEnum(isRandom, current, tuple, pos);
           break;
         case OBJECT:
           fillObject(isRandom, tuple, current, pos);
           break;
         default:
           throw new IllegalArgumentException("Not supported type " + current.getType());
       }
     }
   } catch (Exception cause) {
     // Helpers declare checked exceptions; surface everything as unchecked with the cause kept.
     throw new RuntimeException(cause);
   }
 }
Пример #9
0
  /**
   * Prepares the three schemas used by the tests: {@code schema1} and {@code schema2} are parsed
   * from textual field definitions, {@code schema3} is assembled field by field.
   *
   * @throws TupleMRException declared by the signature; NOTE(review): presumably raised by
   *     {@code Fields.parse} on a malformed definition - confirm
   */
  @Before
  public void init() throws TupleMRException {
    // The irregular spacing after some commas is part of the definitions and is kept as is.
    String firstDefinition = "int_field:int, string_field:string,boolean_field:boolean";
    this.schema1 = new Schema("schema1", Fields.parse(firstDefinition));

    String secondDefinition = "long_field:long,boolean_field:boolean, int_field:int";
    this.schema2 = new Schema("schema2", Fields.parse(secondDefinition));

    List<Field> allTypeFields = new ArrayList<Field>();
    // Primitive and string fields.
    allTypeFields.add(Field.create("int_field", Type.INT));
    allTypeFields.add(Field.create("string_field", Type.STRING));
    allTypeFields.add(Field.create("long_field", Type.LONG));
    allTypeFields.add(Field.create("float_field", Type.FLOAT));
    allTypeFields.add(Field.create("double_field", Type.DOUBLE));
    allTypeFields.add(Field.create("boolean_field", Type.BOOLEAN));
    // Fields that carry a class: an enum (Order) and an arbitrary object (A).
    allTypeFields.add(Field.createEnum("enum_field", Order.class));
    allTypeFields.add(Field.createObject("thrift_field", A.class));
    this.schema3 = new Schema("schema3", allTypeFields);
  }
Пример #10
0
  /**
   * Compares two single-field tuples holding 1 and 2 under two ascending criteria on "int": the
   * one built with {@code revIntComp} must yield a positive result, the one without a custom
   * comparator a negative result.
   */
  @Test
  public void testCompare() {
    ArrayList<Field> fields = new ArrayList<Field>();
    fields.add(Field.create("int", Field.Type.INT));
    Schema schema = new Schema("schema", fields);

    Criteria customCriteria =
        new Criteria(new OrderBy().add("int", Order.ASC, revIntComp).getElements());
    Criteria plainCriteria = new Criteria(new OrderBy().add("int", Order.ASC).getElements());

    Tuple holdsOne = new Tuple(schema);
    Tuple holdsTwo = new Tuple(schema);
    // Both tuples are compared on position 0, the only field of the schema.
    int[] positions = {0};

    holdsOne.set("int", 1);
    holdsTwo.set("int", 2);

    SortComparator comparator = new SortComparator();

    int withCustomComparator =
        comparator.compare(schema, customCriteria, holdsOne, positions, holdsTwo, positions);
    assertPositive(withCustomComparator);

    int withoutCustomComparator =
        comparator.compare(schema, plainCriteria, holdsOne, positions, holdsTwo, positions);
    assertNegative(withoutCustomComparator);
  }
Пример #11
0
 /**
  * Stores one constant of the enum class declared by {@code field} in {@code tuple} at position
  * {@code index}: a randomly chosen constant when {@code isRandom} is true, otherwise the first
  * declared constant.
  *
  * @param isRandom whether to pick a random constant instead of the first one
  * @param field the field whose object class supplies the enum constants
  * @param tuple the tuple that receives the constant
  * @param index the tuple position to write to
  * @throws IllegalArgumentException if the object class of {@code field} is not an enum
  * @throws Exception kept in the signature for compatibility with existing callers
  */
 protected static void fillEnum(boolean isRandom, Field field, ITuple tuple, int index)
     throws Exception {
   Class<?> enumClass = field.getObjectClass();
   // Class#getEnumConstants returns the constants in declaration order, like values(), without
   // the reflective method lookup, the raw Enum[] cast, or the checked reflection exceptions.
   Object[] constants = enumClass.getEnumConstants();
   if (constants == null) {
     // getEnumConstants yields null when the class is not an enum type.
     throw new IllegalArgumentException(
         "Field " + field.getName() + " does not declare an enum class: " + enumClass);
   }
   tuple.set(index, constants[isRandom ? random.nextInt(constants.length) : 0]);
 }