/**
 * Registers two intermediate schemas that both declare a "my_object" field, the second one with
 * {@link AvroFieldSerialization} set explicitly, and then groups by that field. The test only
 * passes if a {@link TupleMRException} is raised.
 */
@Test(expected = TupleMRException.class)
public void testGroupByObjectDifferentSerialization() throws TupleMRException {
  TupleMRConfigBuilder configBuilder = new TupleMRConfigBuilder();
  List<Field> schemaFields = new ArrayList<Field>();

  // schema1: "my_object" is an A with no serialization configured on the field.
  Field plainField = Field.createObject("my_object", A.class);
  schemaFields.add(plainField);
  Schema firstSchema = new Schema("schema1", schemaFields);
  configBuilder.addIntermediateSchema(firstSchema);

  // schema2: same field name, but with an explicit Avro serialization.
  // The very same list instance is emptied and reused, exactly once schema1 has been registered.
  schemaFields.clear();
  Field avroField = Field.createObject("my_object", Object.class);
  avroField.setObjectSerialization(AvroFieldSerialization.class);
  schemaFields.add(avroField);
  Schema secondSchema = new Schema("schema2", schemaFields);
  configBuilder.addIntermediateSchema(secondSchema);

  configBuilder.setGroupByFields("my_object");
  configBuilder.buildConf();
}
@Test public void testMoreThanEightProps() { List<Field> fields = new ArrayList<Field>(30); for (int i = 0; i < 30; i++) { fields.add(Field.create("field" + i, Type.STRING)); } Schema schema = new Schema("bigmammaschema", fields); ITuple tuple = new Tuple(schema); for (int i = 0; i < 30; i++) { // Fill in with some default values tuple.set("field" + i, "defaultValue" + i); } NullableTuple nullableTuple = new NullableTuple(tuple); for (int i = 0; i < 30; i++) { // Assert the default values assertEquals("defaultValue" + i, nullableTuple.get("field" + i)); } // Set fields to null one by one and assert that things go well for (int i = 0; i < 30; i++) { nullableTuple.set("field" + i, null); assertEquals(null, nullableTuple.getNullable("field" + i)); nullableTuple.set("field" + i, "defaultValue" + i); assertEquals("defaultValue" + i, nullableTuple.getNullable("field" + i)); } }
/**
 * Instantiates the object class declared by {@code field} and stores the instance at position
 * {@code index} of {@code tuple}.
 *
 * <p>When the instance is an {@code A}, its id and url are populated first: with the textual form
 * of a random int and a random long when {@code isRandom} is true, or with empty strings
 * otherwise. Instances of any other class are stored as created.
 *
 * @param isRandom whether the {@code A} properties get random content or empty strings
 * @param tuple tuple receiving the new instance
 * @param field field whose object class is instantiated
 * @param index position in {@code tuple} to write to
 */
protected static void fillObject(boolean isRandom, ITuple tuple, Field field, int index) {
  Object created = ReflectionUtils.newInstance(field.getObjectClass(), null);

  if (created instanceof A) {
    A target = (A) created;

    String id = isRandom ? String.valueOf(random.nextInt()) : "";
    target.setId(id);

    String url = isRandom ? String.valueOf(random.nextLong()) : "";
    target.setUrl(url);
  }

  tuple.set(index, created);
}
static { List<Field> fields = new ArrayList<Field>(); fields.add(Field.create("int_field", Type.INT)); fields.add(Field.create("string_field", Type.STRING)); fields.add(Field.create("long_field", Type.LONG)); fields.add(Field.create("float_field", Type.FLOAT)); fields.add(Field.create("double_field", Type.DOUBLE)); fields.add(Field.create("boolean_field", Type.BOOLEAN)); fields.add(Field.createEnum("enum_field", Order.class)); fields.add(Field.createObject("thrift_field", A.class)); SCHEMA = new Schema("schema", fields); }
/**
 * Registers two intermediate schemas built from the very same field list (a single "my_object"
 * field of class {@code A}) and groups by that field. Building the configuration is expected to
 * complete without throwing.
 */
@Test
public void testGroupByObjectSameSerialization() throws TupleMRException {
  TupleMRConfigBuilder configBuilder = new TupleMRConfigBuilder();

  List<Field> sharedFields = new ArrayList<Field>();
  sharedFields.add(Field.createObject("my_object", A.class));

  // Both schemas are created from the same list instance, in this order.
  for (String schemaName : new String[] {"schema1", "schema2"}) {
    configBuilder.addIntermediateSchema(new Schema(schemaName, sharedFields));
  }

  configBuilder.setGroupByFields("my_object");
  configBuilder.buildConf();
}
/** * Creates a random sort criteria based in the specified schema. * * @throws TupleMRException */ protected static OrderBy createRandomSortCriteria(Schema schema, int numFields) throws TupleMRException { List<SortElement> builder = new ArrayList<SortElement>(); for (int i = 0; i < numFields; i++) { Field field = schema.getField(i); if (random.nextBoolean()) { // With custom comparator builder.add( new SortElement( field.getName(), random.nextBoolean() ? Order.ASC : Order.DESC, new ReverseEqualsComparator(field.getType(), field.getObjectClass()))); } else { // Without custom comparator builder.add( new SortElement(field.getName(), random.nextBoolean() ? Order.ASC : Order.DESC)); } } return new OrderBy(builder); }
/**
 * Assembles the "Pangool WordCount" job.
 *
 * <p>The output path is deleted recursively before anything else is configured. The job uses a
 * single intermediate schema with a string "word" and an int "count", groups by "word", reads
 * text input through {@code Split}, and reduces/combines with {@code Count} and
 * {@code CountCombiner}.
 *
 * @param conf Hadoop configuration used to obtain the file system and to build the job
 * @param input path of the text input
 * @param output path of the text output; any existing content there is removed
 * @return the configured job, not yet submitted
 * @throws TupleMRException declared by the signature; presumably thrown by the Pangool builder
 *     when the job definition is invalid (confirm against {@code TupleMRBuilder})
 * @throws IOException declared by the signature; the file-system calls made here can raise it
 */
public Job getJob(Configuration conf, String input, String output)
    throws TupleMRException, IOException {
  // Clear whatever an earlier run left at the output location.
  FileSystem fileSystem = FileSystem.get(conf);
  Path outputPath = new Path(output);
  fileSystem.delete(outputPath, true);

  List<Field> wordCountFields = new ArrayList<Field>();
  wordCountFields.add(Field.create("word", Type.STRING));
  wordCountFields.add(Field.create("count", Type.INT));
  Schema wordCountSchema = new Schema("schema", wordCountFields);

  TupleMRBuilder jobBuilder = new TupleMRBuilder(conf, "Pangool WordCount");
  jobBuilder.addIntermediateSchema(wordCountSchema);
  jobBuilder.setGroupByFields("word");
  jobBuilder.setJarByClass(PangoolWordCount.class);

  HadoopInputFormat textInput = new HadoopInputFormat(TextInputFormat.class);
  jobBuilder.addInput(new Path(input), textInput, new Split());

  HadoopOutputFormat textOutput = new HadoopOutputFormat(TextOutputFormat.class);
  jobBuilder.setOutput(outputPath, textOutput, Text.class, Text.class);

  jobBuilder.setTupleReducer(new Count());
  jobBuilder.setTupleCombiner(new CountCombiner());

  return jobBuilder.createJob();
}
/**
 * Fills the tuple positions {@code minIndex} through {@code maxIndex} (both inclusive).
 *
 * <p>With {@code isRandom} set, primitive fields receive random values; otherwise they receive
 * fixed defaults (0, 0L, false, 0.0, 0f). String, enum and object fields are delegated to
 * {@code fillString}, {@code fillEnum} and {@code fillObject}.
 *
 * @param isRandom whether to generate random values instead of the fixed defaults
 * @param tuple tuple to populate; its schema decides the type written at each position
 * @param minIndex first position to fill
 * @param maxIndex last position to fill
 * @throws RuntimeException wrapping any exception raised while filling, including the
 *     {@link IllegalArgumentException} produced for an unsupported field type
 */
protected static void fillTuple(boolean isRandom, ITuple tuple, int minIndex, int maxIndex) {
  try {
    for (int pos = minIndex; pos <= maxIndex; pos++) {
      Field current = tuple.getSchema().getField(pos);
      switch (current.getType()) {
        case INT:
          tuple.set(pos, isRandom ? random.nextInt() : 0);
          break;
        case LONG:
          // The default stays a long so the stored value has the same boxed type either way.
          tuple.set(pos, isRandom ? random.nextLong() : 0L);
          break;
        case FLOAT:
          tuple.set(pos, isRandom ? random.nextFloat() : 0f);
          break;
        case DOUBLE:
          tuple.set(pos, isRandom ? random.nextDouble() : 0.0);
          break;
        case BOOLEAN:
          tuple.set(pos, isRandom ? random.nextBoolean() : false);
          break;
        case STRING:
          fillString(isRandom, tuple, pos);
          break;
        case ENUM:
          fillEnum(isRandom, current, tuple, pos);
          break;
        case OBJECT:
          fillObject(isRandom, tuple, current, pos);
          break;
        default:
          throw new IllegalArgumentException("Not supported type " + current.getType());
      }
    }
  } catch (Exception cause) {
    // Every failure, checked or not, surfaces to callers as an unchecked RuntimeException.
    throw new RuntimeException(cause);
  }
}
/**
 * Prepares the three schemas used by the tests: {@code schema1} and {@code schema2} are parsed
 * from textual field descriptions, while {@code schema3} is assembled field by field and also
 * includes an enum field and an object field.
 */
@Before
public void init() throws TupleMRException {
  List<Field> firstFields =
      Fields.parse("int_field:int, string_field:string,boolean_field:boolean");
  this.schema1 = new Schema("schema1", firstFields);

  List<Field> secondFields =
      Fields.parse("long_field:long,boolean_field:boolean, int_field:int");
  this.schema2 = new Schema("schema2", secondFields);

  List<Field> thirdFields = new ArrayList<Field>();
  // Scalar fields.
  thirdFields.add(Field.create("int_field", Type.INT));
  thirdFields.add(Field.create("string_field", Type.STRING));
  thirdFields.add(Field.create("long_field", Type.LONG));
  thirdFields.add(Field.create("float_field", Type.FLOAT));
  thirdFields.add(Field.create("double_field", Type.DOUBLE));
  thirdFields.add(Field.create("boolean_field", Type.BOOLEAN));
  // Enum and object fields.
  thirdFields.add(Field.createEnum("enum_field", Order.class));
  thirdFields.add(Field.createObject("thrift_field", A.class));
  this.schema3 = new Schema("schema3", thirdFields);
}
/**
 * Compares two single-int tuples (1 versus 2) with a {@link SortComparator} under two ascending
 * criteria: one using {@code revIntComp} as custom comparator, which must yield a positive
 * result, and one without custom comparator, which must yield a negative result.
 */
@Test
public void testCompare() {
  ArrayList<Field> schemaFields = new ArrayList<Field>();
  schemaFields.add(Field.create("int", Field.Type.INT));
  Schema schema = new Schema("schema", schemaFields);

  OrderBy customOrder = new OrderBy().add("int", Order.ASC, revIntComp);
  Criteria customCriteria = new Criteria(customOrder.getElements());
  OrderBy plainOrder = new OrderBy().add("int", Order.ASC);
  Criteria plainCriteria = new Criteria(plainOrder.getElements());

  Tuple smaller = new Tuple(schema);
  Tuple larger = new Tuple(schema);
  int[] fieldIndexes = {0};
  smaller.set("int", 1);
  larger.set("int", 2);

  SortComparator comparator = new SortComparator();
  int customResult =
      comparator.compare(schema, customCriteria, smaller, fieldIndexes, larger, fieldIndexes);
  assertPositive(customResult);
  int plainResult =
      comparator.compare(schema, plainCriteria, smaller, fieldIndexes, larger, fieldIndexes);
  assertNegative(plainResult);
}
/**
 * Stores one constant of the enum declared by {@code field} at position {@code index} of
 * {@code tuple}: a randomly chosen constant when {@code isRandom} is true, the first declared
 * constant otherwise.
 *
 * <p>The constants are obtained by reflectively invoking the static {@code values()} method of
 * the field's object class.
 *
 * @param isRandom whether to pick a random constant instead of the first one
 * @param field field whose object class supplies the constants
 * @param tuple tuple receiving the constant
 * @param index position in {@code tuple} to write to
 * @throws Exception if the reflective lookup or invocation of {@code values()} fails
 */
protected static void fillEnum(boolean isRandom, Field field, ITuple tuple, int index)
    throws Exception {
  Class<?> enumClass = field.getObjectClass();
  Method valuesMethod = enumClass.getMethod("values", (Class<?>[]) null);
  Enum<?>[] constants = (Enum<?>[]) valuesMethod.invoke(null);

  int chosen = isRandom ? random.nextInt(constants.length) : 0;
  tuple.set(index, constants[chosen]);
}