@Test public void testCommonSortByToCriteria() throws TupleMRException { TupleMRConfigBuilder b = new TupleMRConfigBuilder(); b.addIntermediateSchema( new Schema("schema1", Fields.parse("a:int,b:string,c:string,blabla:string"))); b.addIntermediateSchema( new Schema("schema2", Fields.parse("a:int,c:string,b:string,bloblo:string"))); b.setGroupByFields("c", "b"); b.setOrderBy( new OrderBy() .add("b", Order.ASC) .add("c", Order.DESC) .addSchemaOrder(Order.DESC) .add("a", Order.DESC)); b.setSpecificOrderBy("schema1", new OrderBy().add("blabla", Order.DESC)); TupleMRConfig config = b.buildConf(); config.getSerializationInfo(); { List<SortElement> expectedCommon = new ArrayList<SortElement>(); expectedCommon.add(new SortElement("b", Order.ASC, Criteria.NullOrder.NULL_SMALLEST)); expectedCommon.add(new SortElement("c", Order.DESC, Criteria.NullOrder.NULL_SMALLEST)); Assert.assertEquals(new Criteria(expectedCommon), config.getCommonCriteria()); } { List<SortElement> expectedSchema1 = new ArrayList<SortElement>(); expectedSchema1.add(new SortElement("a", Order.DESC, Criteria.NullOrder.NULL_SMALLEST)); expectedSchema1.add(new SortElement("blabla", Order.DESC, Criteria.NullOrder.NULL_SMALLEST)); Assert.assertEquals(new Criteria(expectedSchema1), config.getSpecificOrderBys().get(0)); } { List<SortElement> expectedSchema2 = new ArrayList<SortElement>(); expectedSchema2.add(new SortElement("a", Order.DESC, Criteria.NullOrder.NULL_SMALLEST)); Assert.assertEquals(new Criteria(expectedSchema2), config.getSpecificOrderBys().get(1)); } }
@Test(expected = TupleMRException.class) public void testCommonOrderNotEmpty() throws TupleMRException { TupleMRConfigBuilder b = new TupleMRConfigBuilder(); b.addIntermediateSchema(new Schema("schema1", Fields.parse("a:int,b:string"))); b.setGroupByFields("b"); b.setOrderBy(new OrderBy()); b.buildConf(); }
@Test(expected = TupleMRException.class) public void testNotAllowedSourceOrderInOneSource() throws TupleMRException { TupleMRConfigBuilder b = new TupleMRConfigBuilder(); b.addIntermediateSchema(new Schema("schema1", Fields.parse("a:int,b:string"))); b.setGroupByFields("a"); b.setOrderBy(new OrderBy().add("a", Order.ASC).addSchemaOrder(Order.DESC)); b.buildConf(); }
@Test public void testAliases1() throws TupleMRException { TupleMRConfigBuilder b = new TupleMRConfigBuilder(); b.addIntermediateSchema( new Schema("schema1", Fields.parse("ax:int,bx:string,cx:string,blablax:string,p2:string"))); b.addIntermediateSchema( new Schema("schema2", Fields.parse("ay:int,cy:string,by:string,blobloy:string,p:string"))); { Aliases aliases1 = new Aliases(); aliases1.add("a", "ax"); aliases1.add("b", "bx"); aliases1.add("c", "cx"); aliases1.add("blabla", "blablax"); aliases1.add("p", "p2"); b.setFieldAliases("schema1", aliases1); } { Aliases aliases2 = new Aliases(); aliases2.add("a", "ay"); aliases2.add("b", "by"); aliases2.add("c", "cy"); aliases2.add("bloblo", "blobloy"); b.setFieldAliases("schema2", aliases2); } b.setGroupByFields("c", "b"); b.setOrderBy( new OrderBy() .add("b", Order.ASC) .add("c", Order.DESC) .addSchemaOrder(Order.DESC) .add("a", Order.DESC)); b.setSpecificOrderBy("schema1", new OrderBy().add("blabla", Order.DESC)); b.setCustomPartitionFields("p"); TupleMRConfig config = b.buildConf(); SerializationInfo serInfo = config.getSerializationInfo(); System.out.println(serInfo.getCommonSchema()); System.out.println(serInfo.getPartitionFieldsIndexes()); { List<SortElement> expectedCommon = new ArrayList<SortElement>(); expectedCommon.add(new SortElement("b", Order.ASC, Criteria.NullOrder.NULL_SMALLEST)); expectedCommon.add(new SortElement("c", Order.DESC, Criteria.NullOrder.NULL_SMALLEST)); Assert.assertEquals(new Criteria(expectedCommon), config.getCommonCriteria()); } { List<SortElement> expectedSchema1 = new ArrayList<SortElement>(); expectedSchema1.add(new SortElement("a", Order.DESC, Criteria.NullOrder.NULL_SMALLEST)); expectedSchema1.add(new SortElement("blabla", Order.DESC, Criteria.NullOrder.NULL_SMALLEST)); Assert.assertEquals(new Criteria(expectedSchema1), config.getSpecificOrderBys().get(0)); } { List<SortElement> expectedSchema2 = new ArrayList<SortElement>(); expectedSchema2.add(new SortElement("a", Order.DESC, Criteria.NullOrder.NULL_SMALLEST)); Assert.assertEquals(new Criteria(expectedSchema2), config.getSpecificOrderBys().get(1)); } }
@Test(expected = TupleMRException.class) public void testSortFieldWithDifferentTypes2() throws TupleMRException { TupleMRConfigBuilder b = new TupleMRConfigBuilder(); b.addIntermediateSchema(new Schema("schema1", Fields.parse("a:int,b:string"))); b.addIntermediateSchema(new Schema("schema2", Fields.parse("a:int,b:boolean"))); b.setGroupByFields("a"); b.setOrderBy(new OrderBy().add("a", Order.ASC).add("b", Order.DESC)); b.buildConf(); }
@Test(expected = TupleMRException.class) public void testRollupPrefixGroupBy() throws TupleMRException { TupleMRConfigBuilder b = new TupleMRConfigBuilder(); b.addIntermediateSchema(new Schema("schema1", Fields.parse("a:int,b:string"))); b.setGroupByFields("b"); b.setOrderBy(new OrderBy().add("b", Order.DESC)); b.setRollupFrom(null); b.buildConf(); }
@Test(expected = TupleMRException.class) public void testCommonOrderPrefixGroupBy2() throws TupleMRException { TupleMRConfigBuilder b = new TupleMRConfigBuilder(); b.addIntermediateSchema(new Schema("schema1", Fields.parse("a:int,b:string,c:string"))); b.addIntermediateSchema(new Schema("schema2", Fields.parse("a:int,b:string,d:string"))); b.setGroupByFields("a", "b"); b.setOrderBy(new OrderBy().add("b", Order.ASC).addSchemaOrder(Order.DESC).add("a", Order.DESC)); b.buildConf(); }
@Test(expected = TupleMRException.class) public void testCustomPartitionFieldsPresentWithSameType() throws TupleMRException { TupleMRConfigBuilder b = new TupleMRConfigBuilder(); b.addIntermediateSchema(new Schema("schema1", Fields.parse("a:int,b:string"))); b.addIntermediateSchema(new Schema("schema2", Fields.parse("a:int,b:long"))); b.setGroupByFields("a"); b.setOrderBy(new OrderBy().add("a", Order.ASC)); b.setCustomPartitionFields("b"); b.buildConf(); }
@Test public void testCorrect2() throws TupleMRException { TupleMRConfigBuilder b = new TupleMRConfigBuilder(); b.addIntermediateSchema(new Schema("schema1", Fields.parse("a:int,b:string"))); b.addIntermediateSchema(new Schema("schema2", Fields.parse("a:int,b:string"))); b.setGroupByFields("a"); b.setOrderBy(new OrderBy().add("a", Order.ASC).addSchemaOrder(Order.DESC).add("b", Order.DESC)); TupleMRConfig conf = b.buildConf(); conf.getSerializationInfo(); }
@Test(expected = TupleMRException.class) public void testSourceOrderPresentInCommonWhenSecondarySet() throws TupleMRException { TupleMRConfigBuilder b = new TupleMRConfigBuilder(); b.addIntermediateSchema(new Schema("schema1", Fields.parse("a:int,b:string"))); b.addIntermediateSchema(new Schema("schema2", Fields.parse("a:int,b:string"))); b.setGroupByFields("b"); b.setOrderBy(new OrderBy().add("b", Order.DESC)); b.setSpecificOrderBy("schema1", new OrderBy().add("a", Order.ASC)); b.buildConf(); }
@Test(expected = TupleMRException.class) public void testSecondaryOrderNotEmpty() throws TupleMRException { TupleMRConfigBuilder b = new TupleMRConfigBuilder(); b.addIntermediateSchema(new Schema("schema1", Fields.parse("a:int,b:string"))); b.addIntermediateSchema(new Schema("schema2", Fields.parse("c:int,b:string"))); b.setGroupByFields("b"); b.setOrderBy(new OrderBy().add("b", Order.DESC).addSchemaOrder(Order.DESC)); b.setSpecificOrderBy("schema1", new OrderBy()); b.buildConf(); }
@Test(expected = TupleMRException.class) public void testCustomPartitionFieldsNotNull() throws TupleMRException { TupleMRConfigBuilder b = new TupleMRConfigBuilder(); b.addIntermediateSchema(new Schema("schema1", Fields.parse("a:int,b:string"))); b.setGroupByFields("a"); b.setOrderBy(new OrderBy().add("a", Order.ASC)); String[] array = null; b.setCustomPartitionFields(array); b.buildConf(); }
@Ignore @Test(expected = UnsupportedOperationException.class) public void testNotMutableConfig() throws TupleMRException { TupleMRConfigBuilder b = new TupleMRConfigBuilder(); b.addIntermediateSchema(new Schema("schema1", Fields.parse("a:int,b:string"))); b.addIntermediateSchema(new Schema("schema2", Fields.parse("b:string,a:int"))); b.setGroupByFields("a"); b.setOrderBy(new OrderBy().add("a", Order.ASC)); b.setCustomPartitionFields("b"); b.buildConf(); // TODO }
@Test(expected = TupleMRException.class) public void testSortFieldWithDifferentTypes1() throws TupleMRException { TupleMRConfigBuilder b = new TupleMRConfigBuilder(); b.addIntermediateSchema(new Schema("schema1", Fields.parse("a:int,b:string"))); b.addIntermediateSchema(new Schema("schema2", Fields.parse("a:int,b:boolean"))); b.setGroupByFields("a"); // not allowed to sort in common order by a field that has different types // even after source order // it can be confusing b.setOrderBy(new OrderBy().add("a", Order.ASC).addSchemaOrder(Order.DESC).add("b", Order.DESC)); b.buildConf(); }
@Test public void testCustomPartition() throws TupleMRException { TupleMRConfigBuilder b = new TupleMRConfigBuilder(); b.addIntermediateSchema(new Schema("schema1", Fields.parse("a:int,b:string"))); b.addIntermediateSchema(new Schema("schema2", Fields.parse("b:string,a:int"))); b.setGroupByFields("a"); b.setOrderBy(new OrderBy().add("a", Order.ASC)); b.setCustomPartitionFields("b"); TupleMRConfig config = b.buildConf(); System.out.println(config); SerializationInfo serInfo = config.getSerializationInfo(); int[] indexes0 = serInfo.getFieldsToPartition(0); int[] indexes1 = serInfo.getFieldsToPartition(1); Assert.assertArrayEquals(new int[] {1}, indexes0); Assert.assertArrayEquals(new int[] {0}, indexes1); }
@Test public void testWithFieldAliases() throws TupleMRException, IOException { TupleMRConfigBuilder b = new TupleMRConfigBuilder(); Schema schema1 = new Schema( "schema1", Fields.parse("user_id:int,operation:string,age:long,timestamp:int,country:string")); Schema schema2 = new Schema("schema2", Fields.parse("id:int,op:string,another_id:int,time:int")); b.addIntermediateSchema(schema1); b.addIntermediateSchema(schema2); b.setFieldAliases("schema1", new Aliases().add("id", "user_id").add("op", "operation")); b.setFieldAliases("schema2", new Aliases().add("timestamp", "time")); b.setGroupByFields("id", "op"); b.setOrderBy( new OrderBy() .add("op", Order.ASC) .add("id", Order.DESC) .addSchemaOrder(Order.DESC) .add("timestamp", Order.DESC)); b.setSpecificOrderBy("schema1", new OrderBy().add("country", Order.DESC)); TupleMRConfig conf = b.buildConf(); Configuration hconf = new Configuration(); TupleMRConfig.set(conf, hconf); TupleMRConfig deserConf = TupleMRConfig.get(hconf); System.out.println(conf); System.out.println("------------"); System.out.println(deserConf); Assert.assertEquals(conf, deserConf); hconf = new Configuration(); TupleMRConfig.set(deserConf, hconf); TupleMRConfig deserConf2 = TupleMRConfig.get(hconf); Assert.assertEquals(conf, deserConf2); }
@Test public void testExtended() throws TupleMRException, IOException { TupleMRConfigBuilder b = new TupleMRConfigBuilder(); b.addIntermediateSchema(schema1); b.addIntermediateSchema(schema2); b.addIntermediateSchema(schema3); b.setGroupByFields("int_field"); b.setOrderBy( new OrderBy() .add("int_field", Order.DESC) .addSchemaOrder(Order.DESC) .add("boolean_field", Order.DESC)); b.setRollupFrom("int_field"); b.setSpecificOrderBy( schema3.getName(), new OrderBy() .add( "thrift_field", Order.ASC, Criteria.NullOrder.NULL_SMALLEST, new DummyComparator())); TupleMRConfig conf = b.buildConf(); Configuration hconf = new Configuration(); TupleMRConfig.set(conf, hconf); TupleMRConfig deserConf = TupleMRConfig.get(hconf); System.out.println(conf); System.out.println("------------"); System.out.println(deserConf); Assert.assertEquals(conf, deserConf); hconf = new Configuration(); TupleMRConfig.set(deserConf, hconf); TupleMRConfig deserConf2 = TupleMRConfig.get(hconf); Assert.assertEquals(conf, deserConf2); }
@Test public void testWithCustomPartitionFields() throws TupleMRException, IOException { TupleMRConfigBuilder b = new TupleMRConfigBuilder(); b.addIntermediateSchema(schema1); b.addIntermediateSchema(schema2); b.addIntermediateSchema(schema3); b.setGroupByFields("int_field"); b.setOrderBy( new OrderBy() .add("int_field", Order.DESC) .addSchemaOrder(Order.DESC) .add("boolean_field", Order.DESC)); b.setRollupFrom("int_field"); b.setSpecificOrderBy(schema3.getName(), new OrderBy().add("thrift_field", Order.ASC)); b.setCustomPartitionFields("int_field", "boolean_field"); TupleMRConfig conf = b.buildConf(); TupleMRConfig deserConf = TupleMRConfig.parse(conf.toString()); Assert.assertEquals(conf, deserConf); TupleMRConfig deserConf2 = TupleMRConfig.parse(deserConf.toString()); Assert.assertEquals(conf, deserConf2); System.out.println(conf); System.out.println(deserConf2); }