// Positive test @Test public void testSchemaEqualWithNullSchema1() { List<FieldSchema> list1 = new ArrayList<FieldSchema>(); list1.add(new FieldSchema("1a", DataType.BYTEARRAY)); list1.add(new FieldSchema("1b", null)); list1.add(new FieldSchema("1c", DataType.INTEGER)); List<FieldSchema> list2 = new ArrayList<FieldSchema>(); list2.add(new FieldSchema("1a", DataType.BYTEARRAY)); list2.add(new FieldSchema("1b", null)); list2.add(new FieldSchema("1c", DataType.INTEGER)); Schema schema1 = new Schema(list1); Schema schema2 = new Schema(list2); // First check Assert.assertTrue(Schema.equals(schema1, schema2, false, false)); // Manipulate List<FieldSchema> dummyList = new ArrayList<FieldSchema>(); Schema dummySchema = new Schema(dummyList); list2.get(1).schema = dummySchema; // And check again Assert.assertFalse(Schema.equals(schema1, schema2, false, false)); }
/**
 * Compares a bag schema that wraps its fields directly against a bag schema that wraps the
 * same fields inside a tuple field flagged with {@code setTwoLevelAccessRequired(true)}.
 *
 * <p>The test asserts the two are equal initially, then mutates an inner alias, an inner type
 * and finally an outer type on the second side, checking the outcome of
 * {@code Schema.equals} under different combinations of its two boolean flags.
 */
@Test
public void testSchemaEqualTwoLevelAccess() throws Exception {
    // Innermost field lists, identical on both sides to begin with.
    List<FieldSchema> nestedLeft = new ArrayList<FieldSchema>();
    nestedLeft.add(new FieldSchema("11a", DataType.INTEGER));
    nestedLeft.add(new FieldSchema("11b", DataType.LONG));

    List<FieldSchema> nestedRight = new ArrayList<FieldSchema>();
    nestedRight.add(new FieldSchema("11a", DataType.INTEGER));
    nestedRight.add(new FieldSchema("11b", DataType.LONG));

    Schema nestedLeftSchema = new Schema(nestedLeft);
    Schema nestedRightSchema = new Schema(nestedRight);

    // Middle level: three fields, the second one carrying the nested schema.
    List<FieldSchema> leftFields = new ArrayList<FieldSchema>();
    leftFields.add(new FieldSchema("1a", DataType.BYTEARRAY));
    leftFields.add(new FieldSchema("1b", nestedLeftSchema));
    leftFields.add(new FieldSchema("1c", DataType.INTEGER));

    List<FieldSchema> rightFields = new ArrayList<FieldSchema>();
    rightFields.add(new FieldSchema("1a", DataType.BYTEARRAY));
    rightFields.add(new FieldSchema("1b", nestedRightSchema));
    rightFields.add(new FieldSchema("1c", DataType.INTEGER));

    Schema leftTupleSchema = new Schema(leftFields);
    Schema rightTupleSchema = new Schema(rightFields);

    // Left side: the bag field holds the field schema directly.
    Schema.FieldSchema leftBagField = new Schema.FieldSchema("b", leftTupleSchema, DataType.BAG);
    Schema leftBag = new Schema(leftBagField);

    // Right side: the bag field holds a single tuple field "t" which in turn holds the
    // fields, and that intermediate schema is flagged as requiring two-level access.
    Schema.FieldSchema wrappingTuple = new Schema.FieldSchema("t", rightTupleSchema, DataType.TUPLE);
    Schema twoLevelSchema = new Schema(wrappingTuple);
    twoLevelSchema.setTwoLevelAccessRequired(true);
    Schema.FieldSchema rightBagField = new Schema.FieldSchema("b", twoLevelSchema, DataType.BAG);
    Schema rightBag = new Schema(rightBagField);

    Assert.assertTrue(Schema.equals(leftBag, rightBag, false, false));

    // The field that gets mutated in the steps below.
    FieldSchema innerSecond = nestedRight.get(1);

    // Alias differs: unequal with both flags off, equal when the fourth flag is on.
    innerSecond.alias = "pi";
    boolean strictWithAlias = Schema.equals(leftBag, rightBag, false, false);
    boolean aliasRelaxed = Schema.equals(leftBag, rightBag, false, true);
    Assert.assertFalse(strictWithAlias);
    Assert.assertTrue(aliasRelaxed);

    // Alias restored, inner type differs: unequal strictly, equal when the third flag is on.
    innerSecond.alias = "11b";
    innerSecond.type = DataType.BYTEARRAY;
    boolean strictWithType = Schema.equals(leftBag, rightBag, false, false);
    boolean innerRelaxed = Schema.equals(leftBag, rightBag, true, false);
    Assert.assertFalse(strictWithType);
    Assert.assertTrue(innerRelaxed);

    // Everything restored: equal again.
    innerSecond.type = DataType.LONG;
    Assert.assertTrue(Schema.equals(leftBag, rightBag, false, false));

    // A type change on a middle-level field breaks strict equality.
    FieldSchema rightFirst = rightFields.get(0);
    rightFirst.type = DataType.CHARARRAY;
    Assert.assertFalse(Schema.equals(leftBag, rightBag, false, false));
}
@Test public void testMergeDifferentSizeAndTypeMismatch1() throws Throwable { // Generate two schemas List<FieldSchema> innerList1 = new ArrayList<FieldSchema>(); innerList1.add(new FieldSchema("11a", DataType.INTEGER)); innerList1.add(new FieldSchema("11b", DataType.FLOAT)); innerList1.add(new FieldSchema("11c", DataType.CHARARRAY)); List<FieldSchema> innerList2 = new ArrayList<FieldSchema>(); innerList2.add(new FieldSchema("22a", DataType.CHARARRAY)); innerList2.add(new FieldSchema(null, DataType.LONG)); Schema innerSchema1 = new Schema(innerList1); Schema innerSchema2 = new Schema(innerList2); List<FieldSchema> list1 = new ArrayList<FieldSchema>(); list1.add(new FieldSchema("1a", DataType.INTEGER)); list1.add(new FieldSchema("1b", innerSchema1)); list1.add(new FieldSchema("1c", DataType.LONG)); List<FieldSchema> list2 = new ArrayList<FieldSchema>(); list2.add(new FieldSchema("2a", DataType.CHARARRAY)); list2.add(new FieldSchema("2b", innerSchema2)); list2.add(new FieldSchema("2c", DataType.INTEGER)); list2.add(new FieldSchema("2d", DataType.MAP)); Schema schema1 = new Schema(list1); Schema schema2 = new Schema(list2); // Merge Schema mergedSchema = Schema.mergeSchema(schema1, schema2, true, true, true); // Generate expected schema List<FieldSchema> expectedInnerList = new ArrayList<FieldSchema>(); expectedInnerList.add(new FieldSchema("22a", DataType.BYTEARRAY)); expectedInnerList.add(new FieldSchema("11b", DataType.FLOAT)); expectedInnerList.add(new FieldSchema("11c", DataType.CHARARRAY)); Schema expectedInner = new Schema(expectedInnerList); List<FieldSchema> expectedList = new ArrayList<FieldSchema>(); expectedList.add(new FieldSchema("2a", DataType.BYTEARRAY)); expectedList.add(new FieldSchema("2b", expectedInner)); expectedList.add(new FieldSchema("2c", DataType.LONG)); expectedList.add(new FieldSchema("2d", DataType.MAP)); Schema expected = new Schema(expectedList); // Compare Assert.assertTrue(Schema.equals(mergedSchema, expected, false, false)); }
@Test public void testNormalNestedMerge1() { // Generate two schemas List<FieldSchema> innerList1 = new ArrayList<FieldSchema>(); innerList1.add(new FieldSchema("11a", DataType.INTEGER)); innerList1.add(new FieldSchema("11b", DataType.FLOAT)); List<FieldSchema> innerList2 = new ArrayList<FieldSchema>(); innerList2.add(new FieldSchema("22a", DataType.DOUBLE)); innerList2.add(new FieldSchema(null, DataType.LONG)); Schema innerSchema1 = new Schema(innerList1); Schema innerSchema2 = new Schema(innerList2); List<FieldSchema> list1 = new ArrayList<FieldSchema>(); list1.add(new FieldSchema("1a", DataType.BYTEARRAY)); list1.add(new FieldSchema("1b", innerSchema1)); list1.add(new FieldSchema("1c", DataType.LONG)); List<FieldSchema> list2 = new ArrayList<FieldSchema>(); list2.add(new FieldSchema("2a", DataType.BYTEARRAY)); list2.add(new FieldSchema("2b", innerSchema2)); list2.add(new FieldSchema("2c", DataType.INTEGER)); Schema schema1 = new Schema(list1); Schema schema2 = new Schema(list2); // Merge Schema mergedSchema = schema1.merge(schema2, true); // Generate expected schema List<FieldSchema> expectedInnerList = new ArrayList<FieldSchema>(); expectedInnerList.add(new FieldSchema("22a", DataType.DOUBLE)); expectedInnerList.add(new FieldSchema("11b", DataType.FLOAT)); Schema expectedInner = new Schema(expectedInnerList); List<FieldSchema> expectedList = new ArrayList<FieldSchema>(); expectedList.add(new FieldSchema("2a", DataType.BYTEARRAY)); expectedList.add(new FieldSchema("2b", expectedInner)); expectedList.add(new FieldSchema("2c", DataType.LONG)); Schema expected = new Schema(expectedList); // Compare Assert.assertTrue(Schema.equals(mergedSchema, expected, false, false)); }
/**
 * Exercises {@code Schema.equals} on two identically built nested schemas.
 *
 * <p>The test asserts equality up front, then mutates an inner alias, an inner type and
 * finally an outer type on the second schema, checking the outcome under different
 * combinations of the two boolean flags passed to {@code Schema.equals}.
 */
@Test
public void testSchemaEqual1() {
    // Inner field lists, identical on both sides to begin with.
    List<FieldSchema> nestedLeft = new ArrayList<FieldSchema>();
    nestedLeft.add(new FieldSchema("11a", DataType.INTEGER));
    nestedLeft.add(new FieldSchema("11b", DataType.LONG));

    List<FieldSchema> nestedRight = new ArrayList<FieldSchema>();
    nestedRight.add(new FieldSchema("11a", DataType.INTEGER));
    nestedRight.add(new FieldSchema("11b", DataType.LONG));

    Schema nestedLeftSchema = new Schema(nestedLeft);
    Schema nestedRightSchema = new Schema(nestedRight);

    // Outer field lists with the nested schema in the middle position.
    List<FieldSchema> leftFields = new ArrayList<FieldSchema>();
    leftFields.add(new FieldSchema("1a", DataType.BYTEARRAY));
    leftFields.add(new FieldSchema("1b", nestedLeftSchema));
    leftFields.add(new FieldSchema("1c", DataType.INTEGER));

    List<FieldSchema> rightFields = new ArrayList<FieldSchema>();
    rightFields.add(new FieldSchema("1a", DataType.BYTEARRAY));
    rightFields.add(new FieldSchema("1b", nestedRightSchema));
    rightFields.add(new FieldSchema("1c", DataType.INTEGER));

    Schema left = new Schema(leftFields);
    Schema right = new Schema(rightFields);

    Assert.assertTrue(Schema.equals(left, right, false, false));

    // The field that gets mutated in the steps below.
    FieldSchema innerSecond = nestedRight.get(1);

    // Alias differs: unequal with both flags off, equal when the fourth flag is on.
    innerSecond.alias = "pi";
    boolean strictWithAlias = Schema.equals(left, right, false, false);
    boolean aliasRelaxed = Schema.equals(left, right, false, true);
    Assert.assertFalse(strictWithAlias);
    Assert.assertTrue(aliasRelaxed);

    // Alias restored, inner type differs: unequal strictly, equal when the third flag is on.
    innerSecond.alias = "11b";
    innerSecond.type = DataType.BYTEARRAY;
    boolean strictWithType = Schema.equals(left, right, false, false);
    boolean innerRelaxed = Schema.equals(left, right, true, false);
    Assert.assertFalse(strictWithType);
    Assert.assertTrue(innerRelaxed);

    // Everything restored: equal again.
    innerSecond.type = DataType.LONG;
    Assert.assertTrue(Schema.equals(left, right, false, false));

    // A type change on an outer field breaks strict equality.
    FieldSchema rightFirst = rightFields.get(0);
    rightFirst.type = DataType.CHARARRAY;
    Assert.assertFalse(Schema.equals(left, right, false, false));
}
@Test public void testImplicitSplitInCoGroup2() throws Exception { // this query is similar to the one reported in JIRA - PIG-537 LogicalPlanTester planTester = new LogicalPlanTester(); planTester.buildPlan("a = load 'file1' using PigStorage(':') as (name:chararray, marks:int);"); planTester.buildPlan( "b = load 'file2' using PigStorage(':') as (name:chararray, rank:chararray);"); planTester.buildPlan("c = cogroup a by name, b by name;"); planTester.buildPlan("d = foreach c generate group, FLATTEN(a.marks) as newmarks;"); planTester.buildPlan("e = cogroup a by marks, d by newmarks;"); LogicalPlan plan = planTester.buildPlan("f = foreach e generate group, flatten(a), flatten(d);"); // Set the logical plan values correctly in all the operators PlanSetter ps = new PlanSetter(plan); ps.visit(); // run through validator CompilationMessageCollector collector = new CompilationMessageCollector(); TypeCheckingValidator typeValidator = new TypeCheckingValidator(); typeValidator.validate(plan, collector); printMessageCollector(collector); printTypeGraph(plan); if (collector.hasError()) { throw new Exception("Error during type checking"); } // this will run ImplicitSplitInserter TestLogicalOptimizer.optimizePlan(plan); // get Schema of leaf and compare: Schema expectedSchema = Util.getSchemaFromString( "grp: int,A::username: chararray,A::marks: int,AB::group: chararray,AB::newmarks: int"); assertTrue(Schema.equals(expectedSchema, plan.getLeaves().get(0).getSchema(), false, true)); }
@Test public void testMergeNullSchemas1() throws Throwable { List<FieldSchema> innerList2 = new ArrayList<FieldSchema>(); innerList2.add(new FieldSchema("22a", DataType.DOUBLE)); innerList2.add(new FieldSchema(null, DataType.LONG)); Schema innerSchema2 = new Schema(innerList2); List<FieldSchema> list1 = new ArrayList<FieldSchema>(); list1.add(new FieldSchema("1a", DataType.BYTEARRAY)); list1.add(new FieldSchema("1b", null)); list1.add(new FieldSchema("1c", DataType.LONG)); List<FieldSchema> list2 = new ArrayList<FieldSchema>(); list2.add(new FieldSchema("2a", DataType.BYTEARRAY)); list2.add(new FieldSchema("2b", innerSchema2)); list2.add(new FieldSchema("2c", DataType.INTEGER)); Schema schema1 = new Schema(list1); Schema schema2 = new Schema(list2); // Merge Schema mergedSchema = Schema.mergeSchema(schema1, schema2, true, false, true); // Generate expected schema List<FieldSchema> expectedList = new ArrayList<FieldSchema>(); expectedList.add(new FieldSchema("2a", DataType.BYTEARRAY)); expectedList.add(new FieldSchema("2b", null)); expectedList.add(new FieldSchema("2c", DataType.LONG)); Schema expected = new Schema(expectedList); // Compare Assert.assertTrue(Schema.equals(mergedSchema, expected, false, false)); }