コード例 #1
0
ファイル: TestSchema.java プロジェクト: dmeister/pig-cll-gz
  /**
   * Builds two three-field schemas whose middle field is created with a {@code null} schema,
   * asserts that they compare equal, then attaches an empty schema to the middle field of the
   * second one and asserts that the two no longer compare equal.
   */
  @Test
  public void testSchemaEqualWithNullSchema1() {

    List<FieldSchema> leftFields = new ArrayList<FieldSchema>();
    leftFields.add(new FieldSchema("1a", DataType.BYTEARRAY));
    leftFields.add(new FieldSchema("1b", null));
    leftFields.add(new FieldSchema("1c", DataType.INTEGER));

    List<FieldSchema> rightFields = new ArrayList<FieldSchema>();
    rightFields.add(new FieldSchema("1a", DataType.BYTEARRAY));
    rightFields.add(new FieldSchema("1b", null));
    rightFields.add(new FieldSchema("1c", DataType.INTEGER));

    Schema left = new Schema(leftFields);
    Schema right = new Schema(rightFields);

    // Both sides were built identically, so they are expected to match.
    boolean equalBeforeChange = Schema.equals(left, right, false, false);
    Assert.assertTrue(equalBeforeChange);

    // Give only the right-hand middle field an (empty) inner schema.
    List<FieldSchema> noFields = new ArrayList<FieldSchema>();
    Schema emptySchema = new Schema(noFields);
    rightFields.get(1).schema = emptySchema;

    // A null inner schema versus an empty one is expected to be a mismatch.
    boolean equalAfterChange = Schema.equals(left, right, false, false);
    Assert.assertFalse(equalAfterChange);
  }
コード例 #2
0
ファイル: TestSchema.java プロジェクト: dmeister/pig-cll-gz
  /**
   * Compares a bag whose field holds a schema directly against a bag whose field holds a schema
   * flagged with {@code setTwoLevelAccessRequired(true)} that wraps the same fields in a tuple.
   *
   * <p>The two are first asserted equal; then an inner alias, an inner type and an outer type
   * are changed in turn on the second side and the outcome of {@code Schema.equals} is asserted
   * for different settings of its two boolean flags.
   */
  @Test
  public void testSchemaEqualTwoLevelAccess() throws Exception {

    List<FieldSchema> nestedFieldsA = new ArrayList<FieldSchema>();
    nestedFieldsA.add(new FieldSchema("11a", DataType.INTEGER));
    nestedFieldsA.add(new FieldSchema("11b", DataType.LONG));

    List<FieldSchema> nestedFieldsB = new ArrayList<FieldSchema>();
    nestedFieldsB.add(new FieldSchema("11a", DataType.INTEGER));
    nestedFieldsB.add(new FieldSchema("11b", DataType.LONG));

    Schema nestedA = new Schema(nestedFieldsA);
    Schema nestedB = new Schema(nestedFieldsB);

    List<FieldSchema> fieldsA = new ArrayList<FieldSchema>();
    fieldsA.add(new FieldSchema("1a", DataType.BYTEARRAY));
    fieldsA.add(new FieldSchema("1b", nestedA));
    fieldsA.add(new FieldSchema("1c", DataType.INTEGER));

    List<FieldSchema> fieldsB = new ArrayList<FieldSchema>();
    fieldsB.add(new FieldSchema("1a", DataType.BYTEARRAY));
    fieldsB.add(new FieldSchema("1b", nestedB));
    fieldsB.add(new FieldSchema("1c", DataType.INTEGER));

    Schema rowA = new Schema(fieldsA);
    Schema rowB = new Schema(fieldsB);

    // Side A: the bag field carries the row schema directly.
    Schema.FieldSchema directBagField = new Schema.FieldSchema("b", rowA, DataType.BAG);
    Schema directBag = new Schema(directBagField);

    // Side B: the row schema sits in a tuple field, inside a schema marked as two-level access.
    Schema.FieldSchema tupleField = new Schema.FieldSchema("t", rowB, DataType.TUPLE);
    Schema tupleWrapper = new Schema(tupleField);
    tupleWrapper.setTwoLevelAccessRequired(true);
    Schema.FieldSchema wrappedBagField =
        new Schema.FieldSchema("b", tupleWrapper, DataType.BAG);
    Schema wrappedBag = new Schema(wrappedBagField);

    Assert.assertTrue(Schema.equals(directBag, wrappedBag, false, false));

    // Rename an inner field on side B: mismatch unless the fourth flag is set.
    nestedFieldsB.get(1).alias = "pi";

    Assert.assertFalse(Schema.equals(directBag, wrappedBag, false, false));
    Assert.assertTrue(Schema.equals(directBag, wrappedBag, false, true));

    // Restore the alias and change the inner type: mismatch unless the third flag is set.
    nestedFieldsB.get(1).alias = "11b";
    nestedFieldsB.get(1).type = DataType.BYTEARRAY;

    Assert.assertFalse(Schema.equals(directBag, wrappedBag, false, false));
    Assert.assertTrue(Schema.equals(directBag, wrappedBag, true, false));

    // Restore the inner type: the two sides are expected to match again.
    nestedFieldsB.get(1).type = DataType.LONG;

    Assert.assertTrue(Schema.equals(directBag, wrappedBag, false, false));

    // Change the type of the first outer field on side B: expected mismatch.
    fieldsB.get(0).type = DataType.CHARARRAY;
    Assert.assertFalse(Schema.equals(directBag, wrappedBag, false, false));
  }
コード例 #3
0
ファイル: TestSchema.java プロジェクト: dmeister/pig-cll-gz
  /**
   * Merges two nested schemas that differ both in field count (3 vs. 4 outer fields, 3 vs. 2
   * inner fields) and in field types, using {@code Schema.mergeSchema(..., true, true, true)},
   * and asserts that the result equals a hand-built expected schema.
   */
  @Test
  public void testMergeDifferentSizeAndTypeMismatch1() throws Throwable {

    // Inner schema of the first input: three fields.
    List<FieldSchema> firstInnerFields = new ArrayList<FieldSchema>();
    firstInnerFields.add(new FieldSchema("11a", DataType.INTEGER));
    firstInnerFields.add(new FieldSchema("11b", DataType.FLOAT));
    firstInnerFields.add(new FieldSchema("11c", DataType.CHARARRAY));

    // Inner schema of the second input: two fields, the second one without an alias.
    List<FieldSchema> secondInnerFields = new ArrayList<FieldSchema>();
    secondInnerFields.add(new FieldSchema("22a", DataType.CHARARRAY));
    secondInnerFields.add(new FieldSchema(null, DataType.LONG));

    Schema firstInner = new Schema(firstInnerFields);
    Schema secondInner = new Schema(secondInnerFields);

    List<FieldSchema> firstFields = new ArrayList<FieldSchema>();
    firstFields.add(new FieldSchema("1a", DataType.INTEGER));
    firstFields.add(new FieldSchema("1b", firstInner));
    firstFields.add(new FieldSchema("1c", DataType.LONG));

    List<FieldSchema> secondFields = new ArrayList<FieldSchema>();
    secondFields.add(new FieldSchema("2a", DataType.CHARARRAY));
    secondFields.add(new FieldSchema("2b", secondInner));
    secondFields.add(new FieldSchema("2c", DataType.INTEGER));
    secondFields.add(new FieldSchema("2d", DataType.MAP));

    Schema first = new Schema(firstFields);
    Schema second = new Schema(secondFields);

    // Schema under test.
    Schema actual = Schema.mergeSchema(first, second, true, true, true);

    // Hand-built expectation for the merged inner schema.
    List<FieldSchema> wantedInnerFields = new ArrayList<FieldSchema>();
    wantedInnerFields.add(new FieldSchema("22a", DataType.BYTEARRAY));
    wantedInnerFields.add(new FieldSchema("11b", DataType.FLOAT));
    wantedInnerFields.add(new FieldSchema("11c", DataType.CHARARRAY));

    Schema wantedInner = new Schema(wantedInnerFields);

    // Hand-built expectation for the merged outer schema.
    List<FieldSchema> wantedFields = new ArrayList<FieldSchema>();
    wantedFields.add(new FieldSchema("2a", DataType.BYTEARRAY));
    wantedFields.add(new FieldSchema("2b", wantedInner));
    wantedFields.add(new FieldSchema("2c", DataType.LONG));
    wantedFields.add(new FieldSchema("2d", DataType.MAP));

    Schema wanted = new Schema(wantedFields);

    boolean matchesExpectation = Schema.equals(actual, wanted, false, false);
    Assert.assertTrue(matchesExpectation);
  }
コード例 #4
0
ファイル: TestSchema.java プロジェクト: dmeister/pig-cll-gz
  /**
   * Merges two same-sized nested schemas through the instance method
   * {@code merge(other, true)} and asserts that the result equals a hand-built expected schema.
   */
  @Test
  public void testNormalNestedMerge1() {

    // Inner schemas of the two inputs (two fields each).
    List<FieldSchema> firstInnerFields = new ArrayList<FieldSchema>();
    firstInnerFields.add(new FieldSchema("11a", DataType.INTEGER));
    firstInnerFields.add(new FieldSchema("11b", DataType.FLOAT));

    List<FieldSchema> secondInnerFields = new ArrayList<FieldSchema>();
    secondInnerFields.add(new FieldSchema("22a", DataType.DOUBLE));
    secondInnerFields.add(new FieldSchema(null, DataType.LONG));

    Schema firstInner = new Schema(firstInnerFields);
    Schema secondInner = new Schema(secondInnerFields);

    // Outer schemas of the two inputs (three fields each).
    List<FieldSchema> firstFields = new ArrayList<FieldSchema>();
    firstFields.add(new FieldSchema("1a", DataType.BYTEARRAY));
    firstFields.add(new FieldSchema("1b", firstInner));
    firstFields.add(new FieldSchema("1c", DataType.LONG));

    List<FieldSchema> secondFields = new ArrayList<FieldSchema>();
    secondFields.add(new FieldSchema("2a", DataType.BYTEARRAY));
    secondFields.add(new FieldSchema("2b", secondInner));
    secondFields.add(new FieldSchema("2c", DataType.INTEGER));

    Schema first = new Schema(firstFields);
    Schema second = new Schema(secondFields);

    // Schema under test.
    Schema actual = first.merge(second, true);

    // Hand-built expectation: inner schema first, then the outer schema that contains it.
    List<FieldSchema> wantedInnerFields = new ArrayList<FieldSchema>();
    wantedInnerFields.add(new FieldSchema("22a", DataType.DOUBLE));
    wantedInnerFields.add(new FieldSchema("11b", DataType.FLOAT));

    Schema wantedInner = new Schema(wantedInnerFields);

    List<FieldSchema> wantedFields = new ArrayList<FieldSchema>();
    wantedFields.add(new FieldSchema("2a", DataType.BYTEARRAY));
    wantedFields.add(new FieldSchema("2b", wantedInner));
    wantedFields.add(new FieldSchema("2c", DataType.LONG));

    Schema wanted = new Schema(wantedFields);

    boolean matchesExpectation = Schema.equals(actual, wanted, false, false);
    Assert.assertTrue(matchesExpectation);
  }
コード例 #5
0
ファイル: TestSchema.java プロジェクト: dmeister/pig-cll-gz
  /**
   * Builds two identical nested schemas, asserts that they compare equal, and then changes an
   * inner alias, an inner type and an outer type in turn on the second one, asserting the
   * outcome of {@code Schema.equals} for different settings of its two boolean flags.
   */
  @Test
  public void testSchemaEqual1() {

    List<FieldSchema> nestedFieldsA = new ArrayList<FieldSchema>();
    nestedFieldsA.add(new FieldSchema("11a", DataType.INTEGER));
    nestedFieldsA.add(new FieldSchema("11b", DataType.LONG));

    List<FieldSchema> nestedFieldsB = new ArrayList<FieldSchema>();
    nestedFieldsB.add(new FieldSchema("11a", DataType.INTEGER));
    nestedFieldsB.add(new FieldSchema("11b", DataType.LONG));

    Schema nestedA = new Schema(nestedFieldsA);
    Schema nestedB = new Schema(nestedFieldsB);

    List<FieldSchema> fieldsA = new ArrayList<FieldSchema>();
    fieldsA.add(new FieldSchema("1a", DataType.BYTEARRAY));
    fieldsA.add(new FieldSchema("1b", nestedA));
    fieldsA.add(new FieldSchema("1c", DataType.INTEGER));

    List<FieldSchema> fieldsB = new ArrayList<FieldSchema>();
    fieldsB.add(new FieldSchema("1a", DataType.BYTEARRAY));
    fieldsB.add(new FieldSchema("1b", nestedB));
    fieldsB.add(new FieldSchema("1c", DataType.INTEGER));

    Schema sideA = new Schema(fieldsA);
    Schema sideB = new Schema(fieldsB);

    // Identical construction: expected to match.
    Assert.assertTrue(Schema.equals(sideA, sideB, false, false));

    // Rename an inner field on side B: mismatch unless the fourth flag is set.
    nestedFieldsB.get(1).alias = "pi";

    Assert.assertFalse(Schema.equals(sideA, sideB, false, false));
    Assert.assertTrue(Schema.equals(sideA, sideB, false, true));

    // Restore the alias and change the inner type: mismatch unless the third flag is set.
    nestedFieldsB.get(1).alias = "11b";
    nestedFieldsB.get(1).type = DataType.BYTEARRAY;

    Assert.assertFalse(Schema.equals(sideA, sideB, false, false));
    Assert.assertTrue(Schema.equals(sideA, sideB, true, false));

    // Restore the inner type: the two sides are expected to match again.
    nestedFieldsB.get(1).type = DataType.LONG;

    Assert.assertTrue(Schema.equals(sideA, sideB, false, false));

    // Change the type of the first outer field on side B: expected mismatch.
    fieldsB.get(0).type = DataType.CHARARRAY;
    Assert.assertFalse(Schema.equals(sideA, sideB, false, false));
  }
コード例 #6
0
  /**
   * Builds a plan in which alias {@code a} feeds two cogroups (a query similar to the one
   * reported in JIRA PIG-537), type-checks it, runs the optimizer, and asserts that the schema
   * of the first leaf equals the expected schema with the fourth {@code Schema.equals} flag set.
   *
   * @throws Exception if type checking reports an error, or if any plan step fails
   */
  @Test
  public void testImplicitSplitInCoGroup2() throws Exception {
    // Statements are fed to the tester in order; the plan returned for the last one is used.
    String[] statements = {
      "a = load 'file1' using PigStorage(':') as (name:chararray, marks:int);",
      "b = load 'file2' using PigStorage(':') as (name:chararray, rank:chararray);",
      "c = cogroup a by name, b by name;",
      "d = foreach c generate group, FLATTEN(a.marks) as newmarks;",
      "e = cogroup a by marks, d by newmarks;",
      "f = foreach e generate group, flatten(a), flatten(d);"
    };

    LogicalPlanTester tester = new LogicalPlanTester();
    LogicalPlan plan = null;
    for (String statement : statements) {
      plan = tester.buildPlan(statement);
    }

    // Set the logical plan values correctly in all the operators
    PlanSetter planSetter = new PlanSetter(plan);
    planSetter.visit();

    // Type-check the plan and print whatever the validator collected.
    CompilationMessageCollector messages = new CompilationMessageCollector();
    TypeCheckingValidator validator = new TypeCheckingValidator();
    validator.validate(plan, messages);
    printMessageCollector(messages);
    printTypeGraph(plan);

    if (messages.hasError()) {
      throw new Exception("Error during type checking");
    }

    // this will run ImplicitSplitInserter
    TestLogicalOptimizer.optimizePlan(plan);

    // Compare the schema of the first leaf against the expected one.
    Schema expectedSchema =
        Util.getSchemaFromString(
            "grp: int,A::username: chararray,A::marks: int,AB::group: chararray,AB::newmarks: int");
    Schema leafSchema = plan.getLeaves().get(0).getSchema();
    assertTrue(Schema.equals(expectedSchema, leafSchema, false, true));
  }
コード例 #7
0
ファイル: TestSchema.java プロジェクト: dmeister/pig-cll-gz
  /**
   * Merges a schema whose middle field was built with a {@code null} inner schema with one whose
   * middle field has a two-field inner schema, using
   * {@code Schema.mergeSchema(..., true, false, true)}, and asserts that the result equals a
   * hand-built expected schema whose middle field is again built with {@code null}.
   */
  @Test
  public void testMergeNullSchemas1() throws Throwable {

    // Inner schema that only the second input has.
    List<FieldSchema> secondInnerFields = new ArrayList<FieldSchema>();
    secondInnerFields.add(new FieldSchema("22a", DataType.DOUBLE));
    secondInnerFields.add(new FieldSchema(null, DataType.LONG));

    Schema secondInner = new Schema(secondInnerFields);

    // First input: the middle field is created with a null schema.
    List<FieldSchema> firstFields = new ArrayList<FieldSchema>();
    firstFields.add(new FieldSchema("1a", DataType.BYTEARRAY));
    firstFields.add(new FieldSchema("1b", null));
    firstFields.add(new FieldSchema("1c", DataType.LONG));

    // Second input: the middle field carries the inner schema built above.
    List<FieldSchema> secondFields = new ArrayList<FieldSchema>();
    secondFields.add(new FieldSchema("2a", DataType.BYTEARRAY));
    secondFields.add(new FieldSchema("2b", secondInner));
    secondFields.add(new FieldSchema("2c", DataType.INTEGER));

    Schema first = new Schema(firstFields);
    Schema second = new Schema(secondFields);

    // Schema under test.
    Schema actual = Schema.mergeSchema(first, second, true, false, true);

    // Hand-built expectation.
    List<FieldSchema> wantedFields = new ArrayList<FieldSchema>();
    wantedFields.add(new FieldSchema("2a", DataType.BYTEARRAY));
    wantedFields.add(new FieldSchema("2b", null));
    wantedFields.add(new FieldSchema("2c", DataType.LONG));

    Schema wanted = new Schema(wantedFields);

    boolean matchesExpectation = Schema.equals(actual, wanted, false, false);
    Assert.assertTrue(matchesExpectation);
  }