Ejemplo n.º 1
0
  /**
   * Merges two schemas whose second field disagrees in its nested shape: {@code b1.b12} is a
   * tuple while {@code b2.b22} is a {@code long}.
   *
   * <p>In {@code Union} mode the merged schema is expected to keep {@code a1:chararray} and
   * {@code c1:long} and to carry an empty inner schema for {@code b1}. In {@code LoadForEach}
   * mode the merge is expected to throw a {@code FrontendException} with error code 1031,
   * whichever schema is passed first.
   */
  @Test
  public void testNewMergeMismatchType2() throws Throwable {
    LogicalSchema a =
        org.apache.pig.newplan.logical.Util.translateSchema(
            Utils.getSchemaFromString("a1:chararray, b1:(b11:double, b12:(b121:int)), c1:long"));
    LogicalSchema b =
        org.apache.pig.newplan.logical.Util.translateSchema(
            Utils.getSchemaFromString("a2:bytearray, b2:(b21:double, b22:long), c2:int"));

    LogicalSchema mergedSchema = LogicalSchema.merge(a, b, LogicalSchema.MergeMode.Union);
    LogicalSchema expected =
        org.apache.pig.newplan.logical.Util.translateSchema(
            Utils.getSchemaFromString("a1:chararray, b1:(), c1:long"));
    // The parsed "b1:()" is replaced with an explicitly empty inner schema before comparing.
    expected.getField(1).schema = new LogicalSchema();
    Assert.assertTrue(LogicalSchema.equals(mergedSchema, expected, false, false));

    try {
      LogicalSchema.merge(a, b, LogicalSchema.MergeMode.LoadForEach);
      Assert.fail("LoadForEach merge of (a, b) should have thrown FrontendException");
    } catch (FrontendException e) {
      // assertEquals reports the actual error code when the check fails.
      Assert.assertEquals(1031, e.getErrorCode());
    }

    try {
      LogicalSchema.merge(b, a, LogicalSchema.MergeMode.LoadForEach);
      Assert.fail("LoadForEach merge of (b, a) should have thrown FrontendException");
    } catch (FrontendException e) {
      Assert.assertEquals(1031, e.getErrorCode());
    }
  }
Ejemplo n.º 2
0
  /**
   * Exercises {@code LogicalSchema.merge} on nested schemas in three scenarios: a {@code Union}
   * merge of (a, b), a {@code LoadForEach} merge of (a, b), and a {@code LoadForEach} merge of
   * (b, a). Each result is compared against a hand-built expected schema.
   */
  @Test
  public void testNewNormalNestedMerge2() throws Exception {
    final LogicalSchema left =
        org.apache.pig.newplan.logical.Util.translateSchema(
            Utils.getSchemaFromString(
                "a1:(a11:chararray, a12:float), b1:(b11:chararray, b12:float), c1:long"));
    final LogicalSchema right =
        org.apache.pig.newplan.logical.Util.translateSchema(
            Utils.getSchemaFromString("a2:bytearray, b2:(b21:double, b22:long), c2:chararray"));

    // Scenario 1: Union of (left, right).
    final LogicalSchema unionResult =
        LogicalSchema.merge(left, right, LogicalSchema.MergeMode.Union);
    final LogicalSchema unionExpected =
        org.apache.pig.newplan.logical.Util.translateSchema(
            Utils.getSchemaFromString("a1:(a11:chararray, a12:float), b1:(), c1:bytearray"));
    // Field b1 is compared against an explicitly empty inner schema.
    unionExpected.getField(1).schema = new LogicalSchema();
    Assert.assertTrue(LogicalSchema.equals(unionResult, unionExpected, false, false));

    // Scenario 2: LoadForEach of (left, right).
    final LogicalSchema forwardResult =
        LogicalSchema.merge(left, right, LogicalSchema.MergeMode.LoadForEach);
    final LogicalSchema forwardExpected =
        org.apache.pig.newplan.logical.Util.translateSchema(
            Utils.getSchemaFromString(
                "a1:(a11:chararray, a12:float), b1:(b11:chararray, b12:float), c1:long"));
    Assert.assertTrue(LogicalSchema.equals(forwardResult, forwardExpected, false, false));

    // Scenario 3: LoadForEach of (right, left).
    final LogicalSchema reverseResult =
        LogicalSchema.merge(right, left, LogicalSchema.MergeMode.LoadForEach);
    final LogicalSchema reverseExpected =
        org.apache.pig.newplan.logical.Util.translateSchema(
            Utils.getSchemaFromString(
                "a2:(a11:chararray, a12:float), b2:(b21:double, b22:long), c2:chararray"));
    Assert.assertTrue(LogicalSchema.equals(reverseResult, reverseExpected, false, false));
  }
Ejemplo n.º 3
0
 /**
  * Merges two bag-of-tuple schemas with {@code Schema.mergeSchema(s1, s2, true)} and expects the
  * result to equal {@code s2}. See PIG-730.
  */
 @Test
 public void testMergeSchemaWithTwoLevelAccess() throws Exception {
   // Generate two schemas
   Schema s1 = Utils.getSchemaFromString("a:{t:(a0:int, a1:int)}");
   Schema s2 = Utils.getSchemaFromString("b:{t:(b0:int, b1:int)}");
   // NOTE(review): both calls below target s1, so the second presumably just overwrites the
   // first. The second call may have been meant for s2 -- confirm before changing; it is left
   // as-is here so the test's behavior is unchanged.
   s1.getField(0).schema.setTwoLevelAccessRequired(true);
   s1.getField(0).schema.setTwoLevelAccessRequired(false);
   Schema s3 = Schema.mergeSchema(s1, s2, true);
   // Expected value first, actual second, so a failure message labels them correctly.
   Assert.assertEquals(s2, s3);
 }
Ejemplo n.º 4
0
 /**
  * Merges two bag-of-tuple logical schemas in {@code LoadForEach} mode, translates the result
  * with {@code Util.translateSchema}, and checks its string form. See PIG-730.
  */
 @Test
 public void testMergeSchemaWithTwoLevelAccess3() throws Exception {
   // Generate two schemas
   LogicalSchema ls1 = Utils.parseSchema("a:{t:(a0:int, a1:int)}");
   LogicalSchema ls2 = Utils.parseSchema("b:{t:(b0:int, b1:int)}");
   LogicalSchema ls3 = LogicalSchema.merge(ls1, ls2, MergeMode.LoadForEach);
   // assertEquals prints both strings on mismatch, unlike assertTrue(x.equals(y)).
   Assert.assertEquals(
       "{a: {(a0: int,a1: int)}}",
       org.apache.pig.newplan.logical.Util.translateSchema(ls3).toString());
 }
Ejemplo n.º 5
0
  /**
   * Merges a three-field schema with a two-field schema.
   *
   * <p>The {@code Union} merge is expected to return {@code null}; the {@code LoadForEach} merge
   * is expected to throw a {@code FrontendException} with error code 1031.
   */
  @Test
  public void testNewMergeDifferentSize1() throws Throwable {
    LogicalSchema a = Utils.parseSchema("a1:bytearray, b1:long, c1:long");
    LogicalSchema b = Utils.parseSchema("a2:bytearray, b2:long");

    LogicalSchema mergedSchema = LogicalSchema.merge(a, b, LogicalSchema.MergeMode.Union);
    Assert.assertNull(mergedSchema);

    try {
      LogicalSchema.merge(a, b, LogicalSchema.MergeMode.LoadForEach);
      Assert.fail("LoadForEach merge of different-size schemas should have thrown");
    } catch (FrontendException e) {
      // assertEquals reports the actual error code when the check fails.
      Assert.assertEquals(1031, e.getErrorCode());
    }
  }
Ejemplo n.º 6
0
  /**
   * Merges a schema with a fully specified tuple field ({@code b1:(b11:int, b12:float)}) against
   * one whose tuple field is declared empty ({@code b2:()}), in {@code Union} mode and in
   * {@code LoadForEach} mode with both argument orders.
   */
  @Test
  public void testNewMergeNullSchemas() throws Throwable {
    final LogicalSchema detailed =
        Utils.parseSchema("a1:bytearray, b1:(b11:int, b12:float), c1:long");
    final LogicalSchema sparse = Utils.parseSchema("a2:bytearray, b2:(), c2:int");

    // Union of (detailed, sparse).
    final LogicalSchema unionResult =
        LogicalSchema.merge(detailed, sparse, LogicalSchema.MergeMode.Union);
    final LogicalSchema unionExpected = Utils.parseSchema("a1:bytearray, b1:(), c1:long");
    Assert.assertTrue(LogicalSchema.equals(unionResult, unionExpected, false, false));

    // LoadForEach of (detailed, sparse).
    final LogicalSchema forwardResult =
        LogicalSchema.merge(detailed, sparse, LogicalSchema.MergeMode.LoadForEach);
    final LogicalSchema forwardExpected =
        Utils.parseSchema("a1:bytearray, b1:(b11:int, b12:float), c1:long");
    Assert.assertTrue(LogicalSchema.equals(forwardResult, forwardExpected, false, false));

    // LoadForEach of (sparse, detailed).
    final LogicalSchema reverseResult =
        LogicalSchema.merge(sparse, detailed, LogicalSchema.MergeMode.LoadForEach);
    final LogicalSchema reverseExpected =
        Utils.parseSchema("a2:bytearray, b2:(b11:int,b12:float), c2:int");
    Assert.assertTrue(LogicalSchema.equals(reverseResult, reverseExpected, false, false));
  }
Ejemplo n.º 7
0
    /**
     * Configures the Reduce plan, the POPackage operator and the reporter.
     *
     * <p>Reads serialized state from the job configuration (keys {@code udf.import.list}, {@code
     * pig.pigContext}, {@code pig.reducePlan} and {@code pig.reduce.package}), publishes the
     * context/configuration through the {@code sJobContext}, {@code sJobConfInternal} and {@code
     * sJobConf} fields, and records the plan's roots and first leaf when the plan is non-empty.
     *
     * @param context the task context supplying the configuration and task attempt id
     * @throws IOException declared by the overridden method; an {@code IOException} raised
     *     inside the try block below is rethrown wrapped in a {@code RuntimeException}
     * @throws InterruptedException declared by the overridden method
     */
    // "unchecked" covers the cast of the deserialized import list to ArrayList<String>.
    @SuppressWarnings("unchecked")
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
      super.setup(context);
      inIllustrator = inIllustrator(context);
      // In illustrator mode the package operator comes from getPack(context) instead of being
      // deserialized from the configuration (see the !inIllustrator branch further down).
      if (inIllustrator) pack = getPack(context);
      Configuration jConf = context.getConfiguration();
      SpillableMemoryManager.configure(ConfigurationUtil.toProperties(jConf));
      // Store this task's numeric id in the configuration under PigConstants.TASK_INDEX.
      context
          .getConfiguration()
          .set(
              PigConstants.TASK_INDEX,
              Integer.toString(context.getTaskAttemptID().getTaskID().getId()));
      // Publish the context and its configuration through the sJob* fields.
      sJobContext = context;
      sJobConfInternal.set(context.getConfiguration());
      sJobConf = context.getConfiguration();
      try {
        PigContext.setPackageImportList(
            (ArrayList<String>) ObjectSerializer.deserialize(jConf.get("udf.import.list")));
        pigContext = (PigContext) ObjectSerializer.deserialize(jConf.get("pig.pigContext"));

        // This attempts to fetch all of the generated code from the distributed cache, and resolve
        // it
        SchemaTupleBackend.initialize(jConf, pigContext);

        // Only deserialize the reduce plan if one has not already been set.
        if (rp == null)
          rp = (PhysicalPlan) ObjectSerializer.deserialize(jConf.get("pig.reducePlan"));
        stores = PlanHelper.getPhysicalOperators(rp, POStore.class);

        if (!inIllustrator)
          pack = (POPackage) ObjectSerializer.deserialize(jConf.get("pig.reduce.package"));
        // To be removed
        if (rp.isEmpty()) log.debug("Reduce Plan empty!");
        else {
          // Dump the plan's explain output to the debug log.
          ByteArrayOutputStream baos = new ByteArrayOutputStream();
          rp.explain(baos);
          log.debug(baos.toString());
        }
        pigReporter = new ProgressableReporter();
        // roots and leaf are only assigned for a non-empty plan; leaf is the first leaf returned.
        if (!(rp.isEmpty())) {
          roots = rp.getRoots().toArray(new PhysicalOperator[1]);
          leaf = rp.getLeaves().get(0);
        }

        // Get the UDF specific context
        MapRedUtil.setupUDFContext(jConf);

      } catch (IOException ioe) {
        // Rethrow unchecked, keeping the original exception as the cause.
        String msg = "Problem while configuring reduce plan.";
        throw new RuntimeException(msg, ioe);
      }

      log.info(
          "Aliases being processed per job phase (AliasName[line,offset]): "
              + jConf.get("pig.alias.location"));

      Utils.setDefaultTimeZone(PigMapReduce.sJobConfInternal.get());
    }
 /**
  * Recursively appends {@code node} to {@code fifo} after all of its predecessors, covering both
  * regular and soft-link predecessors as reported by {@code mPlan}.
  *
  * @param node the node to process
  * @param seen nodes already appended; a node found here is skipped
  * @param fifo output collection that receives nodes with predecessors ahead of dependents
  * @throws VisitorException declared for callers and overriders; propagated from recursive calls
  */
 protected void doAllPredecessors(O node, Set<O> seen, Collection<O> fifo)
     throws VisitorException {
   // Already emitted on an earlier path -- nothing left to do.
   if (seen.contains(node)) {
     return;
   }

   Collection<O> upstream =
       Utils.mergeCollection(mPlan.getPredecessors(node), mPlan.getSoftLinkPredecessors(node));
   if (upstream != null && !upstream.isEmpty()) {
     // Every predecessor has to be emitted before this node.
     for (O parent : upstream) {
       doAllPredecessors(parent, seen, fifo);
     }
   }

   // All predecessors are done; record and emit this node.
   seen.add(node);
   fifo.add(node);
 }
Ejemplo n.º 9
0
  /**
   * PIG-2146: checks that the POStore at the leaf of the last MR operator's reduce plan (the
   * store that writes the actual output of a distinct + limit query) carries the load schema.
   */
  @Test
  public void testSchemaInStoreForDistinctLimit() throws Exception {
    // Single source for the schema text, so the load clause and the expectation cannot drift.
    String loadSchema = "a : int,b :float ,c : int";
    String query =
        "a = load 'input1' as ("
            + loadSchema
            + ");"
            + "b  = distinct a;"
            + "c = limit b 10;"
            + "store c into 'output';";

    PhysicalPlan pp = Util.buildPp(pigServer, query);
    MROperPlan mrPlan = Util.buildMRPlan(pp, pc);
    MapReduceOper secondMrOper = mrPlan.getLeaves().get(0);
    POStore store = (POStore) secondMrOper.reducePlan.getLeaves().get(0);
    // Expected value first, actual second, so a failure message labels them correctly.
    assertEquals(
        "compare load and store schema",
        Utils.getSchemaFromString(loadSchema),
        store.getSchema());
  }
Ejemplo n.º 10
0
  /**
   * Prepares this storer for writing.
   *
   * <p>Looks up the schema string stored under {@code SCHEMA_SIGNATURE} in the UDF properties
   * keyed by this class and {@code udfContextSignature}. If one is present, it is parsed into
   * {@code schema}; a parse failure is logged as a warning and {@code schema} is left unchanged.
   * A {@code Headers.DEFAULT} header treatment is then resolved to {@code
   * Headers.SKIP_OUTPUT_HEADER} before delegating to the superclass.
   *
   * @param writer the record writer handed on to {@code super.prepareToWrite}
   */
  public void prepareToWrite(RecordWriter writer) {
    // Get the schema string from the UDFContext object.
    UDFContext udfc = UDFContext.getUDFContext();
    Properties p = udfc.getUDFProperties(this.getClass(), new String[] {udfContextSignature});

    String strSchema = p.getProperty(SCHEMA_SIGNATURE);
    if (strSchema != null) {
      // Parse the schema from the string stored in the properties object.
      try {
        schema = new ResourceSchema(Utils.getSchemaFromString(strSchema));
      } catch (ParserException pex) {
        // Best effort: keep going without a schema, but log the offending text and the cause
        // so the failure can be diagnosed.
        logger.warn("Could not parse schema for storing: " + strSchema, pex);
      }
    }

    if (headerTreatment == Headers.DEFAULT) {
      headerTreatment = Headers.SKIP_OUTPUT_HEADER;
    }

    // PigStorage's prepareToWrite()
    super.prepareToWrite(writer);
  }
Ejemplo n.º 11
0
 /**
  * Resolves the schema for {@code location} by delegating to {@code Utils.getSchema}, passing
  * this object, the location, the literal flag {@code true} and the job.
  *
  * <p>NOTE(review): the meaning of the boolean flag is not visible from this method -- check
  * {@code Utils.getSchema} before changing it.
  *
  * @param location the location whose schema is requested
  * @param job the job passed through to {@code Utils.getSchema}
  * @return whatever {@code Utils.getSchema} returns for these arguments
  * @throws IOException if the delegate throws it
  */
 @Override
 public ResourceSchema getSchema(String location, Job job) throws IOException {
   final ResourceSchema resolved = Utils.getSchema(this, location, true, job);
   return resolved;
 }