Esempio n. 1
0
  /**
   * Change the algebriac function type for algebraic functions in map and combine In map and
   * combine the algebraic functions will be leaf of the plan
   *
   * @param fe
   * @param type
   * @throws PlanException
   */
  private static void changeFunc(POForEach fe, byte type) throws PlanException {
    for (PhysicalPlan plan : fe.getInputPlans()) {
      List<PhysicalOperator> leaves = plan.getLeaves();
      if (leaves == null || leaves.size() != 1) {
        int errCode = 2019;
        String msg = "Expected to find plan with single leaf. Found " + leaves.size() + " leaves.";
        throw new PlanException(msg, errCode, PigException.BUG);
      }

      PhysicalOperator leaf = leaves.get(0);
      if (leaf instanceof POProject) {
        continue;
      }
      if (!(leaf instanceof POUserFunc)) {
        int errCode = 2020;
        String msg =
            "Expected to find plan with UDF or project leaf. Found "
                + leaf.getClass().getSimpleName();
        throw new PlanException(msg, errCode, PigException.BUG);
      }

      POUserFunc func = (POUserFunc) leaf;
      try {
        func.setAlgebraicFunction(type);
      } catch (ExecException e) {
        int errCode = 2075;
        String msg = "Could not set algebraic function type.";
        throw new PlanException(msg, errCode, PigException.BUG, e);
      }
    }
  }
Esempio n. 2
0
  /**
   * add algebraic functions with appropriate projection to new foreach in combiner
   *
   * @param cfe - the new foreach in combiner
   * @param op2newpos - mapping of physical operator to position in input
   * @throws CloneNotSupportedException
   * @throws PlanException
   */
  private static void addAlgebraicFuncToCombineFE(
      POForEach cfe, Map<PhysicalOperator, Integer> op2newpos)
      throws CloneNotSupportedException, PlanException {
    // an array that we will first populate with physical operators in order
    // of their position in input. Used while adding plans to combine
    // foreach just so that output of combine foreach same positions as
    // input. That means the same operator to position mapping can be used
    // by reduce as well
    PhysicalOperator[] opsInOrder = new PhysicalOperator[op2newpos.size() + 1];
    for (Map.Entry<PhysicalOperator, Integer> op2pos : op2newpos.entrySet()) {
      opsInOrder[op2pos.getValue()] = op2pos.getKey();
    }

    // first position is used by group column and a plan has been added for
    // it, so start with 1
    for (int i = 1; i < opsInOrder.length; i++) {
      // create new inner plan for foreach add cloned copy of given
      // physical operator and a new project. Even if the udf in query
      // takes multiple input, only one project needs to be added because
      // input to this udf will be the INITIAL version of udf evaluated in
      // map.
      PhysicalPlan newPlan = new PhysicalPlan();
      PhysicalOperator newOp = opsInOrder[i].clone();
      newPlan.add(newOp);
      POProject proj = new POProject(createOperatorKey(cfe.getOperatorKey().getScope()), 1, i);
      proj.setResultType(DataType.BAG);
      newPlan.add(proj);
      newPlan.connect(proj, newOp);
      cfe.addInputPlan(newPlan, false);
    }
  }
    /** Configures the Reduce plan, the POPackage operator and the reporter thread */
    @SuppressWarnings("unchecked")
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
      super.setup(context);
      inIllustrator = inIllustrator(context);
      if (inIllustrator) pack = getPack(context);
      Configuration jConf = context.getConfiguration();
      SpillableMemoryManager.configure(ConfigurationUtil.toProperties(jConf));
      context
          .getConfiguration()
          .set(
              PigConstants.TASK_INDEX,
              Integer.toString(context.getTaskAttemptID().getTaskID().getId()));
      sJobContext = context;
      sJobConfInternal.set(context.getConfiguration());
      sJobConf = context.getConfiguration();
      try {
        PigContext.setPackageImportList(
            (ArrayList<String>) ObjectSerializer.deserialize(jConf.get("udf.import.list")));
        pigContext = (PigContext) ObjectSerializer.deserialize(jConf.get("pig.pigContext"));

        // This attempts to fetch all of the generated code from the distributed cache, and resolve
        // it
        SchemaTupleBackend.initialize(jConf, pigContext);

        if (rp == null)
          rp = (PhysicalPlan) ObjectSerializer.deserialize(jConf.get("pig.reducePlan"));
        stores = PlanHelper.getPhysicalOperators(rp, POStore.class);

        if (!inIllustrator)
          pack = (POPackage) ObjectSerializer.deserialize(jConf.get("pig.reduce.package"));
        // To be removed
        if (rp.isEmpty()) log.debug("Reduce Plan empty!");
        else {
          ByteArrayOutputStream baos = new ByteArrayOutputStream();
          rp.explain(baos);
          log.debug(baos.toString());
        }
        pigReporter = new ProgressableReporter();
        if (!(rp.isEmpty())) {
          roots = rp.getRoots().toArray(new PhysicalOperator[1]);
          leaf = rp.getLeaves().get(0);
        }

        // Get the UDF specific context
        MapRedUtil.setupUDFContext(jConf);

      } catch (IOException ioe) {
        String msg = "Problem while configuring reduce plan.";
        throw new RuntimeException(msg, ioe);
      }

      log.info(
          "Aliases being processed per job phase (AliasName[line,offset]): "
              + jConf.get("pig.alias.location"));

      Utils.setDefaultTimeZone(PigMapReduce.sJobConfInternal.get());
    }
Esempio n. 4
0
  @Test
  public void testSim6() throws Exception {
    PhysicalPlan php = new PhysicalPlan();

    PhysicalPlan ldGrpChain1 = GenPhyOp.loadedGrpChain();
    PhysicalPlan ldGrpChain2 = GenPhyOp.loadedGrpChain();

    POLocalRearrange lr1 = GenPhyOp.topLocalRearrangeOp();
    POLocalRearrange lr2 = GenPhyOp.topLocalRearrangeOp();

    ldGrpChain1.addAsLeaf(lr1);
    ldGrpChain2.addAsLeaf(lr2);

    php.merge(ldGrpChain1);
    php.merge(ldGrpChain2);

    POGlobalRearrange gr = GenPhyOp.topGlobalRearrangeOp();
    php.addAsLeaf(gr);

    POPackage pk = GenPhyOp.topPackageOp();
    php.addAsLeaf(pk);

    POStore st = GenPhyOp.topStoreOp();
    php.addAsLeaf(st);
    run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC6.gld");
  }
Esempio n. 5
0
  @Test(expected = MRCompilerException.class)
  public void testMRCompilerErr() throws Exception {
    String query = "a = load 'input';" + "b = filter a by $0 > 5;" + "store b into 'output';";

    PhysicalPlan pp = Util.buildPp(pigServer, query);
    pp.remove(pp.getRoots().get(0));
    try {
      Util.buildMRPlan(new PhysicalPlan(), pc);
    } catch (MRCompilerException mrce) {
      assertEquals(2053, mrce.getErrorCode());
      throw mrce;
    }
  }
Esempio n. 6
0
  @Test
  public void testMergeJoin() throws Exception {
    String query =
        "a = load '/tmp/input1';"
            + "b = load '/tmp/input2';"
            + "c = join a by $0, b by $0 using 'merge';"
            + "store c into '/tmp/output1';";

    PhysicalPlan pp = Util.buildPp(pigServer, query);
    MRCompiler comp = new MRCompiler(pp, pc);
    comp.compile();
    MROperPlan mrp = comp.getMRPlan();
    assertTrue(mrp.size() == 2);

    MapReduceOper mrOp0 = mrp.getRoots().get(0);
    assertTrue(mrOp0.mapPlan.size() == 2);
    PhysicalOperator load0 = mrOp0.mapPlan.getRoots().get(0);
    MergeJoinIndexer func =
        (MergeJoinIndexer)
            PigContext.instantiateFuncFromSpec(((POLoad) load0).getLFile().getFuncSpec());
    Field lrField = MergeJoinIndexer.class.getDeclaredField("lr");
    lrField.setAccessible(true);
    POLocalRearrange lr = (POLocalRearrange) lrField.get(func);
    List<PhysicalPlan> innerPlans = lr.getPlans();
    PhysicalOperator localrearrange0 = mrOp0.mapPlan.getSuccessors(load0).get(0);
    assertTrue(localrearrange0 instanceof POLocalRearrange);
    assertTrue(mrOp0.reducePlan.size() == 3);
    PhysicalOperator pack0 = mrOp0.reducePlan.getRoots().get(0);
    assertTrue(pack0 instanceof POPackage);
    PhysicalOperator foreach0 = mrOp0.reducePlan.getSuccessors(pack0).get(0);
    assertTrue(foreach0 instanceof POForEach);
    PhysicalOperator store0 = mrOp0.reducePlan.getSuccessors(foreach0).get(0);
    assertTrue(store0 instanceof POStore);

    assertTrue(innerPlans.size() == 1);
    PhysicalPlan innerPlan = innerPlans.get(0);
    assertTrue(innerPlan.size() == 1);
    PhysicalOperator project = innerPlan.getRoots().get(0);
    assertTrue(project instanceof POProject);
    assertTrue(((POProject) project).getColumn() == 0);

    MapReduceOper mrOp1 = mrp.getSuccessors(mrOp0).get(0);
    assertTrue(mrOp1.mapPlan.size() == 3);
    PhysicalOperator load1 = mrOp1.mapPlan.getRoots().get(0);
    assertTrue(load1 instanceof POLoad);
    PhysicalOperator mergejoin1 = mrOp1.mapPlan.getSuccessors(load1).get(0);
    assertTrue(mergejoin1 instanceof POMergeJoin);
    PhysicalOperator store1 = mrOp1.mapPlan.getSuccessors(mergejoin1).get(0);
    assertTrue(store1 instanceof POStore);
    assertTrue(mrOp1.reducePlan.isEmpty());
  }
Esempio n. 7
0
  /**
   * Replace old POLocalRearrange with new pre-combine LR, add new map foreach, new
   * map-local-rearrange, and connect them
   *
   * @param mapPlan
   * @param preCombinerLR
   * @param mfe
   * @param mapAgg
   * @param mlr
   * @throws PlanException
   */
  private static void patchUpMap(
      PhysicalPlan mapPlan,
      POPreCombinerLocalRearrange preCombinerLR,
      POForEach mfe,
      POPartialAgg mapAgg,
      POLocalRearrange mlr)
      throws PlanException {
    POLocalRearrange oldLR = (POLocalRearrange) mapPlan.getLeaves().get(0);
    mapPlan.replace(oldLR, preCombinerLR);

    mapPlan.add(mfe);
    mapPlan.connect(preCombinerLR, mfe);

    // the operator before local rearrange
    PhysicalOperator opBeforeLR = mfe;

    if (mapAgg != null) {
      mapPlan.add(mapAgg);
      mapPlan.connect(mfe, mapAgg);
      opBeforeLR = mapAgg;
    }

    mapPlan.add(mlr);
    mapPlan.connect(opBeforeLR, mlr);
  }
Esempio n. 8
0
 /**
  * Recursively clone op and its predecessors from pplan and add them to newplan
  *
  * @param op
  * @param pplan
  * @param newplan
  * @return
  * @throws CloneNotSupportedException
  * @throws PlanException
  */
 private static PhysicalOperator addPredecessorsToPlan(
     PhysicalOperator op, PhysicalPlan pplan, PhysicalPlan newplan)
     throws CloneNotSupportedException, PlanException {
   PhysicalOperator newOp = op.clone();
   newplan.add(newOp);
   if (pplan.getPredecessors(op) == null || pplan.getPredecessors(op).size() == 0) {
     return newOp;
   }
   for (PhysicalOperator pred : pplan.getPredecessors(op)) {
     PhysicalOperator newPred = addPredecessorsToPlan(pred, pplan, newplan);
     newplan.connect(newPred, newOp);
   }
   return newOp;
 }
Esempio n. 9
0
 /**
  * Create a new foreach with same scope,alias as given foreach add an inner plan that projects the
  * group column, which is going to be the first input
  *
  * @param foreach source foreach
  * @param keyType type for group-by key
  * @return new POForeach
  */
 private static POForEach createForEachWithGrpProj(POForEach foreach, byte keyType) {
   String scope = foreach.getOperatorKey().scope;
   POForEach newFE = new POForEach(createOperatorKey(scope), new ArrayList<PhysicalPlan>());
   newFE.addOriginalLocation(foreach.getAlias(), foreach.getOriginalLocations());
   newFE.setResultType(foreach.getResultType());
   // create plan that projects the group column
   PhysicalPlan grpProjPlan = new PhysicalPlan();
   // group by column is the first column
   POProject proj = new POProject(createOperatorKey(scope), 1, 0);
   proj.setResultType(keyType);
   grpProjPlan.add(proj);
   newFE.addInputPlan(grpProjPlan, false);
   return newFE;
 }
  @Test
  public void testMRCompilerErr() throws Exception {
    planTester.buildPlan("a = load 'input';");
    LogicalPlan lp = planTester.buildPlan("b = filter a by $0 > 5;");

    PhysicalPlan pp = Util.buildPhysicalPlan(lp, pc);
    pp.remove(pp.getRoots().get(0));
    try {
      Util.buildMRPlan(new PhysicalPlan(), pc);
      fail("Expected failure.");
    } catch (MRCompilerException mrce) {
      assertTrue(mrce.getErrorCode() == 2053);
    }
  }
  public void testLimit() throws Exception {
    PhysicalPlan php = new PhysicalPlan();

    POLoad lC = GenPhyOp.topLoadOp();
    php.add(lC);

    POLimit op = new POLimit(new OperatorKey("", r.nextLong()), -1, null);

    php.add(op);
    php.connect(lC, op);

    POStore st = GenPhyOp.topStoreOp();
    php.addAsLeaf(st);
    run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC17.gld");
  }
  public void testSim5() throws Exception {
    PhysicalPlan php = new PhysicalPlan();
    PhysicalPlan ldFil1 = GenPhyOp.loadedFilter();
    PhysicalPlan ldFil2 = GenPhyOp.loadedFilter();
    php.merge(ldFil1);
    php.merge(ldFil2);

    POUnion un = GenPhyOp.topUnionOp();
    php.addAsLeaf(un);

    POStore st = GenPhyOp.topStoreOp();
    php.add(st);

    php.connect(un, st);
    run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC5.gld");
  }
    // return: false-more output
    //         true- end of processing
    public boolean processOnePackageOutput(Context oc) throws IOException, InterruptedException {

      Result res = pack.getNextTuple();
      if (res.returnStatus == POStatus.STATUS_OK) {
        Tuple packRes = (Tuple) res.result;

        if (rp.isEmpty()) {
          oc.write(null, packRes);
          return false;
        }
        for (int i = 0; i < roots.length; i++) {
          roots[i].attachInput(packRes);
        }
        runPipeline(leaf);
      }

      if (res.returnStatus == POStatus.STATUS_NULL) {
        return false;
      }

      if (res.returnStatus == POStatus.STATUS_ERR) {
        int errCode = 2093;
        String msg = "Encountered error in package operator while processing group.";
        throw new ExecException(msg, errCode, PigException.BUG);
      }

      if (res.returnStatus == POStatus.STATUS_EOP) {
        return true;
      }

      return false;
    }
Esempio n. 14
0
  @Test(expected = MRCompilerException.class)
  public void testMRCompilerErr1() throws Exception {
    PhysicalPlan pp = new PhysicalPlan();
    PhysicalPlan ldFil1 = GenPhyOp.loadedFilter();
    pp.merge(ldFil1);

    POSplit op = GenPhyOp.topSplitOp();
    pp.addAsLeaf(op);

    try {
      Util.buildMRPlan(pp, pc);
    } catch (MRCompilerException mrce) {
      assertEquals(2025, mrce.getErrorCode());
      throw mrce;
    }
  }
  private void run(PhysicalPlan pp, String expectedFile) throws Exception {
    String compiledPlan, goldenPlan = null;
    int MAX_SIZE = 100000;
    MRCompiler comp = new MRCompiler(pp, pc);
    comp.compile();

    MROperPlan mrp = comp.getMRPlan();
    PlanPrinter ppp = new PlanPrinter(mrp);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    ppp.print(baos);
    compiledPlan = baos.toString();

    if (generate) {
      FileOutputStream fos = new FileOutputStream(expectedFile);
      fos.write(baos.toByteArray());
      return;
    }
    FileInputStream fis = new FileInputStream(expectedFile);
    byte[] b = new byte[MAX_SIZE];
    int len = fis.read(b);
    goldenPlan = new String(b, 0, len);
    if (goldenPlan.charAt(len - 1) == '\n') goldenPlan = goldenPlan.substring(0, len - 1);

    pp.explain(System.out);
    System.out.println();
    System.out.println("<<<" + compiledPlan + ">>>");
    System.out.println("-------------");
    System.out.println("Golden");
    System.out.println("<<<" + goldenPlan + ">>>");
    System.out.println("-------------");
    assertEquals(goldenPlan, compiledPlan);
  }
  @Test
  public void testMRCompilerErr1() throws Exception {
    PhysicalPlan pp = new PhysicalPlan();
    PhysicalPlan ldFil1 = GenPhyOp.loadedFilter();
    pp.merge(ldFil1);

    POSplit op = GenPhyOp.topSplitOp();
    pp.addAsLeaf(op);

    try {
      Util.buildMRPlan(pp, pc);
      fail("Expected failure.");
    } catch (MRCompilerException mrce) {
      assertTrue(mrce.getErrorCode() == 2025);
    }
  }
Esempio n. 17
0
 /**
  * @param op
  * @param index
  * @param plan
  * @throws PlanException
  */
 private static void setProjectInput(PhysicalOperator op, PhysicalPlan plan, int index)
     throws PlanException {
   String scope = op.getOperatorKey().scope;
   POProject proj =
       new POProject(
           new OperatorKey(scope, NodeIdGenerator.getGenerator().getNextNodeId(scope)),
           op.getRequestedParallelism(),
           index);
   proj.setResultType(DataType.BAG);
   // Remove old connections and elements from the plan
   plan.trimAbove(op);
   plan.add(proj);
   plan.connect(proj, op);
   List<PhysicalOperator> inputs = Lists.newArrayList();
   inputs.add(proj);
   op.setInputs(inputs);
 }
Esempio n. 18
0
  /**
   * create new Local rearrange by cloning existing rearrange and add plan for projecting the key
   *
   * @param rearrange
   * @return
   * @throws PlanException
   * @throws CloneNotSupportedException
   */
  private static POLocalRearrange getNewRearrange(POLocalRearrange rearrange)
      throws PlanException, CloneNotSupportedException {
    POLocalRearrange newRearrange = rearrange.clone();

    // Set the projection to be the key
    PhysicalPlan newPlan = new PhysicalPlan();
    String scope = newRearrange.getOperatorKey().scope;
    POProject proj =
        new POProject(
            new OperatorKey(scope, NodeIdGenerator.getGenerator().getNextNodeId(scope)), -1, 0);
    proj.setResultType(newRearrange.getKeyType());
    newPlan.add(proj);

    List<PhysicalPlan> plans = new ArrayList<PhysicalPlan>(1);
    plans.add(newPlan);
    newRearrange.setPlansFromCombiner(plans);

    return newRearrange;
  }
  public void testDistinct1() throws Exception {
    PhysicalPlan php = new PhysicalPlan();
    PhysicalPlan ldFil1 = GenPhyOp.loadedFilter();
    php.merge(ldFil1);

    PODistinct op = new PODistinct(new OperatorKey("", r.nextLong()), -1, null);

    php.addAsLeaf(op);

    PhysicalPlan grpChain1 = GenPhyOp.grpChain();
    php.merge(grpChain1);
    php.connect(op, grpChain1.getRoots().get(0));

    PODistinct op1 = new PODistinct(new OperatorKey("", r.nextLong()), -1, null);

    php.addAsLeaf(op1);
    POStore st = GenPhyOp.topStoreOp();
    php.addAsLeaf(st);
    run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC16.gld");
  }
Esempio n. 20
0
  @Test
  public void testSim3() throws Exception {
    PhysicalPlan php = new PhysicalPlan();

    PhysicalPlan ldGrpChain1 = GenPhyOp.loadedGrpChain();
    PhysicalPlan ldGrpChain2 = GenPhyOp.loadedGrpChain();

    php.merge(ldGrpChain1);
    php.merge(ldGrpChain2);

    POUnion un = GenPhyOp.topUnionOp();
    php.addAsLeaf(un);

    PhysicalPlan ldFil1 = GenPhyOp.loadedFilter();

    php.merge(ldFil1);
    php.connect(ldFil1.getLeaves().get(0), un);

    POStore st = GenPhyOp.topStoreOp();
    php.add(st);

    php.connect(un, st);
    run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC3.gld");
  }
    /**
     * Will be called once all the intermediate keys and values are processed. So right place to
     * stop the reporter thread.
     */
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
      super.cleanup(context);

      if (errorInReduce) {
        // there was an error in reduce - just return
        return;
      }

      if (PigMapReduce.sJobConfInternal.get().get("pig.stream.in.reduce", "false").equals("true")) {
        // If there is a stream in the pipeline we could
        // potentially have more to process - so lets
        // set the flag stating that all map input has been sent
        // already and then lets run the pipeline one more time
        // This will result in nothing happening in the case
        // where there is no stream in the pipeline
        rp.endOfAllInput = true;
        runPipeline(leaf);
      }

      if (!inIllustrator) {
        for (POStore store : stores) {
          if (!initialized) {
            MapReducePOStoreImpl impl = new MapReducePOStoreImpl(context);
            store.setStoreImpl(impl);
            store.setUp();
          }
          store.tearDown();
        }
      }

      // Calling EvalFunc.finish()
      UDFFinishVisitor finisher =
          new UDFFinishVisitor(rp, new DependencyOrderWalker<PhysicalOperator, PhysicalPlan>(rp));
      try {
        finisher.visit();
      } catch (VisitorException e) {
        throw new IOException("Error trying to finish UDFs", e);
      }

      PhysicalOperator.setReporter(null);
      initialized = false;
    }
Esempio n. 22
0
  /**
   * stolen from JobControlCompiler TODO: refactor it to share this
   *
   * @param physicalPlan
   * @param poLoad
   * @param jobConf
   * @return
   * @throws java.io.IOException
   */
  private static JobConf configureLoader(PhysicalPlan physicalPlan, POLoad poLoad, JobConf jobConf)
      throws IOException {

    // 这部分似乎没用
    Job job = new Job(jobConf);
    LoadFunc loadFunc = poLoad.getLoadFunc();
    loadFunc.setLocation(poLoad.getLFile().getFileName(), job);

    // stolen from JobControlCompiler
    ArrayList<FileSpec> pigInputs = new ArrayList<FileSpec>();
    // Store the inp filespecs
    pigInputs.add(poLoad.getLFile());

    ArrayList<List<OperatorKey>> inpTargets = Lists.newArrayList();
    ArrayList<String> inpSignatures = Lists.newArrayList();
    ArrayList<Long> inpLimits = Lists.newArrayList();

    // Store the target operators for tuples read
    // from this input
    List<PhysicalOperator> loadSuccessors = physicalPlan.getSuccessors(poLoad);
    List<OperatorKey> loadSuccessorsKeys = Lists.newArrayList();
    if (loadSuccessors != null) {
      for (PhysicalOperator loadSuccessor : loadSuccessors) {
        loadSuccessorsKeys.add(loadSuccessor.getOperatorKey());
      }
    }

    inpTargets.add(loadSuccessorsKeys);
    inpSignatures.add(poLoad.getSignature());
    inpLimits.add(poLoad.getLimit());

    jobConf.set("pig.inputs", ObjectSerializer.serialize(pigInputs));
    jobConf.set("pig.inpTargets", ObjectSerializer.serialize(inpTargets));
    jobConf.set("pig.inpSignatures", ObjectSerializer.serialize(inpSignatures));
    jobConf.set("pig.inpLimits", ObjectSerializer.serialize(inpLimits));

    return jobConf;
  }
Esempio n. 23
0
  /**
   * Look for a algebraic POUserFunc as successor to this project, called recursively to skip any
   * other projects seen on the way.
   *
   * @param proj project
   * @param pplan physical plan
   * @return null if any operator other POProject or algebraic POUserFunc is found while going down
   *     the plan, otherwise algebraic POUserFunc is returned
   */
  private static POUserFunc getAlgebraicSuccessor(POProject proj, PhysicalPlan pplan) {
    // check if root is followed by combinable operator
    List<PhysicalOperator> succs = pplan.getSuccessors(proj);
    if (succs == null || succs.size() == 0) {
      return null;
    }
    if (succs.size() > 1) {
      // project shared by more than one operator - does not happen  in
      // plans generated today won't try to combine this
      return null;
    }

    PhysicalOperator succ = succs.get(0);
    if (succ instanceof POProject) {
      return getAlgebraicSuccessor((POProject) succ, pplan);
    }
    if (succ instanceof POUserFunc && ((POUserFunc) succ).combinable()) {
      return (POUserFunc) succ;
    }

    // some other operator ? can't combine
    return null;
  }
Esempio n. 24
0
  /**
   * Test that POSortedDistinct gets printed as POSortedDistinct
   *
   * @throws Exception
   */
  @Test
  public void testSortedDistinctInForeach() throws Exception {
    PhysicalPlan php = new PhysicalPlan();
    PhysicalPlan grpChain1 = GenPhyOp.loadedGrpChain();
    php.merge(grpChain1);

    List<PhysicalPlan> inputs = new LinkedList<PhysicalPlan>();
    PhysicalPlan inplan = new PhysicalPlan();
    PODistinct op1 = new POSortedDistinct(new OperatorKey("", r.nextLong()), -1, null);
    inplan.addAsLeaf(op1);
    inputs.add(inplan);
    List<Boolean> toFlattens = new ArrayList<Boolean>();
    toFlattens.add(false);
    POForEach pofe = new POForEach(new OperatorKey("", r.nextLong()), 1, inputs, toFlattens);

    php.addAsLeaf(pofe);
    POStore st = GenPhyOp.topStoreOp();
    php.addAsLeaf(st);
    run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC19.gld");
  }
Esempio n. 25
0
  @Test
  public void testSortUDF1() throws Exception {
    PhysicalPlan php = new PhysicalPlan();
    PhysicalPlan ldFil1 = GenPhyOp.loadedFilter();
    php.merge(ldFil1);

    // set up order by *
    String funcName = WeirdComparator.class.getName();
    POUserComparisonFunc comparator =
        new POUserComparisonFunc(
            new OperatorKey("", r.nextLong()), -1, null, new FuncSpec(funcName));
    POSort sort =
        new POSort(
            new OperatorKey("", r.nextLong()),
            -1,
            ldFil1.getLeaves(),
            null,
            new ArrayList<Boolean>(),
            comparator);
    sort.setRequestedParallelism(20);
    PhysicalPlan nesSortPlan = new PhysicalPlan();
    POProject topPrj = new POProject(new OperatorKey("", r.nextLong()));
    topPrj.setColumn(1);
    topPrj.setOverloaded(true);
    topPrj.setResultType(DataType.TUPLE);
    nesSortPlan.add(topPrj);

    POProject prjStar2 = new POProject(new OperatorKey("", r.nextLong()));
    prjStar2.setResultType(DataType.TUPLE);
    prjStar2.setStar(true);
    nesSortPlan.add(prjStar2);

    nesSortPlan.connect(topPrj, prjStar2);
    List<PhysicalPlan> nesSortPlanLst = new ArrayList<PhysicalPlan>();
    nesSortPlanLst.add(nesSortPlan);

    sort.setSortPlans(nesSortPlanLst);

    php.add(sort);
    php.connect(ldFil1.getLeaves().get(0), sort);
    // have a foreach which takes the sort output
    // and send it two two udfs
    List<String> udfs = new ArrayList<String>();
    udfs.add(COUNT.class.getName());
    udfs.add(SUM.class.getName());
    POForEach fe3 = GenPhyOp.topForEachOPWithUDF(udfs);
    php.add(fe3);
    php.connect(sort, fe3);

    // add a group above the foreach
    PhysicalPlan grpChain1 = GenPhyOp.grpChain();
    php.merge(grpChain1);
    php.connect(fe3, grpChain1.getRoots().get(0));

    udfs.clear();
    udfs.add(AVG.class.getName());
    POForEach fe4 = GenPhyOp.topForEachOPWithUDF(udfs);
    php.addAsLeaf(fe4);

    PhysicalPlan grpChain2 = GenPhyOp.grpChain();
    php.merge(grpChain2);
    php.connect(fe4, grpChain2.getRoots().get(0));

    udfs.clear();
    udfs.add(GFCross.class.getName() + "('1')");
    POForEach fe5 = GenPhyOp.topForEachOPWithUDF(udfs);
    php.addAsLeaf(fe5);

    POStore st = GenPhyOp.topStoreOp();
    php.addAsLeaf(st);
    run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC15.gld");
  }
Esempio n. 26
0
  @Test
  public void testSim9() throws Exception {
    PhysicalPlan php = new PhysicalPlan();

    POGlobalRearrange gr = GenPhyOp.topGlobalRearrangeOp();
    php.addAsLeaf(gr);

    PhysicalPlan ldFil1 = GenPhyOp.loadedFilter();
    PhysicalPlan ldFil2 = GenPhyOp.loadedFilter();

    php.merge(ldFil1);
    php.connect(ldFil1.getLeaves().get(0), gr);

    php.merge(ldFil2);
    php.connect(ldFil2.getLeaves().get(0), gr);

    POPackage pk = GenPhyOp.topPackageOp();
    php.addAsLeaf(pk);

    POStore st = GenPhyOp.topStoreOp();
    php.addAsLeaf(st);
    run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC9.gld");
  }
Esempio n. 27
0
  // Tests Single input case for both blocking and non-blocking
  // with both map and reduce phases
  @Test
  public void testSim1() throws Exception {
    PhysicalPlan php = new PhysicalPlan();
    POLoad ld = GenPhyOp.topLoadOp();
    php.add(ld);
    PhysicalPlan grpChain1 = GenPhyOp.grpChain();
    php.merge(grpChain1);

    php.connect(ld, grpChain1.getRoots().get(0));

    PhysicalOperator leaf = php.getLeaves().get(0);

    PhysicalPlan grpChain2 = GenPhyOp.grpChain();
    php.merge(grpChain2);

    php.connect(leaf, grpChain2.getRoots().get(0));

    leaf = php.getLeaves().get(0);
    POFilter fl = GenPhyOp.topFilterOp();
    php.add(fl);

    php.connect(leaf, fl);

    POStore st = GenPhyOp.topStoreOp();
    php.add(st);

    php.connect(fl, st);
    run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC1.gld");
  }
Esempio n. 28
0
  @Test
  public void testSpl3() throws Exception {
    PhysicalPlan php = new PhysicalPlan();

    POLoad lA = GenPhyOp.topLoadOp();
    POSplit spl = GenPhyOp.topSplitOp();
    php.add(lA);
    php.add(spl);
    php.connect(lA, spl);

    POFilter fl1 = GenPhyOp.topFilterOp();
    fl1.setRequestedParallelism(10);
    POFilter fl2 = GenPhyOp.topFilterOp();
    fl2.setRequestedParallelism(20);
    php.add(fl1);
    php.add(fl2);
    php.connect(spl, fl1);
    php.connect(spl, fl2);

    POSplit sp11 = GenPhyOp.topSplitOp();
    POSplit sp21 = GenPhyOp.topSplitOp();
    php.add(sp11);
    php.add(sp21);
    php.connect(fl1, sp11);
    php.connect(fl2, sp21);

    POFilter fl11 = GenPhyOp.topFilterOp();
    fl11.setRequestedParallelism(10);
    POFilter fl21 = GenPhyOp.topFilterOp();
    fl21.setRequestedParallelism(20);
    POFilter fl22 = GenPhyOp.topFilterOp();
    fl22.setRequestedParallelism(30);
    php.add(fl11);
    php.add(fl21);
    php.add(fl22);
    php.connect(sp11, fl11);
    php.connect(sp21, fl21);
    php.connect(sp21, fl22);

    POLocalRearrange lr1 = GenPhyOp.topLocalRearrangeOp();
    lr1.setRequestedParallelism(40);
    POLocalRearrange lr21 = GenPhyOp.topLocalRearrangeOp();
    lr21.setRequestedParallelism(15);
    POLocalRearrange lr22 = GenPhyOp.topLocalRearrangeOp();
    lr22.setRequestedParallelism(35);
    php.add(lr1);
    php.add(lr21);
    php.add(lr22);
    php.connect(fl11, lr1);
    php.connect(fl21, lr21);
    php.connect(fl22, lr22);

    POGlobalRearrange gr = GenPhyOp.topGlobalRearrangeOp();
    php.addAsLeaf(gr);

    POPackage pk = GenPhyOp.topPackageOp();
    pk.setRequestedParallelism(25);
    php.addAsLeaf(pk);

    POSplit sp2 = GenPhyOp.topSplitOp();
    php.addAsLeaf(sp2);

    POFilter fl3 = GenPhyOp.topFilterOp();
    fl3.setRequestedParallelism(100);
    POFilter fl4 = GenPhyOp.topFilterOp();
    fl4.setRequestedParallelism(80);
    php.add(fl3);
    php.add(fl4);
    php.connect(sp2, fl3);
    php.connect(sp2, fl4);

    POUnion un = GenPhyOp.topUnionOp();
    php.addAsLeaf(un);

    POStore st = GenPhyOp.topStoreOp();
    php.addAsLeaf(st);
    run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC14.gld");
  }
Esempio n. 29
0
  @Test
  public void testSpl2() throws Exception {
    PhysicalPlan php = new PhysicalPlan();

    POLoad lA = GenPhyOp.topLoadOp();
    POSplit spl = GenPhyOp.topSplitOp();
    php.add(lA);
    php.add(spl);
    php.connect(lA, spl);

    POFilter fl1 = GenPhyOp.topFilterOp();
    POFilter fl2 = GenPhyOp.topFilterOp();
    php.add(fl1);
    php.add(fl2);
    php.connect(spl, fl1);
    php.connect(spl, fl2);

    POLocalRearrange lr1 = GenPhyOp.topLocalRearrangeOp();
    POLocalRearrange lr2 = GenPhyOp.topLocalRearrangeOp();
    php.add(lr1);
    php.add(lr2);
    php.connect(fl1, lr1);
    php.connect(fl2, lr2);

    POGlobalRearrange gr = GenPhyOp.topGlobalRearrangeOp();
    php.addAsLeaf(gr);

    POPackage pk = GenPhyOp.topPackageOp();
    php.addAsLeaf(pk);

    POSplit sp2 = GenPhyOp.topSplitOp();
    php.addAsLeaf(sp2);

    POFilter fl3 = GenPhyOp.topFilterOp();
    POFilter fl4 = GenPhyOp.topFilterOp();
    php.add(fl3);
    php.add(fl4);
    php.connect(sp2, fl3);
    php.connect(sp2, fl4);

    POUnion un = GenPhyOp.topUnionOp();
    php.addAsLeaf(un);

    POStore st = GenPhyOp.topStoreOp();
    php.addAsLeaf(st);
    run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC13.gld");
  }
Esempio n. 30
0
  @Test
  public void testRun2() throws Exception {
    PhysicalPlan php = new PhysicalPlan();

    PhysicalPlan part1 = new PhysicalPlan();
    POLoad lC = GenPhyOp.topLoadOp();
    POFilter fC = GenPhyOp.topFilterOp();
    POLocalRearrange lrC = GenPhyOp.topLocalRearrangeOp();
    POGlobalRearrange grC = GenPhyOp.topGlobalRearrangeOp();
    POPackage pkC = GenPhyOp.topPackageOp();
    part1.add(lC);
    part1.add(fC);
    part1.connect(lC, fC);
    part1.add(lrC);
    part1.connect(fC, lrC);
    part1.add(grC);
    part1.connect(lrC, grC);
    part1.add(pkC);
    part1.connect(grC, pkC);

    POPackage pkD = GenPhyOp.topPackageOp();
    POLocalRearrange lrD = GenPhyOp.topLocalRearrangeOp();
    POGlobalRearrange grD = GenPhyOp.topGlobalRearrangeOp();
    POLoad lD = GenPhyOp.topLoadOp();
    part1.add(lD);
    part1.add(lrD);
    part1.connect(lD, lrD);

    part1.add(grD);
    part1.connect(lrD, grD);
    part1.add(pkD);
    part1.connect(grD, pkD);
    part1.connect(pkD, grC);

    POLoad lA = GenPhyOp.topLoadOp();
    POLoad lB = GenPhyOp.topLoadOp();

    // POLoad lC = lA;
    POFilter fA = GenPhyOp.topFilterOp();

    POLocalRearrange lrA = GenPhyOp.topLocalRearrangeOp();
    POLocalRearrange lrB = GenPhyOp.topLocalRearrangeOp();

    POGlobalRearrange grAB = GenPhyOp.topGlobalRearrangeOp();

    POPackage pkAB = GenPhyOp.topPackageOp();

    POFilter fAB = GenPhyOp.topFilterOp();
    POUnion unABC = GenPhyOp.topUnionOp();

    php.add(lA);
    php.add(lB);

    php.add(fA);

    php.connect(lA, fA);

    php.add(lrA);
    php.add(lrB);

    php.connect(fA, lrA);
    php.connect(lB, lrB);

    php.add(grAB);
    php.connect(lrA, grAB);
    php.connect(lrB, grAB);

    php.add(pkAB);
    php.connect(grAB, pkAB);

    php.add(fAB);
    php.connect(pkAB, fAB);

    php.merge(part1);

    List<PhysicalOperator> leaves = new ArrayList<PhysicalOperator>();
    for (PhysicalOperator phyOp : php.getLeaves()) {
      leaves.add(phyOp);
    }

    php.add(unABC);
    for (PhysicalOperator physicalOperator : leaves) {
      php.connect(physicalOperator, unABC);
    }

    POStore st = GenPhyOp.topStoreOp();

    php.add(st);
    php.connect(unABC, st);
    run(php, "test/org/apache/pig/test/data/GoldenFiles/MRC11.gld");
  }