コード例 #1
0
ファイル: CombinerOptimizerUtil.java プロジェクト: scr/pig
  /**
   * Changes the algebraic function type of the algebraic functions in the map and combine plans.
   * In map and combine the algebraic function is expected to be the single leaf of each input
   * plan. Plans whose single leaf is a {@code POProject} are left untouched.
   *
   * @param fe the foreach whose input plans are inspected and updated
   * @param type the algebraic function type handed to {@code POUserFunc.setAlgebraicFunction}
   * @throws PlanException if an input plan does not have exactly one leaf (error 2019), if that
   *     leaf is neither a project nor a user function (error 2020), or if the function type could
   *     not be set (error 2075)
   */
  private static void changeFunc(POForEach fe, byte type) throws PlanException {
    for (PhysicalPlan plan : fe.getInputPlans()) {
      List<PhysicalOperator> leaves = plan.getLeaves();
      if (leaves == null || leaves.size() != 1) {
        // Compute the count null-safely: dereferencing a null list while building the
        // message would raise a NullPointerException instead of the intended PlanException.
        int leafCount = (leaves == null) ? 0 : leaves.size();
        int errCode = 2019;
        String msg = "Expected to find plan with single leaf. Found " + leafCount + " leaves.";
        throw new PlanException(msg, errCode, PigException.BUG);
      }

      PhysicalOperator leaf = leaves.get(0);
      if (leaf instanceof POProject) {
        // Plain projections carry no algebraic function, so there is nothing to change.
        continue;
      }
      if (!(leaf instanceof POUserFunc)) {
        int errCode = 2020;
        String msg =
            "Expected to find plan with UDF or project leaf. Found "
                + leaf.getClass().getSimpleName();
        throw new PlanException(msg, errCode, PigException.BUG);
      }

      POUserFunc func = (POUserFunc) leaf;
      try {
        func.setAlgebraicFunction(type);
      } catch (ExecException e) {
        // Re-wrap as PlanException, keeping the original exception as the cause.
        int errCode = 2075;
        String msg = "Could not set algebraic function type.";
        throw new PlanException(msg, errCode, PigException.BUG, e);
      }
    }
  }
コード例 #2
0
ファイル: CombinerOptimizerUtil.java プロジェクト: scr/pig
    /**
     * Replaces a Project-PODistinct pair in {@code mPlan} with a single {@code POUserFunc} built
     * from {@code DISTINCT_UDF_CLASSNAME} that returns a bag, and records that function in
     * {@code distinct}.
     *
     * <p>Nothing is done unless the project has exactly one predecessor and that predecessor is a
     * {@code PODistinct}.
     *
     * @param proj the project operator being visited
     * @throws VisitorException (as {@code OptimizerException}) if a Project-Distinct pair was
     *     already patched (error 2076) or if the plan could not be reconfigured (error 2077)
     */
    @Override
    public void visitProject(POProject proj) throws VisitorException {
      // check if this project is preceded by PODistinct and
      // has the return type bag

      List<PhysicalOperator> preds = mPlan.getPredecessors(proj);
      // No predecessors: this is a leaf project and so not interesting for patching.
      // Checking the size here (before get(0)) also keeps an empty list from raising
      // IndexOutOfBoundsException; only a single-predecessor project can be patched.
      if (preds == null || preds.size() != 1) {
        return;
      }
      PhysicalOperator pred = preds.get(0);
      if (!(pred instanceof PODistinct)) {
        return;
      }

      if (distinct != null) {
        // we should not already have been patched since the
        // Project-Distinct pair should occur only once
        int errCode = 2076;
        String msg =
            "Unexpected Project-Distinct pair while trying to set up plans for use with combiner.";
        throw new OptimizerException(msg, errCode, PigException.BUG);
      }

      // we have to stick in the POUserFunc(org.apache.pig.builtin.Distinct)[DataBag]
      // in place of the Project-PODistinct pair
      // NOTE(review): assumes the PODistinct always has at least one predecessor - confirm.
      PhysicalOperator distinctPredecessor = mPlan.getPredecessors(pred).get(0);
      POUserFunc func = null;

      try {
        String scope = proj.getOperatorKey().scope;
        List<PhysicalOperator> funcInput = new ArrayList<PhysicalOperator>();
        FuncSpec fSpec = new FuncSpec(DISTINCT_UDF_CLASSNAME);
        funcInput.add(distinctPredecessor);
        // explicitly set distinctPredecessor's result type to
        // be tuple - this is relevant when distinctPredecessor is
        // originally a POForeach with return type BAG - we need to
        // set it to tuple so we get a stream of tuples.
        distinctPredecessor.setResultType(DataType.TUPLE);
        func =
            new POUserFunc(
                new OperatorKey(scope, NodeIdGenerator.getGenerator().getNextNodeId(scope)),
                -1,
                funcInput,
                fSpec);
        func.setResultType(DataType.BAG);
        mPlan.replace(proj, func);
        mPlan.remove(pred);
        // connect the newly added "func" to
        // the predecessor of the earlier PODistinct
        mPlan.connect(distinctPredecessor, func);
      } catch (PlanException e) {
        int errCode = 2077;
        String msg = "Problem with reconfiguring plan to add distinct built-in function.";
        throw new OptimizerException(msg, errCode, PigException.BUG, e);
      }
      // Remember the replacement so a second Project-Distinct pair is detected above.
      distinct = func;
    }