Ejemplo n.º 1
0
  /**
   * Analyzes the aggregation-relevant components of the select block (GROUP BY clause, select
   * list, HAVING clause, ORDER BY clause), substitutes AVG with SUM/COUNT, creates the
   * AggregateInfo (including the agg output tuple), and rewrites all post-agg exprs using the
   * AggregateInfo's smap.
   *
   * <p>Returns immediately if the block has no GROUP BY clause, is not SELECT DISTINCT and has
   * no aggregate expr among its result exprs.
   *
   * @param analyzer used to analyze the grouping exprs and the HAVING predicate, and with which
   *     the HAVING predicate is registered as a conjunct
   * @throws AnalysisException if the block aggregates without a FROM clause, combines SELECT
   *     DISTINCT with aggregates or GROUP BY, combines '*' with GROUP BY, uses an ambiguous
   *     alias or an aggregate function in GROUP BY, or if a select list, ORDER BY or HAVING
   *     expr is not bound by the aggregation output tuple
   * @throws InternalException declared but not thrown directly by this method; presumably
   *     propagated from a callee such as createAggInfo (confirm against the callees)
   */
  private void analyzeAggregation(Analyzer analyzer) throws AnalysisException, InternalException {
    if (groupingExprs == null
        && !selectList.isDistinct()
        && !Expr.contains(resultExprs, AggregateExpr.class)) {
      // we're not computing aggregates
      return;
    }

    // If we're computing an aggregate, we must have a FROM clause.
    if (tableRefs.size() == 0) {
      throw new AnalysisException("aggregation without a FROM clause is not allowed");
    }

    if ((groupingExprs != null || Expr.contains(resultExprs, AggregateExpr.class))
        && selectList.isDistinct()) {
      throw new AnalysisException(
          "cannot combine SELECT DISTINCT with aggregate functions or GROUP BY");
    }

    // disallow '*' and explicit GROUP BY (we can't group by '*', and if you need to
    // name all star-expanded cols in the group by clause you might as well do it
    // in the select list)
    if (groupingExprs != null) {
      for (SelectListItem item : selectList.getItems()) {
        if (item.isStar()) {
          throw new AnalysisException(
              "cannot combine '*' in select list with GROUP BY: " + item.toSql());
        }
      }
    }

    // analyze grouping exprs
    ArrayList<Expr> groupingExprsCopy = Lists.newArrayList();
    if (groupingExprs != null) {
      // make a deep copy here, we don't want to modify the original
      // exprs during analysis (in case we need to print them later)
      groupingExprsCopy = Expr.cloneList(groupingExprs, null);
      substituteOrdinals(groupingExprsCopy, "GROUP BY");
      Expr ambiguousAlias = getFirstAmbiguousAlias(groupingExprsCopy);
      if (ambiguousAlias != null) {
        throw new AnalysisException(
            "Column " + ambiguousAlias.toSql() + " in group by clause is ambiguous");
      }
      Expr.substituteList(groupingExprsCopy, aliasSMap);
      for (int i = 0; i < groupingExprsCopy.size(); ++i) {
        groupingExprsCopy.get(i).analyze(analyzer);
        if (groupingExprsCopy.get(i).contains(AggregateExpr.class)) {
          // reference the original expr in the error msg
          throw new AnalysisException(
              "GROUP BY expression must not contain aggregate functions: "
                  + groupingExprs.get(i).toSql());
        }
      }
    }

    // analyze having clause
    if (havingClause != null) {
      // substitute aliases in place (ordinals not allowed in having clause)
      havingPred = havingClause.clone(aliasSMap);
      havingPred.analyze(analyzer);
      havingPred.checkReturnsBool("HAVING clause", true);
      analyzer.registerConjuncts(havingPred, null, false);
    }

    // orderingExprs stays null when there is no sort info; it is passed as-is to
    // Expr.substituteList() and Expr.debugString() below.
    List<Expr> orderingExprs = null;
    if (sortInfo != null) {
      orderingExprs = sortInfo.getOrderingExprs();
    }

    ArrayList<AggregateExpr> aggExprs = collectAggExprs();
    Expr.SubstitutionMap avgSMap = createAvgSMap(aggExprs, analyzer);

    // substitute AVG before constructing AggregateInfo
    Expr.substituteList(aggExprs, avgSMap);
    ArrayList<AggregateExpr> nonAvgAggExprs = Lists.newArrayList();
    Expr.collectList(aggExprs, AggregateExpr.class, nonAvgAggExprs);
    aggExprs = nonAvgAggExprs;
    createAggInfo(groupingExprsCopy, aggExprs, analyzer);

    // combine avg smap with the one that produces the final agg output
    AggregateInfo finalAggInfo =
        aggInfo.getSecondPhaseDistinctAggInfo() != null
            ? aggInfo.getSecondPhaseDistinctAggInfo()
            : aggInfo;
    Expr.SubstitutionMap combinedSMap =
        Expr.SubstitutionMap.combine(avgSMap, finalAggInfo.getSMap());

    // Building the debug strings walks whole expr trees; only do so when the
    // messages will actually be emitted.
    final boolean debugEnabled = LOG.isDebugEnabled();
    if (debugEnabled) {
      LOG.debug("combined smap: " + combinedSMap.debugString());
    }

    // change select list, having and ordering exprs to point to agg output
    Expr.substituteList(resultExprs, combinedSMap);
    if (debugEnabled) {
      LOG.debug("post-agg selectListExprs: " + Expr.debugString(resultExprs));
    }
    if (havingPred != null) {
      havingPred = havingPred.substitute(combinedSMap);
      if (debugEnabled) {
        LOG.debug("post-agg havingPred: " + havingPred.debugString());
      }
    }
    Expr.substituteList(orderingExprs, combinedSMap);
    if (debugEnabled) {
      LOG.debug("post-agg orderingExprs: " + Expr.debugString(orderingExprs));
    }

    // check that all post-agg exprs point to agg output
    for (int i = 0; i < selectList.getItems().size(); ++i) {
      if (!resultExprs.get(i).isBound(finalAggInfo.getAggTupleId())) {
        throw new AnalysisException(
            "select list expression not produced by aggregation output "
                + "(missing from GROUP BY clause?): "
                + selectList.getItems().get(i).getExpr().toSql());
      }
    }
    if (orderByElements != null) {
      // NOTE(review): assumes sortInfo (and hence orderingExprs) is non-null whenever
      // orderByElements is non-null -- confirm against where sortInfo is created.
      for (int i = 0; i < orderByElements.size(); ++i) {
        if (!orderingExprs.get(i).isBound(finalAggInfo.getAggTupleId())) {
          throw new AnalysisException(
              "ORDER BY expression not produced by aggregation output "
                  + "(missing from GROUP BY clause?): "
                  + orderByElements.get(i).getExpr().toSql());
        }
      }
    }
    if (havingPred != null) {
      if (!havingPred.isBound(finalAggInfo.getAggTupleId())) {
        // report the user-written clause, not the substituted predicate
        throw new AnalysisException(
            "HAVING clause not produced by aggregation output "
                + "(missing from GROUP BY clause?): "
                + havingClause.toSql());
      }
    }
  }
Ejemplo n.º 2
0
  /**
   * Analyzes the aggregation-relevant components of the select block (GROUP BY clause, select
   * list, HAVING clause, ORDER BY clause), substitutes AVG with SUM/COUNT, wraps COUNT(ALL)
   * where required (see the comment on countAllMap below), creates the AggregateInfo
   * (including the agg output tuple), and rewrites all post-agg exprs using the
   * AggregateInfo's smap.
   *
   * <p>Returns immediately if the block has no GROUP BY clause, is not SELECT DISTINCT and has
   * no aggregate expr among its result exprs.
   *
   * @param analyzer used to analyze the grouping exprs and the HAVING predicate, and with which
   *     the substituted HAVING predicate is registered as a conjunct
   * @throws AnalysisException if the block aggregates without a FROM clause, combines SELECT
   *     DISTINCT with aggregates or GROUP BY, combines '*' with GROUP BY, uses an ambiguous
   *     alias or an aggregate function in GROUP BY, if a select list, ORDER BY or HAVING expr
   *     is not bound by the aggregation output tuple, or if createAggInfo fails with an
   *     InternalException (rethrown here with the original exception as the cause)
   * @throws AuthorizationException declared but not thrown directly by this method; presumably
   *     propagated from expr analysis (confirm against the callees)
   */
  private void analyzeAggregation(Analyzer analyzer)
      throws AnalysisException, AuthorizationException {
    if (groupingExprs_ == null
        && !selectList_.isDistinct()
        && !TreeNode.contains(resultExprs_, Expr.isAggregatePredicate())) {
      // we're not computing aggregates
      return;
    }

    // If we're computing an aggregate, we must have a FROM clause.
    if (tableRefs_.size() == 0) {
      throw new AnalysisException("aggregation without a FROM clause is not allowed");
    }

    if ((groupingExprs_ != null || TreeNode.contains(resultExprs_, Expr.isAggregatePredicate()))
        && selectList_.isDistinct()) {
      throw new AnalysisException(
          "cannot combine SELECT DISTINCT with aggregate functions or GROUP BY");
    }

    // disallow '*' and explicit GROUP BY (we can't group by '*', and if you need to
    // name all star-expanded cols in the group by clause you might as well do it
    // in the select list)
    if (groupingExprs_ != null) {
      for (SelectListItem item : selectList_.getItems()) {
        if (item.isStar()) {
          throw new AnalysisException(
              "cannot combine '*' in select list with GROUP BY: " + item.toSql());
        }
      }
    }

    // analyze grouping exprs
    ArrayList<Expr> groupingExprsCopy = Lists.newArrayList();
    if (groupingExprs_ != null) {
      // make a deep copy here, we don't want to modify the original
      // exprs during analysis (in case we need to print them later)
      groupingExprsCopy = Expr.cloneList(groupingExprs_);
      substituteOrdinals(groupingExprsCopy, "GROUP BY");
      Expr ambiguousAlias = getFirstAmbiguousAlias(groupingExprsCopy);
      if (ambiguousAlias != null) {
        throw new AnalysisException(
            "Column " + ambiguousAlias.toSql() + " in group by clause is ambiguous");
      }
      Expr.substituteList(groupingExprsCopy, aliasSmap_);
      for (int i = 0; i < groupingExprsCopy.size(); ++i) {
        groupingExprsCopy.get(i).analyze(analyzer);
        if (groupingExprsCopy.get(i).contains(Expr.isAggregatePredicate())) {
          // reference the original expr in the error msg
          throw new AnalysisException(
              "GROUP BY expression must not contain aggregate functions: "
                  + groupingExprs_.get(i).toSql());
        }
      }
    }

    // analyze having clause
    if (havingClause_ != null) {
      // substitute aliases in place (ordinals not allowed in having clause)
      havingPred_ = havingClause_.clone(aliasSmap_);
      havingPred_.analyze(analyzer);
      havingPred_.checkReturnsBool("HAVING clause", true);
    }

    // orderingExprs stays null when there is no sort info; it is passed as-is to
    // Expr.substituteList() and Expr.debugString() below.
    List<Expr> orderingExprs = null;
    if (sortInfo_ != null) {
      orderingExprs = sortInfo_.getOrderingExprs();
    }

    // Collect the aggregate expressions from the SELECT, HAVING and ORDER BY clauses
    // of this statement.
    ArrayList<FunctionCallExpr> aggExprs = Lists.newArrayList();
    TreeNode.collect(resultExprs_, Expr.isAggregatePredicate(), aggExprs);
    if (havingPred_ != null) {
      havingPred_.collect(Expr.isAggregatePredicate(), aggExprs);
    }
    if (sortInfo_ != null) {
      TreeNode.collect(sortInfo_.getOrderingExprs(), Expr.isAggregatePredicate(), aggExprs);
    }

    // substitute AVG before constructing AggregateInfo
    Expr.SubstitutionMap avgSMap = createAvgSMap(aggExprs, analyzer);
    ArrayList<Expr> substitutedAggs = Expr.cloneList(aggExprs, avgSMap);

    ArrayList<FunctionCallExpr> nonAvgAggExprs = Lists.newArrayList();
    TreeNode.collect(substitutedAggs, Expr.isAggregatePredicate(), nonAvgAggExprs);

    // When DISTINCT aggregates are present, non-distinct (i.e. ALL) aggregates are
    // evaluated in two phases (see AggregateInfo for more details). In particular,
    // COUNT(c) in "SELECT COUNT(c), AGG(DISTINCT d) from R" is transformed to
    // "SELECT SUM(cnt) FROM (SELECT COUNT(c) as cnt from R group by d ) S".
    // Since a group-by expression is added to the inner query it returns no rows if
    // R is empty, in which case the SUM of COUNTs will return NULL.
    // However the original COUNT(c) should have returned 0 instead of NULL in this case.
    // Therefore, COUNT([ALL]) is transformed into zeroifnull(COUNT([ALL])) if
    // i) There is no GROUP-BY clause, and
    // ii) Other DISTINCT aggregates are present.
    Expr.SubstitutionMap countAllMap = createCountAllMap(nonAvgAggExprs, analyzer);
    substitutedAggs = Expr.cloneList(nonAvgAggExprs, countAllMap);
    // reuse aggExprs to hold the final set of aggregates after both substitutions
    aggExprs.clear();
    TreeNode.collect(substitutedAggs, Expr.isAggregatePredicate(), aggExprs);

    try {
      createAggInfo(groupingExprsCopy, aggExprs, analyzer);
    } catch (InternalException e) {
      // surface internal failures as analysis errors, keeping the original as the cause
      throw new AnalysisException(e.getMessage(), e);
    }

    // combine avg smap with the one that produces the final agg output
    AggregateInfo finalAggInfo =
        aggInfo_.getSecondPhaseDistinctAggInfo() != null
            ? aggInfo_.getSecondPhaseDistinctAggInfo()
            : aggInfo_;

    Expr.SubstitutionMap combinedSMap =
        Expr.SubstitutionMap.compose(
            Expr.SubstitutionMap.compose(avgSMap, countAllMap), finalAggInfo.getSMap());

    // Building the debug strings walks whole expr trees; only do so when the
    // messages will actually be emitted.
    final boolean debugEnabled = LOG.isDebugEnabled();
    if (debugEnabled) {
      LOG.debug("combined smap: " + combinedSMap.debugString());
    }

    // change select list, having and ordering exprs to point to agg output
    Expr.substituteList(resultExprs_, combinedSMap);
    if (debugEnabled) {
      LOG.debug("post-agg selectListExprs: " + Expr.debugString(resultExprs_));
    }
    if (havingPred_ != null) {
      havingPred_ = havingPred_.substitute(combinedSMap);
      // register the predicate only after it has been rewritten against the agg output
      analyzer.registerConjuncts(havingPred_, null, false);
      if (debugEnabled) {
        LOG.debug("post-agg havingPred: " + havingPred_.debugString());
      }
    }
    Expr.substituteList(orderingExprs, combinedSMap);
    if (debugEnabled) {
      LOG.debug("post-agg orderingExprs: " + Expr.debugString(orderingExprs));
    }

    // check that all post-agg exprs point to agg output
    for (int i = 0; i < selectList_.getItems().size(); ++i) {
      if (!resultExprs_.get(i).isBound(finalAggInfo.getAggTupleId())) {
        throw new AnalysisException(
            "select list expression not produced by aggregation output "
                + "(missing from GROUP BY clause?): "
                + selectList_.getItems().get(i).getExpr().toSql());
      }
    }
    if (orderByElements_ != null) {
      // NOTE(review): assumes sortInfo_ (and hence orderingExprs) is non-null whenever
      // orderByElements_ is non-null -- confirm against where sortInfo_ is created.
      for (int i = 0; i < orderByElements_.size(); ++i) {
        if (!orderingExprs.get(i).isBound(finalAggInfo.getAggTupleId())) {
          throw new AnalysisException(
              "ORDER BY expression not produced by aggregation output "
                  + "(missing from GROUP BY clause?): "
                  + orderByElements_.get(i).getExpr().toSql());
        }
      }
    }
    if (havingPred_ != null) {
      if (!havingPred_.isBound(finalAggInfo.getAggTupleId())) {
        // report the user-written clause, not the substituted predicate
        throw new AnalysisException(
            "HAVING clause not produced by aggregation output "
                + "(missing from GROUP BY clause?): "
                + havingClause_.toSql());
      }
    }
  }