/** * Analyze aggregation-relevant components of the select block (Group By clause, select list, * Order By clause), substitute AVG with SUM/COUNT, create the AggregationInfo, including the agg * output tuple, and transform all post-agg exprs given AggregationInfo's smap. * * @param analyzer * @throws AnalysisException */ private void analyzeAggregation(Analyzer analyzer) throws AnalysisException, InternalException { if (groupingExprs == null && !selectList.isDistinct() && !Expr.contains(resultExprs, AggregateExpr.class)) { // we're not computing aggregates return; } // If we're computing an aggregate, we must have a FROM clause. if (tableRefs.size() == 0) { throw new AnalysisException("aggregation without a FROM clause is not allowed"); } if ((groupingExprs != null || Expr.contains(resultExprs, AggregateExpr.class)) && selectList.isDistinct()) { throw new AnalysisException( "cannot combine SELECT DISTINCT with aggregate functions or GROUP BY"); } // disallow '*' and explicit GROUP BY (we can't group by '*', and if you need to // name all star-expanded cols in the group by clause you might as well do it // in the select list) if (groupingExprs != null) { for (SelectListItem item : selectList.getItems()) { if (item.isStar()) { throw new AnalysisException( "cannot combine '*' in select list with GROUP BY: " + item.toSql()); } } } // analyze grouping exprs ArrayList<Expr> groupingExprsCopy = Lists.newArrayList(); if (groupingExprs != null) { // make a deep copy here, we don't want to modify the original // exprs during analysis (in case we need to print them later) groupingExprsCopy = Expr.cloneList(groupingExprs, null); substituteOrdinals(groupingExprsCopy, "GROUP BY"); Expr ambiguousAlias = getFirstAmbiguousAlias(groupingExprsCopy); if (ambiguousAlias != null) { throw new AnalysisException( "Column " + ambiguousAlias.toSql() + " in group by clause is ambiguous"); } Expr.substituteList(groupingExprsCopy, aliasSMap); for (int i = 0; i < groupingExprsCopy.size(); ++i) { groupingExprsCopy.get(i).analyze(analyzer); if (groupingExprsCopy.get(i).contains(AggregateExpr.class)) { // reference the original expr in the error msg throw new AnalysisException( "GROUP BY expression must not contain aggregate functions: " + groupingExprs.get(i).toSql()); } } } // analyze having clause if (havingClause != null) { // substitute aliases in place (ordinals not allowed in having clause) havingPred = havingClause.clone(aliasSMap); havingPred.analyze(analyzer); havingPred.checkReturnsBool("HAVING clause", true); analyzer.registerConjuncts(havingPred, null, false); } List<Expr> orderingExprs = null; if (sortInfo != null) { orderingExprs = sortInfo.getOrderingExprs(); } ArrayList<AggregateExpr> aggExprs = collectAggExprs(); Expr.SubstitutionMap avgSMap = createAvgSMap(aggExprs, analyzer); // substitute AVG before constructing AggregateInfo Expr.substituteList(aggExprs, avgSMap); ArrayList<AggregateExpr> nonAvgAggExprs = Lists.newArrayList(); Expr.collectList(aggExprs, AggregateExpr.class, nonAvgAggExprs); aggExprs = nonAvgAggExprs; createAggInfo(groupingExprsCopy, aggExprs, analyzer); // combine avg smap with the one that produces the final agg output AggregateInfo finalAggInfo = aggInfo.getSecondPhaseDistinctAggInfo() != null ? aggInfo.getSecondPhaseDistinctAggInfo() : aggInfo; Expr.SubstitutionMap combinedSMap = Expr.SubstitutionMap.combine(avgSMap, finalAggInfo.getSMap()); LOG.debug("combined smap: " + combinedSMap.debugString()); // change select list, having and ordering exprs to point to agg output Expr.substituteList(resultExprs, combinedSMap); LOG.debug("post-agg selectListExprs: " + Expr.debugString(resultExprs)); if (havingPred != null) { havingPred = havingPred.substitute(combinedSMap); LOG.debug("post-agg havingPred: " + havingPred.debugString()); } Expr.substituteList(orderingExprs, combinedSMap); LOG.debug("post-agg orderingExprs: " + Expr.debugString(orderingExprs)); // check that all post-agg exprs point to agg output for (int i = 0; i < selectList.getItems().size(); ++i) { if (!resultExprs.get(i).isBound(finalAggInfo.getAggTupleId())) { throw new AnalysisException( "select list expression not produced by aggregation output " + "(missing from GROUP BY clause?): " + selectList.getItems().get(i).getExpr().toSql()); } } if (orderByElements != null) { for (int i = 0; i < orderByElements.size(); ++i) { if (!orderingExprs.get(i).isBound(finalAggInfo.getAggTupleId())) { throw new AnalysisException( "ORDER BY expression not produced by aggregation output " + "(missing from GROUP BY clause?): " + orderByElements.get(i).getExpr().toSql()); } } } if (havingPred != null) { if (!havingPred.isBound(finalAggInfo.getAggTupleId())) { throw new AnalysisException( "HAVING clause not produced by aggregation output " + "(missing from GROUP BY clause?): " + havingClause.toSql()); } } }
/** * Analyze aggregation-relevant components of the select block (Group By clause, select list, * Order By clause), substitute AVG with SUM/COUNT, create the AggregationInfo, including the agg * output tuple, and transform all post-agg exprs given AggregationInfo's smap. */ private void analyzeAggregation(Analyzer analyzer) throws AnalysisException, AuthorizationException { if (groupingExprs_ == null && !selectList_.isDistinct() && !TreeNode.contains(resultExprs_, Expr.isAggregatePredicate())) { // we're not computing aggregates return; } // If we're computing an aggregate, we must have a FROM clause. if (tableRefs_.size() == 0) { throw new AnalysisException("aggregation without a FROM clause is not allowed"); } if ((groupingExprs_ != null || TreeNode.contains(resultExprs_, Expr.isAggregatePredicate())) && selectList_.isDistinct()) { throw new AnalysisException( "cannot combine SELECT DISTINCT with aggregate functions or GROUP BY"); } // disallow '*' and explicit GROUP BY (we can't group by '*', and if you need to // name all star-expanded cols in the group by clause you might as well do it // in the select list) if (groupingExprs_ != null) { for (SelectListItem item : selectList_.getItems()) { if (item.isStar()) { throw new AnalysisException( "cannot combine '*' in select list with GROUP BY: " + item.toSql()); } } } // analyze grouping exprs ArrayList<Expr> groupingExprsCopy = Lists.newArrayList(); if (groupingExprs_ != null) { // make a deep copy here, we don't want to modify the original // exprs during analysis (in case we need to print them later) groupingExprsCopy = Expr.cloneList(groupingExprs_); substituteOrdinals(groupingExprsCopy, "GROUP BY"); Expr ambiguousAlias = getFirstAmbiguousAlias(groupingExprsCopy); if (ambiguousAlias != null) { throw new AnalysisException( "Column " + ambiguousAlias.toSql() + " in group by clause is ambiguous"); } Expr.substituteList(groupingExprsCopy, aliasSmap_); for (int i = 0; i < groupingExprsCopy.size(); ++i) { groupingExprsCopy.get(i).analyze(analyzer); if (groupingExprsCopy.get(i).contains(Expr.isAggregatePredicate())) { // reference the original expr in the error msg throw new AnalysisException( "GROUP BY expression must not contain aggregate functions: " + groupingExprs_.get(i).toSql()); } } } // analyze having clause if (havingClause_ != null) { // substitute aliases in place (ordinals not allowed in having clause) havingPred_ = havingClause_.clone(aliasSmap_); havingPred_.analyze(analyzer); havingPred_.checkReturnsBool("HAVING clause", true); } List<Expr> orderingExprs = null; if (sortInfo_ != null) orderingExprs = sortInfo_.getOrderingExprs(); // Collect the aggregate expressions from the SELECT, HAVING and ORDER BY clauses // of this statement. ArrayList<FunctionCallExpr> aggExprs = Lists.newArrayList(); TreeNode.collect(resultExprs_, Expr.isAggregatePredicate(), aggExprs); if (havingPred_ != null) { havingPred_.collect(Expr.isAggregatePredicate(), aggExprs); } if (sortInfo_ != null) { TreeNode.collect(sortInfo_.getOrderingExprs(), Expr.isAggregatePredicate(), aggExprs); } // substitute AVG before constructing AggregateInfo Expr.SubstitutionMap avgSMap = createAvgSMap(aggExprs, analyzer); ArrayList<Expr> substitutedAggs = Expr.cloneList(aggExprs, avgSMap); ArrayList<FunctionCallExpr> nonAvgAggExprs = Lists.newArrayList(); TreeNode.collect(substitutedAggs, Expr.isAggregatePredicate(), nonAvgAggExprs); // When DISTINCT aggregates are present, non-distinct (i.e. ALL) aggregates are // evaluated in two phases (see AggregateInfo for more details). In particular, // COUNT(c) in "SELECT COUNT(c), AGG(DISTINCT d) from R" is transformed to // "SELECT SUM(cnt) FROM (SELECT COUNT(c) as cnt from R group by d ) S". // Since a group-by expression is added to the inner query it returns no rows if // R is empty, in which case the SUM of COUNTs will return NULL. // However the original COUNT(c) should have returned 0 instead of NULL in this case. // Therefore, COUNT([ALL]) is transformed into zeroifnull(COUNT([ALL]) if // i) There is no GROUP-BY clause, and // ii) Other DISTINCT aggregates are present. Expr.SubstitutionMap countAllMap = createCountAllMap(nonAvgAggExprs, analyzer); substitutedAggs = Expr.cloneList(nonAvgAggExprs, countAllMap); aggExprs.clear(); TreeNode.collect(substitutedAggs, Expr.isAggregatePredicate(), aggExprs); try { createAggInfo(groupingExprsCopy, aggExprs, analyzer); } catch (InternalException e) { throw new AnalysisException(e.getMessage(), e); } // combine avg smap with the one that produces the final agg output AggregateInfo finalAggInfo = aggInfo_.getSecondPhaseDistinctAggInfo() != null ? aggInfo_.getSecondPhaseDistinctAggInfo() : aggInfo_; Expr.SubstitutionMap combinedSMap = Expr.SubstitutionMap.compose( Expr.SubstitutionMap.compose(avgSMap, countAllMap), finalAggInfo.getSMap()); LOG.debug("combined smap: " + combinedSMap.debugString()); // change select list, having and ordering exprs to point to agg output Expr.substituteList(resultExprs_, combinedSMap); LOG.debug("post-agg selectListExprs: " + Expr.debugString(resultExprs_)); if (havingPred_ != null) { havingPred_ = havingPred_.substitute(combinedSMap); analyzer.registerConjuncts(havingPred_, null, false); LOG.debug("post-agg havingPred: " + havingPred_.debugString()); } Expr.substituteList(orderingExprs, combinedSMap); LOG.debug("post-agg orderingExprs: " + Expr.debugString(orderingExprs)); // check that all post-agg exprs point to agg output for (int i = 0; i < selectList_.getItems().size(); ++i) { if (!resultExprs_.get(i).isBound(finalAggInfo.getAggTupleId())) { throw new AnalysisException( "select list expression not produced by aggregation output " + "(missing from GROUP BY clause?): " + selectList_.getItems().get(i).getExpr().toSql()); } } if (orderByElements_ != null) { for (int i = 0; i < orderByElements_.size(); ++i) { if (!orderingExprs.get(i).isBound(finalAggInfo.getAggTupleId())) { throw new AnalysisException( "ORDER BY expression not produced by aggregation output " + "(missing from GROUP BY clause?): " + orderByElements_.get(i).getExpr().toSql()); } } } if (havingPred_ != null) { if (!havingPred_.isBound(finalAggInfo.getAggTupleId())) { throw new AnalysisException( "HAVING clause not produced by aggregation output " + "(missing from GROUP BY clause?): " + havingClause_.toSql()); } } }