/** Create aggInfo for the given grouping and agg exprs. */ private void createAggInfo( ArrayList<Expr> groupingExprs, ArrayList<AggregateExpr> aggExprs, Analyzer analyzer) throws AnalysisException, InternalException { if (selectList.isDistinct()) { // Create aggInfo for SELECT DISTINCT ... stmt: // - all select list items turn into grouping exprs // - there are no aggregate exprs Preconditions.checkState(groupingExprs.isEmpty()); Preconditions.checkState(aggExprs.isEmpty()); aggInfo = AggregateInfo.create(Expr.cloneList(resultExprs, null), null, null, analyzer); } else { aggInfo = AggregateInfo.create(groupingExprs, aggExprs, null, analyzer); } }
/** Marks all unassigned join predicates as well as exprs in aggInfo and sortInfo. */ @Override public void materializeRequiredSlots(Analyzer analyzer) { // Mark unassigned join predicates. Some predicates that must be evaluated by a join // can also be safely evaluated below the join (picked up by getBoundPredicates()). // Such predicates will be marked twice and that is ok. List<Expr> unassigned = analyzer.getUnassignedConjuncts(getTableRefIds(), true); List<Expr> unassignedJoinConjuncts = Lists.newArrayList(); for (Expr e : unassigned) { if (analyzer.evalByJoin(e)) unassignedJoinConjuncts.add(e); } List<Expr> baseTblJoinConjuncts = Expr.cloneList(unassignedJoinConjuncts, baseTblSmap_); materializeSlots(analyzer, baseTblJoinConjuncts); if (sortInfo_ != null) { // mark ordering exprs before marking agg exprs because the ordering exprs // may contain agg exprs that are not referenced anywhere but the ORDER BY clause List<Expr> resolvedExprs = Expr.cloneList(sortInfo_.getOrderingExprs(), baseTblSmap_); materializeSlots(analyzer, resolvedExprs); } if (aggInfo_ != null) { // mark all agg exprs needed for HAVING pred and binding predicates as materialized // before calling AggregateInfo.materializeRequiredSlots(), otherwise they won't // show up in AggregateInfo.getMaterializedAggregateExprs() ArrayList<Expr> havingConjuncts = Lists.newArrayList(); if (havingPred_ != null) havingConjuncts.add(havingPred_); // Ignore predicates bound to a group-by slot because those // are already evaluated below this agg node (e.g., in a scan). Set<SlotId> groupBySlots = Sets.newHashSet(); for (int i = 0; i < aggInfo_.getGroupingExprs().size(); ++i) { groupBySlots.add(aggInfo_.getAggTupleDesc().getSlots().get(i).getId()); } // Binding predicates are assigned to the final output tuple of the aggregation, // which is the tuple of the 2nd phase agg for distinct aggs. ArrayList<Expr> bindingPredicates = analyzer.getBoundPredicates(aggInfo_.getOutputTupleId(), groupBySlots); havingConjuncts.addAll(bindingPredicates); havingConjuncts.addAll( analyzer.getUnassignedConjuncts(aggInfo_.getOutputTupleId().asList(), false)); materializeSlots(analyzer, havingConjuncts); aggInfo_.materializeRequiredSlots(analyzer, baseTblSmap_); } }
@Override public void analyze(Analyzer analyzer) throws AnalysisException, InternalException { // start out with table refs to establish aliases TableRef leftTblRef = null; // the one to the left of tblRef for (TableRef tblRef : tableRefs) { tblRef.setLeftTblRef(leftTblRef); tblRef.analyze(analyzer); leftTblRef = tblRef; } // populate selectListExprs, aliasSMap, and colNames for (SelectListItem item : selectList.getItems()) { if (item.isStar()) { TableName tblName = item.getTblName(); if (tblName == null) { expandStar(analyzer); } else { expandStar(analyzer, tblName); } } else { resultExprs.add(item.getExpr()); SlotRef aliasRef = new SlotRef(null, item.toColumnLabel()); if (aliasSMap.lhs.contains(aliasRef)) { // If we have already seen this alias, it refers to more than one column and // therefore is ambiguous. ambiguousAliasList.add(aliasRef); } aliasSMap.lhs.add(aliasRef); aliasSMap.rhs.add(item.getExpr().clone(null)); colLabels.add(item.toColumnLabel()); } } // analyze selectListExprs Expr.analyze(resultExprs, analyzer); if (whereClause != null) { whereClause.analyze(analyzer); if (whereClause.contains(AggregateExpr.class)) { throw new AnalysisException("aggregation function not allowed in WHERE clause"); } whereClause.checkReturnsBool("WHERE clause", false); analyzer.registerConjuncts(whereClause, null, true); } createSortInfo(analyzer); analyzeAggregation(analyzer); // Substitute expressions to the underlying inline view expressions substituteInlineViewExprs(analyzer); if (aggInfo != null) { LOG.debug("post-analysis " + aggInfo.debugString()); } }
@Override public void getMaterializedTupleIds(ArrayList<TupleId> tupleIdList) { // If select statement has an aggregate, then the aggregate tuple id is materialized. // Otherwise, all referenced tables are materialized. if (aggInfo != null) { tupleIdList.add(aggInfo.getAggTupleId()); } else { for (TableRef tblRef : tableRefs) { tupleIdList.addAll(tblRef.getMaterializedTupleIds()); } } }
@Override public void getMaterializedTupleIds(ArrayList<TupleId> tupleIdList) { // If select statement has an aggregate, then the aggregate tuple id is materialized. // Otherwise, all referenced tables are materialized. if (aggInfo_ != null) { // Return the tuple id produced in the final aggregation step. tupleIdList.add(aggInfo_.getOutputTupleId()); } else { for (TableRef tblRef : tableRefs_) { tupleIdList.addAll(tblRef.getMaterializedTupleIds()); } } }
/** * This select block might contain inline views. Substitute all exprs (result of the analysis) of * this select block referencing any of our inlined views, including everything registered with * the analyzer. Expressions created during parsing (such as whereClause) are not touched. */ protected void substituteInlineViewExprs(Analyzer analyzer) { // Gather the inline view substitution maps from the enclosed inline views Expr.SubstitutionMap sMap = new Expr.SubstitutionMap(); for (TableRef tblRef : tableRefs) { if (tblRef instanceof InlineViewRef) { InlineViewRef inlineViewRef = (InlineViewRef) tblRef; sMap = Expr.SubstitutionMap.combine(sMap, inlineViewRef.getExprSMap()); } } // we might not have anything to substitute if (sMap.lhs.size() == 0) { return; } // Substitute select list, join clause, where clause, aggregate, order by // and this select block's analyzer expressions // select Expr.substituteList(resultExprs, sMap); // aggregation (group by and aggregation expr) if (aggInfo != null) { aggInfo.substitute(sMap); } // having if (havingPred != null) { havingPred.substitute(sMap); } // ordering if (sortInfo != null) { sortInfo.substitute(sMap); } // expressions registered inside the analyzer analyzer.substitute(sMap); }
/** * Analyze aggregation-relevant components of the select block (Group By clause, select list, * Order By clause), substitute AVG with SUM/COUNT, create the AggregationInfo, including the agg * output tuple, and transform all post-agg exprs given AggregationInfo's smap. * * @param analyzer * @throws AnalysisException */ private void analyzeAggregation(Analyzer analyzer) throws AnalysisException, InternalException { if (groupingExprs == null && !selectList.isDistinct() && !Expr.contains(resultExprs, AggregateExpr.class)) { // we're not computing aggregates return; } // If we're computing an aggregate, we must have a FROM clause. if (tableRefs.size() == 0) { throw new AnalysisException("aggregation without a FROM clause is not allowed"); } if ((groupingExprs != null || Expr.contains(resultExprs, AggregateExpr.class)) && selectList.isDistinct()) { throw new AnalysisException( "cannot combine SELECT DISTINCT with aggregate functions or GROUP BY"); } // disallow '*' and explicit GROUP BY (we can't group by '*', and if you need to // name all star-expanded cols in the group by clause you might as well do it // in the select list) if (groupingExprs != null) { for (SelectListItem item : selectList.getItems()) { if (item.isStar()) { throw new AnalysisException( "cannot combine '*' in select list with GROUP BY: " + item.toSql()); } } } // analyze grouping exprs ArrayList<Expr> groupingExprsCopy = Lists.newArrayList(); if (groupingExprs != null) { // make a deep copy here, we don't want to modify the original // exprs during analysis (in case we need to print them later) groupingExprsCopy = Expr.cloneList(groupingExprs, null); substituteOrdinals(groupingExprsCopy, "GROUP BY"); Expr ambiguousAlias = getFirstAmbiguousAlias(groupingExprsCopy); if (ambiguousAlias != null) { throw new AnalysisException( "Column " + ambiguousAlias.toSql() + " in group by clause is ambiguous"); } Expr.substituteList(groupingExprsCopy, aliasSMap); for (int i = 0; i < groupingExprsCopy.size(); ++i) { groupingExprsCopy.get(i).analyze(analyzer); if (groupingExprsCopy.get(i).contains(AggregateExpr.class)) { // reference the original expr in the error msg throw new AnalysisException( "GROUP BY expression must not contain aggregate functions: " + groupingExprs.get(i).toSql()); } } } // analyze having clause if (havingClause != null) { // substitute aliases in place (ordinals not allowed in having clause) havingPred = havingClause.clone(aliasSMap); havingPred.analyze(analyzer); havingPred.checkReturnsBool("HAVING clause", true); analyzer.registerConjuncts(havingPred, null, false); } List<Expr> orderingExprs = null; if (sortInfo != null) { orderingExprs = sortInfo.getOrderingExprs(); } ArrayList<AggregateExpr> aggExprs = collectAggExprs(); Expr.SubstitutionMap avgSMap = createAvgSMap(aggExprs, analyzer); // substitute AVG before constructing AggregateInfo Expr.substituteList(aggExprs, avgSMap); ArrayList<AggregateExpr> nonAvgAggExprs = Lists.newArrayList(); Expr.collectList(aggExprs, AggregateExpr.class, nonAvgAggExprs); aggExprs = nonAvgAggExprs; createAggInfo(groupingExprsCopy, aggExprs, analyzer); // combine avg smap with the one that produces the final agg output AggregateInfo finalAggInfo = aggInfo.getSecondPhaseDistinctAggInfo() != null ? aggInfo.getSecondPhaseDistinctAggInfo() : aggInfo; Expr.SubstitutionMap combinedSMap = Expr.SubstitutionMap.combine(avgSMap, finalAggInfo.getSMap()); LOG.debug("combined smap: " + combinedSMap.debugString()); // change select list, having and ordering exprs to point to agg output Expr.substituteList(resultExprs, combinedSMap); LOG.debug("post-agg selectListExprs: " + Expr.debugString(resultExprs)); if (havingPred != null) { havingPred = havingPred.substitute(combinedSMap); LOG.debug("post-agg havingPred: " + havingPred.debugString()); } Expr.substituteList(orderingExprs, combinedSMap); LOG.debug("post-agg orderingExprs: " + Expr.debugString(orderingExprs)); // check that all post-agg exprs point to agg output for (int i = 0; i < selectList.getItems().size(); ++i) { if (!resultExprs.get(i).isBound(finalAggInfo.getAggTupleId())) { throw new AnalysisException( "select list expression not produced by aggregation output " + "(missing from GROUP BY clause?): " + selectList.getItems().get(i).getExpr().toSql()); } } if (orderByElements != null) { for (int i = 0; i < orderByElements.size(); ++i) { if (!orderingExprs.get(i).isBound(finalAggInfo.getAggTupleId())) { throw new AnalysisException( "ORDER BY expression not produced by aggregation output " + "(missing from GROUP BY clause?): " + orderByElements.get(i).getExpr().toSql()); } } } if (havingPred != null) { if (!havingPred.isBound(finalAggInfo.getAggTupleId())) { throw new AnalysisException( "HAVING clause not produced by aggregation output " + "(missing from GROUP BY clause?): " + havingClause.toSql()); } } }
/** * Analyze aggregation-relevant components of the select block (Group By clause, select list, * Order By clause), substitute AVG with SUM/COUNT, create the AggregationInfo, including the agg * output tuple, and transform all post-agg exprs given AggregationInfo's smap. */ private void analyzeAggregation(Analyzer analyzer) throws AnalysisException, AuthorizationException { if (groupingExprs_ == null && !selectList_.isDistinct() && !TreeNode.contains(resultExprs_, Expr.isAggregatePredicate())) { // we're not computing aggregates return; } // If we're computing an aggregate, we must have a FROM clause. if (tableRefs_.size() == 0) { throw new AnalysisException("aggregation without a FROM clause is not allowed"); } if ((groupingExprs_ != null || TreeNode.contains(resultExprs_, Expr.isAggregatePredicate())) && selectList_.isDistinct()) { throw new AnalysisException( "cannot combine SELECT DISTINCT with aggregate functions or GROUP BY"); } // disallow '*' and explicit GROUP BY (we can't group by '*', and if you need to // name all star-expanded cols in the group by clause you might as well do it // in the select list) if (groupingExprs_ != null) { for (SelectListItem item : selectList_.getItems()) { if (item.isStar()) { throw new AnalysisException( "cannot combine '*' in select list with GROUP BY: " + item.toSql()); } } } // analyze grouping exprs ArrayList<Expr> groupingExprsCopy = Lists.newArrayList(); if (groupingExprs_ != null) { // make a deep copy here, we don't want to modify the original // exprs during analysis (in case we need to print them later) groupingExprsCopy = Expr.cloneList(groupingExprs_); substituteOrdinals(groupingExprsCopy, "GROUP BY"); Expr ambiguousAlias = getFirstAmbiguousAlias(groupingExprsCopy); if (ambiguousAlias != null) { throw new AnalysisException( "Column " + ambiguousAlias.toSql() + " in group by clause is ambiguous"); } Expr.substituteList(groupingExprsCopy, aliasSmap_); for (int i = 0; i < groupingExprsCopy.size(); ++i) { groupingExprsCopy.get(i).analyze(analyzer); if (groupingExprsCopy.get(i).contains(Expr.isAggregatePredicate())) { // reference the original expr in the error msg throw new AnalysisException( "GROUP BY expression must not contain aggregate functions: " + groupingExprs_.get(i).toSql()); } } } // analyze having clause if (havingClause_ != null) { // substitute aliases in place (ordinals not allowed in having clause) havingPred_ = havingClause_.clone(aliasSmap_); havingPred_.analyze(analyzer); havingPred_.checkReturnsBool("HAVING clause", true); } List<Expr> orderingExprs = null; if (sortInfo_ != null) orderingExprs = sortInfo_.getOrderingExprs(); // Collect the aggregate expressions from the SELECT, HAVING and ORDER BY clauses // of this statement. ArrayList<FunctionCallExpr> aggExprs = Lists.newArrayList(); TreeNode.collect(resultExprs_, Expr.isAggregatePredicate(), aggExprs); if (havingPred_ != null) { havingPred_.collect(Expr.isAggregatePredicate(), aggExprs); } if (sortInfo_ != null) { TreeNode.collect(sortInfo_.getOrderingExprs(), Expr.isAggregatePredicate(), aggExprs); } // substitute AVG before constructing AggregateInfo Expr.SubstitutionMap avgSMap = createAvgSMap(aggExprs, analyzer); ArrayList<Expr> substitutedAggs = Expr.cloneList(aggExprs, avgSMap); ArrayList<FunctionCallExpr> nonAvgAggExprs = Lists.newArrayList(); TreeNode.collect(substitutedAggs, Expr.isAggregatePredicate(), nonAvgAggExprs); // When DISTINCT aggregates are present, non-distinct (i.e. ALL) aggregates are // evaluated in two phases (see AggregateInfo for more details). In particular, // COUNT(c) in "SELECT COUNT(c), AGG(DISTINCT d) from R" is transformed to // "SELECT SUM(cnt) FROM (SELECT COUNT(c) as cnt from R group by d ) S". // Since a group-by expression is added to the inner query it returns no rows if // R is empty, in which case the SUM of COUNTs will return NULL. // However the original COUNT(c) should have returned 0 instead of NULL in this case. // Therefore, COUNT([ALL]) is transformed into zeroifnull(COUNT([ALL]) if // i) There is no GROUP-BY clause, and // ii) Other DISTINCT aggregates are present. Expr.SubstitutionMap countAllMap = createCountAllMap(nonAvgAggExprs, analyzer); substitutedAggs = Expr.cloneList(nonAvgAggExprs, countAllMap); aggExprs.clear(); TreeNode.collect(substitutedAggs, Expr.isAggregatePredicate(), aggExprs); try { createAggInfo(groupingExprsCopy, aggExprs, analyzer); } catch (InternalException e) { throw new AnalysisException(e.getMessage(), e); } // combine avg smap with the one that produces the final agg output AggregateInfo finalAggInfo = aggInfo_.getSecondPhaseDistinctAggInfo() != null ? aggInfo_.getSecondPhaseDistinctAggInfo() : aggInfo_; Expr.SubstitutionMap combinedSMap = Expr.SubstitutionMap.compose( Expr.SubstitutionMap.compose(avgSMap, countAllMap), finalAggInfo.getSMap()); LOG.debug("combined smap: " + combinedSMap.debugString()); // change select list, having and ordering exprs to point to agg output Expr.substituteList(resultExprs_, combinedSMap); LOG.debug("post-agg selectListExprs: " + Expr.debugString(resultExprs_)); if (havingPred_ != null) { havingPred_ = havingPred_.substitute(combinedSMap); analyzer.registerConjuncts(havingPred_, null, false); LOG.debug("post-agg havingPred: " + havingPred_.debugString()); } Expr.substituteList(orderingExprs, combinedSMap); LOG.debug("post-agg orderingExprs: " + Expr.debugString(orderingExprs)); // check that all post-agg exprs point to agg output for (int i = 0; i < selectList_.getItems().size(); ++i) { if (!resultExprs_.get(i).isBound(finalAggInfo.getAggTupleId())) { throw new AnalysisException( "select list expression not produced by aggregation output " + "(missing from GROUP BY clause?): " + selectList_.getItems().get(i).getExpr().toSql()); } } if (orderByElements_ != null) { for (int i = 0; i < orderByElements_.size(); ++i) { if (!orderingExprs.get(i).isBound(finalAggInfo.getAggTupleId())) { throw new AnalysisException( "ORDER BY expression not produced by aggregation output " + "(missing from GROUP BY clause?): " + orderByElements_.get(i).getExpr().toSql()); } } } if (havingPred_ != null) { if (!havingPred_.isBound(finalAggInfo.getAggTupleId())) { throw new AnalysisException( "HAVING clause not produced by aggregation output " + "(missing from GROUP BY clause?): " + havingClause_.toSql()); } } }
/** Creates resultExprs and baseTblResultExprs. */ @Override public void analyze(Analyzer analyzer) throws AnalysisException, AuthorizationException { super.analyze(analyzer); // Replace BaseTableRefs with ViewRefs. substituteViews(analyzer, tableRefs_); // start out with table refs to establish aliases TableRef leftTblRef = null; // the one to the left of tblRef for (TableRef tblRef : tableRefs_) { tblRef.setLeftTblRef(leftTblRef); try { tblRef.analyze(analyzer); } catch (AnalysisException e) { // Only re-throw the exception if no tables are missing. if (analyzer.getMissingTbls().isEmpty()) throw e; } leftTblRef = tblRef; } // All tableRefs have been analyzed, but at least one table was found missing. // There is no reason to proceed with analysis past this point. if (!analyzer.getMissingTbls().isEmpty()) { throw new AnalysisException("Found missing tables. Aborting analysis."); } // populate selectListExprs, aliasSMap, and colNames for (int i = 0; i < selectList_.getItems().size(); ++i) { SelectListItem item = selectList_.getItems().get(i); if (item.isStar()) { TableName tblName = item.getTblName(); if (tblName == null) { expandStar(analyzer); } else { expandStar(analyzer, tblName); } } else { // Analyze the resultExpr before generating a label to ensure enforcement // of expr child and depth limits (toColumn() label may call toSql()). item.getExpr().analyze(analyzer); resultExprs_.add(item.getExpr()); String label = item.toColumnLabel(i, analyzer.useHiveColLabels()); SlotRef aliasRef = new SlotRef(null, label); if (aliasSmap_.containsMappingFor(aliasRef)) { // If we have already seen this alias, it refers to more than one column and // therefore is ambiguous. ambiguousAliasList_.add(aliasRef); } aliasSmap_.addMapping(aliasRef, item.getExpr().clone(null)); colLabels_.add(label); } } if (whereClause_ != null) { whereClause_.analyze(analyzer); if (whereClause_.contains(Expr.isAggregatePredicate())) { throw new AnalysisException("aggregate function not allowed in WHERE clause"); } whereClause_.checkReturnsBool("WHERE clause", false); analyzer.registerConjuncts(whereClause_, null, true); } createSortInfo(analyzer); analyzeAggregation(analyzer); // Remember the SQL string before inline-view expression substitution. sqlString_ = toSql(); resolveInlineViewRefs(analyzer); if (aggInfo_ != null) LOG.debug("post-analysis " + aggInfo_.debugString()); }