/** Populates baseTblSmap_ with our combined inline view smap and creates baseTblResultExprs. */ protected void resolveInlineViewRefs(Analyzer analyzer) { // Gather the inline view substitution maps from the enclosed inline views for (TableRef tblRef : tableRefs_) { if (tblRef instanceof InlineViewRef) { InlineViewRef inlineViewRef = (InlineViewRef) tblRef; baseTblSmap_ = Expr.SubstitutionMap.combine(baseTblSmap_, inlineViewRef.getBaseTblSmap()); } } baseTblResultExprs_ = Expr.cloneList(resultExprs_, baseTblSmap_); LOG.trace("baseTblSmap_: " + baseTblSmap_.debugString()); LOG.trace("resultExprs: " + Expr.debugString(resultExprs_)); LOG.trace("baseTblResultExprs: " + Expr.debugString(baseTblResultExprs_)); }
/** Build smap AVG -> SUM/COUNT; assumes that select list and having clause have been analyzed. */ private Expr.SubstitutionMap createAvgSMap(ArrayList<AggregateExpr> aggExprs, Analyzer analyzer) throws AnalysisException { Expr.SubstitutionMap result = new Expr.SubstitutionMap(); for (AggregateExpr aggExpr : aggExprs) { if (aggExpr.getOp() != AggregateExpr.Operator.AVG) { continue; } // Transform avg(TIMESTAMP) to cast(avg(cast(TIMESTAMP as DOUBLE)) as TIMESTAMP) CastExpr inCastExpr = null; if (aggExpr.getChild(0).type == PrimitiveType.TIMESTAMP) { inCastExpr = new CastExpr(PrimitiveType.DOUBLE, aggExpr.getChild(0).clone(), false); } AggregateExpr sumExpr = new AggregateExpr( AggregateExpr.Operator.SUM, false, aggExpr.isDistinct(), Lists.newArrayList( aggExpr.getChild(0).type == PrimitiveType.TIMESTAMP ? inCastExpr : aggExpr.getChild(0).clone())); AggregateExpr countExpr = new AggregateExpr( AggregateExpr.Operator.COUNT, false, aggExpr.isDistinct(), Lists.newArrayList(aggExpr.getChild(0).clone())); ArithmeticExpr divExpr = new ArithmeticExpr(ArithmeticExpr.Operator.DIVIDE, sumExpr, countExpr); if (aggExpr.getChild(0).type == PrimitiveType.TIMESTAMP) { CastExpr outCastExpr = new CastExpr(PrimitiveType.TIMESTAMP, divExpr, false); outCastExpr.analyze(analyzer); result.rhs.add(outCastExpr); } else { divExpr.analyze(analyzer); result.rhs.add(divExpr); } result.lhs.add(aggExpr); } LOG.debug("avg smap: " + result.debugString()); return result; }
/* * Create a map from COUNT([ALL]) -> zeroifnull(COUNT([ALL])) if * i) There is no GROUP-BY, and * ii) There are other distinct aggregates to be evaluated. * This transformation is necessary for COUNT to correctly return 0 for empty * input relations. */ private Expr.SubstitutionMap createCountAllMap(List<FunctionCallExpr> aggExprs, Analyzer analyzer) throws AuthorizationException, AnalysisException { Expr.SubstitutionMap scalarCountAllMap = new Expr.SubstitutionMap(); if (groupingExprs_ != null && !groupingExprs_.isEmpty()) { // There are grouping expressions, so no substitution needs to be done. return scalarCountAllMap; } com.google.common.base.Predicate<FunctionCallExpr> isNotDistinctPred = new com.google.common.base.Predicate<FunctionCallExpr>() { public boolean apply(FunctionCallExpr expr) { return !expr.isDistinct(); } }; if (Iterables.all(aggExprs, isNotDistinctPred)) { // Only [ALL] aggs, so no substitution needs to be done. return scalarCountAllMap; } com.google.common.base.Predicate<FunctionCallExpr> isCountPred = new com.google.common.base.Predicate<FunctionCallExpr>() { public boolean apply(FunctionCallExpr expr) { return expr.getFnName().getFunction().equals("count"); } }; Iterable<FunctionCallExpr> countAllAggs = Iterables.filter(aggExprs, Predicates.and(isCountPred, isNotDistinctPred)); for (FunctionCallExpr countAllAgg : countAllAggs) { // Replace COUNT(ALL) with zeroifnull(COUNT(ALL)) ArrayList<Expr> zeroIfNullParam = Lists.newArrayList(countAllAgg.clone(null)); FunctionCallExpr zeroIfNull = new FunctionCallExpr("zeroifnull", zeroIfNullParam); zeroIfNull.analyze(analyzer); scalarCountAllMap.addMapping(countAllAgg, zeroIfNull); } return scalarCountAllMap; }
/** Build smap AVG -> SUM/COUNT; assumes that select list and having clause have been analyzed. */ private Expr.SubstitutionMap createAvgSMap( ArrayList<FunctionCallExpr> aggExprs, Analyzer analyzer) throws AnalysisException, AuthorizationException { Expr.SubstitutionMap result = new Expr.SubstitutionMap(); for (FunctionCallExpr aggExpr : aggExprs) { if (!aggExpr.getFnName().getFunction().equals("avg")) continue; // Transform avg(TIMESTAMP) to cast(avg(cast(TIMESTAMP as DOUBLE)) as TIMESTAMP) CastExpr inCastExpr = null; if (aggExpr.getChild(0).type_.getPrimitiveType() == PrimitiveType.TIMESTAMP) { inCastExpr = new CastExpr(ColumnType.DOUBLE, aggExpr.getChild(0).clone(null), false); } List<Expr> sumInputExprs = Lists.newArrayList( aggExpr.getChild(0).type_.getPrimitiveType() == PrimitiveType.TIMESTAMP ? inCastExpr : aggExpr.getChild(0).clone(null)); List<Expr> countInputExpr = Lists.newArrayList(aggExpr.getChild(0).clone(null)); FunctionCallExpr sumExpr = new FunctionCallExpr("sum", new FunctionParams(aggExpr.isDistinct(), sumInputExprs)); FunctionCallExpr countExpr = new FunctionCallExpr("count", new FunctionParams(aggExpr.isDistinct(), countInputExpr)); ArithmeticExpr divExpr = new ArithmeticExpr(ArithmeticExpr.Operator.DIVIDE, sumExpr, countExpr); if (aggExpr.getChild(0).type_.getPrimitiveType() == PrimitiveType.TIMESTAMP) { CastExpr outCastExpr = new CastExpr(ColumnType.TIMESTAMP, divExpr, false); outCastExpr.analyze(analyzer); result.addMapping(aggExpr, outCastExpr); } else { divExpr.analyze(analyzer); result.addMapping(aggExpr, divExpr); } } LOG.debug("avg smap: " + result.debugString()); return result; }
/** * This select block might contain inline views. Substitute all exprs (result of the analysis) of * this select block referencing any of our inlined views, including everything registered with * the analyzer. Expressions created during parsing (such as whereClause) are not touched. */ protected void substituteInlineViewExprs(Analyzer analyzer) { // Gather the inline view substitution maps from the enclosed inline views Expr.SubstitutionMap sMap = new Expr.SubstitutionMap(); for (TableRef tblRef : tableRefs) { if (tblRef instanceof InlineViewRef) { InlineViewRef inlineViewRef = (InlineViewRef) tblRef; sMap = Expr.SubstitutionMap.combine(sMap, inlineViewRef.getExprSMap()); } } // we might not have anything to substitute if (sMap.lhs.size() == 0) { return; } // Substitute select list, join clause, where clause, aggregate, order by // and this select block's analyzer expressions // select Expr.substituteList(resultExprs, sMap); // aggregation (group by and aggregation expr) if (aggInfo != null) { aggInfo.substitute(sMap); } // having if (havingPred != null) { havingPred.substitute(sMap); } // ordering if (sortInfo != null) { sortInfo.substitute(sMap); } // expressions registered inside the analyzer analyzer.substitute(sMap); }
/** * Analyze aggregation-relevant components of the select block (Group By clause, select list, * Order By clause), substitute AVG with SUM/COUNT, create the AggregationInfo, including the agg * output tuple, and transform all post-agg exprs given AggregationInfo's smap. * * @param analyzer * @throws AnalysisException */ private void analyzeAggregation(Analyzer analyzer) throws AnalysisException, InternalException { if (groupingExprs == null && !selectList.isDistinct() && !Expr.contains(resultExprs, AggregateExpr.class)) { // we're not computing aggregates return; } // If we're computing an aggregate, we must have a FROM clause. if (tableRefs.size() == 0) { throw new AnalysisException("aggregation without a FROM clause is not allowed"); } if ((groupingExprs != null || Expr.contains(resultExprs, AggregateExpr.class)) && selectList.isDistinct()) { throw new AnalysisException( "cannot combine SELECT DISTINCT with aggregate functions or GROUP BY"); } // disallow '*' and explicit GROUP BY (we can't group by '*', and if you need to // name all star-expanded cols in the group by clause you might as well do it // in the select list) if (groupingExprs != null) { for (SelectListItem item : selectList.getItems()) { if (item.isStar()) { throw new AnalysisException( "cannot combine '*' in select list with GROUP BY: " + item.toSql()); } } } // analyze grouping exprs ArrayList<Expr> groupingExprsCopy = Lists.newArrayList(); if (groupingExprs != null) { // make a deep copy here, we don't want to modify the original // exprs during analysis (in case we need to print them later) groupingExprsCopy = Expr.cloneList(groupingExprs, null); substituteOrdinals(groupingExprsCopy, "GROUP BY"); Expr ambiguousAlias = getFirstAmbiguousAlias(groupingExprsCopy); if (ambiguousAlias != null) { throw new AnalysisException( "Column " + ambiguousAlias.toSql() + " in group by clause is ambiguous"); } Expr.substituteList(groupingExprsCopy, aliasSMap); for (int i = 0; i < groupingExprsCopy.size(); ++i) { groupingExprsCopy.get(i).analyze(analyzer); if (groupingExprsCopy.get(i).contains(AggregateExpr.class)) { // reference the original expr in the error msg throw new AnalysisException( "GROUP BY expression must not contain aggregate functions: " + groupingExprs.get(i).toSql()); } } } // analyze having clause if (havingClause != null) { // substitute aliases in place (ordinals not allowed in having clause) havingPred = havingClause.clone(aliasSMap); havingPred.analyze(analyzer); havingPred.checkReturnsBool("HAVING clause", true); analyzer.registerConjuncts(havingPred, null, false); } List<Expr> orderingExprs = null; if (sortInfo != null) { orderingExprs = sortInfo.getOrderingExprs(); } ArrayList<AggregateExpr> aggExprs = collectAggExprs(); Expr.SubstitutionMap avgSMap = createAvgSMap(aggExprs, analyzer); // substitute AVG before constructing AggregateInfo Expr.substituteList(aggExprs, avgSMap); ArrayList<AggregateExpr> nonAvgAggExprs = Lists.newArrayList(); Expr.collectList(aggExprs, AggregateExpr.class, nonAvgAggExprs); aggExprs = nonAvgAggExprs; createAggInfo(groupingExprsCopy, aggExprs, analyzer); // combine avg smap with the one that produces the final agg output AggregateInfo finalAggInfo = aggInfo.getSecondPhaseDistinctAggInfo() != null ? aggInfo.getSecondPhaseDistinctAggInfo() : aggInfo; Expr.SubstitutionMap combinedSMap = Expr.SubstitutionMap.combine(avgSMap, finalAggInfo.getSMap()); LOG.debug("combined smap: " + combinedSMap.debugString()); // change select list, having and ordering exprs to point to agg output Expr.substituteList(resultExprs, combinedSMap); LOG.debug("post-agg selectListExprs: " + Expr.debugString(resultExprs)); if (havingPred != null) { havingPred = havingPred.substitute(combinedSMap); LOG.debug("post-agg havingPred: " + havingPred.debugString()); } Expr.substituteList(orderingExprs, combinedSMap); LOG.debug("post-agg orderingExprs: " + Expr.debugString(orderingExprs)); // check that all post-agg exprs point to agg output for (int i = 0; i < selectList.getItems().size(); ++i) { if (!resultExprs.get(i).isBound(finalAggInfo.getAggTupleId())) { throw new AnalysisException( "select list expression not produced by aggregation output " + "(missing from GROUP BY clause?): " + selectList.getItems().get(i).getExpr().toSql()); } } if (orderByElements != null) { for (int i = 0; i < orderByElements.size(); ++i) { if (!orderingExprs.get(i).isBound(finalAggInfo.getAggTupleId())) { throw new AnalysisException( "ORDER BY expression not produced by aggregation output " + "(missing from GROUP BY clause?): " + orderByElements.get(i).getExpr().toSql()); } } } if (havingPred != null) { if (!havingPred.isBound(finalAggInfo.getAggTupleId())) { throw new AnalysisException( "HAVING clause not produced by aggregation output " + "(missing from GROUP BY clause?): " + havingClause.toSql()); } } }
/** * Analyze aggregation-relevant components of the select block (Group By clause, select list, * Order By clause), substitute AVG with SUM/COUNT, create the AggregationInfo, including the agg * output tuple, and transform all post-agg exprs given AggregationInfo's smap. */ private void analyzeAggregation(Analyzer analyzer) throws AnalysisException, AuthorizationException { if (groupingExprs_ == null && !selectList_.isDistinct() && !TreeNode.contains(resultExprs_, Expr.isAggregatePredicate())) { // we're not computing aggregates return; } // If we're computing an aggregate, we must have a FROM clause. if (tableRefs_.size() == 0) { throw new AnalysisException("aggregation without a FROM clause is not allowed"); } if ((groupingExprs_ != null || TreeNode.contains(resultExprs_, Expr.isAggregatePredicate())) && selectList_.isDistinct()) { throw new AnalysisException( "cannot combine SELECT DISTINCT with aggregate functions or GROUP BY"); } // disallow '*' and explicit GROUP BY (we can't group by '*', and if you need to // name all star-expanded cols in the group by clause you might as well do it // in the select list) if (groupingExprs_ != null) { for (SelectListItem item : selectList_.getItems()) { if (item.isStar()) { throw new AnalysisException( "cannot combine '*' in select list with GROUP BY: " + item.toSql()); } } } // analyze grouping exprs ArrayList<Expr> groupingExprsCopy = Lists.newArrayList(); if (groupingExprs_ != null) { // make a deep copy here, we don't want to modify the original // exprs during analysis (in case we need to print them later) groupingExprsCopy = Expr.cloneList(groupingExprs_); substituteOrdinals(groupingExprsCopy, "GROUP BY"); Expr ambiguousAlias = getFirstAmbiguousAlias(groupingExprsCopy); if (ambiguousAlias != null) { throw new AnalysisException( "Column " + ambiguousAlias.toSql() + " in group by clause is ambiguous"); } Expr.substituteList(groupingExprsCopy, aliasSmap_); for (int i = 0; i < groupingExprsCopy.size(); ++i) { groupingExprsCopy.get(i).analyze(analyzer); if (groupingExprsCopy.get(i).contains(Expr.isAggregatePredicate())) { // reference the original expr in the error msg throw new AnalysisException( "GROUP BY expression must not contain aggregate functions: " + groupingExprs_.get(i).toSql()); } } } // analyze having clause if (havingClause_ != null) { // substitute aliases in place (ordinals not allowed in having clause) havingPred_ = havingClause_.clone(aliasSmap_); havingPred_.analyze(analyzer); havingPred_.checkReturnsBool("HAVING clause", true); } List<Expr> orderingExprs = null; if (sortInfo_ != null) orderingExprs = sortInfo_.getOrderingExprs(); // Collect the aggregate expressions from the SELECT, HAVING and ORDER BY clauses // of this statement. ArrayList<FunctionCallExpr> aggExprs = Lists.newArrayList(); TreeNode.collect(resultExprs_, Expr.isAggregatePredicate(), aggExprs); if (havingPred_ != null) { havingPred_.collect(Expr.isAggregatePredicate(), aggExprs); } if (sortInfo_ != null) { TreeNode.collect(sortInfo_.getOrderingExprs(), Expr.isAggregatePredicate(), aggExprs); } // substitute AVG before constructing AggregateInfo Expr.SubstitutionMap avgSMap = createAvgSMap(aggExprs, analyzer); ArrayList<Expr> substitutedAggs = Expr.cloneList(aggExprs, avgSMap); ArrayList<FunctionCallExpr> nonAvgAggExprs = Lists.newArrayList(); TreeNode.collect(substitutedAggs, Expr.isAggregatePredicate(), nonAvgAggExprs); // When DISTINCT aggregates are present, non-distinct (i.e. ALL) aggregates are // evaluated in two phases (see AggregateInfo for more details). In particular, // COUNT(c) in "SELECT COUNT(c), AGG(DISTINCT d) from R" is transformed to // "SELECT SUM(cnt) FROM (SELECT COUNT(c) as cnt from R group by d ) S". // Since a group-by expression is added to the inner query it returns no rows if // R is empty, in which case the SUM of COUNTs will return NULL. // However the original COUNT(c) should have returned 0 instead of NULL in this case. // Therefore, COUNT([ALL]) is transformed into zeroifnull(COUNT([ALL]) if // i) There is no GROUP-BY clause, and // ii) Other DISTINCT aggregates are present. Expr.SubstitutionMap countAllMap = createCountAllMap(nonAvgAggExprs, analyzer); substitutedAggs = Expr.cloneList(nonAvgAggExprs, countAllMap); aggExprs.clear(); TreeNode.collect(substitutedAggs, Expr.isAggregatePredicate(), aggExprs); try { createAggInfo(groupingExprsCopy, aggExprs, analyzer); } catch (InternalException e) { throw new AnalysisException(e.getMessage(), e); } // combine avg smap with the one that produces the final agg output AggregateInfo finalAggInfo = aggInfo_.getSecondPhaseDistinctAggInfo() != null ? aggInfo_.getSecondPhaseDistinctAggInfo() : aggInfo_; Expr.SubstitutionMap combinedSMap = Expr.SubstitutionMap.compose( Expr.SubstitutionMap.compose(avgSMap, countAllMap), finalAggInfo.getSMap()); LOG.debug("combined smap: " + combinedSMap.debugString()); // change select list, having and ordering exprs to point to agg output Expr.substituteList(resultExprs_, combinedSMap); LOG.debug("post-agg selectListExprs: " + Expr.debugString(resultExprs_)); if (havingPred_ != null) { havingPred_ = havingPred_.substitute(combinedSMap); analyzer.registerConjuncts(havingPred_, null, false); LOG.debug("post-agg havingPred: " + havingPred_.debugString()); } Expr.substituteList(orderingExprs, combinedSMap); LOG.debug("post-agg orderingExprs: " + Expr.debugString(orderingExprs)); // check that all post-agg exprs point to agg output for (int i = 0; i < selectList_.getItems().size(); ++i) { if (!resultExprs_.get(i).isBound(finalAggInfo.getAggTupleId())) { throw new AnalysisException( "select list expression not produced by aggregation output " + "(missing from GROUP BY clause?): " + selectList_.getItems().get(i).getExpr().toSql()); } } if (orderByElements_ != null) { for (int i = 0; i < orderByElements_.size(); ++i) { if (!orderingExprs.get(i).isBound(finalAggInfo.getAggTupleId())) { throw new AnalysisException( "ORDER BY expression not produced by aggregation output " + "(missing from GROUP BY clause?): " + orderByElements_.get(i).getExpr().toSql()); } } } if (havingPred_ != null) { if (!havingPred_.isBound(finalAggInfo.getAggTupleId())) { throw new AnalysisException( "HAVING clause not produced by aggregation output " + "(missing from GROUP BY clause?): " + havingClause_.toSql()); } } }