  /**
   * Checks whether all rewrite criteria are met. If a criterion fails, the reason why the
   * rewrite optimization cannot be applied is logged.
   *
   * @param canApplyCtx - context holding the results of the applicability checks
   * @return true if all rewrite criteria are met; false otherwise
   */
  boolean checkIfAllRewriteCriteriaIsMet(RewriteCanApplyCtx canApplyCtx) {
    if (canApplyCtx.getAggFuncCnt() > 1) {
      LOG.debug("More than 1 agg funcs: Not supported by " + getName() + " optimization.");
      return false;
    }
    if (canApplyCtx.isAggFuncIsNotCount()) {
      LOG.debug("Agg func other than count is not supported by " + getName() + " optimization.");
      return false;
    }
    if (canApplyCtx.isCountOnAllCols()) {
      LOG.debug("Currently count function needs group by on key columns. This is a count(*) case. "
          + "Cannot apply this " + getName() + " optimization.");
      return false;
    }
    if (canApplyCtx.isCountOfOne()) {
      LOG.debug("Currently count function needs group by on key columns. This is a count(1) case. "
          + "Cannot apply this " + getName() + " optimization.");
      return false;
    }
    if (canApplyCtx.isAggFuncColsFetchException()) {
      LOG.debug("Got exception while locating child col refs of agg func, skipping "
          + getName() + " optimization.");
      return false;
    }
    if (canApplyCtx.isWhrClauseColsFetchException()) {
      LOG.debug("Got exception while locating child col refs for where clause, skipping "
          + getName() + " optimization.");
      return false;
    }
    if (canApplyCtx.isSelClauseColsFetchException()) {
      LOG.debug("Got exception while locating child col refs for select list, skipping "
          + getName() + " optimization.");
      return false;
    }
    if (canApplyCtx.isGbyKeysFetchException()) {
      LOG.debug("Got exception while locating child col refs for GroupBy key, skipping "
          + getName() + " optimization.");
      return false;
    }
    return true;
  }
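  // Illustrative sketch (hypothetical table and column names, not taken from the code above):
  // for a table t with an aggregate index on column key, the criteria above reject branches such as
  //   SELECT count(*) FROM t GROUP BY key          -- count(*) case
  //   SELECT count(1) FROM t GROUP BY key          -- count(1) case
  //   SELECT key, max(key) FROM t GROUP BY key     -- agg func other than count
  // while a branch like SELECT key, count(key) FROM t GROUP BY key passes all of them.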
  /**
   * Checks whether the rewrite can be applied using the index, and verifies all conditions on
   * the operator tree.
   *
   * @param topOp - TableScanOperator of a single operator tree branch
   * @param baseTable - base table scanned by the branch
   * @param indexes - Map of a table to the list of indexes on it
   * @return - true if rewrite can be applied on the current branch; false otherwise
   * @throws SemanticException
   */
  private boolean checkIfRewriteCanBeApplied(TableScanOperator topOp, Table baseTable,
      Map<Table, List<Index>> indexes) throws SemanticException {
    boolean canApply = false;
    // Context for checking if this optimization can be applied to the input query
    RewriteCanApplyCtx canApplyCtx = RewriteCanApplyCtx.getInstance(parseContext);
    Map<String, Operator<? extends Serializable>> topOps = parseContext.getTopOps();

    canApplyCtx.setBaseTableName(baseTableName);
    canApplyCtx.populateRewriteVars(topOp);

    Map<Index, Set<String>> indexTableMap = getIndexToKeysMap(indexes.get(baseTable));
    Iterator<Index> indexMapItr = indexTableMap.keySet().iterator();
    Index index = null;
    while (indexMapItr.hasNext()) {
      // We rewrite the original query using the first valid index encountered.
      // This can be changed if we have a better mechanism to
      // decide which index will produce a better rewrite.
      index = indexMapItr.next();
      canApply = canApplyCtx.isIndexUsableForQueryBranchRewrite(index, indexTableMap.get(index));
      if (canApply) {
        canApply = checkIfAllRewriteCriteriaIsMet(canApplyCtx);
        if (canApply) {
          // Check if the aggregation function is set.
          // If not, set it using the only indexed column.
          if (canApplyCtx.getAggFunction() == null) {
            // strip off the start and end brackets of the key set's string form, e.g. "[key]" -> "key"
            String aggregationFunction = indexTableMap.get(index).toString();
            aggregationFunction =
                aggregationFunction.substring(1, aggregationFunction.length() - 1);
            canApplyCtx.setAggFunction("_count_of_" + aggregationFunction);
          }
        }
        // break here if any valid index is found to apply rewrite
        break;
      }
    }
    // guard against an empty index map, in which case no index was examined
    if (index != null) {
      indexTableName = index.getIndexTableName();
    }

    if (canApply && topOps.containsValue(topOp)) {
      for (Map.Entry<String, Operator<? extends Serializable>> topOpEntry : topOps.entrySet()) {
        if (topOpEntry.getValue().equals(topOp)) {
          tsOpToProcess.put(topOpEntry.getKey(), canApplyCtx);
        }
      }
    }

    return !tsOpToProcess.isEmpty();
  }
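  // Illustrative sketch (hypothetical names): assuming base table lineitem has an aggregate
  // index built on l_orderkey, a branch scanning lineitem for
  //   SELECT l_orderkey, count(l_orderkey) FROM lineitem GROUP BY l_orderkey
  // would pass the checks above; the branch is recorded in tsOpToProcess with the aggregation
  // function name _count_of_l_orderkey, to be rewritten later against the index table.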
  /**
   * Rewrites the input query if all optimization criteria are met. The method iterates over the
   * entries of the tsOpToProcess map to apply the rewrites.
   *
   * @throws SemanticException
   */
  @SuppressWarnings("unchecked")
  private void rewriteOriginalQuery() throws SemanticException {
    Map<String, Operator<? extends Serializable>> topOpMap =
        (HashMap<String, Operator<? extends Serializable>>) parseContext.getTopOps().clone();
    Iterator<String> tsOpItr = tsOpToProcess.keySet().iterator();
    while (tsOpItr.hasNext()) {
      baseTableName = tsOpItr.next();
      RewriteCanApplyCtx canApplyCtx = tsOpToProcess.get(baseTableName);
      TableScanOperator topOp = (TableScanOperator) topOpMap.get(baseTableName);
      RewriteQueryUsingAggregateIndexCtx rewriteQueryCtx =
          RewriteQueryUsingAggregateIndexCtx.getInstance(parseContext, hiveDb, indexTableName,
              baseTableName, canApplyCtx.getAggFunction());
      rewriteQueryCtx.invokeRewriteQueryProc(topOp);
      parseContext = rewriteQueryCtx.getParseContext();
      parseContext.setOpParseCtx(
          (LinkedHashMap<Operator<? extends Serializable>, OpParseContext>) rewriteQueryCtx.getOpc());
    }
    LOG.info("Finished Rewriting query");
  }
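  // Illustrative sketch (hypothetical names, conceptual only): for the example branch above,
  // the rewrite conceptually turns
  //   SELECT l_orderkey, count(l_orderkey) FROM lineitem GROUP BY l_orderkey
  // into a scan of the index table that aggregates the pre-computed counts, roughly
  //   SELECT l_orderkey, sum(`_count_of_l_orderkey`) FROM lineitem_index GROUP BY l_orderkey
  // by updating the table scan, select and group-by operators of the branch in place.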
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
      throws SemanticException {
    RewriteCanApplyCtx canApplyCtx = (RewriteCanApplyCtx) ctx;
    for (Node node : stack) {
      // For the table scan operator,
      // check the referenced columns to make sure that only the index column is
      // selected for the following operators.
      if (node instanceof TableScanOperator) {
        TableScanOperator ts = (TableScanOperator) node;
        canApplyCtx.setTableScanOperator(ts);
        List<String> selectColumns = ts.getConf().getReferencedColumns();
        if (selectColumns == null || selectColumns.size() != 1) {
          canApplyCtx.setSelClauseColsFetchException(true);
          return null;
        } else {
          canApplyCtx.setIndexKey(selectColumns.get(0));
        }
      } else if (node instanceof SelectOperator) {
        // For select operators in the stack, we just collect them
        if (canApplyCtx.getSelectOperators() == null) {
          canApplyCtx.setSelectOperators(new ArrayList<SelectOperator>());
        }
        canApplyCtx.getSelectOperators().add((SelectOperator) node);
      } else if (node instanceof GroupByOperator) {
        if (canApplyCtx.getGroupByOperators() == null) {
          canApplyCtx.setGroupByOperators(new ArrayList<GroupByOperator>());
        }
        // According to the pre-order,
        // the first GroupByOperator is the one before the ReduceSinkOperator
        // and the second one is the one after it
        GroupByOperator operator = (GroupByOperator) node;
        canApplyCtx.getGroupByOperators().add(operator);
        if (!canApplyCtx.isQueryHasGroupBy()) {
          canApplyCtx.setQueryHasGroupBy(true);
          GroupByDesc conf = operator.getConf();
          List<AggregationDesc> aggrList = conf.getAggregators();
          if (aggrList == null || aggrList.size() != 1
              || !("count".equals(aggrList.get(0).getGenericUDAFName()))) {
            // In the current implementation, we make sure that only count is used
            // as the aggregation function
            canApplyCtx.setAggFuncIsNotCount(true);
            return null;
          } else {
            List<ExprNodeDesc> para = aggrList.get(0).getParameters();
            if (para == null || para.size() != 1) {
              canApplyCtx.setAggParameterException(true);
              return null;
            } else {
              ExprNodeDesc expr = ExprNodeDescUtils.backtrack(para.get(0), operator,
                  (Operator<OperatorDesc>) stack.get(0));
              if (!(expr instanceof ExprNodeColumnDesc)) {
                canApplyCtx.setAggParameterException(true);
                return null;
              }
            }
          }
        }
      }
    }
    return null;
  }
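  // Illustrative sketch (assumed plan shape, not derived from this method alone): for a
  // qualifying branch the pre-order stack walked above is expected to look like
  //   TableScanOperator -> SelectOperator -> GroupByOperator (before RS)
  //     -> ReduceSinkOperator -> GroupByOperator (after RS)
  // with the table scan referencing exactly one column (the index key) and a single
  // count(<index key>) aggregator on the first group-by.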