/** Extract a minimal set of WindowGroups from analyticExprs. */ private List<WindowGroup> collectWindowGroups() { List<Expr> analyticExprs = analyticInfo_.getAnalyticExprs(); List<WindowGroup> groups = Lists.newArrayList(); for (int i = 0; i < analyticExprs.size(); ++i) { AnalyticExpr analyticExpr = (AnalyticExpr) analyticExprs.get(i); // Do not generate the plan for non-materialized analytic exprs. if (!analyticInfo_.getOutputTupleDesc().getSlots().get(i).isMaterialized()) { continue; } boolean match = false; for (WindowGroup group : groups) { if (group.isCompatible(analyticExpr)) { group.add( (AnalyticExpr) analyticInfo_.getAnalyticExprs().get(i), analyticInfo_.getOutputTupleDesc().getSlots().get(i), analyticInfo_.getIntermediateTupleDesc().getSlots().get(i)); match = true; break; } } if (!match) { groups.add( new WindowGroup( (AnalyticExpr) analyticInfo_.getAnalyticExprs().get(i), analyticInfo_.getOutputTupleDesc().getSlots().get(i), analyticInfo_.getIntermediateTupleDesc().getSlots().get(i))); } } return groups; }
/**
 * Stores the inputs needed for analytic planning.
 *
 * <p>State checks performed up front: {@code stmtTupleIds} must not be empty, and unless
 * it contains exactly one id, its last id must equal the output tuple id of
 * {@code analyticInfo}.
 *
 * @param stmtTupleIds tuple ids of the statement; must be non-empty
 * @param analyticInfo analytic info this planner works on
 * @param analyzer analyzer used when initializing generated plan nodes
 * @param idGenerator source of ids for newly created plan nodes
 */
public AnalyticPlanner(
    List<TupleId> stmtTupleIds,
    AnalyticInfo analyticInfo,
    Analyzer analyzer,
    IdGenerator<PlanNodeId> idGenerator) {
  Preconditions.checkState(!stmtTupleIds.isEmpty());
  // With more than one tuple id, the last one has to be the analytic output tuple.
  if (stmtTupleIds.size() != 1) {
    TupleId trailingId = stmtTupleIds.get(stmtTupleIds.size() - 1);
    Preconditions.checkState(trailingId.equals(analyticInfo.getOutputTupleId()));
  }
  this.stmtTupleIds_ = stmtTupleIds;
  this.analyticInfo_ = analyticInfo;
  this.analyzer_ = analyzer;
  this.idGenerator_ = idGenerator;
}
/** * Create plan tree for the entire sort group, including all contained window groups. Marks the * SortNode as requiring its input to be partitioned if partitionExprs is not null (partitionExprs * represent the data partition of the entire partition group of which this sort group is a part). */ private PlanNode createSortGroupPlan( PlanNode root, SortGroup sortGroup, List<Expr> partitionExprs) throws ImpalaException { List<Expr> partitionByExprs = sortGroup.partitionByExprs; List<OrderByElement> orderByElements = sortGroup.orderByElements; ExprSubstitutionMap sortSmap = null; TupleId sortTupleId = null; TupleDescriptor bufferedTupleDesc = null; // map from input to buffered tuple ExprSubstitutionMap bufferedSmap = new ExprSubstitutionMap(); // sort on partition by (pb) + order by (ob) exprs and create pb/ob predicates if (!partitionByExprs.isEmpty() || !orderByElements.isEmpty()) { // first sort on partitionExprs (direction doesn't matter) List<Expr> sortExprs = Lists.newArrayList(partitionByExprs); List<Boolean> isAsc = Lists.newArrayList(Collections.nCopies(sortExprs.size(), new Boolean(true))); // TODO: utilize a direction and nulls/first last that has benefit // for subsequent sort groups List<Boolean> nullsFirst = Lists.newArrayList(Collections.nCopies(sortExprs.size(), new Boolean(true))); // then sort on orderByExprs for (OrderByElement orderByElement : sortGroup.orderByElements) { sortExprs.add(orderByElement.getExpr()); isAsc.add(orderByElement.isAsc()); nullsFirst.add(orderByElement.getNullsFirstParam()); } SortInfo sortInfo = createSortInfo(root, sortExprs, isAsc, nullsFirst); SortNode sortNode = new SortNode(idGenerator_.getNextId(), root, sortInfo, false, 0); // if this sort group does not have partitioning exprs, we want the sort // to be executed like a regular distributed sort if (!partitionByExprs.isEmpty()) sortNode.setIsAnalyticSort(true); if (partitionExprs != null) { // create required input partition DataPartition inputPartition = 
DataPartition.UNPARTITIONED; if (!partitionExprs.isEmpty()) { inputPartition = new DataPartition(TPartitionType.HASH_PARTITIONED, partitionExprs); } sortNode.setInputPartition(inputPartition); } root = sortNode; root.init(analyzer_); sortSmap = sortNode.getOutputSmap(); // create bufferedTupleDesc and bufferedSmap sortTupleId = sortNode.tupleIds_.get(0); bufferedTupleDesc = analyzer_.getDescTbl().copyTupleDescriptor(sortTupleId, "buffered-tuple"); LOG.trace("desctbl: " + analyzer_.getDescTbl().debugString()); List<SlotDescriptor> inputSlots = analyzer_.getTupleDesc(sortTupleId).getSlots(); List<SlotDescriptor> bufferedSlots = bufferedTupleDesc.getSlots(); for (int i = 0; i < inputSlots.size(); ++i) { bufferedSmap.put(new SlotRef(inputSlots.get(i)), new SlotRef(bufferedSlots.get(i))); } } // create one AnalyticEvalNode per window group for (WindowGroup windowGroup : sortGroup.windowGroups) { // Create partition-by (pb) and order-by (ob) less-than predicates between the // input tuple (the output of the preceding sort) and a buffered tuple that is // identical to the input tuple. We need a different tuple descriptor for the // buffered tuple because the generated predicates should compare two different // tuple instances from the same input stream (i.e., the predicates should be // evaluated over a row that is composed of the input and the buffered tuple). 
// we need to remap the pb/ob exprs to a) the sort output, b) our buffer of the // sort input Expr partitionByEq = null; if (!windowGroup.partitionByExprs.isEmpty()) { partitionByEq = createNullMatchingEquals( Expr.substituteList(windowGroup.partitionByExprs, sortSmap, analyzer_, false), sortTupleId, bufferedSmap); LOG.trace("partitionByEq: " + partitionByEq.debugString()); } Expr orderByEq = null; if (!windowGroup.orderByElements.isEmpty()) { orderByEq = createNullMatchingEquals( OrderByElement.getOrderByExprs( OrderByElement.substitute(windowGroup.orderByElements, sortSmap, analyzer_)), sortTupleId, bufferedSmap); LOG.trace("orderByEq: " + orderByEq.debugString()); } root = new AnalyticEvalNode( idGenerator_.getNextId(), root, stmtTupleIds_, windowGroup.analyticFnCalls, windowGroup.partitionByExprs, windowGroup.orderByElements, windowGroup.window, analyticInfo_.getOutputTupleDesc(), windowGroup.physicalIntermediateTuple, windowGroup.physicalOutputTuple, windowGroup.logicalToPhysicalSmap, partitionByEq, orderByEq, bufferedTupleDesc); root.init(analyzer_); } return root; }