/**
 * Walks an operator tree and its clone in lock-step, from the given operator up
 * through its parents, and records every cloned table scan against the original
 * table scan it was copied from in {@code ctx.getCloneTSOpMap()}.
 *
 * <p>The two trees are assumed to have the same shape: the clone of the i-th parent
 * of {@code op} is taken to be the i-th parent of {@code opClone}.
 *
 * @param op      operator in the original tree
 * @param opClone operator at the same position in the cloned tree
 * @param ctx     skew join context whose clone-to-original table scan map is updated
 */
private void insertRowResolvers(
    Operator<? extends OperatorDesc> op,
    Operator<? extends OperatorDesc> opClone,
    SkewJoinOptProcCtx ctx) {
  // Remember which original table scan each cloned table scan corresponds to.
  if (op instanceof TableScanOperator) {
    TableScanOperator clonedScan = (TableScanOperator) opClone;
    TableScanOperator originalScan = (TableScanOperator) op;
    ctx.getCloneTSOpMap().put(clonedScan, originalScan);
  }

  List<Operator<? extends OperatorDesc>> origParents = op.getParentOperators();
  List<Operator<? extends OperatorDesc>> clonedParents = opClone.getParentOperators();

  // Nothing further to pair up once either side has run out of parents.
  if (origParents == null || origParents.isEmpty()) {
    return;
  }
  if (clonedParents == null || clonedParents.isEmpty()) {
    return;
  }

  // Recurse pairwise; positions in both parent lists are expected to line up.
  for (int idx = 0; idx < origParents.size(); idx++) {
    Operator<? extends OperatorDesc> origParent = origParents.get(idx);
    Operator<? extends OperatorDesc> clonedParent = clonedParents.get(idx);
    insertRowResolvers(origParent, clonedParent, ctx);
  }
}
@Override public Object process( Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { // We should be having a tree which looks like this // TS -> * -> RS - // \ // -> JOIN -> .. // / // TS -> * -> RS - // // We are in the join operator now. SkewJoinOptProcCtx ctx = (SkewJoinOptProcCtx) procCtx; parseContext = ctx.getpGraphContext(); JoinOperator joinOp = (JoinOperator) nd; // This join has already been processed if (ctx.getDoneJoins().contains(joinOp)) { return null; } ctx.getDoneJoins().add(joinOp); Operator<? extends OperatorDesc> currOp = joinOp; boolean processSelect = false; // Is there a select following // Clone the select also. It is useful for a follow-on optimization where the union // followed by a select star is completely removed. if ((joinOp.getChildOperators().size() == 1) && (joinOp.getChildOperators().get(0) instanceof SelectOperator)) { currOp = joinOp.getChildOperators().get(0); processSelect = true; } List<TableScanOperator> tableScanOpsForJoin = new ArrayList<TableScanOperator>(); if (!getTableScanOpsForJoin(joinOp, tableScanOpsForJoin)) { return null; } if ((tableScanOpsForJoin == null) || (tableScanOpsForJoin.isEmpty())) { return null; } // Get the skewed values in all the tables Map<List<ExprNodeDesc>, List<List<String>>> skewedValues = getSkewedValues(joinOp, tableScanOpsForJoin); // If there are no skewed values, nothing needs to be done if (skewedValues == null || skewedValues.size() == 0) { return null; } // After this optimization, the tree should be like: // TS -> (FIL "skewed rows") * -> RS - // \ // -> JOIN // / \ // TS -> (FIL "skewed rows") * -> RS - \ // \ // -> UNION -> .. // / // TS -> (FIL "no skewed rows") * -> RS - / // \ / // -> JOIN // / // TS -> (FIL "no skewed rows") * -> RS - // // Create a clone of the operator Operator<? 
extends OperatorDesc> currOpClone; try { currOpClone = currOp.clone(); insertRowResolvers(currOp, currOpClone, ctx); } catch (CloneNotSupportedException e) { LOG.debug("Operator tree could not be cloned"); return null; } JoinOperator joinOpClone; if (processSelect) { joinOpClone = (JoinOperator) (currOpClone.getParentOperators().get(0)); } else { joinOpClone = (JoinOperator) currOpClone; } joinOpClone.getConf().cloneQBJoinTreeProps(joinOp.getConf()); parseContext.getJoinOps().add(joinOpClone); List<TableScanOperator> tableScanCloneOpsForJoin = new ArrayList<TableScanOperator>(); if (!getTableScanOpsForJoin(joinOpClone, tableScanCloneOpsForJoin)) { LOG.debug("Operator tree not properly cloned!"); return null; } // Put the filter "skewed column = skewed keys" in op // and "skewed columns != skewed keys" in selectOpClone insertSkewFilter(tableScanOpsForJoin, skewedValues, true); insertSkewFilter(tableScanCloneOpsForJoin, skewedValues, false); // Update the topOps appropriately Map<String, Operator<? extends OperatorDesc>> topOps = getTopOps(joinOpClone); Map<String, Operator<? extends OperatorDesc>> origTopOps = parseContext.getTopOps(); for (Entry<String, Operator<? extends OperatorDesc>> topOp : topOps.entrySet()) { TableScanOperator tso = (TableScanOperator) topOp.getValue(); String tabAlias = tso.getConf().getAlias(); int initCnt = 1; String newAlias = "subquery" + initCnt + ":" + tabAlias; while (origTopOps.containsKey(newAlias)) { initCnt++; newAlias = "subquery" + initCnt + ":" + tabAlias; } parseContext.getTopOps().put(newAlias, tso); setUpAlias(joinOp, joinOpClone, tabAlias, newAlias, tso); } // Now do a union of the select operators: selectOp and selectOpClone // Store the operator that follows the select after the join, we will be // adding this as a child to the Union later List<Operator<? 
extends OperatorDesc>> finalOps = currOp.getChildOperators(); currOp.setChildOperators(null); currOpClone.setChildOperators(null); // Make the union operator List<Operator<? extends OperatorDesc>> oplist = new ArrayList<Operator<? extends OperatorDesc>>(); oplist.add(currOp); oplist.add(currOpClone); Operator<? extends OperatorDesc> unionOp = OperatorFactory.getAndMakeChild( new UnionDesc(), new RowSchema(currOp.getSchema().getSignature()), oplist); // Introduce a select after the union List<Operator<? extends OperatorDesc>> unionList = new ArrayList<Operator<? extends OperatorDesc>>(); unionList.add(unionOp); Operator<? extends OperatorDesc> selectUnionOp = OperatorFactory.getAndMakeChild( new SelectDesc(true), new RowSchema(unionOp.getSchema().getSignature()), unionList); // add the finalOp after the union selectUnionOp.setChildOperators(finalOps); // replace the original selectOp in the parents with selectUnionOp for (Operator<? extends OperatorDesc> finalOp : finalOps) { finalOp.replaceParent(currOp, selectUnionOp); } return null; }