Пример #1
    private void insertRowResolvers(
        Operator<? extends OperatorDesc> op,
        Operator<? extends OperatorDesc> opClone,
        SkewJoinOptProcCtx ctx) {

      if (op instanceof TableScanOperator) {
        ctx.getCloneTSOpMap().put((TableScanOperator) opClone, (TableScanOperator) op);

      List<Operator<? extends OperatorDesc>> parents = op.getParentOperators();
      List<Operator<? extends OperatorDesc>> parentClones = opClone.getParentOperators();
      if ((parents != null)
          && (!parents.isEmpty())
          && (parentClones != null)
          && (!parentClones.isEmpty())) {
        for (int pos = 0; pos < parents.size(); pos++) {
          insertRowResolvers(parents.get(pos), parentClones.get(pos), ctx);
Пример #2
    /**
     * Processes a join operator for the skew-join rewrite: the join (and a directly following
     * select, if any) is cloned, a skew filter is inserted on the table scans of the original
     * and of the clone, the cloned table scans are registered under new aliases, and the code
     * then sets up a union followed by a select in place of the original operator.
     *
     * <p>Every visible exit path returns {@code null}. The method bails out early when the join
     * was already processed, when the table scans feeding the join cannot be collected, when no
     * skewed values are found, or when the operator tree cannot be cloned.
     *
     * <p>NOTE(review): as shown here the block has no closing braces at all and a few
     * statements look incomplete (flagged inline). The snippet appears to have lost lines;
     * verify against the original file before relying on it.
     *
     * @param nd          the node being visited; cast to {@code JoinOperator}
     * @param stack       not used in the visible code
     * @param procCtx     the walker context; cast to {@code SkewJoinOptProcCtx}
     * @param nodeOutputs not used in the visible code
     * @return always {@code null} in the visible code
     * @throws SemanticException declared by the signature; presumably raised by one of the
     *     helper calls -- not determinable from this block
     */
    public Object process(
        Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs)
        throws SemanticException {
      // We should be having a tree which looks like this
      //  TS -> * -> RS -
      //                  \
      //                   -> JOIN -> ..
      //                  /
      //  TS -> * -> RS -
      // We are in the join operator now.

      SkewJoinOptProcCtx ctx = (SkewJoinOptProcCtx) procCtx;
      parseContext = ctx.getpGraphContext();

      JoinOperator joinOp = (JoinOperator) nd;
      // This join has already been processed
      if (ctx.getDoneJoins().contains(joinOp)) {
        return null;


      // currOp is the operator that gets cloned: the join itself, or the select right after it.
      Operator<? extends OperatorDesc> currOp = joinOp;
      boolean processSelect = false;

      // Is there a select following
      // Clone the select also. It is useful for a follow-on optimization where the union
      // followed by a select star is completely removed.
      if ((joinOp.getChildOperators().size() == 1)
          && (joinOp.getChildOperators().get(0) instanceof SelectOperator)) {
        currOp = joinOp.getChildOperators().get(0);
        processSelect = true;

      // Collect the table scans feeding the join; give up if the helper reports failure.
      List<TableScanOperator> tableScanOpsForJoin = new ArrayList<TableScanOperator>();
      if (!getTableScanOpsForJoin(joinOp, tableScanOpsForJoin)) {
        return null;

      // The list was created just above, so only the isEmpty() half of this check can trigger.
      if ((tableScanOpsForJoin == null) || (tableScanOpsForJoin.isEmpty())) {
        return null;

      // Get the skewed values in all the tables
      Map<List<ExprNodeDesc>, List<List<String>>> skewedValues =
          getSkewedValues(joinOp, tableScanOpsForJoin);

      // If there are no skewed values, nothing needs to be done
      if (skewedValues == null || skewedValues.size() == 0) {
        return null;

      // After this optimization, the tree should be like:
      //  TS -> (FIL "skewed rows") * -> RS -
      //                                     \
      //                                       ->   JOIN
      //                                     /           \
      //  TS -> (FIL "skewed rows") * -> RS -             \
      //                                                   \
      //                                                     ->  UNION -> ..
      //                                                   /
      //  TS -> (FIL "no skewed rows") * -> RS -          /
      //                                        \        /
      //                                         -> JOIN
      //                                        /
      //  TS -> (FIL "no skewed rows") * -> RS -

      // Create a clone of the operator
      Operator<? extends OperatorDesc> currOpClone;
      try {
        currOpClone = currOp.clone();
        // Record which cloned table scan corresponds to which original table scan.
        insertRowResolvers(currOp, currOpClone, ctx);
      } catch (CloneNotSupportedException e) {
        LOG.debug("Operator tree could not be cloned");
        return null;

      // Locate the cloned join: it is the clone itself, or the parent of the cloned select.
      JoinOperator joinOpClone;
      if (processSelect) {
        joinOpClone = (JoinOperator) (currOpClone.getParentOperators().get(0));
      } else {
        joinOpClone = (JoinOperator) currOpClone;

      List<TableScanOperator> tableScanCloneOpsForJoin = new ArrayList<TableScanOperator>();
      if (!getTableScanOpsForJoin(joinOpClone, tableScanCloneOpsForJoin)) {
        LOG.debug("Operator tree not properly cloned!");
        return null;

      // Put the filter "skewed column = skewed keys" in op
      // and "skewed columns != skewed keys" in selectOpClone
      insertSkewFilter(tableScanOpsForJoin, skewedValues, true);

      insertSkewFilter(tableScanCloneOpsForJoin, skewedValues, false);

      // Update the topOps appropriately
      Map<String, Operator<? extends OperatorDesc>> topOps = getTopOps(joinOpClone);
      Map<String, Operator<? extends OperatorDesc>> origTopOps = parseContext.getTopOps();

      // Register each cloned table scan under an alias of the form "subquery<N>:<alias>".
      for (Entry<String, Operator<? extends OperatorDesc>> topOp : topOps.entrySet()) {
        TableScanOperator tso = (TableScanOperator) topOp.getValue();
        String tabAlias = tso.getConf().getAlias();
        int initCnt = 1;
        String newAlias = "subquery" + initCnt + ":" + tabAlias;
        // NOTE(review): initCnt is never changed inside this loop as shown, so newAlias is
        // recomputed to the same value; if that alias already exists the loop cannot end.
        // An increment of initCnt looks to be missing -- confirm against the original file.
        while (origTopOps.containsKey(newAlias)) {
          newAlias = "subquery" + initCnt + ":" + tabAlias;

        parseContext.getTopOps().put(newAlias, tso);
        setUpAlias(joinOp, joinOpClone, tabAlias, newAlias, tso);

      // Now do a union of the select operators: selectOp and selectOpClone
      // Store the operator that follows the select after the join, we will be
      // adding this as a child to the Union later
      List<Operator<? extends OperatorDesc>> finalOps = currOp.getChildOperators();

      // Make the union operator
      // NOTE(review): oplist is passed below while still empty in the visible code, and the
      // right-hand side of the unionOp assignment is a bare argument list with no callee.
      // The factory/constructor call and the statements filling oplist seem to be missing.
      List<Operator<? extends OperatorDesc>> oplist =
          new ArrayList<Operator<? extends OperatorDesc>>();
      Operator<? extends OperatorDesc> unionOp =
              new UnionDesc(), new RowSchema(currOp.getSchema().getSignature()), oplist);

      // Introduce a select after the union
      // NOTE(review): same issue as for unionOp -- unionList is empty in the visible code and
      // the selectUnionOp assignment has no callee in front of its argument list.
      List<Operator<? extends OperatorDesc>> unionList =
          new ArrayList<Operator<? extends OperatorDesc>>();

      Operator<? extends OperatorDesc> selectUnionOp =
              new SelectDesc(true), new RowSchema(unionOp.getSchema().getSignature()), unionList);

      // add the finalOp after the union
      // replace the original selectOp in the parents with selectUnionOp
      for (Operator<? extends OperatorDesc> finalOp : finalOps) {
        finalOp.replaceParent(currOp, selectUnionOp);
      return null;