Example #1
0
  /**
   * Collects the stream aliases (STREAMTABLE hints) of every join operator in the context.
   *
   * @param joinCtx parse context whose join operators are inspected
   * @return a new set holding the union of all non-null stream alias collections; empty when no
   *     join reports any
   */
  private Set<String> getBigTables(ParseContext joinCtx) {
    final Set<String> streamed = new HashSet<String>();
    for (JoinOperator join : joinCtx.getJoinOps()) {
      // Skip joins whose descriptor reports no stream aliases
      if (joinOpHasNoStreamAliases(join)) {
        continue;
      }
      streamed.addAll(join.getConf().getStreamAliases());
    }
    return streamed;
  }

  /** Returns true when the join's descriptor has a null stream alias collection. */
  private static boolean joinOpHasNoStreamAliases(JoinOperator join) {
    return join.getConf().getStreamAliases() == null;
  }
Example #2
0
  /**
   * Moves the parent with the largest output size into the last position of a join by swapping
   * its tag with the tag of the current last parent. Nothing is changed when the last parent is
   * already the largest; on ties the last parent wins, then the lowest index.
   *
   * @param joinOp The join operator to be processed
   * @param bigTables Set of all big tables, forwarded to {@code getOutputSize}
   */
  private void reorder(JoinOperator joinOp, Set<String> bigTables) {
    final int lastPos = joinOp.getParentOperators().size() - 1;

    // Seed the search with the last parent, then compare every earlier parent against it
    int maxPos = lastPos;
    int maxSize = getOutputSize(joinOp.getParentOperators().get(lastPos), bigTables);
    for (int pos = 0; pos < lastPos; pos++) {
      final int size = getOutputSize(joinOp.getParentOperators().get(pos), bigTables);
      if (size > maxSize) {
        maxSize = size;
        maxPos = pos;
      }
    }

    // The biggest input is already last: no reordering required
    if (maxPos == lastPos) {
      return;
    }

    // Swap the two entries in the join's tag order
    final Byte[] tagOrder = joinOp.getConf().getTagOrder();
    final Byte biggestTag = tagOrder[maxPos];
    tagOrder[maxPos] = tagOrder[lastPos];
    tagOrder[lastPos] = biggestTag;

    // Mirror the swap on the tags carried by the two reduce sinks
    final ReduceSinkOperator biggestSink =
        (ReduceSinkOperator) joinOp.getParentOperators().get(maxPos);
    biggestSink.getConf().setTag(lastPos);
    final ReduceSinkOperator lastSink =
        (ReduceSinkOperator) joinOp.getParentOperators().get(lastPos);
    lastSink.getConf().setTag(maxPos);
  }
 /**
  * Handles a join node by delegating to {@code pruneJoinOperator} with the join's own descriptor
  * and column expression map.
  *
  * @param nd the visited node; must be a {@code JoinOperator}
  * @param stack traversal stack (not used here)
  * @param ctx processor context forwarded to {@code pruneJoinOperator}
  * @param nodeOutputs outputs of previously processed nodes (not used here)
  * @return always {@code null}
  * @throws SemanticException declared by the processor contract
  */
 @Override
 public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
     throws SemanticException {
   final JoinOperator join = (JoinOperator) nd;
   pruneJoinOperator(ctx, join, join.getConf(), join.getColumnExprMap(), null, false);
   return null;
 }
 /**
  * Handles the pattern pRS-pJOIN-cRS: when the child reduce sink can be merged into the parent
  * join, the child is replaced by a select operator, the join is flagged as fixed-as-sorted and
  * the join's parent reduce sink (if one is found) is flagged as deduplicated.
  *
  * @param cRS the child reduce sink under consideration
  * @param dedupCtx deduplication context supplying the trust-script flag, minimum reducer
  *     setting and parse context
  * @return {@code true} when the child reduce sink was merged and replaced, {@code false}
  *     otherwise
  * @throws SemanticException declared by the processor contract
  */
 @Override
 public Object process(ReduceSinkOperator cRS, ReduceSinkDeduplicateProcCtx dedupCtx)
     throws SemanticException {
   JoinOperator parentJoin =
       CorrelationUtilities.findPossibleParent(cRS, JoinOperator.class, dedupCtx.trustScript());

   // Nothing to do without a parent join, or when the merge attempt is rejected
   if (parentJoin == null || !merge(cRS, parentJoin, dedupCtx.minReducer())) {
     return false;
   }

   parentJoin.getConf().setFixedAsSorted(true);
   CorrelationUtilities.replaceReduceSinkWithSelectOperator(cRS, dedupCtx.getPctx(), dedupCtx);

   ReduceSinkOperator parentSink =
       CorrelationUtilities.findPossibleParent(
           parentJoin, ReduceSinkOperator.class, dedupCtx.trustScript());
   if (parentSink != null) {
     parentSink.getConf().setDeduplicated(true);
   }
   return true;
 }
 /**
  * Handles the pattern pRS-pJOIN-cRS-cGBY: when the child reduce sink can be merged into the
  * parent join, the reduce sink is removed for the group-by, the join is flagged as
  * fixed-as-sorted and the join's parent reduce sink (if one is found) is flagged as
  * deduplicated.
  *
  * @param cRS the child reduce sink under consideration
  * @param cGBY the group-by operator that follows {@code cRS}
  * @param dedupCtx deduplication context supplying the trust-script flag, minimum reducer
  *     setting and parse context
  * @return {@code true} when the child reduce sink was merged and removed, {@code false}
  *     otherwise
  * @throws SemanticException declared by the processor contract
  */
 @Override
 public Object process(
     ReduceSinkOperator cRS, GroupByOperator cGBY, ReduceSinkDeduplicateProcCtx dedupCtx)
     throws SemanticException {
   // The parent search begins at the operator chosen by getStartForGroupBy, not at cRS itself
   Operator<?> searchStart = CorrelationUtilities.getStartForGroupBy(cRS, dedupCtx);
   JoinOperator parentJoin =
       CorrelationUtilities.findPossibleParent(
           searchStart, JoinOperator.class, dedupCtx.trustScript());

   // Nothing to do without a parent join, or when the merge attempt is rejected
   if (parentJoin == null || !merge(cRS, parentJoin, dedupCtx.minReducer())) {
     return false;
   }

   parentJoin.getConf().setFixedAsSorted(true);
   CorrelationUtilities.removeReduceSinkForGroupBy(cRS, cGBY, dedupCtx.getPctx(), dedupCtx);

   ReduceSinkOperator parentSink =
       CorrelationUtilities.findPossibleParent(
           parentJoin, ReduceSinkOperator.class, dedupCtx.trustScript());
   if (parentSink != null) {
     parentSink.getConf().setDeduplicated(true);
   }
   return true;
 }
Example #6
0
    /**
     * Rewrites a join over skewed tables into a union of two joins: the original join restricted
     * to the skewed key values, and a clone of it restricted to the remaining values.
     *
     * <p>The method returns without changing the plan when the join was already processed, when
     * the table scans feeding the join cannot be collected, when no skewed values are found, or
     * when the operator tree cannot be cloned.
     *
     * @param nd the visited node; must be a {@code JoinOperator}
     * @param stack traversal stack (not used here)
     * @param procCtx processor context; must be a {@code SkewJoinOptProcCtx}
     * @param nodeOutputs outputs of previously processed nodes (not used here)
     * @return always {@code null}
     * @throws SemanticException declared by the processor contract
     */
    @Override
    public Object process(
        Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs)
        throws SemanticException {
      // We should be having a tree which looks like this
      //  TS -> * -> RS -
      //                  \
      //                   -> JOIN -> ..
      //                  /
      //  TS -> * -> RS -
      //
      // We are in the join operator now.

      SkewJoinOptProcCtx ctx = (SkewJoinOptProcCtx) procCtx;
      // Keep the parse context around; it is used below to register the cloned join and top ops
      parseContext = ctx.getpGraphContext();

      JoinOperator joinOp = (JoinOperator) nd;
      // This join has already been processed
      if (ctx.getDoneJoins().contains(joinOp)) {
        return null;
      }

      // Mark the join as done up front, so it is not revisited even if we bail out below
      ctx.getDoneJoins().add(joinOp);

      // currOp is the operator that gets cloned: the join itself, or the single select below it
      Operator<? extends OperatorDesc> currOp = joinOp;
      boolean processSelect = false;

      // Is there a select following
      // Clone the select also. It is useful for a follow-on optimization where the union
      // followed by a select star is completely removed.
      if ((joinOp.getChildOperators().size() == 1)
          && (joinOp.getChildOperators().get(0) instanceof SelectOperator)) {
        currOp = joinOp.getChildOperators().get(0);
        processSelect = true;
      }

      // Collect the table scans feeding the join; give up if the helper reports failure
      List<TableScanOperator> tableScanOpsForJoin = new ArrayList<TableScanOperator>();
      if (!getTableScanOpsForJoin(joinOp, tableScanOpsForJoin)) {
        return null;
      }

      // NOTE(review): the list is created just above, so the null test can never be true;
      // only the emptiness test is effective.
      if ((tableScanOpsForJoin == null) || (tableScanOpsForJoin.isEmpty())) {
        return null;
      }

      // Get the skewed values in all the tables
      Map<List<ExprNodeDesc>, List<List<String>>> skewedValues =
          getSkewedValues(joinOp, tableScanOpsForJoin);

      // If there are no skewed values, nothing needs to be done
      if (skewedValues == null || skewedValues.size() == 0) {
        return null;
      }

      // After this optimization, the tree should be like:
      //  TS -> (FIL "skewed rows") * -> RS -
      //                                     \
      //                                       ->   JOIN
      //                                     /           \
      //  TS -> (FIL "skewed rows") * -> RS -             \
      //                                                   \
      //                                                     ->  UNION -> ..
      //                                                   /
      //  TS -> (FIL "no skewed rows") * -> RS -          /
      //                                        \        /
      //                                         -> JOIN
      //                                        /
      //  TS -> (FIL "no skewed rows") * -> RS -
      //

      // Create a clone of the operator
      Operator<? extends OperatorDesc> currOpClone;
      try {
        currOpClone = currOp.clone();
        insertRowResolvers(currOp, currOpClone, ctx);
      } catch (CloneNotSupportedException e) {
        // Cloning is best effort: without a clone the plan is left as it is
        LOG.debug("Operator tree could not be cloned");
        return null;
      }

      // Locate the cloned join: it is the clone itself, or the parent of the cloned select
      JoinOperator joinOpClone;
      if (processSelect) {
        joinOpClone = (JoinOperator) (currOpClone.getParentOperators().get(0));
      } else {
        joinOpClone = (JoinOperator) currOpClone;
      }
      // Carry the join tree properties of the original descriptor over to the clone and
      // register the cloned join with the parse context
      joinOpClone.getConf().cloneQBJoinTreeProps(joinOp.getConf());
      parseContext.getJoinOps().add(joinOpClone);

      // Collect the table scans feeding the cloned join
      List<TableScanOperator> tableScanCloneOpsForJoin = new ArrayList<TableScanOperator>();
      if (!getTableScanOpsForJoin(joinOpClone, tableScanCloneOpsForJoin)) {
        LOG.debug("Operator tree not properly cloned!");
        return null;
      }

      // Put the filter "skewed column = skewed keys" on the original table scans
      // and "skewed columns != skewed keys" on the cloned table scans
      insertSkewFilter(tableScanOpsForJoin, skewedValues, true);

      insertSkewFilter(tableScanCloneOpsForJoin, skewedValues, false);

      // Update the topOps appropriately
      Map<String, Operator<? extends OperatorDesc>> topOps = getTopOps(joinOpClone);
      Map<String, Operator<? extends OperatorDesc>> origTopOps = parseContext.getTopOps();

      for (Entry<String, Operator<? extends OperatorDesc>> topOp : topOps.entrySet()) {
        TableScanOperator tso = (TableScanOperator) topOp.getValue();
        String tabAlias = tso.getConf().getAlias();
        // Find the first "subquery<N>:<alias>" name that is not yet taken in the parse context
        int initCnt = 1;
        String newAlias = "subquery" + initCnt + ":" + tabAlias;
        while (origTopOps.containsKey(newAlias)) {
          initCnt++;
          newAlias = "subquery" + initCnt + ":" + tabAlias;
        }

        // Register the cloned table scan under the new alias and rename it in the cloned join
        parseContext.getTopOps().put(newAlias, tso);
        setUpAlias(joinOp, joinOpClone, tabAlias, newAlias, tso);
      }

      // Now do a union of the two branches: currOp and currOpClone
      // Store the operators that follow currOp, we will be
      // adding these as children of the select above the union later
      List<Operator<? extends OperatorDesc>> finalOps = currOp.getChildOperators();
      currOp.setChildOperators(null);
      currOpClone.setChildOperators(null);

      // Make the union operator
      List<Operator<? extends OperatorDesc>> oplist =
          new ArrayList<Operator<? extends OperatorDesc>>();
      oplist.add(currOp);
      oplist.add(currOpClone);
      Operator<? extends OperatorDesc> unionOp =
          OperatorFactory.getAndMakeChild(
              new UnionDesc(), new RowSchema(currOp.getSchema().getSignature()), oplist);

      // Introduce a select after the union
      List<Operator<? extends OperatorDesc>> unionList =
          new ArrayList<Operator<? extends OperatorDesc>>();
      unionList.add(unionOp);

      Operator<? extends OperatorDesc> selectUnionOp =
          OperatorFactory.getAndMakeChild(
              new SelectDesc(true), new RowSchema(unionOp.getSchema().getSignature()), unionList);

      // add the finalOps after the select that follows the union
      selectUnionOp.setChildOperators(finalOps);
      // replace currOp with selectUnionOp as the parent of each final operator
      for (Operator<? extends OperatorDesc> finalOp : finalOps) {
        finalOp.replaceParent(currOp, selectUnionOp);
      }
      return null;
    }
Example #7
0
 /**
  * Renames a table alias inside the descriptor of a cloned join.
  *
  * <p>The alias-to-operator entry for {@code origAlias} is replaced by an entry mapping
  * {@code newAlias} to {@code topOp}. The left alias of the clone is switched to
  * {@code newAlias} when the original join uses {@code origAlias} as its left alias. Finally
  * {@code replaceAlias} is applied to the left, right, base-source, map and stream alias
  * collections of the two descriptors.
  *
  * @param origin the original join, consulted for its current aliases
  * @param cloned the cloned join whose descriptor is updated
  * @param origAlias the alias used by the original join
  * @param newAlias the alias to use in the cloned join
  * @param topOp the operator to register under {@code newAlias}
  */
 private static void setUpAlias(
     JoinOperator origin,
     JoinOperator cloned,
     String origAlias,
     String newAlias,
     Operator<? extends OperatorDesc> topOp) {
   cloned.getConf().getAliasToOpInfo().remove(origAlias);
   cloned.getConf().getAliasToOpInfo().put(newAlias, topOp);

   // The original join may have no left alias at all; compare null-safely so that such a
   // join is simply left untouched instead of failing with a NullPointerException.
   String leftAlias = origin.getConf().getLeftAlias();
   if (leftAlias != null && leftAlias.equals(origAlias)) {
     cloned.getConf().setLeftAlias(newAlias);
   }

   replaceAlias(
       origin.getConf().getLeftAliases(),
       cloned.getConf().getLeftAliases(),
       origAlias,
       newAlias);
   replaceAlias(
       origin.getConf().getRightAliases(),
       cloned.getConf().getRightAliases(),
       origAlias,
       newAlias);
   replaceAlias(
       origin.getConf().getBaseSrc(), cloned.getConf().getBaseSrc(), origAlias, newAlias);
   replaceAlias(
       origin.getConf().getMapAliases(), cloned.getConf().getMapAliases(), origAlias, newAlias);
   replaceAlias(
       origin.getConf().getStreamAliases(),
       cloned.getConf().getStreamAliases(),
       origAlias,
       newAlias);
 }