Ejemplo n.º 1
0
  /**
   * Walks the operator graph of the given parse context and attaches the filter predicates
   * collected by {@code JoinTransitive} above the corresponding reduce sinks.
   *
   * <p>A single rule matching a Filter-ReduceSink-Join operator chain is registered. After the
   * walk, every reduce sink recorded in the {@code TransitiveContext} gets its new predicates
   * either merged into its first parent (when that parent is already a filter) or handed to
   * {@code createFilter} together with the parent and the parent's schema.
   *
   * @param pctx the parse context to process; it is also stored in {@code pGraphContext}
   * @return the same parse context that was passed in
   * @throws SemanticException if walking the operator graph fails
   */
  @Override
  public ParseContext transform(ParseContext pctx) throws SemanticException {
    pGraphContext = pctx;

    // Single rule: FIL%RS%JOIN% fires the JoinTransitive processor.
    String filterRsJoinPattern =
        "("
            + FilterOperator.getOperatorName()
            + "%"
            + ReduceSinkOperator.getOperatorName()
            + "%"
            + JoinOperator.getOperatorName()
            + "%)";
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    opRules.put(new RuleRegExp("R1", filterRsJoinPattern), new JoinTransitive());

    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    TransitiveContext context = new TransitiveContext();
    Dispatcher disp = new DefaultRuleDispatcher(null, opRules, context);
    GraphWalker ogw = new LevelOrderWalker(disp, 2);

    // Start the walk from all top operators
    List<Node> topNodes = new ArrayList<Node>(pGraphContext.getTopOps().values());
    ogw.startWalking(topNodes, null);

    // insert new filter between RS and parent of RS
    for (Map.Entry<ReduceSinkOperator, List<ExprNodeDesc>> entry :
        context.getNewfilters().entrySet()) {
      ReduceSinkOperator reducer = entry.getKey();
      Operator<?> parent = reducer.getParentOperators().get(0);
      List<ExprNodeDesc> exprs = entry.getValue();

      if (parent instanceof FilterOperator) {
        // A filter already sits above the RS: fold the new predicates into it.
        FilterDesc filterConf = ((FilterOperator) parent).getConf();
        exprs = ExprNodeDescUtils.split(filterConf.getPredicate(), exprs);
        filterConf.setPredicate(ExprNodeDescUtils.mergePredicates(exprs));
      } else {
        ExprNodeDesc merged = ExprNodeDescUtils.mergePredicates(exprs);
        // The returned operator is not needed here; createFilter is invoked for its effect on
        // the operator graph (presumably wiring the filter between parent and reducer).
        createFilter(reducer, parent, parent.getSchema(), merged);
      }
    }

    return pGraphContext;
  }
 /**
  * Compares the key expressions of a child RS with those of its parent RS.
  *
  * <p>The overlapping part of the keys must be the same between parent and child. If the child
  * has more keys than the parent, the non-overlapping child keys must be backtrackable to the
  * parent.
  *
  * @param ckeys key expressions of the child RS; may be null or empty
  * @param pkeys key expressions of the parent RS; may be null or empty
  * @param cRS the child reduce sink
  * @param pRS the parent reduce sink
  * @return {@code null} when a child key cannot be backtracked to the parent (or {@code sameKeys}
  *     reports a mismatch); {@code 0} when neither side has keys; {@code -1} when only the parent
  *     has keys; {@code 1} when only the child has keys; otherwise the result of {@code sameKeys}
  * @throws SemanticException if backtracking fails
  */
 private Integer checkExprs(
     List<ExprNodeDesc> ckeys,
     List<ExprNodeDesc> pkeys,
     ReduceSinkOperator cRS,
     ReduceSinkOperator pRS)
     throws SemanticException {
   boolean childHasKeys = ckeys != null && !ckeys.isEmpty();
   boolean parentHasKeys = pkeys != null && !pkeys.isEmpty();

   if (!childHasKeys) {
     return parentHasKeys ? -1 : 0;
   }
   if (parentHasKeys) {
     return sameKeys(ckeys, pkeys, cRS, pRS);
   }

   // Only the child has keys: each one must be resolvable in the parent.
   for (ExprNodeDesc childKey : ckeys) {
     if (ExprNodeDescUtils.backtrack(childKey, cRS, pRS) == null) {
       // child key is not present in parent
       return null;
     }
   }
   return 1;
 }
 // Tries to infer the sort columns of the query. This only works when the operator
 // sequence is pRS-SEL*-fsParent, i.e. fsParent is a SEL that reaches a RS through
 // single-parent SELs only.
 // Results are reported through the two output lists (the method itself returns nothing):
 //  - for every key column of pRS, its index within the backtracked SEL columns is appended
 //    to sortPositions and its direction to sortOrder (1 asc, 0 desc);
 //  - if the operator shape does not match, both lists are left untouched;
 //  - if some key column is not found among the SEL columns, both lists are cleared.
 private void inferSortPositions(
     Operator<? extends OperatorDesc> fsParent,
     List<Integer> sortPositions,
     List<Integer> sortOrder)
     throws SemanticException {
   // Anything other than a SEL directly above the sink: nothing to infer
   if (!(fsParent instanceof SelectOperator)) {
     return;
   }
   SelectOperator pSel = (SelectOperator) fsParent;

   // Climb through single-parent SELs until a RS is reached
   Operator<? extends OperatorDesc> current = pSel;
   while (!(current instanceof ReduceSinkOperator)) {
     boolean singleParentSelect =
         current.getNumParent() == 1 && current instanceof SelectOperator;
     if (!singleParentSelect) {
       return;
     }
     current = current.getParentOperators().get(0);
   }

   // Express the SEL columns in terms of pRS
   List<ExprNodeDesc> selColsInPRS =
       ExprNodeDescUtils.backtrack(pSel.getConf().getColList(), pSel, current);
   ReduceSinkOperator pRS = (ReduceSinkOperator) current;

   for (int keyIdx = 0; keyIdx < pRS.getConf().getKeyCols().size(); keyIdx++) {
     ExprNodeDesc keyCol = pRS.getConf().getKeyCols().get(keyIdx);
     int selIdx = selColsInPRS.indexOf(keyCol);
     if (selIdx < 0) {
       // A key column is not projected by the SEL: give up and report nothing
       sortPositions.clear();
       sortOrder.clear();
       return;
     }
     sortPositions.add(selIdx);
     boolean ascending = pRS.getConf().getOrder().charAt(keyIdx) == '+';
     sortOrder.add(ascending ? 1 : 0); // 1 asc, 0 desc
   }
 }
 /**
  * Checks whether every dynamic-partition column resolves to a constant expression.
  *
  * <p>The dynamic-partition columns are taken to be the last {@code getNumDPCols()} column names
  * of {@code op}'s schema. Each of them is resolved with
  * {@code ExprNodeDescUtils.findConstantExprOrigin}; the result must be an
  * {@code ExprNodeConstantDesc} for all of them.
  *
  * @param op operator whose trailing schema columns are the dynamic-partition columns
  * @param dynPartCtx supplies the number of dynamic-partition columns
  * @return {@code true} if all dynamic-partition columns resolve to constants; {@code false} if
  *     any does not, or if neither {@code op} nor any of its direct parents has a column
  *     expression map
  */
 private boolean allStaticPartitions(
     Operator<? extends OperatorDesc> op, final DynamicPartitionCtx dynPartCtx) {
   List<String> columnNames = op.getSchema().getColumnNames();
   int numDpCols = dynPartCtx.getNumDPCols();
   int numCols = columnNames.size();
   List<String> dpCols = op.getSchema().getColumnNames().subList(numCols - numDpCols, numCols);

   // Operator used for the lookups: op itself, or its first direct parent that has a
   // non-null column expression map when op has none.
   Operator<? extends OperatorDesc> lookupOp = op;
   if (lookupOp.getColumnExprMap() == null) {
     for (Operator<? extends OperatorDesc> candidate : lookupOp.getParentOperators()) {
       if (candidate.getColumnExprMap() != null) {
         lookupOp = candidate;
         break;
       }
     }
   }

   if (lookupOp.getColumnExprMap() == null) {
     return false;
   }

   for (String dpCol : dpCols) {
     ExprNodeDesc origin = ExprNodeDescUtils.findConstantExprOrigin(dpCol, lookupOp);
     if (!(origin instanceof ExprNodeConstantDesc)) {
       return false;
     }
   }
   return true;
 }
  /**
   * Prunes the value columns of a reduce sink according to {@code retainFlags}.
   *
   * <p>For index {@code i}, the i-th output value column is kept when {@code retainFlags[i]} is
   * true. Otherwise its entry is removed from the row resolver, the column expression map and the
   * schema signature, unless its expression also appears among the key expressions of the RS.
   * Finally the RS descriptor receives the kept value names/expressions and a value serialization
   * descriptor rebuilt from them.
   *
   * @param retainFlags one flag per value column; {@code true} keeps the column
   * @param reduce the reduce sink to prune (modified in place)
   * @param cppCtx column pruner context used to find the row resolver of {@code reduce}
   * @throws SemanticException declared by the signature; propagated from the calls made here
   */
  private static void pruneReduceSinkOperator(
      boolean[] retainFlags, ReduceSinkOperator reduce, ColumnPrunerProcCtx cppCtx)
      throws SemanticException {
    ReduceSinkDesc reduceConf = reduce.getConf();
    Map<String, ExprNodeDesc> colExprMap = reduce.getColumnExprMap();
    LOG.info("RS " + reduce.getIdentifier() + " oldColExprMap: " + colExprMap);
    RowResolver resolver = cppCtx.getOpToParseCtxMap().get(reduce).getRowResolver();

    // Work on a copy of the signature; it is installed on both schemas at the end.
    ArrayList<ColumnInfo> signature =
        new ArrayList<ColumnInfo>(resolver.getRowSchema().getSignature());

    List<String> valueColNames = reduceConf.getOutputValueColumnNames();
    List<ExprNodeDesc> keyExprs = reduceConf.getKeyCols();
    List<ExprNodeDesc> valueExprs = reduceConf.getValueCols();

    ArrayList<String> keptColNames = new ArrayList<String>();
    ArrayList<ExprNodeDesc> keptExprs = new ArrayList<ExprNodeDesc>();

    for (int idx = 0; idx < retainFlags.length; idx++) {
      String colName = valueColNames.get(idx);
      ExprNodeDesc colExpr = valueExprs.get(idx);

      if (retainFlags[idx]) {
        keptColNames.add(colName);
        keptExprs.add(colExpr);
        continue;
      }

      // Resolve by plain name first, then by the VALUE-prefixed name.
      String[] resolved = resolver.reverseLookup(colName);
      if (resolved == null) {
        colName = Utilities.ReduceField.VALUE.toString() + "." + colName;
        resolved = resolver.reverseLookup(colName);
      }

      // When several columns reference the same column name, the ColumnInfo may already have
      // been removed from the row resolver by an earlier iteration; nothing left to do then.
      if (resolved == null) {
        continue;
      }

      // Columns that also appear in the key expressions of the RS keep their information.
      if (ExprNodeDescUtils.indexOf(colExpr, keyExprs) != -1) {
        continue;
      }

      ColumnInfo removed = resolver.getFieldMap(resolved[0]).remove(resolved[1]);
      resolver.getInvRslvMap().remove(removed.getInternalName());
      colExprMap.remove(colName);
      signature.remove(removed);
    }

    resolver.getRowSchema().setSignature(signature);
    reduce.getSchema().setSignature(signature);
    reduceConf.setOutputValueColumnNames(keptColNames);
    reduceConf.setValueCols(keptExprs);

    // Rebuild the value serialization info from the pruned value columns.
    TableDesc prunedValueTable =
        PlanUtils.getReduceValueTableDesc(
            PlanUtils.getFieldSchemasFromColumnList(
                reduceConf.getValueCols(), keptColNames, 0, ""));
    reduceConf.setValueSerializeInfo(prunedValueTable);
    LOG.info("RS " + reduce.getIdentifier() + " newColExprMap: " + colExprMap);
  }
Ejemplo n.º 6
0
 // Checks whether the same filter already exists above the target RS: walks the chain of
 // consecutive FilterOperators starting at the first parent of target (always following the
 // first parent) and reports true as soon as one of their predicates contains 'replaced'.
 private boolean filterExists(ReduceSinkOperator target, ExprNodeDesc replaced) {
   Operator<?> current = target.getParentOperators().get(0);
   while (current instanceof FilterOperator) {
     FilterOperator filter = (FilterOperator) current;
     ExprNodeDesc existing = filter.getConf().getPredicate();
     if (ExprNodeDescUtils.containsPredicate(existing, replaced)) {
       return true;
     }
     current = current.getParentOperators().get(0);
   }
   return false;
 }
Ejemplo n.º 7
0
    /**
     * Handles one Filter-ReduceSink-Join match.
     *
     * <p>{@code nd} is the join, the element below it on the stack is the source reduce sink and
     * the one below that is the filter. For every target position listed for the source's
     * position in the join's target table, the filter predicate is rewritten from the source keys
     * to the target keys. When the rewrite succeeds and no equivalent filter is found by
     * {@code filterExists}, the rewritten predicate is recorded for the target reduce sink in the
     * context's new-filter map.
     *
     * @param nd the join operator being visited
     * @param stack the walker stack; its top three elements are filter, reduce sink and join
     * @param procCtx the {@code TransitiveContext} that collects the results
     * @param nodeOutputs unused
     * @return always {@code null}
     * @throws SemanticException declared by the signature; propagated from the helper calls
     */
    @Override
    public Object process(
        Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs)
        throws SemanticException {
      @SuppressWarnings("unchecked")
      CommonJoinOperator<JoinDesc> join = (CommonJoinOperator) nd;
      int depth = stack.size();
      ReduceSinkOperator source = (ReduceSinkOperator) stack.get(depth - 2);
      FilterOperator filter = (FilterOperator) stack.get(depth - 3);

      List<Operator<? extends OperatorDesc>> parents = join.getParentOperators();
      int srcPos = parents.indexOf(source);

      TransitiveContext context = (TransitiveContext) procCtx;
      Map<CommonJoinOperator, int[][]> filterPropagates = context.getFilterPropagates();
      Map<ReduceSinkOperator, List<ExprNodeDesc>> newFilters = context.getNewfilters();

      // The target table of a join is computed once and cached in the context.
      int[][] targets = filterPropagates.get(join);
      if (targets == null) {
        targets = getTargets(join);
        filterPropagates.put(join, targets);
      }

      for (int targetPos : targets[srcPos]) {
        ReduceSinkOperator target = (ReduceSinkOperator) parents.get(targetPos);

        // Re-express the filter predicate in terms of the target's key columns.
        ExprNodeDesc rewritten =
            ExprNodeDescUtils.replace(
                filter.getConf().getPredicate(),
                source.getConf().getKeyCols(),
                target.getConf().getKeyCols());
        if (rewritten == null || filterExists(target, rewritten)) {
          continue;
        }

        List<ExprNodeDesc> pending = newFilters.get(target);
        if (pending != null) {
          // Appends the conjuncts of the rewritten predicate to the existing list.
          ExprNodeDescUtils.split(rewritten, pending);
        } else {
          newFilters.put(target, ExprNodeDescUtils.split(rewritten));
        }
      }
      return null;
    }
 // Backtracks the key exprs of the child to the parent and compares them with the parent's.
 // Returns null when an overlapping key differs or a child key cannot be backtracked;
 // otherwise the comparison of the two key counts (child vs. parent): negative, zero or positive.
 protected Integer sameKeys(
     List<ExprNodeDesc> cexprs, List<ExprNodeDesc> pexprs, Operator<?> child, Operator<?> parent)
     throws SemanticException {
   int childCount = cexprs.size();
   int parentCount = pexprs.size();
   int overlap = Math.min(childCount, parentCount);

   // Overlapping keys must match position by position once backtracked to the parent.
   for (int idx = 0; idx < overlap; idx++) {
     ExprNodeDesc parentExpr = pexprs.get(idx);
     ExprNodeDesc childExpr = ExprNodeDescUtils.backtrack(cexprs.get(idx), child, parent);
     if (childExpr == null || !parentExpr.isSame(childExpr)) {
       return null;
     }
   }

   // Surplus child keys (if any) only need to be resolvable in the parent. When the child has
   // no more keys than the parent, this loop does not run.
   for (int idx = overlap; idx < childCount; idx++) {
     if (ExprNodeDescUtils.backtrack(cexprs.get(idx), child, parent) == null) {
       // child key is not present in parent
       return null;
     }
   }

   return Integer.valueOf(childCount).compareTo(parentCount);
 }
  /**
   * Analyzes one function expression and, when it is a supported column/constant comparison,
   * records it as an index search condition.
   *
   * <p>Behavior by case:
   *
   * <ul>
   *   <li>AND: the two child results in {@code nodeOutputs} are treated as residual predicates.
   *       If one is {@code null} the other is returned; otherwise a new AND of both is returned.
   *   <li>Any function that is not a {@code GenericUDFBaseCompare}: returned unchanged.
   *   <li>Comparison: the operands are taken from {@code nodeOutputs[0]} and
   *       {@code nodeOutputs[1]}. If they cannot be reduced to a compare pair, the UDF name is
   *       not in {@code udfNames}, the column is not in {@code allowedColumnNames}, or a field
   *       access is present but not accepted/valid, {@code expr} is returned unchanged. Otherwise
   *       an {@code IndexSearchCondition} is appended to {@code searchConditions}.
   * </ul>
   *
   * @param expr the function expression being analyzed
   * @param searchConditions output list receiving the extracted search conditions
   * @param nodeOutputs results already computed for the children of {@code expr}
   * @return the residual predicate: {@code null} when the expression was fully converted into a
   *     search condition without field access, otherwise an expression to keep evaluating
   */
  private ExprNodeDesc analyzeExpr(
      ExprNodeGenericFuncDesc expr,
      List<IndexSearchCondition> searchConditions,
      Object... nodeOutputs) {

    if (FunctionRegistry.isOpAnd(expr)) {
      assert (nodeOutputs.length == 2);
      ExprNodeDesc residual1 = (ExprNodeDesc) nodeOutputs[0];
      ExprNodeDesc residual2 = (ExprNodeDesc) nodeOutputs[1];
      // A null residual means that side left nothing behind; keep whatever remains.
      if (residual1 == null) {
        return residual2;
      }
      if (residual2 == null) {
        return residual1;
      }
      List<ExprNodeDesc> residuals = new ArrayList<ExprNodeDesc>();
      residuals.add(residual1);
      residuals.add(residual2);
      return new ExprNodeGenericFuncDesc(
          TypeInfoFactory.booleanTypeInfo, FunctionRegistry.getGenericUDFForAnd(), residuals);
    }

    GenericUDF genericUDF = expr.getGenericUDF();
    if (!(genericUDF instanceof GenericUDFBaseCompare)) {
      return expr;
    }
    ExprNodeDesc expr1 = (ExprNodeDesc) nodeOutputs[0];
    ExprNodeDesc expr2 = (ExprNodeDesc) nodeOutputs[1];
    // We may need to peel off the GenericUDFBridge that is added by CBO or user
    if (expr1.getTypeInfo().equals(expr2.getTypeInfo())) {
      expr1 = getColumnExpr(expr1);
      expr2 = getColumnExpr(expr2);
    }

    // More than two extracted elements means a field access is involved (element [2] is read as
    // an ExprNodeFieldDesc below); that is only allowed when acceptsFields is set.
    ExprNodeDesc[] extracted = ExprNodeDescUtils.extractComparePair(expr1, expr2);
    if (extracted == null || (extracted.length > 2 && !acceptsFields)) {
      return expr;
    }

    ExprNodeColumnDesc columnDesc;
    ExprNodeConstantDesc constantDesc;
    if (extracted[0] instanceof ExprNodeConstantDesc) {
      // Constant on the left: flip the UDF so the condition reads "column op constant".
      genericUDF = genericUDF.flip();
      columnDesc = (ExprNodeColumnDesc) extracted[1];
      constantDesc = (ExprNodeConstantDesc) extracted[0];
    } else {
      columnDesc = (ExprNodeColumnDesc) extracted[0];
      constantDesc = (ExprNodeConstantDesc) extracted[1];
    }

    // Note: the name is taken from the (possibly flipped) UDF and fetched twice here.
    String udfName = genericUDF.getUdfName();
    if (!udfNames.contains(genericUDF.getUdfName())) {
      return expr;
    }

    if (!allowedColumnNames.contains(columnDesc.getColumn())) {
      return expr;
    }

    String[] fields = null;
    if (extracted.length > 2) {
      ExprNodeFieldDesc fieldDesc = (ExprNodeFieldDesc) extracted[2];
      if (!isValidField(fieldDesc)) {
        return expr;
      }
      fields = ExprNodeDescUtils.extractFields(fieldDesc);
    }

    // We also need to update the expr so that the index query can be generated.
    // Note that, hive does not support UDFToDouble etc in the query text.
    // The rebuilt expression keeps the operands in their original order and uses the original
    // (unflipped) UDF from expr, while udfName above comes from the possibly flipped one.
    List<ExprNodeDesc> list = new ArrayList<ExprNodeDesc>();
    list.add(expr1);
    list.add(expr2);
    expr = new ExprNodeGenericFuncDesc(expr.getTypeInfo(), expr.getGenericUDF(), list);

    searchConditions.add(new IndexSearchCondition(columnDesc, udfName, constantDesc, expr, fields));

    // we converted the expression to a search condition, so
    // remove it from the residual predicate
    // (when a field access is involved the rebuilt expression is still returned as residual)
    return fields == null ? null : expr;
  }
Ejemplo n.º 10
0
  /**
   * Initializes this operator from its descriptor: counters, key/value/partition/bucket
   * evaluators, tag, key and value serializers, the optional top-N hash and the uniform-hash flag.
   *
   * @param hconf configuration used for the superclass initialization, the log interval and the
   *     counter name
   * @throws HiveException declared by the signature; failures raised inside this method's own
   *     setup are caught, logged and rethrown as {@link RuntimeException} carrying the original
   *     exception as cause
   */
  @Override
  protected void initializeOp(Configuration hconf) throws HiveException {
    super.initializeOp(hconf);
    try {

      numRows = 0;
      cntr = 1;
      logEveryNRows = HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVE_LOG_N_RECORDS);

      statsMap.put(getCounterName(Counter.RECORDS_OUT_INTERMEDIATE, hconf), recordCounter);

      List<ExprNodeDesc> keys = conf.getKeyCols();

      if (isLogDebugEnabled) {
        LOG.debug("keys size is " + keys.size());
        for (ExprNodeDesc k : keys) {
          LOG.debug("Key exprNodeDesc " + k.getExprString());
        }
      }

      // One evaluator per key column
      keyEval = new ExprNodeEvaluator[keys.size()];
      int i = 0;
      for (ExprNodeDesc e : keys) {
        keyEval[i++] = ExprNodeEvaluatorFactory.get(e);
      }

      numDistributionKeys = conf.getNumDistributionKeys();
      distinctColIndices = conf.getDistinctColumnIndices();
      numDistinctExprs = distinctColIndices.size();

      // One evaluator per value column
      valueEval = new ExprNodeEvaluator[conf.getValueCols().size()];
      i = 0;
      for (ExprNodeDesc e : conf.getValueCols()) {
        valueEval[i++] = ExprNodeEvaluatorFactory.get(e);
      }

      // Partition columns reuse the key evaluator when the expression is also a key
      partitionEval = new ExprNodeEvaluator[conf.getPartitionCols().size()];
      i = 0;
      for (ExprNodeDesc e : conf.getPartitionCols()) {
        int index = ExprNodeDescUtils.indexOf(e, keys);
        partitionEval[i++] = index < 0 ? ExprNodeEvaluatorFactory.get(e) : keyEval[index];
      }

      if (conf.getBucketCols() != null && !conf.getBucketCols().isEmpty()) {
        bucketEval = new ExprNodeEvaluator[conf.getBucketCols().size()];

        // Bucket columns likewise reuse the key evaluator when possible
        i = 0;
        for (ExprNodeDesc e : conf.getBucketCols()) {
          int index = ExprNodeDescUtils.indexOf(e, keys);
          bucketEval[i++] = index < 0 ? ExprNodeEvaluatorFactory.get(e) : keyEval[index];
        }

        buckColIdxInKey = conf.getPartitionCols().size();
      }

      tag = conf.getTag();
      tagByte[0] = (byte) tag;
      skipTag = conf.getSkipTag();
      if (isLogInfoEnabled) {
        LOG.info("Using tag = " + tag);
      }

      // Serializers are instantiated reflectively from the table descriptors
      TableDesc keyTableDesc = conf.getKeySerializeInfo();
      keySerializer = (Serializer) keyTableDesc.getDeserializerClass().newInstance();
      keySerializer.initialize(null, keyTableDesc.getProperties());
      keyIsText = keySerializer.getSerializedClass().equals(Text.class);

      TableDesc valueTableDesc = conf.getValueSerializeInfo();
      valueSerializer = (Serializer) valueTableDesc.getDeserializerClass().newInstance();
      valueSerializer.initialize(null, valueTableDesc.getProperties());

      int limit = conf.getTopN();
      float memUsage = conf.getTopNMemoryUsage();

      // The top-N hash is only initialized when a limit and a memory budget are configured
      if (limit >= 0 && memUsage > 0) {
        reducerHash = conf.isPTFReduceSink() ? new PTFTopNHash() : reducerHash;
        reducerHash.initialize(limit, memUsage, conf.isMapGroupBy(), this);
      }

      useUniformHash = conf.getReducerTraits().contains(UNIFORM);

      firstRow = true;
    } catch (Exception e) {
      String msg = "Error initializing ReduceSinkOperator: " + e.getMessage();
      LOG.error(msg, e);
      // Carry the contextual message along with the cause instead of dropping it.
      throw new RuntimeException(msg, e);
    }
  }
Ejemplo n.º 11
0
  /**
   * Converts a generic function expression into a {@code RexNode}.
   *
   * <p>Steps, in order:
   *
   * <ol>
   *   <li>Determine a target type for the arguments: the function's own type for a numeric
   *       {@code GenericUDFBaseBinary}, or the common comparison type of the two children for a
   *       two-argument {@code GenericUDFBaseCompare}.
   *   <li>Convert each child, first wrapping it in a conversion cast when a target type exists
   *       and {@code TypeInfoUtils.isConversionRequiredForComparison} says one is needed.
   *   <li>Let {@code handleExplicitCast} build the node if the function is an explicit cast;
   *       otherwise look up the Calcite operator and build a call.
   *   <li>When {@code flattenExpr} is set and the result is a non-cast call, rebuild it with
   *       flattened operands.
   * </ol>
   *
   * @param func the function expression to convert
   * @return the converted expression
   * @throws SemanticException declared by the signature; propagated from the conversion helpers
   */
  private RexNode convert(final ExprNodeGenericFuncDesc func) throws SemanticException {
    ExprNodeDesc tmpExprNode;
    RexNode tmpRN;

    List<RexNode> childRexNodeLst = new LinkedList<RexNode>();
    Builder<RelDataType> argTypeBldr = ImmutableList.<RelDataType>builder();

    // TODO: 1) Expand to other functions as needed 2) What about types other than primitive.
    TypeInfo tgtDT = null;
    GenericUDF tgtUdf = func.getGenericUDF();

    // "Numeric" here means: a binary UDF whose result type is a primitive in the numeric group.
    boolean isNumeric =
        (tgtUdf instanceof GenericUDFBaseBinary
            && func.getTypeInfo().getCategory() == Category.PRIMITIVE
            && (PrimitiveGrouping.NUMERIC_GROUP
                == PrimitiveObjectInspectorUtils.getPrimitiveGrouping(
                    ((PrimitiveTypeInfo) func.getTypeInfo()).getPrimitiveCategory())));
    boolean isCompare = !isNumeric && tgtUdf instanceof GenericUDFBaseCompare;

    if (isNumeric) {
      tgtDT = func.getTypeInfo();

      assert func.getChildren().size() == 2;
      // TODO: checking 2 children is useless, compare already does that.
    } else if (isCompare && (func.getChildren().size() == 2)) {
      tgtDT =
          FunctionRegistry.getCommonClassForComparison(
              func.getChildren().get(0).getTypeInfo(), func.getChildren().get(1).getTypeInfo());
    }

    // Convert the children, inserting casts towards the target type where required. The argument
    // type recorded for operator lookup is the type after any inserted cast.
    for (ExprNodeDesc childExpr : func.getChildren()) {
      tmpExprNode = childExpr;
      if (tgtDT != null
          && TypeInfoUtils.isConversionRequiredForComparison(tgtDT, childExpr.getTypeInfo())) {
        if (isCompare) {
          // For compare, we will convert requisite children
          tmpExprNode = ParseUtils.createConversionCast(childExpr, (PrimitiveTypeInfo) tgtDT);
        } else if (isNumeric) {
          // For numeric, we'll do minimum necessary cast - if we cast to the type
          // of expression, bad things will happen.
          PrimitiveTypeInfo minArgType = ExprNodeDescUtils.deriveMinArgumentCast(childExpr, tgtDT);
          tmpExprNode = ParseUtils.createConversionCast(childExpr, minArgType);
        } else {
          // tgtDT is only ever set in the numeric or compare case, so this is not expected.
          throw new AssertionError("Unexpected " + tgtDT + " - not a numeric op or compare");
        }
      }
      argTypeBldr.add(TypeConverter.convert(tmpExprNode.getTypeInfo(), cluster.getTypeFactory()));
      tmpRN = convert(tmpExprNode);
      childRexNodeLst.add(tmpRN);
    }

    // See if this is an explicit cast.
    RexNode expr = null;
    RelDataType retType = null;
    expr = handleExplicitCast(func, childRexNodeLst);

    if (expr == null) {
      // This is not a cast; process the function.
      retType = TypeConverter.convert(func.getTypeInfo(), cluster.getTypeFactory());
      SqlOperator calciteOp =
          SqlFunctionConverter.getCalciteOperator(
              func.getFuncText(), func.getGenericUDF(), argTypeBldr.build(), retType);
      expr = cluster.getRexBuilder().makeCall(calciteOp, childRexNodeLst);
    } else {
      // For an explicit cast the return type is whatever the cast node reports.
      retType = expr.getType();
    }

    // TODO: Cast Function in Calcite have a bug where it infer type on cast throws
    // an exception
    // Hence cast calls are excluded from the flattening below.
    if (flattenExpr
        && (expr instanceof RexCall)
        && !(((RexCall) expr).getOperator() instanceof SqlCastFunction)) {
      RexCall call = (RexCall) expr;
      expr =
          cluster
              .getRexBuilder()
              .makeCall(
                  retType,
                  call.getOperator(),
                  RexUtil.flatten(call.getOperands(), call.getOperator()));
    }

    return expr;
  }
Ejemplo n.º 12
0
    /**
     * Merges the configuration of a child RS into its parent RS so the child can be
     * removed/replaced by RSDedup.
     *
     * <p>For key columns, sorting order and the number of reducers, the more specific part of the
     * child's configuration is copied to the parent. For partitioning columns, if both child and
     * parent have partitioning columns assigned, the more general ones are chosen; if the parent
     * has none assigned, the child's partitioning columns (if any) are used.
     *
     * @param cRS the child reduce sink
     * @param pRS the parent reduce sink, whose descriptor is updated in place
     * @param minReducer passed through to {@code checkStatus}
     * @return {@code false} when {@code checkStatus} yields no result (nothing is changed),
     *     {@code true} once the parent has been updated
     * @throws SemanticException if the child's sorting order is more specific while its sorting
     *     columns are not, or if backtracking fails
     */
    protected boolean merge(ReduceSinkOperator cRS, ReduceSinkOperator pRS, int minReducer)
        throws SemanticException {
      int[] status = checkStatus(cRS, pRS, minReducer);
      if (status == null) {
        return false;
      }
      // Each slot compares one aspect of the child against the parent.
      final int keyColsCmp = status[0];
      final int partColsCmp = status[1];
      final int orderCmp = status[2];
      final int reducersCmp = status[3];
      final int distKeysCmp = status[4];

      if (keyColsCmp > 0) {
        // Child sorts on more specific columns than the parent: the parent takes over the
        // child's sorting columns.
        List<ExprNodeDesc> childKeyCols = cRS.getConf().getKeyCols();
        pRS.getConf().setKeyCols(ExprNodeDescUtils.backtrack(childKeyCols, cRS, pRS));
      }

      if (partColsCmp < 0) {
        // Parent partitions on more specific columns than the child. If the child has
        // partitioning columns assigned, they replace the parent's.
        List<ExprNodeDesc> childPartCols = cRS.getConf().getPartitionCols();
        boolean childAssigned = childPartCols != null && !childPartCols.isEmpty();
        if (childAssigned) {
          pRS.getConf().setPartitionCols(ExprNodeDescUtils.backtrack(childPartCols, cRS, pRS));
        }
      } else if (partColsCmp > 0) {
        // Child partitions on more specific columns than the parent. Only when the parent has
        // none assigned does it receive the child's partitioning columns.
        List<ExprNodeDesc> parentPartCols = pRS.getConf().getPartitionCols();
        boolean parentUnassigned = parentPartCols == null || parentPartCols.isEmpty();
        if (parentUnassigned) {
          pRS.getConf()
              .setPartitionCols(
                  ExprNodeDescUtils.backtrack(cRS.getConf().getPartitionCols(), cRS, pRS));
        }
      }

      if (orderCmp > 0) {
        // Child's sorting order is more specific than the parent's. That is only consistent
        // when the child's sorting columns were more specific as well.
        if (keyColsCmp <= 0) {
          throw new SemanticException(
              "Sorting columns and order don't match. "
                  + "Try set "
                  + HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION
                  + "=false;");
        }
        pRS.getConf().setOrder(cRS.getConf().getOrder());
      }

      if (reducersCmp > 0) {
        // Child's number of reducers is more specific: the parent adopts it.
        pRS.getConf().setNumReducers(cRS.getConf().getNumReducers());
      }

      if (distKeysCmp > 0) {
        // This case happens only when the pRS key is empty, in which case the number of
        // distribution keys comes from cRS and the key serialization info is rebuilt.
        pRS.getConf().setNumDistributionKeys(cRS.getConf().getNumDistributionKeys());
        List<FieldSchema> keyFields =
            PlanUtils.getFieldSchemasFromColumnList(pRS.getConf().getKeyCols(), "reducesinkkey");
        TableDesc keyTable = PlanUtils.getReduceKeyTableDesc(keyFields, pRS.getConf().getOrder());
        ArrayList<String> outputKeyCols = Lists.newArrayList();
        for (FieldSchema keyField : keyFields) {
          outputKeyCols.add(keyField.getName());
        }
        pRS.getConf().setOutputKeyColumnNames(outputKeyCols);
        pRS.getConf().setKeySerializeInfo(keyTable);
      }
      return true;
    }
Ejemplo n.º 13
0
 /**
  * Inspects every operator on the walker stack and records in the {@code RewriteCanApplyCtx}
  * what is needed to decide whether the rewrite can apply.
  *
  * <ul>
  *   <li>Table scan: stored in the context; it must reference exactly one column, which becomes
  *       the index key. Otherwise the select-clause flag is raised and processing stops.
  *   <li>Select: appended to the context's list of select operators.
  *   <li>Group-by: appended to the context's list of group-by operators. For the first one seen,
  *       the aggregation must be a single {@code count} with exactly one parameter that
  *       backtracks to a column; otherwise the matching flag is raised and processing stops.
  * </ul>
  *
  * @param nd the node being visited (not used directly; the whole stack is scanned)
  * @param stack the walker stack, bottom element first
  * @param ctx the {@code RewriteCanApplyCtx} receiving the findings
  * @param nodeOutputs unused
  * @return always {@code null}
  * @throws SemanticException if backtracking the aggregation parameter fails
  */
 public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx, Object... nodeOutputs)
     throws SemanticException {
   RewriteCanApplyCtx canApplyCtx = (RewriteCanApplyCtx) ctx;

   for (Node node : stack) {
     if (node instanceof TableScanOperator) {
       // Check the referenced columns to make sure that only the index column is
       // selected for the following operators.
       TableScanOperator ts = (TableScanOperator) node;
       canApplyCtx.setTableScanOperator(ts);
       List<String> referenced = ts.getConf().getReferencedColumns();
       if (referenced == null || referenced.size() != 1) {
         canApplyCtx.setSelClauseColsFetchException(true);
         return null;
       }
       canApplyCtx.setIndexKey(referenced.get(0));
       continue;
     }

     if (node instanceof SelectOperator) {
       // Select operators in the stack are simply collected
       if (canApplyCtx.getSelectOperators() == null) {
         canApplyCtx.setSelectOperators(new ArrayList<SelectOperator>());
       }
       canApplyCtx.getSelectOperators().add((SelectOperator) node);
       continue;
     }

     if (!(node instanceof GroupByOperator)) {
       continue;
     }

     if (canApplyCtx.getGroupByOperators() == null) {
       canApplyCtx.setGroupByOperators(new ArrayList<GroupByOperator>());
     }
     // According to the pre-order, the first GroupByOperator is the one before RS
     // and the second one is the one after RS
     GroupByOperator groupBy = (GroupByOperator) node;
     canApplyCtx.getGroupByOperators().add(groupBy);

     // Only the first group-by encountered is validated
     if (canApplyCtx.isQueryHasGroupBy()) {
       continue;
     }
     canApplyCtx.setQueryHasGroupBy(true);

     List<AggregationDesc> aggrList = groupBy.getConf().getAggregators();
     boolean singleCount =
         aggrList != null
             && aggrList.size() == 1
             && "count".equals(aggrList.get(0).getGenericUDAFName());
     if (!singleCount) {
       // In the current implementation, we make sure that only count is
       // in the function
       canApplyCtx.setAggFuncIsNotCount(true);
       return null;
     }

     List<ExprNodeDesc> para = aggrList.get(0).getParameters();
     if (para == null || para.size() != 1) {
       canApplyCtx.setAggParameterException(true);
       return null;
     }

     // The single parameter must trace back to a plain column at the bottom of the stack
     ExprNodeDesc expr =
         ExprNodeDescUtils.backtrack(
             para.get(0), groupBy, (Operator<OperatorDesc>) stack.get(0));
     if (!(expr instanceof ExprNodeColumnDesc)) {
       canApplyCtx.setAggParameterException(true);
       return null;
     }
   }
   return null;
 }