/**
 * Creates the evaluator for a generic function expression.
 *
 * <p>One child evaluator is obtained from {@code ExprNodeEvaluatorFactory} per child expression.
 * This evaluator is flagged eager when a child function evaluator is already eager, or when the
 * UDF of a child function expression is reported stateful by {@code FunctionRegistry.isStateful}.
 * Every child is then wrapped in an {@code EagerExprObject} or a {@code DeferredExprObject}
 * according to that flag.
 *
 * @param expr descriptor of the function expression to evaluate
 */
public ExprNodeGenericFuncEvaluator(ExprNodeGenericFuncDesc expr) {
  this.expr = expr;
  final int childCount = expr.getChildExprs().size();
  children = new ExprNodeEvaluator[childCount];
  isEager = false;

  for (int idx = 0; idx < childCount; idx++) {
    ExprNodeDesc childDesc = expr.getChildExprs().get(idx);
    ExprNodeEvaluator childEvaluator = ExprNodeEvaluatorFactory.get(childDesc);
    children[idx] = childEvaluator;

    // Only function children can contribute to eagerness.
    if (!(childEvaluator instanceof ExprNodeGenericFuncEvaluator)) {
      continue;
    }

    // Eagerness anywhere below us propagates upwards.
    boolean childIsEager = ((ExprNodeGenericFuncEvaluator) childEvaluator).isEager;
    // Base case: a stateful child UDF makes this evaluator eager.
    GenericUDF childUdf = ((ExprNodeGenericFuncDesc) childDesc).getGenericUDF();
    boolean childIsStateful = FunctionRegistry.isStateful(childUdf);
    if (childIsEager || childIsStateful) {
      isEager = true;
    }
  }

  // The wrapper type is decided once all children have been inspected.
  deferredChildren = new GenericUDF.DeferredObject[childCount];
  for (int idx = 0; idx < childCount; idx++) {
    deferredChildren[idx] =
        isEager ? new EagerExprObject(children[idx]) : new DeferredExprObject(children[idx]);
  }
}
// Check if ExprNodeColumnDesc is wrapped in expr. // If so, peel off. Otherwise return itself. private ExprNodeDesc getColumnExpr(ExprNodeDesc expr) { if (expr instanceof ExprNodeColumnDesc) { return expr; } ExprNodeGenericFuncDesc funcDesc = null; if (expr instanceof ExprNodeGenericFuncDesc) { funcDesc = (ExprNodeGenericFuncDesc) expr; } if (null == funcDesc) { return expr; } GenericUDF udf = funcDesc.getGenericUDF(); // check if its a simple cast expression. if ((udf instanceof GenericUDFBridge || udf instanceof GenericUDFToBinary || udf instanceof GenericUDFToChar || udf instanceof GenericUDFToVarchar || udf instanceof GenericUDFToDecimal || udf instanceof GenericUDFToDate || udf instanceof GenericUDFToUnixTimeStamp || udf instanceof GenericUDFToUtcTimestamp) && funcDesc.getChildren().size() == 1 && funcDesc.getChildren().get(0) instanceof ExprNodeColumnDesc) { return expr.getChildren().get(0); } return expr; }
/**
 * Builds an abstract cast for a function that is an explicit cast over a single operand.
 *
 * @param func the function expression being converted
 * @param childRexNodeLst the already converted operands of {@code func}
 * @return the cast expression, or {@code null} when the operand list is null or does not hold
 *     exactly one element, or when the UDF is not one of the cast UDFs checked below
 * @throws CalciteSemanticException declared by this method's signature
 */
private RexNode handleExplicitCast(ExprNodeGenericFuncDesc func, List<RexNode> childRexNodeLst)
    throws CalciteSemanticException {
  if (childRexNodeLst == null || childRexNodeLst.size() != 1) {
    return null;
  }

  GenericUDF udf = func.getGenericUDF();
  boolean isExplicitCast =
      (udf instanceof GenericUDFToChar)
          || (udf instanceof GenericUDFToVarchar)
          || (udf instanceof GenericUDFToDecimal)
          || (udf instanceof GenericUDFToDate)
          || (udf instanceof GenericUDFToBinary)
          || castExprUsingUDFBridge(udf);
  if (!isExplicitCast) {
    return null;
  }

  // The cast target is the function's own result type.
  return cluster
      .getRexBuilder()
      .makeAbstractCast(
          TypeConverter.convert(func.getTypeInfo(), cluster.getTypeFactory()),
          childRexNodeLst.get(0));
}
@Override public ObjectInspector initialize(ObjectInspector rowInspector) throws HiveException { // Initialize all children first ObjectInspector[] childrenOIs = new ObjectInspector[children.length]; for (int i = 0; i < children.length; i++) { childrenOIs[i] = children[i].initialize(rowInspector); } genericUDF = expr.getGenericUDF(); if (isEager && ((genericUDF instanceof GenericUDFCase) || (genericUDF instanceof GenericUDFWhen))) { throw new HiveException("Stateful expressions cannot be used inside of CASE"); } this.outputOI = genericUDF.initializeAndFoldConstants(childrenOIs); return this.outputOI; }
/**
 * Compares the two sides of this expression when it is a sorted base comparison.
 *
 * <p>If the expression is marked sorted and its UDF is a {@code GenericUDFBaseCompare}, the row
 * is stored, eager children are evaluated, and the UDF's own {@code compare} result is returned.
 * Otherwise the child function evaluators are asked in order and the first non-null result is
 * returned.
 *
 * @param row the row to compare on; stored in {@code rowObject} before the comparison runs
 * @return the comparison result, or {@code null} when neither this expression nor any child
 *     function evaluator produced one
 * @throws HiveException declared by this method's signature
 */
public Integer compare(Object row) throws HiveException {
  boolean isSortedCompare = expr.isSortedExpr() && (genericUDF instanceof GenericUDFBaseCompare);

  if (isSortedCompare) {
    rowObject = row;
    if (isEager) {
      // Eager arguments are evaluated up front, before the UDF sees them.
      for (int pos = 0; pos < deferredChildren.length; pos++) {
        ((EagerExprObject) deferredChildren[pos]).evaluate();
      }
    }
    return ((GenericUDFBaseCompare) genericUDF).compare(deferredChildren);
  }

  // Not directly comparable here: delegate to the first child that yields a result.
  for (int pos = 0; pos < children.length; pos++) {
    ExprNodeEvaluator child = children[pos];
    if (!(child instanceof ExprNodeGenericFuncEvaluator)) {
      continue;
    }
    Integer childResult = ((ExprNodeGenericFuncEvaluator) child).compare(row);
    if (childResult != null) {
      return childResult;
    }
  }
  return null;
}
private ExprNodeDesc analyzeExpr( ExprNodeGenericFuncDesc expr, List<IndexSearchCondition> searchConditions, Object... nodeOutputs) { if (FunctionRegistry.isOpAnd(expr)) { assert (nodeOutputs.length == 2); ExprNodeDesc residual1 = (ExprNodeDesc) nodeOutputs[0]; ExprNodeDesc residual2 = (ExprNodeDesc) nodeOutputs[1]; if (residual1 == null) { return residual2; } if (residual2 == null) { return residual1; } List<ExprNodeDesc> residuals = new ArrayList<ExprNodeDesc>(); residuals.add(residual1); residuals.add(residual2); return new ExprNodeGenericFuncDesc( TypeInfoFactory.booleanTypeInfo, FunctionRegistry.getGenericUDFForAnd(), residuals); } GenericUDF genericUDF = expr.getGenericUDF(); if (!(genericUDF instanceof GenericUDFBaseCompare)) { return expr; } ExprNodeDesc expr1 = (ExprNodeDesc) nodeOutputs[0]; ExprNodeDesc expr2 = (ExprNodeDesc) nodeOutputs[1]; // We may need to peel off the GenericUDFBridge that is added by CBO or user if (expr1.getTypeInfo().equals(expr2.getTypeInfo())) { expr1 = getColumnExpr(expr1); expr2 = getColumnExpr(expr2); } ExprNodeDesc[] extracted = ExprNodeDescUtils.extractComparePair(expr1, expr2); if (extracted == null || (extracted.length > 2 && !acceptsFields)) { return expr; } ExprNodeColumnDesc columnDesc; ExprNodeConstantDesc constantDesc; if (extracted[0] instanceof ExprNodeConstantDesc) { genericUDF = genericUDF.flip(); columnDesc = (ExprNodeColumnDesc) extracted[1]; constantDesc = (ExprNodeConstantDesc) extracted[0]; } else { columnDesc = (ExprNodeColumnDesc) extracted[0]; constantDesc = (ExprNodeConstantDesc) extracted[1]; } String udfName = genericUDF.getUdfName(); if (!udfNames.contains(genericUDF.getUdfName())) { return expr; } if (!allowedColumnNames.contains(columnDesc.getColumn())) { return expr; } String[] fields = null; if (extracted.length > 2) { ExprNodeFieldDesc fieldDesc = (ExprNodeFieldDesc) extracted[2]; if (!isValidField(fieldDesc)) { return expr; } fields = ExprNodeDescUtils.extractFields(fieldDesc); } // We also need 
to update the expr so that the index query can be generated. // Note that, hive does not support UDFToDouble etc in the query text. List<ExprNodeDesc> list = new ArrayList<ExprNodeDesc>(); list.add(expr1); list.add(expr2); expr = new ExprNodeGenericFuncDesc(expr.getTypeInfo(), expr.getGenericUDF(), list); searchConditions.add(new IndexSearchCondition(columnDesc, udfName, constantDesc, expr, fields)); // we converted the expression to a search condition, so // remove it from the residual predicate return fields == null ? null : expr; }
/** * Construct the filter expression from the skewed keys and skewed values. If the skewed join * keys are (k1), and (k1,k3) with the skewed values (1,2) and ((2,3),(4,5)) respectively, the * filter expression would be: (k1=1) or (k1=2) or ((k1=2) and (k3=3)) or ((k1=4) and (k3=5)). */ private ExprNodeDesc constructFilterExpr( Map<List<ExprNodeDesc>, List<List<String>>> skewedValuesMap, boolean skewed) { ExprNodeDesc finalExprNodeDesc = null; try { for (Map.Entry<List<ExprNodeDesc>, List<List<String>>> mapEntry : skewedValuesMap.entrySet()) { List<ExprNodeDesc> keyCols = mapEntry.getKey(); List<List<String>> skewedValuesList = mapEntry.getValue(); for (List<String> skewedValues : skewedValuesList) { int keyPos = 0; ExprNodeDesc currExprNodeDesc = null; // Make the following condition: all the values match for all the columns for (String skewedValue : skewedValues) { List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>(); // We have ensured that the keys are columns ExprNodeColumnDesc keyCol = (ExprNodeColumnDesc) keyCols.get(keyPos).clone(); keyPos++; children.add(keyCol); // Convert the constants available as strings to the corresponding objects children.add(createConstDesc(skewedValue, keyCol)); ExprNodeGenericFuncDesc expr = null; // Create the equality condition expr = ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPEqual(), children); if (currExprNodeDesc == null) { currExprNodeDesc = expr; } else { // If there are previous nodes, then AND the current node with the previous one List<ExprNodeDesc> childrenAND = new ArrayList<ExprNodeDesc>(); childrenAND.add(currExprNodeDesc); childrenAND.add(expr); currExprNodeDesc = ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPAnd(), childrenAND); } } // If there are more than one skewed values, // then OR the current node with the previous one if (finalExprNodeDesc == null) { finalExprNodeDesc = currExprNodeDesc; } else { List<ExprNodeDesc> childrenOR = new ArrayList<ExprNodeDesc>(); 
childrenOR.add(finalExprNodeDesc); childrenOR.add(currExprNodeDesc); finalExprNodeDesc = ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPOr(), childrenOR); } } } // Add a NOT operator in the beginning (this is for the cloned operator because we // want the values which are not skewed if (skewed == false) { List<ExprNodeDesc> childrenNOT = new ArrayList<ExprNodeDesc>(); childrenNOT.add(finalExprNodeDesc); finalExprNodeDesc = ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPNot(), childrenNOT); } } catch (UDFArgumentException e) { // Ignore the exception because we are not comparing Long vs. String here. // There should never be an exception assert false; } return finalExprNodeDesc; }
private RexNode convert(final ExprNodeGenericFuncDesc func) throws SemanticException { ExprNodeDesc tmpExprNode; RexNode tmpRN; List<RexNode> childRexNodeLst = new LinkedList<RexNode>(); Builder<RelDataType> argTypeBldr = ImmutableList.<RelDataType>builder(); // TODO: 1) Expand to other functions as needed 2) What about types other than primitive. TypeInfo tgtDT = null; GenericUDF tgtUdf = func.getGenericUDF(); boolean isNumeric = (tgtUdf instanceof GenericUDFBaseBinary && func.getTypeInfo().getCategory() == Category.PRIMITIVE && (PrimitiveGrouping.NUMERIC_GROUP == PrimitiveObjectInspectorUtils.getPrimitiveGrouping( ((PrimitiveTypeInfo) func.getTypeInfo()).getPrimitiveCategory()))); boolean isCompare = !isNumeric && tgtUdf instanceof GenericUDFBaseCompare; if (isNumeric) { tgtDT = func.getTypeInfo(); assert func.getChildren().size() == 2; // TODO: checking 2 children is useless, compare already does that. } else if (isCompare && (func.getChildren().size() == 2)) { tgtDT = FunctionRegistry.getCommonClassForComparison( func.getChildren().get(0).getTypeInfo(), func.getChildren().get(1).getTypeInfo()); } for (ExprNodeDesc childExpr : func.getChildren()) { tmpExprNode = childExpr; if (tgtDT != null && TypeInfoUtils.isConversionRequiredForComparison(tgtDT, childExpr.getTypeInfo())) { if (isCompare) { // For compare, we will convert requisite children tmpExprNode = ParseUtils.createConversionCast(childExpr, (PrimitiveTypeInfo) tgtDT); } else if (isNumeric) { // For numeric, we'll do minimum necessary cast - if we cast to the type // of expression, bad things will happen. 
PrimitiveTypeInfo minArgType = ExprNodeDescUtils.deriveMinArgumentCast(childExpr, tgtDT); tmpExprNode = ParseUtils.createConversionCast(childExpr, minArgType); } else { throw new AssertionError("Unexpected " + tgtDT + " - not a numeric op or compare"); } } argTypeBldr.add(TypeConverter.convert(tmpExprNode.getTypeInfo(), cluster.getTypeFactory())); tmpRN = convert(tmpExprNode); childRexNodeLst.add(tmpRN); } // See if this is an explicit cast. RexNode expr = null; RelDataType retType = null; expr = handleExplicitCast(func, childRexNodeLst); if (expr == null) { // This is not a cast; process the function. retType = TypeConverter.convert(func.getTypeInfo(), cluster.getTypeFactory()); SqlOperator calciteOp = SqlFunctionConverter.getCalciteOperator( func.getFuncText(), func.getGenericUDF(), argTypeBldr.build(), retType); expr = cluster.getRexBuilder().makeCall(calciteOp, childRexNodeLst); } else { retType = expr.getType(); } // TODO: Cast Function in Calcite have a bug where it infer type on cast throws // an exception if (flattenExpr && (expr instanceof RexCall) && !(((RexCall) expr).getOperator() instanceof SqlCastFunction)) { RexCall call = (RexCall) expr; expr = cluster .getRexBuilder() .makeCall( retType, call.getOperator(), RexUtil.flatten(call.getOperands(), call.getOperator())); } return expr; }