/**
 * Produces a copy of an expression whose column references use flattened column names.
 *
 * <p>A normal reduce operator's rowObjectInspector looks like a struct containing nested
 * key/value structs that contain the column values:
 * { key: { reducesinkkey0:int }, value: { _col0:int, _col1:int, .. } }
 *
 * <p>For vectorized queries the rowObjectInspector looks the same at compilation time, but
 * within the tasks at query execution it has a flatter structure without the nested key/value
 * structs:
 * { 'key.reducesinkkey0':int, 'value._col0':int, 'value._col1':int, .. }
 *
 * <p>Looking up 'key.reducesinkkey0' by name in that flattened list does not work, because the
 * '.' is interpreted as a field-member access even though the list holds plain column values.
 * As a workaround, every column name referenced by the expression is rewritten from the nested
 * form (key.reducesinkkey0) to key_reducesinkkey0 by replacing each '.' with '_'.
 *
 * @param source the expression to flatten
 * @return for a function or field expression, a clone with its children / underlying
 *     expression flattened; for a column expression, a new column expression carrying the
 *     rewritten name; for anything else (e.g. a constant), {@code source} itself. Returns
 *     {@code null} when a function child or a field's underlying expression flattens to
 *     {@code null}.
 */
public static ExprNodeDesc flattenExpr(ExprNodeDesc source) {
  if (source instanceof ExprNodeGenericFuncDesc) {
    // Work on a clone so the caller's function expression is left untouched.
    ExprNodeGenericFuncDesc funcCopy = (ExprNodeGenericFuncDesc) source.clone();
    List<ExprNodeDesc> flattenedArgs = flattenExprList(funcCopy.getChildren());
    for (ExprNodeDesc flattenedArg : flattenedArgs) {
      if (flattenedArg == null) {
        // One of the arguments could not be flattened, so the whole function fails.
        return null;
      }
    }
    funcCopy.setChildren(flattenedArgs);
    return funcCopy;
  } else if (source instanceof ExprNodeColumnDesc) {
    ExprNodeColumnDesc columnRef = (ExprNodeColumnDesc) source;
    // STRUCT.COLUMN becomes STRUCT_COLUMN in the new column expression.
    String flatName = columnRef.getColumn().replace('.', '_');
    return new ExprNodeColumnDesc(source.getTypeInfo(), flatName, columnRef.getTabAlias(), false);
  } else if (source instanceof ExprNodeFieldDesc) {
    // The expression the field is read from has to be flattened as well.
    ExprNodeFieldDesc fieldCopy = (ExprNodeFieldDesc) source.clone();
    ExprNodeDesc flattenedOwner = flattenExpr(fieldCopy.getDesc());
    if (flattenedOwner != null) {
      fieldCopy.setDesc(flattenedOwner);
      return fieldCopy;
    }
    return null;
  }
  // Constant or null expression: nothing to rewrite.
  return source;
}
/**
 * Initializes {@code exprEval} against the ObjectInspector of {@code inpShape} and prepares
 * every lead/lag function found in the expression tree.
 *
 * <p>For each lead/lag function expression reported by {@code llInfo} for {@code exprNode}, a
 * separate evaluator is created for the function's first argument, initialized with the same
 * input ObjectInspector, and handed to the function's {@link GenericUDFLeadLag} instance.
 *
 * @param exprEval evaluator of the top-level expression
 * @param exprNode the top-level expression itself, used to look up its lead/lag functions
 * @param inpShape supplies the input ObjectInspector
 * @return the ObjectInspector returned by initializing {@code exprEval}
 * @throws HiveException if initializing or creating an evaluator fails
 */
private ObjectInspector initExprNodeEvaluator(
    ExprNodeEvaluator exprEval, ExprNodeDesc exprNode, ShapeDetails inpShape)
    throws HiveException {
  ObjectInspector resultOI = exprEval.initialize(inpShape.getOI());

  List<ExprNodeGenericFuncDesc> leadLagCalls = llInfo.getLLFuncExprsInTopExpr(exprNode);
  if (leadLagCalls == null) {
    // No lead/lag functions in this expression tree.
    return resultOI;
  }

  for (ExprNodeGenericFuncDesc leadLagCall : leadLagCalls) {
    // Duplicate evaluator for the first argument of the lead/lag call.
    ExprNodeEvaluator firstArgEval =
        WindowingExprNodeEvaluatorFactory.get(llInfo, leadLagCall.getChildren().get(0));
    firstArgEval.initialize(inpShape.getOI());
    ((GenericUDFLeadLag) leadLagCall.getGenericUDF()).setExprEvaluator(firstArgEval);
  }
  return resultOI;
}
private static ExprNodeConstantDesc foldConstant(ExprNodeGenericFuncDesc func) { GenericUDF udf = func.getGenericUDF(); if (!FunctionRegistry.isDeterministic(udf) || FunctionRegistry.isStateful(udf)) { return null; } try { // If the UDF depends on any external resources, we can't fold because the // resources may not be available at compile time. if (udf instanceof GenericUDFBridge) { UDF internal = ReflectionUtils.newInstance(((GenericUDFBridge) udf).getUdfClass(), null); if (internal.getRequiredFiles() != null || internal.getRequiredJars() != null) { return null; } } else { if (udf.getRequiredFiles() != null || udf.getRequiredJars() != null) { return null; } } if (func.getChildren() != null) { for (ExprNodeDesc child : func.getChildren()) { if (child instanceof ExprNodeConstantDesc) { continue; } if (child instanceof ExprNodeGenericFuncDesc) { if (foldConstant((ExprNodeGenericFuncDesc) child) != null) { continue; } } return null; } } ExprNodeEvaluator evaluator = ExprNodeEvaluatorFactory.get(func); ObjectInspector output = evaluator.initialize(null); Object constant = evaluator.evaluate(null); Object java = ObjectInspectorUtils.copyToStandardJavaObject(constant, output); return new ExprNodeConstantDesc(java); } catch (Exception e) { return null; } }
/**
 * Tells whether an expression always evaluates to the same value.
 *
 * <p>An expression is considered constant when it is a constant expression, or a function
 * expression whose UDF is deterministic and whose children are all constant by this same
 * definition. A deterministic function with no children (a {@code null} or empty child list)
 * is constant.
 *
 * @param value the expression to inspect
 * @return {@code true} if {@code value} is constant as described above; {@code false} for any
 *     other kind of expression, including {@code null}
 */
public static boolean isConstant(ExprNodeDesc value) {
  if (value instanceof ExprNodeConstantDesc) {
    return true;
  }
  if (!(value instanceof ExprNodeGenericFuncDesc)) {
    return false;
  }
  ExprNodeGenericFuncDesc func = (ExprNodeGenericFuncDesc) value;
  if (!FunctionRegistry.isDeterministic(func.getGenericUDF())) {
    return false;
  }
  // The child list may be null; treat that like an empty list instead of failing with a
  // NullPointerException while iterating.
  if (func.getChildren() != null) {
    for (ExprNodeDesc child : func.getChildren()) {
      if (!isConstant(child)) {
        return false;
      }
    }
  }
  return true;
}
public static ExprNodeDesc backtrack( ExprNodeDesc source, Operator<?> current, Operator<?> terminal, boolean foldExpr) throws SemanticException { Operator<?> parent = getSingleParent(current, terminal); if (parent == null) { return source; } if (source instanceof ExprNodeGenericFuncDesc) { // all children expression should be resolved ExprNodeGenericFuncDesc function = (ExprNodeGenericFuncDesc) source.clone(); List<ExprNodeDesc> children = backtrack(function.getChildren(), current, terminal, foldExpr); for (ExprNodeDesc child : children) { if (child == null) { // Could not resolve all of the function children, fail return null; } } function.setChildren(children); if (foldExpr) { // fold after replacing, if possible ExprNodeDesc foldedFunction = ConstantPropagateProcFactory.foldExpr(function); if (foldedFunction != null) { return foldedFunction; } } return function; } if (source instanceof ExprNodeColumnDesc) { ExprNodeColumnDesc column = (ExprNodeColumnDesc) source; return backtrack(column, parent, terminal); } if (source instanceof ExprNodeFieldDesc) { // field expression should be resolved ExprNodeFieldDesc field = (ExprNodeFieldDesc) source.clone(); ExprNodeDesc fieldDesc = backtrack(field.getDesc(), current, terminal, foldExpr); if (fieldDesc == null) { return null; } field.setDesc(fieldDesc); return field; } // constant or null expr, just return return source; }