@Override
public Object evaluate(GenericUDF.DeferredObject[] arguments) throws HiveException {
  // Resolve the four mandatory arguments to Java strings via the inspectors
  // captured at initialization time.
  String uriPath = stringArgument(arguments, 0);
  String uriQuery = stringArgument(arguments, 1);
  String contentType = stringArgument(arguments, 2);
  String userAgent = stringArgument(arguments, 3);
  // The raw X-Analytics header is optional; when the UDF was initialized
  // without it, fall back to an empty string.
  String rawXAnalyticsHeader = checkForXAnalytics ? stringArgument(arguments, 4) : "";
  return PageviewDefinition.getInstance()
      .isAppPageview(uriPath, uriQuery, contentType, userAgent, rawXAnalyticsHeader);
}

/**
 * Reads positional argument {@code index} as a String using the corresponding
 * object inspector stored in {@code argumentsOI}.
 */
private String stringArgument(GenericUDF.DeferredObject[] arguments, int index)
    throws HiveException {
  return PrimitiveObjectInspectorUtils.getString(
      arguments[index].get(), (PrimitiveObjectInspector) argumentsOI[index]);
}
@Override
public void merge(AggregationBuffer agg, Object partial) throws HiveException {
  // A null partial means the upstream task produced no rows; nothing to fold in.
  if (partial == null) {
    return;
  }
  SumLongAgg sumAgg = (SumLongAgg) agg;
  sumAgg.empty = false;
  if (isWindowingDistinct()) {
    // DISTINCT over a window cannot be recomputed from partial sums, so the
    // two-phase merge/terminatePartial protocol is unsupported for it.
    throw new HiveException(
        "Distinct windowing UDAF doesn't support merge and terminatePartial");
  }
  // Accumulate the partial sum into this buffer.
  sumAgg.sum += PrimitiveObjectInspectorUtils.getLong(partial, inputOI);
}
@Override
public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
  assert (parameters.length == 1);
  SumLongAgg sumAgg = (SumLongAgg) agg;
  try {
    // Only fold in values the eligibility check accepts (e.g. non-null input).
    if (isEligibleValue(sumAgg, parameters[0])) {
      sumAgg.empty = false;
      sumAgg.sum += PrimitiveObjectInspectorUtils.getLong(parameters[0], inputOI);
    }
  } catch (NumberFormatException e) {
    // Malformed numeric input: warn once per task instead of flooding the log,
    // and skip the row rather than failing the whole query.
    if (!warned) {
      warned = true;
      LOG.warn(getClass().getSimpleName() + " " + StringUtils.stringifyException(e));
    }
  }
}
@Override public List<Text> evaluate(@Nonnull final DeferredObject[] arguments) throws HiveException { result.clear(); final int size = arguments.length - 1; for (int i = 0; i < size; i++) { Object argument = arguments[i + 1].get(); if (argument == null) { continue; } PrimitiveObjectInspector oi = inputOIs[i]; String s = PrimitiveObjectInspectorUtils.getString(argument, oi); if (s.isEmpty()) { continue; } // categorical feature representation String featureName = featureNames[i]; Text f = new Text(featureName + '#' + s); result.add(f); } return result; }
/**
 * Translates a Hive generic-function expression into a Calcite {@link RexNode}.
 *
 * <p>For binary numeric operators and two-argument comparisons, children are
 * first cast to a common/minimal type so the Calcite operator sees compatible
 * argument types. Explicit casts are handled separately via
 * {@code handleExplicitCast}; everything else is mapped to a Calcite
 * {@link SqlOperator} and emitted as a {@code RexCall}.
 *
 * @param func the Hive function expression to convert
 * @return the equivalent Calcite row expression
 * @throws SemanticException if a child conversion or operator lookup fails
 */
private RexNode convert(final ExprNodeGenericFuncDesc func) throws SemanticException {
  ExprNodeDesc tmpExprNode;
  RexNode tmpRN;
  List<RexNode> childRexNodeLst = new LinkedList<RexNode>();
  Builder<RelDataType> argTypeBldr = ImmutableList.<RelDataType>builder();

  // TODO: 1) Expand to other functions as needed 2) What about types other than primitive.

  // Determine a target type (tgtDT) that children may need to be cast to:
  // - numeric binary ops: the function's own (primitive numeric) result type;
  // - two-argument comparisons: the common comparison type of both children.
  TypeInfo tgtDT = null;
  GenericUDF tgtUdf = func.getGenericUDF();

  boolean isNumeric =
      (tgtUdf instanceof GenericUDFBaseBinary
          && func.getTypeInfo().getCategory() == Category.PRIMITIVE
          && (PrimitiveGrouping.NUMERIC_GROUP
              == PrimitiveObjectInspectorUtils.getPrimitiveGrouping(
                  ((PrimitiveTypeInfo) func.getTypeInfo()).getPrimitiveCategory())));
  boolean isCompare = !isNumeric && tgtUdf instanceof GenericUDFBaseCompare;

  if (isNumeric) {
    tgtDT = func.getTypeInfo();

    assert func.getChildren().size() == 2;
    // TODO: checking 2 children is useless, compare already does that.
  } else if (isCompare && (func.getChildren().size() == 2)) {
    tgtDT =
        FunctionRegistry.getCommonClassForComparison(
            func.getChildren().get(0).getTypeInfo(), func.getChildren().get(1).getTypeInfo());
  }

  // Convert each child, inserting a conversion cast where the target type requires it,
  // and collect both the Calcite argument types and the converted child expressions.
  for (ExprNodeDesc childExpr : func.getChildren()) {
    tmpExprNode = childExpr;
    if (tgtDT != null
        && TypeInfoUtils.isConversionRequiredForComparison(tgtDT, childExpr.getTypeInfo())) {
      if (isCompare) {
        // For compare, we will convert requisite children
        tmpExprNode = ParseUtils.createConversionCast(childExpr, (PrimitiveTypeInfo) tgtDT);
      } else if (isNumeric) {
        // For numeric, we'll do minimum necessary cast - if we cast to the type
        // of expression, bad things will happen.
        PrimitiveTypeInfo minArgType = ExprNodeDescUtils.deriveMinArgumentCast(childExpr, tgtDT);
        tmpExprNode = ParseUtils.createConversionCast(childExpr, minArgType);
      } else {
        throw new AssertionError("Unexpected " + tgtDT + " - not a numeric op or compare");
      }
    }

    argTypeBldr.add(TypeConverter.convert(tmpExprNode.getTypeInfo(), cluster.getTypeFactory()));
    tmpRN = convert(tmpExprNode); // recursive conversion of the (possibly cast) child
    childRexNodeLst.add(tmpRN);
  }

  // See if this is an explicit cast.
  RexNode expr = null;
  RelDataType retType = null;
  expr = handleExplicitCast(func, childRexNodeLst);

  if (expr == null) {
    // This is not a cast; process the function.
    retType = TypeConverter.convert(func.getTypeInfo(), cluster.getTypeFactory());
    SqlOperator calciteOp =
        SqlFunctionConverter.getCalciteOperator(
            func.getFuncText(), func.getGenericUDF(), argTypeBldr.build(), retType);
    expr = cluster.getRexBuilder().makeCall(calciteOp, childRexNodeLst);
  } else {
    retType = expr.getType();
  }

  // Flatten nested calls of the same operator (e.g. AND(AND(a,b),c) -> AND(a,b,c)),
  // except for casts, which Calcite cannot re-infer the type for.
  // TODO: Cast Function in Calcite have a bug where it infer type on cast throws
  // an exception
  if (flattenExpr
      && (expr instanceof RexCall)
      && !(((RexCall) expr).getOperator() instanceof SqlCastFunction)) {
    RexCall call = (RexCall) expr;
    expr =
        cluster
            .getRexBuilder()
            .makeCall(
                retType,
                call.getOperator(),
                RexUtil.flatten(call.getOperands(), call.getOperator()));
  }

  return expr;
}
public void validate() throws SemanticException { if ((this.getCols() == null) || (this.getCols().size() == 0)) { // for now make sure that serde exists if (StringUtils.isEmpty(this.getSerName()) || !SerDeUtils.shouldGetColsFromSerDe(this.getSerName())) { throw new SemanticException(ErrorMsg.INVALID_TBL_DDL_SERDE.getMsg()); } return; } if (this.getStorageHandler() == null) { try { Class<?> origin = Class.forName(this.getOutputFormat(), true, JavaUtils.getClassLoader()); Class<? extends HiveOutputFormat> replaced = HiveFileFormatUtils.getOutputFormatSubstitute(origin); if (replaced == null) { throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE.getMsg()); } } catch (ClassNotFoundException e) { throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE.getMsg()); } } List<String> colNames = ParseUtils.validateColumnNameUniqueness(this.getCols()); if (this.getBucketCols() != null) { // all columns in cluster and sort are valid columns Iterator<String> bucketCols = this.getBucketCols().iterator(); while (bucketCols.hasNext()) { String bucketCol = bucketCols.next(); boolean found = false; Iterator<String> colNamesIter = colNames.iterator(); while (colNamesIter.hasNext()) { String colName = colNamesIter.next(); if (bucketCol.equalsIgnoreCase(colName)) { found = true; break; } } if (!found) { throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg()); } } } if (this.getSortCols() != null) { // all columns in cluster and sort are valid columns Iterator<Order> sortCols = this.getSortCols().iterator(); while (sortCols.hasNext()) { String sortCol = sortCols.next().getCol(); boolean found = false; Iterator<String> colNamesIter = colNames.iterator(); while (colNamesIter.hasNext()) { String colName = colNamesIter.next(); if (sortCol.equalsIgnoreCase(colName)) { found = true; break; } } if (!found) { throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg()); } } } if (this.getPartCols() != null) { // there is no overlap between columns and partitioning 
columns Iterator<FieldSchema> partColsIter = this.getPartCols().iterator(); while (partColsIter.hasNext()) { FieldSchema fs = partColsIter.next(); String partCol = fs.getName(); PrimitiveObjectInspectorUtils.PrimitiveTypeEntry pte = PrimitiveObjectInspectorUtils.getTypeEntryFromTypeName(fs.getType()); if (null == pte) { throw new SemanticException( ErrorMsg.PARTITION_COLUMN_NON_PRIMITIVE.getMsg() + " Found " + partCol + " of type: " + fs.getType()); } Iterator<String> colNamesIter = colNames.iterator(); while (colNamesIter.hasNext()) { String colName = BaseSemanticAnalyzer.unescapeIdentifier(colNamesIter.next()); if (partCol.equalsIgnoreCase(colName)) { throw new SemanticException(ErrorMsg.COLUMN_REPEATED_IN_PARTITIONING_COLS.getMsg()); } } } } /* Validate skewed information. */ ValidationUtility.validateSkewedInformation( colNames, this.getSkewedColNames(), this.getSkewedColValues()); }