@Override
  public Object evaluate(GenericUDF.DeferredObject[] arguments) throws HiveException {

    String uriPath =
        PrimitiveObjectInspectorUtils.getString(
            arguments[0].get(), (PrimitiveObjectInspector) argumentsOI[0]);
    String uriQuery =
        PrimitiveObjectInspectorUtils.getString(
            arguments[1].get(), (PrimitiveObjectInspector) argumentsOI[1]);

    String contentType =
        PrimitiveObjectInspectorUtils.getString(
            arguments[2].get(), (PrimitiveObjectInspector) argumentsOI[2]);
    String userAgent =
        PrimitiveObjectInspectorUtils.getString(
            arguments[3].get(), (PrimitiveObjectInspector) argumentsOI[3]);

    String rawXAnalyticsHeader = "";

    if (checkForXAnalytics) {
      rawXAnalyticsHeader =
          PrimitiveObjectInspectorUtils.getString(
              arguments[4].get(), (PrimitiveObjectInspector) argumentsOI[4]);
    }
    return PageviewDefinition.getInstance()
        .isAppPageview(uriPath, uriQuery, contentType, userAgent, rawXAnalyticsHeader);
  }
Esempio n. 2
0
 @Override
 public void merge(AggregationBuffer agg, Object partial) throws HiveException {
   if (partial != null) {
     SumLongAgg myagg = (SumLongAgg) agg;
     myagg.empty = false;
     if (isWindowingDistinct()) {
       throw new HiveException(
           "Distinct windowing UDAF doesn't support merge and terminatePartial");
     } else {
       myagg.sum += PrimitiveObjectInspectorUtils.getLong(partial, inputOI);
     }
   }
 }
Esempio n. 3
0
 @Override
 public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
   assert (parameters.length == 1);
   try {
     if (isEligibleValue((SumLongAgg) agg, parameters[0])) {
       ((SumLongAgg) agg).empty = false;
       ((SumLongAgg) agg).sum += PrimitiveObjectInspectorUtils.getLong(parameters[0], inputOI);
     }
   } catch (NumberFormatException e) {
     if (!warned) {
       warned = true;
       LOG.warn(getClass().getSimpleName() + " " + StringUtils.stringifyException(e));
     }
   }
 }
  @Override
  public List<Text> evaluate(@Nonnull final DeferredObject[] arguments) throws HiveException {
    result.clear();

    final int size = arguments.length - 1;
    for (int i = 0; i < size; i++) {
      Object argument = arguments[i + 1].get();
      if (argument == null) {
        continue;
      }

      PrimitiveObjectInspector oi = inputOIs[i];
      String s = PrimitiveObjectInspectorUtils.getString(argument, oi);
      if (s.isEmpty()) {
        continue;
      }

      // categorical feature representation
      String featureName = featureNames[i];
      Text f = new Text(featureName + '#' + s);
      result.add(f);
    }
    return result;
  }
Esempio n. 5
0
  private RexNode convert(final ExprNodeGenericFuncDesc func) throws SemanticException {
    ExprNodeDesc tmpExprNode;
    RexNode tmpRN;

    List<RexNode> childRexNodeLst = new LinkedList<RexNode>();
    Builder<RelDataType> argTypeBldr = ImmutableList.<RelDataType>builder();

    // TODO: 1) Expand to other functions as needed 2) What about types other than primitive.
    TypeInfo tgtDT = null;
    GenericUDF tgtUdf = func.getGenericUDF();

    boolean isNumeric =
        (tgtUdf instanceof GenericUDFBaseBinary
            && func.getTypeInfo().getCategory() == Category.PRIMITIVE
            && (PrimitiveGrouping.NUMERIC_GROUP
                == PrimitiveObjectInspectorUtils.getPrimitiveGrouping(
                    ((PrimitiveTypeInfo) func.getTypeInfo()).getPrimitiveCategory())));
    boolean isCompare = !isNumeric && tgtUdf instanceof GenericUDFBaseCompare;

    if (isNumeric) {
      tgtDT = func.getTypeInfo();

      assert func.getChildren().size() == 2;
      // TODO: checking 2 children is useless, compare already does that.
    } else if (isCompare && (func.getChildren().size() == 2)) {
      tgtDT =
          FunctionRegistry.getCommonClassForComparison(
              func.getChildren().get(0).getTypeInfo(), func.getChildren().get(1).getTypeInfo());
    }

    for (ExprNodeDesc childExpr : func.getChildren()) {
      tmpExprNode = childExpr;
      if (tgtDT != null
          && TypeInfoUtils.isConversionRequiredForComparison(tgtDT, childExpr.getTypeInfo())) {
        if (isCompare) {
          // For compare, we will convert requisite children
          tmpExprNode = ParseUtils.createConversionCast(childExpr, (PrimitiveTypeInfo) tgtDT);
        } else if (isNumeric) {
          // For numeric, we'll do minimum necessary cast - if we cast to the type
          // of expression, bad things will happen.
          PrimitiveTypeInfo minArgType = ExprNodeDescUtils.deriveMinArgumentCast(childExpr, tgtDT);
          tmpExprNode = ParseUtils.createConversionCast(childExpr, minArgType);
        } else {
          throw new AssertionError("Unexpected " + tgtDT + " - not a numeric op or compare");
        }
      }
      argTypeBldr.add(TypeConverter.convert(tmpExprNode.getTypeInfo(), cluster.getTypeFactory()));
      tmpRN = convert(tmpExprNode);
      childRexNodeLst.add(tmpRN);
    }

    // See if this is an explicit cast.
    RexNode expr = null;
    RelDataType retType = null;
    expr = handleExplicitCast(func, childRexNodeLst);

    if (expr == null) {
      // This is not a cast; process the function.
      retType = TypeConverter.convert(func.getTypeInfo(), cluster.getTypeFactory());
      SqlOperator calciteOp =
          SqlFunctionConverter.getCalciteOperator(
              func.getFuncText(), func.getGenericUDF(), argTypeBldr.build(), retType);
      expr = cluster.getRexBuilder().makeCall(calciteOp, childRexNodeLst);
    } else {
      retType = expr.getType();
    }

    // TODO: Cast Function in Calcite have a bug where it infer type on cast throws
    // an exception
    if (flattenExpr
        && (expr instanceof RexCall)
        && !(((RexCall) expr).getOperator() instanceof SqlCastFunction)) {
      RexCall call = (RexCall) expr;
      expr =
          cluster
              .getRexBuilder()
              .makeCall(
                  retType,
                  call.getOperator(),
                  RexUtil.flatten(call.getOperands(), call.getOperator()));
    }

    return expr;
  }
Esempio n. 6
0
  public void validate() throws SemanticException {

    if ((this.getCols() == null) || (this.getCols().size() == 0)) {
      // for now make sure that serde exists
      if (StringUtils.isEmpty(this.getSerName())
          || !SerDeUtils.shouldGetColsFromSerDe(this.getSerName())) {
        throw new SemanticException(ErrorMsg.INVALID_TBL_DDL_SERDE.getMsg());
      }
      return;
    }

    if (this.getStorageHandler() == null) {
      try {
        Class<?> origin = Class.forName(this.getOutputFormat(), true, JavaUtils.getClassLoader());
        Class<? extends HiveOutputFormat> replaced =
            HiveFileFormatUtils.getOutputFormatSubstitute(origin);
        if (replaced == null) {
          throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE.getMsg());
        }
      } catch (ClassNotFoundException e) {
        throw new SemanticException(ErrorMsg.INVALID_OUTPUT_FORMAT_TYPE.getMsg());
      }
    }

    List<String> colNames = ParseUtils.validateColumnNameUniqueness(this.getCols());

    if (this.getBucketCols() != null) {
      // all columns in cluster and sort are valid columns
      Iterator<String> bucketCols = this.getBucketCols().iterator();
      while (bucketCols.hasNext()) {
        String bucketCol = bucketCols.next();
        boolean found = false;
        Iterator<String> colNamesIter = colNames.iterator();
        while (colNamesIter.hasNext()) {
          String colName = colNamesIter.next();
          if (bucketCol.equalsIgnoreCase(colName)) {
            found = true;
            break;
          }
        }
        if (!found) {
          throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg());
        }
      }
    }

    if (this.getSortCols() != null) {
      // all columns in cluster and sort are valid columns
      Iterator<Order> sortCols = this.getSortCols().iterator();
      while (sortCols.hasNext()) {
        String sortCol = sortCols.next().getCol();
        boolean found = false;
        Iterator<String> colNamesIter = colNames.iterator();
        while (colNamesIter.hasNext()) {
          String colName = colNamesIter.next();
          if (sortCol.equalsIgnoreCase(colName)) {
            found = true;
            break;
          }
        }
        if (!found) {
          throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg());
        }
      }
    }

    if (this.getPartCols() != null) {
      // there is no overlap between columns and partitioning columns
      Iterator<FieldSchema> partColsIter = this.getPartCols().iterator();
      while (partColsIter.hasNext()) {
        FieldSchema fs = partColsIter.next();
        String partCol = fs.getName();
        PrimitiveObjectInspectorUtils.PrimitiveTypeEntry pte =
            PrimitiveObjectInspectorUtils.getTypeEntryFromTypeName(fs.getType());
        if (null == pte) {
          throw new SemanticException(
              ErrorMsg.PARTITION_COLUMN_NON_PRIMITIVE.getMsg()
                  + " Found "
                  + partCol
                  + " of type: "
                  + fs.getType());
        }
        Iterator<String> colNamesIter = colNames.iterator();
        while (colNamesIter.hasNext()) {
          String colName = BaseSemanticAnalyzer.unescapeIdentifier(colNamesIter.next());
          if (partCol.equalsIgnoreCase(colName)) {
            throw new SemanticException(ErrorMsg.COLUMN_REPEATED_IN_PARTITIONING_COLS.getMsg());
          }
        }
      }
    }

    /* Validate skewed information. */
    ValidationUtility.validateSkewedInformation(
        colNames, this.getSkewedColNames(), this.getSkewedColValues());
  }