/** * Takes the actual arguments and returns the result. Gets passed the input, does whatever it * wants to it, and then returns the output. * * <p>The input is accessed using the ObjectInspectors that were saved into global variables in * the call to initialize() * * <p>This method is called once for every row of data being processed. UDFs are called during the * map phase of the MapReduce job. This means that we have no control over the order in which the * records get sent to the UDF. * * @param arguments * @return * @throws HiveException */ @Override public Object evaluate(DeferredObject[] arguments) throws HiveException { assert arguments != null : "Method 'evaluate' of HostNormalizerUDF " + "called with null arguments array"; assert arguments.length == 1 : "Method 'evaluate' of " + "HostNormalizerUDF called arguments of length " + arguments.length + " (instead of 1)"; // arguments is an array with exactly 1 entry. assert result != null : "Result object has not yet been initialized, " + "but evaluate called"; // result object has been initialized. So it's an array of objects of // the right length. String uriHost = argumentOI.getPrimitiveJavaObject(arguments[0].get()); NormalizedHostInfo normHost = webrequest.normalizeHost(uriHost); if (normHost == null) { result[IDX_PROJECT_CLASS] = NormalizedHostInfo.EMPTY_NORM_HOST_VALUE; result[IDX_PROJECT] = NormalizedHostInfo.EMPTY_NORM_HOST_VALUE; result[IDX_QUALIFIERS] = new ArrayList<String>(); result[IDX_TLD] = NormalizedHostInfo.EMPTY_NORM_HOST_VALUE; } else { result[IDX_PROJECT_CLASS] = normHost.getProjectClass(); result[IDX_PROJECT] = normHost.getProject(); result[IDX_QUALIFIERS] = normHost.getQualifiers(); result[IDX_TLD] = normHost.getTld(); } return result; }
/**
 * Validates the argument of the UDF and declares its return type.
 *
 * <p>Exactly one argument of primitive string type is accepted. On success the method stores the
 * argument's object inspector and a {@code Webrequest} instance for later use by
 * {@code evaluate()}, records the array position of every output field, and allocates the
 * {@code result} array that {@code evaluate()} fills.
 *
 * <p>The declared return type is a struct (not a map) with these fields, in this order:
 *
 * <ul>
 *   <li>{@code project_class} - string
 *   <li>{@code project} - string
 *   <li>{@code qualifiers} - list of strings
 *   <li>{@code tld} - string
 * </ul>
 *
 * @param arguments object inspectors describing the arguments the UDF is called with
 * @return a standard struct object inspector describing the four fields listed above
 * @throws UDFArgumentException a {@code UDFArgumentLengthException} if the number of arguments is
 *     not 1, or a {@code UDFArgumentTypeException} if the argument is not of string type
 */
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
  if (arguments.length != 1) {
    throw new UDFArgumentLengthException(
        "The HostNormalizerUDF takes an array with only 1 element as argument");
  }

  // The single parameter must be a primitive of category STRING. The category is tested first so
  // that the cast to PrimitiveObjectInspector is only reached for primitive inspectors.
  ObjectInspector arg = arguments[0];
  int argIndex = 0;
  boolean isString =
      arg.getCategory() == Category.PRIMITIVE
          && ((PrimitiveObjectInspector) arg).getPrimitiveCategory() == PrimitiveCategory.STRING;
  if (!isString) {
    throw new UDFArgumentTypeException(
        argIndex,
        "A string argument was expected but an argument of type "
            + arg.getTypeName()
            + " was given.");
  }

  // State used by evaluate(): the normalizer and the inspector that reads the argument.
  webrequest = Webrequest.getInstance();
  argumentOI = (StringObjectInspector) arg;

  // Field names and field inspectors are parallel lists; each IDX_* records the position of its
  // field so that evaluate() writes the value into the matching slot of the result array.
  List<String> fieldNames = new ArrayList<>();
  List<ObjectInspector> fieldOIs = new ArrayList<>();
  int idx = 0;

  fieldNames.add("project_class");
  fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
  IDX_PROJECT_CLASS = idx++;

  fieldNames.add("project");
  fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
  IDX_PROJECT = idx++;

  fieldNames.add("qualifiers");
  fieldOIs.add(
      ObjectInspectorFactory.getStandardListObjectInspector(
          PrimitiveObjectInspectorFactory.javaStringObjectInspector));
  IDX_QUALIFIERS = idx++;

  fieldNames.add("tld");
  fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
  IDX_TLD = idx++;

  // One slot per declared field.
  result = new Object[idx];

  return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
}