/** * Takes the actual arguments and returns the result. Gets passed the input, does whatever it * wants to it, and then returns the output. * * <p>The input is accessed using the ObjectInspectors that were saved into global variables in * the call to initialize() * * <p>This method is called once for every row of data being processed. UDFs are called during the * map phase of the MapReduce job. This means that we have no control over the order in which the * records get sent to the UDF. * * @param arguments * @return * @throws HiveException */ @Override public Object evaluate(DeferredObject[] arguments) throws HiveException { assert arguments != null : "Method 'evaluate' of HostNormalizerUDF " + "called with null arguments array"; assert arguments.length == 1 : "Method 'evaluate' of " + "HostNormalizerUDF called arguments of length " + arguments.length + " (instead of 1)"; // arguments is an array with exactly 1 entry. assert result != null : "Result object has not yet been initialized, " + "but evaluate called"; // result object has been initialized. So it's an array of objects of // the right length. String uriHost = argumentOI.getPrimitiveJavaObject(arguments[0].get()); NormalizedHostInfo normHost = webrequest.normalizeHost(uriHost); if (normHost == null) { result[IDX_PROJECT_CLASS] = NormalizedHostInfo.EMPTY_NORM_HOST_VALUE; result[IDX_PROJECT] = NormalizedHostInfo.EMPTY_NORM_HOST_VALUE; result[IDX_QUALIFIERS] = new ArrayList<String>(); result[IDX_TLD] = NormalizedHostInfo.EMPTY_NORM_HOST_VALUE; } else { result[IDX_PROJECT_CLASS] = normHost.getProjectClass(); result[IDX_PROJECT] = normHost.getProject(); result[IDX_QUALIFIERS] = normHost.getQualifiers(); result[IDX_TLD] = normHost.getTld(); } return result; }
@Override public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException { final StructObjectInspector outputRowOI = (StructObjectInspector) objInspector; final List<? extends StructField> outputFieldRefs = outputRowOI.getAllStructFieldRefs(); if (outputFieldRefs.size() != numCols) { throw new SerDeException( "Cannot serialize the object because there are " + outputFieldRefs.size() + " fields but the table has " + numCols + " columns."); } // Get all data out. for (int c = 0; c < numCols; c++) { final Object field = outputRowOI.getStructFieldData(obj, outputFieldRefs.get(c)); final ObjectInspector fieldOI = outputFieldRefs.get(c).getFieldObjectInspector(); // The data must be of type String final StringObjectInspector fieldStringOI = (StringObjectInspector) fieldOI; // Convert the field to Java class String, because objects of String // type // can be stored in String, Text, or some other classes. outputFields[c] = fieldStringOI.getPrimitiveJavaObject(field); } final StringWriter writer = new StringWriter(); final CSVWriter csv = newWriter(writer, separatorChar, quoteChar, escapeChar); try { csv.writeNext(outputFields); csv.close(); return new Text(writer.toString()); } catch (final IOException ioe) { throw new SerDeException(ioe); } }
@Override public Object evaluate(DeferredObject[] arguments) throws HiveException { // get the list and string from the deferred objects using the object inspectors List<String> list = (List<String>) this.listOI.getList(arguments[0].get()); String arg = elementOI.getPrimitiveJavaObject(arguments[1].get()); // check for nulls if (list == null || arg == null) { return null; } // see if our list contains the value we need for (String s : list) { if (arg.equals(s)) return new Boolean(true); } return new Boolean(false); }
/** Date-time handled as a string, using a value whose time fields are all zero. */
@Test
public void datetime_by_string_w_zeros() {
  ValueSerde stringSerde = StringValueSerdeFactory.DATETIME;
  StringObjectInspector stringInspector = (StringObjectInspector) stringSerde.getInspector();

  DateTimeOption source = new DateTimeOption(new DateTime(1, 1, 1, 0, 0, 0));
  String expectedText = "0001-01-01 00:00:00";

  // copyObject: equal to the source, but a distinct instance; null stays null.
  assertThat(stringInspector.copyObject(source), is((Object) source));
  assertThat(stringInspector.copyObject(source), is(not(sameInstance((Object) source))));
  assertThat(stringInspector.copyObject(null), is(nullValue()));

  // Java-object view.
  assertThat(stringInspector.getPrimitiveJavaObject(source), is(expectedText));
  assertThat(stringInspector.getPrimitiveJavaObject(null), is(nullValue()));

  // Writable view.
  assertThat(stringInspector.getPrimitiveWritableObject(source), is(new Text(expectedText)));
  assertThat(stringInspector.getPrimitiveWritableObject(null), is(nullValue()));

  // The driver copies a value into a target option, and null turns the target into null.
  ValueDriver valueDriver = stringSerde.getDriver(stringInspector);
  DateTimeOption target = new DateTimeOption();

  valueDriver.set(target, source);
  assertThat(target, is(source));

  valueDriver.set(target, null);
  assertThat(target.isNull(), is(true));
}
/** Date handled as a string. */
@Test
public void date_by_string() {
  ValueSerde stringSerde = StringValueSerdeFactory.DATE;
  StringObjectInspector stringInspector = (StringObjectInspector) stringSerde.getInspector();

  DateOption source = new DateOption(new Date(2014, 7, 1));
  String expectedText = "2014-07-01";

  // copyObject: equal to the source, but a distinct instance; null stays null.
  assertThat(stringInspector.copyObject(source), is((Object) source));
  assertThat(stringInspector.copyObject(source), is(not(sameInstance((Object) source))));
  assertThat(stringInspector.copyObject(null), is(nullValue()));

  // Java-object view.
  assertThat(stringInspector.getPrimitiveJavaObject(source), is(expectedText));
  assertThat(stringInspector.getPrimitiveJavaObject(null), is(nullValue()));

  // Writable view.
  assertThat(stringInspector.getPrimitiveWritableObject(source), is(new Text(expectedText)));
  assertThat(stringInspector.getPrimitiveWritableObject(null), is(nullValue()));

  // The driver copies a value into a target option, and null turns the target into null.
  ValueDriver valueDriver = stringSerde.getDriver(stringInspector);
  DateOption target = new DateOption();

  valueDriver.set(target, source);
  assertThat(target, is(source));

  valueDriver.set(target, null);
  assertThat(target.isNull(), is(true));
}
/** Decimal handled as a string. */
@Test
public void decimal_by_string() {
  ValueSerde stringSerde = StringValueSerdeFactory.DECIMAL;
  StringObjectInspector stringInspector = (StringObjectInspector) stringSerde.getInspector();

  DecimalOption source = new DecimalOption(new BigDecimal("123.45"));
  String expectedText = "123.45";

  // copyObject: equal to the source, but a distinct instance; null stays null.
  assertThat(stringInspector.copyObject(source), is((Object) source));
  assertThat(stringInspector.copyObject(source), is(not(sameInstance((Object) source))));
  assertThat(stringInspector.copyObject(null), is(nullValue()));

  // Java-object view.
  assertThat(stringInspector.getPrimitiveJavaObject(source), is(expectedText));
  assertThat(stringInspector.getPrimitiveJavaObject(null), is(nullValue()));

  // Writable view.
  assertThat(stringInspector.getPrimitiveWritableObject(source), is(new Text(expectedText)));
  assertThat(stringInspector.getPrimitiveWritableObject(null), is(nullValue()));

  // The driver copies a value into a target option, and null turns the target into null.
  ValueDriver valueDriver = stringSerde.getDriver(stringInspector);
  DecimalOption target = new DecimalOption();

  valueDriver.set(target, source);
  assertThat(target, is(source));

  valueDriver.set(target, null);
  assertThat(target.isNull(), is(true));
}
static void serialize(OutputByteBuffer buffer, Object o, ObjectInspector oi, boolean invert) { // Is this field a null? if (o == null) { buffer.write((byte) 0, invert); return; } // This field is not a null. buffer.write((byte) 1, invert); switch (oi.getCategory()) { case PRIMITIVE: { PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; switch (poi.getPrimitiveCategory()) { case VOID: { return; } case BOOLEAN: { boolean v = ((BooleanObjectInspector) poi).get(o); buffer.write((byte) (v ? 2 : 1), invert); return; } case BYTE: { ByteObjectInspector boi = (ByteObjectInspector) poi; byte v = boi.get(o); buffer.write((byte) (v ^ 0x80), invert); return; } case SHORT: { ShortObjectInspector spoi = (ShortObjectInspector) poi; short v = spoi.get(o); buffer.write((byte) ((v >> 8) ^ 0x80), invert); buffer.write((byte) v, invert); return; } case INT: { IntObjectInspector ioi = (IntObjectInspector) poi; int v = ioi.get(o); buffer.write((byte) ((v >> 24) ^ 0x80), invert); buffer.write((byte) (v >> 16), invert); buffer.write((byte) (v >> 8), invert); buffer.write((byte) v, invert); return; } case LONG: { LongObjectInspector loi = (LongObjectInspector) poi; long v = loi.get(o); buffer.write((byte) ((v >> 56) ^ 0x80), invert); buffer.write((byte) (v >> 48), invert); buffer.write((byte) (v >> 40), invert); buffer.write((byte) (v >> 32), invert); buffer.write((byte) (v >> 24), invert); buffer.write((byte) (v >> 16), invert); buffer.write((byte) (v >> 8), invert); buffer.write((byte) v, invert); return; } case FLOAT: { FloatObjectInspector foi = (FloatObjectInspector) poi; int v = Float.floatToIntBits(foi.get(o)); if ((v & (1 << 31)) != 0) { // negative number, flip all bits v = ~v; } else { // positive number, flip the first bit v = v ^ (1 << 31); } buffer.write((byte) (v >> 24), invert); buffer.write((byte) (v >> 16), invert); buffer.write((byte) (v >> 8), invert); buffer.write((byte) v, invert); return; } case DOUBLE: { DoubleObjectInspector doi = (DoubleObjectInspector) 
poi; long v = Double.doubleToLongBits(doi.get(o)); if ((v & (1L << 63)) != 0) { // negative number, flip all bits v = ~v; } else { // positive number, flip the first bit v = v ^ (1L << 63); } buffer.write((byte) (v >> 56), invert); buffer.write((byte) (v >> 48), invert); buffer.write((byte) (v >> 40), invert); buffer.write((byte) (v >> 32), invert); buffer.write((byte) (v >> 24), invert); buffer.write((byte) (v >> 16), invert); buffer.write((byte) (v >> 8), invert); buffer.write((byte) v, invert); return; } case STRING: { StringObjectInspector soi = (StringObjectInspector) poi; Text t = soi.getPrimitiveWritableObject(o); serializeBytes(buffer, t.getBytes(), t.getLength(), invert); return; } case BINARY: { BinaryObjectInspector baoi = (BinaryObjectInspector) poi; BytesWritable ba = baoi.getPrimitiveWritableObject(o); byte[] toSer = new byte[ba.getLength()]; System.arraycopy(ba.getBytes(), 0, toSer, 0, ba.getLength()); serializeBytes(buffer, toSer, ba.getLength(), invert); return; } case DATE: { DateObjectInspector doi = (DateObjectInspector) poi; long v = doi.getPrimitiveWritableObject(o).getTimeInSeconds(); buffer.write((byte) ((v >> 56) ^ 0x80), invert); buffer.write((byte) (v >> 48), invert); buffer.write((byte) (v >> 40), invert); buffer.write((byte) (v >> 32), invert); buffer.write((byte) (v >> 24), invert); buffer.write((byte) (v >> 16), invert); buffer.write((byte) (v >> 8), invert); buffer.write((byte) v, invert); return; } case TIMESTAMP: { TimestampObjectInspector toi = (TimestampObjectInspector) poi; TimestampWritable t = toi.getPrimitiveWritableObject(o); byte[] data = t.getBinarySortable(); for (int i = 0; i < data.length; i++) { buffer.write(data[i], invert); } return; } default: { throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory()); } } } case LIST: { ListObjectInspector loi = (ListObjectInspector) oi; ObjectInspector eoi = loi.getListElementObjectInspector(); // \1 followed by each element int size = loi.getListLength(o); 
for (int eid = 0; eid < size; eid++) { buffer.write((byte) 1, invert); serialize(buffer, loi.getListElement(o, eid), eoi, invert); } // and \0 to terminate buffer.write((byte) 0, invert); return; } case MAP: { MapObjectInspector moi = (MapObjectInspector) oi; ObjectInspector koi = moi.getMapKeyObjectInspector(); ObjectInspector voi = moi.getMapValueObjectInspector(); // \1 followed by each key and then each value Map<?, ?> map = moi.getMap(o); for (Map.Entry<?, ?> entry : map.entrySet()) { buffer.write((byte) 1, invert); serialize(buffer, entry.getKey(), koi, invert); serialize(buffer, entry.getValue(), voi, invert); } // and \0 to terminate buffer.write((byte) 0, invert); return; } case STRUCT: { StructObjectInspector soi = (StructObjectInspector) oi; List<? extends StructField> fields = soi.getAllStructFieldRefs(); for (int i = 0; i < fields.size(); i++) { serialize( buffer, soi.getStructFieldData(o, fields.get(i)), fields.get(i).getFieldObjectInspector(), invert); } return; } case UNION: { UnionObjectInspector uoi = (UnionObjectInspector) oi; byte tag = uoi.getTag(o); buffer.write(tag, invert); serialize(buffer, uoi.getField(o), uoi.getObjectInspectors().get(tag), invert); return; } default: { throw new RuntimeException("Unrecognized type: " + oi.getCategory()); } } }