/**
   * Normalizes the host passed as the single UDF argument and publishes the pieces of the
   * normalized host into the pre-allocated {@code result} array.
   *
   * <p>The argument is decoded with {@code argumentOI}, handed to {@code
   * webrequest.normalizeHost(...)}, and the outcome is written to the {@code IDX_PROJECT_CLASS},
   * {@code IDX_PROJECT}, {@code IDX_QUALIFIERS} and {@code IDX_TLD} slots. When normalization
   * yields {@code null}, the string slots receive {@code
   * NormalizedHostInfo.EMPTY_NORM_HOST_VALUE} and the qualifiers slot receives a new empty list.
   *
   * <p>The same {@code result} array instance is returned by every call, so callers must not
   * assume it stays unchanged across invocations.
   *
   * @param arguments deferred UDF arguments; expected (and asserted) to hold exactly one entry,
   *     the host to normalize
   * @return the shared {@code result} array, refreshed for this call
   * @throws HiveException declared by the UDF contract; presumably raised while resolving the
   *     deferred argument
   */
  @Override
  public Object evaluate(DeferredObject[] arguments) throws HiveException {
    // Sanity checks; only active when the JVM runs with assertions enabled.
    assert arguments != null
        : "Method 'evaluate' of HostNormalizerUDF called with null arguments array";
    assert arguments.length == 1
        : "Method 'evaluate' of HostNormalizerUDF called arguments of length "
            + arguments.length
            + " (instead of 1)";
    assert result != null : "Result object has not yet been initialized, but evaluate called";

    final NormalizedHostInfo hostInfo =
        webrequest.normalizeHost(argumentOI.getPrimitiveJavaObject(arguments[0].get()));

    if (hostInfo != null) {
      // Normalization succeeded: copy each component into its slot.
      result[IDX_PROJECT_CLASS] = hostInfo.getProjectClass();
      result[IDX_PROJECT] = hostInfo.getProject();
      result[IDX_QUALIFIERS] = hostInfo.getQualifiers();
      result[IDX_TLD] = hostInfo.getTld();
      return result;
    }

    // Nothing could be normalized: fill every slot with its "empty" placeholder.
    result[IDX_PROJECT_CLASS] = NormalizedHostInfo.EMPTY_NORM_HOST_VALUE;
    result[IDX_PROJECT] = NormalizedHostInfo.EMPTY_NORM_HOST_VALUE;
    result[IDX_QUALIFIERS] = new ArrayList<String>();
    result[IDX_TLD] = NormalizedHostInfo.EMPTY_NORM_HOST_VALUE;
    return result;
  }
  /**
   * Renders one row as a single CSV line wrapped in a {@code Text}.
   *
   * <p>Every field of the row must be inspectable through a {@code StringObjectInspector}; each
   * value is converted to a Java {@code String}, stored in the reusable {@code outputFields}
   * buffer and then written with a CSV writer configured from {@code separatorChar}, {@code
   * quoteChar} and {@code escapeChar}.
   *
   * @param obj the row object to serialize
   * @param objInspector inspector for {@code obj}; must be a {@code StructObjectInspector}
   * @return a {@code Text} holding the CSV representation of the row
   * @throws SerDeException if the number of struct fields differs from {@code numCols}, or if
   *     writing/closing the CSV writer fails with an {@code IOException}
   */
  @Override
  public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
    final StructObjectInspector rowInspector = (StructObjectInspector) objInspector;
    final List<? extends StructField> fieldRefs = rowInspector.getAllStructFieldRefs();
    final int fieldCount = fieldRefs.size();

    if (fieldCount != numCols) {
      throw new SerDeException(
          "Cannot serialize the object because there are "
              + fieldCount
              + " fields but the table has "
              + numCols
              + " columns.");
    }

    // Pull each column out of the row as a plain Java String.
    for (int col = 0; col < numCols; col++) {
      final StructField ref = fieldRefs.get(col);
      final Object rawValue = rowInspector.getStructFieldData(obj, ref);

      // String-typed data may be backed by String, Text or other classes, so go through
      // the string inspector to obtain a Java String.
      final StringObjectInspector stringInspector =
          (StringObjectInspector) ref.getFieldObjectInspector();
      outputFields[col] = stringInspector.getPrimitiveJavaObject(rawValue);
    }

    final StringWriter sink = new StringWriter();
    final CSVWriter csvWriter = newWriter(sink, separatorChar, quoteChar, escapeChar);

    try {
      csvWriter.writeNext(outputFields);
      csvWriter.close();
    } catch (final IOException ioe) {
      throw new SerDeException(ioe);
    }

    return new Text(sink.toString());
  }
示例#3
0
  /**
   * Reports whether the list given as the first argument contains the string given as the second
   * argument.
   *
   * <p>The list is read through {@code listOI} and the search string through {@code elementOI}.
   * Elements are compared with {@link String#equals(Object)}, so {@code null} elements never
   * match.
   *
   * @param arguments deferred UDF arguments: index 0 is the list to search, index 1 is the string
   *     to look for
   * @return {@link Boolean#TRUE} if an element equals the search string, {@link Boolean#FALSE}
   *     if none does, or {@code null} when either the list or the search string is {@code null}
   * @throws HiveException declared by the UDF contract; presumably raised while resolving a
   *     deferred argument
   */
  @Override
  public Object evaluate(DeferredObject[] arguments) throws HiveException {

    // Get the list and string from the deferred objects using the object inspectors.
    // NOTE(review): the cast assumes the inspector hands back Java Strings as elements —
    // confirm against the checks done in initialize().
    @SuppressWarnings("unchecked")
    List<String> list = (List<String>) this.listOI.getList(arguments[0].get());
    String arg = elementOI.getPrimitiveJavaObject(arguments[1].get());

    // SQL-style null propagation: a null input yields a null answer.
    if (list == null || arg == null) {
      return null;
    }

    // See if our list contains the value we need. Use the canonical Boolean constants instead
    // of the deprecated Boolean constructor to avoid a needless allocation per row.
    for (String s : list) {
      if (arg.equals(s)) {
        return Boolean.TRUE;
      }
    }
    return Boolean.FALSE;
  }
  /** Date-time handled as a string, using the value 0001-01-01 00:00:00. */
  @Test
  public void datetime_by_string_w_zeros() {
    // Fixture: the option under test and its expected textual form.
    final String expectedText = "0001-01-01 00:00:00";
    final DateTimeOption source = new DateTimeOption(new DateTime(1, 1, 1, 0, 0, 0));

    final ValueSerde dateTimeSerde = StringValueSerdeFactory.DATETIME;
    final StringObjectInspector stringInspector =
        (StringObjectInspector) dateTimeSerde.getInspector();

    // copyObject must yield an equal but distinct instance, and pass null through.
    assertThat(stringInspector.copyObject(source), is((Object) source));
    assertThat(stringInspector.copyObject(source), is(not(sameInstance((Object) source))));
    assertThat(stringInspector.copyObject(null), is(nullValue()));

    // Conversions to String and Text, including null handling.
    assertThat(stringInspector.getPrimitiveJavaObject(source), is(expectedText));
    assertThat(stringInspector.getPrimitiveJavaObject(null), is(nullValue()));
    assertThat(stringInspector.getPrimitiveWritableObject(source), is(new Text(expectedText)));
    assertThat(stringInspector.getPrimitiveWritableObject(null), is(nullValue()));

    // The driver must copy the value into the target and reset it to null on null input.
    final ValueDriver valueDriver = dateTimeSerde.getDriver(stringInspector);
    final DateTimeOption target = new DateTimeOption();

    valueDriver.set(target, source);
    assertThat(target, is(source));
    valueDriver.set(target, null);
    assertThat(target.isNull(), is(true));
  }
  /** Date handled as a string. */
  @Test
  public void date_by_string() {
    // Fixture: the option under test and its expected textual form.
    final String expectedText = "2014-07-01";
    final DateOption source = new DateOption(new Date(2014, 7, 1));

    final ValueSerde dateSerde = StringValueSerdeFactory.DATE;
    final StringObjectInspector stringInspector = (StringObjectInspector) dateSerde.getInspector();

    // copyObject must yield an equal but distinct instance, and pass null through.
    assertThat(stringInspector.copyObject(source), is((Object) source));
    assertThat(stringInspector.copyObject(source), is(not(sameInstance((Object) source))));
    assertThat(stringInspector.copyObject(null), is(nullValue()));

    // Conversions to String and Text, including null handling.
    assertThat(stringInspector.getPrimitiveJavaObject(source), is(expectedText));
    assertThat(stringInspector.getPrimitiveJavaObject(null), is(nullValue()));
    assertThat(stringInspector.getPrimitiveWritableObject(source), is(new Text(expectedText)));
    assertThat(stringInspector.getPrimitiveWritableObject(null), is(nullValue()));

    // The driver must copy the value into the target and reset it to null on null input.
    final ValueDriver valueDriver = dateSerde.getDriver(stringInspector);
    final DateOption target = new DateOption();

    valueDriver.set(target, source);
    assertThat(target, is(source));
    valueDriver.set(target, null);
    assertThat(target.isNull(), is(true));
  }
  /** Decimal handled as a string. */
  @Test
  public void decimal_by_string() {
    // Fixture: the option under test and its expected textual form.
    final String expectedText = "123.45";
    final DecimalOption source = new DecimalOption(new BigDecimal("123.45"));

    final ValueSerde decimalSerde = StringValueSerdeFactory.DECIMAL;
    final StringObjectInspector stringInspector =
        (StringObjectInspector) decimalSerde.getInspector();

    // copyObject must yield an equal but distinct instance, and pass null through.
    assertThat(stringInspector.copyObject(source), is((Object) source));
    assertThat(stringInspector.copyObject(source), is(not(sameInstance((Object) source))));
    assertThat(stringInspector.copyObject(null), is(nullValue()));

    // Conversions to String and Text, including null handling.
    assertThat(stringInspector.getPrimitiveJavaObject(source), is(expectedText));
    assertThat(stringInspector.getPrimitiveJavaObject(null), is(nullValue()));
    assertThat(stringInspector.getPrimitiveWritableObject(source), is(new Text(expectedText)));
    assertThat(stringInspector.getPrimitiveWritableObject(null), is(nullValue()));

    // The driver must copy the value into the target and reset it to null on null input.
    final ValueDriver valueDriver = decimalSerde.getDriver(stringInspector);
    final DecimalOption target = new DecimalOption();

    valueDriver.set(target, source);
    assertThat(target, is(source));
    valueDriver.set(target, null);
    assertThat(target.isNull(), is(true));
  }
  /**
   * Recursively writes {@code o} to {@code buffer}, choosing the encoding from the category
   * reported by {@code oi}.
   *
   * <p>Every value starts with a one-byte marker: {@code 0} for {@code null} (nothing else is
   * written) or {@code 1} for a present value. The payload that follows depends on the type:
   *
   * <ul>
   *   <li>integral primitives and DATE: most significant byte first, with the top bit of the
   *       first byte flipped;
   *   <li>FLOAT/DOUBLE: the IEEE bit pattern, fully inverted for negative values and with only
   *       the top bit flipped otherwise, most significant byte first;
   *   <li>STRING/BINARY: delegated to {@code serializeBytes};
   *   <li>LIST/MAP: each element or entry is preceded by a {@code 1} byte and the collection is
   *       terminated by a {@code 0} byte;
   *   <li>STRUCT: the fields one after another, in field order;
   *   <li>UNION: the tag byte followed by the tagged field.
   * </ul>
   *
   * @param buffer destination for the encoded bytes
   * @param o the value to encode; may be {@code null}
   * @param oi inspector describing {@code o}
   * @param invert forwarded unchanged to every {@code buffer.write} call; presumably asks the
   *     buffer to invert the written bytes (e.g. for descending order) — confirm against
   *     {@code OutputByteBuffer}
   * @throws RuntimeException if the category or primitive category is not handled here
   */
  static void serialize(OutputByteBuffer buffer, Object o, ObjectInspector oi, boolean invert) {
    // Is this field a null?
    if (o == null) {
      buffer.write((byte) 0, invert);
      return;
    }
    // This field is not a null.
    buffer.write((byte) 1, invert);

    switch (oi.getCategory()) {
      case PRIMITIVE:
        {
          // Every inner case below returns or throws, so control never falls through to LIST.
          PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
          switch (poi.getPrimitiveCategory()) {
            case VOID:
              {
                // Nothing beyond the non-null marker is written.
                return;
              }
            case BOOLEAN:
              {
                // true is written as 2, false as 1.
                boolean v = ((BooleanObjectInspector) poi).get(o);
                buffer.write((byte) (v ? 2 : 1), invert);
                return;
              }
            case BYTE:
              {
                // Single byte with its top (sign) bit flipped.
                ByteObjectInspector boi = (ByteObjectInspector) poi;
                byte v = boi.get(o);
                buffer.write((byte) (v ^ 0x80), invert);
                return;
              }
            case SHORT:
              {
                // Two bytes, high byte first; the sign bit of the high byte is flipped.
                ShortObjectInspector spoi = (ShortObjectInspector) poi;
                short v = spoi.get(o);
                buffer.write((byte) ((v >> 8) ^ 0x80), invert);
                buffer.write((byte) v, invert);
                return;
              }
            case INT:
              {
                // Four bytes, high byte first; the sign bit of the high byte is flipped.
                IntObjectInspector ioi = (IntObjectInspector) poi;
                int v = ioi.get(o);
                buffer.write((byte) ((v >> 24) ^ 0x80), invert);
                buffer.write((byte) (v >> 16), invert);
                buffer.write((byte) (v >> 8), invert);
                buffer.write((byte) v, invert);
                return;
              }
            case LONG:
              {
                // Eight bytes, high byte first; the sign bit of the high byte is flipped.
                LongObjectInspector loi = (LongObjectInspector) poi;
                long v = loi.get(o);
                buffer.write((byte) ((v >> 56) ^ 0x80), invert);
                buffer.write((byte) (v >> 48), invert);
                buffer.write((byte) (v >> 40), invert);
                buffer.write((byte) (v >> 32), invert);
                buffer.write((byte) (v >> 24), invert);
                buffer.write((byte) (v >> 16), invert);
                buffer.write((byte) (v >> 8), invert);
                buffer.write((byte) v, invert);
                return;
              }
            case FLOAT:
              {
                // Work on the raw 32-bit pattern of the float.
                FloatObjectInspector foi = (FloatObjectInspector) poi;
                int v = Float.floatToIntBits(foi.get(o));
                if ((v & (1 << 31)) != 0) {
                  // negative number, flip all bits
                  v = ~v;
                } else {
                  // positive number, flip the first bit
                  v = v ^ (1 << 31);
                }
                // Emit the adjusted pattern, high byte first.
                buffer.write((byte) (v >> 24), invert);
                buffer.write((byte) (v >> 16), invert);
                buffer.write((byte) (v >> 8), invert);
                buffer.write((byte) v, invert);
                return;
              }
            case DOUBLE:
              {
                // Work on the raw 64-bit pattern of the double.
                DoubleObjectInspector doi = (DoubleObjectInspector) poi;
                long v = Double.doubleToLongBits(doi.get(o));
                if ((v & (1L << 63)) != 0) {
                  // negative number, flip all bits
                  v = ~v;
                } else {
                  // positive number, flip the first bit
                  v = v ^ (1L << 63);
                }
                // Emit the adjusted pattern, high byte first.
                buffer.write((byte) (v >> 56), invert);
                buffer.write((byte) (v >> 48), invert);
                buffer.write((byte) (v >> 40), invert);
                buffer.write((byte) (v >> 32), invert);
                buffer.write((byte) (v >> 24), invert);
                buffer.write((byte) (v >> 16), invert);
                buffer.write((byte) (v >> 8), invert);
                buffer.write((byte) v, invert);
                return;
              }
            case STRING:
              {
                // Hand the Text's byte array and its valid length to serializeBytes.
                StringObjectInspector soi = (StringObjectInspector) poi;
                Text t = soi.getPrimitiveWritableObject(o);
                serializeBytes(buffer, t.getBytes(), t.getLength(), invert);
                return;
              }

            case BINARY:
              {
                // Copy exactly getLength() bytes into a fresh array before delegating.
                BinaryObjectInspector baoi = (BinaryObjectInspector) poi;
                BytesWritable ba = baoi.getPrimitiveWritableObject(o);
                byte[] toSer = new byte[ba.getLength()];
                System.arraycopy(ba.getBytes(), 0, toSer, 0, ba.getLength());
                serializeBytes(buffer, toSer, ba.getLength(), invert);
                return;
              }
            case DATE:
              {
                // The value of getTimeInSeconds() is written with the same layout as LONG.
                DateObjectInspector doi = (DateObjectInspector) poi;
                long v = doi.getPrimitiveWritableObject(o).getTimeInSeconds();
                buffer.write((byte) ((v >> 56) ^ 0x80), invert);
                buffer.write((byte) (v >> 48), invert);
                buffer.write((byte) (v >> 40), invert);
                buffer.write((byte) (v >> 32), invert);
                buffer.write((byte) (v >> 24), invert);
                buffer.write((byte) (v >> 16), invert);
                buffer.write((byte) (v >> 8), invert);
                buffer.write((byte) v, invert);
                return;
              }
            case TIMESTAMP:
              {
                // Write the bytes returned by getBinarySortable() one at a time.
                TimestampObjectInspector toi = (TimestampObjectInspector) poi;
                TimestampWritable t = toi.getPrimitiveWritableObject(o);
                byte[] data = t.getBinarySortable();
                for (int i = 0; i < data.length; i++) {
                  buffer.write(data[i], invert);
                }
                return;
              }
            default:
              {
                throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory());
              }
          }
        }
      case LIST:
        {
          ListObjectInspector loi = (ListObjectInspector) oi;
          ObjectInspector eoi = loi.getListElementObjectInspector();

          // \1 followed by each element
          int size = loi.getListLength(o);
          for (int eid = 0; eid < size; eid++) {
            buffer.write((byte) 1, invert);
            // Recursive call; it writes the element's own null/non-null marker as well.
            serialize(buffer, loi.getListElement(o, eid), eoi, invert);
          }
          // and \0 to terminate
          buffer.write((byte) 0, invert);
          return;
        }
      case MAP:
        {
          MapObjectInspector moi = (MapObjectInspector) oi;
          ObjectInspector koi = moi.getMapKeyObjectInspector();
          ObjectInspector voi = moi.getMapValueObjectInspector();

          // \1 followed by each key and then each value
          // Entries are written in the iteration order of the map returned by getMap().
          Map<?, ?> map = moi.getMap(o);
          for (Map.Entry<?, ?> entry : map.entrySet()) {
            buffer.write((byte) 1, invert);
            serialize(buffer, entry.getKey(), koi, invert);
            serialize(buffer, entry.getValue(), voi, invert);
          }
          // and \0 to terminate
          buffer.write((byte) 0, invert);
          return;
        }
      case STRUCT:
        {
          StructObjectInspector soi = (StructObjectInspector) oi;
          List<? extends StructField> fields = soi.getAllStructFieldRefs();

          // Fields are serialized back to back, in field order, with no separator or terminator.
          for (int i = 0; i < fields.size(); i++) {
            serialize(
                buffer,
                soi.getStructFieldData(o, fields.get(i)),
                fields.get(i).getFieldObjectInspector(),
                invert);
          }
          return;
        }
      case UNION:
        {
          // Tag byte first, then the field encoded with the inspector found at index `tag`.
          UnionObjectInspector uoi = (UnionObjectInspector) oi;
          byte tag = uoi.getTag(o);
          buffer.write(tag, invert);
          serialize(buffer, uoi.getField(o), uoi.getObjectInspectors().get(tag), invert);
          return;
        }
      default:
        {
          throw new RuntimeException("Unrecognized type: " + oi.getCategory());
        }
    }
  }