@Setup
  public void setup() {
    // Prepares the benchmark state: an IN predicate whose list holds inListCount random
    // constants of the configured type, a 10,000-row single-column input page of random
    // values of that type, and the compiled page processor that filters with the predicate
    // and projects the column unchanged.
    // The generator is unseeded, so constants and page contents differ between runs.
    Random rng = new Random();

    // Slot 0 is reserved for the probed field reference; slots 1..inListCount hold the
    // IN-list constants.
    RowExpression[] arguments = new RowExpression[1 + inListCount];
    switch (type) {
      case StandardTypes.BIGINT:
        prestoType = BIGINT;
        for (int slot = 0; slot < inListCount; slot++) {
          arguments[slot + 1] = constant((long) rng.nextInt(), BIGINT);
        }
        break;
      case StandardTypes.DOUBLE:
        prestoType = DOUBLE;
        for (int slot = 0; slot < inListCount; slot++) {
          arguments[slot + 1] = constant(rng.nextDouble(), DOUBLE);
        }
        break;
      case StandardTypes.VARCHAR:
        prestoType = VARCHAR;
        for (int slot = 0; slot < inListCount; slot++) {
          String text = Long.toString(rng.nextLong());
          arguments[slot + 1] = constant(Slices.utf8Slice(text), VARCHAR);
        }
        break;
      default:
        throw new IllegalStateException();
    }

    arguments[0] = field(0, prestoType);
    RowExpression projection = field(0, prestoType);

    // Fill the input page; the type was validated by the switch above, so exactly one of
    // the three branches applies on every row.
    PageBuilder pageBuilder = new PageBuilder(ImmutableList.of(prestoType));
    int rowsWritten = 0;
    while (rowsWritten < 10_000) {
      pageBuilder.declarePosition();

      if (StandardTypes.BIGINT.equals(type)) {
        BIGINT.writeLong(pageBuilder.getBlockBuilder(0), rng.nextInt());
      } else if (StandardTypes.DOUBLE.equals(type)) {
        DOUBLE.writeDouble(pageBuilder.getBlockBuilder(0), rng.nextDouble());
      } else if (StandardTypes.VARCHAR.equals(type)) {
        String text = Long.toString(rng.nextLong());
        VARCHAR.writeSlice(pageBuilder.getBlockBuilder(0), Slices.utf8Slice(text));
      }
      rowsWritten++;
    }
    inputPage = pageBuilder.build();

    Signature inSignature = new Signature(IN, SCALAR, parseTypeSignature(StandardTypes.BOOLEAN));
    RowExpression inPredicate = call(inSignature, BOOLEAN, arguments);

    ExpressionCompiler compiler =
        new ExpressionCompiler(MetadataManager.createTestMetadataManager());
    processor = compiler.compilePageProcessor(inPredicate, ImmutableList.of(projection)).get();
  }
Пример #2
0
  /**
   * Decodes the string-like value of {@code column} from the current row into
   * {@code slices[column]} and records in {@code nulls[column]} whether it was null.
   *
   * <p>Accepts {@code String}, {@code byte[]} and {@code HiveVarchar} field values; varchar
   * columns are additionally passed through {@code truncateToLength}.
   *
   * @param column index of the column to decode; must not be a partition column
   * @throws IllegalStateException if the field value has any other runtime type
   */
  private void parseStringColumn(int column) {
    // The message deliberately leaves out the column number: including it causes boxing,
    // which is expensive here.
    checkArgument(!isPartitionColumn[column], "Column is a partition key");

    loaded[column] = true;

    Object rawField = rowInspector.getStructFieldData(rowData, structFields[column]);
    if (rawField == null) {
      nulls[column] = true;
      return;
    }

    PrimitiveObjectInspector inspector = (PrimitiveObjectInspector) fieldInspectors[column];
    Object javaValue = inspector.getPrimitiveJavaObject(rawField);
    checkState(javaValue != null, "fieldValue should not be null");

    Slice decoded;
    if (javaValue instanceof String) {
      decoded = Slices.utf8Slice((String) javaValue);
    } else if (javaValue instanceof byte[]) {
      decoded = Slices.wrappedBuffer((byte[]) javaValue);
    } else if (javaValue instanceof HiveVarchar) {
      decoded = Slices.utf8Slice(((HiveVarchar) javaValue).getValue());
    } else {
      throw new IllegalStateException(
          "unsupported string field type: " + javaValue.getClass().getName());
    }

    Type columnType = types[column];
    slices[column] = isVarcharType(columnType) ? truncateToLength(decoded, columnType) : decoded;
    nulls[column] = false;
  }
Пример #3
0
  /**
   * Checks that {@code Slices.utf8Slice} agrees with {@code Slices.copiedBuffer(s, UTF_8)} and
   * that {@code toStringUtf8} returns the original string.
   */
  @Test
  public void testUtf8Conversion() {
    // U+2603 (snowman) takes more than one byte in UTF-8.
    String text = "apple \u2603 snowman";
    Slice reference = Slices.copiedBuffer(text, UTF_8);

    Slice converted = Slices.utf8Slice(text);
    assertEquals(converted, reference);

    String decodedReference = reference.toStringUtf8();
    assertEquals(decodedReference, text);

    String roundTripped = Slices.utf8Slice(text).toStringUtf8();
    assertEquals(roundTripped, text);
  }
Пример #4
0
  /**
   * Decodes the string-like value of {@code column} from the current row into
   * {@code slices[column]} and records in {@code nulls[column]} whether it was null.
   *
   * <p>MAP, LIST and STRUCT columns are rendered through {@code SerDeUtils.getJsonBytes};
   * every other column must yield a {@code String} or {@code byte[]}.
   *
   * @param column index of the column to decode; must not be a partition column
   * @throws IllegalStateException if a primitive field value has any other runtime type
   */
  private void parseStringColumn(int column) {
    // The message deliberately leaves out the column number: including it causes boxing,
    // which is expensive here.
    checkArgument(!isPartitionColumn[column], "Column is a partition key");

    loaded[column] = true;

    Object rawField = rowInspector.getStructFieldData(rowData, structFields[column]);
    if (rawField == null) {
      nulls[column] = true;
      return;
    }

    boolean complexType =
        hiveTypes[column] == HiveType.MAP
            || hiveTypes[column] == HiveType.LIST
            || hiveTypes[column] == HiveType.STRUCT;
    if (complexType) {
      // Temporary special case: MAP, LIST and STRUCT values are exposed as strings.
      byte[] json = SerDeUtils.getJsonBytes(sessionTimeZone, rawField, fieldInspectors[column]);
      slices[column] = Slices.wrappedBuffer(json);
      nulls[column] = false;
      return;
    }

    PrimitiveObjectInspector inspector = (PrimitiveObjectInspector) fieldInspectors[column];
    Object javaValue = inspector.getPrimitiveJavaObject(rawField);
    checkState(javaValue != null, "fieldValue should not be null");

    if (javaValue instanceof String) {
      slices[column] = Slices.utf8Slice((String) javaValue);
    } else if (javaValue instanceof byte[]) {
      slices[column] = Slices.wrappedBuffer((byte[]) javaValue);
    } else {
      throw new IllegalStateException(
          "unsupported string field type: " + javaValue.getClass().getName());
    }
    nulls[column] = false;
  }
 /**
  * Serializes a row value to JSON text.
  *
  * <p>The row is turned into a block with {@code createBlock}, its object value at position 0
  * is read through {@code rowType.getObjectValue}, and that object is written out with the
  * shared {@code OBJECT_MAPPER}.
  *
  * @param rowType type used to read the object value of the row
  * @param session session passed through to {@code getObjectValue}
  * @param row serialized row
  * @return the JSON text as a UTF-8 slice
  * @throws RuntimeException wrapping the {@code JsonProcessingException} if serialization fails
  */
 public static Slice toJson(Type rowType, ConnectorSession session, Slice row) {
   Object object = rowType.getObjectValue(session, createBlock(rowType, row), 0);
   try {
     return Slices.utf8Slice(OBJECT_MAPPER.get().writeValueAsString(object));
   } catch (JsonProcessingException e) {
     // Wrap directly instead of calling the deprecated Throwables.propagate; for a checked
     // exception such as this one, propagate did exactly this.
     throw new RuntimeException(e);
   }
 }
Пример #6
0
 /**
  * Converts a partition value string into the slice used for a CHAR partition key.
  *
  * <p>The value is passed through {@code trimSpaces} and must not contain more code points
  * than the CHAR type's declared length.
  *
  * @param value raw partition value
  * @param name partition key name, used only in the error message
  * @param columnType type of the partition column; checked to be a {@code CharType}
  * @return the trimmed value as a slice
  * @throws PrestoException with {@code HIVE_INVALID_PARTITION_VALUE} if the trimmed value is
  *     longer than the type allows
  */
 public static Slice charPartitionKey(String value, String name, Type columnType) {
   Slice trimmed = trimSpaces(Slices.utf8Slice(value));
   CharType charType = checkType(columnType, CharType.class, "columnType");

   boolean fitsDeclaredLength = SliceUtf8.countCodePoints(trimmed) <= charType.getLength();
   if (fitsDeclaredLength) {
     return trimmed;
   }

   String message =
       format(
           "Invalid partition value '%s' for %s partition key: %s",
           value, columnType.toString(), name);
   throw new PrestoException(HIVE_INVALID_PARTITION_VALUE, message);
 }
Пример #7
0
  /**
   * Converts a value of the given type into an {@link Expression} that represents it.
   *
   * <p>Expressions are returned as-is and nulls become a (possibly cast) {@code NullLiteral}.
   * BIGINT, DOUBLE, VARCHAR and BOOLEAN values map to their literal forms, with NaN and the
   * infinities expressed as function calls. Blocks and slices of any other type are embedded as
   * a {@code from_base64} call wrapped in the type's magic-literal function. All remaining
   * values are converted recursively using {@code FunctionRegistry.typeForMagicLiteral(type)}
   * and wrapped in the magic-literal function.
   *
   * @param object value to convert; may be null or already an {@code Expression}
   * @param type type of the value; must not be null
   * @return an expression representing {@code object}
   * @throws IllegalArgumentException if the value's class does not match the type's Java type,
   *     or a varchar value is neither a {@code Slice} nor a {@code String}
   */
  public static Expression toExpression(Object object, Type type) {
    requireNonNull(type, "type is null");

    if (object instanceof Expression) {
      return (Expression) object;
    }

    if (object == null) {
      NullLiteral nullLiteral = new NullLiteral();
      if (type.equals(UNKNOWN)) {
        return nullLiteral;
      }
      // Any other type gets a typed null via a cast.
      return new Cast(nullLiteral, type.getTypeSignature().toString(), false, true);
    }

    checkArgument(
        Primitives.wrap(type.getJavaType()).isInstance(object),
        "object.getClass (%s) and type.getJavaType (%s) do not agree",
        object.getClass(),
        type.getJavaType());

    if (type.equals(BIGINT)) {
      return new LongLiteral(object.toString());
    }

    if (type.equals(DOUBLE)) {
      double number = (Double) object;
      // WARNING: the ORC predicate code depends on NaN and infinity not appearing in a tuple
      // domain, so if you remove this, you will need to update the TupleDomainOrcPredicate.
      if (Double.isNaN(number)) {
        return new FunctionCall(new QualifiedName("nan"), ImmutableList.<Expression>of());
      }
      if (number == Double.NEGATIVE_INFINITY) {
        return ArithmeticUnaryExpression.negative(
            new FunctionCall(new QualifiedName("infinity"), ImmutableList.<Expression>of()));
      }
      if (number == Double.POSITIVE_INFINITY) {
        return new FunctionCall(new QualifiedName("infinity"), ImmutableList.<Expression>of());
      }
      return new DoubleLiteral(object.toString());
    }

    if (type instanceof VarcharType) {
      if (object instanceof String) {
        object = Slices.utf8Slice((String) object);
      }

      if (!(object instanceof Slice)) {
        throw new IllegalArgumentException(
            "object must be instance of Slice or String when type is VARCHAR");
      }

      Slice value = (Slice) object;
      int codePoints = SliceUtf8.countCodePoints(value);
      boolean exactLength = codePoints == ((VarcharType) type).getLength();

      StringLiteral literal = new StringLiteral(value.toStringUtf8());
      if (exactLength) {
        // The literal already has exactly the declared length, so no cast is needed.
        return literal;
      }
      return new Cast(literal, type.getDisplayName(), false, true);
    }

    if (type.equals(BOOLEAN)) {
      return new BooleanLiteral(object.toString());
    }

    if (object instanceof Block) {
      Block block = (Block) object;
      SliceOutput serialized = new DynamicSliceOutput(block.getSizeInBytes());
      BlockSerdeUtil.writeBlock(serialized, block);
      // The serialized block is a Slice, so it is handled by the Slice branch below.
      object = serialized.slice();
    }

    if (object instanceof Slice) {
      // HACK: VARBINARY has to be serialized in a format that can be embedded in an expression
      // so that it can be encoded in the plan sent to workers. The in-memory varbinary is
      // therefore turned into a call to from_base64(<base64-encoded value>).
      String encoded = VarbinaryFunctions.toBase64((Slice) object).toStringUtf8();
      FunctionCall fromBase64 =
          new FunctionCall(
              new QualifiedName("from_base64"), ImmutableList.of(new StringLiteral(encoded)));
      Signature sliceSignature = FunctionRegistry.getMagicLiteralFunctionSignature(type);
      return new FunctionCall(
          new QualifiedName(sliceSignature.getName()), ImmutableList.of(fromBase64));
    }

    Signature magicSignature = FunctionRegistry.getMagicLiteralFunctionSignature(type);
    Expression innerLiteral = toExpression(object, FunctionRegistry.typeForMagicLiteral(type));
    QualifiedName magicFunction = new QualifiedName(magicSignature.getName());

    return new FunctionCall(magicFunction, ImmutableList.of(innerLiteral));
  }
Пример #8
0
  /**
   * Appends one primitive Hive value to {@code builder}, dispatching on the inspector's
   * primitive category. A null {@code object} is appended as a null.
   *
   * @param type type of the target column; used directly for STRING, VARCHAR, CHAR and DECIMAL
   * @param builder block builder to append to; must not be null
   * @param object Hive value to serialize, or null
   * @param inspector inspector used to read {@code object}
   * @throws RuntimeException if the primitive category is not handled
   */
  private static void serializePrimitive(
      Type type, BlockBuilder builder, Object object, PrimitiveObjectInspector inspector) {
    requireNonNull(builder, "parent builder is null");

    if (object == null) {
      builder.appendNull();
      return;
    }

    switch (inspector.getPrimitiveCategory()) {
      case BOOLEAN:
        BooleanType.BOOLEAN.writeBoolean(builder, ((BooleanObjectInspector) inspector).get(object));
        break;
      case BYTE:
        TinyintType.TINYINT.writeLong(builder, ((ByteObjectInspector) inspector).get(object));
        break;
      case SHORT:
        SmallintType.SMALLINT.writeLong(builder, ((ShortObjectInspector) inspector).get(object));
        break;
      case INT:
        IntegerType.INTEGER.writeLong(builder, ((IntObjectInspector) inspector).get(object));
        break;
      case LONG:
        BigintType.BIGINT.writeLong(builder, ((LongObjectInspector) inspector).get(object));
        break;
      case FLOAT:
        // Hive floats are written through the DOUBLE type.
        DoubleType.DOUBLE.writeDouble(builder, ((FloatObjectInspector) inspector).get(object));
        break;
      case DOUBLE:
        DoubleType.DOUBLE.writeDouble(builder, ((DoubleObjectInspector) inspector).get(object));
        break;
      case STRING: {
        String text = ((StringObjectInspector) inspector).getPrimitiveJavaObject(object);
        type.writeSlice(builder, Slices.utf8Slice(text));
        break;
      }
      case VARCHAR: {
        String text =
            ((HiveVarcharObjectInspector) inspector).getPrimitiveJavaObject(object).getValue();
        type.writeSlice(builder, Slices.utf8Slice(text));
        break;
      }
      case CHAR: {
        CharType charType = checkType(type, CharType.class, "type");
        HiveChar charValue = ((HiveCharObjectInspector) inspector).getPrimitiveJavaObject(object);
        type.writeSlice(
            builder,
            trimSpacesAndTruncateToLength(
                Slices.utf8Slice(charValue.getValue()), charType.getLength()));
        break;
      }
      case DATE:
        DateType.DATE.writeLong(builder, formatDateAsLong(object, (DateObjectInspector) inspector));
        break;
      case TIMESTAMP:
        TimestampType.TIMESTAMP.writeLong(
            builder, formatTimestampAsLong(object, (TimestampObjectInspector) inspector));
        break;
      case BINARY: {
        byte[] bytes = ((BinaryObjectInspector) inspector).getPrimitiveJavaObject(object);
        VARBINARY.writeSlice(builder, Slices.wrappedBuffer(bytes));
        break;
      }
      case DECIMAL: {
        DecimalType decimalType = checkType(type, DecimalType.class, "type");
        HiveDecimalWritable decimalValue =
            ((HiveDecimalObjectInspector) inspector).getPrimitiveWritableObject(object);
        // Short decimals are written as a long, long decimals as a slice.
        if (decimalType.isShort()) {
          decimalType.writeLong(
              builder, DecimalUtils.getShortDecimalValue(decimalValue, decimalType.getScale()));
        } else {
          decimalType.writeSlice(
              builder, DecimalUtils.getLongDecimalValue(decimalValue, decimalType.getScale()));
        }
        break;
      }
      default:
        throw new RuntimeException("Unknown primitive type: " + inspector.getPrimitiveCategory());
    }
  }
@Test(groups = "hive")
public abstract class AbstractTestHiveFileFormats {
  // Tolerance value; presumably for floating-point comparisons, but its use is outside this view.
  private static final double EPSILON = 0.001;
  private static final TypeManager TYPE_MANAGER = new TypeRegistry();

  // 2011-05-06 00:00 in UTC, as epoch milliseconds.
  private static final long DATE_MILLIS_UTC = new DateTime(2011, 5, 6, 0, 0, UTC).getMillis();
  // The same instant converted to whole days.
  private static final long DATE_DAYS = TimeUnit.MILLISECONDS.toDays(DATE_MILLIS_UTC);
  // The same instant printed as yyyy-MM-dd in UTC.
  private static final String DATE_STRING =
      DateTimeFormat.forPattern("yyyy-MM-dd").withZoneUTC().print(DATE_MILLIS_UTC);
  // Date built from DATE_MILLIS_UTC shifted with getMillisKeepLocal into the JVM default zone.
  // NOTE(review): presumably so the Date shows 2011-05-06 in local time; confirm against the
  // Joda-Time documentation for getMillisKeepLocal.
  private static final Date SQL_DATE =
      new Date(UTC.getMillisKeepLocal(DateTimeZone.getDefault(), DATE_MILLIS_UTC));

  // 2011-05-06 07:08:09.123 as epoch milliseconds. No zone is passed to the DateTime
  // constructor or the formatter below, so presumably the default zone applies to both.
  public static final long TIMESTAMP = new DateTime(2011, 5, 6, 7, 8, 9, 123).getMillis();
  public static final String TIMESTAMP_STRING =
      DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS").print(TIMESTAMP);

  // TODO: support null values and determine if timestamp and binary are allowed as partition keys
  public static final int NUM_ROWS = 1000;
  // Column fixtures shared by the file-format tests. Each TestColumn is built from a column
  // name, an ObjectInspector, the value to write, and the value expected when reading back.
  // The five-argument form adds a trailing `true`.
  // NOTE(review): the trailing flag presumably marks a partition key (createTestFile filters on
  // TestColumn::isPartitionKey); confirm against the TestColumn constructor.
  public static final List<TestColumn> TEST_COLUMNS =
      ImmutableList.<TestColumn>builder()
          // "p_" columns: five-argument form with the write value given as a string.
          .add(
              new TestColumn(
                  "p_empty_string", javaStringObjectInspector, "", Slices.EMPTY_SLICE, true))
          .add(
              new TestColumn(
                  "p_string", javaStringObjectInspector, "test", Slices.utf8Slice("test"), true))
          .add(new TestColumn("p_tinyint", javaByteObjectInspector, "1", 1L, true))
          .add(new TestColumn("p_smallint", javaShortObjectInspector, "2", 2L, true))
          .add(new TestColumn("p_int", javaIntObjectInspector, "3", 3L, true))
          .add(new TestColumn("p_bigint", javaLongObjectInspector, "4", 4L, true))
          .add(new TestColumn("p_float", javaFloatObjectInspector, "5.1", 5.1, true))
          .add(new TestColumn("p_double", javaDoubleObjectInspector, "6.2", 6.2, true))
          .add(new TestColumn("p_boolean", javaBooleanObjectInspector, "true", true, true))
          .add(new TestColumn("p_date", javaDateObjectInspector, DATE_STRING, DATE_DAYS, true))
          .add(
              new TestColumn(
                  "p_timestamp", javaTimestampObjectInspector, TIMESTAMP_STRING, TIMESTAMP, true))
          //            .add(new TestColumn("p_binary", javaByteArrayObjectInspector, "test2",
          // Slices.utf8Slice("test2"), true))
          // "p_null_" columns: written as HIVE_DEFAULT_DYNAMIC_PARTITION, expected value null.
          .add(
              new TestColumn(
                  "p_null_string",
                  javaStringObjectInspector,
                  HIVE_DEFAULT_DYNAMIC_PARTITION,
                  null,
                  true))
          .add(
              new TestColumn(
                  "p_null_tinyint",
                  javaByteObjectInspector,
                  HIVE_DEFAULT_DYNAMIC_PARTITION,
                  null,
                  true))
          .add(
              new TestColumn(
                  "p_null_smallint",
                  javaShortObjectInspector,
                  HIVE_DEFAULT_DYNAMIC_PARTITION,
                  null,
                  true))
          .add(
              new TestColumn(
                  "p_null_int", javaIntObjectInspector, HIVE_DEFAULT_DYNAMIC_PARTITION, null, true))
          .add(
              new TestColumn(
                  "p_null_bigint",
                  javaLongObjectInspector,
                  HIVE_DEFAULT_DYNAMIC_PARTITION,
                  null,
                  true))
          .add(
              new TestColumn(
                  "p_null_float",
                  javaFloatObjectInspector,
                  HIVE_DEFAULT_DYNAMIC_PARTITION,
                  null,
                  true))
          .add(
              new TestColumn(
                  "p_null_double",
                  javaDoubleObjectInspector,
                  HIVE_DEFAULT_DYNAMIC_PARTITION,
                  null,
                  true))
          .add(
              new TestColumn(
                  "p_null_boolean",
                  javaBooleanObjectInspector,
                  HIVE_DEFAULT_DYNAMIC_PARTITION,
                  null,
                  true))
          .add(
              new TestColumn(
                  "p_null_date",
                  javaDateObjectInspector,
                  HIVE_DEFAULT_DYNAMIC_PARTITION,
                  null,
                  true))
          .add(
              new TestColumn(
                  "p_null_timestamp",
                  javaTimestampObjectInspector,
                  HIVE_DEFAULT_DYNAMIC_PARTITION,
                  null,
                  true))
          //            .add(new TestColumn("p_null_binary", javaByteArrayObjectInspector,
          // HIVE_DEFAULT_DYNAMIC_PARTITION, null, true))
          // "t_" columns: four-argument form. Null and scalar values first; tinyint, smallint
          // and int expect a long on read, and float expects a double.
          .add(new TestColumn("t_null_string", javaStringObjectInspector, null, null))
          .add(
              new TestColumn(
                  "t_null_array_int",
                  getStandardListObjectInspector(javaIntObjectInspector),
                  null,
                  null))
          .add(new TestColumn("t_empty_string", javaStringObjectInspector, "", Slices.EMPTY_SLICE))
          .add(
              new TestColumn(
                  "t_string", javaStringObjectInspector, "test", Slices.utf8Slice("test")))
          .add(new TestColumn("t_tinyint", javaByteObjectInspector, (byte) 1, 1L))
          .add(new TestColumn("t_smallint", javaShortObjectInspector, (short) 2, 2L))
          .add(new TestColumn("t_int", javaIntObjectInspector, 3, 3L))
          .add(new TestColumn("t_bigint", javaLongObjectInspector, 4L, 4L))
          .add(new TestColumn("t_float", javaFloatObjectInspector, 5.1f, 5.1))
          .add(new TestColumn("t_double", javaDoubleObjectInspector, 6.2, 6.2))
          .add(new TestColumn("t_boolean_true", javaBooleanObjectInspector, true, true))
          .add(new TestColumn("t_boolean_false", javaBooleanObjectInspector, false, false))
          .add(new TestColumn("t_date", javaDateObjectInspector, SQL_DATE, DATE_DAYS))
          .add(
              new TestColumn(
                  "t_timestamp", javaTimestampObjectInspector, new Timestamp(TIMESTAMP), TIMESTAMP))
          .add(
              new TestColumn(
                  "t_binary",
                  javaByteArrayObjectInspector,
                  Slices.utf8Slice("test2"),
                  Slices.utf8Slice("test2")))
          // Map columns.
          .add(
              new TestColumn(
                  "t_map_string",
                  getStandardMapObjectInspector(
                      javaStringObjectInspector, javaStringObjectInspector),
                  ImmutableMap.of("test", "test"),
                  mapBlockOf(VARCHAR, VARCHAR, "test", "test")))
          .add(
              new TestColumn(
                  "t_map_tinyint",
                  getStandardMapObjectInspector(javaByteObjectInspector, javaByteObjectInspector),
                  ImmutableMap.of((byte) 1, (byte) 1),
                  mapBlockOf(BIGINT, BIGINT, 1, 1)))
          .add(
              new TestColumn(
                  "t_map_smallint",
                  getStandardMapObjectInspector(javaShortObjectInspector, javaShortObjectInspector),
                  ImmutableMap.of((short) 2, (short) 2),
                  mapBlockOf(BIGINT, BIGINT, 2, 2)))
          // The entry with the null key is absent from the expected block.
          .add(
              new TestColumn(
                  "t_map_null_key",
                  getStandardMapObjectInspector(javaIntObjectInspector, javaIntObjectInspector),
                  asMap(null, 0, 2, 3),
                  mapBlockOf(BIGINT, BIGINT, 2, 3)))
          .add(
              new TestColumn(
                  "t_map_int",
                  getStandardMapObjectInspector(javaIntObjectInspector, javaIntObjectInspector),
                  ImmutableMap.of(3, 3),
                  mapBlockOf(BIGINT, BIGINT, 3, 3)))
          .add(
              new TestColumn(
                  "t_map_bigint",
                  getStandardMapObjectInspector(javaLongObjectInspector, javaLongObjectInspector),
                  ImmutableMap.of(4L, 4L),
                  mapBlockOf(BIGINT, BIGINT, 4L, 4L)))
          .add(
              new TestColumn(
                  "t_map_float",
                  getStandardMapObjectInspector(javaFloatObjectInspector, javaFloatObjectInspector),
                  ImmutableMap.of(5.0f, 5.0f),
                  mapBlockOf(DOUBLE, DOUBLE, 5.0f, 5.0f)))
          .add(
              new TestColumn(
                  "t_map_double",
                  getStandardMapObjectInspector(
                      javaDoubleObjectInspector, javaDoubleObjectInspector),
                  ImmutableMap.of(6.0, 6.0),
                  mapBlockOf(DOUBLE, DOUBLE, 6.0, 6.0)))
          .add(
              new TestColumn(
                  "t_map_boolean",
                  getStandardMapObjectInspector(
                      javaBooleanObjectInspector, javaBooleanObjectInspector),
                  ImmutableMap.of(true, true),
                  mapBlockOf(BOOLEAN, BOOLEAN, true, true)))
          .add(
              new TestColumn(
                  "t_map_date",
                  getStandardMapObjectInspector(javaDateObjectInspector, javaDateObjectInspector),
                  ImmutableMap.of(SQL_DATE, SQL_DATE),
                  mapBlockOf(DateType.DATE, DateType.DATE, DATE_DAYS, DATE_DAYS)))
          .add(
              new TestColumn(
                  "t_map_timestamp",
                  getStandardMapObjectInspector(
                      javaTimestampObjectInspector, javaTimestampObjectInspector),
                  ImmutableMap.of(new Timestamp(TIMESTAMP), new Timestamp(TIMESTAMP)),
                  mapBlockOf(
                      TimestampType.TIMESTAMP, TimestampType.TIMESTAMP, TIMESTAMP, TIMESTAMP)))
          // Array columns.
          .add(
              new TestColumn(
                  "t_array_empty",
                  getStandardListObjectInspector(javaStringObjectInspector),
                  ImmutableList.of(),
                  arrayBlockOf(VARCHAR)))
          .add(
              new TestColumn(
                  "t_array_string",
                  getStandardListObjectInspector(javaStringObjectInspector),
                  ImmutableList.of("test"),
                  arrayBlockOf(VARCHAR, "test")))
          .add(
              new TestColumn(
                  "t_array_tinyint",
                  getStandardListObjectInspector(javaByteObjectInspector),
                  ImmutableList.of((byte) 1),
                  arrayBlockOf(BIGINT, 1)))
          .add(
              new TestColumn(
                  "t_array_smallint",
                  getStandardListObjectInspector(javaShortObjectInspector),
                  ImmutableList.of((short) 2),
                  arrayBlockOf(BIGINT, 2)))
          .add(
              new TestColumn(
                  "t_array_int",
                  getStandardListObjectInspector(javaIntObjectInspector),
                  ImmutableList.of(3),
                  arrayBlockOf(BIGINT, 3)))
          .add(
              new TestColumn(
                  "t_array_bigint",
                  getStandardListObjectInspector(javaLongObjectInspector),
                  ImmutableList.of(4L),
                  arrayBlockOf(BIGINT, 4L)))
          .add(
              new TestColumn(
                  "t_array_float",
                  getStandardListObjectInspector(javaFloatObjectInspector),
                  ImmutableList.of(5.0f),
                  arrayBlockOf(DOUBLE, 5.0f)))
          .add(
              new TestColumn(
                  "t_array_double",
                  getStandardListObjectInspector(javaDoubleObjectInspector),
                  ImmutableList.of(6.0),
                  arrayBlockOf(DOUBLE, 6.0)))
          .add(
              new TestColumn(
                  "t_array_boolean",
                  getStandardListObjectInspector(javaBooleanObjectInspector),
                  ImmutableList.of(true),
                  arrayBlockOf(BOOLEAN, true)))
          .add(
              new TestColumn(
                  "t_array_date",
                  getStandardListObjectInspector(javaDateObjectInspector),
                  ImmutableList.of(SQL_DATE),
                  arrayBlockOf(DateType.DATE, DATE_DAYS)))
          .add(
              new TestColumn(
                  "t_array_timestamp",
                  getStandardListObjectInspector(javaTimestampObjectInspector),
                  ImmutableList.of(new Timestamp(TIMESTAMP)),
                  arrayBlockOf(TimestampType.TIMESTAMP, TIMESTAMP)))
          // Struct columns and nested combinations of maps, arrays and structs.
          .add(
              new TestColumn(
                  "t_struct_bigint",
                  getStandardStructObjectInspector(
                      ImmutableList.of("s_bigint"), ImmutableList.of(javaLongObjectInspector)),
                  new Long[] {1L},
                  rowBlockOf(ImmutableList.of(BIGINT), 1)))
          .add(
              new TestColumn(
                  "t_complex",
                  getStandardMapObjectInspector(
                      javaStringObjectInspector,
                      getStandardListObjectInspector(
                          getStandardStructObjectInspector(
                              ImmutableList.of("s_int"),
                              ImmutableList.<ObjectInspector>of(javaIntObjectInspector)))),
                  ImmutableMap.of("test", ImmutableList.<Object>of(new Integer[] {1})),
                  mapBlockOf(
                      VARCHAR,
                      new ArrayType(new RowType(ImmutableList.of(BIGINT), Optional.empty())),
                      "test",
                      arrayBlockOf(
                          new RowType(ImmutableList.of(BIGINT), Optional.empty()),
                          rowBlockOf(ImmutableList.of(BIGINT), 1)))))
          // In the next two columns the null-key entry is again absent from the expected block.
          .add(
              new TestColumn(
                  "t_map_null_key_complex_value",
                  getStandardMapObjectInspector(
                      javaStringObjectInspector,
                      getStandardMapObjectInspector(
                          javaLongObjectInspector, javaBooleanObjectInspector)),
                  asMap(null, ImmutableMap.of(15L, true), "k", ImmutableMap.of(16L, false)),
                  mapBlockOf(
                      VARCHAR,
                      new MapType(BIGINT, BOOLEAN),
                      "k",
                      mapBlockOf(BIGINT, BOOLEAN, 16L, false))))
          .add(
              new TestColumn(
                  "t_map_null_key_complex_key_value",
                  getStandardMapObjectInspector(
                      getStandardListObjectInspector(javaStringObjectInspector),
                      getStandardMapObjectInspector(
                          javaLongObjectInspector, javaBooleanObjectInspector)),
                  asMap(
                      null,
                      ImmutableMap.of(15L, true),
                      ImmutableList.of("k", "ka"),
                      ImmutableMap.of(16L, false)),
                  mapBlockOf(
                      new ArrayType(VARCHAR),
                      new MapType(BIGINT, BOOLEAN),
                      arrayBlockOf(VARCHAR, "k", "ka"),
                      mapBlockOf(BIGINT, BOOLEAN, 16L, false))))
          .add(
              new TestColumn(
                  "t_struct_nested",
                  getStandardStructObjectInspector(
                      ImmutableList.of("struct_field"),
                      ImmutableList.of(getStandardListObjectInspector(javaStringObjectInspector))),
                  ImmutableList.of(ImmutableList.of("1", "2", "3")),
                  rowBlockOf(
                      ImmutableList.of(new ArrayType(VARCHAR)),
                      arrayBlockOf(VARCHAR, "1", "2", "3"))))
          .add(
              new TestColumn(
                  "t_struct_null",
                  getStandardStructObjectInspector(
                      ImmutableList.of("struct_field", "struct_field2"),
                      ImmutableList.of(javaStringObjectInspector, javaStringObjectInspector)),
                  Arrays.asList(null, null),
                  rowBlockOf(ImmutableList.of(VARCHAR, VARCHAR), null, null)))
          .add(
              new TestColumn(
                  "t_struct_non_nulls_after_nulls",
                  getStandardStructObjectInspector(
                      ImmutableList.of("struct_field1", "struct_field2"),
                      ImmutableList.of(javaIntObjectInspector, javaStringObjectInspector)),
                  Arrays.asList(null, "some string"),
                  rowBlockOf(ImmutableList.of(BIGINT, VARCHAR), null, "some string")))
          .add(
              new TestColumn(
                  "t_nested_struct_non_nulls_after_nulls",
                  getStandardStructObjectInspector(
                      ImmutableList.of("struct_field1", "struct_field2", "strict_field3"),
                      ImmutableList.of(
                          javaIntObjectInspector,
                          javaStringObjectInspector,
                          getStandardStructObjectInspector(
                              ImmutableList.of("nested_struct_field1", "nested_struct_field2"),
                              ImmutableList.of(
                                  javaIntObjectInspector, javaStringObjectInspector)))),
                  Arrays.asList(null, "some string", Arrays.asList(null, "nested_string2")),
                  rowBlockOf(
                      ImmutableList.of(
                          BIGINT,
                          VARCHAR,
                          new RowType(ImmutableList.of(BIGINT, VARCHAR), Optional.empty())),
                      null,
                      "some string",
                      rowBlockOf(ImmutableList.of(BIGINT, VARCHAR), null, "nested_string2"))))
          .build();

  /**
   * Builds a mutable two-entry map containing {@code null -> 0} and {@code 2 -> 3}.
   *
   * @return a new {@code HashMap} with those two entries
   */
  private static Map<Integer, Integer> mapWithNullKey() {
    // HashMap is used because it accepts a null key.
    Map<Integer, Integer> entries = new HashMap<>();
    entries.put(null, 0);
    entries.put(2, 3);
    return entries;
  }

  /**
   * Builds a mutable map from two key/value pairs; null keys and values are allowed.
   *
   * <p>The pairs are inserted in argument order, so if both keys are equal the second value
   * wins.
   *
   * @return a new {@code HashMap} holding the given pairs
   */
  private static <K, V> Map<K, V> asMap(K k1, V v1, K k2, V v2) {
    Map<K, V> pairs = new HashMap<>();
    pairs.put(k1, v1);
    pairs.put(k2, v2);
    return pairs;
  }

  /**
   * Creates one {@code HiveColumnHandle} per test column, in the same order.
   *
   * <p>Partition-key columns get index -1; all other columns are numbered consecutively from 0
   * in the order they appear.
   *
   * @param testColumns columns to create handles for
   * @return a new mutable list of handles
   */
  protected List<HiveColumnHandle> getColumnHandles(List<TestColumn> testColumns) {
    List<HiveColumnHandle> handles = new ArrayList<>();
    int regularColumnCount = 0;
    for (TestColumn testColumn : testColumns) {
      boolean partitionKey = testColumn.isPartitionKey();

      int hiveColumnIndex;
      if (partitionKey) {
        hiveColumnIndex = -1;
      } else {
        hiveColumnIndex = regularColumnCount;
        regularColumnCount++;
      }

      // The Hive type is resolved from the inspector's type name.
      HiveType hiveType = HiveType.valueOf(testColumn.getObjectInspector().getTypeName());
      HiveColumnHandle handle =
          new HiveColumnHandle(
              "client_id",
              testColumn.getName(),
              hiveType,
              hiveType.getTypeSignature(),
              hiveColumnIndex,
              partitionKey);
      handles.add(handle);
    }
    return handles;
  }

  /**
   * Writes {@code numRows} copies of the same row to {@code filePath} and returns a split that
   * covers the resulting file.
   *
   * <p>Partition-key columns are removed first because they are not written to the file. The
   * remaining column names and type names are passed to the serde through the {@code "columns"}
   * and {@code "columns.types"} table properties. When {@code compressionCodec} is non-null the
   * job configuration is set up for block compression with that codec, and the Parquet
   * compression and dictionary options are set as well.
   *
   * @param filePath location of the file to write
   * @param outputFormat Hive output format that supplies the record writer
   * @param serDe serde used to serialize each row; it is initialized by this method
   * @param compressionCodec codec name to look up, or {@code null} to write without compression
   * @param testColumns columns providing names, types, object inspectors and write values
   * @param numRows number of rows to write
   * @return a split starting at offset 0 whose length is the length of the written file
   * @throws Exception if serde initialization, writing, or file-system access fails
   */
  public static FileSplit createTestFile(
      String filePath,
      HiveOutputFormat<?, ?> outputFormat,
      @SuppressWarnings("deprecation") SerDe serDe,
      String compressionCodec,
      List<TestColumn> testColumns,
      int numRows)
      throws Exception {
    // filter out partition keys, which are not written to the file
    testColumns = ImmutableList.copyOf(filter(testColumns, not(TestColumn::isPartitionKey)));

    JobConf jobConf = new JobConf();
    ReaderWriterProfiler.setProfilerOptions(jobConf);

    String columnNames = Joiner.on(',').join(transform(testColumns, TestColumn::getName));
    String columnTypes = Joiner.on(',').join(transform(testColumns, TestColumn::getType));
    Properties tableProperties = new Properties();
    tableProperties.setProperty("columns", columnNames);
    tableProperties.setProperty("columns.types", columnTypes);
    serDe.initialize(new Configuration(), tableProperties);

    boolean compressed = compressionCodec != null;
    if (compressed) {
      CompressionCodecFactory codecFactory = new CompressionCodecFactory(new Configuration());
      CompressionCodec codec = codecFactory.getCodecByName(compressionCodec);
      jobConf.set(COMPRESS_CODEC, codec.getClass().getName());
      jobConf.set(COMPRESS_TYPE, SequenceFile.CompressionType.BLOCK.toString());
      jobConf.set("parquet.compression", compressionCodec);
      jobConf.set("parquet.enable.dictionary", "true");
    }

    Path path = new Path(filePath);
    // Progress reporting is not needed here, so a no-op callback is supplied.
    Progressable noOpProgress = () -> {};
    RecordWriter recordWriter =
        outputFormat.getHiveRecordWriter(
            jobConf, path, Text.class, compressed, tableProperties, noOpProgress);

    try {
      serDe.initialize(new Configuration(), tableProperties);

      SettableStructObjectInspector objectInspector =
          getStandardStructObjectInspector(
              ImmutableList.copyOf(transform(testColumns, TestColumn::getName)),
              ImmutableList.copyOf(transform(testColumns, TestColumn::getObjectInspector)));

      // A single row object is reused: its fields are overwritten before every write.
      Object row = objectInspector.create();
      List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());
      int columnCount = testColumns.size();

      for (int written = 0; written < numRows; written++) {
        for (int column = 0; column < columnCount; column++) {
          Object value = testColumns.get(column).getWriteValue();
          // Slice values are handed to the object inspector as raw bytes.
          Object fieldData = value instanceof Slice ? ((Slice) value).getBytes() : value;
          objectInspector.setStructFieldData(row, fields.get(column), fieldData);
        }

        recordWriter.write(serDe.serialize(row, objectInspector));
      }
    } finally {
      recordWriter.close(false);
    }

    path.getFileSystem(new Configuration()).setVerifyChecksum(true);
    long fileLength = new File(filePath).length();
    return new FileSplit(path, 0, fileLength, new String[0]);
  }

  /**
   * Reads {@code numRows} rows from {@code cursor} and asserts that every field matches the
   * expected value of the corresponding test column.
   *
   * <p>For each field the Presto type is resolved from the column's Hive type name and the
   * matching cursor accessor is used to read it. Hive {@code float} and {@code double} columns are
   * compared with the {@code EPSILON} tolerance, other primitive columns with plain equality, and
   * non-primitive columns by comparing the serialized form of the expected and actual blocks.
   *
   * @param cursor cursor positioned before the first row
   * @param testColumns columns in cursor field order
   * @param numRows number of rows that must be available from the cursor
   * @throws IOException declared for callers; not thrown directly by the visible code
   * @throws RuntimeException if a column resolves to a type none of the branches handle
   */
  protected void checkCursor(RecordCursor cursor, List<TestColumn> testColumns, int numRows)
      throws IOException {
    for (int rowIndex = 0; rowIndex < numRows; rowIndex++) {
      assertTrue(cursor.advanceNextPosition());

      int columnCount = testColumns.size();
      for (int column = 0; column < columnCount; column++) {
        TestColumn testColumn = testColumns.get(column);
        ObjectInspector inspector = testColumn.getObjectInspector();
        String hiveTypeName = inspector.getTypeName();
        Type type = HiveType.valueOf(hiveTypeName).getType(TYPE_MANAGER);

        // Pick the cursor accessor that matches the resolved type.
        Object actual;
        if (cursor.isNull(column)) {
          actual = null;
        } else if (BOOLEAN.equals(type)) {
          actual = cursor.getBoolean(column);
        } else if (BIGINT.equals(type)
            || DateType.DATE.equals(type)
            || TimestampType.TIMESTAMP.equals(type)) {
          actual = cursor.getLong(column);
        } else if (DOUBLE.equals(type)) {
          actual = cursor.getDouble(column);
        } else if (VARCHAR.equals(type) || VARBINARY.equals(type)) {
          actual = cursor.getSlice(column);
        } else if (isStructuralType(type)) {
          actual = cursor.getObject(column);
        } else {
          throw new RuntimeException("unknown type");
        }

        Object expected = testColumn.getExpectedValue();
        if (actual == null) {
          assertEquals(
              null, expected, String.format("Expected null for column %s", testColumn.getName()));
        } else if (hiveTypeName.equals("float") || hiveTypeName.equals("double")) {
          assertEquals((double) actual, (double) expected, EPSILON);
        } else {
          String message = String.format("Wrong value for column %s", testColumn.getName());
          if (inspector.getCategory() == Category.PRIMITIVE) {
            assertEquals(actual, expected, message);
          } else {
            Block expectedBlock = (Block) expected;
            Block actualBlock = (Block) actual;
            assertBlockEquals(actualBlock, expectedBlock, message);
          }
        }
      }
    }
  }

  /**
   * Materializes every row of {@code pageSource} and asserts that each field matches the expected
   * value of the corresponding test column. The page source is always closed afterwards.
   *
   * <p>The comparison depends on the column's Hive type name: {@code float} and {@code double} use
   * the {@code EPSILON} tolerance, {@code date} and {@code timestamp} wrap the expected
   * {@code Long} in {@code SqlDate} / {@code SqlTimestamp}, other primitives are compared after
   * converting slices and varbinary values to strings, and non-primitive columns are compared
   * against the object value obtained by writing the expected value through {@code type}.
   *
   * @param pageSource source to read and close
   * @param testColumns columns in field order
   * @param types Presto types in field order, used for materialization and structural comparison
   * @throws IOException if closing the page source fails
   */
  protected void checkPageSource(
      ConnectorPageSource pageSource, List<TestColumn> testColumns, List<Type> types)
      throws IOException {
    try {
      MaterializedResult materialized = materializeSourceDataStream(SESSION, pageSource, types);

      for (MaterializedRow row : materialized) {
        int columnCount = testColumns.size();
        for (int column = 0; column < columnCount; column++) {
          TestColumn testColumn = testColumns.get(column);
          Type type = types.get(column);

          Object actual = row.getField(column);
          Object expected = testColumn.getExpectedValue();

          if (actual == null) {
            assertEquals(
                null, expected, String.format("Expected non-null for column %d", column));
            continue;
          }

          ObjectInspector inspector = testColumn.getObjectInspector();
          switch (inspector.getTypeName()) {
            case "float":
            case "double":
              assertEquals((double) actual, (double) expected, EPSILON);
              break;
            case "date":
              assertEquals(actual, new SqlDate(((Long) expected).intValue()));
              break;
            case "timestamp":
              assertEquals(actual, new SqlTimestamp((Long) expected, SESSION.getTimeZoneKey()));
              break;
            default:
              if (inspector.getCategory() == Category.PRIMITIVE) {
                // Bring both sides to a plain String before comparing textual/binary values.
                Object expectedComparable =
                    expected instanceof Slice ? ((Slice) expected).toStringUtf8() : expected;

                Object actualComparable = actual;
                if (actualComparable instanceof Slice) {
                  actualComparable = ((Slice) actualComparable).toStringUtf8();
                } else if (actualComparable instanceof SqlVarbinary) {
                  actualComparable =
                      new String(((SqlVarbinary) actualComparable).getBytes(), UTF_8);
                }
                assertEquals(
                    actualComparable,
                    expectedComparable,
                    String.format("Wrong value for column %d", column));
              } else {
                // Convert the expected value to the same object form the materialized row uses.
                BlockBuilder blockBuilder = type.createBlockBuilder(new BlockBuilderStatus(), 1);
                type.writeObject(blockBuilder, expected);
                Object expectedObject = type.getObjectValue(SESSION, blockBuilder.build(), 0);
                assertEquals(
                    actual,
                    expectedObject,
                    String.format("Wrong value for column %s", testColumn.getName()));
              }
              break;
          }
        }
      }
    } finally {
      pageSource.close();
    }
  }

  /**
   * Asserts that two blocks are equal by comparing the slices produced by {@code blockToSlice}.
   *
   * @param actual block that was read
   * @param expected block that should have been read
   * @param message failure message passed to the assertion
   */
  private static void assertBlockEquals(Block actual, Block expected, String message) {
    Slice actualBytes = blockToSlice(actual);
    Slice expectedBytes = blockToSlice(expected);
    assertEquals(actualBytes, expectedBytes, message);
  }

  /**
   * Serializes a copy of the whole block (positions 0 through the position count) into a slice.
   *
   * @param block block to serialize
   * @return the slice holding the bytes written by {@code BlockSerdeUtil.writeBlock}
   */
  private static Slice blockToSlice(Block block) {
    // This function is strictly for testing use only
    SliceOutput output = new DynamicSliceOutput(1000);
    Block fullCopy = block.copyRegion(0, block.getPositionCount());
    BlockSerdeUtil.writeBlock(output, fullCopy);
    return output.slice();
  }

  /**
   * Immutable description of one column used by these tests: its name, the Hive object inspector
   * describing its type, the value written to the file, the value expected when reading it back,
   * and whether the column is a partition key.
   */
  public static final class TestColumn {
    private final String name;
    private final ObjectInspector objectInspector;
    private final Object writeValue;
    private final Object expectedValue;
    private final boolean partitionKey;

    /**
     * Creates a column that is not a partition key.
     *
     * @param name column name; must not be null
     * @param objectInspector inspector describing the column type; must not be null
     * @param writeValue value to write, may be null
     * @param expectedValue value expected on read, may be null
     */
    public TestColumn(
        String name, ObjectInspector objectInspector, Object writeValue, Object expectedValue) {
      this(name, objectInspector, writeValue, expectedValue, false);
    }

    /**
     * Creates a column.
     *
     * @param name column name; must not be null
     * @param objectInspector inspector describing the column type; must not be null
     * @param writeValue value to write, may be null
     * @param expectedValue value expected on read, may be null
     * @param partitionKey whether the column is a partition key
     * @throws NullPointerException if {@code name} or {@code objectInspector} is null
     */
    public TestColumn(
        String name,
        ObjectInspector objectInspector,
        Object writeValue,
        Object expectedValue,
        boolean partitionKey) {
      this.name = requireNonNull(name, "name is null");
      this.objectInspector = requireNonNull(objectInspector, "objectInspector is null");
      this.writeValue = writeValue;
      this.expectedValue = expectedValue;
      this.partitionKey = partitionKey;
    }

    /** Returns the column name. */
    public String getName() {
      return name;
    }

    /** Returns the type name reported by the column's object inspector. */
    public String getType() {
      return objectInspector.getTypeName();
    }

    /** Returns the object inspector describing the column type. */
    public ObjectInspector getObjectInspector() {
      return objectInspector;
    }

    /** Returns the value to write for this column; may be null. */
    public Object getWriteValue() {
      return writeValue;
    }

    /** Returns the value expected when this column is read back; may be null. */
    public Object getExpectedValue() {
      return expectedValue;
    }

    /** Returns whether this column is a partition key. */
    public boolean isPartitionKey() {
      return partitionKey;
    }

    @Override
    public String toString() {
      return "TestColumn{"
          + "name='" + name + '\''
          + ", objectInspector=" + objectInspector
          + ", writeValue=" + writeValue
          + ", expectedValue=" + expectedValue
          + ", partitionKey=" + partitionKey
          + '}';
    }
  }
}
Пример #10
0
 /**
  * Returns the value of the given field as a UTF-8 slice.
  *
  * <p>{@code checkFieldType(field, VARCHAR)} runs first; presumably it rejects fields that are
  * not VARCHAR (helper not visible here, confirm against its definition).
  *
  * @param field index of the field to read
  * @return the field value encoded with {@code Slices.utf8Slice}
  */
 @Override
 public Slice getSlice(int field) {
   checkFieldType(field, VARCHAR);
   String text = getFieldValue(field);
   return Slices.utf8Slice(text);
 }