예제 #1
0
  public GenericHiveRecordCursor(
      RecordReader<K, V> recordReader,
      long totalBytes,
      Properties splitSchema,
      List<HivePartitionKey> partitionKeys,
      List<HiveColumnHandle> columns,
      DateTimeZone hiveStorageTimeZone,
      TypeManager typeManager) {
    requireNonNull(recordReader, "recordReader is null");
    checkArgument(totalBytes >= 0, "totalBytes is negative");
    requireNonNull(splitSchema, "splitSchema is null");
    requireNonNull(partitionKeys, "partitionKeys is null");
    requireNonNull(columns, "columns is null");
    requireNonNull(hiveStorageTimeZone, "hiveStorageTimeZone is null");

    this.recordReader = recordReader;
    this.totalBytes = totalBytes;
    this.key = recordReader.createKey();
    this.value = recordReader.createValue();
    this.hiveStorageTimeZone = hiveStorageTimeZone;

    this.deserializer = getDeserializer(splitSchema);
    this.rowInspector = getTableObjectInspector(deserializer);

    int size = columns.size();

    String[] names = new String[size];
    this.types = new Type[size];
    this.hiveTypes = new HiveType[size];

    this.structFields = new StructField[size];
    this.fieldInspectors = new ObjectInspector[size];

    this.isPartitionColumn = new boolean[size];

    this.loaded = new boolean[size];
    this.booleans = new boolean[size];
    this.longs = new long[size];
    this.doubles = new double[size];
    this.slices = new Slice[size];
    this.objects = new Object[size];
    this.nulls = new boolean[size];

    // initialize data columns
    for (int i = 0; i < columns.size(); i++) {
      HiveColumnHandle column = columns.get(i);

      names[i] = column.getName();
      types[i] = typeManager.getType(column.getTypeSignature());
      hiveTypes[i] = column.getHiveType();

      if (!column.isPartitionKey()) {
        StructField field = rowInspector.getStructFieldRef(column.getName());
        structFields[i] = field;
        fieldInspectors[i] = field.getFieldObjectInspector();
      }

      isPartitionColumn[i] = column.isPartitionKey();
    }

    // parse requested partition columns
    Map<String, HivePartitionKey> partitionKeysByName =
        uniqueIndex(partitionKeys, HivePartitionKey::getName);
    for (int columnIndex = 0; columnIndex < columns.size(); columnIndex++) {
      HiveColumnHandle column = columns.get(columnIndex);
      if (column.isPartitionKey()) {
        HivePartitionKey partitionKey = partitionKeysByName.get(column.getName());
        checkArgument(partitionKey != null, "Unknown partition key %s", column.getName());

        byte[] bytes = partitionKey.getValue().getBytes(UTF_8);

        String name = names[columnIndex];
        Type type = types[columnIndex];
        if (HiveUtil.isHiveNull(bytes)) {
          nulls[columnIndex] = true;
        } else if (BOOLEAN.equals(type)) {
          booleans[columnIndex] = booleanPartitionKey(partitionKey.getValue(), name);
        } else if (BIGINT.equals(type)) {
          longs[columnIndex] = bigintPartitionKey(partitionKey.getValue(), name);
        } else if (INTEGER.equals(type)) {
          longs[columnIndex] = integerPartitionKey(partitionKey.getValue(), name);
        } else if (SMALLINT.equals(type)) {
          longs[columnIndex] = smallintPartitionKey(partitionKey.getValue(), name);
        } else if (TINYINT.equals(type)) {
          longs[columnIndex] = tinyintPartitionKey(partitionKey.getValue(), name);
        } else if (DOUBLE.equals(type)) {
          doubles[columnIndex] = doublePartitionKey(partitionKey.getValue(), name);
        } else if (isVarcharType(type)) {
          slices[columnIndex] = varcharPartitionKey(partitionKey.getValue(), name, type);
        } else if (DATE.equals(type)) {
          longs[columnIndex] = datePartitionKey(partitionKey.getValue(), name);
        } else if (TIMESTAMP.equals(type)) {
          longs[columnIndex] =
              timestampPartitionKey(partitionKey.getValue(), hiveStorageTimeZone, name);
        } else if (isShortDecimal(type)) {
          longs[columnIndex] =
              shortDecimalPartitionKey(partitionKey.getValue(), (DecimalType) type, name);
        } else if (isLongDecimal(type)) {
          slices[columnIndex] =
              longDecimalPartitionKey(partitionKey.getValue(), (DecimalType) type, name);
        } else {
          throw new PrestoException(
              NOT_SUPPORTED,
              format(
                  "Unsupported column type %s for partition key: %s", type.getDisplayName(), name));
        }
      }
    }
  }
예제 #2
0
  public static Expression toExpression(Object object, Type type) {
    requireNonNull(type, "type is null");

    if (object instanceof Expression) {
      return (Expression) object;
    }

    if (object == null) {
      if (type.equals(UNKNOWN)) {
        return new NullLiteral();
      }
      return new Cast(new NullLiteral(), type.getTypeSignature().toString(), false, true);
    }

    checkArgument(
        Primitives.wrap(type.getJavaType()).isInstance(object),
        "object.getClass (%s) and type.getJavaType (%s) do not agree",
        object.getClass(),
        type.getJavaType());

    if (type.equals(BIGINT)) {
      return new LongLiteral(object.toString());
    }

    if (type.equals(DOUBLE)) {
      Double value = (Double) object;
      // WARNING: the ORC predicate code depends on NaN and infinity not appearing in a tuple
      // domain, so
      // if you remove this, you will need to update the TupleDomainOrcPredicate
      if (value.isNaN()) {
        return new FunctionCall(new QualifiedName("nan"), ImmutableList.<Expression>of());
      } else if (value.equals(Double.NEGATIVE_INFINITY)) {
        return ArithmeticUnaryExpression.negative(
            new FunctionCall(new QualifiedName("infinity"), ImmutableList.<Expression>of()));
      } else if (value.equals(Double.POSITIVE_INFINITY)) {
        return new FunctionCall(new QualifiedName("infinity"), ImmutableList.<Expression>of());
      } else {
        return new DoubleLiteral(object.toString());
      }
    }

    if (type instanceof VarcharType) {
      if (object instanceof String) {
        object = Slices.utf8Slice((String) object);
      }

      if (object instanceof Slice) {
        Slice value = (Slice) object;
        int length = SliceUtf8.countCodePoints(value);

        if (length == ((VarcharType) type).getLength()) {
          return new StringLiteral(value.toStringUtf8());
        }

        return new Cast(
            new StringLiteral(value.toStringUtf8()), type.getDisplayName(), false, true);
      }

      throw new IllegalArgumentException(
          "object must be instance of Slice or String when type is VARCHAR");
    }

    if (type.equals(BOOLEAN)) {
      return new BooleanLiteral(object.toString());
    }

    if (object instanceof Block) {
      SliceOutput output = new DynamicSliceOutput(((Block) object).getSizeInBytes());
      BlockSerdeUtil.writeBlock(output, (Block) object);
      object = output.slice();
      // This if condition will evaluate to true: object instanceof Slice && !type.equals(VARCHAR)
    }

    if (object instanceof Slice) {
      // HACK: we need to serialize VARBINARY in a format that can be embedded in an expression to
      // be
      // able to encode it in the plan that gets sent to workers.
      // We do this by transforming the in-memory varbinary into a call to
      // from_base64(<base64-encoded value>)
      FunctionCall fromBase64 =
          new FunctionCall(
              new QualifiedName("from_base64"),
              ImmutableList.of(
                  new StringLiteral(VarbinaryFunctions.toBase64((Slice) object).toStringUtf8())));
      Signature signature = FunctionRegistry.getMagicLiteralFunctionSignature(type);
      return new FunctionCall(new QualifiedName(signature.getName()), ImmutableList.of(fromBase64));
    }

    Signature signature = FunctionRegistry.getMagicLiteralFunctionSignature(type);
    Expression rawLiteral = toExpression(object, FunctionRegistry.typeForMagicLiteral(type));

    return new FunctionCall(new QualifiedName(signature.getName()), ImmutableList.of(rawLiteral));
  }
예제 #3
0
  public ParquetHiveRecordCursor(
      Configuration configuration,
      Path path,
      long start,
      long length,
      Properties splitSchema,
      List<HivePartitionKey> partitionKeys,
      List<HiveColumnHandle> columns,
      boolean useParquetColumnNames,
      TypeManager typeManager) {
    requireNonNull(path, "path is null");
    checkArgument(length >= 0, "totalBytes is negative");
    requireNonNull(splitSchema, "splitSchema is null");
    requireNonNull(partitionKeys, "partitionKeys is null");
    requireNonNull(columns, "columns is null");

    this.totalBytes = length;

    int size = columns.size();

    this.names = new String[size];
    this.types = new Type[size];

    this.isPartitionColumn = new boolean[size];

    this.booleans = new boolean[size];
    this.longs = new long[size];
    this.doubles = new double[size];
    this.slices = new Slice[size];
    this.objects = new Object[size];
    this.nulls = new boolean[size];
    this.nullsRowDefault = new boolean[size];

    for (int i = 0; i < columns.size(); i++) {
      HiveColumnHandle column = columns.get(i);

      names[i] = column.getName();
      types[i] = typeManager.getType(column.getTypeSignature());

      isPartitionColumn[i] = column.isPartitionKey();
      nullsRowDefault[i] = !column.isPartitionKey();
    }

    this.recordReader =
        createParquetRecordReader(
            configuration, path, start, length, columns, useParquetColumnNames);

    // parse requested partition columns
    Map<String, HivePartitionKey> partitionKeysByName =
        uniqueIndex(partitionKeys, HivePartitionKey::getName);
    for (int columnIndex = 0; columnIndex < columns.size(); columnIndex++) {
      HiveColumnHandle column = columns.get(columnIndex);
      if (column.isPartitionKey()) {
        HivePartitionKey partitionKey = partitionKeysByName.get(column.getName());
        checkArgument(partitionKey != null, "Unknown partition key %s", column.getName());

        byte[] bytes = partitionKey.getValue().getBytes(UTF_8);

        String name = names[columnIndex];
        Type type = types[columnIndex];
        if (HiveUtil.isHiveNull(bytes)) {
          nullsRowDefault[columnIndex] = true;
        } else if (type.equals(BOOLEAN)) {
          booleans[columnIndex] = booleanPartitionKey(partitionKey.getValue(), name);
        } else if (type.equals(BIGINT)) {
          longs[columnIndex] = bigintPartitionKey(partitionKey.getValue(), name);
        } else if (type.equals(DOUBLE)) {
          doubles[columnIndex] = doublePartitionKey(partitionKey.getValue(), name);
        } else if (type.equals(VARCHAR)) {
          slices[columnIndex] = Slices.wrappedBuffer(bytes);
        } else {
          throw new PrestoException(
              NOT_SUPPORTED,
              format(
                  "Unsupported column type %s for partition key: %s", type.getDisplayName(), name));
        }
      }
    }
  }