private void parseStringColumn(int column) {
    // don't include column number in message because it causes boxing which is expensive here
    checkArgument(!isPartitionColumn[column], "Column is a partition key");

    loaded[column] = true;

    Object fieldData = rowInspector.getStructFieldData(rowData, structFields[column]);

    if (fieldData == null) {
      nulls[column] = true;
    } else if (hiveTypes[column] == HiveType.MAP
        || hiveTypes[column] == HiveType.LIST
        || hiveTypes[column] == HiveType.STRUCT) {
      // temporarily special case MAP, LIST, and STRUCT types as strings
      slices[column] =
          Slices.wrappedBuffer(
              SerDeUtils.getJsonBytes(sessionTimeZone, fieldData, fieldInspectors[column]));
      nulls[column] = false;
    } else {
      Object fieldValue =
          ((PrimitiveObjectInspector) fieldInspectors[column]).getPrimitiveJavaObject(fieldData);
      checkState(fieldValue != null, "fieldValue should not be null");
      if (fieldValue instanceof String) {
        slices[column] = Slices.utf8Slice((String) fieldValue);
      } else if (fieldValue instanceof byte[]) {
        slices[column] = Slices.wrappedBuffer((byte[]) fieldValue);
      } else {
        throw new IllegalStateException(
            "unsupported string field type: " + fieldValue.getClass().getName());
      }
      nulls[column] = false;
    }
  }
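A detail common to all of these examples: Slices.wrappedBuffer creates a zero-copy view over the array it is given, so the resulting Slice observes any later mutation of that array. A minimal standalone sketch of that behavior, using the Airlift Slice API these snippets build on (the class name is illustrative):

import static java.nio.charset.StandardCharsets.UTF_8;

import io.airlift.slice.Slice;
import io.airlift.slice.Slices;

public class WrappedBufferDemo {
  public static void main(String[] args) {
    byte[] data = "hello".getBytes(UTF_8);
    Slice slice = Slices.wrappedBuffer(data);

    // No copy is made, so mutating the source array is visible through the slice.
    data[0] = 'H';
    System.out.println(slice.toStringUtf8()); // prints "Hello"
  }
}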
Example #2
  private void parseStringColumn(int column) {
    // don't include column number in message because it causes boxing which is expensive here
    checkArgument(!isPartitionColumn[column], "Column is a partition key");

    loaded[column] = true;

    Object fieldData = rowInspector.getStructFieldData(rowData, structFields[column]);

    if (fieldData == null) {
      nulls[column] = true;
    } else {
      Object fieldValue =
          ((PrimitiveObjectInspector) fieldInspectors[column]).getPrimitiveJavaObject(fieldData);
      checkState(fieldValue != null, "fieldValue should not be null");
      Slice value;
      if (fieldValue instanceof String) {
        value = Slices.utf8Slice((String) fieldValue);
      } else if (fieldValue instanceof byte[]) {
        value = Slices.wrappedBuffer((byte[]) fieldValue);
      } else if (fieldValue instanceof HiveVarchar) {
        value = Slices.utf8Slice(((HiveVarchar) fieldValue).getValue());
      } else {
        throw new IllegalStateException(
            "unsupported string field type: " + fieldValue.getClass().getName());
      }
      Type type = types[column];
      if (isVarcharType(type)) {
        value = truncateToLength(value, type);
      }
      slices[column] = value;
      nulls[column] = false;
    }
  }
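Compared with Example #1, this variant also unwraps HiveVarchar values and, when the declared type is a varchar, truncates the slice to the type's length with truncateToLength before storing it.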
Example #3
 private void newBuffer() {
   this.buffer = allocator.allocate(PageFormat.PAGE_HEADER_SIZE + fixedRecordSize);
   this.bufferSlice = Slices.wrappedBuffer(buffer.array(), buffer.offset(), buffer.capacity());
   this.count = 0;
   this.position = PageFormat.PAGE_HEADER_SIZE;
   this.stringReferences.clear();
   this.stringReferenceSize = 0;
 }
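This example uses the three-argument overload, which wraps only a sub-range of a backing array. A small sketch of the same idea (names are illustrative, not taken from the snippet above):

import io.airlift.slice.Slice;
import io.airlift.slice.Slices;

class SubRangeWrapDemo {
  static Slice wrapMiddle() {
    byte[] backing = new byte[64];
    // Wrap only bytes 16..47; nothing is copied, so writes go through to the array.
    Slice view = Slices.wrappedBuffer(backing, 16, 32);
    view.setLong(0, 42L); // lands in backing[16..23]
    return view;
  }
}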
Example #4
  private static Slice formatDatetime(
      ISOChronology chronology, Locale locale, long timestamp, Slice formatString) {
    String pattern = formatString.toString(Charsets.UTF_8);
    DateTimeFormatter formatter =
        DateTimeFormat.forPattern(pattern).withChronology(chronology).withLocale(locale);

    String datetimeString = formatter.print(timestamp);
    return Slices.wrappedBuffer(datetimeString.getBytes(Charsets.UTF_8));
  }
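A hypothetical invocation, assuming the method were reachable from the same class (the pattern and the epoch instant are illustrative; ISOChronology comes from Joda-Time):

Slice pattern = Slices.utf8Slice("yyyy-MM-dd HH:mm");
Slice formatted = formatDatetime(
    ISOChronology.getInstanceUTC(), Locale.ENGLISH, 0L, pattern);
System.out.println(formatted.toStringUtf8()); // prints "1970-01-01 00:00"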
Example #5
  private Page generateZeroPage(List<Type> types, int rowsCount, int fieldLength) {
    byte[] constantBytes = new byte[fieldLength];
    Arrays.fill(constantBytes, (byte) 42);
    Slice constantSlice = Slices.wrappedBuffer(constantBytes);

    Block[] blocks = new Block[types.size()];
    for (int i = 0; i < blocks.length; i++) {
      blocks[i] = createZeroBlock(types.get(i), rowsCount, constantSlice);
    }

    return new Page(rowsCount, blocks);
  }
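Note that a single constantSlice instance backs every generated block; sharing one wrapped buffer across blocks is safe here because nothing mutates the array after construction.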
Example #6
  public GenericHiveRecordCursor(
      RecordReader<K, V> recordReader,
      long totalBytes,
      Properties splitSchema,
      List<HivePartitionKey> partitionKeys,
      List<HiveColumnHandle> columns,
      DateTimeZone hiveStorageTimeZone,
      DateTimeZone sessionTimeZone) {
    checkNotNull(recordReader, "recordReader is null");
    checkArgument(totalBytes >= 0, "totalBytes is negative");
    checkNotNull(splitSchema, "splitSchema is null");
    checkNotNull(partitionKeys, "partitionKeys is null");
    checkNotNull(columns, "columns is null");
    checkArgument(!columns.isEmpty(), "columns is empty");
    checkNotNull(hiveStorageTimeZone, "hiveStorageTimeZone is null");
    checkNotNull(sessionTimeZone, "sessionTimeZone is null");

    this.recordReader = recordReader;
    this.totalBytes = totalBytes;
    this.key = recordReader.createKey();
    this.value = recordReader.createValue();
    this.hiveStorageTimeZone = hiveStorageTimeZone;
    this.sessionTimeZone = sessionTimeZone;

    this.deserializer = getDeserializer(splitSchema);
    this.rowInspector = getTableObjectInspector(deserializer);

    int size = columns.size();

    String[] names = new String[size];
    this.types = new Type[size];
    this.hiveTypes = new HiveType[size];

    this.structFields = new StructField[size];
    this.fieldInspectors = new ObjectInspector[size];

    this.isPartitionColumn = new boolean[size];

    this.loaded = new boolean[size];
    this.booleans = new boolean[size];
    this.longs = new long[size];
    this.doubles = new double[size];
    this.slices = new Slice[size];
    this.nulls = new boolean[size];

    // initialize data columns
    for (int i = 0; i < columns.size(); i++) {
      HiveColumnHandle column = columns.get(i);

      names[i] = column.getName();
      types[i] = column.getType();
      hiveTypes[i] = column.getHiveType();

      if (!column.isPartitionKey()) {
        StructField field = rowInspector.getStructFieldRef(column.getName());
        structFields[i] = field;
        fieldInspectors[i] = field.getFieldObjectInspector();
      }

      isPartitionColumn[i] = column.isPartitionKey();
    }

    // parse requested partition columns
    Map<String, HivePartitionKey> partitionKeysByName =
        uniqueIndex(partitionKeys, HivePartitionKey.nameGetter());
    for (int columnIndex = 0; columnIndex < columns.size(); columnIndex++) {
      HiveColumnHandle column = columns.get(columnIndex);
      if (column.isPartitionKey()) {
        HivePartitionKey partitionKey = partitionKeysByName.get(column.getName());
        checkArgument(partitionKey != null, "Unknown partition key %s", column.getName());

        byte[] bytes = partitionKey.getValue().getBytes(Charsets.UTF_8);

        Type type = types[columnIndex];
        if (BOOLEAN.equals(type)) {
          if (isTrue(bytes, 0, bytes.length)) {
            booleans[columnIndex] = true;
          } else if (isFalse(bytes, 0, bytes.length)) {
            booleans[columnIndex] = false;
          } else {
            String valueString = new String(bytes, Charsets.UTF_8);
            throw new IllegalArgumentException(
                String.format(
                    "Invalid partition value '%s' for BOOLEAN partition key %s",
                    valueString, names[columnIndex]));
          }
        } else if (BIGINT.equals(type)) {
          if (bytes.length == 0) {
            throw new IllegalArgumentException(
                String.format(
                    "Invalid partition value '' for BIGINT partition key %s", names[columnIndex]));
          }
          longs[columnIndex] = parseLong(bytes, 0, bytes.length);
        } else if (DOUBLE.equals(type)) {
          if (bytes.length == 0) {
            throw new IllegalArgumentException(
                String.format(
                    "Invalid partition value '' for DOUBLE partition key %s", names[columnIndex]));
          }
          doubles[columnIndex] = parseDouble(bytes, 0, bytes.length);
        } else if (VARCHAR.equals(type)) {
          slices[columnIndex] = Slices.wrappedBuffer(Arrays.copyOf(bytes, bytes.length));
        } else {
          throw new UnsupportedOperationException("Unsupported column type: " + type);
        }
      }
    }
  }
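For the VARCHAR partition key this constructor defensively copies the value (Arrays.copyOf) before wrapping it, even though String.getBytes already returns a fresh array; contrast the direct wrap in Example #11.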
Example #7
  private static void serializePrimitive(
      Type type, BlockBuilder builder, Object object, PrimitiveObjectInspector inspector) {
    requireNonNull(builder, "builder is null");

    if (object == null) {
      builder.appendNull();
      return;
    }

    switch (inspector.getPrimitiveCategory()) {
      case BOOLEAN:
        BooleanType.BOOLEAN.writeBoolean(builder, ((BooleanObjectInspector) inspector).get(object));
        return;
      case BYTE:
        TinyintType.TINYINT.writeLong(builder, ((ByteObjectInspector) inspector).get(object));
        return;
      case SHORT:
        SmallintType.SMALLINT.writeLong(builder, ((ShortObjectInspector) inspector).get(object));
        return;
      case INT:
        IntegerType.INTEGER.writeLong(builder, ((IntObjectInspector) inspector).get(object));
        return;
      case LONG:
        BigintType.BIGINT.writeLong(builder, ((LongObjectInspector) inspector).get(object));
        return;
      case FLOAT:
        DoubleType.DOUBLE.writeDouble(builder, ((FloatObjectInspector) inspector).get(object));
        return;
      case DOUBLE:
        DoubleType.DOUBLE.writeDouble(builder, ((DoubleObjectInspector) inspector).get(object));
        return;
      case STRING:
        type.writeSlice(
            builder,
            Slices.utf8Slice(((StringObjectInspector) inspector).getPrimitiveJavaObject(object)));
        return;
      case VARCHAR:
        type.writeSlice(
            builder,
            Slices.utf8Slice(
                ((HiveVarcharObjectInspector) inspector)
                    .getPrimitiveJavaObject(object)
                    .getValue()));
        return;
      case CHAR:
        CharType charType = checkType(type, CharType.class, "type");
        HiveChar hiveChar = ((HiveCharObjectInspector) inspector).getPrimitiveJavaObject(object);
        type.writeSlice(
            builder,
            trimSpacesAndTruncateToLength(
                Slices.utf8Slice(hiveChar.getValue()), charType.getLength()));
        return;
      case DATE:
        DateType.DATE.writeLong(builder, formatDateAsLong(object, (DateObjectInspector) inspector));
        return;
      case TIMESTAMP:
        TimestampType.TIMESTAMP.writeLong(
            builder, formatTimestampAsLong(object, (TimestampObjectInspector) inspector));
        return;
      case BINARY:
        VARBINARY.writeSlice(
            builder,
            Slices.wrappedBuffer(
                ((BinaryObjectInspector) inspector).getPrimitiveJavaObject(object)));
        return;
      case DECIMAL:
        DecimalType decimalType = checkType(type, DecimalType.class, "type");
        HiveDecimalWritable hiveDecimal =
            ((HiveDecimalObjectInspector) inspector).getPrimitiveWritableObject(object);
        if (decimalType.isShort()) {
          decimalType.writeLong(
              builder, DecimalUtils.getShortDecimalValue(hiveDecimal, decimalType.getScale()));
        } else {
          decimalType.writeSlice(
              builder, DecimalUtils.getLongDecimalValue(hiveDecimal, decimalType.getScale()));
        }
        return;
    }
    throw new RuntimeException("Unknown primitive type: " + inspector.getPrimitiveCategory());
  }
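All of the string-like branches funnel into a Slice: STRING, VARCHAR, and CHAR copy the value into a fresh UTF-8 slice via Slices.utf8Slice, while BINARY wraps the inspector's byte array directly with Slices.wrappedBuffer.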
Example #8
 private StringLiteral(Optional<NodeLocation> location, String value) {
   super(location);
   requireNonNull(value, "value is null");
   this.value = value;
   this.slice = Slices.wrappedBuffer(value.getBytes(UTF_8));
 }
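Wrapping value.getBytes(UTF_8) here is equivalent to the Slices.utf8Slice(value) shorthand used elsewhere in these examples.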
Example #9
 @Override
 public void addBinary(Binary value) {
   nulls[fieldIndex] = false;
   slices[fieldIndex] = Slices.wrappedBuffer(value.getBytes());
 }
Example #10
 @Override
 public void addBinary(Binary value) {
   VARBINARY.writeSlice(builder, Slices.wrappedBuffer(value.getBytes()));
   wroteValue = true;
 }
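Both Parquet read callbacks apply the same wrap-without-copy pattern to the decoded Binary's bytes: the first stores the slice into the cursor's slices array, the second writes it straight into a BlockBuilder as VARBINARY.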
Example #11
  public ParquetHiveRecordCursor(
      Configuration configuration,
      Path path,
      long start,
      long length,
      Properties splitSchema,
      List<HivePartitionKey> partitionKeys,
      List<HiveColumnHandle> columns,
      boolean useParquetColumnNames,
      TypeManager typeManager) {
    requireNonNull(path, "path is null");
    checkArgument(length >= 0, "length is negative");
    requireNonNull(splitSchema, "splitSchema is null");
    requireNonNull(partitionKeys, "partitionKeys is null");
    requireNonNull(columns, "columns is null");

    this.totalBytes = length;

    int size = columns.size();

    this.names = new String[size];
    this.types = new Type[size];

    this.isPartitionColumn = new boolean[size];

    this.booleans = new boolean[size];
    this.longs = new long[size];
    this.doubles = new double[size];
    this.slices = new Slice[size];
    this.objects = new Object[size];
    this.nulls = new boolean[size];
    this.nullsRowDefault = new boolean[size];

    for (int i = 0; i < columns.size(); i++) {
      HiveColumnHandle column = columns.get(i);

      names[i] = column.getName();
      types[i] = typeManager.getType(column.getTypeSignature());

      isPartitionColumn[i] = column.isPartitionKey();
      nullsRowDefault[i] = !column.isPartitionKey();
    }

    this.recordReader =
        createParquetRecordReader(
            configuration, path, start, length, columns, useParquetColumnNames);

    // parse requested partition columns
    Map<String, HivePartitionKey> partitionKeysByName =
        uniqueIndex(partitionKeys, HivePartitionKey::getName);
    for (int columnIndex = 0; columnIndex < columns.size(); columnIndex++) {
      HiveColumnHandle column = columns.get(columnIndex);
      if (column.isPartitionKey()) {
        HivePartitionKey partitionKey = partitionKeysByName.get(column.getName());
        checkArgument(partitionKey != null, "Unknown partition key %s", column.getName());

        byte[] bytes = partitionKey.getValue().getBytes(UTF_8);

        String name = names[columnIndex];
        Type type = types[columnIndex];
        if (HiveUtil.isHiveNull(bytes)) {
          nullsRowDefault[columnIndex] = true;
        } else if (type.equals(BOOLEAN)) {
          booleans[columnIndex] = booleanPartitionKey(partitionKey.getValue(), name);
        } else if (type.equals(BIGINT)) {
          longs[columnIndex] = bigintPartitionKey(partitionKey.getValue(), name);
        } else if (type.equals(DOUBLE)) {
          doubles[columnIndex] = doublePartitionKey(partitionKey.getValue(), name);
        } else if (type.equals(VARCHAR)) {
          slices[columnIndex] = Slices.wrappedBuffer(bytes);
        } else {
          throw new PrestoException(
              NOT_SUPPORTED,
              format(
                  "Unsupported column type %s for partition key: %s", type.getDisplayName(), name));
        }
      }
    }
  }
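Unlike Example #6, the VARCHAR partition value here is wrapped without a defensive copy, which is safe because String.getBytes allocates a fresh array on every call.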
Example #12
 public static Slice base64Decode(byte[] bytes) {
   return Slices.wrappedBuffer(Base64.getDecoder().decode(bytes));
 }
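A hypothetical round-trip; Base64 "SGVsbG8=" decodes to the ASCII bytes of "Hello":

byte[] encoded = "SGVsbG8=".getBytes(java.nio.charset.StandardCharsets.US_ASCII);
Slice decoded = base64Decode(encoded);
System.out.println(decoded.toStringUtf8()); // prints "Hello"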