Exemplo n.º 1
0
 /**
  * Maps each column name to the position at which its handle appears in {@code columnHandles}.
  *
  * <p>Every handle is passed through {@code checkType} with {@code HiveColumnHandle.class} before
  * its name is read. Positions are zero-based and follow the iteration order of the list.
  *
  * <p>NOTE(review): Guava's {@code ImmutableMap.Builder} is documented to reject duplicate keys at
  * {@code build()} time, so two handles sharing a name would presumably fail here; confirm against
  * the Guava version in use.
  *
  * @param columnHandles handles to index, in the order whose positions should be recorded
  * @return an immutable map from column name to list position
  */
 private static ImmutableMap<String, Integer> indexColumns(
     List<ConnectorColumnHandle> columnHandles) {
   ImmutableMap.Builder<String, Integer> positionsByName = ImmutableMap.builder();
   int position = 0;
   for (ConnectorColumnHandle handle : columnHandles) {
     String name = checkType(handle, HiveColumnHandle.class, "columnHandle").getName();
     positionsByName.put(name, position++);
   }
   return positionsByName.build();
 }
Exemplo n.º 2
0
    /**
     * Looks up the Parquet schema entry that corresponds to a Hive column.
     *
     * <p>When {@code useParquetColumnNames} is set, the field is resolved by the column's name;
     * otherwise it is resolved by the column's Hive column index.
     *
     * @param column the Hive column to resolve
     * @param messageType the Parquet schema to search
     * @return the matching Parquet type, or {@code null} when the schema has no field with that
     *     name (name mode) or the index is not below the schema's field count (index mode)
     */
    private parquet.schema.Type getParquetType(HiveColumnHandle column, MessageType messageType) {
      if (!useParquetColumnNames) {
        // Positional lookup: only indexes below the schema's field count are resolved.
        int ordinal = column.getHiveColumnIndex();
        return ordinal < messageType.getFieldCount() ? messageType.getType(ordinal) : null;
      }

      // Name-based lookup.
      String fieldName = column.getName();
      return messageType.containsField(fieldName) ? messageType.getType(fieldName) : null;
    }
Exemplo n.º 3
0
    /**
     * Records the requested columns and builds one converter per column that is present in the
     * Parquet schema.
     *
     * <p>Partition-key columns and columns for which {@code getParquetType} returns {@code null}
     * get no converter. Primitive Parquet types get a {@code ParquetPrimitiveColumnConverter};
     * group types get a {@code ParquetColumnConverter} wrapping a list, map or struct converter,
     * selected by the base of the column's type signature. Each converter is given the column's
     * position in {@code columns}.
     *
     * <p>The group converters read {@code types[i]}, an array that is not declared in this
     * constructor (presumably a field of the enclosing class; verify it is populated first).
     *
     * @param useParquetColumnNames whether columns are matched to Parquet fields by name rather
     *     than by Hive column index
     * @param columns the columns requested by the reader
     * @param messageType the Parquet schema of the file
     * @throws IllegalArgumentException if a group-typed column has a type signature base other
     *     than the array, map or row cases handled below
     */
    public PrestoReadSupport(
        boolean useParquetColumnNames, List<HiveColumnHandle> columns, MessageType messageType) {
      this.columns = columns;
      // Must be assigned before the loop: getParquetType reads this flag.
      this.useParquetColumnNames = useParquetColumnNames;

      ImmutableList.Builder<Converter> builder = ImmutableList.builder();
      for (int field = 0; field < columns.size(); field++) {
        HiveColumnHandle handle = columns.get(field);
        if (handle.isPartitionKey()) {
          continue;
        }

        parquet.schema.Type fileType = getParquetType(handle, messageType);
        if (fileType == null) {
          // The schema has no matching field; no converter is registered for this column.
          continue;
        }

        if (fileType.isPrimitive()) {
          builder.add(new ParquetPrimitiveColumnConverter(field));
          continue;
        }

        GroupType group = fileType.asGroupType();
        String groupName = group.getName();
        switch (handle.getTypeSignature().getBase()) {
          case ARRAY:
            builder.add(
                new ParquetColumnConverter(
                    new ParquetListConverter(types[field], groupName, group), field));
            break;
          case StandardTypes.MAP:
            builder.add(
                new ParquetColumnConverter(
                    new ParquetMapConverter(types[field], groupName, group), field));
            break;
          case ROW:
            builder.add(
                new ParquetColumnConverter(
                    new ParquetStructConverter(types[field], groupName, group), field));
            break;
          default:
            throw new IllegalArgumentException(
                "Group column "
                    + groupName
                    + " type "
                    + group.getOriginalType()
                    + " not supported");
        }
      }
      this.converters = builder.build();
    }
Exemplo n.º 4
0
  /**
   * Creates a cursor over {@code recordReader} for the given columns.
   *
   * <p>The constructor validates its arguments, obtains the deserializer and row inspector from
   * {@code splitSchema}, allocates one slot per requested column in each per-column array, and
   * then fills the slots of partition-key columns by parsing the matching partition value.
   * Partition keys are supported for BOOLEAN, BIGINT, DOUBLE and VARCHAR columns.
   *
   * @param recordReader source of records; also used to create the reusable key and value
   * @param totalBytes total size in bytes; must not be negative
   * @param splitSchema schema properties handed to {@code getDeserializer}
   * @param partitionKeys partition keys available for this split
   * @param columns requested columns; must not be empty
   * @param hiveStorageTimeZone time zone stored on the cursor
   * @param sessionTimeZone time zone stored on the cursor
   * @throws IllegalArgumentException if an argument check fails, a partition column has no
   *     matching partition key, or a partition value cannot be used for its column type
   * @throws UnsupportedOperationException if a partition column has a type other than the four
   *     handled here
   */
  public GenericHiveRecordCursor(
      RecordReader<K, V> recordReader,
      long totalBytes,
      Properties splitSchema,
      List<HivePartitionKey> partitionKeys,
      List<HiveColumnHandle> columns,
      DateTimeZone hiveStorageTimeZone,
      DateTimeZone sessionTimeZone) {
    checkNotNull(recordReader, "recordReader is null");
    checkArgument(totalBytes >= 0, "totalBytes is negative");
    checkNotNull(splitSchema, "splitSchema is null");
    checkNotNull(partitionKeys, "partitionKeys is null");
    checkNotNull(columns, "columns is null");
    checkArgument(!columns.isEmpty(), "columns is empty");
    checkNotNull(hiveStorageTimeZone, "hiveStorageTimeZone is null");
    checkNotNull(sessionTimeZone, "sessionTimeZone is null");

    this.recordReader = recordReader;
    this.totalBytes = totalBytes;
    this.key = recordReader.createKey();
    this.value = recordReader.createValue();
    this.hiveStorageTimeZone = hiveStorageTimeZone;
    this.sessionTimeZone = sessionTimeZone;

    // The row inspector is derived from the deserializer, so the deserializer comes first.
    this.deserializer = getDeserializer(splitSchema);
    this.rowInspector = getTableObjectInspector(deserializer);

    int columnCount = columns.size();

    // Per-column metadata.
    String[] names = new String[columnCount];
    this.types = new Type[columnCount];
    this.hiveTypes = new HiveType[columnCount];
    this.structFields = new StructField[columnCount];
    this.fieldInspectors = new ObjectInspector[columnCount];
    this.isPartitionColumn = new boolean[columnCount];

    // Per-column value slots.
    this.loaded = new boolean[columnCount];
    this.booleans = new boolean[columnCount];
    this.longs = new long[columnCount];
    this.doubles = new double[columnCount];
    this.slices = new Slice[columnCount];
    this.nulls = new boolean[columnCount];

    // First pass: record metadata; data columns also get their struct field and inspector.
    for (int ordinal = 0; ordinal < columnCount; ordinal++) {
      HiveColumnHandle handle = columns.get(ordinal);

      names[ordinal] = handle.getName();
      types[ordinal] = handle.getType();
      hiveTypes[ordinal] = handle.getHiveType();

      boolean fromPartition = handle.isPartitionKey();
      if (!fromPartition) {
        StructField structField = rowInspector.getStructFieldRef(handle.getName());
        structFields[ordinal] = structField;
        fieldInspectors[ordinal] = structField.getFieldObjectInspector();
      }
      isPartitionColumn[ordinal] = fromPartition;
    }

    // Second pass: parse the value of every requested partition column.
    Map<String, HivePartitionKey> keysByName =
        uniqueIndex(partitionKeys, HivePartitionKey.nameGetter());
    for (int columnIndex = 0; columnIndex < columnCount; columnIndex++) {
      HiveColumnHandle handle = columns.get(columnIndex);
      if (!handle.isPartitionKey()) {
        continue;
      }

      HivePartitionKey partitionKey = keysByName.get(handle.getName());
      checkArgument(partitionKey != null, "Unknown partition key %s", handle.getName());

      byte[] bytes = partitionKey.getValue().getBytes(Charsets.UTF_8);
      Type type = types[columnIndex];

      if (BOOLEAN.equals(type)) {
        // isFalse is only consulted when the value is not recognized as true.
        boolean parsedTrue = isTrue(bytes, 0, bytes.length);
        if (!parsedTrue && !isFalse(bytes, 0, bytes.length)) {
          String valueString = new String(bytes, Charsets.UTF_8);
          throw new IllegalArgumentException(
              String.format(
                  "Invalid partition value '%s' for BOOLEAN partition key %s",
                  valueString, names[columnIndex]));
        }
        booleans[columnIndex] = parsedTrue;
      } else if (BIGINT.equals(type)) {
        checkArgument(
            bytes.length != 0,
            "Invalid partition value '' for BIGINT partition key %s",
            names[columnIndex]);
        longs[columnIndex] = parseLong(bytes, 0, bytes.length);
      } else if (DOUBLE.equals(type)) {
        checkArgument(
            bytes.length != 0,
            "Invalid partition value '' for DOUBLE partition key %s",
            names[columnIndex]);
        doubles[columnIndex] = parseDouble(bytes, 0, bytes.length);
      } else if (VARCHAR.equals(type)) {
        slices[columnIndex] = Slices.wrappedBuffer(Arrays.copyOf(bytes, bytes.length));
      } else {
        throw new UnsupportedOperationException("Unsupported column type: " + type);
      }
    }
  }
  /**
   * Serializes {@code expected} to JSON with {@code codec}, reads it back, and asserts that the
   * client id, name, Hive type, Hive column index and partition-key flag are unchanged.
   *
   * @param expected the column handle to round-trip
   */
  private void testRoundTrip(HiveColumnHandle expected) {
    HiveColumnHandle decoded = codec.fromJson(codec.toJson(expected));

    assertEquals(decoded.getClientId(), expected.getClientId());
    assertEquals(decoded.getName(), expected.getName());
    assertEquals(decoded.getHiveType(), expected.getHiveType());
    assertEquals(decoded.getHiveColumnIndex(), expected.getHiveColumnIndex());
    assertEquals(decoded.isPartitionKey(), expected.isPartitionKey());
  }
 /**
  * Round-trips the handle produced by {@code HiveColumnHandle.pathColumnHandle("client")} through
  * the JSON codec.
  */
 @Test
 public void testHiddenColumn() {
   testRoundTrip(HiveColumnHandle.pathColumnHandle("client"));
 }
Exemplo n.º 7
0
  /**
   * Creates a cursor over {@code recordReader} for the given columns.
   *
   * <p>The constructor validates its arguments, obtains the deserializer and row inspector from
   * {@code splitSchema}, allocates one slot per requested column in each per-column array, and
   * then fills the slots of partition-key columns from the matching partition value. A partition
   * value that {@code HiveUtil.isHiveNull} recognizes marks the column as null; otherwise the
   * value is parsed by the helper that corresponds to the column's type.
   *
   * @param recordReader source of records; also used to create the reusable key and value
   * @param totalBytes total size in bytes; must not be negative
   * @param splitSchema schema properties handed to {@code getDeserializer}
   * @param partitionKeys partition keys available for this split
   * @param columns requested columns
   * @param hiveStorageTimeZone time zone stored on the cursor and passed to the timestamp
   *     partition-key parser
   * @param typeManager resolves each column's type signature to a {@code Type}; it is not
   *     null-checked here
   * @throws IllegalArgumentException if {@code totalBytes} is negative or a partition column has
   *     no matching partition key
   * @throws PrestoException with {@code NOT_SUPPORTED} if a partition column has a type that none
   *     of the branches below handles
   */
  public GenericHiveRecordCursor(
      RecordReader<K, V> recordReader,
      long totalBytes,
      Properties splitSchema,
      List<HivePartitionKey> partitionKeys,
      List<HiveColumnHandle> columns,
      DateTimeZone hiveStorageTimeZone,
      TypeManager typeManager) {
    requireNonNull(recordReader, "recordReader is null");
    checkArgument(totalBytes >= 0, "totalBytes is negative");
    requireNonNull(splitSchema, "splitSchema is null");
    requireNonNull(partitionKeys, "partitionKeys is null");
    requireNonNull(columns, "columns is null");
    requireNonNull(hiveStorageTimeZone, "hiveStorageTimeZone is null");

    this.recordReader = recordReader;
    this.totalBytes = totalBytes;
    this.key = recordReader.createKey();
    this.value = recordReader.createValue();
    this.hiveStorageTimeZone = hiveStorageTimeZone;

    // The row inspector is derived from the deserializer, so the deserializer comes first.
    this.deserializer = getDeserializer(splitSchema);
    this.rowInspector = getTableObjectInspector(deserializer);

    int columnCount = columns.size();

    // Per-column metadata.
    String[] names = new String[columnCount];
    this.types = new Type[columnCount];
    this.hiveTypes = new HiveType[columnCount];
    this.structFields = new StructField[columnCount];
    this.fieldInspectors = new ObjectInspector[columnCount];
    this.isPartitionColumn = new boolean[columnCount];

    // Per-column value slots.
    this.loaded = new boolean[columnCount];
    this.booleans = new boolean[columnCount];
    this.longs = new long[columnCount];
    this.doubles = new double[columnCount];
    this.slices = new Slice[columnCount];
    this.objects = new Object[columnCount];
    this.nulls = new boolean[columnCount];

    // First pass: record metadata; data columns also get their struct field and inspector.
    for (int ordinal = 0; ordinal < columnCount; ordinal++) {
      HiveColumnHandle handle = columns.get(ordinal);

      names[ordinal] = handle.getName();
      types[ordinal] = typeManager.getType(handle.getTypeSignature());
      hiveTypes[ordinal] = handle.getHiveType();

      boolean fromPartition = handle.isPartitionKey();
      if (!fromPartition) {
        StructField structField = rowInspector.getStructFieldRef(handle.getName());
        structFields[ordinal] = structField;
        fieldInspectors[ordinal] = structField.getFieldObjectInspector();
      }
      isPartitionColumn[ordinal] = fromPartition;
    }

    // Second pass: parse the value of every requested partition column.
    Map<String, HivePartitionKey> keysByName =
        uniqueIndex(partitionKeys, HivePartitionKey::getName);
    for (int columnIndex = 0; columnIndex < columnCount; columnIndex++) {
      HiveColumnHandle handle = columns.get(columnIndex);
      if (!handle.isPartitionKey()) {
        continue;
      }

      HivePartitionKey partitionKey = keysByName.get(handle.getName());
      checkArgument(partitionKey != null, "Unknown partition key %s", handle.getName());

      String rawValue = partitionKey.getValue();
      byte[] bytes = rawValue.getBytes(UTF_8);

      String name = names[columnIndex];
      Type type = types[columnIndex];

      // The order of the checks is preserved; the null check runs before any type dispatch.
      if (HiveUtil.isHiveNull(bytes)) {
        nulls[columnIndex] = true;
      } else if (BOOLEAN.equals(type)) {
        booleans[columnIndex] = booleanPartitionKey(rawValue, name);
      } else if (BIGINT.equals(type)) {
        longs[columnIndex] = bigintPartitionKey(rawValue, name);
      } else if (INTEGER.equals(type)) {
        longs[columnIndex] = integerPartitionKey(rawValue, name);
      } else if (SMALLINT.equals(type)) {
        longs[columnIndex] = smallintPartitionKey(rawValue, name);
      } else if (TINYINT.equals(type)) {
        longs[columnIndex] = tinyintPartitionKey(rawValue, name);
      } else if (DOUBLE.equals(type)) {
        doubles[columnIndex] = doublePartitionKey(rawValue, name);
      } else if (isVarcharType(type)) {
        slices[columnIndex] = varcharPartitionKey(rawValue, name, type);
      } else if (DATE.equals(type)) {
        longs[columnIndex] = datePartitionKey(rawValue, name);
      } else if (TIMESTAMP.equals(type)) {
        longs[columnIndex] = timestampPartitionKey(rawValue, hiveStorageTimeZone, name);
      } else if (isShortDecimal(type)) {
        longs[columnIndex] = shortDecimalPartitionKey(rawValue, (DecimalType) type, name);
      } else if (isLongDecimal(type)) {
        slices[columnIndex] = longDecimalPartitionKey(rawValue, (DecimalType) type, name);
      } else {
        throw new PrestoException(
            NOT_SUPPORTED,
            format(
                "Unsupported column type %s for partition key: %s", type.getDisplayName(), name));
      }
    }
  }
Exemplo n.º 8
0
  /**
   * Creates a cursor that reads the requested columns from a Parquet file.
   *
   * <p>The constructor validates its arguments, allocates one slot per requested column in each
   * per-column array, creates the Parquet record reader, and then fills the slots of
   * partition-key columns from the matching partition value. Partition keys are supported for
   * BOOLEAN, BIGINT, DOUBLE and VARCHAR columns.
   *
   * <p>{@code nullsRowDefault} starts as {@code true} for every non-partition column and is set
   * to {@code true} for a partition column only when {@code HiveUtil.isHiveNull} recognizes its
   * value.
   *
   * <p>NOTE(review): the reader is created before the partition values are validated. If {@code
   * createParquetRecordReader} acquires resources, an exception thrown during partition parsing
   * would leave them unreleased; confirm against that method.
   *
   * @param configuration passed through to {@code createParquetRecordReader}
   * @param path file to read
   * @param start passed through to {@code createParquetRecordReader}
   * @param length passed through to {@code createParquetRecordReader} and stored as the total
   *     byte count; must not be negative
   * @param splitSchema schema properties; only null-checked here
   * @param partitionKeys partition keys available for this split
   * @param columns requested columns
   * @param useParquetColumnNames passed through to {@code createParquetRecordReader}
   * @param typeManager resolves each column's type signature to a {@code Type}
   * @throws IllegalArgumentException if {@code length} is negative or a partition column has no
   *     matching partition key
   * @throws PrestoException with {@code NOT_SUPPORTED} if a partition column has a type other
   *     than the four handled here
   */
  public ParquetHiveRecordCursor(
      Configuration configuration,
      Path path,
      long start,
      long length,
      Properties splitSchema,
      List<HivePartitionKey> partitionKeys,
      List<HiveColumnHandle> columns,
      boolean useParquetColumnNames,
      TypeManager typeManager) {
    requireNonNull(path, "path is null");
    checkArgument(length >= 0, "totalBytes is negative");
    requireNonNull(splitSchema, "splitSchema is null");
    requireNonNull(partitionKeys, "partitionKeys is null");
    requireNonNull(columns, "columns is null");

    this.totalBytes = length;

    int columnCount = columns.size();

    // Per-column metadata.
    this.names = new String[columnCount];
    this.types = new Type[columnCount];
    this.isPartitionColumn = new boolean[columnCount];

    // Per-column value slots.
    this.booleans = new boolean[columnCount];
    this.longs = new long[columnCount];
    this.doubles = new double[columnCount];
    this.slices = new Slice[columnCount];
    this.objects = new Object[columnCount];
    this.nulls = new boolean[columnCount];
    this.nullsRowDefault = new boolean[columnCount];

    for (int ordinal = 0; ordinal < columnCount; ordinal++) {
      HiveColumnHandle handle = columns.get(ordinal);
      boolean fromPartition = handle.isPartitionKey();

      names[ordinal] = handle.getName();
      types[ordinal] = typeManager.getType(handle.getTypeSignature());

      isPartitionColumn[ordinal] = fromPartition;
      nullsRowDefault[ordinal] = !fromPartition;
    }

    // Created only after the metadata arrays above have been filled in.
    this.recordReader =
        createParquetRecordReader(
            configuration, path, start, length, columns, useParquetColumnNames);

    // Parse the value of every requested partition column.
    Map<String, HivePartitionKey> keysByName =
        uniqueIndex(partitionKeys, HivePartitionKey::getName);
    for (int columnIndex = 0; columnIndex < columnCount; columnIndex++) {
      HiveColumnHandle handle = columns.get(columnIndex);
      if (!handle.isPartitionKey()) {
        continue;
      }

      HivePartitionKey partitionKey = keysByName.get(handle.getName());
      checkArgument(partitionKey != null, "Unknown partition key %s", handle.getName());

      String rawValue = partitionKey.getValue();
      byte[] bytes = rawValue.getBytes(UTF_8);

      String name = names[columnIndex];
      Type type = types[columnIndex];

      if (HiveUtil.isHiveNull(bytes)) {
        nullsRowDefault[columnIndex] = true;
      } else if (type.equals(BOOLEAN)) {
        booleans[columnIndex] = booleanPartitionKey(rawValue, name);
      } else if (type.equals(BIGINT)) {
        longs[columnIndex] = bigintPartitionKey(rawValue, name);
      } else if (type.equals(DOUBLE)) {
        doubles[columnIndex] = doublePartitionKey(rawValue, name);
      } else if (type.equals(VARCHAR)) {
        slices[columnIndex] = Slices.wrappedBuffer(bytes);
      } else {
        throw new PrestoException(
            NOT_SUPPORTED,
            format(
                "Unsupported column type %s for partition key: %s", type.getDisplayName(), name));
      }
    }
  }