private void parseColumn(int column) { Type type = types[column]; if (BOOLEAN.equals(type)) { parseBooleanColumn(column); } else if (BIGINT.equals(type)) { parseLongColumn(column); } else if (INTEGER.equals(type)) { parseLongColumn(column); } else if (SMALLINT.equals(type)) { parseLongColumn(column); } else if (TINYINT.equals(type)) { parseLongColumn(column); } else if (DOUBLE.equals(type)) { parseDoubleColumn(column); } else if (isVarcharType(type) || VARBINARY.equals(type)) { parseStringColumn(column); } else if (isStructuralType(hiveTypes[column])) { parseObjectColumn(column); } else if (DATE.equals(type)) { parseLongColumn(column); } else if (TIMESTAMP.equals(type)) { parseLongColumn(column); } else if (type instanceof DecimalType) { parseDecimalColumn(column); } else { throw new UnsupportedOperationException("Unsupported column type: " + type); } }
private static void assertReadFields(RecordCursor cursor, List<ColumnMetadata> schema) { for (int columnIndex = 0; columnIndex < schema.size(); columnIndex++) { ColumnMetadata column = schema.get(columnIndex); if (!cursor.isNull(columnIndex)) { Type type = column.getType(); if (BOOLEAN.equals(type)) { cursor.getBoolean(columnIndex); } else if (BIGINT.equals(type)) { cursor.getLong(columnIndex); } else if (TIMESTAMP.equals(type)) { cursor.getLong(columnIndex); } else if (DOUBLE.equals(type)) { cursor.getDouble(columnIndex); } else if (VARCHAR.equals(type) || VARBINARY.equals(type)) { try { cursor.getSlice(columnIndex); } catch (RuntimeException e) { throw new RuntimeException("column " + column, e); } } else { fail("Unknown primitive type " + columnIndex); } } } }
protected void checkCursor(RecordCursor cursor, List<TestColumn> testColumns, int numRows) throws IOException { for (int row = 0; row < numRows; row++) { assertTrue(cursor.advanceNextPosition()); for (int i = 0, testColumnsSize = testColumns.size(); i < testColumnsSize; i++) { TestColumn testColumn = testColumns.get(i); Object fieldFromCursor; Type type = HiveType.valueOf(testColumn.getObjectInspector().getTypeName()).getType(TYPE_MANAGER); if (cursor.isNull(i)) { fieldFromCursor = null; } else if (BOOLEAN.equals(type)) { fieldFromCursor = cursor.getBoolean(i); } else if (BIGINT.equals(type)) { fieldFromCursor = cursor.getLong(i); } else if (DOUBLE.equals(type)) { fieldFromCursor = cursor.getDouble(i); } else if (VARCHAR.equals(type)) { fieldFromCursor = cursor.getSlice(i); } else if (VARBINARY.equals(type)) { fieldFromCursor = cursor.getSlice(i); } else if (DateType.DATE.equals(type)) { fieldFromCursor = cursor.getLong(i); } else if (TimestampType.TIMESTAMP.equals(type)) { fieldFromCursor = cursor.getLong(i); } else if (isStructuralType(type)) { fieldFromCursor = cursor.getObject(i); } else { throw new RuntimeException("unknown type"); } if (fieldFromCursor == null) { assertEquals( null, testColumn.getExpectedValue(), String.format("Expected null for column %s", testColumn.getName())); } else if (testColumn.getObjectInspector().getTypeName().equals("float") || testColumn.getObjectInspector().getTypeName().equals("double")) { assertEquals((double) fieldFromCursor, (double) testColumn.getExpectedValue(), EPSILON); } else if (testColumn.getObjectInspector().getCategory() == Category.PRIMITIVE) { assertEquals( fieldFromCursor, testColumn.getExpectedValue(), String.format("Wrong value for column %s", testColumn.getName())); } else { Block expected = (Block) testColumn.getExpectedValue(); Block actual = (Block) fieldFromCursor; assertBlockEquals( actual, expected, String.format("Wrong value for column %s", testColumn.getName())); } } } }
private static void insertRows( ConnectorTableMetadata tableMetadata, Handle handle, RecordSet data) { List<ColumnMetadata> columns = ImmutableList.copyOf( Iterables.filter( tableMetadata.getColumns(), new Predicate<ColumnMetadata>() { @Override public boolean apply(ColumnMetadata columnMetadata) { return !columnMetadata.isHidden(); } })); String vars = Joiner.on(',').join(nCopies(columns.size(), "?")); String sql = format("INSERT INTO %s VALUES (%s)", tableMetadata.getTable().getTableName(), vars); RecordCursor cursor = data.cursor(); while (true) { // insert 1000 rows at a time PreparedBatch batch = handle.prepareBatch(sql); for (int row = 0; row < 1000; row++) { if (!cursor.advanceNextPosition()) { batch.execute(); return; } PreparedBatchPart part = batch.add(); for (int column = 0; column < columns.size(); column++) { Type type = columns.get(column).getType(); if (BOOLEAN.equals(type)) { part.bind(column, cursor.getBoolean(column)); } else if (BIGINT.equals(type)) { part.bind(column, cursor.getLong(column)); } else if (DOUBLE.equals(type)) { part.bind(column, cursor.getDouble(column)); } else if (VARCHAR.equals(type)) { part.bind(column, cursor.getSlice(column).toStringUtf8()); } else if (DATE.equals(type)) { long millisUtc = TimeUnit.DAYS.toMillis(cursor.getLong(column)); // H2 expects dates in to be millis at midnight in the JVM timezone long localMillis = DateTimeZone.UTC.getMillisKeepLocal(DateTimeZone.getDefault(), millisUtc); part.bind(column, new Date(localMillis)); } else { throw new IllegalArgumentException("Unsupported type " + type); } } } batch.execute(); } }
private void parseColumn(int column) { Type type = types[column]; if (BOOLEAN.equals(type)) { parseBooleanColumn(column); } else if (BIGINT.equals(type)) { parseLongColumn(column); } else if (DOUBLE.equals(type)) { parseDoubleColumn(column); } else if (VARCHAR.equals(type) || VARBINARY.equals(type)) { parseStringColumn(column); } else if (TIMESTAMP.equals(type)) { parseLongColumn(column); } else { throw new UnsupportedOperationException("Unsupported column type: " + type); } }
public GenericHiveRecordCursor( RecordReader<K, V> recordReader, long totalBytes, Properties splitSchema, List<HivePartitionKey> partitionKeys, List<HiveColumnHandle> columns, DateTimeZone hiveStorageTimeZone, DateTimeZone sessionTimeZone) { checkNotNull(recordReader, "recordReader is null"); checkArgument(totalBytes >= 0, "totalBytes is negative"); checkNotNull(splitSchema, "splitSchema is null"); checkNotNull(partitionKeys, "partitionKeys is null"); checkNotNull(columns, "columns is null"); checkArgument(!columns.isEmpty(), "columns is empty"); checkNotNull(hiveStorageTimeZone, "hiveStorageTimeZone is null"); checkNotNull(sessionTimeZone, "sessionTimeZone is null"); this.recordReader = recordReader; this.totalBytes = totalBytes; this.key = recordReader.createKey(); this.value = recordReader.createValue(); this.hiveStorageTimeZone = hiveStorageTimeZone; this.sessionTimeZone = sessionTimeZone; this.deserializer = getDeserializer(splitSchema); this.rowInspector = getTableObjectInspector(deserializer); int size = columns.size(); String[] names = new String[size]; this.types = new Type[size]; this.hiveTypes = new HiveType[size]; this.structFields = new StructField[size]; this.fieldInspectors = new ObjectInspector[size]; this.isPartitionColumn = new boolean[size]; this.loaded = new boolean[size]; this.booleans = new boolean[size]; this.longs = new long[size]; this.doubles = new double[size]; this.slices = new Slice[size]; this.nulls = new boolean[size]; // initialize data columns for (int i = 0; i < columns.size(); i++) { HiveColumnHandle column = columns.get(i); names[i] = column.getName(); types[i] = column.getType(); hiveTypes[i] = column.getHiveType(); if (!column.isPartitionKey()) { StructField field = rowInspector.getStructFieldRef(column.getName()); structFields[i] = field; fieldInspectors[i] = field.getFieldObjectInspector(); } isPartitionColumn[i] = column.isPartitionKey(); } // parse requested partition columns Map<String, HivePartitionKey> partitionKeysByName = uniqueIndex(partitionKeys, HivePartitionKey.nameGetter()); for (int columnIndex = 0; columnIndex < columns.size(); columnIndex++) { HiveColumnHandle column = columns.get(columnIndex); if (column.isPartitionKey()) { HivePartitionKey partitionKey = partitionKeysByName.get(column.getName()); checkArgument(partitionKey != null, "Unknown partition key %s", column.getName()); byte[] bytes = partitionKey.getValue().getBytes(Charsets.UTF_8); Type type = types[columnIndex]; if (BOOLEAN.equals(type)) { if (isTrue(bytes, 0, bytes.length)) { booleans[columnIndex] = true; } else if (isFalse(bytes, 0, bytes.length)) { booleans[columnIndex] = false; } else { String valueString = new String(bytes, Charsets.UTF_8); throw new IllegalArgumentException( String.format( "Invalid partition value '%s' for BOOLEAN partition key %s", valueString, names[columnIndex])); } } else if (BIGINT.equals(type)) { if (bytes.length == 0) { throw new IllegalArgumentException( String.format( "Invalid partition value '' for BIGINT partition key %s", names[columnIndex])); } longs[columnIndex] = parseLong(bytes, 0, bytes.length); } else if (DOUBLE.equals(type)) { if (bytes.length == 0) { throw new IllegalArgumentException( String.format( "Invalid partition value '' for DOUBLE partition key %s", names[columnIndex])); } doubles[columnIndex] = parseDouble(bytes, 0, bytes.length); } else if (VARCHAR.equals(type)) { slices[columnIndex] = Slices.wrappedBuffer(Arrays.copyOf(bytes, bytes.length)); } else { throw new UnsupportedOperationException("Unsupported column type: " + type); } } } }
public GenericHiveRecordCursor( RecordReader<K, V> recordReader, long totalBytes, Properties splitSchema, List<HivePartitionKey> partitionKeys, List<HiveColumnHandle> columns, DateTimeZone hiveStorageTimeZone, TypeManager typeManager) { requireNonNull(recordReader, "recordReader is null"); checkArgument(totalBytes >= 0, "totalBytes is negative"); requireNonNull(splitSchema, "splitSchema is null"); requireNonNull(partitionKeys, "partitionKeys is null"); requireNonNull(columns, "columns is null"); requireNonNull(hiveStorageTimeZone, "hiveStorageTimeZone is null"); this.recordReader = recordReader; this.totalBytes = totalBytes; this.key = recordReader.createKey(); this.value = recordReader.createValue(); this.hiveStorageTimeZone = hiveStorageTimeZone; this.deserializer = getDeserializer(splitSchema); this.rowInspector = getTableObjectInspector(deserializer); int size = columns.size(); String[] names = new String[size]; this.types = new Type[size]; this.hiveTypes = new HiveType[size]; this.structFields = new StructField[size]; this.fieldInspectors = new ObjectInspector[size]; this.isPartitionColumn = new boolean[size]; this.loaded = new boolean[size]; this.booleans = new boolean[size]; this.longs = new long[size]; this.doubles = new double[size]; this.slices = new Slice[size]; this.objects = new Object[size]; this.nulls = new boolean[size]; // initialize data columns for (int i = 0; i < columns.size(); i++) { HiveColumnHandle column = columns.get(i); names[i] = column.getName(); types[i] = typeManager.getType(column.getTypeSignature()); hiveTypes[i] = column.getHiveType(); if (!column.isPartitionKey()) { StructField field = rowInspector.getStructFieldRef(column.getName()); structFields[i] = field; fieldInspectors[i] = field.getFieldObjectInspector(); } isPartitionColumn[i] = column.isPartitionKey(); } // parse requested partition columns Map<String, HivePartitionKey> partitionKeysByName = uniqueIndex(partitionKeys, HivePartitionKey::getName); for (int columnIndex = 0; columnIndex < columns.size(); columnIndex++) { HiveColumnHandle column = columns.get(columnIndex); if (column.isPartitionKey()) { HivePartitionKey partitionKey = partitionKeysByName.get(column.getName()); checkArgument(partitionKey != null, "Unknown partition key %s", column.getName()); byte[] bytes = partitionKey.getValue().getBytes(UTF_8); String name = names[columnIndex]; Type type = types[columnIndex]; if (HiveUtil.isHiveNull(bytes)) { nulls[columnIndex] = true; } else if (BOOLEAN.equals(type)) { booleans[columnIndex] = booleanPartitionKey(partitionKey.getValue(), name); } else if (BIGINT.equals(type)) { longs[columnIndex] = bigintPartitionKey(partitionKey.getValue(), name); } else if (INTEGER.equals(type)) { longs[columnIndex] = integerPartitionKey(partitionKey.getValue(), name); } else if (SMALLINT.equals(type)) { longs[columnIndex] = smallintPartitionKey(partitionKey.getValue(), name); } else if (TINYINT.equals(type)) { longs[columnIndex] = tinyintPartitionKey(partitionKey.getValue(), name); } else if (DOUBLE.equals(type)) { doubles[columnIndex] = doublePartitionKey(partitionKey.getValue(), name); } else if (isVarcharType(type)) { slices[columnIndex] = varcharPartitionKey(partitionKey.getValue(), name, type); } else if (DATE.equals(type)) { longs[columnIndex] = datePartitionKey(partitionKey.getValue(), name); } else if (TIMESTAMP.equals(type)) { longs[columnIndex] = timestampPartitionKey(partitionKey.getValue(), hiveStorageTimeZone, name); } else if (isShortDecimal(type)) { longs[columnIndex] = shortDecimalPartitionKey(partitionKey.getValue(), (DecimalType) type, name); } else if (isLongDecimal(type)) { slices[columnIndex] = longDecimalPartitionKey(partitionKey.getValue(), (DecimalType) type, name); } else { throw new PrestoException( NOT_SUPPORTED, format( "Unsupported column type %s for partition key: %s", type.getDisplayName(), name)); } } } }
public static NullableValue parsePartitionValue( String partitionName, String value, Type type, DateTimeZone timeZone) { boolean isNull = HIVE_DEFAULT_DYNAMIC_PARTITION.equals(value); if (type instanceof DecimalType) { DecimalType decimalType = (DecimalType) type; if (isNull) { return NullableValue.asNull(decimalType); } if (decimalType.isShort()) { if (value.isEmpty()) { return NullableValue.of(decimalType, 0L); } return NullableValue.of( decimalType, shortDecimalPartitionKey(value, decimalType, partitionName)); } else { if (value.isEmpty()) { return NullableValue.of(decimalType, Decimals.encodeUnscaledValue(BigInteger.ZERO)); } return NullableValue.of( decimalType, longDecimalPartitionKey(value, decimalType, partitionName)); } } if (BOOLEAN.equals(type)) { if (isNull) { return NullableValue.asNull(BOOLEAN); } if (value.isEmpty()) { return NullableValue.of(BOOLEAN, false); } return NullableValue.of(BOOLEAN, booleanPartitionKey(value, partitionName)); } if (TINYINT.equals(type)) { if (isNull) { return NullableValue.asNull(TINYINT); } if (value.isEmpty()) { return NullableValue.of(TINYINT, 0L); } return NullableValue.of(TINYINT, tinyintPartitionKey(value, partitionName)); } if (SMALLINT.equals(type)) { if (isNull) { return NullableValue.asNull(SMALLINT); } if (value.isEmpty()) { return NullableValue.of(SMALLINT, 0L); } return NullableValue.of(SMALLINT, smallintPartitionKey(value, partitionName)); } if (INTEGER.equals(type)) { if (isNull) { return NullableValue.asNull(INTEGER); } if (value.isEmpty()) { return NullableValue.of(INTEGER, 0L); } return NullableValue.of(INTEGER, integerPartitionKey(value, partitionName)); } if (BIGINT.equals(type)) { if (isNull) { return NullableValue.asNull(BIGINT); } if (value.isEmpty()) { return NullableValue.of(BIGINT, 0L); } return NullableValue.of(BIGINT, bigintPartitionKey(value, partitionName)); } if (DATE.equals(type)) { if (isNull) { return NullableValue.asNull(DATE); } return NullableValue.of(DATE, datePartitionKey(value, partitionName)); } if (TIMESTAMP.equals(type)) { if (isNull) { return NullableValue.asNull(TIMESTAMP); } return NullableValue.of(TIMESTAMP, timestampPartitionKey(value, timeZone, partitionName)); } if (REAL.equals(type)) { if (isNull) { return NullableValue.asNull(REAL); } if (value.isEmpty()) { return NullableValue.of(REAL, (long) floatToRawIntBits(0.0f)); } return NullableValue.of(REAL, floatPartitionKey(value, partitionName)); } if (DOUBLE.equals(type)) { if (isNull) { return NullableValue.asNull(DOUBLE); } if (value.isEmpty()) { return NullableValue.of(DOUBLE, 0.0); } return NullableValue.of(DOUBLE, doublePartitionKey(value, partitionName)); } if (type instanceof VarcharType) { if (isNull) { return NullableValue.asNull(type); } return NullableValue.of(type, varcharPartitionKey(value, partitionName, type)); } if (isCharType(type)) { if (isNull) { return NullableValue.asNull(type); } return NullableValue.of(type, charPartitionKey(value, partitionName, type)); } throw new PrestoException( NOT_SUPPORTED, format("Unsupported Type [%s] for partition: %s", type, partitionName)); }