/**
 * Dispatches parsing of a single column to the parser that matches its type.
 *
 * <p>The decision is made on {@code types[column]}, except for structural
 * columns, which are recognised through {@code hiveTypes[column]}.
 *
 * @param column index of the column to parse
 * @throws UnsupportedOperationException if no parser exists for the column type
 */
private void parseColumn(int column) {
    Type columnType = types[column];

    if (BOOLEAN.equals(columnType)) {
        parseBooleanColumn(column);
        return;
    }

    // Every integral width is handled by the same long-based parser.
    boolean integral = BIGINT.equals(columnType)
            || INTEGER.equals(columnType)
            || SMALLINT.equals(columnType)
            || TINYINT.equals(columnType);
    if (integral) {
        parseLongColumn(column);
        return;
    }

    if (DOUBLE.equals(columnType)) {
        parseDoubleColumn(column);
        return;
    }

    if (isVarcharType(columnType) || VARBINARY.equals(columnType)) {
        parseStringColumn(column);
        return;
    }

    if (isStructuralType(hiveTypes[column])) {
        parseObjectColumn(column);
        return;
    }

    // DATE and TIMESTAMP also go through the long-based parser.
    if (DATE.equals(columnType) || TIMESTAMP.equals(columnType)) {
        parseLongColumn(column);
        return;
    }

    if (columnType instanceof DecimalType) {
        parseDecimalColumn(column);
        return;
    }

    throw new UnsupportedOperationException("Unsupported column type: " + columnType);
}
/**
 * Populates this object from one serialized GPDBWritable packet.
 *
 * <p>Read order: packet length, version, error flag (current version only),
 * column count, one type ordinal byte per column, the null bit array, then
 * each non-null column value preceded by its alignment padding, and finally
 * the padding that rounds the packet up to an 8-byte boundary.
 *
 * <p>If the packet length marks the object as empty (see {@code isEmpty()}),
 * nothing else is read.
 *
 * @param in source of the serialized packet
 * @throws IOException if the version is unsupported, a type ordinal is
 *     unknown, the column count or a variable-length header is invalid, the
 *     stream ends prematurely, or the packet carries a non-zero error flag
 */
@Override
public void readFields(DataInput in) throws IOException {
    /*
     * extract pkt len.
     *
     * GPSQL-1107:
     * The DataInput might already be empty (EOF), but we can't check it beforehand.
     * If that's the case, pktlen is updated to -1, to mark that the object is still empty.
     * (can be checked with isEmpty()).
     */
    pktlen = readPktLen(in);
    if (isEmpty()) {
        return;
    }

    /* extract the version and col cnt */
    int version = in.readShort();
    int curOffset = 4 + 2;
    int colCnt;

    /* !!! Check VERSION !!! */
    if (version != GPDBWritable.VERSION && version != GPDBWritable.PREV_VERSION) {
        throw new IOException(
                "Current GPDBWritable version(" + GPDBWritable.VERSION
                        + ") does not match input version(" + version + ")");
    }

    /*
     * Only the current version carries an error flag byte. For PREV_VERSION
     * the errorFlag field keeps whatever value it already had.
     */
    if (version == GPDBWritable.VERSION) {
        errorFlag = in.readByte();
        curOffset += 1;
    }

    colCnt = in.readShort();
    curOffset += 2;

    /* A negative count can only come from a corrupt packet; fail as an I/O error. */
    if (colCnt < 0) {
        throw new IOException("Invalid GPDBWritable column count: " + colCnt);
    }

    /* Extract Column Type */
    colType = new int[colCnt];
    DBType[] coldbtype = new DBType[colCnt];
    for (int i = 0; i < colCnt; i++) {
        int enumType = (in.readByte());
        curOffset += 1;
        if (enumType == DBType.BIGINT.ordinal()) {
            colType[i] = BIGINT.getOID();
            coldbtype[i] = DBType.BIGINT;
        } else if (enumType == DBType.BOOLEAN.ordinal()) {
            colType[i] = BOOLEAN.getOID();
            coldbtype[i] = DBType.BOOLEAN;
        } else if (enumType == DBType.FLOAT8.ordinal()) {
            colType[i] = FLOAT8.getOID();
            coldbtype[i] = DBType.FLOAT8;
        } else if (enumType == DBType.INTEGER.ordinal()) {
            colType[i] = INTEGER.getOID();
            coldbtype[i] = DBType.INTEGER;
        } else if (enumType == DBType.REAL.ordinal()) {
            colType[i] = REAL.getOID();
            coldbtype[i] = DBType.REAL;
        } else if (enumType == DBType.SMALLINT.ordinal()) {
            colType[i] = SMALLINT.getOID();
            coldbtype[i] = DBType.SMALLINT;
        } else if (enumType == DBType.BYTEA.ordinal()) {
            colType[i] = BYTEA.getOID();
            coldbtype[i] = DBType.BYTEA;
        } else if (enumType == DBType.TEXT.ordinal()) {
            colType[i] = TEXT.getOID();
            coldbtype[i] = DBType.TEXT;
        } else {
            throw new IOException("Unknown GPDBWritable.DBType ordinal value");
        }
    }

    /* Extract null bit array */
    byte[] nullbytes = new byte[getNullByteArraySize(colCnt)];
    in.readFully(nullbytes);
    curOffset += nullbytes.length;
    boolean[] colIsNull = byteArrayToBooleanArray(nullbytes, colCnt);

    /* extract column value */
    colValue = new Object[colCnt];
    for (int i = 0; i < colCnt; i++) {
        if (!colIsNull[i]) {
            /* Skip the alignment padding */
            int skipbytes = roundUpAlignment(curOffset, coldbtype[i].getAlignment()) - curOffset;
            for (int j = 0; j < skipbytes; j++) {
                in.readByte();
            }
            curOffset += skipbytes;

            /* For fixed length type, increment the offset according to the type length here.
             * For var length type (BYTEA, TEXT), we'll read 4 byte length header and the
             * actual payload.
             */
            int varcollen = -1;
            if (coldbtype[i].isVarLength()) {
                varcollen = in.readInt();
                /* Reject a corrupt header before it is used as an array size. */
                if (varcollen < 0) {
                    throw new IOException(
                            "Invalid GPDBWritable variable length " + varcollen + " for column " + i);
                }
                curOffset += 4 + varcollen;
            } else {
                curOffset += coldbtype[i].getTypeLength();
            }

            switch (DataType.get(colType[i])) {
                case BIGINT: {
                    colValue[i] = in.readLong();
                    break;
                }
                case BOOLEAN: {
                    colValue[i] = in.readBoolean();
                    break;
                }
                case FLOAT8: {
                    colValue[i] = in.readDouble();
                    break;
                }
                case INTEGER: {
                    colValue[i] = in.readInt();
                    break;
                }
                case REAL: {
                    colValue[i] = in.readFloat();
                    break;
                }
                case SMALLINT: {
                    colValue[i] = in.readShort();
                    break;
                }

                /* For BYTEA column, it has a 4 byte var length header. */
                case BYTEA: {
                    colValue[i] = new byte[varcollen];
                    in.readFully((byte[]) colValue[i]);
                    break;
                }

                /* For text formatted column, it has a 4 byte var length header
                 * and it's always null terminated string.
                 * So, we can remove the last "\0" when constructing the string.
                 */
                case TEXT: {
                    /* The payload must hold at least the terminator, otherwise
                     * stripping it below would use a negative length.
                     */
                    if (varcollen < 1) {
                        throw new IOException(
                                "Invalid GPDBWritable TEXT length " + varcollen + " for column " + i
                                        + ": missing null terminator");
                    }
                    byte[] data = new byte[varcollen];
                    in.readFully(data, 0, varcollen);
                    colValue[i] = new String(data, 0, varcollen - 1, CHARSET);
                    break;
                }

                default:
                    throw new IOException("Unknown GPDBWritable ColType");
            }
        }
    }

    /* Skip the ending alignment padding */
    int skipbytes = roundUpAlignment(curOffset, 8) - curOffset;
    for (int j = 0; j < skipbytes; j++) {
        in.readByte();
    }
    curOffset += skipbytes;

    /* The whole packet is consumed before the error flag is reported. */
    if (errorFlag != 0) {
        throw new IOException("Received error value " + errorFlag + " from format");
    }
}
public GenericHiveRecordCursor( RecordReader<K, V> recordReader, long totalBytes, Properties splitSchema, List<HivePartitionKey> partitionKeys, List<HiveColumnHandle> columns, DateTimeZone hiveStorageTimeZone, TypeManager typeManager) { requireNonNull(recordReader, "recordReader is null"); checkArgument(totalBytes >= 0, "totalBytes is negative"); requireNonNull(splitSchema, "splitSchema is null"); requireNonNull(partitionKeys, "partitionKeys is null"); requireNonNull(columns, "columns is null"); requireNonNull(hiveStorageTimeZone, "hiveStorageTimeZone is null"); this.recordReader = recordReader; this.totalBytes = totalBytes; this.key = recordReader.createKey(); this.value = recordReader.createValue(); this.hiveStorageTimeZone = hiveStorageTimeZone; this.deserializer = getDeserializer(splitSchema); this.rowInspector = getTableObjectInspector(deserializer); int size = columns.size(); String[] names = new String[size]; this.types = new Type[size]; this.hiveTypes = new HiveType[size]; this.structFields = new StructField[size]; this.fieldInspectors = new ObjectInspector[size]; this.isPartitionColumn = new boolean[size]; this.loaded = new boolean[size]; this.booleans = new boolean[size]; this.longs = new long[size]; this.doubles = new double[size]; this.slices = new Slice[size]; this.objects = new Object[size]; this.nulls = new boolean[size]; // initialize data columns for (int i = 0; i < columns.size(); i++) { HiveColumnHandle column = columns.get(i); names[i] = column.getName(); types[i] = typeManager.getType(column.getTypeSignature()); hiveTypes[i] = column.getHiveType(); if (!column.isPartitionKey()) { StructField field = rowInspector.getStructFieldRef(column.getName()); structFields[i] = field; fieldInspectors[i] = field.getFieldObjectInspector(); } isPartitionColumn[i] = column.isPartitionKey(); } // parse requested partition columns Map<String, HivePartitionKey> partitionKeysByName = uniqueIndex(partitionKeys, HivePartitionKey::getName); for (int columnIndex = 0; 
columnIndex < columns.size(); columnIndex++) { HiveColumnHandle column = columns.get(columnIndex); if (column.isPartitionKey()) { HivePartitionKey partitionKey = partitionKeysByName.get(column.getName()); checkArgument(partitionKey != null, "Unknown partition key %s", column.getName()); byte[] bytes = partitionKey.getValue().getBytes(UTF_8); String name = names[columnIndex]; Type type = types[columnIndex]; if (HiveUtil.isHiveNull(bytes)) { nulls[columnIndex] = true; } else if (BOOLEAN.equals(type)) { booleans[columnIndex] = booleanPartitionKey(partitionKey.getValue(), name); } else if (BIGINT.equals(type)) { longs[columnIndex] = bigintPartitionKey(partitionKey.getValue(), name); } else if (INTEGER.equals(type)) { longs[columnIndex] = integerPartitionKey(partitionKey.getValue(), name); } else if (SMALLINT.equals(type)) { longs[columnIndex] = smallintPartitionKey(partitionKey.getValue(), name); } else if (TINYINT.equals(type)) { longs[columnIndex] = tinyintPartitionKey(partitionKey.getValue(), name); } else if (DOUBLE.equals(type)) { doubles[columnIndex] = doublePartitionKey(partitionKey.getValue(), name); } else if (isVarcharType(type)) { slices[columnIndex] = varcharPartitionKey(partitionKey.getValue(), name, type); } else if (DATE.equals(type)) { longs[columnIndex] = datePartitionKey(partitionKey.getValue(), name); } else if (TIMESTAMP.equals(type)) { longs[columnIndex] = timestampPartitionKey(partitionKey.getValue(), hiveStorageTimeZone, name); } else if (isShortDecimal(type)) { longs[columnIndex] = shortDecimalPartitionKey(partitionKey.getValue(), (DecimalType) type, name); } else if (isLongDecimal(type)) { slices[columnIndex] = longDecimalPartitionKey(partitionKey.getValue(), (DecimalType) type, name); } else { throw new PrestoException( NOT_SUPPORTED, format( "Unsupported column type %s for partition key: %s", type.getDisplayName(), name)); } } } }
/**
 * Converts the textual value of a partition column into a {@link NullableValue}
 * of the requested type.
 *
 * <p>A value equal to {@code HIVE_DEFAULT_DYNAMIC_PARTITION} yields a null of
 * the matched type. For boolean, integral, real, double and decimal types an
 * empty string yields that type's zero/false value. All other values are
 * handed to the type-specific partition key parser.
 *
 * @param partitionName name of the partition column, passed to the parsers and used in the error message
 * @param value textual partition value
 * @param type target type of the partition column
 * @param timeZone time zone passed to timestamp parsing
 * @return the parsed value, or a typed null
 * @throws PrestoException with {@code NOT_SUPPORTED} if {@code type} matches none of the handled types
 */
public static NullableValue parsePartitionValue(
        String partitionName, String value, Type type, DateTimeZone timeZone) {
    boolean nullPartition = HIVE_DEFAULT_DYNAMIC_PARTITION.equals(value);

    if (type instanceof DecimalType) {
        DecimalType decimal = (DecimalType) type;
        if (nullPartition) {
            return NullableValue.asNull(decimal);
        }
        if (decimal.isShort()) {
            return value.isEmpty()
                    ? NullableValue.of(decimal, 0L)
                    : NullableValue.of(decimal, shortDecimalPartitionKey(value, decimal, partitionName));
        }
        return value.isEmpty()
                ? NullableValue.of(decimal, Decimals.encodeUnscaledValue(BigInteger.ZERO))
                : NullableValue.of(decimal, longDecimalPartitionKey(value, decimal, partitionName));
    }

    if (BOOLEAN.equals(type)) {
        if (nullPartition) {
            return NullableValue.asNull(BOOLEAN);
        }
        return value.isEmpty()
                ? NullableValue.of(BOOLEAN, false)
                : NullableValue.of(BOOLEAN, booleanPartitionKey(value, partitionName));
    }

    if (TINYINT.equals(type)) {
        if (nullPartition) {
            return NullableValue.asNull(TINYINT);
        }
        return value.isEmpty()
                ? NullableValue.of(TINYINT, 0L)
                : NullableValue.of(TINYINT, tinyintPartitionKey(value, partitionName));
    }

    if (SMALLINT.equals(type)) {
        if (nullPartition) {
            return NullableValue.asNull(SMALLINT);
        }
        return value.isEmpty()
                ? NullableValue.of(SMALLINT, 0L)
                : NullableValue.of(SMALLINT, smallintPartitionKey(value, partitionName));
    }

    if (INTEGER.equals(type)) {
        if (nullPartition) {
            return NullableValue.asNull(INTEGER);
        }
        return value.isEmpty()
                ? NullableValue.of(INTEGER, 0L)
                : NullableValue.of(INTEGER, integerPartitionKey(value, partitionName));
    }

    if (BIGINT.equals(type)) {
        if (nullPartition) {
            return NullableValue.asNull(BIGINT);
        }
        return value.isEmpty()
                ? NullableValue.of(BIGINT, 0L)
                : NullableValue.of(BIGINT, bigintPartitionKey(value, partitionName));
    }

    // DATE and TIMESTAMP have no empty-string shortcut; the value always goes to the parser.
    if (DATE.equals(type)) {
        return nullPartition
                ? NullableValue.asNull(DATE)
                : NullableValue.of(DATE, datePartitionKey(value, partitionName));
    }

    if (TIMESTAMP.equals(type)) {
        return nullPartition
                ? NullableValue.asNull(TIMESTAMP)
                : NullableValue.of(TIMESTAMP, timestampPartitionKey(value, timeZone, partitionName));
    }

    if (REAL.equals(type)) {
        if (nullPartition) {
            return NullableValue.asNull(REAL);
        }
        return value.isEmpty()
                ? NullableValue.of(REAL, (long) floatToRawIntBits(0.0f))
                : NullableValue.of(REAL, floatPartitionKey(value, partitionName));
    }

    if (DOUBLE.equals(type)) {
        if (nullPartition) {
            return NullableValue.asNull(DOUBLE);
        }
        return value.isEmpty()
                ? NullableValue.of(DOUBLE, 0.0)
                : NullableValue.of(DOUBLE, doublePartitionKey(value, partitionName));
    }

    // Character types keep the caller's type instance; an empty string is a regular value here.
    if (type instanceof VarcharType) {
        return nullPartition
                ? NullableValue.asNull(type)
                : NullableValue.of(type, varcharPartitionKey(value, partitionName, type));
    }

    if (isCharType(type)) {
        return nullPartition
                ? NullableValue.asNull(type)
                : NullableValue.of(type, charPartitionKey(value, partitionName, type));
    }

    throw new PrestoException(
            NOT_SUPPORTED,
            format("Unsupported Type [%s] for partition: %s", type, partitionName));
}