コード例 #1
0
 /**
  * Routes the given column to the parse routine that matches its declared type.
  *
  * <p>The type tests run in a fixed order; the first one that matches decides which routine is
  * invoked. The four integer-family types, DATE and TIMESTAMP all go through
  * {@code parseLongColumn}; varchar and VARBINARY go through {@code parseStringColumn};
  * structural Hive types go through {@code parseObjectColumn}.
  *
  * @param column index into {@code types} / {@code hiveTypes} of the column to parse
  * @throws UnsupportedOperationException if no branch recognizes the column's type
  */
 private void parseColumn(int column) {
   final Type columnType = types[column];

   if (BOOLEAN.equals(columnType)) {
     parseBooleanColumn(column);
     return;
   }

   // All integer widths share the long-based parser.
   if (BIGINT.equals(columnType)
       || INTEGER.equals(columnType)
       || SMALLINT.equals(columnType)
       || TINYINT.equals(columnType)) {
     parseLongColumn(column);
     return;
   }

   if (DOUBLE.equals(columnType)) {
     parseDoubleColumn(column);
     return;
   }

   if (isVarcharType(columnType) || VARBINARY.equals(columnType)) {
     parseStringColumn(column);
     return;
   }

   // Structural detection is based on the Hive type, not the engine type.
   if (isStructuralType(hiveTypes[column])) {
     parseObjectColumn(column);
     return;
   }

   // DATE and TIMESTAMP are tested after the structural check, as in the original ordering.
   if (DATE.equals(columnType) || TIMESTAMP.equals(columnType)) {
     parseLongColumn(column);
     return;
   }

   if (columnType instanceof DecimalType) {
     parseDecimalColumn(column);
     return;
   }

   throw new UnsupportedOperationException("Unsupported column type: " + columnType);
 }
コード例 #2
0
  /**
   * Populates this object from one serialized record read from {@code in}.
   *
   * <p>Read order, as implemented below: packet length, version (short), error flag (one byte,
   * only when the version equals {@code GPDBWritable.VERSION}), column count (short), one type
   * byte per column, the null bitmap, then every non-null column value (each preceded by its
   * alignment padding), and finally padding up to the next 8-byte boundary.
   *
   * <p>GPSQL-1107: the input might already be empty (EOF), which cannot be checked beforehand.
   * In that case {@code pktlen} marks the object as still empty (see {@code isEmpty()}) and the
   * method returns without reading anything else.
   *
   * <p>Sizes taken from the stream (column count, variable-length headers) are validated before
   * they are used to allocate arrays, so malformed input is reported as an {@link IOException}
   * rather than as an unchecked array/string exception.
   *
   * @param in source of the serialized record
   * @throws IOException if the version is unsupported, a column type is unknown, a size field is
   *     invalid, the record carries a non-zero error flag, or the underlying read fails
   */
  @Override
  public void readFields(DataInput in) throws IOException {
    pktlen = readPktLen(in);
    if (isEmpty()) {
      return;
    }

    /* extract the version and col cnt */
    int version = in.readShort();
    // Running count of bytes consumed so far; used only to compute alignment padding.
    // 4 + 2 presumably accounts for the packet-length int and the version short -- TODO confirm.
    int curOffset = 4 + 2;

    /* !!! Check VERSION !!! */
    if (version != GPDBWritable.VERSION && version != GPDBWritable.PREV_VERSION) {
      throw new IOException(
          "Current GPDBWritable version("
              + GPDBWritable.VERSION
              + ") does not match input version("
              + version
              + ")");
    }

    // Only the current version carries the error-flag byte.
    // NOTE(review): on the PREV_VERSION path errorFlag keeps whatever value it had before this
    // call -- confirm that is intended if instances are reused across records.
    if (version == GPDBWritable.VERSION) {
      errorFlag = in.readByte();
      curOffset += 1;
    }

    int colCnt = in.readShort();
    curOffset += 2;
    if (colCnt < 0) {
      // Would otherwise surface as NegativeArraySizeException from the allocations below.
      throw new IOException("Invalid GPDBWritable column count: " + colCnt);
    }

    /* Extract Column Type */
    colType = new int[colCnt];
    DBType[] coldbtype = new DBType[colCnt];
    for (int i = 0; i < colCnt; i++) {
      int enumType = (in.readByte());
      curOffset += 1;
      if (enumType == DBType.BIGINT.ordinal()) {
        colType[i] = BIGINT.getOID();
        coldbtype[i] = DBType.BIGINT;
      } else if (enumType == DBType.BOOLEAN.ordinal()) {
        colType[i] = BOOLEAN.getOID();
        coldbtype[i] = DBType.BOOLEAN;
      } else if (enumType == DBType.FLOAT8.ordinal()) {
        colType[i] = FLOAT8.getOID();
        coldbtype[i] = DBType.FLOAT8;
      } else if (enumType == DBType.INTEGER.ordinal()) {
        colType[i] = INTEGER.getOID();
        coldbtype[i] = DBType.INTEGER;
      } else if (enumType == DBType.REAL.ordinal()) {
        colType[i] = REAL.getOID();
        coldbtype[i] = DBType.REAL;
      } else if (enumType == DBType.SMALLINT.ordinal()) {
        colType[i] = SMALLINT.getOID();
        coldbtype[i] = DBType.SMALLINT;
      } else if (enumType == DBType.BYTEA.ordinal()) {
        colType[i] = BYTEA.getOID();
        coldbtype[i] = DBType.BYTEA;
      } else if (enumType == DBType.TEXT.ordinal()) {
        colType[i] = TEXT.getOID();
        coldbtype[i] = DBType.TEXT;
      } else {
        throw new IOException("Unknown GPDBWritable.DBType ordinal value");
      }
    }

    /* Extract null bit array */
    byte[] nullbytes = new byte[getNullByteArraySize(colCnt)];
    in.readFully(nullbytes);
    curOffset += nullbytes.length;
    boolean[] colIsNull = byteArrayToBooleanArray(nullbytes, colCnt);

    /* extract column value */
    colValue = new Object[colCnt];
    for (int i = 0; i < colCnt; i++) {
      if (!colIsNull[i]) {
        /* Skip the alignment padding */
        int skipbytes = roundUpAlignment(curOffset, coldbtype[i].getAlignment()) - curOffset;
        for (int j = 0; j < skipbytes; j++) {
          in.readByte();
        }
        curOffset += skipbytes;

        /* For fixed length type, increment the offset according to type type length here.
         * For var length type (BYTEA, TEXT), we'll read 4 byte length header and the
         * actual payload.
         */
        int varcollen = -1;
        if (coldbtype[i].isVarLength()) {
          varcollen = in.readInt();
          if (varcollen < 0) {
            // Reject before the length corrupts curOffset or reaches an array allocation.
            throw new IOException("Invalid GPDBWritable variable column length: " + varcollen);
          }
          curOffset += 4 + varcollen;
        } else {
          curOffset += coldbtype[i].getTypeLength();
        }

        switch (DataType.get(colType[i])) {
          case BIGINT:
            {
              colValue[i] = in.readLong();
              break;
            }
          case BOOLEAN:
            {
              colValue[i] = in.readBoolean();
              break;
            }
          case FLOAT8:
            {
              colValue[i] = in.readDouble();
              break;
            }
          case INTEGER:
            {
              colValue[i] = in.readInt();
              break;
            }
          case REAL:
            {
              colValue[i] = in.readFloat();
              break;
            }
          case SMALLINT:
            {
              colValue[i] = in.readShort();
              break;
            }

            /* For BYTEA column, it has a 4 byte var length header. */
          case BYTEA:
            {
              colValue[i] = new byte[varcollen];
              in.readFully((byte[]) colValue[i]);
              break;
            }
            /* For text formatted column, it has a 4 byte var length header
             * and it's always null terminated string.
             * So, we can remove the last "\0" when constructing the string.
             */
          case TEXT:
            {
              if (varcollen < 1) {
                // The payload must hold at least the terminating "\0"; a shorter length would
                // make the String constructor below fail with a negative count.
                throw new IOException("Invalid GPDBWritable text column length: " + varcollen);
              }
              byte[] data = new byte[varcollen];
              in.readFully(data, 0, varcollen);
              colValue[i] = new String(data, 0, varcollen - 1, CHARSET);
              break;
            }

          default:
            throw new IOException("Unknown GPDBWritable ColType");
        }
      }
    }

    /* Skip the ending alignment padding */
    int skipbytes = roundUpAlignment(curOffset, 8) - curOffset;
    for (int j = 0; j < skipbytes; j++) {
      in.readByte();
    }

    // The error flag is checked only after the whole record has been consumed.
    if (errorFlag != 0) {
      throw new IOException("Received error value " + errorFlag + " from format");
    }
  }
コード例 #3
0
  /**
   * Creates a cursor over {@code recordReader} for the requested {@code columns}.
   *
   * <p>The constructor validates its arguments, obtains the key/value holders from the reader,
   * resolves the deserializer and row inspector from {@code splitSchema}, allocates one slot per
   * requested column in each per-type value array, and then:
   *
   * <ol>
   *   <li>records name, engine type and Hive type for every column, plus the struct field and
   *       field inspector for columns that are not partition keys;
   *   <li>parses the value of every requested partition-key column once and stores it in the
   *       value array matching its type (or flags it in {@code nulls}).
   * </ol>
   *
   * @throws IllegalArgumentException if {@code totalBytes} is negative or a requested partition
   *     column has no entry in {@code partitionKeys} (assuming {@code checkArgument} is Guava's)
   * @throws PrestoException with {@code NOT_SUPPORTED} if a partition column has a type that
   *     none of the branches below handles
   */
  public GenericHiveRecordCursor(
      RecordReader<K, V> recordReader,
      long totalBytes,
      Properties splitSchema,
      List<HivePartitionKey> partitionKeys,
      List<HiveColumnHandle> columns,
      DateTimeZone hiveStorageTimeZone,
      TypeManager typeManager) {
    requireNonNull(recordReader, "recordReader is null");
    checkArgument(totalBytes >= 0, "totalBytes is negative");
    requireNonNull(splitSchema, "splitSchema is null");
    requireNonNull(partitionKeys, "partitionKeys is null");
    requireNonNull(columns, "columns is null");
    requireNonNull(hiveStorageTimeZone, "hiveStorageTimeZone is null");

    this.recordReader = recordReader;
    this.totalBytes = totalBytes;
    this.key = recordReader.createKey();
    this.value = recordReader.createValue();
    this.hiveStorageTimeZone = hiveStorageTimeZone;

    this.deserializer = getDeserializer(splitSchema);
    this.rowInspector = getTableObjectInspector(deserializer);

    final int columnCount = columns.size();
    final String[] columnNames = new String[columnCount];

    // Per-column metadata.
    this.types = new Type[columnCount];
    this.hiveTypes = new HiveType[columnCount];
    this.structFields = new StructField[columnCount];
    this.fieldInspectors = new ObjectInspector[columnCount];
    this.isPartitionColumn = new boolean[columnCount];

    // Per-column value slots, one array per representation.
    this.loaded = new boolean[columnCount];
    this.booleans = new boolean[columnCount];
    this.longs = new long[columnCount];
    this.doubles = new double[columnCount];
    this.slices = new Slice[columnCount];
    this.objects = new Object[columnCount];
    this.nulls = new boolean[columnCount];

    // Pass 1: describe every requested column.
    for (int index = 0; index < columnCount; index++) {
      HiveColumnHandle handle = columns.get(index);

      columnNames[index] = handle.getName();
      types[index] = typeManager.getType(handle.getTypeSignature());
      hiveTypes[index] = handle.getHiveType();

      // Only non-partition columns are looked up in the row inspector.
      if (!handle.isPartitionKey()) {
        StructField structField = rowInspector.getStructFieldRef(handle.getName());
        structFields[index] = structField;
        fieldInspectors[index] = structField.getFieldObjectInspector();
      }

      isPartitionColumn[index] = handle.isPartitionKey();
    }

    // Pass 2: materialize the values of requested partition columns.
    Map<String, HivePartitionKey> keysByName =
        uniqueIndex(partitionKeys, HivePartitionKey::getName);
    for (int index = 0; index < columnCount; index++) {
      HiveColumnHandle handle = columns.get(index);
      if (!handle.isPartitionKey()) {
        continue;
      }

      HivePartitionKey partitionKey = keysByName.get(handle.getName());
      checkArgument(partitionKey != null, "Unknown partition key %s", handle.getName());

      String rawValue = partitionKey.getValue();
      byte[] rawBytes = rawValue.getBytes(UTF_8);

      String columnName = columnNames[index];
      Type columnType = types[index];

      if (HiveUtil.isHiveNull(rawBytes)) {
        nulls[index] = true;
      } else if (BOOLEAN.equals(columnType)) {
        booleans[index] = booleanPartitionKey(rawValue, columnName);
      } else if (BIGINT.equals(columnType)) {
        longs[index] = bigintPartitionKey(rawValue, columnName);
      } else if (INTEGER.equals(columnType)) {
        longs[index] = integerPartitionKey(rawValue, columnName);
      } else if (SMALLINT.equals(columnType)) {
        longs[index] = smallintPartitionKey(rawValue, columnName);
      } else if (TINYINT.equals(columnType)) {
        longs[index] = tinyintPartitionKey(rawValue, columnName);
      } else if (DOUBLE.equals(columnType)) {
        doubles[index] = doublePartitionKey(rawValue, columnName);
      } else if (isVarcharType(columnType)) {
        slices[index] = varcharPartitionKey(rawValue, columnName, columnType);
      } else if (DATE.equals(columnType)) {
        longs[index] = datePartitionKey(rawValue, columnName);
      } else if (TIMESTAMP.equals(columnType)) {
        longs[index] = timestampPartitionKey(rawValue, hiveStorageTimeZone, columnName);
      } else if (isShortDecimal(columnType)) {
        longs[index] = shortDecimalPartitionKey(rawValue, (DecimalType) columnType, columnName);
      } else if (isLongDecimal(columnType)) {
        slices[index] = longDecimalPartitionKey(rawValue, (DecimalType) columnType, columnName);
      } else {
        throw new PrestoException(
            NOT_SUPPORTED,
            format(
                "Unsupported column type %s for partition key: %s",
                columnType.getDisplayName(),
                columnName));
      }
    }
  }
コード例 #4
0
ファイル: HiveUtil.java プロジェクト: albertocsm/presto
  /**
   * Converts the textual value of a partition column into a {@code NullableValue} of the given
   * type.
   *
   * <p>For every supported type, a value equal to {@code HIVE_DEFAULT_DYNAMIC_PARTITION} yields a
   * null of that type. For decimal, boolean, the integer types, REAL and DOUBLE, an empty string
   * yields that type's zero/false value; for the remaining types (DATE, TIMESTAMP, varchar,
   * char) the string is always handed to the corresponding {@code *PartitionKey} parser.
   *
   * @param partitionName name of the partition column, passed through to the parsers and used
   *     in the unsupported-type message
   * @param value textual partition value
   * @param type target type of the partition column
   * @param timeZone zone passed to the timestamp parser
   * @return the parsed value, or a typed null
   * @throws PrestoException with {@code NOT_SUPPORTED} if {@code type} is not handled here
   */
  public static NullableValue parsePartitionValue(
      String partitionName, String value, Type type, DateTimeZone timeZone) {
    final boolean nullPartition = HIVE_DEFAULT_DYNAMIC_PARTITION.equals(value);

    if (type instanceof DecimalType) {
      DecimalType decimal = (DecimalType) type;
      if (nullPartition) {
        return NullableValue.asNull(decimal);
      }
      if (decimal.isShort()) {
        return value.isEmpty()
            ? NullableValue.of(decimal, 0L)
            : NullableValue.of(decimal, shortDecimalPartitionKey(value, decimal, partitionName));
      }
      return value.isEmpty()
          ? NullableValue.of(decimal, Decimals.encodeUnscaledValue(BigInteger.ZERO))
          : NullableValue.of(decimal, longDecimalPartitionKey(value, decimal, partitionName));
    }

    if (BOOLEAN.equals(type)) {
      if (nullPartition) {
        return NullableValue.asNull(BOOLEAN);
      }
      return value.isEmpty()
          ? NullableValue.of(BOOLEAN, false)
          : NullableValue.of(BOOLEAN, booleanPartitionKey(value, partitionName));
    }

    if (TINYINT.equals(type)) {
      if (nullPartition) {
        return NullableValue.asNull(TINYINT);
      }
      return value.isEmpty()
          ? NullableValue.of(TINYINT, 0L)
          : NullableValue.of(TINYINT, tinyintPartitionKey(value, partitionName));
    }

    if (SMALLINT.equals(type)) {
      if (nullPartition) {
        return NullableValue.asNull(SMALLINT);
      }
      return value.isEmpty()
          ? NullableValue.of(SMALLINT, 0L)
          : NullableValue.of(SMALLINT, smallintPartitionKey(value, partitionName));
    }

    if (INTEGER.equals(type)) {
      if (nullPartition) {
        return NullableValue.asNull(INTEGER);
      }
      return value.isEmpty()
          ? NullableValue.of(INTEGER, 0L)
          : NullableValue.of(INTEGER, integerPartitionKey(value, partitionName));
    }

    if (BIGINT.equals(type)) {
      if (nullPartition) {
        return NullableValue.asNull(BIGINT);
      }
      return value.isEmpty()
          ? NullableValue.of(BIGINT, 0L)
          : NullableValue.of(BIGINT, bigintPartitionKey(value, partitionName));
    }

    // DATE and TIMESTAMP have no empty-string shortcut: the raw text always goes to the parser.
    if (DATE.equals(type)) {
      return nullPartition
          ? NullableValue.asNull(DATE)
          : NullableValue.of(DATE, datePartitionKey(value, partitionName));
    }

    if (TIMESTAMP.equals(type)) {
      return nullPartition
          ? NullableValue.asNull(TIMESTAMP)
          : NullableValue.of(TIMESTAMP, timestampPartitionKey(value, timeZone, partitionName));
    }

    if (REAL.equals(type)) {
      if (nullPartition) {
        return NullableValue.asNull(REAL);
      }
      return value.isEmpty()
          ? NullableValue.of(REAL, (long) floatToRawIntBits(0.0f))
          : NullableValue.of(REAL, floatPartitionKey(value, partitionName));
    }

    if (DOUBLE.equals(type)) {
      if (nullPartition) {
        return NullableValue.asNull(DOUBLE);
      }
      return value.isEmpty()
          ? NullableValue.of(DOUBLE, 0.0)
          : NullableValue.of(DOUBLE, doublePartitionKey(value, partitionName));
    }

    // Character types keep the caller-supplied type instance (it carries the declared length).
    if (type instanceof VarcharType) {
      return nullPartition
          ? NullableValue.asNull(type)
          : NullableValue.of(type, varcharPartitionKey(value, partitionName, type));
    }

    if (isCharType(type)) {
      return nullPartition
          ? NullableValue.asNull(type)
          : NullableValue.of(type, charPartitionKey(value, partitionName, type));
    }

    throw new PrestoException(
        NOT_SUPPORTED, format("Unsupported Type [%s] for partition: %s", type, partitionName));
  }