private PqElementType generateElementType(int colType, int notNull, int ndims) {
    PqElementType eType = new PqElementType();

    if (GPDBWritable.isArrayType(colType, ndims)) {
        eType.isArray = true;
        colType = GPDBWritable.getElementTypeFromArrayType(colType);
    }

    switch (colType) {
        case GPDBWritable.BOOLEAN:
            eType.primitiveType = PrimitiveTypeName.BOOLEAN;
            break;
        case GPDBWritable.BYTEA:
            eType.primitiveType = PrimitiveTypeName.BINARY;
            break;
        case GPDBWritable.BIGINT:
            eType.primitiveType = PrimitiveTypeName.INT64;
            break;
        case GPDBWritable.SMALLINT:
        case GPDBWritable.INTEGER:
            eType.primitiveType = PrimitiveTypeName.INT32;
            break;
        case GPDBWritable.REAL:
            eType.primitiveType = PrimitiveTypeName.FLOAT;
            break;
        case GPDBWritable.FLOAT8:
            eType.primitiveType = PrimitiveTypeName.DOUBLE;
            break;
        case GPDBWritable.CHAR:
        case GPDBWritable.VARCHAR:
        case GPDBWritable.BPCHAR:
        case GPDBWritable.NUMERIC:
        case GPDBWritable.DATE:
        case GPDBWritable.TIME:
        case GPDBWritable.TIMESTAMP:
        case GPDBWritable.TEXT:
        default:
            // Everything else (including possible UDTs) is stored as text:
            // BINARY with a DECIMAL annotation for NUMERIC, UTF8 otherwise.
            eType.primitiveType = PrimitiveTypeName.BINARY;
            if (colType == GPDBWritable.NUMERIC) {
                eType.originalType = OriginalType.DECIMAL;
            } else {
                eType.originalType = OriginalType.UTF8;
            }
    }

    return eType;
}
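// Illustrative sketch (not part of the original flow): how a PqElementType
// produced by generateElementType() could be turned into a concrete Parquet
// field when assembling the MessageType. The buildField name is hypothetical;
// it assumes REPEATED repetition for arrays and OPTIONAL otherwise, matching
// the repetition check in fillElement() below.
private Type buildField(String name, PqElementType eType) {
    Repetition rep = eType.isArray ? Repetition.REPEATED : Repetition.OPTIONAL;
    if (eType.originalType == null) {
        return new PrimitiveType(rep, eType.primitiveType, name);
    }
    return new PrimitiveType(rep, eType.primitiveType, name, eType.originalType);
}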
/**
 * Fill a Parquet group from a GPDBWritable row, one column at a time.
 *
 * @throws IOException if a column value cannot be converted
 */
private void fillRecord(Group pqGroup, GPDBWritable gw, MessageType schema) throws IOException {
    int[] colType = gw.getColumnType();
    List<Type> fields = schema.getFields();

    for (int i = 0; i < colType.length; i++) {
        fillElement(i, colType[i], pqGroup, gw, fields.get(i));
    }
}
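// Defensive variant (illustrative only; checkSchemaMatch is not called by the
// original code): fillRecord() assumes a strict positional, one-to-one match
// between GPDB columns and Parquet schema fields, so a mismatch would
// otherwise surface as an index error deep inside fillElement().
private void checkSchemaMatch(GPDBWritable gw, MessageType schema) throws IOException {
    if (gw.getColumnType().length != schema.getFieldCount()) {
        throw new IOException("column count " + gw.getColumnType().length
            + " does not match Parquet schema field count " + schema.getFieldCount());
    }
}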
private void fillElement(int index, int colType, Group pqGroup, GPDBWritable gw, Type field)
        throws IOException {
    switch (colType) {
        case GPDBWritable.BPCHAR:
        case GPDBWritable.CHAR:
        case GPDBWritable.DATE:
        case GPDBWritable.NUMERIC:
        case GPDBWritable.TIME:
        case GPDBWritable.TIMESTAMP:
        case GPDBWritable.VARCHAR:
        case GPDBWritable.TEXT:
            // utf8 or array
            if (field.getRepetition() == Repetition.REPEATED) {
                decodeArrayString(index, field, pqGroup, gw.getString(index),
                    columnSchemas.get(index).getDelim());
            } else {
                int gpdbType = columnSchemas.get(index).getType();
                PrimitiveTypeName priType = field.asPrimitiveType().getPrimitiveTypeName();
                OriginalType originalType = field.getOriginalType();
                if (gpdbType == GPDBWritable.NUMERIC && priType == PrimitiveTypeName.INT32) {
                    pqGroup.add(index, Integer.parseInt(gw.getString(index)));
                } else if (gpdbType == GPDBWritable.NUMERIC && priType == PrimitiveTypeName.INT64) {
                    pqGroup.add(index, Long.parseLong(gw.getString(index)));
                } else if (gpdbType == GPDBWritable.DATE && priType == PrimitiveTypeName.INT32) {
                    // days since the Unix epoch
                    pqGroup.add(index, (int) FormatHandlerUtil.getTimeDiff(
                        gw.getString(index), "1970-01-01", "yyyy-mm-dd", 24 * 60 * 60 * 1000));
                } else if (gpdbType == GPDBWritable.TIME && priType == PrimitiveTypeName.INT32) {
                    // milliseconds since midnight
                    pqGroup.add(index, (int) FormatHandlerUtil.getTimeDiff(
                        gw.getString(index), "00:00:00", "mm:hh:ss", 1));
                } else if (gpdbType == GPDBWritable.TIMESTAMP && priType == PrimitiveTypeName.INT64) {
                    // milliseconds since the Unix epoch
                    pqGroup.add(index, FormatHandlerUtil.getTimeDiff(
                        gw.getString(index), "1970-01-01 00:00:00", "yyyy-mm-dd mm:hh:ss", 1));
                } else if (gpdbType == GPDBWritable.INTERVAL && originalType == OriginalType.INTERVAL) {
                    // Intervals are complex; for now only the 'postgres' output style
                    // is supported, e.g. "1 year 2 mons -3 days +04:05:06.00901".
                    byte[] interval = FormatHandlerUtil.getParquetInterval(gw.getString(index));
                    pqGroup.add(index, Binary.fromByteArray(interval));
                } else {
                    pqGroup.add(index, gw.getString(index));
                }
            }
            break;
        case GPDBWritable.BYTEA:
            pqGroup.add(index, Binary.fromByteArray(gw.getBytes(index)));
            break;
        case GPDBWritable.REAL:
            pqGroup.add(index, gw.getFloat(index));
            break;
        case GPDBWritable.BIGINT:
            pqGroup.add(index, gw.getLong(index));
            break;
        case GPDBWritable.BOOLEAN:
            pqGroup.add(index, gw.getBoolean(index));
            break;
        case GPDBWritable.FLOAT8:
            pqGroup.add(index, gw.getDouble(index));
            break;
        case GPDBWritable.INTEGER:
            pqGroup.add(index, gw.getInt(index));
            break;
        case GPDBWritable.SMALLINT:
            pqGroup.add(index, gw.getShort(index));
            break;
        default:
            throw new IOException("internal error: unsupported type, typeId: " + colType);
    }
}
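// Hedged sketch of the epoch-offset arithmetic getTimeDiff() is assumed to
// perform: parse both strings with the given pattern and divide the
// millisecond difference by the requested unit (e.g. 86400000 for days since
// 1970-01-01). SimpleDateFormat is an assumption here; the real
// FormatHandlerUtil may interpret its pattern argument differently.
private static long timeDiffSketch(String value, String epoch, String pattern, long unitMillis)
        throws java.text.ParseException {
    java.text.SimpleDateFormat fmt = new java.text.SimpleDateFormat(pattern);
    return (fmt.parse(value).getTime() - fmt.parse(epoch).getTime()) / unitMillis;
}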
/**
 * Read GPDBWritable rows from gpdb (via stdin) and write them to HDFS as Parquet.
 *
 * @throws IOException when reading the input stream or writing the Parquet file fails
 */
public void doWrite() throws IOException {
    ParquetWriter<Group> dataFileWriter = null;
    DataInputStream dis = new DataInputStream(System.in);

    try {
        // Auto-generate a Parquet schema from the table structure sent by gpdb;
        // a user-supplied schema file, if any, takes precedence below.
        MessageType schema = autoGenSchema(dis);
        // int total = dis.readInt(); // skip the original 4-byte VARHDSZ
        if (parquetSchemaFile != null) {
            String schemaString = readSchemaFile(parquetSchemaFile);
            schema = MessageTypeParser.parseMessageType(schemaString);
        }
        GroupWriteSupport.setSchema(schema, conf);

        CompressionCodecName codecName = CompressionCodecName.UNCOMPRESSED;
        if (isCompressed) {
            if (compressCodec.equals(LZO_COMPRESS)) {
                codecName = CompressionCodecName.LZO;
            } else if (compressCodec.equals(SNAPPY_COMPRESS)) {
                codecName = CompressionCodecName.SNAPPY;
            } else if (compressCodec.equals(GZIP_COMPRESS)) {
                codecName = CompressionCodecName.GZIP;
            } else {
                throw new IOException("unsupported compression codec: " + compressCodec);
            }
        }

        dataFileWriter = new ParquetWriter<Group>(
            new Path(outputPath),
            new GroupWriteSupport(),
            codecName,
            rowGroupSize,
            pageSize,
            dicPageSize,
            dicEnable,
            false,
            parquetVersion,
            conf);

        SimpleGroupFactory groupFactory = new SimpleGroupFactory(schema);
        while (true) {
            GPDBWritable gw = new GPDBWritable();
            gw.readFields(dis);
            Group pqGroup = groupFactory.newGroup();
            fillRecord(pqGroup, gw, schema);
            dataFileWriter.write(pqGroup);
        }
    } catch (EOFException e) {
        // Expected: reached the end of the input stream.
        // Keep the stack trace handy for testing:
        // e.printStackTrace();
    } finally {
        if (dataFileWriter != null) {
            dataFileWriter.close();
        }
        dis.close();
    }
}
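// Verification sketch (hypothetical; nothing in the original calls this):
// reads the file written by doWrite() back as Groups via the old parquet-mr
// ParquetReader/GroupReadSupport API, matching the ParquetWriter constructor
// style used above. Assumes parquet.hadoop.example.GroupReadSupport and
// parquet.hadoop.ParquetReader are imported.
private void dumpParquetFile(String path) throws IOException {
    ParquetReader<Group> reader =
        new ParquetReader<Group>(new Path(path), new GroupReadSupport());
    try {
        Group g;
        while ((g = reader.read()) != null) {
            System.out.println(g);
        }
    } finally {
        reader.close();
    }
}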