Example #1
 @Test
 public void testNestedTypes() {
   MessageType schema = MessageTypeParser.parseMessageType(Paper.schema.toString());
   Type type = schema.getType("Links", "Backward");
   assertEquals(PrimitiveTypeName.INT64, type.asPrimitiveType().getPrimitiveTypeName());
   assertEquals(0, schema.getMaxRepetitionLevel("DocId"));
   assertEquals(1, schema.getMaxRepetitionLevel("Name"));
   assertEquals(2, schema.getMaxRepetitionLevel("Name", "Language"));
   assertEquals(0, schema.getMaxDefinitionLevel("DocId"));
   assertEquals(1, schema.getMaxDefinitionLevel("Links"));
   assertEquals(2, schema.getMaxDefinitionLevel("Links", "Backward"));
 }
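The Paper.schema referenced above corresponds to the Document schema from the Dremel paper. As a sketch (exact layout assumed; field names taken from the assertions above), an equivalent schema string parsed with MessageTypeParser yields the same repetition and definition levels:

 // sketch only: assumed textual form of the Document schema behind Paper.schema
 MessageType doc = MessageTypeParser.parseMessageType(
     "message Document {\n"
         + "  required int64 DocId;\n"        // required leaf -> max rep 0, max def 0
         + "  optional group Links {\n"       // optional group -> max def 1
         + "    repeated int64 Backward;\n"   // repeated leaf under Links -> max def 2
         + "    repeated int64 Forward;\n"
         + "  }\n"
         + "  repeated group Name {\n"        // repeated group -> max rep 1
         + "    repeated group Language {\n"  // nested repeated group -> max rep 2
         + "      required binary Code;\n"
         + "      optional binary Country;\n"
         + "    }\n"
         + "    optional binary Url;\n"
         + "  }\n"
         + "}");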
Example #2
 @Test
 public void testIDs() throws Exception {
   MessageType schema =
       new MessageType(
           "test",
           new PrimitiveType(REQUIRED, BINARY, "foo").withId(4),
           new GroupType(REQUIRED, "bar", new PrimitiveType(REQUIRED, BINARY, "baz").withId(3))
               .withId(8));
   MessageType schema2 = MessageTypeParser.parseMessageType(schema.toString());
   assertEquals(schema, schema2);
   assertEquals(schema.toString(), schema2.toString());
 }
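As a follow-up sketch, the ids attached with withId() are serialized in the printed schema, which is why the parse/print round-trip above can preserve them. The exact rendering below is assumed, not taken from the source:

 System.out.println(schema);
 // assumed output, with each field id printed after the field name:
 // message test {
 //   required binary foo = 4;
 //   required group bar = 8 {
 //     required binary baz = 3;
 //   }
 // }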
Example #3
  /**
   * Reads GPDBWritable records from GPDB via standard input and writes them to HDFS as parquet.
   *
   * @throws IOException if reading the input or writing the parquet file fails
   */
  public void doWrite() throws IOException {
    // if the user provides no schema file, the schema is auto-generated from the input below
    ParquetWriter<Group> dataFileWriter = null;
    DataInputStream dis = new DataInputStream(System.in);
    try {
      MessageType schema = null;
      SimpleGroupFactory groupFactory = null;

      // read the table structure info and auto-generate the parquet schema
      schema = autoGenSchema(dis);
      // int total = dis.readInt(); // skip the original 4 byte VARHDSZ

      if (parquetSchemaFile != null) {
        // if the user gave us a schema file, read the schema from it instead
        String schemaString = readSchemaFile(parquetSchemaFile);
        schema = MessageTypeParser.parseMessageType(schemaString);
      }

      GroupWriteSupport.setSchema(schema, conf);

      CompressionCodecName codecName = CompressionCodecName.UNCOMPRESSED;
      if (isCompressed) {
        if (compressCodec.equals(LZO_COMPRESS)) {
          codecName = CompressionCodecName.LZO;
        } else if (compressCodec.equals(SNAPPY_COMPRESS)) {
          codecName = CompressionCodecName.SNAPPY;
        } else if (compressCodec.equals(GZIP_COMPRESS)) {
          codecName = CompressionCodecName.GZIP;
        } else {
          throw new IOException("compression method not supported, codec: " + compressCodec);
        }
      }

      dataFileWriter =
          new ParquetWriter<Group>(
              new Path(outputPath),
              new GroupWriteSupport(),
              codecName,
              rowGroupSize,
              pageSize,
              dicPageSize,
              dicEnable,
              false, // do not validate records against the schema
              parquetVersion,
              conf);

      groupFactory = new SimpleGroupFactory(schema);

      while (true) {
        GPDBWritable gw = new GPDBWritable();
        gw.readFields(dis);

        Group pqGroup = groupFactory.newGroup();

        fillRecord(pqGroup, gw, schema);

        dataFileWriter.write(pqGroup);
      }
    } catch (EOFException e) {
      // expected: reached the end of the input stream; keep the call below for debugging
      // e.printStackTrace();
    } finally {
      if (dataFileWriter != null) {
        dataFileWriter.close();
      }

      dis.close();
    }
  }
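For completeness, a minimal read-back sketch for the file written above. This is hypothetical companion code, not part of the original class; it assumes the org.apache.parquet example Group object model and the same outputPath field:

  // assumed imports: org.apache.hadoop.fs.Path, org.apache.parquet.example.data.Group,
  // org.apache.parquet.hadoop.ParquetReader, org.apache.parquet.hadoop.example.GroupReadSupport
  public void doRead() throws IOException {
    ParquetReader<Group> reader =
        ParquetReader.builder(new GroupReadSupport(), new Path(outputPath)).build();
    try {
      Group record;
      while ((record = reader.read()) != null) { // read() returns null at end of file
        System.out.println(record);
      }
    } finally {
      reader.close();
    }
  }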
Example #4
 @Test
 public void test() throws Exception {
   MessageType schema = MessageTypeParser.parseMessageType(Paper.schema.toString());
   assertEquals(Paper.schema, schema);
   assertEquals(schema.toString(), Paper.schema.toString());
 }