@Test
public void testNestedTypes() {
  MessageType schema = MessageTypeParser.parseMessageType(Paper.schema.toString());
  Type type = schema.getType("Links", "Backward");
  assertEquals(PrimitiveTypeName.INT64, type.asPrimitiveType().getPrimitiveTypeName());
  assertEquals(0, schema.getMaxRepetitionLevel("DocId"));
  assertEquals(1, schema.getMaxRepetitionLevel("Name"));
  assertEquals(2, schema.getMaxRepetitionLevel("Name", "Language"));
  assertEquals(0, schema.getMaxDefinitionLevel("DocId"));
  assertEquals(1, schema.getMaxDefinitionLevel("Links"));
  assertEquals(2, schema.getMaxDefinitionLevel("Links", "Backward"));
}
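For reference, a minimal sketch of the Dremel-style "Document" schema that Paper.schema is assumed to define (the field names and the repetition/definition levels match the assertions above, but the exact contents of Paper.java may differ; the org.apache.parquet package namespace is also an assumption, as older releases used the bare parquet namespace):

import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

public class PaperSchemaSketch {
  public static void main(String[] args) {
    // The "Document" schema from the Dremel paper, written in Parquet's
    // message-type syntax. The max repetition level of a path counts its
    // enclosing repeated fields; the max definition level counts its
    // enclosing optional and repeated fields.
    MessageType document = MessageTypeParser.parseMessageType(
        "message Document {\n"
        + "  required int64 DocId;\n"
        + "  optional group Links {\n"
        + "    repeated int64 Backward;\n"
        + "    repeated int64 Forward;\n"
        + "  }\n"
        + "  repeated group Name {\n"
        + "    repeated group Language {\n"
        + "      required binary Code;\n"
        + "      optional binary Country;\n"
        + "    }\n"
        + "    optional binary Url;\n"
        + "  }\n"
        + "}");
    System.out.println(document.getMaxRepetitionLevel("Name", "Language"));   // 2
    System.out.println(document.getMaxDefinitionLevel("Links", "Backward")); // 2
  }
}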
@Test
public void testIDs() throws Exception {
  MessageType schema = new MessageType("test",
      new PrimitiveType(REQUIRED, BINARY, "foo").withId(4),
      new GroupType(REQUIRED, "bar",
          new PrimitiveType(REQUIRED, BINARY, "baz").withId(3)).withId(8));
  MessageType schema2 = MessageTypeParser.parseMessageType(schema.toString());
  assertEquals(schema, schema2);
  assertEquals(schema.toString(), schema2.toString());
}
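The round trip can only preserve the field ids if toString() prints them, so parquet-mr is assumed to render ids inline after each field name. Under that assumption, schema.toString() for the type above would look roughly like the following (the exact rendering may vary by version):

message test {
  required binary foo = 4;
  required group bar = 8 {
    required binary baz = 3;
  }
}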
/**
 * Read GPDBWritable records from GPDB (on stdin) and write them to HDFS as Parquet.
 *
 * @throws IOException when something goes wrong
 */
public void doWrite() throws IOException {
  ParquetWriter<Group> dataFileWriter = null;
  DataInputStream dis = new DataInputStream(System.in);
  try {
    // Read the table structure info and auto-generate a Parquet schema from it.
    MessageType schema = autoGenSchema(dis);
    // int total = dis.readInt(); // skip the original 4 byte VARHDSZ
    if (parquetSchemaFile != null) {
      // The user gave us a schema file; it overrides the auto-generated schema.
      String schemaString = readSchemaFile(parquetSchemaFile);
      schema = MessageTypeParser.parseMessageType(schemaString);
    }
    GroupWriteSupport.setSchema(schema, conf);

    CompressionCodecName codecName = CompressionCodecName.UNCOMPRESSED;
    if (isCompressed) {
      if (compressCodec.equals(LZO_COMPRESS)) {
        codecName = CompressionCodecName.LZO;
      } else if (compressCodec.equals(SNAPPY_COMPRESS)) {
        codecName = CompressionCodecName.SNAPPY;
      } else if (compressCodec.equals(GZIP_COMPRESS)) {
        codecName = CompressionCodecName.GZIP;
      } else {
        throw new IOException("compression method not supported, codec: " + compressCodec);
      }
    }

    dataFileWriter = new ParquetWriter<Group>(
        new Path(outputPath), new GroupWriteSupport(), codecName,
        rowGroupSize, pageSize, dicPageSize, dicEnable, false,
        parquetVersion, conf);

    SimpleGroupFactory groupFactory = new SimpleGroupFactory(schema);
    while (true) {
      GPDBWritable gw = new GPDBWritable();
      gw.readFields(dis); // throws EOFException at the end of the stream
      Group pqGroup = groupFactory.newGroup();
      fillRecord(pqGroup, gw, schema);
      dataFileWriter.write(pqGroup);
    }
  } catch (EOFException e) {
    // Expected: we reached the end of the input stream.
    // Keep the error message around for testing: e.printStackTrace();
  } finally {
    if (dataFileWriter != null) {
      dataFileWriter.close();
    }
    dis.close();
  }
}
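For context, a minimal, self-contained sketch of the same write path with a hypothetical two-column schema. The names out.parquet, id, name, and alice are illustrative, not from the source; the ParquetWriter constructor arguments mirror the ones doWrite passes, substituting the ParquetWriter default constants for the configured rowGroupSize/pageSize/dicPageSize/dicEnable values:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.column.ParquetProperties;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.example.data.simple.SimpleGroupFactory;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.example.GroupWriteSupport;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

public class GroupWriteSketch {
  public static void main(String[] args) throws Exception {
    MessageType schema = MessageTypeParser.parseMessageType(
        "message example { required int64 id; optional binary name (UTF8); }");
    Configuration conf = new Configuration();
    // GroupWriteSupport reads the schema back out of the Configuration,
    // so it must be set before the writer is constructed.
    GroupWriteSupport.setSchema(schema, conf);
    ParquetWriter<Group> writer = new ParquetWriter<Group>(
        new Path("out.parquet"),
        new GroupWriteSupport(),
        CompressionCodecName.SNAPPY,
        ParquetWriter.DEFAULT_BLOCK_SIZE,
        ParquetWriter.DEFAULT_PAGE_SIZE,
        ParquetWriter.DEFAULT_PAGE_SIZE,             // dictionary page size
        ParquetWriter.DEFAULT_IS_DICTIONARY_ENABLED,
        false,                                       // don't validate against the schema
        ParquetProperties.WriterVersion.PARQUET_1_0,
        conf);
    try {
      SimpleGroupFactory groupFactory = new SimpleGroupFactory(schema);
      writer.write(groupFactory.newGroup().append("id", 1L).append("name", "alice"));
    } finally {
      writer.close();
    }
  }
}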
@Test
public void test() throws Exception {
  MessageType schema = MessageTypeParser.parseMessageType(Paper.schema.toString());
  assertEquals(Paper.schema, schema);
  assertEquals(schema.toString(), Paper.schema.toString());
}