@Test
public void testToMessageType() throws Exception {
  String expected =
      "message ParquetSchema {\n" +
      "  optional group persons (LIST) {\n" +
      "    repeated group persons_tuple {\n" +
      "      required group name {\n" +
      "        optional binary first_name (UTF8);\n" +
      "        optional binary last_name (UTF8);\n" +
      "      }\n" +
      "      optional int32 id;\n" +
      "      optional binary email (UTF8);\n" +
      "      optional group phones (LIST) {\n" +
      "        repeated group phones_tuple {\n" +
      "          optional binary number (UTF8);\n" +
      "          optional binary type (ENUM);\n" +
      "        }\n" +
      "      }\n" +
      "    }\n" +
      "  }\n" +
      "}";
  ThriftSchemaConverter schemaConverter = new ThriftSchemaConverter();
  StructType messageStruct = schemaConverter.toStructType(AddressBook.class);
  final MessageType converted = schemaConverter.convert(messageStruct);
  assertEquals(MessageTypeParser.parseMessageType(expected), converted);
}
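// Hedged sketch, not part of the test above: ThriftSchemaConverter is assumed to also offer a
// one-step convert(Class) shortcut that wraps toStructType(...) followed by convert(StructType),
// so the two-step conversion in testToMessageType() could be collapsed into a single call.
private MessageType convertDirectly() {
  // single-step conversion; assumed equivalent to toStructType(AddressBook.class) + convert(...)
  return new ThriftSchemaConverter().convert(AddressBook.class);
}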
/*
 * The test file contains 2-3 HDFS blocks, depending on each test's settings.
 * When the HDFS block size is set to 50, the blocks are [0-49][50-99].
 * Each row group has size 10, so the row-group layout on HDFS looks like:
 *   xxxxx xxxxx
 * where each x is a row group and each run of x's is one HDFS block.
 */
@Before
public void setUp() {
  blocks = new ArrayList<BlockMetaData>();
  for (int i = 0; i < 10; i++) {
    blocks.add(newBlock(i * 10, 10));
  }
  schema = MessageTypeParser.parseMessageType("message doc { required binary foo; }");
  fileMetaData = new FileMetaData(schema, new HashMap<String, String>(), "parquet-mr");
}
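/*
 * Hedged sketch, not part of the fixture above: the blocks and fileMetaData built in setUp()
 * could be combined into a ParquetMetadata instance, e.g. to dump the simulated footer while
 * debugging a test. ParquetMetadata and its toPrettyJSON helper are assumed to be available
 * from parquet-hadoop's metadata package.
 */
private String describeFooter() {
  ParquetMetadata footer = new ParquetMetadata(fileMetaData, blocks); // footer = file metadata + row groups
  return ParquetMetadata.toPrettyJSON(footer);                        // human-readable JSON for debugging
}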
/**
 * {@inheritDoc}
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
    throws IOException, InterruptedException {
  Configuration configuration = taskAttemptContext.getConfiguration();
  ParquetInputSplit parquetInputSplit = (ParquetInputSplit) inputSplit;
  this.requestedSchema = MessageTypeParser.parseMessageType(parquetInputSplit.getRequestedSchema());
  this.columnCount = this.requestedSchema.getPaths().size();
  this.recordConverter = readSupport.prepareForRead(
      configuration,
      parquetInputSplit.getExtraMetadata(),
      MessageTypeParser.parseMessageType(parquetInputSplit.getSchema()),
      new ReadSupport.ReadContext(requestedSchema));

  Path path = parquetInputSplit.getPath();
  List<BlockMetaData> blocks = parquetInputSplit.getBlocks();
  List<ColumnDescriptor> columns = requestedSchema.getColumns();
  reader = new ParquetFileReader(configuration, path, blocks, columns);
  for (BlockMetaData block : blocks) {
    total += block.getRowCount();
  }
  LOG.info("RecordReader initialized will read a total of " + total + " records.");
}
private void writeParquetRecord(String schema, ParquetHiveRecord record) throws SerDeException {
  MessageType fileSchema = MessageTypeParser.parseMessageType(schema);
  DataWritableWriter hiveParquetWriter = new DataWritableWriter(mockRecordConsumer, fileSchema);
  hiveParquetWriter.write(record);
}
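// Hedged usage sketch: the schema string can be any Parquet message definition that matches the
// data backing the ParquetHiveRecord. The record itself is assumed to be constructed elsewhere in
// the test via Hive ObjectInspectors; only the call shape of writeParquetRecord is illustrated here.
private void writeSimpleRecord(ParquetHiveRecord record) throws SerDeException {
  String schema = "message hive_schema {\n"
      + "  optional int32 id;\n"
      + "  optional binary name (UTF8);\n"
      + "}";
  writeParquetRecord(schema, record);
}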
private void shouldGetProjectedSchema(String filterDesc, String expectedSchemaStr, Class thriftClass) {
  MessageType requestedSchema = getFilteredSchema(filterDesc, thriftClass);
  MessageType expectedSchema = MessageTypeParser.parseMessageType(expectedSchemaStr);
  assertEquals(expectedSchema, requestedSchema);
}
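// Hedged usage sketch: filterDesc is a column-projection path expression, and the expected schema
// is whatever projection the converter produces for the given Thrift class. The Person class and
// the exact projected schema below are illustrative assumptions, not taken from the test suite.
@Test
public void testProjectFirstNameOnly() {
  shouldGetProjectedSchema(
      "name/first_name",
      "message ParquetSchema {"
          + "  required group name {"
          + "    optional binary first_name (UTF8);"
          + "  }"
          + "}",
      Person.class);
}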