@Test
 public void testToMessageType() throws Exception {
   String expected =
       "message ParquetSchema {\n"
           + "  optional group persons (LIST) {\n"
           + "    repeated group persons_tuple {\n"
           + "      required group name {\n"
           + "        optional binary first_name (UTF8);\n"
           + "        optional binary last_name (UTF8);\n"
           + "      }\n"
           + "      optional int32 id;\n"
           + "      optional binary email (UTF8);\n"
           + "      optional group phones (LIST) {\n"
           + "        repeated group phones_tuple {\n"
           + "          optional binary number (UTF8);\n"
           + "          optional binary type (ENUM);\n"
           + "        }\n"
           + "      }\n"
           + "    }\n"
           + "  }\n"
           + "}";
   ThriftSchemaConverter schemaConverter = new ThriftSchemaConverter();
   StructType messageStruct = schemaConverter.toStructType(AddressBook.class);
   final MessageType converted = schemaConverter.convert(messageStruct);
   assertEquals(MessageTypeParser.parseMessageType(expected), converted);
 }
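Every snippet in this collection leans on MessageTypeParser.parseMessageType to turn a textual Parquet schema into a MessageType. Here is a minimal, self-contained sketch of that round trip; the class name and schema string are illustrative, and the imports assume a recent parquet-mr (older releases used the parquet.* package prefix instead of org.apache.parquet.*):

import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

public class ParseSchemaExample {
  public static void main(String[] args) {
    // Parse the textual schema into a MessageType object.
    MessageType schema = MessageTypeParser.parseMessageType(
        "message doc { required binary foo (UTF8); }");
    // toString() prints the schema back in the same textual form,
    // which is why the tests above compare parsed schemas directly.
    System.out.println(schema);
    // Each leaf field is exposed as a ColumnDescriptor.
    for (ColumnDescriptor column : schema.getColumns()) {
      System.out.println(column);
    }
  }
}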
Example #2
 /*
  The test file contains 2-3 HDFS blocks depending on each test's settings; when the HDFS block size is set to 50, the blocks span [0-49][50-99].
  Each row group is of size 10, so the row-group layout on HDFS looks like:
  xxxxx xxxxx
  where each x is a row group and each run of x's is an HDFS block.
 */
 @Before
 public void setUp() {
   blocks = new ArrayList<BlockMetaData>();
   for (int i = 0; i < 10; i++) {
     blocks.add(newBlock(i * 10, 10));
   }
   schema = MessageTypeParser.parseMessageType("message doc { required binary foo; }");
   fileMetaData = new FileMetaData(schema, new HashMap<String, String>(), "parquet-mr");
 }
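The newBlock helper called in setUp is not shown above. A rough sketch of what it could look like, assuming the second argument is the row group's compressed size in bytes and using the single "foo" column from the schema in setUp; the codec, encoding, and 2:1 compression ratio are assumptions, and the exact ColumnChunkMetaData.get(...) overload varies between parquet-mr versions:

  // Hypothetical reconstruction of the elided newBlock helper.
  private BlockMetaData newBlock(long start, long compressedBlockSize) {
    BlockMetaData blockMetaData = new BlockMetaData();
    long uncompressedSize = compressedBlockSize * 2; // assume a 2:1 compression ratio
    ColumnChunkMetaData column = ColumnChunkMetaData.get(
        ColumnPath.get("foo"),                          // single column matching the schema above
        PrimitiveTypeName.BINARY,
        CompressionCodecName.GZIP,
        new HashSet<Encoding>(Arrays.asList(Encoding.PLAIN)),
        new BinaryStatistics(),
        start,                                          // first data page offset -> block starting position
        0L,                                             // dictionary page offset
        0L,                                             // value count
        compressedBlockSize,
        uncompressedSize);
    blockMetaData.addColumn(column);
    blockMetaData.setTotalByteSize(uncompressedSize);
    return blockMetaData;
  }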
Example #3
  /** {@inheritDoc} */
  @Override
  public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
      throws IOException, InterruptedException {
    Configuration configuration = taskAttemptContext.getConfiguration();
    ParquetInputSplit parquetInputSplit = (ParquetInputSplit) inputSplit;
    this.requestedSchema =
        MessageTypeParser.parseMessageType(parquetInputSplit.getRequestedSchema());
    this.columnCount = this.requestedSchema.getPaths().size();
    this.recordConverter =
        readSupport.prepareForRead(
            configuration,
            parquetInputSplit.getExtraMetadata(),
            MessageTypeParser.parseMessageType(parquetInputSplit.getSchema()),
            new ReadSupport.ReadContext(requestedSchema));

    Path path = parquetInputSplit.getPath();
    List<BlockMetaData> blocks = parquetInputSplit.getBlocks();
    List<ColumnDescriptor> columns = requestedSchema.getColumns();
    reader = new ParquetFileReader(configuration, path, blocks, columns);
    for (BlockMetaData block : blocks) {
      total += block.getRowCount();
    }
    LOG.info("RecordReader initialized will read a total of " + total + " records.");
  }
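The running total of row counts computed in initialize is what a record reader typically reports progress against. A minimal sketch, assuming a `current` counter that nextKeyValue() increments for every record read (the counter name is an assumption, not taken from the snippet above):

  /** {@inheritDoc} */
  @Override
  public float getProgress() throws IOException, InterruptedException {
    // Fraction of the expected records that have been consumed so far.
    return total == 0 ? 1.0f : (float) current / total;
  }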
 private void writeParquetRecord(String schema, ParquetHiveRecord record) throws SerDeException {
   MessageType fileSchema = MessageTypeParser.parseMessageType(schema);
   DataWritableWriter hiveParquetWriter = new DataWritableWriter(mockRecordConsumer, fileSchema);
   hiveParquetWriter.write(record);
 }
 private void shouldGetProjectedSchema(
     String filterDesc, String expectedSchemaStr, Class thriftClass) {
   MessageType requestedSchema = getFilteredSchema(filterDesc, thriftClass);
   MessageType expectedSchema = MessageTypeParser.parseMessageType(expectedSchemaStr);
   assertEquals(expectedSchema, requestedSchema);
 }