Beispiel #1
0
  /**
   * Reads the {@link ColumnarMetadata} stored in the header metadata of an RCFile.
   *
   * <p>The supplied configuration is copied before it is modified, so the caller's
   * {@code conf} is left untouched. The underlying {@link RCFile.Reader} is always
   * closed before this method returns, including when reading the metadata fails.
   *
   * @param conf configuration used to resolve the file system and open the file
   * @param rcfile path of the RCFile to read the metadata from
   * @return the {@link ColumnarMetadata} parsed from the file's metadata entry
   * @throws IOException if metadata is not stored or in case of any other error.
   */
  public static ColumnarMetadata readMetadata(Configuration conf, Path rcfile) throws IOException {

    Configuration confCopy = new Configuration(conf);
    // set up conf to read all the columns
    ColumnProjectionUtils.setFullyReadColumns(confCopy);

    RCFile.Reader reader = new RCFile.Reader(rcfile.getFileSystem(confCopy), rcfile, confCopy);

    Metadata metadata;
    // ugly hack to get metadata. RCFile has to provide access to metadata
    try {
      Field f = RCFile.Reader.class.getDeclaredField("metadata");
      f.setAccessible(true);
      metadata = (Metadata) f.get(reader);
    } catch (Exception e) {
      // Reflection failures (missing field, access denied, unexpected type) are reported
      // as IOException; JVM Errors are deliberately left to propagate unwrapped.
      throw new IOException("Could not access metadata field in RCFile reader", e);
    } finally {
      // Release the reader on every path; previously it leaked when reflection failed.
      reader.close();
    }

    Text metadataKey = new Text(COLUMN_METADATA_PROTOBUF_KEY);
    // Look the entry up once and reuse it for both the presence check and the parse.
    Text serializedMetadata = (metadata == null) ? null : metadata.get(metadataKey);

    if (serializedMetadata == null) {
      throw new IOException("could not find ColumnarMetadata in " + rcfile);
    }

    return Protobufs.mergeFromText(ColumnarMetadata.newBuilder(), serializedMetadata)
        .build();
  }
Beispiel #2
0
  /**
   * Reads every row of the given RCFile with all columns selected and verifies each row.
   *
   * <p>For each row this checks that the object inspector exposes 8 fields, that every
   * field (copied to its standard writable form) equals the matching entry of
   * {@code expectedFieldsData}, and that re-serializing the row yields {@code s}.
   * Finally it checks that exactly {@code count} rows were read. The reader is closed
   * even when a check fails part-way through the file.
   *
   * @param fs file system the file lives on
   * @param count number of rows the file is expected to contain
   * @param file path of the RCFile to read
   * @throws IOException if reading the file fails
   * @throws SerDeException if a row cannot be deserialized or serialized
   */
  public void fullyReadTest(FileSystem fs, int count, Path file)
      throws IOException, SerDeException {
    LOG.debug("reading " + count + " records");
    long start = System.currentTimeMillis();
    ColumnProjectionUtils.setFullyReadColumns(conf);
    RCFile.Reader reader = new RCFile.Reader(fs, file, conf);

    int actualRead = 0;
    try {
      LongWritable rowID = new LongWritable();
      BytesRefArrayWritable cols = new BytesRefArrayWritable();
      while (reader.next(rowID)) {
        reader.getCurrentRow(cols);
        cols.resetValid(8);
        Object row = serDe.deserialize(cols);

        StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
        List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
        assertEquals("Field size should be 8", 8, fieldRefs.size());
        for (int i = 0; i < fieldRefs.size(); i++) {
          Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i));
          Object standardWritableData =
              ObjectInspectorUtils.copyToStandardObject(
                  fieldData,
                  fieldRefs.get(i).getFieldObjectInspector(),
                  ObjectInspectorCopyOption.WRITABLE);
          // expected value first, so a failure reports expected/actual the right way round
          assertEquals("Field " + i, expectedFieldsData[i], standardWritableData);
        }
        // Serialize
        assertEquals(
            "Class of the serialized object should be BytesRefArrayWritable",
            BytesRefArrayWritable.class,
            serDe.getSerializedClass());
        BytesRefArrayWritable serializedText = (BytesRefArrayWritable) serDe.serialize(row, oi);
        assertEquals("Serialized data", s, serializedText);
        actualRead++;
      }
    } finally {
      // Close on every path so a failed assertion or SerDe error does not leak the reader.
      reader.close();
    }
    assertEquals("Expect " + count + " rows, actual read " + actualRead, count, actualRead);
    long cost = System.currentTimeMillis() - start;
    LOG.debug("reading fully costs:" + cost + " milliseconds");
  }