/** * reads {@link ColumnarMetadata} stored in an RCFile. * * @throws IOException if metadata is not stored or in case of any other error. */ public static ColumnarMetadata readMetadata(Configuration conf, Path rcfile) throws IOException { Metadata metadata = null; Configuration confCopy = new Configuration(conf); // set up conf to read all the columns ColumnProjectionUtils.setFullyReadColumns(confCopy); RCFile.Reader reader = new RCFile.Reader(rcfile.getFileSystem(confCopy), rcfile, confCopy); // ugly hack to get metadata. RCFile has to provide access to metata try { Field f = RCFile.Reader.class.getDeclaredField("metadata"); f.setAccessible(true); metadata = (Metadata) f.get(reader); } catch (Throwable t) { throw new IOException("Could not access metadata field in RCFile reader", t); } reader.close(); Text metadataKey = new Text(COLUMN_METADATA_PROTOBUF_KEY); if (metadata == null || metadata.get(metadataKey) == null) { throw new IOException("could not find ColumnarMetadata in " + rcfile); } return Protobufs.mergeFromText(ColumnarMetadata.newBuilder(), metadata.get(metadataKey)) .build(); }
public void fullyReadTest(FileSystem fs, int count, Path file) throws IOException, SerDeException { LOG.debug("reading " + count + " records"); long start = System.currentTimeMillis(); ColumnProjectionUtils.setFullyReadColumns(conf); RCFile.Reader reader = new RCFile.Reader(fs, file, conf); LongWritable rowID = new LongWritable(); int actualRead = 0; BytesRefArrayWritable cols = new BytesRefArrayWritable(); while (reader.next(rowID)) { reader.getCurrentRow(cols); cols.resetValid(8); Object row = serDe.deserialize(cols); StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector(); List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs(); assertEquals("Field size should be 8", 8, fieldRefs.size()); for (int i = 0; i < fieldRefs.size(); i++) { Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i)); Object standardWritableData = ObjectInspectorUtils.copyToStandardObject( fieldData, fieldRefs.get(i).getFieldObjectInspector(), ObjectInspectorCopyOption.WRITABLE); assertEquals("Field " + i, standardWritableData, expectedFieldsData[i]); } // Serialize assertEquals( "Class of the serialized object should be BytesRefArrayWritable", BytesRefArrayWritable.class, serDe.getSerializedClass()); BytesRefArrayWritable serializedText = (BytesRefArrayWritable) serDe.serialize(row, oi); assertEquals("Serialized data", s, serializedText); actualRead++; } reader.close(); assertEquals("Expect " + count + " rows, actual read " + actualRead, actualRead, count); long cost = System.currentTimeMillis() - start; LOG.debug("reading fully costs:" + cost + " milliseconds"); }