@Test
public void testReadingExistingVersion1HFile() throws IOException {
  URL url = TestHFileReaderV1.class.getResource("8e8ab58dcf39412da19833fcd8f687ac");
  Path existingHFilePath = new Path(url.getPath());
  HFile.Reader reader = HFile.createReader(fs, existingHFilePath, new CacheConfig(conf));
  reader.loadFileInfo();
  FixedFileTrailer trailer = reader.getTrailer();

  assertEquals(N, reader.getEntries());
  assertEquals(N, trailer.getEntryCount());
  assertEquals(1, trailer.getMajorVersion());
  assertEquals(Compression.Algorithm.GZ, trailer.getCompressionCodec());

  for (boolean pread : new boolean[] { false, true }) {
    int totalDataSize = 0;
    int n = 0;
    HFileScanner scanner = reader.getScanner(false, pread);
    assertTrue(scanner.seekTo());
    do {
      totalDataSize += scanner.getKey().limit() + scanner.getValue().limit()
          + Bytes.SIZEOF_INT * 2;
      ++n;
    } while (scanner.next());

    // Add magic record sizes, one per data block.
    totalDataSize += 8 * trailer.getDataIndexCount();

    assertEquals(N, n);
    assertEquals(trailer.getTotalUncompressedBytes(), totalDataSize);
  }

  reader.close();
}
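The size accounting in this test mirrors the version-1 on-disk record layout: each cell is stored as a 4-byte key length, a 4-byte value length, then the key and value bytes, and each data block begins with an 8-byte magic record. A minimal sketch of the per-cell arithmetic, inferred from the test's assertions rather than from the reader source:

// Hypothetical helper mirroring the test's accounting: the per-cell
// uncompressed footprint in a version-1 data block (layout inferred
// from the assertions above, not from the HFile reader itself).
static int v1UncompressedCellSize(int keyLen, int valueLen) {
  return Bytes.SIZEOF_INT   // 4-byte key length prefix
      + Bytes.SIZEOF_INT    // 4-byte value length prefix
      + keyLen + valueLen;  // the key and value bytes themselves
}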
/**
 * Method returns the reader given the specified arguments. TODO This is a bad abstraction. See
 * HBASE-6635.
 *
 * @param path hfile's path
 * @param fsdis stream of path's file
 * @param size max size of the trailer.
 * @param cacheConf Cache configuration values, cannot be null.
 * @param hfs the HFileSystem the file lives on, may be null
 * @return an appropriate instance of HFileReader
 * @throws IOException If file is invalid, will throw CorruptHFileException flavored IOException
 */
private static Reader pickReaderVersion(
    Path path, FSDataInputStreamWrapper fsdis, long size, CacheConfig cacheConf, HFileSystem hfs)
    throws IOException {
  FixedFileTrailer trailer = null;
  try {
    boolean isHBaseChecksum = fsdis.shouldUseHBaseChecksum();
    assert !isHBaseChecksum; // Initially we must read with FS checksum.
    trailer = FixedFileTrailer.readFromStream(fsdis.getStream(isHBaseChecksum), size);
    switch (trailer.getMajorVersion()) {
      case 2:
        return new HFileReaderV2(path, trailer, fsdis, size, cacheConf, hfs);
      default:
        throw new CorruptHFileException("Invalid HFile version " + trailer.getMajorVersion());
    }
  } catch (Throwable t) {
    try {
      fsdis.close();
    } catch (Throwable t2) {
      LOG.warn("Error closing fsdis FSDataInputStreamWrapper", t2);
    }
    throw new CorruptHFileException("Problem reading HFile Trailer from file " + path, t);
  }
}
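pickReaderVersion is private; callers reach it through the public HFile.createReader factories used in the tests elsewhere in this section. A minimal sketch of how such a wrapper might delegate, assuming an FSDataInputStreamWrapper(FileSystem, Path) constructor and a simple HFileSystem cast (the exact overloads and validation are assumptions, not taken from this excerpt):

// Hypothetical wrapper; the real factory may take additional arguments
// (e.g. a Configuration) and perform more validation before delegating.
public static Reader createReader(FileSystem fs, Path path, CacheConfig cacheConf)
    throws IOException {
  FSDataInputStreamWrapper stream = new FSDataInputStreamWrapper(fs, path);
  long fileSize = fs.getFileStatus(path).getLen();
  return pickReaderVersion(path, stream, fileSize, cacheConf,
      fs instanceof HFileSystem ? (HFileSystem) fs : null);
}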
@Test(timeout = 20000)
public void testHFileEncryptionMetadata() throws Exception {
  Configuration conf = TEST_UTIL.getConfiguration();
  CacheConfig cacheConf = new CacheConfig(conf);
  HFileContext fileContext =
      new HFileContextBuilder().withEncryptionContext(cryptoContext).build();

  // write a simple encrypted hfile
  Path path = new Path(TEST_UTIL.getDataTestDir(), "cryptometa.hfile");
  FSDataOutputStream out = fs.create(path);
  HFile.Writer writer = HFile.getWriterFactory(conf, cacheConf)
      .withOutputStream(out)
      .withFileContext(fileContext)
      .create();
  try {
    KeyValue kv = new KeyValue("foo".getBytes(), "f1".getBytes(), null, "value".getBytes());
    writer.append(kv);
  } finally {
    writer.close();
    out.close();
  }

  // read it back in and validate correct crypto metadata
  HFile.Reader reader = HFile.createReader(fs, path, cacheConf, conf);
  try {
    reader.loadFileInfo();
    FixedFileTrailer trailer = reader.getTrailer();
    assertNotNull(trailer.getEncryptionKey());
    Encryption.Context readerContext = reader.getFileContext().getEncryptionContext();
    assertEquals(readerContext.getCipher().getName(), cryptoContext.getCipher().getName());
    assertTrue(Bytes.equals(readerContext.getKeyBytes(), cryptoContext.getKeyBytes()));
  } finally {
    reader.close();
  }
}
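The cryptoContext field is initialized in test setup that is not part of this excerpt. A minimal sketch of what such setup could look like, assuming HBase's Encryption utility class; the method names and the "AES" cipher choice are assumptions and may differ across versions:

// Hypothetical setup for the cryptoContext field used above.
private static Encryption.Context cryptoContext;

static void setUpCryptoContext(Configuration conf) {
  Cipher aes = Encryption.getCipher(conf, "AES"); // assumed cipher name
  byte[] key = new byte[aes.getKeyLength()];
  new java.security.SecureRandom().nextBytes(key); // random test key
  cryptoContext = Encryption.newContext(conf);
  cryptoContext.setCipher(aes);
  cryptoContext.setKey(key);
}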
protected AbstractHFileReader(
    Path path, FixedFileTrailer trailer, final long fileSize, final CacheConfig cacheConf,
    final HFileSystem hfs, final Configuration conf) {
  this.trailer = trailer;
  this.compressAlgo = trailer.getCompressionCodec();
  this.cacheConf = cacheConf;
  this.fileSize = fileSize;
  this.path = path;
  this.name = path.getName();
  this.hfs = hfs;
  this.conf = conf;
}
@Override
public String toString() {
  return "reader=" + path.toString()
      + (!isFileInfoLoaded() ? ""
          : ", compression=" + compressAlgo.getName()
            + ", cacheConf=" + cacheConf
            + ", firstKey=" + toStringFirstKey()
            + ", lastKey=" + toStringLastKey())
      + ", avgKeyLen=" + avgKeyLen
      + ", avgValueLen=" + avgValueLen
      + ", entries=" + trailer.getEntryCount()
      + ", length=" + fileSize;
}
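toString() relies on toStringFirstKey() and toStringLastKey(), which are not part of this excerpt. A minimal sketch, assuming the reader exposes its first and last keys as ByteBuffers via getFirstKey()/getLastKey() (hypothetical accessors here) and formats them with Bytes.toStringBinary:

// Hypothetical helpers used by toString() above; the real ones may
// format keys differently (e.g. via a KeyValue-aware printer).
protected String toStringFirstKey() {
  ByteBuffer firstKey = getFirstKey();
  return firstKey == null ? null : Bytes.toStringBinary(firstKey);
}

protected String toStringLastKey() {
  ByteBuffer lastKey = getLastKey();
  return lastKey == null ? null : Bytes.toStringBinary(lastKey);
}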
@Override
public void close() throws IOException {
  if (outputStream == null) {
    return;
  }
  // Save data block encoder metadata in the file info.
  blockEncoder.saveMetadata(this);
  // Write out the end of the data blocks, then write meta data blocks,
  // followed by fileinfo, data block index and meta block index.
  finishBlock();
  writeInlineBlocks(true);

  FixedFileTrailer trailer = new FixedFileTrailer(2, HFileReaderV2.MAX_MINOR_VERSION);

  // Write out the metadata blocks if any.
  if (!metaNames.isEmpty()) {
    for (int i = 0; i < metaNames.size(); ++i) {
      // store the beginning offset
      long offset = outputStream.getPos();
      // write the metadata content
      DataOutputStream dos = fsBlockWriter.startWriting(BlockType.META);
      metaData.get(i).write(dos);

      fsBlockWriter.writeHeaderAndData(outputStream);
      totalUncompressedBytes += fsBlockWriter.getUncompressedSizeWithHeader();

      // Add the new meta block to the meta index.
      metaBlockIndexWriter.addEntry(
          metaNames.get(i), offset, fsBlockWriter.getOnDiskSizeWithHeader());
    }
  }

  // Load-on-open section.
  //
  // Data block index.
  //
  // In version 2, this section of the file starts with the root level data
  // block index. We call a function that writes intermediate-level blocks
  // first, then root level, and returns the offset of the root level block
  // index.
  long rootIndexOffset = dataBlockIndexWriter.writeIndexBlocks(outputStream);
  trailer.setLoadOnOpenOffset(rootIndexOffset);

  // Meta block index.
  metaBlockIndexWriter.writeSingleLevelIndex(
      fsBlockWriter.startWriting(BlockType.ROOT_INDEX), "meta");
  fsBlockWriter.writeHeaderAndData(outputStream);
  totalUncompressedBytes += fsBlockWriter.getUncompressedSizeWithHeader();

  if (this.includeMemstoreTS) {
    appendFileInfo(MAX_MEMSTORE_TS_KEY, Bytes.toBytes(maxMemstoreTS));
    appendFileInfo(KEY_VALUE_VERSION, Bytes.toBytes(KEY_VALUE_VER_WITH_MEMSTORE));
  }

  // File info
  writeFileInfo(trailer, fsBlockWriter.startWriting(BlockType.FILE_INFO));
  fsBlockWriter.writeHeaderAndData(outputStream);
  totalUncompressedBytes += fsBlockWriter.getUncompressedSizeWithHeader();

  // Load-on-open data supplied by higher levels, e.g. Bloom filters.
  for (BlockWritable w : additionalLoadOnOpenData) {
    fsBlockWriter.writeBlock(w, outputStream);
    totalUncompressedBytes += fsBlockWriter.getUncompressedSizeWithHeader();
  }

  // Now finish off the trailer.
  trailer.setNumDataIndexLevels(dataBlockIndexWriter.getNumLevels());
  trailer.setUncompressedDataIndexSize(dataBlockIndexWriter.getTotalUncompressedSize());
  trailer.setFirstDataBlockOffset(firstDataBlockOffset);
  trailer.setLastDataBlockOffset(lastDataBlockOffset);
  trailer.setComparatorClass(comparator.getClass());
  trailer.setDataIndexCount(dataBlockIndexWriter.getNumRootEntries());

  finishClose(trailer);

  fsBlockWriter.release();
}
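For reference, the write order of a version-2 file can be read directly off close(); summarized below, derived only from the calls above rather than from a format specification:

// Version-2 HFile layout as produced by close() above, in write order:
//   1. data blocks, with inline blocks interleaved (writeInlineBlocks)
//   2. meta blocks, if any were queued via metaNames/metaData
//   3. intermediate-level data index blocks, then the root data block
//      index; the root's offset becomes the trailer's load-on-open offset
//   4. meta block index
//   5. file info
//   6. additional load-on-open data, e.g. Bloom filter metadata
//   7. fixed file trailer (finishClose is expected to serialize it)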
/** @return number of KV entries in this HFile */
@Override
public long getEntries() {
  return trailer.getEntryCount();
}
@Test(timeout = 6000000)
public void testHFileEncryption() throws Exception {
  // Create 1000 random test KVs
  RedundantKVGenerator generator = new RedundantKVGenerator();
  List<KeyValue> testKvs = generator.generateTestKeyValues(1000);

  // Iterate through data block encoding and compression combinations
  Configuration conf = TEST_UTIL.getConfiguration();
  CacheConfig cacheConf = new CacheConfig(conf);
  for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
    for (Compression.Algorithm compression : TestHFileBlock.COMPRESSION_ALGORITHMS) {
      HFileContext fileContext = new HFileContextBuilder()
          .withBlockSize(4096) // small blocks
          .withEncryptionContext(cryptoContext)
          .withCompression(compression)
          .withDataBlockEncoding(encoding)
          .build();

      // write a new test HFile
      LOG.info("Writing with " + fileContext);
      Path path = new Path(TEST_UTIL.getDataTestDir(), UUID.randomUUID().toString() + ".hfile");
      FSDataOutputStream out = fs.create(path);
      HFile.Writer writer = HFile.getWriterFactory(conf, cacheConf)
          .withOutputStream(out)
          .withFileContext(fileContext)
          .create();
      try {
        for (KeyValue kv : testKvs) {
          writer.append(kv);
        }
      } finally {
        writer.close();
        out.close();
      }

      // read it back in
      LOG.info("Reading with " + fileContext);
      int i = 0;
      HFileScanner scanner = null;
      HFile.Reader reader = HFile.createReader(fs, path, cacheConf, conf);
      try {
        reader.loadFileInfo();
        FixedFileTrailer trailer = reader.getTrailer();
        assertNotNull(trailer.getEncryptionKey());
        scanner = reader.getScanner(false, false);
        assertTrue("Initial seekTo failed", scanner.seekTo());
        do {
          Cell kv = scanner.getCell();
          assertTrue(
              "Read back an unexpected or invalid KV",
              testKvs.contains(KeyValueUtil.ensureKeyValue(kv)));
          i++;
        } while (scanner.next());
      } finally {
        reader.close();
        if (scanner != null) { // guard against a failure before getScanner
          scanner.close();
        }
      }
      assertEquals("Did not read back as many KVs as written", testKvs.size(), i);

      // Test random seeks with pread
      LOG.info("Random seeking with " + fileContext);
      reader = HFile.createReader(fs, path, cacheConf, conf);
      try {
        scanner = reader.getScanner(false, true);
        assertTrue("Initial seekTo failed", scanner.seekTo());
        for (i = 0; i < 100; i++) {
          KeyValue kv = testKvs.get(RNG.nextInt(testKvs.size()));
          // seekTo returns 0 on an exact match
          assertEquals("Unable to find KV as expected: " + kv, 0, scanner.seekTo(kv));
        }
      } finally {
        if (scanner != null) {
          scanner.close();
        }
        reader.close();
      }
    }
  }
}