/** * Test creating/using/deleting snapshots from the client * * <p>This is an end-to-end test for the snapshot utility * * <p>TODO This is essentially a clone of TestSnapshotFromClient. It is worth refactoring because * there will be a few more flavors of snapshots that need to run these tests. */ @Category({RegionServerTests.class, LargeTests.class}) public class TestFlushSnapshotFromClient { private static final Log LOG = LogFactory.getLog(TestFlushSnapshotFromClient.class); @ClassRule public static final TestRule timeout = CategoryBasedTimeout.forClass(TestFlushSnapshotFromClient.class); protected static final HBaseTestingUtility UTIL = new HBaseTestingUtility(); protected static final int NUM_RS = 2; protected static final byte[] TEST_FAM = Bytes.toBytes("fam"); protected static final TableName TABLE_NAME = TableName.valueOf("test"); protected final int DEFAULT_NUM_ROWS = 100; protected Admin admin = null; @BeforeClass public static void setupCluster() throws Exception { setupConf(UTIL.getConfiguration()); UTIL.startMiniCluster(NUM_RS); } protected static void setupConf(Configuration conf) { // disable the ui conf.setInt("hbase.regionserver.info.port", -1); // change the flush size to a small amount, regulating number of store files conf.setInt("hbase.hregion.memstore.flush.size", 25000); // so make sure we get a compaction when doing a load, but keep around some // files in the store conf.setInt("hbase.hstore.compaction.min", 10); conf.setInt("hbase.hstore.compactionThreshold", 10); // block writes if we get to 12 store files conf.setInt("hbase.hstore.blockingStoreFiles", 12); // Enable snapshot conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true); conf.set( HConstants.HBASE_REGION_SPLIT_POLICY_KEY, ConstantSizeRegionSplitPolicy.class.getName()); conf.set( CompactingMemStore.COMPACTING_MEMSTORE_TYPE_KEY, String.valueOf(HColumnDescriptor.MemoryCompaction.NONE)); } @Before public void setup() throws Exception { createTable(); this.admin = UTIL.getConnection().getAdmin(); } protected void createTable() throws Exception { SnapshotTestingUtils.createTable(UTIL, TABLE_NAME, TEST_FAM); } @After public void tearDown() throws Exception { UTIL.deleteTable(TABLE_NAME); SnapshotTestingUtils.deleteAllSnapshots(this.admin); this.admin.close(); SnapshotTestingUtils.deleteArchiveDirectory(UTIL); } @AfterClass public static void cleanupTest() throws Exception { try { UTIL.shutdownMiniCluster(); } catch (Exception e) { LOG.warn("failure shutting down cluster", e); } } /** * Test simple flush snapshotting a table that is online * * @throws Exception */ @Test public void testFlushTableSnapshot() throws Exception { // make sure we don't fail on listing snapshots SnapshotTestingUtils.assertNoSnapshots(admin); // put some stuff in the table SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, DEFAULT_NUM_ROWS, TEST_FAM); LOG.debug("FS state before snapshot:"); UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG); // take a snapshot of the enabled table String snapshotString = "offlineTableSnapshot"; byte[] snapshot = Bytes.toBytes(snapshotString); admin.snapshot(snapshotString, TABLE_NAME, SnapshotType.FLUSH); LOG.debug("Snapshot completed."); // make sure we have the snapshot List<SnapshotDescription> snapshots = SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshot, TABLE_NAME); // make sure it's a valid snapshot LOG.debug("FS state after snapshot:"); UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);
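// confirmSnapshotValid walks the completed snapshot on the master filesystem and verifies that it
// references the expected table (TABLE_NAME) and the TEST_FAM column family for the table's regions.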
SnapshotTestingUtils.confirmSnapshotValid( UTIL, ProtobufUtil.createHBaseProtosSnapshotDesc(snapshots.get(0)), TABLE_NAME, TEST_FAM); } /** * Test snapshotting a table that is online without flushing * * @throws Exception */ @Test public void testSkipFlushTableSnapshot() throws Exception { // make sure we don't fail on listing snapshots SnapshotTestingUtils.assertNoSnapshots(admin); // put some stuff in the table Table table = UTIL.getConnection().getTable(TABLE_NAME); UTIL.loadTable(table, TEST_FAM); UTIL.flush(TABLE_NAME); LOG.debug("FS state before snapshot:"); UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG); // take a snapshot of the enabled table String snapshotString = "skipFlushTableSnapshot"; byte[] snapshot = Bytes.toBytes(snapshotString); admin.snapshot(snapshotString, TABLE_NAME, SnapshotType.SKIPFLUSH); LOG.debug("Snapshot completed."); // make sure we have the snapshot List<SnapshotDescription> snapshots = SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshot, TABLE_NAME); // make sure it's a valid snapshot LOG.debug("FS state after snapshot:"); UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG); SnapshotTestingUtils.confirmSnapshotValid( UTIL, ProtobufUtil.createHBaseProtosSnapshotDesc(snapshots.get(0)), TABLE_NAME, TEST_FAM); admin.deleteSnapshot(snapshot); snapshots = admin.listSnapshots(); SnapshotTestingUtils.assertNoSnapshots(admin); } /** * Test simple flush snapshotting a table that is online * * @throws Exception */ @Test public void testFlushTableSnapshotWithProcedure() throws Exception { // make sure we don't fail on listing snapshots SnapshotTestingUtils.assertNoSnapshots(admin); // put some stuff in the table SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, DEFAULT_NUM_ROWS, TEST_FAM); LOG.debug("FS state before snapshot:"); UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG); // take a snapshot of the enabled table String snapshotString = "offlineTableSnapshot"; byte[] snapshot = Bytes.toBytes(snapshotString); Map<String, String> props = new HashMap<String, String>(); props.put("table", TABLE_NAME.getNameAsString()); admin.execProcedure( SnapshotManager.ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION, snapshotString, props); LOG.debug("Snapshot completed."); // make sure we have the snapshot List<SnapshotDescription> snapshots = SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshot, TABLE_NAME); // make sure it's a valid snapshot LOG.debug("FS state after snapshot:"); UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG); SnapshotTestingUtils.confirmSnapshotValid( UTIL, ProtobufUtil.createHBaseProtosSnapshotDesc(snapshots.get(0)), TABLE_NAME, TEST_FAM); } @Test public void testSnapshotFailsOnNonExistantTable() throws Exception { // make sure we don't fail on listing snapshots SnapshotTestingUtils.assertNoSnapshots(admin); TableName tableName = TableName.valueOf("_not_a_table"); // make sure the table doesn't exist boolean fail = false; do { try { admin.getTableDescriptor(tableName); fail = true; LOG.error("Table:" + tableName + " already exists, checking a new name"); tableName = TableName.valueOf(tableName + "!"); } catch (TableNotFoundException e) { fail = false; } } while (fail); // snapshot the non-existent table try { admin.snapshot("fail", tableName, SnapshotType.FLUSH); fail("Snapshot succeeded even though the table does not exist."); } catch (SnapshotCreationException e) { LOG.info("Correctly failed to snapshot a non-existent 
table:" + e.getMessage()); } } @Test public void testAsyncFlushSnapshot() throws Exception { HBaseProtos.SnapshotDescription snapshot = HBaseProtos.SnapshotDescription.newBuilder() .setName("asyncSnapshot") .setTable(TABLE_NAME.getNameAsString()) .setType(HBaseProtos.SnapshotDescription.Type.FLUSH) .build(); // take the snapshot async admin.takeSnapshotAsync( new SnapshotDescription("asyncSnapshot", TABLE_NAME, SnapshotType.FLUSH)); // constantly loop, looking for the snapshot to complete HMaster master = UTIL.getMiniHBaseCluster().getMaster(); SnapshotTestingUtils.waitForSnapshotToComplete(master, snapshot, 200); LOG.info(" === Async Snapshot Completed ==="); UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG); // make sure we get the snapshot SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshot); } @Test public void testSnapshotStateAfterMerge() throws Exception { int numRows = DEFAULT_NUM_ROWS; // make sure we don't fail on listing snapshots SnapshotTestingUtils.assertNoSnapshots(admin); // load the table so we have some data SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, numRows, TEST_FAM); // Take a snapshot String snapshotBeforeMergeName = "snapshotBeforeMerge"; admin.snapshot(snapshotBeforeMergeName, TABLE_NAME, SnapshotType.FLUSH); // Clone the table TableName cloneBeforeMergeName = TableName.valueOf("cloneBeforeMerge"); admin.cloneSnapshot(snapshotBeforeMergeName, cloneBeforeMergeName); SnapshotTestingUtils.waitForTableToBeOnline(UTIL, cloneBeforeMergeName); // Merge two regions List<HRegionInfo> regions = admin.getTableRegions(TABLE_NAME); Collections.sort( regions, new Comparator<HRegionInfo>() { public int compare(HRegionInfo r1, HRegionInfo r2) { return Bytes.compareTo(r1.getStartKey(), r2.getStartKey()); } }); int numRegions = admin.getTableRegions(TABLE_NAME).size(); int numRegionsAfterMerge = numRegions - 2; admin.mergeRegionsAsync( regions.get(1).getEncodedNameAsBytes(), regions.get(2).getEncodedNameAsBytes(), true); admin.mergeRegionsAsync( regions.get(4).getEncodedNameAsBytes(), regions.get(5).getEncodedNameAsBytes(), true); // Verify that there's one region less waitRegionsAfterMerge(numRegionsAfterMerge); assertEquals(numRegionsAfterMerge, admin.getTableRegions(TABLE_NAME).size()); // Clone the table TableName cloneAfterMergeName = TableName.valueOf("cloneAfterMerge"); admin.cloneSnapshot(snapshotBeforeMergeName, cloneAfterMergeName); SnapshotTestingUtils.waitForTableToBeOnline(UTIL, cloneAfterMergeName); verifyRowCount(UTIL, TABLE_NAME, numRows); verifyRowCount(UTIL, cloneBeforeMergeName, numRows); verifyRowCount(UTIL, cloneAfterMergeName, numRows); // test that we can delete the snapshot UTIL.deleteTable(cloneAfterMergeName); UTIL.deleteTable(cloneBeforeMergeName); } @Test public void testTakeSnapshotAfterMerge() throws Exception { int numRows = DEFAULT_NUM_ROWS; // make sure we don't fail on listing snapshots SnapshotTestingUtils.assertNoSnapshots(admin); // load the table so we have some data SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, numRows, TEST_FAM); // Merge two regions List<HRegionInfo> regions = admin.getTableRegions(TABLE_NAME); Collections.sort( regions, new Comparator<HRegionInfo>() { public int compare(HRegionInfo r1, HRegionInfo r2) { return Bytes.compareTo(r1.getStartKey(), r2.getStartKey()); } }); int numRegions = admin.getTableRegions(TABLE_NAME).size(); int numRegionsAfterMerge = numRegions - 2; admin.mergeRegionsAsync( regions.get(1).getEncodedNameAsBytes(), regions.get(2).getEncodedNameAsBytes(), 
true); admin.mergeRegionsAsync( regions.get(4).getEncodedNameAsBytes(), regions.get(5).getEncodedNameAsBytes(), true); waitRegionsAfterMerge(numRegionsAfterMerge); assertEquals(numRegionsAfterMerge, admin.getTableRegions(TABLE_NAME).size()); // Take a snapshot String snapshotName = "snapshotAfterMerge"; SnapshotTestingUtils.snapshot(admin, snapshotName, TABLE_NAME, SnapshotType.FLUSH, 3); // Clone the table TableName cloneName = TableName.valueOf("cloneMerge"); admin.cloneSnapshot(snapshotName, cloneName); SnapshotTestingUtils.waitForTableToBeOnline(UTIL, cloneName); verifyRowCount(UTIL, TABLE_NAME, numRows); verifyRowCount(UTIL, cloneName, numRows); // clean up the cloned table UTIL.deleteTable(cloneName); } /** Basic end-to-end test of simple-flush-based snapshots */ @Test public void testFlushCreateListDestroy() throws Exception { LOG.debug("------- Starting Snapshot test -------------"); // make sure we don't fail on listing snapshots SnapshotTestingUtils.assertNoSnapshots(admin); // load the table so we have some data SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, DEFAULT_NUM_ROWS, TEST_FAM); String snapshotName = "flushSnapshotCreateListDestroy"; FileSystem fs = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getFileSystem(); Path rootDir = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir(); SnapshotTestingUtils.createSnapshotAndValidate( admin, TABLE_NAME, Bytes.toString(TEST_FAM), snapshotName, rootDir, fs, true); } /** * Demonstrate that we reject a snapshot request if a snapshot is already running on the same * table, and that concurrent snapshots on different tables can both succeed. */ @Test public void testConcurrentSnapshottingAttempts() throws IOException, InterruptedException { final TableName TABLE2_NAME = TableName.valueOf(TABLE_NAME + "2"); int ssNum = 20; // make sure we don't fail on listing snapshots SnapshotTestingUtils.assertNoSnapshots(admin); // create second testing table SnapshotTestingUtils.createTable(UTIL, TABLE2_NAME, TEST_FAM); // load the table so we have some data SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, DEFAULT_NUM_ROWS, TEST_FAM); SnapshotTestingUtils.loadData(UTIL, TABLE2_NAME, DEFAULT_NUM_ROWS, TEST_FAM); final CountDownLatch toBeSubmitted = new CountDownLatch(ssNum); // We'll have one of these per thread class SSRunnable implements Runnable { SnapshotDescription ss; SSRunnable(SnapshotDescription ss) { this.ss = ss; } @Override public void run() { try { LOG.info( "Submitting snapshot request: " + ClientSnapshotDescriptionUtils.toString( ProtobufUtil.createHBaseProtosSnapshotDesc(ss))); admin.takeSnapshotAsync(ss); } catch (Exception e) { LOG.info( "Exception during snapshot request: " + ClientSnapshotDescriptionUtils.toString( ProtobufUtil.createHBaseProtosSnapshotDesc(ss)) + ". 
This is ok, we expect some", e); } LOG.info( "Submitted snapshot request: " + ClientSnapshotDescriptionUtils.toString( ProtobufUtil.createHBaseProtosSnapshotDesc(ss))); toBeSubmitted.countDown(); } }; // build descriptions SnapshotDescription[] descs = new SnapshotDescription[ssNum]; for (int i = 0; i < ssNum; i++) { if (i % 2 == 0) { descs[i] = new SnapshotDescription("ss" + i, TABLE_NAME, SnapshotType.FLUSH); } else { descs[i] = new SnapshotDescription("ss" + i, TABLE2_NAME, SnapshotType.FLUSH); } } // kick each off its own thread for (int i = 0; i < ssNum; i++) { new Thread(new SSRunnable(descs[i])).start(); } // wait until all have been submitted toBeSubmitted.await(); // loop until all are done. while (true) { int doneCount = 0; for (SnapshotDescription ss : descs) { try { if (admin.isSnapshotFinished(ss)) { doneCount++; } } catch (Exception e) { LOG.warn("Got an exception when checking for snapshot " + ss.getName(), e); doneCount++; } } if (doneCount == descs.length) { break; } Thread.sleep(100); } // dump for debugging UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG); List<SnapshotDescription> taken = admin.listSnapshots(); int takenSize = taken.size(); LOG.info("Taken " + takenSize + " snapshots: " + taken); assertTrue( "We expect at least 1 request to be rejected because we concurrently" + " issued many requests", takenSize < ssNum && takenSize > 0); // Verify that there's at least one snapshot per table int t1SnapshotsCount = 0; int t2SnapshotsCount = 0; for (SnapshotDescription ss : taken) { if (ss.getTableName().equals(TABLE_NAME)) { t1SnapshotsCount++; } else if (ss.getTableName().equals(TABLE2_NAME)) { t2SnapshotsCount++; } } assertTrue("We expect at least 1 snapshot of table1 ", t1SnapshotsCount > 0); assertTrue("We expect at least 1 snapshot of table2 ", t2SnapshotsCount > 0); UTIL.deleteTable(TABLE2_NAME); } private void waitRegionsAfterMerge(final long numRegionsAfterMerge) throws IOException, InterruptedException { // Wait for the region count to drop to numRegionsAfterMerge long startTime = System.currentTimeMillis(); while (admin.getTableRegions(TABLE_NAME).size() != numRegionsAfterMerge) { // This may be flaky... if the merge is not complete after 15sec, give up; // the caller's assertEquals(numRegionsAfterMerge) will then fail. if ((System.currentTimeMillis() - startTime) > 15000) break; Thread.sleep(100); } SnapshotTestingUtils.waitForTableToBeOnline(UTIL, TABLE_NAME); } protected void verifyRowCount( final HBaseTestingUtility util, final TableName tableName, long expectedRows) throws IOException { SnapshotTestingUtils.verifyRowCount(util, tableName, expectedRows); } protected int countRows(final Table table, final byte[]... families) throws IOException { return UTIL.countRows(table, families); } }
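For reference, a minimal sketch of the client-side flush-snapshot lifecycle the tests above exercise (snapshot, list, clone, delete). It is not part of the test class: conn, the table name, and the snapshot/clone names are assumptions for illustration, the usual org.apache.hadoop.hbase and org.apache.hadoop.hbase.client imports are omitted, and only Admin calls already used in the tests appear.

// Sketch only: take a flush snapshot of an online table, then clone it and clean up.
try (Admin admin = conn.getAdmin()) {
  TableName table = TableName.valueOf("test");
  admin.snapshot("exampleFlushSnapshot", table, SnapshotType.FLUSH);

  // The new snapshot should now show up in the listing.
  for (SnapshotDescription sd : admin.listSnapshots()) {
    LOG.info("snapshot " + sd.getName() + " of table " + sd.getTableName());
  }

  // Materialize the snapshot as a new table, then remove the clone and the snapshot.
  TableName clone = TableName.valueOf("exampleClone");
  admin.cloneSnapshot("exampleFlushSnapshot", clone);
  admin.disableTable(clone);
  admin.deleteTable(clone);
  admin.deleteSnapshot("exampleFlushSnapshot");
}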
/** * Test all of the data block encoding algorithms for correctness. Most of the class generate data * which will test different branches in code. */ @Category({IOTests.class, LargeTests.class}) @RunWith(Parameterized.class) public class TestDataBlockEncoders { private static final Log LOG = LogFactory.getLog(TestDataBlockEncoders.class); @Rule public final TestRule timeout = CategoryBasedTimeout.builder() .withTimeout(this.getClass()) .withLookingForStuckThread(true) .build(); private static int NUMBER_OF_KV = 10000; private static int NUM_RANDOM_SEEKS = 1000; private static int ENCODED_DATA_OFFSET = HConstants.HFILEBLOCK_HEADER_SIZE + DataBlockEncoding.ID_SIZE; private RedundantKVGenerator generator = new RedundantKVGenerator(); private Random randomizer = new Random(42l); private final boolean includesMemstoreTS; private final boolean includesTags; private final boolean useOffheapData; @Parameters public static Collection<Object[]> parameters() { return HBaseTestingUtility.memStoreTSTagsAndOffheapCombination(); } public TestDataBlockEncoders( boolean includesMemstoreTS, boolean includesTag, boolean useOffheapData) { this.includesMemstoreTS = includesMemstoreTS; this.includesTags = includesTag; this.useOffheapData = useOffheapData; } private HFileBlockEncodingContext getEncodingContext( Compression.Algorithm algo, DataBlockEncoding encoding) { DataBlockEncoder encoder = encoding.getEncoder(); HFileContext meta = new HFileContextBuilder() .withHBaseCheckSum(false) .withIncludesMvcc(includesMemstoreTS) .withIncludesTags(includesTags) .withCompression(algo) .build(); if (encoder != null) { return encoder.newDataBlockEncodingContext( encoding, HConstants.HFILEBLOCK_DUMMY_HEADER, meta); } else { return new HFileBlockDefaultEncodingContext( encoding, HConstants.HFILEBLOCK_DUMMY_HEADER, meta); } } /** * Test data block encoding of empty KeyValue. * * @throws IOException On test failure. */ @Test public void testEmptyKeyValues() throws IOException { List<KeyValue> kvList = new ArrayList<KeyValue>(); byte[] row = new byte[0]; byte[] family = new byte[0]; byte[] qualifier = new byte[0]; byte[] value = new byte[0]; if (!includesTags) { kvList.add(new KeyValue(row, family, qualifier, 0l, value)); kvList.add(new KeyValue(row, family, qualifier, 0l, value)); } else { byte[] metaValue1 = Bytes.toBytes("metaValue1"); byte[] metaValue2 = Bytes.toBytes("metaValue2"); kvList.add( new KeyValue( row, family, qualifier, 0l, value, new Tag[] {new ArrayBackedTag((byte) 1, metaValue1)})); kvList.add( new KeyValue( row, family, qualifier, 0l, value, new Tag[] {new ArrayBackedTag((byte) 1, metaValue2)})); } testEncodersOnDataset(kvList, includesMemstoreTS, includesTags); } /** * Test KeyValues with negative timestamp. * * @throws IOException On test failure. 
*/ @Test public void testNegativeTimestamps() throws IOException { List<KeyValue> kvList = new ArrayList<KeyValue>(); byte[] row = new byte[0]; byte[] family = new byte[0]; byte[] qualifier = new byte[0]; byte[] value = new byte[0]; if (includesTags) { byte[] metaValue1 = Bytes.toBytes("metaValue1"); byte[] metaValue2 = Bytes.toBytes("metaValue2"); kvList.add( new KeyValue( row, family, qualifier, 0l, value, new Tag[] {new ArrayBackedTag((byte) 1, metaValue1)})); kvList.add( new KeyValue( row, family, qualifier, 0l, value, new Tag[] {new ArrayBackedTag((byte) 1, metaValue2)})); } else { kvList.add(new KeyValue(row, family, qualifier, -1l, Type.Put, value)); kvList.add(new KeyValue(row, family, qualifier, -2l, Type.Put, value)); } testEncodersOnDataset(kvList, includesMemstoreTS, includesTags); } /** * Test whether compression -> decompression gives the consistent results on pseudorandom sample. * * @throws IOException On test failure. */ @Test public void testExecutionOnSample() throws IOException { List<KeyValue> kvList = generator.generateTestKeyValues(NUMBER_OF_KV, includesTags); testEncodersOnDataset(kvList, includesMemstoreTS, includesTags); } /** Test seeking while file is encoded. */ @Test public void testSeekingOnSample() throws IOException { List<KeyValue> sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV, includesTags); // create all seekers List<DataBlockEncoder.EncodedSeeker> encodedSeekers = new ArrayList<DataBlockEncoder.EncodedSeeker>(); for (DataBlockEncoding encoding : DataBlockEncoding.values()) { LOG.info("Encoding: " + encoding); // Off heap block data support not added for PREFIX_TREE DBE yet. // TODO remove this once support is added. HBASE-12298 if (this.useOffheapData && encoding == DataBlockEncoding.PREFIX_TREE) continue; DataBlockEncoder encoder = encoding.getEncoder(); if (encoder == null) { continue; } LOG.info("Encoder: " + encoder); ByteBuffer encodedBuffer = encodeKeyValues( encoding, sampleKv, getEncodingContext(Compression.Algorithm.NONE, encoding), this.useOffheapData); HFileContext meta = new HFileContextBuilder() .withHBaseCheckSum(false) .withIncludesMvcc(includesMemstoreTS) .withIncludesTags(includesTags) .withCompression(Compression.Algorithm.NONE) .build(); DataBlockEncoder.EncodedSeeker seeker = encoder.createSeeker( CellComparator.COMPARATOR, encoder.newDataBlockDecodingContext(meta)); seeker.setCurrentBuffer(new SingleByteBuff(encodedBuffer)); encodedSeekers.add(seeker); } LOG.info("Testing it!"); // test it! 
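// Each random seek below is replayed against every encoder's seeker; checkSeekingConsistency then
// compares the cell, serialized key, and value returned by all seekers, so any encoding that lands
// on a different position than the others fails the test.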
// try a few random seeks for (boolean seekBefore : new boolean[] {false, true}) { for (int i = 0; i < NUM_RANDOM_SEEKS; ++i) { int keyValueId; if (!seekBefore) { keyValueId = randomizer.nextInt(sampleKv.size()); } else { keyValueId = randomizer.nextInt(sampleKv.size() - 1) + 1; } KeyValue keyValue = sampleKv.get(keyValueId); checkSeekingConsistency(encodedSeekers, seekBefore, keyValue); } } // check edge cases LOG.info("Checking edge cases"); checkSeekingConsistency(encodedSeekers, false, sampleKv.get(0)); for (boolean seekBefore : new boolean[] {false, true}) { checkSeekingConsistency(encodedSeekers, seekBefore, sampleKv.get(sampleKv.size() - 1)); KeyValue midKv = sampleKv.get(sampleKv.size() / 2); Cell lastMidKv = CellUtil.createLastOnRowCol(midKv); checkSeekingConsistency(encodedSeekers, seekBefore, lastMidKv); } LOG.info("Done"); } static ByteBuffer encodeKeyValues( DataBlockEncoding encoding, List<KeyValue> kvs, HFileBlockEncodingContext encodingContext, boolean useOffheapData) throws IOException { DataBlockEncoder encoder = encoding.getEncoder(); ByteArrayOutputStream baos = new ByteArrayOutputStream(); baos.write(HConstants.HFILEBLOCK_DUMMY_HEADER); DataOutputStream dos = new DataOutputStream(baos); encoder.startBlockEncoding(encodingContext, dos); for (KeyValue kv : kvs) { encoder.encode(kv, encodingContext, dos); } encoder.endBlockEncoding(encodingContext, dos, baos.getBuffer()); byte[] encodedData = new byte[baos.size() - ENCODED_DATA_OFFSET]; System.arraycopy(baos.toByteArray(), ENCODED_DATA_OFFSET, encodedData, 0, encodedData.length); if (useOffheapData) { ByteBuffer bb = ByteBuffer.allocateDirect(encodedData.length); bb.put(encodedData); bb.rewind(); return bb; } return ByteBuffer.wrap(encodedData); } @Test public void testNextOnSample() throws IOException { List<KeyValue> sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV, includesTags); for (DataBlockEncoding encoding : DataBlockEncoding.values()) { // Off heap block data support not added for PREFIX_TREE DBE yet. // TODO remove this once support is added. HBASE-12298 if (this.useOffheapData && encoding == DataBlockEncoding.PREFIX_TREE) continue; if (encoding.getEncoder() == null) { continue; } DataBlockEncoder encoder = encoding.getEncoder(); ByteBuffer encodedBuffer = encodeKeyValues( encoding, sampleKv, getEncodingContext(Compression.Algorithm.NONE, encoding), this.useOffheapData); HFileContext meta = new HFileContextBuilder() .withHBaseCheckSum(false) .withIncludesMvcc(includesMemstoreTS) .withIncludesTags(includesTags) .withCompression(Compression.Algorithm.NONE) .build(); DataBlockEncoder.EncodedSeeker seeker = encoder.createSeeker( CellComparator.COMPARATOR, encoder.newDataBlockDecodingContext(meta)); seeker.setCurrentBuffer(new SingleByteBuff(encodedBuffer)); int i = 0; do { KeyValue expectedKeyValue = sampleKv.get(i); Cell cell = seeker.getCell(); if (CellComparator.COMPARATOR.compareKeyIgnoresMvcc(expectedKeyValue, cell) != 0) { int commonPrefix = CellUtil.findCommonPrefixInFlatKey(expectedKeyValue, cell, false, true); fail( String.format( "next() produces wrong results " + "encoder: %s i: %d commonPrefix: %d" + "\n expected %s\n actual %s", encoder.toString(), i, commonPrefix, Bytes.toStringBinary( expectedKeyValue.getBuffer(), expectedKeyValue.getKeyOffset(), expectedKeyValue.getKeyLength()), CellUtil.toString(cell, false))); } i++; } while (seeker.next()); } } /** * Test whether the decompression of first key is implemented correctly. 
* * @throws IOException */ @Test public void testFirstKeyInBlockOnSample() throws IOException { List<KeyValue> sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV, includesTags); for (DataBlockEncoding encoding : DataBlockEncoding.values()) { // Off heap block data support not added for PREFIX_TREE DBE yet. // TODO remove this once support is added. HBASE-12298 if (this.useOffheapData && encoding == DataBlockEncoding.PREFIX_TREE) continue; if (encoding.getEncoder() == null) { continue; } DataBlockEncoder encoder = encoding.getEncoder(); ByteBuffer encodedBuffer = encodeKeyValues( encoding, sampleKv, getEncodingContext(Compression.Algorithm.NONE, encoding), this.useOffheapData); Cell key = encoder.getFirstKeyCellInBlock(new SingleByteBuff(encodedBuffer)); KeyValue firstKv = sampleKv.get(0); if (0 != CellComparator.COMPARATOR.compareKeyIgnoresMvcc(key, firstKv)) { int commonPrefix = CellUtil.findCommonPrefixInFlatKey(key, firstKv, false, true); fail(String.format("Bug in '%s' commonPrefix %d", encoder.toString(), commonPrefix)); } } } private void checkSeekingConsistency( List<DataBlockEncoder.EncodedSeeker> encodedSeekers, boolean seekBefore, Cell keyValue) { Cell expectedKeyValue = null; ByteBuffer expectedKey = null; ByteBuffer expectedValue = null; for (DataBlockEncoder.EncodedSeeker seeker : encodedSeekers) { seeker.seekToKeyInBlock(keyValue, seekBefore); seeker.rewind(); Cell actualKeyValue = seeker.getCell(); ByteBuffer actualKey = null; if (seeker instanceof PrefixTreeSeeker) { byte[] serializedKey = CellUtil.getCellKeySerializedAsKeyValueKey(seeker.getKey()); actualKey = ByteBuffer.wrap(KeyValueUtil.createKeyValueFromKey(serializedKey).getKey()); } else { actualKey = ByteBuffer.wrap(((KeyValue) seeker.getKey()).getKey()); } ByteBuffer actualValue = seeker.getValueShallowCopy(); if (expectedKeyValue != null) { assertTrue(CellUtil.equals(expectedKeyValue, actualKeyValue)); } else { expectedKeyValue = actualKeyValue; } if (expectedKey != null) { assertEquals(expectedKey, actualKey); } else { expectedKey = actualKey; } if (expectedValue != null) { assertEquals(expectedValue, actualValue); } else { expectedValue = actualValue; } } } private void testEncodersOnDataset( List<KeyValue> kvList, boolean includesMemstoreTS, boolean includesTags) throws IOException { ByteBuffer unencodedDataBuf = RedundantKVGenerator.convertKvToByteBuffer(kvList, includesMemstoreTS); HFileContext fileContext = new HFileContextBuilder() .withIncludesMvcc(includesMemstoreTS) .withIncludesTags(includesTags) .build(); for (DataBlockEncoding encoding : DataBlockEncoding.values()) { DataBlockEncoder encoder = encoding.getEncoder(); if (encoder == null) { continue; } HFileBlockEncodingContext encodingContext = new HFileBlockDefaultEncodingContext( encoding, HConstants.HFILEBLOCK_DUMMY_HEADER, fileContext); ByteArrayOutputStream baos = new ByteArrayOutputStream(); baos.write(HConstants.HFILEBLOCK_DUMMY_HEADER); DataOutputStream dos = new DataOutputStream(baos); encoder.startBlockEncoding(encodingContext, dos); for (KeyValue kv : kvList) { encoder.encode(kv, encodingContext, dos); } encoder.endBlockEncoding(encodingContext, dos, baos.getBuffer()); byte[] encodedData = baos.toByteArray(); testAlgorithm(encodedData, unencodedDataBuf, encoder); } } @Test public void testZeroByte() throws IOException { List<KeyValue> kvList = new ArrayList<KeyValue>(); byte[] row = Bytes.toBytes("abcd"); byte[] family = new byte[] {'f'}; byte[] qualifier0 = new byte[] {'b'}; byte[] qualifier1 = new byte[] {'c'}; byte[] value0 = new 
byte[] {'d'}; byte[] value1 = new byte[] {0x00}; if (includesTags) { kvList.add( new KeyValue( row, family, qualifier0, 0, value0, new Tag[] {new ArrayBackedTag((byte) 1, "value1")})); kvList.add( new KeyValue( row, family, qualifier1, 0, value1, new Tag[] {new ArrayBackedTag((byte) 1, "value1")})); } else { kvList.add(new KeyValue(row, family, qualifier0, 0, Type.Put, value0)); kvList.add(new KeyValue(row, family, qualifier1, 0, Type.Put, value1)); } testEncodersOnDataset(kvList, includesMemstoreTS, includesTags); } private void testAlgorithm( byte[] encodedData, ByteBuffer unencodedDataBuf, DataBlockEncoder encoder) throws IOException { // decode ByteArrayInputStream bais = new ByteArrayInputStream( encodedData, ENCODED_DATA_OFFSET, encodedData.length - ENCODED_DATA_OFFSET); DataInputStream dis = new DataInputStream(bais); ByteBuffer actualDataset; HFileContext meta = new HFileContextBuilder() .withHBaseCheckSum(false) .withIncludesMvcc(includesMemstoreTS) .withIncludesTags(includesTags) .withCompression(Compression.Algorithm.NONE) .build(); actualDataset = encoder.decodeKeyValues(dis, encoder.newDataBlockDecodingContext(meta)); actualDataset.rewind(); // this is because in case of prefix tree the decoded stream will not have // the // mvcc in it. assertEquals( "Encoding -> decoding gives different results for " + encoder, Bytes.toStringBinary(unencodedDataBuf), Bytes.toStringBinary(actualDataset)); } }
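For reference, a minimal sketch of the encode/decode round trip that testEncodersOnDataset and testAlgorithm above perform for a single encoding. It sits outside the test classes: kvs is an assumed List<KeyValue>, the two boolean flags mirror the test parameters, FAST_DIFF is just one example encoding, and ByteArrayOutputStream is assumed to be HBase's org.apache.hadoop.hbase.io.ByteArrayOutputStream (the variant with getBuffer()), as used in the tests.

static ByteBuffer roundTrip(List<KeyValue> kvs, boolean includesMemstoreTS, boolean includesTags)
    throws IOException {
  // Build the HFile context the encoder needs (no checksum or compression, as in the tests).
  HFileContext meta =
      new HFileContextBuilder()
          .withHBaseCheckSum(false)
          .withIncludesMvcc(includesMemstoreTS)
          .withIncludesTags(includesTags)
          .withCompression(Compression.Algorithm.NONE)
          .build();
  DataBlockEncoding encoding = DataBlockEncoding.FAST_DIFF;
  DataBlockEncoder encoder = encoding.getEncoder();
  HFileBlockEncodingContext ctx =
      new HFileBlockDefaultEncodingContext(encoding, HConstants.HFILEBLOCK_DUMMY_HEADER, meta);

  // Encode: write the dummy block header, then push every KeyValue through the encoder.
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  baos.write(HConstants.HFILEBLOCK_DUMMY_HEADER);
  DataOutputStream dos = new DataOutputStream(baos);
  encoder.startBlockEncoding(ctx, dos);
  for (KeyValue kv : kvs) {
    encoder.encode(kv, ctx, dos);
  }
  encoder.endBlockEncoding(ctx, dos, baos.getBuffer());

  // Decode: skip the dummy header plus the DataBlockEncoding id, then rebuild the flat KV buffer.
  byte[] encoded = baos.toByteArray();
  int offset = HConstants.HFILEBLOCK_HEADER_SIZE + DataBlockEncoding.ID_SIZE;
  DataInputStream dis =
      new DataInputStream(new ByteArrayInputStream(encoded, offset, encoded.length - offset));
  return encoder.decodeKeyValues(dis, encoder.newDataBlockDecodingContext(meta));
}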
/** * Simple test for {@link CellSortReducer} and {@link HFileOutputFormat2}. Sets up and runs a * mapreduce job that writes hfile output. Creates a few inner classes to implement splits and an * inputformat that emits keys and values like those of {@link PerformanceEvaluation}. */ @Category({VerySlowMapReduceTests.class, LargeTests.class}) public class TestHFileOutputFormat2 { @Rule public final TestRule timeout = CategoryBasedTimeout.builder() .withTimeout(this.getClass()) .withLookingForStuckThread(true) .build(); private static final int ROWSPERSPLIT = 1024; private static final byte[][] FAMILIES = { Bytes.add(PerformanceEvaluation.FAMILY_NAME, Bytes.toBytes("-A")), Bytes.add(PerformanceEvaluation.FAMILY_NAME, Bytes.toBytes("-B")) }; private static final TableName TABLE_NAME = TableName.valueOf("TestTable"); private HBaseTestingUtility util = new HBaseTestingUtility(); private static final Log LOG = LogFactory.getLog(TestHFileOutputFormat2.class); /** Simple mapper that makes KeyValue output. */ static class RandomKVGeneratingMapper extends Mapper<NullWritable, NullWritable, ImmutableBytesWritable, Cell> { private int keyLength; private static final int KEYLEN_DEFAULT = 10; private static final String KEYLEN_CONF = "randomkv.key.length"; private int valLength; private static final int VALLEN_DEFAULT = 10; private static final String VALLEN_CONF = "randomkv.val.length"; private static final byte[] QUALIFIER = Bytes.toBytes("data"); @Override protected void setup(Context context) throws IOException, InterruptedException { super.setup(context); Configuration conf = context.getConfiguration(); keyLength = conf.getInt(KEYLEN_CONF, KEYLEN_DEFAULT); valLength = conf.getInt(VALLEN_CONF, VALLEN_DEFAULT); } @Override protected void map( NullWritable n1, NullWritable n2, Mapper<NullWritable, NullWritable, ImmutableBytesWritable, Cell>.Context context) throws java.io.IOException, InterruptedException { byte keyBytes[] = new byte[keyLength]; byte valBytes[] = new byte[valLength]; int taskId = context.getTaskAttemptID().getTaskID().getId(); assert taskId < Byte.MAX_VALUE : "Unit tests dont support > 127 tasks!"; Random random = new Random(); for (int i = 0; i < ROWSPERSPLIT; i++) { random.nextBytes(keyBytes); // Ensure that unique tasks generate unique keys keyBytes[keyLength - 1] = (byte) (taskId & 0xFF); random.nextBytes(valBytes); ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes); for (byte[] family : TestHFileOutputFormat2.FAMILIES) { Cell kv = new KeyValue(keyBytes, family, QUALIFIER, valBytes); context.write(key, kv); } } } } private void setupRandomGeneratorMapper(Job job) { job.setInputFormatClass(NMapInputFormat.class); job.setMapperClass(RandomKVGeneratingMapper.class); job.setMapOutputKeyClass(ImmutableBytesWritable.class); job.setMapOutputValueClass(KeyValue.class); } /** * Test that {@link HFileOutputFormat2} RecordWriter amends timestamps if passed a keyvalue whose * timestamp is {@link HConstants#LATEST_TIMESTAMP}. * * @see <a href="https://issues.apache.org/jira/browse/HBASE-2615">HBASE-2615</a> */ @Ignore("Goes zombie too frequently; needs work. 
See HBASE-14563") @Test public void test_LATEST_TIMESTAMP_isReplaced() throws Exception { Configuration conf = new Configuration(this.util.getConfiguration()); RecordWriter<ImmutableBytesWritable, Cell> writer = null; TaskAttemptContext context = null; Path dir = util.getDataTestDir("test_LATEST_TIMESTAMP_isReplaced"); try { Job job = new Job(conf); FileOutputFormat.setOutputPath(job, dir); context = createTestTaskAttemptContext(job); HFileOutputFormat2 hof = new HFileOutputFormat2(); writer = hof.getRecordWriter(context); final byte[] b = Bytes.toBytes("b"); // Test 1. Pass a KV that has a ts of LATEST_TIMESTAMP. It should be // changed by call to write. Check all in kv is same but ts. KeyValue kv = new KeyValue(b, b, b); KeyValue original = kv.clone(); writer.write(new ImmutableBytesWritable(), kv); assertFalse(original.equals(kv)); assertTrue(Bytes.equals(CellUtil.cloneRow(original), CellUtil.cloneRow(kv))); assertTrue(Bytes.equals(CellUtil.cloneFamily(original), CellUtil.cloneFamily(kv))); assertTrue(Bytes.equals(CellUtil.cloneQualifier(original), CellUtil.cloneQualifier(kv))); assertNotSame(original.getTimestamp(), kv.getTimestamp()); assertNotSame(HConstants.LATEST_TIMESTAMP, kv.getTimestamp()); // Test 2. Now test passing a kv that has explicit ts. It should not be // changed by call to record write. kv = new KeyValue(b, b, b, kv.getTimestamp() - 1, b); original = kv.clone(); writer.write(new ImmutableBytesWritable(), kv); assertTrue(original.equals(kv)); } finally { if (writer != null && context != null) writer.close(context); dir.getFileSystem(conf).delete(dir, true); } } private TaskAttemptContext createTestTaskAttemptContext(final Job job) throws Exception { HadoopShims hadoop = CompatibilitySingletonFactory.getInstance(HadoopShims.class); TaskAttemptContext context = hadoop.createTestTaskAttemptContext(job, "attempt_201402131733_0001_m_000000_0"); return context; } /* * Test that {@link HFileOutputFormat2} creates an HFile with TIMERANGE * metadata used by time-restricted scans. */ @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test public void test_TIMERANGE() throws Exception { Configuration conf = new Configuration(this.util.getConfiguration()); RecordWriter<ImmutableBytesWritable, Cell> writer = null; TaskAttemptContext context = null; Path dir = util.getDataTestDir("test_TIMERANGE_present"); LOG.info("Timerange dir writing to dir: " + dir); try { // build a record writer using HFileOutputFormat2 Job job = new Job(conf); FileOutputFormat.setOutputPath(job, dir); context = createTestTaskAttemptContext(job); HFileOutputFormat2 hof = new HFileOutputFormat2(); writer = hof.getRecordWriter(context); // Pass two key values with explicit times stamps final byte[] b = Bytes.toBytes("b"); // value 1 with timestamp 2000 KeyValue kv = new KeyValue(b, b, b, 2000, b); KeyValue original = kv.clone(); writer.write(new ImmutableBytesWritable(), kv); assertEquals(original, kv); // value 2 with timestamp 1000 kv = new KeyValue(b, b, b, 1000, b); original = kv.clone(); writer.write(new ImmutableBytesWritable(), kv); assertEquals(original, kv); // verify that the file has the proper FileInfo. writer.close(context); // the generated file lives 1 directory down from the attempt directory // and is the only file, e.g. 
// _attempt__0000_r_000000_0/b/1979617994050536795 FileSystem fs = FileSystem.get(conf); Path attemptDirectory = hof.getDefaultWorkFile(context, "").getParent(); FileStatus[] sub1 = fs.listStatus(attemptDirectory); FileStatus[] file = fs.listStatus(sub1[0].getPath()); // open as HFile Reader and pull out TIMERANGE FileInfo. HFile.Reader rd = HFile.createReader(fs, file[0].getPath(), new CacheConfig(conf), conf); Map<byte[], byte[]> finfo = rd.loadFileInfo(); byte[] range = finfo.get("TIMERANGE".getBytes()); assertNotNull(range); // unmarshall and check values. TimeRangeTracker timeRangeTracker = new TimeRangeTracker(); Writables.copyWritable(range, timeRangeTracker); LOG.info( timeRangeTracker.getMinimumTimestamp() + "...." + timeRangeTracker.getMaximumTimestamp()); assertEquals(1000, timeRangeTracker.getMinimumTimestamp()); assertEquals(2000, timeRangeTracker.getMaximumTimestamp()); rd.close(); } finally { if (writer != null && context != null) writer.close(context); dir.getFileSystem(conf).delete(dir, true); } } /** Run small MR job. */ @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test public void testWritingPEData() throws Exception { Configuration conf = util.getConfiguration(); Path testDir = util.getDataTestDirOnTestFS("testWritingPEData"); FileSystem fs = testDir.getFileSystem(conf); // Set down this value or we OOME in eclipse. conf.setInt("mapreduce.task.io.sort.mb", 20); // Write a few files. conf.setLong(HConstants.HREGION_MAX_FILESIZE, 64 * 1024); Job job = new Job(conf, "testWritingPEData"); setupRandomGeneratorMapper(job); // This partitioner doesn't work well for number keys but using it anyways // just to demonstrate how to configure it. byte[] startKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT]; byte[] endKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT]; Arrays.fill(startKey, (byte) 0); Arrays.fill(endKey, (byte) 0xff); job.setPartitionerClass(SimpleTotalOrderPartitioner.class); // Set start and end rows for partitioner. SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey); SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey); job.setReducerClass(KeyValueSortReducer.class); job.setOutputFormatClass(HFileOutputFormat2.class); job.setNumReduceTasks(4); job.getConfiguration() .setStrings( "io.serializations", conf.get("io.serializations"), MutationSerialization.class.getName(), ResultSerialization.class.getName(), KeyValueSerialization.class.getName()); FileOutputFormat.setOutputPath(job, testDir); assertTrue(job.waitForCompletion(false)); FileStatus[] files = fs.listStatus(testDir); assertTrue(files.length > 0); } @Ignore("Goes zombie too frequently; needs work. 
See HBASE-14563") @Test public void testJobConfiguration() throws Exception { Configuration conf = new Configuration(this.util.getConfiguration()); conf.set("hbase.fs.tmp.dir", util.getDataTestDir("testJobConfiguration").toString()); Job job = new Job(conf); job.setWorkingDirectory(util.getDataTestDir("testJobConfiguration")); Table table = Mockito.mock(Table.class); RegionLocator regionLocator = Mockito.mock(RegionLocator.class); setupMockStartKeys(regionLocator); setupMockTableName(regionLocator); HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator); assertEquals(job.getNumReduceTasks(), 4); } private byte[][] generateRandomStartKeys(int numKeys) { Random random = new Random(); byte[][] ret = new byte[numKeys][]; // first region start key is always empty ret[0] = HConstants.EMPTY_BYTE_ARRAY; for (int i = 1; i < numKeys; i++) { ret[i] = PerformanceEvaluation.generateData(random, PerformanceEvaluation.DEFAULT_VALUE_LENGTH); } return ret; } private byte[][] generateRandomSplitKeys(int numKeys) { Random random = new Random(); byte[][] ret = new byte[numKeys][]; for (int i = 0; i < numKeys; i++) { ret[i] = PerformanceEvaluation.generateData(random, PerformanceEvaluation.DEFAULT_VALUE_LENGTH); } return ret; } @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test public void testMRIncrementalLoad() throws Exception { LOG.info("\nStarting test testMRIncrementalLoad\n"); doIncrementalLoadTest(false, false); } @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test public void testMRIncrementalLoadWithSplit() throws Exception { LOG.info("\nStarting test testMRIncrementalLoadWithSplit\n"); doIncrementalLoadTest(true, false); } /** * Test for HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY = true This test could only check the * correctness of original logic if LOCALITY_SENSITIVE_CONF_KEY is set to true. Because * MiniHBaseCluster always run with single hostname (and different ports), it's not possible to * check the region locality by comparing region locations and DN hostnames. When MiniHBaseCluster * supports explicit hostnames parameter (just like MiniDFSCluster does), we could test region * locality features more easily. */ @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test public void testMRIncrementalLoadWithLocality() throws Exception { LOG.info("\nStarting test testMRIncrementalLoadWithLocality\n"); doIncrementalLoadTest(false, true); doIncrementalLoadTest(true, true); } private void doIncrementalLoadTest(boolean shouldChangeRegions, boolean shouldKeepLocality) throws Exception { util = new HBaseTestingUtility(); Configuration conf = util.getConfiguration(); conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, shouldKeepLocality); int hostCount = 1; int regionNum = 5; if (shouldKeepLocality) { // We should change host count higher than hdfs replica count when MiniHBaseCluster supports // explicit hostnames parameter just like MiniDFSCluster does. 
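// With locality enabled, start several region servers on distinct fake hostnames so that the
// HDFSBlocksDistribution check further down can assert 100% block locality for each "datanode_N" host.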
hostCount = 3; regionNum = 20; } byte[][] splitKeys = generateRandomSplitKeys(regionNum - 1); String[] hostnames = new String[hostCount]; for (int i = 0; i < hostCount; ++i) { hostnames[i] = "datanode_" + i; } util.startMiniCluster(1, hostCount, hostnames); Table table = util.createTable(TABLE_NAME, FAMILIES, splitKeys); Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad"); try (RegionLocator r = util.getConnection().getRegionLocator(TABLE_NAME); Admin admin = util.getConnection().getAdmin(); ) { assertEquals("Should start with empty table", 0, util.countRows(table)); int numRegions = r.getStartKeys().length; assertEquals("Should make " + regionNum + " regions", numRegions, regionNum); // Generate the bulk load files runIncrementalPELoad(conf, table.getTableDescriptor(), r, testDir); // This doesn't write into the table, just makes files assertEquals("HFOF should not touch actual table", 0, util.countRows(table)); // Make sure that a directory was created for every CF int dir = 0; for (FileStatus f : testDir.getFileSystem(conf).listStatus(testDir)) { for (byte[] family : FAMILIES) { if (Bytes.toString(family).equals(f.getPath().getName())) { ++dir; } } } assertEquals("Column family not found in FS.", FAMILIES.length, dir); // handle the split case if (shouldChangeRegions) { LOG.info("Changing regions in table"); admin.disableTable(table.getName()); while (util.getMiniHBaseCluster() .getMaster() .getAssignmentManager() .getRegionStates() .isRegionsInTransition()) { Threads.sleep(200); LOG.info("Waiting on table to finish disabling"); } util.deleteTable(table.getName()); byte[][] newSplitKeys = generateRandomSplitKeys(14); table = util.createTable(TABLE_NAME, FAMILIES, newSplitKeys); while (util.getConnection().getRegionLocator(TABLE_NAME).getAllRegionLocations().size() != 15 || !admin.isTableAvailable(table.getName())) { Thread.sleep(200); LOG.info("Waiting for new region assignment to happen"); } } // Perform the actual load new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, r); // Ensure data shows up int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT; assertEquals( "LoadIncrementalHFiles should put expected data in table", expectedRows, util.countRows(table)); Scan scan = new Scan(); ResultScanner results = table.getScanner(scan); for (Result res : results) { assertEquals(FAMILIES.length, res.rawCells().length); Cell first = res.rawCells()[0]; for (Cell kv : res.rawCells()) { assertTrue(CellUtil.matchingRow(first, kv)); assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv))); } } results.close(); String tableDigestBefore = util.checksumRows(table); // Check region locality HDFSBlocksDistribution hbd = new HDFSBlocksDistribution(); for (HRegion region : util.getHBaseCluster().getRegions(TABLE_NAME)) { hbd.add(region.getHDFSBlocksDistribution()); } for (String hostname : hostnames) { float locality = hbd.getBlockLocalityIndex(hostname); LOG.info("locality of [" + hostname + "]: " + locality); assertEquals(100, (int) (locality * 100)); } // Cause regions to reopen admin.disableTable(TABLE_NAME); while (!admin.isTableDisabled(TABLE_NAME)) { Thread.sleep(200); LOG.info("Waiting for table to disable"); } admin.enableTable(TABLE_NAME); util.waitTableAvailable(TABLE_NAME); assertEquals( "Data should remain after reopening of regions", tableDigestBefore, util.checksumRows(table)); } finally { testDir.getFileSystem(conf).delete(testDir, true); util.deleteTable(TABLE_NAME); util.shutdownMiniCluster(); } } private void 
runIncrementalPELoad( Configuration conf, HTableDescriptor tableDescriptor, RegionLocator regionLocator, Path outDir) throws IOException, UnsupportedEncodingException, InterruptedException, ClassNotFoundException { Job job = new Job(conf, "testLocalMRIncrementalLoad"); job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad")); job.getConfiguration() .setStrings( "io.serializations", conf.get("io.serializations"), MutationSerialization.class.getName(), ResultSerialization.class.getName(), KeyValueSerialization.class.getName()); setupRandomGeneratorMapper(job); HFileOutputFormat2.configureIncrementalLoad(job, tableDescriptor, regionLocator); FileOutputFormat.setOutputPath(job, outDir); assertFalse(util.getTestFileSystem().exists(outDir)); assertEquals(regionLocator.getAllRegionLocations().size(), job.getNumReduceTasks()); assertTrue(job.waitForCompletion(true)); } /** * Test for {@link HFileOutputFormat2#configureCompression(org.apache.hadoop.hbase.client.Table, * Configuration)} and {@link HFileOutputFormat2#createFamilyCompressionMap (Configuration)}. * Tests that the compression map is correctly serialized into and deserialized from configuration * * @throws IOException */ @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test public void testSerializeDeserializeFamilyCompressionMap() throws IOException { for (int numCfs = 0; numCfs <= 3; numCfs++) { Configuration conf = new Configuration(this.util.getConfiguration()); Map<String, Compression.Algorithm> familyToCompression = getMockColumnFamiliesForCompression(numCfs); Table table = Mockito.mock(HTable.class); setupMockColumnFamiliesForCompression(table, familyToCompression); HFileOutputFormat2.configureCompression(conf, table.getTableDescriptor()); // read back family specific compression setting from the configuration Map<byte[], Algorithm> retrievedFamilyToCompressionMap = HFileOutputFormat2.createFamilyCompressionMap(conf); // test that we have a value for all column families that matches with the // used mock values for (Entry<String, Algorithm> entry : familyToCompression.entrySet()) { assertEquals( "Compression configuration incorrect for column family:" + entry.getKey(), entry.getValue(), retrievedFamilyToCompressionMap.get(entry.getKey().getBytes())); } } } private void setupMockColumnFamiliesForCompression( Table table, Map<String, Compression.Algorithm> familyToCompression) throws IOException { HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME); for (Entry<String, Compression.Algorithm> entry : familyToCompression.entrySet()) { mockTableDescriptor.addFamily( new HColumnDescriptor(entry.getKey()) .setMaxVersions(1) .setCompressionType(entry.getValue()) .setBlockCacheEnabled(false) .setTimeToLive(0)); } Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor(); } /** * @return a map from column family names to compression algorithms for testing column family * compression. 
Column family names have special characters */ private Map<String, Compression.Algorithm> getMockColumnFamiliesForCompression(int numCfs) { Map<String, Compression.Algorithm> familyToCompression = new HashMap<String, Compression.Algorithm>(); // use column family names having special characters if (numCfs-- > 0) { familyToCompression.put("Family1!@#!@#&", Compression.Algorithm.LZO); } if (numCfs-- > 0) { familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.SNAPPY); } if (numCfs-- > 0) { familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.GZ); } if (numCfs-- > 0) { familyToCompression.put("Family3", Compression.Algorithm.NONE); } return familyToCompression; } /** * Test for {@link HFileOutputFormat2#configureBloomType(org.apache.hadoop.hbase.client.Table, * Configuration)} and {@link HFileOutputFormat2#createFamilyBloomTypeMap (Configuration)}. Tests * that the compression map is correctly serialized into and deserialized from configuration * * @throws IOException */ @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test public void testSerializeDeserializeFamilyBloomTypeMap() throws IOException { for (int numCfs = 0; numCfs <= 2; numCfs++) { Configuration conf = new Configuration(this.util.getConfiguration()); Map<String, BloomType> familyToBloomType = getMockColumnFamiliesForBloomType(numCfs); Table table = Mockito.mock(HTable.class); setupMockColumnFamiliesForBloomType(table, familyToBloomType); HFileOutputFormat2.configureBloomType(table.getTableDescriptor(), conf); // read back family specific data block encoding settings from the // configuration Map<byte[], BloomType> retrievedFamilyToBloomTypeMap = HFileOutputFormat2.createFamilyBloomTypeMap(conf); // test that we have a value for all column families that matches with the // used mock values for (Entry<String, BloomType> entry : familyToBloomType.entrySet()) { assertEquals( "BloomType configuration incorrect for column family:" + entry.getKey(), entry.getValue(), retrievedFamilyToBloomTypeMap.get(entry.getKey().getBytes())); } } } private void setupMockColumnFamiliesForBloomType( Table table, Map<String, BloomType> familyToDataBlockEncoding) throws IOException { HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME); for (Entry<String, BloomType> entry : familyToDataBlockEncoding.entrySet()) { mockTableDescriptor.addFamily( new HColumnDescriptor(entry.getKey()) .setMaxVersions(1) .setBloomFilterType(entry.getValue()) .setBlockCacheEnabled(false) .setTimeToLive(0)); } Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor(); } /** * @return a map from column family names to compression algorithms for testing column family * compression. Column family names have special characters */ private Map<String, BloomType> getMockColumnFamiliesForBloomType(int numCfs) { Map<String, BloomType> familyToBloomType = new HashMap<String, BloomType>(); // use column family names having special characters if (numCfs-- > 0) { familyToBloomType.put("Family1!@#!@#&", BloomType.ROW); } if (numCfs-- > 0) { familyToBloomType.put("Family2=asdads&!AASD", BloomType.ROWCOL); } if (numCfs-- > 0) { familyToBloomType.put("Family3", BloomType.NONE); } return familyToBloomType; } /** * Test for {@link HFileOutputFormat2#configureBlockSize(org.apache.hadoop.hbase.client.Table, * Configuration)} and {@link HFileOutputFormat2#createFamilyBlockSizeMap (Configuration)}. 
Tests * that the compression map is correctly serialized into and deserialized from configuration * * @throws IOException */ @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test public void testSerializeDeserializeFamilyBlockSizeMap() throws IOException { for (int numCfs = 0; numCfs <= 3; numCfs++) { Configuration conf = new Configuration(this.util.getConfiguration()); Map<String, Integer> familyToBlockSize = getMockColumnFamiliesForBlockSize(numCfs); Table table = Mockito.mock(HTable.class); setupMockColumnFamiliesForBlockSize(table, familyToBlockSize); HFileOutputFormat2.configureBlockSize(table.getTableDescriptor(), conf); // read back family specific data block encoding settings from the // configuration Map<byte[], Integer> retrievedFamilyToBlockSizeMap = HFileOutputFormat2.createFamilyBlockSizeMap(conf); // test that we have a value for all column families that matches with the // used mock values for (Entry<String, Integer> entry : familyToBlockSize.entrySet()) { assertEquals( "BlockSize configuration incorrect for column family:" + entry.getKey(), entry.getValue(), retrievedFamilyToBlockSizeMap.get(entry.getKey().getBytes())); } } } private void setupMockColumnFamiliesForBlockSize( Table table, Map<String, Integer> familyToDataBlockEncoding) throws IOException { HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME); for (Entry<String, Integer> entry : familyToDataBlockEncoding.entrySet()) { mockTableDescriptor.addFamily( new HColumnDescriptor(entry.getKey()) .setMaxVersions(1) .setBlocksize(entry.getValue()) .setBlockCacheEnabled(false) .setTimeToLive(0)); } Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor(); } /** * @return a map from column family names to compression algorithms for testing column family * compression. Column family names have special characters */ private Map<String, Integer> getMockColumnFamiliesForBlockSize(int numCfs) { Map<String, Integer> familyToBlockSize = new HashMap<String, Integer>(); // use column family names having special characters if (numCfs-- > 0) { familyToBlockSize.put("Family1!@#!@#&", 1234); } if (numCfs-- > 0) { familyToBlockSize.put("Family2=asdads&!AASD", Integer.MAX_VALUE); } if (numCfs-- > 0) { familyToBlockSize.put("Family2=asdads&!AASD", Integer.MAX_VALUE); } if (numCfs-- > 0) { familyToBlockSize.put("Family3", 0); } return familyToBlockSize; } /** * Test for {@link HFileOutputFormat2#configureDataBlockEncoding(HTableDescriptor, Configuration)} * and {@link HFileOutputFormat2#createFamilyDataBlockEncodingMap(Configuration)}. Tests that the * compression map is correctly serialized into and deserialized from configuration * * @throws IOException */ @Ignore("Goes zombie too frequently; needs work. 
See HBASE-14563") @Test public void testSerializeDeserializeFamilyDataBlockEncodingMap() throws IOException { for (int numCfs = 0; numCfs <= 3; numCfs++) { Configuration conf = new Configuration(this.util.getConfiguration()); Map<String, DataBlockEncoding> familyToDataBlockEncoding = getMockColumnFamiliesForDataBlockEncoding(numCfs); Table table = Mockito.mock(HTable.class); setupMockColumnFamiliesForDataBlockEncoding(table, familyToDataBlockEncoding); HTableDescriptor tableDescriptor = table.getTableDescriptor(); HFileOutputFormat2.configureDataBlockEncoding(tableDescriptor, conf); // read back family specific data block encoding settings from the // configuration Map<byte[], DataBlockEncoding> retrievedFamilyToDataBlockEncodingMap = HFileOutputFormat2.createFamilyDataBlockEncodingMap(conf); // test that we have a value for all column families that matches with the // used mock values for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) { assertEquals( "DataBlockEncoding configuration incorrect for column family:" + entry.getKey(), entry.getValue(), retrievedFamilyToDataBlockEncodingMap.get(entry.getKey().getBytes())); } } } private void setupMockColumnFamiliesForDataBlockEncoding( Table table, Map<String, DataBlockEncoding> familyToDataBlockEncoding) throws IOException { HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME); for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) { mockTableDescriptor.addFamily( new HColumnDescriptor(entry.getKey()) .setMaxVersions(1) .setDataBlockEncoding(entry.getValue()) .setBlockCacheEnabled(false) .setTimeToLive(0)); } Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor(); } /** * @return a map from column family names to compression algorithms for testing column family * compression. Column family names have special characters */ private Map<String, DataBlockEncoding> getMockColumnFamiliesForDataBlockEncoding(int numCfs) { Map<String, DataBlockEncoding> familyToDataBlockEncoding = new HashMap<String, DataBlockEncoding>(); // use column family names having special characters if (numCfs-- > 0) { familyToDataBlockEncoding.put("Family1!@#!@#&", DataBlockEncoding.DIFF); } if (numCfs-- > 0) { familyToDataBlockEncoding.put("Family2=asdads&!AASD", DataBlockEncoding.FAST_DIFF); } if (numCfs-- > 0) { familyToDataBlockEncoding.put("Family2=asdads&!AASD", DataBlockEncoding.PREFIX); } if (numCfs-- > 0) { familyToDataBlockEncoding.put("Family3", DataBlockEncoding.NONE); } return familyToDataBlockEncoding; } private void setupMockStartKeys(RegionLocator table) throws IOException { byte[][] mockKeys = new byte[][] { HConstants.EMPTY_BYTE_ARRAY, Bytes.toBytes("aaa"), Bytes.toBytes("ggg"), Bytes.toBytes("zzz") }; Mockito.doReturn(mockKeys).when(table).getStartKeys(); } private void setupMockTableName(RegionLocator table) throws IOException { TableName mockTableName = TableName.valueOf("mock_table"); Mockito.doReturn(mockTableName).when(table).getName(); } /** * Test that {@link HFileOutputFormat2} RecordWriter uses compression and bloom filter settings * from the column family descriptor */ @Ignore("Goes zombie too frequently; needs work. 
See HBASE-14563") @Test public void testColumnFamilySettings() throws Exception { Configuration conf = new Configuration(this.util.getConfiguration()); RecordWriter<ImmutableBytesWritable, Cell> writer = null; TaskAttemptContext context = null; Path dir = util.getDataTestDir("testColumnFamilySettings"); // Setup table descriptor Table table = Mockito.mock(Table.class); RegionLocator regionLocator = Mockito.mock(RegionLocator.class); HTableDescriptor htd = new HTableDescriptor(TABLE_NAME); Mockito.doReturn(htd).when(table).getTableDescriptor(); for (HColumnDescriptor hcd : HBaseTestingUtility.generateColumnDescriptors()) { htd.addFamily(hcd); } // set up the table to return some mock keys setupMockStartKeys(regionLocator); try { // partial map red setup to get an operational writer for testing // We turn off the sequence file compression, because DefaultCodec // pollutes the GZip codec pool with an incompatible compressor. conf.set("io.seqfile.compression.type", "NONE"); conf.set("hbase.fs.tmp.dir", dir.toString()); // turn locality off to eliminate getRegionLocation fail-and-retry time when writing kvs conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false); Job job = new Job(conf, "testLocalMRIncrementalLoad"); job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings")); setupRandomGeneratorMapper(job); HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator); FileOutputFormat.setOutputPath(job, dir); context = createTestTaskAttemptContext(job); HFileOutputFormat2 hof = new HFileOutputFormat2(); writer = hof.getRecordWriter(context); // write out random rows writeRandomKeyValues(writer, context, htd.getFamiliesKeys(), ROWSPERSPLIT); writer.close(context); // Make sure that a directory was created for every CF FileSystem fs = dir.getFileSystem(conf); // commit so that the filesystem has one directory per column family hof.getOutputCommitter(context).commitTask(context); hof.getOutputCommitter(context).commitJob(context); FileStatus[] families = FSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs)); assertEquals(htd.getFamilies().size(), families.length); for (FileStatus f : families) { String familyStr = f.getPath().getName(); HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(familyStr)); // verify that the compression on this file matches the configured // compression Path dataFilePath = fs.listStatus(f.getPath())[0].getPath(); Reader reader = HFile.createReader(fs, dataFilePath, new CacheConfig(conf), conf); Map<byte[], byte[]> fileInfo = reader.loadFileInfo(); byte[] bloomFilter = fileInfo.get(StoreFile.BLOOM_FILTER_TYPE_KEY); if (bloomFilter == null) bloomFilter = Bytes.toBytes("NONE"); assertEquals( "Incorrect bloom filter used for column family " + familyStr + "(reader: " + reader + ")", hcd.getBloomFilterType(), BloomType.valueOf(Bytes.toString(bloomFilter))); assertEquals( "Incorrect compression used for column family " + familyStr + "(reader: " + reader + ")", hcd.getCompressionType(), reader.getFileContext().getCompression()); } } finally { dir.getFileSystem(conf).delete(dir, true); } } /** * Write random values to the writer assuming a table created using {@link #FAMILIES} as column * family descriptors */ private void writeRandomKeyValues( RecordWriter<ImmutableBytesWritable, Cell> writer, TaskAttemptContext context, Set<byte[]> families, int numRows) throws IOException, InterruptedException { byte keyBytes[] = new byte[Bytes.SIZEOF_INT]; int valLength = 10; byte valBytes[] = new byte[valLength]; 
    int taskId = context.getTaskAttemptID().getTaskID().getId();
    assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";
    final byte[] qualifier = Bytes.toBytes("data");
    Random random = new Random();
    for (int i = 0; i < numRows; i++) {
      Bytes.putInt(keyBytes, 0, i);
      random.nextBytes(valBytes);
      ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);
      for (byte[] family : families) {
        Cell kv = new KeyValue(keyBytes, family, qualifier, valBytes);
        writer.write(key, kv);
      }
    }
  }

  /**
   * This test covers the scenario described in HBASE-6901: all files are bulk loaded and excluded
   * from minor compaction. Without the fix for HBASE-6901, an ArrayIndexOutOfBoundsException would
   * be thrown.
   */
  @Ignore("Flakey: See HBASE-9051")
  @Test
  public void testExcludeAllFromMinorCompaction() throws Exception {
    Configuration conf = util.getConfiguration();
    conf.setInt("hbase.hstore.compaction.min", 2);
    generateRandomStartKeys(5);

    util.startMiniCluster();
    try (Connection conn = ConnectionFactory.createConnection();
        Admin admin = conn.getAdmin();
        Table table = util.createTable(TABLE_NAME, FAMILIES);
        RegionLocator locator = conn.getRegionLocator(TABLE_NAME)) {
      final FileSystem fs = util.getDFSCluster().getFileSystem();
      assertEquals("Should start with empty table", 0, util.countRows(table));

      // deep inspection: get the StoreFile dir
      final Path storePath =
          new Path(
              FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME),
              new Path(
                  admin.getTableRegions(TABLE_NAME).get(0).getEncodedName(),
                  Bytes.toString(FAMILIES[0])));
      assertEquals(0, fs.listStatus(storePath).length);

      // Generate two bulk load files
      conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", true);

      for (int i = 0; i < 2; i++) {
        Path testDir = util.getDataTestDirOnTestFS("testExcludeAllFromMinorCompaction_" + i);
        runIncrementalPELoad(
            conf, table.getTableDescriptor(), conn.getRegionLocator(TABLE_NAME), testDir);
        // Perform the actual load
        new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, locator);
      }

      // Ensure data shows up
      int expectedRows = 2 * NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals(
          "LoadIncrementalHFiles should put expected data in table",
          expectedRows,
          util.countRows(table));

      // should have a second StoreFile now
      assertEquals(2, fs.listStatus(storePath).length);

      // minor compactions shouldn't get rid of the file
      admin.compact(TABLE_NAME);
      try {
        quickPoll(
            new Callable<Boolean>() {
              @Override
              public Boolean call() throws Exception {
                return fs.listStatus(storePath).length == 1;
              }
            },
            5000);
        throw new IOException("SF# = " + fs.listStatus(storePath).length);
      } catch (AssertionError ae) {
        // this is expected behavior
      }

      // a major compaction should work though
      admin.majorCompact(TABLE_NAME);
      quickPoll(
          new Callable<Boolean>() {
            @Override
            public Boolean call() throws Exception {
              return fs.listStatus(storePath).length == 1;
            }
          },
          5000);
    } finally {
      util.shutdownMiniCluster();
    }
  }
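  // Both compaction-exclusion tests hinge on the same switch, set on the job Configuration before
  // the HFiles are written. A minimal sketch of how a bulk-load job would opt its HFiles out of
  // minor compaction (the job name and the tableDescriptor/regionLocator variables here are
  // hypothetical, not part of this test class):
  //
  //   Configuration jobConf = HBaseConfiguration.create();
  //   jobConf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", true);
  //   Job job = Job.getInstance(jobConf, "bulk-load-excluded-from-minor-compaction");
  //   HFileOutputFormat2.configureIncrementalLoad(job, tableDescriptor, regionLocator);
  //   // ...run the job, then load the output with LoadIncrementalHFiles.doBulkLoad(...) as above.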
See HBASE-14563") @Test public void testExcludeMinorCompaction() throws Exception { Configuration conf = util.getConfiguration(); conf.setInt("hbase.hstore.compaction.min", 2); generateRandomStartKeys(5); util.startMiniCluster(); try (Connection conn = ConnectionFactory.createConnection(conf); Admin admin = conn.getAdmin()) { Path testDir = util.getDataTestDirOnTestFS("testExcludeMinorCompaction"); final FileSystem fs = util.getDFSCluster().getFileSystem(); Table table = util.createTable(TABLE_NAME, FAMILIES); assertEquals("Should start with empty table", 0, util.countRows(table)); // deep inspection: get the StoreFile dir final Path storePath = new Path( FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME), new Path( admin.getTableRegions(TABLE_NAME).get(0).getEncodedName(), Bytes.toString(FAMILIES[0]))); assertEquals(0, fs.listStatus(storePath).length); // put some data in it and flush to create a storefile Put p = new Put(Bytes.toBytes("test")); p.addColumn(FAMILIES[0], Bytes.toBytes("1"), Bytes.toBytes("1")); table.put(p); admin.flush(TABLE_NAME); assertEquals(1, util.countRows(table)); quickPoll( new Callable<Boolean>() { @Override public Boolean call() throws Exception { return fs.listStatus(storePath).length == 1; } }, 5000); // Generate a bulk load file with more rows conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", true); RegionLocator regionLocator = conn.getRegionLocator(TABLE_NAME); runIncrementalPELoad(conf, table.getTableDescriptor(), regionLocator, testDir); // Perform the actual load new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, regionLocator); // Ensure data shows up int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT; assertEquals( "LoadIncrementalHFiles should put expected data in table", expectedRows + 1, util.countRows(table)); // should have a second StoreFile now assertEquals(2, fs.listStatus(storePath).length); // minor compactions shouldn't get rid of the file admin.compact(TABLE_NAME); try { quickPoll( new Callable<Boolean>() { @Override public Boolean call() throws Exception { return fs.listStatus(storePath).length == 1; } }, 5000); throw new IOException("SF# = " + fs.listStatus(storePath).length); } catch (AssertionError ae) { // this is expected behavior } // a major compaction should work though admin.majorCompact(TABLE_NAME); quickPoll( new Callable<Boolean>() { @Override public Boolean call() throws Exception { return fs.listStatus(storePath).length == 1; } }, 5000); } finally { util.shutdownMiniCluster(); } } private void quickPoll(Callable<Boolean> c, int waitMs) throws Exception { int sleepMs = 10; int retries = (int) Math.ceil(((double) waitMs) / sleepMs); while (retries-- > 0) { if (c.call().booleanValue()) { return; } Thread.sleep(sleepMs); } fail(); } public static void main(String args[]) throws Exception { new TestHFileOutputFormat2().manualTest(args); } public void manualTest(String args[]) throws Exception { Configuration conf = HBaseConfiguration.create(); util = new HBaseTestingUtility(conf); if ("newtable".equals(args[0])) { TableName tname = TableName.valueOf(args[1]); byte[][] splitKeys = generateRandomSplitKeys(4); try (Table table = util.createTable(tname, FAMILIES, splitKeys)) {} } else if ("incremental".equals(args[0])) { TableName tname = TableName.valueOf(args[1]); try (Connection c = ConnectionFactory.createConnection(conf); Admin admin = c.getAdmin(); RegionLocator regionLocator = c.getRegionLocator(tname)) { Path outDir = new Path("incremental-out"); 
  public void manualTest(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    util = new HBaseTestingUtility(conf);
    if ("newtable".equals(args[0])) {
      TableName tname = TableName.valueOf(args[1]);
      byte[][] splitKeys = generateRandomSplitKeys(4);
      try (Table table = util.createTable(tname, FAMILIES, splitKeys)) {}
    } else if ("incremental".equals(args[0])) {
      TableName tname = TableName.valueOf(args[1]);
      try (Connection c = ConnectionFactory.createConnection(conf);
          Admin admin = c.getAdmin();
          RegionLocator regionLocator = c.getRegionLocator(tname)) {
        Path outDir = new Path("incremental-out");
        runIncrementalPELoad(conf, admin.getTableDescriptor(tname), regionLocator, outDir);
      }
    } else {
      throw new RuntimeException("usage: TestHFileOutputFormat2 newtable | incremental");
    }
  }
}