/**
 * Test for {@link HFileOutputFormat2#configureDataBlockEncoding(HTableDescriptor, Configuration)}
 * and {@link HFileOutputFormat2#createFamilyDataBlockEncodingMap(Configuration)}. Tests that the
 * data block encoding map is correctly serialized into and deserialized from configuration.
 *
 * @throws IOException
 */
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
@Test
public void testSerializeDeserializeFamilyDataBlockEncodingMap() throws IOException {
  for (int numCfs = 0; numCfs <= 3; numCfs++) {
    Configuration conf = new Configuration(this.util.getConfiguration());
    Map<String, DataBlockEncoding> familyToDataBlockEncoding =
        getMockColumnFamiliesForDataBlockEncoding(numCfs);
    Table table = Mockito.mock(HTable.class);
    setupMockColumnFamiliesForDataBlockEncoding(table, familyToDataBlockEncoding);
    HTableDescriptor tableDescriptor = table.getTableDescriptor();
    HFileOutputFormat2.configureDataBlockEncoding(tableDescriptor, conf);

    // read back family specific data block encoding settings from the configuration
    Map<byte[], DataBlockEncoding> retrievedFamilyToDataBlockEncodingMap =
        HFileOutputFormat2.createFamilyDataBlockEncodingMap(conf);

    // test that we have a value for all column families that matches with the used mock values
    for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) {
      assertEquals(
          "DataBlockEncoding configuration incorrect for column family:" + entry.getKey(),
          entry.getValue(),
          retrievedFamilyToDataBlockEncodingMap.get(entry.getKey().getBytes()));
    }
  }
}
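For reference, the round-trip pattern these serialize/deserialize tests exercise can be sketched as follows. This is an illustrative stand-in, not HFileOutputFormat2's actual implementation: the configuration key "example.family.map" and the "family=value&family=value" string format are assumptions made for the sketch.

// Illustrative sketch of round-tripping a per-family setting through a
// Hadoop Configuration. Key name and "family=value&..." format are
// assumptions for illustration, not HFileOutputFormat2's real internals.
static void writeFamilyMap(Configuration conf, Map<String, String> familyToValue) {
  StringBuilder sb = new StringBuilder();
  for (Map.Entry<String, String> e : familyToValue.entrySet()) {
    if (sb.length() > 0) {
      sb.append('&');
    }
    sb.append(e.getKey()).append('=').append(e.getValue());
  }
  conf.set("example.family.map", sb.toString());
}

static Map<byte[], String> readFamilyMap(Configuration conf) {
  // Byte-array keys need Bytes.BYTES_COMPARATOR; plain byte[] has no
  // useful equals/hashCode, which is why the tests can look entries up
  // with entry.getKey().getBytes().
  Map<byte[], String> map = new TreeMap<>(Bytes.BYTES_COMPARATOR);
  String serialized = conf.get("example.family.map", "");
  for (String pair : serialized.split("&")) {
    if (pair.isEmpty()) {
      continue;
    }
    String[] kv = pair.split("=", 2);
    map.put(Bytes.toBytes(kv[0]), kv[1]);
  }
  return map;
}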
@Test
public void testJobConfiguration() throws Exception {
  Job job = new Job(util.getConfiguration());
  job.setWorkingDirectory(util.getDataTestDir("testJobConfiguration"));
  Table table = Mockito.mock(Table.class);
  RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
  setupMockStartKeys(regionLocator);
  HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
  assertEquals(4, job.getNumReduceTasks());
}
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test public void testJobConfiguration() throws Exception { Configuration conf = new Configuration(this.util.getConfiguration()); conf.set("hbase.fs.tmp.dir", util.getDataTestDir("testJobConfiguration").toString()); Job job = new Job(conf); job.setWorkingDirectory(util.getDataTestDir("testJobConfiguration")); Table table = Mockito.mock(Table.class); RegionLocator regionLocator = Mockito.mock(RegionLocator.class); setupMockStartKeys(regionLocator); setupMockTableName(regionLocator); HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator); assertEquals(job.getNumReduceTasks(), 4); }
protected HRegionInfo createRegion(Configuration conf, final Table htbl,
    byte[] startKey, byte[] endKey) throws IOException {
  Table meta = TEST_UTIL.getConnection().getTable(TableName.META_TABLE_NAME);
  HTableDescriptor htd = htbl.getTableDescriptor();
  HRegionInfo hri = new HRegionInfo(htbl.getName(), startKey, endKey);

  LOG.info("manually adding regioninfo and hdfs data: " + hri.toString());
  Path rootDir = FSUtils.getRootDir(conf);
  FileSystem fs = rootDir.getFileSystem(conf);
  Path p = new Path(FSUtils.getTableDir(rootDir, htbl.getName()), hri.getEncodedName());
  fs.mkdirs(p);

  Path riPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE);
  FSDataOutputStream out = fs.create(riPath);
  out.write(hri.toDelimitedByteArray());
  out.close();

  // add to meta.
  MetaTableAccessor.addRegionToMeta(meta, hri);
  meta.close();
  return hri;
}
protected void deleteRegion(Configuration conf, final Table tbl,
    byte[] startKey, byte[] endKey) throws IOException {
  LOG.info("Before delete:");
  HTableDescriptor htd = tbl.getTableDescriptor();
  dumpMeta(htd);

  List<HRegionLocation> regions;
  try (RegionLocator rl = connection.getRegionLocator(tbl.getName())) {
    regions = rl.getAllRegionLocations();
  }

  for (HRegionLocation e : regions) {
    HRegionInfo hri = e.getRegionInfo();
    ServerName hsa = e.getServerName();
    if (Bytes.compareTo(hri.getStartKey(), startKey) == 0
        && Bytes.compareTo(hri.getEndKey(), endKey) == 0) {
      LOG.info("RegionName: " + hri.getRegionNameAsString());
      byte[] deleteRow = hri.getRegionName();
      TEST_UTIL.getHBaseAdmin().unassign(deleteRow, true);

      LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
      Path rootDir = FSUtils.getRootDir(conf);
      FileSystem fs = rootDir.getFileSystem(conf);
      Path p = new Path(FSUtils.getTableDir(rootDir, htd.getTableName()),
          hri.getEncodedName());
      fs.delete(p, true);

      try (Table meta = this.connection.getTable(TableName.META_TABLE_NAME)) {
        Delete delete = new Delete(deleteRow);
        meta.delete(delete);
      }
    }
    LOG.info(hri.toString() + hsa.toString());
  }

  TEST_UTIL.getMetaTableRows(htd.getTableName());
  LOG.info("After delete:");
  dumpMeta(htd);
}
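Taken together, createRegion and deleteRegion let a test fabricate META/HDFS inconsistencies for repair-style (hbck) scenarios. A hypothetical sequence, with placeholder key values:

// Hypothetical use of the helpers above: remove a live region, then write
// back a wider one to create an overlap for an hbck-style repair test.
Table tbl = TEST_UTIL.getConnection().getTable(tableName);
deleteRegion(conf, tbl, Bytes.toBytes("B"), Bytes.toBytes("C"));
createRegion(conf, tbl, Bytes.toBytes("A"), Bytes.toBytes("C"));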
/**
 * Test for {@link HFileOutputFormat2#configureBlockSize(HTableDescriptor, Configuration)} and
 * {@link HFileOutputFormat2#createFamilyBlockSizeMap(Configuration)}. Tests that the
 * block size map is correctly serialized into and deserialized from configuration.
 *
 * @throws IOException
 */
@Test
public void testSerializeDeserializeFamilyBlockSizeMap() throws IOException {
  for (int numCfs = 0; numCfs <= 3; numCfs++) {
    Configuration conf = new Configuration(this.util.getConfiguration());
    Map<String, Integer> familyToBlockSize = getMockColumnFamiliesForBlockSize(numCfs);
    Table table = Mockito.mock(HTable.class);
    setupMockColumnFamiliesForBlockSize(table, familyToBlockSize);
    HFileOutputFormat2.configureBlockSize(table.getTableDescriptor(), conf);

    // read back family specific data block encoding settings from the configuration
    Map<byte[], Integer> retrievedFamilyToBlockSizeMap =
        HFileOutputFormat2.createFamilyBlockSizeMap(conf);

    // test that we have a value for all column families that matches with the used mock values
    for (Entry<String, Integer> entry : familyToBlockSize.entrySet()) {
      assertEquals(
          "BlockSize configuration incorrect for column family:" + entry.getKey(),
          entry.getValue(),
          retrievedFamilyToBlockSizeMap.get(entry.getKey().getBytes()));
    }
  }
}
/**
 * Test for {@link HFileOutputFormat2#configureCompression(Configuration, HTableDescriptor)} and
 * {@link HFileOutputFormat2#createFamilyCompressionMap(Configuration)}. Tests that the
 * compression map is correctly serialized into and deserialized from configuration.
 *
 * @throws IOException
 */
@Test
public void testSerializeDeserializeFamilyCompressionMap() throws IOException {
  for (int numCfs = 0; numCfs <= 3; numCfs++) {
    Configuration conf = new Configuration(this.util.getConfiguration());
    Map<String, Compression.Algorithm> familyToCompression =
        getMockColumnFamiliesForCompression(numCfs);
    Table table = Mockito.mock(HTable.class);
    setupMockColumnFamiliesForCompression(table, familyToCompression);
    HFileOutputFormat2.configureCompression(conf, table.getTableDescriptor());

    // read back family specific compression setting from the configuration
    Map<byte[], Algorithm> retrievedFamilyToCompressionMap =
        HFileOutputFormat2.createFamilyCompressionMap(conf);

    // test that we have a value for all column families that matches with the used mock values
    for (Entry<String, Algorithm> entry : familyToCompression.entrySet()) {
      assertEquals(
          "Compression configuration incorrect for column family:" + entry.getKey(),
          entry.getValue(),
          retrievedFamilyToCompressionMap.get(entry.getKey().getBytes()));
    }
  }
}
/**
 * Configure a MapReduce Job to perform an incremental load into the given table. This
 * <ul>
 *   <li>Inspects the table to configure a total order partitioner</li>
 *   <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
 *   <li>Sets the number of reduce tasks to match the current number of regions</li>
 *   <li>Sets the output key/value class to match HFileOutputFormat2's requirements</li>
 *   <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or
 *       PutSortReducer)</li>
 * </ul>
 * The user should be sure to set the map output value class to either KeyValue or Put before
 * running this function.
 */
public static void configureIncrementalLoad(Job job, Table table, RegionLocator regionLocator)
    throws IOException {
  configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
}
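For context, a caller of this overload might look like the sketch below. MyPutMapper, the table name, and the output path are hypothetical; the mapper is assumed to emit ImmutableBytesWritable/Put pairs as the javadoc above requires.

// A minimal bulk-load driver sketch; names are placeholders, not from the source.
Configuration conf = HBaseConfiguration.create();
Job job = Job.getInstance(conf, "bulk-load-example");
job.setMapperClass(MyPutMapper.class);                   // hypothetical mapper
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
job.setMapOutputValueClass(Put.class);                   // required before the call below
try (Connection conn = ConnectionFactory.createConnection(conf);
    Table table = conn.getTable(TableName.valueOf("exampleTable"));
    RegionLocator locator = conn.getRegionLocator(TableName.valueOf("exampleTable"))) {
  // Wires in the partitioner, reducer, and reduce-task count described above.
  HFileOutputFormat2.configureIncrementalLoad(job, table, locator);
  FileOutputFormat.setOutputPath(job, new Path("/tmp/bulkload-out"));
  job.waitForCompletion(true);
}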
/**
 * This test covers the scenario described in HBASE-6901: all files are bulk loaded and
 * excluded from minor compaction. Without the fix for HBASE-6901, an
 * ArrayIndexOutOfBoundsException would be thrown.
 */
@Ignore("Flakey: See HBASE-9051")
@Test
public void testExcludeAllFromMinorCompaction() throws Exception {
  Configuration conf = util.getConfiguration();
  conf.setInt("hbase.hstore.compaction.min", 2);
  generateRandomStartKeys(5);

  util.startMiniCluster();
  try (Connection conn = ConnectionFactory.createConnection();
      Admin admin = conn.getAdmin();
      Table table = util.createTable(TABLE_NAME, FAMILIES);
      RegionLocator locator = conn.getRegionLocator(TABLE_NAME)) {
    final FileSystem fs = util.getDFSCluster().getFileSystem();
    assertEquals("Should start with empty table", 0, util.countRows(table));

    // deep inspection: get the StoreFile dir
    final Path storePath = new Path(
        FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME),
        new Path(admin.getTableRegions(TABLE_NAME).get(0).getEncodedName(),
            Bytes.toString(FAMILIES[0])));
    assertEquals(0, fs.listStatus(storePath).length);

    // Generate two bulk load files
    conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", true);

    for (int i = 0; i < 2; i++) {
      Path testDir = util.getDataTestDirOnTestFS("testExcludeAllFromMinorCompaction_" + i);
      runIncrementalPELoad(conf, table.getTableDescriptor(),
          conn.getRegionLocator(TABLE_NAME), testDir);
      // Perform the actual load
      new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, locator);
    }

    // Ensure data shows up
    int expectedRows = 2 * NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
    assertEquals("LoadIncrementalHFiles should put expected data in table",
        expectedRows, util.countRows(table));

    // should have a second StoreFile now
    assertEquals(2, fs.listStatus(storePath).length);

    // minor compactions shouldn't get rid of the file
    admin.compact(TABLE_NAME);
    try {
      quickPoll(new Callable<Boolean>() {
        @Override
        public Boolean call() throws Exception {
          return fs.listStatus(storePath).length == 1;
        }
      }, 5000);
      throw new IOException("SF# = " + fs.listStatus(storePath).length);
    } catch (AssertionError ae) {
      // this is expected behavior
    }

    // a major compaction should work though
    admin.majorCompact(TABLE_NAME);
    quickPoll(new Callable<Boolean>() {
      @Override
      public Boolean call() throws Exception {
        return fs.listStatus(storePath).length == 1;
      }
    }, 5000);
  } finally {
    util.shutdownMiniCluster();
  }
}
/**
 * Test that {@link HFileOutputFormat2} RecordWriter uses compression and bloom filter settings
 * from the column family descriptor.
 */
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
@Test
public void testColumnFamilySettings() throws Exception {
  Configuration conf = new Configuration(this.util.getConfiguration());
  RecordWriter<ImmutableBytesWritable, Cell> writer = null;
  TaskAttemptContext context = null;
  Path dir = util.getDataTestDir("testColumnFamilySettings");

  // Setup table descriptor
  Table table = Mockito.mock(Table.class);
  RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
  HTableDescriptor htd = new HTableDescriptor(TABLE_NAME);
  Mockito.doReturn(htd).when(table).getTableDescriptor();
  for (HColumnDescriptor hcd : HBaseTestingUtility.generateColumnDescriptors()) {
    htd.addFamily(hcd);
  }

  // set up the table to return some mock keys
  setupMockStartKeys(regionLocator);

  try {
    // partial map red setup to get an operational writer for testing
    // We turn off the sequence file compression, because DefaultCodec
    // pollutes the GZip codec pool with an incompatible compressor.
    conf.set("io.seqfile.compression.type", "NONE");
    conf.set("hbase.fs.tmp.dir", dir.toString());
    // turn locality off to eliminate getRegionLocation fail-and-retry time when writing kvs
    conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false);

    Job job = new Job(conf, "testLocalMRIncrementalLoad");
    job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings"));
    setupRandomGeneratorMapper(job);
    HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
    FileOutputFormat.setOutputPath(job, dir);
    context = createTestTaskAttemptContext(job);
    HFileOutputFormat2 hof = new HFileOutputFormat2();
    writer = hof.getRecordWriter(context);

    // write out random rows
    writeRandomKeyValues(writer, context, htd.getFamiliesKeys(), ROWSPERSPLIT);
    writer.close(context);

    // Make sure that a directory was created for every CF
    FileSystem fs = dir.getFileSystem(conf);

    // commit so that the filesystem has one directory per column family
    hof.getOutputCommitter(context).commitTask(context);
    hof.getOutputCommitter(context).commitJob(context);
    FileStatus[] families = FSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs));
    assertEquals(htd.getFamilies().size(), families.length);
    for (FileStatus f : families) {
      String familyStr = f.getPath().getName();
      HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(familyStr));
      // verify that the compression on this file matches the configured compression
      Path dataFilePath = fs.listStatus(f.getPath())[0].getPath();
      Reader reader = HFile.createReader(fs, dataFilePath, new CacheConfig(conf), conf);
      Map<byte[], byte[]> fileInfo = reader.loadFileInfo();

      byte[] bloomFilter = fileInfo.get(StoreFile.BLOOM_FILTER_TYPE_KEY);
      if (bloomFilter == null) {
        bloomFilter = Bytes.toBytes("NONE");
      }
      assertEquals(
          "Incorrect bloom filter used for column family " + familyStr
              + "(reader: " + reader + ")",
          hcd.getBloomFilterType(), BloomType.valueOf(Bytes.toString(bloomFilter)));
      assertEquals(
          "Incorrect compression used for column family " + familyStr
              + "(reader: " + reader + ")",
          hcd.getCompressionType(), reader.getFileContext().getCompression());
    }
  } finally {
    dir.getFileSystem(conf).delete(dir, true);
  }
}
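The per-family settings this test verifies end to end are the ones declared on each HColumnDescriptor. A minimal sketch of declaring them (family name and chosen values are illustrative, not from the source):

// Illustrative per-family settings that the assertions above check against
// what the RecordWriter actually wrote into each HFile.
HColumnDescriptor hcd = new HColumnDescriptor("cf");
hcd.setCompressionType(Compression.Algorithm.GZ);
hcd.setBloomFilterType(BloomType.ROWCOL);
hcd.setDataBlockEncoding(DataBlockEncoding.FAST_DIFF);
htd.addFamily(hcd);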
private void doIncrementalLoadTest(boolean shouldChangeRegions, boolean shouldKeepLocality)
    throws Exception {
  util = new HBaseTestingUtility();
  Configuration conf = util.getConfiguration();
  conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, shouldKeepLocality);
  int hostCount = 1;
  int regionNum = 5;
  if (shouldKeepLocality) {
    // We should change host count higher than hdfs replica count when MiniHBaseCluster supports
    // explicit hostnames parameter just like MiniDFSCluster does.
    hostCount = 3;
    regionNum = 20;
  }

  byte[][] splitKeys = generateRandomSplitKeys(regionNum - 1);
  String[] hostnames = new String[hostCount];
  for (int i = 0; i < hostCount; ++i) {
    hostnames[i] = "datanode_" + i;
  }
  util.startMiniCluster(1, hostCount, hostnames);

  Table table = util.createTable(TABLE_NAME, FAMILIES, splitKeys);
  Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
  try (RegionLocator r = util.getConnection().getRegionLocator(TABLE_NAME);
      Admin admin = util.getConnection().getAdmin()) {
    assertEquals("Should start with empty table", 0, util.countRows(table));
    int numRegions = r.getStartKeys().length;
    assertEquals("Should make " + regionNum + " regions", numRegions, regionNum);

    // Generate the bulk load files
    runIncrementalPELoad(conf, table.getTableDescriptor(), r, testDir);
    // This doesn't write into the table, just makes files
    assertEquals("HFOF should not touch actual table", 0, util.countRows(table));

    // Make sure that a directory was created for every CF
    int dir = 0;
    for (FileStatus f : testDir.getFileSystem(conf).listStatus(testDir)) {
      for (byte[] family : FAMILIES) {
        if (Bytes.toString(family).equals(f.getPath().getName())) {
          ++dir;
        }
      }
    }
    assertEquals("Column family not found in FS.", FAMILIES.length, dir);

    // handle the split case
    if (shouldChangeRegions) {
      LOG.info("Changing regions in table");
      admin.disableTable(table.getName());
      while (util.getMiniHBaseCluster().getMaster().getAssignmentManager()
          .getRegionStates().isRegionsInTransition()) {
        Threads.sleep(200);
        LOG.info("Waiting on table to finish disabling");
      }
      util.deleteTable(table.getName());
      byte[][] newSplitKeys = generateRandomSplitKeys(14);
      table = util.createTable(TABLE_NAME, FAMILIES, newSplitKeys);

      while (util.getConnection().getRegionLocator(TABLE_NAME)
          .getAllRegionLocations().size() != 15
          || !admin.isTableAvailable(table.getName())) {
        Thread.sleep(200);
        LOG.info("Waiting for new region assignment to happen");
      }
    }

    // Perform the actual load
    new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, r);

    // Ensure data shows up
    int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
    assertEquals("LoadIncrementalHFiles should put expected data in table",
        expectedRows, util.countRows(table));
    Scan scan = new Scan();
    ResultScanner results = table.getScanner(scan);
    for (Result res : results) {
      assertEquals(FAMILIES.length, res.rawCells().length);
      Cell first = res.rawCells()[0];
      for (Cell kv : res.rawCells()) {
        assertTrue(CellUtil.matchingRow(first, kv));
        assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
      }
    }
    results.close();

    String tableDigestBefore = util.checksumRows(table);

    // Check region locality
    HDFSBlocksDistribution hbd = new HDFSBlocksDistribution();
    for (HRegion region : util.getHBaseCluster().getRegions(TABLE_NAME)) {
      hbd.add(region.getHDFSBlocksDistribution());
    }
    for (String hostname : hostnames) {
      float locality = hbd.getBlockLocalityIndex(hostname);
      LOG.info("locality of [" + hostname + "]: " + locality);
      assertEquals(100, (int) (locality * 100));
    }

    // Cause regions to reopen
    admin.disableTable(TABLE_NAME);
    while (!admin.isTableDisabled(TABLE_NAME)) {
      Thread.sleep(200);
      LOG.info("Waiting for table to disable");
    }
    admin.enableTable(TABLE_NAME);
    util.waitTableAvailable(TABLE_NAME);
    assertEquals("Data should remain after reopening of regions",
        tableDigestBefore, util.checksumRows(table));
  } finally {
    testDir.getFileSystem(conf).delete(testDir, true);
    util.deleteTable(TABLE_NAME);
    util.shutdownMiniCluster();
  }
}
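Callers of this helper presumably toggle the two flags to cover the plain, region-change, and locality scenarios. A hypothetical set of entry points (method names are assumed, not taken from the source):

// Hypothetical entry points exercising the helper above.
@Test
public void testLocalMRIncrementalLoad() throws Exception {
  doIncrementalLoadTest(false, false);
}

@Test
public void testLocalMRIncrementalLoadWithSplit() throws Exception {
  doIncrementalLoadTest(true, false);
}

@Test
public void testLocalMRIncrementalLoadWithLocality() throws Exception {
  doIncrementalLoadTest(false, true);
}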
public void doTest(Class<?> regionClass, boolean distributedLogReplay) throws Exception {
  Configuration c = TEST_UTIL.getConfiguration();
  c.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, distributedLogReplay);
  // Insert our custom region
  c.setClass(HConstants.REGION_IMPL, regionClass, HRegion.class);
  c.setBoolean("dfs.support.append", true);
  // Encourage plenty of flushes
  c.setLong("hbase.hregion.memstore.flush.size", 200000);
  c.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY, ConstantSizeRegionSplitPolicy.class.getName());
  // Only run compaction when we tell it to
  c.setInt("hbase.hstore.compactionThreshold", 1000);
  c.setLong("hbase.hstore.blockingStoreFiles", 1000);
  // Compact quickly after we tell it to!
  c.setInt("hbase.regionserver.thread.splitcompactcheckfrequency", 1000);

  LOG.info("Starting mini cluster");
  TEST_UTIL.startMiniCluster(1);
  CompactionBlockerRegion compactingRegion = null;
  Admin admin = null;
  try {
    LOG.info("Creating admin");
    admin = TEST_UTIL.getConnection().getAdmin();
    LOG.info("Creating table");
    TEST_UTIL.createTable(TABLE_NAME, FAMILY);
    Table table = TEST_UTIL.getConnection().getTable(TABLE_NAME);
    LOG.info("Loading test table");
    // Find the region
    List<HRegion> testRegions = TEST_UTIL.getMiniHBaseCluster().findRegionsForTable(TABLE_NAME);
    assertEquals(1, testRegions.size());
    compactingRegion = (CompactionBlockerRegion) testRegions.get(0);
    LOG.info("Blocking compactions");
    compactingRegion.stopCompactions();
    long lastFlushTime = compactingRegion.getLastFlushTime();
    // Load some rows
    TEST_UTIL.loadNumericRows(table, FAMILY, 0, FIRST_BATCH_COUNT);

    // add a compaction from an older (non-existing) region to see whether we successfully skip
    // those entries
    HRegionInfo oldHri = new HRegionInfo(table.getName(),
        HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
    CompactionDescriptor compactionDescriptor = ProtobufUtil.toCompactionDescriptor(oldHri,
        FAMILY, Lists.newArrayList(new Path("/a")), Lists.newArrayList(new Path("/b")),
        new Path("store_dir"));
    WALUtil.writeCompactionMarker(compactingRegion.getWAL(), table.getTableDescriptor(),
        oldHri, compactionDescriptor, new AtomicLong(Long.MAX_VALUE - 100));

    // Wait till flush has happened, otherwise there won't be multiple store files
    long startWaitTime = System.currentTimeMillis();
    while (compactingRegion.getLastFlushTime() <= lastFlushTime
        || compactingRegion.countStoreFiles() <= 1) {
      LOG.info("Waiting for the region to flush " + compactingRegion.getRegionNameAsString());
      Thread.sleep(1000);
      assertTrue("Timed out waiting for the region to flush",
          System.currentTimeMillis() - startWaitTime < 30000);
    }
    assertTrue(compactingRegion.countStoreFiles() > 1);
    final byte[] REGION_NAME = compactingRegion.getRegionName();

    LOG.info("Asking for compaction");
    ((HBaseAdmin) admin).majorCompact(TABLE_NAME.getName());
    LOG.info("Waiting for compaction to be about to start");
    compactingRegion.waitForCompactionToBlock();
    LOG.info("Starting a new server");
    RegionServerThread newServerThread = TEST_UTIL.getMiniHBaseCluster().startRegionServer();
    final HRegionServer newServer = newServerThread.getRegionServer();
    LOG.info("Killing region server ZK lease");
    TEST_UTIL.expireRegionServerSession(0);
    CompactionBlockerRegion newRegion = null;
    startWaitTime = System.currentTimeMillis();
    LOG.info("Waiting for the new server to pick up the region " + Bytes.toString(REGION_NAME));

    // wait for region to be assigned and to go out of log replay if applicable
    Waiter.waitFor(c, 60000, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        HRegion newRegion = newServer.getOnlineRegion(REGION_NAME);
        return newRegion != null && !newRegion.isRecovering();
      }
    });

    newRegion = (CompactionBlockerRegion) newServer.getOnlineRegion(REGION_NAME);

    LOG.info("Allowing compaction to proceed");
    compactingRegion.allowCompactions();
    while (compactingRegion.compactCount == 0) {
      Thread.sleep(1000);
    }
    // The server we killed stays up until the compaction that was started before it was killed
    // completes. In logs you should see the old regionserver now going down.
    LOG.info("Compaction finished");

    // After compaction of old region finishes on the server that was going down, make sure that
    // all the files we expect are still working when region is up in new location.
    FileSystem fs = newRegion.getFilesystem();
    for (String f : newRegion.getStoreFileList(new byte[][] {FAMILY})) {
      assertTrue("After compaction, does not exist: " + f, fs.exists(new Path(f)));
    }
    // If we survive the split keep going...
    // Now we make sure that the region isn't totally confused. Load up more rows.
    TEST_UTIL.loadNumericRows(table, FAMILY, FIRST_BATCH_COUNT,
        FIRST_BATCH_COUNT + SECOND_BATCH_COUNT);
    ((HBaseAdmin) admin).majorCompact(TABLE_NAME.getName());
    startWaitTime = System.currentTimeMillis();
    while (newRegion.compactCount == 0) {
      Thread.sleep(1000);
      assertTrue("New region never compacted",
          System.currentTimeMillis() - startWaitTime < 180000);
    }
    assertEquals(FIRST_BATCH_COUNT + SECOND_BATCH_COUNT, TEST_UTIL.countRows(table));
  } finally {
    if (compactingRegion != null) {
      compactingRegion.allowCompactions();
    }
    admin.close();
    TEST_UTIL.shutdownMiniCluster();
  }
}
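doTest would be invoked once per custom region implementation and log-replay mode. A hypothetical caller, with a placeholder CompactionBlockerRegion subclass name:

// Hypothetical caller; the region class name is a placeholder for a
// CompactionBlockerRegion subclass that blocks at the desired point.
@Test
public void testFencingAroundCompaction() throws Exception {
  doTest(BlockCompactionsInPrepRegion.class, false);
}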