@Test
public void testTableWithCFNameStartWithUnderScore() throws Exception {
  Path dir = util.getDataTestDirOnTestFS("cfNameStartWithUnderScore");
  FileSystem fs = util.getTestFileSystem();
  dir = dir.makeQualified(fs.getUri(), fs.getWorkingDirectory());
  String family = "_cf";
  Path familyDir = new Path(dir, family);

  byte[] from = Bytes.toBytes("begin");
  byte[] to = Bytes.toBytes("end");
  Configuration conf = util.getConfiguration();
  String tableName = "mytable_cfNameStartWithUnderScore";
  Table table = util.createTable(TableName.valueOf(tableName), family);
  HFileTestUtil.createHFile(conf, fs, new Path(familyDir, "hfile"), Bytes.toBytes(family),
    QUALIFIER, from, to, 1000);

  LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
  String[] args = { dir.toString(), tableName };
  try {
    loader.run(args);
    assertEquals(1000, util.countRows(table));
  } finally {
    if (null != table) {
      table.close();
    }
  }
}
private void runIncrementalPELoad(Configuration conf, HTableDescriptor tableDescriptor,
    RegionLocator regionLocator, Path outDir)
    throws IOException, InterruptedException, ClassNotFoundException {
  Job job = new Job(conf, "testLocalMRIncrementalLoad");
  job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
  job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
    MutationSerialization.class.getName(), ResultSerialization.class.getName(),
    KeyValueSerialization.class.getName());
  setupRandomGeneratorMapper(job);
  HFileOutputFormat2.configureIncrementalLoad(job, tableDescriptor, regionLocator);
  FileOutputFormat.setOutputPath(job, outDir);

  assertFalse(util.getTestFileSystem().exists(outDir));
  assertEquals(regionLocator.getAllRegionLocations().size(), job.getNumReduceTasks());
  assertTrue(job.waitForCompletion(true));
}
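/*
 * setupRandomGeneratorMapper() is called above but not shown. A minimal sketch of what such
 * a helper might look like, assuming the NMapInputFormat and RandomKVGeneratingMapper
 * classes referenced elsewhere in these tests; the real wiring may differ.
 */
private static void setupRandomGeneratorMapper(Job job) {
  // NMapInputFormat produces a configurable number of map tasks with no real input splits.
  job.setInputFormatClass(NMapInputFormat.class);
  // The mapper emits random KeyValues; HFileOutputFormat2 expects
  // ImmutableBytesWritable keys and Cell/KeyValue values from the map phase.
  job.setMapperClass(RandomKVGeneratingMapper.class);
  job.setMapOutputKeyClass(ImmutableBytesWritable.class);
  job.setMapOutputValueClass(KeyValue.class);
}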
@Test
public void testLoadTooManyHFiles() throws Exception {
  Path dir = util.getDataTestDirOnTestFS("testLoadTooManyHFiles");
  FileSystem fs = util.getTestFileSystem();
  dir = dir.makeQualified(fs);
  Path familyDir = new Path(dir, Bytes.toString(FAMILY));

  byte[] from = Bytes.toBytes("begin");
  byte[] to = Bytes.toBytes("end");
  for (int i = 0; i <= MAX_FILES_PER_REGION_PER_FAMILY; i++) {
    HFileTestUtil.createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_" + i),
      FAMILY, QUALIFIER, from, to, 1000);
  }

  LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration());
  String[] args = { dir.toString(), "mytable_testLoadTooManyHFiles" };
  try {
    loader.run(args);
    fail("Bulk loading too many files should fail");
  } catch (IOException ie) {
    assertTrue(ie.getMessage()
      .contains("Trying to load more than " + MAX_FILES_PER_REGION_PER_FAMILY + " hfiles"));
  }
}
@Test
public void testSplitStoreFile() throws IOException {
  Path dir = util.getDataTestDirOnTestFS("testSplitHFile");
  FileSystem fs = util.getTestFileSystem();
  Path testIn = new Path(dir, "testhfile");
  HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
  HFileTestUtil.createHFile(util.getConfiguration(), fs, testIn, FAMILY, QUALIFIER,
    Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 1000);

  Path bottomOut = new Path(dir, "bottom.out");
  Path topOut = new Path(dir, "top.out");
  LoadIncrementalHFiles.splitStoreFile(util.getConfiguration(), testIn, familyDesc,
    Bytes.toBytes("ggg"), bottomOut, topOut);

  int rowCount = verifyHFile(bottomOut);
  rowCount += verifyHFile(topOut);
  assertEquals(1000, rowCount);
}
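/*
 * verifyHFile() is used above but not shown. A plausible sketch, assuming the helper
 * opens the file with the raw HFile reader and counts the cells it contains; this is an
 * assumption about its behavior, not the definitive implementation.
 */
private int verifyHFile(Path p) throws IOException {
  Configuration conf = util.getConfiguration();
  HFile.Reader reader =
      HFile.createReader(p.getFileSystem(conf), p, new CacheConfig(conf), conf);
  reader.loadFileInfo();
  HFileScanner scanner = reader.getScanner(false, false);
  int count = 0;
  // Walk every cell in the file and count it.
  if (scanner.seekTo()) {
    do {
      count++;
    } while (scanner.next());
  }
  reader.close();
  assertTrue(count > 0);
  return count;
}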
@Test
public void bulkLoadHFileTest() throws Exception {
  String testName = TestRegionObserverInterface.class.getName() + ".bulkLoadHFileTest";
  TableName tableName = TableName.valueOf(TEST_TABLE.getNameAsString() + ".bulkLoadHFileTest");
  Configuration conf = util.getConfiguration();
  HTable table = util.createTable(tableName, new byte[][] { A, B, C });
  try {
    verifyMethodResult(SimpleRegionObserver.class,
      new String[] { "hadPreBulkLoadHFile", "hadPostBulkLoadHFile" }, tableName,
      new Boolean[] { false, false });

    FileSystem fs = util.getTestFileSystem();
    final Path dir = util.getDataTestDirOnTestFS(testName).makeQualified(fs);
    Path familyDir = new Path(dir, Bytes.toString(A));

    createHFile(util.getConfiguration(), fs, new Path(familyDir, Bytes.toString(A)), A, A);

    // Bulk load
    new LoadIncrementalHFiles(conf).doBulkLoad(dir, new HTable(conf, tableName));

    verifyMethodResult(SimpleRegionObserver.class,
      new String[] { "hadPreBulkLoadHFile", "hadPostBulkLoadHFile" }, tableName,
      new Boolean[] { true, true });
  } finally {
    util.deleteTable(tableName);
    table.close();
  }
}
public void doAnAction() throws Exception {
  long iteration = numBulkLoads.getAndIncrement();
  Path dir = UTIL.getDataTestDirOnTestFS(String.format("bulkLoad_%08d", iteration));

  // create HFiles for different column families
  FileSystem fs = UTIL.getTestFileSystem();
  byte[] val = Bytes.toBytes(String.format("%010d", iteration));
  final List<Pair<byte[], String>> famPaths = new ArrayList<Pair<byte[], String>>(NUM_CFS);
  for (int i = 0; i < NUM_CFS; i++) {
    Path hfile = new Path(dir, family(i));
    byte[] fam = Bytes.toBytes(family(i));
    createHFile(fs, hfile, fam, QUAL, val, 1000);
    famPaths.add(new Pair<>(fam, hfile.toString()));
  }

  // bulk load HFiles
  final ClusterConnection conn = (ClusterConnection) UTIL.getAdmin().getConnection();
  RegionServerCallable<Void> callable =
      new RegionServerCallable<Void>(conn, tableName, Bytes.toBytes("aaa")) {
    @Override
    public Void call(int callTimeout) throws Exception {
      LOG.debug("Going to connect to server " + getLocation() + " for row "
          + Bytes.toStringBinary(getRow()));
      byte[] regionName = getLocation().getRegionInfo().getRegionName();
      BulkLoadHFileRequest request =
          RequestConverter.buildBulkLoadHFileRequest(famPaths, regionName, true);
      getStub().bulkLoadHFile(null, request);
      return null;
    }
  };
  RpcRetryingCallerFactory factory = new RpcRetryingCallerFactory(conf);
  RpcRetryingCaller<Void> caller = factory.<Void>newCaller();
  caller.callWithRetries(callable, Integer.MAX_VALUE);

  // Periodically do compaction to reduce the number of open file handles.
  if (numBulkLoads.get() % 5 == 0) {
    // 5 * 50 = 250 open file handles!
    callable = new RegionServerCallable<Void>(conn, tableName, Bytes.toBytes("aaa")) {
      @Override
      public Void call(int callTimeout) throws Exception {
        LOG.debug("compacting " + getLocation() + " for row " + Bytes.toStringBinary(getRow()));
        AdminProtos.AdminService.BlockingInterface server =
            conn.getAdmin(getLocation().getServerName());
        CompactRegionRequest request = RequestConverter.buildCompactRegionRequest(
          getLocation().getRegionInfo().getRegionName(), true, null);
        server.compactRegion(null, request);
        numCompactions.incrementAndGet();
        return null;
      }
    };
    caller.callWithRetries(callable, Integer.MAX_VALUE);
  }
}
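/*
 * Sketch of the createHFile(fs, path, family, qualifier, value, numRows) helper used
 * above, assuming it writes numRows cells through the raw HFile writer and tags the file
 * with a bulk-load timestamp. The row-key format and the family(i) naming are illustrative
 * assumptions.
 */
static String family(int i) {
  // Assumed naming scheme for the NUM_CFS column families.
  return String.format("family_%04d", i);
}

public static void createHFile(FileSystem fs, Path path, byte[] family, byte[] qualifier,
    byte[] value, int numRows) throws IOException {
  Configuration conf = UTIL.getConfiguration();
  HFileContext context = new HFileContextBuilder().build();
  HFile.Writer writer = HFile.getWriterFactory(conf, new CacheConfig(conf))
      .withPath(fs, path)
      .withFileContext(context)
      .create();
  long now = System.currentTimeMillis();
  try {
    // HFiles require cells in sorted order; a zero-padded counter keeps keys sorted.
    for (int i = 0; i < numRows; i++) {
      byte[] row = Bytes.toBytes(String.format("row_%08d", i));
      writer.append(new KeyValue(row, family, qualifier, now, value));
    }
    // Mark the file the way bulk-loaded store files are marked.
    writer.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(now));
  } finally {
    writer.close();
  }
}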
private void runTest(String testName, HTableDescriptor htd, BloomType bloomType,
    boolean preCreateTable, byte[][] tableSplitKeys, byte[][][] hfileRanges) throws Exception {
  Path dir = util.getDataTestDirOnTestFS(testName);
  FileSystem fs = util.getTestFileSystem();
  dir = dir.makeQualified(fs);
  Path familyDir = new Path(dir, Bytes.toString(FAMILY));

  int hfileIdx = 0;
  for (byte[][] range : hfileRanges) {
    byte[] from = range[0];
    byte[] to = range[1];
    HFileTestUtil.createHFile(util.getConfiguration(), fs,
      new Path(familyDir, "hfile_" + hfileIdx++), FAMILY, QUALIFIER, from, to, 1000);
  }
  int expectedRows = hfileIdx * 1000;

  if (preCreateTable) {
    util.getHBaseAdmin().createTable(htd, tableSplitKeys);
  }

  final TableName tableName = htd.getTableName();
  LoadIncrementalHFiles loader = new LoadIncrementalHFiles(util.getConfiguration());
  String[] args = { dir.toString(), tableName.toString() };
  loader.run(args);

  Table table = new HTable(util.getConfiguration(), tableName);
  try {
    assertEquals(expectedRows, util.countRows(table));
  } finally {
    table.close();
  }

  // verify staging folder has been cleaned up
  Path stagingBasePath = SecureBulkLoadUtil.getBaseStagingDir(util.getConfiguration());
  if (fs.exists(stagingBasePath)) {
    FileStatus[] files = fs.listStatus(stagingBasePath);
    for (FileStatus file : files) {
      // Compare with equals(), not != (reference comparison on strings would
      // almost never fire and would silently skip this check).
      assertTrue("Folder=" + file.getPath() + " is not cleaned up.",
        !"DONOTERASE".equals(file.getPath().getName()));
    }
  }

  util.deleteTable(tableName);
}
@BeforeClass
public static void setUpBeforeClass() throws Exception {
  CONF = TEST_UTIL.getConfiguration();
  TEST_UTIL.startMiniDFSCluster(1);
  CLUSTER = TEST_UTIL.getDFSCluster();
  FS = CLUSTER.getFileSystem();
  DIR = TEST_UTIL.getDataTestDirOnTestFS("TestDurability");
  FSUtils.setRootDir(CONF, DIR);
}
/** Run small MR job. */
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
@Test
public void testWritingPEData() throws Exception {
  Configuration conf = util.getConfiguration();
  Path testDir = util.getDataTestDirOnTestFS("testWritingPEData");
  FileSystem fs = testDir.getFileSystem(conf);

  // Set down this value or we OOME in eclipse.
  conf.setInt("mapreduce.task.io.sort.mb", 20);
  // Write a few files.
  conf.setLong(HConstants.HREGION_MAX_FILESIZE, 64 * 1024);

  Job job = new Job(conf, "testWritingPEData");
  setupRandomGeneratorMapper(job);
  // This partitioner doesn't work well for number keys, but we use it anyway
  // just to demonstrate how to configure it.
  byte[] startKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
  byte[] endKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];

  Arrays.fill(startKey, (byte) 0);
  Arrays.fill(endKey, (byte) 0xff);

  job.setPartitionerClass(SimpleTotalOrderPartitioner.class);
  // Set start and end rows for partitioner.
  SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey);
  SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey);
  job.setReducerClass(KeyValueSortReducer.class);
  job.setOutputFormatClass(HFileOutputFormat2.class);
  job.setNumReduceTasks(4);
  job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
    MutationSerialization.class.getName(), ResultSerialization.class.getName(),
    KeyValueSerialization.class.getName());

  FileOutputFormat.setOutputPath(job, testDir);
  assertTrue(job.waitForCompletion(false));
  FileStatus[] files = fs.listStatus(testDir);
  assertTrue(files.length > 0);
}
/**
 * Write a random data file and a non-file in a dir with a valid family name but not part of
 * the table families. We should be able to bulk load without getting the unmatched family
 * exception. HBASE-13037/HBASE-13227
 */
private void testNonHfileFolder(String tableName, boolean preCreateTable) throws Exception {
  Path dir = util.getDataTestDirOnTestFS(tableName);
  FileSystem fs = util.getTestFileSystem();
  dir = dir.makeQualified(fs);

  Path familyDir = new Path(dir, Bytes.toString(FAMILY));
  HFileTestUtil.createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_0"),
    FAMILY, QUALIFIER, Bytes.toBytes("begin"), Bytes.toBytes("end"), 500);
  createRandomDataFile(fs, new Path(familyDir, "012356789"), 16 * 1024);

  final String NON_FAMILY_FOLDER = "_logs";
  Path nonFamilyDir = new Path(dir, NON_FAMILY_FOLDER);
  fs.mkdirs(nonFamilyDir);
  fs.mkdirs(new Path(nonFamilyDir, "non-file"));
  createRandomDataFile(fs, new Path(nonFamilyDir, "012356789"), 16 * 1024);

  Table table = null;
  try {
    if (preCreateTable) {
      table = util.createTable(TableName.valueOf(tableName), FAMILY);
    } else {
      table = util.getConnection().getTable(TableName.valueOf(tableName));
    }

    final String[] args = { dir.toString(), tableName };
    new LoadIncrementalHFiles(util.getConfiguration()).run(args);
    assertEquals(500, util.countRows(table));
  } finally {
    if (table != null) {
      table.close();
    }
    fs.delete(dir, true);
  }
}
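/*
 * Sketch of the createRandomDataFile(fs, path, size) helper used above, assuming it simply
 * fills a file with pseudo-random bytes so the loader sees something that is clearly not an
 * HFile. The buffer size and use of java.util.Random are illustrative assumptions.
 */
private static void createRandomDataFile(FileSystem fs, Path path, int size)
    throws IOException {
  FSDataOutputStream stream = fs.create(path);
  try {
    byte[] data = new byte[1024];
    new Random().nextBytes(data);
    // Write full buffers, then the remainder.
    while (size >= data.length) {
      stream.write(data, 0, data.length);
      size -= data.length;
    }
    if (size > 0) {
      stream.write(data, 0, size);
    }
  } finally {
    stream.close();
  }
}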
/**
 * Run an ImportTsv job and perform basic validation on the results. Returns the ImportTsv
 * <code>Tool</code> instance so that other tests can inspect it for further validation as
 * necessary. This method is static to ensure non-reliance on the instance's util/conf
 * facilities.
 *
 * @param args Any arguments to pass BEFORE the inputFile path is appended.
 * @param dataAvailable whether the loaded rows are expected to be present when validating
 * @return The Tool instance used to run the test.
 */
private static Tool doMROnTableTest(HBaseTestingUtility util, String family, String data,
    String[] args, int valueMultiplier, boolean dataAvailable) throws Exception {
  String table = args[args.length - 1];
  Configuration conf = new Configuration(util.getConfiguration());

  // populate input file
  FileSystem fs = FileSystem.get(conf);
  Path inputPath =
      fs.makeQualified(new Path(util.getDataTestDirOnTestFS(table), "input.dat"));
  FSDataOutputStream op = fs.create(inputPath, true);
  op.write(Bytes.toBytes(data));
  op.close();
  LOG.debug(String.format("Wrote test data to file: %s", inputPath));

  if (conf.getBoolean(FORCE_COMBINER_CONF, true)) {
    LOG.debug("Forcing combiner.");
    conf.setInt("mapreduce.map.combine.minspills", 1);
  }

  // run the import
  List<String> argv = new ArrayList<String>(Arrays.asList(args));
  argv.add(inputPath.toString());
  Tool tool = new ImportTsv();
  LOG.debug("Running ImportTsv with arguments: " + argv);
  assertEquals(0, ToolRunner.run(conf, tool, argv.toArray(args)));

  validateTable(conf, TableName.valueOf(table), family, valueMultiplier, dataAvailable);

  if (conf.getBoolean(DELETE_AFTER_LOAD_CONF, true)) {
    LOG.debug("Deleting test subdirectory");
    util.cleanupDataTestDirOnTestFS(table);
  }
  return tool;
}
@Override
public int run(String[] args) throws Exception {
  Path rootRegionDir = null;
  int numThreads = 1;
  long numIterations = 1000000;
  int numFamilies = 1;
  int syncInterval = 0;
  boolean noSync = false;
  boolean verify = false;
  boolean verbose = false;
  boolean cleanup = true;
  boolean noclosefs = false;
  long roll = Long.MAX_VALUE;
  boolean compress = false;
  String cipher = null;
  int numRegions = 1;
  String spanReceivers = getConf().get("hbase.trace.spanreceiver.classes");
  boolean trace = spanReceivers != null && !spanReceivers.isEmpty();
  double traceFreq = 1.0;

  // Process command line args
  for (int i = 0; i < args.length; i++) {
    String cmd = args[i];
    try {
      if (cmd.equals("-threads")) {
        numThreads = Integer.parseInt(args[++i]);
      } else if (cmd.equals("-iterations")) {
        numIterations = Long.parseLong(args[++i]);
      } else if (cmd.equals("-path")) {
        rootRegionDir = new Path(args[++i]);
      } else if (cmd.equals("-families")) {
        numFamilies = Integer.parseInt(args[++i]);
      } else if (cmd.equals("-qualifiers")) {
        numQualifiers = Integer.parseInt(args[++i]);
      } else if (cmd.equals("-keySize")) {
        keySize = Integer.parseInt(args[++i]);
      } else if (cmd.equals("-valueSize")) {
        valueSize = Integer.parseInt(args[++i]);
      } else if (cmd.equals("-syncInterval")) {
        syncInterval = Integer.parseInt(args[++i]);
      } else if (cmd.equals("-nosync")) {
        noSync = true;
      } else if (cmd.equals("-verify")) {
        verify = true;
      } else if (cmd.equals("-verbose")) {
        verbose = true;
      } else if (cmd.equals("-nocleanup")) {
        cleanup = false;
      } else if (cmd.equals("-noclosefs")) {
        noclosefs = true;
      } else if (cmd.equals("-roll")) {
        roll = Long.parseLong(args[++i]);
      } else if (cmd.equals("-compress")) {
        compress = true;
      } else if (cmd.equals("-encryption")) {
        cipher = args[++i];
      } else if (cmd.equals("-regions")) {
        numRegions = Integer.parseInt(args[++i]);
      } else if (cmd.equals("-traceFreq")) {
        traceFreq = Double.parseDouble(args[++i]);
      } else if (cmd.equals("-h") || cmd.equals("--help")) {
        printUsageAndExit();
      } else {
        System.err.println("UNEXPECTED: " + cmd);
        printUsageAndExit();
      }
    } catch (Exception e) {
      printUsageAndExit();
    }
  }

  if (compress) {
    Configuration conf = getConf();
    conf.setBoolean(HConstants.ENABLE_WAL_COMPRESSION, true);
  }

  if (cipher != null) {
    // Set up WAL for encryption
    Configuration conf = getConf();
    conf.set(HConstants.CRYPTO_KEYPROVIDER_CONF_KEY, KeyProviderForTesting.class.getName());
    conf.set(HConstants.CRYPTO_MASTERKEY_NAME_CONF_KEY, "hbase");
    conf.setClass("hbase.regionserver.hlog.reader.impl", SecureProtobufLogReader.class,
      WAL.Reader.class);
    conf.setClass("hbase.regionserver.hlog.writer.impl", SecureProtobufLogWriter.class,
      Writer.class);
    conf.setBoolean(HConstants.ENABLE_WAL_ENCRYPTION, true);
    conf.set(HConstants.CRYPTO_WAL_ALGORITHM_CONF_KEY, cipher);
  }

  if (numThreads < numRegions) {
    LOG.warn("Number of threads is less than the number of regions; some regions will sit idle.");
  }

  // Internal config. goes off number of threads; if more threads than handlers, stuff breaks.
  // In regionserver, number of handlers == number of threads.
  getConf().setInt(HConstants.REGION_SERVER_HANDLER_COUNT, numThreads);

  // Run WAL Performance Evaluation
  // First set the fs from configs. In case we are on hadoop1
  FSUtils.setFsDefault(getConf(), FSUtils.getRootDir(getConf()));
  FileSystem fs = FileSystem.get(getConf());
  LOG.info("FileSystem: " + fs);

  SpanReceiverHost receiverHost = trace ? SpanReceiverHost.getInstance(getConf()) : null;
  TraceScope scope = Trace.startSpan("WALPerfEval", trace ? Sampler.ALWAYS : Sampler.NEVER);

  try {
    if (rootRegionDir == null) {
      rootRegionDir = TEST_UTIL.getDataTestDirOnTestFS("WALPerformanceEvaluation");
    }
    rootRegionDir = rootRegionDir.makeQualified(fs);
    cleanRegionRootDir(fs, rootRegionDir);
    FSUtils.setRootDir(getConf(), rootRegionDir);
    final WALFactory wals = new WALFactory(getConf(), null, "wals");
    final HRegion[] regions = new HRegion[numRegions];
    final Runnable[] benchmarks = new Runnable[numRegions];
    final MockRegionServerServices mockServices = new MockRegionServerServices(getConf());
    final LogRoller roller = new LogRoller(mockServices, mockServices);
    Threads.setDaemonThreadRunning(roller.getThread(), "WALPerfEval.logRoller");

    try {
      for (int i = 0; i < numRegions; i++) {
        // Initialize Table Descriptor
        // a table per desired region means we can avoid carving up the key space
        final HTableDescriptor htd = createHTableDescriptor(i, numFamilies);
        regions[i] = openRegion(fs, rootRegionDir, htd, wals, roll, roller);
        benchmarks[i] = Trace.wrap(
          new WALPutBenchmark(regions[i], htd, numIterations, noSync, syncInterval, traceFreq));
      }
      ConsoleReporter.enable(this.metrics, 30, TimeUnit.SECONDS);
      long putTime = runBenchmark(benchmarks, numThreads);
      logBenchmarkResult("Summary: threads=" + numThreads + ", iterations=" + numIterations
          + ", syncInterval=" + syncInterval, numIterations * numThreads, putTime);

      for (int i = 0; i < numRegions; i++) {
        if (regions[i] != null) {
          closeRegion(regions[i]);
          regions[i] = null;
        }
      }
      if (verify) {
        LOG.info("verifying written log entries.");
        Path dir = new Path(FSUtils.getRootDir(getConf()),
          DefaultWALProvider.getWALDirectoryName("wals"));
        long editCount = 0;
        FileStatus[] fsss = fs.listStatus(dir);
        if (fsss.length == 0) throw new IllegalStateException("No WAL found");
        for (FileStatus fss : fsss) {
          Path p = fss.getPath();
          if (!fs.exists(p)) throw new IllegalStateException(p.toString());
          editCount += verify(wals, p, verbose);
        }
        long expected = numIterations * numThreads;
        if (editCount != expected) {
          throw new IllegalStateException("Counted=" + editCount + ", expected=" + expected);
        }
      }
    } finally {
      mockServices.stop("test clean up.");
      for (int i = 0; i < numRegions; i++) {
        if (regions[i] != null) {
          closeRegion(regions[i]);
        }
      }
      if (null != roller) {
        LOG.info("shutting down log roller.");
        Threads.shutdown(roller.getThread());
      }
      wals.shutdown();
      // Remove the root dir for this test region
      if (cleanup) cleanRegionRootDir(fs, rootRegionDir);
    }
  } finally {
    // We may be called inside a test that wants to keep on using the fs.
    if (!noclosefs) fs.close();
    scope.close();
    if (receiverHost != null) receiverHost.closeReceivers();
  }

  return 0;
}
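/*
 * logBenchmarkResult() is called by both performance-evaluation run() methods above; a
 * minimal sketch, assuming it just reports throughput derived from the operation count and
 * the elapsed wall time in milliseconds. The log format is an assumption.
 */
private static void logBenchmarkResult(String testName, long numTests, long totalTime) {
  float tsec = totalTime / 1000.0f;
  LOG.info(String.format("%s took %.3fs %.3fops/s", testName, tsec, numTests / tsec));
}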
/**
 * Test that {@link HFileOutputFormat2} RecordWriter uses compression and bloom filter
 * settings from the column family descriptor.
 */
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
@Test
public void testColumnFamilySettings() throws Exception {
  Configuration conf = new Configuration(this.util.getConfiguration());
  RecordWriter<ImmutableBytesWritable, Cell> writer = null;
  TaskAttemptContext context = null;
  Path dir = util.getDataTestDir("testColumnFamilySettings");

  // Setup table descriptor
  Table table = Mockito.mock(Table.class);
  RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
  HTableDescriptor htd = new HTableDescriptor(TABLE_NAME);
  Mockito.doReturn(htd).when(table).getTableDescriptor();
  for (HColumnDescriptor hcd : HBaseTestingUtility.generateColumnDescriptors()) {
    htd.addFamily(hcd);
  }

  // set up the table to return some mock keys
  setupMockStartKeys(regionLocator);

  try {
    // partial map red setup to get an operational writer for testing
    // We turn off the sequence file compression, because DefaultCodec
    // pollutes the GZip codec pool with an incompatible compressor.
    conf.set("io.seqfile.compression.type", "NONE");
    conf.set("hbase.fs.tmp.dir", dir.toString());
    // turn locality off to eliminate getRegionLocation fail-and-retry time when writing kvs
    conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false);

    Job job = new Job(conf, "testLocalMRIncrementalLoad");
    job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings"));
    setupRandomGeneratorMapper(job);
    HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
    FileOutputFormat.setOutputPath(job, dir);
    context = createTestTaskAttemptContext(job);
    HFileOutputFormat2 hof = new HFileOutputFormat2();
    writer = hof.getRecordWriter(context);

    // write out random rows
    writeRandomKeyValues(writer, context, htd.getFamiliesKeys(), ROWSPERSPLIT);
    writer.close(context);

    // Make sure that a directory was created for every CF
    FileSystem fs = dir.getFileSystem(conf);

    // commit so that the filesystem has one directory per column family
    hof.getOutputCommitter(context).commitTask(context);
    hof.getOutputCommitter(context).commitJob(context);
    FileStatus[] families = FSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs));
    assertEquals(htd.getFamilies().size(), families.length);
    for (FileStatus f : families) {
      String familyStr = f.getPath().getName();
      HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(familyStr));
      // verify that the compression on this file matches the configured compression
      Path dataFilePath = fs.listStatus(f.getPath())[0].getPath();
      Reader reader = HFile.createReader(fs, dataFilePath, new CacheConfig(conf), conf);
      Map<byte[], byte[]> fileInfo = reader.loadFileInfo();

      byte[] bloomFilter = fileInfo.get(StoreFile.BLOOM_FILTER_TYPE_KEY);
      if (bloomFilter == null) bloomFilter = Bytes.toBytes("NONE");
      assertEquals("Incorrect bloom filter used for column family " + familyStr
          + " (reader: " + reader + ")",
        hcd.getBloomFilterType(), BloomType.valueOf(Bytes.toString(bloomFilter)));
      assertEquals("Incorrect compression used for column family " + familyStr
          + " (reader: " + reader + ")",
        hcd.getCompressionType(), reader.getFileContext().getCompression());
    }
  } finally {
    dir.getFileSystem(conf).delete(dir, true);
  }
}
/**
 * This test covers the scenario that happened in HBASE-6901: all files are bulk loaded and
 * excluded from minor compaction. Without the fix of HBASE-6901, an
 * ArrayIndexOutOfBoundsException will be thrown.
 */
@Ignore("Flakey: See HBASE-9051")
@Test
public void testExcludeAllFromMinorCompaction() throws Exception {
  Configuration conf = util.getConfiguration();
  conf.setInt("hbase.hstore.compaction.min", 2);
  generateRandomStartKeys(5);

  util.startMiniCluster();
  try (Connection conn = ConnectionFactory.createConnection();
      Admin admin = conn.getAdmin();
      Table table = util.createTable(TABLE_NAME, FAMILIES);
      RegionLocator locator = conn.getRegionLocator(TABLE_NAME)) {
    final FileSystem fs = util.getDFSCluster().getFileSystem();
    assertEquals("Should start with empty table", 0, util.countRows(table));

    // deep inspection: get the StoreFile dir
    final Path storePath = new Path(
      FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME),
      new Path(admin.getTableRegions(TABLE_NAME).get(0).getEncodedName(),
        Bytes.toString(FAMILIES[0])));
    assertEquals(0, fs.listStatus(storePath).length);

    // Generate two bulk load files
    conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", true);

    for (int i = 0; i < 2; i++) {
      Path testDir = util.getDataTestDirOnTestFS("testExcludeAllFromMinorCompaction_" + i);
      runIncrementalPELoad(conf, table.getTableDescriptor(),
        conn.getRegionLocator(TABLE_NAME), testDir);
      // Perform the actual load
      new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, locator);
    }

    // Ensure data shows up
    int expectedRows = 2 * NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
    assertEquals("LoadIncrementalHFiles should put expected data in table", expectedRows,
      util.countRows(table));

    // should have a second StoreFile now
    assertEquals(2, fs.listStatus(storePath).length);

    // minor compactions shouldn't get rid of the file
    admin.compact(TABLE_NAME);
    try {
      quickPoll(new Callable<Boolean>() {
        @Override
        public Boolean call() throws Exception {
          return fs.listStatus(storePath).length == 1;
        }
      }, 5000);
      throw new IOException("SF# = " + fs.listStatus(storePath).length);
    } catch (AssertionError ae) {
      // this is expected behavior
    }

    // a major compaction should work though
    admin.majorCompact(TABLE_NAME);
    quickPoll(new Callable<Boolean>() {
      @Override
      public Boolean call() throws Exception {
        return fs.listStatus(storePath).length == 1;
      }
    }, 5000);
  } finally {
    util.shutdownMiniCluster();
  }
}
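/*
 * quickPoll() is used by the compaction tests above and below; a minimal sketch, assuming
 * it repeatedly evaluates the condition until it holds or the timeout elapses, and fails
 * the test otherwise. The 10 ms sleep granularity is an illustrative assumption.
 */
private void quickPoll(Callable<Boolean> c, int waitMs) throws Exception {
  int sleepMs = 10;
  int retries = waitMs / sleepMs;
  while (retries-- > 0) {
    if (c.call().booleanValue()) {
      return;
    }
    Thread.sleep(sleepMs);
  }
  fail();
}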
@Override
public int run(String[] args) throws Exception {
  Path rootRegionDir = null;
  int numThreads = 1;
  long numIterations = 10000;
  int numFamilies = 1;
  boolean noSync = false;
  boolean verify = false;
  boolean verbose = false;
  boolean cleanup = true;
  boolean noclosefs = false;
  long roll = Long.MAX_VALUE;

  // Process command line args
  for (int i = 0; i < args.length; i++) {
    String cmd = args[i];
    try {
      if (cmd.equals("-threads")) {
        numThreads = Integer.parseInt(args[++i]);
      } else if (cmd.equals("-iterations")) {
        numIterations = Long.parseLong(args[++i]);
      } else if (cmd.equals("-path")) {
        rootRegionDir = new Path(args[++i]);
      } else if (cmd.equals("-families")) {
        numFamilies = Integer.parseInt(args[++i]);
      } else if (cmd.equals("-qualifiers")) {
        numQualifiers = Integer.parseInt(args[++i]);
      } else if (cmd.equals("-keySize")) {
        keySize = Integer.parseInt(args[++i]);
      } else if (cmd.equals("-valueSize")) {
        valueSize = Integer.parseInt(args[++i]);
      } else if (cmd.equals("-nosync")) {
        noSync = true;
      } else if (cmd.equals("-verify")) {
        verify = true;
      } else if (cmd.equals("-verbose")) {
        verbose = true;
      } else if (cmd.equals("-nocleanup")) {
        cleanup = false;
      } else if (cmd.equals("-noclosefs")) {
        noclosefs = true;
      } else if (cmd.equals("-roll")) {
        roll = Long.parseLong(args[++i]);
      } else if (cmd.equals("-h") || cmd.equals("--help")) {
        printUsageAndExit();
      } else {
        System.err.println("UNEXPECTED: " + cmd);
        printUsageAndExit();
      }
    } catch (Exception e) {
      printUsageAndExit();
    }
  }

  // Run HLog Performance Evaluation
  // First set the fs from configs. In case we are on hadoop1
  FSUtils.setFsDefault(getConf(), FSUtils.getRootDir(getConf()));
  FileSystem fs = FileSystem.get(getConf());
  LOG.info("FileSystem: " + fs);
  try {
    if (rootRegionDir == null) {
      rootRegionDir = TEST_UTIL.getDataTestDirOnTestFS("HLogPerformanceEvaluation");
    }
    rootRegionDir = rootRegionDir.makeQualified(fs);
    cleanRegionRootDir(fs, rootRegionDir);
    // Initialize Table Descriptor
    HTableDescriptor htd = createHTableDescriptor(numFamilies);
    final long whenToRoll = roll;
    HLog hlog = new FSHLog(fs, rootRegionDir, "wals", getConf()) {
      int appends = 0;

      @Override
      protected void doWrite(HRegionInfo info, HLogKey logKey, WALEdit logEdit,
          HTableDescriptor htd) throws IOException {
        this.appends++;
        if (this.appends % whenToRoll == 0) {
          LOG.info("Rolling after " + appends + " edits");
          rollWriter();
        }
        super.doWrite(info, logKey, logEdit, htd);
      }
    };
    hlog.rollWriter();
    HRegion region = null;
    try {
      region = openRegion(fs, rootRegionDir, htd, hlog);
      long putTime =
          runBenchmark(new HLogPutBenchmark(region, htd, numIterations, noSync), numThreads);
      logBenchmarkResult("Summary: threads=" + numThreads + ", iterations=" + numIterations,
        numIterations * numThreads, putTime);
      if (region != null) {
        closeRegion(region);
        region = null;
      }
      if (verify) {
        Path dir = ((FSHLog) hlog).getDir();
        long editCount = 0;
        FileStatus[] fsss = fs.listStatus(dir);
        if (fsss.length == 0) throw new IllegalStateException("No WAL found");
        for (FileStatus fss : fsss) {
          Path p = fss.getPath();
          if (!fs.exists(p)) throw new IllegalStateException(p.toString());
          editCount += verify(p, verbose);
        }
        long expected = numIterations * numThreads;
        if (editCount != expected) {
          throw new IllegalStateException("Counted=" + editCount + ", expected=" + expected);
        }
      }
    } finally {
      if (region != null) closeRegion(region);
      // Remove the root dir for this test region
      if (cleanup) cleanRegionRootDir(fs, rootRegionDir);
    }
  } finally {
    // We may be called inside a test that wants to keep on using the fs.
    if (!noclosefs) fs.close();
  }
  return 0;
}
private void doIncrementalLoadTest(boolean shouldChangeRegions, boolean shouldKeepLocality)
    throws Exception {
  util = new HBaseTestingUtility();
  Configuration conf = util.getConfiguration();
  conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, shouldKeepLocality);
  int hostCount = 1;
  int regionNum = 5;
  if (shouldKeepLocality) {
    // We should change host count higher than hdfs replica count when MiniHBaseCluster
    // supports an explicit hostnames parameter just like MiniDFSCluster does.
    hostCount = 3;
    regionNum = 20;
  }

  byte[][] splitKeys = generateRandomSplitKeys(regionNum - 1);
  String[] hostnames = new String[hostCount];
  for (int i = 0; i < hostCount; ++i) {
    hostnames[i] = "datanode_" + i;
  }
  util.startMiniCluster(1, hostCount, hostnames);

  Table table = util.createTable(TABLE_NAME, FAMILIES, splitKeys);
  Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
  try (RegionLocator r = util.getConnection().getRegionLocator(TABLE_NAME);
      Admin admin = util.getConnection().getAdmin()) {
    assertEquals("Should start with empty table", 0, util.countRows(table));
    int numRegions = r.getStartKeys().length;
    assertEquals("Should make " + regionNum + " regions", numRegions, regionNum);

    // Generate the bulk load files
    runIncrementalPELoad(conf, table.getTableDescriptor(), r, testDir);
    // This doesn't write into the table, just makes files
    assertEquals("HFOF should not touch actual table", 0, util.countRows(table));

    // Make sure that a directory was created for every CF
    int dir = 0;
    for (FileStatus f : testDir.getFileSystem(conf).listStatus(testDir)) {
      for (byte[] family : FAMILIES) {
        if (Bytes.toString(family).equals(f.getPath().getName())) {
          ++dir;
        }
      }
    }
    assertEquals("Column family not found in FS.", FAMILIES.length, dir);

    // handle the split case
    if (shouldChangeRegions) {
      LOG.info("Changing regions in table");
      admin.disableTable(table.getName());
      while (util.getMiniHBaseCluster().getMaster().getAssignmentManager().getRegionStates()
          .isRegionsInTransition()) {
        Threads.sleep(200);
        LOG.info("Waiting on table to finish disabling");
      }
      util.deleteTable(table.getName());
      byte[][] newSplitKeys = generateRandomSplitKeys(14);
      table = util.createTable(TABLE_NAME, FAMILIES, newSplitKeys);

      while (util.getConnection().getRegionLocator(TABLE_NAME).getAllRegionLocations().size()
          != 15 || !admin.isTableAvailable(table.getName())) {
        Thread.sleep(200);
        LOG.info("Waiting for new region assignment to happen");
      }
    }

    // Perform the actual load
    new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, r);

    // Ensure data shows up
    int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
    assertEquals("LoadIncrementalHFiles should put expected data in table", expectedRows,
      util.countRows(table));
    Scan scan = new Scan();
    ResultScanner results = table.getScanner(scan);
    for (Result res : results) {
      assertEquals(FAMILIES.length, res.rawCells().length);
      Cell first = res.rawCells()[0];
      for (Cell kv : res.rawCells()) {
        assertTrue(CellUtil.matchingRow(first, kv));
        assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
      }
    }
    results.close();
    String tableDigestBefore = util.checksumRows(table);

    // Check region locality
    HDFSBlocksDistribution hbd = new HDFSBlocksDistribution();
    for (HRegion region : util.getHBaseCluster().getRegions(TABLE_NAME)) {
      hbd.add(region.getHDFSBlocksDistribution());
    }
    for (String hostname : hostnames) {
      float locality = hbd.getBlockLocalityIndex(hostname);
      LOG.info("locality of [" + hostname + "]: " + locality);
      assertEquals(100, (int) (locality * 100));
    }

    // Cause regions to reopen
    admin.disableTable(TABLE_NAME);
    while (!admin.isTableDisabled(TABLE_NAME)) {
      Thread.sleep(200);
      LOG.info("Waiting for table to disable");
    }
    admin.enableTable(TABLE_NAME);
    util.waitTableAvailable(TABLE_NAME);
    assertEquals("Data should remain after reopening of regions", tableDigestBefore,
      util.checksumRows(table));
  } finally {
    testDir.getFileSystem(conf).delete(testDir, true);
    util.deleteTable(TABLE_NAME);
    util.shutdownMiniCluster();
  }
}
@Test
public void testExcludeMinorCompaction() throws Exception {
  Configuration conf = util.getConfiguration();
  conf.setInt("hbase.hstore.compaction.min", 2);
  generateRandomStartKeys(5);

  try {
    util.startMiniCluster();
    Path testDir = util.getDataTestDirOnTestFS("testExcludeMinorCompaction");
    final FileSystem fs = util.getDFSCluster().getFileSystem();
    Admin admin = util.getHBaseAdmin();
    HTable table = util.createTable(TABLE_NAME, FAMILIES);
    assertEquals("Should start with empty table", 0, util.countRows(table));

    // deep inspection: get the StoreFile dir
    final Path storePath = HStore.getStoreHomedir(
      FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME),
      admin.getTableRegions(TABLE_NAME).get(0), FAMILIES[0]);
    assertEquals(0, fs.listStatus(storePath).length);

    // put some data in it and flush to create a storefile
    Put p = new Put(Bytes.toBytes("test"));
    p.add(FAMILIES[0], Bytes.toBytes("1"), Bytes.toBytes("1"));
    table.put(p);
    admin.flush(TABLE_NAME);
    assertEquals(1, util.countRows(table));
    quickPoll(new Callable<Boolean>() {
      @Override
      public Boolean call() throws Exception {
        return fs.listStatus(storePath).length == 1;
      }
    }, 5000);

    // Generate a bulk load file with more rows
    conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", true);
    util.startMiniMapReduceCluster();
    runIncrementalPELoad(conf, table, testDir);

    // Perform the actual load
    new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);

    // Ensure data shows up
    int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
    assertEquals("LoadIncrementalHFiles should put expected data in table", expectedRows + 1,
      util.countRows(table));

    // should have a second StoreFile now
    assertEquals(2, fs.listStatus(storePath).length);

    // minor compactions shouldn't get rid of the file
    admin.compact(TABLE_NAME);
    try {
      quickPoll(new Callable<Boolean>() {
        @Override
        public Boolean call() throws Exception {
          return fs.listStatus(storePath).length == 1;
        }
      }, 5000);
      throw new IOException("SF# = " + fs.listStatus(storePath).length);
    } catch (AssertionError ae) {
      // this is expected behavior
    }

    // a major compaction should work though
    admin.majorCompact(TABLE_NAME);
    quickPoll(new Callable<Boolean>() {
      @Override
      public Boolean call() throws Exception {
        return fs.listStatus(storePath).length == 1;
      }
    }, 5000);
  } finally {
    util.shutdownMiniMapReduceCluster();
    util.shutdownMiniCluster();
  }
}
private void doIncrementalLoadTest(boolean shouldChangeRegions) throws Exception {
  util = new HBaseTestingUtility();
  Configuration conf = util.getConfiguration();
  byte[][] splitKeys = generateRandomSplitKeys(4);
  util.startMiniCluster();
  try {
    HTable table = util.createTable(TABLE_NAME, FAMILIES, splitKeys);
    Admin admin = table.getConnection().getAdmin();
    Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
    assertEquals("Should start with empty table", 0, util.countRows(table));
    int numRegions = -1;
    try (RegionLocator r = table.getRegionLocator()) {
      numRegions = r.getStartKeys().length;
    }
    assertEquals("Should make 5 regions", numRegions, 5);

    // Generate the bulk load files
    util.startMiniMapReduceCluster();
    runIncrementalPELoad(conf, table.getTableDescriptor(), table.getRegionLocator(), testDir);
    // This doesn't write into the table, just makes files
    assertEquals("HFOF should not touch actual table", 0, util.countRows(table));

    // Make sure that a directory was created for every CF
    int dir = 0;
    for (FileStatus f : testDir.getFileSystem(conf).listStatus(testDir)) {
      for (byte[] family : FAMILIES) {
        if (Bytes.toString(family).equals(f.getPath().getName())) {
          ++dir;
        }
      }
    }
    assertEquals("Column family not found in FS.", FAMILIES.length, dir);

    // handle the split case
    if (shouldChangeRegions) {
      LOG.info("Changing regions in table");
      admin.disableTable(table.getName());
      while (util.getMiniHBaseCluster().getMaster().getAssignmentManager().getRegionStates()
          .isRegionsInTransition()) {
        Threads.sleep(200);
        LOG.info("Waiting on table to finish disabling");
      }
      util.deleteTable(table.getName());
      byte[][] newSplitKeys = generateRandomSplitKeys(14);
      table = util.createTable(TABLE_NAME, FAMILIES, newSplitKeys);
      while (table.getRegionLocator().getAllRegionLocations().size() != 15
          || !admin.isTableAvailable(table.getName())) {
        Thread.sleep(200);
        LOG.info("Waiting for new region assignment to happen");
      }
    }

    // Perform the actual load
    new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);

    // Ensure data shows up
    int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
    assertEquals("LoadIncrementalHFiles should put expected data in table", expectedRows,
      util.countRows(table));
    Scan scan = new Scan();
    ResultScanner results = table.getScanner(scan);
    for (Result res : results) {
      assertEquals(FAMILIES.length, res.rawCells().length);
      Cell first = res.rawCells()[0];
      for (Cell kv : res.rawCells()) {
        assertTrue(CellUtil.matchingRow(first, kv));
        assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
      }
    }
    results.close();
    String tableDigestBefore = util.checksumRows(table);

    // Cause regions to reopen
    admin.disableTable(TABLE_NAME);
    while (!admin.isTableDisabled(TABLE_NAME)) {
      Thread.sleep(200);
      LOG.info("Waiting for table to disable");
    }
    admin.enableTable(TABLE_NAME);
    util.waitTableAvailable(TABLE_NAME);
    assertEquals("Data should remain after reopening of regions", tableDigestBefore,
      util.checksumRows(table));
  } finally {
    util.shutdownMiniMapReduceCluster();
    util.shutdownMiniCluster();
  }
}
@Test(timeout = 30000)
public void testBulkLoad() throws IOException {
  // Create table then get the single region for our new table.
  LOG.debug("Creating test table");
  HTableDescriptor hdt = HTU.createTableDescriptor("testBulkLoad");
  hdt.setRegionReplication(NB_SERVERS);
  hdt.addCoprocessor(SlowMeCopro.class.getName());
  Table table = HTU.createTable(hdt, new byte[][] { f }, HTU.getConfiguration());

  // create hfiles to load.
  LOG.debug("Creating test data");
  Path dir = HTU.getDataTestDirOnTestFS("testBulkLoad");
  final int numRows = 10;
  final byte[] qual = Bytes.toBytes("qual");
  final byte[] val = Bytes.toBytes("val");
  final List<Pair<byte[], String>> famPaths = new ArrayList<Pair<byte[], String>>();
  for (HColumnDescriptor col : hdt.getColumnFamilies()) {
    Path hfile = new Path(dir, col.getNameAsString());
    TestHRegionServerBulkLoad.createHFile(HTU.getTestFileSystem(), hfile, col.getName(), qual,
      val, numRows);
    famPaths.add(new Pair<byte[], String>(col.getName(), hfile.toString()));
  }

  // bulk load HFiles
  LOG.debug("Loading test data");
  @SuppressWarnings("deprecation")
  final HConnection conn = HTU.getHBaseAdmin().getConnection();
  RegionServerCallable<Void> callable = new RegionServerCallable<Void>(conn,
      hdt.getTableName(), TestHRegionServerBulkLoad.rowkey(0)) {
    @Override
    public Void call(int timeout) throws Exception {
      LOG.debug("Going to connect to server " + getLocation() + " for row "
          + Bytes.toStringBinary(getRow()));
      byte[] regionName = getLocation().getRegionInfo().getRegionName();
      BulkLoadHFileRequest request =
          RequestConverter.buildBulkLoadHFileRequest(famPaths, regionName, true);
      getStub().bulkLoadHFile(null, request);
      return null;
    }
  };
  RpcRetryingCallerFactory factory = new RpcRetryingCallerFactory(HTU.getConfiguration());
  RpcRetryingCaller<Void> caller = factory.<Void>newCaller();
  caller.callWithRetries(callable, 10000);

  // verify we can read them from the primary
  LOG.debug("Verifying data load");
  for (int i = 0; i < numRows; i++) {
    byte[] row = TestHRegionServerBulkLoad.rowkey(i);
    Get g = new Get(row);
    Result r = table.get(g);
    Assert.assertFalse(r.isStale());
  }

  // verify we can read them from the replica
  LOG.debug("Verifying replica queries");
  try {
    SlowMeCopro.cdl.set(new CountDownLatch(1));
    for (int i = 0; i < numRows; i++) {
      byte[] row = TestHRegionServerBulkLoad.rowkey(i);
      Get g = new Get(row);
      g.setConsistency(Consistency.TIMELINE);
      Result r = table.get(g);
      Assert.assertTrue(r.isStale());
    }
    SlowMeCopro.cdl.get().countDown();
  } finally {
    SlowMeCopro.cdl.get().countDown();
    SlowMeCopro.sleepTime.set(0);
  }

  HTU.getHBaseAdmin().disableTable(hdt.getTableName());
  HTU.deleteTable(hdt.getTableName());
}