@BeforeClass
public static void setUpBeforeClass() throws Exception {
  // Start up our mini cluster on top of an 0.92 root.dir that has data from
  // a 0.92 hbase run -- it has a table with 100 rows in it -- and see if
  // we can migrate from 0.92.
  TEST_UTIL.startMiniZKCluster();
  TEST_UTIL.startMiniDFSCluster(1);
  Path testdir = TEST_UTIL.getDataTestDir("TestMetaMigrationConvertToPB");
  // Untar our test dir.
  File untar = untar(new File(testdir.toString()));
  // Now copy the untar up into hdfs so when we start hbase, we'll run from it.
  Configuration conf = TEST_UTIL.getConfiguration();
  FsShell shell = new FsShell(conf);
  FileSystem fs = FileSystem.get(conf);
  // Find where hbase will root itself, so we can copy the filesystem there.
  Path hbaseRootDir = TEST_UTIL.getDefaultRootDirPath();
  if (!fs.isDirectory(hbaseRootDir.getParent())) {
    // mkdir at first
    fs.mkdirs(hbaseRootDir.getParent());
  }
  doFsCommand(shell,
      new String[] {"-put", untar.toURI().toString(), hbaseRootDir.toString()});
  // Windows fix: the tgz file has the .META. directory renamed as -META-, since the
  // original is an illegal name under Windows. So we rename it back. See
  // src/test/data//TestMetaMigrationConvertingToPB.README and
  // https://issues.apache.org/jira/browse/HBASE-6821
  doFsCommand(shell,
      new String[] {"-mv",
          new Path(hbaseRootDir, "-META-").toString(),
          new Path(hbaseRootDir, ".META.").toString()});
  // See what's in minihdfs.
  doFsCommand(shell, new String[] {"-lsr", "/"});
  TEST_UTIL.startMiniHBaseCluster(1, 1);
  // Assert we are running against the copied-up filesystem. The copied-up
  // rootdir should have had a table named 'TestTable' in it. Assert it is
  // present.
  HTable t = new HTable(TEST_UTIL.getConfiguration(), TESTTABLE);
  ResultScanner scanner = t.getScanner(new Scan());
  int count = 0;
  while (scanner.next() != null) {
    count++;
  }
  // Assert that we find all 100 rows that are in the data we loaded. If
  // so then the migration from 0.92 succeeded.
  Assert.assertEquals(ROW_COUNT, count);
  scanner.close();
  t.close();
}
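// A minimal sketch of the two private helpers the setup above relies on but
// does not show. doFsCommand is assumed to be a thin wrapper over FsShell.run
// (the standard Hadoop Tool entry point) that fails on a non-zero exit code;
// untar is assumed to expand a bundled .tgz of the 0.92 root dir into the
// test directory. The tarball name/location and returned directory name are
// assumptions, not the test's actual values. Requires org.apache.commons.compress
// for TarArchiveInputStream, TarArchiveEntry, GzipCompressorInputStream, IOUtils.
private static void doFsCommand(FsShell shell, final String[] args) throws Exception {
  int errcode = shell.run(args);
  if (errcode != 0) {
    throw new IOException("Failed fs command; errcode=" + errcode);
  }
}

private static File untar(final File testdir) throws IOException {
  // Assumed archive location; adjust to wherever the 0.92 data tarball lives.
  File tgz = new File("src/test/data", "TestMetaMigrationConvertToPB.tgz");
  try (TarArchiveInputStream tar = new TarArchiveInputStream(
      new GzipCompressorInputStream(new FileInputStream(tgz)))) {
    TarArchiveEntry entry;
    while ((entry = tar.getNextTarEntry()) != null) {
      File out = new File(testdir, entry.getName());
      if (entry.isDirectory()) {
        out.mkdirs();
      } else {
        out.getParentFile().mkdirs();
        try (FileOutputStream fos = new FileOutputStream(out)) {
          IOUtils.copy(tar, fos);
        }
      }
    }
  }
  // Assumed top-level directory name inside the archive.
  return new File(testdir, "TestMetaMigrationConvertToPB");
}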
@Before
public void setUp() throws Exception {
  File tempDir = Files.createTempDir();
  tempDir.deleteOnExit();
  htu = HBaseTestingUtility.createLocalHTU();
  try {
    htu.cleanupTestDir();
    htu.startMiniZKCluster();
    htu.startMiniHBaseCluster(1, 1);
    try {
      htu.deleteTable(Bytes.toBytes(tableName));
    } catch (Exception e) {
      Log.info(" - no table " + tableName + " found");
    }
    htu.createTable(Bytes.toBytes(tableName), colFam);
    dao = new Hbase1OffsetStore.Builder()
        .setHbaseConfiguration(htu.getConfiguration())
        .setOffsetTable(tableName)
        .build();
  } catch (Exception e1) {
    throw new RuntimeException(e1);
  }
  KOM = new KafkaOffsetManager.Builder()
      .setOffsetManager(dao)
      .setKafkaBrokerList("localhost:" + kafkaRule.kafkaBrokerPort())
      .setGroupID(testGroupID)
      .setTopic(testTopicName)
      .build();
}
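// Hedged sketch: the setup above starts a mini ZK/HBase cluster, but the
// matching teardown is not shown. Assuming the same htu/tableName fields,
// a companion @After might look roughly like this, shutting things down in
// reverse order of startup.
@After
public void tearDown() throws Exception {
  try {
    // Best-effort cleanup of the offset table before shutdown.
    htu.deleteTable(Bytes.toBytes(tableName));
  } catch (Exception e) {
    Log.info(" - no table " + tableName + " to clean up");
  }
  htu.shutdownMiniHBaseCluster();
  htu.shutdownMiniZKCluster();
  htu.cleanupTestDir();
}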
@Test
public void testReducerNumEstimation() throws Exception {
  // Skip the test for Tez. Tez uses a different mechanism.
  // The equivalent test is in TestTezAutoParallelism.
  Assume.assumeTrue("Skip this test for TEZ", Util.isMapredExecType(cluster.getExecType()));
  // Use the estimation.
  Configuration conf = HBaseConfiguration.create(new Configuration());
  HBaseTestingUtility util = new HBaseTestingUtility(conf);
  int clientPort = util.startMiniZKCluster().getClientPort();
  util.startMiniHBaseCluster(1, 1);

  String query = "a = load '/passwd';" +
      "b = group a by $0;" +
      "store b into 'output';";
  PigServer ps = new PigServer(cluster.getExecType(), cluster.getProperties());
  PhysicalPlan pp = Util.buildPp(ps, query);
  MROperPlan mrPlan = Util.buildMRPlan(pp, pc);
  pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
  pc.getConf().setProperty("pig.exec.reducers.max", "10");
  pc.getConf().setProperty(HConstants.ZOOKEEPER_CLIENT_PORT, Integer.toString(clientPort));
  ConfigurationValidator.validatePigProperties(pc.getProperties());
  conf = ConfigurationUtil.toConfiguration(pc.getProperties());
  JobControlCompiler jcc = new JobControlCompiler(pc, conf);
  JobControl jc = jcc.compile(mrPlan, "Test");
  Job job = jc.getWaitingJobs().get(0);
  long reducer = Math.min(
      (long) Math.ceil(new File("test/org/apache/pig/test/data/passwd").length() / 100.0), 10);
  Util.assertParallelValues(-1, -1, reducer, reducer, job.getJobConf());

  // Use the PARALLEL keyword; it will override the estimated reducer number.
  query = "a = load '/passwd';" +
      "b = group a by $0 PARALLEL 2;" +
      "store b into 'output';";
  pp = Util.buildPp(ps, query);
  mrPlan = Util.buildMRPlan(pp, pc);
  pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
  pc.getConf().setProperty("pig.exec.reducers.max", "10");
  ConfigurationValidator.validatePigProperties(pc.getProperties());
  conf = ConfigurationUtil.toConfiguration(pc.getProperties());
  jcc = new JobControlCompiler(pc, conf);
  jc = jcc.compile(mrPlan, "Test");
  job = jc.getWaitingJobs().get(0);
  Util.assertParallelValues(-1, 2, -1, 2, job.getJobConf());

  final byte[] COLUMNFAMILY = Bytes.toBytes("pig");
  util.createTable(Bytes.toBytesBinary("test_table"), COLUMNFAMILY);

  // The estimation won't take effect when the input is non-dfs or the files
  // don't exist, as with hbase.
  query = "a = load 'hbase://test_table' using org.apache.pig.backend.hadoop.hbase.HBaseStorage('c:f1 c:f2');" +
      "b = group a by $0 ;" +
      "store b into 'output';";
  pp = Util.buildPp(ps, query);
  mrPlan = Util.buildMRPlan(pp, pc);
  pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
  pc.getConf().setProperty("pig.exec.reducers.max", "10");
  ConfigurationValidator.validatePigProperties(pc.getProperties());
  conf = ConfigurationUtil.toConfiguration(pc.getProperties());
  jcc = new JobControlCompiler(pc, conf);
  jc = jcc.compile(mrPlan, "Test");
  job = jc.getWaitingJobs().get(0);
  Util.assertParallelValues(-1, -1, 1, 1, job.getJobConf());

  util.deleteTable(Bytes.toBytesBinary("test_table"));
  // In HBase 0.90.1 and above we can use util.shutdownMiniHBaseCluster()
  // here instead.
  MiniHBaseCluster hbc = util.getHBaseCluster();
  if (hbc != null) {
    hbc.shutdown();
    hbc.join();
  }
  util.shutdownMiniZKCluster();
}
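// Hedged sketch: the expected values asserted above follow Pig's reducer
// estimate, which is roughly ceil(inputBytes / bytesPerReducer) capped at
// pig.exec.reducers.max -- the same arithmetic as the inline Math.min/Math.ceil
// expression in the test. Factored out for clarity (the method name is
// illustrative, not Pig API):
static long estimateReducers(long inputBytes, long bytesPerReducer, long maxReducers) {
  return Math.min((long) Math.ceil(inputBytes / (double) bytesPerReducer), maxReducers);
}
// For example, a 450-byte /passwd with 100 bytes per reducer and a cap of 10
// gives min(ceil(450 / 100.0), 10) = 5 reducers.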