@Test
public void testHostRank() throws Exception {
  if (System.getProperty("prop.mapred.job.tracker") != null) {
    if (LOG.isInfoEnabled()) {
      LOG.info("testHostRank: Ignore this test if not local mode.");
    }
    return;
  }

  File jarTest = new File(System.getProperty("prop.jarLocation"));
  if (!jarTest.exists()) {
    fail("Could not find Giraph jar at "
        + "location specified by 'prop.jarLocation'. "
        + "Make sure you built the main Giraph artifact.");
  }

  MiniHBaseCluster cluster = null;
  MiniZooKeeperCluster zkCluster = null;
  FileSystem fs = null;

  try {
    // Using the restart method allows us to avoid having the HBase
    // root directory overwritten by /home/$username.
    zkCluster = testUtil.startMiniZKCluster();
    testUtil.restartHBaseCluster(2);
    cluster = testUtil.getMiniHBaseCluster();

    final byte[] OL_BYTES = Bytes.toBytes("ol");
    final byte[] S_BYTES = Bytes.toBytes("s");
    final byte[] METADATA_BYTES = Bytes.toBytes("mtdt");
    final byte[] HR_BYTES = Bytes.toBytes("_hr_");
    final byte[] TAB = Bytes.toBytes(TABLE_NAME);

    Configuration conf = cluster.getConfiguration();
    HTableDescriptor desc = new HTableDescriptor(TAB);
    desc.addFamily(new HColumnDescriptor(OL_BYTES));
    desc.addFamily(new HColumnDescriptor(S_BYTES));
    desc.addFamily(new HColumnDescriptor(METADATA_BYTES));

    HBaseAdmin hbaseAdmin = new HBaseAdmin(conf);
    if (hbaseAdmin.isTableAvailable(TABLE_NAME)) {
      hbaseAdmin.disableTable(TABLE_NAME);
      hbaseAdmin.deleteTable(TABLE_NAME);
    }
    hbaseAdmin.createTable(desc);

    /**
     * Enter the initial data: edges (a,b), (b,c), (a,c) with
     *   a = 1.0 - google
     *   b = 1.0 - yahoo
     *   c = 1.0 - bing
     */
    HTable table = new HTable(conf, TABLE_NAME);

    Put p1 = new Put(Bytes.toBytes("com.google.www"));
    p1.add(OL_BYTES, Bytes.toBytes("www.yahoo.com"), Bytes.toBytes("ab"));

    Put p2 = new Put(Bytes.toBytes("com.google.www"));
    p2.add(OL_BYTES, Bytes.toBytes("www.bing.com"), Bytes.toBytes("ac"));
    p2.add(OL_BYTES, Bytes.toBytes("www.bing.com"), Bytes.toBytes("invalid1"));
    p2.add(OL_BYTES, Bytes.toBytes("www.google.com"), Bytes.toBytes("invalid2"));

    Put p3 = new Put(Bytes.toBytes("com.yahoo.www"));
    p3.add(OL_BYTES, Bytes.toBytes("www.bing.com"), Bytes.toBytes("bc"));
    // p3.add(OL_BYTES, Bytes.toBytes(""), Bytes.toBytes("invalid4"));

    Put p4 = new Put(Bytes.toBytes("com.bing.www"));
    // TODO: Handle the invalid URL below, e.g. with Apache Commons Validator's
    // isValid method (see the isValidOutlink sketch after this method).
    p4.add(OL_BYTES, Bytes.toBytes("http://invalidurl"), Bytes.toBytes("invalid5"));
    p4.add(S_BYTES, S_BYTES, Bytes.toBytes(10.0d));

    Put p5 = new Put(Bytes.toBytes("dummy"));
    p5.add(S_BYTES, S_BYTES, Bytes.toBytes(10.0d));

    table.put(p1);
    table.put(p2);
    table.put(p3);
    table.put(p4);
    table.put(p5);

    // Set the Giraph configuration; operate over HBase using vertex I/O formats.
    conf.set(TableInputFormat.INPUT_TABLE, TABLE_NAME);
    conf.set(TableOutputFormat.OUTPUT_TABLE, TABLE_NAME);

    // Start the Giraph job.
    GiraphJob giraphJob = new GiraphJob(conf, BspCase.getCallingMethodName());
    GiraphConfiguration giraphConf = giraphJob.getConfiguration();
    giraphConf.setZooKeeperConfiguration(
        cluster.getMaster().getZooKeeper().getQuorum());
    setupConfiguration(giraphJob);
    giraphConf.setComputationClass(LinkRankComputation.class);
    giraphConf.setMasterComputeClass(LinkRankVertexMasterCompute.class);
    giraphConf.setOutEdgesClass(ByteArrayEdges.class);
    giraphConf.setVertexInputFormatClass(Nutch2HostInputFormat.class);
    giraphConf.setVertexOutputFormatClass(Nutch2HostOutputFormat.class);
    giraphConf.setInt("giraph.linkRank.superstepCount", 10);
    giraphConf.setInt("giraph.linkRank.scale", 10);
    giraphConf.set("giraph.linkRank.family", "mtdt");
    giraphConf.set("giraph.linkRank.qualifier", "_hr_");
    giraphConf.setVertexInputFilterClass(HostRankVertexFilter.class);
    assertTrue(giraphJob.run(true));

    if (LOG.isInfoEnabled()) {
      LOG.info("Giraph job successful. Checking output qualifier.");
    }

    // Check the results.
    Result result;
    byte[] calculatedScoreByte;
    HashMap<String, Double> expectedValues = new HashMap<String, Double>();
    expectedValues.put("com.google.www", 1.3515060339386287d);
    expectedValues.put("com.yahoo.www", 4.144902009567587d);
    expectedValues.put("com.bing.www", 9.063893290511482d);

    for (String key : expectedValues.keySet()) {
      result = table.get(new Get(key.getBytes()));
      calculatedScoreByte = result.getValue(METADATA_BYTES, HR_BYTES);
      assertNotNull(calculatedScoreByte);
      assertTrue(calculatedScoreByte.length > 0);
      Assert.assertEquals("Scores are not the same",
          expectedValues.get(key),
          Bytes.toDouble(calculatedScoreByte), DELTA);
    }
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
    if (zkCluster != null) {
      zkCluster.shutdown();
    }
    // Clean test files.
    if (fs != null) {
      fs.delete(hbaseRootdir);
    }
  }
}
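/*
 * A minimal sketch of the outlink validation the TODO in testHostRank alludes to
 * ("use apache isValid method"). It assumes Apache Commons Validator
 * (org.apache.commons.validator.routines.UrlValidator) is on the test classpath;
 * the helper name and its placement here are illustrative, not part of the original test.
 */
private static boolean isValidOutlink(String url) {
  // UrlValidator.getInstance() uses the default schemes (http, https, ftp); under its
  // default rules a single-label host such as "http://invalidurl" should be rejected.
  return org.apache.commons.validator.routines.UrlValidator.getInstance().isValid(url);
}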
@Test
public void testReducerNumEstimation() throws Exception {
  // Skip this test for Tez: Tez uses a different mechanism.
  // The equivalent test is in TestTezAutoParallelism.
  Assume.assumeTrue("Skip this test for TEZ",
      Util.isMapredExecType(cluster.getExecType()));

  // Use the estimation.
  Configuration conf = HBaseConfiguration.create(new Configuration());
  HBaseTestingUtility util = new HBaseTestingUtility(conf);
  int clientPort = util.startMiniZKCluster().getClientPort();
  util.startMiniHBaseCluster(1, 1);

  String query = "a = load '/passwd';"
      + "b = group a by $0;"
      + "store b into 'output';";
  PigServer ps = new PigServer(cluster.getExecType(), cluster.getProperties());
  PhysicalPlan pp = Util.buildPp(ps, query);
  MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

  pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
  pc.getConf().setProperty("pig.exec.reducers.max", "10");
  pc.getConf().setProperty(HConstants.ZOOKEEPER_CLIENT_PORT, Integer.toString(clientPort));
  ConfigurationValidator.validatePigProperties(pc.getProperties());
  conf = ConfigurationUtil.toConfiguration(pc.getProperties());
  JobControlCompiler jcc = new JobControlCompiler(pc, conf);
  JobControl jc = jcc.compile(mrPlan, "Test");
  Job job = jc.getWaitingJobs().get(0);
  long reducer = Math.min(
      (long) Math.ceil(new File("test/org/apache/pig/test/data/passwd").length() / 100.0),
      10);
  Util.assertParallelValues(-1, -1, reducer, reducer, job.getJobConf());

  // Use the PARALLEL keyword; it overrides the estimated reducer number.
  query = "a = load '/passwd';"
      + "b = group a by $0 PARALLEL 2;"
      + "store b into 'output';";
  pp = Util.buildPp(ps, query);
  mrPlan = Util.buildMRPlan(pp, pc);

  pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
  pc.getConf().setProperty("pig.exec.reducers.max", "10");
  ConfigurationValidator.validatePigProperties(pc.getProperties());
  conf = ConfigurationUtil.toConfiguration(pc.getProperties());
  jcc = new JobControlCompiler(pc, conf);
  jc = jcc.compile(mrPlan, "Test");
  job = jc.getWaitingJobs().get(0);
  Util.assertParallelValues(-1, 2, -1, 2, job.getJobConf());

  final byte[] COLUMNFAMILY = Bytes.toBytes("pig");
  util.createTable(Bytes.toBytesBinary("test_table"), COLUMNFAMILY);

  // The estimation does not take effect when the input is non-DFS or the
  // files do not exist, as with HBase.
  query = "a = load 'hbase://test_table' using org.apache.pig.backend.hadoop.hbase.HBaseStorage('c:f1 c:f2');"
      + "b = group a by $0 ;"
      + "store b into 'output';";
  pp = Util.buildPp(ps, query);
  mrPlan = Util.buildMRPlan(pp, pc);

  pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
  pc.getConf().setProperty("pig.exec.reducers.max", "10");
  ConfigurationValidator.validatePigProperties(pc.getProperties());
  conf = ConfigurationUtil.toConfiguration(pc.getProperties());
  jcc = new JobControlCompiler(pc, conf);
  jc = jcc.compile(mrPlan, "Test");
  job = jc.getWaitingJobs().get(0);
  Util.assertParallelValues(-1, -1, 1, 1, job.getJobConf());

  util.deleteTable(Bytes.toBytesBinary("test_table"));
  // In HBase 0.90.1 and above we can use util.shutdownMiniHBaseCluster() here instead.
  MiniHBaseCluster hbc = util.getHBaseCluster();
  if (hbc != null) {
    hbc.shutdown();
    hbc.join();
  }
  util.shutdownMiniZKCluster();
}
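/*
 * A minimal sketch (not Pig's actual estimator class) of the arithmetic that
 * testReducerNumEstimation asserts: the reducer count is the total input size divided
 * by pig.exec.reducers.bytes.per.reducer, rounded up and capped at pig.exec.reducers.max.
 * The helper name and signature are illustrative assumptions.
 */
private static long estimateReducers(long totalInputBytes, long bytesPerReducer, long maxReducers) {
  // e.g. 512 input bytes with bytesPerReducer=100 and max=10 gives min(ceil(5.12), 10) = 6.
  return Math.min((long) Math.ceil((double) totalInputBytes / bytesPerReducer), maxReducers);
}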
/** This tests retaining assignments on a cluster restart */
@Test(timeout = 300000)
public void testRetainAssignmentOnRestart() throws Exception {
  UTIL.startMiniCluster(2);
  while (!UTIL.getMiniHBaseCluster().getMaster().isInitialized()) {
    Threads.sleep(1);
  }
  // Turn off the balancer.
  UTIL.getMiniHBaseCluster().getMaster().getMasterRpcServices()
      .synchronousBalanceSwitch(false);
  LOG.info("\n\nCreating tables");
  for (byte[] TABLE : TABLES) {
    UTIL.createTable(TABLE, FAMILY);
  }
  for (byte[] TABLE : TABLES) {
    UTIL.waitTableEnabled(TABLE);
  }
  HMaster master = UTIL.getMiniHBaseCluster().getMaster();
  UTIL.waitUntilNoRegionsInTransition(120000);

  // We don't have to use SnapshotOfRegionAssignmentFromMeta.
  // We use it here because the AM used to use it to load all user region placements.
  SnapshotOfRegionAssignmentFromMeta snapshot =
      new SnapshotOfRegionAssignmentFromMeta(master.getShortCircuitConnection());
  snapshot.initialize();
  Map<HRegionInfo, ServerName> regionToRegionServerMap = snapshot.getRegionToRegionServerMap();

  MiniHBaseCluster cluster = UTIL.getHBaseCluster();
  List<JVMClusterUtil.RegionServerThread> threads = cluster.getLiveRegionServerThreads();
  assertEquals(2, threads.size());
  int[] rsPorts = new int[3];
  for (int i = 0; i < 2; i++) {
    rsPorts[i] = threads.get(i).getRegionServer().getServerName().getPort();
  }
  rsPorts[2] = cluster.getMaster().getServerName().getPort();
  for (ServerName serverName : regionToRegionServerMap.values()) {
    boolean found = false; // Test only, no need to optimize
    for (int k = 0; k < 3 && !found; k++) {
      found = serverName.getPort() == rsPorts[k];
    }
    assertTrue(found);
  }

  LOG.info("\n\nShutting down HBase cluster");
  cluster.shutdown();
  cluster.waitUntilShutDown();

  LOG.info("\n\nSleeping a bit");
  Thread.sleep(2000);

  LOG.info("\n\nStarting cluster the second time with the same ports");
  try {
    cluster.getConf().setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 4);
    master = cluster.startMaster().getMaster();
    for (int i = 0; i < 3; i++) {
      cluster.getConf().setInt(HConstants.REGIONSERVER_PORT, rsPorts[i]);
      cluster.startRegionServer();
    }
  } finally {
    // Reset the region server port so as not to conflict with other tests.
    cluster.getConf().setInt(HConstants.REGIONSERVER_PORT, 0);
    cluster.getConf().setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 2);
  }

  // Make sure the live region servers are on the same host/port.
  List<ServerName> localServers = master.getServerManager().getOnlineServersList();
  assertEquals(4, localServers.size());
  for (int i = 0; i < 3; i++) {
    boolean found = false;
    for (ServerName serverName : localServers) {
      if (serverName.getPort() == rsPorts[i]) {
        found = true;
        break;
      }
    }
    assertTrue(found);
  }

  // Wait until the master is initialized and all regions are assigned.
  RegionStates regionStates = master.getAssignmentManager().getRegionStates();
  int expectedRegions = regionToRegionServerMap.size() + 1;
  while (!master.isInitialized()
      || regionStates.getRegionAssignments().size() != expectedRegions) {
    Threads.sleep(100);
  }

  snapshot = new SnapshotOfRegionAssignmentFromMeta(master.getShortCircuitConnection());
  snapshot.initialize();
  Map<HRegionInfo, ServerName> newRegionToRegionServerMap = snapshot.getRegionToRegionServerMap();
  assertEquals(regionToRegionServerMap.size(), newRegionToRegionServerMap.size());
  for (Map.Entry<HRegionInfo, ServerName> entry : newRegionToRegionServerMap.entrySet()) {
    if (TableName.NAMESPACE_TABLE_NAME.equals(entry.getKey().getTable())) {
      continue;
    }
    ServerName oldServer = regionToRegionServerMap.get(entry.getKey());
    ServerName currentServer = entry.getValue();
    assertEquals(oldServer.getHostAndPort(), currentServer.getHostAndPort());
    assertNotEquals(oldServer.getStartcode(), currentServer.getStartcode());
  }
}