Example #1
  @Test
  public void testHostRank() throws Exception {

    if (System.getProperty("prop.mapred.job.tracker") != null) {
      if (LOG.isInfoEnabled())
        LOG.info("testHostRank: skipping; this test only runs in local mode.");
      return;
    }

    File jarTest = new File(System.getProperty("prop.jarLocation"));
    if (!jarTest.exists()) {
      fail(
          "Could not find Giraph jar at "
              + "location specified by 'prop.jarLocation'. "
              + "Make sure you built the main Giraph artifact?.");
    }

    MiniHBaseCluster cluster = null;
    MiniZooKeeperCluster zkCluster = null;
    FileSystem fs = null;

    try {
      // using the restart method avoids having the HBase
      // root directory overwritten by /home/$username
      zkCluster = testUtil.startMiniZKCluster();
      testUtil.restartHBaseCluster(2);
      cluster = testUtil.getMiniHBaseCluster();

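      // Column families: 'ol' holds outlinks and 's' holds scores;
      // the computed rank lands in 'mtdt:_hr_' (see the giraph.linkRank.* settings below).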
      final byte[] OL_BYTES = Bytes.toBytes("ol");
      final byte[] S_BYTES = Bytes.toBytes("s");
      final byte[] METADATA_BYTES = Bytes.toBytes("mtdt");
      final byte[] HR_BYTES = Bytes.toBytes("_hr_");
      final byte[] TAB = Bytes.toBytes(TABLE_NAME);

      Configuration conf = cluster.getConfiguration();
      HTableDescriptor desc = new HTableDescriptor(TAB);
      desc.addFamily(new HColumnDescriptor(OL_BYTES));
      desc.addFamily(new HColumnDescriptor(S_BYTES));
      desc.addFamily(new HColumnDescriptor(METADATA_BYTES));
      HBaseAdmin hbaseAdmin = new HBaseAdmin(conf);
      if (hbaseAdmin.isTableAvailable(TABLE_NAME)) {
        hbaseAdmin.disableTable(TABLE_NAME);
        hbaseAdmin.deleteTable(TABLE_NAME);
      }
      hbaseAdmin.createTable(desc);

      /*
       * Enter the initial data: edges (a,b), (b,c), (a,c) with initial scores
       *   a = 1.0 - google
       *   b = 1.0 - yahoo
       *   c = 1.0 - bing
       */
      HTable table = new HTable(conf, TABLE_NAME);

      Put p1 = new Put(Bytes.toBytes("com.google.www"));
      p1.add(OL_BYTES, Bytes.toBytes("www.yahoo.com"), Bytes.toBytes("ab"));

      Put p2 = new Put(Bytes.toBytes("com.google.www"));
      p2.add(OL_BYTES, Bytes.toBytes("www.bing.com"), Bytes.toBytes("ac"));
      p2.add(OL_BYTES, Bytes.toBytes("www.bing.com"), Bytes.toBytes("invalid1"));
      p2.add(OL_BYTES, Bytes.toBytes("www.google.com"), Bytes.toBytes("invalid2"));

      Put p3 = new Put(Bytes.toBytes("com.yahoo.www"));
      p3.add(OL_BYTES, Bytes.toBytes("www.bing.com"), Bytes.toBytes("bc"));
      // p3.add(OL_BYTES, Bytes.toBytes(""), Bytes.toBytes("invalid4"));

      Put p4 = new Put(Bytes.toBytes("com.bing.www"));
      // TODO: Handle the case below; use the Apache isValid method.
      p4.add(OL_BYTES, Bytes.toBytes("http://invalidurl"), Bytes.toBytes("invalid5"));
      p4.add(S_BYTES, S_BYTES, Bytes.toBytes(10.0d));

      Put p5 = new Put(Bytes.toBytes("dummy"));
      p5.add(S_BYTES, S_BYTES, Bytes.toBytes(10.0d));

      table.put(p1);
      table.put(p2);
      table.put(p3);
      table.put(p4);
      table.put(p5);

      // Set Giraph configuration
      // now operate over HBase using Vertex I/O formats
      conf.set(TableInputFormat.INPUT_TABLE, TABLE_NAME);
      conf.set(TableOutputFormat.OUTPUT_TABLE, TABLE_NAME);

      // Start the giraph job
      GiraphJob giraphJob = new GiraphJob(conf, BspCase.getCallingMethodName());
      GiraphConfiguration giraphConf = giraphJob.getConfiguration();
      giraphConf.setZooKeeperConfiguration(cluster.getMaster().getZooKeeper().getQuorum());
      setupConfiguration(giraphJob);
      giraphConf.setComputationClass(LinkRankComputation.class);
      giraphConf.setMasterComputeClass(LinkRankVertexMasterCompute.class);
      giraphConf.setOutEdgesClass(ByteArrayEdges.class);
      giraphConf.setVertexInputFormatClass(Nutch2HostInputFormat.class);
      giraphConf.setVertexOutputFormatClass(Nutch2HostOutputFormat.class);
      giraphConf.setInt("giraph.linkRank.superstepCount", 10);
      giraphConf.setInt("giraph.linkRank.scale", 10);
      giraphConf.set("giraph.linkRank.family", "mtdt");
      giraphConf.set("giraph.linkRank.qualifier", "_hr_");
      giraphConf.setVertexInputFilterClass(HostRankVertexFilter.class);
      assertTrue(giraphJob.run(true));

      if (LOG.isInfoEnabled()) LOG.info("Giraph job successful. Checking output qualifier.");

      // Check the results
      Result result;
      byte[] calculatedScoreByte;
      HashMap<String, Double> expectedValues = new HashMap<String, Double>();
      expectedValues.put("com.google.www", 1.3515060339386287d);
      expectedValues.put("com.yahoo.www", 4.144902009567587d);
      expectedValues.put("com.bing.www", 9.063893290511482d);

      for (String key : expectedValues.keySet()) {
        result = table.get(new Get(key.getBytes()));
        calculatedScoreByte = result.getValue(METADATA_BYTES, HR_BYTES);
        assertNotNull(calculatedScoreByte);
        assertTrue(calculatedScoreByte.length > 0);
        Assert.assertEquals(
            "Scores are not the same",
            expectedValues.get(key),
            Bytes.toDouble(calculatedScoreByte),
            DELTA);
      }
    } finally {
      if (cluster != null) {
        cluster.shutdown();
      }
      if (zkCluster != null) {
        zkCluster.shutdown();
      }
      // clean test files
      if (fs != null) {
        fs.delete(hbaseRootdir, true);
      }
    }
  }
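
The test above follows the usual mini-cluster lifecycle: ZooKeeper comes up first, then HBase, and teardown happens in a finally block. A minimal sketch of that skeleton, assuming the same HBaseTestingUtility APIs the test itself uses:

  // Minimal lifecycle sketch (assumes the HBaseTestingUtility APIs used above)
  HBaseTestingUtility testUtil = new HBaseTestingUtility();
  MiniZooKeeperCluster zkCluster = testUtil.startMiniZKCluster(); // ZooKeeper must be up first
  testUtil.startMiniHBaseCluster(1, 1);                           // 1 master, 1 region server
  try {
    // ... create tables, load data, run the job under test ...
  } finally {
    testUtil.shutdownMiniHBaseCluster();
    testUtil.shutdownMiniZKCluster();
  }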
Example #2
  @Test
  public void testReducerNumEstimation() throws Exception {
    // Skip this test for Tez, which uses a different mechanism;
    // the equivalent test is in TestTezAutoParallelism.
    Assume.assumeTrue("Skip this test for TEZ", Util.isMapredExecType(cluster.getExecType()));
    // use the estimation
    Configuration conf = HBaseConfiguration.create(new Configuration());
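    // bring up a mini ZK/HBase cluster; it backs the HBase load tested near the end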
    HBaseTestingUtility util = new HBaseTestingUtility(conf);
    int clientPort = util.startMiniZKCluster().getClientPort();
    util.startMiniHBaseCluster(1, 1);

    String query = "a = load '/passwd';" + "b = group a by $0;" + "store b into 'output';";
    PigServer ps = new PigServer(cluster.getExecType(), cluster.getProperties());
    PhysicalPlan pp = Util.buildPp(ps, query);
    MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

    pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
    pc.getConf().setProperty("pig.exec.reducers.max", "10");
    pc.getConf().setProperty(HConstants.ZOOKEEPER_CLIENT_PORT, Integer.toString(clientPort));
    ConfigurationValidator.validatePigProperties(pc.getProperties());
    conf = ConfigurationUtil.toConfiguration(pc.getProperties());
    JobControlCompiler jcc = new JobControlCompiler(pc, conf);
    JobControl jc = jcc.compile(mrPlan, "Test");
    Job job = jc.getWaitingJobs().get(0);
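    // expected estimate: ceil(input size / bytes-per-reducer), capped at pig.exec.reducers.max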
    long reducer =
        Math.min(
            (long) Math.ceil(new File("test/org/apache/pig/test/data/passwd").length() / 100.0),
            10);

    Util.assertParallelValues(-1, -1, reducer, reducer, job.getJobConf());

    // use the PARALLEL key word, it will override the estimated reducer number
    query = "a = load '/passwd';" + "b = group a by $0 PARALLEL 2;" + "store b into 'output';";
    pp = Util.buildPp(ps, query);
    mrPlan = Util.buildMRPlan(pp, pc);

    pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
    pc.getConf().setProperty("pig.exec.reducers.max", "10");
    ConfigurationValidator.validatePigProperties(pc.getProperties());
    conf = ConfigurationUtil.toConfiguration(pc.getProperties());
    jcc = new JobControlCompiler(pc, conf);
    jc = jcc.compile(mrPlan, "Test");
    job = jc.getWaitingJobs().get(0);

    Util.assertParallelValues(-1, 2, -1, 2, job.getJobConf());

    final byte[] COLUMNFAMILY = Bytes.toBytes("pig");
    util.createTable(Bytes.toBytesBinary("test_table"), COLUMNFAMILY);

    // the estimation doesn't take effect for non-DFS inputs such as HBase,
    // or when the input files don't exist
    query =
        "a = load 'hbase://test_table' using org.apache.pig.backend.hadoop.hbase.HBaseStorage('c:f1 c:f2');"
            + "b = group a by $0 ;"
            + "store b into 'output';";
    pp = Util.buildPp(ps, query);
    mrPlan = Util.buildMRPlan(pp, pc);

    pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
    pc.getConf().setProperty("pig.exec.reducers.max", "10");

    ConfigurationValidator.validatePigProperties(pc.getProperties());
    conf = ConfigurationUtil.toConfiguration(pc.getProperties());
    jcc = new JobControlCompiler(pc, conf);
    jc = jcc.compile(mrPlan, "Test");
    job = jc.getWaitingJobs().get(0);

    Util.assertParallelValues(-1, -1, 1, 1, job.getJobConf());

    util.deleteTable(Bytes.toBytesBinary("test_table"));
    // In HBase 0.90.1 and above we can use util.shutdownMiniHBaseCluster()
    // here instead.
    MiniHBaseCluster hbc = util.getHBaseCluster();
    if (hbc != null) {
      hbc.shutdown();
      hbc.join();
    }
    util.shutdownMiniZKCluster();
  }
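
The estimate that the first assertion recomputes is simply the input size divided by pig.exec.reducers.bytes.per.reducer, rounded up and capped at pig.exec.reducers.max. The same arithmetic as a standalone sketch (variable names are illustrative):

  // Illustrative recomputation of the reducer estimate asserted above
  long inputBytes = new File("test/org/apache/pig/test/data/passwd").length();
  double bytesPerReducer = 100.0; // pig.exec.reducers.bytes.per.reducer
  long maxReducers = 10;          // pig.exec.reducers.max
  long estimated = Math.min((long) Math.ceil(inputBytes / bytesPerReducer), maxReducers);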
Example #3
  /** This tests retaining assignments on a cluster restart */
  @Test(timeout = 300000)
  public void testRetainAssignmentOnRestart() throws Exception {
    UTIL.startMiniCluster(2);
    while (!UTIL.getMiniHBaseCluster().getMaster().isInitialized()) {
      Threads.sleep(1);
    }
    // Turn off balancer
    UTIL.getMiniHBaseCluster().getMaster().getMasterRpcServices().synchronousBalanceSwitch(false);
    LOG.info("\n\nCreating tables");
    for (byte[] TABLE : TABLES) {
      UTIL.createTable(TABLE, FAMILY);
    }
    for (byte[] TABLE : TABLES) {
      UTIL.waitTableEnabled(TABLE);
    }

    HMaster master = UTIL.getMiniHBaseCluster().getMaster();
    UTIL.waitUntilNoRegionsInTransition(120000);

    // We don't have to use SnapshotOfRegionAssignmentFromMeta; we use it here
    // because the AssignmentManager used to use it to load all user region placements.
    SnapshotOfRegionAssignmentFromMeta snapshot =
        new SnapshotOfRegionAssignmentFromMeta(master.getShortCircuitConnection());
    snapshot.initialize();
    Map<HRegionInfo, ServerName> regionToRegionServerMap = snapshot.getRegionToRegionServerMap();

    MiniHBaseCluster cluster = UTIL.getHBaseCluster();
    List<JVMClusterUtil.RegionServerThread> threads = cluster.getLiveRegionServerThreads();
    assertEquals(2, threads.size());
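    // Track three ports: the two region servers plus the master, which also carries regions here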
    int[] rsPorts = new int[3];
    for (int i = 0; i < 2; i++) {
      rsPorts[i] = threads.get(i).getRegionServer().getServerName().getPort();
    }
    rsPorts[2] = cluster.getMaster().getServerName().getPort();
    for (ServerName serverName : regionToRegionServerMap.values()) {
      boolean found = false; // Test only, no need to optimize
      for (int k = 0; k < 3 && !found; k++) {
        found = serverName.getPort() == rsPorts[k];
      }
      assertTrue(found);
    }

    LOG.info("\n\nShutting down HBase cluster");
    cluster.shutdown();
    cluster.waitUntilShutDown();

    LOG.info("\n\nSleeping a bit");
    Thread.sleep(2000);

    LOG.info("\n\nStarting cluster the second time with the same ports");
    try {
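      // Require all four servers (three region servers plus the master) to check in
      // before the master finishes startup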
      cluster.getConf().setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 4);
      master = cluster.startMaster().getMaster();
      for (int i = 0; i < 3; i++) {
        cluster.getConf().setInt(HConstants.REGIONSERVER_PORT, rsPorts[i]);
        cluster.startRegionServer();
      }
    } finally {
      // Reset region server port so as not to conflict with other tests
      cluster.getConf().setInt(HConstants.REGIONSERVER_PORT, 0);
      cluster.getConf().setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 2);
    }

    // Make sure live regionservers are on the same host/port
    List<ServerName> localServers = master.getServerManager().getOnlineServersList();
    assertEquals(4, localServers.size());
    for (int i = 0; i < 3; i++) {
      boolean found = false;
      for (ServerName serverName : localServers) {
        if (serverName.getPort() == rsPorts[i]) {
          found = true;
          break;
        }
      }
      assertTrue(found);
    }

    // Wait till master is initialized and all regions are assigned
    RegionStates regionStates = master.getAssignmentManager().getRegionStates();
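    // +1: getRegionAssignments() also counts hbase:meta, which a scan of the meta table can't include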
    int expectedRegions = regionToRegionServerMap.size() + 1;
    while (!master.isInitialized()
        || regionStates.getRegionAssignments().size() != expectedRegions) {
      Threads.sleep(100);
    }

    snapshot = new SnapshotOfRegionAssignmentFromMeta(master.getShortCircuitConnection());
    snapshot.initialize();
    Map<HRegionInfo, ServerName> newRegionToRegionServerMap = snapshot.getRegionToRegionServerMap();
    assertEquals(regionToRegionServerMap.size(), newRegionToRegionServerMap.size());
    for (Map.Entry<HRegionInfo, ServerName> entry : newRegionToRegionServerMap.entrySet()) {
      if (TableName.NAMESPACE_TABLE_NAME.equals(entry.getKey().getTable())) continue;
      ServerName oldServer = regionToRegionServerMap.get(entry.getKey());
      ServerName currentServer = entry.getValue();
      assertEquals(oldServer.getHostAndPort(), currentServer.getHostAndPort());
      assertNotEquals(oldServer.getStartcode(), currentServer.getStartcode());
    }
  }
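
The final loop encodes the retention invariant: after a full restart on the same ports, each region returns to the same host:port, but the hosting server is a fresh process. A sketch of that check as a helper (hypothetical name; the ServerName accessors are the ones used above):

  // Hypothetical helper capturing the invariant asserted above: the region's server
  // keeps its host:port across the restart but gets a new start code.
  static boolean retainedAcrossRestart(ServerName before, ServerName after) {
    return before.getHostAndPort().equals(after.getHostAndPort())
        && before.getStartcode() != after.getStartcode();
  }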