@BeforeClass
  public static void setUpBeforeClass() throws Exception {
    // Start up our mini cluster on top of an 0.92 root.dir that has data from
    // a 0.92 hbase run -- it has a table with 100 rows in it  -- and see if
    // we can migrate from 0.92
    TEST_UTIL.startMiniZKCluster();
    TEST_UTIL.startMiniDFSCluster(1);
    Path testdir = TEST_UTIL.getDataTestDir("TestMetaMigrationConvertToPB");
    // Untar our test dir.
    File untar = untar(new File(testdir.toString()));
    // Now copy the untar up into hdfs so when we start hbase, we'll run from it.
    Configuration conf = TEST_UTIL.getConfiguration();
    FsShell shell = new FsShell(conf);
    FileSystem fs = FileSystem.get(conf);
    // find where hbase will root itself, so we can copy filesystem there
    Path hbaseRootDir = TEST_UTIL.getDefaultRootDirPath();
    if (!fs.isDirectory(hbaseRootDir.getParent())) {
      // mkdir at first
      fs.mkdirs(hbaseRootDir.getParent());
    }
    doFsCommand(shell, new String[] {"-put", untar.toURI().toString(), hbaseRootDir.toString()});

    // windows fix: tgz file has .META. directory renamed as -META- since the original is an illegal
    // name under windows. So we rename it back. See
    // src/test/data//TestMetaMigrationConvertingToPB.README and
    // https://issues.apache.org/jira/browse/HBASE-6821
    doFsCommand(
        shell,
        new String[] {
          "-mv",
          new Path(hbaseRootDir, "-META-").toString(),
          new Path(hbaseRootDir, ".META.").toString()
        });
    // See whats in minihdfs.
    doFsCommand(shell, new String[] {"-lsr", "/"});
    TEST_UTIL.startMiniHBaseCluster(1, 1);
    // Assert we are running against the copied-up filesystem.  The copied-up
    // rootdir should have had a table named 'TestTable' in it.  Assert it
    // present.
    HTable t = new HTable(TEST_UTIL.getConfiguration(), TESTTABLE);
    ResultScanner scanner = t.getScanner(new Scan());
    int count = 0;
    while (scanner.next() != null) {
      count++;
    }
    // Assert that we find all 100 rows that are in the data we loaded.  If
    // so then we must have migrated it from 0.90 to 0.92.
    Assert.assertEquals(ROW_COUNT, count);
    scanner.close();
    t.close();
  }
  @Before
  public void setUp() throws Exception {

    File tempDir = Files.createTempDir();
    tempDir.deleteOnExit();

    htu = HBaseTestingUtility.createLocalHTU();
    try {
      htu.cleanupTestDir();

      htu.startMiniZKCluster();
      htu.startMiniHBaseCluster(1, 1);

      try {
        htu.deleteTable(Bytes.toBytes(tableName));
      } catch (Exception e) {
        Log.info(" - no table " + tableName + " found");
      }
      htu.createTable(Bytes.toBytes(tableName), colFam);

      dao =
          new Hbase1OffsetStore.Builder()
              .setHbaseConfiguration(htu.getConfiguration())
              .setOffsetTable(tableName)
              .build();

    } catch (Exception e1) {
      throw new RuntimeException(e1);
    }

    KOM =
        new KafkaOffsetManager.Builder()
            .setOffsetManager(dao)
            .setKafkaBrokerList("localhost:" + kafkaRule.kafkaBrokerPort())
            .setGroupID(testGroupID)
            .setTopic(testTopicName)
            .build();
  }
Beispiel #3
0
  @Test
  public void testReducerNumEstimation() throws Exception {
    // Skip the test for Tez. Tez use a different mechanism.
    // Equivalent test is in TestTezAutoParallelism
    Assume.assumeTrue("Skip this test for TEZ", Util.isMapredExecType(cluster.getExecType()));
    // use the estimation
    Configuration conf = HBaseConfiguration.create(new Configuration());
    HBaseTestingUtility util = new HBaseTestingUtility(conf);
    int clientPort = util.startMiniZKCluster().getClientPort();
    util.startMiniHBaseCluster(1, 1);

    String query = "a = load '/passwd';" + "b = group a by $0;" + "store b into 'output';";
    PigServer ps = new PigServer(cluster.getExecType(), cluster.getProperties());
    PhysicalPlan pp = Util.buildPp(ps, query);
    MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

    pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
    pc.getConf().setProperty("pig.exec.reducers.max", "10");
    pc.getConf().setProperty(HConstants.ZOOKEEPER_CLIENT_PORT, Integer.toString(clientPort));
    ConfigurationValidator.validatePigProperties(pc.getProperties());
    conf = ConfigurationUtil.toConfiguration(pc.getProperties());
    JobControlCompiler jcc = new JobControlCompiler(pc, conf);
    JobControl jc = jcc.compile(mrPlan, "Test");
    Job job = jc.getWaitingJobs().get(0);
    long reducer =
        Math.min(
            (long) Math.ceil(new File("test/org/apache/pig/test/data/passwd").length() / 100.0),
            10);

    Util.assertParallelValues(-1, -1, reducer, reducer, job.getJobConf());

    // use the PARALLEL key word, it will override the estimated reducer number
    query = "a = load '/passwd';" + "b = group a by $0 PARALLEL 2;" + "store b into 'output';";
    pp = Util.buildPp(ps, query);
    mrPlan = Util.buildMRPlan(pp, pc);

    pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
    pc.getConf().setProperty("pig.exec.reducers.max", "10");
    ConfigurationValidator.validatePigProperties(pc.getProperties());
    conf = ConfigurationUtil.toConfiguration(pc.getProperties());
    jcc = new JobControlCompiler(pc, conf);
    jc = jcc.compile(mrPlan, "Test");
    job = jc.getWaitingJobs().get(0);

    Util.assertParallelValues(-1, 2, -1, 2, job.getJobConf());

    final byte[] COLUMNFAMILY = Bytes.toBytes("pig");
    util.createTable(Bytes.toBytesBinary("test_table"), COLUMNFAMILY);

    // the estimation won't take effect when it apply to non-dfs or the files doesn't exist, such as
    // hbase
    query =
        "a = load 'hbase://test_table' using org.apache.pig.backend.hadoop.hbase.HBaseStorage('c:f1 c:f2');"
            + "b = group a by $0 ;"
            + "store b into 'output';";
    pp = Util.buildPp(ps, query);
    mrPlan = Util.buildMRPlan(pp, pc);

    pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
    pc.getConf().setProperty("pig.exec.reducers.max", "10");

    ConfigurationValidator.validatePigProperties(pc.getProperties());
    conf = ConfigurationUtil.toConfiguration(pc.getProperties());
    jcc = new JobControlCompiler(pc, conf);
    jc = jcc.compile(mrPlan, "Test");
    job = jc.getWaitingJobs().get(0);

    Util.assertParallelValues(-1, -1, 1, 1, job.getJobConf());

    util.deleteTable(Bytes.toBytesBinary("test_table"));
    // In HBase 0.90.1 and above we can use util.shutdownMiniHBaseCluster()
    // here instead.
    MiniHBaseCluster hbc = util.getHBaseCluster();
    if (hbc != null) {
      hbc.shutdown();
      hbc.join();
    }
    util.shutdownMiniZKCluster();
  }