コード例 #1
0
  @Test
  public void testDeadWorker() throws Exception {
    LOG.info("testDeadWorker");

    conf.setLong("hbase.splitlog.max.resubmit", 0);
    slm = new SplitLogManager(zkw, conf, stopper, master, DUMMY_MASTER, null);
    slm.finishInitialization();
    TaskBatch batch = new TaskBatch();

    String tasknode = submitTaskAndWait(batch, "foo/1");
    int version = ZKUtil.checkExists(zkw, tasknode);
    final ServerName worker1 = new ServerName("worker1,1,1");
    SplitLogTask slt = new SplitLogTask.Owned(worker1);
    ZKUtil.setData(zkw, tasknode, slt.toByteArray());
    if (tot_mgr_heartbeat.get() == 0) waitForCounter(tot_mgr_heartbeat, 0, 1, to / 2);
    slm.handleDeadWorker(worker1);
    if (tot_mgr_resubmit.get() == 0) waitForCounter(tot_mgr_resubmit, 0, 1, to + to / 2);
    if (tot_mgr_resubmit_dead_server_task.get() == 0) {
      waitForCounter(tot_mgr_resubmit_dead_server_task, 0, 1, to + to / 2);
    }

    int version1 = ZKUtil.checkExists(zkw, tasknode);
    assertTrue(version1 > version);
    byte[] taskstate = ZKUtil.getData(zkw, tasknode);
    slt = SplitLogTask.parseFrom(taskstate);
    assertTrue(slt.isUnassigned(DUMMY_MASTER));
    return;
  }
コード例 #2
0
  @Test
  public void testOrphanTaskAcquisition() throws Exception {
    LOG.info("TestOrphanTaskAcquisition");

    String tasknode = ZKSplitLog.getEncodedNodeName(zkw, "orphan/test/slash");
    zkw.getRecoverableZooKeeper()
        .create(
            tasknode,
            TaskState.TASK_OWNED.get("dummy-worker"),
            Ids.OPEN_ACL_UNSAFE,
            CreateMode.PERSISTENT);

    int to = 1000;
    conf.setInt("hbase.splitlog.manager.timeout", to);
    conf.setInt("hbase.splitlog.manager.timeoutmonitor.period", 100);
    to = to + 2 * 100;

    slm = new SplitLogManager(zkw, conf, stopper, "dummy-master", null);
    slm.finishInitialization();
    waitForCounter(tot_mgr_orphan_task_acquired, 0, 1, 100);
    Task task = slm.findOrCreateOrphanTask(tasknode);
    assertTrue(task.isOrphan());
    waitForCounter(tot_mgr_heartbeat, 0, 1, 100);
    assertFalse(task.isUnassigned());
    long curt = System.currentTimeMillis();
    assertTrue((task.last_update <= curt) && (task.last_update > (curt - 1000)));
    LOG.info("waiting for manager to resubmit the orphan task");
    waitForCounter(tot_mgr_resubmit, 0, 1, to + 100);
    assertTrue(task.isUnassigned());
    waitForCounter(tot_mgr_rescan, 0, 1, to + 100);
  }
コード例 #3
0
  @Test
  public void testUnassignedOrphan() throws Exception {
    LOG.info("TestUnassignedOrphan - an unassigned task is resubmitted at" + " startup");
    String tasknode = ZKSplitLog.getEncodedNodeName(zkw, "orphan/test/slash");
    // create an unassigned orphan task
    zkw.getRecoverableZooKeeper()
        .create(
            tasknode,
            TaskState.TASK_UNASSIGNED.get("dummy-worker"),
            Ids.OPEN_ACL_UNSAFE,
            CreateMode.PERSISTENT);
    int version = ZKUtil.checkExists(zkw, tasknode);

    slm = new SplitLogManager(zkw, conf, stopper, "dummy-master", null);
    slm.finishInitialization();
    waitForCounter(tot_mgr_orphan_task_acquired, 0, 1, 100);
    Task task = slm.findOrCreateOrphanTask(tasknode);
    assertTrue(task.isOrphan());
    assertTrue(task.isUnassigned());
    // wait for RESCAN node to be created
    waitForCounter(tot_mgr_rescan, 0, 1, 500);
    Task task2 = slm.findOrCreateOrphanTask(tasknode);
    assertTrue(task == task2);
    LOG.debug("task = " + task);
    assertEquals(1L, tot_mgr_resubmit.get());
    assertEquals(1, task.incarnation);
    assertEquals(0, task.unforcedResubmits);
    assertTrue(task.isOrphan());
    assertTrue(task.isUnassigned());
    assertTrue(ZKUtil.checkExists(zkw, tasknode) > version);
  }
コード例 #4
0
 @Test
 public void testEmptyLogDir() throws Exception {
   LOG.info("testEmptyLogDir");
   slm = new SplitLogManager(zkw, conf, stopper, "dummy-master", null);
   slm.finishInitialization();
   FileSystem fs = TEST_UTIL.getTestFileSystem();
   Path emptyLogDirPath = new Path(fs.getWorkingDirectory(), UUID.randomUUID().toString());
   fs.mkdirs(emptyLogDirPath);
   slm.splitLogDistributed(emptyLogDirPath);
   assertFalse(fs.exists(emptyLogDirPath));
 }
コード例 #5
0
  @Test
  public void testUnassignedTimeout() throws Exception {
    LOG.info("TestUnassignedTimeout - iff all tasks are unassigned then" + " resubmit");

    // create an orphan task in OWNED state
    String tasknode1 = ZKSplitLog.getEncodedNodeName(zkw, "orphan/1");
    final ServerName worker1 = new ServerName("worker1,1,1");
    SplitLogTask slt = new SplitLogTask.Owned(worker1);
    zkw.getRecoverableZooKeeper()
        .create(tasknode1, slt.toByteArray(), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);

    slm = new SplitLogManager(zkw, conf, stopper, master, DUMMY_MASTER, null);
    slm.finishInitialization();
    waitForCounter(tot_mgr_orphan_task_acquired, 0, 1, to / 2);

    // submit another task which will stay in unassigned mode
    TaskBatch batch = new TaskBatch();
    submitTaskAndWait(batch, "foo/1");

    // keep updating the orphan owned node every to/2 seconds
    for (int i = 0; i < (3 * to) / 100; i++) {
      Thread.sleep(100);
      final ServerName worker2 = new ServerName("worker1,1,1");
      slt = new SplitLogTask.Owned(worker2);
      ZKUtil.setData(zkw, tasknode1, slt.toByteArray());
    }

    // since we have stopped heartbeating the owned node therefore it should
    // get resubmitted
    LOG.info("waiting for manager to resubmit the orphan task");
    waitForCounter(tot_mgr_resubmit, 0, 1, to + to / 2);

    // now all the nodes are unassigned. manager should post another rescan
    waitForCounter(tot_mgr_resubmit_unassigned, 0, 1, 2 * to + to / 2);
  }
コード例 #6
0
  @Test
  public void testTaskResigned() throws Exception {
    LOG.info("TestTaskResigned - resubmit task node once in RESIGNED state");
    assertEquals(tot_mgr_resubmit.get(), 0);
    slm = new SplitLogManager(zkw, conf, stopper, master, DUMMY_MASTER, null);
    slm.finishInitialization();
    assertEquals(tot_mgr_resubmit.get(), 0);
    TaskBatch batch = new TaskBatch();
    String tasknode = submitTaskAndWait(batch, "foo/1");
    assertEquals(tot_mgr_resubmit.get(), 0);
    final ServerName worker1 = new ServerName("worker1,1,1");
    assertEquals(tot_mgr_resubmit.get(), 0);
    SplitLogTask slt = new SplitLogTask.Resigned(worker1);
    assertEquals(tot_mgr_resubmit.get(), 0);
    ZKUtil.setData(zkw, tasknode, slt.toByteArray());
    int version = ZKUtil.checkExists(zkw, tasknode);
    // Could be small race here.
    if (tot_mgr_resubmit.get() == 0) waitForCounter(tot_mgr_resubmit, 0, 1, to / 2);
    assertEquals(tot_mgr_resubmit.get(), 1);
    int version1 = ZKUtil.checkExists(zkw, tasknode);
    assertTrue("version1=" + version1 + ", version=" + version, version1 > version);

    byte[] taskstate = ZKUtil.getData(zkw, tasknode);
    slt = SplitLogTask.parseFrom(taskstate);
    assertTrue(slt.isUnassigned(DUMMY_MASTER));
  }
コード例 #7
0
  @Test
  public void testMultipleResubmits() throws Exception {
    LOG.info("TestMultipleResbmits - no indefinite resubmissions");

    int to = 1000;
    conf.setInt("hbase.splitlog.manager.timeout", to);
    conf.setInt("hbase.splitlog.manager.timeoutmonitor.period", 100);
    to = to + 2 * 100;

    conf.setInt("hbase.splitlog.max.resubmit", 2);
    slm = new SplitLogManager(zkw, conf, stopper, "dummy-master", null);
    slm.finishInitialization();
    TaskBatch batch = new TaskBatch();

    String tasknode = submitTaskAndWait(batch, "foo/1");
    int version = ZKUtil.checkExists(zkw, tasknode);

    ZKUtil.setData(zkw, tasknode, TaskState.TASK_OWNED.get("worker1"));
    waitForCounter(tot_mgr_heartbeat, 0, 1, 1000);
    waitForCounter(tot_mgr_resubmit, 0, 1, to + 100);
    int version1 = ZKUtil.checkExists(zkw, tasknode);
    assertTrue(version1 > version);
    ZKUtil.setData(zkw, tasknode, TaskState.TASK_OWNED.get("worker2"));
    waitForCounter(tot_mgr_heartbeat, 1, 2, 1000);
    waitForCounter(tot_mgr_resubmit, 1, 2, to + 100);
    int version2 = ZKUtil.checkExists(zkw, tasknode);
    assertTrue(version2 > version1);
    ZKUtil.setData(zkw, tasknode, TaskState.TASK_OWNED.get("worker3"));
    waitForCounter(tot_mgr_heartbeat, 1, 2, 1000);
    waitForCounter(tot_mgr_resubmit_threshold_reached, 0, 1, to + 100);
    Thread.sleep(to + 100);
    assertEquals(2L, tot_mgr_resubmit.get());
  }
コード例 #8
0
  @Test
  public void testRescanCleanup() throws Exception {
    LOG.info("TestRescanCleanup - ensure RESCAN nodes are cleaned up");

    slm = new SplitLogManager(zkw, conf, stopper, master, DUMMY_MASTER, null);
    slm.finishInitialization();
    TaskBatch batch = new TaskBatch();

    String tasknode = submitTaskAndWait(batch, "foo/1");
    int version = ZKUtil.checkExists(zkw, tasknode);
    final ServerName worker1 = new ServerName("worker1,1,1");
    SplitLogTask slt = new SplitLogTask.Owned(worker1);
    ZKUtil.setData(zkw, tasknode, slt.toByteArray());
    waitForCounter(tot_mgr_heartbeat, 0, 1, to / 2);
    waitForCounter(
        new Expr() {
          @Override
          public long eval() {
            return (tot_mgr_resubmit.get() + tot_mgr_resubmit_failed.get());
          }
        },
        0,
        1,
        5 * 60000); // wait long enough
    Assert.assertEquals(
        "Could not run test. Lost ZK connection?", 0, tot_mgr_resubmit_failed.get());
    int version1 = ZKUtil.checkExists(zkw, tasknode);
    assertTrue(version1 > version);
    byte[] taskstate = ZKUtil.getData(zkw, tasknode);
    slt = SplitLogTask.parseFrom(taskstate);
    assertTrue(slt.isUnassigned(DUMMY_MASTER));

    waitForCounter(tot_mgr_rescan_deleted, 0, 1, to / 2);
  }
コード例 #9
0
  @Test
  public void testMultipleResubmits() throws Exception {
    LOG.info("TestMultipleResbmits - no indefinite resubmissions");

    conf.setInt("hbase.splitlog.max.resubmit", 2);
    slm = new SplitLogManager(zkw, conf, stopper, master, DUMMY_MASTER, null);
    slm.finishInitialization();
    TaskBatch batch = new TaskBatch();

    String tasknode = submitTaskAndWait(batch, "foo/1");
    int version = ZKUtil.checkExists(zkw, tasknode);
    final ServerName worker1 = new ServerName("worker1,1,1");
    final ServerName worker2 = new ServerName("worker2,1,1");
    final ServerName worker3 = new ServerName("worker3,1,1");
    SplitLogTask slt = new SplitLogTask.Owned(worker1);
    ZKUtil.setData(zkw, tasknode, slt.toByteArray());
    waitForCounter(tot_mgr_heartbeat, 0, 1, to / 2);
    waitForCounter(tot_mgr_resubmit, 0, 1, to + to / 2);
    int version1 = ZKUtil.checkExists(zkw, tasknode);
    assertTrue(version1 > version);
    slt = new SplitLogTask.Owned(worker2);
    ZKUtil.setData(zkw, tasknode, slt.toByteArray());
    waitForCounter(tot_mgr_heartbeat, 1, 2, to / 2);
    waitForCounter(tot_mgr_resubmit, 1, 2, to + to / 2);
    int version2 = ZKUtil.checkExists(zkw, tasknode);
    assertTrue(version2 > version1);
    slt = new SplitLogTask.Owned(worker3);
    ZKUtil.setData(zkw, tasknode, slt.toByteArray());
    waitForCounter(tot_mgr_heartbeat, 1, 2, to / 2);
    waitForCounter(tot_mgr_resubmit_threshold_reached, 0, 1, to + to / 2);
    Thread.sleep(to + to / 2);
    assertEquals(2L, tot_mgr_resubmit.get());
  }
コード例 #10
0
 @Test
 public void testVanishingTaskZNode() throws Exception {
   LOG.info("testVanishingTaskZNode");
   conf.setInt("hbase.splitlog.manager.unassigned.timeout", 0);
   slm = new SplitLogManager(zkw, conf, stopper, "dummy-master", null);
   slm.finishInitialization();
   FileSystem fs = TEST_UTIL.getTestFileSystem();
   final Path logDir = new Path(fs.getWorkingDirectory(), UUID.randomUUID().toString());
   fs.mkdirs(logDir);
   Path logFile = new Path(logDir, UUID.randomUUID().toString());
   fs.createNewFile(logFile);
   new Thread() {
     public void run() {
       try {
         // this call will block because there are no SplitLogWorkers
         slm.splitLogDistributed(logDir);
       } catch (Exception e) {
         LOG.warn("splitLogDistributed failed", e);
         fail();
       }
     }
   }.start();
   waitForCounter(tot_mgr_node_create_result, 0, 1, 10000);
   String znode = ZKSplitLog.getEncodedNodeName(zkw, logFile.toString());
   // remove the task znode
   ZKUtil.deleteNode(zkw, znode);
   waitForCounter(tot_mgr_get_data_nonode, 0, 1, 30000);
   waitForCounter(tot_mgr_log_split_batch_success, 0, 1, 1000);
   assertTrue(fs.exists(logFile));
   fs.delete(logDir, true);
 }
コード例 #11
0
  private String submitTaskAndWait(TaskBatch batch, String name)
      throws KeeperException, InterruptedException {
    String tasknode = ZKSplitLog.getEncodedNodeName(zkw, name);
    NodeCreationListener listener = new NodeCreationListener(zkw, tasknode);
    zkw.registerListener(listener);
    ZKUtil.watchAndCheckExists(zkw, tasknode);

    slm.installTask(name, batch);
    assertEquals(1, batch.installed);
    assertTrue(slm.findOrCreateOrphanTask(tasknode).batch == batch);
    assertEquals(1L, tot_mgr_node_create_queued.get());

    LOG.debug("waiting for task node creation");
    listener.waitForCreation();
    LOG.debug("task created");
    return tasknode;
  }
コード例 #12
0
  @Test
  public void testOrphanTaskAcquisition() throws Exception {
    LOG.info("TestOrphanTaskAcquisition");

    String tasknode = ZKSplitLog.getEncodedNodeName(zkw, "orphan/test/slash");
    SplitLogTask slt = new SplitLogTask.Owned(DUMMY_MASTER);
    zkw.getRecoverableZooKeeper()
        .create(tasknode, slt.toByteArray(), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);

    slm = new SplitLogManager(zkw, conf, stopper, master, DUMMY_MASTER, null);
    slm.finishInitialization();
    waitForCounter(tot_mgr_orphan_task_acquired, 0, 1, 100);
    Task task = slm.findOrCreateOrphanTask(tasknode);
    assertTrue(task.isOrphan());
    waitForCounter(tot_mgr_heartbeat, 0, 1, to / 2);
    assertFalse(task.isUnassigned());
    long curt = System.currentTimeMillis();
    assertTrue((task.last_update <= curt) && (task.last_update > (curt - 1000)));
    LOG.info("waiting for manager to resubmit the orphan task");
    waitForCounter(tot_mgr_resubmit, 0, 1, to + to / 2);
    assertTrue(task.isUnassigned());
    waitForCounter(tot_mgr_rescan, 0, 1, to + to / 2);
  }
コード例 #13
0
  /**
   * Test whether the splitlog correctly creates a task in zookeeper
   *
   * @throws Exception
   */
  @Test
  public void testTaskCreation() throws Exception {
    LOG.info("TestTaskCreation - test the creation of a task in zk");

    slm = new SplitLogManager(zkw, conf, stopper, "dummy-master", null);
    slm.finishInitialization();
    TaskBatch batch = new TaskBatch();

    String tasknode = submitTaskAndWait(batch, "foo/1");

    byte[] data = ZKUtil.getData(zkw, tasknode);
    LOG.info("Task node created " + new String(data));
    assertTrue(TaskState.TASK_UNASSIGNED.equals(data, "dummy-master"));
  }
コード例 #14
0
  @Test
  public void testDeadWorker() throws Exception {
    LOG.info("testDeadWorker");

    conf.setLong("hbase.splitlog.max.resubmit", 0);
    slm = new SplitLogManager(zkw, conf, stopper, "dummy-master", null);
    slm.finishInitialization();
    TaskBatch batch = new TaskBatch();

    String tasknode = submitTaskAndWait(batch, "foo/1");
    int version = ZKUtil.checkExists(zkw, tasknode);

    ZKUtil.setData(zkw, tasknode, TaskState.TASK_OWNED.get("worker1"));
    waitForCounter(tot_mgr_heartbeat, 0, 1, 1000);
    slm.handleDeadWorker("worker1");
    waitForCounter(tot_mgr_resubmit, 0, 1, 1000);
    waitForCounter(tot_mgr_resubmit_dead_server_task, 0, 1, 1000);

    int version1 = ZKUtil.checkExists(zkw, tasknode);
    assertTrue(version1 > version);
    byte[] taskstate = ZKUtil.getData(zkw, tasknode);
    assertTrue(Arrays.equals(TaskState.TASK_UNASSIGNED.get("dummy-master"), taskstate));
    return;
  }
コード例 #15
0
  /**
   * Test whether the splitlog correctly creates a task in zookeeper
   *
   * @throws Exception
   */
  @Test
  public void testTaskCreation() throws Exception {

    LOG.info("TestTaskCreation - test the creation of a task in zk");
    slm = new SplitLogManager(zkw, conf, stopper, master, DUMMY_MASTER, null);
    slm.finishInitialization();
    TaskBatch batch = new TaskBatch();

    String tasknode = submitTaskAndWait(batch, "foo/1");

    byte[] data = ZKUtil.getData(zkw, tasknode);
    SplitLogTask slt = SplitLogTask.parseFrom(data);
    LOG.info("Task node created " + slt.toString());
    assertTrue(slt.isUnassigned(DUMMY_MASTER));
  }
コード例 #16
0
  @Test
  public void testTaskDone() throws Exception {
    LOG.info("TestTaskDone - cleanup task node once in DONE state");

    slm = new SplitLogManager(zkw, conf, stopper, "dummy-master", null);
    slm.finishInitialization();
    TaskBatch batch = new TaskBatch();
    String tasknode = submitTaskAndWait(batch, "foo/1");
    ZKUtil.setData(zkw, tasknode, TaskState.TASK_DONE.get("worker"));
    synchronized (batch) {
      while (batch.installed != batch.done) {
        batch.wait();
      }
    }
    waitForCounter(tot_mgr_task_deleted, 0, 1, 1000);
    assertTrue(ZKUtil.checkExists(zkw, tasknode) == -1);
  }
コード例 #17
0
  @Test(timeout = 45000)
  public void testVanishingTaskZNode() throws Exception {
    LOG.info("testVanishingTaskZNode");

    conf.setInt("hbase.splitlog.manager.unassigned.timeout", 0);

    slm = new SplitLogManager(zkw, conf, stopper, master, DUMMY_MASTER, null);
    slm.finishInitialization();
    FileSystem fs = TEST_UTIL.getTestFileSystem();
    final Path logDir = new Path(fs.getWorkingDirectory(), UUID.randomUUID().toString());
    fs.mkdirs(logDir);
    Thread thread = null;
    try {
      Path logFile = new Path(logDir, UUID.randomUUID().toString());
      fs.createNewFile(logFile);
      thread =
          new Thread() {
            public void run() {
              try {
                // this call will block because there are no SplitLogWorkers,
                // until the task znode is deleted below. Then the call will
                // complete successfully, assuming the log is split.
                slm.splitLogDistributed(logDir);
              } catch (Exception e) {
                LOG.warn("splitLogDistributed failed", e);
              }
            }
          };
      thread.start();
      waitForCounter(tot_mgr_node_create_result, 0, 1, 10000);
      String znode = ZKSplitLog.getEncodedNodeName(zkw, logFile.toString());
      // remove the task znode, to finish the distributed log splitting
      ZKUtil.deleteNode(zkw, znode);
      waitForCounter(tot_mgr_get_data_nonode, 0, 1, 30000);
      waitForCounter(tot_mgr_log_split_batch_success, 0, 1, 1000);
      assertTrue(fs.exists(logFile));
    } finally {
      if (thread != null) {
        // interrupt the thread in case the test fails in the middle.
        // it has no effect if the thread is already terminated.
        thread.interrupt();
      }
      fs.delete(logDir, true);
    }
  }
コード例 #18
0
  @Test
  public void testTaskResigned() throws Exception {
    LOG.info("TestTaskResigned - resubmit task node once in RESIGNED state");

    slm = new SplitLogManager(zkw, conf, stopper, "dummy-master", null);
    slm.finishInitialization();
    TaskBatch batch = new TaskBatch();
    String tasknode = submitTaskAndWait(batch, "foo/1");
    ZKUtil.setData(zkw, tasknode, TaskState.TASK_RESIGNED.get("worker"));
    int version = ZKUtil.checkExists(zkw, tasknode);

    waitForCounter(tot_mgr_resubmit, 0, 1, 1000);
    int version1 = ZKUtil.checkExists(zkw, tasknode);
    assertTrue(version1 > version);

    byte[] taskstate = ZKUtil.getData(zkw, tasknode);
    assertTrue(Arrays.equals(taskstate, TaskState.TASK_UNASSIGNED.get("dummy-master")));
  }
コード例 #19
0
  @Test
  public void testTaskDone() throws Exception {
    LOG.info("TestTaskDone - cleanup task node once in DONE state");

    slm = new SplitLogManager(zkw, conf, stopper, master, DUMMY_MASTER, null);
    slm.finishInitialization();
    TaskBatch batch = new TaskBatch();
    String tasknode = submitTaskAndWait(batch, "foo/1");
    final ServerName worker1 = new ServerName("worker1,1,1");
    SplitLogTask slt = new SplitLogTask.Done(worker1);
    ZKUtil.setData(zkw, tasknode, slt.toByteArray());
    synchronized (batch) {
      while (batch.installed != batch.done) {
        batch.wait();
      }
    }
    waitForCounter(tot_mgr_task_deleted, 0, 1, to / 2);
    assertTrue(ZKUtil.checkExists(zkw, tasknode) == -1);
  }
コード例 #20
0
  @Test
  public void testTaskErr() throws Exception {
    LOG.info("TestTaskErr - cleanup task node once in ERR state");

    conf.setInt("hbase.splitlog.max.resubmit", 0);
    slm = new SplitLogManager(zkw, conf, stopper, "dummy-master", null);
    slm.finishInitialization();
    TaskBatch batch = new TaskBatch();

    String tasknode = submitTaskAndWait(batch, "foo/1");
    ZKUtil.setData(zkw, tasknode, TaskState.TASK_ERR.get("worker"));
    synchronized (batch) {
      while (batch.installed != batch.error) {
        batch.wait();
      }
    }
    waitForCounter(tot_mgr_task_deleted, 0, 1, 1000);
    assertTrue(ZKUtil.checkExists(zkw, tasknode) == -1);
    conf.setInt("hbase.splitlog.max.resubmit", ZKSplitLog.DEFAULT_MAX_RESUBMIT);
  }
コード例 #21
0
  @Test
  public void testUnassignedTimeout() throws Exception {
    LOG.info("TestUnassignedTimeout - iff all tasks are unassigned then" + " resubmit");

    // create an orphan task in OWNED state
    String tasknode1 = ZKSplitLog.getEncodedNodeName(zkw, "orphan/1");
    zkw.getRecoverableZooKeeper()
        .create(
            tasknode1,
            TaskState.TASK_OWNED.get("dummy-worker"),
            Ids.OPEN_ACL_UNSAFE,
            CreateMode.PERSISTENT);

    int to = 1000;
    conf.setInt("hbase.splitlog.manager.timeout", to);
    conf.setInt("hbase.splitlog.manager.unassigned.timeout", 2 * to);
    conf.setInt("hbase.splitlog.manager.timeoutmonitor.period", 100);

    slm = new SplitLogManager(zkw, conf, stopper, "dummy-master", null);
    slm.finishInitialization();
    waitForCounter(tot_mgr_orphan_task_acquired, 0, 1, 100);

    // submit another task which will stay in unassigned mode
    TaskBatch batch = new TaskBatch();
    submitTaskAndWait(batch, "foo/1");

    // keep updating the orphan owned node every to/2 seconds
    for (int i = 0; i < (3 * to) / 100; i++) {
      Thread.sleep(100);
      ZKUtil.setData(zkw, tasknode1, TaskState.TASK_OWNED.get("dummy-worker"));
    }

    // since we have stopped heartbeating the owned node therefore it should
    // get resubmitted
    LOG.info("waiting for manager to resubmit the orphan task");
    waitForCounter(tot_mgr_resubmit, 0, 1, to + 500);

    // now all the nodes are unassigned. manager should post another rescan
    waitForCounter(tot_mgr_resubmit_unassigned, 0, 1, 2 * to + 500);
  }
コード例 #22
0
  @Test
  public void testRescanCleanup() throws Exception {
    LOG.info("TestRescanCleanup - ensure RESCAN nodes are cleaned up");

    conf.setInt("hbase.splitlog.manager.timeout", 1000);
    conf.setInt("hbase.splitlog.manager.timeoutmonitor.period", 100);
    slm = new SplitLogManager(zkw, conf, stopper, "dummy-master", null);
    slm.finishInitialization();
    TaskBatch batch = new TaskBatch();

    String tasknode = submitTaskAndWait(batch, "foo/1");
    int version = ZKUtil.checkExists(zkw, tasknode);

    ZKUtil.setData(zkw, tasknode, TaskState.TASK_OWNED.get("worker1"));
    waitForCounter(tot_mgr_heartbeat, 0, 1, 1000);
    waitForCounter(
        new Expr() {
          @Override
          public long eval() {
            return (tot_mgr_resubmit.get() + tot_mgr_resubmit_failed.get());
          }
        },
        0,
        1,
        5 * 60000); // wait long enough
    if (tot_mgr_resubmit_failed.get() == 0) {
      int version1 = ZKUtil.checkExists(zkw, tasknode);
      assertTrue(version1 > version);
      byte[] taskstate = ZKUtil.getData(zkw, tasknode);
      assertTrue(Arrays.equals(TaskState.TASK_UNASSIGNED.get("dummy-master"), taskstate));

      waitForCounter(tot_mgr_rescan_deleted, 0, 1, 1000);
    } else {
      LOG.warn("Could not run test. Lost ZK connection?");
    }

    return;
  }
コード例 #23
0
  @Test
  public void testTaskErr() throws Exception {
    LOG.info("TestTaskErr - cleanup task node once in ERR state");

    conf.setInt("hbase.splitlog.max.resubmit", 0);
    slm = new SplitLogManager(zkw, conf, stopper, master, DUMMY_MASTER, null);
    slm.finishInitialization();
    TaskBatch batch = new TaskBatch();

    String tasknode = submitTaskAndWait(batch, "foo/1");
    final ServerName worker1 = new ServerName("worker1,1,1");
    SplitLogTask slt = new SplitLogTask.Err(worker1);
    ZKUtil.setData(zkw, tasknode, slt.toByteArray());

    synchronized (batch) {
      while (batch.installed != batch.error) {
        batch.wait();
      }
    }
    waitForCounter(tot_mgr_task_deleted, 0, 1, to / 2);
    assertTrue(ZKUtil.checkExists(zkw, tasknode) == -1);
    conf.setInt("hbase.splitlog.max.resubmit", SplitLogManager.DEFAULT_MAX_RESUBMIT);
  }
コード例 #24
0
  @Test
  public void testWorkerCrash() throws Exception {
    slm = new SplitLogManager(zkw, conf, stopper, master, DUMMY_MASTER, null);
    slm.finishInitialization();
    TaskBatch batch = new TaskBatch();

    String tasknode = submitTaskAndWait(batch, "foo/1");
    final ServerName worker1 = new ServerName("worker1,1,1");

    SplitLogTask slt = new SplitLogTask.Owned(worker1);
    ZKUtil.setData(zkw, tasknode, slt.toByteArray());
    if (tot_mgr_heartbeat.get() == 0) waitForCounter(tot_mgr_heartbeat, 0, 1, to / 2);

    // Not yet resubmitted.
    Assert.assertEquals(0, tot_mgr_resubmit.get());

    // This server becomes dead
    Mockito.when(sm.isServerOnline(worker1)).thenReturn(false);

    Thread.sleep(1300); // The timeout checker is done every 1000 ms (hardcoded).

    // It has been resubmitted
    Assert.assertEquals(1, tot_mgr_resubmit.get());
  }
コード例 #25
0
  public void splitLog(final List<ServerName> serverNames) throws IOException {
    long splitTime = 0, splitLogSize = 0;
    List<Path> logDirs = new ArrayList<Path>();
    for(ServerName serverName: serverNames){
      Path logDir = new Path(this.rootdir,
        HLog.getHLogDirectoryName(serverName.toString()));
      Path splitDir = logDir.suffix(HLog.SPLITTING_EXT);
      // rename the directory so a rogue RS doesn't create more HLogs
      if (fs.exists(logDir)) {
        if (!this.fs.rename(logDir, splitDir)) {
          throw new IOException("Failed fs.rename for log split: " + logDir);
        }
        logDir = splitDir;
        LOG.debug("Renamed region directory: " + splitDir);
      } else if (!fs.exists(splitDir)) {
        LOG.info("Log dir for server " + serverName + " does not exist");
        continue;
      }
      logDirs.add(splitDir);
    }

    if (logDirs.isEmpty()) {
      LOG.info("No logs to split");
      return;
    }

    if (distributedLogSplitting) {
      splitLogManager.handleDeadWorkers(serverNames);
      splitTime = EnvironmentEdgeManager.currentTimeMillis();
      splitLogSize = splitLogManager.splitLogDistributed(logDirs);
      splitTime = EnvironmentEdgeManager.currentTimeMillis() - splitTime;
    } else {
      for(Path logDir: logDirs){
        // splitLogLock ensures that dead region servers' logs are processed
        // one at a time
        this.splitLogLock.lock();
        try {
          HLogSplitter splitter = HLogSplitter.createLogSplitter(
            conf, rootdir, logDir, oldLogDir, this.fs);
          try {
            // If FS is in safe mode, just wait till out of it.
            FSUtils.waitOnSafeMode(conf, conf.getInt(HConstants.THREAD_WAKE_FREQUENCY, 1000));
            splitter.splitLog();
          } catch (OrphanHLogAfterSplitException e) {
            LOG.warn("Retrying splitting because of:", e);
            //An HLogSplitter instance can only be used once.  Get new instance.
            splitter = HLogSplitter.createLogSplitter(conf, rootdir, logDir,
              oldLogDir, this.fs);
            splitter.splitLog();
          }
          splitTime = splitter.getTime();
          splitLogSize = splitter.getSize();
        } finally {
          this.splitLogLock.unlock();
        }
      }
    }

    if (this.metrics != null) {
      this.metrics.addSplit(splitTime, splitLogSize);
    }
  }
コード例 #26
0
 @After
 public void teardown() throws IOException, KeeperException {
   stopper.stop("");
   slm.stop();
   TEST_UTIL.shutdownMiniZKCluster();
 }