@Test public void testMultipleResubmits() throws Exception { LOG.info("TestMultipleResbmits - no indefinite resubmissions"); int to = 1000; conf.setInt("hbase.splitlog.manager.timeout", to); conf.setInt("hbase.splitlog.manager.timeoutmonitor.period", 100); to = to + 2 * 100; conf.setInt("hbase.splitlog.max.resubmit", 2); slm = new SplitLogManager(zkw, conf, stopper, "dummy-master", null); slm.finishInitialization(); TaskBatch batch = new TaskBatch(); String tasknode = submitTaskAndWait(batch, "foo/1"); int version = ZKUtil.checkExists(zkw, tasknode); ZKUtil.setData(zkw, tasknode, TaskState.TASK_OWNED.get("worker1")); waitForCounter(tot_mgr_heartbeat, 0, 1, 1000); waitForCounter(tot_mgr_resubmit, 0, 1, to + 100); int version1 = ZKUtil.checkExists(zkw, tasknode); assertTrue(version1 > version); ZKUtil.setData(zkw, tasknode, TaskState.TASK_OWNED.get("worker2")); waitForCounter(tot_mgr_heartbeat, 1, 2, 1000); waitForCounter(tot_mgr_resubmit, 1, 2, to + 100); int version2 = ZKUtil.checkExists(zkw, tasknode); assertTrue(version2 > version1); ZKUtil.setData(zkw, tasknode, TaskState.TASK_OWNED.get("worker3")); waitForCounter(tot_mgr_heartbeat, 1, 2, 1000); waitForCounter(tot_mgr_resubmit_threshold_reached, 0, 1, to + 100); Thread.sleep(to + 100); assertEquals(2L, tot_mgr_resubmit.get()); }
private void waitForCounter(Expr e, long oldval, long newval, long timems) { long curt = System.currentTimeMillis(); long endt = curt + timems; while (curt < endt) { if (e.eval() == oldval) { try { Thread.sleep(10); } catch (InterruptedException eintr) { } curt = System.currentTimeMillis(); } else { assertEquals(newval, e.eval()); return; } } assertTrue(false); }
@Test public void testUnassignedTimeout() throws Exception { LOG.info("TestUnassignedTimeout - iff all tasks are unassigned then" + " resubmit"); // create an orphan task in OWNED state String tasknode1 = ZKSplitLog.getEncodedNodeName(zkw, "orphan/1"); zkw.getRecoverableZooKeeper() .create( tasknode1, TaskState.TASK_OWNED.get("dummy-worker"), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); int to = 1000; conf.setInt("hbase.splitlog.manager.timeout", to); conf.setInt("hbase.splitlog.manager.unassigned.timeout", 2 * to); conf.setInt("hbase.splitlog.manager.timeoutmonitor.period", 100); slm = new SplitLogManager(zkw, conf, stopper, "dummy-master", null); slm.finishInitialization(); waitForCounter(tot_mgr_orphan_task_acquired, 0, 1, 100); // submit another task which will stay in unassigned mode TaskBatch batch = new TaskBatch(); submitTaskAndWait(batch, "foo/1"); // keep updating the orphan owned node every to/2 seconds for (int i = 0; i < (3 * to) / 100; i++) { Thread.sleep(100); ZKUtil.setData(zkw, tasknode1, TaskState.TASK_OWNED.get("dummy-worker")); } // since we have stopped heartbeating the owned node therefore it should // get resubmitted LOG.info("waiting for manager to resubmit the orphan task"); waitForCounter(tot_mgr_resubmit, 0, 1, to + 500); // now all the nodes are unassigned. manager should post another rescan waitForCounter(tot_mgr_resubmit_unassigned, 0, 1, 2 * to + 500); }