/**
 * Lists the procedures currently under the acquired-barrier znode, leaving a watch for new
 * children, and starts a subprocedure for each procedure found.
 *
 * <p>A ZooKeeper failure is reported through {@code member.controllerConnectionFailure} and the
 * method then returns without starting anything.
 */
private void waitForNewProcedures() {
   // watch for new procedures that we need to start subprocedures for
   LOG.debug("Looking for new procedures under znode:'" + zkController.getAcquiredBarrier() + "'");
   List<String> runningProcedures;
   try {
     runningProcedures =
         ZKUtil.listChildrenAndWatchForNewChildren(
             zkController.getWatcher(), zkController.getAcquiredBarrier());
   } catch (KeeperException e) {
     member.controllerConnectionFailure(
         "General failure when watching for new procedures", e, null);
     // The failure has already been reported; stop here instead of falling through and
     // logging a misleading "No running procedures." message.
     return;
   }
   if (runningProcedures == null) {
     LOG.debug("No running procedures.");
     return;
   }
   for (String procName : runningProcedures) {
     // then read in the procedure information
     String path = ZKUtil.joinZNode(zkController.getAcquiredBarrier(), procName);
     startNewSubprocedure(path);
   }
 }
Example #2
0
 /**
  * Stores the record with the current timestamp, writes that timestamp to the record's znode and
  * remembers the resulting znode version in the tracker.
  *
  * @param rec the meta record to update
  * @throws RuntimeException wrapping any exception raised while updating
  */
 public void updateRecord(MetaRecord rec) {
   try {
     final long timestamp = System.currentTimeMillis();
     putRecord(rec, timestamp);
     final String znodePath =
         ZKUtil.joinZNode(H2MetaTableTracker.NODE_NAME, Integer.toString(rec.getId()));
     // setData asynchronously triggers H2MetaTableTracker.nodeDataChanged on every machine
     // (including this one), which in turn triggers the following call chain:
     // =>org.h2.engine.Database.updateDatabaseObject(int)
     //  =>org.h2.engine.Database.update(Session, DbObject)
     //      =>org.h2.engine.Database.addMeta0(Session, DbObject, boolean)
     //          =>back into this method
     // so it would cause a loop.
     //
     // The lock avoids nodeDataChanged firing right after setData while the id version has
     // not been updated yet.
     synchronized (this) {
       ZKUtil.setData(watcher, znodePath, Bytes.toBytes(timestamp));
       // The watch is gone after setData, so set it again to keep listening for
       // modifications made to this node by others.
       final Stat nodeStat = new Stat();
       ZKUtil.getDataAndWatch(watcher, znodePath, nodeStat);
       // Record the latest version of the id here. When nodeDataChanged fires, the version is
       // checked again; if it is not greater than the version recorded here, the
       // updateDatabaseObject operation is not executed again.
       tracker.updateIdVersion(rec.getId(), nodeStat.getVersion());
     }
   } catch (Exception cause) {
     throw new RuntimeException(cause);
   }
 }
  /**
   * Builds the {@link Configuration} for the given peer from the cluster key stored in the peer's
   * znode.
   *
   * @param peerId identifier of the peer whose znode is read
   * @return a copy of this instance's configuration with the peer's cluster key applied, or
   *     {@code null} if no data is found for the peer, the data cannot be parsed, or the cluster
   *     key cannot be applied
   * @throws ReplicationException if reading the peer znode fails with a {@link KeeperException}
   */
  @Override
  public Configuration getPeerConf(String peerId) throws ReplicationException {
    String znode = ZKUtil.joinZNode(this.peersZNode, peerId);
    byte[] data;
    try {
      data = ZKUtil.getData(this.zookeeper, znode);
    } catch (KeeperException e) {
      throw new ReplicationException("Error getting configuration for peer with id=" + peerId, e);
    }
    if (data == null) {
      LOG.error("Could not get configuration for peer because it doesn't exist. peerId=" + peerId);
      return null;
    }
    String otherClusterKey;
    try {
      otherClusterKey = parsePeerFrom(data);
    } catch (DeserializationException e) {
      // Log the exception as well so the reason for the parse failure is not lost.
      LOG.warn(
          "Failed to parse cluster key from peerId="
              + peerId
              + ", specifically the content from the following znode: "
              + znode,
          e);
      return null;
    }

    Configuration otherConf = new Configuration(this.conf);
    try {
      ZKUtil.applyClusterKeyToConf(otherConf, otherClusterKey);
    } catch (IOException e) {
      LOG.error("Can't get peer configuration for peerId=" + peerId + " because:", e);
      return null;
    }
    return otherConf;
  }
  /**
   * Marks a task as owned by a worker, reports that worker dead to the manager and verifies that
   * the task znode is rewritten (higher version) in the unassigned state.
   */
  @Test
  public void testDeadWorker() throws Exception {
    LOG.info("testDeadWorker");

    conf.setLong("hbase.splitlog.max.resubmit", 0);
    slm = new SplitLogManager(zkw, conf, stopper, master, DUMMY_MASTER, null);
    slm.finishInitialization();

    final String taskZNode = submitTaskAndWait(new TaskBatch(), "foo/1");
    final int versionBefore = ZKUtil.checkExists(zkw, taskZNode);

    // Hand the task to worker1 by writing an OWNED state into the task znode.
    final ServerName worker1 = new ServerName("worker1,1,1");
    final SplitLogTask ownedTask = new SplitLogTask.Owned(worker1);
    ZKUtil.setData(zkw, taskZNode, ownedTask.toByteArray());
    if (tot_mgr_heartbeat.get() == 0) {
      waitForCounter(tot_mgr_heartbeat, 0, 1, to / 2);
    }

    slm.handleDeadWorker(worker1);
    if (tot_mgr_resubmit.get() == 0) {
      waitForCounter(tot_mgr_resubmit, 0, 1, to + to / 2);
    }
    if (tot_mgr_resubmit_dead_server_task.get() == 0) {
      waitForCounter(tot_mgr_resubmit_dead_server_task, 0, 1, to + to / 2);
    }

    final int versionAfter = ZKUtil.checkExists(zkw, taskZNode);
    assertTrue(versionAfter > versionBefore);
    final SplitLogTask currentTask = SplitLogTask.parseFrom(ZKUtil.getData(zkw, taskZNode));
    assertTrue(currentTask.isUnassigned(DUMMY_MASTER));
  }
  /**
   * Helper method to connect to a peer.
   *
   * @param peerId peer's identifier
   * @return object representing the peer, or {@code null} if no configuration could be obtained
   *     for the peer or the peer's cluster key equals this cluster's own key
   * @throws ReplicationException if the peer configuration cannot be read, or if starting the
   *     peer state tracker or the tableCFs tracker fails with a {@link KeeperException}
   */
  private ReplicationPeer getPeer(String peerId) throws ReplicationException {
    Configuration peerConf = getPeerConf(peerId);
    if (peerConf == null) {
      return null;
    }
    // Derive the cluster key once; it is needed both for the self-check and for the peer itself.
    String peerClusterKey = ZKUtil.getZooKeeperClusterKey(peerConf);
    if (this.ourClusterKey.equals(peerClusterKey)) {
      LOG.debug("Not connecting to " + peerId + " because it's us");
      return null;
    }

    ReplicationPeer peer = new ReplicationPeer(peerConf, peerId, peerClusterKey);
    try {
      peer.startStateTracker(this.zookeeper, this.getPeerStateNode(peerId));
    } catch (KeeperException e) {
      throw new ReplicationException(
          "Error starting the peer state tracker for peerId=" + peerId, e);
    }

    try {
      peer.startTableCFsTracker(this.zookeeper, this.getTableCFsNode(peerId));
    } catch (KeeperException e) {
      throw new ReplicationException(
          "Error starting the peer tableCFs tracker for peerId=" + peerId, e);
    }

    peer.getZkw().registerListener(new PeerRegionServerListener(peer));
    return peer;
  }
  /**
   * Registers a new peer by creating its znode (holding the cluster key), its peer-state znode
   * (initialised to enabled) and its tableCFs znode.
   *
   * @param id identifier of the new peer
   * @param clusterKey cluster key stored in the peer znode
   * @param tableCFs table/column-family specification; {@code null} is stored as an empty string
   * @throws IllegalArgumentException if a peer with this id already exists
   * @throws ReplicationException if any ZooKeeper operation fails
   */
  @Override
  public void addPeer(String id, String clusterKey, String tableCFs) throws ReplicationException {
    try {
      if (peerExists(id)) {
        throw new IllegalArgumentException(
            "Cannot add a peer with id=" + id + " because that id already exists.");
      }
      ZKUtil.createWithParents(this.zookeeper, this.peersZNode);
      final String peerZNode = ZKUtil.joinZNode(this.peersZNode, id);
      ZKUtil.createAndWatch(this.zookeeper, peerZNode, toByteArray(clusterKey));

      // The peer-state znode may also be created concurrently by the PeerWatcher while a peer
      // is being added, hence the create-if-not-exists. A peer is enabled by default.
      ZKUtil.createNodeIfNotExistsAndWatch(
          this.zookeeper, getPeerStateNode(id), ENABLED_ZNODE_BYTES);

      final String tableCFsValue;
      if (tableCFs == null) {
        tableCFsValue = "";
      } else {
        tableCFsValue = tableCFs;
      }
      ZKUtil.createNodeIfNotExistsAndWatch(
          this.zookeeper, getTableCFsNode(id), Bytes.toBytes(tableCFsValue));
    } catch (KeeperException zkFailure) {
      throw new ReplicationException(
          "Could not add peer with id=" + id + ", clusterKey=" + clusterKey, zkFailure);
    }
  }
  /**
   * Lets an owned task go through a resubmit and then waits for the
   * {@code tot_mgr_rescan_deleted} counter to be incremented.
   */
  @Test
  public void testRescanCleanup() throws Exception {
    LOG.info("TestRescanCleanup - ensure RESCAN nodes are cleaned up");

    slm = new SplitLogManager(zkw, conf, stopper, master, DUMMY_MASTER, null);
    slm.finishInitialization();

    final String taskZNode = submitTaskAndWait(new TaskBatch(), "foo/1");
    final int versionBefore = ZKUtil.checkExists(zkw, taskZNode);

    final ServerName worker1 = new ServerName("worker1,1,1");
    final SplitLogTask ownedTask = new SplitLogTask.Owned(worker1);
    ZKUtil.setData(zkw, taskZNode, ownedTask.toByteArray());
    waitForCounter(tot_mgr_heartbeat, 0, 1, to / 2);

    // Wait until the manager has either resubmitted the task or failed to do so.
    final Expr resubmitAttempts =
        new Expr() {
          @Override
          public long eval() {
            return (tot_mgr_resubmit.get() + tot_mgr_resubmit_failed.get());
          }
        };
    waitForCounter(resubmitAttempts, 0, 1, 5 * 60000); // wait long enough
    Assert.assertEquals(
        "Could not run test. Lost ZK connection?", 0, tot_mgr_resubmit_failed.get());

    final int versionAfter = ZKUtil.checkExists(zkw, taskZNode);
    assertTrue(versionAfter > versionBefore);
    final SplitLogTask currentTask = SplitLogTask.parseFrom(ZKUtil.getData(zkw, taskZNode));
    assertTrue(currentTask.isUnassigned(DUMMY_MASTER));

    waitForCounter(tot_mgr_rescan_deleted, 0, 1, to / 2);
  }
  /**
   * Creates an unassigned task znode before the manager starts and checks that the manager picks
   * it up as an orphan and resubmits it once.
   */
  @Test
  public void testUnassignedOrphan() throws Exception {
    LOG.info("TestUnassignedOrphan - an unassigned task is resubmitted at startup");
    final String orphanZNode = ZKSplitLog.getEncodedNodeName(zkw, "orphan/test/slash");
    // Create the unassigned orphan task directly in ZooKeeper.
    zkw.getRecoverableZooKeeper()
        .create(
            orphanZNode,
            TaskState.TASK_UNASSIGNED.get("dummy-worker"),
            Ids.OPEN_ACL_UNSAFE,
            CreateMode.PERSISTENT);
    final int initialVersion = ZKUtil.checkExists(zkw, orphanZNode);

    slm = new SplitLogManager(zkw, conf, stopper, "dummy-master", null);
    slm.finishInitialization();
    waitForCounter(tot_mgr_orphan_task_acquired, 0, 1, 100);

    final Task firstLookup = slm.findOrCreateOrphanTask(orphanZNode);
    assertTrue(firstLookup.isOrphan());
    assertTrue(firstLookup.isUnassigned());

    // Wait for the RESCAN node to be created.
    waitForCounter(tot_mgr_rescan, 0, 1, 500);

    // A second lookup must hand back the very same Task object.
    final Task secondLookup = slm.findOrCreateOrphanTask(orphanZNode);
    assertTrue(firstLookup == secondLookup);
    LOG.debug("task = " + firstLookup);
    assertEquals(1L, tot_mgr_resubmit.get());
    assertEquals(1, firstLookup.incarnation);
    assertEquals(0, firstLookup.unforcedResubmits);
    assertTrue(firstLookup.isOrphan());
    assertTrue(firstLookup.isUnassigned());

    final int finalVersion = ZKUtil.checkExists(zkw, orphanZNode);
    assertTrue(finalVersion > initialVersion);
  }
  /**
   * Announces that this member has acquired the barrier for the given subprocedure by creating
   * the member's znode under the procedure's acquired-barrier node.
   *
   * <p>Afterwards a watch is set on the procedure's reached-barrier znode. If that znode already
   * exists, the reached-barrier handling is triggered immediately; otherwise the watch is left in
   * place.
   *
   * @param sub the subprocedure whose name identifies the procedure
   */
  @Override
  public void sendMemberAcquired(Subprocedure sub) throws IOException {
    final String procName = sub.getName();
    try {
      LOG.debug(
          "Member: '" + memberName + "' joining acquired barrier for procedure (" + procName
              + ") in zk");
      final String barrierNode = ZKProcedureUtil.getAcquireBarrierNode(zkController, procName);
      final String memberNode = ZKUtil.joinZNode(barrierNode, memberName);
      ZKUtil.createAndFailSilent(zkController.getWatcher(), memberNode);

      // Watch the reached-barrier node for this procedure.
      final String reachedNode = zkController.getReachedBarrierNode(procName);
      LOG.debug("Watch for global barrier reached:" + reachedNode);
      final boolean alreadyReached =
          ZKUtil.watchAndCheckExists(zkController.getWatcher(), reachedNode);
      if (alreadyReached) {
        receivedReachedGlobalBarrier(reachedNode);
      }
    } catch (KeeperException zkFailure) {
      final String failureMessage =
          "Failed to acquire barrier for procedure: " + procName + " and member: " + memberName;
      member.controllerConnectionFailure(failureMessage, zkFailure, procName);
    }
  }
  /**
   * Writes a RESIGNED state into a task znode and verifies that the manager resubmits the task
   * exactly once, leaving the znode rewritten (higher version) in the unassigned state.
   *
   * <p>The resubmit counter is asserted to still be zero after every preparatory step.
   */
  @Test
  public void testTaskResigned() throws Exception {
    LOG.info("TestTaskResigned - resubmit task node once in RESIGNED state");
    // JUnit's assertEquals takes (expected, actual); keeping that order makes failure messages
    // report the right values.
    assertEquals(0, tot_mgr_resubmit.get());
    slm = new SplitLogManager(zkw, conf, stopper, master, DUMMY_MASTER, null);
    slm.finishInitialization();
    assertEquals(0, tot_mgr_resubmit.get());
    TaskBatch batch = new TaskBatch();
    String tasknode = submitTaskAndWait(batch, "foo/1");
    assertEquals(0, tot_mgr_resubmit.get());
    final ServerName worker1 = new ServerName("worker1,1,1");
    assertEquals(0, tot_mgr_resubmit.get());
    SplitLogTask slt = new SplitLogTask.Resigned(worker1);
    assertEquals(0, tot_mgr_resubmit.get());
    ZKUtil.setData(zkw, tasknode, slt.toByteArray());
    int version = ZKUtil.checkExists(zkw, tasknode);
    // Could be small race here.
    if (tot_mgr_resubmit.get() == 0) {
      waitForCounter(tot_mgr_resubmit, 0, 1, to / 2);
    }
    assertEquals(1, tot_mgr_resubmit.get());
    int version1 = ZKUtil.checkExists(zkw, tasknode);
    assertTrue("version1=" + version1 + ", version=" + version, version1 > version);

    byte[] taskstate = ZKUtil.getData(zkw, tasknode);
    slt = SplitLogTask.parseFrom(taskstate);
    assertTrue(slt.isUnassigned(DUMMY_MASTER));
  }
  /**
   * Starts a mini ZooKeeper cluster, creates a fresh watcher with freshly created base and
   * split-log znodes, resets the counters and configures the split-log manager timeouts.
   */
  @Before
  public void setup() throws Exception {
    TEST_UTIL = new HBaseTestingUtility();
    TEST_UTIL.startMiniZKCluster();
    conf = TEST_UTIL.getConfiguration();

    // Use a different ZK wrapper instance for each test.
    final String watcherName = "split-log-manager-tests" + UUID.randomUUID();
    zkw = new ZooKeeperWatcher(conf, watcherName, null);

    ZKUtil.deleteChildrenRecursively(zkw, zkw.baseZNode);
    ZKUtil.createAndFailSilent(zkw, zkw.baseZNode);
    assertTrue(ZKUtil.checkExists(zkw, zkw.baseZNode) != -1);
    LOG.debug(zkw.baseZNode + " created");

    ZKUtil.createAndFailSilent(zkw, zkw.splitLogZNode);
    assertTrue(ZKUtil.checkExists(zkw, zkw.splitLogZNode) != -1);
    LOG.debug(zkw.splitLogZNode + " created");

    stopped = false;
    resetCounters();

    // By default, we let the test manage the error as before, so the server
    // does not appear as dead from the master point of view, only from the split log pov.
    Mockito.when(sm.isServerOnline(Mockito.any(ServerName.class))).thenReturn(true);
    Mockito.when(master.getServerManager()).thenReturn(sm);

    to = 4000;
    conf.setInt("hbase.splitlog.manager.timeout", to);
    conf.setInt("hbase.splitlog.manager.unassigned.timeout", 2 * to);
    conf.setInt("hbase.splitlog.manager.timeoutmonitor.period", 100);
    // The tests wait somewhat longer than the configured timeout: four monitor periods more.
    to += 4 * 100;
  }
Example #12
0
 /**
  * Stores the record with the current timestamp, creates (and watches) the record's znode with
  * empty data and registers version 0 for the record id in the tracker.
  *
  * @param rec the meta record to add
  * @throws RuntimeException wrapping any exception raised while adding
  */
 public void addRecord(MetaRecord rec) {
   try {
     putRecord(rec, System.currentTimeMillis());
     final String idText = Integer.toString(rec.getId());
     final String znodePath = ZKUtil.joinZNode(H2MetaTableTracker.NODE_NAME, idText);
     ZKUtil.createAndWatch(watcher, znodePath, EMPTY_BYTE_ARRAY);
     tracker.updateIdVersion(rec.getId(), 0);
   } catch (Exception cause) {
     throw new RuntimeException(cause);
   }
 }
 /**
  * Makes sure the peers znode exists (creating it with its parents when missing) and then
  * connects to the peers that are already registered.
  *
  * @throws ReplicationException if checking for or creating the peers znode fails
  */
 @Override
 public void init() throws ReplicationException {
   try {
     final boolean peersZNodeMissing = ZKUtil.checkExists(this.zookeeper, this.peersZNode) < 0;
     if (peersZNodeMissing) {
       ZKUtil.createWithParents(this.zookeeper, this.peersZNode);
     }
   } catch (KeeperException zkFailure) {
     throw new ReplicationException("Could not initialize replication peers", zkFailure);
   }
   connectExistingPeers();
 }
Example #14
0
  /**
   * Test waiting on meta w/ no timeout specified.
   *
   * <p>A background thread blocks in {@code waitForMeta()} while the main thread creates and then
   * deletes the meta node tracker's znode and updates the meta location; both the background wait
   * and subsequent timed waits are then expected to complete with the meta location {@code SN}.
   *
   * <p>The test is currently disabled via {@code @Ignore}; see the comment on the annotation.
   *
   * @throws Exception if any step of the test fails
   */
  @Ignore // Can't make it work reliably on all platforms; mockito gets confused
  // Throwing: org.mockito.exceptions.misusing.WrongTypeOfReturnValue:
  // Result cannot be returned by locateRegion()
  // If you plug locateRegion, it then throws for incCounter, and if you plug
  // that ... and so on.
  @Test
  public void testNoTimeoutWaitForMeta() throws Exception {
    // Mock an HConnection and a HRegionInterface implementation.  Have the
    // HConnection return the HRI.  Have the HRI return a few mocked up responses
    // to make our test work.
    // Mock an HRegionInterface.
    final HRegionInterface implementation = Mockito.mock(HRegionInterface.class);
    HConnection connection = mockConnection(implementation);

    // Now the ct is up... set into the mocks some answers that make it look
    // like things have been getting assigned. Make it so we'll return a
    // location (no matter what the Get is). Same for getHRegionInfo -- always
    // just return the meta region.
    final Result result = getMetaTableRowResult();

    // TODO: Refactor.  This method has been moved out of HConnection.
    // It works for now but has been deprecated.
    Mockito.when(connection.getRegionServerWithRetries((ServerCallable<Result>) Mockito.any()))
        .thenReturn(result);
    Mockito.when(implementation.getRegionInfo((byte[]) Mockito.any()))
        .thenReturn(HRegionInfo.FIRST_META_REGIONINFO);
    final CatalogTracker ct = constructAndStartCatalogTracker(connection);
    // No meta location is expected to be known yet.
    ServerName hsa = ct.getMetaLocation();
    Assert.assertNull(hsa);

    // Now test waiting on meta location getting set.
    Thread t =
        new WaitOnMetaThread(ct) {
          @Override
          void doWaiting() throws InterruptedException {
            // Wait with no timeout, which is the case under test.
            this.ct.waitForMeta();
          }
        };
    startWaitAliveThenWaitItLives(t, 1000);

    // This should trigger wake up of meta wait (It's the removal of the meta
    // region unassigned node that signals to catalogtrackers that a meta has
    // been assigned).
    String node = ct.getMetaNodeTracker().getNode();
    ZKUtil.createAndFailSilent(this.watcher, node);
    MetaEditor.updateMetaLocation(ct, HRegionInfo.FIRST_META_REGIONINFO, SN);
    ZKUtil.deleteNode(this.watcher, node);
    // Go get the new meta location. waitForMeta gets and verifies meta.
    Assert.assertTrue(ct.waitForMeta(10000).equals(SN));
    // Join the thread... should exit shortly.
    t.join();
    // Now meta is available.
    Assert.assertTrue(ct.waitForMeta(10000).equals(SN));
  }
 /**
  * Removes a peer by recursively deleting its znode under the peers znode.
  *
  * @param id identifier of the peer to remove
  * @throws IllegalArgumentException if no peer with this id exists
  * @throws ReplicationException if a ZooKeeper operation fails
  */
 @Override
 public void removePeer(String id) throws ReplicationException {
   try {
     final boolean known = peerExists(id);
     if (!known) {
       throw new IllegalArgumentException(
           "Cannot remove peer with id=" + id + " because that id does not exist.");
     }
     final String peerZNode = ZKUtil.joinZNode(this.peersZNode, id);
     ZKUtil.deleteNodeRecursively(this.zookeeper, peerZNode);
   } catch (KeeperException zkFailure) {
     throw new ReplicationException("Could not remove peer with id=" + id, zkFailure);
   }
 }
Example #16
0
 /**
  * Deletes the row keyed by the given id from the table, removes the id's znode (ignoring a
  * missing node) and drops the id from the tracker.
  *
  * @param id id of the record to remove
  * @throws RuntimeException wrapping any exception raised while removing
  */
 public void removeRecord(int id) {
   try {
     final Delete rowDelete = new Delete(Bytes.toBytes(id));
     table.delete(rowDelete);
     final String znodePath =
         ZKUtil.joinZNode(H2MetaTableTracker.NODE_NAME, Integer.toString(id));
     ZKUtil.deleteNodeFailSilent(watcher, znodePath);
     tracker.removeId(id);
   } catch (Exception cause) {
     throw new RuntimeException(cause);
   }
 }
 /**
  * Marks the given region server as draining by creating a znode named after the server under
  * the draining znode.
  *
  * @param hrs the region server to mark
  * @return the same region server instance that was passed in
  * @throws KeeperException if creating the znode fails
  */
 private static HRegionServer setDrainingServer(final HRegionServer hrs) throws KeeperException {
   final String announcement =
       "Making " + hrs.getServerName() + " the draining server; it has "
           + hrs.getNumberOfOnlineRegions() + " online regions";
   LOG.info(announcement);
   final ZooKeeperWatcher watcher = hrs.getZooKeeper();
   final String serverName = hrs.getServerName().toString();
   ZKUtil.createWithParents(watcher, ZKUtil.joinZNode(watcher.drainingZNode, serverName));
   return hrs;
 }
Example #18
0
 /**
  * Reads the redo position stored in the {@code ZooKeeperAdmin.METATABLE_NODE} znode.
  *
  * @param watch whether to set a watch on the znode while reading it
  * @return the int decoded from the znode data, or 1 when the znode has no data
  * @throws MetaTableTrackerException wrapping any exception raised while reading
  */
 public int getRedoPos(boolean watch) {
   try {
     final byte[] stored =
         watch
             ? ZKUtil.getDataAndWatch(watcher, ZooKeeperAdmin.METATABLE_NODE)
             : ZKUtil.getData(watcher, ZooKeeperAdmin.METATABLE_NODE);
     if (stored == null || stored.length == 0) {
       return 1;
     }
     return Bytes.toInt(stored);
   } catch (Exception cause) {
     throw new MetaTableTrackerException(cause);
   }
 }
  /**
   * Kick off a new sub-procedure on the listener with the data stored in the passed znode.
   *
   * <p>Will attempt to create the same procedure multiple times if a procedure znode with the same
   * name is created. It is left up to the coordinator to ensure this doesn't occur.
   *
   * <p>Nothing is started if an abort znode for the procedure already exists. Data that does not
   * start with the protobuf magic prefix causes the member to report an abort; ZooKeeper failures
   * and interruption are reported through {@code member.controllerConnectionFailure}.
   *
   * @param path full path to the znode for the procedure to start
   */
  private synchronized void startNewSubprocedure(String path) {
    LOG.debug("Found procedure znode: " + path);
    // The procedure name is the last component of the znode path.
    String opName = ZKUtil.getNodeName(path);
    // start watching for an abort notification for the procedure
    String abortZNode = zkController.getAbortZNode(opName);
    try {
      if (ZKUtil.watchAndCheckExists(zkController.getWatcher(), abortZNode)) {
        LOG.debug("Not starting:" + opName + " because we already have an abort notification.");
        return;
      }
    } catch (KeeperException e) {
      member.controllerConnectionFailure(
          "Failed to get the abort znode (" + abortZNode + ") for procedure :" + opName, e, opName);
      return;
    }

    // get the data for the procedure
    // subproc stays null until createSubprocedure succeeds, so the abort calls in the catch
    // blocks below may receive null.
    Subprocedure subproc = null;
    try {
      byte[] data = ZKUtil.getData(zkController.getWatcher(), path);
      if (!ProtobufUtil.isPBMagicPrefix(data)) {
        String msg =
            "Data in for starting procuedure "
                + opName
                + " is illegally formatted (no pb magic). "
                + "Killing the procedure: "
                + Bytes.toString(data);
        LOG.error(msg);
        // Handled by the IllegalArgumentException catch below, which reports the abort.
        throw new IllegalArgumentException(msg);
      }
      LOG.debug("start proc data length is " + data.length);
      // Strip the protobuf magic prefix; the remainder is handed to the member.
      data = Arrays.copyOfRange(data, ProtobufUtil.lengthOfPBMagic(), data.length);
      LOG.debug("Found data for znode:" + path);
      subproc = member.createSubprocedure(opName, data);
      member.submitSubprocedure(subproc);
    } catch (IllegalArgumentException iae) {
      LOG.error("Illegal argument exception", iae);
      sendMemberAborted(subproc, new ForeignException(getMemberName(), iae));
    } catch (IllegalStateException ise) {
      LOG.error("Illegal state exception ", ise);
      sendMemberAborted(subproc, new ForeignException(getMemberName(), ise));
    } catch (KeeperException e) {
      member.controllerConnectionFailure(
          "Failed to get data for new procedure:" + opName, e, opName);
    } catch (InterruptedException e) {
      member.controllerConnectionFailure(
          "Failed to get data for new procedure:" + opName, e, opName);
      // Restore the interrupt flag so callers further up can observe the interruption.
      Thread.currentThread().interrupt();
    }
  }
Example #20
0
  /**
   * Scans the whole table, converts every non-empty row into a {@link MetaRecord} and appends it
   * to {@code records}. For each record whose id is not yet known to the tracker, the record's
   * znode is created (if missing) and watched.
   *
   * @param records list that receives the loaded records
   * @throws Exception if scanning the table or a ZooKeeper operation fails
   */
  public void loadMetaRecords(List<MetaRecord> records) throws Exception {
    // The scanner holds resources and must be closed once iteration is done, even when an
    // exception interrupts the loop.
    org.apache.hadoop.hbase.client.ResultScanner scanner = table.getScanner(new Scan());
    try {
      for (Result r : scanner) {
        if (r.isEmpty()) {
          continue;
        }
        MetaRecord rec = getMetaRecord(r);
        records.add(rec);

        if (!tracker.contains(rec.getId())) {
          ZKUtil.createNodeIfNotExistsAndWatch(
              watcher,
              ZKUtil.joinZNode(H2MetaTableTracker.NODE_NAME, Integer.toString(rec.getId())),
              EMPTY_BYTE_ARRAY);
        }
      }
    } finally {
      scanner.close();
    }
  }
 /**
  * Lists the children of the abort znode, leaving a watch for new children, and triggers
  * {@code abort} for each child found.
  *
  * <p>A ZooKeeper failure is reported through {@code member.controllerConnectionFailure}.
  */
 private void watchForAbortedProcedures() {
   LOG.debug("Checking for aborted procedures on node: '" + zkController.getAbortZnode() + "'");
   try {
     // this is the list of the currently aborted procedures
     List<String> abortedProcedures =
         ZKUtil.listChildrenAndWatchForNewChildren(
             zkController.getWatcher(), zkController.getAbortZnode());
     // The listing can come back null (the sibling code that lists the acquired-barrier node
     // guards against it too); iterating over null would throw a NullPointerException.
     if (abortedProcedures == null) {
       return;
     }
     for (String node : abortedProcedures) {
       String abortNode = ZKUtil.joinZNode(zkController.getAbortZnode(), node);
       abort(abortNode);
     }
   } catch (KeeperException e) {
     member.controllerConnectionFailure(
         "Failed to list children for abort node:" + zkController.getAbortZnode(), e, null);
   }
 }
  /**
   * Starts a mini ZooKeeper cluster, creates the watcher with freshly created base and split-log
   * znodes and resets the counters.
   */
  @Before
  public void setup() throws Exception {
    TEST_UTIL.startMiniZKCluster();
    conf = TEST_UTIL.getConfiguration();
    zkw = new ZooKeeperWatcher(conf, "split-log-manager-tests", null);

    // Start from an empty base znode.
    ZKUtil.deleteChildrenRecursively(zkw, zkw.baseZNode);
    ZKUtil.createAndFailSilent(zkw, zkw.baseZNode);
    final int baseVersion = ZKUtil.checkExists(zkw, zkw.baseZNode);
    assertTrue(baseVersion != -1);
    LOG.debug(zkw.baseZNode + " created");

    ZKUtil.createAndFailSilent(zkw, zkw.splitLogZNode);
    final int splitLogVersion = ZKUtil.checkExists(zkw, zkw.splitLogZNode);
    assertTrue(splitLogVersion != -1);
    LOG.debug(zkw.splitLogZNode + " created");

    stopped = false;
    resetCounters();
  }
 /**
  * Attempts to move a task znode to its final state, conditional on the znode version recorded
  * in {@code details}.
  *
  * <p>endTask() can fail and the only way to recover out of it is for the {@link
  * org.apache.hadoop.hbase.master.SplitLogManager} to timeout the task node. Every failure is
  * logged and counted in {@code SplitLogCounters.tot_wkr_final_transition_failed}.
  *
  * @param slt the final task state to write
  * @param ctr counter incremented when the transition succeeds
  * @param details task details; cast to {@code ZkSplitTaskDetails} to obtain the task node and
  *     its current version
  */
 @Override
 public void endTask(SplitLogTask slt, AtomicLong ctr, SplitTaskDetails details) {
   final ZkSplitTaskDetails zkTaskDetails = (ZkSplitTaskDetails) details;
   final String task = zkTaskDetails.getTaskNode();
   final int expectedVersion = zkTaskDetails.getCurTaskZKVersion().intValue();
   try {
     final boolean transitioned =
         ZKUtil.setData(watcher, task, slt.toByteArray(), expectedVersion);
     if (transitioned) {
       LOG.info("successfully transitioned task " + task + " to final state " + slt);
       ctr.incrementAndGet();
       return;
     }
     LOG.warn(
         "failed to transistion task " + task + " to end state " + slt
             + " because of version mismatch ");
   } catch (KeeperException.BadVersionException versionMismatch) {
     LOG.warn(
         "transisition task " + task + " to " + slt + " failed because of version mismatch",
         versionMismatch);
   } catch (KeeperException.NoNodeException missingNode) {
     LOG.fatal(
         "logic error - end task " + task + " " + slt + " failed because task doesn't exist",
         missingNode);
   } catch (KeeperException zkFailure) {
     LOG.warn("failed to end task, " + task + " " + slt, zkFailure);
   }
   // Only reached when the transition did not succeed.
   SplitLogCounters.tot_wkr_final_transition_failed.incrementAndGet();
 }
 /**
  * Starts a distributed log split in a background thread, deletes the task znode from under the
  * manager and waits for the manager to notice the missing node and report batch success. The
  * log file itself must still exist afterwards.
  */
 @Test
 public void testVanishingTaskZNode() throws Exception {
   LOG.info("testVanishingTaskZNode");
   conf.setInt("hbase.splitlog.manager.unassigned.timeout", 0);
   slm = new SplitLogManager(zkw, conf, stopper, "dummy-master", null);
   slm.finishInitialization();
   // Create a log directory containing a single log file to split.
   FileSystem fs = TEST_UTIL.getTestFileSystem();
   final Path logDir = new Path(fs.getWorkingDirectory(), UUID.randomUUID().toString());
   fs.mkdirs(logDir);
   Path logFile = new Path(logDir, UUID.randomUUID().toString());
   fs.createNewFile(logFile);
   new Thread() {
     public void run() {
       try {
         // this call will block because there are no SplitLogWorkers
         slm.splitLogDistributed(logDir);
       } catch (Exception e) {
         LOG.warn("splitLogDistributed failed", e);
         // NOTE(review): fail() is called on this background thread, not the test thread, so it
         // presumably does not fail the test by itself - confirm.
         fail();
       }
     }
   }.start();
   // Wait until the manager has created the task node.
   waitForCounter(tot_mgr_node_create_result, 0, 1, 10000);
   String znode = ZKSplitLog.getEncodedNodeName(zkw, logFile.toString());
   // remove the task znode
   ZKUtil.deleteNode(zkw, znode);
   waitForCounter(tot_mgr_get_data_nonode, 0, 1, 30000);
   waitForCounter(tot_mgr_log_split_batch_success, 0, 1, 1000);
   assertTrue(fs.exists(logFile));
   // Clean up the temporary log directory.
   fs.delete(logDir, true);
 }
Example #25
0
 /**
  * Attempts to move a task znode to its final state, conditional on the given znode version.
  *
  * <p>endTask() can fail and the only way to recover out of it is for the {@link
  * SplitLogManager} to timeout the task node. Every failure is logged and counted in {@code
  * SplitLogCounters.tot_wkr_final_transition_failed}.
  *
  * @param zkw watcher used to write to ZooKeeper
  * @param slt the final task state to write
  * @param ctr counter incremented when the transition succeeds
  * @param task path of the task znode
  * @param taskZKVersion version the task znode is expected to have
  */
 public static void endTask(
     ZooKeeperWatcher zkw, SplitLogTask slt, AtomicLong ctr, String task, int taskZKVersion) {
   try {
     final boolean transitioned = ZKUtil.setData(zkw, task, slt.toByteArray(), taskZKVersion);
     if (transitioned) {
       LOG.info("successfully transitioned task " + task + " to final state " + slt);
       ctr.incrementAndGet();
       return;
     }
     LOG.warn(
         "failed to transistion task " + task + " to end state " + slt
             + " because of version mismatch ");
   } catch (KeeperException.BadVersionException versionMismatch) {
     LOG.warn(
         "transisition task " + task + " to " + slt + " failed because of version mismatch",
         versionMismatch);
   } catch (KeeperException.NoNodeException missingNode) {
     LOG.fatal(
         "logic error - end task " + task + " " + slt + " failed because task doesn't exist",
         missingNode);
   } catch (KeeperException zkFailure) {
     LOG.warn("failed to end task, " + task + " " + slt, zkFailure);
   }
   // Only reached when the transition did not succeed.
   SplitLogCounters.tot_wkr_final_transition_failed.incrementAndGet();
 }
  /**
   * Keeps an orphan OWNED task alive for a while next to an unassigned task, then stops updating
   * it and waits for the manager to resubmit the orphan and afterwards to count an
   * unassigned-timeout resubmit.
   */
  @Test
  public void testUnassignedTimeout() throws Exception {
    LOG.info("TestUnassignedTimeout - iff all tasks are unassigned then resubmit");

    // Create an orphan task in OWNED state.
    final String orphanZNode = ZKSplitLog.getEncodedNodeName(zkw, "orphan/1");
    final ServerName worker1 = new ServerName("worker1,1,1");
    SplitLogTask ownedState = new SplitLogTask.Owned(worker1);
    zkw.getRecoverableZooKeeper()
        .create(orphanZNode, ownedState.toByteArray(), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);

    slm = new SplitLogManager(zkw, conf, stopper, master, DUMMY_MASTER, null);
    slm.finishInitialization();
    waitForCounter(tot_mgr_orphan_task_acquired, 0, 1, to / 2);

    // Submit another task which will stay in unassigned mode.
    submitTaskAndWait(new TaskBatch(), "foo/1");

    // Rewrite the orphan's OWNED state every 100 ms, for (3 * to) / 100 rounds.
    int round = 0;
    while (round < (3 * to) / 100) {
      Thread.sleep(100);
      final ServerName worker2 = new ServerName("worker1,1,1");
      ownedState = new SplitLogTask.Owned(worker2);
      ZKUtil.setData(zkw, orphanZNode, ownedState.toByteArray());
      round++;
    }

    // The owned node is no longer being updated, so it should get resubmitted.
    LOG.info("waiting for manager to resubmit the orphan task");
    waitForCounter(tot_mgr_resubmit, 0, 1, to + to / 2);

    // Now all the nodes are unassigned; the manager should post another rescan.
    waitForCounter(tot_mgr_resubmit_unassigned, 0, 1, 2 * to + to / 2);
  }
Example #27
0
  /**
   * Runs {@code SweepMapper.map} on a single mocked row while holding the table write lock, and
   * verifies through a mocked context that the mapper writes the mob file name (the cell value
   * without its 4-character prefix) together with the original cell.
   */
  @Test
  public void TestMap() throws Exception {
    String prefix = "0000";
    final String fileName = "19691231f2cd014ea28f42788214560a21a44cef";
    final String mobFilePath = prefix + fileName;

    ImmutableBytesWritable r = new ImmutableBytesWritable(Bytes.toBytes("r"));
    final KeyValue[] kvList = new KeyValue[1];
    kvList[0] =
        new KeyValue(
            Bytes.toBytes("row"),
            Bytes.toBytes("family"),
            Bytes.toBytes("column"),
            Bytes.toBytes(mobFilePath));

    Result columns = mock(Result.class);
    when(columns.rawCells()).thenReturn(kvList);

    Configuration configuration = new Configuration(TEST_UTIL.getConfiguration());
    ZooKeeperWatcher zkw = new ZooKeeperWatcher(configuration, "1", new DummyMobAbortable());
    TableName tn = TableName.valueOf("testSweepMapper");
    TableName lockName = MobUtils.getTableLockName(tn);
    String znode = ZKUtil.joinZNode(zkw.tableLockZNode, lockName.getNameAsString());
    configuration.set(SweepJob.SWEEP_JOB_ID, "1");
    configuration.set(SweepJob.SWEEP_JOB_TABLE_NODE, znode);
    ServerName serverName = SweepJob.getCurrentServerName(configuration);
    configuration.set(SweepJob.SWEEP_JOB_SERVERNAME, serverName.toString());

    TableLockManager tableLockManager =
        TableLockManager.createTableLockManager(configuration, zkw, serverName);
    TableLock lock = tableLockManager.writeLock(lockName, "Run sweep tool");
    lock.acquire();
    try {
      Mapper<ImmutableBytesWritable, Result, Text, KeyValue>.Context ctx =
          mock(Mapper.Context.class);
      when(ctx.getConfiguration()).thenReturn(configuration);
      SweepMapper map = new SweepMapper();
      // Intercept the mapper's output and check the written key/value pair.
      doAnswer(
              new Answer<Void>() {

                @Override
                public Void answer(InvocationOnMock invocation) throws Throwable {
                  Text text = (Text) invocation.getArguments()[0];
                  KeyValue kv = (KeyValue) invocation.getArguments()[1];

                  // assertEquals takes (expected, actual); the expected value is the file name.
                  assertEquals(fileName, Bytes.toString(text.getBytes(), 0, text.getLength()));
                  assertEquals(0, Bytes.compareTo(kv.getKey(), kvList[0].getKey()));

                  return null;
                }
              })
          .when(ctx)
          .write(any(Text.class), any(KeyValue.class));

      map.map(r, columns, ctx);
    } finally {
      // Always give the table lock back, even when the mapper or an assertion fails.
      lock.release();
    }
  }
Example #28
0
 /**
  * Deletes the znode whose path is derived from the given server, port and role.
  *
  * @param sn server name used to build the znode path
  * @param port port used to build the znode path
  * @param isMaster whether the path is built for a master ({@code true}) or not
  */
 public static void deletePgPortEphemeralNode(ServerName sn, int port, boolean isMaster) {
   try {
     ZKUtil.deleteNode(
         ZooKeeperAdmin.getZooKeeperWatcher(),
         getPgPortEphemeralNodePath(sn, port, isMaster));
   } catch (KeeperException zkFailure) {
     // Surface ZooKeeper failures as the database's own exception type.
     throw DbException.convert(zkFailure);
   }
 }
  /**
   * Writes a DONE state into a task znode and verifies that the batch completes and that the
   * task znode no longer exists afterwards.
   */
  @Test
  public void testTaskDone() throws Exception {
    LOG.info("TestTaskDone - cleanup task node once in DONE state");

    slm = new SplitLogManager(zkw, conf, stopper, "dummy-master", null);
    slm.finishInitialization();
    final TaskBatch taskBatch = new TaskBatch();
    final String taskZNode = submitTaskAndWait(taskBatch, "foo/1");
    ZKUtil.setData(zkw, taskZNode, TaskState.TASK_DONE.get("worker"));

    // Block until every installed task of the batch is reported done.
    synchronized (taskBatch) {
      while (taskBatch.installed != taskBatch.done) {
        taskBatch.wait();
      }
    }
    waitForCounter(tot_mgr_task_deleted, 0, 1, 1000);

    final int existsResult = ZKUtil.checkExists(zkw, taskZNode);
    assertTrue(existsResult == -1);
  }
Example #30
0
 /**
  * Builds the znode path for a server/port pair under {@code ZooKeeperAdmin.PG_SERVER_NODE}.
  *
  * <p>The child name is the role prefix ("M" when {@code isMaster}, otherwise "S"), a colon, the
  * server's host-and-port, the host/port separator and the given port.
  *
  * @param sn server whose host-and-port goes into the name
  * @param port port appended to the name
  * @param isMaster selects the "M" prefix when {@code true}, "S" otherwise
  * @return the joined znode path
  */
 private static String getPgPortEphemeralNodePath(ServerName sn, int port, boolean isMaster) {
   final String rolePrefix;
   if (isMaster) {
     rolePrefix = "M";
   } else {
     rolePrefix = "S";
   }
   final String childName =
       rolePrefix + ":" + sn.getHostAndPort() + Addressing.HOSTNAME_PORT_SEPARATOR + port;
   return ZKUtil.joinZNode(ZooKeeperAdmin.PG_SERVER_NODE, childName);
 }