@Test
  public void testMailboxTracker() throws Exception {
    ZooKeeper zk = getClient(0);
    MailboxTracker tracker = new MailboxTracker(zk, handler);
    MailboxPublisher publisher = new MailboxPublisher(VoltZK.mailboxes + "/1");

    VoltZK.createPersistentZKNodes(zk);

    publisher.registerMailbox(MailboxType.ExecutionSite, new MailboxNodeContent(1L, 0));
    publisher.registerMailbox(MailboxType.ExecutionSite, new MailboxNodeContent(2L, 1));
    publisher.publish(zk);

    // start the mailbox tracker and watch all the changes
    tracker.start();
    while (handler.m_handleCount.get() == 0) {
      Thread.sleep(1);
    }
    Map<MailboxType, List<MailboxNodeContent>> value = handler.m_mailboxes;
    assertTrue(value.containsKey(MailboxType.ExecutionSite));
    List<MailboxNodeContent> list = value.get(MailboxType.ExecutionSite);
    assertEquals(2, list.size());
    MailboxNodeContent node1 = list.get(0);
    assertEquals(1, node1.HSId.longValue());
    assertEquals(0, node1.partitionId.intValue());
    MailboxNodeContent node2 = list.get(1);
    assertEquals(2, node2.HSId.longValue());
    assertEquals(1, node2.partitionId.intValue());
    tracker.shutdown();
  }
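
  // The tests above and below poll a shared "handler" for m_handleCount and
  // m_mailboxes. A minimal sketch of such a handler follows; the
  // MailboxUpdateHandler interface name and its handleMailboxUpdate callback
  // are assumptions about the contract MailboxTracker expects, and the sketch
  // assumes java.util.concurrent.atomic.AtomicInteger plus java.util.Map/List
  // are imported at the top of the file.
  static class MockMailboxUpdateHandler implements MailboxUpdateHandler {
    final AtomicInteger m_handleCount = new AtomicInteger(0);
    volatile Map<MailboxType, List<MailboxNodeContent>> m_mailboxes;

    @Override
    public void handleMailboxUpdate(Map<MailboxType, List<MailboxNodeContent>> mailboxes) {
      m_mailboxes = mailboxes;         // latest snapshot the assertions read
      m_handleCount.incrementAndGet(); // lets the tests spin until an update arrives
    }
  }

  private final MockMailboxUpdateHandler handler = new MockMailboxUpdateHandler();
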
  @Test
  public void testUpdate() throws Exception {
    ZooKeeper zk = getClient(0);
    ZooKeeper zk2 = getClient(0);
    MailboxTracker tracker = new MailboxTracker(zk, handler);
    MailboxPublisher publisher = new MailboxPublisher(VoltZK.mailboxes + "/1");

    VoltZK.createPersistentZKNodes(zk);

    publisher.registerMailbox(MailboxType.ExecutionSite, new MailboxNodeContent(1L, 0));
    publisher.publish(zk2);

    publisher = new MailboxPublisher(VoltZK.mailboxes + "/2");
    publisher.registerMailbox(MailboxType.ExecutionSite, new MailboxNodeContent(2L, 1));
    publisher.publish(zk);

    tracker.start();

    // The ephemeral node just created will disappear and we should get an update
    zk2.close();
    while (handler.m_handleCount.get() < 2) {
      Thread.sleep(1);
    }

    Map<MailboxType, List<MailboxNodeContent>> value = handler.m_mailboxes;
    assertTrue(value.containsKey(MailboxType.ExecutionSite));
    List<MailboxNodeContent> list = value.get(MailboxType.ExecutionSite);
    assertEquals(1, list.size());
    assertEquals(2, list.get(0).HSId.longValue());
    assertEquals(1, list.get(0).partitionId.intValue());
    tracker.shutdown();
  }
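
  // testUpdate relies on ZooKeeper removing ephemeral znodes when the session
  // that created them closes (zk2.close() above), which is what triggers the
  // second tracker update. A stand-alone sketch of that behavior using the
  // plain ZooKeeper client API; the connect string, path, and timeout are
  // placeholders rather than values from the test cluster, where clients
  // normally come from getClient(0).
  public void ephemeralNodeSketch() throws Exception {
    ZooKeeper owner = new ZooKeeper("127.0.0.1:2181", 30000, event -> {});
    ZooKeeper observer = new ZooKeeper("127.0.0.1:2181", 30000, event -> {});

    // The owning session creates an ephemeral node...
    owner.create("/demo-ephemeral", new byte[0],
        ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL);
    assertNotNull(observer.exists("/demo-ephemeral", false));

    // ...and closing that session deletes the node, so a children watcher
    // such as MailboxTracker sees the parent's child list change.
    owner.close();
    while (observer.exists("/demo-ephemeral", false) != null) {
      Thread.sleep(1);
    }
    observer.close();
  }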
Example #3
    @Override
    public void run(List<String> children) {
      List<Long> updatedHSIds = VoltZK.childrenToReplicaHSIds(children);
      // compute the set of HSIds in the callback list that we have not seen before
      Set<Long> newHSIds = new HashSet<Long>(updatedHSIds);
      newHSIds.removeAll(m_replicas);
      tmLog.debug("Newly seen replicas: " + CoreUtils.hsIdCollectionToString(newHSIds));
      // compute the set of previously seen HSIds that have vanished from the callback list
      Set<Long> missingHSIds = new HashSet<Long>(m_replicas);
      missingHSIds.removeAll(updatedHSIds);
      tmLog.debug("Newly dead replicas: " + CoreUtils.hsIdCollectionToString(missingHSIds));

      tmLog.debug(
          "Handling babysitter callback for partition "
              + m_partitionId
              + ": children: "
              + CoreUtils.hsIdCollectionToString(updatedHSIds));
      if (m_state.get() == AppointerState.CLUSTER_START) {
        // We can't yet tolerate a host failure during startup.  Crash it all
        if (missingHSIds.size() > 0) {
          VoltDB.crashGlobalVoltDB("Node failure detected during startup.", false, null);
        }
        // ENG-3166: Eventually we would like to get rid of the extra replicas beyond k_factor,
        // but for now we just look to see how many replicas of this partition we actually expect
        // and gate leader assignment on that many copies showing up.
        int replicaCount = m_kfactor + 1;
        JSONArray parts;
        try {
          parts = m_topo.getJSONArray("partitions");
          for (int p = 0; p < parts.length(); p++) {
            JSONObject aPartition = parts.getJSONObject(p);
            int pid = aPartition.getInt("partition_id");
            if (pid == m_partitionId) {
              replicaCount = aPartition.getJSONArray("replicas").length();
            }
          }
        } catch (JSONException e) {
          // Ignore and just assume the normal number of replicas
        }
        if (children.size() == replicaCount) {
          m_currentLeader = assignLeader(m_partitionId, updatedHSIds);
        } else {
          tmLog.info(
              "Waiting on "
                  + (replicaCount - children.size())
                  + " more nodes "
                  + "for k-safety before startup");
        }
      } else {
        // Check for k-safety
        if (!isClusterKSafe()) {
          VoltDB.crashGlobalVoltDB(
              "Some partitions have no replicas.  Cluster has become unviable.", false, null);
        }
        // Check if replay has completed
        if (!m_replayComplete.get()) {
          VoltDB.crashGlobalVoltDB(
              "Detected node failure during command log replay. Cluster will shut down.",
              false,
              null);
        }
        // Check to see if there's been a possible network partition and we're not already handling
        // it
        if (m_partitionDetectionEnabled && !m_partitionDetected) {
          doPartitionDetectionActivities();
        }
        // If we survived the above gauntlet of fail, appoint a new leader for this partition.
        if (missingHSIds.contains(m_currentLeader)) {
          m_currentLeader = assignLeader(m_partitionId, updatedHSIds);
        }
      }
      m_replicas.clear();
      m_replicas.addAll(updatedHSIds);
    }
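
// For reference, a stand-alone sketch of the topology lookup performed in the
// CLUSTER_START branch above. The JSON shape ("partitions" -> objects with
// "partition_id" and "replicas") is inferred from the field names in the
// callback; the concrete values and the use of the plain org.json classes are
// assumptions for illustration only.
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;

public class TopologyLookupSketch {
  public static void main(String[] args) throws JSONException {
    JSONObject topo = new JSONObject(
        "{\"partitions\":["
            + "{\"partition_id\":0,\"replicas\":[100,200]},"
            + "{\"partition_id\":1,\"replicas\":[101,201,301]}]}");

    int partitionId = 1;
    int kfactor = 1;
    int replicaCount = kfactor + 1; // default, as in the callback above
    JSONArray parts = topo.getJSONArray("partitions");
    for (int p = 0; p < parts.length(); p++) {
      JSONObject aPartition = parts.getJSONObject(p);
      if (aPartition.getInt("partition_id") == partitionId) {
        replicaCount = aPartition.getJSONArray("replicas").length();
      }
    }
    // Prints 3: the topology entry overrides the k-factor based default of 2.
    System.out.println("expected replicas for partition " + partitionId + ": " + replicaCount);
  }
}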