Example #1
0
  /**
   * Handles this initiator being promoted to leader of its partition.
   *
   * <p>Creates and starts a new term, then runs a promotion repair algorithm in a loop until one
   * run reports success. On success the leader state is set on the initiator mailbox and this
   * site's HSId is written to the leader cache under its partition id. Once the loop exits, export
   * mastership for the partition is accepted as well.
   *
   * <p>If the initiator mailbox refuses the promotion, or any exception escapes the sequence, the
   * failure is handed to {@code VoltDB.crashLocalVoltDB}.
   */
  @Override
  public void acceptPromotion() {
    try {
      final long startTime = System.currentTimeMillis();

      m_term =
          createTerm(
              m_messenger.getZK(), m_partitionId, getInitiatorHSId(), m_initiatorMailbox, m_whoami);
      m_term.start();

      // Primitive flag: the loop condition and the branch below need no unboxing.
      boolean success = false;
      while (!success) {
        // A fresh repair algorithm is built for every attempt.
        final RepairAlgo repair =
            createPromoteAlgo(m_term.getInterestingHSIds(), m_initiatorMailbox, m_whoami);

        // if rejoining, a promotion can not be accepted. If the rejoin is
        // in-progress, the loss of the master will terminate the rejoin
        // anyway. If the rejoin has transferred data but not left the rejoining
        // state, it will respond REJOINING to new work which will break
        // the MPI and/or be unexpected to external clients.
        if (!m_initiatorMailbox.acceptPromotion()) {
          tmLog.error(m_whoami + "rejoining site can not be promoted to leader. Terminating.");
          VoltDB.crashLocalVoltDB("A rejoining site can not be promoted to leader.", false, null);
          return;
        }
        m_initiatorMailbox.setRepairAlgo(repair);

        // term syslogs the start of leader promotion.
        // NOTE(review): get() presumably blocks until the repair run completes - confirm against
        // RepairAlgo.start(). First element is the success flag; the second is passed to
        // setLeaderState() on success.
        final Pair<Boolean, Long> result = repair.start().get();
        success = result.getFirst();
        if (success) {
          m_initiatorMailbox.setLeaderState(result.getSecond());
          tmLog.info(
              m_whoami
                  + "finished leader promotion. Took "
                  + (System.currentTimeMillis() - startTime)
                  + " ms.");

          // THIS IS where map cache should be updated, not
          // in the promotion algorithm.
          LeaderCacheWriter iv2masters = new LeaderCache(m_messenger.getZK(), m_zkMailboxNode);
          iv2masters.put(m_partitionId, m_initiatorMailbox.getHSId());
        } else {
          // The only known reason to fail is a failed replica during
          // recovery; that's a bounded event (by k-safety).
          // CrashVoltDB here means one node failure causing another.
          // Don't create a cascading failure - just try again.
          tmLog.info(
              m_whoami
                  + "interrupted during leader promotion after "
                  + (System.currentTimeMillis() - startTime)
                  + " ms. of trying. Retrying.");
        }
      }
      // Tag along and become the export master too
      ExportManager.instance().acceptMastership(m_partitionId);
    } catch (Exception e) {
      // Anything thrown above (including from the blocking get()) is treated as terminal.
      VoltDB.crashLocalVoltDB("Terminally failed leader promotion.", true, e);
    }
  }
Example #2
0
 /**
  * Initializes the export subsystem through {@code ExportManager.initialize}, passing values read
  * from {@code m_rvdb} together with the {@code m_isRejoin} flag.
  *
  * <p>Any {@link Throwable} raised during initialization is handed to {@code
  * VoltDB.crashLocalVoltDB} along with the message "Error setting up export".
  */
 @Override
 public void run() {
   try {
     // Export reads its configuration from the catalog context supplied here.
     ExportManager.initialize(
         m_rvdb.m_myHostId,
         m_rvdb.m_catalogContext,
         m_isRejoin,
         m_rvdb.m_messenger,
         m_rvdb.m_partitionsToSitesAtStartupForExportInit);
   } catch (Throwable exportSetupFailure) {
     VoltDB.crashLocalVoltDB("Error setting up export", true, exportSetupFailure);
   }
 }
Example #3
0
 /**
  * Forwards a cluster topology change to the export manager.
  *
  * <p>Does nothing when {@code ExportManager.instance()} returns {@code null}.
  */
 @Override
 public void noticedTopologyChange() {
   // No export manager available: nothing to notify.
   if (ExportManager.instance() == null) {
     return;
   }
   ExportManager.instance().notifyOfClusterTopologyChange();
 }