@Override public void acceptPromotion() { try { long startTime = System.currentTimeMillis(); Boolean success = false; m_term = createTerm( m_messenger.getZK(), m_partitionId, getInitiatorHSId(), m_initiatorMailbox, m_whoami); m_term.start(); while (!success) { RepairAlgo repair = null; repair = createPromoteAlgo(m_term.getInterestingHSIds(), m_initiatorMailbox, m_whoami); // if rejoining, a promotion can not be accepted. If the rejoin is // in-progress, the loss of the master will terminate the rejoin // anyway. If the rejoin has transferred data but not left the rejoining // state, it will respond REJOINING to new work which will break // the MPI and/or be unexpected to external clients. if (!m_initiatorMailbox.acceptPromotion()) { tmLog.error(m_whoami + "rejoining site can not be promoted to leader. Terminating."); VoltDB.crashLocalVoltDB("A rejoining site can not be promoted to leader.", false, null); return; } m_initiatorMailbox.setRepairAlgo(repair); // term syslogs the start of leader promotion. Pair<Boolean, Long> result = repair.start().get(); success = result.getFirst(); if (success) { m_initiatorMailbox.setLeaderState(result.getSecond()); tmLog.info( m_whoami + "finished leader promotion. Took " + (System.currentTimeMillis() - startTime) + " ms."); // THIS IS where map cache should be updated, not // in the promotion algorithm. LeaderCacheWriter iv2masters = new LeaderCache(m_messenger.getZK(), m_zkMailboxNode); iv2masters.put(m_partitionId, m_initiatorMailbox.getHSId()); } else { // The only known reason to fail is a failed replica during // recovery; that's a bounded event (by k-safety). // CrashVoltDB here means one node failure causing another. // Don't create a cascading failure - just try again. tmLog.info( m_whoami + "interrupted during leader promotion after " + (System.currentTimeMillis() - startTime) + " ms. of " + "trying. 
Retrying."); } } // Tag along and become the export master too ExportManager.instance().acceptMastership(m_partitionId); } catch (Exception e) { VoltDB.crashLocalVoltDB("Terminally failed leader promotion.", true, e); } }
@Override public void run() { // Let the Export system read its configuration from the catalog. try { ExportManager.initialize( m_rvdb.m_myHostId, m_rvdb.m_catalogContext, m_isRejoin, m_rvdb.m_messenger, m_rvdb.m_partitionsToSitesAtStartupForExportInit); } catch (Throwable t) { VoltDB.crashLocalVoltDB("Error setting up export", true, t); } }
/**
 * Forwards a cluster topology change notification to the export manager,
 * provided an export manager instance is available.
 */
@Override
public void noticedTopologyChange() {
    // Nothing to notify when there is no export manager instance.
    if (ExportManager.instance() == null) {
        return;
    }
    ExportManager.instance().notifyOfClusterTopologyChange();
}