@Override public void setRejoinComplete(RejoinProducer.ReplayCompletionAction replayComplete) { // transition from kStateRejoining to live rejoin replay. // pass through this transition in all cases; if not doing // live rejoin, will transfer to kStateRunning as usual // as the rejoin task log will be empty. assert (m_rejoinState == kStateRejoining); if (replayComplete == null) { throw new RuntimeException("Null Replay Complete Action."); } m_rejoinState = kStateReplayingRejoin; m_replayCompletionAction = replayComplete; if (m_rejoinTaskLog != null) { m_rejoinTaskLog.setEarliestTxnId(m_replayCompletionAction.getSnapshotTxnId()); } }
@Override public void runForRejoin(SiteProcedureConnection siteConnection, TaskLog taskLog) throws IOException { if (!m_txnState.isReadOnly() && !m_completeMsg.isRollback()) { // ENG-5276: Need to set the last committed spHandle so that the rejoining site gets the // accurate // per-partition txnId set for the next snapshot. Normally, this is done through undo log // truncation. // Since the task is not run here, we need to set the last committed spHandle explicitly. // // How does this work? // - Blocking rejoin with idle cluster: The spHandle is updated here with the spHandle of the // stream // snapshot that transfers the rejoin data. So the snapshot right after rejoin should have // the spHandle // passed here. // - Live rejoin with idle cluster: Same as blocking rejoin. // - Live rejoin with workload: Transactions will be logged and replayed afterward. The // spHandle will be // updated when they commit and truncate undo logs. So at the end of replay, // the spHandle should have the latest value. If all replayed transactions rolled back, // the spHandle is still guaranteed to be the spHandle of the stream snapshot that // transfered the // rejoin data, which is the correct value. siteConnection.setSpHandleForSnapshotDigest(m_txnState.m_spHandle); } if (!m_completeMsg.isRestart()) { // future: offer to siteConnection.IBS for replay. doCommonSPICompleteActions(); } if (!m_txnState.isReadOnly()) { // We need to log the restarting message to the task log so we'll replay the whole // stream faithfully taskLog.logTask(m_completeMsg); } final CompleteTransactionResponseMessage resp = new CompleteTransactionResponseMessage(m_completeMsg); resp.setIsRecovering(true); resp.m_sourceHSId = m_initiator.getHSId(); m_initiator.deliver(resp); }
void replayFromTaskLog() throws IOException { // not yet time to catch-up. if (m_rejoinState != kStateReplayingRejoin) { return; } // replay 10:1 in favor of replay for (int i = 0; i < 10; ++i) { if (m_rejoinTaskLog.isEmpty()) { break; } TransactionInfoBaseMessage tibm = m_rejoinTaskLog.getNextMessage(); if (tibm == null) { break; } // Apply the readonly / sysproc filter. With Iv2 read optimizations, // reads should not reach here; the cost of post-filtering shouldn't // be particularly high (vs pre-filtering). if (filter(tibm)) { continue; } if (tibm instanceof Iv2InitiateTaskMessage) { Iv2InitiateTaskMessage m = (Iv2InitiateTaskMessage) tibm; SpProcedureTask t = new SpProcedureTask(m_initiatorMailbox, m.getStoredProcedureName(), null, m, null); t.runFromTaskLog(this); } else if (tibm instanceof FragmentTaskMessage) { FragmentTaskMessage m = (FragmentTaskMessage) tibm; if (global_replay_mpTxn == null) { global_replay_mpTxn = new ParticipantTransactionState(m.getTxnId(), m); } else if (global_replay_mpTxn.txnId != m.getTxnId()) { VoltDB.crashLocalVoltDB( "Started a MP transaction during replay before completing " + " open transaction.", false, null); } FragmentTask t = new FragmentTask(m_initiatorMailbox, m, global_replay_mpTxn); t.runFromTaskLog(this); } else if (tibm instanceof CompleteTransactionMessage) { // Needs improvement: completes for sysprocs aren't filterable as sysprocs. // Only complete transactions that are open... if (global_replay_mpTxn != null) { CompleteTransactionMessage m = (CompleteTransactionMessage) tibm; CompleteTransactionTask t = new CompleteTransactionTask(global_replay_mpTxn, null, m, null); if (!m.isRestart()) { global_replay_mpTxn = null; } t.runFromTaskLog(this); } } else { VoltDB.crashLocalVoltDB( "Can not replay message type " + tibm + " during live rejoin. Unexpected error.", false, null); } } // exit replay being careful not to exit in the middle of a multi-partititon // transaction. The SPScheduler doesn't have a valid transaction state for a // partially replayed MP txn and in case of rollback the scheduler's undo token // is wrong. Run MP txns fully kStateRejoining or fully kStateRunning. if (m_rejoinTaskLog.isEmpty() && global_replay_mpTxn == null) { setReplayRejoinComplete(); } }