@Override
public void run(SiteProcedureConnection siteConnection)
{
    hostLog.debug("STARTING: " + this);
    if (!m_txnState.isReadOnly()) {
        // the truncation point token SHOULD be part of m_txn. However, the
        // legacy interfaces don't work this way and IV2 hasn't changed this
        // ownership yet. But truncateUndoLog is written assuming the right
        // eventual encapsulation.
        siteConnection.truncateUndoLog(m_completeMsg.isRollback(),
                m_txnState.getBeginUndoToken(),
                m_txnState.m_spHandle,
                m_txnState.getUndoLog());
    }
    if (!m_completeMsg.isRestart()) {
        doCommonSPICompleteActions();

        // Log invocation to DR
        logToDR(siteConnection.getDRGateway());
        hostLog.debug("COMPLETE: " + this);
    } else {
        // If we're going to restart the transaction, then reset the begin undo token so the
        // first FragmentTask will set it correctly. Otherwise, don't set the Done state or
        // flush the queue; we want the TransactionTaskQueue to stay blocked on this TXN ID
        // for the restarted fragments.
        m_txnState.setBeginUndoToken(Site.kInvalidUndoToken);
        hostLog.debug("RESTART: " + this);
    }

    final CompleteTransactionResponseMessage resp =
            new CompleteTransactionResponseMessage(m_completeMsg);
    resp.m_sourceHSId = m_initiator.getHSId();
    m_initiator.deliver(resp);
}
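/*
 * A minimal, self-contained sketch (hypothetical names, not VoltDB's actual classes)
 * of the begin-undo-token contract that truncateUndoLog relies on above: each unit
 * of work is tagged with a monotonically increasing undo token; on rollback every
 * action at or after the transaction's begin token is undone in reverse order, and
 * on commit the same range is simply released.
 */
import java.util.ArrayDeque;
import java.util.Deque;

class SketchUndoLog {
    private static final class Entry {
        final long token;
        final Runnable undoAction;
        Entry(long token, Runnable undoAction) {
            this.token = token;
            this.undoAction = undoAction;
        }
    }

    private final Deque<Entry> m_entries = new ArrayDeque<>();

    void register(long token, Runnable undoAction) {
        m_entries.push(new Entry(token, undoAction));
    }

    // Mirrors the (rollback, beginUndoToken) arguments passed to truncateUndoLog above.
    void truncate(boolean rollback, long beginUndoToken) {
        while (!m_entries.isEmpty() && m_entries.peek().token >= beginUndoToken) {
            Entry e = m_entries.pop();
            if (rollback) {
                e.undoAction.run(); // newest-first, so effects unwind in reverse order
            }
            // on commit the entry is just dropped (released)
        }
    }
}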
@Override
public void runForRejoin(SiteProcedureConnection siteConnection, TaskLog taskLog)
        throws IOException
{
    if (!m_txnState.isReadOnly() && !m_completeMsg.isRollback()) {
        // ENG-5276: Need to set the last committed spHandle so that the rejoining site gets
        // the accurate per-partition txnId set for the next snapshot. Normally, this is done
        // through undo log truncation. Since the task is not run here, we need to set the
        // last committed spHandle explicitly.
        //
        // How does this work?
        // - Blocking rejoin with idle cluster: The spHandle is updated here with the spHandle
        //   of the stream snapshot that transfers the rejoin data. So the snapshot right after
        //   rejoin should have the spHandle passed here.
        // - Live rejoin with idle cluster: Same as blocking rejoin.
        // - Live rejoin with workload: Transactions will be logged and replayed afterward. The
        //   spHandle will be updated when they commit and truncate undo logs. So at the end of
        //   replay, the spHandle should have the latest value. If all replayed transactions
        //   rolled back, the spHandle is still guaranteed to be the spHandle of the stream
        //   snapshot that transferred the rejoin data, which is the correct value.
        siteConnection.setSpHandleForSnapshotDigest(m_txnState.m_spHandle);
    }

    if (!m_completeMsg.isRestart()) {
        // future: offer to siteConnection.IBS for replay.
        doCommonSPICompleteActions();
    }

    if (!m_txnState.isReadOnly()) {
        // We need to log the restarting message to the task log so we'll replay the whole
        // stream faithfully
        taskLog.logTask(m_completeMsg);
    }

    final CompleteTransactionResponseMessage resp =
            new CompleteTransactionResponseMessage(m_completeMsg);
    resp.setIsRecovering(true);
    resp.m_sourceHSId = m_initiator.getHSId();
    m_initiator.deliver(resp);
}
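/*
 * A rough sketch of why the rejoin path logs the completion message above: during
 * live rejoin the task log is later drained in order, and replaying each logged
 * entry advances the last committed spHandle exactly as live execution would have.
 * The LoggedMessage shape and replay() helper below are hypothetical; only the
 * ordering-and-commit idea is taken from the code above.
 */
class SketchReplay {
    // Hypothetical minimal shape of a logged completion entry.
    static final class LoggedMessage {
        final long spHandle;
        final boolean rollback;
        LoggedMessage(long spHandle, boolean rollback) {
            this.spHandle = spHandle;
            this.rollback = rollback;
        }
    }

    private long m_lastCommittedSpHandle = Long.MIN_VALUE;

    // Drain the log in order; commits advance the spHandle the same way the live
    // path does through undo-log truncation.
    void replay(Iterable<LoggedMessage> log) {
        for (LoggedMessage msg : log) {
            if (!msg.rollback) {
                m_lastCommittedSpHandle = Math.max(m_lastCommittedSpHandle, msg.spHandle);
            }
        }
    }

    long lastCommittedSpHandle() {
        return m_lastCommittedSpHandle;
    }
}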
public long initialize(int sourceCount, FixedDBBPool bufferPool)
{
    // Expect sourceCount number of EOFs at the end
    m_expectedEOFs.set(sourceCount);

    m_in = new StreamSnapshotDataReceiver(m_mb, bufferPool);
    m_inThread = new Thread(m_in, "Snapshot data receiver");
    m_inThread.setDaemon(true);
    m_ack = new StreamSnapshotAckSender(m_mb);
    m_ackThread = new Thread(m_ack, "Snapshot ack sender");
    m_inThread.start();
    m_ackThread.start();
    return m_mb.getHSId();
}
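/*
 * A self-contained sketch of the pattern initialize() sets up: a daemon receiver
 * thread working over a shared queue, plus an AtomicInteger counting down the EOF
 * markers expected from each source. All names here are illustrative, not the
 * actual StreamSnapshot* classes.
 */
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.atomic.AtomicInteger;

class SketchSnapshotPipeline {
    private static final byte[] EOF = new byte[0];
    private final BlockingQueue<byte[]> m_received = new LinkedBlockingQueue<>();
    private final AtomicInteger m_expectedEOFs = new AtomicInteger();

    void start(int sourceCount) {
        m_expectedEOFs.set(sourceCount);
        Thread receiver = new Thread(this::drain, "Snapshot data receiver");
        receiver.setDaemon(true); // daemon, like m_inThread, so it never blocks shutdown
        receiver.start();
    }

    private void drain() {
        try {
            // Keep consuming until every source has delivered its EOF marker.
            while (m_expectedEOFs.get() > 0) {
                byte[] block = m_received.take();
                if (block == EOF) {
                    m_expectedEOFs.decrementAndGet();
                }
                // real code would hand non-EOF blocks to the restore path here
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }

    void offer(byte[] block) { m_received.add(block); }
    void offerEOF() { m_received.add(EOF); }
}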
/**
 * Blocking transfer of all partitioned table data, notifying the coordinator when done.
 *
 * @param siteConnection
 */
private void runForBlockingDataTransfer(SiteProcedureConnection siteConnection)
{
    boolean sourcesReady = false;
    RestoreWork restoreWork = m_dataSink.poll(m_snapshotBufferAllocator);
    if (restoreWork != null) {
        restoreBlock(restoreWork, siteConnection);
        sourcesReady = true;
    }

    // The completion monitor may fire even if m_dataSink has not reached EOF in the case
    // that there's no replicated table in the database, so check for both conditions.
    if (m_dataSink.isEOF() || m_snapshotCompletionMonitor.isDone()) {
        // No more data from this data sink, close and remove it from the list
        m_dataSink.close();

        if (m_streamSnapshotMb != null) {
            VoltDB.instance().getHostMessenger().removeMailbox(m_streamSnapshotMb.getHSId());
        }

        JOINLOG.debug(m_whoami + " data transfer is finished");

        if (m_snapshotCompletionMonitor.isDone()) {
            try {
                SnapshotCompletionEvent event = m_snapshotCompletionMonitor.get();
                assert (event != null);
                JOINLOG.debug("P" + m_partitionId + " noticed data transfer completion");
                m_completionAction.setSnapshotTxnId(event.multipartTxnId);

                setJoinComplete(siteConnection,
                        event.exportSequenceNumbers,
                        event.drSequenceNumbers,
                        false /* requireExistingSequenceNumbers */);
            } catch (InterruptedException e) {
                // isDone() already returned true, this shouldn't happen
                VoltDB.crashLocalVoltDB("Impossible interruption happened", true, e);
            } catch (ExecutionException e) {
                VoltDB.crashLocalVoltDB("Error waiting for snapshot to finish", true, e);
            }
        } else {
            m_taskQueue.offer(this);
        }
    } else {
        // The sources are not set up yet, don't block the site,
        // return here and retry later.
        returnToTaskQueue(sourcesReady);
    }
}
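/*
 * The completion-monitor handling above follows a common guarded-Future pattern:
 * only call get() once isDone() is true, so the call cannot block the site thread,
 * and treat InterruptedException as impossible at that point. A generic sketch
 * (SketchMonitorPoll and pollCompleted are illustrative names, not VoltDB API):
 */
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;

class SketchMonitorPoll {
    static <T> T pollCompleted(Future<T> monitor) {
        if (!monitor.isDone()) {
            return null; // not finished yet; caller should re-queue itself and retry
        }
        try {
            return monitor.get(); // cannot block: isDone() already returned true
        } catch (InterruptedException e) {
            throw new IllegalStateException("Impossible interruption", e);
        } catch (ExecutionException e) {
            throw new IllegalStateException("Snapshot failed", e.getCause());
        }
    }

    public static void main(String[] args) {
        Future<String> monitor = CompletableFuture.completedFuture("snapshot-done");
        System.out.println(pollCompleted(monitor)); // prints snapshot-done
    }
}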
/**
 * An implementation of run() that does not block the site thread.
 * The Site has responsibility for transactions that occur between
 * schedulings of this task.
 */
@Override
public void runForRejoin(SiteProcedureConnection siteConnection, TaskLog m_taskLog)
        throws IOException
{
    RestoreWork rejoinWork = m_rejoinSiteProcessor.poll(m_snapshotBufferAllocator);
    if (rejoinWork != null) {
        restoreBlock(rejoinWork, siteConnection);
    }

    if (!m_rejoinSiteProcessor.isEOF()) {
        m_taskQueue.offer(this);
    } else {
        REJOINLOG.debug(m_whoami + "Rejoin snapshot transfer is finished");
        m_rejoinSiteProcessor.close();

        Preconditions.checkNotNull(m_streamSnapshotMb);
        VoltDB.instance().getHostMessenger().removeMailbox(m_streamSnapshotMb.getHSId());

        doFinishingTask(siteConnection);
    }
}
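/*
 * Both rejoin paths above use the same non-blocking idiom: do at most one unit of
 * work per scheduling, then re-offer the task to the site's queue until the stream
 * reports EOF. A stripped-down sketch of that idiom (Queue stands in for the real
 * site task queue; all names are hypothetical):
 */
import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Iterator;
import java.util.Queue;

class SketchResumableTask implements Runnable {
    private final Iterator<String> m_blocks;   // stand-in for the snapshot stream
    private final Queue<Runnable> m_taskQueue; // stand-in for m_taskQueue

    SketchResumableTask(Iterator<String> blocks, Queue<Runnable> taskQueue) {
        m_blocks = blocks;
        m_taskQueue = taskQueue;
    }

    @Override
    public void run() {
        if (m_blocks.hasNext()) {
            m_blocks.next();         // one unit of work per scheduling
            m_taskQueue.offer(this); // yield the site thread, resume later
        }
        // at EOF: fall through without re-offering; the task retires itself
    }

    public static void main(String[] args) {
        Queue<Runnable> queue = new ArrayDeque<>();
        queue.offer(new SketchResumableTask(Arrays.asList("b1", "b2").iterator(), queue));
        Runnable task;
        while ((task = queue.poll()) != null) {
            task.run(); // drains: each run consumes one block, re-offers until EOF
        }
    }
}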
private void createAndRegisterAckMailboxes(final Set<Integer> localPartitions, HostMessenger messenger)
{
    m_zk = messenger.getZK();
    m_mailboxesZKPath = VoltZK.exportGenerations + "/" + m_timestamp + "/" + "mailboxes";

    m_mbox = new LocalMailbox(messenger) {
        @Override
        public void deliver(VoltMessage message) {
            if (message instanceof BinaryPayloadMessage) {
                BinaryPayloadMessage bpm = (BinaryPayloadMessage) message;
                ByteBuffer buf = ByteBuffer.wrap(bpm.m_payload);
                final int partition = buf.getInt();
                final int length = buf.getInt();
                byte[] stringBytes = new byte[length];
                buf.get(stringBytes);
                String signature = new String(stringBytes, Constants.UTF8ENCODING);
                final long ackUSO = buf.getLong();

                final HashMap<String, ExportDataSource> partitionSources =
                        m_dataSourcesByPartition.get(partition);
                if (partitionSources == null) {
                    exportLog.error("Received an export ack for partition " + partition +
                            " which does not exist on this node");
                    return;
                }

                final ExportDataSource eds = partitionSources.get(signature);
                if (eds == null) {
                    exportLog.error("Received an export ack for partition " + partition +
                            " source signature " + signature +
                            " which does not exist on this node");
                    return;
                }

                try {
                    eds.ack(ackUSO);
                } catch (RejectedExecutionException ignoreIt) {
                    // ignore it: as it is already shutdown
                }
            } else {
                exportLog.error("Received unexpected message " + message + " in export subsystem");
            }
        }
    };
    messenger.createMailbox(null, m_mbox);

    for (Integer partition : localPartitions) {
        final String partitionDN = m_mailboxesZKPath + "/" + partition;
        ZKUtil.asyncMkdirs(m_zk, partitionDN);

        ZKUtil.StringCallback cb = new ZKUtil.StringCallback();
        m_zk.create(
                partitionDN + "/" + m_mbox.getHSId(),
                null,
                Ids.OPEN_ACL_UNSAFE,
                CreateMode.EPHEMERAL,
                cb,
                null);
    }

    ListenableFuture<?> fut = m_childUpdatingThread.submit(new Runnable() {
        @Override
        public void run() {
            List<Pair<Integer, ZKUtil.ChildrenCallback>> callbacks =
                    new ArrayList<Pair<Integer, ZKUtil.ChildrenCallback>>();
            for (Integer partition : localPartitions) {
                ZKUtil.ChildrenCallback callback = new ZKUtil.ChildrenCallback();
                m_zk.getChildren(
                        m_mailboxesZKPath + "/" + partition,
                        constructMailboxChildWatcher(),
                        callback,
                        null);
                callbacks.add(Pair.of(partition, callback));
            }

            for (Pair<Integer, ZKUtil.ChildrenCallback> p : callbacks) {
                final Integer partition = p.getFirst();
                List<String> children = null;
                try {
                    children = p.getSecond().getChildren();
                } catch (InterruptedException e) {
                    Throwables.propagate(e);
                } catch (KeeperException e) {
                    Throwables.propagate(e);
                }
                ImmutableList.Builder<Long> mailboxes = ImmutableList.builder();

                for (String child : children) {
                    if (child.equals(Long.toString(m_mbox.getHSId()))) continue;
                    mailboxes.add(Long.valueOf(child));
                }
                ImmutableList<Long> mailboxHsids = mailboxes.build();

                for (ExportDataSource eds : m_dataSourcesByPartition.get(partition).values()) {
                    eds.updateAckMailboxes(Pair.of(m_mbox, mailboxHsids));
                }
            }
        }
    });
    try {
        fut.get();
    } catch (Throwable t) {
        Throwables.propagate(t);
    }
}
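/*
 * The deliver() override above decodes a fixed wire layout from the
 * BinaryPayloadMessage: int partition, int signature-length, UTF-8 signature
 * bytes, long ackUSO. A sketch of the matching encoder (SketchAckPayload is a
 * hypothetical helper, shown only to document the byte layout the decode assumes):
 */
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

class SketchAckPayload {
    static byte[] encode(int partition, String signature, long ackUSO) {
        byte[] sigBytes = signature.getBytes(StandardCharsets.UTF_8);
        ByteBuffer buf = ByteBuffer.allocate(4 + 4 + sigBytes.length + 8);
        buf.putInt(partition);       // read back by buf.getInt()
        buf.putInt(sigBytes.length); // length prefix for the signature
        buf.put(sigBytes);
        buf.putLong(ackUSO);         // the stream offset being acknowledged
        return buf.array();
    }
}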