public void handleCompleteTransactionMessage(CompleteTransactionMessage message) { if (m_isLeader) { CompleteTransactionMessage replmsg = new CompleteTransactionMessage(message); // Set the spHandle so that on repair the new master will set the max seen spHandle // correctly advanceTxnEgo(); replmsg.setSpHandle(getCurrentTxnId()); if (m_sendToHSIds.length > 0) { m_mailbox.send(m_sendToHSIds, replmsg); } } else { setMaxSeenTxnId(message.getSpHandle()); } TransactionState txn = m_outstandingTxns.get(message.getTxnId()); // We can currently receive CompleteTransactionMessages for multipart procedures // which only use the buddy site (replicated table read). Ignore them for // now, fix that later. if (txn != null) { Iv2Trace.logCompleteTransactionMessage(message, m_mailbox.getHSId()); final CompleteTransactionTask task = new CompleteTransactionTask(txn, m_pendingTasks, message, m_drGateway); queueOrOfferMPTask(task); // If this is a restart, then we need to leave the transaction state around if (!message.isRestart()) { m_outstandingTxns.remove(message.getTxnId()); } } }
// SpSchedulers will see FragmentTaskMessage for: // - The scatter fragment(s) of a multi-part transaction (normal or sysproc) // - Borrow tasks to do the local fragment work if this partition is the // buddy of the MPI. Borrow tasks may include input dependency tables for // aggregation fragments, or not, if it's a replicated table read. // For multi-batch MP transactions, we'll need to look up the transaction state // that gets created when the first batch arrives. // During command log replay a new SP handle is going to be generated, but it really // doesn't matter, it isn't going to be used for anything. void handleFragmentTaskMessage(FragmentTaskMessage message) { FragmentTaskMessage msg = message; long newSpHandle; if (m_isLeader) { // Quick hack to make progress...we need to copy the FragmentTaskMessage // before we start mucking with its state (SPHANDLE). We need to revisit // all the messaging mess at some point. msg = new FragmentTaskMessage( message.getInitiatorHSId(), message.getCoordinatorHSId(), message); // Not going to use the timestamp from the new Ego because the multi-part timestamp is what // should be used TxnEgo ego = advanceTxnEgo(); newSpHandle = ego.getTxnId(); msg.setSpHandle(newSpHandle); if (msg.getInitiateTask() != null) { msg.getInitiateTask().setSpHandle(newSpHandle); // set the handle msg.setInitiateTask( msg.getInitiateTask()); // Trigger reserialization so the new handle is used } /* * If there a replicas to send it to, forward it! * Unless... it's read only AND not a sysproc. Read only sysprocs may expect to be sent * everywhere. * In that case don't propagate it to avoid a determinism check and extra messaging overhead */ if (m_sendToHSIds.length > 0 && (!msg.isReadOnly() || msg.isSysProcTask())) { FragmentTaskMessage replmsg = new FragmentTaskMessage(m_mailbox.getHSId(), m_mailbox.getHSId(), msg); m_mailbox.send(m_sendToHSIds, replmsg); DuplicateCounter counter; /* * Non-determinism should be impossible to happen with MP fragments. * if you see "MP_DETERMINISM_ERROR" as procedure name in the crash logs * something has horribly gone wrong. */ if (message.getFragmentTaskType() != FragmentTaskMessage.SYS_PROC_PER_SITE) { counter = new DuplicateCounter( msg.getCoordinatorHSId(), msg.getTxnId(), m_replicaHSIds, "MP_DETERMINISM_ERROR"); } else { counter = new SysProcDuplicateCounter( msg.getCoordinatorHSId(), msg.getTxnId(), m_replicaHSIds, "MP_DETERMINISM_ERROR"); } m_duplicateCounters.put(new DuplicateCounterKey(msg.getTxnId(), newSpHandle), counter); } } else { newSpHandle = msg.getSpHandle(); setMaxSeenTxnId(newSpHandle); } Iv2Trace.logFragmentTaskMessage(message, m_mailbox.getHSId(), newSpHandle, false); doLocalFragmentOffer(msg); }
// BorrowTaskMessages encapsulate a FragmentTaskMessage along with // input dependency tables. The MPI issues borrows to a local site // to perform replicated reads or aggregation fragment work. private void handleBorrowTaskMessage(BorrowTaskMessage message) { // borrows do not advance the sp handle. The handle would // move backwards anyway once the next message is received // from the SP leader. long newSpHandle = getCurrentTxnId(); Iv2Trace.logFragmentTaskMessage( message.getFragmentTaskMessage(), m_mailbox.getHSId(), newSpHandle, true); TransactionState txn = m_outstandingTxns.get(message.getTxnId()); if (txn == null) { // If the borrow is the first fragment for a transaction, run it as // a single partition fragment; Must not engage/pause this // site on a MP transaction before the SP instructs to do so. // Do not track the borrow task as outstanding - it completes // immediately and is not a valid transaction state for // full MP participation (it claims everything can run as SP). txn = new BorrowTransactionState(newSpHandle, message); } if (message.getFragmentTaskMessage().isSysProcTask()) { final SysprocFragmentTask task = new SysprocFragmentTask( m_mailbox, (ParticipantTransactionState) txn, m_pendingTasks, message.getFragmentTaskMessage(), message.getInputDepMap()); m_pendingTasks.offer(task); } else { final FragmentTask task = new FragmentTask( m_mailbox, (ParticipantTransactionState) txn, m_pendingTasks, message.getFragmentTaskMessage(), message.getInputDepMap()); m_pendingTasks.offer(task); } }
// SpScheduler expects to see InitiateTaskMessages corresponding to single-partition // procedures only. public void handleIv2InitiateTaskMessage(Iv2InitiateTaskMessage message) { if (!message.isSinglePartition()) { throw new RuntimeException( "SpScheduler.handleIv2InitiateTaskMessage " + "should never receive multi-partition initiations."); } final String procedureName = message.getStoredProcedureName(); long newSpHandle; long uniqueId = Long.MIN_VALUE; Iv2InitiateTaskMessage msg = message; if (m_isLeader || message.isReadOnly()) { /* * A short circuit read is a read where the client interface is local to * this node. The CI will let a replica perform a read in this case and * it does looser tracking of client handles since it can't be * partitioned from the local replica. */ if (!m_isLeader && CoreUtils.getHostIdFromHSId(msg.getInitiatorHSId()) != CoreUtils.getHostIdFromHSId(m_mailbox.getHSId())) { VoltDB.crashLocalVoltDB("Only allowed to do short circuit reads locally", true, null); } /* * If this is for CL replay or DR, update the unique ID generator */ if (message.isForReplay()) { uniqueId = message.getUniqueId(); try { m_uniqueIdGenerator.updateMostRecentlyGeneratedUniqueId(uniqueId); } catch (Exception e) { hostLog.fatal(e.getMessage()); hostLog.fatal("Invocation: " + message); VoltDB.crashLocalVoltDB(e.getMessage(), true, e); } } else if (message.isForDR()) { uniqueId = message.getStoredProcedureInvocation().getOriginalUniqueId(); // @LoadSinglepartitionTable does not have a valid uid if (UniqueIdGenerator.getPartitionIdFromUniqueId(uniqueId) == m_partitionId) { m_uniqueIdGenerator.updateMostRecentlyGeneratedUniqueId(uniqueId); } } /* * If this is CL replay use the txnid from the CL and also * update the txnid to match the one from the CL */ if (message.isForReplay()) { newSpHandle = message.getTxnId(); setMaxSeenTxnId(newSpHandle); } else if (m_isLeader) { TxnEgo ego = advanceTxnEgo(); newSpHandle = ego.getTxnId(); uniqueId = m_uniqueIdGenerator.getNextUniqueId(); } else { /* * The short circuit read case. Since we are not a master * we can't create new transaction IDs, so reuse the last seen * txnid. For a timestamp, might as well give a reasonable one * for a read heavy workload so time isn't bursty. */ uniqueId = UniqueIdGenerator.makeIdFromComponents( Math.max(System.currentTimeMillis(), m_uniqueIdGenerator.lastUsedTime), 0, m_uniqueIdGenerator.partitionId); // Don't think it wise to make a new one for a short circuit read newSpHandle = getCurrentTxnId(); } // Need to set the SP handle on the received message // Need to copy this or the other local sites handling // the same initiate task message will overwrite each // other's memory -- the message isn't copied on delivery // to other local mailboxes. msg = new Iv2InitiateTaskMessage( message.getInitiatorHSId(), message.getCoordinatorHSId(), m_repairLogTruncationHandle, message.getTxnId(), message.getUniqueId(), message.isReadOnly(), message.isSinglePartition(), message.getStoredProcedureInvocation(), message.getClientInterfaceHandle(), message.getConnectionId(), message.isForReplay()); msg.setSpHandle(newSpHandle); // Also, if this is a vanilla single-part procedure, make the TXNID // be the SpHandle (for now) // Only system procedures are every-site, so we'll check through the SystemProcedureCatalog if (SystemProcedureCatalog.listing.get(procedureName) == null || !SystemProcedureCatalog.listing.get(procedureName).getEverysite()) { msg.setTxnId(newSpHandle); msg.setUniqueId(uniqueId); } // Don't replicate reads, this really assumes that DML validation // is going to be integrated soonish if (m_isLeader && !msg.isReadOnly() && m_sendToHSIds.length > 0) { Iv2InitiateTaskMessage replmsg = new Iv2InitiateTaskMessage( m_mailbox.getHSId(), m_mailbox.getHSId(), m_repairLogTruncationHandle, msg.getTxnId(), msg.getUniqueId(), msg.isReadOnly(), msg.isSinglePartition(), msg.getStoredProcedureInvocation(), msg.getClientInterfaceHandle(), msg.getConnectionId(), msg.isForReplay()); // Update the handle in the copy since the constructor doesn't set it replmsg.setSpHandle(newSpHandle); m_mailbox.send(m_sendToHSIds, replmsg); DuplicateCounter counter = new DuplicateCounter( msg.getInitiatorHSId(), msg.getTxnId(), m_replicaHSIds, msg.getStoredProcedureName()); m_duplicateCounters.put(new DuplicateCounterKey(msg.getTxnId(), newSpHandle), counter); } } else { setMaxSeenTxnId(msg.getSpHandle()); newSpHandle = msg.getSpHandle(); uniqueId = msg.getUniqueId(); } Iv2Trace.logIv2InitiateTaskMessage(message, m_mailbox.getHSId(), msg.getTxnId(), newSpHandle); doLocalInitiateOffer(msg); return; }