private void processRejoiningFragmentWork(FragmentTaskMessage ftask,
        HashMap<Integer, List<VoltTable>> dependencies) {
    assert (ftask.getFragmentCount() > 0);
    assert (m_rejoinState == RejoinState.REJOINING);

    FragmentResponseMessage response = new FragmentResponseMessage(ftask, m_hsId);
    response.setRecovering(true);
    response.setStatus(FragmentResponseMessage.SUCCESS, null);

    // log the work done for replay
    if (!ftask.isReadOnly() && !ftask.isSysProcTask()) {
        assert (m_notice.isReadOnly() == false);
        assert (m_loggedFragments != null);
        m_loggedFragments.appendFragmentTask(ftask);
    }

    // add a dummy table for all of the expected dependency ids
    for (int i = 0; i < ftask.getFragmentCount(); i++) {
        response.addDependency(ftask.getOutputDepId(i),
                new VoltTable(new VoltTable.ColumnInfo("DUMMY", VoltType.BIGINT)));
    }

    m_mbox.send(response.getDestinationSiteId(), response);

    // If we're not the coordinator, the transaction is read-only,
    // and this was the final task, then we can try to move on after
    // we've finished this work.
    if (!isCoordinator() && isReadOnly() && ftask.isFinalTask()) {
        m_done = true;
    }
}
@Override
public void processRemoteWorkResponse(FragmentResponseMessage response) {
    // if we've already decided that we're rolling back, then we just
    // want to discard any incoming FragmentResponses that were
    // possibly in flight
    if (m_needsRollback) {
        return;
    }

    if (response.getStatusCode() != FragmentResponseMessage.SUCCESS) {
        if (m_missingDependencies != null) {
            m_missingDependencies.clear();
        }
        m_readyWorkUnits.clear();

        if (m_isCoordinator) {
            // throw an exception which will back the runtime all the way
            // to the stored procedure invocation call, triggering undo
            // at that point
            if (response.getException() != null) {
                throw response.getException();
            } else {
                throw new FragmentFailureException();
            }
        } else {
            m_needsRollback = true;
            m_done = true;
        }
    }

    processFragmentResponseDependencies(response);
}
public static void logInitiatorRxMsg(VoltMessage msg, long localHSId) {
    if (iv2log.isTraceEnabled()) {
        if (msg instanceof InitiateResponseMessage) {
            InitiateResponseMessage iresp = (InitiateResponseMessage) msg;
            String logmsg = "rxInitRsp %s from %s ciHandle %s txnId %s spHandle %s status %s";
            iv2log.trace(String.format(logmsg,
                    CoreUtils.hsIdToString(localHSId),
                    CoreUtils.hsIdToString(iresp.m_sourceHSId),
                    ClientInterfaceHandleManager.handleToString(iresp.getClientInterfaceHandle()),
                    txnIdToString(iresp.getTxnId()),
                    txnIdToString(iresp.getSpHandle()),
                    respStatusToString(iresp.getClientResponseData().getStatus())));
        }
        else if (msg instanceof FragmentResponseMessage) {
            FragmentResponseMessage fresp = (FragmentResponseMessage) msg;
            String logmsg = "rxFragRsp %s from %s txnId %s spHandle %s status %s";
            iv2log.trace(String.format(logmsg,
                    CoreUtils.hsIdToString(localHSId),
                    CoreUtils.hsIdToString(fresp.m_sourceHSId),
                    txnIdToString(fresp.getTxnId()),
                    txnIdToString(fresp.getSpHandle()),
                    fragStatusToString(fresp.getStatusCode())));
        }
    }
}
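// The logging method above relies on the guard-before-format idiom: check
// isTraceEnabled() before paying for String.format(). Below is a minimal
// standalone sketch of that idiom, using plain log4j and raw numeric IDs in
// place of the VoltLogger/CoreUtils/txnIdToString helpers; the class and
// method names here are illustrative only and are not part of the code above.
import org.apache.log4j.Logger;

class TraceGuardSketch {
    private static final Logger LOG = Logger.getLogger(TraceGuardSketch.class);

    static void logFragResponse(long localHSId, long sourceHSId, long txnId) {
        // Skip the String.format() cost entirely when trace logging is off.
        if (LOG.isTraceEnabled()) {
            LOG.trace(String.format("rxFragRsp %d from %d txnId %d",
                    localHSId, sourceHSId, txnId));
        }
    }
}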
// This is going to run in the BabySitter's thread. This and deliver() are
// synchronized by virtue of both going through InitiatorMailbox rather than
// being called directly. (That is, InitiatorMailbox's API, used by the
// BabySitter, is synchronized on the same lock deliver() is synchronized on.)
@Override
public void updateReplicas(List<Long> replicas) {
    // First - correct the official replica set.
    m_replicaHSIds = replicas;

    // Update the list of remote replicas that we'll need to send to
    List<Long> sendToHSIds = new ArrayList<Long>(m_replicaHSIds);
    sendToHSIds.remove(m_mailbox.getHSId());
    m_sendToHSIds = Longs.toArray(sendToHSIds);

    // Clean up duplicate counters and collect DONE counters
    // in this list for further processing.
    List<DuplicateCounterKey> doneCounters = new LinkedList<DuplicateCounterKey>();
    for (Entry<DuplicateCounterKey, DuplicateCounter> entry : m_duplicateCounters.entrySet()) {
        DuplicateCounter counter = entry.getValue();
        int result = counter.updateReplicas(m_replicaHSIds);
        if (result == DuplicateCounter.DONE) {
            doneCounters.add(entry.getKey());
        }
    }

    // Maintain the CI invariant that responses arrive in txnid order.
    Collections.sort(doneCounters);
    for (DuplicateCounterKey key : doneCounters) {
        DuplicateCounter counter = m_duplicateCounters.remove(key);
        VoltMessage resp = counter.getLastResponse();
        if (resp != null) {
            // MPI is tracking deps per partition HSID. We need to make
            // sure we write ours into the message getting sent to the MPI
            if (resp instanceof FragmentResponseMessage) {
                FragmentResponseMessage fresp = (FragmentResponseMessage) resp;
                fresp.setExecutorSiteId(m_mailbox.getHSId());
            }
            m_mailbox.send(counter.m_destinationId, resp);
        }
        else {
            hostLog.warn("TXN " + counter.getTxnId() + " lost all replicas and " +
                    "had no responses. This should be impossible?");
        }
    }
    writeIv2ViableReplayEntry();
}
// Eventually, the master for a partition set will need to be able to dedupe
// FragmentResponses from its replicas.
public void handleFragmentResponseMessage(FragmentResponseMessage message) {
    // Send the message to the duplicate counter, if any
    DuplicateCounter counter =
            m_duplicateCounters.get(new DuplicateCounterKey(message.getTxnId(), message.getSpHandle()));
    if (counter != null) {
        int result = counter.offer(message);
        if (result == DuplicateCounter.DONE) {
            m_duplicateCounters.remove(new DuplicateCounterKey(message.getTxnId(), message.getSpHandle()));
            m_repairLogTruncationHandle = message.getSpHandle();
            FragmentResponseMessage resp = (FragmentResponseMessage) counter.getLastResponse();
            // MPI is tracking deps per partition HSID. We need to make
            // sure we write ours into the message getting sent to the MPI
            resp.setExecutorSiteId(m_mailbox.getHSId());
            m_mailbox.send(counter.m_destinationId, resp);
        }
        else if (result == DuplicateCounter.MISMATCH) {
            VoltDB.crashLocalVoltDB("HASH MISMATCH running multi-part procedure.", true, null);
        }
        // doing duplicate suppression: all done.
        return;
    }
    m_mailbox.send(message.getDestinationSiteId(), message);
}
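// A simplified, self-contained sketch of the de-duplication idea behind the
// DuplicateCounter usage in handleFragmentResponseMessage() and
// updateReplicas() above. ResponseDeduper is a hypothetical class, not
// VoltDB's implementation: the partition leader forwards a single response
// only once every expected replica has answered, reports a mismatch if
// replicas disagree on the result hash (so the caller can crash the node),
// and can complete early when failed replicas drop out of the expected set.
import java.util.HashSet;
import java.util.Set;

class ResponseDeduper {
    static final int WAITING = 0;
    static final int DONE = 1;
    static final int MISMATCH = -1;

    private final Set<Long> m_expectedHSIds;
    private Long m_resultHash = null;

    ResponseDeduper(Set<Long> expectedHSIds) {
        m_expectedHSIds = new HashSet<Long>(expectedHSIds);
    }

    // Offer one replica's response; the caller forwards its stored response
    // downstream only when DONE comes back.
    int offer(long sourceHSId, long resultHash) {
        if (m_resultHash == null) {
            m_resultHash = resultHash;
        }
        else if (m_resultHash.longValue() != resultHash) {
            return MISMATCH;
        }
        m_expectedHSIds.remove(sourceHSId);
        return m_expectedHSIds.isEmpty() ? DONE : WAITING;
    }

    // Drop expectations for replicas that are gone; this may complete the counter.
    int updateReplicas(Set<Long> survivingHSIds) {
        m_expectedHSIds.retainAll(survivingHSIds);
        return m_expectedHSIds.isEmpty() ? DONE : WAITING;
    }
}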
private void processFragmentResponseDependencies(FragmentResponseMessage response) {
    int depCount = response.getTableCount();
    for (int i = 0; i < depCount; i++) {
        int dependencyId = response.getTableDependencyIdAtIndex(i);
        VoltTable payload = response.getTableAtIndex(i);
        assert (payload != null);

        // if we're receiving a dependency, we should already know about it
        assert (m_missingDependencies != null);

        WorkUnit w = m_missingDependencies.get(dependencyId);
        if (w == null) {
            String msg = "Unable to find WorkUnit for dependency: " + dependencyId +
                    " as part of TXN: " + txnId +
                    " received from execution site: " + response.getExecutorSiteId();
            hostLog.warn(msg);
            //throw new FragmentFailureException();
            return;
        }

        // if the node is recovering, it doesn't matter if the payload matches
        if (response.isRecovering()) {
            w.putDummyDependency(dependencyId, response.getExecutorSiteId());
        }
        else {
            w.putDependency(dependencyId, response.getExecutorSiteId(), payload, m_site.getSiteTracker());
        }

        if (w.allDependenciesSatisfied()) {
            handleWorkUnitComplete(w);
        }
    }
}
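// A minimal, standalone illustration of the dependency bookkeeping a WorkUnit
// performs in the method above. PendingWork is a hypothetical class, not
// VoltDB's WorkUnit: the unit becomes runnable only once every expected
// dependency id has received a result, which is the role
// allDependenciesSatisfied() plays in processFragmentResponseDependencies().
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

class PendingWork {
    private final Map<Integer, Object> m_results = new HashMap<Integer, Object>();
    private final Set<Integer> m_outstanding = new HashSet<Integer>();

    PendingWork(Set<Integer> expectedDependencyIds) {
        m_outstanding.addAll(expectedDependencyIds);
    }

    // Record one dependency result; returns true once all dependencies are satisfied.
    boolean putDependency(int dependencyId, Object result) {
        m_results.put(dependencyId, result);
        m_outstanding.remove(dependencyId);
        return m_outstanding.isEmpty();
    }
}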
void processFragmentWork(FragmentTaskMessage ftask,
        HashMap<Integer, List<VoltTable>> dependencies) {
    assert (ftask.getFragmentCount() > 0);

    FragmentResponseMessage response = m_site.processFragmentTask(this, dependencies, ftask);
    if (response.getStatusCode() != FragmentResponseMessage.SUCCESS) {
        if (m_missingDependencies != null) {
            m_missingDependencies.clear();
        }
        m_readyWorkUnits.clear();

        if (m_isCoordinator) {
            // throw an exception which will back the runtime all the way
            // to the stored procedure invocation call, triggering undo
            // at that point
            if (response.getException() != null) {
                throw response.getException();
            } else {
                throw new FragmentFailureException();
            }
        } else {
            m_needsRollback = true;
            m_done = true;
        }
    }

    if (m_isCoordinator &&
            (response.getDestinationSiteId() == response.getExecutorSiteId())) {
        processFragmentResponseDependencies(response);
    }
    else {
        m_mbox.send(response.getDestinationSiteId(), response);
        // If we're not the coordinator, the transaction is read-only,
        // and this was the final task, then we can try to move on after
        // we've finished this work.
        if (!isCoordinator() && isReadOnly() && ftask.isFinalTask()) {
            m_done = true;
        }
    }
}
// Currently emulates the code in ProcedureRunner.slowPath(), so any change
// to how that batch is built will need to be reflected here.
MpTestPlan createTestPlan(int batchSize, boolean readOnly, boolean replicatedTable,
        boolean rollback, long[] remoteHSIds) throws IOException {
    boolean single_frag = readOnly && replicatedTable;
    MpTestPlan plan = new MpTestPlan();

    List<Integer> distributedOutputDepIds = new ArrayList<Integer>();
    List<Integer> depsToResumeList = new ArrayList<Integer>();
    List<Integer> depsForLocalTask = new ArrayList<Integer>();
    for (int i = 0; i < batchSize; i++) {
        // each SQL statement in the batch gets an output dep ID
        // which corresponds to a local fragment ID
        depsToResumeList.add(i);
        // each local fragment ID needs an input dep. If this is not a
        // replicated read-only batch, generate a new value and add it to
        // the distributed output deps
        if (!single_frag) {
            // take the dep and add 1000
            depsForLocalTask.add(i + 1000);
            distributedOutputDepIds.add(i + 1000);
        } else {
            depsForLocalTask.add(-1);
        }
    }

    // store resume dependencies in the MpTestPlan for later.
    plan.depsToResume = depsToResumeList;

    // generate remote task with output IDs, fill in lists appropriately
    plan.remoteWork =
            new FragmentTaskMessage(Long.MIN_VALUE, // try not to care?
                    Long.MIN_VALUE, // try not to care?
                    Long.MIN_VALUE, // try not to care?
                    1234L, // magic, change if it matters
                    readOnly, false, false); // IV2 doesn't use final task (yet)

    for (int i = 0; i < distributedOutputDepIds.size(); i++) {
        plan.remoteWork.addFragment(Long.MIN_VALUE, distributedOutputDepIds.get(i),
                createDummyParameterSet());
    }
    System.out.println("REMOTE TASK: " + plan.remoteWork.toString());

    if (!single_frag) {
        // generate a remote fragment response for each remote message
        for (int i = 0; i < remoteHSIds.length; i++) {
            FragmentResponseMessage resp =
                    new FragmentResponseMessage(plan.remoteWork, remoteHSIds[i]);
            if (rollback && i == (remoteHSIds.length - 1)) {
                resp.setStatus(FragmentResponseMessage.UNEXPECTED_ERROR, new EEException(1234));
            } else {
                resp.setStatus(FragmentResponseMessage.SUCCESS, null);
                for (int j = 0; j < distributedOutputDepIds.size(); j++) {
                    resp.addDependency(distributedOutputDepIds.get(j),
                            new VoltTable(new VoltTable.ColumnInfo("BOGO", VoltType.BIGINT)));
                }
            }
            System.out.println("RESPONSE: " + resp);
            plan.generatedResponses.add(resp);
        }
    }

    // generate local task with new output IDs, use above outputs as inputs, if any
    plan.localWork =
            new FragmentTaskMessage(Long.MIN_VALUE, // try not to care
                    Long.MIN_VALUE,
                    Long.MIN_VALUE,
                    1234L,
                    readOnly, false, false);

    for (int i = 0; i < batchSize; i++) {
        plan.localWork.addFragment(0L, depsToResumeList.get(i), createDummyParameterSet());
    }

    for (int i = 0; i < depsForLocalTask.size(); i++) {
        if (depsForLocalTask.get(i) < 0) {
            continue;
        }
        plan.localWork.addInputDepId(i, depsForLocalTask.get(i));
    }

    // create the FragmentResponse for the BorrowTask
    FragmentResponseMessage resp = new FragmentResponseMessage(plan.remoteWork, remoteHSIds[0]);
    resp.setStatus(FragmentResponseMessage.SUCCESS, null);
    for (int j = 0; j < batchSize; j++) {
        resp.addDependency(depsToResumeList.get(j),
                new VoltTable(new VoltTable.ColumnInfo("BOGO", VoltType.BIGINT)));
    }
    System.out.println("BORROW RESPONSE: " + resp);
    plan.generatedResponses.add(resp);

    System.out.println("LOCAL TASK: " + plan.localWork.toString());

    return plan;
}
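// A hedged sketch of how a test might drive the plan built above: hand the
// generated fragment work to the component under test, then replay the canned
// replica responses. The MessageSink interface and runPartitionedReadPlan()
// are assumptions for illustration and would live alongside createTestPlan()
// in the test class; the real tests use the mailbox and transaction-task
// machinery, which is not shown here. The element type of generatedResponses
// is assumed to be FragmentResponseMessage, since that is all createTestPlan()
// adds to it.
interface MessageSink {
    void deliver(VoltMessage msg);
}

void runPartitionedReadPlan(MessageSink siteUnderTest, long[] remoteHSIds) throws IOException {
    // Two-statement read-only batch over a partitioned table, no rollback,
    // so both distributed and local fragments are generated.
    MpTestPlan plan = createTestPlan(2, true, false, false, remoteHSIds);

    // Deliver the distributed and local fragment work first...
    siteUnderTest.deliver(plan.remoteWork);
    siteUnderTest.deliver(plan.localWork);

    // ...then feed back the pre-generated responses, as real replicas would.
    for (FragmentResponseMessage resp : plan.generatedResponses) {
        siteUnderTest.deliver(resp);
    }
}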