private static Map<Integer, Long> findLocalSources( Collection<IndexSnapshotRequestConfig.PartitionRanges> partitionRanges, SiteTracker tracker) { Set<Integer> partitions = Sets.newHashSet(); for (IndexSnapshotRequestConfig.PartitionRanges partitionRange : partitionRanges) { partitions.add(partitionRange.partitionId); } Map<Integer, Long> pidToLocalHSId = Maps.newHashMap(); List<Long> localSites = Longs.asList(tracker.getLocalSites()); for (long hsId : localSites) { int pid = tracker.getPartitionForSite(hsId); if (partitions.contains(pid)) { pidToLocalHSId.put(pid, hsId); } } return pidToLocalHSId; }
@Override public Set<Service> get() { return Sets.newLinkedHashSet(); }
/** * The only public method: do all the work to start a snapshot. Assumes that a snapshot is * feasible, that the caller has validated it can be accomplished, that the caller knows this is a * consistent or useful transaction point at which to snapshot. * * @param file_path * @param file_nonce * @param format * @param block * @param txnId * @param data * @param context * @param hostname * @return VoltTable describing the results of the snapshot attempt */ public VoltTable startSnapshotting( final String file_path, final String file_nonce, final SnapshotFormat format, final byte block, final long multiPartTxnId, final long partitionTxnId, final long legacyPerPartitionTxnIds[], final String data, final SystemProcedureExecutionContext context, final String hostname, final HashinatorSnapshotData hashinatorData, final long timestamp) { TRACE_LOG.trace("Creating snapshot target and handing to EEs"); final VoltTable result = SnapshotUtil.constructNodeResultsTable(); final int numLocalSites = context.getCluster().getDeployment().get("deployment").getSitesperhost(); // One site wins the race to create the snapshot targets, populating // m_taskListsForSites for the other sites and creating an appropriate // number of snapshot permits. synchronized (SnapshotSiteProcessor.m_snapshotCreateLock) { SnapshotSiteProcessor.m_snapshotCreateSetupBarrierActualAction.set( new Runnable() { @Override public void run() { Map<Integer, Long> partitionTransactionIds = new HashMap<Integer, Long>(); partitionTransactionIds = m_partitionLastSeenTransactionIds; SNAP_LOG.debug("Last seen partition transaction ids " + partitionTransactionIds); m_partitionLastSeenTransactionIds = new HashMap<Integer, Long>(); partitionTransactionIds.put(TxnEgo.getPartitionId(multiPartTxnId), multiPartTxnId); /* * Do a quick sanity check that the provided IDs * don't conflict with currently active partitions. If they do * it isn't fatal we can just skip it. */ for (long txnId : legacyPerPartitionTxnIds) { final int legacyPartition = TxnEgo.getPartitionId(txnId); if (partitionTransactionIds.containsKey(legacyPartition)) { SNAP_LOG.warn( "While saving a snapshot and propagating legacy " + "transaction ids found an id that matches currently active partition" + partitionTransactionIds.get(legacyPartition)); } else { partitionTransactionIds.put(legacyPartition, txnId); } } exportSequenceNumbers = SnapshotSiteProcessor.getExportSequenceNumbers(); createSetupIv2( file_path, file_nonce, format, multiPartTxnId, partitionTransactionIds, data, context, result, exportSequenceNumbers, context.getSiteTrackerForSnapshot(), hashinatorData, timestamp); } }); // Create a barrier to use with the current number of sites to wait for // or if the barrier is already set up check if it is broken and reset if necessary SnapshotSiteProcessor.readySnapshotSetupBarriers(numLocalSites); // From within this EE, record the sequence numbers as of the start of the snapshot (now) // so that the info can be put in the digest. SnapshotSiteProcessor.populateExportSequenceNumbersForExecutionSite(context); SNAP_LOG.debug( "Registering transaction id " + partitionTxnId + " for " + TxnEgo.getPartitionId(partitionTxnId)); m_partitionLastSeenTransactionIds.put(TxnEgo.getPartitionId(partitionTxnId), partitionTxnId); } boolean runPostTasks = false; VoltTable earlyResultTable = null; try { SnapshotSiteProcessor.m_snapshotCreateSetupBarrier.await(); try { synchronized (m_createLock) { SNAP_LOG.debug( "Found tasks for HSIds: " + CoreUtils.hsIdCollectionToString(m_taskListsForHSIds.keySet())); SNAP_LOG.debug("Looking for local HSID: " + CoreUtils.hsIdToString(context.getSiteId())); Deque<SnapshotTableTask> taskList = m_taskListsForHSIds.remove(context.getSiteId()); // If createSetup failed, then the first site to reach here is going // to send the results table generated by createSetup, and then empty out the table. // All other sites to reach here will send the appropriate empty table. // If createSetup was a success but the taskList is null, then we'll use the block // switch to figure out what flavor of empty SnapshotSave result table to return. if (!m_createSuccess.get()) { // There shouldn't be any work for any site if we failed assert (m_taskListsForHSIds.isEmpty()); VoltTable finalresult = m_createResult.get(); if (finalresult != null) { m_createResult.set(null); earlyResultTable = finalresult; } else { // We returned a non-empty NodeResultsTable with the failures in it, // every other site needs to return a NodeResultsTable as well. earlyResultTable = SnapshotUtil.constructNodeResultsTable(); } } else if (taskList == null) { SNAP_LOG.debug("No task for this site, block " + block); // This node is participating in the snapshot but this site has nothing to do. // Send back an appropriate empty table based on the block flag if (block != 0) { runPostTasks = true; earlyResultTable = SnapshotUtil.constructPartitionResultsTable(); earlyResultTable.addRow( context.getHostId(), hostname, CoreUtils.getSiteIdFromHSId(context.getSiteId()), "SUCCESS", ""); } else { earlyResultTable = SnapshotUtil.constructNodeResultsTable(); } } else { context .getSiteSnapshotConnection() .initiateSnapshots(format, taskList, multiPartTxnId, exportSequenceNumbers); } if (m_deferredSetupFuture != null) { // Add a listener to the deferred setup so that it can kick off the snapshot // task once the setup is done. m_deferredSetupFuture.addListener( new Runnable() { @Override public void run() { DeferredSnapshotSetup deferredSnapshotSetup = null; try { deferredSnapshotSetup = m_deferredSetupFuture.get(); } catch (Exception e) { // it doesn't throw } assert deferredSnapshotSetup != null; context .getSiteSnapshotConnection() .startSnapshotWithTargets( deferredSnapshotSetup.getPlan().getSnapshotDataTargets()); } }, CoreUtils.SAMETHREADEXECUTOR); } } } finally { SnapshotSiteProcessor.m_snapshotCreateFinishBarrier.await(120, TimeUnit.SECONDS); } } catch (TimeoutException e) { VoltDB.crashLocalVoltDB( "Timed out waiting 120 seconds for all threads to arrive and start snapshot", true, null); } catch (InterruptedException e) { result.addRow(context.getHostId(), hostname, "", "FAILURE", CoreUtils.throwableToString(e)); earlyResultTable = result; } catch (BrokenBarrierException e) { result.addRow(context.getHostId(), hostname, "", "FAILURE", CoreUtils.throwableToString(e)); earlyResultTable = result; } // If earlyResultTable is set, return here if (earlyResultTable != null) { if (runPostTasks) { // Need to run post-snapshot tasks before finishing SnapshotSiteProcessor.runPostSnapshotTasks(context); } return earlyResultTable; } if (block != 0) { HashSet<Exception> failures = Sets.newHashSet(); String status = "SUCCESS"; String err = ""; try { // For blocking snapshot, propogate the error from deferred setup back to the client final DeferredSnapshotSetup deferredSnapshotSetup = m_deferredSetupFuture.get(); if (deferredSnapshotSetup != null && deferredSnapshotSetup.getError() != null) { status = "FAILURE"; err = deferredSnapshotSetup.getError().toString(); failures.add(deferredSnapshotSetup.getError()); } failures.addAll(context.getSiteSnapshotConnection().completeSnapshotWork()); SnapshotSiteProcessor.runPostSnapshotTasks(context); } catch (Exception e) { status = "FAILURE"; err = e.toString(); failures.add(e); } final VoltTable blockingResult = SnapshotUtil.constructPartitionResultsTable(); if (failures.isEmpty()) { blockingResult.addRow( context.getHostId(), hostname, CoreUtils.getSiteIdFromHSId(context.getSiteId()), status, err); } else { status = "FAILURE"; for (Exception e : failures) { err = e.toString(); } blockingResult.addRow( context.getHostId(), hostname, CoreUtils.getSiteIdFromHSId(context.getSiteId()), status, err); } return blockingResult; } return result; }