@Override
public void run() {
    // if I'm the leader, send out the catalog
    if (m_rvdb.m_myHostId == m_rvdb.m_hostIdWithStartupCatalog) {
        try {
            // If no catalog was supplied, provide an empty one.
            if (m_rvdb.m_pathToStartupCatalog == null) {
                try {
                    File emptyJarFile = CatalogUtil.createTemporaryEmptyCatalogJarFile();
                    if (emptyJarFile == null) {
                        VoltDB.crashLocalVoltDB("Failed to generate empty catalog.");
                    }
                    m_rvdb.m_pathToStartupCatalog = emptyJarFile.getAbsolutePath();
                } catch (IOException e) {
                    VoltDB.crashLocalVoltDB(
                            "I/O exception while creating empty catalog jar file.", false, e);
                }
            }

            // Get the catalog bytes and byte count.
            byte[] catalogBytes = readCatalog(m_rvdb.m_pathToStartupCatalog);

            // Export needs a cluster-global unique id for the initial catalog version
            long catalogUniqueId = UniqueIdGenerator.makeIdFromComponents(
                    System.currentTimeMillis(), 0, MpInitiator.MP_INIT_PID);

            hostLog.debug(String.format("Sending %d catalog bytes", catalogBytes.length));

            long catalogTxnId = TxnEgo.makeZero(MpInitiator.MP_INIT_PID).getTxnId();

            // Need to get the deployment bytes from the starter catalog context
            byte[] deploymentBytes = m_rvdb.getCatalogContext().getDeploymentBytes();

            // publish the catalog bytes to ZK
            CatalogUtil.updateCatalogToZK(
                    m_rvdb.getHostMessenger().getZK(),
                    0,
                    catalogTxnId,
                    catalogUniqueId,
                    catalogBytes,
                    deploymentBytes);
        } catch (IOException e) {
            VoltDB.crashGlobalVoltDB("Unable to distribute catalog.", false, e);
        } catch (org.apache.zookeeper_voltpatches.KeeperException e) {
            VoltDB.crashGlobalVoltDB("Unable to publish catalog.", false, e);
        } catch (InterruptedException e) {
            VoltDB.crashGlobalVoltDB("Interrupted while publishing catalog.", false, e);
        }
    }
}
/*
 * Returns true if the generation was completely truncated away.
 */
public boolean truncateExportToTxnId(long txnId, long[] perPartitionTxnIds) {
    // create an easy partitionId:txnId lookup.
    HashMap<Integer, Long> partitionToTxnId = new HashMap<Integer, Long>();
    for (long tid : perPartitionTxnIds) {
        partitionToTxnId.put(TxnEgo.getPartitionId(tid), tid);
    }

    List<ListenableFuture<?>> tasks = new ArrayList<ListenableFuture<?>>();

    // Pre-iv2, the truncation point is the snapshot transaction id.
    // In iv2, truncate at the per-partition txn id recorded in the snapshot.
    for (HashMap<String, ExportDataSource> dataSources : m_dataSourcesByPartition.values()) {
        for (ExportDataSource source : dataSources.values()) {
            if (VoltDB.instance().isIV2Enabled()) {
                Long truncationPoint = partitionToTxnId.get(source.getPartitionId());
                if (truncationPoint == null) {
                    exportLog.error(
                            "Snapshot " + txnId + " does not include truncation point for partition "
                                    + source.getPartitionId());
                } else {
                    tasks.add(source.truncateExportToTxnId(truncationPoint));
                }
            } else {
                tasks.add(source.truncateExportToTxnId(txnId));
            }
        }
    }

    try {
        Futures.allAsList(tasks).get();
    } catch (Exception e) {
        VoltDB.crashLocalVoltDB(
                "Unexpected exception truncating export data during snapshot restore. "
                        + "You can back up export overflow data and start the "
                        + "DB without it to get past this error",
                true, e);
    }

    return m_drainedSources.get() == m_numSources;
}
@Override
public void clientCallback(ClientResponse clientResponse) {
    // Track the result of the request (Success, Failure)
    long now = System.currentTimeMillis();
    if (clientResponse.getStatus() == ClientResponse.SUCCESS) {
        TrackingResults.incrementAndGet(0);
        long txid = clientResponse.getResults()[0].asScalarLong();
        final String trace = String.format("%d:%d:%d\n", m_rowid, txid, now);
        try {
            m_writer.write(TxnEgo.getPartitionId(txid), trace);
        } catch (IOException e) {
            e.printStackTrace();
        }
    } else {
        TrackingResults.incrementAndGet(1);
        final String trace = String.format("%d:-1:%d\n", m_rowid, now);
        try {
            m_writer.write(-1, trace);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
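A callback like the one above only fires after it has been handed to an asynchronous procedure invocation. The following is a minimal, self-contained sketch of how such a callback could be registered with the standard VoltDB client API; the procedure name "InsertRow", its row-id parameter, and the stdout reporting are hypothetical placeholders, not taken from the code above, which writes traces through its own m_writer.

```java
// Minimal sketch: registering an async ProcedureCallback with the VoltDB client API.
// The procedure name "InsertRow" and its single parameter are hypothetical.
import org.voltdb.client.Client;
import org.voltdb.client.ClientFactory;
import org.voltdb.client.ClientResponse;
import org.voltdb.client.ProcedureCallback;

public class AsyncCallExample {
    public static void main(String[] args) throws Exception {
        Client client = ClientFactory.createClient();
        client.createConnection("localhost");

        final long rowId = 42; // hypothetical row id
        ProcedureCallback callback = new ProcedureCallback() {
            @Override
            public void clientCallback(ClientResponse response) {
                if (response.getStatus() == ClientResponse.SUCCESS) {
                    // In the snippet above the procedure returns a txn id as a single scalar.
                    System.out.println("txid " + response.getResults()[0].asScalarLong());
                } else {
                    System.err.println("call failed: " + response.getStatusString());
                }
            }
        };

        // Hand the callback to an asynchronous invocation; it runs when the response arrives.
        client.callProcedure(callback, "InsertRow", rowId);

        client.drain(); // wait for all outstanding async calls before closing
        client.close();
    }
}
```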
/**
 * The only public method: do all the work to start a snapshot. Assumes that a snapshot is
 * feasible, that the caller has validated it can be accomplished, and that the caller knows this
 * is a consistent or useful transaction point at which to snapshot.
 *
 * @param file_path
 * @param file_nonce
 * @param format
 * @param block
 * @param multiPartTxnId
 * @param data
 * @param context
 * @param hostname
 * @return VoltTable describing the results of the snapshot attempt
 */
public VoltTable startSnapshotting(
        final String file_path,
        final String file_nonce,
        final SnapshotFormat format,
        final byte block,
        final long multiPartTxnId,
        final long partitionTxnId,
        final long legacyPerPartitionTxnIds[],
        final String data,
        final SystemProcedureExecutionContext context,
        final String hostname,
        final HashinatorSnapshotData hashinatorData,
        final long timestamp) {
    TRACE_LOG.trace("Creating snapshot target and handing to EEs");
    final VoltTable result = SnapshotUtil.constructNodeResultsTable();
    final int numLocalSites =
            context.getCluster().getDeployment().get("deployment").getSitesperhost();

    // One site wins the race to create the snapshot targets, populating
    // m_taskListsForSites for the other sites and creating an appropriate
    // number of snapshot permits.
    synchronized (SnapshotSiteProcessor.m_snapshotCreateLock) {
        SnapshotSiteProcessor.m_snapshotCreateSetupBarrierActualAction.set(
                new Runnable() {
                    @Override
                    public void run() {
                        Map<Integer, Long> partitionTransactionIds = m_partitionLastSeenTransactionIds;
                        SNAP_LOG.debug("Last seen partition transaction ids " + partitionTransactionIds);
                        m_partitionLastSeenTransactionIds = new HashMap<Integer, Long>();
                        partitionTransactionIds.put(
                                TxnEgo.getPartitionId(multiPartTxnId), multiPartTxnId);

                        /*
                         * Do a quick sanity check that the provided IDs
                         * don't conflict with currently active partitions. If they do,
                         * it isn't fatal; we can just skip them.
                         */
                        for (long txnId : legacyPerPartitionTxnIds) {
                            final int legacyPartition = TxnEgo.getPartitionId(txnId);
                            if (partitionTransactionIds.containsKey(legacyPartition)) {
                                SNAP_LOG.warn(
                                        "While saving a snapshot and propagating legacy "
                                                + "transaction ids found an id that matches currently active partition "
                                                + partitionTransactionIds.get(legacyPartition));
                            } else {
                                partitionTransactionIds.put(legacyPartition, txnId);
                            }
                        }

                        exportSequenceNumbers = SnapshotSiteProcessor.getExportSequenceNumbers();
                        createSetupIv2(
                                file_path,
                                file_nonce,
                                format,
                                multiPartTxnId,
                                partitionTransactionIds,
                                data,
                                context,
                                result,
                                exportSequenceNumbers,
                                context.getSiteTrackerForSnapshot(),
                                hashinatorData,
                                timestamp);
                    }
                });

        // Create a barrier to use with the current number of sites to wait for,
        // or if the barrier is already set up, check if it is broken and reset if necessary.
        SnapshotSiteProcessor.readySnapshotSetupBarriers(numLocalSites);

        // From within this EE, record the sequence numbers as of the start of the snapshot (now)
        // so that the info can be put in the digest.
        SnapshotSiteProcessor.populateExportSequenceNumbersForExecutionSite(context);
        SNAP_LOG.debug(
                "Registering transaction id " + partitionTxnId + " for "
                        + TxnEgo.getPartitionId(partitionTxnId));
        m_partitionLastSeenTransactionIds.put(TxnEgo.getPartitionId(partitionTxnId), partitionTxnId);
    }

    boolean runPostTasks = false;
    VoltTable earlyResultTable = null;
    try {
        SnapshotSiteProcessor.m_snapshotCreateSetupBarrier.await();
        try {
            synchronized (m_createLock) {
                SNAP_LOG.debug(
                        "Found tasks for HSIds: "
                                + CoreUtils.hsIdCollectionToString(m_taskListsForHSIds.keySet()));
                SNAP_LOG.debug("Looking for local HSID: " + CoreUtils.hsIdToString(context.getSiteId()));
                Deque<SnapshotTableTask> taskList = m_taskListsForHSIds.remove(context.getSiteId());

                // If createSetup failed, then the first site to reach here is going
                // to send the results table generated by createSetup, and then empty out the table.
                // All other sites to reach here will send the appropriate empty table.
                // If createSetup was a success but the taskList is null, then we'll use the block
                // switch to figure out what flavor of empty SnapshotSave result table to return.
                if (!m_createSuccess.get()) {
                    // There shouldn't be any work for any site if we failed
                    assert (m_taskListsForHSIds.isEmpty());
                    VoltTable finalresult = m_createResult.get();
                    if (finalresult != null) {
                        m_createResult.set(null);
                        earlyResultTable = finalresult;
                    } else {
                        // We returned a non-empty NodeResultsTable with the failures in it,
                        // every other site needs to return a NodeResultsTable as well.
                        earlyResultTable = SnapshotUtil.constructNodeResultsTable();
                    }
                } else if (taskList == null) {
                    SNAP_LOG.debug("No task for this site, block " + block);
                    // This node is participating in the snapshot but this site has nothing to do.
                    // Send back an appropriate empty table based on the block flag.
                    if (block != 0) {
                        runPostTasks = true;
                        earlyResultTable = SnapshotUtil.constructPartitionResultsTable();
                        earlyResultTable.addRow(
                                context.getHostId(),
                                hostname,
                                CoreUtils.getSiteIdFromHSId(context.getSiteId()),
                                "SUCCESS",
                                "");
                    } else {
                        earlyResultTable = SnapshotUtil.constructNodeResultsTable();
                    }
                } else {
                    context.getSiteSnapshotConnection()
                            .initiateSnapshots(format, taskList, multiPartTxnId, exportSequenceNumbers);
                }

                if (m_deferredSetupFuture != null) {
                    // Add a listener to the deferred setup so that it can kick off the snapshot
                    // task once the setup is done.
                    m_deferredSetupFuture.addListener(
                            new Runnable() {
                                @Override
                                public void run() {
                                    DeferredSnapshotSetup deferredSnapshotSetup = null;
                                    try {
                                        deferredSnapshotSetup = m_deferredSetupFuture.get();
                                    } catch (Exception e) {
                                        // it doesn't throw
                                    }
                                    assert deferredSnapshotSetup != null;

                                    context.getSiteSnapshotConnection()
                                            .startSnapshotWithTargets(
                                                    deferredSnapshotSetup.getPlan().getSnapshotDataTargets());
                                }
                            },
                            CoreUtils.SAMETHREADEXECUTOR);
                }
            }
        } finally {
            SnapshotSiteProcessor.m_snapshotCreateFinishBarrier.await(120, TimeUnit.SECONDS);
        }
    } catch (TimeoutException e) {
        VoltDB.crashLocalVoltDB(
                "Timed out waiting 120 seconds for all threads to arrive and start snapshot",
                true, null);
    } catch (InterruptedException e) {
        result.addRow(context.getHostId(), hostname, "", "FAILURE", CoreUtils.throwableToString(e));
        earlyResultTable = result;
    } catch (BrokenBarrierException e) {
        result.addRow(context.getHostId(), hostname, "", "FAILURE", CoreUtils.throwableToString(e));
        earlyResultTable = result;
    }

    // If earlyResultTable is set, return here.
    if (earlyResultTable != null) {
        if (runPostTasks) {
            // Need to run post-snapshot tasks before finishing.
            SnapshotSiteProcessor.runPostSnapshotTasks(context);
        }
        return earlyResultTable;
    }

    if (block != 0) {
        HashSet<Exception> failures = Sets.newHashSet();
        String status = "SUCCESS";
        String err = "";
        try {
            // For a blocking snapshot, propagate any error from deferred setup back to the client.
            final DeferredSnapshotSetup deferredSnapshotSetup = m_deferredSetupFuture.get();
            if (deferredSnapshotSetup != null && deferredSnapshotSetup.getError() != null) {
                status = "FAILURE";
                err = deferredSnapshotSetup.getError().toString();
                failures.add(deferredSnapshotSetup.getError());
            }

            failures.addAll(context.getSiteSnapshotConnection().completeSnapshotWork());
            SnapshotSiteProcessor.runPostSnapshotTasks(context);
        } catch (Exception e) {
            status = "FAILURE";
            err = e.toString();
            failures.add(e);
        }

        final VoltTable blockingResult = SnapshotUtil.constructPartitionResultsTable();
        if (failures.isEmpty()) {
            blockingResult.addRow(
                    context.getHostId(),
                    hostname,
                    CoreUtils.getSiteIdFromHSId(context.getSiteId()),
                    status,
                    err);
        } else {
            status = "FAILURE";
            for (Exception e : failures) {
                err = e.toString();
            }
            blockingResult.addRow(
                    context.getHostId(),
                    hostname,
                    CoreUtils.getSiteIdFromHSId(context.getSiteId()),
                    status,
                    err);
        }
        return blockingResult;
    }

    return result;
}