@Override
public void run() {
  // Block until the startup catalog appears in ZooKeeper (published by
  // the host that owns it in the catalog-distribution init work).
  CatalogAndIds catalogStuff = null;
  do {
    try {
      catalogStuff = CatalogUtil.getCatalogFromZK(m_rvdb.getHostMessenger().getZK());
    } catch (org.apache.zookeeper_voltpatches.KeeperException.NoNodeException e) {
      // Catalog node not published yet; keep polling.
    } catch (Exception e) {
      VoltDB.crashLocalVoltDB(
          "System was interrupted while waiting for a catalog.", false, null);
    }
  } while (catalogStuff == null || catalogStuff.catalogBytes.length == 0);

  String serializedCatalog = null;
  byte[] catalogJarBytes = catalogStuff.catalogBytes;
  try {
    // Load the jar (upgrading it to this server's version if needed) and
    // extract the serialized catalog text from it.
    Pair<InMemoryJarfile, String> loadResults =
        CatalogUtil.loadAndUpgradeCatalogFromJar(catalogStuff.catalogBytes);
    serializedCatalog = CatalogUtil.getSerializedCatalogStringFromJar(loadResults.getFirst());
    // Use the (possibly upgraded) jar bytes, not the raw ZK bytes.
    catalogJarBytes = loadResults.getFirst().getFullJarBytes();
  } catch (IOException e) {
    VoltDB.crashLocalVoltDB("Unable to load catalog", false, e);
  }
  if ((serializedCatalog == null) || (serializedCatalog.length() == 0))
    VoltDB.crashLocalVoltDB("Catalog loading failure", false, null);

  /* N.B. node recovery requires discovering the current catalog version. */
  Catalog catalog = new Catalog();
  catalog.execute(serializedCatalog);
  // Drop the (large) serialized form now that the Catalog is built.
  serializedCatalog = null;

  // note if this fails it will print an error first
  // This is where we compile real catalog and create runtime
  // catalog context. To validate deployment we compile and create
  // a starter context which uses a placeholder catalog.
  String result = CatalogUtil.compileDeployment(catalog, m_deployment, false);
  if (result != null) {
    hostLog.fatal(result);
    VoltDB.crashLocalVoltDB(result);
  }

  try {
    // Replace the starter context with the real one built from ZK state.
    m_rvdb.m_catalogContext =
        new CatalogContext(
            catalogStuff.txnId,
            catalogStuff.uniqueId,
            catalog,
            catalogJarBytes,
            // Our starter catalog has set the deployment stuff, just yoink it out for now
            m_rvdb.m_catalogContext.getDeploymentBytes(),
            catalogStuff.version,
            -1);
  } catch (Exception e) {
    VoltDB.crashLocalVoltDB("Error agreeing on starting catalog version", true, e);
  }
}
@Override public void run() { // if I'm the leader, send out the catalog if (m_rvdb.m_myHostId == m_rvdb.m_hostIdWithStartupCatalog) { try { // If no catalog was supplied provide an empty one. if (m_rvdb.m_pathToStartupCatalog == null) { try { File emptyJarFile = CatalogUtil.createTemporaryEmptyCatalogJarFile(); if (emptyJarFile == null) { VoltDB.crashLocalVoltDB("Failed to generate empty catalog."); } m_rvdb.m_pathToStartupCatalog = emptyJarFile.getAbsolutePath(); } catch (IOException e) { VoltDB.crashLocalVoltDB( "I/O exception while creating empty catalog jar file.", false, e); } } // Get the catalog bytes and byte count. byte[] catalogBytes = readCatalog(m_rvdb.m_pathToStartupCatalog); // Export needs a cluster global unique id for the initial catalog version long catalogUniqueId = UniqueIdGenerator.makeIdFromComponents( System.currentTimeMillis(), 0, MpInitiator.MP_INIT_PID); hostLog.debug(String.format("Sending %d catalog bytes", catalogBytes.length)); long catalogTxnId; catalogTxnId = TxnEgo.makeZero(MpInitiator.MP_INIT_PID).getTxnId(); // Need to get the deployment bytes from the starter catalog context byte[] deploymentBytes = m_rvdb.getCatalogContext().getDeploymentBytes(); // publish the catalog bytes to ZK CatalogUtil.updateCatalogToZK( m_rvdb.getHostMessenger().getZK(), 0, catalogTxnId, catalogUniqueId, catalogBytes, deploymentBytes); } catch (IOException e) { VoltDB.crashGlobalVoltDB("Unable to distribute catalog.", false, e); } catch (org.apache.zookeeper_voltpatches.KeeperException e) { VoltDB.crashGlobalVoltDB("Unable to publish catalog.", false, e); } catch (InterruptedException e) { VoltDB.crashGlobalVoltDB("Interrupted while publishing catalog.", false, e); } } }
/**
 * Create the completion node for the snapshot identified by the txnId. It assumes that all hosts
 * will race to call this, so it doesn't fail if the node already exists.
 *
 * @param path Path of the snapshot
 * @param nonce Nonce of the snapshot
 * @param txnId Snapshot transaction id; must be positive
 * @param isTruncation Whether or not this is a truncation snapshot
 * @param truncReqId Optional unique ID fed back to the monitor for identification
 * @return callback that completes when the asynchronous ZK create finishes; the create is
 *     non-fatal if the node already exists
 */
public static ZKUtil.StringCallback createSnapshotCompletionNode(
    String path, String nonce, long txnId, boolean isTruncation, String truncReqId) {
  if (txnId <= 0) {
    VoltDB.crashGlobalVoltDB("Txnid must be greater than 0", true, null);
  }
  byte nodeBytes[] = null;
  try {
    // Serialize the initial (incomplete) snapshot state. hostCount starts
    // at -1 and is corrected later by logParticipatingHostCount(); the
    // export sequence numbers object is filled in as hosts report.
    JSONStringer stringer = new JSONStringer();
    stringer.object();
    stringer.key("txnId").value(txnId);
    stringer.key("isTruncation").value(isTruncation);
    stringer.key("didSucceed").value(false);
    stringer.key("hostCount").value(-1);
    stringer.key("path").value(path);
    stringer.key("nonce").value(nonce);
    stringer.key("truncReqId").value(truncReqId);
    stringer.key("exportSequenceNumbers").object().endObject();
    stringer.endObject();
    JSONObject jsonObj = new JSONObject(stringer.toString());
    nodeBytes = jsonObj.toString(4).getBytes(Charsets.UTF_8);
  } catch (Exception e) {
    VoltDB.crashLocalVoltDB("Error serializing snapshot completion node JSON", true, e);
  }

  // Fire the create asynchronously; all hosts race, so duplicate-node
  // failures are expected and resolved by the caller via the callback.
  ZKUtil.StringCallback cb = new ZKUtil.StringCallback();
  final String snapshotPath = VoltZK.completed_snapshots + "/" + txnId;
  VoltDB.instance()
      .getHostMessenger()
      .getZK()
      .create(snapshotPath, nodeBytes, Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT, cb, null);
  return cb;
}
/** Shut down the underlying leader elector; any failure here is fatal to this node. */
void shutdown() {
  try {
    m_leaderElector.shutdown();
  } catch (Exception ex) {
    VoltDB.crashLocalVoltDB("Error shutting down GlobalServiceElector's LeaderElector", true, ex);
  }
}
/** Add a service to be notified if this node becomes the global leader */
synchronized void registerService(Promotable service) {
  m_services.add(service);
  if (!m_isLeader) {
    return;
  }
  // Already the leader: promote the late registrant immediately so it
  // doesn't miss the promotion it would have received in becomeLeader().
  try {
    service.acceptPromotion();
  } catch (Exception ex) {
    VoltDB.crashLocalVoltDB("Unable to promote global service.", true, ex);
  }
}
/** * Once participating host count is set, SnapshotCompletionMonitor can check this ZK node to * determine whether the snapshot has finished or not. * * <p>This should only be called when all participants have responded. It is possible that some * hosts finish taking snapshot before the coordinator logs the participating host count. In this * case, the host count would have been decremented multiple times already. To make sure finished * hosts are logged correctly, this method adds participating host count + 1 to the current host * count. * * @param txnId The snapshot txnId * @param participantCount The number of hosts participating in this snapshot */ public static void logParticipatingHostCount(long txnId, int participantCount) { ZooKeeper zk = VoltDB.instance().getHostMessenger().getZK(); final String snapshotPath = VoltZK.completed_snapshots + "/" + txnId; boolean success = false; while (!success) { Stat stat = new Stat(); byte data[] = null; try { data = zk.getData(snapshotPath, false, stat); } catch (KeeperException e) { if (e.code() == KeeperException.Code.NONODE) { // If snapshot creation failed for some reason, the node won't exist. 
ignore return; } VoltDB.crashLocalVoltDB("Failed to get snapshot completion node", true, e); } catch (InterruptedException e) { VoltDB.crashLocalVoltDB("Interrupted getting snapshot completion node", true, e); } if (data == null) { VoltDB.crashLocalVoltDB("Data should not be null if the node exists", false, null); } try { JSONObject jsonObj = new JSONObject(new String(data, Charsets.UTF_8)); if (jsonObj.getLong("txnId") != txnId) { VoltDB.crashLocalVoltDB("TxnId should match", false, null); } int hostCount = jsonObj.getInt("hostCount"); // +1 because hostCount was initialized to -1 jsonObj.put("hostCount", hostCount + participantCount + 1); zk.setData(snapshotPath, jsonObj.toString(4).getBytes(Charsets.UTF_8), stat.getVersion()); } catch (KeeperException.BadVersionException e) { continue; } catch (Exception e) { VoltDB.crashLocalVoltDB("This ZK call should never fail", true, e); } success = true; } }
@Override
public void run() {
  // Command logging is only instantiated when it is enabled in the catalog
  // AND this is the enterprise edition (the implementation is a pro class
  // that may be absent from community builds).
  final boolean logEnabled =
      m_rvdb.m_catalogContext.cluster.getLogconfig().get("log").getEnabled();
  if (!logEnabled || !m_config.m_isEnterprise) {
    return;
  }
  try {
    final Class<?> loggerClass =
        MiscUtils.loadProClass("org.voltdb.CommandLogImpl", "Command logging", false);
    if (loggerClass != null) {
      m_rvdb.m_commandLog = (CommandLog) loggerClass.newInstance();
    }
  } catch (InstantiationException | IllegalAccessException e) {
    VoltDB.crashLocalVoltDB("Unable to instantiate command log", true, e);
  }
}
@Override
public synchronized void becomeLeader() {
  hostLog.info("Host " + m_hostId + " promoted to be the global service provider");
  m_isLeader = true;
  // Promote every previously-registered service; a promotion failure is
  // unrecoverable for this node.
  for (Promotable registered : m_services) {
    try {
      registered.acceptPromotion();
    } catch (Exception ex) {
      VoltDB.crashLocalVoltDB("Unable to promote global service.", true, ex);
    }
  }
}
@Override public void run() { // Let the Export system read its configuration from the catalog. try { ExportManager.initialize( m_rvdb.m_myHostId, m_rvdb.m_catalogContext, m_isRejoin, m_rvdb.m_messenger, m_rvdb.m_partitionsToSitesAtStartupForExportInit); } catch (Throwable t) { VoltDB.crashLocalVoltDB("Error setting up export", true, t); } }
Inits(RealVoltDB rvdb, int threadCount) { m_rvdb = rvdb; m_config = rvdb.m_config; // determine if this is a rejoining node // (used for license check and later the actual rejoin) if (m_config.m_startAction.doesRejoin()) { m_isRejoin = true; } else { m_isRejoin = false; } m_threadCount = threadCount; m_deployment = rvdb.m_catalogContext.getDeployment(); // find all the InitWork subclasses using reflection and load them up Class<?>[] declaredClasses = Inits.class.getDeclaredClasses(); for (Class<?> cls : declaredClasses) { // skip base classes and fake classes if (cls == InitWork.class) continue; if (cls == COMPLETION_WORK.class) continue; if (InitWork.class.isAssignableFrom(cls)) { InitWork instance = null; try { Constructor<?> constructor = cls.getDeclaredConstructor(Inits.class); instance = (InitWork) constructor.newInstance(this); } catch (Exception e) { VoltDB.crashLocalVoltDB("Critical error loading class " + cls.getName(), true, e); } m_jobs.put(instance.getClass(), instance); } } // make blockers and blockees symmetrical for (InitWork iw : m_jobs.values()) { for (Class<? extends InitWork> cls : iw.m_blockers) { InitWork blocker = m_jobs.get(cls); blocker.m_blockees.add(iw.getClass()); } } // collect initially ready jobs List<Class<? extends InitWork>> toRemove = new ArrayList<Class<? extends InitWork>>(); for (Entry<Class<? extends InitWork>, InitWork> e : m_jobs.entrySet()) { if (e.getValue().m_blockers.size() == 0) { toRemove.add(e.getKey()); m_readyJobs.add(e.getValue()); } } }
@Override public void run() { while (true) { InitWork iw = null; try { iw = m_readyJobs.take(); } catch (InterruptedException e) { VoltDB.crashLocalVoltDB(e.getMessage(), true, e); } if (iw instanceof COMPLETION_WORK) return; // hostLog.info("Running InitWorker: " + iw.getClass().getName()); iw.run(); completeInitWork(iw); } }
public ParameterSet getParams() { params.run(); try { return params.get(); } catch (InterruptedException e) { VoltDB.crashLocalVoltDB("Interrupted while deserializing a parameter set", false, e); } catch (ExecutionException e) { // Don't rethrow Errors as RuntimeExceptions because we will eat their // delicious goodness later if (e.getCause() != null && e.getCause() instanceof Error) { throw (Error) e.getCause(); } throw new RuntimeException(e); } return null; }
/** * Load the full subclass if it should, otherwise load the noop stub. * * @param partitionId partition id * @param overflowDir * @return Instance of PartitionDRGateway */ public static PartitionDRGateway getInstance( int partitionId, NodeDRGateway nodeGateway, boolean isRejoin) { final VoltDBInterface vdb = VoltDB.instance(); LicenseApi api = vdb.getLicenseApi(); final boolean licensedToDR = api.isDrReplicationAllowed(); // if this is a primary cluster in a DR-enabled scenario // try to load the real version of this class PartitionDRGateway pdrg = null; if (licensedToDR && nodeGateway != null) { pdrg = tryToLoadProVersion(); } if (pdrg == null) { pdrg = new PartitionDRGateway(); } // init the instance and return try { pdrg.init(partitionId, nodeGateway, isRejoin); } catch (IOException e) { VoltDB.crashLocalVoltDB(e.getMessage(), false, e); } return pdrg; }
/**
 * Lazily construct the shared HSQL backend and replay the catalog schema into it. The schema is
 * stored base64'd + compressed; each line of the decompressed text is a hex-encoded DDL
 * statement. Construction failure is fatal.
 */
public static HsqlBackend initializeHSQLBackend(long siteId, CatalogContext context) {
  synchronized (backendLock) {
    if (m_backend == null) {
      try {
        m_backend = new HsqlBackend(siteId);
        final String compressedDDL = context.database.getSchema();
        final String ddl = Encoder.decodeBase64AndDecompress(compressedDDL);
        for (final String line : ddl.split("\n")) {
          final String statement = Encoder.hexDecodeToString(line).trim();
          // Skip blank lines produced by the encoding.
          if (!statement.isEmpty()) {
            m_backend.runDDL(statement);
          }
        }
      } catch (final Exception ex) {
        hostLog.fatal("Unable to construct HSQL backend");
        VoltDB.crashLocalVoltDB(ex.getMessage(), true, ex);
      }
    }
    return m_backend;
  }
}
@Override
public void run() {
  // RestoreAgent is only needed for a fresh start/recover: rejoining and
  // elastically-joining nodes get their state from the live cluster.
  if (!m_isRejoin && !m_config.m_isRejoinTest && !m_rvdb.m_joining) {
    // Pull the default snapshot schedule path from the catalog, if one exists.
    String snapshotPath = null;
    if (m_rvdb
            .m_catalogContext
            .cluster
            .getDatabases()
            .get("database")
            .getSnapshotschedule()
            .get("default")
        != null) {
      snapshotPath =
          m_rvdb
              .m_catalogContext
              .cluster
              .getDatabases()
              .get("database")
              .getSnapshotschedule()
              .get("default")
              .getPath();
    }

    // Restore covers every configured partition: [0, N).
    int[] allPartitions = new int[m_rvdb.m_configuredNumberOfPartitions];
    for (int ii = 0; ii < allPartitions.length; ii++) {
      allPartitions[ii] = ii;
    }

    org.voltdb.catalog.CommandLog cl = m_rvdb.m_catalogContext.cluster.getLogconfig().get("log");

    try {
      m_rvdb.m_restoreAgent =
          new RestoreAgent(
              m_rvdb.m_messenger,
              m_rvdb.getSnapshotCompletionMonitor(),
              m_rvdb,
              m_config.m_startAction,
              cl.getEnabled(),
              cl.getLogpath(),
              cl.getInternalsnapshotpath(),
              snapshotPath,
              allPartitions,
              CatalogUtil.getVoltDbRoot(m_deployment.getPaths()).getAbsolutePath());
    } catch (IOException e) {
      VoltDB.crashLocalVoltDB("Unable to construct the RestoreAgent", true, e);
    }

    // The restore agent participates in global leader election.
    m_rvdb.m_globalServiceElector.registerService(m_rvdb.m_restoreAgent);
    m_rvdb.m_restoreAgent.setCatalogContext(m_rvdb.m_catalogContext);

    // Generate plans and get (hostID, catalogPath) pair
    Pair<Integer, String> catalog = m_rvdb.m_restoreAgent.findRestoreCatalog();

    // if the restore agent found a catalog, set the following info
    // so the right node can send it out to the others
    if (catalog != null) {
      // Make sure the catalog corresponds to the current server version.
      // Prevent automatic upgrades by rejecting mismatched versions.
      int hostId = catalog.getFirst().intValue();
      String catalogPath = catalog.getSecond();
      // Perform a version check when the catalog jar is available
      // on the current host.
      // Check that this host is the one providing the catalog.
      if (m_rvdb.m_myHostId == hostId) {
        try {
          byte[] catalogBytes = readCatalog(catalogPath);
          InMemoryJarfile inMemoryJar = CatalogUtil.loadInMemoryJarFile(catalogBytes);
          // This call pre-checks and returns the build info/version.
          String[] buildInfo = CatalogUtil.getBuildInfoFromJar(inMemoryJar);
          String catalogVersion = buildInfo[0];
          String serverVersion = m_rvdb.getVersionString();
          if (!catalogVersion.equals(serverVersion)) {
            VoltDB.crashLocalVoltDB(
                String.format(
                    "Unable to load version %s catalog \"%s\" "
                        + "from snapshot into a version %s server.",
                    catalogVersion, catalogPath, serverVersion),
                false,
                null);
          }
        } catch (IOException e) {
          // Make it non-fatal with no check performed.
          hostLog.warn(
              String.format(
                  "Unable to load catalog for version check due to exception: %s.",
                  e.getMessage()));
        }
      }
      hostLog.debug("Found catalog to load on host " + hostId + ": " + catalogPath);
      // Record which host will distribute the startup catalog.
      m_rvdb.m_hostIdWithStartupCatalog = hostId;
      assert (m_rvdb.m_hostIdWithStartupCatalog >= 0);
      m_rvdb.m_pathToStartupCatalog = catalogPath;
      assert (m_rvdb.m_pathToStartupCatalog != null);
    }
  }
}
/**
 * The only public method: do all the work to start a snapshot. Assumes that a snapshot is
 * feasible, that the caller has validated it can be accomplished, that the caller knows this is a
 * consistent or useful transaction point at which to snapshot.
 *
 * <p>Every local site calls in here; barriers and the create lock coordinate which site performs
 * setup and which sites merely pick up their task lists.
 *
 * @param file_path directory to write snapshot artifacts into
 * @param file_nonce nonce identifying this snapshot
 * @param format snapshot format (native/CSV/stream)
 * @param block nonzero means the caller wants to block until snapshot work completes
 * @param multiPartTxnId MP transaction id of the initiating snapshot transaction
 * @param partitionTxnId this partition's last-seen transaction id
 * @param legacyPerPartitionTxnIds txn ids carried forward for partitions no longer active
 * @param data JSON blob of extra snapshot parameters
 * @param context execution context of the calling site
 * @param hostname local hostname, used in result rows
 * @param hashinatorData hashinator config to embed in the snapshot
 * @param timestamp snapshot timestamp
 * @return VoltTable describing the results of the snapshot attempt
 */
public VoltTable startSnapshotting(
    final String file_path,
    final String file_nonce,
    final SnapshotFormat format,
    final byte block,
    final long multiPartTxnId,
    final long partitionTxnId,
    final long legacyPerPartitionTxnIds[],
    final String data,
    final SystemProcedureExecutionContext context,
    final String hostname,
    final HashinatorSnapshotData hashinatorData,
    final long timestamp) {
  TRACE_LOG.trace("Creating snapshot target and handing to EEs");
  final VoltTable result = SnapshotUtil.constructNodeResultsTable();
  final int numLocalSites =
      context.getCluster().getDeployment().get("deployment").getSitesperhost();

  // One site wins the race to create the snapshot targets, populating
  // m_taskListsForSites for the other sites and creating an appropriate
  // number of snapshot permits.
  synchronized (SnapshotSiteProcessor.m_snapshotCreateLock) {
    // The barrier action below runs exactly once (by the last site to
    // arrive at the setup barrier) and performs the shared setup.
    SnapshotSiteProcessor.m_snapshotCreateSetupBarrierActualAction.set(
        new Runnable() {
          @Override
          public void run() {
            // Take ownership of the last-seen-per-partition txn id map and
            // reset the shared one for the next snapshot.
            Map<Integer, Long> partitionTransactionIds = new HashMap<Integer, Long>();
            partitionTransactionIds = m_partitionLastSeenTransactionIds;
            SNAP_LOG.debug("Last seen partition transaction ids " + partitionTransactionIds);
            m_partitionLastSeenTransactionIds = new HashMap<Integer, Long>();
            partitionTransactionIds.put(TxnEgo.getPartitionId(multiPartTxnId), multiPartTxnId);

            /*
             * Do a quick sanity check that the provided IDs
             * don't conflict with currently active partitions. If they do
             * it isn't fatal we can just skip it.
             */
            for (long txnId : legacyPerPartitionTxnIds) {
              final int legacyPartition = TxnEgo.getPartitionId(txnId);
              if (partitionTransactionIds.containsKey(legacyPartition)) {
                SNAP_LOG.warn(
                    "While saving a snapshot and propagating legacy "
                        + "transaction ids found an id that matches currently active partition"
                        + partitionTransactionIds.get(legacyPartition));
              } else {
                partitionTransactionIds.put(legacyPartition, txnId);
              }
            }
            exportSequenceNumbers = SnapshotSiteProcessor.getExportSequenceNumbers();
            createSetupIv2(
                file_path,
                file_nonce,
                format,
                multiPartTxnId,
                partitionTransactionIds,
                data,
                context,
                result,
                exportSequenceNumbers,
                context.getSiteTrackerForSnapshot(),
                hashinatorData,
                timestamp);
          }
        });

    // Create a barrier to use with the current number of sites to wait for
    // or if the barrier is already set up check if it is broken and reset if necessary
    SnapshotSiteProcessor.readySnapshotSetupBarriers(numLocalSites);

    // From within this EE, record the sequence numbers as of the start of the snapshot (now)
    // so that the info can be put in the digest.
    SnapshotSiteProcessor.populateExportSequenceNumbersForExecutionSite(context);
    SNAP_LOG.debug(
        "Registering transaction id "
            + partitionTxnId
            + " for "
            + TxnEgo.getPartitionId(partitionTxnId));
    m_partitionLastSeenTransactionIds.put(TxnEgo.getPartitionId(partitionTxnId), partitionTxnId);
  }

  boolean runPostTasks = false;
  VoltTable earlyResultTable = null;
  try {
    // All local sites rendezvous here; the last arrival runs the setup action above.
    SnapshotSiteProcessor.m_snapshotCreateSetupBarrier.await();
    try {
      synchronized (m_createLock) {
        SNAP_LOG.debug(
            "Found tasks for HSIds: "
                + CoreUtils.hsIdCollectionToString(m_taskListsForHSIds.keySet()));
        SNAP_LOG.debug("Looking for local HSID: " + CoreUtils.hsIdToString(context.getSiteId()));
        Deque<SnapshotTableTask> taskList = m_taskListsForHSIds.remove(context.getSiteId());
        // If createSetup failed, then the first site to reach here is going
        // to send the results table generated by createSetup, and then empty out the table.
        // All other sites to reach here will send the appropriate empty table.
        // If createSetup was a success but the taskList is null, then we'll use the block
        // switch to figure out what flavor of empty SnapshotSave result table to return.
        if (!m_createSuccess.get()) {
          // There shouldn't be any work for any site if we failed
          assert (m_taskListsForHSIds.isEmpty());
          VoltTable finalresult = m_createResult.get();
          if (finalresult != null) {
            m_createResult.set(null);
            earlyResultTable = finalresult;
          } else {
            // We returned a non-empty NodeResultsTable with the failures in it,
            // every other site needs to return a NodeResultsTable as well.
            earlyResultTable = SnapshotUtil.constructNodeResultsTable();
          }
        } else if (taskList == null) {
          SNAP_LOG.debug("No task for this site, block " + block);
          // This node is participating in the snapshot but this site has nothing to do.
          // Send back an appropriate empty table based on the block flag
          if (block != 0) {
            runPostTasks = true;
            earlyResultTable = SnapshotUtil.constructPartitionResultsTable();
            earlyResultTable.addRow(
                context.getHostId(),
                hostname,
                CoreUtils.getSiteIdFromHSId(context.getSiteId()),
                "SUCCESS",
                "");
          } else {
            earlyResultTable = SnapshotUtil.constructNodeResultsTable();
          }
        } else {
          // Normal path: hand this site's task list to its snapshot connection.
          context
              .getSiteSnapshotConnection()
              .initiateSnapshots(format, taskList, multiPartTxnId, exportSequenceNumbers);
        }

        if (m_deferredSetupFuture != null) {
          // Add a listener to the deferred setup so that it can kick off the snapshot
          // task once the setup is done.
          m_deferredSetupFuture.addListener(
              new Runnable() {
                @Override
                public void run() {
                  DeferredSnapshotSetup deferredSnapshotSetup = null;
                  try {
                    deferredSnapshotSetup = m_deferredSetupFuture.get();
                  } catch (Exception e) {
                    // it doesn't throw
                  }
                  assert deferredSnapshotSetup != null;
                  context
                      .getSiteSnapshotConnection()
                      .startSnapshotWithTargets(
                          deferredSnapshotSetup.getPlan().getSnapshotDataTargets());
                }
              },
              CoreUtils.SAMETHREADEXECUTOR);
        }
      }
    } finally {
      // Everyone must reach the finish barrier, even on failure, or a
      // straggler would hang forever; 120s is the hard cap.
      SnapshotSiteProcessor.m_snapshotCreateFinishBarrier.await(120, TimeUnit.SECONDS);
    }
  } catch (TimeoutException e) {
    VoltDB.crashLocalVoltDB(
        "Timed out waiting 120 seconds for all threads to arrive and start snapshot",
        true,
        null);
  } catch (InterruptedException e) {
    result.addRow(context.getHostId(), hostname, "", "FAILURE", CoreUtils.throwableToString(e));
    earlyResultTable = result;
  } catch (BrokenBarrierException e) {
    result.addRow(context.getHostId(), hostname, "", "FAILURE", CoreUtils.throwableToString(e));
    earlyResultTable = result;
  }

  // If earlyResultTable is set, return here
  if (earlyResultTable != null) {
    if (runPostTasks) {
      // Need to run post-snapshot tasks before finishing
      SnapshotSiteProcessor.runPostSnapshotTasks(context);
    }
    return earlyResultTable;
  }

  if (block != 0) {
    // Blocking request: wait for all snapshot work to complete and report
    // success/failure in a partition results table.
    HashSet<Exception> failures = Sets.newHashSet();
    String status = "SUCCESS";
    String err = "";
    try {
      // For blocking snapshot, propogate the error from deferred setup back to the client
      final DeferredSnapshotSetup deferredSnapshotSetup = m_deferredSetupFuture.get();
      if (deferredSnapshotSetup != null && deferredSnapshotSetup.getError() != null) {
        status = "FAILURE";
        err = deferredSnapshotSetup.getError().toString();
        failures.add(deferredSnapshotSetup.getError());
      }
      failures.addAll(context.getSiteSnapshotConnection().completeSnapshotWork());
      SnapshotSiteProcessor.runPostSnapshotTasks(context);
    } catch (Exception e) {
      status = "FAILURE";
      err = e.toString();
      failures.add(e);
    }
    final VoltTable blockingResult = SnapshotUtil.constructPartitionResultsTable();
    if (failures.isEmpty()) {
      blockingResult.addRow(
          context.getHostId(),
          hostname,
          CoreUtils.getSiteIdFromHSId(context.getSiteId()),
          status,
          err);
    } else {
      status = "FAILURE";
      // NOTE(review): only the last failure's text survives in err here.
      for (Exception e : failures) {
        err = e.toString();
      }
      blockingResult.addRow(
          context.getHostId(),
          hostname,
          CoreUtils.getSiteIdFromHSId(context.getSiteId()),
          status,
          err);
    }
    return blockingResult;
  }
  return result;
}