// check and initialize global configuration private Configuration checkGlobalConfiguration() { String configKind = _coordinator.getDbConfigPath(_serviceInfo.getName()); Configuration config = _coordinator.queryConfiguration(_coordinator.getSiteId(), configKind, Constants.GLOBAL_ID); if (config == null) { // check if it is upgraded from previous version to yoda - configuration may be stored in // znode /config. Since SeedProvider still need access that, so we remove the config // from global in migration callback after migration is done. config = _coordinator.queryConfiguration(configKind, Constants.GLOBAL_ID); if (config != null) { _log.info("Upgrade from pre-yoda release, move global config to new location"); _coordinator.persistServiceConfiguration(_coordinator.getSiteId(), config); return config; } ConfigurationImpl cfg = new ConfigurationImpl(); cfg.setId(Constants.GLOBAL_ID); cfg.setKind(configKind); cfg.setConfig(Constants.SCHEMA_VERSION, this._serviceInfo.getVersion()); // persist configuration _coordinator.persistServiceConfiguration(_coordinator.getSiteId(), cfg); config = cfg; } return config; }
// check and initialize versioned configuration private Configuration checkVersionedConfiguration() { String serviceVersion = _serviceInfo.getVersion(); String dbSchemaVersion = _dbClient.getSchemaVersion(); if (!serviceVersion.equals(dbSchemaVersion)) { _log.warn( "The db service version {} doesn't equals Db schema version {}, " + "set db service version to Db schema version", serviceVersion, dbSchemaVersion); _serviceInfo.setVersion(dbSchemaVersion); } String kind = _coordinator.getVersionedDbConfigPath(_serviceInfo.getName(), _serviceInfo.getVersion()); Configuration config = _coordinator.queryConfiguration(_coordinator.getSiteId(), kind, _serviceInfo.getId()); if (config == null) { // check if it is upgraded from previous version to yoda - configuration may be stored in // znode /config config = _coordinator.queryConfiguration(kind, _serviceInfo.getId()); if (config != null) { _log.info("Upgrade from pre-2.5 release, move versioned dbconfig to new location"); _coordinator.persistServiceConfiguration(_coordinator.getSiteId(), config); return config; } ConfigurationImpl cfg = new ConfigurationImpl(); cfg.setId(_serviceInfo.getId()); cfg.setKind(kind); // persist configuration _coordinator.persistServiceConfiguration(_coordinator.getSiteId(), cfg); config = cfg; } return config; }
/**
 * Stores a key/value pair in this service instance's site-scoped db configuration and persists
 * it. Does nothing when the configuration does not exist.
 *
 * @param key configuration key to set
 * @param value value to store under {@code key}
 */
public void setConfigValue(String key, String value) {
  String kind = _coordinator.getDbConfigPath(_serviceInfo.getName());
  Configuration nodeConfig =
      _coordinator.queryConfiguration(_coordinator.getSiteId(), kind, _serviceInfo.getId());
  if (nodeConfig == null) {
    return;
  }
  nodeConfig.setConfig(key, value);
  _coordinator.persistServiceConfiguration(_coordinator.getSiteId(), nodeConfig);
}
/**
 * Removes every site-scoped versioned db configuration (for the current service name and
 * version) that {@code isStaleConfiguration} reports as stale, logging each removal.
 */
private void removeStaleVersionedDbConfiguration() {
  String versionedKind =
      _coordinator.getVersionedDbConfigPath(_serviceInfo.getName(), _serviceInfo.getVersion());
  List<Configuration> candidates =
      _coordinator.queryAllConfiguration(_coordinator.getSiteId(), versionedKind);

  for (Configuration candidate : candidates) {
    if (!isStaleConfiguration(candidate)) {
      continue;
    }
    _coordinator.removeServiceConfiguration(_coordinator.getSiteId(), candidate);
    _log.info("Remove stale version db config, id: {}", candidate.getId());
  }
}
/** * Checks and sets INIT_DONE state this means we are done with the actual cf changes on the * cassandra side for the target version */ private void setDbConfigInitDone() { String configKind = _coordinator.getVersionedDbConfigPath(_serviceInfo.getName(), _serviceInfo.getVersion()); Configuration config = _coordinator.queryConfiguration(_coordinator.getSiteId(), configKind, _serviceInfo.getId()); if (config != null) { if (config.getConfig(DbConfigConstants.INIT_DONE) == null) { config.setConfig(DbConfigConstants.INIT_DONE, Boolean.TRUE.toString()); _coordinator.persistServiceConfiguration(_coordinator.getSiteId(), config); } } else { // we are expecting this to exist, because its initialized from checkVersionedConfiguration throw new IllegalStateException("unexpected error, db versioned configuration is null"); } }
/**
 * Reads a value from this service instance's site-scoped db configuration.
 *
 * @param key configuration key to look up
 * @return the value returned by the configuration for {@code key}, or {@code null} when the
 *     configuration itself does not exist
 */
public String getConfigValue(String key) {
  String kind = _coordinator.getDbConfigPath(_serviceInfo.getName());
  Configuration nodeConfig =
      _coordinator.queryConfiguration(_coordinator.getSiteId(), kind, _serviceInfo.getId());
  return (nodeConfig == null) ? null : nodeConfig.getConfig(key);
}
private void removeStaleServiceConfiguration() { boolean isGeoDBSvc = isGeoDbsvc(); boolean resetAutoBootFlag = false; String configKind = _coordinator.getDbConfigPath(_serviceInfo.getName()); List<Configuration> configs = _coordinator.queryAllConfiguration(_coordinator.getSiteId(), configKind); for (Configuration config : configs) { if (isStaleConfiguration(config)) { boolean autoboot = Boolean.parseBoolean(config.getConfig(DbConfigConstants.AUTOBOOT)); String configId = config.getId(); if (isGeoDBSvc && !autoboot && (configId.equals("geodb-4") || configId.equals("geodb-5"))) { // for geodbsvc, if restore with the backup of 5 nodes to 3 nodes and the backup is made // on the cluster that the 'autoboot=false' is set on vipr4 or vipr5 // we should set the autoboot=false on the current node or no node with autoboot=false // TODO:This is a temporary/safest solution in Yoda, we'll provide a better soltuion post // Yoda resetAutoBootFlag = true; } if (isStaleConfiguration(config)) { _coordinator.removeServiceConfiguration(_coordinator.getSiteId(), config); _log.info("Remove stale db config, id: {}", config.getId()); } } } if (resetAutoBootFlag) { _log.info("set autoboot flag to false on {}", _serviceInfo.getId()); Configuration config = _coordinator.queryConfiguration( _coordinator.getSiteId(), configKind, _serviceInfo.getId()); config.setConfig(DbConfigConstants.AUTOBOOT, Boolean.FALSE.toString()); _coordinator.persistServiceConfiguration(_coordinator.getSiteId(), config); } }
/** * Checks and registers db configuration information, this is one time when cluster is coming up * for the first time */ private Configuration checkConfiguration() { String configKind = _coordinator.getDbConfigPath(_serviceInfo.getName()); Configuration config = _coordinator.queryConfiguration(_coordinator.getSiteId(), configKind, _serviceInfo.getId()); if (config == null) { // check if it is upgraded from previous version to yoda - configuration may be stored in // zk global area /config. Since SeedProvider still need access that, so we remove the config // from global in migration callback after migration is done. config = _coordinator.queryConfiguration(configKind, _serviceInfo.getId()); if (config != null) { _log.info("Upgrade from pre-yoda release, move dbconfig to new location"); _coordinator.persistServiceConfiguration(_coordinator.getSiteId(), config); return config; } // this is a new node // 1. register its configuration with coordinator // 2. assume autobootstrap configuration // this means that when a node is added, it take 1/2 of biggest token rage and // copies its data over ConfigurationImpl cfg = new ConfigurationImpl(); cfg.setId(_serviceInfo.getId()); cfg.setKind(configKind); cfg.setConfig(DbConfigConstants.NODE_ID, _coordinator.getInetAddessLookupMap().getNodeId()); cfg.setConfig(DbConfigConstants.AUTOBOOT, Boolean.TRUE.toString()); // check other existing db nodes List<Configuration> configs = _coordinator.queryAllConfiguration(_coordinator.getSiteId(), configKind); if (configs.isEmpty()) { // we are the first node - turn off autobootstrap cfg.setConfig(DbConfigConstants.AUTOBOOT, Boolean.FALSE.toString()); } // persist configuration _coordinator.persistServiceConfiguration(_coordinator.getSiteId(), cfg); config = cfg; } return config; }
/** Check offline event info to see if dbsvc/geodbsvc on this node could get started */ private void checkDBOfflineInfo() { Configuration config = _coordinator.queryConfiguration( _coordinator.getSiteId(), Constants.DB_DOWNTIME_TRACKER_CONFIG, _serviceInfo.getName()); DbOfflineEventInfo dbOfflineEventInfo = new DbOfflineEventInfo(config); String localNodeId = _coordinator.getInetAddessLookupMap().getNodeId(); Long lastActiveTimestamp = dbOfflineEventInfo.geLastActiveTimestamp(localNodeId); long zkTimeStamp = (lastActiveTimestamp == null) ? TimeUtils.getCurrentTime() : lastActiveTimestamp; File localDbDir = new File(dbDir); Date lastModified = getLastModified(localDbDir); boolean isDirEmpty = lastModified == null || localDbDir.list().length == 0; long localTimeStamp = (isDirEmpty) ? TimeUtils.getCurrentTime() : lastModified.getTime(); _log.info("Service timestamp in ZK is {}, local file is: {}", zkTimeStamp, localTimeStamp); long diffTime = (zkTimeStamp > localTimeStamp) ? (zkTimeStamp - localTimeStamp) : 0; if (diffTime >= MAX_SERVICE_OUTAGE_TIME) { String errMsg = String.format( "We detect database files on local disk are more than %s days older " + "than last time it was seen in the cluster. It may bring stale data into the database, " + "so the service cannot continue to boot. It may be the result of a VM snapshot rollback. " + "Please contact with EMC support engineer for solution.", diffTime / TimeUtils.DAYS); alertLog.error(errMsg); throw new IllegalStateException(errMsg); } Long offlineTime = dbOfflineEventInfo.getOfflineTimeInMS(localNodeId); if (!isDirEmpty && offlineTime != null && offlineTime >= MAX_SERVICE_OUTAGE_TIME) { String errMsg = String.format( "This node is offline for more than %s days. It may bring stale data into " + "database, so the service cannot continue to boot. 
Please poweroff this node and follow our " + "node recovery procedure to recover this node", offlineTime / TimeUtils.DAYS); alertLog.error(errMsg); throw new IllegalStateException(errMsg); } }
/**
 * Starts the DB service.
 *
 * <p>Sequence, as implemented below:
 *
 * <ol>
 *   <li>Publish this instance, optionally initialize keystore/truststore for pre-yoda back
 *       compatibility, and set the Cassandra system properties.
 *   <li>Under the schema lock: check/create the node, global and versioned configurations, remove
 *       stale configuration, determine the startup mode, optionally check db offline info, then
 *       start the JMX server (if configured) and the Cassandra daemon.
 *   <li>After the lock is released: mark the node as JOINED, wait for all nodes to join, start
 *       the service beacon, set the initialization flags, start the db client, and run the
 *       migration handler; background tasks start only if the handler reports success.
 * </ol>
 *
 * @throws IOException declared by the signature
 * @throws IllegalStateException wrapping any exception raised inside the locked section
 */
@Override
public void start() throws IOException {
  if (_log.isInfoEnabled()) {
    _log.info("Starting DB service...");
  }

  // Suppress Sonar violation of "Lazy initialization of static fields should be synchronized".
  // start() method will be only called one time when starting up dbsvc, so it's safe to ignore
  // the sonar violation.
  instance = this; // NOSONAR ("squid:S2444")

  if (backCompatPreYoda) {
    _log.info(
        "Pre-yoda back compatible flag detected. Initialize local keystore/truststore for Cassandra native encryption");
    initKeystoreAndTruststore();
    _schemaUtil.setBackCompatPreYoda(true);
  }
  System.setProperty("cassandra.config", _config);
  System.setProperty("cassandra.config.loader", CassandraConfigLoader.class.getName());

  // Set to false to clear all gossip state for the node on restart.
  //
  // We encountered a weird Cassandra gossip issue (COP-19246) - some nodes are missing from
  // gossip when rebooting the entire cluster simultaneously. Critical gossip fields
  // (ApplicationState.STATUS, ApplicationState.TOKENS) are not synchronized during handshaking.
  // It looks like a problem caused by an incorrect gossip version/generation in the system local
  // table, so this option is added to clean up local gossip state during reboot.
  //
  // Make sure add-vdc/add-standby pass if you remove this option in the future.
  //
  // We need to make sure the majority of local nodes are added as seed nodes. Otherwise cassandra
  // may not see other nodes if it loses connection to other sites.
  System.setProperty("cassandra.load_ring_state", "false");

  // Nodes in a new data center should not auto-bootstrap.
  // See
  // https://docs.datastax.com/en/cassandra/2.0/cassandra/operations/ops_add_dc_to_cluster_t.html
  if (_schemaUtil.isStandby()) {
    System.setProperty("cassandra.auto_bootstrap", "false");
  }

  InterProcessLock lock = null;
  Configuration config = null;
  StartupMode mode = null;
  try {
    // We use this lock to discourage more than one node bootstrapping / joining at the same time.
    // Cassandra can handle this but it's generally not recommended to make changes to schema
    // concurrently.
    lock = getLock(getSchemaLockName());
    config = checkConfiguration();
    checkGlobalConfiguration();
    checkVersionedConfiguration();
    removeStaleConfiguration();
    mode = checkStartupMode(config);
    _log.info("Current startup mode is {}", mode);

    // Check if service is allowed to get started by querying db offline info to avoid bringing
    // back stale data.
    // Hibernate mode is skipped so the node recovery procedure can recover an overdue node;
    // the check is also skipped when the node count is 1.
    int nodeCount = ((CoordinatorClientImpl) _coordinator).getNodeCount();
    if (nodeCount != 1 && mode.type != StartupMode.StartupModeType.HIBERNATE_MODE) {
      checkDBOfflineInfo();
    }

    // This call causes instantiation of a seed provider instance, so the check*Configuration
    // calls must precede it.
    removeCassandraSavedCaches();
    mode.onPreStart();
    if (_jmxServer != null) {
      _jmxServer.start();
      System.setProperty(
          "com.sun.management.jmxremote.port", Integer.toString(_jmxServer.getPort()));
    }
    _service = new CassandraDaemon();
    _service.init(null);
    _service.start();
    cassandraInitialized = true;
    mode.onPostStart();
  } catch (Exception e) {
    // mode is still null if the failure happened before checkStartupMode returned.
    if (mode != null && mode.type == StartupMode.StartupModeType.HIBERNATE_MODE) {
      printRecoveryWorkAround(e);
    }
    _log.error("e=", e);
    throw new IllegalStateException(e);
  } finally {
    if (lock != null) {
      try {
        lock.release();
      } catch (Exception ignore) {
        // Best effort: a failed release must not mask the outcome of the startup sequence.
        // NOTE(review): the exception itself is not logged - consider passing it to the logger.
        _log.debug("lock release failed");
      }
    }
  }

  // Reaching this point means the try block completed without throwing (the catch always
  // rethrows), so config was assigned by checkConfiguration().
  if (config.getConfig(DbConfigConstants.JOINED) == null) {
    config.setConfig(DbConfigConstants.JOINED, Boolean.TRUE.toString());
    _coordinator.persistServiceConfiguration(_coordinator.getSiteId(), config);
  }

  _statusChecker.waitForAllNodesJoined();

  _svcBeacon.start();
  if (backCompatPreYoda) {
    _log.info("Enable duplicated beacon in global area during pre-yoda upgrade");
    startDupBeacon();
  }

  setDbInitializedFlag();
  setDbConfigInitDone();

  _dbClient.start();

  if (_schemaUtil.isStandby()) {
    String localDataRevision = getLocalDataRevision();
    if (localDataRevision != null) {
      _schemaUtil.checkDataRevision(localDataRevision);
    }
  }

  // Set up the vdc information, so that login is enabled before migration.
  if (!isGeoDbsvc()) {
    _schemaUtil.checkAndSetupBootStrapInfo(_dbClient);
  }

  dbMgr.init();

  if (_handler.run()) {
    // Set up the bootstrap info for the root tenant; if the root tenant was migrated from local
    // db, then skip it.
    if (isGeoDbsvc()) {
      _schemaUtil.checkAndSetupBootStrapInfo(_dbClient);
    } else {
      _schemaUtil.checkAndInitStorageSystemTypes(_dbClient);
    }
    startBackgroundTasks();
    _log.info("DB service started");
  } else {
    _log.error("DB migration failed. Skipping starting background tasks.");
  }
}