public void setConfigValue(String key, String value) { String configKind = _coordinator.getDbConfigPath(_serviceInfo.getName()); Configuration config = _coordinator.queryConfiguration(_coordinator.getSiteId(), configKind, _serviceInfo.getId()); if (config != null) { config.setConfig(key, value); _coordinator.persistServiceConfiguration(_coordinator.getSiteId(), config); } }
/** * Checks and sets INIT_DONE state this means we are done with the actual cf changes on the * cassandra side for the target version */ private void setDbConfigInitDone() { String configKind = _coordinator.getVersionedDbConfigPath(_serviceInfo.getName(), _serviceInfo.getVersion()); Configuration config = _coordinator.queryConfiguration(_coordinator.getSiteId(), configKind, _serviceInfo.getId()); if (config != null) { if (config.getConfig(DbConfigConstants.INIT_DONE) == null) { config.setConfig(DbConfigConstants.INIT_DONE, Boolean.TRUE.toString()); _coordinator.persistServiceConfiguration(_coordinator.getSiteId(), config); } } else { // we are expecting this to exist, because its initialized from checkVersionedConfiguration throw new IllegalStateException("unexpected error, db versioned configuration is null"); } }
private void removeStaleServiceConfiguration() { boolean isGeoDBSvc = isGeoDbsvc(); boolean resetAutoBootFlag = false; String configKind = _coordinator.getDbConfigPath(_serviceInfo.getName()); List<Configuration> configs = _coordinator.queryAllConfiguration(_coordinator.getSiteId(), configKind); for (Configuration config : configs) { if (isStaleConfiguration(config)) { boolean autoboot = Boolean.parseBoolean(config.getConfig(DbConfigConstants.AUTOBOOT)); String configId = config.getId(); if (isGeoDBSvc && !autoboot && (configId.equals("geodb-4") || configId.equals("geodb-5"))) { // for geodbsvc, if restore with the backup of 5 nodes to 3 nodes and the backup is made // on the cluster that the 'autoboot=false' is set on vipr4 or vipr5 // we should set the autoboot=false on the current node or no node with autoboot=false // TODO:This is a temporary/safest solution in Yoda, we'll provide a better soltuion post // Yoda resetAutoBootFlag = true; } if (isStaleConfiguration(config)) { _coordinator.removeServiceConfiguration(_coordinator.getSiteId(), config); _log.info("Remove stale db config, id: {}", config.getId()); } } } if (resetAutoBootFlag) { _log.info("set autoboot flag to false on {}", _serviceInfo.getId()); Configuration config = _coordinator.queryConfiguration( _coordinator.getSiteId(), configKind, _serviceInfo.getId()); config.setConfig(DbConfigConstants.AUTOBOOT, Boolean.FALSE.toString()); _coordinator.persistServiceConfiguration(_coordinator.getSiteId(), config); } }
@Override public void start() throws IOException { if (_log.isInfoEnabled()) { _log.info("Starting DB service..."); } // Suppress Sonar violation of Lazy initialization of static fields should be synchronized // start() method will be only called one time when startup dbsvc, so it's safe to ignore sonar // violation instance = this; // NOSONAR ("squid:S2444") if (backCompatPreYoda) { _log.info( "Pre-yoda back compatible flag detected. Initialize local keystore/truststore for Cassandra native encryption"); initKeystoreAndTruststore(); _schemaUtil.setBackCompatPreYoda(true); } System.setProperty("cassandra.config", _config); System.setProperty("cassandra.config.loader", CassandraConfigLoader.class.getName()); // Set to false to clear all gossip state for the node on restart. // // We encounter a weird Cassandra grossip issue(COP-19246) - some nodes are missing from gossip // when rebooting the entire cluster simultaneously. Critical Gossip // fields(ApplicationState.STATUS, ApplicationState.TOKENS) // are not synchronized during handshaking. It looks like some problem caused by incorrect // gossip version/generation // at system local table. So add this option to cleanup local gossip state during reboot // // Make sure add-vdc/add-standby passed when you would remove this option in the future. // // We need make sure majority local nodes are added as seed nodes. Otherwise cassandra may not // see other nodes if it loses // connection to other sites System.setProperty("cassandra.load_ring_state", "false"); // Nodes in new data center should not auto-bootstrap. // See // https://docs.datastax.com/en/cassandra/2.0/cassandra/operations/ops_add_dc_to_cluster_t.html if (_schemaUtil.isStandby()) { System.setProperty("cassandra.auto_bootstrap", "false"); } InterProcessLock lock = null; Configuration config = null; StartupMode mode = null; try { // we use this lock to discourage more than one node bootstrapping / joining at the same time // Cassandra can handle this but it's generally not recommended to make changes to schema // concurrently lock = getLock(getSchemaLockName()); config = checkConfiguration(); checkGlobalConfiguration(); checkVersionedConfiguration(); removeStaleConfiguration(); mode = checkStartupMode(config); _log.info("Current startup mode is {}", mode); // Check if service is allowed to get started by querying db offline info to avoid bringing // back stale data. // Skipping hibernate mode for node recovery procedure to recover the overdue node. int nodeCount = ((CoordinatorClientImpl) _coordinator).getNodeCount(); if (nodeCount != 1 && mode.type != StartupMode.StartupModeType.HIBERNATE_MODE) { checkDBOfflineInfo(); } // this call causes instantiation of a seed provider instance, so the check*Configuration // calls must be preceed it removeCassandraSavedCaches(); mode.onPreStart(); if (_jmxServer != null) { _jmxServer.start(); System.setProperty( "com.sun.management.jmxremote.port", Integer.toString(_jmxServer.getPort())); } _service = new CassandraDaemon(); _service.init(null); _service.start(); cassandraInitialized = true; mode.onPostStart(); } catch (Exception e) { if (mode != null && mode.type == StartupMode.StartupModeType.HIBERNATE_MODE) { printRecoveryWorkAround(e); } _log.error("e=", e); throw new IllegalStateException(e); } finally { if (lock != null) { try { lock.release(); } catch (Exception ignore) { _log.debug("lock release failed"); } } } if (config.getConfig(DbConfigConstants.JOINED) == null) { config.setConfig(DbConfigConstants.JOINED, Boolean.TRUE.toString()); _coordinator.persistServiceConfiguration(_coordinator.getSiteId(), config); } _statusChecker.waitForAllNodesJoined(); _svcBeacon.start(); if (backCompatPreYoda) { _log.info("Enable duplicated beacon in global area during pre-yoda upgrade"); startDupBeacon(); } setDbInitializedFlag(); setDbConfigInitDone(); _dbClient.start(); if (_schemaUtil.isStandby()) { String localDataRevision = getLocalDataRevision(); if (localDataRevision != null) { _schemaUtil.checkDataRevision(localDataRevision); } } // Setup the vdc information, so that login enabled before migration if (!isGeoDbsvc()) { _schemaUtil.checkAndSetupBootStrapInfo(_dbClient); } dbMgr.init(); if (_handler.run()) { // Setup the bootstrap info root tenant, if root tenant migrated from local db, then skip it if (isGeoDbsvc()) { _schemaUtil.checkAndSetupBootStrapInfo(_dbClient); } else { _schemaUtil.checkAndInitStorageSystemTypes(_dbClient); } startBackgroundTasks(); _log.info("DB service started"); } else { _log.error("DB migration failed. Skipping starting background tasks."); } }
/** * We select seeds based on the following rules - For DR standby sites, use all nodes in active * site as seeds For DR active site, use local nodes as seeds. The rule to select local seed is - * first boot node(AUTOBOOT = false) uses itself as seed nodes so that it could boot and * initialize schema - subsquent node(AUTOBOOT = true) uses other successfully booted(JOINED = * true) nodes as seeds */ @Override public List<InetAddress> getSeeds() { try { CoordinatorClientInetAddressMap nodeMap = _client.getInetAddessLookupMap(); List<Configuration> configs = _client.queryAllConfiguration(_client.getSiteId(), Constants.DB_CONFIG); List<InetAddress> seeds = new ArrayList<>(); // If we are upgrading from pre-2.5 releases, dbconfig exists in zk global area List<Configuration> leftoverConfig = _client.queryAllConfiguration(Constants.DB_CONFIG); configs.addAll(leftoverConfig); // Add extra seeds - seeds from remote sites for (String seed : extraSeeds) { if (StringUtils.isNotEmpty(seed)) { seeds.add(InetAddress.getByName(seed)); } } // On DR standby site, only use seeds from active site. On active site // we use local seeds if (isDrActiveSite) { for (int i = 0; i < configs.size(); i++) { Configuration config = configs.get(i); // Bypasses item of "global" and folders of "version", just check db configurations. if (config.getId() == null || config.getId().equals(Constants.GLOBAL_ID)) { continue; } String nodeIndex = config.getId().split("-")[1]; String nodeId = config.getConfig(DbConfigConstants.NODE_ID); if (nodeId == null) { // suppose that they are existing znodes from a previous version // set the NODE_ID config, id is like db-x nodeId = "vipr" + nodeIndex; config.setConfig(DbConfigConstants.NODE_ID, nodeId); config.removeConfig(DbConfigConstants.DB_IP); _client.persistServiceConfiguration(_client.getSiteId(), config); } if (!Boolean.parseBoolean(config.getConfig(DbConfigConstants.AUTOBOOT)) || (!config.getId().equals(_id) && Boolean.parseBoolean(config.getConfig(DbConfigConstants.JOINED)))) { // all non autobootstrap nodes + other nodes are used as seeds InetAddress ip = null; if (nodeMap != null) { String ipAddress = nodeMap.getConnectableInternalAddress(nodeId); _logger.debug("ip[" + i + "]: " + ipAddress); ip = InetAddress.getByName(ipAddress); } else { ip = InetAddress.getByName(nodeId); } seeds.add(ip); _logger.info("Seed {}", ip); } } } _logger.info("Seeds list {}", StringUtils.join(seeds.toArray(), ",")); return seeds; } catch (Exception e) { throw new IllegalStateException(e); } }