@Test
public void failedInstanceShouldReceiveCorrectCoordinatorIdUponRejoiningCluster() throws Throwable {
    // Given
    HighlyAvailableGraphDatabase initialMaster = cluster.getMaster();

    // When
    cluster.shutdown(initialMaster);
    cluster.await(masterAvailable(initialMaster));
    cluster.await(masterSeesSlavesAsAvailable(1));

    // create node on new master to ensure that it has the greatest tx id
    createNodeOn(cluster.getMaster());
    cluster.sync();

    ClusterClient clusterClient = cleanup.add(newClusterClient(new InstanceId(1)));

    final AtomicReference<InstanceId> coordinatorIdWhenReJoined = new AtomicReference<>();
    final CountDownLatch latch = new CountDownLatch(1);
    clusterClient.addClusterListener(new ClusterListener.Adapter() {
        @Override
        public void enteredCluster(ClusterConfiguration clusterConfiguration) {
            coordinatorIdWhenReJoined.set(clusterConfiguration.getElected(COORDINATOR));
            latch.countDown();
        }
    });

    clusterClient.init();
    clusterClient.start();

    // Then
    latch.await(2, SECONDS);
    assertEquals(new InstanceId(2), coordinatorIdWhenReJoined.get());
}
/**
 * WARNING: beware of hacks.
 *
 * <p>Fails a member of this cluster by making it not respond to heartbeats. A {@link RepairKit}
 * is returned which is able to repair the instance (i.e. start the network again).
 *
 * @param db the {@link HighlyAvailableGraphDatabase} to fail.
 * @return a {@link RepairKit} which can repair the failure.
 * @throws IllegalArgumentException if the given db isn't a member of this cluster.
 */
public RepairKit fail(HighlyAvailableGraphDatabase db) throws Throwable {
    assertMember(db);

    ClusterClient clusterClient = db.getDependencyResolver().resolveDependency(ClusterClient.class);
    LifeSupport clusterClientLife = (LifeSupport)
            accessible(clusterClient.getClass().getDeclaredField("life")).get(clusterClient);

    // Stop networking and the state machine executor so the instance stops responding
    // to heartbeats, without shutting the database itself down.
    NetworkReceiver receiver = instance(NetworkReceiver.class, clusterClientLife.getLifecycleInstances());
    receiver.stop();

    ExecutorLifecycleAdapter statemachineExecutor =
            instance(ExecutorLifecycleAdapter.class, clusterClientLife.getLifecycleInstances());
    statemachineExecutor.stop();

    NetworkSender sender = instance(NetworkSender.class, clusterClientLife.getLifecycleInstances());
    sender.stop();

    List<Lifecycle> stoppedServices = new ArrayList<>();
    stoppedServices.add(sender);
    stoppedServices.add(statemachineExecutor);
    stoppedServices.add(receiver);

    return new StartNetworkAgainKit(db, stoppedServices);
}
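// A minimal usage sketch of the fail()/RepairKit cycle in a test. This is an assumption, not
// code from this repository: it presumes a ManagedCluster field named "cluster", its
// getAnySlave() helper, and the allSeesAllAsAvailable() await condition from ClusterManager.
HighlyAvailableGraphDatabase slave = cluster.getAnySlave();
RepairKit repairKit = cluster.fail(slave);
// ... exercise the cluster while this instance is unresponsive to heartbeats ...
repairKit.repair();
cluster.await(allSeesAllAsAvailable());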
public static String stateToString(ManagedCluster cluster) {
    StringBuilder buf = new StringBuilder("\n");
    for (HighlyAvailableGraphDatabase database : cluster.getAllMembers()) {
        ClusterClient client = database.getDependencyResolver().resolveDependency(ClusterClient.class);
        buf.append("Instance ")
                .append(client.getServerId())
                .append(":State ")
                .append(database.getInstanceState())
                .append(" (")
                .append(client.getClusterServer())
                .append("):")
                .append("\n");

        ClusterMembers members = database.getDependencyResolver().resolveDependency(ClusterMembers.class);
        for (ClusterMember clusterMember : members.getMembers()) {
            buf.append(" ")
                    .append(clusterMember.getInstanceId())
                    .append(":")
                    .append(clusterMember.getHARole())
                    .append(" (is alive = ")
                    .append(clusterMember.isAlive())
                    .append(")")
                    .append("\n");
        }
    }
    return buf.toString();
}
/**
 * WARNING: beware of hacks.
 *
 * <p>Fails a member of this cluster by making it not respond to heartbeats and shutting it down.
 * A {@link RepairKit} is returned which is able to repair the instance (i.e. start the database
 * again).
 *
 * @param db the {@link HighlyAvailableGraphDatabase} to fail.
 * @return a {@link RepairKit} which can repair the failure.
 * @throws IllegalArgumentException if the given db isn't a member of this cluster.
 */
public RepairKit fail(HighlyAvailableGraphDatabase db) throws Throwable {
    assertMember(db);

    ClusterClient clusterClient = db.getDependencyResolver().resolveDependency(ClusterClient.class);
    LifeSupport clusterClientLife = (LifeSupport)
            accessible(clusterClient.getClass().getDeclaredField("life")).get(clusterClient);

    NetworkInstance network = instance(NetworkInstance.class, clusterClientLife.getLifecycleInstances());
    network.stop();

    int serverId = db.getDependencyResolver().resolveDependency(Config.class).get(HaSettings.server_id);
    db.shutdown();

    return new StartDatabaseAgainKit(this, serverId);
}
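// A hedged sketch of how a test might drive this variant, which shuts the instance down entirely
// so that repair() starts a fresh database for the same server id. The masterAvailable(...) await
// condition appears in the test above; the fixture names and the repair() return value shown here
// are assumptions for illustration only.
HighlyAvailableGraphDatabase oldMaster = cluster.getMaster();
RepairKit repairKit = cluster.fail(oldMaster);
cluster.await(masterAvailable(oldMaster)); // wait for a new master among the remaining members
HighlyAvailableGraphDatabase restarted = repairKit.repair();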
private void startMember(int serverId) throws URISyntaxException {
    Clusters.Member member = spec.getMembers().get(serverId - 1);
    StringBuilder initialHosts = new StringBuilder(spec.getMembers().get(0).getHost());
    for (int i = 1; i < spec.getMembers().size(); i++) {
        initialHosts.append(",").append(spec.getMembers().get(i).getHost());
    }

    if (member.isFullHaMember()) {
        int haPort = new URI("cluster://" + member.getHost()).getPort() + 3000;
        GraphDatabaseBuilder graphDatabaseBuilder = new HighlyAvailableGraphDatabaseFactory()
                .newHighlyAvailableDatabaseBuilder(
                        new File(new File(root, name), "server" + serverId).getAbsolutePath())
                .setConfig(ClusterSettings.cluster_name, name)
                .setConfig(ClusterSettings.initial_hosts, initialHosts.toString())
                .setConfig(HaSettings.server_id, serverId + "")
                .setConfig(ClusterSettings.cluster_server, member.getHost())
                .setConfig(HaSettings.ha_server, ":" + haPort)
                .setConfig(commonConfig);
        if (instanceConfig.containsKey(serverId)) {
            graphDatabaseBuilder.setConfig(instanceConfig.get(serverId));
        }

        config(graphDatabaseBuilder, name, serverId);

        logger.info("Starting cluster node " + serverId + " in cluster " + name);
        final GraphDatabaseService graphDatabase = graphDatabaseBuilder.newGraphDatabase();

        members.put(serverId, (HighlyAvailableGraphDatabase) graphDatabase);

        life.add(new LifecycleAdapter() {
            @Override
            public void stop() throws Throwable {
                graphDatabase.shutdown();
            }
        });
    } else {
        Map<String, String> config = MapUtil.stringMap(
                ClusterSettings.cluster_name.name(), name,
                ClusterSettings.initial_hosts.name(), initialHosts.toString(),
                ClusterSettings.cluster_server.name(), member.getHost());
        Logging clientLogging = new Logging() {
            @Override
            public StringLogger getLogger(Class loggingClass) {
                return new Slf4jStringLogger(logger);
            }
        };
        life.add(new ClusterClient(
                ClusterClient.adapt(new Config(config)),
                clientLogging,
                new CoordinatorIncapableCredentialsProvider()));
    }

    // logger.info( "Started cluster node " + serverId + " in cluster " + name );
}
private ClusterClient newClusterClient(InstanceId id) {
    Map<String, String> configMap = MapUtil.stringMap(
            ClusterSettings.initial_hosts.name(), cluster.getInitialHostsConfigString(),
            ClusterSettings.server_id.name(), String.valueOf(id.toIntegerIndex()),
            ClusterSettings.cluster_server.name(), "0.0.0.0:8888");

    Config config = new Config(
            configMap, InternalAbstractGraphDatabase.Configuration.class, GraphDatabaseSettings.class);

    return new ClusterClient(
            new Monitors(),
            ClusterClient.adapt(config),
            new DevNullLoggingService(),
            new NotElectableElectionCredentialsProvider(),
            new ObjectStreamFactory(),
            new ObjectStreamFactory());
}
@Override
protected RemoteTxHook createTxHook() {
    clusterEventsDelegateInvocationHandler = new DelegateInvocationHandler();
    memberContextDelegateInvocationHandler = new DelegateInvocationHandler();
    clusterMemberAvailabilityDelegateInvocationHandler = new DelegateInvocationHandler();

    clusterEvents = (ClusterMemberEvents) Proxy.newProxyInstance(
            ClusterMemberEvents.class.getClassLoader(),
            new Class[] {ClusterMemberEvents.class, Lifecycle.class},
            clusterEventsDelegateInvocationHandler);
    memberContext = (HighAvailabilityMemberContext) Proxy.newProxyInstance(
            HighAvailabilityMemberContext.class.getClassLoader(),
            new Class[] {HighAvailabilityMemberContext.class},
            memberContextDelegateInvocationHandler);
    clusterMemberAvailability = (ClusterMemberAvailability) Proxy.newProxyInstance(
            ClusterMemberAvailability.class.getClassLoader(),
            new Class[] {ClusterMemberAvailability.class},
            clusterMemberAvailabilityDelegateInvocationHandler);

    ElectionCredentialsProvider electionCredentialsProvider = config.get(HaSettings.slave_only)
            ? new NotElectableElectionCredentialsProvider()
            : new DefaultElectionCredentialsProvider(
                    config.get(ClusterSettings.server_id),
                    new OnDiskLastTxIdGetter(new File(getStoreDir())),
                    new HighAvailabilityMemberInfoProvider() {
                        @Override
                        public HighAvailabilityMemberState getHighAvailabilityMemberState() {
                            return memberStateMachine.getCurrentState();
                        }
                    });

    ObjectStreamFactory objectStreamFactory = new ObjectStreamFactory();
    clusterClient = new ClusterClient(
            ClusterClient.adapt(config),
            logging,
            electionCredentialsProvider,
            objectStreamFactory,
            objectStreamFactory);

    PaxosClusterMemberEvents localClusterEvents = new PaxosClusterMemberEvents(
            clusterClient,
            clusterClient,
            clusterClient,
            clusterClient,
            logging,
            new Predicate<PaxosClusterMemberEvents.ClusterMembersSnapshot>() {
                @Override
                public boolean accept(PaxosClusterMemberEvents.ClusterMembersSnapshot item) {
                    for (MemberIsAvailable member : item.getCurrentAvailableMembers()) {
                        if (member.getRoleUri().getScheme().equals("ha")) {
                            if (HighAvailabilityModeSwitcher.getServerId(member.getRoleUri())
                                    == config.get(ClusterSettings.server_id)) {
                                msgLog.error(String.format(
                                        "Instance %s has the same serverId as ours (%d) - will not "
                                                + "join this cluster",
                                        member.getRoleUri(), config.get(ClusterSettings.server_id)));
                                return true;
                            }
                        }
                    }
                    return true;
                }
            },
            new HANewSnapshotFunction(),
            objectStreamFactory,
            objectStreamFactory);

    // Force a reelection after we enter the cluster
    // and when that election is finished refresh the snapshot
    clusterClient.addClusterListener(new ClusterListener.Adapter() {
        // This ensures that the election result is (at least) from our request or thereafter
        boolean hasRequestedElection = true;

        @Override
        public void enteredCluster(ClusterConfiguration clusterConfiguration) {
            clusterClient.performRoleElections();
        }

        @Override
        public void elected(String role, InstanceId instanceId, URI electedMember) {
            if (hasRequestedElection && role.equals(ClusterConfiguration.COORDINATOR)) {
                clusterClient.removeClusterListener(this);
            }
        }
    });

    HighAvailabilityMemberContext localMemberContext =
            new SimpleHighAvailabilityMemberContext(clusterClient.getServerId());
    PaxosClusterMemberAvailability localClusterMemberAvailability = new PaxosClusterMemberAvailability(
            clusterClient.getServerId(),
            clusterClient,
            clusterClient,
            logging,
            objectStreamFactory,
            objectStreamFactory);

    memberContextDelegateInvocationHandler.setDelegate(localMemberContext);
    clusterEventsDelegateInvocationHandler.setDelegate(localClusterEvents);
    clusterMemberAvailabilityDelegateInvocationHandler.setDelegate(localClusterMemberAvailability);

    members = new ClusterMembers(
            clusterClient,
            clusterClient,
            clusterEvents,
            new InstanceId(config.get(ClusterSettings.server_id)));

    memberStateMachine = new HighAvailabilityMemberStateMachine(
            memberContext,
            availabilityGuard,
            members,
            clusterEvents,
            clusterClient,
            logging.getMessagesLog(HighAvailabilityMemberStateMachine.class));

    HighAvailabilityConsoleLogger highAvailabilityConsoleLogger = new HighAvailabilityConsoleLogger(
            logging.getConsoleLog(HighAvailabilityConsoleLogger.class),
            new InstanceId(config.get(ClusterSettings.server_id)));
    availabilityGuard.addListener(highAvailabilityConsoleLogger);
    clusterEvents.addClusterMemberListener(highAvailabilityConsoleLogger);
    clusterClient.addClusterListener(highAvailabilityConsoleLogger);

    paxosLife.add(clusterClient);
    paxosLife.add(memberStateMachine);
    paxosLife.add(clusterEvents);
    paxosLife.add(localClusterMemberAvailability);

    DelegateInvocationHandler<RemoteTxHook> txHookDelegate = new DelegateInvocationHandler<>();
    RemoteTxHook txHook = (RemoteTxHook) Proxy.newProxyInstance(
            RemoteTxHook.class.getClassLoader(), new Class[] {RemoteTxHook.class}, txHookDelegate);
    new TxHookModeSwitcher(
            memberStateMachine,
            txHookDelegate,
            master,
            new TxHookModeSwitcher.RequestContextFactoryResolver() {
                @Override
                public RequestContextFactory get() {
                    return requestContextFactory;
                }
            },
            logging.getMessagesLog(TxHookModeSwitcher.class),
            dependencyResolver);
    return txHook;
}
private void startMember(InstanceId serverId) throws URISyntaxException, IOException {
    Clusters.Member member = spec.getMembers().get(serverId.toIntegerIndex() - 1);
    StringBuilder initialHosts = new StringBuilder(spec.getMembers().get(0).getHost());
    for (int i = 1; i < spec.getMembers().size(); i++) {
        initialHosts.append(",").append(spec.getMembers().get(i).getHost());
    }
    File parent = new File(root, name);
    URI clusterUri = new URI("cluster://" + member.getHost());

    if (member.isFullHaMember()) {
        int clusterPort = clusterUri.getPort();
        int haPort = clusterUri.getPort() + 3000;
        File storeDir = new File(parent, "server" + serverId);
        if (storeDirInitializer != null) {
            storeDirInitializer.initializeStoreDir(serverId.toIntegerIndex(), storeDir);
        }

        GraphDatabaseBuilder builder =
                dbFactory.newHighlyAvailableDatabaseBuilder(storeDir.getAbsolutePath());
        builder.setConfig(ClusterSettings.cluster_name, name);
        builder.setConfig(ClusterSettings.initial_hosts, initialHosts.toString());
        builder.setConfig(ClusterSettings.server_id, serverId + "");
        builder.setConfig(ClusterSettings.cluster_server, "0.0.0.0:" + clusterPort);
        builder.setConfig(HaSettings.ha_server, ":" + haPort);
        builder.setConfig(OnlineBackupSettings.online_backup_enabled, Settings.FALSE);
        builder.setConfig(commonConfig);
        if (instanceConfig.containsKey(serverId.toIntegerIndex())) {
            builder.setConfig(instanceConfig.get(serverId.toIntegerIndex()));
        }

        config(builder, name, serverId);

        final HighlyAvailableGraphDatabaseProxy graphDatabase =
                new HighlyAvailableGraphDatabaseProxy(builder);
        members.put(serverId, graphDatabase);

        life.add(new LifecycleAdapter() {
            @Override
            public void stop() throws Throwable {
                graphDatabase.get().shutdown();
            }
        });
    } else {
        Map<String, String> config = MapUtil.stringMap(
                ClusterSettings.cluster_name.name(), name,
                ClusterSettings.initial_hosts.name(), initialHosts.toString(),
                ClusterSettings.server_id.name(), serverId + "",
                ClusterSettings.cluster_server.name(), "0.0.0.0:" + clusterUri.getPort(),
                GraphDatabaseSettings.store_dir.name(),
                new File(parent, "arbiter" + serverId).getAbsolutePath());
        Config config1 = new Config(
                config, InternalAbstractGraphDatabase.Configuration.class, GraphDatabaseSettings.class);

        ObjectStreamFactory objectStreamFactory = new ObjectStreamFactory();
        ClusterClient clusterClient = new ClusterClient(
                new Monitors(),
                ClusterClient.adapt(config1),
                NullLogService.getInstance(),
                new NotElectableElectionCredentialsProvider(),
                objectStreamFactory,
                objectStreamFactory);

        arbiters.add(new ClusterMembers(
                clusterClient,
                clusterClient,
                new ClusterMemberEvents() {
                    @Override
                    public void addClusterMemberListener(ClusterMemberListener listener) {
                        // noop
                    }

                    @Override
                    public void removeClusterMemberListener(ClusterMemberListener listener) {
                        // noop
                    }
                },
                clusterClient.getServerId()));

        life.add(new FutureLifecycleAdapter<>(clusterClient));
    }
}
@Override
protected TxHook createTxHook() {
    clusterEventsDelegateInvocationHandler = new DelegateInvocationHandler();
    memberContextDelegateInvocationHandler = new DelegateInvocationHandler();
    clusterMemberAvailabilityDelegateInvocationHandler = new DelegateInvocationHandler();

    clusterEvents = (ClusterMemberEvents) Proxy.newProxyInstance(
            ClusterMemberEvents.class.getClassLoader(),
            new Class[] {ClusterMemberEvents.class, Lifecycle.class},
            clusterEventsDelegateInvocationHandler);
    memberContext = (HighAvailabilityMemberContext) Proxy.newProxyInstance(
            HighAvailabilityMemberContext.class.getClassLoader(),
            new Class[] {HighAvailabilityMemberContext.class},
            memberContextDelegateInvocationHandler);
    clusterMemberAvailability = (ClusterMemberAvailability) Proxy.newProxyInstance(
            ClusterMemberAvailability.class.getClassLoader(),
            new Class[] {ClusterMemberAvailability.class},
            clusterMemberAvailabilityDelegateInvocationHandler);

    /*
     * We need to create these anyway since even in compatibility mode we'll use them for switchover. If it turns
     * out we are not going to need zookeeper, just assign them to the class fields. The difference is in when
     * they start().
     */
    ElectionCredentialsProvider electionCredentialsProvider = config.get(HaSettings.slave_only)
            ? new NotElectableElectionCredentialsProvider()
            : new DefaultElectionCredentialsProvider(
                    config.get(ClusterSettings.server_id),
                    new OnDiskLastTxIdGetter(new File(getStoreDir())),
                    new HighAvailabilityMemberInfoProvider() {
                        @Override
                        public HighAvailabilityMemberState getHighAvailabilityMemberState() {
                            return memberStateMachine.getCurrentState();
                        }
                    });

    clusterClient = new ClusterClient(ClusterClient.adapt(config), logging, electionCredentialsProvider);

    PaxosClusterMemberEvents localClusterEvents = new PaxosClusterMemberEvents(
            clusterClient,
            clusterClient,
            clusterClient,
            clusterClient,
            logging,
            new Predicate<PaxosClusterMemberEvents.ClusterMembersSnapshot>() {
                @Override
                public boolean accept(PaxosClusterMemberEvents.ClusterMembersSnapshot item) {
                    for (MemberIsAvailable member : item.getCurrentAvailableMembers()) {
                        if (member.getRoleUri().getScheme().equals("ha")) {
                            if (HighAvailabilityModeSwitcher.getServerId(member.getRoleUri())
                                    == config.get(ClusterSettings.server_id)) {
                                msgLog.error(String.format(
                                        "Instance %s has the same serverId as ours (%d) - will not join this cluster",
                                        member.getRoleUri(), config.get(ClusterSettings.server_id)));
                                return true;
                            }
                        }
                    }
                    return true;
                }
            });

    // Force a reelection after we enter the cluster
    // and when that election is finished refresh the snapshot
    clusterClient.addClusterListener(new ClusterListener.Adapter() {
        @Override
        public void enteredCluster(ClusterConfiguration clusterConfiguration) {
            clusterClient.performRoleElections();
        }

        @Override
        public void elected(String role, InstanceId instanceId, URI electedMember) {
            if (role.equals(ClusterConfiguration.COORDINATOR)) {
                clusterClient.refreshSnapshot();
                clusterClient.removeClusterListener(this);
            }
        }
    });

    HighAvailabilityMemberContext localMemberContext =
            new SimpleHighAvailabilityMemberContext(clusterClient.getServerId());
    PaxosClusterMemberAvailability localClusterMemberAvailability = new PaxosClusterMemberAvailability(
            clusterClient.getServerId(), clusterClient, clusterClient, logging);

    // Here we decide whether to start in compatibility mode or not
    if (!config.get(HaSettings.coordinators).isEmpty()
            && !config.get(HaSettings.coordinators).get(0).toString().trim().equals("")) {
        compatibilityMode = true;
        compatibilityLifecycle = new LinkedList<Lifecycle>();

        Switchover switchover = new ZooToPaxosSwitchover(
                life,
                paxosLife,
                compatibilityLifecycle,
                clusterEventsDelegateInvocationHandler,
                memberContextDelegateInvocationHandler,
                clusterMemberAvailabilityDelegateInvocationHandler,
                localClusterEvents,
                localMemberContext,
                localClusterMemberAvailability);

        ZooKeeperHighAvailabilityEvents zkEvents =
                new ZooKeeperHighAvailabilityEvents(logging, config, switchover);
        compatibilityLifecycle.add(zkEvents);
        memberContextDelegateInvocationHandler.setDelegate(
                new SimpleHighAvailabilityMemberContext(zkEvents.getInstanceId()));
        clusterEventsDelegateInvocationHandler.setDelegate(zkEvents);
        clusterMemberAvailabilityDelegateInvocationHandler.setDelegate(zkEvents);
        // Paxos events added to paxosLife won't be stopped, because it isn't started yet
        paxosLife.add(localClusterEvents);
    } else {
        memberContextDelegateInvocationHandler.setDelegate(localMemberContext);
        clusterEventsDelegateInvocationHandler.setDelegate(localClusterEvents);
        clusterMemberAvailabilityDelegateInvocationHandler.setDelegate(localClusterMemberAvailability);
    }

    members = new ClusterMembers(
            clusterClient,
            clusterClient,
            clusterEvents,
            new InstanceId(config.get(ClusterSettings.server_id)));

    memberStateMachine = new HighAvailabilityMemberStateMachine(
            memberContext,
            accessGuard,
            members,
            clusterEvents,
            clusterClient,
            logging.getLogger(HighAvailabilityMemberStateMachine.class));

    if (compatibilityMode) {
        /*
         * In here goes stuff that needs to stop when switching. If added to paxosLife too they will be restarted.
         * Adding to life starts them when life.start is called - adding them to compatibilityLifecycle shuts them
         * down on switchover.
         */
        compatibilityLifecycle.add(memberStateMachine);
        // compatibilityLifecycle.add( highAvailabilityModeSwitcher );
        compatibilityLifecycle.add((Lifecycle) clusterEvents);
        life.add(memberStateMachine);
        // life.add( highAvailabilityModeSwitcher );
        life.add(clusterEvents);
    }

    /*
     * Here goes stuff that needs to start when paxos kicks in:
     * In normal (non compatibility) mode: they start normally.
     * In compatibility mode: they start when switchover happens. If added to life too they will be restarted.
     */
    paxosLife.add(memberStateMachine);
    paxosLife.add(clusterEvents);
    paxosLife.add(clusterClient);
    paxosLife.add(localClusterMemberAvailability);

    DelegateInvocationHandler<TxHook> txHookDelegate = new DelegateInvocationHandler<TxHook>();
    TxHook txHook = (TxHook) Proxy.newProxyInstance(
            TxHook.class.getClassLoader(), new Class[] {TxHook.class}, txHookDelegate);
    new TxHookModeSwitcher(
            memberStateMachine,
            txHookDelegate,
            master,
            new TxHookModeSwitcher.RequestContextFactoryResolver() {
                @Override
                public RequestContextFactory get() {
                    return requestContextFactory;
                }
            },
            dependencyResolver);
    return txHook;
}