Пример #1
0
  /**
   * Checks which coordinator a cluster client is told about when it enters the cluster after the
   * initial master has been shut down.
   *
   * <p>The initial master is shut down, a node is created on whichever member the cluster then
   * reports as master, and a bare {@link ClusterClient} with instance id 1 joins. The coordinator
   * reported through {@code enteredCluster} is expected to be instance 2.
   *
   * @throws Throwable if any cluster operation fails.
   */
  @Test
  public void failedInstanceShouldReceiveCorrectCoordinatorIdUponRejoiningCluster()
      throws Throwable {
    // Given
    HighlyAvailableGraphDatabase initialMaster = cluster.getMaster();

    // When
    cluster.shutdown(initialMaster);
    cluster.await(masterAvailable(initialMaster));
    cluster.await(masterSeesSlavesAsAvailable(1));

    // create node on new master to ensure that it has the greatest tx id
    createNodeOn(cluster.getMaster());
    cluster.sync();

    ClusterClient clusterClient = cleanup.add(newClusterClient(new InstanceId(1)));

    final AtomicReference<InstanceId> coordinatorIdWhenReJoined = new AtomicReference<>();
    final CountDownLatch latch = new CountDownLatch(1);
    clusterClient.addClusterListener(
        new ClusterListener.Adapter() {
          @Override
          public void enteredCluster(ClusterConfiguration clusterConfiguration) {
            // Record the coordinator first, then release the waiting test thread.
            coordinatorIdWhenReJoined.set(clusterConfiguration.getElected(COORDINATOR));
            latch.countDown();
          }
        });

    clusterClient.init();
    clusterClient.start();

    // Then
    // await() returns false on timeout. Fail with an explicit message instead of letting the
    // assertion below report a confusing "expected 2 but was null".
    if (!latch.await(2, SECONDS)) {
      throw new AssertionError(
          "Timed out after 2 seconds waiting for the cluster client to enter the cluster");
    }
    assertEquals(new InstanceId(2), coordinatorIdWhenReJoined.get());
  }
Пример #2
0
    /**
     * WARNING: this relies on a reflection hack.
     *
     * <p>Fails a member of this cluster by stopping, in this order, its {@link NetworkReceiver},
     * its {@link ExecutorLifecycleAdapter} and its {@link NetworkSender}. These are looked up in
     * the {@link LifeSupport} held in the private {@code life} field of the member's {@link
     * ClusterClient}. The returned {@link RepairKit} is handed the stopped services so that it
     * can repair the instance again.
     *
     * @param db the {@link HighlyAvailableGraphDatabase} to fail.
     * @return a {@link RepairKit} which can repair the failure.
     * @throws IllegalArgumentException if the given db isn't a member of this cluster.
     */
    public RepairKit fail(HighlyAvailableGraphDatabase db) throws Throwable {
      assertMember(db);

      ClusterClient client = db.getDependencyResolver().resolveDependency(ClusterClient.class);
      // The client's LifeSupport is not exposed, so it is read reflectively from the "life" field.
      LifeSupport clientLife =
          (LifeSupport) accessible(client.getClass().getDeclaredField("life")).get(client);

      // Each stopped service is inserted at the front, so the list ends up as
      // sender, executor, receiver - the reverse of the order in which they were stopped.
      List<Lifecycle> stopped = new ArrayList<>();

      NetworkReceiver networkReceiver =
          instance(NetworkReceiver.class, clientLife.getLifecycleInstances());
      networkReceiver.stop();
      stopped.add(0, networkReceiver);

      ExecutorLifecycleAdapter executor =
          instance(ExecutorLifecycleAdapter.class, clientLife.getLifecycleInstances());
      executor.stop();
      stopped.add(0, executor);

      NetworkSender networkSender =
          instance(NetworkSender.class, clientLife.getLifecycleInstances());
      networkSender.stop();
      stopped.add(0, networkSender);

      return new StartNetworkAgainKit(db, stopped);
    }
Пример #3
0
  /**
   * Renders a multi-line description of the given cluster, as seen by each of its members.
   *
   * <p>For every member one header line is written with its server id, instance state and
   * cluster server, followed by one indented line per entry of that member's {@link
   * ClusterMembers} view giving the instance id, HA role and alive flag.
   *
   * @param cluster the cluster whose members are described.
   * @return the description, starting with a newline.
   */
  public static String stateToString(ManagedCluster cluster) {
    StringBuilder out = new StringBuilder("\n");

    for (HighlyAvailableGraphDatabase db : cluster.getAllMembers()) {
      ClusterClient clusterClient =
          db.getDependencyResolver().resolveDependency(ClusterClient.class);

      // Header line for this member.
      out.append("Instance ").append(clusterClient.getServerId());
      out.append(":State ").append(db.getInstanceState());
      out.append(" (").append(clusterClient.getClusterServer()).append("):");
      out.append("\n");

      // One line per member that this instance knows about.
      ClusterMembers view = db.getDependencyResolver().resolveDependency(ClusterMembers.class);
      for (ClusterMember known : view.getMembers()) {
        out.append("  ").append(known.getInstanceId());
        out.append(":").append(known.getHARole());
        out.append(" (is alive = ").append(known.isAlive()).append(")");
        out.append("\n");
      }
    }

    return out.toString();
  }
Пример #4
0
    /**
     * WARNING: this relies on a reflection hack.
     *
     * <p>Fails a member of this cluster by stopping the {@link NetworkInstance} found in the
     * {@link LifeSupport} held in the private {@code life} field of its {@link ClusterClient},
     * and then shutting the database down. The returned {@link RepairKit} is created from this
     * cluster and the server id the member was configured with.
     *
     * @param db the {@link HighlyAvailableGraphDatabase} to fail.
     * @return a {@link RepairKit} which can repair the failure.
     * @throws IllegalArgumentException if the given db isn't a member of this cluster.
     */
    public RepairKit fail(HighlyAvailableGraphDatabase db) throws Throwable {
      assertMember(db);

      ClusterClient client = db.getDependencyResolver().resolveDependency(ClusterClient.class);
      // The client's LifeSupport is not exposed, so it is read reflectively from the "life" field.
      Object lifeValue = accessible(client.getClass().getDeclaredField("life")).get(client);
      LifeSupport clientLife = (LifeSupport) lifeValue;

      NetworkInstance networkInstance =
          instance(NetworkInstance.class, clientLife.getLifecycleInstances());
      networkInstance.stop();

      // The server id has to be read before the database is shut down.
      Config dbConfig = db.getDependencyResolver().resolveDependency(Config.class);
      int serverId = dbConfig.get(HaSettings.server_id);

      db.shutdown();
      return new StartDatabaseAgainKit(this, serverId);
    }
Пример #5
0
    /**
     * Starts the member with the given server id, as described by the cluster spec.
     *
     * <p>A full HA member is started as a highly available graph database whose store lives in
     * {@code <root>/<name>/server<serverId>}. It is registered in {@code members}, and a
     * lifecycle hook added to {@code life} shuts it down on {@code stop()}. Any other member is
     * added to {@code life} as a bare {@link ClusterClient} that is given a {@link
     * CoordinatorIncapableCredentialsProvider}.
     *
     * @param serverId id of the member; the spec entry at index {@code serverId - 1} is used.
     * @throws URISyntaxException if the member's host cannot be turned into a cluster URI.
     */
    private void startMember(int serverId) throws URISyntaxException {
      Clusters.Member member = spec.getMembers().get(serverId - 1);

      // Every member of the spec goes into the comma-separated initial host list.
      StringBuilder hostList = new StringBuilder(spec.getMembers().get(0).getHost());
      for (int i = 1; i < spec.getMembers().size(); i++) {
        hostList.append(",").append(spec.getMembers().get(i).getHost());
      }

      if (!member.isFullHaMember()) {
        // Not a full HA member: only a cluster client is started for it.
        Map<String, String> clientSettings =
            MapUtil.stringMap(
                ClusterSettings.cluster_name.name(), name,
                ClusterSettings.initial_hosts.name(), hostList.toString(),
                ClusterSettings.cluster_server.name(), member.getHost());
        Logging slf4jLogging =
            new Logging() {
              @Override
              public StringLogger getLogger(Class loggingClass) {
                return new Slf4jStringLogger(logger);
              }
            };
        life.add(
            new ClusterClient(
                ClusterClient.adapt(new Config(clientSettings)),
                slf4jLogging,
                new CoordinatorIncapableCredentialsProvider()));
        return;
      }

      // The HA port is derived from the member's cluster port with a fixed offset of 3000.
      int haPort = new URI("cluster://" + member.getHost()).getPort() + 3000;
      GraphDatabaseBuilder builder =
          new HighlyAvailableGraphDatabaseFactory()
              .newHighlyAvailableDatabaseBuilder(
                  new File(new File(root, name), "server" + serverId).getAbsolutePath())
              .setConfig(ClusterSettings.cluster_name, name)
              .setConfig(ClusterSettings.initial_hosts, hostList.toString())
              .setConfig(HaSettings.server_id, String.valueOf(serverId))
              .setConfig(ClusterSettings.cluster_server, member.getHost())
              .setConfig(HaSettings.ha_server, ":" + haPort)
              .setConfig(commonConfig);
      // Per-instance settings are applied after the common ones.
      if (instanceConfig.containsKey(serverId)) {
        builder.setConfig(instanceConfig.get(serverId));
      }

      config(builder, name, serverId);

      logger.info("Starting cluster node " + serverId + " in cluster " + name);
      final GraphDatabaseService db = builder.newGraphDatabase();

      members.put(serverId, (HighlyAvailableGraphDatabase) db);

      life.add(
          new LifecycleAdapter() {
            @Override
            public void stop() throws Throwable {
              db.shutdown();
            }
          });
    }
Пример #6
0
  /**
   * Builds a {@link ClusterClient} for the given instance id.
   *
   * <p>The client is configured with the cluster's initial hosts and the cluster server address
   * {@code 0.0.0.0:8888}, logs to a {@link DevNullLoggingService} and uses a {@link
   * NotElectableElectionCredentialsProvider}. This method neither initialises nor starts it.
   *
   * @param id the instance id to configure as the client's server id.
   * @return the newly constructed client.
   */
  private ClusterClient newClusterClient(InstanceId id) {
    Map<String, String> settings =
        MapUtil.stringMap(
            ClusterSettings.initial_hosts.name(),
            cluster.getInitialHostsConfigString(),
            ClusterSettings.server_id.name(),
            String.valueOf(id.toIntegerIndex()),
            ClusterSettings.cluster_server.name(),
            "0.0.0.0:8888");

    Config clientConfig =
        new Config(
            settings, InternalAbstractGraphDatabase.Configuration.class, GraphDatabaseSettings.class);

    // Two separate ObjectStreamFactory instances are passed.
    ClusterClient client =
        new ClusterClient(
            new Monitors(),
            ClusterClient.adapt(clientConfig),
            new DevNullLoggingService(),
            new NotElectableElectionCredentialsProvider(),
            new ObjectStreamFactory(),
            new ObjectStreamFactory());
    return client;
  }
  /**
   * Creates the transaction hook for this database and, along the way, wires up the cluster
   * infrastructure this instance uses.
   *
   * <p>In order, this method:
   *
   * <ol>
   *   <li>creates delegating proxies for {@code clusterEvents}, {@code memberContext} and {@code
   *       clusterMemberAvailability};
   *   <li>picks the election credentials provider based on {@code HaSettings.slave_only};
   *   <li>creates the {@link ClusterClient} and the Paxos-based member events and availability;
   *   <li>registers a listener that requests role elections once the cluster has been entered;
   *   <li>sets the proxy delegates, then creates {@code members} and {@code memberStateMachine};
   *   <li>registers the console logger and adds the cluster components to {@code paxosLife};
   *   <li>creates the {@link RemoteTxHook} proxy and a {@link TxHookModeSwitcher} for it.
   * </ol>
   *
   * @return a proxy implementing {@link RemoteTxHook} whose calls are routed through the delegate
   *     handler that is also handed to the {@link TxHookModeSwitcher}.
   */
  @Override
  protected RemoteTxHook createTxHook() {
    // The handlers are created before their delegates exist, so that the proxies below can be
    // handed to other components right away. The delegates are set further down.
    clusterEventsDelegateInvocationHandler = new DelegateInvocationHandler();
    memberContextDelegateInvocationHandler = new DelegateInvocationHandler();
    clusterMemberAvailabilityDelegateInvocationHandler = new DelegateInvocationHandler();

    // The cluster events proxy also implements Lifecycle; it is added to paxosLife below.
    clusterEvents =
        (ClusterMemberEvents)
            Proxy.newProxyInstance(
                ClusterMemberEvents.class.getClassLoader(),
                new Class[] {ClusterMemberEvents.class, Lifecycle.class},
                clusterEventsDelegateInvocationHandler);
    memberContext =
        (HighAvailabilityMemberContext)
            Proxy.newProxyInstance(
                HighAvailabilityMemberContext.class.getClassLoader(),
                new Class[] {HighAvailabilityMemberContext.class},
                memberContextDelegateInvocationHandler);
    clusterMemberAvailability =
        (ClusterMemberAvailability)
            Proxy.newProxyInstance(
                ClusterMemberAvailability.class.getClassLoader(),
                new Class[] {ClusterMemberAvailability.class},
                clusterMemberAvailabilityDelegateInvocationHandler);

    // Slave-only instances get the "not electable" provider. All others get a provider built
    // from the server id, the store directory and the current HA member state.
    // memberStateMachine is only assigned further down; the anonymous class reads the field
    // when it is called, not when it is created.
    ElectionCredentialsProvider electionCredentialsProvider =
        config.get(HaSettings.slave_only)
            ? new NotElectableElectionCredentialsProvider()
            : new DefaultElectionCredentialsProvider(
                config.get(ClusterSettings.server_id),
                new OnDiskLastTxIdGetter(new File(getStoreDir())),
                new HighAvailabilityMemberInfoProvider() {
                  @Override
                  public HighAvailabilityMemberState getHighAvailabilityMemberState() {
                    return memberStateMachine.getCurrentState();
                  }
                });

    // One factory instance is shared by every component below that takes a pair of them.
    ObjectStreamFactory objectStreamFactory = new ObjectStreamFactory();

    clusterClient =
        new ClusterClient(
            ClusterClient.adapt(config),
            logging,
            electionCredentialsProvider,
            objectStreamFactory,
            objectStreamFactory);
    // The predicate logs an error when an available "ha" member reports the same server id as
    // this instance.
    // NOTE(review): both the duplicate-id path and the fall-through path return true, although
    // the log message says "will not join this cluster" - confirm whether the first return
    // should differ.
    PaxosClusterMemberEvents localClusterEvents =
        new PaxosClusterMemberEvents(
            clusterClient,
            clusterClient,
            clusterClient,
            clusterClient,
            logging,
            new Predicate<PaxosClusterMemberEvents.ClusterMembersSnapshot>() {
              @Override
              public boolean accept(PaxosClusterMemberEvents.ClusterMembersSnapshot item) {
                for (MemberIsAvailable member : item.getCurrentAvailableMembers()) {
                  if (member.getRoleUri().getScheme().equals("ha")) {
                    if (HighAvailabilityModeSwitcher.getServerId(member.getRoleUri())
                        == config.get(ClusterSettings.server_id)) {
                      msgLog.error(
                          String.format(
                              "Instance %s has the same serverId as ours (%d) - will not "
                                  + "join this cluster",
                              member.getRoleUri(), config.get(ClusterSettings.server_id)));
                      return true;
                    }
                  }
                }
                return true;
              }
            },
            new HANewSnapshotFunction(),
            objectStreamFactory,
            objectStreamFactory);

    // Force a reelection after we enter the cluster
    // and when that election is finished refresh the snapshot
    // NOTE(review): nothing in this listener refreshes a snapshot; once a COORDINATOR election
    // is seen it only removes itself. The second line above looks stale.
    clusterClient.addClusterListener(
        new ClusterListener.Adapter() {
          // Initialised to true and never modified anywhere in this listener.
          boolean hasRequestedElection =
              true; // This ensures that the election result is (at least) from our
          // request or thereafter

          @Override
          public void enteredCluster(ClusterConfiguration clusterConfiguration) {
            clusterClient.performRoleElections();
          }

          @Override
          public void elected(String role, InstanceId instanceId, URI electedMember) {
            if (hasRequestedElection && role.equals(ClusterConfiguration.COORDINATOR)) {
              clusterClient.removeClusterListener(this);
            }
          }
        });

    HighAvailabilityMemberContext localMemberContext =
        new SimpleHighAvailabilityMemberContext(clusterClient.getServerId());
    PaxosClusterMemberAvailability localClusterMemberAvailability =
        new PaxosClusterMemberAvailability(
            clusterClient.getServerId(),
            clusterClient,
            clusterClient,
            logging,
            objectStreamFactory,
            objectStreamFactory);

    // Point the proxies created at the top of the method at the local (Paxos) implementations.
    memberContextDelegateInvocationHandler.setDelegate(localMemberContext);
    clusterEventsDelegateInvocationHandler.setDelegate(localClusterEvents);
    clusterMemberAvailabilityDelegateInvocationHandler.setDelegate(localClusterMemberAvailability);

    // Both components are given the clusterEvents proxy, not localClusterEvents directly.
    members =
        new ClusterMembers(
            clusterClient,
            clusterClient,
            clusterEvents,
            new InstanceId(config.get(ClusterSettings.server_id)));
    memberStateMachine =
        new HighAvailabilityMemberStateMachine(
            memberContext,
            availabilityGuard,
            members,
            clusterEvents,
            clusterClient,
            logging.getMessagesLog(HighAvailabilityMemberStateMachine.class));

    // The console logger is registered as a listener on the availability guard, the cluster
    // member events and the cluster client.
    HighAvailabilityConsoleLogger highAvailabilityConsoleLogger =
        new HighAvailabilityConsoleLogger(
            logging.getConsoleLog(HighAvailabilityConsoleLogger.class),
            new InstanceId(config.get(ClusterSettings.server_id)));
    availabilityGuard.addListener(highAvailabilityConsoleLogger);
    clusterEvents.addClusterMemberListener(highAvailabilityConsoleLogger);
    clusterClient.addClusterListener(highAvailabilityConsoleLogger);

    // Lifecycle registration: the client first, then the state machine, events and availability.
    paxosLife.add(clusterClient);
    paxosLife.add(memberStateMachine);
    paxosLife.add(clusterEvents);
    paxosLife.add(localClusterMemberAvailability);

    // The returned hook is a proxy over txHookDelegate. The TxHookModeSwitcher is given the same
    // delegate handler together with the member state machine.
    // NOTE(review): presumably it registers itself with the state machine in its constructor,
    // since the created instance is not stored anywhere - confirm.
    DelegateInvocationHandler<RemoteTxHook> txHookDelegate = new DelegateInvocationHandler<>();
    RemoteTxHook txHook =
        (RemoteTxHook)
            Proxy.newProxyInstance(
                RemoteTxHook.class.getClassLoader(),
                new Class[] {RemoteTxHook.class},
                txHookDelegate);
    new TxHookModeSwitcher(
        memberStateMachine,
        txHookDelegate,
        master,
        new TxHookModeSwitcher.RequestContextFactoryResolver() {
          @Override
          public RequestContextFactory get() {
            return requestContextFactory;
          }
        },
        logging.getMessagesLog(TxHookModeSwitcher.class),
        dependencyResolver);
    return txHook;
  }
Пример #8
0
    /**
     * Starts the member identified by {@code serverId}, as described by the cluster spec.
     *
     * <p>A full HA member gets a store directory {@code <root>/<name>/server<serverId>}, which
     * is passed to {@code storeDirInitializer} when one is set. The member is wrapped in a
     * {@link HighlyAvailableGraphDatabaseProxy}, registered in {@code members}, and shut down
     * by a lifecycle hook added to {@code life}. Any other member is started as a cluster client
     * using {@link NotElectableElectionCredentialsProvider}; a {@link ClusterMembers} view of it
     * is added to {@code arbiters}.
     *
     * <p>Both kinds listen on {@code 0.0.0.0:<cluster port>}, where the port is taken from the
     * member's host in the spec. The HA port of a full member is the cluster port plus 3000.
     *
     * @param serverId id of the member; the spec entry at index {@code toIntegerIndex() - 1} is
     *     used.
     * @throws URISyntaxException if the member's host cannot be turned into a cluster URI.
     * @throws IOException declared by this method; NOTE(review): presumably thrown by the store
     *     directory initializer - confirm.
     */
    private void startMember(InstanceId serverId) throws URISyntaxException, IOException {
      Clusters.Member member = spec.getMembers().get(serverId.toIntegerIndex() - 1);

      // Every member of the spec goes into the comma-separated initial host list.
      StringBuilder hostList = new StringBuilder(spec.getMembers().get(0).getHost());
      for (int i = 1; i < spec.getMembers().size(); i++) {
        hostList.append(",").append(spec.getMembers().get(i).getHost());
      }

      File clusterDir = new File(root, name);
      URI clusterUri = new URI("cluster://" + member.getHost());
      int clusterPort = clusterUri.getPort();

      if (!member.isFullHaMember()) {
        // Not a full HA member: only a cluster client is started, with its own directory.
        Map<String, String> arbiterSettings =
            MapUtil.stringMap(
                ClusterSettings.cluster_name.name(),
                name,
                ClusterSettings.initial_hosts.name(),
                hostList.toString(),
                ClusterSettings.server_id.name(),
                String.valueOf(serverId),
                ClusterSettings.cluster_server.name(),
                "0.0.0.0:" + clusterPort,
                GraphDatabaseSettings.store_dir.name(),
                new File(clusterDir, "arbiter" + serverId).getAbsolutePath());
        Config arbiterConfig =
            new Config(
                arbiterSettings,
                InternalAbstractGraphDatabase.Configuration.class,
                GraphDatabaseSettings.class);

        ObjectStreamFactory streamFactory = new ObjectStreamFactory();
        ClusterClient arbiterClient =
            new ClusterClient(
                new Monitors(),
                ClusterClient.adapt(arbiterConfig),
                NullLogService.getInstance(),
                new NotElectableElectionCredentialsProvider(),
                streamFactory,
                streamFactory);

        // Member listeners are ignored for this client.
        ClusterMemberEvents ignoredEvents =
            new ClusterMemberEvents() {
              @Override
              public void addClusterMemberListener(ClusterMemberListener listener) {
                // noop
              }

              @Override
              public void removeClusterMemberListener(ClusterMemberListener listener) {
                // noop
              }
            };
        arbiters.add(
            new ClusterMembers(
                arbiterClient, arbiterClient, ignoredEvents, arbiterClient.getServerId()));

        life.add(new FutureLifecycleAdapter<>(arbiterClient));
        return;
      }

      int haPort = clusterPort + 3000;
      File storeDir = new File(clusterDir, "server" + serverId);
      if (storeDirInitializer != null) {
        storeDirInitializer.initializeStoreDir(serverId.toIntegerIndex(), storeDir);
      }

      GraphDatabaseBuilder dbBuilder =
          dbFactory.newHighlyAvailableDatabaseBuilder(storeDir.getAbsolutePath());
      dbBuilder.setConfig(ClusterSettings.cluster_name, name);
      dbBuilder.setConfig(ClusterSettings.initial_hosts, hostList.toString());
      dbBuilder.setConfig(ClusterSettings.server_id, String.valueOf(serverId));
      dbBuilder.setConfig(ClusterSettings.cluster_server, "0.0.0.0:" + clusterPort);
      dbBuilder.setConfig(HaSettings.ha_server, ":" + haPort);
      dbBuilder.setConfig(OnlineBackupSettings.online_backup_enabled, Settings.FALSE);
      // Common settings are applied after the defaults above, per-instance settings last.
      dbBuilder.setConfig(commonConfig);
      if (instanceConfig.containsKey(serverId.toIntegerIndex())) {
        dbBuilder.setConfig(instanceConfig.get(serverId.toIntegerIndex()));
      }

      config(dbBuilder, name, serverId);

      final HighlyAvailableGraphDatabaseProxy dbProxy =
          new HighlyAvailableGraphDatabaseProxy(dbBuilder);

      members.put(serverId, dbProxy);

      life.add(
          new LifecycleAdapter() {
            @Override
            public void stop() throws Throwable {
              dbProxy.get().shutdown();
            }
          });
    }
Пример #9
0
  /**
   * Creates the transaction hook for this database and, along the way, wires up the cluster
   * infrastructure this instance uses. This includes the choice between ZooKeeper compatibility
   * mode and plain Paxos mode.
   *
   * <p>In order, this method:
   *
   * <ol>
   *   <li>creates delegating proxies for {@code clusterEvents}, {@code memberContext} and {@code
   *       clusterMemberAvailability};
   *   <li>creates the {@link ClusterClient} and the Paxos-based member events and availability;
   *   <li>registers a listener that requests role elections once the cluster has been entered
   *       and refreshes the snapshot when a coordinator is elected;
   *   <li>enters compatibility mode when {@code HaSettings.coordinators} has a non-blank first
   *       entry, pointing the proxies at ZooKeeper events; otherwise points them at the local
   *       Paxos implementations;
   *   <li>creates {@code members} and {@code memberStateMachine} and registers the components
   *       with {@code life}, {@code compatibilityLifecycle} and {@code paxosLife};
   *   <li>creates the {@link TxHook} proxy and a {@link TxHookModeSwitcher} for it.
   * </ol>
   *
   * @return a proxy implementing {@link TxHook} whose calls are routed through the delegate
   *     handler that is also handed to the {@link TxHookModeSwitcher}.
   */
  @Override
  protected TxHook createTxHook() {
    // The handlers are created before their delegates exist, so that the proxies below can be
    // handed to other components right away. The delegates are set further down.
    clusterEventsDelegateInvocationHandler = new DelegateInvocationHandler();
    memberContextDelegateInvocationHandler = new DelegateInvocationHandler();
    clusterMemberAvailabilityDelegateInvocationHandler = new DelegateInvocationHandler();

    // The cluster events proxy also implements Lifecycle; it is cast to Lifecycle further down.
    clusterEvents =
        (ClusterMemberEvents)
            Proxy.newProxyInstance(
                ClusterMemberEvents.class.getClassLoader(),
                new Class[] {ClusterMemberEvents.class, Lifecycle.class},
                clusterEventsDelegateInvocationHandler);
    memberContext =
        (HighAvailabilityMemberContext)
            Proxy.newProxyInstance(
                HighAvailabilityMemberContext.class.getClassLoader(),
                new Class[] {HighAvailabilityMemberContext.class},
                memberContextDelegateInvocationHandler);
    clusterMemberAvailability =
        (ClusterMemberAvailability)
            Proxy.newProxyInstance(
                ClusterMemberAvailability.class.getClassLoader(),
                new Class[] {ClusterMemberAvailability.class},
                clusterMemberAvailabilityDelegateInvocationHandler);

    /*
     *  We need to create these anyway since even in compatibility mode we'll use them for switchover. If it turns
     *  out we are not going to need zookeeper, just assign them to the class fields. The difference is in when
     *  they start().
     */
    // Slave-only instances get the "not electable" provider. All others get a provider built
    // from the server id, the store directory and the current HA member state.
    // memberStateMachine is only assigned further down; the anonymous class reads the field
    // when it is called, not when it is created.
    ElectionCredentialsProvider electionCredentialsProvider =
        config.get(HaSettings.slave_only)
            ? new NotElectableElectionCredentialsProvider()
            : new DefaultElectionCredentialsProvider(
                config.get(ClusterSettings.server_id),
                new OnDiskLastTxIdGetter(new File(getStoreDir())),
                new HighAvailabilityMemberInfoProvider() {
                  @Override
                  public HighAvailabilityMemberState getHighAvailabilityMemberState() {
                    return memberStateMachine.getCurrentState();
                  }
                });

    clusterClient =
        new ClusterClient(ClusterClient.adapt(config), logging, electionCredentialsProvider);
    // The predicate logs an error when an available "ha" member reports the same server id as
    // this instance.
    // NOTE(review): both the duplicate-id path and the fall-through path return true, although
    // the log message says "will not join this cluster" - confirm whether the first return
    // should differ.
    PaxosClusterMemberEvents localClusterEvents =
        new PaxosClusterMemberEvents(
            clusterClient,
            clusterClient,
            clusterClient,
            clusterClient,
            logging,
            new Predicate<PaxosClusterMemberEvents.ClusterMembersSnapshot>() {
              @Override
              public boolean accept(PaxosClusterMemberEvents.ClusterMembersSnapshot item) {
                for (MemberIsAvailable member : item.getCurrentAvailableMembers()) {
                  if (member.getRoleUri().getScheme().equals("ha")) {
                    if (HighAvailabilityModeSwitcher.getServerId(member.getRoleUri())
                        == config.get(ClusterSettings.server_id)) {
                      msgLog.error(
                          String.format(
                              "Instance %s has the same serverId as ours (%d) - will not join this cluster",
                              member.getRoleUri(), config.get(ClusterSettings.server_id)));
                      return true;
                    }
                  }
                }
                return true;
              }
            });

    // Force a reelection after we enter the cluster
    // and when that election is finished refresh the snapshot
    // The listener removes itself once the first COORDINATOR election has been handled.
    clusterClient.addClusterListener(
        new ClusterListener.Adapter() {
          @Override
          public void enteredCluster(ClusterConfiguration clusterConfiguration) {
            clusterClient.performRoleElections();
          }

          @Override
          public void elected(String role, InstanceId instanceId, URI electedMember) {
            if (role.equals(ClusterConfiguration.COORDINATOR)) {
              clusterClient.refreshSnapshot();
              clusterClient.removeClusterListener(this);
            }
          }
        });

    HighAvailabilityMemberContext localMemberContext =
        new SimpleHighAvailabilityMemberContext(clusterClient.getServerId());
    PaxosClusterMemberAvailability localClusterMemberAvailability =
        new PaxosClusterMemberAvailability(
            clusterClient.getServerId(), clusterClient, clusterClient, logging);

    // Here we decide whether to start in compatibility mode or not.
    // Compatibility mode is chosen when the coordinators setting has a non-blank first entry.
    if (!config.get(HaSettings.coordinators).isEmpty()
        && !config.get(HaSettings.coordinators).get(0).toString().trim().equals("")) {
      compatibilityMode = true;
      compatibilityLifecycle = new LinkedList<Lifecycle>();

      // The switchover is given the delegate handlers and the local Paxos implementations.
      // NOTE(review): presumably it re-points the delegates at those implementations when
      // switching away from ZooKeeper - confirm against ZooToPaxosSwitchover.
      Switchover switchover =
          new ZooToPaxosSwitchover(
              life,
              paxosLife,
              compatibilityLifecycle,
              clusterEventsDelegateInvocationHandler,
              memberContextDelegateInvocationHandler,
              clusterMemberAvailabilityDelegateInvocationHandler,
              localClusterEvents,
              localMemberContext,
              localClusterMemberAvailability);

      // In compatibility mode the proxies are initially backed by the ZooKeeper events object.
      ZooKeeperHighAvailabilityEvents zkEvents =
          new ZooKeeperHighAvailabilityEvents(logging, config, switchover);
      compatibilityLifecycle.add(zkEvents);
      memberContextDelegateInvocationHandler.setDelegate(
          new SimpleHighAvailabilityMemberContext(zkEvents.getInstanceId()));
      clusterEventsDelegateInvocationHandler.setDelegate(zkEvents);
      clusterMemberAvailabilityDelegateInvocationHandler.setDelegate(zkEvents);
      // Paxos Events added to life, won't be stopped because it isn't started yet
      paxosLife.add(localClusterEvents);
    } else {
      // Plain Paxos mode: point the proxies straight at the local implementations.
      memberContextDelegateInvocationHandler.setDelegate(localMemberContext);
      clusterEventsDelegateInvocationHandler.setDelegate(localClusterEvents);
      clusterMemberAvailabilityDelegateInvocationHandler.setDelegate(
          localClusterMemberAvailability);
    }

    // Both components are given the clusterEvents proxy, not localClusterEvents directly.
    members =
        new ClusterMembers(
            clusterClient,
            clusterClient,
            clusterEvents,
            new InstanceId(config.get(ClusterSettings.server_id)));
    memberStateMachine =
        new HighAvailabilityMemberStateMachine(
            memberContext,
            accessGuard,
            members,
            clusterEvents,
            clusterClient,
            logging.getLogger(HighAvailabilityMemberStateMachine.class));

    if (compatibilityMode) {
      /*
       * In here goes stuff that needs to stop when switching. If added in paxosLife too they will be restarted.
       * Adding to life starts them when life.start is called - adding them to compatibilityLifeCycle shuts them
       * down on switchover
       */
      compatibilityLifecycle.add(memberStateMachine);
      //            compatibilityLifecycle.add( highAvailabilityModeSwitcher );
      // The cast relies on the clusterEvents proxy having been created with Lifecycle as one of
      // its interfaces.
      compatibilityLifecycle.add((Lifecycle) clusterEvents);
      life.add(memberStateMachine);
      //            life.add( highAvailabilityModeSwitcher );
      life.add(clusterEvents);
    }
    /*
     * Here goes stuff that needs to start when paxos kicks in:
     * In Normal (non compatibility mode): That means they start normally
     * In Compatibility Mode: That means they start when switchover happens. If added to life too they will be
     * restarted
     */
    paxosLife.add(memberStateMachine);
    paxosLife.add(clusterEvents);
    paxosLife.add(clusterClient);
    paxosLife.add(localClusterMemberAvailability);

    // The returned hook is a proxy over txHookDelegate. The TxHookModeSwitcher is given the same
    // delegate handler together with the member state machine.
    // NOTE(review): presumably it registers itself with the state machine in its constructor,
    // since the created instance is not stored anywhere - confirm.
    DelegateInvocationHandler<TxHook> txHookDelegate = new DelegateInvocationHandler<TxHook>();
    TxHook txHook =
        (TxHook)
            Proxy.newProxyInstance(
                TxHook.class.getClassLoader(), new Class[] {TxHook.class}, txHookDelegate);
    new TxHookModeSwitcher(
        memberStateMachine,
        txHookDelegate,
        master,
        new TxHookModeSwitcher.RequestContextFactoryResolver() {
          @Override
          public RequestContextFactory get() {
            return requestContextFactory;
          }
        },
        dependencyResolver);
    return txHook;
  }