/**
 * Asks the current coordinator for the rebalancing status and stores the answer in
 * {@code globalRebalancingEnabled}.
 *
 * <p>Nothing is sent when the local node is the coordinator. If the call throws, or the
 * coordinator's reply is not a {@code SuccessfulResponse}, the problem is logged and the stored
 * flag is not assigned.
 */
protected void fetchRebalancingStatusFromCoordinator() {
   if (transport.isCoordinator()) {
     // The coordinator has nobody to ask.
     return;
   }

   ReplicableCommand statusRequest =
       new CacheTopologyControlCommand(
           null,
           CacheTopologyControlCommand.Type.POLICY_GET_STATUS,
           transport.getAddress(),
           transport.getViewId());
   Address coordinator = transport.getCoordinator();
   try {
     // Only the coordinator is addressed, so only its entry in the reply map is of interest.
     Response reply =
         transport
             .invokeRemotely(
                 Collections.singleton(coordinator),
                 statusRequest,
                 ResponseMode.SYNCHRONOUS,
                 getGlobalTimeout(),
                 null,
                 DeliverOrder.NONE,
                 false)
             .get(coordinator);
     if (!(reply instanceof SuccessfulResponse)) {
       log.errorReadingRebalancingStatus(coordinator, null);
       return;
     }
     globalRebalancingEnabled = (Boolean) ((SuccessfulResponse) reply).getResponseValue();
   } catch (Exception e) {
     log.errorReadingRebalancingStatus(coordinator, e);
   }
  }
   /**
    * Processes a cluster view notification.
    *
    * <p>While holding {@code viewHandlingLock}: views whose id is not greater than the stored
    * {@code viewId} are ignored; {@code isCoordinator} is refreshed from the transport; and
    * {@code mustRecoverClusterStatus} is raised on a merge view or when this node has just become
    * coordinator. Non-coordinators return at that point without touching {@code viewId}. On the
    * coordinator, a pending recovery clears {@code cacheStatusMap} and calls
    * {@code recoverClusterStatus}; afterwards {@code viewId} is published under
    * {@code viewUpdateLock} and waiters are notified.
    *
    * <p>After {@code viewHandlingLock} is released, the cache member lists are updated unless a
    * recovery is still pending.
    *
    * @param mergeView whether the notification is for a merge view
    * @param newViewId id of the view being installed
    */
   @Override
   public void handleClusterView(boolean mergeView, int newViewId) {
     synchronized (viewHandlingLock) {
       // check to ensure this is not an older view
       if (newViewId <= viewId) {
         log.tracef("Ignoring old cluster view notification: %s", newViewId);
         return;
       }

       // Compare the previously recorded role with the transport's current answer before
       // overwriting the recorded role.
       boolean becameCoordinator = !isCoordinator && transport.isCoordinator();
       isCoordinator = transport.isCoordinator();
       if (trace) {
         log.tracef(
             "Received new cluster view: %d, isCoordinator = %s, becameCoordinator = %s",
             (Object) newViewId, isCoordinator, becameCoordinator);
       }
       // The flag is sticky (|=): it is only cleared after recoverClusterStatus returns normally.
       mustRecoverClusterStatus |= mergeView || becameCoordinator;
       if (!isCoordinator) return;

       if (mustRecoverClusterStatus) {
         // Clean up leftover cache status information from the last time we were coordinator.
         // E.g. if the local node was coordinator, started a rebalance, and then lost coordinator
         // status because of a merge, the existing cache statuses may have a rebalance in progress.
         cacheStatusMap.clear();
         try {
           recoverClusterStatus(newViewId, mergeView, transport.getMembers());
           mustRecoverClusterStatus = false;
         } catch (InterruptedException e) {
           log.tracef("Cluster state recovery interrupted because the coordinator is shutting down");
           // the CTMI has already stopped, no need to update the view id or notify waiters
           return;
         } catch (SuspectException e) {
           // We will retry when we receive the new view and then we'll reset the
           // mustRecoverClusterStatus flag
           return;
         } catch (Exception e) {
           // Any other failure is only logged; execution falls through to the view id update
           // below with mustRecoverClusterStatus still set.
           if (!isShuttingDown) {
             log.failedToRecoverClusterState(e);
           } else {
             log.tracef("Cluster state recovery failed because the coordinator is shutting down");
           }
         }
       }

       // update the view id last, so join requests from other nodes wait until we recovered existing
       // members' info
       synchronized (viewUpdateLock) {
         viewId = newViewId;
         viewUpdateLock.notifyAll();
       }
     }

     // Runs outside viewHandlingLock; skipped while a recovery is still pending.
     if (!mustRecoverClusterStatus) {
       try {
         updateCacheMembers(transport.getMembers());
       } catch (Exception e) {
         log.errorUpdatingMembersList(e);
       }
     }
   }
Example #3
0
  /**
   * Creates the cache manager from {@code cfgFile}, registers a "wordcount" configuration that
   * copies the default one with L1 disabled, {@code DIST_SYNC} mode and a single owner, and
   * returns the cache obtained from the manager after printing a join banner.
   *
   * @return the cache returned by the manager's no-argument {@code getCache()}
   * @throws IOException declared by the signature; presumably raised while reading the
   *     configuration file
   */
  protected Cache<String, String> startCache() throws IOException {
    EmbeddedCacheManager manager = new CacheBuilder(cfgFile).getCacheManager();
    Configuration defaults = manager.getDefaultCacheConfiguration();

    Configuration wordCountConfig =
        new ConfigurationBuilder()
            .read(defaults)
            .clustering()
            .l1()
            .disable()
            .clustering()
            .cacheMode(CacheMode.DIST_SYNC)
            .hash()
            .numOwners(1)
            .build();
    manager.defineConfiguration("wordcount", wordCountConfig);

    // NOTE(review): the no-argument getCache() is used here rather than getCache("wordcount");
    // confirm that this is the intended cache.
    Cache<String, String> cache = manager.getCache();

    Transport transport = cache.getAdvancedCache().getRpcManager().getTransport();
    String banner =
        isMaster
            ? "Node %s joined as master. View is %s.%n"
            : "Node %s joined as slave. View is %s.%n";
    System.out.printf(banner, transport.getAddress(), transport.getMembers());

    return cache;
  }
  /**
   * Component start hook: clears the shutdown flag, records the current coordinator role,
   * registers a {@code ClusterViewListener} and then processes the transport's current view on
   * the calling thread.
   */
  @Start(priority = 100)
  public void start() {
    isShuttingDown = false;
    isCoordinator = transport.isCoordinator();

    viewListener = new ClusterViewListener();
    cacheManagerNotifier.addListener(viewListener);

    // The listener was added too late to see the initial view, so feed that view in by hand.
    int currentViewId = transport.getViewId();
    handleClusterView(false, currentViewId);
  }
 /**
  * Sends a synchronous {@code POLICY_GET_STATUS} command (with view id {@code -1}) through the
  * transport and discards the replies.
  *
  * <p>The recipient collection is {@code null} (presumably meaning "all members"; confirm
  * against the transport contract).
  *
  * @throws Exception whatever the remote invocation throws
  */
 private void confirmMembersAvailable() throws Exception {
   CacheTopologyControlCommand.Type probeType = CacheTopologyControlCommand.Type.POLICY_GET_STATUS;
   ReplicableCommand probe =
       new CacheTopologyControlCommand(null, probeType, transport.getAddress(), -1);

   // Only successful completion of the call matters here; the response map is not inspected.
   transport.invokeRemotely(
       null, probe, ResponseMode.SYNCHRONOUS, getGlobalTimeout(), null, DeliverOrder.NONE, false);
 }
 /**
  * Reports the rebalance start to {@code CLUSTER} and then hands a {@code REBALANCE_START}
  * command carrying the given topology to {@code executeOnClusterAsync}.
  *
  * @param cacheName name of the cache being rebalanced
  * @param cacheTopology topology to ship with the command
  * @param totalOrder forwarded unchanged to {@code executeOnClusterAsync}
  * @param distributed forwarded unchanged to {@code executeOnClusterAsync}
  */
 @Override
 public void broadcastRebalanceStart(
     String cacheName, CacheTopology cacheTopology, boolean totalOrder, boolean distributed) {
   CLUSTER.startRebalance(cacheName, cacheTopology);

   CacheTopologyControlCommand.Type commandType = CacheTopologyControlCommand.Type.REBALANCE_START;
   // No availability mode is sent with a rebalance start, hence the null argument.
   ReplicableCommand rebalanceStart =
       new CacheTopologyControlCommand(
           cacheName, commandType, transport.getAddress(), cacheTopology, null, transport.getViewId());

   executeOnClusterAsync(rebalanceStart, getGlobalTimeout(), totalOrder, distributed);
 }
 /**
  * Logs the update at debug level and then hands a {@code STABLE_TOPOLOGY_UPDATE} command
  * carrying the given topology to {@code executeOnClusterAsync}.
  *
  * @param cacheName name of the cache whose stable topology changed
  * @param cacheTopology topology to ship with the command
  * @param totalOrder forwarded unchanged to {@code executeOnClusterAsync}
  * @param distributed forwarded unchanged to {@code executeOnClusterAsync}
  */
 @Override
 public void broadcastStableTopologyUpdate(
     String cacheName, CacheTopology cacheTopology, boolean totalOrder, boolean distributed) {
   log.debugf(
       "Updating cluster-wide stable topology for cache %s, topology = %s",
       cacheName, cacheTopology);

   CacheTopologyControlCommand.Type commandType =
       CacheTopologyControlCommand.Type.STABLE_TOPOLOGY_UPDATE;
   // No availability mode is sent with a stable topology update, hence the null argument.
   ReplicableCommand stableUpdate =
       new CacheTopologyControlCommand(
           cacheName, commandType, transport.getAddress(), cacheTopology, null, transport.getViewId());

   executeOnClusterAsync(stableUpdate, getGlobalTimeout(), totalOrder, distributed);
 }
 /**
  * Start hook that caches the local transport address and derives
  * {@code isUsingLockDelegation}, which is {@code true} exactly when the cache's transaction
  * mode is not transactional.
  */
 @Start(priority = 25) // after the distribution manager!
 @SuppressWarnings("unused")
 private void setAddress() {
   this.address = transport.getAddress();

   boolean transactional = cacheConfiguration.transaction().transactionMode().isTransactional();
   this.isUsingLockDelegation = !transactional;
 }
  private void recoverClusterStatus(
      int newViewId, boolean isMergeView, List<Address> clusterMembers) throws Exception {
    ReplicableCommand command =
        new CacheTopologyControlCommand(
            null, CacheTopologyControlCommand.Type.GET_STATUS, transport.getAddress(), newViewId);
    Map<Address, Object> statusResponses =
        executeOnClusterSync(command, getGlobalTimeout(), false, false);

    log.debugf("Got %d status responses. members are %s", statusResponses.size(), clusterMembers);
    Map<String, Map<Address, CacheStatusResponse>> responsesByCache = new HashMap<>();
    for (Map.Entry<Address, Object> responseEntry : statusResponses.entrySet()) {
      Address sender = responseEntry.getKey();
      Map<String, CacheStatusResponse> nodeStatus =
          (Map<String, CacheStatusResponse>) responseEntry.getValue();
      for (Map.Entry<String, CacheStatusResponse> statusEntry : nodeStatus.entrySet()) {
        String cacheName = statusEntry.getKey();

        Map<Address, CacheStatusResponse> cacheResponses = responsesByCache.get(cacheName);
        if (cacheResponses == null) {
          cacheResponses = new HashMap<>();
          responsesByCache.put(cacheName, cacheResponses);
        }
        cacheResponses.put(sender, statusEntry.getValue());
      }
    }

    for (Map.Entry<String, Map<Address, CacheStatusResponse>> e : responsesByCache.entrySet()) {
      ClusterCacheStatus cacheStatus = initCacheStatusIfAbsent(e.getKey());
      cacheStatus.doMergePartitions(e.getValue(), clusterMembers, isMergeView);
    }
  }
  public void executeOnClusterAsync(
      final ReplicableCommand command, final int timeout, boolean totalOrder, boolean distributed) {
    if (!totalOrder) {
      // invoke the command on the local node
      asyncTransportExecutor.submit(
          new Runnable() {
            @Override
            public void run() {
              gcr.wireDependencies(command);
              try {
                if (log.isTraceEnabled())
                  log.tracef("Attempting to execute command on self: %s", command);
                command.perform(null);
              } catch (Throwable throwable) {
                // The command already logs any exception in perform()
              }
            }
          });
    }

    // invoke remotely
    try {
      transport.invokeRemotely(
          null,
          command,
          ResponseMode.ASYNCHRONOUS_WITH_SYNC_MARSHALLING,
          timeout,
          true,
          null,
          totalOrder,
          distributed);
    } catch (Exception e) {
      throw new CacheException("Failed to broadcast asynchronous command: " + command);
    }
  }
  @Start(priority = 11) // after Transport
  public void start() {
    SecurityActions.addCacheManagerListener(cache.getCacheManager(), new RankCalculator());

    isClustered =
        SecurityActions.getCacheConfiguration(cache.getAdvancedCache())
            .clustering()
            .cacheMode()
            .isClustered();

    if (isClustered) {
      // Use component registry to avoid keeping an instance ref simply used on start
      ComponentRegistry componentRegistry =
          SecurityActions.getCacheComponentRegistry(cache.getAdvancedCache());
      Transport transport =
          componentRegistry.getGlobalComponentRegistry().getComponent(Transport.class);
      calculateRank(transport.getAddress(), transport.getMembers(), transport.getViewId());
    }
  }
   /**
    * Processes a cluster view notification; all work happens while holding
    * {@code viewHandlingLock}.
    *
    * <p>Views whose id is not greater than the stored {@code viewId} are ignored.
    * {@code isCoordinator} is refreshed from the transport, and non-coordinators return without
    * touching {@code viewId}. On the coordinator, a merge view or a freshly acquired coordinator
    * role triggers {@code recoverClusterStatus}; otherwise the cache member lists are updated.
    * Finally {@code viewId} is published under {@code viewUpdateLock} and waiters are notified.
    *
    * @param mergeView whether the notification is for a merge view
    * @param newViewId id of the view being installed
    */
   @Override
   public void handleClusterView(boolean mergeView, int newViewId) {
     synchronized (viewHandlingLock) {
       // check to ensure this is not an older view
       if (newViewId <= viewId) {
         log.tracef("Ignoring old cluster view notification: %s", newViewId);
         return;
       }

       // Compare the previously recorded role with the transport's current answer before
       // overwriting the recorded role.
       boolean becameCoordinator = !isCoordinator && transport.isCoordinator();
       isCoordinator = transport.isCoordinator();
       log.tracef(
           "Received new cluster view: %s, isCoordinator = %s, becameCoordinator = %s",
           newViewId, isCoordinator, becameCoordinator);
       if (!isCoordinator) return;

       if (mergeView || becameCoordinator) {
         try {
           recoverClusterStatus(newViewId, mergeView, transport.getMembers());
         } catch (InterruptedException e) {
           log.tracef("Cluster state recovery interrupted because the coordinator is shutting down");
           // the CTMI has already stopped, no need to update the view id or notify waiters
           return;
         } catch (Exception e) {
           // TODO Retry?
           // The failure is only logged; the view id is still updated below.
           log.failedToRecoverClusterState(e);
         }
       } else {
         try {
           updateCacheMembers(transport.getMembers());
         } catch (Exception e) {
           log.errorUpdatingMembersList(e);
         }
       }

       // update the view id last, so join requests from other nodes wait until we recovered existing
       // members' info
       synchronized (viewUpdateLock) {
         viewId = newViewId;
         viewUpdateLock.notifyAll();
       }
     }
   }
 /**
  * Logs the update at debug level and then hands a {@code CH_UPDATE} command carrying the given
  * topology and availability mode to {@code executeOnClusterAsync}.
  *
  * @param cacheName name of the cache whose current topology changed
  * @param cacheTopology topology to ship with the command
  * @param availabilityMode availability mode to ship with the command
  * @param totalOrder forwarded unchanged to {@code executeOnClusterAsync}
  * @param distributed forwarded unchanged to {@code executeOnClusterAsync}
  */
 @Override
 public void broadcastTopologyUpdate(
     String cacheName,
     CacheTopology cacheTopology,
     AvailabilityMode availabilityMode,
     boolean totalOrder,
     boolean distributed) {
   log.debugf(
       "Updating cluster-wide current topology for cache %s, topology = %s, availability mode = %s",
       cacheName, cacheTopology, availabilityMode);

   CacheTopologyControlCommand.Type commandType = CacheTopologyControlCommand.Type.CH_UPDATE;
   ReplicableCommand topologyUpdate =
       new CacheTopologyControlCommand(
           cacheName,
           commandType,
           transport.getAddress(),
           cacheTopology,
           availabilityMode,
           transport.getViewId());

   executeOnClusterAsync(topologyUpdate, getGlobalTimeout(), totalOrder, distributed);
 }
 /**
  * Swaps a mock {@code Transport} reporting a single member into the cache's
  * {@code RpcManagerImpl}, performs a {@code put}, and verifies the mock. The only expectations
  * recorded are {@code getMembers()} and {@code getAddress()}, so the test is meant to show that
  * no remote invocation is attempted. The original transport is restored afterwards.
  */
 public void testInvokeRemotelyWhenSingleMember() throws Exception {
   Cache replCache = cache(0, "replSync");
   Transport transportMock = createMock(Transport.class);
   RpcManagerImpl rpcManager =
       (RpcManagerImpl) TestingUtil.extractComponent(replCache, RpcManager.class);
   Transport realTransport = TestingUtil.extractComponent(replCache, Transport.class);
   try {
     Address soleMember = createNiceMock(Address.class);
     List<Address> members = new ArrayList<Address>(1);
     members.add(soleMember);

     expect(transportMock.getMembers()).andReturn(members).anyTimes();
     expect(transportMock.getAddress()).andReturn(null).anyTimes();
     rpcManager.setTransport(transportMock);

     // No invokeRemotely expectation is recorded: the transport must not be asked to send.
     replay(soleMember, transportMock);

     // The transport is a mock, so this put does not actually replicate anything.
     replCache.put(key, value);
     verify(transportMock);
   } finally {
     if (rpcManager != null) {
       rpcManager.setTransport(realTransport);
     }
   }
 }
  private void recoverClusterStatus(
      int newViewId, final boolean isMergeView, final List<Address> clusterMembers)
      throws Exception {
    log.debugf("Recovering cluster status for view %d", newViewId);
    ReplicableCommand command =
        new CacheTopologyControlCommand(
            null, CacheTopologyControlCommand.Type.GET_STATUS, transport.getAddress(), newViewId);
    Map<Address, Object> statusResponses =
        executeOnClusterSync(
            command, getGlobalTimeout(), false, false, new CacheTopologyFilterReuser());

    log.debugf("Got %d status responses. members are %s", statusResponses.size(), clusterMembers);
    Map<String, Map<Address, CacheStatusResponse>> responsesByCache = new HashMap<>();
    boolean recoveredRebalancingStatus = true;
    for (Map.Entry<Address, Object> responseEntry : statusResponses.entrySet()) {
      Address sender = responseEntry.getKey();
      ManagerStatusResponse nodeStatus = (ManagerStatusResponse) responseEntry.getValue();
      recoveredRebalancingStatus &= nodeStatus.isRebalancingEnabled();
      for (Map.Entry<String, CacheStatusResponse> statusEntry : nodeStatus.getCaches().entrySet()) {
        String cacheName = statusEntry.getKey();
        Map<Address, CacheStatusResponse> cacheResponses = responsesByCache.get(cacheName);
        if (cacheResponses == null) {
          cacheResponses = new HashMap<>();
          responsesByCache.put(cacheName, cacheResponses);
        }
        cacheResponses.put(sender, statusEntry.getValue());
      }
    }

    globalRebalancingEnabled = recoveredRebalancingStatus;
    // Compute the new consistent hashes on separate threads
    int maxThreads = Runtime.getRuntime().availableProcessors() / 2 + 1;
    CompletionService<Void> cs =
        new SemaphoreCompletionService<>(asyncTransportExecutor, maxThreads);
    for (final Map.Entry<String, Map<Address, CacheStatusResponse>> e :
        responsesByCache.entrySet()) {
      final ClusterCacheStatus cacheStatus = initCacheStatusIfAbsent(e.getKey());
      cs.submit(
          new Callable<Void>() {
            @Override
            public Void call() throws Exception {
              cacheStatus.doMergePartitions(e.getValue(), clusterMembers, isMergeView);
              return null;
            }
          });
    }
    for (int i = 0; i < responsesByCache.size(); i++) {
      cs.take();
    }
  }
  public void notifyCoordinatorPushCompleted(int viewId) throws Exception {
    Transport t = rpcManager.getTransport();

    if (t.isCoordinator()) {
      if (trace)
        log.tracef(
            "Node %s is the coordinator, marking push for %d as complete directly", self, viewId);
      markNodePushCompleted(viewId, self);
    } else {
      final RehashControlCommand cmd =
          cf.buildRehashControlCommand(RehashControlCommand.Type.NODE_PUSH_COMPLETED, self, viewId);
      Address coordinator = rpcManager.getTransport().getCoordinator();

      if (trace)
        log.tracef(
            "Node %s is not the coordinator, sending request to mark push for %d as complete to %s",
            self, viewId, coordinator);
      rpcManager.invokeRemotely(
          Collections.singleton(coordinator),
          cmd,
          ResponseMode.SYNCHRONOUS,
          configuration.getRehashRpcTimeout());
    }
  }
  @Start(priority = 100)
  public void start() {
    isShuttingDown = false;
    isCoordinator = transport.isCoordinator();

    viewListener = new ClusterViewListener();
    cacheManagerNotifier.addListener(viewListener);
    // The listener already missed the initial view
    asyncTransportExecutor.submit(
        new Runnable() {
          @Override
          public void run() {
            handleClusterView(false, transport.getViewId());
          }
        });

    fetchRebalancingStatusFromCoordinator();
  }
  // needs to be AFTER the RpcManager
  // The DMI is cache-scoped, so it will always start after the RMI, which is global-scoped
  @Start(priority = 20)
  private void join() throws Exception {
    if (trace) log.trace("starting distribution manager on " + getMyAddress());
    notifier.addListener(listener);

    Transport t = rpcManager.getTransport();
    List<Address> members = t.getMembers();
    self = t.getAddress();
    lastViewId = t.getViewId();
    consistentHash = ConsistentHashHelper.createConsistentHash(configuration, members);
    lastSuccessfulCH = ConsistentHashHelper.createConsistentHash(configuration, members);

    // in case we are/become the coordinator, make sure we're in the push confirmations map before
    // anyone else
    synchronized (pushConfirmations) {
      pushConfirmations.put(t.getAddress(), -1);
    }

    // allow incoming requests
    joinStartedLatch.countDown();

    // nothing to push, but we need to inform the coordinator that we have finished our push
    notifyCoordinatorPushCompleted(t.getViewId());
  }
  private Map<Address, Object> executeOnClusterSync(
      final ReplicableCommand command, final int timeout, boolean totalOrder, boolean distributed)
      throws Exception {
    // first invoke remotely

    if (totalOrder) {
      Map<Address, Response> responseMap =
          transport.invokeRemotely(
              transport.getMembers(),
              command,
              ResponseMode.SYNCHRONOUS_IGNORE_LEAVERS,
              timeout,
              false,
              null,
              totalOrder,
              distributed);
      Map<Address, Object> responseValues =
          new HashMap<Address, Object>(transport.getMembers().size());
      for (Map.Entry<Address, Response> entry : responseMap.entrySet()) {
        Address address = entry.getKey();
        Response response = entry.getValue();
        if (!response.isSuccessful()) {
          Throwable cause =
              response instanceof ExceptionResponse
                  ? ((ExceptionResponse) response).getException()
                  : null;
          throw new CacheException(
              "Unsuccessful response received from node " + address + ": " + response, cause);
        }
        responseValues.put(address, ((SuccessfulResponse) response).getResponseValue());
      }
      return responseValues;
    }

    Future<Map<Address, Response>> remoteFuture =
        asyncTransportExecutor.submit(
            new Callable<Map<Address, Response>>() {
              @Override
              public Map<Address, Response> call() throws Exception {
                return transport.invokeRemotely(
                    null,
                    command,
                    ResponseMode.SYNCHRONOUS_IGNORE_LEAVERS,
                    timeout,
                    true,
                    null,
                    false,
                    false);
              }
            });

    // invoke the command on the local node
    gcr.wireDependencies(command);
    Response localResponse;
    try {
      if (log.isTraceEnabled()) log.tracef("Attempting to execute command on self: %s", command);
      localResponse = (Response) command.perform(null);
    } catch (Throwable throwable) {
      throw new Exception(throwable);
    }
    if (!localResponse.isSuccessful()) {
      throw new CacheException("Unsuccessful local response: " + localResponse);
    }

    // wait for the remote commands to finish
    Map<Address, Response> responseMap = remoteFuture.get(timeout, TimeUnit.MILLISECONDS);

    // parse the responses
    Map<Address, Object> responseValues =
        new HashMap<Address, Object>(transport.getMembers().size());
    for (Map.Entry<Address, Response> entry : responseMap.entrySet()) {
      Address address = entry.getKey();
      Response response = entry.getValue();
      if (!response.isSuccessful()) {
        Throwable cause =
            response instanceof ExceptionResponse
                ? ((ExceptionResponse) response).getException()
                : null;
        throw new CacheException(
            "Unsuccessful response received from node " + address + ": " + response, cause);
      }
      responseValues.put(address, ((SuccessfulResponse) response).getResponseValue());
    }

    responseValues.put(
        transport.getAddress(), ((SuccessfulResponse) localResponse).getResponseValue());

    return responseValues;
  }