Example #1
 /** Builds (or fills in) the NodeState entry that represents this local host. */
 private NodeState buildLocalNodeState(NodeState body) {
   if (body == null) {
     body = new NodeState();
   }
   body.id = getHost().getId();
   body.status = NodeStatus.SYNCHRONIZING;
   body.groupReference = UriUtils.buildPublicUri(getHost(), getSelfLink());
   body.documentSelfLink = UriUtils.buildUriPath(getSelfLink(), body.id);
   body.documentKind = Utils.buildKind(NodeState.class);
   body.documentUpdateTimeMicros = Utils.getNowMicrosUtc();
   return body;
 }
Example #2
  /** Starts the transaction and bank account factory services before each test runs. */
  @Before
 public void setUp() throws Exception {
   try {
     this.baseAccountId = Utils.getNowMicrosUtc();
     this.host.setTransactionService(null);
     if (this.host.getServiceStage(SimpleTransactionFactoryService.SELF_LINK) == null) {
       this.host.startServiceAndWait(
           SimpleTransactionFactoryService.class, SimpleTransactionFactoryService.SELF_LINK);
       this.host.startServiceAndWait(
           BankAccountFactoryService.class, BankAccountFactoryService.SELF_LINK);
     }
   } catch (Throwable e) {
     throw new RuntimeException(e);
   }
 }
Example #3
  /**
   * Merges the current node group state with state that came through a PATCH.
   *
   * <p>PATCH requests are sent from:
   *
   * <p>1) the local service to itself, after it has communicated with a peer, during maintenance.
   *
   * <p>2) a remote peer when it is probing this local service, during its maintenance cycle.
   *
   * <p>The key invariants that must not be violated, guaranteeing forward evolution of state even
   * if nodes only talk to a small portion of their peers:
   *
   * <p>- When a status changes, the change is accepted only if the remote version is higher.
   *
   * <p>- A local node is the only node that can change its own node entry status through a PATCH
   * it receives.
   *
   * <p>- A node should never increment the version of another node's entry unless that entry is
   * marked UNAVAILABLE.
   *
   * <p>- When a status changes during gossip, the version must be incremented.
   *
   * <p>- Versions always move forward (see the standalone sketch after this method).
   */
  private void mergeRemoteAndLocalMembership(
      NodeGroupState localState, NodeGroupState remotePeerState, EnumSet<NodeGroupChange> changes) {
    if (localState == null) {
      return;
    }

    boolean isSelfPatch = remotePeerState.documentOwner.equals(getHost().getId());
    long now = Utils.getNowMicrosUtc();

    NodeState selfEntry = localState.nodes.get(getHost().getId());

    for (NodeState remoteNodeEntry : remotePeerState.nodes.values()) {

      NodeState l = localState.nodes.get(remoteNodeEntry.id);
      boolean isLocalNode = remoteNodeEntry.id.equals(getHost().getId());

      if (!isSelfPatch && isLocalNode) {
        if (remoteNodeEntry.status != l.status) {
          logWarning(
              "Peer %s is reporting us as %s, current status: %s",
              remotePeerState.documentOwner, remoteNodeEntry.status, l.status);
          if (remoteNodeEntry.documentVersion > l.documentVersion) {
            // catch up to the higher remote version to reassert that we are alive and well
            l.documentVersion = remoteNodeEntry.documentVersion;
            l.documentUpdateTimeMicros = now;
            changes.add(NodeGroupChange.SELF_CHANGE);
          }
        }
        // local instance of node group service is the only one that can update its own
        // status
        continue;
      }

      if (l == null) {
        boolean hasExpired =
            remoteNodeEntry.documentExpirationTimeMicros > 0
                && remoteNodeEntry.documentExpirationTimeMicros < now;
        if (hasExpired || NodeState.isUnAvailable(remoteNodeEntry)) {
          continue;
        }
        if (!isLocalNode) {
          logInfo(
              "Adding new peer %s (%s), status %s",
              remoteNodeEntry.id, remoteNodeEntry.groupReference, remoteNodeEntry.status);
        }
        // we found a new peer, through the gossip PATCH. Add to our state
        localState.nodes.put(remoteNodeEntry.id, remoteNodeEntry);
        changes.add(NodeGroupChange.PEER_ADDED);
        continue;
      }

      boolean needsUpdate = l.status != remoteNodeEntry.status;
      if (needsUpdate) {
        changes.add(NodeGroupChange.PEER_STATUS_CHANGE);
      }

      if (isSelfPatch && isLocalNode && needsUpdate) {
        // we sent a self PATCH to update our status; move our version forward
        remoteNodeEntry.documentVersion =
            Math.max(remoteNodeEntry.documentVersion, l.documentVersion) + 1;
      }

      // versions move forward only, ignore stale nodes
      if (remoteNodeEntry.documentVersion < l.documentVersion) {
        logInfo(
            "v:%d - q:%d, v:%d - q:%d , %s - %s (local:%s %d)",
            l.documentVersion,
            l.membershipQuorum,
            remoteNodeEntry.documentVersion,
            remoteNodeEntry.membershipQuorum,
            l.id,
            remotePeerState.documentOwner,
            getHost().getId(),
            selfEntry.documentVersion);
        continue;
      }

      if (remoteNodeEntry.documentVersion == l.documentVersion && needsUpdate) {
        // pick update with most recent time, even if that is prone to drift and jitter
        // between nodes
        if (remoteNodeEntry.documentUpdateTimeMicros < l.documentUpdateTimeMicros) {
          logWarning(
              "Ignoring update for %s from peer %s. Local status: %s, remote status: %s",
              remoteNodeEntry.id, remotePeerState.documentOwner, l.status, remoteNodeEntry.status);
          continue;
        }
      }

      if (remoteNodeEntry.status == NodeStatus.UNAVAILABLE
          && l.documentExpirationTimeMicros == 0
          && remoteNodeEntry.documentExpirationTimeMicros == 0) {
        remoteNodeEntry.documentExpirationTimeMicros =
            Utils.getNowMicrosUtc() + localState.config.nodeRemovalDelayMicros;
        logInfo(
            "Set expiration at %d for unavailable node %s(%s)",
            remoteNodeEntry.documentExpirationTimeMicros,
            remoteNodeEntry.id,
            remoteNodeEntry.groupReference);
        changes.add(NodeGroupChange.PEER_STATUS_CHANGE);
        needsUpdate = true;
      }

      if (remoteNodeEntry.status == NodeStatus.UNAVAILABLE && needsUpdate) {
        // nodes increment their own entry version, except when they are unavailable
        remoteNodeEntry.documentVersion++;
      }

      localState.nodes.put(remoteNodeEntry.id, remoteNodeEntry);
    }

    List<String> missingNodes = new ArrayList<>();
    for (NodeState l : localState.nodes.values()) {
      NodeState r = remotePeerState.nodes.get(l.id);
      if (!NodeState.isUnAvailable(l) || l.id.equals(getHost().getId())) {
        continue;
      }

      long expirationMicros = l.documentExpirationTimeMicros;
      if (r != null) {
        expirationMicros = Math.max(l.documentExpirationTimeMicros, r.documentExpirationTimeMicros);
      }

      if (expirationMicros > 0 && now > expirationMicros) {
        changes.add(NodeGroupChange.PEER_STATUS_CHANGE);
        logInfo("Removing expired unavailable node %s(%s)", l.id, l.groupReference);
        missingNodes.add(l.id);
      }
    }

    for (String id : missingNodes) {
      localState.nodes.remove(id);
    }

    boolean isModified = !changes.isEmpty();
    localState.membershipUpdateTimeMicros =
        Math.max(
            remotePeerState.membershipUpdateTimeMicros,
            isModified ? now : localState.membershipUpdateTimeMicros);
    if (isModified) {
      logInfo(
          "State updated, merge with %s, self %s, %d",
          remotePeerState.documentOwner,
          localState.documentOwner,
          localState.membershipUpdateTimeMicros);
    }
  }
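
A minimal standalone sketch, not part of NodeGroupService: the hypothetical helper below isolates the two ordering rules the merge enforces for a single node entry, namely that versions only move forward and that a version tie is broken by the most recent update time. It is a simplified view; the real merge above applies the tie-break only when the status differs.

  // pickNewer is a hypothetical illustration, not a real service method: it returns the
  // entry the merge above would keep for one node id
  private static NodeState pickNewer(NodeState local, NodeState remote) {
    if (remote.documentVersion < local.documentVersion) {
      // versions move forward only: a stale remote entry never replaces the local one
      return local;
    }
    if (remote.documentVersion == local.documentVersion
        && remote.documentUpdateTimeMicros < local.documentUpdateTimeMicros) {
      // same version: keep the update with the most recent time, even though wall
      // clocks drift and jitter between nodes
      return local;
    }
    return remote;
  }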
Example #4
  /**
   * Completion handler for a gossip PATCH sent to one peer: on failure the peer is marked
   * UNAVAILABLE, on success the peer's response is merged; once all peers have responded, the
   * accumulated state is sent to this service as a self PATCH.
   */
  public void handleGossipPatchCompletion(
      Operation maint,
      Operation patch,
      Throwable e,
      NodeGroupState localState,
      NodeGroupState patchBody,
      AtomicInteger remaining,
      NodeState remotePeer) {

    try {
      if (patch == null) {
        return;
      }

      long updateTime = localState.membershipUpdateTimeMicros;
      if (e != null) {
        updateTime =
            remotePeer.status != NodeStatus.UNAVAILABLE ? Utils.getNowMicrosUtc() : updateTime;

        if (remotePeer.status != NodeStatus.UNAVAILABLE) {
          remotePeer.documentUpdateTimeMicros = Utils.getNowMicrosUtc();
          remotePeer.documentVersion++;
        }
        remotePeer.status = NodeStatus.UNAVAILABLE;
      } else {
        NodeGroupState peerState = getBody(patch);
        if (peerState.documentOwner.equals(remotePeer.id)) {
          NodeState remotePeerStateFromRsp = peerState.nodes.get(remotePeer.id);
          if (remotePeerStateFromRsp.documentVersion > remotePeer.documentVersion) {
            remotePeer = remotePeerStateFromRsp;
          }
        } else if (remotePeer.status != NodeStatus.REPLACED) {
          logWarning(
              "Peer address %s has changed to id %s from %s",
              patch.getUri(), peerState.documentOwner, remotePeer.id);
          remotePeer.status = NodeStatus.REPLACED;
          remotePeer.documentVersion++;
          updateTime = Utils.getNowMicrosUtc();
        }
        updateTime = Math.max(updateTime, peerState.membershipUpdateTimeMicros);
      }

      synchronized (patchBody) {
        patchBody.nodes.put(remotePeer.id, remotePeer);
        patchBody.membershipUpdateTimeMicros =
            Math.max(updateTime, patchBody.membershipUpdateTimeMicros);
      }

    } finally {
      int r = remaining.decrementAndGet();
      if (r != 0) {
        return;
      }

      // to merge updated state, issue a self PATCH. It contains NodeState entries for every
      // peer node we just talked to
      sendRequest(Operation.createPatch(getUri()).setBody(patchBody));

      maint.complete();
    }
  }
Example #5
  /** Runs one gossip cycle: probes a random selection of peers with our view of the group. */
  private void performGroupMaintenance(Operation maint, Operation get, Throwable getEx) {
    // we ignore any body associated with the maintenance request

    if (getEx != null) {
      logWarning("Failure getting state: %s", getEx.toString());
      maint.complete();
      return;
    }

    if (!get.hasBody()) {
      maint.complete();
      return;
    }

    NodeGroupState localState = get.getBody(NodeGroupState.class);

    if (localState == null || localState.nodes == null) {
      maint.complete();
      return;
    }

    if (localState.nodes.size() <= 1) {
      maint.complete();
      return;
    }

    if (getHost().isStopping()) {
      maint.complete();
      return;
    }

    // probe a fixed, random selection of our peers, giving them our view of the group and
    // getting back theirs

    // probe log10 of the peer count (excluding self)
    int peersToProbe = (int) Math.log10(localState.nodes.size() - 1);
    // but probe at least MIN_PEER_GOSSIP_COUNT peers
    peersToProbe = Math.max(peersToProbe, MIN_PEER_GOSSIP_COUNT);
    // and at most the total number of peers
    peersToProbe = Math.min(localState.nodes.size() - 1, peersToProbe);
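    // Worked example (numbers for illustration only): with 11 nodes there are 10 peers and
    // (int) Math.log10(10) == 1, so MIN_PEER_GOSSIP_COUNT dominates; with 1001 nodes,
    // (int) Math.log10(1000) == 3, so each cycle probes just 3 of the 1000 peers.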

    AtomicInteger remaining = new AtomicInteger(peersToProbe);
    NodeState[] randomizedPeers = shuffleGroupMembers(localState);
    NodeState localNode = localState.nodes.get(getHost().getId());
    localNode.documentUpdateTimeMicros = Utils.getNowMicrosUtc();
    localNode.groupReference = UriUtils.buildPublicUri(getHost(), getSelfLink());
    localState.documentOwner = getHost().getId();

    NodeGroupState patchBody = new NodeGroupState();
    patchBody.documentOwner = getHost().getId();
    patchBody.documentUpdateTimeMicros = Utils.getNowMicrosUtc();

    int probeCount = 0;
    for (NodeState peer : randomizedPeers) {
      if (peer == null) {
        continue;
      }

      if (peer.id.equals(getHost().getId())) {
        continue;
      }

      NodeState remotePeer = peer;
      URI peerUri = peer.groupReference;
      // send a gossip PATCH to the peer, with our state

      // perform a health check against the N randomly selected peers:
      // 1) we issue a PATCH to a peer, with the body set to our view of the group
      // 2a) if the peer is healthy, it merges our state with its own and returns
      // the merged state in the response. We then update our entry for the peer and mark it
      // AVAILABLE. We only update the peer's entry; we don't currently merge its full state
      // 2b) if the PATCH fails, we mark the peer UNAVAILABLE

      CompletionHandler ch =
          (o, e) ->
              handleGossipPatchCompletion(
                  maint, o, e, localState, patchBody, remaining, remotePeer);
      Operation patch =
          Operation.createPatch(peerUri)
              .setBody(localState)
              .setRetryCount(0)
              .setExpiration(Utils.getNowMicrosUtc() + getHost().getOperationTimeoutMicros() / 2)
              .forceRemote()
              .setCompletion(ch);

      if (peer.groupReference.equals(localNode.groupReference)
          && peer.status != NodeStatus.REPLACED) {
        // If we just detected this is a peer node that used to listen on our address,
        // but it is obviously no longer around, mark it as REPLACED and do not send the PATCH
        peer.status = NodeStatus.REPLACED;
        peer.documentUpdateTimeMicros = Utils.getNowMicrosUtc();
        peer.documentVersion++;
        ch.handle(null, null);
      } else {
        sendRequest(patch);
      }

      // only probe N peers
      if (++probeCount >= peersToProbe) {
        break;
      }
    }

    if (probeCount == 0) {
      maint.complete();
    }
  }
Example #6
  /**
   * Handles a JoinPeerRequest POST. The method is invoked twice: once with the original join
   * operation, and again (with joinOp set to null) after the remote peer's state is fetched.
   */
  private void handleJoinPost(
      JoinPeerRequest joinBody,
      Operation joinOp,
      NodeGroupState localState,
      NodeGroupState remotePeerState) {

    if (UriUtils.isHostEqual(getHost(), joinBody.memberGroupReference)) {
      logInfo("Skipping self join");
      // we tried joining ourself, abort;
      joinOp.complete();
      return;
    }

    NodeState self = localState.nodes.get(getHost().getId());

    if (joinOp != null) {
      self.documentUpdateTimeMicros = Utils.getNowMicrosUtc();
      self.documentVersion++;

      // at a minimum we need 2 nodes to synch: self plus the node we are joining
      self.synchQuorum = 2;

      if (joinBody.synchQuorum != null) {
        self.synchQuorum = Math.max(self.synchQuorum, joinBody.synchQuorum);
      }

      if (joinBody.localNodeOptions != null) {
        if (!validateNodeOptions(joinOp, joinBody.localNodeOptions)) {
          return;
        }
        self.options = joinBody.localNodeOptions;
      }

      localState.membershipUpdateTimeMicros = self.documentUpdateTimeMicros;

      // complete the join POST, continue with state merge
      joinOp.complete();
    }

    // this method is two-pass:
    // first pass: fetch the remote peer's group state
    // second pass: merge that state locally and insert self into the remote group

    if (remotePeerState == null) {
      // Pass 1, get existing member state
      sendRequest(
          Operation.createGet(joinBody.memberGroupReference)
              .setCompletion(
                  (o, e) -> {
                    if (e != null) {
                      logWarning("Failure getting peer %s state:%s", o.getUri(), e.toString());
                      return;
                    }

                    NodeGroupState remoteState = getBody(o);
                    handleJoinPost(joinBody, null, localState, remoteState);
                  }));
      return;
    }

    // Pass 2, merge remote group state with ours, send self to peer
    sendRequest(Operation.createPatch(getUri()).setBody(remotePeerState));

    logInfo(
        "Synch quorum: %d. Sending POST to insert self (%s) to peer %s",
        self.synchQuorum, self.groupReference, joinBody.memberGroupReference);

    Operation insertSelfToPeer =
        Operation.createPost(joinBody.memberGroupReference)
            .setBody(self)
            .setCompletion(
                (o, e) -> {
                  if (e != null) {
                    logSevere("Insert POST to %s failed", o.getUri());
                    return;
                  }
                  // we will restart services to synchronize with peers on the next
                  // maintenance interval with a stable group membership
                });
    sendRequest(insertSelfToPeer);
  }
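
A hypothetical client-side sketch of triggering this handler. The memberGroupReference and synchQuorum field names come from the code above; constructing the request directly, the peerGroupUri variable, and ServiceUriPaths.DEFAULT_NODE_GROUP as the local node group path are assumptions, not taken from this source.

  // hedged sketch: ask the local node group service to join the group at peerGroupUri
  JoinPeerRequest join = new JoinPeerRequest();
  join.memberGroupReference = peerGroupUri; // URI of the peer's node group service (assumed)
  join.synchQuorum = 2; // self plus the node being joined
  sendRequest(
      Operation.createPost(UriUtils.buildUri(getHost(), ServiceUriPaths.DEFAULT_NODE_GROUP))
          .setBody(join));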
Example #7
  /**
   * Applies an UpdateQuorumRequest to the local node entry and, when isGroupUpdate is set, fans
   * the update out to every available peer, failing the PATCH if a majority of peers fail.
   */
  private void handleUpdateQuorumPatch(Operation patch, NodeGroupState localState) {
    UpdateQuorumRequest bd = patch.getBody(UpdateQuorumRequest.class);
    NodeState self = localState.nodes.get(getHost().getId());
    logInfo("Updating self quorum from %d. Body: %s", self.membershipQuorum, Utils.toJsonHtml(bd));

    if (bd.membershipQuorum != null) {
      self.membershipQuorum = bd.membershipQuorum;
    }
    if (bd.synchQuorum != null) {
      self.synchQuorum = bd.synchQuorum;
    }
    self.documentVersion++;
    self.documentUpdateTimeMicros = Utils.getNowMicrosUtc();
    localState.membershipUpdateTimeMicros = self.documentUpdateTimeMicros;

    if (!bd.isGroupUpdate) {
      patch.setBodyNoCloning(localState).complete();
      return;
    }

    // TODO use a three phase consensus algorithm to update quorum similar
    // to the steady state replication consensus.

    // Issue N requests to update quorum on every member of the group. If they do not
    // all succeed, the operation fails and some peers are left with a quorum level
    // different from the others. That is acceptable: the replication logic can reject
    // a peer whose quorum level is not set at the same level as the owner's. The
    // client of this request can also retry...

    bd.isGroupUpdate = false;

    int failureThreshold = (localState.nodes.size() - 1) / 2;
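    // e.g., with 5 nodes, failureThreshold = (5 - 1) / 2 = 2, so the PATCH below fails
    // only once 3 or more peers (a majority) have failed the quorum update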
    AtomicInteger pending = new AtomicInteger(localState.nodes.size());
    AtomicInteger failures = new AtomicInteger();
    CompletionHandler c =
        (o, e) -> {
          if (e != null) {
            logWarning("Node %s failed quorum update: %s", o.getUri(), e.toString());
            failures.incrementAndGet();
          }

          int p = pending.decrementAndGet();
          if (p != 0) {
            return;
          }
          if (failures.get() > failureThreshold) {
            patch.fail(new IllegalStateException("Majority of nodes failed request"));
          } else {
            patch.setBodyNoCloning(localState).complete();
          }
        };

    for (NodeState node : localState.nodes.values()) {
      if (!NodeState.isAvailable(node, getHost().getId(), true)) {
        c.handle(null, null);
        continue;
      }
      if (bd.membershipQuorum != null) {
        node.membershipQuorum = bd.membershipQuorum;
      }
      if (bd.synchQuorum != null) {
        node.synchQuorum = bd.synchQuorum;
      }
      node.documentVersion++;
      node.documentUpdateTimeMicros = Utils.getNowMicrosUtc();
      Operation p = Operation.createPatch(node.groupReference).setBody(bd).setCompletion(c);
      sendRequest(p);
    }
  }
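
A hypothetical client sketch of driving this handler to raise quorum across the whole group. The membershipQuorum and isGroupUpdate field names come from the handler above; direct construction of the request and the nodeGroupUri variable are assumptions.

  // hedged sketch: PATCH the node group service so every available peer adopts the new quorum
  UpdateQuorumRequest quorum = new UpdateQuorumRequest();
  quorum.isGroupUpdate = true; // the receiving node fans the update out to its peers
  quorum.membershipQuorum = 2; // require two nodes for membership decisions
  sendRequest(Operation.createPatch(nodeGroupUri).setBody(quorum));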