Example #1
  private void handleUpdateQuorumPatch(Operation patch, NodeGroupState localState) {
    UpdateQuorumRequest bd = patch.getBody(UpdateQuorumRequest.class);
    NodeState self = localState.nodes.get(getHost().getId());
    logInfo("Updating self quorum from %d. Body: %s", self.membershipQuorum, Utils.toJsonHtml(bd));

    if (bd.membershipQuorum != null) {
      self.membershipQuorum = bd.membershipQuorum;
    }
    if (bd.synchQuorum != null) {
      self.synchQuorum = bd.synchQuorum;
    }
    self.documentVersion++;
    self.documentUpdateTimeMicros = Utils.getNowMicrosUtc();
    localState.membershipUpdateTimeMicros = self.documentUpdateTimeMicros;

    if (!bd.isGroupUpdate) {
      patch.setBodyNoCloning(localState).complete();
      return;
    }

    // TODO use a three-phase consensus algorithm to update quorum, similar
    // to the steady-state replication consensus.

    // Issue N requests to update quorum on all members of the group. If they
    // do not all succeed, the operation fails and some peers are left with a
    // quorum level different from the others. That is acceptable: the
    // replication logic can reject a peer whose quorum level does not match
    // the owner's, and the client of this request can also retry.

    bd.isGroupUpdate = false;

    int failureThreshold = (localState.nodes.size() - 1) / 2;
    AtomicInteger pending = new AtomicInteger(localState.nodes.size());
    AtomicInteger failures = new AtomicInteger();
    CompletionHandler c =
        (o, e) -> {
          if (e != null) {
            logWarning("Node %s failed quorum update: %s", o.getUri(), e.toString());
            failures.incrementAndGet();
          }

          int p = pending.decrementAndGet();
          if (p != 0) {
            return;
          }
          if (failures.get() > failureThreshold) {
            patch.fail(new IllegalStateException("Majority of nodes failed request"));
          } else {
            patch.setBodyNoCloning(localState).complete();
          }
        };

    for (NodeState node : localState.nodes.values()) {
      if (!NodeState.isAvailable(node, getHost().getId(), true)) {
        c.handle(null, null);
        continue;
      }
      if (bd.membershipQuorum != null) {
        node.membershipQuorum = bd.membershipQuorum;
      }
      if (bd.synchQuorum != null) {
        node.synchQuorum = bd.synchQuorum;
      }
      node.documentVersion++;
      node.documentUpdateTimeMicros = Utils.getNowMicrosUtc();
      Operation p = Operation.createPatch(node.groupReference).setBody(bd).setCompletion(c);
      sendRequest(p);
    }
  }
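
For context, a minimal client-side sketch of how this handler might be invoked: PATCH an UpdateQuorumRequest to the node group service. This is a hedged sketch, not the project's canonical client code: the UpdateQuorumRequest.create(boolean) factory (which is assumed to set the request kind) and the ServiceUriPaths.DEFAULT_NODE_GROUP link are assumptions taken from the surrounding Xenon codebase; the field names mirror the handler above.

  // Hedged sketch: raise membership quorum across the whole group.
  // Assumes UpdateQuorumRequest.create(boolean) sets the request kind and
  // ServiceUriPaths.DEFAULT_NODE_GROUP resolves to this service instance.
  private void updateGroupQuorum(ServiceHost host, int newQuorum) {
    UpdateQuorumRequest body = UpdateQuorumRequest.create(true /* isGroupUpdate */);
    body.membershipQuorum = newQuorum;
    Operation quorumPatch =
        Operation.createPatch(UriUtils.buildUri(host, ServiceUriPaths.DEFAULT_NODE_GROUP))
            .setBody(body)
            .setReferer(host.getUri())
            .setCompletion(
                (o, e) -> {
                  if (e != null) {
                    // some peers may now disagree on quorum; the request is safe to retry
                    host.log(Level.WARNING, "Quorum update failed: %s", e.toString());
                    return;
                  }
                  NodeGroupState rsp = o.getBody(NodeGroupState.class);
                  host.log(Level.INFO, "Quorum updated for %d nodes", rsp.nodes.size());
                });
    host.sendRequest(quorumPatch);
  }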
Example #2
  private void performGroupMaintenance(Operation maint, Operation get, Throwable getEx) {
    // we ignore any body associated with the PUT

    if (getEx != null) {
      logWarning("Failure getting state: %s", getEx.toString());
      maint.complete();
      return;
    }

    if (!get.hasBody()) {
      maint.complete();
      return;
    }

    NodeGroupState localState = get.getBody(NodeGroupState.class);

    if (localState == null || localState.nodes == null) {
      maint.complete();
      return;
    }

    if (localState.nodes.size() <= 1) {
      maint.complete();
      return;
    }

    if (getHost().isStopping()) {
      maint.complete();
      return;
    }

    // probe a fixed, random selection of our peers, giving them our view of the group and
    // getting back theirs

    // probe log10 of the number of peers (excluding self)
    int peersToProbe = (int) Math.log10(localState.nodes.size() - 1);
    // probe at least N peers
    peersToProbe = Math.max(peersToProbe, MIN_PEER_GOSSIP_COUNT);
    // probe at most total number of peers
    peersToProbe = Math.min(localState.nodes.size() - 1, peersToProbe);

    AtomicInteger remaining = new AtomicInteger(peersToProbe);
    NodeState[] randomizedPeers = shuffleGroupMembers(localState);
    NodeState localNode = localState.nodes.get(getHost().getId());
    localNode.documentUpdateTimeMicros = Utils.getNowMicrosUtc();
    localNode.groupReference = UriUtils.buildPublicUri(getHost(), getSelfLink());
    localState.documentOwner = getHost().getId();

    NodeGroupState patchBody = new NodeGroupState();
    patchBody.documentOwner = getHost().getId();
    patchBody.documentUpdateTimeMicros = Utils.getNowMicrosUtc();

    int probeCount = 0;
    for (NodeState peer : randomizedPeers) {
      if (peer == null) {
        continue;
      }

      if (peer.id.equals(getHost().getId())) {
        continue;
      }

      NodeState remotePeer = peer;
      URI peerUri = peer.groupReference;
      // send a gossip PATCH to the peer, with our state

      // perform a health check against N randomly selected peers:
      // 1) we issue a PATCH to a peer, with the body set to our view of the group
      // 2a) if the peer is healthy, it merges our state with its own and returns
      // the merged state in the response; we then update our state and mark the
      // peer AVAILABLE (we only update the peer node entry, we do not currently
      // merge its full state)
      // 2b) if the PATCH fails, we mark the peer UNAVAILABLE

      CompletionHandler ch =
          (o, e) ->
              handleGossipPatchCompletion(
                  maint, o, e, localState, patchBody, remaining, remotePeer);
      Operation patch =
          Operation.createPatch(peerUri)
              .setBody(localState)
              .setRetryCount(0)
              .setExpiration(Utils.getNowMicrosUtc() + getHost().getOperationTimeoutMicros() / 2)
              .forceRemote()
              .setCompletion(ch);

      if (peer.groupReference.equals(localNode.groupReference)
          && peer.status != NodeStatus.REPLACED) {
        // If we just detected this is a peer node that used to listen on our address,
        // but it is obviously no longer around, mark it as REPLACED and do not send the PATCH
        peer.status = NodeStatus.REPLACED;
        peer.documentUpdateTimeMicros = Utils.getNowMicrosUtc();
        peer.documentVersion++;
        ch.handle(null, null);
      } else {
        sendRequest(patch);
      }

      // only probe N peers
      if (++probeCount >= peersToProbe) {
        break;
      }
    }

    if (probeCount == 0) {
      maint.complete();
    }
  }
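
The probe-count arithmetic above scales the gossip fan-out with the logarithm of the group size, clamped between a fixed minimum and the total peer count. A small illustrative sketch of the same math follows; the value 10 used for MIN_PEER_GOSSIP_COUNT in the sample results is an assumption for illustration only.

  // Illustrative only: mirrors the clamping logic in performGroupMaintenance.
  static int peersToProbe(int groupSize, int minGossipCount) {
    int n = (int) Math.log10(groupSize - 1); // log10 of peer count, excluding self
    n = Math.max(n, minGossipCount); // probe at least the configured minimum
    return Math.min(groupSize - 1, n); // but never more peers than exist
  }

  // peersToProbe(3, 10)     -> 2  (small group: probe every peer)
  // peersToProbe(11, 10)    -> 10 (the minimum dominates)
  // peersToProbe(10000, 10) -> 10 (log10 only exceeds the minimum at extreme sizes)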
  /**
   * Issues updates to peer nodes, after a local update has been accepted. If the service supports
   * OWNER_SELECTION, the replication message is the Propose message in the consensus workflow.
   *
   * @param localState the current view of the node group
   * @param outboundOp the client operation to complete or fail once replication resolves
   * @param req the forwarding request carrying the linked service state to replicate
   * @param rsp the owner selection response listing the eligible peer nodes
   */
  void replicateUpdate(
      NodeGroupState localState,
      Operation outboundOp,
      SelectAndForwardRequest req,
      SelectOwnerResponse rsp) {

    int memberCount = localState.nodes.size();
    NodeState selfNode = localState.nodes.get(getHost().getId());
    AtomicInteger successCount = new AtomicInteger(0);

    if (req.serviceOptions.contains(ServiceOption.OWNER_SELECTION)
        && selfNode.membershipQuorum > memberCount) {
      outboundOp.fail(new IllegalStateException("Not enough peers: " + memberCount));
      return;
    }

    if (memberCount == 1) {
      outboundOp.complete();
      return;
    }

    AtomicInteger failureCount = new AtomicInteger();

    // The eligible count can be less than the member count if the parent node selector has
    // a smaller replication factor than the group size. We need to use the replication
    // factor as the upper bound for calculating success and failure thresholds
    int eligibleMemberCount = rsp.selectedNodes.size();

    // When quorum is not required, succeed when we replicate to at least one remote node,
    // or, if only the local node is available, succeed immediately.
    int successThreshold = Math.min(2, eligibleMemberCount - 1);
    int failureThreshold = eligibleMemberCount - successThreshold;

    if (req.serviceOptions.contains(ServiceOption.OWNER_SELECTION)) {
      successThreshold = Math.min(eligibleMemberCount, selfNode.membershipQuorum);
      failureThreshold = eligibleMemberCount - successThreshold;

      if (failureThreshold == successThreshold && successThreshold == 1) {
        // degenerate case: the node group has just two members and quorum must be one,
        // which means that even if the single remote peer is down, we should still succeed.
        failureThreshold = 0;
      }
    }

    final int successThresholdFinal = successThreshold;
    final int failureThresholdFinal = failureThreshold;

    CompletionHandler c =
        (o, e) -> {
          if (e == null
              && o != null
              && o.getStatusCode() >= Operation.STATUS_CODE_FAILURE_THRESHOLD) {
            e = new IllegalStateException("Request failed: " + o.toString());
          }
          int sCount = successCount.get();
          int fCount = failureCount.get();
          if (e != null) {
            logInfo("Replication to %s failed: %s", o.getUri(), e.toString());
            fCount = failureCount.incrementAndGet();
          } else {
            sCount = successCount.incrementAndGet();
          }

          if (sCount == successThresholdFinal) {
            outboundOp.complete();
            return;
          }

          if (fCount == 0) {
            return;
          }

          if (fCount >= failureThresholdFinal || ((fCount + sCount) == memberCount)) {
            String error =
                String.format(
                    "%s to %s failed. Success: %d,  Fail: %d, quorum: %d, threshold: %d",
                    outboundOp.getAction(),
                    outboundOp.getUri().getPath(),
                    sCount,
                    fCount,
                    selfNode.membershipQuorum,
                    failureThresholdFinal);
            logWarning("%s", error);
            outboundOp.fail(new IllegalStateException(error));
          }
        };

    String jsonBody = Utils.toJson(req.linkedState);

    Operation update =
        Operation.createPost(null)
            .setAction(outboundOp.getAction())
            .setBodyNoCloning(jsonBody)
            .setCompletion(c)
            .setRetryCount(1)
            .setExpiration(outboundOp.getExpirationMicrosUtc())
            .transferRequestHeadersFrom(outboundOp)
            .removePragmaDirective(Operation.PRAGMA_DIRECTIVE_FORWARDED)
            .addPragmaDirective(Operation.PRAGMA_DIRECTIVE_REPLICATED)
            .setReferer(outboundOp.getReferer());

    if (update.getCookies() != null) {
      update.getCookies().clear();
    }

    ServiceClient cl = getHost().getClient();
    String selfId = getHost().getId();

    // trigger completion once, for the self node, since it is part of our accounting
    c.handle(null, null);

    rsp.selectedNodes.forEach(
        (m) -> {
          if (m.id.equals(selfId)) {
            return;
          }

          if (m.options.contains(NodeOption.OBSERVER)) {
            return;
          }

          try {
            URI remotePeerService =
                new URI(
                    m.groupReference.getScheme(),
                    null,
                    m.groupReference.getHost(),
                    m.groupReference.getPort(),
                    outboundOp.getUri().getPath(),
                    outboundOp.getUri().getQuery(),
                    null);
            update.setUri(remotePeerService);
          } catch (Throwable e1) {
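            // best effort: a malformed peer URI is ignored here; group references
            // are expected to be well formed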
          }

          if (NodeState.isUnAvailable(m)) {
            c.handle(update, new IllegalStateException("node is not available"));
            return;
          }

          cl.send(update);
        });
  }
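
To make the threshold arithmetic concrete, here is a small sketch of the OWNER_SELECTION branch. The formulas are copied from replicateUpdate above; the sample values are illustrative, not taken from the project.

  // Illustrative only: success/failure thresholds for quorum-based replication.
  // eligible corresponds to rsp.selectedNodes.size(), quorum to selfNode.membershipQuorum.
  static int[] thresholds(int eligible, int quorum) {
    int successThreshold = Math.min(eligible, quorum);
    int failureThreshold = eligible - successThreshold;
    if (failureThreshold == successThreshold && successThreshold == 1) {
      // two-node group with quorum one: tolerate the single remote peer being down
      failureThreshold = 0;
    }
    return new int[] {successThreshold, failureThreshold};
  }

  // thresholds(3, 2) -> {2, 1}  (complete after 2 successes, fail after 1 failure)
  // thresholds(5, 3) -> {3, 2}
  // thresholds(2, 1) -> {1, 0}  (the degenerate two-node case handled above)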