Example #1
  /**
   * Multicasts a GET_DIGEST_REQ to all current members and waits for all responses (GET_DIGEST_RSP)
   * or until the timeout expires.
   *
   * @return a digest merged from all responses received (possibly incomplete if the timeout expired)
   */
  private Digest fetchDigestsFromAllMembersInSubPartition(List<Address> current_mbrs) {
    if (current_mbrs == null) return null;

    GMS.GmsHeader hdr = new GMS.GmsHeader(GMS.GmsHeader.GET_DIGEST_REQ);
    Message get_digest_req = new Message();
    get_digest_req.setFlag(Message.OOB);
    get_digest_req.putHeader(gms.getId(), hdr);

    long max_wait_time = gms.merge_timeout > 0 ? gms.merge_timeout / 2 : 2000L;
    digest_collector.reset(current_mbrs);

    // add my own digest first
    Digest digest = (Digest) gms.getDownProtocol().down(Event.GET_DIGEST_EVT);
    digest_collector.add(gms.local_addr, digest);

    gms.getDownProtocol().down(new Event(Event.MSG, get_digest_req));
    digest_collector.waitForAllResponses(max_wait_time);
    if (log.isDebugEnabled()) {
      if (digest_collector.hasAllResponses())
        log.debug(gms.local_addr + ": fetched all digests for " + current_mbrs);
      else
        log.debug(
            gms.local_addr
                + ": fetched incomplete digests (after timeout of "
                + max_wait_time
                + ") ms for "
                + current_mbrs);
    }
    Map<Address, Digest> responses = new HashMap<Address, Digest>(digest_collector.getResults());
    MutableDigest retval = new MutableDigest(responses.size());
    for (Digest dig : responses.values()) {
      if (dig != null) retval.add(dig);
    }
    return retval;
  }
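For context, the digest_collector used above is relied on for a simple contract: reset() registers the members a GET_DIGEST_RSP is expected from, add() records one response, waitForAllResponses() blocks until every expected member has answered or the timeout elapses, and getResults() returns whatever arrived. The class below is a minimal illustrative stand-in for that contract under those assumptions, not the actual JGroups ResponseCollector.

// Minimal sketch of the collector contract assumed by fetchDigestsFromAllMembersInSubPartition().
// Address and Digest are the JGroups types used above; the implementation here is illustrative only.
class DigestCollector {
  private final Map<Address, Digest> results = new HashMap<Address, Digest>();

  // register the members we expect a GET_DIGEST_RSP from; null marks "not yet received"
  synchronized void reset(List<Address> expected) {
    results.clear();
    for (Address mbr : expected) results.put(mbr, null);
  }

  // record one response (also used to add our own digest)
  synchronized void add(Address sender, Digest digest) {
    if (results.containsKey(sender)) {
      results.put(sender, digest);
      notifyAll();
    }
  }

  synchronized boolean hasAllResponses() {
    return !results.containsValue(null);
  }

  // block until all expected members have responded or max_wait_time ms have elapsed
  synchronized void waitForAllResponses(long max_wait_time) {
    long deadline = System.currentTimeMillis() + max_wait_time;
    while (!hasAllResponses()) {
      long remaining = deadline - System.currentTimeMillis();
      if (remaining <= 0) return;
      try {
        wait(remaining);
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        return;
      }
    }
  }

  synchronized Map<Address, Digest> getResults() {
    return new HashMap<Address, Digest>(results);
  }
}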
Example #2
    /**
     * Merges all digests into one. For each sender, the new value is max(highest_delivered),
     * max(highest_received). The caller must hold the lock on merge_rsps.
     */
    private Digest consolidateDigests(List<MergeData> merge_rsps, int num_mbrs) {
      MutableDigest retval = new MutableDigest(num_mbrs);

      for (MergeData data : merge_rsps) {
        Digest tmp_digest = data.getDigest();
        if (tmp_digest == null) continue;

        retval.merge(tmp_digest);
      }
      return retval.copy();
    }
Example #3
    /**
     * Merges all digests into one. For each sender, the new value is min(low_seqno),
     * max(high_seqno), max(high_seqno_seen). The caller must hold the lock on merge_rsps.
     */
    private Digest consolidateDigests(Vector<MergeData> merge_rsps, int num_mbrs) {
      MutableDigest retval = new MutableDigest(num_mbrs);

      for (MergeData data : merge_rsps) {
        Digest tmp_digest = data.getDigest();
        if (tmp_digest == null) {
          if (log.isErrorEnabled()) log.error("tmp_digest == null; skipping");
          continue;
        }
        retval.merge(tmp_digest);
      }
      return retval.copy();
    }
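The javadoc above spells out the per-sender merge rule that retval.merge(tmp_digest) is relied on to apply: min(low_seqno), max(high_seqno), max(high_seqno_seen). A standalone sketch of that rule, using a hypothetical helper name rather than the actual MutableDigest code:

    // Illustrative only: combines two per-sender seqno triplets as described in the javadoc above.
    // mergeSeqnos() is a hypothetical helper, not part of MutableDigest.
    private static long[] mergeSeqnos(long[] mine, long[] other) {
      long low = Math.min(mine[0], other[0]); // min(low_seqno)
      long high = Math.max(mine[1], other[1]); // max(high_seqno)
      long high_seen = Math.max(mine[2], other[2]); // max(high_seqno_seen)
      return new long[] {low, high, high_seen};
    }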
Example #4
  /**
   * Multicasts a GET_DIGEST_REQ to all current members and waits for all responses (GET_DIGEST_RSP)
   * or until the timeout expires.
   *
   * @return a digest merged from all responses received (possibly incomplete if the timeout expired)
   */
  private Digest fetchDigestsFromAllMembersInSubPartition(
      List<Address> current_mbrs, MergeId merge_id) {

    // Optimization: if we're the only member, we don't need to multicast the get-digest message
    if (current_mbrs == null
        || current_mbrs.size() == 1 && current_mbrs.get(0).equals(gms.local_addr))
      return (Digest) gms.getDownProtocol().down(new Event(Event.GET_DIGEST, gms.local_addr));

    GMS.GmsHeader hdr = new GMS.GmsHeader(GMS.GmsHeader.GET_DIGEST_REQ);
    hdr.merge_id = merge_id;
    Message get_digest_req =
        new Message().setFlag(Message.Flag.OOB, Message.Flag.INTERNAL).putHeader(gms.getId(), hdr);

    long max_wait_time =
        gms.merge_timeout / 2; // gms.merge_timeout is guaranteed to be > 0, verified in init()
    digest_collector.reset(current_mbrs);

    gms.getDownProtocol().down(new Event(Event.MSG, get_digest_req));

    // Add my own digest. The get_digest_req has to be sent *before* we fetch our own digest,
    // so that the request message itself is included in our digest.
    Digest digest =
        (Digest) gms.getDownProtocol().down(new Event(Event.GET_DIGEST, gms.local_addr));
    digest_collector.add(gms.local_addr, digest);
    digest_collector.waitForAllResponses(max_wait_time);
    if (log.isTraceEnabled()) {
      if (digest_collector.hasAllResponses())
        log.trace(gms.local_addr + ": fetched all digests for " + current_mbrs);
      else
        log.trace(
            gms.local_addr
                + ": fetched incomplete digests (after timeout of "
                + max_wait_time
                + ") ms for "
                + current_mbrs);
    }
    Map<Address, Digest> responses = new HashMap<Address, Digest>(digest_collector.getResults());
    MutableDigest retval = new MutableDigest(responses.size());
    for (Digest dig : responses.values()) {
      if (dig != null) retval.add(dig);
    }
    return retval;
  }
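The fetch above blocks on digest_collector until the GET_DIGEST_RSP of every member has arrived. Assuming the response path simply feeds the same collector (the handler name below is illustrative, not confirmed by the snippet), the interplay looks roughly like this:

  // Sketch of the receiving side assumed by the fetch method above (handler name is illustrative):
  // when a GET_DIGEST_RSP arrives, its digest is handed to the collector, which eventually
  // unblocks waitForAllResponses() in fetchDigestsFromAllMembersInSubPartition().
  private void handleDigestResponse(Address sender, Digest digest) {
    digest_collector.add(sender, digest);
  }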
Example #5
  public void handleMembershipChange(Collection<Request> requests) {
    boolean joinAndStateTransferInitiated = false;
    boolean useFlushIfPresent = gms.use_flush_if_present;
    Collection<Address> new_mbrs = new LinkedHashSet<>(requests.size());
    Collection<Address> suspected_mbrs = new LinkedHashSet<>(requests.size());
    Collection<Address> leaving_mbrs = new LinkedHashSet<>(requests.size());

    boolean self_leaving = false; // is the coord leaving

    for (Request req : requests) {
      switch (req.type) {
        case Request.JOIN:
          new_mbrs.add(req.mbr);
          if (req.useFlushIfPresent) useFlushIfPresent = true;
          break;
        case Request.JOIN_WITH_STATE_TRANSFER:
          new_mbrs.add(req.mbr);
          joinAndStateTransferInitiated = true;
          if (req.useFlushIfPresent) useFlushIfPresent = true;
          break;
        case Request.LEAVE:
          if (req.suspected) suspected_mbrs.add(req.mbr);
          else {
            leaving_mbrs.add(req.mbr);
            if (gms.local_addr != null && gms.local_addr.equals(req.mbr)) self_leaving = true;
          }
          break;
        case Request.SUSPECT:
          suspected_mbrs.add(req.mbr);
          break;
      }
    }

    new_mbrs.remove(gms.local_addr); // remove myself - cannot join myself (already joined)

    if (gms.getViewId() == null) {
      // we're probably not the coord anymore (we just left ourselves), let someone else do it
      // (client will retry when it doesn't get a response)
      log.debug(
          "gms.view_id is null, I'm not the coordinator anymore (leaving=%b); "
              + "the new coordinator will handle the leave request",
          self_leaving);
      return;
    }

    List<Address> current_members = gms.members.getMembers();
    // remove all elements of leaving_mbrs which are not current members
    leaving_mbrs.retainAll(current_members);
    if (suspected_mbrs.remove(gms.local_addr))
      log.warn("I am the coord and I'm being suspected -- will probably leave shortly");

    // remove all elements of suspected_mbrs which are not current members
    suspected_mbrs.retainAll(current_members);

    // for the members that have already joined, return the current digest and membership
    for (Iterator<Address> it = new_mbrs.iterator(); it.hasNext(); ) {
      Address mbr = it.next();
      if (gms.members.contains(mbr)) { // already joined: return current digest and membership
        log.trace(
            "%s: %s already present; returning existing view %s", gms.local_addr, mbr, gms.view);
        Tuple<View, Digest> tuple = gms.getViewAndDigest();
        if (tuple != null) gms.sendJoinResponse(new JoinRsp(tuple.getVal1(), tuple.getVal2()), mbr);
        else
          log.warn(
              "%s: did not find a digest matching view %s; dropping JOIN-RSP",
              gms.local_addr, gms.view);
        // remove it anyway, even if we didn't find a digest matching the view (joiner will retry)
        it.remove();
      }
    }

    if (new_mbrs.isEmpty() && leaving_mbrs.isEmpty() && suspected_mbrs.isEmpty()) {
      log.trace("%s: found no members to add or remove, will not create new view", gms.local_addr);
      return;
    }

    View new_view = gms.getNextView(new_mbrs, leaving_mbrs, suspected_mbrs);

    if (new_view.size() == 0
        && gms.local_addr != null
        && gms.local_addr.equals(new_view.getCreator())) {
      if (self_leaving) gms.initState(); // in case connect() is called again
      return;
    }

    log.trace(
        "%s: joiners=%s, suspected=%s, leaving=%s, new view: %s",
        gms.local_addr, new_mbrs, suspected_mbrs, leaving_mbrs, new_view);

    JoinRsp join_rsp = null;
    boolean hasJoiningMembers = !new_mbrs.isEmpty();
    try {
      boolean successfulFlush =
          !useFlushIfPresent || !gms.flushProtocolInStack || gms.startFlush(new_view);
      if (!successfulFlush && hasJoiningMembers) {
        // Don't send a join response if the flush fails
        // (http://jira.jboss.org/jira/browse/JGRP-759)
        // The joiner should block until the previous FLUSH completed
        sendLeaveResponses(leaving_mbrs); // we still have to send potential leave responses
        // but let the joining client timeout and send another join request
        return;
      }

      // We cannot garbage-collect while a new member is joining *if* we're the only member.
      // Example: {A}, B joins; after returning the JoinRsp to B, A garbage-collects messages
      // higher than those in the digest returned to the client, so the client will *not* be able
      // to ask for retransmission of those messages if it misses them.
      if (hasJoiningMembers) {
        gms.getDownProtocol().down(new Event(Event.SUSPEND_STABLE, MAX_SUSPEND_TIMEOUT));
        // create a new digest, which contains the new members, minus left members
        MutableDigest join_digest =
            new MutableDigest(new_view.getMembersRaw()).set(gms.getDigest());
        for (Address member : new_mbrs)
          join_digest.set(member, 0, 0); // ... and set the new members. their first seqno will be 1

        // If the digest from NAKACK doesn't include all members of the view, we try once more;
        // if it is still incomplete, we don't send a JoinRsp back to the joiner(s).
        // This shouldn't be a problem, as they will retry.
        if (join_digest.allSet() || join_digest.set(gms.getDigest()).allSet())
          join_rsp = new JoinRsp(new_view, join_digest);
        else
          log.warn(
              "%s: digest does not match view (missing seqnos for %s); dropping JOIN-RSP",
              gms.local_addr, Arrays.toString(join_digest.getNonSetMembers()));
      }

      sendLeaveResponses(leaving_mbrs); // no-op if no leaving members

      // we don't need to send the digest to existing members:
      // https://issues.jboss.org/browse/JGRP-1317
      gms.castViewChange(new_view, null, new_mbrs);
      gms.sendJoinResponses(join_rsp, new_mbrs);
    } finally {
      if (hasJoiningMembers) gms.getDownProtocol().down(new Event(Event.RESUME_STABLE));
      if (!joinAndStateTransferInitiated && useFlushIfPresent) gms.stopFlush();
      if (self_leaving) gms.initState(); // in case connect() is called again
    }
  }