/**
 * Sends the new view and digest to all subgroup coordinators in coords. Each coordinator will in turn
 * <ol>
 * <li>broadcast the new view and digest to all the members of its subgroup (MergeView)
 * <li>on reception of the view, if it is a MergeView, each member will set the digest and
 *     install the new view
 * </ol>
 */
private void sendMergeView(Collection<Address> coords, MergeData combined_merge_data, MergeId merge_id) {
    if (coords == null || combined_merge_data == null)
        return;

    View view = combined_merge_data.view;
    Digest digest = combined_merge_data.digest;
    if (view == null || digest == null) {
        if (log.isErrorEnabled())
            log.error("view or digest is null, cannot send consolidated merge view/digest");
        return;
    }

    if (log.isDebugEnabled())
        log.debug(gms.local_addr + ": sending merge view " + view.getVid() + " to coordinators " + coords);

    gms.merge_ack_collector.reset(coords);
    int size = gms.merge_ack_collector.size();
    long timeout = gms.view_ack_collection_timeout;
    long start = System.currentTimeMillis();

    for (Address coord : coords) {
        Message msg = new Message(coord, null, null);
        GMS.GmsHeader hdr = new GMS.GmsHeader(GMS.GmsHeader.INSTALL_MERGE_VIEW);
        hdr.view = view;
        hdr.my_digest = digest;
        hdr.merge_id = merge_id;
        msg.putHeader(gms.getId(), hdr);
        gms.getDownProtocol().down(new Event(Event.MSG, msg));
    }

    // [JGRP-700] - FLUSH: flushing should span merge
    // if flush is in the stack, wait for ACKs from the separated island coordinators
    if (gms.flushProtocolInStack) {
        try {
            gms.merge_ack_collector.waitForAllAcks(timeout);
            long stop = System.currentTimeMillis();
            if (log.isTraceEnabled())
                log.trace("received all ACKs (" + size + ") for merge view " + view
                          + " in " + (stop - start) + "ms");
        } catch (TimeoutException e) {
            log.warn(gms.local_addr + ": failed to collect all ACKs for merge (" + size
                     + ") for view " + view + " after " + timeout + "ms, missing ACKs from "
                     + gms.merge_ack_collector.printMissing());
        }
    }
}
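// The reset()/size()/waitForAllAcks()/printMissing() pattern used above is provided by
// JGroups' AckCollector. Below is a minimal, hypothetical sketch of such a collector, only
// to illustrate the semantics (class and member names are assumptions, not the actual
// JGroups implementation): reset() registers the expected senders, ack() removes one, and
// waitForAllAcks() blocks until the set drains or the timeout elapses.
class SimpleAckCollector {
    private final Set<Address> missing_acks = new HashSet<Address>();

    synchronized void reset(Collection<Address> expected) {
        missing_acks.clear();
        if (expected != null)
            missing_acks.addAll(expected);
    }

    synchronized int size() {
        return missing_acks.size();
    }

    synchronized void ack(Address sender) {
        if (missing_acks.remove(sender) && missing_acks.isEmpty())
            notifyAll(); // wake up waitForAllAcks()
    }

    synchronized void waitForAllAcks(long timeout) throws TimeoutException, InterruptedException {
        long deadline = System.currentTimeMillis() + timeout;
        while (!missing_acks.isEmpty()) {
            long remaining = deadline - System.currentTimeMillis();
            if (remaining <= 0)
                throw new TimeoutException("missing ACKs from " + missing_acks);
            wait(remaining);
        }
    }
}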
/**
 * If merge_id is not equal to this.merge_id then discard. Else cast the view/digest to all
 * members of this group.
 */
public void handleMergeView(final MergeData data, final MergeId merge_id) {
    if (!matchMergeId(merge_id)) {
        if (log.isErrorEnabled())
            log.error("merge_ids don't match (or are null); merge view discarded");
        return;
    }

    // only send to our *current* members: if A and B are being merged (we are B), then we would
    // *not* receive a VIEW_ACK from A, because A doesn't see us in the pre-merge view yet and
    // discards the view
    // [JGRP-700] - FLUSH: flushing should span merge
    // we have to send the new view only to current members, and we should not wait
    // for view ACKs from newly merged members
    List<Address> newViewMembers = new Vector<Address>(data.view.getMembers());
    newViewMembers.removeAll(gms.members.getMembers());

    gms.castViewChangeWithDest(data.view, data.digest, null, newViewMembers);

    // if we have flush in the stack, send an ACK back to the merge coordinator
    if (gms.flushProtocolInStack) {
        Message ack = new Message(data.getSender(), null, null);
        ack.setFlag(Message.OOB);
        GMS.GmsHeader ack_hdr = new GMS.GmsHeader(GMS.GmsHeader.INSTALL_MERGE_VIEW_OK);
        ack.putHeader(gms.getId(), ack_hdr);
        gms.getDownProtocol().down(new Event(Event.MSG, ack));
    }
    cancelMerge(merge_id);
}
/**
 * Multicasts a GET_DIGEST_REQ to all current members and waits for all responses
 * (GET_DIGEST_RSP) or N ms.
 *
 * @return the consolidated digest of all responding members, or null if current_mbrs is null
 */
private Digest fetchDigestsFromAllMembersInSubPartition(List<Address> current_mbrs) {
    if (current_mbrs == null)
        return null;

    GMS.GmsHeader hdr = new GMS.GmsHeader(GMS.GmsHeader.GET_DIGEST_REQ);
    Message get_digest_req = new Message();
    get_digest_req.setFlag(Message.OOB);
    get_digest_req.putHeader(gms.getId(), hdr);

    long max_wait_time = gms.merge_timeout > 0 ? gms.merge_timeout / 2 : 2000L;
    digest_collector.reset(current_mbrs);

    // add my own digest first
    Digest digest = (Digest) gms.getDownProtocol().down(Event.GET_DIGEST_EVT);
    digest_collector.add(gms.local_addr, digest);

    gms.getDownProtocol().down(new Event(Event.MSG, get_digest_req));
    digest_collector.waitForAllResponses(max_wait_time);
    if (log.isDebugEnabled()) {
        if (digest_collector.hasAllResponses())
            log.debug(gms.local_addr + ": fetched all digests for " + current_mbrs);
        else
            log.debug(gms.local_addr + ": fetched incomplete digests (after timeout of "
                      + max_wait_time + " ms) for " + current_mbrs);
    }
    Map<Address, Digest> responses = new HashMap<Address, Digest>(digest_collector.getResults());
    MutableDigest retval = new MutableDigest(responses.size());
    for (Digest dig : responses.values()) {
        if (dig != null)
            retval.add(dig);
    }
    return retval;
}
protected void sendMergeRejectedResponse(Address sender, MergeId merge_id) {
    Message msg = new Message(sender).setFlag(Message.Flag.OOB, Message.Flag.INTERNAL);
    GMS.GmsHeader hdr = new GMS.GmsHeader(GMS.GmsHeader.MERGE_RSP);
    hdr.merge_rejected = true;
    hdr.merge_id = merge_id;
    msg.putHeader(gms.getId(), hdr);
    gms.getDownProtocol().down(new Event(Event.MSG, msg));
}
/**
 * Fetches the digests from all members and installs them again. Used only for diagnosis and
 * support; don't use this otherwise!
 */
void fixDigests() {
    Digest digest = fetchDigestsFromAllMembersInSubPartition(gms.view.getMembers());
    Message msg = new Message();
    GMS.GmsHeader hdr = new GMS.GmsHeader(GMS.GmsHeader.INSTALL_DIGEST);
    hdr.my_digest = digest;
    msg.putHeader(gms.getId(), hdr);
    gms.getDownProtocol().down(new Event(Event.MSG, msg));
}
void cancelMerge(MergeId id) {
    if (setMergeId(id, null)) {
        merge_task.stop();
        stopMergeKiller();
        merge_rsps.reset();
        gms.getViewHandler().resume();
        gms.getDownProtocol().down(new Event(Event.RESUME_STABLE));
    }
}
protected void sendMergeRejectedResponse(Address sender, MergeId merge_id) {
    Message msg = new Message(sender, null, null);
    msg.setFlag(Message.OOB);
    GMS.GmsHeader hdr = new GMS.GmsHeader(GMS.GmsHeader.MERGE_RSP);
    hdr.merge_rejected = true;
    hdr.merge_id = merge_id;
    msg.putHeader(gms.getId(), hdr);
    if (log.isDebugEnabled())
        log.debug("merge response=" + hdr);
    gms.getDownProtocol().down(new Event(Event.MSG, msg));
}
/** Send back a response containing view and digest to sender */
private void sendMergeResponse(Address sender, View view, Digest digest, MergeId merge_id) {
    Message msg = new Message(sender).setFlag(Message.Flag.OOB, Message.Flag.INTERNAL);
    GMS.GmsHeader hdr = new GMS.GmsHeader(GMS.GmsHeader.MERGE_RSP);
    hdr.merge_id = merge_id;
    hdr.view = view;
    hdr.my_digest = digest;
    msg.putHeader(gms.getId(), hdr);
    if (log.isTraceEnabled())
        log.trace(gms.local_addr + ": sending merge response=" + hdr);
    gms.getDownProtocol().down(new Event(Event.MSG, msg));
}
private void sendMergeCancelledMessage(Collection<Address> coords, MergeId merge_id) {
    if (coords == null || merge_id == null)
        return;
    for (Address coord : coords) {
        Message msg = new Message(coord);
        // msg.setFlag(Message.Flag.OOB);
        GMS.GmsHeader hdr = new GMS.GmsHeader(GMS.GmsHeader.CANCEL_MERGE);
        hdr.merge_id = merge_id;
        msg.putHeader(gms.getId(), hdr);
        gms.getDownProtocol().down(new Event(Event.MSG, msg));
    }
}
private void sendMergeCancelledMessage(Collection<Address> coords, MergeId merge_id) {
    if (coords == null || merge_id == null)
        return;
    for (Address coord : coords) {
        Message msg = new Message(coord, null, null);
        // msg.setFlag(Message.OOB);
        GMS.GmsHeader hdr = new GMS.GmsHeader(GMS.GmsHeader.CANCEL_MERGE);
        hdr.merge_id = merge_id;
        msg.putHeader(gms.getId(), hdr);
        if (log.isDebugEnabled())
            log.debug(gms.local_addr + ": sending cancel merge to " + coord);
        gms.getDownProtocol().down(new Event(Event.MSG, msg));
    }
}
protected void _handleMergeRequest(Address sender, MergeId merge_id, Collection<? extends Address> mbrs)
        throws Exception {
    boolean success = matchMergeId(merge_id) || setMergeId(null, merge_id);
    if (!success)
        throw new Exception("merge " + this.merge_id + " is already in progress, received merge-id=" + merge_id);

    /* Clears the view handler queue and discards all JOIN/LEAVE/MERGE requests until after the MERGE */
    // gms.getViewHandler().suspend();
    if (log.isTraceEnabled())
        log.trace(gms.local_addr + ": got merge request from " + sender + ", merge_id=" + merge_id
                  + ", mbrs=" + mbrs);

    // merge the membership of the current view with mbrs
    List<Address> members = new LinkedList<Address>();
    if (mbrs != null) { // didn't use a set, because we didn't want to change the membership order
                        // at this time (although not incorrect)
        for (Address mbr : mbrs) {
            if (!members.contains(mbr))
                members.add(mbr);
        }
    }

    ViewId tmp_vid = gms.getViewId();
    if (tmp_vid != null)
        tmp_vid = tmp_vid.copy();
    if (tmp_vid == null)
        throw new Exception("view ID is null; cannot return merge response");
    View view = new View(tmp_vid, new ArrayList<Address>(members));

    // [JGRP-524] - FLUSH and merge: flush doesn't wrap entire merge process
    // [JGRP-770] - Concurrent startup of many channels doesn't stabilize
    // [JGRP-700] - FLUSH: flushing should span merge
    /* if flush is in the stack, let this coordinator flush its cluster island */
    if (gms.flushProtocolInStack && !gms.startFlush(view))
        throw new Exception("flush failed");

    // we still need to fetch digests from all members, and not just return our own digest
    // (https://issues.jboss.org/browse/JGRP-948)
    Digest digest = fetchDigestsFromAllMembersInSubPartition(members, merge_id);
    if (digest == null || digest.size() == 0)
        throw new Exception("failed fetching digests from subpartition members; dropping merge response");

    sendMergeResponse(sender, view, digest, merge_id);
}
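// _handleMergeRequest() signals failure by throwing, which implies a public wrapper that turns
// the exception into a MERGE_RSP rejection. A plausible sketch of that wrapper, consistent with
// the older handleMergeRequest() variant shown later in this section (the exact logging and
// cleanup steps are assumptions):
public void handleMergeRequest(Address sender, MergeId merge_id, Collection<? extends Address> mbrs) {
    try {
        _handleMergeRequest(sender, merge_id, mbrs);
    } catch (Exception ex) {
        log.error(gms.local_addr + ": failure handling the merge request", ex);
        sendMergeRejectedResponse(sender, merge_id); // tell the merge leader to exclude us
        cancelMerge(merge_id);                       // release the view handler and merge state
    }
}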
void cancelMerge(MergeId id) {
    if (setMergeId(id, null)) {
        merge_task.stop();
        merge_rsps.reset();
        gms.getViewHandler().resume(id);
    }
}
public void handleMergeCancelled(MergeId merge_id) {
    try {
        gms.stopFlush();
    } catch (Throwable t) {
        log.error("stop flush failed", t);
    }
    if (log.isTraceEnabled())
        log.trace(gms.local_addr + ": merge " + merge_id + " is cancelled");
    cancelMerge(merge_id);
}
/**
 * Multicasts a GET_DIGEST_REQ to all current members and waits for all responses
 * (GET_DIGEST_RSP) or N ms.
 *
 * @return the consolidated digest of all responding members
 */
private Digest fetchDigestsFromAllMembersInSubPartition(List<Address> current_mbrs, MergeId merge_id) {
    // Optimization: if we're the only member, we don't need to multicast the get-digest message
    if (current_mbrs == null
        || current_mbrs.size() == 1 && current_mbrs.get(0).equals(gms.local_addr))
        return (Digest) gms.getDownProtocol().down(new Event(Event.GET_DIGEST, gms.local_addr));

    GMS.GmsHeader hdr = new GMS.GmsHeader(GMS.GmsHeader.GET_DIGEST_REQ);
    hdr.merge_id = merge_id;
    Message get_digest_req = new Message()
        .setFlag(Message.Flag.OOB, Message.Flag.INTERNAL)
        .putHeader(gms.getId(), hdr);

    long max_wait_time = gms.merge_timeout / 2; // gms.merge_timeout is guaranteed to be > 0, verified in init()

    digest_collector.reset(current_mbrs);
    gms.getDownProtocol().down(new Event(Event.MSG, get_digest_req));

    // add my own digest first - the get_digest_req needs to be sent *before* getting our own
    // digest, so we have that message in our digest!
    Digest digest = (Digest) gms.getDownProtocol().down(new Event(Event.GET_DIGEST, gms.local_addr));
    digest_collector.add(gms.local_addr, digest);

    digest_collector.waitForAllResponses(max_wait_time);
    if (log.isTraceEnabled()) {
        if (digest_collector.hasAllResponses())
            log.trace(gms.local_addr + ": fetched all digests for " + current_mbrs);
        else
            log.trace(gms.local_addr + ": fetched incomplete digests (after timeout of "
                      + max_wait_time + " ms) for " + current_mbrs);
    }
    Map<Address, Digest> responses = new HashMap<Address, Digest>(digest_collector.getResults());
    MutableDigest retval = new MutableDigest(responses.size());
    for (Digest dig : responses.values()) {
        if (dig != null)
            retval.add(dig);
    }
    return retval;
}
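// digest_collector above behaves like a response collector: reset() registers the members we
// expect a GET_DIGEST_RSP from, add() records one response, and waitForAllResponses() blocks
// until every registered member has answered or the timeout expires. A minimal, hypothetical
// sketch of those semantics (names are assumptions, not the actual JGroups class):
class SimpleResponseCollector<T> {
    private final Map<Address, T> responses = new HashMap<Address, T>();

    synchronized void reset(Collection<Address> members) {
        responses.clear();
        for (Address mbr : members)
            responses.put(mbr, null); // null marks "expected, not yet received"
    }

    synchronized void add(Address sender, T value) {
        if (responses.containsKey(sender) && responses.get(sender) == null) {
            responses.put(sender, value);
            notifyAll(); // wake up waitForAllResponses()
        }
    }

    synchronized boolean hasAllResponses() {
        for (T val : responses.values())
            if (val == null)
                return false;
        return true;
    }

    synchronized Map<Address, T> getResults() {
        return new HashMap<Address, T>(responses);
    }

    synchronized void waitForAllResponses(long timeout) {
        long deadline = System.currentTimeMillis() + timeout;
        while (!hasAllResponses()) {
            long remaining = deadline - System.currentTimeMillis();
            if (remaining <= 0)
                return; // caller checks hasAllResponses() to detect an incomplete result
            try {
                wait(remaining);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                return;
            }
        }
    }
}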
public boolean setMergeId(MergeId expected, MergeId new_value) {
    merge_lock.lock();
    try {
        boolean match = Util.match(this.merge_id, expected);
        if (match) {
            if (new_value != null && merge_id_history.contains(new_value))
                return false;
            else
                merge_id_history.add(new_value);
            this.merge_id = new_value;
            if (this.merge_id != null) {
                // Clears the view handler queue and discards all JOIN/LEAVE/MERGE requests
                // until after the MERGE
                gms.getViewHandler().suspend();
                gms.getDownProtocol().down(new Event(Event.SUSPEND_STABLE, 20000));
                startMergeKiller();
            }
        }
        return match;
    } finally {
        merge_lock.unlock();
    }
}
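// matchMergeId() is called throughout this class but not shown in this section. Given
// setMergeId() above, a minimal counterpart would compare the current merge_id under the same
// lock (a sketch consistent with the code above, not necessarily the actual implementation):
public boolean matchMergeId(MergeId id) {
    merge_lock.lock();
    try {
        return Util.match(this.merge_id, id);
    } finally {
        merge_lock.unlock();
    }
}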
/**
 * If merge_id is not equal to this.merge_id then discard. Else cast the view/digest to all
 * members of this group.
 */
public void handleMergeView(final MergeData data, final MergeId merge_id) {
    if (!matchMergeId(merge_id)) {
        if (log.isTraceEnabled())
            log.trace(gms.local_addr + ": merge_ids (mine: " + this.merge_id + ", received: "
                      + merge_id + ") don't match; merge view " + data.view.getViewId()
                      + " is discarded");
        return;
    }

    // only send to our *current* members: if A and B are being merged (we are B), then we would
    // *not* want to block on a VIEW_ACK from A, because A doesn't see us in the pre-merge view
    // yet and discards the view
    List<Address> newViewMembers = new ArrayList<Address>(data.view.getMembers());
    newViewMembers.removeAll(gms.members.getMembers());

    try {
        gms.castViewChange(data.view, data.digest, null, newViewMembers);
        // if we have flush in the stack, send an ACK back to the merge coordinator
        if (gms.flushProtocolInStack) { // [JGRP-700] - FLUSH: flushing should span merge
            Message ack = new Message(data.getSender()).setFlag(Message.Flag.OOB, Message.Flag.INTERNAL);
            GMS.GmsHeader ack_hdr = new GMS.GmsHeader(GMS.GmsHeader.INSTALL_MERGE_VIEW_OK);
            ack.putHeader(gms.getId(), ack_hdr);
            gms.getDownProtocol().down(new Event(Event.MSG, ack));
        }
    } finally {
        cancelMerge(merge_id);
    }
}
/**
 * Get the view and digest and send back both (MergeData) in the form of a MERGE_RSP to the
 * sender. If a merge is already in progress, send back a MergeData with the merge_rejected
 * field set to true.
 *
 * @param sender The address of the merge leader
 * @param merge_id The merge ID
 * @param mbrs The set of members from which we expect responses
 */
public void handleMergeRequest(Address sender, MergeId merge_id, Collection<? extends Address> mbrs) {
    boolean success = matchMergeId(merge_id) || setMergeId(null, merge_id);
    if (!success) {
        if (log.isWarnEnabled())
            log.warn(gms.local_addr + ": merge is already in progress");
        sendMergeRejectedResponse(sender, merge_id);
        return;
    }

    /* Clears the view handler queue and discards all JOIN/LEAVE/MERGE requests until after the MERGE */
    gms.getViewHandler().suspend(merge_id);
    if (log.isDebugEnabled())
        log.debug(gms.local_addr + ": got merge request from " + sender + ", merge_id=" + merge_id
                  + ", mbrs=" + mbrs);

    // merge the membership of the current view with mbrs
    List<Address> members = new LinkedList<Address>();
    if (mbrs != null) { // didn't use a set, because we didn't want to change the membership order
                        // at this time (although not incorrect)
        for (Address mbr : mbrs) {
            if (!members.contains(mbr))
                members.add(mbr);
        }
    }

    ViewId tmp_vid = gms.view_id != null ? gms.view_id.copy() : null;
    if (tmp_vid == null) {
        log.warn("view ID is null; cannot return merge response");
        sendMergeRejectedResponse(sender, merge_id);
        return;
    }
    View view = new View(tmp_vid, new Vector<Address>(members));

    // [JGRP-524] - FLUSH and merge: flush doesn't wrap entire merge process
    // [JGRP-770] - Concurrent startup of many channels doesn't stabilize
    // [JGRP-700] - FLUSH: flushing should span merge
    /* if flush is in the stack, let this coordinator flush its cluster island */
    boolean successfulFlush = gms.startFlush(view);
    if (!successfulFlush) {
        sendMergeRejectedResponse(sender, merge_id);
        if (log.isWarnEnabled())
            log.warn(gms.local_addr + ": flush failed; sending merge rejected message to "
                     + sender + ", merge_id=" + merge_id);
        cancelMerge(merge_id);
        return;
    }

    Digest digest = fetchDigestsFromAllMembersInSubPartition(members);
    if (digest.size() == 0) {
        log.error("failed fetching digests from subpartition members; dropping merge response");
        return;
    }
    sendMergeResponse(sender, view, digest, merge_id);
}
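// startMergeKiller()/stopMergeKiller() (referenced in setMergeId() and cancelMerge() above) are
// not shown in this section. Their purpose is to bound the lifetime of a merge whose leader
// crashed before sending INSTALL_MERGE_VIEW or CANCEL_MERGE. A hypothetical sketch using
// java.util.concurrent scheduling (the fields, the timeout factor, and the scheduling mechanism
// are all assumptions; JGroups uses its own timer):
private final ScheduledExecutorService timer = Executors.newSingleThreadScheduledExecutor();
private ScheduledFuture<?> merge_killer;

synchronized void startMergeKiller() {
    stopMergeKiller(); // at most one killer task at a time
    merge_killer = timer.schedule(new Runnable() {
        public void run() {
            MergeId id = merge_id; // cancel whatever merge is still pending
            if (id != null) {
                log.warn(gms.local_addr + ": merge " + id + " took too long, cancelling it");
                cancelMerge(id);
            }
        }
    }, gms.merge_timeout * 2, TimeUnit.MILLISECONDS);
}

synchronized void stopMergeKiller() {
    if (merge_killer != null) {
        merge_killer.cancel(false);
        merge_killer = null;
    }
}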