/**
 * Tests A|6, A|7, A|8, A|9 and B|7, B|8, B|9 -> we should end up with a MergeView whose subviews
 * consist of only 2 elements: A|9 and B|9
 */
public void testMultipleViewsBySameMembers() throws Exception {
    View a1 = View.create(a.getAddress(), 6,
                          a.getAddress(), b.getAddress(), c.getAddress(), d.getAddress()); // {A,B,C,D}
    View a2 = View.create(a.getAddress(), 7, a.getAddress(), b.getAddress(), c.getAddress()); // {A,B,C}
    View a3 = View.create(a.getAddress(), 8, a.getAddress(), b.getAddress());                 // {A,B}
    View a4 = View.create(a.getAddress(), 9, a.getAddress());                                 // {A}

    View b1 = View.create(b.getAddress(), 7, b.getAddress(), c.getAddress(), d.getAddress());
    View b2 = View.create(b.getAddress(), 8, b.getAddress(), c.getAddress());
    View b3 = View.create(b.getAddress(), 9, b.getAddress());

    Util.close(c, d); // not interested in those...

    // A and B cannot communicate:
    discard(true, a, b);

    // inject view A|9={A} into A and B|9={B} into B
    injectView(a4, a);
    injectView(b3, b);

    assert a.getView().equals(a4);
    assert b.getView().equals(b3);

    List<Event> merge_events = new ArrayList<>();
    for (View view : Arrays.asList(a3, a4, a2, a1)) {
        MERGE3.MergeHeader hdr = MERGE3.MergeHeader.createInfo(view.getViewId(), null, null);
        Message msg = new Message(null, a.getAddress(), null).putHeader(merge_id, hdr);
        merge_events.add(new Event(Event.MSG, msg));
    }
    for (View view : Arrays.asList(b2, b3, b1)) {
        MERGE3.MergeHeader hdr = MERGE3.MergeHeader.createInfo(view.getViewId(), null, null);
        Message msg = new Message(null, b.getAddress(), null).putHeader(merge_id, hdr);
        merge_events.add(new Event(Event.MSG, msg));
    }

    // A and B can communicate again
    discard(false, a, b);

    injectMergeEvents(merge_events, a, b);
    checkInconsistencies(a, b); // merge will happen between A and B
    Util.waitUntilAllChannelsHaveSameSize(10000, 500, a, b);
    System.out.println("A's view: " + a.getView() + "\nB's view: " + b.getView());

    assert a.getView().size() == 2;
    assert a.getView().containsMember(a.getAddress());
    assert a.getView().containsMember(b.getAddress());
    assert a.getView().equals(b.getView());

    for (View merge_view : Arrays.asList(getViewFromGMS(a), getViewFromGMS(b))) {
        System.out.println(merge_view);
        assert merge_view instanceof MergeView;
        List<View> subviews = ((MergeView) merge_view).getSubgroups();
        assert subviews.size() == 2;
    }
}
/** Tests a merge between ViewIds of the same coord, e.g. A|6, A|7, A|8, A|9 */
public void testViewsBySameCoord() {
    View v1 = View.create(a.getAddress(), 6,
                          a.getAddress(), b.getAddress(), c.getAddress(), d.getAddress()); // {A,B,C,D}
    View v2 = View.create(a.getAddress(), 7, a.getAddress(), b.getAddress(), c.getAddress()); // {A,B,C}
    View v3 = View.create(a.getAddress(), 8, a.getAddress(), b.getAddress());                 // {A,B}
    View v4 = View.create(a.getAddress(), 9, a.getAddress());                                 // {A}

    Util.close(b, c, d); // not interested in those...

    MERGE3 merge = (MERGE3) a.getProtocolStack().findProtocol(MERGE3.class);
    for (View view : Arrays.asList(v1, v2, v4, v3)) {
        MERGE3.MergeHeader hdr = MERGE3.MergeHeader.createInfo(view.getViewId(), null, null);
        Message msg = new Message(null, a.getAddress(), null).putHeader(merge.getId(), hdr);
        merge.up(new Event(Event.MSG, msg));
    }

    merge.checkInconsistencies(); // no merge will happen

    Util.waitUntilAllChannelsHaveSameSize(10000, 500, a);
    System.out.println("A's view: " + a.getView());
    assert a.getView().size() == 1;
    assert a.getView().containsMember(a.getAddress());
}
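// Why no merge happens in the test above (a hedged reading, not part of the original source): all
// four injected ViewIds (A|6 .. A|9) share the same creator A, so MERGE3 treats them as successive
// views of a single partition and checkInconsistencies() has nothing to reconcile. Contrast this
// with testMultipleViewsBySameMembers(), where the injected views A|9={A} and B|9={B} have
// different creators; that difference in view creators is what makes MERGE3 report two subgroups
// and drive an actual merge.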
protected void sendDiscoveryResponse(Address logical_addr, List<PhysicalAddress> physical_addrs,
                                     boolean is_server, boolean return_view_only, String logical_name,
                                     final Address sender) {
    PingData data;
    if (return_view_only) {
        data = new PingData(logical_addr, view, is_server, null, null);
    } else {
        ViewId view_id = view != null ? view.getViewId() : null;
        data = new PingData(logical_addr, null, view_id, is_server, logical_name, physical_addrs);
    }

    final Message rsp_msg = new Message(sender, null, null);
    rsp_msg.setFlag(Message.OOB);
    final PingHeader rsp_hdr = new PingHeader(PingHeader.GET_MBRS_RSP, data);
    rsp_msg.putHeader(this.id, rsp_hdr);

    if (stagger_timeout > 0) {
        int view_size = view != null ? view.size() : 10;
        int rank = Util.getRank(view, local_addr); // returns 0 if view or local_addr are null
        long sleep_time = rank == 0
            ? Util.random(stagger_timeout)
            : stagger_timeout * rank / view_size - (stagger_timeout / view_size);
        timer.schedule(new Runnable() {
            public void run() {
                if (log.isTraceEnabled())
                    log.trace(local_addr + ": received GET_MBRS_REQ from " + sender
                              + ", sending staggered response " + rsp_hdr);
                down_prot.down(new Event(Event.MSG, rsp_msg));
            }
        }, sleep_time, TimeUnit.MILLISECONDS);
        return;
    }

    if (log.isTraceEnabled())
        log.trace("received GET_MBRS_REQ from " + sender + ", sending response " + rsp_hdr);
    down_prot.down(new Event(Event.MSG, rsp_msg));
}
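// Worked example of the stagger calculation above (the concrete numbers are illustrative only):
// with stagger_timeout=2000ms and a 4-member view, the coordinator (rank 0) sleeps a random time
// in [0, 2000), rank 1 sleeps 2000*1/4 - 2000/4 = 0ms, rank 2 sleeps 2000*2/4 - 2000/4 = 500ms and
// rank 3 sleeps 2000*3/4 - 2000/4 = 1000ms, so discovery responses are spread out over time
// instead of all hitting the requester at once.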
/**
 * Computes the next view. Returns a copy that has <code>old_mbrs</code> and
 * <code>suspected_mbrs</code> removed and <code>new_mbrs</code> added.
 */
public View getNextView(Collection<Address> new_mbrs, Collection<Address> old_mbrs,
                        Collection<Address> suspected_mbrs) {
    synchronized (members) {
        ViewId view_id = view != null ? view.getViewId() : null;
        if (view_id == null) {
            log.error("view_id is null");
            return null; // this should *never* happen !
        }
        long vid = Math.max(view_id.getId(), ltime) + 1;
        ltime = vid;

        Membership tmp_mbrs = tmp_members.copy(); // always operate on the temporary membership
        tmp_mbrs.remove(suspected_mbrs);
        tmp_mbrs.remove(old_mbrs);
        tmp_mbrs.add(new_mbrs);
        List<Address> mbrs = tmp_mbrs.getMembers();
        Address new_coord = local_addr;
        if (!mbrs.isEmpty())
            new_coord = mbrs.get(0);
        View v = new View(new_coord, vid, mbrs);

        // Update membership (see DESIGN for explanation):
        tmp_members.set(mbrs);

        // Update joining list (see DESIGN for explanation)
        if (new_mbrs != null) {
            for (Address tmp_mbr : new_mbrs) {
                if (!joining.contains(tmp_mbr))
                    joining.add(tmp_mbr);
            }
        }

        // Update leaving list (see DESIGN for explanations)
        if (old_mbrs != null) {
            for (Address addr : old_mbrs) {
                if (!leaving.contains(addr))
                    leaving.add(addr);
            }
        }
        if (suspected_mbrs != null) {
            for (Address addr : suspected_mbrs) {
                if (!leaving.contains(addr))
                    leaving.add(addr);
            }
        }
        return v;
    }
}
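// Hedged illustration of getNextView() (membership contents are made up for this sketch): if the
// current view is A|7={A,B,C}, ltime is 7, and the method is called with new_mbrs={D},
// old_mbrs=null and suspected_mbrs={C}, then vid becomes max(7,7)+1 = 8, the temporary membership
// becomes {A,B,D}, the first member A stays coordinator, and the returned view is A|8={A,B,D}.
// C is placed on the 'leaving' list and D on the 'joining' list until the view is actually
// installed by installView().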
/**
 * A: {A,B}
 * B: {A,B}
 * C: {C}
 * C receives INFO from B only
 */
public void testMergeWithIncompleteInfos() throws Exception {
    Util.close(d);
    enableInfoSender(false, a, b, c);
    View one = View.create(a.getAddress(), 10, a.getAddress(), b.getAddress());
    View two = View.create(c.getAddress(), 10, c.getAddress());
    injectView(one, a, b);
    injectView(two, c);
    enableInfoSender(false, a, b, c);
    Util.waitUntilAllChannelsHaveSameSize(10000, 500, a, b);
    Util.waitUntilAllChannelsHaveSameSize(10000, 500, c);

    System.out.printf("\nPartitions:\n");
    for (JChannel ch : Arrays.asList(a, b, c))
        System.out.println(ch.getName() + ": " + ch.getView());

    MERGE3.MergeHeader hdr = MERGE3.MergeHeader.createInfo(one.getViewId(), null, null);
    Message msg = new Message(null, b.getAddress(), null).putHeader(merge_id, hdr); // B sends the INFO message to C
    Event merge_event = new Event(Event.MSG, msg);
    MERGE3 merge = (MERGE3) c.getProtocolStack().findProtocol(MERGE3.class);
    merge.up(merge_event);

    enableInfoSender(true, a, b, c);
    System.out.println("\nEnabling INFO sending in merge protocols to merge subclusters");
    Util.waitUntilAllChannelsHaveSameSize(30000, 1000, a, b, c);

    System.out.println("\nResulting views:");
    for (JChannel ch : Arrays.asList(a, b, c)) {
        GMS gms = (GMS) ch.getProtocolStack().findProtocol(GMS.class);
        View mv = gms.view();
        System.out.println(mv);
    }
    for (JChannel ch : Arrays.asList(a, b, c)) {
        GMS gms = (GMS) ch.getProtocolStack().findProtocol(GMS.class);
        View mv = gms.view();
        assert mv instanceof MergeView;
        assert mv.size() == 3;
        assert ((MergeView) mv).getSubgroups().size() == 2;
    }
    for (JChannel ch : Arrays.asList(a, b, c)) {
        View view = ch.getView();
        assert view.size() == 3 : "view should have 3 members: " + view;
    }
}
/**
 * An event was received from the layer below. Usually the current layer will want to examine the
 * event type and - depending on its type - perform some computation (e.g. removing headers from a
 * MSG event type, or updating the internal membership list when receiving a VIEW_CHANGE event).
 * Finally the event is either a) discarded, or b) an event is sent down the stack using
 * <code>PassDown</code>, or c) the event (or another event) is sent up the stack using
 * <code>PassUp</code>.
 *
 * <p>For the PING protocol, the up operation does the following things:
 * 1. If the event is an Event.MSG, PING inspects the message header. If the header is null, PING
 *    simply passes up the event. If the header is PingHeader.GET_MBRS_REQ, the PING protocol
 *    passes down a PingRequest message. If the header is PingHeader.GET_MBRS_RSP, the message is
 *    added to the initial members vector and any waiting threads are woken up.
 * 2. If the event is Event.SET_LOCAL_ADDR, we simply set the local address of this protocol.
 * 3. All other events are simply passed up to the protocol above.
 *
 * @param evt - the event that has been sent from the layer below
 */
@SuppressWarnings("unchecked")
public Object up(Event evt) {
    switch (evt.getType()) {

        case Event.MSG:
            Message msg = (Message) evt.getArg();
            PingHeader hdr = (PingHeader) msg.getHeader(this.id);
            if (hdr == null)
                return up_prot.up(evt);

            PingData data = hdr.data;
            Address logical_addr = data != null ? data.getAddress() : null;

            if (is_leaving)
                return null; // prevents merging back a leaving member (https://issues.jboss.org/browse/JGRP-1336)

            switch (hdr.type) {

                case PingHeader.GET_MBRS_REQ: // return Rsp(local_addr, coord)
                    if (group_addr == null || hdr.cluster_name == null) {
                        if (log.isWarnEnabled())
                            log.warn("group_addr (" + group_addr + ") or cluster_name of header ("
                                     + hdr.cluster_name + ") is null; passing up discovery request from "
                                     + msg.getSrc() + ", but this should not be the case");
                    } else {
                        if (!group_addr.equals(hdr.cluster_name)) {
                            if (log.isWarnEnabled())
                                log.warn("discarding discovery request for cluster '" + hdr.cluster_name
                                         + "' from " + msg.getSrc() + "; our cluster name is '" + group_addr
                                         + "'. " + "Please separate your clusters cleanly.");
                            return null;
                        }
                    }

                    // add physical address and logical name of the discovery sender (if available) to the cache
                    if (data != null) {
                        if (logical_addr == null)
                            logical_addr = msg.getSrc();
                        Collection<PhysicalAddress> physical_addrs = data.getPhysicalAddrs();
                        PhysicalAddress physical_addr = physical_addrs != null && !physical_addrs.isEmpty()
                            ? physical_addrs.iterator().next() : null;
                        if (logical_addr != null && physical_addr != null)
                            down(new Event(Event.SET_PHYSICAL_ADDRESS,
                                           new Tuple<Address, PhysicalAddress>(logical_addr, physical_addr)));
                        if (logical_addr != null && data.getLogicalName() != null)
                            UUID.add(logical_addr, data.getLogicalName());

                        discoveryRequestReceived(msg.getSrc(), data.getLogicalName(), physical_addrs);

                        synchronized (ping_responses) {
                            for (Responses response : ping_responses) {
                                response.addResponse(data, false);
                            }
                        }
                    }

                    if (hdr.view_id != null) {
                        // If the discovery request is merge-triggered, and the ViewId shipped with it
                        // is the same as ours, we don't respond (JGRP-1315).
                        ViewId my_view_id = view != null ? view.getViewId() : null;
                        if (my_view_id != null && my_view_id.equals(hdr.view_id))
                            return null;

                        boolean send_discovery_rsp = force_sending_discovery_rsps || is_coord
                            || current_coord == null || current_coord.equals(msg.getSrc());
                        if (!send_discovery_rsp) {
                            if (log.isTraceEnabled())
                                log.trace(local_addr + ": suppressing merge response as I'm not a coordinator and the "
                                          + "discovery request was not sent by a coordinator");
                            return null;
                        }
                    }

                    if (isMergeRunning()) {
                        if (log.isTraceEnabled())
                            log.trace(local_addr + ": suppressing merge response as a merge is already in progress");
                        return null;
                    }

                    List<PhysicalAddress> physical_addrs = hdr.view_id != null ? null
                        : Arrays.asList((PhysicalAddress) down(new Event(Event.GET_PHYSICAL_ADDRESS, local_addr)));
                    sendDiscoveryResponse(local_addr, physical_addrs, is_server, hdr.view_id != null,
                                          UUID.get(local_addr), msg.getSrc());
                    return null;

                case PingHeader.GET_MBRS_RSP: // add response to vector and notify waiting thread
                    // add physical address (if available) to transport's cache
                    if (data != null) {
                        Address response_sender = msg.getSrc();
                        if (logical_addr == null)
                            logical_addr = msg.getSrc();
                        Collection<PhysicalAddress> addrs = data.getPhysicalAddrs();
                        PhysicalAddress physical_addr = addrs != null && !addrs.isEmpty()
                            ? addrs.iterator().next() : null;
                        if (logical_addr != null && physical_addr != null)
                            down(new Event(Event.SET_PHYSICAL_ADDRESS,
                                           new Tuple<Address, PhysicalAddress>(logical_addr, physical_addr)));
                        if (logical_addr != null && data.getLogicalName() != null)
                            UUID.add(logical_addr, data.getLogicalName());

                        if (log.isTraceEnabled())
                            log.trace(local_addr + ": received GET_MBRS_RSP from " + response_sender + ": " + data);

                        boolean overwrite = logical_addr != null && logical_addr.equals(response_sender);
                        synchronized (ping_responses) {
                            for (Responses response : ping_responses) {
                                response.addResponse(data, overwrite);
                            }
                        }
                    }
                    return null;

                default:
                    if (log.isWarnEnabled())
                        log.warn("got PING header with unknown type (" + hdr.type + ')');
                    return null;
            }

        case Event.GET_PHYSICAL_ADDRESS:
            try {
                sendDiscoveryRequest(group_addr, null, null);
            } catch (InterruptedIOException ie) {
                if (log.isWarnEnabled()) {
                    log.warn("Discovery request for cluster " + group_addr + " interrupted");
                }
                Thread.currentThread().interrupt();
            } catch (Exception ex) {
                if (log.isErrorEnabled())
                    log.error("failed sending discovery request", ex);
            }
            return null;

        case Event.FIND_INITIAL_MBRS: // sent by transport
            return findInitialMembers(null);
    }

    return up_prot.up(evt);
}
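// Hedged illustration of the merge-triggered suppression handled above (addresses and view ids are
// made up): if our view is A|10 and a discovery request arrives carrying view_id A|10, both sides
// already agree on the view, so no response is sent (JGRP-1315). If the request carries B|9
// instead, a response is sent only when we are the coordinator, the sender is our current
// coordinator, current_coord is unknown, or force_sending_discovery_rsps is set; otherwise a
// non-coordinator stays silent so that only coordinators take part in merge discovery.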
public ViewId getViewId() {
    return view != null ? view.getViewId() : null;
}
@ManagedAttribute
public String getView() {
    return view != null ? view.getViewId().toString() : "null";
}
/**
 * Callback method <br>
 * Called by the ProtocolStack when a message is received.
 *
 * @param evt the event carrying the message from the protocol stack
 */
public Object up(Event evt) {
    switch (evt.getType()) {

        case Event.MSG:
            Message msg = (Message) evt.getArg();
            if (stats) {
                received_msgs++;
                received_bytes += msg.getLength();
            }

            // discard local messages (sent by myself to me)
            if (discard_own_messages && local_addr != null && msg.getSrc() != null
                && local_addr.equals(msg.getSrc()))
                return null;
            break;

        case Event.VIEW_CHANGE:
            View tmp = (View) evt.getArg();
            if (tmp instanceof MergeView)
                my_view = new View(tmp.getViewId(), tmp.getMembers());
            else
                my_view = tmp;

            // Bela&Vladimir Oct 27th,2006 (JGroups 2.4): we need to set connected=true because a client
            // can call channel.getView() in viewAccepted() callback invoked on this thread
            // (see Event.VIEW_CHANGE handling below)
            // not good: we are only connected when we returned from connect() - bela June 22 2007
            // Changed: when a channel gets a view of which it is a member then it should be
            // connected even if connect() hasn't returned yet ! (bela Nov 2010)
            if (state != State.CONNECTED)
                state = State.CONNECTED;
            break;

        case Event.CONFIG:
            Map<String, Object> cfg = (Map<String, Object>) evt.getArg();
            if (cfg != null) {
                if (cfg.containsKey("state_transfer")) {
                    state_transfer_supported = (Boolean) cfg.get("state_transfer");
                }
                if (cfg.containsKey("flush_supported")) {
                    flush_supported = (Boolean) cfg.get("flush_supported");
                }
            }
            break;

        case Event.GET_STATE_OK:
            StateTransferResult result = (StateTransferResult) evt.getArg();
            if (up_handler != null) {
                try {
                    Object retval = up_handler.up(evt);
                    state_promise.setResult(new StateTransferResult());
                    return retval;
                } catch (Throwable t) {
                    state_promise.setResult(new StateTransferResult(t));
                }
            }
            if (receiver != null) {
                try {
                    if (result.hasBuffer()) {
                        byte[] tmp_state = result.getBuffer();
                        ByteArrayInputStream input = new ByteArrayInputStream(tmp_state);
                        receiver.setState(input);
                    }
                    state_promise.setResult(result);
                } catch (Throwable t) {
                    state_promise.setResult(new StateTransferResult(t));
                }
            }
            break;

        case Event.STATE_TRANSFER_INPUTSTREAM_CLOSED:
            state_promise.setResult((StateTransferResult) evt.getArg());
            break;

        case Event.STATE_TRANSFER_INPUTSTREAM:
            // Oct 13,2006 moved to down() when Event.STATE_TRANSFER_INPUTSTREAM_CLOSED is received
            // state_promise.setResult(is != null? Boolean.TRUE : Boolean.FALSE);
            if (up_handler != null)
                return up_handler.up(evt);

            InputStream is = (InputStream) evt.getArg();
            if (is != null && receiver != null) {
                try {
                    receiver.setState(is);
                } catch (Throwable t) {
                    throw new RuntimeException("failed calling setState() in state requester", t);
                }
            }
            break;

        case Event.STATE_TRANSFER_OUTPUTSTREAM:
            if (receiver != null && evt.getArg() != null) {
                try {
                    receiver.getState((OutputStream) evt.getArg());
                } catch (Exception e) {
                    throw new RuntimeException("failed calling getState() in state provider", e);
                }
            }
            break;

        case Event.GET_LOCAL_ADDRESS:
            return local_addr;

        default:
            break;
    }

    // If UpHandler is installed, pass all events to it and return (UpHandler is e.g. a building block)
    if (up_handler != null)
        return up_handler.up(evt);

    if (receiver != null)
        return invokeCallback(evt.getType(), evt.getArg());
    return null;
}
/**
 * Sets the new view and sends a VIEW_CHANGE event up and down the stack. If the view is a
 * MergeView (subclass of View), then digest will be non-null and has to be set before installing
 * the view.
 */
public void installView(View new_view, Digest digest) {
    ViewId vid = new_view.getVid();
    List<Address> mbrs = new_view.getMembers();
    ltime = Math.max(vid.getId(), ltime); // compute the logical time, regardless of whether the view is accepted

    // Discards view with id lower than or equal to our own. Will be installed without check if it
    // is the first view
    if (view != null) {
        ViewId view_id = view.getViewId();
        int rc = vid.compareToIDs(view_id);
        if (rc <= 0) {
            if (log.isWarnEnabled() && rc < 0 && log_view_warnings) {
                // only scream if view is smaller, silently discard same views
                log.warn(local_addr + ": received view < current view;" + " discarding it (current vid: "
                         + view_id + ", new vid: " + vid + ')');
            }
            return;
        }
    }

    /* Check for self-inclusion: if I'm not part of the new membership, I just discard it.
       This ensures that messages sent in view V1 are only received by members of V1 */
    if (!mbrs.contains(local_addr)) {
        if (log.isWarnEnabled() && log_view_warnings)
            log.warn(local_addr + ": not member of view " + new_view.getViewId() + "; discarding it");
        return;
    }

    if (digest != null) {
        if (new_view instanceof MergeView)
            mergeDigest(digest);
        else
            setDigest(digest);
    }

    if (log.isDebugEnabled())
        log.debug(local_addr + ": installing view " + new_view);

    Event view_event;
    synchronized (members) {
        view = new View(new_view.getVid(), new_view.getMembers());
        view_event = new Event(Event.VIEW_CHANGE, new_view);

        // Set the membership. Take into account joining members
        if (!mbrs.isEmpty()) {
            members.set(mbrs);
            tmp_members.set(members);
            joining.removeAll(mbrs);  // remove all members in mbrs from joining
            // remove all elements from 'leaving' that are not in 'mbrs'
            leaving.retainAll(mbrs);

            tmp_members.add(joining);    // add members that haven't yet shown up in the membership
            tmp_members.remove(leaving); // remove members that haven't yet been removed from the membership

            // add to prev_members
            for (Address addr : mbrs) {
                if (!prev_members.contains(addr))
                    prev_members.add(addr);
            }
        }

        Address coord = determineCoordinator();
        if (coord != null && coord.equals(local_addr) && !haveCoordinatorRole()) {
            becomeCoordinator();
        } else {
            if (haveCoordinatorRole() && !local_addr.equals(coord)) {
                becomeParticipant();
                merge_ack_collector.reset(null); // we don't need this one anymore
            }
        }
    }

    // - Changed order of passing view up and down (http://jira.jboss.com/jira/browse/JGRP-347)
    // - Changed it back (bela Sept 4 2007): http://jira.jboss.com/jira/browse/JGRP-564
    // - Moved sending up view_event out of the synchronized block (bela Nov 2011)
    down_prot.down(view_event); // needed e.g. by failure detector or UDP
    up_prot.up(view_event);

    List<Address> tmp_mbrs = new_view.getMembers();
    ack_collector.retainAll(tmp_mbrs);
    merge_ack_collector.retainAll(tmp_mbrs);

    if (new_view instanceof MergeView)
        merger.forceCancelMerge();

    if (stats) {
        num_views++;
        prev_views.add(new Tuple<View, Long>(new_view, System.currentTimeMillis()));
    }
}
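// Hedged illustration of the acceptance check in installView() (view ids are made up): with a
// current view of A|8, an incoming A|7 yields compareToIDs() < 0 and is discarded with a warning,
// an incoming A|8 yields 0 and is dropped silently, and only A|9 (or any higher id) passes the
// check and gets installed. Even then, if this member is not listed in A|9's membership the view
// is still discarded, so that messages sent in a view are only received by that view's members.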
/**
 * Broadcasts the new view and digest, and waits for acks from all members in the list given as
 * argument. If the list is null, we take the members who are part of new_view
 */
public void castViewChange(View new_view, Digest digest, JoinRsp jr, Collection<Address> newMembers) {
    if (log.isTraceEnabled())
        log.trace(local_addr + ": mcasting view " + new_view + " (" + new_view.size() + " mbrs)\n");

    // Send down a local TMP_VIEW event. This is needed by certain layers (e.g. NAKACK) to compute
    // correct digest in case client's next request (e.g. getState()) reaches us *before* our own
    // view change multicast.
    // Check NAKACK's TMP_VIEW handling for details
    down_prot.up(new Event(Event.TMP_VIEW, new_view));
    down_prot.down(new Event(Event.TMP_VIEW, new_view));

    List<Address> ackMembers = new ArrayList<Address>(new_view.getMembers());
    if (newMembers != null && !newMembers.isEmpty())
        ackMembers.removeAll(newMembers);

    Message view_change_msg = new Message(); // bcast to all members
    GmsHeader hdr = new GmsHeader(GmsHeader.VIEW, new_view);
    hdr.my_digest = digest;
    view_change_msg.putHeader(this.id, hdr);

    // If we're the only member the VIEW is broadcast to, let's simply install the view directly,
    // without sending the VIEW multicast ! Or else N-1 members drop the multicast anyway...
    if (local_addr != null && ackMembers.size() == 1 && ackMembers.get(0).equals(local_addr)) {
        // we need to add the message to the retransmit window (e.g. in NAKACK), so (1) it can be
        // retransmitted and (2) we increment the seqno (otherwise, we'd return an incorrect digest)
        down_prot.down(new Event(Event.ADD_TO_XMIT_TABLE, view_change_msg));
        impl.handleViewChange(new_view, digest);
    } else {
        if (!ackMembers.isEmpty())
            ack_collector.reset(ackMembers);

        down_prot.down(new Event(Event.MSG, view_change_msg));
        try {
            if (!ackMembers.isEmpty()) {
                ack_collector.waitForAllAcks(view_ack_collection_timeout);
                if (log.isTraceEnabled())
                    log.trace(local_addr + ": received all " + ack_collector.expectedAcks()
                              + " ACKs from members for view " + new_view.getVid());
            }
        } catch (TimeoutException e) {
            if (log_collect_msgs && log.isWarnEnabled()) {
                log.warn(local_addr + ": failed to collect all ACKs (expected=" + ack_collector.expectedAcks()
                         + ") for view " + new_view.getViewId() + " after " + view_ack_collection_timeout
                         + "ms, missing ACKs from " + ack_collector.printMissing());
            }
        }
    }

    if (jr != null && (newMembers != null && !newMembers.isEmpty())) {
        ack_collector.reset(new ArrayList<Address>(newMembers));
        for (Address joiner : newMembers) {
            sendJoinResponse(jr, joiner);
        }
        try {
            ack_collector.waitForAllAcks(view_ack_collection_timeout);
            if (log.isTraceEnabled())
                log.trace(local_addr + ": received all ACKs (" + ack_collector.expectedAcks()
                          + ") from joiners for view " + new_view.getVid());
        } catch (TimeoutException e) {
            if (log_collect_msgs && log.isWarnEnabled()) {
                log.warn(local_addr + ": failed to collect all ACKs (expected=" + ack_collector.expectedAcks()
                         + ") for unicast view " + new_view + " after " + view_ack_collection_timeout
                         + "ms, missing ACKs from " + ack_collector.printMissing());
            }
        }
    }
}
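// Hedged illustration of the two ack rounds in castViewChange() (membership and view ids are made
// up): when coordinator A installs view A|5={A,B,C} with joiner C, the existing members {A,B} form
// ackMembers, receive the VIEW multicast and are waited on first; C is then sent its JoinRsp
// separately and acknowledged in a second round. If ackMembers ends up containing only A itself
// (e.g. an initial singleton view A|1={A}), the multicast is skipped entirely and the view is
// applied locally via impl.handleViewChange(), after adding the message to the retransmit table.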