/** * Reliable unicast layer. Implemented with negative acks. Every sender keeps its messages in an * AckSenderWindow. A receiver stores incoming messages in a NakReceiverWindow, and asks the sender * for retransmission if a gap is detected. Every now and then (stable_interval), a timer task sends * a STABLE message to all senders, including the highest received and delivered seqnos. A sender * purges messages lower than highest delivered and asks the STABLE sender for messages it might * have missed (smaller than highest received). A STABLE message can also be sent when a receiver * has received more than max_bytes from a given sender. * * <p>The advantage of this protocol over {@link org.jgroups.protocols.UNICAST} is that it doesn't * send acks for every message. Instead, it sends 'acks' after receiving max_bytes and/ or * periodically (stable_interval). * * @author Bela Ban */ @MBean(description = "Reliable unicast layer") public class UNICAST2 extends Protocol implements AgeOutCache.Handler<Address> { public static final long DEFAULT_FIRST_SEQNO = Global.DEFAULT_FIRST_UNICAST_SEQNO; /* ------------------------------------------ Properties ------------------------------------------ */ @Deprecated protected int[] timeout = { 400, 800, 1600, 3200 }; // for NakSenderWindow: max time to wait for missing acks /** * The first value (in milliseconds) to use in the exponential backoff retransmission mechanism. * Only enabled if the value is > 0 */ @Deprecated @Property( description = "The first value (in milliseconds) to use in the exponential backoff. Enabled if greater than 0", deprecatedMessage = "Not used anymore") protected int exponential_backoff = 300; @Property( description = "Max number of messages to be removed from a NakReceiverWindow. 
This property might " + "get removed anytime, so don't use it !") protected int max_msg_batch_size = 500; @Property(description = "Max number of bytes before a stability message is sent to the sender") protected long max_bytes = 10000000; @Property( description = "Max number of milliseconds before a stability message is sent to the sender(s)") protected long stable_interval = 60000L; @Property( description = "Max number of STABLE messages sent for the same highest_received seqno. A value < 1 is invalid") protected int max_stable_msgs = 5; @Property( description = "Number of rows of the matrix in the retransmission table (only for experts)", writable = false) protected int xmit_table_num_rows = 100; @Property( description = "Number of elements of a row of the matrix in the retransmission table (only for experts). " + "The capacity of the matrix is xmit_table_num_rows * xmit_table_msgs_per_row", writable = false) protected int xmit_table_msgs_per_row = 2000; @Property( description = "Resize factor of the matrix in the retransmission table (only for experts)", writable = false) protected double xmit_table_resize_factor = 1.2; @Property( description = "Number of milliseconds after which the matrix in the retransmission table " + "is compacted (only for experts)", writable = false) protected long xmit_table_max_compaction_time = 10 * 60 * 1000; @Deprecated @Property( description = "If enabled, the removal of a message from the retransmission table causes an " + "automatic purge (only for experts)", writable = false, deprecatedMessage = "not used anymore") protected boolean xmit_table_automatic_purging = true; @Property( description = "Whether to use the old retransmitter which retransmits individual messages or the new one " + "which uses ranges of retransmitted messages. Default is true. 
Note that this property will be removed in 3.0; " + "it is only used to switch back to the old (and proven) retransmitter mechanism if issues occur") protected boolean use_range_based_retransmitter = true; @Property( description = "Time (in milliseconds) after which an idle incoming or outgoing connection is closed. The " + "connection will get re-established when used again. 0 disables connection reaping") protected long conn_expiry_timeout = 60000; @Property( description = "Interval (in milliseconds) at which missing messages (from all retransmit buffers) " + "are retransmitted") protected long xmit_interval = 1000; /* --------------------------------------------- JMX ---------------------------------------------- */ protected long num_msgs_sent = 0, num_msgs_received = 0; /* --------------------------------------------- Fields ------------------------------------------------ */ protected final ConcurrentMap<Address, SenderEntry> send_table = Util.createConcurrentMap(); protected final ConcurrentMap<Address, ReceiverEntry> recv_table = Util.createConcurrentMap(); /** RetransmitTask running every xmit_interval ms */ protected Future<?> xmit_task; protected final ReentrantLock recv_table_lock = new ReentrantLock(); protected volatile List<Address> members = new ArrayList<Address>(11); protected Address local_addr = null; protected TimeScheduler timer = null; // used for retransmissions (passed to AckSenderWindow) protected volatile boolean running = false; protected short last_conn_id = 0; protected long max_retransmit_time = 60 * 1000L; protected AgeOutCache<Address> cache = null; protected Future<?> stable_task_future = null; // bcasts periodic STABLE message (added to timer below) protected Future<?> connection_reaper; // closes idle connections public int[] getTimeout() { return timeout; } @Deprecated @Property( name = "timeout", converter = PropertyConverters.IntegerArray.class, description = "list of timeouts", deprecatedMessage = "not used anymore") public 
void setTimeout(int[] val) { if (val != null) timeout = val; } public void setMaxMessageBatchSize(int size) { if (size >= 1) max_msg_batch_size = size; } @ManagedAttribute public String getLocalAddress() { return local_addr != null ? local_addr.toString() : "null"; } @ManagedAttribute public String getMembers() { return members.toString(); } @ManagedAttribute(description = "Returns the number of outgoing (send) connections") public int getNumSendConnections() { return send_table.size(); } @ManagedAttribute(description = "Returns the number of incoming (receive) connections") public int getNumReceiveConnections() { return recv_table.size(); } @ManagedAttribute( description = "Returns the total number of outgoing (send) and incoming (receive) connections") public int getNumConnections() { return getNumReceiveConnections() + getNumSendConnections(); } @ManagedOperation public String printConnections() { StringBuilder sb = new StringBuilder(); if (!send_table.isEmpty()) { sb.append("send connections:\n"); for (Map.Entry<Address, SenderEntry> entry : send_table.entrySet()) { sb.append(entry.getKey()).append(": ").append(entry.getValue()).append("\n"); } } if (!recv_table.isEmpty()) { sb.append("\nreceive connections:\n"); for (Map.Entry<Address, ReceiverEntry> entry : recv_table.entrySet()) { sb.append(entry.getKey()).append(": ").append(entry.getValue()).append("\n"); } } return sb.toString(); } @ManagedAttribute(description = "Whether the ConnectionReaper task is running") public boolean isConnectionReaperRunning() { return connection_reaper != null && !connection_reaper.isDone(); } @ManagedAttribute public long getNumMessagesSent() { return num_msgs_sent; } @ManagedAttribute public long getNumMessagesReceived() { return num_msgs_received; } @ManagedAttribute(description = "Total number of undelivered messages in all receive windows") public long getXmitTableUndeliveredMessages() { long retval = 0; for (ReceiverEntry entry : recv_table.values()) { if 
(entry.received_msgs != null) retval += entry.received_msgs.size(); } return retval; } @ManagedAttribute(description = "Total number of missing messages in all receive windows") public long getXmitTableMissingMessages() { long retval = 0; for (ReceiverEntry entry : recv_table.values()) { if (entry.received_msgs != null) retval += entry.received_msgs.getNumMissing(); } return retval; } @ManagedAttribute(description = "Number of compactions in all (receive and send) windows") public int getXmitTableNumCompactions() { int retval = 0; for (ReceiverEntry entry : recv_table.values()) { if (entry.received_msgs != null) retval += entry.received_msgs.getNumCompactions(); } for (SenderEntry entry : send_table.values()) { if (entry.sent_msgs != null) retval += entry.sent_msgs.getNumCompactions(); } return retval; } @ManagedAttribute(description = "Number of moves in all (receive and send) windows") public int getXmitTableNumMoves() { int retval = 0; for (ReceiverEntry entry : recv_table.values()) { if (entry.received_msgs != null) retval += entry.received_msgs.getNumMoves(); } for (SenderEntry entry : send_table.values()) { if (entry.sent_msgs != null) retval += entry.sent_msgs.getNumMoves(); } return retval; } @ManagedAttribute(description = "Number of resizes in all (receive and send) windows") public int getXmitTableNumResizes() { int retval = 0; for (ReceiverEntry entry : recv_table.values()) { if (entry.received_msgs != null) retval += entry.received_msgs.getNumResizes(); } for (SenderEntry entry : send_table.values()) { if (entry.sent_msgs != null) retval += entry.sent_msgs.getNumResizes(); } return retval; } @ManagedAttribute(description = "Number of purges in all (receive and send) windows") public int getXmitTableNumPurges() { int retval = 0; for (ReceiverEntry entry : recv_table.values()) { if (entry.received_msgs != null) retval += entry.received_msgs.getNumPurges(); } for (SenderEntry entry : send_table.values()) { if (entry.sent_msgs != null) retval += 
entry.sent_msgs.getNumPurges(); } return retval; } @ManagedOperation(description = "Prints the contents of the receive windows for all members") public String printReceiveWindowMessages() { StringBuilder ret = new StringBuilder(local_addr + ":\n"); for (Map.Entry<Address, ReceiverEntry> entry : recv_table.entrySet()) { Address addr = entry.getKey(); Table<Message> buf = entry.getValue().received_msgs; ret.append(addr).append(": ").append(buf.toString()).append('\n'); } return ret.toString(); } @ManagedOperation(description = "Prints the contents of the send windows for all members") public String printSendWindowMessages() { StringBuilder ret = new StringBuilder(local_addr + ":\n"); for (Map.Entry<Address, SenderEntry> entry : send_table.entrySet()) { Address addr = entry.getKey(); Table<Message> buf = entry.getValue().sent_msgs; ret.append(addr).append(": ").append(buf.toString()).append('\n'); } return ret.toString(); } @ManagedAttribute(description = "Number of retransmit requests received") protected final AtomicLong xmit_reqs_received = new AtomicLong(0); @ManagedAttribute(description = "Number of retransmit requests sent") protected final AtomicLong xmit_reqs_sent = new AtomicLong(0); @ManagedAttribute(description = "Number of retransmit responses sent") protected final AtomicLong xmit_rsps_sent = new AtomicLong(0); @ManagedAttribute(description = "Is the retransmit task running") public boolean isXmitTaskRunning() { return xmit_task != null && !xmit_task.isDone(); } public long getMaxRetransmitTime() { return max_retransmit_time; } @Property( description = "Max number of milliseconds we try to retransmit a message to any given member. After that, " + "the connection is removed. Any new connection to that member will start with seqno #1 again. 
0 disables this") public void setMaxRetransmitTime(long max_retransmit_time) { this.max_retransmit_time = max_retransmit_time; if (cache != null && max_retransmit_time > 0) cache.setTimeout(max_retransmit_time); } @ManagedAttribute public int getAgeOutCacheSize() { return cache != null ? cache.size() : 0; } @ManagedOperation public String printAgeOutCache() { return cache != null ? cache.toString() : "n/a"; } public AgeOutCache<Address> getAgeOutCache() { return cache; } public void resetStats() { num_msgs_sent = num_msgs_received = 0; xmit_reqs_received.set(0); xmit_reqs_sent.set(0); xmit_rsps_sent.set(0); } public TimeScheduler getTimer() { return timer; } /** * Only used for unit tests, don't use ! * * @param timer */ public void setTimer(TimeScheduler timer) { this.timer = timer; } public void init() throws Exception { super.init(); if (max_stable_msgs < 1) throw new IllegalArgumentException("max_stable_msgs ( " + max_stable_msgs + ") must be > 0"); if (max_bytes <= 0) throw new IllegalArgumentException("max_bytes has to be > 0"); } public void start() throws Exception { timer = getTransport().getTimer(); if (timer == null) throw new Exception("timer is null"); if (max_retransmit_time > 0) cache = new AgeOutCache<Address>(timer, max_retransmit_time, this); running = true; if (stable_interval > 0) startStableTask(); if (conn_expiry_timeout > 0) startConnectionReaper(); startRetransmitTask(); } public void stop() { running = false; stopStableTask(); stopConnectionReaper(); stopRetransmitTask(); removeAllConnections(); } public Object up(Event evt) { Message msg; Address dst, src; Unicast2Header hdr; switch (evt.getType()) { case Event.MSG: msg = (Message) evt.getArg(); dst = msg.getDest(); if (dst == null || msg.isFlagSet(Message.NO_RELIABILITY)) // only handle unicast messages break; // pass up // changed from removeHeader(): we cannot remove the header because if we do loopback=true // at the // transport level, we will not have the header on retransmit ! 
(bela Aug 22 2006) hdr = (Unicast2Header) msg.getHeader(this.id); if (hdr == null) break; src = msg.getSrc(); switch (hdr.type) { case Unicast2Header.DATA: // received regular message handleDataReceived(src, hdr.seqno, hdr.conn_id, hdr.first, msg, evt); return null; // we pass the deliverable message up in handleDataReceived() case Unicast2Header.XMIT_REQ: // received ACK for previously sent message handleXmitRequest(src, (SeqnoList) msg.getObject()); break; case Unicast2Header.SEND_FIRST_SEQNO: handleResendingOfFirstMessage(src, hdr.seqno); break; case Unicast2Header.STABLE: stable(msg.getSrc(), hdr.conn_id, hdr.seqno, hdr.high_seqno); break; default: log.error("UnicastHeader type " + hdr.type + " not known !"); break; } return null; } return up_prot.up(evt); // Pass up to the layer above us } public Object down(Event evt) { switch (evt.getType()) { case Event.MSG: // Add UnicastHeader, add to AckSenderWindow and pass down Message msg = (Message) evt.getArg(); Address dst = msg.getDest(); /* only handle unicast messages */ if (dst == null || msg.isFlagSet(Message.NO_RELIABILITY)) break; if (!running) { if (log.isTraceEnabled()) log.trace("discarded message as start() has not yet been called, message: " + msg); return null; } SenderEntry entry = send_table.get(dst); if (entry == null) { entry = new SenderEntry(getNewConnectionId()); SenderEntry existing = send_table.putIfAbsent(dst, entry); if (existing != null) entry = existing; else { if (log.isTraceEnabled()) log.trace( local_addr + ": created connection to " + dst + " (conn_id=" + entry.send_conn_id + ")"); if (cache != null && !members.contains(dst)) cache.add(dst); } } short send_conn_id = entry.send_conn_id; long seqno = entry.sent_msgs_seqno.getAndIncrement(); long sleep = 10; while (running) { try { msg.putHeader( this.id, Unicast2Header.createDataHeader(seqno, send_conn_id, seqno == DEFAULT_FIRST_SEQNO)); entry.sent_msgs.add(seqno, msg); // add *including* UnicastHeader, adds to retransmitter if 
(conn_expiry_timeout > 0) entry.update(); break; } catch (Throwable t) { if (!running) break; if (log.isWarnEnabled()) log.warn("failed sending message", t); Util.sleep(sleep); sleep = Math.min(5000, sleep * 2); } } if (log.isTraceEnabled()) { StringBuilder sb = new StringBuilder(); sb.append(local_addr) .append(" --> DATA(") .append(dst) .append(": #") .append(seqno) .append(", conn_id=") .append(send_conn_id); if (seqno == DEFAULT_FIRST_SEQNO) sb.append(", first"); sb.append(')'); log.trace(sb); } try { down_prot.down(evt); num_msgs_sent++; } catch (Throwable t) { log.warn("failed sending the message", t); } return null; // we already passed the msg down case Event.VIEW_CHANGE: // remove connections to peers that are not members anymore ! View view = (View) evt.getArg(); List<Address> new_members = view.getMembers(); Set<Address> non_members = new HashSet<Address>(send_table.keySet()); non_members.addAll(recv_table.keySet()); members = new_members; non_members.removeAll(new_members); if (cache != null) cache.removeAll(new_members); if (!non_members.isEmpty()) { if (log.isTraceEnabled()) log.trace("removing non members " + non_members); for (Address non_mbr : non_members) removeConnection(non_mbr); } break; case Event.SET_LOCAL_ADDRESS: local_addr = (Address) evt.getArg(); break; } return down_prot.down(evt); // Pass on to the layer below us } /** * Purge all messages in window for local_addr, which are <= low. Check if the window's highest * received message is > high: if true, retransmit all messages from high - win.high to sender * * @param sender * @param hd Highest delivered seqno * @param hr Highest received seqno */ protected void stable(Address sender, short conn_id, long hd, long hr) { SenderEntry entry = send_table.get(sender); Table<Message> win = entry != null ? 
entry.sent_msgs : null; if (win == null) return; if (log.isTraceEnabled()) log.trace( new StringBuilder() .append(local_addr) .append(" <-- STABLE(") .append(sender) .append(": ") .append(hd) .append("-") .append(hr) .append(", conn_id=" + conn_id) + ")"); if (entry.send_conn_id != conn_id) { log.warn( local_addr + ": my conn_id (" + entry.send_conn_id + ") != received conn_id (" + conn_id + "); discarding STABLE message !"); return; } win.purge(hd, true); long win_hr = win.getHighestReceived(); if (win_hr > hr) { for (long seqno = hr; seqno <= win_hr; seqno++) { Message msg = win.get( seqno); // destination is still the same (the member which sent the STABLE message) if (msg != null) down_prot.down(new Event(Event.MSG, msg)); } } } @ManagedOperation( description = "Sends a STABLE message to all senders. This causes message purging and potential" + " retransmissions from senders") public void sendStableMessages() { for (Map.Entry<Address, ReceiverEntry> entry : recv_table.entrySet()) { Address dest = entry.getKey(); ReceiverEntry val = entry.getValue(); Table<Message> win = val != null ? 
val.received_msgs : null; if (win != null) { long[] tmp = win.getDigest(); long low = tmp[0], high = tmp[1]; if (val.last_highest == high) { if (val.num_stable_msgs >= max_stable_msgs) { continue; } else val.num_stable_msgs++; } else { val.last_highest = high; val.num_stable_msgs = 1; } sendStableMessage(dest, val.recv_conn_id, low, high); } } } protected void sendStableMessage(Address dest, short conn_id, long hd, long hr) { Message stable_msg = new Message(dest, null, null); Unicast2Header hdr = Unicast2Header.createStableHeader(conn_id, hd, hr); stable_msg.putHeader(this.id, hdr); stable_msg.setFlag(Message.OOB); if (log.isTraceEnabled()) { StringBuilder sb = new StringBuilder(); sb.append(local_addr) .append(" --> STABLE(") .append(dest) .append(": ") .append(hd) .append("-") .append(hr) .append(", conn_id=") .append(conn_id) .append(")"); log.trace(sb.toString()); } down_prot.down(new Event(Event.MSG, stable_msg)); } protected void startStableTask() { if (stable_task_future == null || stable_task_future.isDone()) { final Runnable stable_task = new Runnable() { public void run() { try { sendStableMessages(); } catch (Throwable t) { log.error("sending of STABLE messages failed", t); } } }; stable_task_future = timer.scheduleWithFixedDelay( stable_task, stable_interval, stable_interval, TimeUnit.MILLISECONDS); if (log.isTraceEnabled()) log.trace("stable task started"); } } protected void stopStableTask() { if (stable_task_future != null) { stable_task_future.cancel(false); stable_task_future = null; } } protected synchronized void startConnectionReaper() { if (connection_reaper == null || connection_reaper.isDone()) connection_reaper = timer.scheduleWithFixedDelay( new ConnectionReaper(), conn_expiry_timeout, conn_expiry_timeout, TimeUnit.MILLISECONDS); } protected synchronized void stopConnectionReaper() { if (connection_reaper != null) connection_reaper.cancel(false); } /** * Removes and resets from connection table (which is already locked). 
Returns true if member was * found, otherwise false. This method is public only so it can be invoked by unit testing, but * should not otherwise be used ! */ public void removeConnection(Address mbr) { removeSendConnection(mbr); removeReceiveConnection(mbr); } public void removeSendConnection(Address mbr) { send_table.remove(mbr); } public void removeReceiveConnection(Address mbr) { ReceiverEntry entry2 = recv_table.remove(mbr); if (entry2 != null) { Table<Message> win = entry2.received_msgs; if (win != null) sendStableMessage( mbr, entry2.recv_conn_id, win.getHighestDelivered(), win.getHighestReceived()); entry2.reset(); } } /** * This method is public only so it can be invoked by unit testing, but should not otherwise be * used ! */ @ManagedOperation( description = "Trashes all connections to other nodes. This is only used for testing") public void removeAllConnections() { send_table.clear(); sendStableMessages(); for (ReceiverEntry entry2 : recv_table.values()) entry2.reset(); recv_table.clear(); } public void retransmit(SeqnoList missing, Address sender) { Unicast2Header hdr = Unicast2Header.createXmitReqHeader(); Message retransmit_msg = new Message(sender, null, missing); retransmit_msg.setFlag(Message.OOB); if (log.isTraceEnabled()) log.trace(local_addr + ": sending XMIT_REQ (" + missing + ") to " + sender); retransmit_msg.putHeader(this.id, hdr); down_prot.down(new Event(Event.MSG, retransmit_msg)); xmit_reqs_sent.addAndGet(missing.size()); } /** * Called by AgeOutCache, to removed expired connections * * @param key */ public void expired(Address key) { if (key != null) { if (log.isDebugEnabled()) log.debug("removing connection to " + key + " because it expired"); removeConnection(key); } } /** * Check whether the hashmap contains an entry e for <code>sender</code> (create if not). If * e.received_msgs is null and <code>first</code> is true: create a new AckReceiverWindow(seqno) * and add message. Set e.received_msgs to the new window. 
Else just add the message. */ protected void handleDataReceived( Address sender, long seqno, short conn_id, boolean first, Message msg, Event evt) { if (log.isTraceEnabled()) { StringBuilder sb = new StringBuilder(); sb.append(local_addr).append(" <-- DATA(").append(sender).append(": #").append(seqno); if (conn_id != 0) sb.append(", conn_id=").append(conn_id); if (first) sb.append(", first"); sb.append(')'); log.trace(sb); } ReceiverEntry entry = getReceiverEntry(sender, seqno, first, conn_id); if (entry == null) return; if (conn_expiry_timeout > 0) entry.update(); Table<Message> win = entry.received_msgs; boolean added = win.add(seqno, msg); // win is guaranteed to be non-null if we get here num_msgs_received++; if (added) { int len = msg.getLength(); if (len > 0 && entry.incrementStable(len)) sendStableMessage( sender, entry.recv_conn_id, win.getHighestDelivered(), win.getHighestReceived()); } // An OOB message is passed up immediately. Later, when remove() is called, we discard it. This // affects ordering ! // http://jira.jboss.com/jira/browse/JGRP-377 if (msg.isFlagSet(Message.OOB) && added) { try { up_prot.up(evt); } catch (Throwable t) { log.error("couldn't deliver OOB message " + msg, t); } } final AtomicBoolean processing = win.getProcessing(); if (!processing.compareAndSet(false, true)) { return; } // Try to remove as many messages as possible and pass them up. // Prevents concurrent passing up of messages by different threads // (http://jira.jboss.com/jira/browse/JGRP-198); // this is all the more important once we have a concurrent stack // (http://jira.jboss.com/jira/browse/JGRP-181), // where lots of threads can come up to this point concurrently, but only 1 is allowed to pass // at a time // We *can* deliver messages from *different* senders concurrently, e.g. 
reception of P1, Q1, // P2, Q2 can result in // delivery of P1, Q1, Q2, P2: FIFO (implemented by UNICAST) says messages need to be delivered // only in the // order in which they were sent by their senders boolean released_processing = false; try { while (true) { List<Message> msgs = win.removeMany(processing, true, max_msg_batch_size); // remove my own messages if (msgs == null || msgs.isEmpty()) { released_processing = true; return; } for (Message m : msgs) { // discard OOB msg: it has already been delivered // (http://jira.jboss.com/jira/browse/JGRP-377) if (m.isFlagSet(Message.OOB)) continue; try { up_prot.up(new Event(Event.MSG, m)); } catch (Throwable t) { log.error("couldn't deliver message " + m, t); } } } } finally { // processing is always set in win.remove(processing) above and never here ! This code is just // a // 2nd line of defense should there be an exception before win.remove(processing) sets // processing if (!released_processing) processing.set(false); } } protected ReceiverEntry getReceiverEntry( Address sender, long seqno, boolean first, short conn_id) { ReceiverEntry entry = recv_table.get(sender); if (entry != null && entry.recv_conn_id == conn_id) return entry; recv_table_lock.lock(); try { entry = recv_table.get(sender); if (first) { if (entry == null) { entry = getOrCreateReceiverEntry(sender, seqno, conn_id); } else { // entry != null && win != null if (conn_id != entry.recv_conn_id) { if (log.isTraceEnabled()) log.trace( local_addr + ": conn_id=" + conn_id + " != " + entry.recv_conn_id + "; resetting receiver window"); recv_table.remove(sender); entry = getOrCreateReceiverEntry(sender, seqno, conn_id); } else {; } } } else { // entry == null && win == null OR entry != null && win == null OR entry != null && // win != null if (entry == null || entry.recv_conn_id != conn_id) { recv_table_lock.unlock(); sendRequestForFirstSeqno(sender, seqno); // drops the message and returns (see below) return null; } } return entry; } finally { if 
(recv_table_lock.isHeldByCurrentThread()) recv_table_lock.unlock(); } } protected ReceiverEntry getOrCreateReceiverEntry(Address sender, long seqno, short conn_id) { Table<Message> table = new Table<Message>( xmit_table_num_rows, xmit_table_msgs_per_row, seqno - 1, xmit_table_resize_factor, xmit_table_max_compaction_time); ReceiverEntry entry = new ReceiverEntry(table, conn_id); ReceiverEntry entry2 = recv_table.putIfAbsent(sender, entry); if (entry2 != null) return entry2; if (log.isTraceEnabled()) log.trace( local_addr + ": created receiver window for " + sender + " at seqno=#" + seqno + " for conn-id=" + conn_id); return entry; } protected void handleXmitRequest(Address sender, SeqnoList missing) { if (log.isTraceEnabled()) log.trace( new StringBuilder() .append(local_addr) .append(" <-- XMIT(") .append(sender) .append(": #") .append(missing) .append(')')); SenderEntry entry = send_table.get(sender); xmit_reqs_received.addAndGet(missing.size()); Table<Message> win = entry != null ? entry.sent_msgs : null; if (win != null) { for (long seqno : missing) { Message msg = win.get(seqno); if (msg == null) { if (log.isWarnEnabled() && !local_addr.equals(sender)) { StringBuilder sb = new StringBuilder(); sb.append("(requester=").append(sender).append(", local_addr=").append(this.local_addr); sb.append(") message ").append(sender).append("::").append(seqno); sb.append(" not found in retransmission table of ") .append(sender) .append(":\n") .append(win); log.warn(sb.toString()); } continue; } down_prot.down(new Event(Event.MSG, msg)); xmit_rsps_sent.incrementAndGet(); } } } /** * We need to resend our first message with our conn_id * * @param sender * @param seqno Resend the non null messages in the range [lowest .. 
seqno] */ protected void handleResendingOfFirstMessage(Address sender, long seqno) { if (log.isTraceEnabled()) log.trace(local_addr + " <-- SEND_FIRST_SEQNO(" + sender + "," + seqno + ")"); SenderEntry entry = send_table.get(sender); Table<Message> win = entry != null ? entry.sent_msgs : null; if (win == null) { if (log.isErrorEnabled()) log.error(local_addr + ": sender window for " + sender + " not found"); return; } boolean first_sent = false; for (long i = win.getLow() + 1; i <= seqno; i++) { Message rsp = win.get(i); if (rsp == null) continue; if (first_sent) { down_prot.down(new Event(Event.MSG, rsp)); } else { first_sent = true; // We need to copy the UnicastHeader and put it back into the message because Message.copy() // doesn't copy // the headers and therefore we'd modify the original message in the sender retransmission // window // (https://jira.jboss.org/jira/browse/JGRP-965) Message copy = rsp.copy(); Unicast2Header hdr = (Unicast2Header) copy.getHeader(this.id); Unicast2Header newhdr = hdr.copy(); newhdr.first = true; copy.putHeader(this.id, newhdr); down_prot.down(new Event(Event.MSG, copy)); } } } protected void startRetransmitTask() { if (xmit_task == null || xmit_task.isDone()) xmit_task = timer.scheduleWithFixedDelay( new RetransmitTask(), 0, xmit_interval, TimeUnit.MILLISECONDS); } protected void stopRetransmitTask() { if (xmit_task != null) { xmit_task.cancel(true); xmit_task = null; } } protected synchronized short getNewConnectionId() { short retval = last_conn_id; if (last_conn_id >= Short.MAX_VALUE || last_conn_id < 0) last_conn_id = 0; else last_conn_id++; return retval; } protected void sendRequestForFirstSeqno(Address dest, long seqno_received) { Message msg = new Message(dest); msg.setFlag(Message.OOB); Unicast2Header hdr = Unicast2Header.createSendFirstSeqnoHeader(seqno_received); msg.putHeader(this.id, hdr); if (log.isTraceEnabled()) log.trace(local_addr + " --> SEND_FIRST_SEQNO(" + dest + "," + seqno_received + ")"); 
down_prot.down(new Event(Event.MSG, msg)); } @ManagedOperation( description = "Closes connections that have been idle for more than conn_expiry_timeout ms") public void reapIdleConnections() { if (conn_expiry_timeout <= 0) return; // remove expired connections from send_table for (Map.Entry<Address, SenderEntry> entry : send_table.entrySet()) { SenderEntry val = entry.getValue(); long age = val.age(); if (age >= conn_expiry_timeout) { removeSendConnection(entry.getKey()); if (log.isDebugEnabled()) log.debug( local_addr + ": removed expired connection for " + entry.getKey() + " (" + age + " ms old) from send_table"); } } // remove expired connections from recv_table for (Map.Entry<Address, ReceiverEntry> entry : recv_table.entrySet()) { ReceiverEntry val = entry.getValue(); long age = val.age(); if (age >= conn_expiry_timeout) { removeReceiveConnection(entry.getKey()); if (log.isDebugEnabled()) log.debug( local_addr + ": removed expired connection for " + entry.getKey() + " (" + age + " ms old) from recv_table"); } } } /** * The following types and fields are serialized: * * <pre> * | DATA | seqno | conn_id | first | * | ACK | seqno | * | SEND_FIRST_SEQNO | seqno | * </pre> */ public static class Unicast2Header extends Header { public static final byte DATA = 0; public static final byte XMIT_REQ = 1; public static final byte SEND_FIRST_SEQNO = 2; public static final byte STABLE = 3; byte type; long seqno; // DATA and STABLE long high_seqno; // STABLE short conn_id; // DATA, STABLE boolean first; // DATA public Unicast2Header() {} // used for externalization public static Unicast2Header createDataHeader(long seqno, short conn_id, boolean first) { return new Unicast2Header(DATA, seqno, 0L, conn_id, first); } public static Unicast2Header createXmitReqHeader() { return new Unicast2Header(XMIT_REQ); } public static Unicast2Header createStableHeader(short conn_id, long low, long high) { if (low > high) throw new IllegalArgumentException("low (" + low + ") needs to be <= 
high (" + high + ")"); Unicast2Header retval = new Unicast2Header(STABLE, low); retval.high_seqno = high; retval.conn_id = conn_id; return retval; } public static Unicast2Header createSendFirstSeqnoHeader(long seqno_received) { return new Unicast2Header(SEND_FIRST_SEQNO, seqno_received); } protected Unicast2Header(byte type) { this.type = type; } protected Unicast2Header(byte type, long seqno) { this.type = type; this.seqno = seqno; } protected Unicast2Header(byte type, long seqno, long high, short conn_id, boolean first) { this.type = type; this.seqno = seqno; this.high_seqno = high; this.conn_id = conn_id; this.first = first; } public byte getType() { return type; } public long getSeqno() { return seqno; } public long getHighSeqno() { return high_seqno; } public short getConnId() { return conn_id; } public boolean isFirst() { return first; } public String toString() { StringBuilder sb = new StringBuilder(); sb.append(type2Str(type)).append(", seqno=").append(seqno); if (conn_id != 0) sb.append(", conn_id=").append(conn_id); if (first) sb.append(", first"); return sb.toString(); } public static String type2Str(byte t) { switch (t) { case DATA: return "DATA"; case XMIT_REQ: return "XMIT_REQ"; case SEND_FIRST_SEQNO: return "SEND_FIRST_SEQNO"; case STABLE: return "STABLE"; default: return "<unknown>"; } } public final int size() { int retval = Global.BYTE_SIZE; // type switch (type) { case DATA: retval += Util.size(seqno) // seqno + Global.SHORT_SIZE // conn_id + Global.BYTE_SIZE; // first break; case XMIT_REQ: break; case STABLE: retval += Util.size(seqno, high_seqno) + Global.SHORT_SIZE; // conn_id break; case SEND_FIRST_SEQNO: retval += Util.size(seqno); break; } return retval; } public Unicast2Header copy() { return new Unicast2Header(type, seqno, high_seqno, conn_id, first); } public void writeTo(DataOutput out) throws Exception { out.writeByte(type); switch (type) { case DATA: Util.writeLong(seqno, out); out.writeShort(conn_id); out.writeBoolean(first); break; 
case XMIT_REQ: break; case STABLE: Util.writeLongSequence(seqno, high_seqno, out); out.writeShort(conn_id); break; case SEND_FIRST_SEQNO: Util.writeLong(seqno, out); break; } } public void readFrom(DataInput in) throws Exception { type = in.readByte(); switch (type) { case DATA: seqno = Util.readLong(in); conn_id = in.readShort(); first = in.readBoolean(); break; case XMIT_REQ: break; case STABLE: long[] seqnos = Util.readLongSequence(in); seqno = seqnos[0]; high_seqno = seqnos[1]; conn_id = in.readShort(); break; case SEND_FIRST_SEQNO: seqno = Util.readLong(in); break; } } } protected final class SenderEntry { // stores (and retransmits) msgs sent by us to a given peer final Table<Message> sent_msgs; final AtomicLong sent_msgs_seqno = new AtomicLong(DEFAULT_FIRST_SEQNO); // seqno for msgs sent by us final short send_conn_id; protected final AtomicLong timestamp = new AtomicLong(0); public SenderEntry(short send_conn_id) { this.send_conn_id = send_conn_id; this.sent_msgs = new Table<Message>( xmit_table_num_rows, xmit_table_msgs_per_row, 0, xmit_table_resize_factor, xmit_table_max_compaction_time); update(); } void update() { timestamp.set(System.currentTimeMillis()); } long age() { return System.currentTimeMillis() - timestamp.longValue(); } public String toString() { StringBuilder sb = new StringBuilder(); if (sent_msgs != null) sb.append(sent_msgs).append(", "); sb.append("send_conn_id=" + send_conn_id).append(" (" + age() + " ms old)"); return sb.toString(); } } protected final class ReceiverEntry { protected final Table<Message> received_msgs; // stores all msgs rcvd by a certain peer in seqno-order protected final short recv_conn_id; protected int received_bytes = 0; protected final AtomicLong timestamp = new AtomicLong(0); protected final Lock lock = new ReentrantLock(); protected long last_highest = -1; protected int num_stable_msgs = 0; public ReceiverEntry(Table<Message> received_msgs, short recv_conn_id) { this.received_msgs = received_msgs; 
this.recv_conn_id = recv_conn_id; update(); } /** * Adds len bytes, if max_bytes is exceeded, the value is reset and true returned, else false */ boolean incrementStable(int len) { lock.lock(); try { if (received_bytes + len >= max_bytes) { received_bytes = 0; return true; } received_bytes += len; return false; } finally { lock.unlock(); } } void reset() { received_bytes = 0; last_highest = -1; num_stable_msgs = 0; } void update() { timestamp.set(System.currentTimeMillis()); } long age() { return System.currentTimeMillis() - timestamp.longValue(); } public String toString() { StringBuilder sb = new StringBuilder(); if (received_msgs != null) sb.append(received_msgs).append(", "); sb.append("recv_conn_id=" + recv_conn_id); sb.append(" (" + age() + " ms old)"); return sb.toString(); } } protected class ConnectionReaper implements Runnable { public void run() { reapIdleConnections(); } } /** * Retransmitter task which periodically (every xmit_interval ms) looks at all the retransmit * tables and sends retransmit request to all members from which we have missing messages */ protected class RetransmitTask implements Runnable { public void run() { for (Map.Entry<Address, ReceiverEntry> entry : recv_table.entrySet()) { Address target = entry.getKey(); // target to send retransmit requests to ReceiverEntry val = entry.getValue(); Table<Message> buf = val != null ? val.received_msgs : null; if (buf != null && buf.getNumMissing() > 0) { SeqnoList missing = buf.getMissing(); if (missing != null) retransmit(missing, target); } } } } }
/** * JChannel is a default implementation of a Channel abstraction. * * <p>JChannel is instantiated using an appropriate form of a protocol stack description. Protocol * stack can be described using a file, URL or a stream containing XML stack description. * * @author Bela Ban * @since 2.0 */ @MBean(description = "JGroups channel") public class JChannel extends Channel { /** The default protocol stack used by the default constructor */ public static final String DEFAULT_PROTOCOL_STACK = "udp.xml"; /*the address of this JChannel instance*/ protected Address local_addr; protected List<AddressGenerator> address_generators; protected String name; /* the channel (also know as group) name */ protected String cluster_name; /* the latest view of the group membership */ protected View my_view; /*the protocol stack, used to send and receive messages from the protocol stack*/ protected ProtocolStack prot_stack; protected final Promise<StateTransferResult> state_promise = new Promise<>(); /** * True if a state transfer protocol is available, false otherwise (set by CONFIG event from * STATE_TRANSFER protocol) */ protected boolean state_transfer_supported = false; /** * True if a flush protocol is available, false otherwise (set by CONFIG event from FLUSH * protocol) */ protected volatile boolean flush_supported = false; protected final ConcurrentMap<String, Object> config = Util.createConcurrentMap(16); /** Collect statistics */ @ManagedAttribute(description = "Collect channel statistics", writable = true) protected boolean stats = true; protected long sent_msgs = 0, received_msgs = 0, sent_bytes = 0, received_bytes = 0; protected final DiagnosticsHandler.ProbeHandler probe_handler = new MyProbeHandler(); /** * Creates a JChannel without a protocol stack; used for programmatic creation of channel and * protocol stack * * @param create_protocol_stack If true, the default configuration will be used. 
If false, no * protocol stack will be created */ public JChannel(boolean create_protocol_stack) { if (create_protocol_stack) { try { init(ConfiguratorFactory.getStackConfigurator(DEFAULT_PROTOCOL_STACK)); } catch (Exception e) { throw new RuntimeException(e); } } } /** * Constructs a {@code JChannel} instance with the protocol stack specified by the {@code * DEFAULT_PROTOCOL_STACK} member. * * @throws Exception If problems occur during the initialization of the protocol stack. */ public JChannel() throws Exception { this(DEFAULT_PROTOCOL_STACK); } /** * Constructs a JChannel instance with the protocol stack configuration contained by the specified * file. * * @param properties A file containing a JGroups XML protocol stack configuration. * @throws Exception If problems occur during the configuration or initialization of the protocol * stack. */ public JChannel(File properties) throws Exception { this(ConfiguratorFactory.getStackConfigurator(properties)); } /** * Constructs a JChannel instance with the protocol stack configuration contained by the specified * XML element. * * @param properties An XML element containing a JGroups XML protocol stack configuration. * @throws Exception If problems occur during the configuration or initialization of the protocol * stack. */ public JChannel(Element properties) throws Exception { this(ConfiguratorFactory.getStackConfigurator(properties)); } /** * Constructs a JChannel instance with the protocol stack configuration indicated by the specified * URL. * * @param properties A URL pointing to a JGroups XML protocol stack configuration. * @throws Exception If problems occur during the configuration or initialization of the protocol * stack. */ public JChannel(URL properties) throws Exception { this(ConfiguratorFactory.getStackConfigurator(properties)); } /** * Constructs a JChannel instance with the protocol stack configuration based upon the specified * properties parameter. 
* * @param props A file containing a JGroups XML configuration, a URL pointing to an XML * configuration, or an old style plain configuration string. * @throws Exception If problems occur during the configuration or initialization of the protocol * stack. */ public JChannel(String props) throws Exception { this(ConfiguratorFactory.getStackConfigurator(props)); } /** * Creates a channel with a configuration based on an input stream. * * @param input An input stream, pointing to a streamed configuration * @throws Exception */ public JChannel(InputStream input) throws Exception { this(ConfiguratorFactory.getStackConfigurator(input)); } /** * Constructs a JChannel with the protocol stack configuration contained by the protocol stack * configurator parameter. * * <p>All of the public constructors of this class eventually delegate to this method. * * @param configurator A protocol stack configurator containing a JGroups protocol stack * configuration. * @throws Exception If problems occur during the initialization of the protocol stack. */ public JChannel(ProtocolStackConfigurator configurator) throws Exception { init(configurator); } /** * Creates a channel from an array of protocols. Note that after a {@link * org.jgroups.JChannel#close()}, the protocol list <em>should not</em> be reused, ie. new * JChannel(protocols) would reuse the same protocol list, and this might lead to problems ! * * @param protocols The list of protocols, from bottom to top, ie. the first protocol in the list * is the transport, the last the top protocol * @throws Exception */ public JChannel(Protocol... protocols) throws Exception { this(Arrays.asList(protocols)); } /** * Creates a channel from an array of protocols. Note that after a {@link * org.jgroups.JChannel#close()}, the protocol list <em>should not</em> be reused, ie. new * JChannel(protocols) would reuse the same protocol list, and this might lead to problems ! * * @param protocols The list of protocols, from bottom to top, ie. 
the first protocol in the list * is the transport, the last the top protocol * @throws Exception */ public JChannel(Collection<Protocol> protocols) throws Exception { prot_stack = new ProtocolStack(); setProtocolStack(prot_stack); for (Protocol prot : protocols) { prot_stack.addProtocol(prot); prot.setProtocolStack(prot_stack); } prot_stack.init(); // Substitute vars with defined system props (if any) List<Protocol> prots = prot_stack.getProtocols(); Map<String, String> map = new HashMap<>(); for (Protocol prot : prots) Configurator.resolveAndAssignFields(prot, map); } /** * Creates a channel with the same configuration as the channel passed to this constructor. This * is used by testing code, and should not be used by clients ! * * @param ch * @throws Exception */ public JChannel(JChannel ch) throws Exception { init(ch); discard_own_messages = ch.discard_own_messages; } /** Returns the protocol stack */ public ProtocolStack getProtocolStack() { return prot_stack; } public void setProtocolStack(ProtocolStack stack) { this.prot_stack = stack; if (prot_stack != null) prot_stack.setChannel(this); } /** * Returns the protocol stack configuration in string format. An example of this property is<br> * "UDP:PING:FD:STABLE:NAKACK:UNICAST:FRAG:FLUSH:GMS:VIEW_ENFORCER:STATE_TRANSFER:QUEUE" */ public String getProperties() { return prot_stack != null ? 
prot_stack.printProtocolSpec(true) : null; } public boolean statsEnabled() { return stats; } public void enableStats(boolean stats) { this.stats = stats; } @ManagedOperation public void resetStats() { sent_msgs = received_msgs = sent_bytes = received_bytes = 0; } @ManagedAttribute public long getSentMessages() { return sent_msgs; } @ManagedAttribute public long getSentBytes() { return sent_bytes; } @ManagedAttribute public long getReceivedMessages() { return received_msgs; } @ManagedAttribute public long getReceivedBytes() { return received_bytes; } @ManagedAttribute public int getNumberOfTasksInTimer() { TimeScheduler timer = getTimer(); return timer != null ? timer.size() : -1; } @ManagedAttribute public int getTimerThreads() { TimeScheduler timer = getTimer(); return timer != null ? timer.getMinThreads() : -1; } @ManagedOperation public String dumpTimerQueue() { TimeScheduler timer = getTimer(); return timer != null ? timer.dumpTimerTasks() : "<n/a"; } /** * Returns a pretty-printed form of all the protocols. If include_properties is set, the * properties for each protocol will also be printed. */ @ManagedOperation public String printProtocolSpec(boolean include_properties) { ProtocolStack ps = getProtocolStack(); return ps != null ? ps.printProtocolSpec(include_properties) : null; } @ManagedOperation(description = "Connects the channel to a group") public synchronized void connect(String cluster_name) throws Exception { connect(cluster_name, true); } /** * Connects the channel to a group. * * @see JChannel#connect(String) */ @ManagedOperation(description = "Connects the channel to a group") protected synchronized void connect(String cluster_name, boolean useFlushIfPresent) throws Exception { if (!_preConnect(cluster_name)) return; if (cluster_name != null) { // only connect if we are not a unicast channel Event connect_event = useFlushIfPresent ? 
new Event(Event.CONNECT_USE_FLUSH, cluster_name) : new Event(Event.CONNECT, cluster_name); _connect(connect_event); } state = State.CONNECTED; notifyChannelConnected(this); } public synchronized void connect(String cluster_name, Address target, long timeout) throws Exception { connect(cluster_name, target, timeout, true); } /** * Connects this channel to a group and gets a state from a specified state provider. * * <p>This method invokes {@code connect()} and then {@code getState}. * * <p>If the FLUSH protocol is in the channel's stack definition, only one flush round is executed * for both connecting and fetching the state rather than two flushes if we invoke {@code connect} * and {@code getState} in succession. * * <p>If the channel is already connected, an error message will be printed to the error log. If * the channel is closed a ChannelClosed exception will be thrown. * * @param cluster_name The cluster name to connect to. Cannot be null. * @param target The state provider. If null, the state will be fetched from the coordinator, * unless this channel is the coordinator. * @param timeout The timeout for the state transfer. * @exception Exception The protocol stack cannot be started, or the JOIN failed * @exception IllegalStateException The channel is closed or disconnected * @exception StateTransferException State transfer was not successful */ public synchronized void connect( String cluster_name, Address target, long timeout, boolean useFlushIfPresent) throws Exception { if (!_preConnect(cluster_name)) return; if (cluster_name == null) { // only connect if we are not a unicast channel state = State.CONNECTED; return; } boolean canFetchState = false; try { Event connect_event = useFlushIfPresent ? 
new Event(Event.CONNECT_WITH_STATE_TRANSFER_USE_FLUSH, cluster_name) : new Event(Event.CONNECT_WITH_STATE_TRANSFER, cluster_name); _connect(connect_event); state = State.CONNECTED; notifyChannelConnected(this); canFetchState = getView() != null && getView().size() > 1; // if I am not the only member in cluster then if (canFetchState) getState(target, timeout, false); // fetch state from target } finally { // stopFlush if we fetched the state or failed to connect... if ((flushSupported() && useFlushIfPresent) && (canFetchState || state != State.CONNECTED)) stopFlush(); } } @ManagedOperation(description = "Disconnects the channel if connected") public synchronized void disconnect() { switch (state) { case OPEN: case CLOSED: return; case CONNECTING: case CONNECTED: if (cluster_name != null) { // Send down a DISCONNECT event, which travels down to the GMS, where a response is // returned try { down(new Event(Event.DISCONNECT, local_addr)); // DISCONNECT is handled by each layer } catch (Throwable t) { log.error(Util.getMessage("DisconnectFailure"), local_addr, t); } } state = State.OPEN; stopStack(true, false); notifyChannelDisconnected(this); init(); // sets local_addr=null; changed March 18 2003 (bela) -- prevented successful // rejoining break; default: throw new IllegalStateException("state " + state + " unknown"); } } @ManagedOperation(description = "Disconnects and destroys the channel") public synchronized void close() { _close(true); // by default disconnect before closing channel and close mq } @ManagedOperation public Map<String, Object> dumpStats() { Map<String, Object> retval = prot_stack.dumpStats(); if (retval != null) { Map<String, Long> tmp = dumpChannelStats(); if (tmp != null) retval.put("channel", tmp); } return retval; } public Map<String, Object> dumpStats(String protocol_name, List<String> attrs) { return prot_stack.dumpStats(protocol_name, attrs); } @ManagedOperation public Map<String, Object> dumpStats(String protocol_name) { return 
prot_stack.dumpStats(protocol_name, null); } protected Map<String, Long> dumpChannelStats() { Map<String, Long> retval = new HashMap<>(); retval.put("sent_msgs", sent_msgs); retval.put("sent_bytes", sent_bytes); retval.put("received_msgs", received_msgs); retval.put("received_bytes", received_bytes); return retval; } public void send(Message msg) throws Exception { checkClosedOrNotConnected(); if (msg == null) throw new NullPointerException("msg is null"); down(new Event(Event.MSG, msg)); } public void send(Address dst, Object obj) throws Exception { send(new Message(dst, obj)); } public void send(Address dst, byte[] buf) throws Exception { send(new Message(dst, buf)); } public void send(Address dst, byte[] buf, int offset, int length) throws Exception { send(new Message(dst, buf, offset, length)); } public View getView() { return state == State.CONNECTED ? my_view : null; } @ManagedAttribute(name = "view") public String getViewAsString() { View v = getView(); return v != null ? v.toString() : "n/a"; } @ManagedAttribute public static String getVersion() { return Version.printDescription(); } public Address getAddress() { return state == State.CLOSED ? null : local_addr; } @ManagedAttribute(name = "address") public String getAddressAsString() { return local_addr != null ? local_addr.toString() : "n/a"; } @ManagedAttribute(name = "address_uuid") public String getAddressAsUUID() { return local_addr instanceof UUID ? ((UUID) local_addr).toStringLong() : null; } public String getName() { return name; } public String getName(Address member) { return member != null ? UUID.get(member) : null; } @ManagedAttribute( writable = true, description = "The logical name of this channel. 
Stays with the channel until " + "the channel is closed") public void setName(String name) { if (name != null) { if (isConnected()) throw new IllegalStateException( "name cannot be set if channel is connected (should be done before)"); this.name = name; if (local_addr != null) UUID.add(local_addr, this.name); } } public JChannel name(String name) { setName(name); return this; } public JChannel receiver(Receiver r) { setReceiver(r); return this; } @ManagedAttribute(description = "Returns cluster name this channel is connected to") public String getClusterName() { return state == State.CONNECTED ? cluster_name : null; } /** * Sets the new {@link AddressGenerator}. New addresses will be generated using the new generator. * This should <em>not</em> be done while a channel is connected, but before connecting. * * @param address_generator * @since 2.12 */ public void addAddressGenerator(AddressGenerator address_generator) { if (address_generator == null) return; if (address_generators == null) address_generators = new ArrayList<>(3); address_generators.add(address_generator); } public boolean removeAddressGenerator(AddressGenerator address_generator) { return address_generator != null && address_generators != null && address_generators.remove(address_generator); } public void getState(Address target, long timeout) throws Exception { getState(target, timeout, true); } /** Retrieves state from the target member. See {@link #getState(Address,long)} for details. */ public void getState(Address target, long timeout, boolean useFlushIfPresent) throws Exception { Callable<Boolean> flusher = () -> Util.startFlush(JChannel.this); getState(target, timeout, useFlushIfPresent ? 
flusher : null); } protected boolean _preConnect(String cluster_name) throws Exception { if (state == State.CONNECTED) { if (log.isTraceEnabled()) log.trace("already connected to " + this.cluster_name); return false; } checkClosed(); setAddress(); State old_state = state; state = State.CONNECTING; try { startStack(cluster_name); } catch (Exception ex) { state = old_state; throw ex; } return true; } protected void _connect(Event connect_event) throws Exception { try { down(connect_event); } catch (Throwable t) { stopStack(true, false); state = State.OPEN; init(); throw new Exception("connecting to channel \"" + connect_event.getArg() + "\" failed", t); } } protected void getState(Address target, long timeout, Callable<Boolean> flushInvoker) throws Exception { checkClosedOrNotConnected(); if (!state_transfer_supported) throw new IllegalStateException( "fetching state will fail as state transfer is not supported. " + "Add one of the state transfer protocols to your configuration"); if (target == null) target = determineCoordinator(); if (target != null && local_addr != null && target.equals(local_addr)) { log.trace( local_addr + ": cannot get state from myself (" + target + "): probably the first member"); return; } boolean initiateFlush = flushSupported() && flushInvoker != null; if (initiateFlush) { boolean successfulFlush = false; try { successfulFlush = flushInvoker.call(); } catch (Throwable e) { successfulFlush = false; // http://jira.jboss.com/jira/browse/JGRP-759 } if (!successfulFlush) throw new IllegalStateException( "Node " + local_addr + " could not flush the cluster for state retrieval"); } state_promise.reset(); StateTransferInfo state_info = new StateTransferInfo(target, timeout); long start = System.currentTimeMillis(); down(new Event(Event.GET_STATE, state_info)); StateTransferResult result = state_promise.getResult(state_info.timeout); if (initiateFlush) stopFlush(); if (result == null) throw new StateTransferException( "timeout during state transfer 
(" + (System.currentTimeMillis() - start) + "ms)"); if (result.hasException()) throw new StateTransferException("state transfer failed", result.getException()); } /** * Callback method <br> * Called by the ProtocolStack when a message is received. * * @param evt the event carrying the message from the protocol stack */ public Object up(Event evt) { switch (evt.getType()) { case Event.MSG: Message msg = (Message) evt.getArg(); if (stats) { received_msgs++; received_bytes += msg.getLength(); } // discard local messages (sent by myself to me) if (discard_own_messages && local_addr != null && msg.getSrc() != null && local_addr.equals(msg.getSrc())) return null; break; case Event.VIEW_CHANGE: View tmp = (View) evt.getArg(); if (tmp instanceof MergeView) my_view = new View(tmp.getViewId(), tmp.getMembers()); else my_view = tmp; // Bela&Vladimir Oct 27th,2006 (JGroups 2.4): we need to set connected=true because a client // can // call channel.getView() in viewAccepted() callback invoked on this thread (see // Event.VIEW_CHANGE handling below) // not good: we are only connected when we returned from connect() - bela June 22 2007 // Changed: when a channel gets a view of which it is a member then it should be // connected even if connect() hasn't returned yet ! 
(bela Noc 2010) if (state != State.CONNECTED) state = State.CONNECTED; break; case Event.CONFIG: Map<String, Object> cfg = (Map<String, Object>) evt.getArg(); if (cfg != null) { if (cfg.containsKey("state_transfer")) { state_transfer_supported = (Boolean) cfg.get("state_transfer"); } if (cfg.containsKey("flush_supported")) { flush_supported = (Boolean) cfg.get("flush_supported"); } } break; case Event.GET_STATE_OK: StateTransferResult result = (StateTransferResult) evt.getArg(); if (up_handler != null) { try { Object retval = up_handler.up(evt); state_promise.setResult(new StateTransferResult()); return retval; } catch (Throwable t) { state_promise.setResult(new StateTransferResult(t)); } } if (receiver != null) { try { if (result.hasBuffer()) { byte[] tmp_state = result.getBuffer(); ByteArrayInputStream input = new ByteArrayInputStream(tmp_state); receiver.setState(input); } state_promise.setResult(result); } catch (Throwable t) { state_promise.setResult(new StateTransferResult(t)); } } break; case Event.STATE_TRANSFER_INPUTSTREAM_CLOSED: state_promise.setResult((StateTransferResult) evt.getArg()); break; case Event.STATE_TRANSFER_INPUTSTREAM: // Oct 13,2006 moved to down() when Event.STATE_TRANSFER_INPUTSTREAM_CLOSED is received // state_promise.setResult(is != null? 
Boolean.TRUE : Boolean.FALSE); if (up_handler != null) return up_handler.up(evt); InputStream is = (InputStream) evt.getArg(); if (is != null && receiver != null) { try { receiver.setState(is); } catch (Throwable t) { throw new RuntimeException("failed calling setState() in state requester", t); } } break; case Event.STATE_TRANSFER_OUTPUTSTREAM: if (receiver != null && evt.getArg() != null) { try { receiver.getState((OutputStream) evt.getArg()); } catch (Exception e) { throw new RuntimeException("failed calling getState() in state provider", e); } } break; case Event.GET_LOCAL_ADDRESS: return local_addr; default: break; } // If UpHandler is installed, pass all events to it and return (UpHandler is e.g. a building // block) if (up_handler != null) return up_handler.up(evt); if (receiver != null) return invokeCallback(evt.getType(), evt.getArg()); return null; } /** Callback invoked by the protocol stack to deliver a message batch */ public void up(MessageBatch batch) { if (stats) { received_msgs += batch.size(); received_bytes += batch.length(); } // discard local messages (sent by myself to me) if (discard_own_messages && local_addr != null && batch.sender() != null && local_addr.equals(batch.sender())) return; for (Message msg : batch) { if (up_handler != null) { try { up_handler.up(new Event(Event.MSG, msg)); } catch (Throwable t) { log.error(Util.getMessage("UpHandlerFailure"), t); } } else if (receiver != null) { try { receiver.receive(msg); } catch (Throwable t) { log.error(Util.getMessage("ReceiverFailure"), t); } } } } /** * Sends an event down the protocol stack. Note that - contrary to {@link #send(Message)}, if the * event is a message, no checks are performed whether the channel is closed or disconnected. 
* * @param evt the message to send down, encapsulated in an event */ public Object down(Event evt) { if (evt == null) return null; if (stats && evt.getType() == Event.MSG) { sent_msgs++; sent_bytes += ((Message) evt.getArg()).getLength(); } return prot_stack.down(evt); } @ManagedOperation public String toString(boolean details) { StringBuilder sb = new StringBuilder(); sb.append("local_addr=") .append(local_addr) .append('\n') .append("cluster_name=") .append(cluster_name) .append('\n') .append("my_view=") .append(my_view) .append('\n') .append("state=") .append(state) .append('\n'); if (details) { sb.append("discard_own_messages=").append(discard_own_messages).append('\n'); sb.append("state_transfer_supported=").append(state_transfer_supported).append('\n'); sb.append("props=").append(getProperties()).append('\n'); } return sb.toString(); } /* ----------------------------------- Private Methods ------------------------------------- */ protected Object invokeCallback(int type, Object arg) { switch (type) { case Event.MSG: receiver.receive((Message) arg); break; case Event.VIEW_CHANGE: receiver.viewAccepted((View) arg); break; case Event.SUSPECT: receiver.suspect((Address) arg); break; case Event.GET_APPLSTATE: byte[] tmp_state = null; if (receiver != null) { ByteArrayOutputStream output = new ByteArrayOutputStream(1024); try { receiver.getState(output); tmp_state = output.toByteArray(); } catch (Exception e) { throw new RuntimeException(local_addr + ": failed getting state from application", e); } } return new StateTransferInfo(null, 0L, tmp_state); case Event.BLOCK: receiver.block(); return true; case Event.UNBLOCK: receiver.unblock(); } return null; } protected final void init(ProtocolStackConfigurator configurator) throws Exception { List<ProtocolConfiguration> configs = configurator.getProtocolStack(); // replace vars with system props configs.forEach(ProtocolConfiguration::substituteVariables); prot_stack = new ProtocolStack(this); prot_stack.setup(configs); 
// Setup protocol stack (creates protocol, calls init() on them) } protected final void init(JChannel ch) throws Exception { if (ch == null) throw new IllegalArgumentException("channel is null"); prot_stack = new ProtocolStack(this); prot_stack.setup( ch.getProtocolStack()); // Setup protocol stack (creates protocol, calls init() on them) } /** * Initializes all variables. Used after <tt>close()</tt> or <tt>disconnect()</tt>, to be ready * for new <tt>connect()</tt> */ protected void init() { if (local_addr != null) down(new Event(Event.REMOVE_ADDRESS, local_addr)); local_addr = null; cluster_name = null; my_view = null; } protected void startStack(String cluster_name) throws Exception { /*make sure the channel is not closed*/ checkClosed(); /*make sure we have a valid channel name*/ if (cluster_name == null) log.debug("cluster_name is null, assuming unicast channel"); else this.cluster_name = cluster_name; if (socket_factory != null) prot_stack.getTopProtocol().setSocketFactory(socket_factory); prot_stack.startStack( cluster_name, local_addr); // calls start() in all protocols, from top to bottom /*create a temporary view, assume this channel is the only member and is the coordinator*/ List<Address> t = new ArrayList<>(1); t.add(local_addr); my_view = new View(local_addr, 0, t); // create a dummy view TP transport = prot_stack.getTransport(); transport.registerProbeHandler(probe_handler); } /** * Generates new UUID and sets local address. 
Sends down a REMOVE_ADDRESS (if existing address was * present) and a SET_LOCAL_ADDRESS */ protected void setAddress() { Address old_addr = local_addr; local_addr = generateAddress(); if (old_addr != null) down(new Event(Event.REMOVE_ADDRESS, old_addr)); if (name == null || name.isEmpty()) // generate a logical name if not set name = Util.generateLocalName(); if (name != null && !name.isEmpty()) UUID.add(local_addr, name); Event evt = new Event(Event.SET_LOCAL_ADDRESS, local_addr); down(evt); if (up_handler != null) up_handler.up(evt); } protected Address generateAddress() { if (address_generators == null || address_generators.isEmpty()) return UUID.randomUUID(); if (address_generators.size() == 1) return address_generators.get(0).generateAddress(); // at this point we have multiple AddressGenerators installed Address[] addrs = new Address[address_generators.size()]; for (int i = 0; i < addrs.length; i++) addrs[i] = address_generators.get(i).generateAddress(); for (int i = 0; i < addrs.length; i++) { if (!(addrs[i] instanceof ExtendedUUID)) { log.error( "address generator %s does not subclass %s which is required if multiple address generators " + "are installed, removing it", addrs[i].getClass().getSimpleName(), ExtendedUUID.class.getSimpleName()); addrs[i] = null; } } ExtendedUUID uuid = null; for (int i = 0; i < addrs.length; i++) { // we only have ExtendedUUIDs in addrs if (addrs[i] != null) { if (uuid == null) uuid = (ExtendedUUID) addrs[i]; else uuid.addContents((ExtendedUUID) addrs[i]); } } return uuid != null ? 
uuid : UUID.randomUUID(); } /** * health check<br> * throws a ChannelClosed exception if the channel is closed */ protected void checkClosed() { if (state == State.CLOSED) throw new IllegalStateException("channel is closed"); } protected void checkClosedOrNotConnected() { if (state == State.CLOSED) throw new IllegalStateException("channel is closed"); if (!(state == State.CONNECTING || state == State.CONNECTED)) throw new IllegalStateException("channel is disconnected"); } /** * Disconnects and closes the channel. This method does the following things * * <ol> * <li>Calls {@code this.disconnect} if the disconnect parameter is true * <li>Calls {@code ProtocolStack.stop} on the protocol stack * <li>Calls {@code ProtocolStack.destroy} on the protocol stack * <li>Sets the channel closed and channel connected flags to true and false * <li>Notifies any channel listener of the channel close operation * </ol> */ protected void _close(boolean disconnect) { Address old_addr = local_addr; if (state == State.CLOSED) return; if (disconnect) disconnect(); // leave group if connected stopStack(true, true); state = State.CLOSED; notifyChannelClosed(this); init(); // sets local_addr=null; changed March 18 2003 (bela) -- prevented successful rejoining if (old_addr != null) UUID.remove(old_addr); } protected void stopStack(boolean stop, boolean destroy) { if (prot_stack != null) { try { if (stop) prot_stack.stopStack(cluster_name); if (destroy) prot_stack.destroy(); } catch (Exception e) { log.error(Util.getMessage("StackDestroyFailure"), e); } TP transport = prot_stack.getTransport(); if (transport != null) transport.unregisterProbeHandler(probe_handler); } } public boolean flushSupported() { return flush_supported; } public void startFlush(boolean automatic_resume) throws Exception { if (!flushSupported()) throw new IllegalStateException( "Flush is not supported, add pbcast.FLUSH protocol to your configuration"); try { down(new Event(Event.SUSPEND)); } catch (Exception e) { throw 
new Exception("Flush failed", e.getCause()); } finally { if (automatic_resume) stopFlush(); } } public void startFlush(List<Address> flushParticipants, boolean automatic_resume) throws Exception { if (!flushSupported()) throw new IllegalStateException( "Flush is not supported, add pbcast.FLUSH protocol to your configuration"); View v = getView(); boolean validParticipants = v != null && v.getMembers().containsAll(flushParticipants); if (!validParticipants) throw new IllegalArgumentException( "Current view " + v + " does not contain all flush participants " + flushParticipants); try { down(new Event(Event.SUSPEND, flushParticipants)); } catch (Exception e) { throw new Exception("Flush failed", e.getCause()); } finally { if (automatic_resume) stopFlush(flushParticipants); } } public void stopFlush() { if (!flushSupported()) throw new IllegalStateException( "Flush is not supported, add pbcast.FLUSH protocol to your configuration"); down(new Event(Event.RESUME)); } public void stopFlush(List<Address> flushParticipants) { if (!flushSupported()) throw new IllegalStateException( "Flush is not supported, add pbcast.FLUSH protocol to your configuration"); down(new Event(Event.RESUME, flushParticipants)); } Address determineCoordinator() { List<Address> mbrs = my_view != null ? my_view.getMembers() : null; if (mbrs == null) return null; if (!mbrs.isEmpty()) return mbrs.iterator().next(); return null; } protected TimeScheduler getTimer() { if (prot_stack != null) { TP transport = prot_stack.getTransport(); if (transport != null) return transport.getTimer(); } return null; } /* ------------------------------- End of Private Methods ---------------------------------- */ class MyProbeHandler implements DiagnosticsHandler.ProbeHandler { public Map<String, String> handleProbe(String... 
keys) { Map<String, String> map = new HashMap<>(3); for (String key : keys) { if (key.startsWith("jmx")) { handleJmx(map, key); continue; } if (key.startsWith("reset-stats")) { resetAllStats(); continue; } if (key.startsWith("invoke") || key.startsWith("op")) { int index = key.indexOf("="); if (index != -1) { try { handleOperation(map, key.substring(index + 1)); } catch (Throwable throwable) { log.error( Util.getMessage("OperationInvocationFailure"), key.substring(index + 1), throwable); } } } } return map; } public String[] supportedKeys() { return new String[] {"reset-stats", "jmx", "op=<operation>[<args>]"}; } protected void resetAllStats() { List<Protocol> prots = getProtocolStack().getProtocols(); prots.forEach(Protocol::resetStatistics); resetStats(); } protected void handleJmx(Map<String, String> map, String input) { Map<String, Object> tmp_stats; int index = input.indexOf("="); if (index > -1) { List<String> list = null; String protocol_name = input.substring(index + 1); index = protocol_name.indexOf("."); if (index > -1) { String rest = protocol_name; protocol_name = protocol_name.substring(0, index); String attrs = rest.substring(index + 1); // e.g. "num_sent,msgs,num_received_msgs" list = Util.parseStringList(attrs, ","); // check if there are any attribute-sets in the list for (Iterator<String> it = list.iterator(); it.hasNext(); ) { String tmp = it.next(); index = tmp.indexOf("="); if (index != -1) { String attrname = tmp.substring(0, index); String attrvalue = tmp.substring(index + 1); Protocol prot = prot_stack.findProtocol(protocol_name); Field field = prot != null ? Util.getField(prot.getClass(), attrname) : null; if (field != null) { Object value = MethodCall.convert(attrvalue, field.getType()); if (value != null) prot.setValue(attrname, value); } else { // try to find a setter for X, e.g. 
x(type-of-x) or setX(type-of-x) ResourceDMBean.Accessor setter = ResourceDMBean.findSetter( prot, attrname); // Util.getSetter(prot.getClass(), attrname); if (setter != null) { try { Class<?> type = setter instanceof ResourceDMBean.FieldAccessor ? ((ResourceDMBean.FieldAccessor) setter).getField().getType() : setter instanceof ResourceDMBean.MethodAccessor ? ((ResourceDMBean.MethodAccessor) setter) .getMethod() .getParameterTypes()[0] .getClass() : null; Object converted_value = MethodCall.convert(attrvalue, type); setter.invoke(converted_value); } catch (Exception e) { log.error("unable to invoke %s() on %s: %s", setter, protocol_name, e); } } else log.warn(Util.getMessage("FieldNotFound"), attrname, protocol_name); } it.remove(); } } } tmp_stats = dumpStats(protocol_name, list); if (tmp_stats != null) { for (Map.Entry<String, Object> entry : tmp_stats.entrySet()) { Map<String, Object> tmp_map = (Map<String, Object>) entry.getValue(); String key = entry.getKey(); map.put(key, tmp_map != null ? tmp_map.toString() : null); } } } else { tmp_stats = dumpStats(); if (tmp_stats != null) { for (Map.Entry<String, Object> entry : tmp_stats.entrySet()) { Map<String, Object> tmp_map = (Map<String, Object>) entry.getValue(); String key = entry.getKey(); map.put(key, tmp_map != null ? tmp_map.toString() : null); } } } } /** * Invokes an operation and puts the return value into map * * @param map * @param operation Protocol.OperationName[args], e.g. STABLE.foo[arg1 arg2 arg3] */ protected void handleOperation(Map<String, String> map, String operation) throws Exception { int index = operation.indexOf("."); if (index == -1) throw new IllegalArgumentException( "operation " + operation + " is missing the protocol name"); String prot_name = operation.substring(0, index); Protocol prot = prot_stack.findProtocol(prot_name); if (prot == null) return; // less drastic than throwing an exception... 
int args_index = operation.indexOf("["); String method_name; if (args_index != -1) method_name = operation.substring(index + 1, args_index).trim(); else method_name = operation.substring(index + 1).trim(); String[] args = null; if (args_index != -1) { int end_index = operation.indexOf("]"); if (end_index == -1) throw new IllegalArgumentException("] not found"); List<String> str_args = Util.parseCommaDelimitedStrings(operation.substring(args_index + 1, end_index)); Object[] strings = str_args.toArray(); args = new String[strings.length]; for (int i = 0; i < strings.length; i++) args[i] = (String) strings[i]; } Method method = MethodCall.findMethod(prot.getClass(), method_name, args); if (method == null) { log.warn( Util.getMessage("MethodNotFound"), local_addr, prot.getClass().getSimpleName(), method_name); return; } MethodCall call = new MethodCall(method); Object[] converted_args = null; if (args != null) { converted_args = new Object[args.length]; Class<?>[] types = method.getParameterTypes(); for (int i = 0; i < args.length; i++) converted_args[i] = MethodCall.convert(args[i], types[i]); } Object retval = call.invoke(prot, converted_args); if (retval != null) map.put(prot_name + "." + method_name, retval.toString()); } } }
/** * Reliable unicast layer. Uses acknowledgement scheme similar to TCP to provide lossless * transmission of unicast messages (for reliable multicast see NAKACK layer). When a message is * sent to a peer for the first time, we add the pair <peer_addr, Entry> to the hashtable (peer * address is the key). All messages sent to that peer will be added to * hashtable.peer_addr.sent_msgs. When we receive a message from a peer for the first time, another * entry will be created and added to the hashtable (unless already existing). Msgs will then be * added to hashtable.peer_addr.received_msgs. * * <p>This layer is used to reliably transmit point-to-point messages, that is, either messages sent * to a single receiver (vs. messages multicast to a group) or for example replies to a multicast * message. The sender uses an <code>AckSenderWindow</code> which retransmits messages for which it * hasn't received an ACK, the receiver uses <code>AckReceiverWindow</code> which keeps track of the * lowest seqno received so far, and keeps messages in order. * * <p>Messages in both AckSenderWindows and AckReceiverWindows will be removed. A message will be * removed from AckSenderWindow when an ACK has been received for it and messages will be removed * from AckReceiverWindow whenever a message is received: the new message is added and then we try * to remove as many messages as possible (until we stop at a gap, or there are no more messages). * * @author Bela Ban */ @MBean(description = "Reliable unicast layer") public class UNICAST extends Protocol implements AgeOutCache.Handler<Address> { public static final long DEFAULT_FIRST_SEQNO = Global.DEFAULT_FIRST_UNICAST_SEQNO; /* ------------------------------------------ Properties ------------------------------------------ */ @Deprecated protected int[] timeout = { 400, 800, 1600, 3200 }; // for AckSenderWindow: max time to wait for missing acks @Property( description = "Max number of messages to be removed from a retransmit window. 
This property might " + "get removed anytime, so don't use it !") protected int max_msg_batch_size = 500; @Property( description = "Time (in milliseconds) after which an idle incoming or outgoing connection is closed. The " + "connection will get re-established when used again. 0 disables connection reaping") protected long conn_expiry_timeout = 0; @Deprecated @Property( description = "Size (in bytes) of a Segment in the segments table. Only for experts, do not use !", deprecatedMessage = "not used anymore") protected int segment_capacity = 1000; @Property( description = "Number of rows of the matrix in the retransmission table (only for experts)", writable = false) protected int xmit_table_num_rows = 100; @Property( description = "Number of elements of a row of the matrix in the retransmission table (only for experts). " + "The capacity of the matrix is xmit_table_num_rows * xmit_table_msgs_per_row", writable = false) protected int xmit_table_msgs_per_row = 1000; @Property( description = "Resize factor of the matrix in the retransmission table (only for experts)", writable = false) protected double xmit_table_resize_factor = 1.2; @Property( description = "Number of milliseconds after which the matrix in the retransmission table " + "is compacted (only for experts)", writable = false) protected long xmit_table_max_compaction_time = 10 * 60 * 1000; // @Property(description="Max time (in ms) after which a connection to a non-member is closed") protected long max_retransmit_time = 60 * 1000L; @Property( description = "Interval (in milliseconds) at which messages in the send windows are resent") protected long xmit_interval = 2000; /* --------------------------------------------- JMX ---------------------------------------------- */ protected long num_msgs_sent = 0, num_msgs_received = 0; protected long num_acks_sent = 0, num_acks_received = 0, num_xmits = 0; /* --------------------------------------------- Fields ------------------------------------------------ */ 
/**
 * Sets {@code max_msg_batch_size}. Values smaller than 1 are ignored and the current setting is
 * kept.
 *
 * @param size the new maximum batch size
 */
public void setMaxMessageBatchSize(int size) {
  if (size < 1) {
    return;
  }
  max_msg_batch_size = size;
}
local_addr.toString() : "null"; } @ManagedAttribute public String getMembers() { return members.toString(); } @ManagedAttribute(description = "Whether the ConnectionReaper task is running") public boolean isConnectionReaperRunning() { return connection_reaper != null && !connection_reaper.isDone(); } @ManagedAttribute(description = "Returns the number of outgoing (send) connections") public int getNumSendConnections() { return send_table.size(); } @ManagedAttribute(description = "Returns the number of incoming (receive) connections") public int getNumReceiveConnections() { return recv_table.size(); } @ManagedAttribute( description = "Returns the total number of outgoing (send) and incoming (receive) connections") public int getNumConnections() { return getNumReceiveConnections() + getNumSendConnections(); } @ManagedOperation public String printConnections() { StringBuilder sb = new StringBuilder(); if (!send_table.isEmpty()) { sb.append("\nsend connections:\n"); for (Map.Entry<Address, SenderEntry> entry : send_table.entrySet()) { sb.append(entry.getKey()).append(": ").append(entry.getValue()).append("\n"); } } if (!recv_table.isEmpty()) { sb.append("\nreceive connections:\n"); for (Map.Entry<Address, ReceiverEntry> entry : recv_table.entrySet()) { sb.append(entry.getKey()).append(": ").append(entry.getValue()).append("\n"); } } return sb.toString(); } @ManagedAttribute public long getNumMessagesSent() { return num_msgs_sent; } @ManagedAttribute public long getNumMessagesReceived() { return num_msgs_received; } @ManagedAttribute public long getNumAcksSent() { return num_acks_sent; } @ManagedAttribute public long getNumAcksReceived() { return num_acks_received; } @ManagedAttribute public long getNumXmits() { return num_xmits; } public long getMaxRetransmitTime() { return max_retransmit_time; } @Property( description = "Max number of milliseconds we try to retransmit a message to any given member. After that, " + "the connection is removed. 
Any new connection to that member will start with seqno #1 again. 0 disables this") public void setMaxRetransmitTime(long max_retransmit_time) { this.max_retransmit_time = max_retransmit_time; if (cache != null && max_retransmit_time > 0) cache.setTimeout(max_retransmit_time); } @ManagedAttribute(description = "Is the retransmit task running") public boolean isXmitTaskRunning() { return xmit_task != null && !xmit_task.isDone(); } @ManagedAttribute public int getAgeOutCacheSize() { return cache != null ? cache.size() : 0; } @ManagedOperation public String printAgeOutCache() { return cache != null ? cache.toString() : "n/a"; } public AgeOutCache<Address> getAgeOutCache() { return cache; } /** Used for testing only */ public boolean hasSendConnectionTo(Address dest) { return send_table.containsKey(dest); } /** The number of messages in all Entry.sent_msgs tables (haven't received an ACK yet) */ @ManagedAttribute public int getNumUnackedMessages() { int num = 0; for (SenderEntry entry : send_table.values()) { if (entry.sent_msgs != null) num += entry.sent_msgs.size(); } return num; } @ManagedAttribute public int getNumberOfMessagesInReceiveWindows() { int num = 0; for (ReceiverEntry entry : recv_table.values()) { if (entry.received_msgs != null) num += entry.received_msgs.size(); } return num; } @ManagedAttribute(description = "Total number of undelivered messages in all receive windows") public long getXmitTableUndeliveredMessages() { long retval = 0; for (ReceiverEntry entry : recv_table.values()) { if (entry.received_msgs != null) retval += entry.received_msgs.size(); } return retval; } @ManagedAttribute(description = "Total number of missing messages in all receive windows") public long getXmitTableMissingMessages() { long retval = 0; for (ReceiverEntry entry : recv_table.values()) { if (entry.received_msgs != null) retval += entry.received_msgs.getNumMissing(); } return retval; } @ManagedAttribute(description = "Number of compactions in all (receive and send) 
windows") public int getXmitTableNumCompactions() { int retval = 0; for (ReceiverEntry entry : recv_table.values()) { if (entry.received_msgs != null) retval += entry.received_msgs.getNumCompactions(); } for (SenderEntry entry : send_table.values()) { if (entry.sent_msgs != null) retval += entry.sent_msgs.getNumCompactions(); } return retval; } @ManagedAttribute(description = "Number of moves in all (receive and send) windows") public int getXmitTableNumMoves() { int retval = 0; for (ReceiverEntry entry : recv_table.values()) { if (entry.received_msgs != null) retval += entry.received_msgs.getNumMoves(); } for (SenderEntry entry : send_table.values()) { if (entry.sent_msgs != null) retval += entry.sent_msgs.getNumMoves(); } return retval; } @ManagedAttribute(description = "Number of resizes in all (receive and send) windows") public int getXmitTableNumResizes() { int retval = 0; for (ReceiverEntry entry : recv_table.values()) { if (entry.received_msgs != null) retval += entry.received_msgs.getNumResizes(); } for (SenderEntry entry : send_table.values()) { if (entry.sent_msgs != null) retval += entry.sent_msgs.getNumResizes(); } return retval; } @ManagedAttribute(description = "Number of purges in all (receive and send) windows") public int getXmitTableNumPurges() { int retval = 0; for (ReceiverEntry entry : recv_table.values()) { if (entry.received_msgs != null) retval += entry.received_msgs.getNumPurges(); } for (SenderEntry entry : send_table.values()) { if (entry.sent_msgs != null) retval += entry.sent_msgs.getNumPurges(); } return retval; } @ManagedOperation(description = "Prints the contents of the receive windows for all members") public String printReceiveWindowMessages() { StringBuilder ret = new StringBuilder(local_addr + ":\n"); for (Map.Entry<Address, ReceiverEntry> entry : recv_table.entrySet()) { Address addr = entry.getKey(); Table<Message> buf = entry.getValue().received_msgs; ret.append(addr).append(": ").append(buf.toString()).append('\n'); } 
return ret.toString(); } @ManagedOperation(description = "Prints the contents of the send windows for all members") public String printSendWindowMessages() { StringBuilder ret = new StringBuilder(local_addr + ":\n"); for (Map.Entry<Address, SenderEntry> entry : send_table.entrySet()) { Address addr = entry.getKey(); Table<Message> buf = entry.getValue().sent_msgs; ret.append(addr).append(": ").append(buf.toString()).append('\n'); } return ret.toString(); } public void resetStats() { num_msgs_sent = num_msgs_received = num_acks_sent = num_acks_received = 0; num_xmits = 0; } public Map<String, Object> dumpStats() { Map<String, Object> m = super.dumpStats(); m.put("num_unacked_msgs", getNumUnackedMessages()); m.put("num_msgs_in_recv_windows", getNumberOfMessagesInReceiveWindows()); return m; } public void start() throws Exception { timer = getTransport().getTimer(); if (timer == null) throw new Exception("timer is null"); if (max_retransmit_time > 0) cache = new AgeOutCache<Address>(timer, max_retransmit_time, this); running = true; if (conn_expiry_timeout > 0) startConnectionReaper(); startRetransmitTask(); } public void stop() { running = false; stopRetransmitTask(); stopConnectionReaper(); removeAllConnections(); } public Object up(Event evt) { switch (evt.getType()) { case Event.MSG: Message msg = (Message) evt.getArg(); if (msg.getDest() == null || msg.isFlagSet(Message.Flag.NO_RELIABILITY)) // only handle unicast messages break; // pass up UnicastHeader hdr = (UnicastHeader) msg.getHeader(this.id); if (hdr == null) break; Address sender = msg.getSrc(); switch (hdr.type) { case UnicastHeader.DATA: // received regular message handleDataReceived(sender, hdr.seqno, hdr.conn_id, hdr.first, msg, evt); break; default: handleUpEvent(sender, hdr); break; } return null; } return up_prot.up(evt); // Pass up to the layer above us } protected void handleUpEvent(Address sender, UnicastHeader hdr) { switch (hdr.type) { case UnicastHeader.DATA: // received regular message throw 
/**
 * Handles events travelling down the stack.
 *
 * <ul>
 *   <li>{@code MSG}: messages without a destination or flagged {@code NO_RELIABILITY} are passed
 *       down untouched. Otherwise the message is dropped ({@code null} is returned) while the
 *       protocol is not running; else it gets the next seqno of the sender entry for its
 *       destination (created on first use), is tagged with a data header, stored in the entry's
 *       {@code sent_msgs} table and passed down.
 *   <li>{@code VIEW_CHANGE}: stores the new membership and removes the connections to all peers
 *       that have a send or receive entry but are not in the new view.
 *   <li>{@code SET_LOCAL_ADDRESS}: records the local address.
 * </ul>
 *
 * All events other than a handled {@code MSG} are passed on to the protocol below.
 *
 * @param evt the event to handle
 * @return the result of the protocol below, or {@code null} for a message discarded because the
 *     protocol is not running
 */
public Object down(Event evt) {
  switch (evt.getType()) {
    case Event.MSG: // Add UnicastHeader, add to AckSenderWindow and pass down
      Message msg = (Message) evt.getArg();
      Address dst = msg.getDest();

      /* only handle unicast messages */
      if (dst == null || msg.isFlagSet(Message.Flag.NO_RELIABILITY)) break;

      if (!running) {
        if (log.isTraceEnabled())
          log.trace("discarded message as start() has not yet been called, message: " + msg);
        return null;
      }

      SenderEntry entry = send_table.get(dst);
      if (entry == null) {
        entry = new SenderEntry(getNewConnectionId());
        // putIfAbsent: if another thread created the entry first, use that one
        SenderEntry existing = send_table.putIfAbsent(dst, entry);
        if (existing != null) entry = existing;
        else {
          if (log.isTraceEnabled())
            log.trace(
                local_addr
                    + ": created sender window for "
                    + dst
                    + " (conn-id="
                    + entry.send_conn_id
                    + ")");
          // only connections to non-members are tracked in the age-out cache
          if (cache != null && !members.contains(dst)) cache.add(dst);
        }
      }

      short send_conn_id = entry.send_conn_id;
      long seqno = entry.sent_msgs_seqno.getAndIncrement();
      long sleep = 10;
      // Retry until the message has been stored, sleeping between attempts with a delay that
      // doubles from 10 ms up to 5000 ms. The caught Throwable is not logged. When the protocol
      // stops running the loop is left and the message is still passed down below.
      do {
        try {
          msg.putHeader(
              this.id,
              UnicastHeader.createDataHeader(seqno, send_conn_id, seqno == DEFAULT_FIRST_SEQNO));
          entry.sent_msgs.add(seqno, msg); // add *including* UnicastHeader, adds to retransmitter
          if (conn_expiry_timeout > 0) entry.update();
          break;
        } catch (Throwable t) {
          if (!running) break;
          Util.sleep(sleep);
          sleep = Math.min(5000, sleep * 2);
        }
      } while (running);

      if (log.isTraceEnabled()) {
        StringBuilder sb = new StringBuilder();
        sb.append(local_addr)
            .append(" --> DATA(")
            .append(dst)
            .append(": #")
            .append(seqno)
            .append(", conn_id=")
            .append(send_conn_id);
        if (seqno == DEFAULT_FIRST_SEQNO) sb.append(", first");
        sb.append(')');
        log.trace(sb);
      }

      num_msgs_sent++;
      return down_prot.down(evt);

    case Event.VIEW_CHANGE: // remove connections to peers that are not members anymore !
      View view = (View) evt.getArg();
      List<Address> new_members = view.getMembers();
      // start from every peer we have a connection to, then subtract the new membership
      Set<Address> non_members = new HashSet<Address>(send_table.keySet());
      non_members.addAll(recv_table.keySet());

      members = new_members;
      non_members.removeAll(new_members);
      // current members are dropped from the age-out cache
      if (cache != null) cache.removeAll(new_members);

      if (!non_members.isEmpty()) {
        if (log.isTraceEnabled()) log.trace("removing non members " + non_members);
        for (Address non_mbr : non_members) removeConnection(non_mbr);
      }
      break;

    case Event.SET_LOCAL_ADDRESS:
      local_addr = (Address) evt.getArg();
      break;
  }

  return down_prot.down(evt); // Pass on to the layer below us
}
/**
 * Removes both the send and the receive connection to {@code mbr} by delegating to {@link
 * #removeSendConnection(Address)} and {@link #removeReceiveConnection(Address)}. Nothing is
 * returned, so callers cannot tell whether a connection to {@code mbr} existed. This method is
 * public only so it can be invoked by unit testing, but should not otherwise be used !
 *
 * @param mbr the peer whose connections are removed
 */
public void removeConnection(Address mbr) {
  removeSendConnection(mbr);
  removeReceiveConnection(mbr);
}
/**
 * Handles a single data message received from {@code sender}.
 *
 * <p>The message is added to the receive window of the sender's receiver entry; nothing happens
 * if {@code getReceiverEntry} yields no entry. A newly added OOB message is passed up right away.
 * Then, unless another thread is already processing this window, deliverable messages are removed
 * and passed up and an ACK is sent to the sender.
 *
 * @param sender the address the message came from
 * @param seqno the sequence number from the message's unicast header
 * @param conn_id the connection id from the header
 * @param first the header's {@code first} flag
 * @param msg the received message
 * @param evt the event that carried {@code msg}; passed up as-is for OOB messages
 */
protected void handleDataReceived(
    Address sender, long seqno, short conn_id, boolean first, Message msg, Event evt) {
  if (log.isTraceEnabled()) {
    StringBuilder sb = new StringBuilder();
    sb.append(local_addr).append(" <-- DATA(").append(sender).append(": #").append(seqno);
    if (conn_id != 0) sb.append(", conn_id=").append(conn_id);
    if (first) sb.append(", first");
    sb.append(')');
    log.trace(sb);
  }

  ReceiverEntry entry = getReceiverEntry(sender, seqno, first, conn_id);
  if (entry == null) return;
  if (conn_expiry_timeout > 0) entry.update();
  Table<Message> win = entry.received_msgs;
  boolean added = win.add(seqno, msg); // win is guaranteed to be non-null if we get here
  // counted for every message that reaches this point, whether or not the add succeeded
  num_msgs_received++;

  // An OOB message is passed up immediately. Later, when remove() is called, we discard it. This
  // affects ordering !
  // http://jira.jboss.com/jira/browse/JGRP-377
  if (msg.isFlagSet(Message.Flag.OOB) && added) {
    try {
      up_prot.up(evt);
    } catch (Throwable t) {
      log.error("couldn't deliver OOB message " + msg, t);
    }
  }

  // only the thread that flips the flag from false to true goes on to deliver
  final AtomicBoolean processing = win.getProcessing();
  if (!processing.compareAndSet(false, true)) {
    return;
  }

  // try to remove (from the AckReceiverWindow) as many messages as possible as pass them up
  // Prevents concurrent passing up of messages by different threads
  // (http://jira.jboss.com/jira/browse/JGRP-198);
  // this is all the more important once we have a concurrent stack
  // (http://jira.jboss.com/jira/browse/JGRP-181),
  // where lots of threads can come up to this point concurrently, but only 1 is allowed to pass
  // at a time
  // We *can* deliver messages from *different* senders concurrently, e.g. reception of P1, Q1,
  // P2, Q2 can result in
  // delivery of P1, Q1, Q2, P2: FIFO (implemented by UNICAST) says messages need to be delivered
  // only in the
  // order in which they were sent by their senders
  removeAndDeliver(processing, win, sender);
  // NOTE(review): handleBatchReceived() acks win.getHighestDeliverable() at the equivalent
  // point, this method acks getHighestDelivered() — confirm the difference is intended
  sendAck(sender, win.getHighestDelivered(), conn_id);
}
entry.received_msgs : null; if (win != null) { final AtomicBoolean processing = win.getProcessing(); if (processing.compareAndSet(false, true)) { removeAndDeliver(processing, win, sender); sendAck(sender, win.getHighestDeliverable(), entry.recv_conn_id); } } } /** * Try to remove as many messages as possible from the table as pass them up. Prevents concurrent * passing up of messages by different threads (http://jira.jboss.com/jira/browse/JGRP-198); lots * of threads can come up to this point concurrently, but only 1 is allowed to pass at a time. We * *can* deliver messages from *different* senders concurrently, e.g. reception of P1, Q1, P2, Q2 * can result in delivery of P1, Q1, Q2, P2: FIFO (implemented by UNICAST) says messages need to * be delivered in the order in which they were sent */ protected int removeAndDeliver( final AtomicBoolean processing, Table<Message> win, Address sender) { int retval = 0; boolean released_processing = false; try { while (true) { List<Message> list = win.removeMany(processing, true, max_msg_batch_size); if (list == null) { released_processing = true; return retval; } MessageBatch batch = new MessageBatch(local_addr, sender, null, false, list); for (Message msg_to_deliver : batch) { // discard OOB msg: it has already been delivered // (http://jira.jboss.com/jira/browse/JGRP-377) if (msg_to_deliver.isFlagSet(Message.Flag.OOB)) batch.remove(msg_to_deliver); } try { if (log.isTraceEnabled()) { Message first = batch.first(), last = batch.last(); StringBuilder sb = new StringBuilder(local_addr + ": delivering"); if (first != null && last != null) { UnicastHeader hdr1 = (UnicastHeader) first.getHeader(id), hdr2 = (UnicastHeader) last.getHeader(id); sb.append(" #").append(hdr1.seqno).append(" - #").append(hdr2.seqno); } sb.append(" (" + batch.size()).append(" messages)"); log.trace(sb); } up_prot.up(batch); } catch (Throwable t) { log.error("failed to deliver batch " + batch, t); } } } finally { // processing is always set in 
win.remove(processing) above and never here ! This code is just // a // 2nd line of defense should there be an exception before win.remove(processing) sets // processing if (!released_processing) processing.set(false); } } protected ReceiverEntry getReceiverEntry( Address sender, long seqno, boolean first, short conn_id) { ReceiverEntry entry = recv_table.get(sender); if (entry != null && entry.recv_conn_id == conn_id) return entry; recv_table_lock.lock(); try { entry = recv_table.get(sender); if (first) { if (entry == null) { entry = getOrCreateReceiverEntry(sender, seqno, conn_id); } else { // entry != null && win != null if (conn_id != entry.recv_conn_id) { if (log.isTraceEnabled()) log.trace( local_addr + ": conn_id=" + conn_id + " != " + entry.recv_conn_id + "; resetting receiver window"); recv_table.remove(sender); entry = getOrCreateReceiverEntry(sender, seqno, conn_id); } else {; } } } else { // entry == null && win == null OR entry != null && win == null OR entry != null && // win != null if (entry == null || entry.recv_conn_id != conn_id) { recv_table_lock.unlock(); sendRequestForFirstSeqno(sender, seqno); // drops the message and returns (see below) return null; } } return entry; } finally { if (recv_table_lock.isHeldByCurrentThread()) recv_table_lock.unlock(); } } protected ReceiverEntry getOrCreateReceiverEntry(Address sender, long seqno, short conn_id) { Table<Message> table = new Table<Message>( xmit_table_num_rows, xmit_table_msgs_per_row, seqno - 1, xmit_table_resize_factor, xmit_table_max_compaction_time); ReceiverEntry entry = new ReceiverEntry(table, conn_id); ReceiverEntry entry2 = recv_table.putIfAbsent(sender, entry); if (entry2 != null) return entry2; if (log.isTraceEnabled()) log.trace( local_addr + ": created receiver window for " + sender + " at seqno=#" + seqno + " for conn-id=" + conn_id); return entry; } protected void handleAckReceived(Address sender, long seqno, short conn_id) { if (log.isTraceEnabled()) log.trace( new 
StringBuilder() .append(local_addr) .append(" <-- ACK(") .append(sender) .append(": #") .append(seqno) .append(", conn-id=") .append(conn_id) .append(')')); SenderEntry entry = send_table.get(sender); if (entry != null && entry.send_conn_id != conn_id) { if (log.isTraceEnabled()) log.trace( local_addr + ": my conn_id (" + entry.send_conn_id + ") != received conn_id (" + conn_id + "); discarding ACK"); return; } Table<Message> win = entry != null ? entry.sent_msgs : null; if (win != null) { win.purge(seqno, true); // removes all messages <= seqno (forced purge) num_acks_received++; } } /** * We need to resend our first message with our conn_id * * @param sender * @param seqno Resend the non null messages in the range [lowest .. seqno] */ protected void handleResendingOfFirstMessage(Address sender, long seqno) { if (log.isTraceEnabled()) log.trace(local_addr + " <-- SEND_FIRST_SEQNO(" + sender + "," + seqno + ")"); SenderEntry entry = send_table.get(sender); Table<Message> win = entry != null ? 
entry.sent_msgs : null; if (win == null) { if (log.isWarnEnabled()) log.warn(local_addr + ": sender window for " + sender + " not found"); return; } boolean first_sent = false; for (long i = win.getLow() + 1; i <= seqno; i++) { Message rsp = win.get(i); if (rsp == null) continue; if (first_sent) { down_prot.down(new Event(Event.MSG, rsp)); } else { first_sent = true; // We need to copy the UnicastHeader and put it back into the message because Message.copy() // doesn't copy // the headers and therefore we'd modify the original message in the sender retransmission // window // (https://jira.jboss.org/jira/browse/JGRP-965) Message copy = rsp.copy(); UnicastHeader hdr = (UnicastHeader) copy.getHeader(this.id); UnicastHeader newhdr = hdr.copy(); newhdr.first = true; copy.putHeader(this.id, newhdr); down_prot.down(new Event(Event.MSG, copy)); } } } protected void startRetransmitTask() { if (xmit_task == null || xmit_task.isDone()) xmit_task = timer.scheduleWithFixedDelay( new RetransmitTask(), 0, xmit_interval, TimeUnit.MILLISECONDS); } protected void stopRetransmitTask() { if (xmit_task != null) { xmit_task.cancel(true); xmit_task = null; } } protected void sendAck(Address dst, long seqno, short conn_id) { if (!running) // if we are disconnected, then don't send any acks which throw exceptions on // shutdown return; Message ack = new Message(dst) .setFlag(Message.Flag.INTERNAL) .putHeader(this.id, UnicastHeader.createAckHeader(seqno, conn_id)); if (log.isTraceEnabled()) log.trace( new StringBuilder() .append(local_addr) .append(" --> ACK(") .append(dst) .append(": #") .append(seqno) .append(')')); try { down_prot.down(new Event(Event.MSG, ack)); num_acks_sent++; } catch (Throwable t) { log.error("failed sending ACK(" + seqno + ") to " + dst, t); } } protected synchronized void startConnectionReaper() { if (connection_reaper == null || connection_reaper.isDone()) connection_reaper = timer.scheduleWithFixedDelay( new ConnectionReaper(), conn_expiry_timeout, 
conn_expiry_timeout, TimeUnit.MILLISECONDS); } protected synchronized void stopConnectionReaper() { if (connection_reaper != null) connection_reaper.cancel(false); } protected synchronized short getNewConnectionId() { short retval = last_conn_id; if (last_conn_id >= Short.MAX_VALUE || last_conn_id < 0) last_conn_id = 0; else last_conn_id++; return retval; } protected void sendRequestForFirstSeqno(Address dest, long seqno_received) { Message msg = new Message(dest).setFlag(Message.Flag.OOB, Message.Flag.INTERNAL); UnicastHeader hdr = UnicastHeader.createSendFirstSeqnoHeader(seqno_received); msg.putHeader(this.id, hdr); if (log.isTraceEnabled()) log.trace(local_addr + " --> SEND_FIRST_SEQNO(" + dest + "," + seqno_received + ")"); down_prot.down(new Event(Event.MSG, msg)); } @ManagedOperation( description = "Closes connections that have been idle for more than conn_expiry_timeout ms") public void reapIdleConnections() { // remove expired connections from send_table for (Map.Entry<Address, SenderEntry> entry : send_table.entrySet()) { SenderEntry val = entry.getValue(); long age = val.age(); if (age >= conn_expiry_timeout) { removeSendConnection(entry.getKey()); if (log.isDebugEnabled()) log.debug( local_addr + ": removed expired connection for " + entry.getKey() + " (" + age + " ms old) from send_table"); } } // remove expired connections from recv_table for (Map.Entry<Address, ReceiverEntry> entry : recv_table.entrySet()) { ReceiverEntry val = entry.getValue(); long age = val.age(); if (age >= conn_expiry_timeout) { removeReceiveConnection(entry.getKey()); if (log.isDebugEnabled()) log.debug( local_addr + ": removed expired connection for " + entry.getKey() + " (" + age + " ms old) from recv_table"); } } } protected String printMessageList(List<Message> list) { StringBuilder sb = new StringBuilder(); int size = list.size(); Message first = size > 0 ? list.get(0) : null, second = size > 1 ? 
list.get(size - 1) : first; UnicastHeader hdr; if (first != null) { hdr = (UnicastHeader) first.getHeader(id); if (hdr != null) sb.append("#" + hdr.seqno); } if (second != null) { hdr = (UnicastHeader) second.getHeader(id); if (hdr != null) sb.append(" - #" + hdr.seqno); } return sb.toString(); } /** * The following types and fields are serialized: * * <pre> * | DATA | seqno | conn_id | first | * | ACK | seqno | * | SEND_FIRST_SEQNO | * </pre> */ public static class UnicastHeader extends Header { public static final byte DATA = 0; public static final byte ACK = 1; public static final byte SEND_FIRST_SEQNO = 2; byte type; long seqno; // DATA and ACK short conn_id; // DATA boolean first; // DATA public UnicastHeader() {} // used for externalization public static UnicastHeader createDataHeader(long seqno, short conn_id, boolean first) { return new UnicastHeader(DATA, seqno, conn_id, first); } public static UnicastHeader createAckHeader(long seqno, short conn_id) { return new UnicastHeader(ACK, seqno, conn_id, false); } public static UnicastHeader createSendFirstSeqnoHeader(long seqno_received) { return new UnicastHeader(SEND_FIRST_SEQNO, seqno_received); } protected UnicastHeader(byte type, long seqno) { this.type = type; this.seqno = seqno; } protected UnicastHeader(byte type, long seqno, short conn_id, boolean first) { this.type = type; this.seqno = seqno; this.conn_id = conn_id; this.first = first; } public long getSeqno() { return seqno; } public String toString() { StringBuilder sb = new StringBuilder(); sb.append(type2Str(type)).append(", seqno=").append(seqno); if (conn_id != 0) sb.append(", conn_id=").append(conn_id); if (first) sb.append(", first"); return sb.toString(); } public static String type2Str(byte t) { switch (t) { case DATA: return "DATA"; case ACK: return "ACK"; case SEND_FIRST_SEQNO: return "SEND_FIRST_SEQNO"; default: return "<unknown>"; } } public final int size() { int retval = Global.BYTE_SIZE; // type switch (type) { case DATA: retval += 
Bits.size(seqno) // seqno + Global.SHORT_SIZE // conn_id + Global.BYTE_SIZE; // first break; case ACK: retval += Bits.size(seqno) + Global.SHORT_SIZE; // conn_id break; case SEND_FIRST_SEQNO: retval += Bits.size(seqno); break; } return retval; } public UnicastHeader copy() { return new UnicastHeader(type, seqno, conn_id, first); } public void writeTo(DataOutput out) throws Exception { out.writeByte(type); switch (type) { case DATA: Bits.writeLong(seqno, out); out.writeShort(conn_id); out.writeBoolean(first); break; case ACK: Bits.writeLong(seqno, out); out.writeShort(conn_id); break; case SEND_FIRST_SEQNO: Bits.writeLong(seqno, out); break; } } public void readFrom(DataInput in) throws Exception { type = in.readByte(); switch (type) { case DATA: seqno = Bits.readLong(in); conn_id = in.readShort(); first = in.readBoolean(); break; case ACK: seqno = Bits.readLong(in); conn_id = in.readShort(); break; case SEND_FIRST_SEQNO: seqno = Bits.readLong(in); break; } } } protected final class SenderEntry { // stores (and retransmits) msgs sent by us to a certain peer final Table<Message> sent_msgs; final AtomicLong sent_msgs_seqno = new AtomicLong(DEFAULT_FIRST_SEQNO); // seqno for msgs sent by us final short send_conn_id; protected final AtomicLong timestamp = new AtomicLong(0); final Lock lock = new ReentrantLock(); public SenderEntry(short send_conn_id) { this.send_conn_id = send_conn_id; this.sent_msgs = new Table<Message>( xmit_table_num_rows, xmit_table_msgs_per_row, 0, xmit_table_resize_factor, xmit_table_max_compaction_time); update(); } void update() { timestamp.set(System.currentTimeMillis()); } long age() { return System.currentTimeMillis() - timestamp.longValue(); } public String toString() { StringBuilder sb = new StringBuilder(); if (sent_msgs != null) sb.append(sent_msgs).append(", "); sb.append("send_conn_id=" + send_conn_id).append(" (" + age() + " ms old)"); return sb.toString(); } } protected static final class ReceiverEntry { protected final Table<Message> 
received_msgs; // stores all msgs rcvd by a certain peer in seqno-order protected final short recv_conn_id; protected final AtomicLong timestamp = new AtomicLong(0); public ReceiverEntry(Table<Message> received_msgs, short recv_conn_id) { this.received_msgs = received_msgs; this.recv_conn_id = recv_conn_id; update(); } void update() { timestamp.set(System.currentTimeMillis()); } long age() { return System.currentTimeMillis() - timestamp.longValue(); } public String toString() { StringBuilder sb = new StringBuilder(); if (received_msgs != null) sb.append(received_msgs).append(", "); sb.append("recv_conn_id=" + recv_conn_id); sb.append(" (" + age() + " ms old)"); return sb.toString(); } } protected class ConnectionReaper implements Runnable { public void run() { reapIdleConnections(); } public String toString() { return UNICAST.class.getSimpleName() + ": ConnectionReaper (interval=" + conn_expiry_timeout + " ms)"; } } /** * Retransmitter task which periodically (every xmit_interval ms) looks at all the retransmit * (send) tables and re-sends messages for which we haven't received an ack yet */ protected class RetransmitTask implements Runnable { public void run() { for (SenderEntry val : send_table.values()) { Table<Message> buf = val != null ? val.sent_msgs : null; if (buf != null && !buf.isEmpty()) { long from = buf.getHighestDelivered() + 1, to = buf.getHighestReceived(); List<Message> list = buf.get(from, to); if (list != null) { for (Message msg : list) retransmit(msg); } } } } public String toString() { return UNICAST.class.getSimpleName() + ": RetransmitTask (interval=" + xmit_interval + " ms)"; } } }
/** * Starts the merge protocol (only run by the merge leader). Essentially sends a MERGE_REQ to all * coordinators of all subgroups found. Each coord receives its digest and view and returns it. * The leader then computes the digest and view for the new group from the return values. Finally, * it sends this merged view/digest to all subgroup coordinators; each coordinator will install it * in their subgroup. */ class MergeTask implements Runnable { private Thread thread = null; /** List of all subpartition coordinators and their members */ private final ConcurrentMap<Address, Collection<Address>> coords = Util.createConcurrentMap(8, 0.75f, 8); /** * @param views Guaranteed to be non-null and to have >= 2 members, or else this thread would * not be started */ public synchronized void start(Map<Address, View> views) { if (thread != null && thread.isAlive()) // the merge thread is already running return; this.coords.clear(); // now remove all members which don't have us in their view, so RPCs won't block (e.g. 
FLUSH) // https://jira.jboss.org/browse/JGRP-1061 sanitizeViews(views); // Add all different coordinators of the views into the hashmap and sets their members: Collection<Address> coordinators = Util.determineMergeCoords(views); for (Address coord : coordinators) { View view = views.get(coord); if (view != null) this.coords.put(coord, new ArrayList<Address>(view.getMembers())); } // For the merge participants which are not coordinator, we simply add them, and the // associated // membership list consists only of themselves Collection<Address> merge_participants = Util.determineMergeParticipants(views); merge_participants.removeAll(coordinators); for (Address merge_participant : merge_participants) { Collection<Address> tmp = new ArrayList<Address>(); tmp.add(merge_participant); coords.putIfAbsent(merge_participant, tmp); } thread = gms.getThreadFactory().newThread(this, "MergeTask"); thread.setDaemon(true); thread.start(); } public synchronized void stop() { Thread tmp = thread; if (thread != null && thread.isAlive()) tmp.interrupt(); thread = null; } public synchronized boolean isRunning() { return thread != null && thread.isAlive(); } public void run() { // 1. Generate merge_id final MergeId new_merge_id = MergeId.create(gms.local_addr); final Collection<Address> coordsCopy = new ArrayList<Address>(coords.keySet()); long start = System.currentTimeMillis(); try { _run(new_merge_id, coordsCopy); // might remove members from coordsCopy } catch (Throwable ex) { if (log.isWarnEnabled()) log.warn(gms.local_addr + ": " + ex + ", merge is cancelled"); sendMergeCancelledMessage(coordsCopy, new_merge_id); cancelMerge( new_merge_id); // the message above cancels the merge, too, but this is a 2nd line of // defense } finally { /* 5. 
if flush is in stack stop the flush for entire cluster [JGRP-700] - FLUSH: flushing should span merge */ if (gms.flushProtocolInStack) gms.stopFlush(); thread = null; } long diff = System.currentTimeMillis() - start; if (log.isDebugEnabled()) log.debug(gms.local_addr + ": merge " + new_merge_id + " took " + diff + " ms"); } /** Runs the merge protocol as a leader */ protected void _run(MergeId new_merge_id, final Collection<Address> coordsCopy) throws Exception { boolean success = setMergeId(null, new_merge_id); if (!success) { log.warn("failed to set my own merge_id (" + merge_id + ") to " + new_merge_id); return; } if (log.isDebugEnabled()) log.debug( gms.local_addr + ": merge task " + merge_id + " started with " + coords.keySet().size() + " coords"); /* 2. Fetch the current Views/Digests from all subgroup coordinators */ success = getMergeDataFromSubgroupCoordinators(coords, new_merge_id, gms.merge_timeout); List<Address> missing = null; if (!success) { missing = merge_rsps.getMissing(); if (log.isDebugEnabled()) log.debug( "merge leader " + gms.local_addr + " did not get responses from all " + coords.keySet().size() + " partition coordinators; missing responses from " + missing.size() + " members, removing them from the merge"); merge_rsps.remove(missing); } /* 3. Remove null or rejected merge responses from merge_rsp and coords (so we'll send the new view * only to members who accepted the merge request) */ if (missing != null && !missing.isEmpty()) { coords.keySet().removeAll(missing); coordsCopy.removeAll(missing); } removeRejectedMergeRequests(coords.keySet()); if (merge_rsps.size() == 0) throw new Exception("did not get any merge responses from partition coordinators"); if (!coords .keySet() .contains( gms.local_addr)) // another member might have invoked a merge req on us before we got // there... throw new Exception("merge leader rejected merge request"); /* 4. 
Combine all views and digests into 1 View/1 Digest */ List<MergeData> merge_data = new ArrayList<MergeData>(merge_rsps.getResults().values()); MergeData combined_merge_data = consolidateMergeData(merge_data); if (combined_merge_data == null) throw new Exception("could not consolidate merge"); /* 4. Send the new View/Digest to all coordinators (including myself). On reception, they will install the digest and view in all of their subgroup members */ if (log.isDebugEnabled()) log.debug( gms.local_addr + ": installing merge view " + combined_merge_data.view.getViewId() + " (" + combined_merge_data.view.size() + " members) in " + coords.keySet().size() + " coords"); sendMergeView(coords.keySet(), combined_merge_data, new_merge_id); } /** * Sends a MERGE_REQ to all coords and populates a list of MergeData (in merge_rsps). Returns * after coords.size() response have been received, or timeout msecs have elapsed (whichever is * first). * * <p>If a subgroup coordinator rejects the MERGE_REQ (e.g. 
because of participation in a * different merge), <em>that member will be removed from coords !</em> * * @param coords A map of coordinatgor addresses and associated membership lists * @param new_merge_id The new merge id * @param timeout Max number of msecs to wait for the merge responses from the subgroup coords */ protected boolean getMergeDataFromSubgroupCoordinators( Map<Address, Collection<Address>> coords, MergeId new_merge_id, long timeout) { boolean gotAllResponses; long start = System.currentTimeMillis(); merge_rsps.reset(coords.keySet()); if (log.isTraceEnabled()) log.trace(gms.local_addr + ": sending MERGE_REQ to " + coords.keySet()); for (Map.Entry<Address, Collection<Address>> entry : coords.entrySet()) { Address coord = entry.getKey(); Collection<Address> mbrs = entry.getValue(); Message msg = new Message(coord).setFlag(Message.Flag.OOB, Message.Flag.INTERNAL); GMS.GmsHeader hdr = new GMS.GmsHeader(GMS.GmsHeader.MERGE_REQ, mbrs); hdr.mbr = gms.local_addr; hdr.merge_id = new_merge_id; msg.putHeader(gms.getId(), hdr); gms.getDownProtocol().down(new Event(Event.MSG, msg)); } // wait until num_rsps_expected >= num_rsps or timeout elapsed merge_rsps.waitForAllResponses(timeout); gotAllResponses = merge_rsps.hasAllResponses(); long stop = System.currentTimeMillis(); if (log.isTraceEnabled()) log.trace( gms.local_addr + ": collected " + merge_rsps.numberOfValidResponses() + " merge response(s) in " + (stop - start) + " ms"); return gotAllResponses; } /** * Removed rejected merge requests from merge_rsps and coords. 
This method has a lock on * merge_rsps */ private void removeRejectedMergeRequests(Collection<Address> coords) { int num_removed = 0; for (Iterator<Map.Entry<Address, MergeData>> it = merge_rsps.getResults().entrySet().iterator(); it.hasNext(); ) { Map.Entry<Address, MergeData> entry = it.next(); MergeData data = entry.getValue(); if (data.merge_rejected) { if (data.getSender() != null) coords.remove(data.getSender()); it.remove(); num_removed++; } } if (num_removed > 0) { if (log.isTraceEnabled()) log.trace(gms.local_addr + ": removed " + num_removed + " rejected merge responses"); } } /** * Merge all MergeData. All MergeData elements should be disjunct (both views and digests). * However, this method is prepared to resolve duplicate entries (for the same member). * Resolution strategy for views is to merge only 1 of the duplicate members. Resolution * strategy for digests is to take the higher seqnos for duplicate digests. * * <p>After merging all members into a Membership and subsequent sorting, the first member of * the sorted membership will be the new coordinator. This method has a lock on merge_rsps. * * @param merge_rsps A list of MergeData items. Elements with merge_rejected=true were removed * before. Is guaranteed not to be null and to contain at least 1 member. 
*/ private MergeData consolidateMergeData(List<MergeData> merge_rsps) { long logical_time = 0; // for new_vid List<View> subgroups = new ArrayList<View>(11); // contains a list of Views, each View is a subgroup Collection<Collection<Address>> sub_mbrships = new ArrayList<Collection<Address>>(); for (MergeData tmp_data : merge_rsps) { View tmp_view = tmp_data.getView(); if (tmp_view != null) { ViewId tmp_vid = tmp_view.getVid(); if (tmp_vid != null) { // compute the new view id (max of all vids +1) logical_time = Math.max(logical_time, tmp_vid.getId()); } // merge all membership lists into one (prevent duplicates) sub_mbrships.add(new ArrayList<Address>(tmp_view.getMembers())); subgroups.add(tmp_view.copy()); } } // determine the new digest Digest new_digest = consolidateDigests(merge_rsps, merge_rsps.size()); if (new_digest == null) return null; // remove all members from the new member list that are not in the digest Collection<Address> digest_mbrs = new_digest.getMembers(); for (Collection<Address> coll : sub_mbrships) coll.retainAll(digest_mbrs); List<Address> merged_mbrs = gms.computeNewMembership(sub_mbrships); // the new coordinator is the first member of the consolidated & sorted membership list Address new_coord = merged_mbrs.isEmpty() ? null : merged_mbrs.get(0); if (new_coord == null) return null; // should be the highest view ID seen up to now plus 1 ViewId new_vid = new ViewId(new_coord, logical_time + 1); // determine the new view MergeView new_view = new MergeView(new_vid, merged_mbrs, subgroups); if (log.isTraceEnabled()) log.trace( gms.local_addr + ": consolidated view=" + new_view + "\nconsolidated digest=" + new_digest); return new MergeData(gms.local_addr, new_view, new_digest); } /** * Merge all digests into one. For each sender, the new value is max(highest_delivered), * max(highest_received). 
This method has a lock on merge_rsps */ private Digest consolidateDigests(List<MergeData> merge_rsps, int num_mbrs) { MutableDigest retval = new MutableDigest(num_mbrs); for (MergeData data : merge_rsps) { Digest tmp_digest = data.getDigest(); if (tmp_digest == null) continue; retval.merge(tmp_digest); } return retval.copy(); } }