Exemplo n.º 1
0
/**
 * Reliable unicast layer. Implemented with negative acks. Every sender keeps its messages in an
 * AckSenderWindow. A receiver stores incoming messages in a NakReceiverWindow, and asks the sender
 * for retransmission if a gap is detected. Every now and then (stable_interval), a timer task sends
 * a STABLE message to all senders, including the highest received and delivered seqnos. A sender
 * purges messages lower than highest delivered and asks the STABLE sender for messages it might
 * have missed (smaller than highest received). A STABLE message can also be sent when a receiver
 * has received more than max_bytes from a given sender.
 *
 * <p>The advantage of this protocol over {@link org.jgroups.protocols.UNICAST} is that it doesn't
 * send acks for every message. Instead, it sends 'acks' after receiving max_bytes and/or
 * periodically (stable_interval).
 *
 * @author Bela Ban
 */
@MBean(description = "Reliable unicast layer")
public class UNICAST2 extends Protocol implements AgeOutCache.Handler<Address> {
  /**
   * Seqno carried by the first message of a connection. {@code down()} flags a DATA header as
   * "first" when the seqno it hands out equals this value.
   */
  public static final long DEFAULT_FIRST_SEQNO = Global.DEFAULT_FIRST_UNICAST_SEQNO;

  /* ------------------------------------------ Properties  ------------------------------------------ */
  // Deprecated. In the part of the file visible here it is only read by getTimeout() and written
  // by setTimeout().
  @Deprecated
  protected int[] timeout = {
    400, 800, 1600, 3200
  }; // for NakSenderWindow: max time to wait for missing acks

  /**
   * The first value (in milliseconds) to use in the exponential backoff retransmission mechanism.
   * Only enabled if the value is > 0
   */
  @Deprecated
  @Property(
      description =
          "The first value (in milliseconds) to use in the exponential backoff. Enabled if greater than 0",
      deprecatedMessage = "Not used anymore")
  protected int exponential_backoff = 300;

  // Upper bound handed to Table.removeMany() in handleDataReceived(), i.e. the largest batch of
  // messages removed from a receive window in one go.
  @Property(
      description =
          "Max number of messages to be removed from a NakReceiverWindow. This property might "
              + "get removed anytime, so don't use it !")
  protected int max_msg_batch_size = 500;

  // init() rejects values <= 0.
  @Property(description = "Max number of bytes before a stability message is sent to the sender")
  protected long max_bytes = 10000000;

  // Period of the stable task; start() only schedules the task when this is > 0.
  @Property(
      description =
          "Max number of milliseconds before a stability message is sent to the sender(s)")
  protected long stable_interval = 60000L;

  // init() rejects values < 1; sendStableMessages() stops sending for a window once this many
  // STABLE messages went out for an unchanged highest-received seqno.
  @Property(
      description =
          "Max number of STABLE messages sent for the same highest_received seqno. A value < 1 is invalid")
  protected int max_stable_msgs = 5;

  // The four xmit_table_* values below are passed to the Table constructor when a receive window
  // is created in getOrCreateReceiverEntry().
  @Property(
      description = "Number of rows of the matrix in the retransmission table (only for experts)",
      writable = false)
  protected int xmit_table_num_rows = 100;

  @Property(
      description =
          "Number of elements of a row of the matrix in the retransmission table (only for experts). "
              + "The capacity of the matrix is xmit_table_num_rows * xmit_table_msgs_per_row",
      writable = false)
  protected int xmit_table_msgs_per_row = 2000;

  @Property(
      description = "Resize factor of the matrix in the retransmission table (only for experts)",
      writable = false)
  protected double xmit_table_resize_factor = 1.2;

  @Property(
      description =
          "Number of milliseconds after which the matrix in the retransmission table "
              + "is compacted (only for experts)",
      writable = false)
  protected long xmit_table_max_compaction_time = 10 * 60 * 1000;

  @Deprecated
  @Property(
      description =
          "If enabled, the removal of a message from the retransmission table causes an "
              + "automatic purge (only for experts)",
      writable = false,
      deprecatedMessage = "not used anymore")
  protected boolean xmit_table_automatic_purging = true;

  @Property(
      description =
          "Whether to use the old retransmitter which retransmits individual messages or the new one "
              + "which uses ranges of retransmitted messages. Default is true. Note that this property will be removed in 3.0; "
              + "it is only used to switch back to the old (and proven) retransmitter mechanism if issues occur")
  protected boolean use_range_based_retransmitter = true;

  // start() only schedules the connection reaper when this is > 0; down() and
  // handleDataReceived() only refresh an entry's timestamp (entry.update()) when it is > 0.
  @Property(
      description =
          "Time (in milliseconds) after which an idle incoming or outgoing connection is closed. The "
              + "connection will get re-established when used again. 0 disables connection reaping")
  protected long conn_expiry_timeout = 60000;

  // NOTE(review): presumably the period of the task started by startRetransmitTask(), which is
  // not visible in this part of the file - confirm.
  @Property(
      description =
          "Interval (in milliseconds) at which missing messages (from all retransmit buffers) "
              + "are retransmitted")
  protected long xmit_interval = 1000;
  /* --------------------------------------------- JMX  ---------------------------------------------- */

  // Plain (non-atomic) counters: num_msgs_sent is bumped in down() after a message was passed
  // down, num_msgs_received in handleDataReceived(); both are zeroed by resetStats().
  protected long num_msgs_sent = 0, num_msgs_received = 0;

  /* --------------------------------------------- Fields ------------------------------------------------ */

  // Outgoing connections keyed by destination, and incoming connections keyed by sender.
  protected final ConcurrentMap<Address, SenderEntry> send_table = Util.createConcurrentMap();
  protected final ConcurrentMap<Address, ReceiverEntry> recv_table = Util.createConcurrentMap();

  /** RetransmitTask running every xmit_interval ms */
  protected Future<?> xmit_task;

  // Held by getReceiverEntry() while it looks up, creates or replaces an entry in recv_table.
  protected final ReentrantLock recv_table_lock = new ReentrantLock();

  // Current membership; replaced wholesale on VIEW_CHANGE in down().
  protected volatile List<Address> members = new ArrayList<Address>(11);

  // Set from the SET_LOCAL_ADDRESS event in down().
  protected Address local_addr = null;

  // Scheduler fetched from the transport in start(); used to schedule the stable task and the
  // connection reaper, and handed to the AgeOutCache.
  protected TimeScheduler timer = null;

  // True between start() and stop(); down() discards unicast messages while this is false.
  protected volatile boolean running = false;

  // NOTE(review): presumably the counter behind getNewConnectionId(), which is not visible in this
  // part of the file - confirm.
  protected short last_conn_id = 0;

  // Timeout given to the AgeOutCache; start() only creates the cache when this is > 0.
  protected long max_retransmit_time = 60 * 1000L;

  // Destinations that were not members when a connection to them was created (see down());
  // entries that age out are reported through expired(). Null when max_retransmit_time <= 0 at
  // start().
  protected AgeOutCache<Address> cache = null;

  protected Future<?> stable_task_future =
      null; // handle of the periodic task that calls sendStableMessages() (see startStableTask())

  protected Future<?> connection_reaper; // closes idle connections

  /**
   * Returns the (deprecated) timeout list.
   *
   * @return the internal array itself, not a copy
   */
  public int[] getTimeout() {
    return this.timeout;
  }

  /**
   * Replaces the (deprecated) timeout list. A {@code null} argument is ignored and the current
   * value is kept.
   *
   * @param val the new timeouts; stored by reference
   */
  @Deprecated
  @Property(
      name = "timeout",
      converter = PropertyConverters.IntegerArray.class,
      description = "list of timeouts",
      deprecatedMessage = "not used anymore")
  public void setTimeout(int[] val) {
    if (val == null) {
      return;
    }
    this.timeout = val;
  }

  /**
   * Sets the maximum number of messages removed from a receive window in one batch. Values below
   * 1 are silently ignored.
   *
   * @param size the new batch size
   */
  public void setMaxMessageBatchSize(int size) {
    if (size < 1) {
      return;
    }
    this.max_msg_batch_size = size;
  }

  /**
   * Returns the local address as a string.
   *
   * @return the address' string form, or the literal {@code "null"} when no address is set yet
   */
  @ManagedAttribute
  public String getLocalAddress() {
    if (local_addr == null) {
      return "null";
    }
    return local_addr.toString();
  }

  /**
   * Returns the string form of the current member list.
   *
   * @return {@code members.toString()}
   */
  @ManagedAttribute
  public String getMembers() {
    final List<Address> current = members;
    return current.toString();
  }

  /**
   * Counts the outgoing connections.
   *
   * @return the number of entries in {@code send_table}
   */
  @ManagedAttribute(description = "Returns the number of outgoing (send) connections")
  public int getNumSendConnections() {
    final int outgoing = send_table.size();
    return outgoing;
  }

  /**
   * Counts the incoming connections.
   *
   * @return the number of entries in {@code recv_table}
   */
  @ManagedAttribute(description = "Returns the number of incoming (receive) connections")
  public int getNumReceiveConnections() {
    final int incoming = recv_table.size();
    return incoming;
  }

  /**
   * Counts all connections, incoming and outgoing.
   *
   * @return {@link #getNumReceiveConnections()} plus {@link #getNumSendConnections()}
   */
  @ManagedAttribute(
      description =
          "Returns the total number of outgoing (send) and incoming (receive) connections")
  public int getNumConnections() {
    final int incoming = getNumReceiveConnections();
    final int outgoing = getNumSendConnections();
    return incoming + outgoing;
  }

  /**
   * Renders all send and receive connections, one {@code address: entry} pair per line. A section
   * is omitted entirely when its table is empty.
   *
   * @return the textual dump (empty string when there are no connections)
   */
  @ManagedOperation
  public String printConnections() {
    final StringBuilder out = new StringBuilder();

    final boolean have_senders = !send_table.isEmpty();
    if (have_senders) {
      out.append("send connections:\n");
      for (Map.Entry<Address, SenderEntry> conn : send_table.entrySet()) {
        out.append(conn.getKey());
        out.append(": ");
        out.append(conn.getValue());
        out.append("\n");
      }
    }

    final boolean have_receivers = !recv_table.isEmpty();
    if (have_receivers) {
      out.append("\nreceive connections:\n");
      for (Map.Entry<Address, ReceiverEntry> conn : recv_table.entrySet()) {
        out.append(conn.getKey());
        out.append(": ");
        out.append(conn.getValue());
        out.append("\n");
      }
    }

    return out.toString();
  }

  /**
   * Tells whether the connection reaper is scheduled and has not completed or been cancelled.
   *
   * @return {@code true} if a reaper future exists and is not done
   */
  @ManagedAttribute(description = "Whether the ConnectionReaper task is running")
  public boolean isConnectionReaperRunning() {
    if (connection_reaper == null) {
      return false;
    }
    return !connection_reaper.isDone();
  }

  /**
   * Returns the number of unicast messages passed down since start or the last stats reset.
   *
   * @return the current value of {@code num_msgs_sent}
   */
  @ManagedAttribute
  public long getNumMessagesSent() {
    return this.num_msgs_sent;
  }

  /**
   * Returns the number of DATA messages handled since start or the last stats reset.
   *
   * @return the current value of {@code num_msgs_received}
   */
  @ManagedAttribute
  public long getNumMessagesReceived() {
    return this.num_msgs_received;
  }

  /**
   * Sums {@code size()} over every receive window. Entries without a window are skipped.
   *
   * @return the accumulated size of all receive windows
   */
  @ManagedAttribute(description = "Total number of undelivered messages in all receive windows")
  public long getXmitTableUndeliveredMessages() {
    long total = 0;
    for (ReceiverEntry receiver : recv_table.values()) {
      if (receiver.received_msgs == null) {
        continue;
      }
      total += receiver.received_msgs.size();
    }
    return total;
  }

  /**
   * Sums {@code getNumMissing()} over every receive window. Entries without a window are skipped.
   *
   * @return the accumulated number of missing messages
   */
  @ManagedAttribute(description = "Total number of missing messages in all receive windows")
  public long getXmitTableMissingMessages() {
    long total = 0;
    for (ReceiverEntry receiver : recv_table.values()) {
      if (receiver.received_msgs == null) {
        continue;
      }
      total += receiver.received_msgs.getNumMissing();
    }
    return total;
  }

  /**
   * Sums {@code getNumCompactions()} over all receive windows and then all send windows. Entries
   * without a window are skipped.
   *
   * @return the accumulated compaction count
   */
  @ManagedAttribute(description = "Number of compactions in all (receive and send) windows")
  public int getXmitTableNumCompactions() {
    int total = 0;
    for (ReceiverEntry receiver : recv_table.values()) {
      if (receiver.received_msgs == null) {
        continue;
      }
      total += receiver.received_msgs.getNumCompactions();
    }
    for (SenderEntry sender : send_table.values()) {
      if (sender.sent_msgs == null) {
        continue;
      }
      total += sender.sent_msgs.getNumCompactions();
    }
    return total;
  }

  /**
   * Sums {@code getNumMoves()} over all receive windows and then all send windows. Entries
   * without a window are skipped.
   *
   * @return the accumulated move count
   */
  @ManagedAttribute(description = "Number of moves in all (receive and send) windows")
  public int getXmitTableNumMoves() {
    int total = 0;
    for (ReceiverEntry receiver : recv_table.values()) {
      if (receiver.received_msgs == null) {
        continue;
      }
      total += receiver.received_msgs.getNumMoves();
    }
    for (SenderEntry sender : send_table.values()) {
      if (sender.sent_msgs == null) {
        continue;
      }
      total += sender.sent_msgs.getNumMoves();
    }
    return total;
  }

  /**
   * Sums {@code getNumResizes()} over all receive windows and then all send windows. Entries
   * without a window are skipped.
   *
   * @return the accumulated resize count
   */
  @ManagedAttribute(description = "Number of resizes in all (receive and send) windows")
  public int getXmitTableNumResizes() {
    int total = 0;
    for (ReceiverEntry receiver : recv_table.values()) {
      if (receiver.received_msgs == null) {
        continue;
      }
      total += receiver.received_msgs.getNumResizes();
    }
    for (SenderEntry sender : send_table.values()) {
      if (sender.sent_msgs == null) {
        continue;
      }
      total += sender.sent_msgs.getNumResizes();
    }
    return total;
  }

  /**
   * Sums {@code getNumPurges()} over all receive windows and then all send windows. Entries
   * without a window are skipped.
   *
   * @return the accumulated purge count
   */
  @ManagedAttribute(description = "Number of purges in all (receive and send) windows")
  public int getXmitTableNumPurges() {
    int total = 0;
    for (ReceiverEntry receiver : recv_table.values()) {
      if (receiver.received_msgs == null) {
        continue;
      }
      total += receiver.received_msgs.getNumPurges();
    }
    for (SenderEntry sender : send_table.values()) {
      if (sender.sent_msgs == null) {
        continue;
      }
      total += sender.sent_msgs.getNumPurges();
    }
    return total;
  }

  /**
   * Dumps every receive window, one {@code sender: window} line per incoming connection, preceded
   * by a line holding the local address.
   *
   * <p>The window is appended as an object rather than via an explicit {@code toString()} call,
   * so an entry whose window is {@code null} prints as {@code "null"} instead of throwing a
   * {@link NullPointerException}. This matches the null checks the other window accessors in
   * this class perform on {@code received_msgs}.
   *
   * @return the textual dump
   */
  @ManagedOperation(description = "Prints the contents of the receive windows for all members")
  public String printReceiveWindowMessages() {
    StringBuilder ret = new StringBuilder(local_addr + ":\n");
    for (Map.Entry<Address, ReceiverEntry> entry : recv_table.entrySet()) {
      Address addr = entry.getKey();
      Table<Message> buf = entry.getValue().received_msgs;
      // append(Object) renders null as "null" and otherwise calls toString()
      ret.append(addr).append(": ").append(buf).append('\n');
    }
    return ret.toString();
  }

  /**
   * Dumps every send window, one {@code destination: window} line per outgoing connection,
   * preceded by a line holding the local address.
   *
   * <p>The window is appended as an object rather than via an explicit {@code toString()} call,
   * so an entry whose window is {@code null} prints as {@code "null"} instead of throwing a
   * {@link NullPointerException}. This matches the null checks the other window accessors in
   * this class perform on {@code sent_msgs}.
   *
   * @return the textual dump
   */
  @ManagedOperation(description = "Prints the contents of the send windows for all members")
  public String printSendWindowMessages() {
    StringBuilder ret = new StringBuilder(local_addr + ":\n");
    for (Map.Entry<Address, SenderEntry> entry : send_table.entrySet()) {
      Address addr = entry.getKey();
      Table<Message> buf = entry.getValue().sent_msgs;
      // append(Object) renders null as "null" and otherwise calls toString()
      ret.append(addr).append(": ").append(buf).append('\n');
    }
    return ret.toString();
  }

  // Incremented in handleXmitRequest() by the number of seqnos in each request received.
  @ManagedAttribute(description = "Number of retransmit requests received")
  protected final AtomicLong xmit_reqs_received = new AtomicLong(0);

  // Incremented in retransmit() by the number of seqnos in each request sent.
  @ManagedAttribute(description = "Number of retransmit requests sent")
  protected final AtomicLong xmit_reqs_sent = new AtomicLong(0);

  // Incremented in handleXmitRequest() once per message that was found and passed down again.
  @ManagedAttribute(description = "Number of retransmit responses sent")
  protected final AtomicLong xmit_rsps_sent = new AtomicLong(0);

  /**
   * Tells whether the retransmit task is scheduled and has not completed or been cancelled.
   *
   * @return {@code true} if a retransmit future exists and is not done
   */
  @ManagedAttribute(description = "Is the retransmit task running")
  public boolean isXmitTaskRunning() {
    if (xmit_task == null) {
      return false;
    }
    return !xmit_task.isDone();
  }

  /**
   * Returns the configured maximum retransmit time.
   *
   * @return the value in milliseconds
   */
  public long getMaxRetransmitTime() {
    return this.max_retransmit_time;
  }

  /**
   * Stores the new maximum retransmit time. If an age-out cache exists and the value is positive,
   * the cache's timeout is updated as well; otherwise the cache is left untouched.
   *
   * @param max_retransmit_time the new value in milliseconds
   */
  @Property(
      description =
          "Max number of milliseconds we try to retransmit a message to any given member. After that, "
              + "the connection is removed. Any new connection to that member will start with seqno #1 again. 0 disables this")
  public void setMaxRetransmitTime(long max_retransmit_time) {
    this.max_retransmit_time = max_retransmit_time;
    if (cache == null || max_retransmit_time <= 0) {
      return;
    }
    cache.setTimeout(max_retransmit_time);
  }

  /**
   * Returns the number of entries in the age-out cache.
   *
   * @return the cache size, or 0 when no cache exists
   */
  @ManagedAttribute
  public int getAgeOutCacheSize() {
    if (cache == null) {
      return 0;
    }
    return cache.size();
  }

  /**
   * Renders the age-out cache.
   *
   * @return the cache's string form, or {@code "n/a"} when no cache exists
   */
  @ManagedOperation
  public String printAgeOutCache() {
    if (cache == null) {
      return "n/a";
    }
    return cache.toString();
  }

  /**
   * Returns the age-out cache.
   *
   * @return the cache, or {@code null} when none has been created
   */
  public AgeOutCache<Address> getAgeOutCache() {
    return this.cache;
  }

  /** Zeroes the sent/received message counters and the three retransmission counters. */
  public void resetStats() {
    num_msgs_received = 0;
    num_msgs_sent = 0;
    xmit_reqs_received.set(0);
    xmit_reqs_sent.set(0);
    xmit_rsps_sent.set(0);
  }

  /**
   * Returns the scheduler used by this protocol.
   *
   * @return the timer, or {@code null} if none has been set yet
   */
  public TimeScheduler getTimer() {
    return this.timer;
  }

  /**
   * Injects the scheduler. Intended for unit tests only; do not call from production code.
   *
   * @param timer the scheduler to use
   */
  public void setTimer(TimeScheduler timer) {
    this.timer = timer;
  }

  /**
   * Runs the superclass initialization and then validates the configuration.
   *
   * @throws IllegalArgumentException if {@code max_stable_msgs} is below 1 or {@code max_bytes}
   *     is not positive
   * @throws Exception if the superclass initialization fails
   */
  public void init() throws Exception {
    super.init();

    final boolean stable_msgs_valid = max_stable_msgs >= 1;
    if (!stable_msgs_valid) {
      throw new IllegalArgumentException("max_stable_msgs ( " + max_stable_msgs + ") must be > 0");
    }

    final boolean max_bytes_valid = max_bytes > 0;
    if (!max_bytes_valid) {
      throw new IllegalArgumentException("max_bytes has to be > 0");
    }
  }

  /**
   * Starts the protocol: fetches the timer from the transport, creates the age-out cache (only
   * when {@code max_retransmit_time > 0}), marks the protocol as running and schedules the
   * background tasks. The stable task and the connection reaper are only scheduled when their
   * intervals are positive; the retransmit task is always started.
   *
   * @throws Exception if the transport has no timer
   */
  public void start() throws Exception {
    timer = getTransport().getTimer();
    if (timer == null) {
      throw new Exception("timer is null");
    }

    if (max_retransmit_time > 0) {
      cache = new AgeOutCache<Address>(timer, max_retransmit_time, this);
    }

    running = true;

    if (stable_interval > 0) {
      startStableTask();
    }
    if (conn_expiry_timeout > 0) {
      startConnectionReaper();
    }
    startRetransmitTask();
  }

  /**
   * Stops the protocol: clears the running flag first, then cancels the stable, reaper and
   * retransmit tasks, and finally drops all connections.
   */
  public void stop() {
    running = false;

    // cancel the background tasks ...
    stopStableTask();
    stopConnectionReaper();
    stopRetransmitTask();

    // ... and only then tear down the connection tables
    removeAllConnections();
  }

  /**
   * Handles events travelling up the stack.
   *
   * <p>Only {@code MSG} events that carry a unicast message (non-null destination), do not have
   * the {@code NO_RELIABILITY} flag set and carry this protocol's header are consumed here; for
   * those the method dispatches on the header type and returns {@code null}. Everything else is
   * passed to the protocol above unchanged.
   *
   * @param evt the event coming from the protocol below
   * @return {@code null} for messages consumed here, otherwise whatever the protocol above returns
   */
  public Object up(Event evt) {
    Message msg;
    Address dst, src;
    Unicast2Header hdr;

    switch (evt.getType()) {
      case Event.MSG:
        msg = (Message) evt.getArg();
        dst = msg.getDest();

        if (dst == null || msg.isFlagSet(Message.NO_RELIABILITY)) // only handle unicast messages
        break; // pass up

        // The header is read with getHeader() rather than removed: with loopback=true at the
        // transport level a removed header would be missing on retransmit (bela Aug 22 2006)
        hdr = (Unicast2Header) msg.getHeader(this.id);
        if (hdr == null) break; // not one of ours: pass up
        src = msg.getSrc();
        switch (hdr.type) {
          case Unicast2Header.DATA: // received regular message
            handleDataReceived(src, hdr.seqno, hdr.conn_id, hdr.first, msg, evt);
            return null; // we pass the deliverable message up in handleDataReceived()
          case Unicast2Header.XMIT_REQ: // received a retransmission request; payload is a SeqnoList
            handleXmitRequest(src, (SeqnoList) msg.getObject());
            break;
          case Unicast2Header.SEND_FIRST_SEQNO:
            handleResendingOfFirstMessage(src, hdr.seqno);
            break;
          case Unicast2Header.STABLE:
            stable(msg.getSrc(), hdr.conn_id, hdr.seqno, hdr.high_seqno);
            break;
          default:
            log.error("UnicastHeader type " + hdr.type + " not known !");
            break;
        }
        return null; // control messages (and unknown header types) are never passed up
    }

    return up_prot.up(evt); // Pass up to the layer above us
  }

  /**
   * Handles events travelling down the stack.
   *
   * <ul>
   *   <li>{@code MSG}: a unicast message without the {@code NO_RELIABILITY} flag is given a DATA
   *       header with the next seqno for its destination, stored in that destination's send
   *       window and passed down; the method then returns {@code null}. While the protocol is not
   *       running such messages are discarded. Other messages are passed down untouched.
   *   <li>{@code VIEW_CHANGE}: the member list is replaced and connections to addresses that are
   *       not in the new view are removed; the event is then passed down.
   *   <li>{@code SET_LOCAL_ADDRESS}: the local address is recorded; the event is then passed down.
   * </ul>
   *
   * @param evt the event coming from the protocol above
   * @return {@code null} for unicast messages handled here, otherwise whatever the protocol below
   *     returns
   */
  public Object down(Event evt) {
    switch (evt.getType()) {
      case Event.MSG: // Add UnicastHeader, add to the send window and pass down
        Message msg = (Message) evt.getArg();
        Address dst = msg.getDest();

        /* only handle unicast messages */
        if (dst == null || msg.isFlagSet(Message.NO_RELIABILITY)) break;

        if (!running) {
          if (log.isTraceEnabled())
            log.trace("discarded message as start() has not yet been called, message: " + msg);
          return null;
        }

        // Look up the connection to dst, creating it on first use. putIfAbsent() resolves the race
        // between concurrent senders: the loser adopts the winner's entry.
        SenderEntry entry = send_table.get(dst);
        if (entry == null) {
          entry = new SenderEntry(getNewConnectionId());
          SenderEntry existing = send_table.putIfAbsent(dst, entry);
          if (existing != null) entry = existing;
          else {
            if (log.isTraceEnabled())
              log.trace(
                  local_addr
                      + ": created connection to "
                      + dst
                      + " (conn_id="
                      + entry.send_conn_id
                      + ")");
            // connections to non-members are tracked in the age-out cache (see expired())
            if (cache != null && !members.contains(dst)) cache.add(dst);
          }
        }

        short send_conn_id = entry.send_conn_id;
        long seqno = entry.sent_msgs_seqno.getAndIncrement();
        long sleep = 10;
        // Retry adding to the send window until it succeeds or the protocol is stopped, backing
        // off exponentially (10 ms doubling up to 5 s) between attempts.
        while (running) {
          try {
            msg.putHeader(
                this.id,
                Unicast2Header.createDataHeader(seqno, send_conn_id, seqno == DEFAULT_FIRST_SEQNO));
            entry.sent_msgs.add(seqno, msg); // add *including* UnicastHeader, adds to retransmitter
            if (conn_expiry_timeout > 0) entry.update();
            break;
          } catch (Throwable t) {
            if (!running) break;
            if (log.isWarnEnabled()) log.warn("failed sending message", t);
            Util.sleep(sleep);
            sleep = Math.min(5000, sleep * 2);
          }
        }

        if (log.isTraceEnabled()) {
          StringBuilder sb = new StringBuilder();
          sb.append(local_addr)
              .append(" --> DATA(")
              .append(dst)
              .append(": #")
              .append(seqno)
              .append(", conn_id=")
              .append(send_conn_id);
          if (seqno == DEFAULT_FIRST_SEQNO) sb.append(", first");
          sb.append(')');
          log.trace(sb);
        }

        // A failure below is only logged: the message is already in the send window at this
        // point.
        try {
          down_prot.down(evt);
          num_msgs_sent++;
        } catch (Throwable t) {
          log.warn("failed sending the message", t);
        }
        return null; // we already passed the msg down

      case Event.VIEW_CHANGE: // remove connections to peers that are not members anymore !
        View view = (View) evt.getArg();
        List<Address> new_members = view.getMembers();
        // start from every peer we have a connection with, then subtract the new membership
        Set<Address> non_members = new HashSet<Address>(send_table.keySet());
        non_members.addAll(recv_table.keySet());

        members = new_members;
        non_members.removeAll(new_members);
        if (cache != null) cache.removeAll(new_members);

        if (!non_members.isEmpty()) {
          if (log.isTraceEnabled()) log.trace("removing non members " + non_members);
          for (Address non_mbr : non_members) removeConnection(non_mbr);
        }
        break;

      case Event.SET_LOCAL_ADDRESS:
        local_addr = (Address) evt.getArg();
        break;
    }

    return down_prot.down(evt); // Pass on to the layer below us
  }

  /**
   * Processes a STABLE message from a receiver. The send window for that receiver is purged up to
   * {@code hd}; if the window's highest seqno is greater than {@code hr}, every message still in
   * the window in the range {@code [hr .. highest]} is passed down again.
   *
   * <p>The message is ignored when there is no send window for {@code sender}, and discarded with
   * a warning when its connection id differs from ours.
   *
   * @param sender the member which sent the STABLE message
   * @param conn_id the connection id carried by the STABLE message
   * @param hd Highest delivered seqno reported by the receiver
   * @param hr Highest received seqno reported by the receiver
   */
  protected void stable(Address sender, short conn_id, long hd, long hr) {
    final SenderEntry entry = send_table.get(sender);
    if (entry == null) {
      return;
    }
    final Table<Message> window = entry.sent_msgs;
    if (window == null) {
      return;
    }

    if (log.isTraceEnabled()) {
      String line =
          local_addr + " <-- STABLE(" + sender + ": " + hd + "-" + hr + ", conn_id=" + conn_id + ")";
      log.trace(line);
    }

    if (entry.send_conn_id != conn_id) {
      String warning =
          local_addr
              + ": my conn_id ("
              + entry.send_conn_id
              + ") != received conn_id ("
              + conn_id
              + "); discarding STABLE message !";
      log.warn(warning);
      return;
    }

    window.purge(hd, true);

    final long highest_sent = window.getHighestReceived();
    if (highest_sent <= hr) {
      return; // receiver is up to date
    }

    // destination is still the same (the member which sent the STABLE message)
    long seqno = hr;
    while (seqno <= highest_sent) {
      Message pending = window.get(seqno);
      if (pending != null) {
        down_prot.down(new Event(Event.MSG, pending));
      }
      seqno++;
    }
  }

  /**
   * Sends a STABLE message, built from the receive window's digest, to every sender we have a
   * receive window for. For a window whose highest seqno has not changed since the previous call,
   * at most {@code max_stable_msgs} STABLE messages are sent; after that the window is skipped
   * until its highest seqno changes.
   */
  @ManagedOperation(
      description =
          "Sends a STABLE message to all senders. This causes message purging and potential"
              + " retransmissions from senders")
  public void sendStableMessages() {
    for (Map.Entry<Address, ReceiverEntry> conn : recv_table.entrySet()) {
      final Address dest = conn.getKey();
      final ReceiverEntry receiver = conn.getValue();
      if (receiver == null) {
        continue;
      }
      final Table<Message> window = receiver.received_msgs;
      if (window == null) {
        continue;
      }

      final long[] digest = window.getDigest();
      final long low = digest[0];
      final long high = digest[1];

      if (receiver.last_highest != high) {
        // window advanced: restart the per-seqno counter
        receiver.last_highest = high;
        receiver.num_stable_msgs = 1;
      } else if (receiver.num_stable_msgs < max_stable_msgs) {
        receiver.num_stable_msgs++;
      } else {
        continue; // enough STABLE messages sent for this seqno
      }

      sendStableMessage(dest, receiver.recv_conn_id, low, high);
    }
  }

  /**
   * Builds an OOB message carrying a STABLE header and passes it down towards {@code dest}.
   *
   * @param dest the member to notify
   * @param conn_id the connection id to put into the header
   * @param hd highest delivered seqno
   * @param hr highest received seqno
   */
  protected void sendStableMessage(Address dest, short conn_id, long hd, long hr) {
    final Message stable_msg = new Message(dest, null, null);
    stable_msg.putHeader(this.id, Unicast2Header.createStableHeader(conn_id, hd, hr));
    stable_msg.setFlag(Message.OOB);

    if (log.isTraceEnabled()) {
      String line =
          local_addr + " --> STABLE(" + dest + ": " + hd + "-" + hr + ", conn_id=" + conn_id + ")";
      log.trace(line);
    }

    final Event evt = new Event(Event.MSG, stable_msg);
    down_prot.down(evt);
  }

  /**
   * Schedules the periodic task that calls {@link #sendStableMessages()} every
   * {@code stable_interval} ms (fixed delay). Does nothing if a task is already scheduled and not
   * done. Failures inside a run are logged and do not propagate.
   */
  protected void startStableTask() {
    final boolean already_scheduled = stable_task_future != null && !stable_task_future.isDone();
    if (already_scheduled) {
      return;
    }

    final Runnable stable_task =
        new Runnable() {
          public void run() {
            try {
              sendStableMessages();
            } catch (Throwable t) {
              log.error("sending of STABLE messages failed", t);
            }
          }
        };

    stable_task_future =
        timer.scheduleWithFixedDelay(
            stable_task, stable_interval, stable_interval, TimeUnit.MILLISECONDS);

    if (log.isTraceEnabled()) {
      log.trace("stable task started");
    }
  }

  /** Cancels the stable task (without interrupting a run in progress) and forgets its handle. */
  protected void stopStableTask() {
    if (stable_task_future == null) {
      return;
    }
    stable_task_future.cancel(false);
    stable_task_future = null;
  }

  /**
   * Schedules a {@code ConnectionReaper} every {@code conn_expiry_timeout} ms (fixed delay).
   * Does nothing if a reaper is already scheduled and not done.
   */
  protected synchronized void startConnectionReaper() {
    final boolean already_scheduled = connection_reaper != null && !connection_reaper.isDone();
    if (already_scheduled) {
      return;
    }
    final ConnectionReaper reaper = new ConnectionReaper();
    connection_reaper =
        timer.scheduleWithFixedDelay(
            reaper, conn_expiry_timeout, conn_expiry_timeout, TimeUnit.MILLISECONDS);
  }

  /**
   * Cancels the connection reaper (without interrupting a run in progress). The handle is kept,
   * so it subsequently reports itself as done.
   */
  protected synchronized void stopConnectionReaper() {
    if (connection_reaper == null) {
      return;
    }
    connection_reaper.cancel(false);
  }

  /**
   * Removes both the send and the receive connection for the given member, in that order. This
   * method is public only so it can be invoked by unit tests; it should not otherwise be used.
   *
   * @param mbr the member whose connections are dropped
   */
  public void removeConnection(Address mbr) {
    // outgoing side first, then incoming
    this.removeSendConnection(mbr);
    this.removeReceiveConnection(mbr);
  }

  /**
   * Drops the outgoing connection to the given member, if there is one.
   *
   * @param mbr the destination whose send entry is removed
   */
  public void removeSendConnection(Address mbr) {
    this.send_table.remove(mbr);
  }

  /**
   * Drops the incoming connection from the given member, if there is one. Before the entry is
   * reset, a final STABLE message with the window's highest delivered and highest received seqnos
   * is sent to that member (when the entry has a window).
   *
   * @param mbr the sender whose receive entry is removed
   */
  public void removeReceiveConnection(Address mbr) {
    final ReceiverEntry removed = recv_table.remove(mbr);
    if (removed == null) {
      return;
    }

    final Table<Message> window = removed.received_msgs;
    if (window != null) {
      final long highest_delivered = window.getHighestDelivered();
      final long highest_received = window.getHighestReceived();
      sendStableMessage(mbr, removed.recv_conn_id, highest_delivered, highest_received);
    }
    removed.reset();
  }

  /**
   * Drops every connection: clears the send table, sends STABLE messages for the receive windows,
   * resets each receive entry and finally clears the receive table. This method is public only so
   * it can be invoked by unit tests; it should not otherwise be used.
   */
  @ManagedOperation(
      description = "Trashes all connections to other nodes. This is only used for testing")
  public void removeAllConnections() {
    send_table.clear();

    // tell the senders where we stand before the windows go away
    sendStableMessages();

    for (ReceiverEntry receiver : recv_table.values()) {
      receiver.reset();
    }
    recv_table.clear();
  }

  /**
   * Asks {@code sender} to retransmit the given seqnos by sending it an OOB message with an
   * XMIT_REQ header and the seqno list as payload. The sent-requests counter grows by the number
   * of seqnos in the list.
   *
   * @param missing the seqnos to request
   * @param sender the member to ask
   */
  public void retransmit(SeqnoList missing, Address sender) {
    final Unicast2Header xmit_hdr = Unicast2Header.createXmitReqHeader();
    final Message request = new Message(sender, null, missing);
    request.setFlag(Message.OOB);

    if (log.isTraceEnabled()) {
      log.trace(local_addr + ": sending XMIT_REQ (" + missing + ") to " + sender);
    }

    request.putHeader(this.id, xmit_hdr);
    final Event evt = new Event(Event.MSG, request);
    down_prot.down(evt);

    xmit_reqs_sent.addAndGet(missing.size());
  }

  /**
   * Callback of the {@code AgeOutCache}: removes the connection to a member whose cache entry
   * expired. A {@code null} key is ignored.
   *
   * @param key the member whose entry expired
   */
  public void expired(Address key) {
    if (key == null) {
      return;
    }
    if (log.isDebugEnabled()) {
      log.debug("removing connection to " + key + " because it expired");
    }
    removeConnection(key);
  }

  /**
   * Handles a DATA message: adds it to the sender's receive window and delivers whatever has
   * become deliverable.
   *
   * <p>The receiver entry is obtained from {@link #getReceiverEntry}; when that returns {@code
   * null} the message is dropped here. If the message was newly added and the entry's byte
   * counter says so ({@code incrementStable()}), a STABLE message is sent back to the sender. A
   * newly added OOB message is passed up immediately. Afterwards a single thread at a time - the
   * one that wins the window's {@code processing} flag - removes batches of messages from the
   * window and passes the non-OOB ones up.
   *
   * @param sender the member which sent the message
   * @param seqno the message's sequence number
   * @param conn_id the connection id from the header
   * @param first whether the header marks this as the first message of the connection
   * @param msg the message itself
   * @param evt the event which carried the message; passed up as-is for OOB messages
   */
  protected void handleDataReceived(
      Address sender, long seqno, short conn_id, boolean first, Message msg, Event evt) {
    if (log.isTraceEnabled()) {
      StringBuilder sb = new StringBuilder();
      sb.append(local_addr).append(" <-- DATA(").append(sender).append(": #").append(seqno);
      if (conn_id != 0) sb.append(", conn_id=").append(conn_id);
      if (first) sb.append(", first");
      sb.append(')');
      log.trace(sb);
    }

    ReceiverEntry entry = getReceiverEntry(sender, seqno, first, conn_id);
    if (entry == null) return; // no usable connection: message is dropped
    if (conn_expiry_timeout > 0) entry.update();
    Table<Message> win = entry.received_msgs;
    boolean added = win.add(seqno, msg); // win is guaranteed to be non-null if we get here
    num_msgs_received++; // counted even when the message was not added

    if (added) {
      int len = msg.getLength();
      if (len > 0 && entry.incrementStable(len))
        sendStableMessage(
            sender, entry.recv_conn_id, win.getHighestDelivered(), win.getHighestReceived());
    }

    // An OOB message is passed up immediately. Later, when remove() is called, we discard it. This
    // affects ordering !
    // http://jira.jboss.com/jira/browse/JGRP-377
    if (msg.isFlagSet(Message.OOB) && added) {
      try {
        up_prot.up(evt);
      } catch (Throwable t) {
        log.error("couldn't deliver OOB message " + msg, t);
      }
    }

    // Only the thread which flips the flag from false to true goes on to deliver; everybody else
    // returns here.
    final AtomicBoolean processing = win.getProcessing();
    if (!processing.compareAndSet(false, true)) {
      return;
    }

    // Try to remove as many messages as possible and pass them up.
    // Prevents concurrent passing up of messages by different threads
    // (http://jira.jboss.com/jira/browse/JGRP-198);
    // this is all the more important once we have a concurrent stack
    // (http://jira.jboss.com/jira/browse/JGRP-181),
    // where lots of threads can come up to this point concurrently, but only 1 is allowed to pass
    // at a time
    // We *can* deliver messages from *different* senders concurrently, e.g. reception of P1, Q1,
    // P2, Q2 can result in
    // delivery of P1, Q1, Q2, P2: FIFO (implemented by UNICAST) says messages need to be delivered
    // only in the
    // order in which they were sent by their senders
    boolean released_processing = false;
    try {
      while (true) {
        List<Message> msgs =
            win.removeMany(processing, true, max_msg_batch_size); // remove my own messages
        if (msgs == null || msgs.isEmpty()) {
          // nothing left to deliver: record that the flag must not be reset in finally
          released_processing = true;
          return;
        }

        for (Message m : msgs) {
          // discard OOB msg: it has already been delivered
          // (http://jira.jboss.com/jira/browse/JGRP-377)
          if (m.isFlagSet(Message.OOB)) continue;
          try {
            up_prot.up(new Event(Event.MSG, m));
          } catch (Throwable t) {
            log.error("couldn't deliver message " + m, t);
          }
        }
      }
    } finally {
      // processing is always set in win.remove(processing) above and never here ! This code is just
      // a
      // 2nd line of defense should there be an exception before win.remove(processing) sets
      // processing
      if (!released_processing) processing.set(false);
    }
  }

  /**
   * Finds (or, for a "first" message, creates) the receiver entry to which a DATA message from
   * {@code sender} belongs.
   *
   * <ul>
   *   <li>If an entry with a matching connection id exists it is returned without locking.
   *   <li>Otherwise, under {@code recv_table_lock}: for a message flagged {@code first}, a missing
   *       entry is created, and an entry with a different connection id is replaced by a new one
   *       starting at {@code seqno}.
   *   <li>For a message not flagged {@code first}, a missing entry or a connection id mismatch
   *       means we don't know where the connection starts: the sender is asked for its first
   *       seqno and {@code null} is returned, so the caller drops the message.
   * </ul>
   *
   * <p>The lock is acquired once and released exactly once in {@code finally}; the request for
   * the first seqno is sent only after the lock has been released.
   *
   * @param sender the member which sent the message
   * @param seqno the message's sequence number
   * @param first whether the message is flagged as the first of its connection
   * @param conn_id the connection id carried by the message
   * @return the entry to add the message to, or {@code null} if the message has to be dropped
   */
  protected ReceiverEntry getReceiverEntry(
      Address sender, long seqno, boolean first, short conn_id) {
    ReceiverEntry entry = recv_table.get(sender);
    if (entry != null && entry.recv_conn_id == conn_id) return entry;

    boolean request_first_seqno = false;
    recv_table_lock.lock();
    try {
      entry = recv_table.get(sender); // re-read under the lock
      if (first) {
        if (entry == null) {
          entry = getOrCreateReceiverEntry(sender, seqno, conn_id);
        } else if (conn_id != entry.recv_conn_id) {
          if (log.isTraceEnabled())
            log.trace(
                local_addr
                    + ": conn_id="
                    + conn_id
                    + " != "
                    + entry.recv_conn_id
                    + "; resetting receiver window");

          recv_table.remove(sender);
          entry = getOrCreateReceiverEntry(sender, seqno, conn_id);
        }
        // else: another thread installed a matching entry in the meantime; use it
      } else if (entry == null || entry.recv_conn_id != conn_id) {
        request_first_seqno = true;
      }
    } finally {
      recv_table_lock.unlock();
    }

    if (request_first_seqno) {
      // sent outside the lock; the current message is dropped by the caller
      sendRequestForFirstSeqno(sender, seqno);
      return null;
    }
    return entry;
  }

  /**
   * Creates a receiver entry whose window starts just below {@code seqno} and installs it for
   * {@code sender} unless an entry is already present.
   *
   * @param sender the member the entry is for
   * @param seqno the first seqno expected; the window is created with {@code seqno - 1}
   * @param conn_id the connection id to store in the entry
   * @return the entry now registered for {@code sender}: the existing one if there was one,
   *     otherwise the newly created one
   */
  protected ReceiverEntry getOrCreateReceiverEntry(Address sender, long seqno, short conn_id) {
    final long offset = seqno - 1;
    final Table<Message> window =
        new Table<Message>(
            xmit_table_num_rows,
            xmit_table_msgs_per_row,
            offset,
            xmit_table_resize_factor,
            xmit_table_max_compaction_time);
    final ReceiverEntry fresh = new ReceiverEntry(window, conn_id);

    final ReceiverEntry existing = recv_table.putIfAbsent(sender, fresh);
    if (existing != null) {
      return existing;
    }

    if (log.isTraceEnabled()) {
      log.trace(
          local_addr
              + ": created receiver window for "
              + sender
              + " at seqno=#"
              + seqno
              + " for conn-id="
              + conn_id);
    }
    return fresh;
  }

  /**
   * Answers a retransmission request: every requested seqno still present in the send window for
   * {@code sender} is passed down again. Seqnos that are no longer in the window are logged at
   * warn level (unless the requester is ourselves) and skipped. The received-requests counter
   * grows by the size of the list even when there is no send window for the requester.
   *
   * @param sender the member which asked for the retransmission
   * @param missing the seqnos it is missing
   */
  protected void handleXmitRequest(Address sender, SeqnoList missing) {
    if (log.isTraceEnabled()) {
      StringBuilder line = new StringBuilder();
      line.append(local_addr);
      line.append(" <-- XMIT(");
      line.append(sender);
      line.append(": #");
      line.append(missing);
      line.append(')');
      log.trace(line);
    }

    final SenderEntry entry = send_table.get(sender);
    xmit_reqs_received.addAndGet(missing.size());
    if (entry == null) {
      return;
    }
    final Table<Message> window = entry.sent_msgs;
    if (window == null) {
      return;
    }

    for (long seqno : missing) {
      final Message found = window.get(seqno);
      if (found != null) {
        down_prot.down(new Event(Event.MSG, found));
        xmit_rsps_sent.incrementAndGet();
        continue;
      }

      if (log.isWarnEnabled() && !local_addr.equals(sender)) {
        String warning =
            "(requester="
                + sender
                + ", local_addr="
                + this.local_addr
                + ") message "
                + sender
                + "::"
                + seqno
                + " not found in retransmission table of "
                + sender
                + ":\n"
                + window;
        log.warn(warning);
      }
    }
  }

  /**
   * Handles a SEND_FIRST_SEQNO request by resending the stored messages, the first of which is
   * sent as a copy whose header has {@code first} set.
   *
   * @param sender the member that requested the resend; its sender window is looked up in
   *     {@code send_table}
   * @param seqno upper bound (inclusive): the non-null messages in the range
   *     {@code [low + 1 .. seqno]} of the sender window are resent
   */
  protected void handleResendingOfFirstMessage(Address sender, long seqno) {
    if (log.isTraceEnabled()) {
      log.trace(local_addr + " <-- SEND_FIRST_SEQNO(" + sender + "," + seqno + ")");
    }

    SenderEntry entry = send_table.get(sender);
    Table<Message> sent = (entry == null) ? null : entry.sent_msgs;
    if (sent == null) {
      if (log.isErrorEnabled()) {
        log.error(local_addr + ": sender window for " + sender + " not found");
      }
      return;
    }

    boolean first_marked = false;
    long current = sent.getLow() + 1;
    while (current <= seqno) {
      Message stored = sent.get(current++);
      if (stored == null) {
        continue;
      }

      Message outgoing = stored;
      if (!first_marked) {
        first_marked = true;
        // Work on a copy of both the message and its header, so that setting 'first' does not
        // modify the message kept in the sender retransmission window
        // (https://jira.jboss.org/jira/browse/JGRP-965)
        outgoing = stored.copy();
        Unicast2Header original_hdr = (Unicast2Header) outgoing.getHeader(this.id);
        Unicast2Header marked_hdr = original_hdr.copy();
        marked_hdr.first = true;
        outgoing.putHeader(this.id, marked_hdr);
      }
      down_prot.down(new Event(Event.MSG, outgoing));
    }
  }

  /**
   * Schedules a {@link RetransmitTask} with a fixed delay of {@code xmit_interval} ms, unless a
   * previously scheduled task exists and is not done yet.
   */
  protected void startRetransmitTask() {
    boolean still_scheduled = xmit_task != null && !xmit_task.isDone();
    if (still_scheduled) {
      return;
    }
    xmit_task =
        timer.scheduleWithFixedDelay(
            new RetransmitTask(), 0, xmit_interval, TimeUnit.MILLISECONDS);
  }

  /** Cancels the retransmit task (if one is set) and clears the reference to it. */
  protected void stopRetransmitTask() {
    if (xmit_task == null) {
      return;
    }
    xmit_task.cancel(true);
    xmit_task = null;
  }

  /**
   * Returns the current value of {@code last_conn_id} and advances it by one; the counter wraps
   * to 0 once it has reached {@code Short.MAX_VALUE} (or if it is negative).
   *
   * @return the connection id that was current before the counter was advanced
   */
  protected synchronized short getNewConnectionId() {
    final short current = last_conn_id;
    boolean can_increment = current >= 0 && current < Short.MAX_VALUE;
    if (can_increment) {
      last_conn_id++;
    } else {
      last_conn_id = 0;
    }
    return current;
  }

  /**
   * Sends an OOB message with a SEND_FIRST_SEQNO header to {@code dest}.
   *
   * @param dest the destination of the request
   * @param seqno_received the seqno placed into the SEND_FIRST_SEQNO header
   */
  protected void sendRequestForFirstSeqno(Address dest, long seqno_received) {
    Unicast2Header request_hdr = Unicast2Header.createSendFirstSeqnoHeader(seqno_received);
    Message request = new Message(dest);
    request.setFlag(Message.OOB);
    request.putHeader(this.id, request_hdr);

    if (log.isTraceEnabled()) {
      log.trace(local_addr + " --> SEND_FIRST_SEQNO(" + dest + "," + seqno_received + ")");
    }
    down_prot.down(new Event(Event.MSG, request));
  }

  /**
   * Removes every send and receive connection whose age is at least {@code conn_expiry_timeout}
   * ms. Does nothing when {@code conn_expiry_timeout} is not positive.
   */
  @ManagedOperation(
      description = "Closes connections that have been idle for more than conn_expiry_timeout ms")
  public void reapIdleConnections() {
    if (conn_expiry_timeout <= 0) {
      return;
    }

    // expired connections in send_table
    for (Map.Entry<Address, SenderEntry> candidate : send_table.entrySet()) {
      long age = candidate.getValue().age();
      if (age < conn_expiry_timeout) {
        continue;
      }
      removeSendConnection(candidate.getKey());
      if (log.isDebugEnabled()) {
        log.debug(
            local_addr
                + ": removed expired connection for "
                + candidate.getKey()
                + " ("
                + age
                + " ms old) from send_table");
      }
    }

    // expired connections in recv_table
    for (Map.Entry<Address, ReceiverEntry> candidate : recv_table.entrySet()) {
      long age = candidate.getValue().age();
      if (age < conn_expiry_timeout) {
        continue;
      }
      removeReceiveConnection(candidate.getKey());
      if (log.isDebugEnabled()) {
        log.debug(
            local_addr
                + ": removed expired connection for "
                + candidate.getKey()
                + " ("
                + age
                + " ms old) from recv_table");
      }
    }
  }

  /**
   * Header attached to UNICAST2 messages. Which fields are marshalled depends on the type:
   *
   * <pre>
   * | DATA             | seqno | conn_id | first |
   * | XMIT_REQ         |
   * | STABLE           | seqno | high_seqno | conn_id |
   * | SEND_FIRST_SEQNO | seqno |
   * </pre>
   */
  public static class Unicast2Header extends Header {
    public static final byte DATA = 0;
    public static final byte XMIT_REQ = 1;
    public static final byte SEND_FIRST_SEQNO = 2;
    public static final byte STABLE = 3;

    byte type;
    long seqno; // DATA, STABLE and SEND_FIRST_SEQNO
    long high_seqno; // STABLE
    short conn_id; // DATA, STABLE
    boolean first; // DATA

    /** No-arg constructor, used for externalization. */
    public Unicast2Header() {}

    protected Unicast2Header(byte type) {
      this(type, 0L, 0L, (short) 0, false);
    }

    protected Unicast2Header(byte type, long seqno) {
      this(type, seqno, 0L, (short) 0, false);
    }

    protected Unicast2Header(byte type, long seqno, long high, short conn_id, boolean first) {
      this.type = type;
      this.seqno = seqno;
      this.high_seqno = high;
      this.conn_id = conn_id;
      this.first = first;
    }

    /** Creates a DATA header. */
    public static Unicast2Header createDataHeader(long seqno, short conn_id, boolean first) {
      return new Unicast2Header(DATA, seqno, 0L, conn_id, first);
    }

    /** Creates an XMIT_REQ header; it carries no fields besides the type. */
    public static Unicast2Header createXmitReqHeader() {
      return new Unicast2Header(XMIT_REQ);
    }

    /**
     * Creates a STABLE header with {@code low} as seqno and {@code high} as high_seqno.
     *
     * @throws IllegalArgumentException if {@code low > high}
     */
    public static Unicast2Header createStableHeader(short conn_id, long low, long high) {
      if (low > high) {
        throw new IllegalArgumentException("low (" + low + ") needs to be <= high (" + high + ")");
      }
      return new Unicast2Header(STABLE, low, high, conn_id, false);
    }

    /** Creates a SEND_FIRST_SEQNO header carrying {@code seqno_received}. */
    public static Unicast2Header createSendFirstSeqnoHeader(long seqno_received) {
      return new Unicast2Header(SEND_FIRST_SEQNO, seqno_received);
    }

    public byte getType() {
      return type;
    }

    public long getSeqno() {
      return seqno;
    }

    public long getHighSeqno() {
      return high_seqno;
    }

    public short getConnId() {
      return conn_id;
    }

    public boolean isFirst() {
      return first;
    }

    public String toString() {
      StringBuilder text = new StringBuilder(type2Str(type));
      text.append(", seqno=").append(seqno);
      if (conn_id != 0) {
        text.append(", conn_id=").append(conn_id);
      }
      if (first) {
        text.append(", first");
      }
      return text.toString();
    }

    /** Returns the symbolic name of a header type, or {@code "<unknown>"}. */
    public static String type2Str(byte t) {
      if (t == DATA) {
        return "DATA";
      }
      if (t == XMIT_REQ) {
        return "XMIT_REQ";
      }
      if (t == SEND_FIRST_SEQNO) {
        return "SEND_FIRST_SEQNO";
      }
      if (t == STABLE) {
        return "STABLE";
      }
      return "<unknown>";
    }

    /** Returns the size computed for this header; mirrors the fields written by writeTo. */
    public final int size() {
      int total = Global.BYTE_SIZE; // type
      if (type == DATA) {
        total += Util.size(seqno); // seqno
        total += Global.SHORT_SIZE; // conn_id
        total += Global.BYTE_SIZE; // first
      } else if (type == STABLE) {
        total += Util.size(seqno, high_seqno);
        total += Global.SHORT_SIZE; // conn_id
      } else if (type == SEND_FIRST_SEQNO) {
        total += Util.size(seqno);
      }
      // XMIT_REQ has no fields besides the type
      return total;
    }

    /** Returns a new header with the same field values as this one. */
    public Unicast2Header copy() {
      return new Unicast2Header(type, seqno, high_seqno, conn_id, first);
    }

    /** Writes the type byte followed by the type-specific fields. */
    public void writeTo(DataOutput out) throws Exception {
      out.writeByte(type);
      if (type == DATA) {
        Util.writeLong(seqno, out);
        out.writeShort(conn_id);
        out.writeBoolean(first);
      } else if (type == STABLE) {
        Util.writeLongSequence(seqno, high_seqno, out);
        out.writeShort(conn_id);
      } else if (type == SEND_FIRST_SEQNO) {
        Util.writeLong(seqno, out);
      }
      // XMIT_REQ: nothing more to write
    }

    /** Reads the type byte and then the type-specific fields, in the order used by writeTo. */
    public void readFrom(DataInput in) throws Exception {
      type = in.readByte();
      if (type == DATA) {
        seqno = Util.readLong(in);
        conn_id = in.readShort();
        first = in.readBoolean();
      } else if (type == STABLE) {
        long[] range = Util.readLongSequence(in);
        seqno = range[0];
        high_seqno = range[1];
        conn_id = in.readShort();
      } else if (type == SEND_FIRST_SEQNO) {
        seqno = Util.readLong(in);
      }
      // XMIT_REQ: nothing more to read
    }
  }

  /**
   * State kept per peer for messages sent by us: the table of sent messages, the seqno counter,
   * the connection id and a timestamp that is refreshed by {@link #update()}.
   */
  protected final class SenderEntry {
    // stores (and retransmits) msgs sent by us to a given peer
    final Table<Message> sent_msgs;
    // seqno for msgs sent by us
    final AtomicLong sent_msgs_seqno = new AtomicLong(DEFAULT_FIRST_SEQNO);
    final short send_conn_id;
    protected final AtomicLong timestamp = new AtomicLong(0);

    public SenderEntry(short send_conn_id) {
      this.sent_msgs =
          new Table<Message>(
              xmit_table_num_rows,
              xmit_table_msgs_per_row,
              0,
              xmit_table_resize_factor,
              xmit_table_max_compaction_time);
      this.send_conn_id = send_conn_id;
      update();
    }

    /** Sets the timestamp to the current time. */
    void update() {
      long now = System.currentTimeMillis();
      timestamp.set(now);
    }

    /** Returns the number of milliseconds elapsed since the timestamp was last set. */
    long age() {
      long now = System.currentTimeMillis();
      return now - timestamp.get();
    }

    public String toString() {
      StringBuilder text = new StringBuilder();
      if (sent_msgs != null) {
        text.append(sent_msgs).append(", ");
      }
      return text.append("send_conn_id=")
          .append(send_conn_id)
          .append(" (")
          .append(age())
          .append(" ms old)")
          .toString();
    }
  }

  /**
   * State kept per peer for messages received from it: the table of received messages, the
   * connection id, a byte counter used by {@link #incrementStable(int)} and a timestamp that is
   * refreshed by {@link #update()}.
   */
  protected final class ReceiverEntry {
    // stores all msgs rcvd by a certain peer in seqno-order
    protected final Table<Message> received_msgs;
    protected final short recv_conn_id;
    protected int received_bytes = 0;
    protected final AtomicLong timestamp = new AtomicLong(0);
    protected final Lock lock = new ReentrantLock();

    protected long last_highest = -1;
    protected int num_stable_msgs = 0;

    public ReceiverEntry(Table<Message> received_msgs, short recv_conn_id) {
      this.recv_conn_id = recv_conn_id;
      this.received_msgs = received_msgs;
      update();
    }

    /**
     * Adds {@code len} to the byte counter while holding {@code lock}. If the sum reaches or
     * exceeds {@code max_bytes}, the counter is reset to 0 and true is returned; otherwise the
     * sum is stored and false is returned.
     */
    boolean incrementStable(int len) {
      lock.lock();
      try {
        int total = received_bytes + len;
        boolean limit_reached = total >= max_bytes;
        received_bytes = limit_reached ? 0 : total;
        return limit_reached;
      } finally {
        lock.unlock();
      }
    }

    /** Resets the byte counter, {@code last_highest} and {@code num_stable_msgs}. */
    void reset() {
      received_bytes = 0;
      last_highest = -1;
      num_stable_msgs = 0;
    }

    /** Sets the timestamp to the current time. */
    void update() {
      long now = System.currentTimeMillis();
      timestamp.set(now);
    }

    /** Returns the number of milliseconds elapsed since the timestamp was last set. */
    long age() {
      long now = System.currentTimeMillis();
      return now - timestamp.get();
    }

    public String toString() {
      StringBuilder text = new StringBuilder();
      if (received_msgs != null) {
        text.append(received_msgs).append(", ");
      }
      return text.append("recv_conn_id=")
          .append(recv_conn_id)
          .append(" (")
          .append(age())
          .append(" ms old)")
          .toString();
    }
  }

  /** Runnable that invokes {@link #reapIdleConnections()} on the enclosing protocol. */
  protected class ConnectionReaper implements Runnable {
    @Override
    public void run() {
      UNICAST2.this.reapIdleConnections();
    }
  }

  /**
   * Retransmitter task: on every run it walks over all receiver entries and, for each table that
   * reports missing messages, asks the corresponding member to retransmit them. It is scheduled
   * every xmit_interval ms by startRetransmitTask().
   */
  protected class RetransmitTask implements Runnable {

    public void run() {
      for (Map.Entry<Address, ReceiverEntry> candidate : recv_table.entrySet()) {
        ReceiverEntry receiver = candidate.getValue();
        if (receiver == null || receiver.received_msgs == null) {
          continue;
        }
        Table<Message> window = receiver.received_msgs;
        if (window.getNumMissing() <= 0) {
          continue; // no gaps for this member
        }
        SeqnoList gaps = window.getMissing();
        if (gaps != null) {
          // the map key is the member the retransmit request is sent to
          retransmit(gaps, candidate.getKey());
        }
      }
    }
  }
}
Exemplo n.º 2
0
/**
 * JChannel is a default implementation of a Channel abstraction.
 *
 * <p>JChannel is instantiated using an appropriate form of a protocol stack description. Protocol
 * stack can be described using a file, URL or a stream containing XML stack description.
 *
 * @author Bela Ban
 * @since 2.0
 */
@MBean(description = "JGroups channel")
public class JChannel extends Channel {

  /** The default protocol stack configuration, used by the no-arg constructor */
  public static final String DEFAULT_PROTOCOL_STACK = "udp.xml";

  /* the address of this JChannel instance */
  protected Address local_addr;

  /* generators registered via addAddressGenerator(); the list is created lazily on first add */
  protected List<AddressGenerator> address_generators;

  /* the logical name of this channel, assigned by setName() */
  protected String name;

  /* the channel (also known as group) name */
  protected String cluster_name;

  /* the latest view of the group membership */
  protected View my_view;

  /* the protocol stack, used to send and receive messages */
  protected ProtocolStack prot_stack;

  /* getState() resets this promise and then waits on it for the result of the state transfer */
  protected final Promise<StateTransferResult> state_promise = new Promise<>();

  /**
   * True if a state transfer protocol is available, false otherwise (set by CONFIG event from
   * STATE_TRANSFER protocol)
   */
  protected boolean state_transfer_supported = false;

  /**
   * True if a flush protocol is available, false otherwise (set by CONFIG event from FLUSH
   * protocol)
   */
  protected volatile boolean flush_supported = false;

  protected final ConcurrentMap<String, Object> config = Util.createConcurrentMap(16);

  /** Whether channel statistics are collected */
  @ManagedAttribute(description = "Collect channel statistics", writable = true)
  protected boolean stats = true;

  /* channel statistics counters; all four are set back to 0 by resetStats() */
  protected long sent_msgs = 0, received_msgs = 0, sent_bytes = 0, received_bytes = 0;

  protected final DiagnosticsHandler.ProbeHandler probe_handler = new MyProbeHandler();

  /**
   * Creates a JChannel, optionally without a protocol stack (for programmatic creation of the
   * channel and its protocol stack).
   *
   * @param create_protocol_stack If true, the stack is initialized from {@link
   *     #DEFAULT_PROTOCOL_STACK}; any exception raised while doing so is rethrown wrapped in a
   *     {@link RuntimeException}. If false, no protocol stack is created
   */
  public JChannel(boolean create_protocol_stack) {
    if (!create_protocol_stack) {
      return; // leave the channel without a stack
    }
    try {
      init(ConfiguratorFactory.getStackConfigurator(DEFAULT_PROTOCOL_STACK));
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }

  /**
   * Constructs a {@code JChannel} instance with the protocol stack specified by the {@link
   * #DEFAULT_PROTOCOL_STACK} constant. Delegates to {@link #JChannel(String)}.
   *
   * @throws Exception If problems occur during the configuration or initialization of the protocol
   *     stack.
   */
  public JChannel() throws Exception {
    this(DEFAULT_PROTOCOL_STACK);
  }

  /**
   * Constructs a JChannel instance with the protocol stack configuration contained in the
   * specified file. The file is turned into a stack configurator by {@code ConfiguratorFactory}
   * and passed to {@link #JChannel(ProtocolStackConfigurator)}.
   *
   * @param properties A file containing a JGroups XML protocol stack configuration.
   * @throws Exception If problems occur during the configuration or initialization of the protocol
   *     stack.
   */
  public JChannel(File properties) throws Exception {
    this(ConfiguratorFactory.getStackConfigurator(properties));
  }

  /**
   * Constructs a JChannel instance with the protocol stack configuration contained in the
   * specified XML element. The element is turned into a stack configurator by {@code
   * ConfiguratorFactory} and passed to {@link #JChannel(ProtocolStackConfigurator)}.
   *
   * @param properties An XML element containing a JGroups XML protocol stack configuration.
   * @throws Exception If problems occur during the configuration or initialization of the protocol
   *     stack.
   */
  public JChannel(Element properties) throws Exception {
    this(ConfiguratorFactory.getStackConfigurator(properties));
  }

  /**
   * Constructs a JChannel instance with the protocol stack configuration indicated by the
   * specified URL. The URL is turned into a stack configurator by {@code ConfiguratorFactory} and
   * passed to {@link #JChannel(ProtocolStackConfigurator)}.
   *
   * @param properties A URL pointing to a JGroups XML protocol stack configuration.
   * @throws Exception If problems occur during the configuration or initialization of the protocol
   *     stack.
   */
  public JChannel(URL properties) throws Exception {
    this(ConfiguratorFactory.getStackConfigurator(properties));
  }

  /**
   * Constructs a JChannel instance with the protocol stack configuration based upon the specified
   * properties parameter. The string is turned into a stack configurator by {@code
   * ConfiguratorFactory} and passed to {@link #JChannel(ProtocolStackConfigurator)}.
   *
   * @param props A file containing a JGroups XML configuration, a URL pointing to an XML
   *     configuration, or an old style plain configuration string.
   * @throws Exception If problems occur during the configuration or initialization of the protocol
   *     stack.
   */
  public JChannel(String props) throws Exception {
    this(ConfiguratorFactory.getStackConfigurator(props));
  }

  /**
   * Creates a channel with a configuration based on an input stream. The stream is turned into a
   * stack configurator by {@code ConfiguratorFactory} and passed to {@link
   * #JChannel(ProtocolStackConfigurator)}.
   *
   * @param input An input stream, pointing to a streamed configuration
   * @throws Exception If problems occur during the configuration or initialization of the protocol
   *     stack.
   */
  public JChannel(InputStream input) throws Exception {
    this(ConfiguratorFactory.getStackConfigurator(input));
  }

  /**
   * Constructs a JChannel with the protocol stack configuration contained by the protocol stack
   * configurator parameter.
   *
   * <p>The constructors taking a {@code File}, {@code Element}, {@code URL}, {@code String} or
   * {@code InputStream} (and, through the {@code String} variant, the no-arg constructor)
   * delegate to this constructor.
   *
   * @param configurator A protocol stack configurator containing a JGroups protocol stack
   *     configuration.
   * @throws Exception If problems occur during the initialization of the protocol stack.
   */
  public JChannel(ProtocolStackConfigurator configurator) throws Exception {
    init(configurator);
  }

  /**
   * Creates a channel from an array of protocols; delegates to {@link #JChannel(Collection)}.
   * Note that after a {@link org.jgroups.JChannel#close()}, the protocol list <em>should not</em>
   * be reused, i.e. new JChannel(protocols) would reuse the same protocol list, and this might
   * lead to problems!
   *
   * @param protocols The list of protocols, from bottom to top, i.e. the first protocol in the
   *     list is the transport, the last the top protocol
   * @throws Exception If the delegated constructor fails
   */
  public JChannel(Protocol... protocols) throws Exception {
    this(Arrays.asList(protocols));
  }

  /**
   * Creates a channel from a collection of protocols: a new protocol stack is created, every
   * protocol is added to it (in iteration order), the stack is initialized and finally the fields
   * of all protocols in the stack are resolved and assigned. Note that after a {@link
   * org.jgroups.JChannel#close()}, the protocol list <em>should not</em> be reused, i.e. new
   * JChannel(protocols) would reuse the same protocol list, and this might lead to problems!
   *
   * @param protocols The list of protocols, from bottom to top, i.e. the first protocol in the
   *     list is the transport, the last the top protocol
   * @throws Exception If building or initializing the stack fails
   */
  public JChannel(Collection<Protocol> protocols) throws Exception {
    prot_stack = new ProtocolStack();
    setProtocolStack(prot_stack);

    for (Protocol protocol : protocols) {
      prot_stack.addProtocol(protocol);
      protocol.setProtocolStack(prot_stack);
    }
    prot_stack.init();

    // Substitute vars with defined system props (if any)
    Map<String, String> substitutions = new HashMap<>();
    List<Protocol> stack_protocols = prot_stack.getProtocols();
    for (Protocol protocol : stack_protocols) {
      Configurator.resolveAndAssignFields(protocol, substitutions);
    }
  }

  /**
   * Creates a channel with the same configuration as the channel passed to this constructor. This
   * is used by testing code, and should not be used by clients!
   *
   * @param ch The channel to initialize from; its {@code discard_own_messages} setting is copied
   *     as well
   * @throws Exception If {@code init(ch)} fails
   */
  public JChannel(JChannel ch) throws Exception {
    init(ch);
    discard_own_messages = ch.discard_own_messages;
  }

  /**
   * Returns the protocol stack.
   *
   * @return the current value of {@code prot_stack}; may be null if no stack has been set
   */
  public ProtocolStack getProtocolStack() {
    return prot_stack;
  }

  /**
   * Sets the protocol stack and, if it is not null, registers this channel with it.
   *
   * @param stack the new protocol stack (may be null)
   */
  public void setProtocolStack(ProtocolStack stack) {
    this.prot_stack = stack;
    if (this.prot_stack == null) {
      return;
    }
    this.prot_stack.setChannel(this);
  }

  /**
   * Returns the protocol stack configuration in string format, or null if there is no protocol
   * stack. An example of this property is<br>
   * "UDP:PING:FD:STABLE:NAKACK:UNICAST:FRAG:FLUSH:GMS:VIEW_ENFORCER:STATE_TRANSFER:QUEUE"
   */
  public String getProperties() {
    if (prot_stack == null) {
      return null;
    }
    return prot_stack.printProtocolSpec(true);
  }

  /** Returns the current value of the {@code stats} flag. */
  public boolean statsEnabled() {
    return stats;
  }

  /**
   * Sets the {@code stats} flag.
   *
   * @param stats the new value of the flag
   */
  public void enableStats(boolean stats) {
    this.stats = stats;
  }

  /** Sets the sent/received message and byte counters back to 0. */
  @ManagedOperation
  public void resetStats() {
    received_bytes = 0;
    sent_bytes = 0;
    received_msgs = 0;
    sent_msgs = 0;
  }

  /** Returns the value of the {@code sent_msgs} counter. */
  @ManagedAttribute
  public long getSentMessages() {
    return sent_msgs;
  }

  /** Returns the value of the {@code sent_bytes} counter. */
  @ManagedAttribute
  public long getSentBytes() {
    return sent_bytes;
  }

  /** Returns the value of the {@code received_msgs} counter. */
  @ManagedAttribute
  public long getReceivedMessages() {
    return received_msgs;
  }

  /** Returns the value of the {@code received_bytes} counter. */
  @ManagedAttribute
  public long getReceivedBytes() {
    return received_bytes;
  }

  /** Returns the size reported by the timer, or -1 if no timer is available. */
  @ManagedAttribute
  public int getNumberOfTasksInTimer() {
    TimeScheduler scheduler = getTimer();
    if (scheduler == null) {
      return -1;
    }
    return scheduler.size();
  }

  /** Returns the timer's minimum number of threads, or -1 if no timer is available. */
  @ManagedAttribute
  public int getTimerThreads() {
    TimeScheduler scheduler = getTimer();
    if (scheduler == null) {
      return -1;
    }
    return scheduler.getMinThreads();
  }

  /**
   * Returns a dump of the tasks held by the timer.
   *
   * @return the timer's task dump, or {@code "<n/a>"} if no timer is available
   */
  @ManagedOperation
  public String dumpTimerQueue() {
    TimeScheduler timer = getTimer();
    // the placeholder previously lacked its closing '>'
    return timer != null ? timer.dumpTimerTasks() : "<n/a>";
  }

  /**
   * Returns a pretty-printed form of all the protocols, or null if there is no protocol stack.
   *
   * @param include_properties if set, the properties for each protocol will also be printed
   */
  @ManagedOperation
  public String printProtocolSpec(boolean include_properties) {
    ProtocolStack stack = getProtocolStack();
    if (stack == null) {
      return null;
    }
    return stack.printProtocolSpec(include_properties);
  }

  /**
   * Connects the channel to a group; equivalent to {@code connect(cluster_name, true)}.
   *
   * @param cluster_name the name of the cluster to connect to
   * @throws Exception propagated from {@code connect(String, boolean)}
   */
  @ManagedOperation(description = "Connects the channel to a group")
  public synchronized void connect(String cluster_name) throws Exception {
    connect(cluster_name, true);
  }

  /**
   * Connects the channel to a group. Returns immediately when {@code _preConnect} reports that
   * there is nothing to do. A connect event is only sent down the stack when {@code cluster_name}
   * is not null; in both cases the channel is then marked as connected and the channel listeners
   * are notified.
   *
   * @param cluster_name the name of the cluster to connect to (may be null)
   * @param useFlushIfPresent selects CONNECT_USE_FLUSH rather than CONNECT as the event type
   * @see JChannel#connect(String)
   */
  @ManagedOperation(description = "Connects the channel to a group")
  protected synchronized void connect(String cluster_name, boolean useFlushIfPresent)
      throws Exception {
    boolean proceed = _preConnect(cluster_name);
    if (!proceed) {
      return;
    }

    if (cluster_name != null) { // only connect if we are not a unicast channel
      int event_type = useFlushIfPresent ? Event.CONNECT_USE_FLUSH : Event.CONNECT;
      _connect(new Event(event_type, cluster_name));
    }

    state = State.CONNECTED;
    notifyChannelConnected(this);
  }

  /**
   * Connects this channel to a group and fetches the state; equivalent to {@code
   * connect(cluster_name, target, timeout, true)}.
   *
   * @param cluster_name the name of the cluster to connect to
   * @param target the state provider
   * @param timeout the timeout for the state transfer
   * @throws Exception propagated from the four-argument {@code connect}
   */
  public synchronized void connect(String cluster_name, Address target, long timeout)
      throws Exception {
    connect(cluster_name, target, timeout, true);
  }

  /**
   * Connects this channel to a group and gets a state from a specified state provider.
   *
   * <p>This method sends a connect-with-state-transfer event down the stack and then, if the view
   * has more than one member, invokes {@code getState}.
   *
   * <p>If the FLUSH protocol is in the channel's stack definition, only one flush round is executed
   * for both connecting and fetching the state rather than two flushes if we invoke {@code connect}
   * and {@code getState} in succession.
   *
   * <p>If the channel is already connected, {@code _preConnect} returns false and this method
   * returns without doing anything.
   *
   * @param cluster_name The cluster name to connect to. If null, the channel is only marked as
   *     connected: no connect event is sent and no state is fetched.
   * @param target The state provider, passed on to {@code getState}.
   * @param timeout The timeout for the state transfer.
   * @param useFlushIfPresent Selects the *_USE_FLUSH variant of the connect event and enables the
   *     {@code stopFlush()} call in the finally block.
   * @exception Exception The protocol stack cannot be started, or the JOIN failed
   * @exception StateTransferException State transfer was not successful
   */
  public synchronized void connect(
      String cluster_name, Address target, long timeout, boolean useFlushIfPresent)
      throws Exception {
    if (!_preConnect(cluster_name)) return;

    if (cluster_name == null) { // only connect if we are not a unicast channel
      state = State.CONNECTED;
      return;
    }

    // stays false if connecting fails, or if we turn out to be the only member
    boolean canFetchState = false;
    try {
      Event connect_event =
          useFlushIfPresent
              ? new Event(Event.CONNECT_WITH_STATE_TRANSFER_USE_FLUSH, cluster_name)
              : new Event(Event.CONNECT_WITH_STATE_TRANSFER, cluster_name);
      _connect(connect_event);
      state = State.CONNECTED;
      notifyChannelConnected(this);
      canFetchState = getView() != null && getView().size() > 1;

      // if I am not the only member in cluster then
      // (flush is not requested here: 'false' is passed as useFlushIfPresent to getState)
      if (canFetchState) getState(target, timeout, false); // fetch state from target
    } finally {
      // stopFlush if we fetched the state or failed to connect...
      if ((flushSupported() && useFlushIfPresent) && (canFetchState || state != State.CONNECTED))
        stopFlush();
    }
  }

  /**
   * Disconnects the channel if it is connecting or connected; does nothing when the channel is
   * open or closed.
   *
   * <p>When a cluster name is set, a DISCONNECT event is sent down the stack first; a failure to
   * do so is logged and does not abort the disconnect. The channel then goes back to the OPEN
   * state, the stack is stopped, the channel listeners are notified and {@code init()} is called.
   *
   * @throws IllegalStateException if the state is none of OPEN, CLOSED, CONNECTING or CONNECTED
   */
  @ManagedOperation(description = "Disconnects the channel if connected")
  public synchronized void disconnect() {
    switch (state) {
      case OPEN:
      case CLOSED:
        return; // not connected: nothing to do
      case CONNECTING:
      case CONNECTED:
        if (cluster_name != null) {
          // Send down a DISCONNECT event, which travels down to the GMS, where a response is
          // returned
          try {
            down(new Event(Event.DISCONNECT, local_addr)); // DISCONNECT is handled by each layer
          } catch (Throwable t) {
            // best effort: log and continue with the local teardown
            log.error(Util.getMessage("DisconnectFailure"), local_addr, t);
          }
        }
        state = State.OPEN;
        stopStack(true, false);
        notifyChannelDisconnected(this);
        init(); // sets local_addr=null; changed March 18 2003 (bela) -- prevented successful
                // rejoining
        break;
      default:
        throw new IllegalStateException("state " + state + " unknown");
    }
  }

  /** Disconnects and destroys the channel by calling {@code _close(true)}. */
  @ManagedOperation(description = "Disconnects and destroys the channel")
  public synchronized void close() {
    _close(true); // by default disconnect before closing channel and close mq
  }

  /**
   * Returns the statistics of the protocol stack; if that map is not null, the channel's own
   * counters are added to it under the key {@code "channel"}.
   *
   * @return the statistics map returned by the protocol stack (possibly null)
   */
  @ManagedOperation
  public Map<String, Object> dumpStats() {
    Map<String, Object> all_stats = prot_stack.dumpStats();
    if (all_stats == null) {
      return null;
    }
    Map<String, Long> channel_stats = dumpChannelStats();
    if (channel_stats != null) {
      all_stats.put("channel", channel_stats);
    }
    return all_stats;
  }

  /**
   * Returns the statistics of a single protocol, as reported by the protocol stack.
   *
   * @param protocol_name the name of the protocol
   * @param attrs the attributes to include; passed through to the protocol stack unchanged
   */
  public Map<String, Object> dumpStats(String protocol_name, List<String> attrs) {
    return prot_stack.dumpStats(protocol_name, attrs);
  }

  /**
   * Returns the statistics of a single protocol, as reported by the protocol stack; null is
   * passed as the attribute list.
   *
   * @param protocol_name the name of the protocol
   */
  @ManagedOperation
  public Map<String, Object> dumpStats(String protocol_name) {
    return prot_stack.dumpStats(protocol_name, null);
  }

  /**
   * Returns a new map holding the current values of the four channel counters, keyed by the
   * counter names.
   */
  protected Map<String, Long> dumpChannelStats() {
    Map<String, Long> counters = new HashMap<>();
    counters.put("sent_msgs", sent_msgs);
    counters.put("received_msgs", received_msgs);
    counters.put("sent_bytes", sent_bytes);
    counters.put("received_bytes", received_bytes);
    return counters;
  }

  /**
   * Sends a message by passing a MSG event down the stack.
   *
   * @param msg the message to send
   * @throws NullPointerException if {@code msg} is null (checked after the channel state)
   * @throws Exception propagated from {@code checkClosedOrNotConnected()} or {@code down}
   */
  public void send(Message msg) throws Exception {
    // the channel state is verified first, so it takes precedence over the null check
    checkClosedOrNotConnected();
    if (msg == null) throw new NullPointerException("msg is null");
    down(new Event(Event.MSG, msg));
  }

  /**
   * Wraps {@code obj} in a new message addressed to {@code dst} and sends it via {@link
   * #send(Message)}.
   */
  public void send(Address dst, Object obj) throws Exception {
    send(new Message(dst, obj));
  }

  /**
   * Wraps {@code buf} in a new message addressed to {@code dst} and sends it via {@link
   * #send(Message)}.
   */
  public void send(Address dst, byte[] buf) throws Exception {
    send(new Message(dst, buf));
  }

  /**
   * Creates a new message addressed to {@code dst} from {@code buf}, {@code offset} and {@code
   * length}, and sends it via {@link #send(Message)}.
   */
  public void send(Address dst, byte[] buf, int offset, int length) throws Exception {
    send(new Message(dst, buf, offset, length));
  }

  /** Returns the current view if the channel is connected, otherwise null. */
  public View getView() {
    if (state != State.CONNECTED) {
      return null;
    }
    return my_view;
  }

  /** Returns the string form of {@link #getView()}, or {@code "n/a"} if there is no view. */
  @ManagedAttribute(name = "view")
  public String getViewAsString() {
    View current = getView();
    if (current == null) {
      return "n/a";
    }
    return current.toString();
  }

  /** Returns the version description produced by {@code Version.printDescription()}. */
  @ManagedAttribute
  public static String getVersion() {
    return Version.printDescription();
  }

  /** Returns the local address, or null if the channel is closed. */
  public Address getAddress() {
    if (state == State.CLOSED) {
      return null;
    }
    return local_addr;
  }

  /** Returns the string form of the local address, or {@code "n/a"} if it is not set. */
  @ManagedAttribute(name = "address")
  public String getAddressAsString() {
    if (local_addr == null) {
      return "n/a";
    }
    return local_addr.toString();
  }

  /**
   * Returns the long string form of the local address if it is a {@code UUID}, otherwise null
   * (this includes the case where the local address is not set).
   */
  @ManagedAttribute(name = "address_uuid")
  public String getAddressAsUUID() {
    if (local_addr instanceof UUID) {
      return ((UUID) local_addr).toStringLong();
    }
    return null;
  }

  /** Returns the logical name of this channel (the value of the {@code name} field). */
  public String getName() {
    return name;
  }

  /**
   * Looks up the name registered for {@code member} via {@code UUID.get}.
   *
   * @param member the address to look up
   * @return the result of the lookup, or null if {@code member} is null
   */
  public String getName(Address member) {
    if (member == null) {
      return null;
    }
    return UUID.get(member);
  }

  /**
   * Sets the logical name of this channel. A null name is ignored. If a local address is already
   * set, the name is also registered for it via {@code UUID.add}.
   *
   * @param name the new logical name
   * @throws IllegalStateException if a non-null name is given while the channel is connected
   */
  @ManagedAttribute(
      writable = true,
      description =
          "The logical name of this channel. Stays with the channel until "
              + "the channel is closed")
  public void setName(String name) {
    if (name == null) {
      return;
    }
    if (isConnected()) {
      throw new IllegalStateException(
          "name cannot be set if channel is connected (should be done before)");
    }
    this.name = name;
    if (local_addr != null) {
      UUID.add(local_addr, this.name);
    }
  }

  /**
   * Fluent variant of {@link #setName(String)}.
   *
   * @return this channel, to allow call chaining
   */
  public JChannel name(String name) {
    setName(name);
    return this;
  }

  /**
   * Fluent variant of {@code setReceiver(Receiver)}.
   *
   * @return this channel, to allow call chaining
   */
  public JChannel receiver(Receiver r) {
    setReceiver(r);
    return this;
  }

  /** Returns the cluster name if the channel is connected, otherwise null. */
  @ManagedAttribute(description = "Returns cluster name this channel is connected to")
  public String getClusterName() {
    if (state != State.CONNECTED) {
      return null;
    }
    return cluster_name;
  }

  /**
   * Adds an {@link AddressGenerator}. New addresses will be generated using the new generator.
   * This should <em>not</em> be done while a channel is connected, but before connecting. A null
   * argument is ignored; the generator list is created on first use.
   *
   * @param address_generator the generator to add
   * @since 2.12
   */
  public void addAddressGenerator(AddressGenerator address_generator) {
    if (address_generator != null) {
      if (address_generators == null) {
        address_generators = new ArrayList<>(3);
      }
      address_generators.add(address_generator);
    }
  }

  /**
   * Removes a previously added {@link AddressGenerator}.
   *
   * @param address_generator the generator to remove
   * @return true if the generator was removed; false if the argument is null, no generator has
   *     been added yet, or the generator was not in the list
   */
  public boolean removeAddressGenerator(AddressGenerator address_generator) {
    if (address_generator == null || address_generators == null) {
      return false;
    }
    return address_generators.remove(address_generator);
  }

  /**
   * Retrieves state from the target member; equivalent to {@code getState(target, timeout,
   * true)}.
   *
   * @param target the state provider
   * @param timeout the timeout for the state transfer
   * @throws Exception propagated from the three-argument {@code getState}
   */
  public void getState(Address target, long timeout) throws Exception {
    getState(target, timeout, true);
  }

  /**
   * Retrieves state from the target member. See {@link #getState(Address,long)} for details.
   *
   * @param useFlushIfPresent if true, a callable that starts a flush on this channel is handed to
   *     the state transfer; if false, null is passed instead
   */
  public void getState(Address target, long timeout, boolean useFlushIfPresent) throws Exception {
    Callable<Boolean> flush_invoker = null;
    if (useFlushIfPresent) {
      flush_invoker = () -> Util.startFlush(JChannel.this);
    }
    getState(target, timeout, flush_invoker);
  }

  /**
   * Prepares the channel for connecting: calls {@code checkClosed()} and {@code setAddress()},
   * switches the state to CONNECTING and starts the stack. If starting the stack fails, the
   * previous state is restored and the exception is rethrown.
   *
   * @param cluster_name the cluster name passed to {@code startStack}
   * @return false if the channel is already connected (nothing is done), true otherwise
   */
  protected boolean _preConnect(String cluster_name) throws Exception {
    if (state == State.CONNECTED) {
      if (log.isTraceEnabled()) {
        log.trace("already connected to " + this.cluster_name);
      }
      return false;
    }

    checkClosed();
    setAddress();

    final State previous_state = state;
    state = State.CONNECTING;
    try {
      startStack(cluster_name);
      return true;
    } catch (Exception ex) {
      state = previous_state; // roll back so a later connect attempt starts from the same state
      throw ex;
    }
  }

  /**
   * Sends the connect event down the stack. If that fails for any reason, the stack is stopped,
   * the channel is put back into the OPEN state, {@code init()} is called and the failure is
   * rethrown wrapped in an {@link Exception}.
   *
   * @param connect_event the event to send down; its argument is used in the error message
   * @throws Exception wrapping the original failure as its cause
   */
  protected void _connect(Event connect_event) throws Exception {
    try {
      down(connect_event);
    } catch (Throwable t) {
      // undo what _preConnect did before reporting the failure
      stopStack(true, false);
      state = State.OPEN;
      init();
      throw new Exception("connecting to channel \"" + connect_event.getArg() + "\" failed", t);
    }
  }

  /**
   * Fetches the state from {@code target}, optionally flushing the cluster first.
   *
   * <p>If {@code target} is null, the coordinator is used. If the resulting target is the local
   * address, the method returns without fetching anything. When flush is supported and {@code
   * flushInvoker} is not null, the invoker is called before the state is requested, and {@code
   * stopFlush()} is called afterwards, including when requesting or waiting for the state throws.
   *
   * @param target the state provider, or null to use the coordinator
   * @param timeout the timeout for the state transfer
   * @param flushInvoker starts the flush and reports whether it succeeded; null to skip flushing
   * @throws IllegalStateException if state transfer is not supported, or if the flush could not
   *     be started
   * @throws StateTransferException if no result arrives within the timeout, or the result carries
   *     an exception
   * @throws Exception propagated from {@code checkClosedOrNotConnected()} or {@code down}
   */
  protected void getState(Address target, long timeout, Callable<Boolean> flushInvoker)
      throws Exception {
    checkClosedOrNotConnected();
    if (!state_transfer_supported)
      throw new IllegalStateException(
          "fetching state will fail as state transfer is not supported. "
              + "Add one of the state transfer protocols to your configuration");

    if (target == null) target = determineCoordinator();
    if (target != null && local_addr != null && target.equals(local_addr)) {
      if (log.isTraceEnabled())
        log.trace(
            local_addr
                + ": cannot get state from myself ("
                + target
                + "): probably the first member");
      return;
    }

    boolean initiateFlush = flushSupported() && flushInvoker != null;

    if (initiateFlush) {
      boolean successfulFlush = false;
      try {
        successfulFlush = flushInvoker.call();
      } catch (Throwable e) {
        // deliberately treated like an unsuccessful flush
        successfulFlush = false; // http://jira.jboss.com/jira/browse/JGRP-759
      }
      if (!successfulFlush)
        throw new IllegalStateException(
            "Node " + local_addr + " could not flush the cluster for state retrieval");
    }

    long start = System.currentTimeMillis();
    StateTransferResult result;
    try {
      state_promise.reset();
      StateTransferInfo state_info = new StateTransferInfo(target, timeout);
      down(new Event(Event.GET_STATE, state_info));
      result = state_promise.getResult(state_info.timeout);
    } finally {
      // the flush we started must be stopped even if the request or the wait fails
      if (initiateFlush) stopFlush();
    }

    if (result == null)
      throw new StateTransferException(
          "timeout during state transfer (" + (System.currentTimeMillis() - start) + "ms)");
    if (result.hasException())
      throw new StateTransferException("state transfer failed", result.getException());
  }

  /**
   * Callback method <br>
   * Called by the ProtocolStack when an event is received.
   *
   * <p>Some event types are pre-processed here (statistics and own-message discarding for MSG, view
   * bookkeeping for VIEW_CHANGE, feature flags for CONFIG, state transfer events) before the event
   * is handed to the installed up-handler or, if there is none, to the receiver.
   *
   * @param evt the event passed up by the protocol stack
   * @return the up-handler's or receiver callback's return value, the local address for
   *     GET_LOCAL_ADDRESS, or null if the event was discarded or nobody handled it
   */
  public Object up(Event evt) {
    switch (evt.getType()) {
      case Event.MSG:
        Message msg = (Message) evt.getArg();
        if (stats) {
          received_msgs++;
          received_bytes += msg.getLength();
        }

        // discard local messages (sent by myself to me)
        if (discard_own_messages
            && local_addr != null
            && msg.getSrc() != null
            && local_addr.equals(msg.getSrc())) return null;
        break;

      case Event.VIEW_CHANGE:
        View tmp = (View) evt.getArg();
        // a MergeView is stored as a plain View built from its view id and members
        if (tmp instanceof MergeView) my_view = new View(tmp.getViewId(), tmp.getMembers());
        else my_view = tmp;

        // Bela&Vladimir Oct 27th,2006 (JGroups 2.4): we need to set connected=true because a client
        // can call channel.getView() in the viewAccepted() callback invoked on this thread (the
        // callback is dispatched after this switch)

        // not good: we are only connected when we returned from connect() - bela June 22 2007
        // Changed: when a channel gets a view of which it is a member then it should be
        // connected even if connect() hasn't returned yet ! (bela Nov 2010)
        if (state != State.CONNECTED) state = State.CONNECTED;
        break;

      case Event.CONFIG:
        Map<String, Object> cfg = (Map<String, Object>) evt.getArg();
        if (cfg != null) {
          if (cfg.containsKey("state_transfer")) {
            state_transfer_supported = (Boolean) cfg.get("state_transfer");
          }
          if (cfg.containsKey("flush_supported")) {
            flush_supported = (Boolean) cfg.get("flush_supported");
          }
        }
        break;

      case Event.GET_STATE_OK:
        StateTransferResult result = (StateTransferResult) evt.getArg();
        if (up_handler != null) {
          try {
            Object retval = up_handler.up(evt);
            state_promise.setResult(new StateTransferResult());
            return retval;
          } catch (Throwable t) {
            // NOTE(review): after a failure we do not return; control continues with the receiver
            // block below and, after the switch, up_handler.up(evt) is invoked a second time.
            // Confirm this is intended.
            state_promise.setResult(new StateTransferResult(t));
          }
        }

        if (receiver != null) {
          try {
            if (result.hasBuffer()) {
              byte[] tmp_state = result.getBuffer();
              ByteArrayInputStream input = new ByteArrayInputStream(tmp_state);
              receiver.setState(input);
            }
            state_promise.setResult(result);
          } catch (Throwable t) {
            state_promise.setResult(new StateTransferResult(t));
          }
        }
        break;

      case Event.STATE_TRANSFER_INPUTSTREAM_CLOSED:
        // completes the pending state request with the result carried by the event
        state_promise.setResult((StateTransferResult) evt.getArg());
        break;

      case Event.STATE_TRANSFER_INPUTSTREAM:
        // Oct 13,2006: the promise is no longer completed here; it is completed when
        // Event.STATE_TRANSFER_INPUTSTREAM_CLOSED is received (see the case above)
        // state_promise.setResult(is != null? Boolean.TRUE : Boolean.FALSE);

        if (up_handler != null) return up_handler.up(evt);

        InputStream is = (InputStream) evt.getArg();
        if (is != null && receiver != null) {
          try {
            receiver.setState(is);
          } catch (Throwable t) {
            throw new RuntimeException("failed calling setState() in state requester", t);
          }
        }
        break;

      case Event.STATE_TRANSFER_OUTPUTSTREAM:
        if (receiver != null && evt.getArg() != null) {
          try {
            receiver.getState((OutputStream) evt.getArg());
          } catch (Exception e) {
            throw new RuntimeException("failed calling getState() in state provider", e);
          }
        }
        break;

      case Event.GET_LOCAL_ADDRESS:
        return local_addr;

      default:
        break;
    }

    // If an UpHandler is installed, pass all events to it and return (an UpHandler is e.g. a
    // building block)
    if (up_handler != null) return up_handler.up(evt);

    // otherwise dispatch to the receiver's callbacks, if a receiver is set
    if (receiver != null) return invokeCallback(evt.getType(), evt.getArg());
    return null;
  }

  /**
   * Callback invoked by the protocol stack to deliver a message batch. Updates the receive
   * statistics, drops the batch if it was sent by this channel and own messages are discarded, and
   * otherwise delivers each message to the up-handler or, if none is installed, to the receiver.
   * Failures of individual deliveries are logged and do not stop the remaining deliveries.
   *
   * @param batch the batch of messages to deliver
   */
  public void up(MessageBatch batch) {
    if (stats) {
      received_msgs += batch.size();
      received_bytes += batch.length();
    }

    // discard local messages (sent by myself to me)
    boolean ownBatch =
        discard_own_messages
            && local_addr != null
            && batch.sender() != null
            && local_addr.equals(batch.sender());
    if (ownBatch) {
      return;
    }

    for (Message msg : batch) {
      if (up_handler != null) {
        try {
          up_handler.up(new Event(Event.MSG, msg));
        } catch (Throwable t) {
          log.error(Util.getMessage("UpHandlerFailure"), t);
        }
        continue;
      }
      if (receiver == null) {
        continue;
      }
      try {
        receiver.receive(msg);
      } catch (Throwable t) {
        log.error(Util.getMessage("ReceiverFailure"), t);
      }
    }
  }

  /**
   * Passes an event down the protocol stack. Unlike {@link #send(Message)}, no check is made
   * whether the channel is closed or disconnected when the event carries a message.
   *
   * @param evt the event to send down (e.g. a message wrapped in an event); may be null
   * @return null if {@code evt} is null, otherwise whatever the protocol stack returns
   */
  public Object down(Event evt) {
    if (evt == null) {
      return null;
    }

    boolean countedMessage = stats && evt.getType() == Event.MSG;
    if (countedMessage) {
      sent_msgs++;
      Message msg = (Message) evt.getArg();
      sent_bytes += msg.getLength();
    }
    return prot_stack.down(evt);
  }

  /**
   * Builds a multi-line description of this channel.
   *
   * @param details if true, the discard/state-transfer settings and the properties are appended
   * @return one {@code key=value} pair per line
   */
  @ManagedOperation
  public String toString(boolean details) {
    StringBuilder out = new StringBuilder();
    out.append("local_addr=").append(local_addr).append('\n');
    out.append("cluster_name=").append(cluster_name).append('\n');
    out.append("my_view=").append(my_view).append('\n');
    out.append("state=").append(state).append('\n');

    if (!details) {
      return out.toString();
    }

    out.append("discard_own_messages=").append(discard_own_messages).append('\n');
    out.append("state_transfer_supported=").append(state_transfer_supported).append('\n');
    out.append("props=").append(getProperties()).append('\n');
    return out.toString();
  }

  /* ----------------------------------- Private Methods ------------------------------------- */
  /**
   * Dispatches an event to the matching callback of the receiver.
   *
   * @param type the event type
   * @param arg the event argument, cast to the type the callback expects
   * @return a {@link StateTransferInfo} holding the serialized application state for
   *     GET_APPLSTATE, {@code true} for BLOCK, and null for every other type
   */
  protected Object invokeCallback(int type, Object arg) {
    switch (type) {
      case Event.MSG:
        receiver.receive((Message) arg);
        return null;
      case Event.VIEW_CHANGE:
        receiver.viewAccepted((View) arg);
        return null;
      case Event.SUSPECT:
        receiver.suspect((Address) arg);
        return null;
      case Event.GET_APPLSTATE:
        byte[] serialized = null;
        if (receiver != null) {
          ByteArrayOutputStream sink = new ByteArrayOutputStream(1024);
          try {
            receiver.getState(sink);
            serialized = sink.toByteArray();
          } catch (Exception e) {
            throw new RuntimeException(local_addr + ": failed getting state from application", e);
          }
        }
        return new StateTransferInfo(null, 0L, serialized);
      case Event.BLOCK:
        receiver.block();
        return true;
      case Event.UNBLOCK:
        receiver.unblock();
        return null;
      default:
        return null;
    }
  }

  /**
   * Creates the protocol stack from the configuration supplied by the configurator. Variables in
   * every protocol configuration are substituted before the stack is set up.
   *
   * @param configurator the source of the protocol configurations
   * @throws Exception if setting up the stack fails
   */
  protected final void init(ProtocolStackConfigurator configurator) throws Exception {
    List<ProtocolConfiguration> protocolConfigs = configurator.getProtocolStack();
    // replace vars with system props
    for (ProtocolConfiguration protocolConfig : protocolConfigs) {
      protocolConfig.substituteVariables();
    }

    prot_stack = new ProtocolStack(this);
    // Setup protocol stack (creates protocol, calls init() on them)
    prot_stack.setup(protocolConfigs);
  }

  /**
   * Creates a new protocol stack for this channel and sets it up from the protocol stack of
   * another channel.
   *
   * @param ch the channel whose protocol stack is passed to the setup of the new stack
   * @throws IllegalArgumentException if {@code ch} is null
   * @throws Exception if setting up the stack fails
   */
  protected final void init(JChannel ch) throws Exception {
    if (ch == null) throw new IllegalArgumentException("channel is null");
    // the new stack is assigned before ch.getProtocolStack() is evaluated
    prot_stack = new ProtocolStack(this);
    prot_stack.setup(
        ch.getProtocolStack()); // Setup protocol stack (creates protocol, calls init() on them)
  }

  /**
   * Resets the per-connection variables (local address, cluster name and view) to null so the
   * channel is ready for a new {@code connect()}. If a local address is set, a REMOVE_ADDRESS
   * event for it is sent down the stack first. Called from {@code _close()} and from the failure
   * path of {@code _connect()}.
   */
  protected void init() {
    if (local_addr != null) down(new Event(Event.REMOVE_ADDRESS, local_addr));
    local_addr = null;
    cluster_name = null;
    my_view = null;
  }

  /**
   * Starts the protocol stack for the given cluster, installs a temporary single-member view and
   * registers the probe handler with the transport.
   *
   * @param cluster_name the cluster to join; null is accepted and only logged
   * @throws Exception if the stack cannot be started
   * @throws IllegalStateException if the channel is closed
   */
  protected void startStack(String cluster_name) throws Exception {
    // make sure the channel is not closed
    checkClosed();

    // remember the cluster name; a null name leaves the current one untouched
    if (cluster_name != null) {
      this.cluster_name = cluster_name;
    } else {
      log.debug("cluster_name is null, assuming unicast channel");
    }

    if (socket_factory != null) {
      prot_stack.getTopProtocol().setSocketFactory(socket_factory);
    }

    prot_stack.startStack(cluster_name, local_addr);

    // create a temporary (dummy) view in which this channel is the only member and coordinator
    List<Address> singleMember = new ArrayList<>(1);
    singleMember.add(local_addr);
    my_view = new View(local_addr, 0, singleMember);

    TP transport = prot_stack.getTransport();
    transport.registerProbeHandler(probe_handler);
  }

  /**
   * Generates a new local address and announces it. A REMOVE_ADDRESS event is sent down for the
   * previous address (if there was one), a logical name is generated when none is set, the
   * address/name pair is registered, and a SET_LOCAL_ADDRESS event is sent down the stack and to
   * the up-handler (if installed).
   */
  protected void setAddress() {
    final Address previous = local_addr;
    local_addr = generateAddress();
    if (previous != null) {
      down(new Event(Event.REMOVE_ADDRESS, previous));
    }

    // generate a logical name if not set
    boolean nameMissing = name == null || name.isEmpty();
    if (nameMissing) {
      name = Util.generateLocalName();
    }
    boolean nameUsable = name != null && !name.isEmpty();
    if (nameUsable) {
      UUID.add(local_addr, name);
    }

    Event setLocalAddress = new Event(Event.SET_LOCAL_ADDRESS, local_addr);
    down(setLocalAddress);
    if (up_handler != null) {
      up_handler.up(setLocalAddress);
    }
  }

  /**
   * Creates the local address of this channel.
   *
   * <p>Without address generators a random UUID is returned; with exactly one generator its
   * address is returned as is. With several generators every generated address must be an {@link
   * ExtendedUUID}: the first one is used as the result and the contents of the others are added to
   * it. Addresses that are null or of another type are reported and ignored.
   *
   * @return the generated address; a random UUID if no generator produced a usable address
   */
  protected Address generateAddress() {
    if (address_generators == null || address_generators.isEmpty()) return UUID.randomUUID();
    if (address_generators.size() == 1) return address_generators.get(0).generateAddress();

    // at this point we have multiple AddressGenerators installed
    Address[] addrs = new Address[address_generators.size()];
    for (int i = 0; i < addrs.length; i++) addrs[i] = address_generators.get(i).generateAddress();

    ExtendedUUID uuid = null;
    for (Address addr : addrs) {
      if (!(addr instanceof ExtendedUUID)) {
        // addr may be null here, so do not dereference it blindly when building the message
        log.error(
            "address generator %s does not subclass %s which is required if multiple address generators "
                + "are installed, removing it",
            addr != null ? addr.getClass().getSimpleName() : "null",
            ExtendedUUID.class.getSimpleName());
        continue;
      }
      ExtendedUUID current = (ExtendedUUID) addr;
      if (uuid == null) uuid = current;
      else uuid.addContents(current);
    }
    return uuid != null ? uuid : UUID.randomUUID();
  }

  /**
   * Health check: verifies that the channel has not been closed.
   *
   * @throws IllegalStateException if the channel state is CLOSED
   */
  protected void checkClosed() {
    if (state == State.CLOSED) throw new IllegalStateException("channel is closed");
  }

  /**
   * Verifies that the channel is usable for operations which need a connection.
   *
   * @throws IllegalStateException if the channel is closed, or if it is neither connecting nor
   *     connected
   */
  protected void checkClosedOrNotConnected() {
    if (state == State.CLOSED) {
      throw new IllegalStateException("channel is closed");
    }

    boolean connectingOrConnected = state == State.CONNECTING || state == State.CONNECTED;
    if (!connectingOrConnected) {
      throw new IllegalStateException("channel is disconnected");
    }
  }

  /**
   * Disconnects and closes the channel. Does nothing if the channel is already closed. Otherwise
   * this method does the following things, in this order:
   *
   * <ol>
   *   <li>Calls {@code this.disconnect} if the disconnect parameter is true
   *   <li>Stops and destroys the protocol stack via {@code stopStack(true, true)}
   *   <li>Sets the channel state to CLOSED
   *   <li>Notifies any channel listener of the channel close operation
   *   <li>Resets the channel variables via {@code init()}
   *   <li>Removes the address the channel had on entry from the UUID cache
   * </ol>
   *
   * @param disconnect whether to call {@code disconnect()} before closing
   */
  protected void _close(boolean disconnect) {
    // remember the address now: init() below sets local_addr to null
    Address old_addr = local_addr;
    if (state == State.CLOSED) return;

    if (disconnect) disconnect(); // leave group if connected
    stopStack(true, true);
    state = State.CLOSED;
    notifyChannelClosed(this);
    init(); // sets local_addr=null; changed March 18 2003 (bela) -- prevented successful rejoining
    if (old_addr != null) UUID.remove(old_addr);
  }

  /**
   * Stops and/or destroys the protocol stack and unregisters the probe handler from the
   * transport. Does nothing if there is no protocol stack. Failures while stopping or destroying
   * are logged, not propagated.
   *
   * @param stop whether to stop the stack
   * @param destroy whether to destroy the stack
   */
  protected void stopStack(boolean stop, boolean destroy) {
    if (prot_stack == null) {
      return;
    }

    try {
      if (stop) {
        prot_stack.stopStack(cluster_name);
      }
      if (destroy) {
        prot_stack.destroy();
      }
    } catch (Exception e) {
      log.error(Util.getMessage("StackDestroyFailure"), e);
    }

    TP transport = prot_stack.getTransport();
    if (transport == null) {
      return;
    }
    transport.unregisterProbeHandler(probe_handler);
  }

  /**
   * Returns whether flushing is supported. The flag is set from the "flush_supported" entry of a
   * CONFIG event passed up the stack.
   */
  public boolean flushSupported() {
    return flush_supported;
  }

  /**
   * Starts a flush by sending a SUSPEND event down the stack.
   *
   * @param automatic_resume if true, {@link #stopFlush()} is called before this method returns,
   *     whether or not the flush succeeded
   * @throws IllegalStateException if flushing is not supported
   * @throws Exception if sending the SUSPEND event failed; the cause is the underlying failure
   */
  public void startFlush(boolean automatic_resume) throws Exception {
    if (!flushSupported())
      throw new IllegalStateException(
          "Flush is not supported, add pbcast.FLUSH protocol to your configuration");

    try {
      down(new Event(Event.SUSPEND));
    } catch (Exception e) {
      // unwrap when there is a nested cause, but never lose the failure if there is none
      Throwable cause = e.getCause() != null ? e.getCause() : e;
      throw new Exception("Flush failed", cause);
    } finally {
      if (automatic_resume) stopFlush();
    }
  }

  /**
   * Starts a flush restricted to the given participants by sending a SUSPEND event down the
   * stack.
   *
   * @param flushParticipants the members to flush; all of them must be in the current view
   * @param automatic_resume if true, {@link #stopFlush(List)} is called before this method
   *     returns, whether or not the flush succeeded
   * @throws IllegalStateException if flushing is not supported
   * @throws IllegalArgumentException if there is no view or it lacks some of the participants
   * @throws Exception if sending the SUSPEND event failed; the cause is the underlying failure
   */
  public void startFlush(List<Address> flushParticipants, boolean automatic_resume)
      throws Exception {
    if (!flushSupported())
      throw new IllegalStateException(
          "Flush is not supported, add pbcast.FLUSH protocol to your configuration");
    View v = getView();
    boolean validParticipants = v != null && v.getMembers().containsAll(flushParticipants);
    if (!validParticipants)
      throw new IllegalArgumentException(
          "Current view " + v + " does not contain all flush participants " + flushParticipants);
    try {
      down(new Event(Event.SUSPEND, flushParticipants));
    } catch (Exception e) {
      // unwrap when there is a nested cause, but never lose the failure if there is none
      Throwable cause = e.getCause() != null ? e.getCause() : e;
      throw new Exception("Flush failed", cause);
    } finally {
      if (automatic_resume) stopFlush(flushParticipants);
    }
  }

  /**
   * Stops a flush by sending a RESUME event down the stack.
   *
   * @throws IllegalStateException if flushing is not supported
   */
  public void stopFlush() {
    if (!flushSupported())
      throw new IllegalStateException(
          "Flush is not supported, add pbcast.FLUSH protocol to your configuration");
    down(new Event(Event.RESUME));
  }

  /**
   * Stops a flush for the given participants by sending a RESUME event, carrying the participant
   * list, down the stack.
   *
   * @param flushParticipants the members passed along with the RESUME event
   * @throws IllegalStateException if flushing is not supported
   */
  public void stopFlush(List<Address> flushParticipants) {
    if (!flushSupported())
      throw new IllegalStateException(
          "Flush is not supported, add pbcast.FLUSH protocol to your configuration");
    down(new Event(Event.RESUME, flushParticipants));
  }

  /**
   * Returns the first member of the current view, which is treated as the coordinator.
   *
   * @return the first member, or null if there is no view or the view has no members
   */
  Address determineCoordinator() {
    List<Address> members = null;
    if (my_view != null) {
      members = my_view.getMembers();
    }
    if (members == null || members.isEmpty()) {
      return null;
    }
    return members.iterator().next();
  }

  /**
   * Returns the timer of the transport.
   *
   * @return the transport's timer, or null if there is no protocol stack or no transport
   */
  protected TimeScheduler getTimer() {
    if (prot_stack == null) {
      return null;
    }
    TP transport = prot_stack.getTransport();
    return transport != null ? transport.getTimer() : null;
  }

  /* ------------------------------- End of Private Methods ---------------------------------- */

  class MyProbeHandler implements DiagnosticsHandler.ProbeHandler {

    /**
     * Processes probe keys. Keys starting with "jmx" dump (or set) attributes, keys starting with
     * "reset-stats" reset all statistics, and keys starting with "invoke" or "op" invoke the
     * operation given after the '=' sign. Failing operations are logged, not propagated.
     *
     * @param keys the probe keys to process
     * @return the results collected while handling the keys
     */
    public Map<String, String> handleProbe(String... keys) {
      Map<String, String> results = new HashMap<>(3);
      for (String key : keys) {
        if (key.startsWith("jmx")) {
          handleJmx(results, key);
        } else if (key.startsWith("reset-stats")) {
          resetAllStats();
        } else if (key.startsWith("invoke") || key.startsWith("op")) {
          int separator = key.indexOf("=");
          if (separator == -1) {
            continue;
          }
          try {
            handleOperation(results, key.substring(separator + 1));
          } catch (Throwable throwable) {
            log.error(
                Util.getMessage("OperationInvocationFailure"),
                key.substring(separator + 1),
                throwable);
          }
        }
      }
      return results;
    }

    /** Returns descriptions of the probe keys handled by {@code handleProbe}. */
    public String[] supportedKeys() {
      return new String[] {"reset-stats", "jmx", "op=<operation>[<args>]"};
    }

    /** Resets the statistics of every protocol in the stack and then those of the channel. */
    protected void resetAllStats() {
      for (Protocol protocol : getProtocolStack().getProtocols()) {
        protocol.resetStatistics();
      }
      resetStats();
    }

    /**
     * Handles a "jmx" probe key and puts the resulting statistics into {@code map}.
     *
     * <p>Without a '=' in the input, the statistics of all protocols are dumped. Otherwise the text
     * after '=' names a protocol, optionally followed by '.' and a comma-separated attribute list.
     * List entries of the form {@code name=value} are treated as attribute assignments: they are
     * applied to the protocol and removed from the list before the remaining attributes are
     * dumped.
     *
     * @param map receives one entry per dumped protocol
     * @param input the probe key, e.g. {@code jmx}, {@code jmx=PROT} or {@code jmx=PROT.a,b=1}
     */
    protected void handleJmx(Map<String, String> map, String input) {
      int index = input.indexOf("=");
      if (index == -1) {
        copyStats(dumpStats(), map);
        return;
      }

      List<String> list = null;
      String protocol_name = input.substring(index + 1);
      index = protocol_name.indexOf(".");
      if (index > -1) {
        String rest = protocol_name;
        protocol_name = protocol_name.substring(0, index);
        String attrs = rest.substring(index + 1); // e.g. "num_sent,msgs,num_received_msgs"
        list = Util.parseStringList(attrs, ",");

        // check if there are any attribute-sets in the list
        for (Iterator<String> it = list.iterator(); it.hasNext(); ) {
          String tmp = it.next();
          index = tmp.indexOf("=");
          if (index != -1) {
            setAttribute(protocol_name, tmp.substring(0, index), tmp.substring(index + 1));
            it.remove();
          }
        }
      }
      copyStats(dumpStats(protocol_name, list), map);
    }

    /** Sets attribute {@code attrname} of the named protocol via its field or, failing that, a setter. */
    private void setAttribute(String protocol_name, String attrname, String attrvalue) {
      Protocol prot = prot_stack.findProtocol(protocol_name);
      if (prot == null) {
        // unknown protocol: nothing to set, and a null protocol must not reach findSetter()
        log.warn(Util.getMessage("FieldNotFound"), attrname, protocol_name);
        return;
      }

      Field field = Util.getField(prot.getClass(), attrname);
      if (field != null) {
        Object value = MethodCall.convert(attrvalue, field.getType());
        if (value != null) prot.setValue(attrname, value);
        return;
      }

      // try to find a setter for X, e.g. x(type-of-x) or setX(type-of-x)
      ResourceDMBean.Accessor setter = ResourceDMBean.findSetter(prot, attrname);
      if (setter == null) {
        log.warn(Util.getMessage("FieldNotFound"), attrname, protocol_name);
        return;
      }
      try {
        // the target type is the field type or the setter's parameter type itself; calling
        // getClass() on the parameter type would always yield java.lang.Class
        Class<?> type =
            setter instanceof ResourceDMBean.FieldAccessor
                ? ((ResourceDMBean.FieldAccessor) setter).getField().getType()
                : setter instanceof ResourceDMBean.MethodAccessor
                    ? ((ResourceDMBean.MethodAccessor) setter).getMethod().getParameterTypes()[0]
                    : null;
        Object converted_value = MethodCall.convert(attrvalue, type);
        setter.invoke(converted_value);
      } catch (Exception e) {
        log.error("unable to invoke %s() on %s: %s", setter, protocol_name, e);
      }
    }

    /** Copies every entry of {@code stats} into {@code map}, rendering each value as a string. */
    @SuppressWarnings("unchecked")
    private void copyStats(Map<String, Object> stats, Map<String, String> map) {
      if (stats == null) return;
      for (Map.Entry<String, Object> entry : stats.entrySet()) {
        Map<String, Object> tmp_map = (Map<String, Object>) entry.getValue();
        String key = entry.getKey();
        map.put(key, tmp_map != null ? tmp_map.toString() : null);
      }
    }

    /**
     * Invokes an operation on a protocol and puts the return value (if any) into map under the key
     * {@code Protocol.operationName}. Does nothing if the protocol or the method cannot be found.
     *
     * @param map receives the stringified return value of the operation
     * @param operation Protocol.OperationName[args], e.g. STABLE.foo[arg1 arg2 arg3]
     * @throws IllegalArgumentException if the protocol name or the closing bracket is missing
     * @throws Exception if the invocation fails
     */
    protected void handleOperation(Map<String, String> map, String operation) throws Exception {
      int dot = operation.indexOf(".");
      if (dot == -1) {
        throw new IllegalArgumentException(
            "operation " + operation + " is missing the protocol name");
      }
      String prot_name = operation.substring(0, dot);
      Protocol prot = prot_stack.findProtocol(prot_name);
      if (prot == null) {
        return; // less drastic than throwing an exception...
      }

      int open = operation.indexOf("[");
      boolean hasArgs = open != -1;
      String rawName = hasArgs ? operation.substring(dot + 1, open) : operation.substring(dot + 1);
      String method_name = rawName.trim();

      String[] args = null;
      if (hasArgs) {
        int close = operation.indexOf("]");
        if (close == -1) {
          throw new IllegalArgumentException("] not found");
        }
        List<String> parsed =
            Util.parseCommaDelimitedStrings(operation.substring(open + 1, close));
        args = parsed.toArray(new String[0]);
      }

      Method method = MethodCall.findMethod(prot.getClass(), method_name, args);
      if (method == null) {
        log.warn(
            Util.getMessage("MethodNotFound"),
            local_addr,
            prot.getClass().getSimpleName(),
            method_name);
        return;
      }

      // convert the string arguments to the parameter types of the method
      Object[] converted_args = null;
      MethodCall call = new MethodCall(method);
      if (args != null) {
        Class<?>[] types = method.getParameterTypes();
        converted_args = new Object[args.length];
        for (int pos = 0; pos < args.length; pos++) {
          converted_args[pos] = MethodCall.convert(args[pos], types[pos]);
        }
      }

      Object retval = call.invoke(prot, converted_args);
      if (retval == null) {
        return;
      }
      map.put(prot_name + "." + method_name, retval.toString());
    }
  }
}
Exemplo n.º 3
0
/**
 * Reliable unicast layer. Uses acknowledgement scheme similar to TCP to provide lossless
 * transmission of unicast messages (for reliable multicast see NAKACK layer). When a message is
 * sent to a peer for the first time, we add the pair <peer_addr, Entry> to the hashtable (peer
 * address is the key). All messages sent to that peer will be added to
 * hashtable.peer_addr.sent_msgs. When we receive a message from a peer for the first time, another
 * entry will be created and added to the hashtable (unless already existing). Msgs will then be
 * added to hashtable.peer_addr.received_msgs.
 *
 * <p>This layer is used to reliably transmit point-to-point messages, that is, either messages sent
 * to a single receiver (vs. messages multicast to a group) or for example replies to a multicast
 * message. The sender uses an <code>AckSenderWindow</code> which retransmits messages for which it
 * hasn't received an ACK, the receiver uses <code>AckReceiverWindow</code> which keeps track of the
 * lowest seqno received so far, and keeps messages in order.
 *
 * <p>Messages in both AckSenderWindows and AckReceiverWindows will be removed. A message will be
 * removed from AckSenderWindow when an ACK has been received for it and messages will be removed
 * from AckReceiverWindow whenever a message is received: the new message is added and then we try
 * to remove as many messages as possible (until we stop at a gap, or there are no more messages).
 *
 * @author Bela Ban
 */
@MBean(description = "Reliable unicast layer")
public class UNICAST extends Protocol implements AgeOutCache.Handler<Address> {
  public static final long DEFAULT_FIRST_SEQNO = Global.DEFAULT_FIRST_UNICAST_SEQNO;

  /* ------------------------------------------ Properties  ------------------------------------------ */

  @Deprecated
  protected int[] timeout = {
    400, 800, 1600, 3200
  }; // for AckSenderWindow: max time to wait for missing acks

  @Property(
      description =
          "Max number of messages to be removed from a retransmit window. This property might "
              + "get removed anytime, so don't use it !")
  protected int max_msg_batch_size = 500;

  @Property(
      description =
          "Time (in milliseconds) after which an idle incoming or outgoing connection is closed. The "
              + "connection will get re-established when used again. 0 disables connection reaping")
  protected long conn_expiry_timeout = 0;

  @Deprecated
  @Property(
      description =
          "Size (in bytes) of a Segment in the segments table. Only for experts, do not use !",
      deprecatedMessage = "not used anymore")
  protected int segment_capacity = 1000;

  @Property(
      description = "Number of rows of the matrix in the retransmission table (only for experts)",
      writable = false)
  protected int xmit_table_num_rows = 100;

  @Property(
      description =
          "Number of elements of a row of the matrix in the retransmission table (only for experts). "
              + "The capacity of the matrix is xmit_table_num_rows * xmit_table_msgs_per_row",
      writable = false)
  protected int xmit_table_msgs_per_row = 1000;

  @Property(
      description = "Resize factor of the matrix in the retransmission table (only for experts)",
      writable = false)
  protected double xmit_table_resize_factor = 1.2;

  @Property(
      description =
          "Number of milliseconds after which the matrix in the retransmission table "
              + "is compacted (only for experts)",
      writable = false)
  protected long xmit_table_max_compaction_time = 10 * 60 * 1000;

  // @Property(description="Max time (in ms) after which a connection to a non-member is closed")
  protected long max_retransmit_time = 60 * 1000L;

  @Property(
      description = "Interval (in milliseconds) at which messages in the send windows are resent")
  protected long xmit_interval = 2000;

  /* --------------------------------------------- JMX  ---------------------------------------------- */

  protected long num_msgs_sent = 0, num_msgs_received = 0;
  protected long num_acks_sent = 0, num_acks_received = 0, num_xmits = 0;

  /* --------------------------------------------- Fields ------------------------------------------------ */

  protected final ConcurrentMap<Address, SenderEntry> send_table = Util.createConcurrentMap();
  protected final ConcurrentMap<Address, ReceiverEntry> recv_table = Util.createConcurrentMap();

  protected final ReentrantLock recv_table_lock = new ReentrantLock();

  /** RetransmitTask running every xmit_interval ms */
  protected Future<?> xmit_task;

  protected volatile List<Address> members = new ArrayList<Address>(11);

  protected Address local_addr = null;

  protected TimeScheduler timer = null; // used for retransmissions (passed to AckSenderWindow)

  protected volatile boolean running = false;

  protected short last_conn_id = 0;

  protected AgeOutCache<Address> cache = null;

  protected Future<?> connection_reaper; // closes idle connections

  /** Returns the (deprecated) {@code timeout} array itself, not a copy. */
  public int[] getTimeout() {
    return timeout;
  }

  /**
   * Replaces the (deprecated) timeout array.
   *
   * @param val the new timeouts; null is ignored and leaves the current value in place
   */
  @Deprecated
  @Property(
      name = "timeout",
      converter = PropertyConverters.IntegerArray.class,
      deprecatedMessage = "not used anymore")
  public void setTimeout(int[] val) {
    if (val == null) {
      return;
    }
    timeout = val;
  }

  /**
   * Sets the maximum message batch size.
   *
   * @param size the new size; values smaller than 1 are ignored
   */
  public void setMaxMessageBatchSize(int size) {
    if (size < 1) {
      return;
    }
    max_msg_batch_size = size;
  }

  /** Returns the string form of the local address, or the text "null" if it is not set. */
  @ManagedAttribute
  public String getLocalAddress() {
    if (local_addr == null) {
      return "null";
    }
    return local_addr.toString();
  }

  /** Returns the string form of the current member list. */
  @ManagedAttribute
  public String getMembers() {
    return members.toString();
  }

  /** Returns true if a connection reaper task exists and has not completed yet. */
  @ManagedAttribute(description = "Whether the ConnectionReaper task is running")
  public boolean isConnectionReaperRunning() {
    if (connection_reaper == null) {
      return false;
    }
    return !connection_reaper.isDone();
  }

  /** Returns the number of entries in the send table. */
  @ManagedAttribute(description = "Returns the number of outgoing (send) connections")
  public int getNumSendConnections() {
    return send_table.size();
  }

  /** Returns the number of entries in the receive table. */
  @ManagedAttribute(description = "Returns the number of incoming (receive) connections")
  public int getNumReceiveConnections() {
    return recv_table.size();
  }

  /** Returns the sum of the sizes of the receive table and the send table. */
  @ManagedAttribute(
      description =
          "Returns the total number of outgoing (send) and incoming (receive) connections")
  public int getNumConnections() {
    return getNumReceiveConnections() + getNumSendConnections();
  }

  /**
   * Lists all send and receive connections, one {@code address: entry} pair per line. A section
   * is only emitted if the corresponding table is not empty.
   *
   * @return the listing, or an empty string if there are no connections
   */
  @ManagedOperation
  public String printConnections() {
    StringBuilder out = new StringBuilder();

    if (!send_table.isEmpty()) {
      out.append("\nsend connections:\n");
      for (Map.Entry<Address, SenderEntry> conn : send_table.entrySet()) {
        out.append(conn.getKey());
        out.append(": ");
        out.append(conn.getValue());
        out.append("\n");
      }
    }

    if (!recv_table.isEmpty()) {
      out.append("\nreceive connections:\n");
      for (Map.Entry<Address, ReceiverEntry> conn : recv_table.entrySet()) {
        out.append(conn.getKey());
        out.append(": ");
        out.append(conn.getValue());
        out.append("\n");
      }
    }

    return out.toString();
  }

  /** Returns the value of the {@code num_msgs_sent} counter. */
  @ManagedAttribute
  public long getNumMessagesSent() {
    return num_msgs_sent;
  }

  /** Returns the value of the {@code num_msgs_received} counter. */
  @ManagedAttribute
  public long getNumMessagesReceived() {
    return num_msgs_received;
  }

  /** Returns the value of the {@code num_acks_sent} counter. */
  @ManagedAttribute
  public long getNumAcksSent() {
    return num_acks_sent;
  }

  /** Returns the value of the {@code num_acks_received} counter. */
  @ManagedAttribute
  public long getNumAcksReceived() {
    return num_acks_received;
  }

  /** Returns the value of the {@code num_xmits} counter. */
  @ManagedAttribute
  public long getNumXmits() {
    return num_xmits;
  }

  /** Returns the configured maximum retransmit time (see {@code setMaxRetransmitTime}). */
  public long getMaxRetransmitTime() {
    return max_retransmit_time;
  }

  /**
   * Sets the maximum retransmit time. If an age-out cache exists and the value is positive, the
   * cache's timeout is updated as well.
   *
   * @param max_retransmit_time the new maximum retransmit time
   */
  @Property(
      description =
          "Max number of milliseconds we try to retransmit a message to any given member. After that, "
              + "the connection is removed. Any new connection to that member will start with seqno #1 again. 0 disables this")
  public void setMaxRetransmitTime(long max_retransmit_time) {
    this.max_retransmit_time = max_retransmit_time;
    if (cache == null || max_retransmit_time <= 0) {
      return;
    }
    cache.setTimeout(max_retransmit_time);
  }

  /** Returns true if a retransmit task exists and has not completed yet. */
  @ManagedAttribute(description = "Is the retransmit task running")
  public boolean isXmitTaskRunning() {
    if (xmit_task == null) {
      return false;
    }
    return !xmit_task.isDone();
  }

  /** Returns the size of the age-out cache, or 0 if there is no cache. */
  @ManagedAttribute
  public int getAgeOutCacheSize() {
    if (cache == null) {
      return 0;
    }
    return cache.size();
  }

  /** Returns the string form of the age-out cache, or "n/a" if there is no cache. */
  @ManagedOperation
  public String printAgeOutCache() {
    if (cache == null) {
      return "n/a";
    }
    return cache.toString();
  }

  /** Returns the age-out cache; may be null. */
  public AgeOutCache<Address> getAgeOutCache() {
    return cache;
  }

  /**
   * Used for testing only.
   *
   * @param dest the destination address to look up
   * @return true if the send table contains an entry for {@code dest}
   */
  public boolean hasSendConnectionTo(Address dest) {
    return send_table.containsKey(dest);
  }

  /**
   * The number of messages in the sent_msgs tables of all send connections (haven't received an
   * ACK yet). Connections without a table are skipped.
   */
  @ManagedAttribute
  public int getNumUnackedMessages() {
    int total = 0;
    for (SenderEntry conn : send_table.values()) {
      if (conn.sent_msgs == null) {
        continue;
      }
      total += conn.sent_msgs.size();
    }
    return total;
  }

  /**
   * Sums the sizes of the received_msgs tables of all receive connections. Connections without a
   * table are skipped.
   */
  @ManagedAttribute
  public int getNumberOfMessagesInReceiveWindows() {
    int total = 0;
    for (ReceiverEntry conn : recv_table.values()) {
      if (conn.received_msgs == null) {
        continue;
      }
      total += conn.received_msgs.size();
    }
    return total;
  }

  /**
   * Adds up {@code size()} of every receive table.
   *
   * @return the accumulated size of all receive tables, as a {@code long}
   */
  @ManagedAttribute(description = "Total number of undelivered messages in all receive windows")
  public long getXmitTableUndeliveredMessages() {
    long undelivered = 0;
    for (ReceiverEntry receiver_entry : recv_table.values()) {
      final Table<Message> window = receiver_entry.received_msgs;
      if (window == null) {
        continue;
      }
      undelivered += window.size();
    }
    return undelivered;
  }

  /**
   * Adds up {@code getNumMissing()} of every receive table.
   *
   * @return the accumulated number of missing messages over all receive tables
   */
  @ManagedAttribute(description = "Total number of missing messages in all receive windows")
  public long getXmitTableMissingMessages() {
    long missing = 0;
    for (ReceiverEntry receiver_entry : recv_table.values()) {
      final Table<Message> window = receiver_entry.received_msgs;
      if (window == null) {
        continue;
      }
      missing += window.getNumMissing();
    }
    return missing;
  }

  /**
   * Adds up the compaction counters of all receive tables, then of all send tables.
   *
   * @return the accumulated number of compactions
   */
  @ManagedAttribute(description = "Number of compactions in all (receive and send) windows")
  public int getXmitTableNumCompactions() {
    int compactions = 0;
    for (ReceiverEntry receiver_entry : recv_table.values()) {
      final Table<Message> window = receiver_entry.received_msgs;
      if (window != null) {
        compactions += window.getNumCompactions();
      }
    }
    for (SenderEntry sender_entry : send_table.values()) {
      final Table<Message> window = sender_entry.sent_msgs;
      if (window != null) {
        compactions += window.getNumCompactions();
      }
    }
    return compactions;
  }

  /**
   * Adds up the move counters of all receive tables, then of all send tables.
   *
   * @return the accumulated number of moves
   */
  @ManagedAttribute(description = "Number of moves in all (receive and send) windows")
  public int getXmitTableNumMoves() {
    int moves = 0;
    for (ReceiverEntry receiver_entry : recv_table.values()) {
      final Table<Message> window = receiver_entry.received_msgs;
      if (window != null) {
        moves += window.getNumMoves();
      }
    }
    for (SenderEntry sender_entry : send_table.values()) {
      final Table<Message> window = sender_entry.sent_msgs;
      if (window != null) {
        moves += window.getNumMoves();
      }
    }
    return moves;
  }

  /**
   * Adds up the resize counters of all receive tables, then of all send tables.
   *
   * @return the accumulated number of resizes
   */
  @ManagedAttribute(description = "Number of resizes in all (receive and send) windows")
  public int getXmitTableNumResizes() {
    int resizes = 0;
    for (ReceiverEntry receiver_entry : recv_table.values()) {
      final Table<Message> window = receiver_entry.received_msgs;
      if (window != null) {
        resizes += window.getNumResizes();
      }
    }
    for (SenderEntry sender_entry : send_table.values()) {
      final Table<Message> window = sender_entry.sent_msgs;
      if (window != null) {
        resizes += window.getNumResizes();
      }
    }
    return resizes;
  }

  /**
   * Adds up the purge counters of all receive tables, then of all send tables.
   *
   * @return the accumulated number of purges
   */
  @ManagedAttribute(description = "Number of purges in all (receive and send) windows")
  public int getXmitTableNumPurges() {
    int purges = 0;
    for (ReceiverEntry receiver_entry : recv_table.values()) {
      final Table<Message> window = receiver_entry.received_msgs;
      if (window != null) {
        purges += window.getNumPurges();
      }
    }
    for (SenderEntry sender_entry : send_table.values()) {
      final Table<Message> window = sender_entry.sent_msgs;
      if (window != null) {
        purges += window.getNumPurges();
      }
    }
    return purges;
  }

  /**
   * Builds a multi-line dump: the local address on the first line, followed by one line per peer
   * showing that peer's receive table.
   *
   * @return the formatted dump of all receive tables
   */
  @ManagedOperation(description = "Prints the contents of the receive windows for all members")
  public String printReceiveWindowMessages() {
    final StringBuilder out = new StringBuilder();
    out.append(local_addr).append(":\n");
    for (Map.Entry<Address, ReceiverEntry> mapping : recv_table.entrySet()) {
      final Table<Message> window = mapping.getValue().received_msgs;
      out.append(mapping.getKey());
      out.append(": ");
      out.append(window.toString());
      out.append('\n');
    }
    return out.toString();
  }

  /**
   * Builds a multi-line dump: the local address on the first line, followed by one line per peer
   * showing that peer's send table.
   *
   * @return the formatted dump of all send tables
   */
  @ManagedOperation(description = "Prints the contents of the send windows for all members")
  public String printSendWindowMessages() {
    final StringBuilder out = new StringBuilder();
    out.append(local_addr).append(":\n");
    for (Map.Entry<Address, SenderEntry> mapping : send_table.entrySet()) {
      final Table<Message> window = mapping.getValue().sent_msgs;
      out.append(mapping.getKey());
      out.append(": ");
      out.append(window.toString());
      out.append('\n');
    }
    return out.toString();
  }

  /** Sets all message, ACK and retransmission counters of this protocol back to zero. */
  public void resetStats() {
    num_msgs_sent = 0;
    num_msgs_received = 0;
    num_acks_sent = 0;
    num_acks_received = 0;
    num_xmits = 0;
  }

  /**
   * Extends the statistics map produced by the superclass with the number of unacked messages and
   * the number of messages held in receive windows.
   *
   * @return the map returned by {@code super.dumpStats()}, with two additional entries
   */
  public Map<String, Object> dumpStats() {
    final Map<String, Object> stats = super.dumpStats();
    final int unacked = getNumUnackedMessages();
    stats.put("num_unacked_msgs", unacked);
    final int in_recv_windows = getNumberOfMessagesInReceiveWindows();
    stats.put("num_msgs_in_recv_windows", in_recv_windows);
    return stats;
  }

  /**
   * Starts the protocol: fetches the timer from the transport, creates the age-out cache when
   * {@code max_retransmit_time} is positive, marks the protocol as running, and schedules the
   * connection reaper (only when {@code conn_expiry_timeout} is positive) and the retransmit task.
   *
   * @throws Exception if the transport does not provide a timer
   */
  public void start() throws Exception {
    timer = getTransport().getTimer();
    if (timer == null) {
      throw new Exception("timer is null");
    }
    if (max_retransmit_time > 0) {
      cache = new AgeOutCache<Address>(timer, max_retransmit_time, this);
    }
    running = true;
    if (conn_expiry_timeout > 0) {
      startConnectionReaper();
    }
    startRetransmitTask();
  }

  /**
   * Stops the protocol: clears the running flag first, then cancels the retransmit task and the
   * connection reaper, and finally drops all send and receive connections.
   */
  public void stop() {
    this.running = false;
    this.stopRetransmitTask();
    this.stopConnectionReaper();
    this.removeAllConnections();
  }

  /**
   * Handles an event travelling up the stack. Only MSG events that are unicast, do not carry the
   * NO_RELIABILITY flag and have a {@link UnicastHeader} for this protocol are consumed here; every
   * other event is handed to the protocol above unchanged.
   *
   * @param evt the event coming from the protocol below
   * @return {@code null} when the message was consumed by this protocol, otherwise whatever the
   *     protocol above returns
   */
  public Object up(Event evt) {
    if (evt.getType() != Event.MSG) {
      return up_prot.up(evt); // not a message: pass up to the layer above us
    }

    final Message msg = (Message) evt.getArg();
    // only handle unicast messages that ask for reliability
    if (msg.getDest() == null || msg.isFlagSet(Message.Flag.NO_RELIABILITY)) {
      return up_prot.up(evt);
    }

    final UnicastHeader hdr = (UnicastHeader) msg.getHeader(this.id);
    if (hdr == null) {
      return up_prot.up(evt);
    }

    final Address sender = msg.getSrc();
    if (hdr.type == UnicastHeader.DATA) {
      // received regular message
      handleDataReceived(sender, hdr.seqno, hdr.conn_id, hdr.first, msg, evt);
    } else {
      handleUpEvent(sender, hdr);
    }
    return null;
  }

  /**
   * Dispatches a non-DATA unicast header to its handler: ACK headers go to
   * {@code handleAckReceived}, SEND_FIRST_SEQNO headers to {@code handleResendingOfFirstMessage};
   * unknown types are logged as errors.
   *
   * @param sender the address the header was received from
   * @param hdr the unicast header to dispatch
   * @throws IllegalStateException if the header is of type DATA
   */
  protected void handleUpEvent(Address sender, UnicastHeader hdr) {
    final byte type = hdr.type;
    if (type == UnicastHeader.DATA) {
      throw new IllegalStateException(
          "header of type DATA is not supposed to be handled by this method");
    }
    if (type == UnicastHeader.ACK) {
      // received ACK for previously sent message
      handleAckReceived(sender, hdr.seqno, hdr.conn_id);
      return;
    }
    if (type == UnicastHeader.SEND_FIRST_SEQNO) {
      handleResendingOfFirstMessage(sender, hdr.seqno);
      return;
    }
    log.error("UnicastHeader type " + type + " not known !");
  }

  /**
   * Handles a message batch travelling up the stack. Batches without a destination are passed up
   * untouched. Otherwise every message carrying a {@link UnicastHeader} (and not flagged
   * NO_RELIABILITY) is removed from the batch: non-DATA headers are dispatched right away, DATA
   * messages are grouped by connection id and handed to {@code handleBatchReceived}. Whatever
   * remains in the batch afterwards is passed up.
   *
   * @param batch the batch coming from the protocol below
   */
  public void up(MessageBatch batch) {
    if (batch.dest() == null) {
      // not a unicast batch
      up_prot.up(batch);
      return;
    }

    // captured before any removal; used as the initial capacity of the per-connection lists
    final int initial_size = batch.size();
    // DATA messages, keyed by conn-id
    final Map<Short, List<Message>> by_conn_id = new TreeMap<Short, List<Message>>();

    for (Message msg : batch) {
      if (msg == null || msg.isFlagSet(Message.Flag.NO_RELIABILITY)) {
        continue;
      }
      final UnicastHeader hdr = (UnicastHeader) msg.getHeader(id);
      if (hdr == null) {
        continue;
      }
      // ours to handle: take it out of the batch so it is not passed up the stack
      batch.remove(msg);

      if (hdr.type == UnicastHeader.DATA) {
        List<Message> bucket = by_conn_id.get(hdr.conn_id);
        if (bucket == null) {
          bucket = new ArrayList<Message>(initial_size);
          by_conn_id.put(hdr.conn_id, bucket);
        }
        bucket.add(msg);
        continue;
      }

      try {
        handleUpEvent(msg.getSrc(), hdr);
      } catch (Throwable t) {
        // an exception must not terminate the processing of the rest of the batch
        log.error(local_addr + ": failed handling event", t);
      }
    }

    if (!by_conn_id.isEmpty()) {
      handleBatchReceived(batch.sender(), by_conn_id);
    }
    if (!batch.isEmpty()) {
      up_prot.up(batch);
    }
  }

  /**
   * Handles an event travelling down the stack.
   *
   * <ul>
   *   <li>MSG: a unicast message without the NO_RELIABILITY flag gets a DATA {@link UnicastHeader}
   *       carrying the next seqno of the connection to its destination, is stored in that
   *       connection's send table and is then passed down. A send connection is created on demand.
   *       If the protocol is not running, the message is discarded and {@code null} is returned.
   *   <li>VIEW_CHANGE: connections to peers that are no longer members are removed, and the new
   *       members are removed from the age-out cache.
   *   <li>SET_LOCAL_ADDRESS: the local address is recorded.
   * </ul>
   *
   * All events that are not consumed above are passed to the protocol below.
   *
   * @param evt the event coming from the protocol above
   * @return the result of passing the event down, or {@code null} if a message was discarded
   */
  public Object down(Event evt) {
    switch (evt.getType()) {
      case Event.MSG: // Add UnicastHeader, add to the send table and pass down
        Message msg = (Message) evt.getArg();
        Address dst = msg.getDest();

        /* only handle unicast messages */
        if (dst == null || msg.isFlagSet(Message.Flag.NO_RELIABILITY)) break;

        if (!running) {
          if (log.isTraceEnabled())
            log.trace("discarded message as start() has not yet been called, message: " + msg);
          return null;
        }

        // look up the send connection; create it if absent (putIfAbsent resolves creation races)
        SenderEntry entry = send_table.get(dst);
        if (entry == null) {
          entry = new SenderEntry(getNewConnectionId());
          SenderEntry existing = send_table.putIfAbsent(dst, entry);
          if (existing != null) entry = existing;
          else {
            if (log.isTraceEnabled())
              log.trace(
                  local_addr
                      + ": created sender window for "
                      + dst
                      + " (conn-id="
                      + entry.send_conn_id
                      + ")");
            // connections to non-members are tracked in the age-out cache so they can expire
            if (cache != null && !members.contains(dst)) cache.add(dst);
          }
        }

        short send_conn_id = entry.send_conn_id;
        long seqno = entry.sent_msgs_seqno.getAndIncrement();
        long sleep = 10;
        // Retry adding the message to the send table until it succeeds or the protocol stops,
        // sleeping between attempts (10 ms, doubling each time, capped at 5000 ms).
        // NOTE(review): when the loop is left because running became false, the message is still
        // counted and passed down below - confirm this is intended.
        do {
          try {
            msg.putHeader(
                this.id,
                UnicastHeader.createDataHeader(seqno, send_conn_id, seqno == DEFAULT_FIRST_SEQNO));
            entry.sent_msgs.add(seqno, msg); // add *including* UnicastHeader, adds to retransmitter
            if (conn_expiry_timeout > 0) entry.update();
            break;
          } catch (Throwable t) {
            if (!running) break;
            Util.sleep(sleep);
            sleep = Math.min(5000, sleep * 2);
          }
        } while (running);

        if (log.isTraceEnabled()) {
          StringBuilder sb = new StringBuilder();
          sb.append(local_addr)
              .append(" --> DATA(")
              .append(dst)
              .append(": #")
              .append(seqno)
              .append(", conn_id=")
              .append(send_conn_id);
          if (seqno == DEFAULT_FIRST_SEQNO) sb.append(", first");
          sb.append(')');
          log.trace(sb);
        }

        num_msgs_sent++;
        return down_prot.down(evt);

      case Event.VIEW_CHANGE: // remove connections to peers that are not members anymore !
        View view = (View) evt.getArg();
        List<Address> new_members = view.getMembers();
        // start from every peer we have a send or receive connection to ...
        Set<Address> non_members = new HashSet<Address>(send_table.keySet());
        non_members.addAll(recv_table.keySet());

        members = new_members;
        // ... and keep only those that are not in the new view
        non_members.removeAll(new_members);
        if (cache != null) cache.removeAll(new_members);

        if (!non_members.isEmpty()) {
          if (log.isTraceEnabled()) log.trace("removing non members " + non_members);
          for (Address non_mbr : non_members) removeConnection(non_mbr);
        }
        break;

      case Event.SET_LOCAL_ADDRESS:
        local_addr = (Address) evt.getArg();
        break;
    }

    return down_prot.down(evt); // Pass on to the layer below us
  }

  /**
   * Removes both the send and the receive connection to the given member by delegating to
   * {@code removeSendConnection} and {@code removeReceiveConnection}. Nothing is returned. This
   * method is public only so it can be invoked by unit testing, but should not otherwise be used !
   *
   * @param mbr the peer whose connections are removed
   */
  public void removeConnection(Address mbr) {
    removeSendConnection(mbr);
    removeReceiveConnection(mbr);
  }

  /**
   * Removes the entry for the given member from the send table, if present.
   *
   * @param mbr the peer whose send connection is removed
   */
  public void removeSendConnection(Address mbr) {
    this.send_table.remove(mbr);
  }

  /**
   * Removes the entry for the given member from the receive table, if present.
   *
   * @param mbr the peer whose receive connection is removed
   */
  public void removeReceiveConnection(Address mbr) {
    this.recv_table.remove(mbr);
  }

  /**
   * Empties both the send table and the receive table. This method is public only so it can be
   * invoked by unit testing (and by {@code stop()}), but should not otherwise be used !
   */
  @ManagedOperation(
      description = "Trashes all connections to other nodes. This is only used for testing")
  public void removeAllConnections() {
    this.send_table.clear();
    this.recv_table.clear();
  }

  /**
   * Re-sends a message for which no ACK has been received yet: passes it down the stack again and
   * increments the retransmission counter.
   *
   * @param msg the message to resend; expected to still carry its {@link UnicastHeader}
   */
  public void retransmit(Message msg) {
    if (log.isTraceEnabled()) {
      final UnicastHeader hdr = (UnicastHeader) msg.getHeader(id);
      long seqno = -1; // shown when the message has no unicast header
      if (hdr != null) {
        seqno = hdr.seqno;
      }
      log.trace(local_addr + " --> XMIT(" + msg.getDest() + ": #" + seqno + ')');
    }
    final Event resend = new Event(Event.MSG, msg);
    down_prot.down(resend);
    num_xmits++;
  }

  /**
   * Callback invoked by the age-out cache when an entry has expired: removes the send and receive
   * connections to that peer.
   *
   * @param key the expired peer address; {@code null} is ignored
   */
  public void expired(Address key) {
    if (key == null) {
      return;
    }
    if (log.isDebugEnabled()) {
      log.debug("removing connection to " + key + " because it expired");
    }
    removeConnection(key);
  }

  /**
   * Processes a single DATA message received from {@code sender}.
   *
   * <p>The receive connection is resolved through {@code getReceiverEntry}; when that returns
   * {@code null} the message is dropped. Otherwise the message is added to the connection's receive
   * table. An OOB message that was actually added is passed up immediately. Finally, the thread
   * that wins the table's {@code processing} flag removes and delivers as many messages as
   * possible and sends an ACK for the highest delivered seqno.
   *
   * @param sender the address of the sending peer
   * @param seqno the sequence number from the DATA header
   * @param conn_id the connection id from the DATA header
   * @param first whether the header marks this as the first message of the connection
   * @param msg the received message
   * @param evt the event wrapping {@code msg}; passed up as-is for OOB messages
   */
  protected void handleDataReceived(
      Address sender, long seqno, short conn_id, boolean first, Message msg, Event evt) {
    if (log.isTraceEnabled()) {
      StringBuilder sb = new StringBuilder();
      sb.append(local_addr).append(" <-- DATA(").append(sender).append(": #").append(seqno);
      if (conn_id != 0) sb.append(", conn_id=").append(conn_id);
      if (first) sb.append(", first");
      sb.append(')');
      log.trace(sb);
    }

    ReceiverEntry entry = getReceiverEntry(sender, seqno, first, conn_id);
    if (entry == null) return; // no matching connection: message is dropped
    if (conn_expiry_timeout > 0) entry.update(); // refresh the idle timestamp used by the reaper
    Table<Message> win = entry.received_msgs;
    boolean added = win.add(seqno, msg); // win is guaranteed to be non-null if we get here
    num_msgs_received++;

    // An OOB message is passed up immediately. Later, when remove() is called, we discard it. This
    // affects ordering !
    // http://jira.jboss.com/jira/browse/JGRP-377
    if (msg.isFlagSet(Message.Flag.OOB) && added) {
      try {
        up_prot.up(evt);
      } catch (Throwable t) {
        log.error("couldn't deliver OOB message " + msg, t);
      }
    }

    // only the thread that flips 'processing' from false to true goes on to deliver
    final AtomicBoolean processing = win.getProcessing();
    if (!processing.compareAndSet(false, true)) {
      return;
    }

    // try to remove (from the receive table) as many messages as possible and pass them up

    // Prevents concurrent passing up of messages by different threads
    // (http://jira.jboss.com/jira/browse/JGRP-198);
    // this is all the more important once we have a concurrent stack
    // (http://jira.jboss.com/jira/browse/JGRP-181),
    // where lots of threads can come up to this point concurrently, but only 1 is allowed to pass
    // at a time
    // We *can* deliver messages from *different* senders concurrently, e.g. reception of P1, Q1,
    // P2, Q2 can result in
    // delivery of P1, Q1, Q2, P2: FIFO (implemented by UNICAST) says messages need to be delivered
    // only in the
    // order in which they were sent by their senders
    removeAndDeliver(processing, win, sender);
    // NOTE(review): the batch path (handleBatchReceived) acks getHighestDeliverable() while this
    // path acks getHighestDelivered() - confirm the difference is intentional
    sendAck(sender, win.getHighestDelivered(), conn_id);
  }

  /**
   * Processes the DATA messages of a batch received from {@code sender}, grouped by connection id.
   *
   * <p>For each group, every message is resolved against the receive connection (messages for
   * which {@code getReceiverEntry} returns {@code null} are skipped) and added to the receive
   * table. The first message of a connection is acked immediately when it was added, and OOB
   * messages that were added are passed up right away. After all groups have been processed, the
   * thread that wins the {@code processing} flag of the sender's current receive table removes and
   * delivers messages and sends an ACK for the highest deliverable seqno.
   *
   * @param sender the address of the sending peer
   * @param map the DATA messages of the batch, keyed by connection id; every message is expected
   *     to carry a {@link UnicastHeader}
   */
  protected void handleBatchReceived(Address sender, Map<Short, List<Message>> map) {
    for (Map.Entry<Short, List<Message>> element : map.entrySet()) {
      final List<Message> msg_list = element.getValue();
      if (log.isTraceEnabled()) {
        StringBuilder sb = new StringBuilder();
        sb.append(local_addr)
            .append(" <-- DATA(")
            .append(sender)
            .append(": " + printMessageList(msg_list))
            .append(')');
        log.trace(sb);
      }

      short conn_id = element.getKey();
      ReceiverEntry entry = null;
      for (Message msg : msg_list) {
        UnicastHeader hdr = (UnicastHeader) msg.getHeader(id);
        entry = getReceiverEntry(sender, hdr.seqno, hdr.first, conn_id);
        if (entry == null) continue; // no matching connection: skip this message
        Table<Message> win = entry.received_msgs;
        boolean msg_added =
            win.add(hdr.seqno, msg); // win is guaranteed to be non-null if we get here
        num_msgs_received++;

        if (hdr.first && msg_added)
          sendAck(
              sender, hdr.seqno,
              conn_id); // send an ack immediately when we received the first message of a conn

        // An OOB message is passed up immediately. Later, when remove() is called, we discard it.
        // This affects ordering !
        // http://jira.jboss.com/jira/browse/JGRP-377
        if (msg.isFlagSet(Message.Flag.OOB) && msg_added) {
          try {
            up_prot.up(new Event(Event.MSG, msg));
          } catch (Throwable t) {
            log.error("couldn't deliver OOB message " + msg, t);
          }
        }
      }
      // refresh the idle timestamp once per group, using the entry of the last message looked up
      if (entry != null && conn_expiry_timeout > 0) entry.update();
    }

    // deliver from whatever receive table is registered for the sender now
    ReceiverEntry entry = recv_table.get(sender);
    Table<Message> win = entry != null ? entry.received_msgs : null;
    if (win != null) {
      // only the thread that flips 'processing' from false to true delivers
      final AtomicBoolean processing = win.getProcessing();
      if (processing.compareAndSet(false, true)) {
        removeAndDeliver(processing, win, sender);
        sendAck(sender, win.getHighestDeliverable(), entry.recv_conn_id);
      }
    }
  }

  /**
   * Try to remove as many messages as possible from the table and pass them up. Prevents concurrent
   * passing up of messages by different threads (http://jira.jboss.com/jira/browse/JGRP-198); lots
   * of threads can come up to this point concurrently, but only 1 is allowed to pass at a time. We
   * *can* deliver messages from *different* senders concurrently, e.g. reception of P1, Q1, P2, Q2
   * can result in delivery of P1, Q1, Q2, P2: FIFO (implemented by UNICAST) says messages need to
   * be delivered in the order in which they were sent
   *
   * <p>Messages are removed in chunks of at most {@code max_msg_batch_size}; OOB messages are
   * dropped from each chunk because they have already been delivered. Delivery failures are logged
   * and do not stop the loop.
   *
   * @param processing the table's processing flag; the caller must have set it to {@code true}
   * @param win the receive table to drain
   * @param sender the peer the messages came from; used as the sender of the delivered batches
   * @return the value of {@code retval}. NOTE(review): {@code retval} is never incremented, so
   *     this currently always returns 0 - confirm whether a delivered-message count was intended
   */
  protected int removeAndDeliver(
      final AtomicBoolean processing, Table<Message> win, Address sender) {
    int retval = 0;
    boolean released_processing = false;
    try {
      while (true) {
        List<Message> list = win.removeMany(processing, true, max_msg_batch_size);
        if (list == null) {
          // nothing left to remove; presumably removeMany() has reset 'processing' at this point
          // (see the comment in the finally block) - TODO confirm against Table.removeMany()
          released_processing = true;
          return retval;
        }

        MessageBatch batch = new MessageBatch(local_addr, sender, null, false, list);
        for (Message msg_to_deliver : batch) {
          // discard OOB msg: it has already been delivered
          // (http://jira.jboss.com/jira/browse/JGRP-377)
          if (msg_to_deliver.isFlagSet(Message.Flag.OOB)) batch.remove(msg_to_deliver);
        }

        try {
          if (log.isTraceEnabled()) {
            Message first = batch.first(), last = batch.last();
            StringBuilder sb = new StringBuilder(local_addr + ": delivering");
            if (first != null && last != null) {
              UnicastHeader hdr1 = (UnicastHeader) first.getHeader(id),
                  hdr2 = (UnicastHeader) last.getHeader(id);
              sb.append(" #").append(hdr1.seqno).append(" - #").append(hdr2.seqno);
            }
            sb.append(" (" + batch.size()).append(" messages)");
            log.trace(sb);
          }
          up_prot.up(batch);
        } catch (Throwable t) {
          log.error("failed to deliver batch " + batch, t);
        }
      }
    } finally {
      // processing is always set in win.removeMany(processing, ...) above and never here ! This
      // code is just a 2nd line of defense should there be an exception before removeMany() resets
      // processing
      if (!released_processing) processing.set(false);
    }
  }

  /**
   * Resolves the receive connection for a DATA message from {@code sender}.
   *
   * <p>Fast path: if an entry with a matching connection id already exists, it is returned without
   * locking. Otherwise, under {@code recv_table_lock}:
   *
   * <ul>
   *   <li>if the message is the first of a connection, a missing entry is created and an entry with
   *       a different connection id is replaced by a fresh one starting at {@code seqno};
   *   <li>if it is not the first message and there is no entry with a matching connection id, the
   *       sender is asked to resend its first message and the current message is dropped.
   * </ul>
   *
   * The lock is released exactly once, in the {@code finally} block; the request for the first
   * seqno is sent only after the lock has been released, so no message is passed down the stack
   * while the lock is held.
   *
   * @param sender the address of the sending peer
   * @param seqno the sequence number of the received message
   * @param first whether the message is marked as the first of its connection
   * @param conn_id the connection id carried by the message
   * @return the receive entry to add the message to, or {@code null} if the message must be dropped
   */
  protected ReceiverEntry getReceiverEntry(
      Address sender, long seqno, boolean first, short conn_id) {
    ReceiverEntry entry = recv_table.get(sender);
    if (entry != null && entry.recv_conn_id == conn_id) return entry;

    boolean request_first_seqno = false;
    recv_table_lock.lock();
    try {
      entry = recv_table.get(sender);
      if (first) {
        if (entry == null) {
          entry = getOrCreateReceiverEntry(sender, seqno, conn_id);
        } else if (conn_id != entry.recv_conn_id) {
          if (log.isTraceEnabled())
            log.trace(
                local_addr
                    + ": conn_id="
                    + conn_id
                    + " != "
                    + entry.recv_conn_id
                    + "; resetting receiver window");

          recv_table.remove(sender);
          entry = getOrCreateReceiverEntry(sender, seqno, conn_id);
        }
        // else: an entry with the same conn_id was created concurrently; use it as is
      } else if (entry == null || entry.recv_conn_id != conn_id) {
        // not the first message and no usable connection: ask for the first message (after unlock)
        request_first_seqno = true;
      }
    } finally {
      recv_table_lock.unlock();
    }

    if (request_first_seqno) {
      sendRequestForFirstSeqno(sender, seqno); // drops the message and returns
      return null;
    }
    return entry;
  }

  /**
   * Registers a new receive entry for {@code sender} unless one already exists. The new entry's
   * table is created with {@code seqno - 1} as its offset and the configured xmit-table settings.
   *
   * @param sender the address of the sending peer
   * @param seqno the sequence number of the first message expected on the connection
   * @param conn_id the connection id to associate with the entry
   * @return the entry already present in {@code recv_table}, or the newly created one
   */
  protected ReceiverEntry getOrCreateReceiverEntry(Address sender, long seqno, short conn_id) {
    final long offset = seqno - 1;
    final ReceiverEntry created =
        new ReceiverEntry(
            new Table<Message>(
                xmit_table_num_rows,
                xmit_table_msgs_per_row,
                offset,
                xmit_table_resize_factor,
                xmit_table_max_compaction_time),
            conn_id);

    final ReceiverEntry present = recv_table.putIfAbsent(sender, created);
    if (present != null) {
      return present; // somebody else registered an entry first
    }

    if (log.isTraceEnabled()) {
      log.trace(
          local_addr
              + ": created receiver window for "
              + sender
              + " at seqno=#"
              + seqno
              + " for conn-id="
              + conn_id);
    }
    return created;
  }

  /**
   * Processes an ACK from {@code sender}. If a send connection to the sender exists and its
   * connection id matches, all messages up to and including {@code seqno} are purged from the send
   * table and the ACK counter is incremented. ACKs for a different connection id are discarded;
   * ACKs for unknown peers are ignored.
   *
   * @param sender the peer that sent the ACK
   * @param seqno the acknowledged sequence number
   * @param conn_id the connection id carried by the ACK
   */
  protected void handleAckReceived(Address sender, long seqno, short conn_id) {
    if (log.isTraceEnabled()) {
      final StringBuilder trace = new StringBuilder();
      trace.append(local_addr).append(" <-- ACK(").append(sender);
      trace.append(": #").append(seqno);
      trace.append(", conn-id=").append(conn_id).append(')');
      log.trace(trace);
    }

    final SenderEntry sender_entry = send_table.get(sender);
    if (sender_entry == null) {
      return; // no send connection to this peer
    }

    if (sender_entry.send_conn_id != conn_id) {
      if (log.isTraceEnabled()) {
        log.trace(
            local_addr
                + ": my conn_id ("
                + sender_entry.send_conn_id
                + ") != received conn_id ("
                + conn_id
                + "); discarding ACK");
      }
      return;
    }

    final Table<Message> window = sender_entry.sent_msgs;
    if (window == null) {
      return;
    }
    window.purge(seqno, true); // removes all messages <= seqno (forced purge)
    num_acks_received++;
  }

  /**
   * Answers a SEND_FIRST_SEQNO request: re-sends the non-null messages of the send table in the
   * range {@code [low + 1 .. seqno]}. The first message found is sent as a copy whose header has
   * {@code first} set; all following messages are sent unchanged.
   *
   * @param sender the peer that requested the resend
   * @param seqno the upper bound (inclusive) of the range to resend
   */
  protected void handleResendingOfFirstMessage(Address sender, long seqno) {
    if (log.isTraceEnabled()) {
      log.trace(local_addr + " <-- SEND_FIRST_SEQNO(" + sender + "," + seqno + ")");
    }

    final SenderEntry sender_entry = send_table.get(sender);
    final Table<Message> window = sender_entry == null ? null : sender_entry.sent_msgs;
    if (window == null) {
      if (log.isWarnEnabled()) {
        log.warn(local_addr + ": sender window for " + sender + " not found");
      }
      return;
    }

    boolean first_resent = false;
    for (long current = window.getLow() + 1; current <= seqno; current++) {
      final Message stored = window.get(current);
      if (stored == null) {
        continue;
      }

      if (!first_resent) {
        first_resent = true;
        // The stored message must not be modified, as it lives in the retransmission table. Per
        // the original note, Message.copy() does not give the copy its own header instance, so the
        // UnicastHeader is copied explicitly before 'first' is set on it
        // (https://jira.jboss.org/jira/browse/JGRP-965)
        final Message first_copy = stored.copy();
        final UnicastHeader shared_hdr = (UnicastHeader) first_copy.getHeader(this.id);
        final UnicastHeader first_hdr = shared_hdr.copy();
        first_hdr.first = true;
        first_copy.putHeader(this.id, first_hdr);
        down_prot.down(new Event(Event.MSG, first_copy));
        continue;
      }

      down_prot.down(new Event(Event.MSG, stored));
    }
  }

  /**
   * Schedules a {@code RetransmitTask} on the timer with a fixed delay of {@code xmit_interval}
   * milliseconds and no initial delay, unless a task is already scheduled and not yet done.
   */
  protected void startRetransmitTask() {
    if (xmit_task != null && !xmit_task.isDone()) {
      return; // already running
    }
    final RetransmitTask task = new RetransmitTask();
    xmit_task = timer.scheduleWithFixedDelay(task, 0, xmit_interval, TimeUnit.MILLISECONDS);
  }

  /**
   * Cancels the retransmit task (interrupting it if it is currently running) and clears the
   * reference. Does nothing when no task is scheduled.
   */
  protected void stopRetransmitTask() {
    if (xmit_task == null) {
      return;
    }
    xmit_task.cancel(true);
    xmit_task = null;
  }

  /**
   * Sends an ACK for {@code seqno} to {@code dst} as an INTERNAL message and increments the
   * sent-ACK counter on success. Nothing is sent when the protocol is not running; failures while
   * passing the ACK down are logged and not propagated.
   *
   * @param dst the peer to acknowledge to
   * @param seqno the sequence number being acknowledged
   * @param conn_id the connection id to put into the ACK header
   */
  protected void sendAck(Address dst, long seqno, short conn_id) {
    // if we are disconnected, then don't send any acks which throw exceptions on shutdown
    if (!running) {
      return;
    }

    final Message ack_msg =
        new Message(dst)
            .setFlag(Message.Flag.INTERNAL)
            .putHeader(this.id, UnicastHeader.createAckHeader(seqno, conn_id));

    if (log.isTraceEnabled()) {
      final StringBuilder trace = new StringBuilder();
      trace.append(local_addr).append(" --> ACK(").append(dst);
      trace.append(": #").append(seqno).append(')');
      log.trace(trace);
    }

    try {
      down_prot.down(new Event(Event.MSG, ack_msg));
      num_acks_sent++;
    } catch (Throwable t) {
      log.error("failed sending ACK(" + seqno + ") to " + dst, t);
    }
  }

  /**
   * Schedules a {@code ConnectionReaper} on the timer, using {@code conn_expiry_timeout}
   * milliseconds both as initial delay and as fixed delay, unless a reaper is already scheduled and
   * not yet done.
   */
  protected synchronized void startConnectionReaper() {
    if (connection_reaper != null && !connection_reaper.isDone()) {
      return; // already running
    }
    final ConnectionReaper reaper = new ConnectionReaper();
    connection_reaper =
        timer.scheduleWithFixedDelay(
            reaper, conn_expiry_timeout, conn_expiry_timeout, TimeUnit.MILLISECONDS);
  }

  /**
   * Cancels the connection reaper without interrupting a run in progress. The reference is kept;
   * does nothing when no reaper has been scheduled.
   */
  protected synchronized void stopConnectionReaper() {
    if (connection_reaper == null) {
      return;
    }
    connection_reaper.cancel(false);
  }

  /**
   * Hands out the current connection id and advances the counter for the next caller. The counter
   * wraps to 0 once it has reached {@code Short.MAX_VALUE} (or if it is negative).
   *
   * @return the connection id to use for a new send connection
   */
  protected synchronized short getNewConnectionId() {
    final short current = last_conn_id;
    if (current < 0 || current >= Short.MAX_VALUE) {
      last_conn_id = 0;
    } else {
      last_conn_id = (short) (current + 1);
    }
    return current;
  }

  /**
   * Asks {@code dest} to resend the first message of its connection by sending it an OOB, INTERNAL
   * message carrying a SEND_FIRST_SEQNO header.
   *
   * @param dest the peer to send the request to
   * @param seqno_received the sequence number of the message that triggered the request
   */
  protected void sendRequestForFirstSeqno(Address dest, long seqno_received) {
    final Message request = new Message(dest).setFlag(Message.Flag.OOB, Message.Flag.INTERNAL);
    request.putHeader(this.id, UnicastHeader.createSendFirstSeqnoHeader(seqno_received));
    if (log.isTraceEnabled()) {
      log.trace(local_addr + " --> SEND_FIRST_SEQNO(" + dest + "," + seqno_received + ")");
    }
    down_prot.down(new Event(Event.MSG, request));
  }

  /**
   * Removes every send connection and every receive connection whose age is at least
   * {@code conn_expiry_timeout} milliseconds. Each removal is logged at debug level.
   */
  @ManagedOperation(
      description = "Closes connections that have been idle for more than conn_expiry_timeout ms")
  public void reapIdleConnections() {
    // remove expired connections from send_table
    for (Map.Entry<Address, SenderEntry> candidate : send_table.entrySet()) {
      final long idle_ms = candidate.getValue().age();
      if (idle_ms < conn_expiry_timeout) {
        continue;
      }
      final Address peer = candidate.getKey();
      removeSendConnection(peer);
      if (log.isDebugEnabled()) {
        log.debug(
            local_addr
                + ": removed expired connection for "
                + peer
                + " ("
                + idle_ms
                + " ms old) from send_table");
      }
    }

    // remove expired connections from recv_table
    for (Map.Entry<Address, ReceiverEntry> candidate : recv_table.entrySet()) {
      final long idle_ms = candidate.getValue().age();
      if (idle_ms < conn_expiry_timeout) {
        continue;
      }
      final Address peer = candidate.getKey();
      removeReceiveConnection(peer);
      if (log.isDebugEnabled()) {
        log.debug(
            local_addr
                + ": removed expired connection for "
                + peer
                + " ("
                + idle_ms
                + " ms old) from recv_table");
      }
    }
  }

  /**
   * Renders the seqno range of a message list as {@code "#first - #last"}. For a single-element
   * list the same message is used for both ends; for an empty list the result is empty. Messages
   * without a {@link UnicastHeader} contribute nothing.
   *
   * @param list the messages to describe
   * @return the formatted seqno range
   */
  protected String printMessageList(List<Message> list) {
    final StringBuilder out = new StringBuilder();
    final int count = list.size();

    Message head = null;
    if (count > 0) {
      head = list.get(0);
    }
    final Message tail = count > 1 ? list.get(count - 1) : head;

    if (head != null) {
      final UnicastHeader head_hdr = (UnicastHeader) head.getHeader(id);
      if (head_hdr != null) {
        out.append('#').append(head_hdr.seqno);
      }
    }
    if (tail != null) {
      final UnicastHeader tail_hdr = (UnicastHeader) tail.getHeader(id);
      if (tail_hdr != null) {
        out.append(" - #").append(tail_hdr.seqno);
      }
    }
    return out.toString();
  }

  /**
   * Header attached to every message handled by this protocol. The following types and fields are
   * serialized (a one-byte type tag followed by the type-specific fields, in this order):
   *
   * <pre>
   * | DATA             | seqno | conn_id | first |
   * | ACK              | seqno | conn_id |
   * | SEND_FIRST_SEQNO | seqno |
   * </pre>
   */
  public static class UnicastHeader extends Header {
    /** Regular data message. */
    public static final byte DATA = 0;
    /** Acknowledgement of all messages up to and including seqno. */
    public static final byte ACK = 1;
    /** Request to resend the first message of a connection. */
    public static final byte SEND_FIRST_SEQNO = 2;

    byte type;
    long seqno; // DATA, ACK and SEND_FIRST_SEQNO
    short conn_id; // DATA and ACK
    boolean first; // DATA

    public UnicastHeader() {} // used for externalization

    /** Creates a DATA header with the given seqno, connection id and first-message marker. */
    public static UnicastHeader createDataHeader(long seqno, short conn_id, boolean first) {
      return new UnicastHeader(DATA, seqno, conn_id, first);
    }

    /** Creates an ACK header for the given seqno and connection id. */
    public static UnicastHeader createAckHeader(long seqno, short conn_id) {
      return new UnicastHeader(ACK, seqno, conn_id, false);
    }

    /** Creates a SEND_FIRST_SEQNO header carrying the seqno that triggered the request. */
    public static UnicastHeader createSendFirstSeqnoHeader(long seqno_received) {
      return new UnicastHeader(SEND_FIRST_SEQNO, seqno_received);
    }

    /** Creates a header with only type and seqno set; conn_id and first keep their defaults. */
    protected UnicastHeader(byte type, long seqno) {
      this.type = type;
      this.seqno = seqno;
    }

    /** Creates a header with all fields set. */
    protected UnicastHeader(byte type, long seqno, short conn_id, boolean first) {
      this.type = type;
      this.seqno = seqno;
      this.conn_id = conn_id;
      this.first = first;
    }

    /** Returns the sequence number carried by this header. */
    public long getSeqno() {
      return seqno;
    }

    /** Renders type and seqno, plus conn_id when it is non-zero and "first" when it is set. */
    public String toString() {
      StringBuilder sb = new StringBuilder();
      sb.append(type2Str(type)).append(", seqno=").append(seqno);
      if (conn_id != 0) sb.append(", conn_id=").append(conn_id);
      if (first) sb.append(", first");
      return sb.toString();
    }

    /** Maps a type tag to its name; unknown tags yield {@code "<unknown>"}. */
    public static String type2Str(byte t) {
      switch (t) {
        case DATA:
          return "DATA";
        case ACK:
          return "ACK";
        case SEND_FIRST_SEQNO:
          return "SEND_FIRST_SEQNO";
        default:
          return "<unknown>";
      }
    }

    /**
     * Returns the serialized size in bytes; must stay in sync with the fields written by {@link
     * #writeTo(DataOutput)} for each type.
     */
    public final int size() {
      int retval = Global.BYTE_SIZE; // type
      switch (type) {
        case DATA:
          retval +=
              Bits.size(seqno) // seqno
                  + Global.SHORT_SIZE // conn_id
                  + Global.BYTE_SIZE; // first
          break;
        case ACK:
          retval += Bits.size(seqno) + Global.SHORT_SIZE; // seqno + conn_id
          break;
        case SEND_FIRST_SEQNO:
          retval += Bits.size(seqno);
          break;
      }
      return retval;
    }

    /** Returns a new header with the same type, seqno, conn_id and first flag. */
    public UnicastHeader copy() {
      return new UnicastHeader(type, seqno, conn_id, first);
    }

    /**
     * Writes the type tag followed by the fields relevant for that type. The field order here must
     * match the order read in {@link #readFrom(DataInput)}.
     */
    public void writeTo(DataOutput out) throws Exception {
      out.writeByte(type);
      switch (type) {
        case DATA:
          Bits.writeLong(seqno, out);
          out.writeShort(conn_id);
          out.writeBoolean(first);
          break;
        case ACK:
          Bits.writeLong(seqno, out);
          out.writeShort(conn_id);
          break;
        case SEND_FIRST_SEQNO:
          Bits.writeLong(seqno, out);
          break;
      }
    }

    /**
     * Reads the type tag and then the fields relevant for that type, in the order written by
     * {@link #writeTo(DataOutput)}. For an unknown type nothing beyond the tag is read.
     */
    public void readFrom(DataInput in) throws Exception {
      type = in.readByte();
      switch (type) {
        case DATA:
          seqno = Bits.readLong(in);
          conn_id = in.readShort();
          first = in.readBoolean();
          break;
        case ACK:
          seqno = Bits.readLong(in);
          conn_id = in.readShort();
          break;
        case SEND_FIRST_SEQNO:
          seqno = Bits.readLong(in);
          break;
      }
    }
  }

  /**
   * State of one send connection: the table of messages sent to a peer, the next seqno to use, the
   * connection id and the time of last use.
   */
  protected final class SenderEntry {
    /** Stores (and is used to retransmit) the messages sent by us to a certain peer. */
    final Table<Message> sent_msgs;

    /** Seqno for messages sent by us; starts at {@code DEFAULT_FIRST_SEQNO}. */
    final AtomicLong sent_msgs_seqno = new AtomicLong(DEFAULT_FIRST_SEQNO);

    final short send_conn_id;

    /** Time in milliseconds of the last call to {@link #update()}. */
    protected final AtomicLong timestamp = new AtomicLong(0);

    final Lock lock = new ReentrantLock();

    /**
     * Creates an entry with an empty send table (offset 0, sized by the configured xmit-table
     * settings) and stamps it with the current time.
     *
     * @param send_conn_id the connection id of this send connection
     */
    public SenderEntry(short send_conn_id) {
      this.send_conn_id = send_conn_id;
      this.sent_msgs =
          new Table<Message>(
              xmit_table_num_rows,
              xmit_table_msgs_per_row,
              0,
              xmit_table_resize_factor,
              xmit_table_max_compaction_time);
      update();
    }

    /** Marks the connection as used now. */
    void update() {
      final long now = System.currentTimeMillis();
      timestamp.set(now);
    }

    /** Returns the number of milliseconds since the last {@link #update()}. */
    long age() {
      final long now = System.currentTimeMillis();
      return now - timestamp.get();
    }

    public String toString() {
      final StringBuilder out = new StringBuilder();
      if (sent_msgs != null) {
        out.append(sent_msgs);
        out.append(", ");
      }
      out.append("send_conn_id=").append(send_conn_id);
      out.append(" (").append(age()).append(" ms old)");
      return out.toString();
    }
  }

  /**
   * Receiver-side state kept for one peer: the table of messages received from it, the
   * connection id of the receiving side and a last-update timestamp.
   */
  protected static final class ReceiverEntry {
    /** Stores all msgs rcvd by a certain peer in seqno-order. */
    protected final Table<Message> received_msgs;

    protected final short recv_conn_id;

    /** Wall-clock time (ms) of the last call to {@link #update()}. */
    protected final AtomicLong timestamp = new AtomicLong(0);

    /**
     * Creates the entry and stamps it with the current time.
     *
     * @param received_msgs the table holding messages received from the peer
     * @param recv_conn_id the connection id associated with this receiver entry
     */
    public ReceiverEntry(Table<Message> received_msgs, short recv_conn_id) {
      this.received_msgs = received_msgs;
      this.recv_conn_id = recv_conn_id;
      update();
    }

    /** Sets the timestamp to the current wall-clock time. */
    void update() {
      final long now = System.currentTimeMillis();
      timestamp.set(now);
    }

    /** @return milliseconds elapsed between the last {@link #update()} and now */
    long age() {
      final long now = System.currentTimeMillis();
      return now - timestamp.longValue();
    }

    /** @return the message table (if present), the connection id and the age of this entry */
    public String toString() {
      final StringBuilder out = new StringBuilder();
      if (received_msgs != null) {
        out.append(received_msgs);
        out.append(", ");
      }
      out.append("recv_conn_id=").append(recv_conn_id);
      out.append(" (").append(age()).append(" ms old)");
      return out.toString();
    }
  }

  /** Task that delegates to {@code reapIdleConnections()} of the enclosing protocol. */
  protected class ConnectionReaper implements Runnable {
    /** Reaps idle connections by calling {@code reapIdleConnections()}. */
    public void run() {
      reapIdleConnections();
    }

    /** @return a label naming the task and reporting conn_expiry_timeout as its interval */
    public String toString() {
      final String owner = UNICAST.class.getSimpleName();
      return owner + ": ConnectionReaper (interval=" + conn_expiry_timeout + " ms)";
    }
  }

  /**
   * Retransmitter task. Each run walks all entries of the send table and, for every non-empty
   * table of sent messages, re-sends the messages with seqnos in the range
   * [highest delivered + 1 .. highest received]. The task's label reports xmit_interval as its
   * interval.
   */
  protected class RetransmitTask implements Runnable {

    /** Re-sends the outstanding messages of every sender entry. */
    public void run() {
      for (SenderEntry entry : send_table.values()) {
        if (entry == null) {
          continue;
        }
        final Table<Message> window = entry.sent_msgs;
        if (window == null || window.isEmpty()) {
          continue;
        }

        final long first = window.getHighestDelivered() + 1;
        final long last = window.getHighestReceived();
        final List<Message> pending = window.get(first, last);
        if (pending == null) {
          continue;
        }
        for (Message msg : pending) {
          retransmit(msg);
        }
      }
    }

    /** @return a label naming the task and reporting xmit_interval as its interval */
    public String toString() {
      final String owner = UNICAST.class.getSimpleName();
      return owner + ": RetransmitTask (interval=" + xmit_interval + " ms)";
    }
  }
}
Exemplo n.º 4
0
  /**
   * Starts the merge protocol (only run by the merge leader). Essentially sends a MERGE_REQ to all
   * coordinators of all subgroups found. Each coord receives its digest and view and returns it.
   * The leader then computes the digest and view for the new group from the return values. Finally,
   * it sends this merged view/digest to all subgroup coordinators; each coordinator will install it
   * in their subgroup.
   */
  class MergeTask implements Runnable {
    private Thread thread = null;

    /** List of all subpartition coordinators and their members */
    private final ConcurrentMap<Address, Collection<Address>> coords =
        Util.createConcurrentMap(8, 0.75f, 8);

    /**
     * Starts the merge thread unless one is already alive. Before the thread is launched, the
     * coordinator map is rebuilt from {@code views}: every merge coordinator is mapped to the
     * members of its view, and every remaining merge participant is mapped to a list containing
     * only itself.
     *
     * @param views Guaranteed to be non-null and to have >= 2 members, or else this thread would
     *     not be started
     */
    public synchronized void start(Map<Address, View> views) {
      final boolean alreadyRunning = thread != null && thread.isAlive();
      if (alreadyRunning) {
        return; // the merge thread is already running
      }

      coords.clear();

      // now remove all members which don't have us in their view, so RPCs won't block (e.g. FLUSH)
      // https://jira.jboss.org/browse/JGRP-1061
      sanitizeViews(views);

      // Register each distinct coordinator together with the members of its view
      final Collection<Address> coordinators = Util.determineMergeCoords(views);
      for (Address coordinator : coordinators) {
        final View coordView = views.get(coordinator);
        if (coordView == null) {
          continue;
        }
        coords.put(coordinator, new ArrayList<Address>(coordView.getMembers()));
      }

      // Merge participants which are not coordinators are added with a membership list that
      // consists only of themselves
      final Collection<Address> participants = Util.determineMergeParticipants(views);
      participants.removeAll(coordinators);
      for (Address participant : participants) {
        final Collection<Address> self = new ArrayList<Address>();
        self.add(participant);
        coords.putIfAbsent(participant, self);
      }

      final Thread worker = gms.getThreadFactory().newThread(this, "MergeTask");
      thread = worker;
      worker.setDaemon(true);
      worker.start();
    }

    /** Interrupts the merge thread if it is still alive, then drops the reference to it. */
    public synchronized void stop() {
      final Thread running = thread;
      if (running != null && running.isAlive()) {
        running.interrupt();
      }
      thread = null;
    }

    /** @return true if a merge thread has been created and is still alive */
    public synchronized boolean isRunning() {
      final Thread current = thread;
      if (current == null) {
        return false;
      }
      return current.isAlive();
    }

    /**
     * Body of the merge thread: creates a merge id, runs the merge as leader and, if anything is
     * thrown, sends a merge-cancelled message to the coordinators and cancels the merge locally.
     * In all cases the flush is stopped (when a flush protocol is in the stack) and the thread
     * reference is cleared before the elapsed time is logged.
     */
    public void run() {
      // 1. Generate merge_id
      final MergeId new_merge_id = MergeId.create(gms.local_addr);
      final Collection<Address> coordsCopy = new ArrayList<Address>(coords.keySet());

      long start = System.currentTimeMillis();

      try {
        _run(new_merge_id, coordsCopy); // might remove members from coordsCopy
      } catch (Throwable ex) {
        if (log.isWarnEnabled()) log.warn(gms.local_addr + ": " + ex + ", merge is cancelled");
        sendMergeCancelledMessage(coordsCopy, new_merge_id);
        cancelMerge(
            new_merge_id); // the message above cancels the merge, too, but this is a 2nd line of
        // defense
      } finally {
        /* 5. if flush is in stack stop the flush for entire cluster [JGRP-700] - FLUSH: flushing should span merge */
        if (gms.flushProtocolInStack) gms.stopFlush();

        // start(), stop() and isRunning() access 'thread' under this object's monitor, so the
        // reference is cleared under the same monitor. Only clear it if it still refers to the
        // thread executing this method: after stop() followed by start(), 'thread' already points
        // to a newer merge thread, and nulling it here would make isRunning() report false and
        // allow yet another merge thread to be started concurrently.
        synchronized (this) {
          if (thread == Thread.currentThread()) thread = null;
        }
      }
      long diff = System.currentTimeMillis() - start;
      if (log.isDebugEnabled())
        log.debug(gms.local_addr + ": merge " + new_merge_id + " took " + diff + " ms");
    }

    /**
     * Runs the merge protocol as a leader: sets the merge id, collects merge data from the
     * subgroup coordinators, drops coordinators that did not respond or rejected the merge,
     * consolidates the remaining responses and sends the resulting view/digest to the remaining
     * coordinators. Returns without doing anything further if the merge id could not be set.
     *
     * @param new_merge_id the id of the merge being run
     * @param coordsCopy the caller's copy of the coordinator addresses; coordinators that did not
     *     respond are removed from it
     * @throws Exception if no merge responses are left, if the local address is no longer among
     *     the coordinators, or if the merge data could not be consolidated
     */
    protected void _run(MergeId new_merge_id, final Collection<Address> coordsCopy)
        throws Exception {
      if (!setMergeId(null, new_merge_id)) {
        log.warn("failed to set my own merge_id (" + merge_id + ") to " + new_merge_id);
        return;
      }

      if (log.isDebugEnabled()) {
        final String started =
            gms.local_addr + ": merge task " + merge_id + " started with " + coords.size() + " coords";
        log.debug(started);
      }

      /* 2. Fetch the current Views/Digests from all subgroup coordinators */
      final boolean gotAll =
          getMergeDataFromSubgroupCoordinators(coords, new_merge_id, gms.merge_timeout);
      if (!gotAll) {
        final List<Address> missing = merge_rsps.getMissing();
        if (log.isDebugEnabled()) {
          final String incomplete =
              "merge leader "
                  + gms.local_addr
                  + " did not get responses from all "
                  + coords.size()
                  + " partition coordinators; missing responses from "
                  + missing.size()
                  + " members, removing them from the merge";
          log.debug(incomplete);
        }
        merge_rsps.remove(missing);

        /* 3. Remove the non-responders from coords as well (so we'll send the new view only to
         * members who answered the merge request) */
        if (missing != null && !missing.isEmpty()) {
          coords.keySet().removeAll(missing);
          coordsCopy.removeAll(missing);
        }
      }

      // rejected merge responses are dropped from merge_rsps and coords, too
      removeRejectedMergeRequests(coords.keySet());
      if (merge_rsps.size() == 0) {
        throw new Exception("did not get any merge responses from partition coordinators");
      }

      // another member might have invoked a merge req on us before we got there...
      if (!coords.containsKey(gms.local_addr)) {
        throw new Exception("merge leader rejected merge request");
      }

      /* 4. Combine all views and digests into 1 View/1 Digest */
      final List<MergeData> responses = new ArrayList<MergeData>(merge_rsps.getResults().values());
      final MergeData combined_merge_data = consolidateMergeData(responses);
      if (combined_merge_data == null) {
        throw new Exception("could not consolidate merge");
      }

      /* Then send the new View/Digest to all coordinators (including myself). On reception, they
      will install the digest and view in all of their subgroup members */
      if (log.isDebugEnabled()) {
        final String installing =
            gms.local_addr
                + ": installing merge view "
                + combined_merge_data.view.getViewId()
                + " ("
                + combined_merge_data.view.size()
                + " members) in "
                + coords.size()
                + " coords";
        log.debug(installing);
      }
      sendMergeView(coords.keySet(), combined_merge_data, new_merge_id);
    }

    /**
     * Resets merge_rsps for the given coordinators, sends a MERGE_REQ (flagged OOB and INTERNAL)
     * to each of them and then waits for the responses for at most {@code timeout} msecs. The
     * responses themselves are collected in merge_rsps; this method only reports whether all of
     * them arrived.
     *
     * @param coords A map of coordinator addresses and associated membership lists
     * @param new_merge_id The new merge id
     * @param timeout Max number of msecs to wait for the merge responses from the subgroup coords
     * @return true if merge_rsps reports that all responses were received, false otherwise
     */
    protected boolean getMergeDataFromSubgroupCoordinators(
        Map<Address, Collection<Address>> coords, MergeId new_merge_id, long timeout) {
      final long began = System.currentTimeMillis();
      merge_rsps.reset(coords.keySet());
      if (log.isTraceEnabled()) {
        log.trace(gms.local_addr + ": sending MERGE_REQ to " + coords.keySet());
      }

      for (Map.Entry<Address, Collection<Address>> target : coords.entrySet()) {
        final Address destination = target.getKey();
        final Collection<Address> members = target.getValue();

        final Message request =
            new Message(destination).setFlag(Message.Flag.OOB, Message.Flag.INTERNAL);
        final GMS.GmsHeader header = new GMS.GmsHeader(GMS.GmsHeader.MERGE_REQ, members);
        header.mbr = gms.local_addr;
        header.merge_id = new_merge_id;
        request.putHeader(gms.getId(), header);

        gms.getDownProtocol().down(new Event(Event.MSG, request));
      }

      // wait until num_rsps_expected >= num_rsps or timeout elapsed
      merge_rsps.waitForAllResponses(timeout);
      final boolean complete = merge_rsps.hasAllResponses();
      final long ended = System.currentTimeMillis();
      if (log.isTraceEnabled()) {
        final String collected =
            gms.local_addr
                + ": collected "
                + merge_rsps.numberOfValidResponses()
                + " merge response(s) in "
                + (ended - began)
                + " ms";
        log.trace(collected);
      }
      return complete;
    }

    /**
     * Removes rejected merge responses from merge_rsps. For each rejected response with a
     * non-null sender, that sender is also removed from {@code coords}.
     *
     * @param coords the collection of coordinator addresses to prune
     */
    private void removeRejectedMergeRequests(Collection<Address> coords) {
      int rejected = 0;
      final Iterator<Map.Entry<Address, MergeData>> responses =
          merge_rsps.getResults().entrySet().iterator();

      while (responses.hasNext()) {
        final MergeData rsp = responses.next().getValue();
        if (!rsp.merge_rejected) {
          continue;
        }
        final Address sender = rsp.getSender();
        if (sender != null) {
          coords.remove(sender);
        }
        responses.remove();
        rejected++;
      }

      if (rejected > 0 && log.isTraceEnabled()) {
        log.trace(gms.local_addr + ": removed " + rejected + " rejected merge responses");
      }
    }

    /**
     * Combines all merge responses into a single MergeData holding one merged view and one merged
     * digest. The membership lists of all views are restricted to the members present in the
     * merged digest and then handed to {@code gms.computeNewMembership()}; the first member of
     * the resulting list becomes the coordinator of the new view. The id of the new view is the
     * highest view id found in the responses plus 1.
     *
     * @param merge_rsps A list of MergeData items. Elements with merge_rejected=true were removed
     *     before. Is guaranteed not to be null and to contain at least 1 member.
     * @return the consolidated merge data, or null if no digest or no coordinator could be
     *     determined
     */
    private MergeData consolidateMergeData(List<MergeData> merge_rsps) {
      long highestViewId = 0; // basis for the id of the new view
      final List<View> subgroups = new ArrayList<View>(11); // one View per subgroup
      final Collection<Collection<Address>> memberships = new ArrayList<Collection<Address>>();

      for (MergeData rsp : merge_rsps) {
        final View view = rsp.getView();
        if (view == null) {
          continue;
        }
        final ViewId vid = view.getVid();
        if (vid != null) {
          // track the max of all view ids
          highestViewId = Math.max(highestViewId, vid.getId());
        }
        // collect a private copy of each membership list and of each subgroup view
        memberships.add(new ArrayList<Address>(view.getMembers()));
        subgroups.add(view.copy());
      }

      // determine the new digest
      final Digest new_digest = consolidateDigests(merge_rsps, merge_rsps.size());
      if (new_digest == null) {
        return null;
      }

      // remove all members from the new member lists that are not in the digest
      final Collection<Address> digestMembers = new_digest.getMembers();
      for (Collection<Address> membership : memberships) {
        membership.retainAll(digestMembers);
      }

      final List<Address> merged_mbrs = gms.computeNewMembership(memberships);

      // the new coordinator is the first member of the consolidated membership list
      if (merged_mbrs.isEmpty()) {
        return null;
      }
      final Address new_coord = merged_mbrs.get(0);
      if (new_coord == null) {
        return null;
      }

      // should be the highest view ID seen up to now plus 1
      final ViewId new_vid = new ViewId(new_coord, highestViewId + 1);
      final MergeView new_view = new MergeView(new_vid, merged_mbrs, subgroups);

      if (log.isTraceEnabled()) {
        final String consolidated =
            gms.local_addr
                + ": consolidated view="
                + new_view
                + "\nconsolidated digest="
                + new_digest;
        log.trace(consolidated);
      }
      return new MergeData(gms.local_addr, new_view, new_digest);
    }

    /**
     * Merges the digests of all merge responses into one by feeding every non-null digest to
     * {@code MutableDigest.merge()}. (Per the original comment, the merged value for each sender
     * is expected to be max(highest_delivered), max(highest_received) - the merge semantics live
     * in MutableDigest and should be confirmed there.)
     *
     * @param merge_rsps the merge responses whose digests are combined
     * @param num_mbrs the value passed to the MutableDigest constructor
     * @return a copy of the merged digest
     */
    private Digest consolidateDigests(List<MergeData> merge_rsps, int num_mbrs) {
      final MutableDigest merged = new MutableDigest(num_mbrs);
      for (MergeData rsp : merge_rsps) {
        final Digest digest = rsp.getDigest();
        if (digest != null) {
          merged.merge(digest);
        }
      }
      return merged.copy();
    }
  }