예제 #1
0
  /**
   * Runs a leader election by repeatedly polling every configured peer over UDP until one
   * candidate is backed by more than half of {@code self.quorumPeers}, or until
   * {@code self.running} becomes false.
   *
   * <p>Each round sends a 4-byte request holding an id taken from {@code epochGen} to every peer
   * and expects a 28-byte reply laid out as: echoed xid (int), responder id (long), then the two
   * longs passed to {@code new Vote(long, long)}. Short replies and replies with a mismatched xid
   * are logged and skipped; I/O errors (including receive timeouts) are logged and ignored because
   * peers may be down. The collected votes are tallied by {@code countVotes}.
   *
   * <p>JMX registration of the election bean is best-effort. If the UDP socket cannot be created
   * the process is terminated with exit status 4.
   *
   * <p>The UDP socket is always closed before this method returns or throws.
   *
   * @return the winning vote once a majority is reached, or {@code null} if {@code self.running}
   *     turned false before an election concluded
   * @throws InterruptedException if the thread is interrupted while sleeping between rounds
   * @throws IllegalArgumentException if a peer address cannot be set on the request packet; the
   *     message includes the offending address
   */
  public Vote lookForLeader() throws InterruptedException {
    // Best-effort JMX registration: on failure we log and carry on without a bean.
    try {
      self.jmxLeaderElectionBean = new LeaderElectionBean();
      MBeanRegistry.getInstance().register(self.jmxLeaderElectionBean, self.jmxLocalPeerBean);
    } catch (Exception e) {
      LOG.warn("Failed to register with JMX", e);
      self.jmxLeaderElectionBean = null;
    }

    // Declared outside the try so the finally block can release it on every exit path
    // (normal return, loop termination, IllegalArgumentException, InterruptedException).
    DatagramSocket s = null;
    try {
      // We are going to look for a leader by casting a vote for ourself
      self.setCurrentVote(new Vote(self.getId(), self.getLastLoggedZxid()));
      byte[] requestBytes = new byte[4];
      ByteBuffer requestBuffer = ByteBuffer.wrap(requestBytes);
      // 28 bytes = xid (4) + responder id (8) + two longs describing the responder's vote (16)
      byte[] responseBytes = new byte[28];
      ByteBuffer responseBuffer = ByteBuffer.wrap(responseBytes);
      try {
        s = new DatagramSocket();
        // Bound the wait for each peer so one silent host cannot stall the round.
        s.setSoTimeout(200);
      } catch (SocketException e1) {
        LOG.error("Socket exception when creating socket for leader election", e1);
        System.exit(4);
      }
      DatagramPacket requestPacket = new DatagramPacket(requestBytes, requestBytes.length);
      DatagramPacket responsePacket = new DatagramPacket(responseBytes, responseBytes.length);
      HashMap<InetSocketAddress, Vote> votes =
          new HashMap<InetSocketAddress, Vote>(self.quorumPeers.size());
      // The same xid is reused for every round of this election; replies must echo it.
      int xid = epochGen.nextInt();
      while (self.running) {
        votes.clear();
        requestBuffer.clear();
        requestBuffer.putInt(xid);
        requestPacket.setLength(4);
        HashSet<Long> heardFrom = new HashSet<Long>();
        for (QuorumServer server : self.quorumPeers.values()) {
          LOG.info("Server address: " + server.addr);
          try {
            requestPacket.setSocketAddress(server.addr);
          } catch (IllegalArgumentException e) {
            // Sun doesn't include the address that causes this
            // exception to be thrown, so we wrap the exception
            // in order to capture this critical detail.
            throw new IllegalArgumentException(
                "Unable to set socket address on packet, msg:"
                    + e.getMessage()
                    + " with addr:"
                    + server.addr,
                e);
          }

          try {
            s.send(requestPacket);
            // receive() shrinks the packet length to the datagram size, so reset it first.
            responsePacket.setLength(responseBytes.length);
            s.receive(responsePacket);
            if (responsePacket.getLength() != responseBytes.length) {
              LOG.error("Got a short response: " + responsePacket.getLength());
              continue;
            }
            responseBuffer.clear();
            int recvedXid = responseBuffer.getInt();
            if (recvedXid != xid) {
              LOG.error("Got bad xid: expected " + xid + " got " + recvedXid);
              continue;
            }
            long peerId = responseBuffer.getLong();
            heardFrom.add(peerId);
            Vote vote = new Vote(responseBuffer.getLong(), responseBuffer.getLong());
            InetSocketAddress addr = (InetSocketAddress) responsePacket.getSocketAddress();
            votes.put(addr, vote);
          } catch (IOException e) {
            LOG.warn("Ignoring exception while looking for leader", e);
            // Errors are okay, since hosts may be
            // down
          }
        }
        ElectionResult result = countVotes(votes, heardFrom);
        if (result.winner.id >= 0) {
          // Adopt the tally's suggested vote for the next round...
          self.setCurrentVote(result.vote);
          // ...and finish if the winner is backed by a strict majority of configured peers.
          if (result.winningCount > (self.quorumPeers.size() / 2)) {
            self.setCurrentVote(result.winner);
            // Close eagerly so the socket is released before the follower delay below;
            // the close in finally is then a harmless repeat.
            s.close();
            Vote current = self.getCurrentVote();
            self.setPeerState(
                (current.id == self.getId()) ? ServerState.LEADING : ServerState.FOLLOWING);
            if (self.getPeerState() == ServerState.FOLLOWING) {
              Thread.sleep(100);
            }
            return current;
          }
        }
        // No majority yet: pause before polling the peers again.
        Thread.sleep(1000);
      }
      return null;
    } finally {
      // Release the UDP socket regardless of how the election ended.
      if (s != null) {
        s.close();
      }
      try {
        if (self.jmxLeaderElectionBean != null) {
          MBeanRegistry.getInstance().unregister(self.jmxLeaderElectionBean);
        }
      } catch (Exception e) {
        LOG.warn("Failed to unregister with JMX", e);
      }
      self.jmxLeaderElectionBean = null;
    }
  }
  /**
   * Looks for the leader of the ensemble.
   *
   * <p>After a best-effort JMX registration, the method increments {@code logicalclock}, resets
   * the proposal from {@code getInitId()}, {@code getInitLastLoggedZxid()} and
   * {@code getPeerEpoch()}, and calls {@code sendNotifications()}. It then consumes notifications
   * from {@code recvqueue} for as long as the peer state is {@code LOOKING} and {@code stop} is
   * false, handling each notification according to the sender's reported state.
   *
   * @return the vote this peer ends the election with, or {@code null} if the loop exits without a
   *     decision (the peer is no longer {@code LOOKING}, or {@code stop} was set)
   * @throws InterruptedException if the thread is interrupted while waiting for notifications
   */
  public Vote lookForLeader() throws InterruptedException {
    // Best-effort JMX registration: on failure we log and carry on without a bean.
    try {
      self.jmxLeaderElectionBean = new LeaderElectionBean();
      MBeanRegistry.getInstance().register(self.jmxLeaderElectionBean, self.jmxLocalPeerBean);
    } catch (Exception e) {
      LOG.warn("Failed to register with JMX", e);
      self.jmxLeaderElectionBean = null;
    }
    // Stamp the election start time only if it has not been set yet (0 is treated as unset here).
    if (self.start_fle == 0) {
      self.start_fle = System.currentTimeMillis();
    }
    try {
      // Votes received from LOOKING peers (and same-epoch FOLLOWING/LEADING peers), keyed by
      // sender sid.
      HashMap<Long, Vote> recvset = new HashMap<Long, Vote>();

      // Votes reported by peers that are already FOLLOWING or LEADING, keyed by sender sid.
      HashMap<Long, Vote> outofelection = new HashMap<Long, Vote>();

      // Current poll timeout in milliseconds; starts at finalizeWait and doubles on each miss.
      int notTimeout = finalizeWait;

      // Start a new election round and propose the initial vote.
      synchronized (this) {
        logicalclock++;
        updateProposal(getInitId(), getInitLastLoggedZxid(), getPeerEpoch());
      }

      LOG.info(
          "New election. My id =  "
              + self.getId()
              + ", proposed zxid=0x"
              + Long.toHexString(proposedZxid));
      sendNotifications();

      /*
       * Loop in which we exchange notifications until we find a leader
       */

      while ((self.getPeerState() == ServerState.LOOKING) && (!stop)) {
        /*
         * Remove the next notification from the queue, waiting at most
         * notTimeout milliseconds for one to arrive.
         */
        Notification n = recvqueue.poll(notTimeout, TimeUnit.MILLISECONDS);

        /*
         * Sends more notifications if haven't received enough.
         * Otherwise processes new notification.
         */
        if (n == null) {
          // Nothing arrived in time: resend if manager.haveDelivered() reports true,
          // otherwise ask the manager to (re)connect to all peers.
          if (manager.haveDelivered()) {
            sendNotifications();
          } else {
            manager.connectAll();
          }

          /*
           * Exponential backoff, capped at maxNotificationInterval
           */
          int tmpTimeOut = notTimeout * 2;
          notTimeout =
              (tmpTimeOut < maxNotificationInterval ? tmpTimeOut : maxNotificationInterval);
          LOG.info("Notification time out: " + notTimeout);
        } else if (self.getVotingView().containsKey(n.sid)) {
          /*
           * Only proceed if the vote comes from a replica in the
           * voting view.
           */
          switch (n.state) {
            case LOOKING:
              // If notification > current, replace and send messages
              // out
              if (n.electionEpoch > logicalclock) {
                // The sender is in a newer election round: adopt its epoch, drop the votes
                // collected so far, and compare its vote against our initial vote (not the
                // current proposal).
                logicalclock = n.electionEpoch;
                recvset.clear();
                if (totalOrderPredicate(
                    n.leader,
                    n.zxid,
                    n.peerEpoch,
                    getInitId(),
                    getInitLastLoggedZxid(),
                    getPeerEpoch())) {
                  updateProposal(n.leader, n.zxid, n.peerEpoch);
                } else {
                  updateProposal(getInitId(), getInitLastLoggedZxid(), getPeerEpoch());
                }
                sendNotifications();
              } else if (n.electionEpoch < logicalclock) {
                if (LOG.isDebugEnabled()) {
                  LOG.debug(
                      "Notification election epoch is smaller than logicalclock. n.electionEpoch = 0x"
                          + Long.toHexString(n.electionEpoch)
                          + ", logicalclock=0x"
                          + Long.toHexString(logicalclock));
                }
                // Stale round: leave the switch so this vote is NOT recorded in recvset.
                break;
              } else if (totalOrderPredicate(
                  n.leader, n.zxid, n.peerEpoch, proposedLeader, proposedZxid, proposedEpoch)) {
                // Same round and the sender's vote wins the comparison: adopt it and re-announce.
                updateProposal(n.leader, n.zxid, n.peerEpoch);
                sendNotifications();
              }

              if (LOG.isDebugEnabled()) {
                LOG.debug(
                    "Adding vote: from="
                        + n.sid
                        + ", proposed leader="
                        + n.leader
                        + ", proposed zxid=0x"
                        + Long.toHexString(n.zxid)
                        + ", proposed election epoch=0x"
                        + Long.toHexString(n.electionEpoch));
              }

              // Record (or overwrite) the sender's latest vote for this round.
              recvset.put(n.sid, new Vote(n.leader, n.zxid, n.electionEpoch, n.peerEpoch));

              // termPredicate decides whether recvset is enough to settle on the current
              // proposal (presumably a quorum check -- confirm against termPredicate).
              if (termPredicate(
                  recvset, new Vote(proposedLeader, proposedZxid, logicalclock, proposedEpoch))) {

                // Verify if there is any change in the proposed
                // leader: keep draining the queue, waiting finalizeWait ms per poll. A
                // notification that beats the current proposal is put back on the queue for
                // the main loop to process, and the drain stops with n != null.
                while ((n = recvqueue.poll(finalizeWait, TimeUnit.MILLISECONDS)) != null) {
                  if (totalOrderPredicate(
                      n.leader, n.zxid, n.peerEpoch, proposedLeader, proposedZxid, proposedEpoch)) {
                    recvqueue.put(n);
                    break;
                  }
                }

                /*
                 * This predicate is true once we don't read any new
                 * relevant message from the reception queue
                 */
                if (n == null) {
                  // Settle: LEADING if we proposed ourselves, otherwise whatever
                  // learningState() returns.
                  self.setPeerState(
                      (proposedLeader == self.getId()) ? ServerState.LEADING : learningState());

                  Vote endVote =
                      new Vote(proposedLeader, proposedZxid, logicalclock, proposedEpoch);
                  leaveInstance(endVote);
                  return endVote;
                }
              }
              break;
            case OBSERVING:
              // Notifications from observers are only logged; they do not affect the election.
              LOG.debug("Notification from observer: " + n.sid);
              break;
            case FOLLOWING:
            case LEADING:
              /*
               * Consider all notifications from the same epoch
               * together.
               */
              if (n.electionEpoch == logicalclock) {
                recvset.put(n.sid, new Vote(n.leader, n.zxid, n.electionEpoch, n.peerEpoch));

                // Same round: if ooePredicate accepts this vote given recvset, adopt the
                // sender's leader and leave the election.
                if (ooePredicate(recvset, outofelection, n)) {
                  self.setPeerState(
                      (n.leader == self.getId()) ? ServerState.LEADING : learningState());

                  Vote endVote = new Vote(n.leader, n.zxid, n.electionEpoch, n.peerEpoch);
                  leaveInstance(endVote);
                  return endVote;
                }
              }

              /*
               * Before joining an established ensemble, verify a
               * majority is following the same leader.
               */
              outofelection.put(
                  n.sid,
                  new Vote(n.version, n.leader, n.zxid, n.electionEpoch, n.peerEpoch, n.state));

              // Here the out-of-election votes are checked against themselves; on success we
              // also move logicalclock to the sender's election epoch before leaving.
              if (ooePredicate(outofelection, outofelection, n)) {
                synchronized (this) {
                  logicalclock = n.electionEpoch;
                  self.setPeerState(
                      (n.leader == self.getId()) ? ServerState.LEADING : learningState());
                }
                Vote endVote = new Vote(n.leader, n.zxid, n.electionEpoch, n.peerEpoch);
                leaveInstance(endVote);
                return endVote;
              }
              break;
            default:
              LOG.warn("Notification state unrecognized: {} (n.state), {} (n.sid)", n.state, n.sid);
              break;
          }
        } else {
          LOG.warn("Ignoring notification from non-cluster member " + n.sid);
        }
      }
      // Loop ended without a decision: the peer is no longer LOOKING or stop was set.
      return null;
    } finally {
      // Always unregister the election bean (if registration succeeded) and clear the reference.
      try {
        if (self.jmxLeaderElectionBean != null) {
          MBeanRegistry.getInstance().unregister(self.jmxLeaderElectionBean);
        }
      } catch (Exception e) {
        LOG.warn("Failed to unregister with JMX", e);
      }
      self.jmxLeaderElectionBean = null;
    }
  }