Ejemplo n.º 1
0
 /**
  * Sends the ping back, attaching this server's session data to it.
  *
  * <p>Every entry of the snapshot returned by {@code zk.getTouchSnapshot()} is serialized as a
  * long key followed by an int value (presumably session id and timeout - confirm against the
  * snapshot's producer). The resulting bytes replace the data of {@code qp}, which is then handed
  * to {@code writePacket(qp, true)}.
  *
  * @param qp the ping packet to fill with the serialized snapshot and send back
  * @throws IOException if serializing the snapshot or writing the packet fails
  */
 protected void ping(QuorumPacket qp) throws IOException {
   final ByteArrayOutputStream payload = new ByteArrayOutputStream();
   final DataOutputStream out = new DataOutputStream(payload);

   // One (long, int) pair per snapshot entry, in the map's iteration order.
   final HashMap<Long, Integer> snapshot = zk.getTouchSnapshot();
   for (Entry<Long, Integer> touched : snapshot.entrySet()) {
     out.writeLong(touched.getKey().longValue());
     out.writeInt(touched.getValue().intValue());
   }

   qp.setData(payload.toByteArray());
   writePacket(qp, true);
 }
Ejemplo n.º 2
0
  /**
   * Performs the handshake with the leader once the connection is up, establishing a following /
   * observing connection.
   *
   * <p>The method sends a packet of type {@code pktType} whose zxid encodes this peer's accepted
   * epoch and whose payload is a serialized {@code LearnerInfo}, then reads the leader's reply
   * into the same packet object:
   *
   * <ul>
   *   <li>If the reply is {@code Leader.LEADERINFO}, the leader's protocol version is read from
   *       the reply data and an {@code Leader.ACKEPOCH} packet carrying the last logged zxid is
   *       sent back. Its 4-byte payload holds the current epoch when the leader's epoch is newer
   *       than the accepted one, or {@code -1} when both are equal (the reply must then not count
   *       as an ack for the new epoch).
   *   <li>Otherwise the reply has to be {@code Leader.NEWLEADER}; a newer epoch is accepted and
   *       the zxid of the reply is returned as is.
   * </ul>
   *
   * @param pktType the packet type used to announce this learner to the leader
   * @return the zxid the Leader sends for synchronization purposes
   * @throws IOException if packet I/O fails, if the leader's epoch is older than the accepted
   *     epoch, or if the first reply is neither {@code LEADERINFO} nor {@code NEWLEADER}
   */
  protected long registerWithLeader(int pktType) throws IOException {
    // Captured before any traffic; reported to the leader in the ACKEPOCH reply.
    final long lastLoggedZxid = self.getLastLoggedZxid();

    // Build the announcement: type, accepted epoch encoded as a zxid, and the sid payload.
    final QuorumPacket qp = new QuorumPacket();
    qp.setType(pktType);
    qp.setZxid(ZxidUtils.makeZxid(self.getAcceptedEpoch(), 0));

    final LearnerInfo learnerInfo = new LearnerInfo(self.getId(), 0x10000);
    final ByteArrayOutputStream infoBytes = new ByteArrayOutputStream();
    final BinaryOutputArchive infoArchive = BinaryOutputArchive.getArchive(infoBytes);
    infoArchive.writeRecord(learnerInfo, "LearnerInfo");
    qp.setData(infoBytes.toByteArray());

    // Send the announcement and reuse the same packet object for the leader's answer.
    writePacket(qp, true);
    readPacket(qp);
    final long newEpoch = ZxidUtils.getEpochFromZxid(qp.getZxid());

    if (qp.getType() != Leader.LEADERINFO) {
      // No LEADERINFO: take over a newer epoch and insist on NEWLEADER as the first packet.
      if (newEpoch > self.getAcceptedEpoch()) {
        self.setAcceptedEpoch(newEpoch);
      }
      if (qp.getType() != Leader.NEWLEADER) {
        LOG.error("First packet should have been NEWLEADER");
        throw new IOException("First packet should have been NEWLEADER");
      }
      return qp.getZxid();
    }

    // We are connected to a 1.0 server, so accept the new epoch and acknowledge it.
    leaderProtocolVersion = ByteBuffer.wrap(qp.getData()).getInt();
    final byte[] ackPayload = new byte[4];
    final ByteBuffer ackBuffer = ByteBuffer.wrap(ackPayload);
    if (newEpoch > self.getAcceptedEpoch()) {
      ackBuffer.putInt((int) self.getCurrentEpoch());
      self.setAcceptedEpoch(newEpoch);
    } else if (newEpoch == self.getAcceptedEpoch()) {
      // An epoch equal to the leader's has already been acked, so it cannot be acked again.
      // The last zxid still has to reach the leader so that we can sync with it if it does
      // assume leadership of the epoch; -1 marks this reply as not counting as an ack.
      ackBuffer.putInt(-1);
    } else {
      throw new IOException(
          "Leaders epoch, " + newEpoch + " is less than accepted epoch, " + self.getAcceptedEpoch());
    }

    // Send the ack.
    writePacket(new QuorumPacket(Leader.ACKEPOCH, lastLoggedZxid, ackPayload), true);
    return ZxidUtils.makeZxid(newEpoch, 0);
  }
  /**
   * Serves a single learner connection: receives packets from the peer and processes them.
   *
   * <p>The method walks through these phases in order:
   *
   * <ol>
   *   <li>Wrap the socket streams in buffered binary archives.
   *   <li>Read the first packet, which must be {@code FOLLOWERINFO} or {@code OBSERVERINFO};
   *       anything else ends the handler. The sid comes from the packet data when present,
   *       otherwise from {@code leader.followerCounter}.
   *   <li>While holding the read lock of the database log, compare the peer's last zxid with the
   *       committed log to choose between {@code SNAP} (default), {@code DIFF} and {@code TRUNC},
   *       queue the proposals/commits the peer is missing and register for forwarding.
   *   <li>Write {@code NEWLEADER} followed by the chosen sync packet and, for {@code SNAP}, the
   *       serialized snapshot plus its signature string.
   *   <li>Queue {@code UPTODATE}, start the sender thread, and wait for the peer's first
   *       {@code ACK} and for the leader's server to be running.
   *   <li>Loop forever dispatching {@code ACK}, {@code PING}, {@code REVALIDATE} and
   *       {@code REQUEST} packets; other types are ignored.
   * </ol>
   *
   * <p>However the method is left, the {@code finally} block queues {@code proposalOfDeath} and
   * calls {@code shutdown()}.
   */
  @Override
  public void run() {
    try {

      ia = BinaryInputArchive.getArchive(new BufferedInputStream(sock.getInputStream()));
      bufferedOutput = new BufferedOutputStream(sock.getOutputStream());
      oa = BinaryOutputArchive.getArchive(bufferedOutput);

      // The very first packet identifies the peer as a follower or an observer.
      QuorumPacket qp = new QuorumPacket();
      ia.readRecord(qp, "packet");
      if (qp.getType() != Leader.FOLLOWERINFO && qp.getType() != Leader.OBSERVERINFO) {
        LOG.error("First packet " + qp.toString() + " is not FOLLOWERINFO or OBSERVERINFO!");
        return;
      }
      if (qp.getData() != null) {
        // The payload starts with the peer's sid as a long.
        ByteBuffer bbsid = ByteBuffer.wrap(qp.getData());
        this.sid = bbsid.getLong();
      } else {
        // No payload: hand out an id from the leader's counter instead.
        this.sid = leader.followerCounter.getAndDecrement();
      }

      LOG.info("Follower sid: " + this.sid + " : info : " + leader.self.quorumPeers.get(this.sid));

      if (qp.getType() == Leader.OBSERVERINFO) {
        learnerType = LearnerType.OBSERVER;
      }

      long peerLastZxid = qp.getZxid();
      /* the default to send to the follower */
      int packetToSend = Leader.SNAP;
      long zxidToSend = 0;
      long leaderLastZxid = 0;
      // the zxid from which the follower needs to get updates
      long updates = peerLastZxid;

      /* Check whether we have proposals in memory so that we are able to
       * send a diff to the peer instead of a full snapshot.
       */
      ReentrantReadWriteLock lock = leader.zk.getZKDatabase().getLogLock();
      ReadLock rl = lock.readLock();
      // Acquire before entering the try so that the finally block can never
      // unlock a lock that was not actually obtained.
      rl.lock();
      try {
        final long maxCommittedLog = leader.zk.getZKDatabase().getmaxCommittedLog();
        final long minCommittedLog = leader.zk.getZKDatabase().getminCommittedLog();
        LOG.info(
            "Synchronizing with Follower sid: "
                + this.sid
                + " maxCommittedLog ="
                + Long.toHexString(maxCommittedLog)
                + " minCommittedLog = "
                + Long.toHexString(minCommittedLog)
                + " peerLastZxid = "
                + Long.toHexString(peerLastZxid));

        LinkedList<Proposal> proposals = leader.zk.getZKDatabase().getCommittedLog();

        if (proposals.size() != 0) {
          if ((maxCommittedLog >= peerLastZxid) && (minCommittedLog <= peerLastZxid)) {

            // as we look through proposals, this variable keeps track of previous
            // proposal Id.
            long prevProposalZxid = minCommittedLog;

            // Keep track of whether we are about to send the first packet.
            // Before sending the first packet, we have to tell the learner
            // whether to expect a trunc or a diff
            boolean firstPacket = true;

            for (Proposal propose : proposals) {
              // skip the proposals the peer already has
              if (propose.packet.getZxid() <= peerLastZxid) {
                prevProposalZxid = propose.packet.getZxid();
                continue;
              } else {
                // If we are sending the first packet, figure out whether to trunc
                // in case the follower has some proposals that the leader doesn't
                if (firstPacket) {
                  firstPacket = false;
                  // Does the peer have some proposals that the leader hasn't seen yet
                  if (prevProposalZxid < peerLastZxid) {
                    // send a trunc message before sending the diff
                    packetToSend = Leader.TRUNC;
                    LOG.info("Sending TRUNC");
                    zxidToSend = prevProposalZxid;
                    updates = zxidToSend;
                  } else {
                    // Just send the diff
                    packetToSend = Leader.DIFF;
                    LOG.info("Sending diff");
                    zxidToSend = maxCommittedLog;
                  }
                }
                // Queue the missing proposal immediately followed by its commit.
                queuePacket(propose.packet);
                QuorumPacket qcommit =
                    new QuorumPacket(Leader.COMMIT, propose.packet.getZxid(), null, null);
                queuePacket(qcommit);
              }
            }
          } else if (peerLastZxid > maxCommittedLog) {
            // The peer is ahead of everything in the committed log: cut it back.
            packetToSend = Leader.TRUNC;
            zxidToSend = maxCommittedLog;
            updates = zxidToSend;
          }
        } else {
          // just let the state transfer happen
        }

        leaderLastZxid = leader.startForwarding(this, updates);
        if (peerLastZxid == leaderLastZxid) {
          // We are in sync so we'll do an empty diff
          packetToSend = Leader.DIFF;
          zxidToSend = leaderLastZxid;
        }
      } finally {
        rl.unlock();
      }

      QuorumPacket newLeaderQP = new QuorumPacket(Leader.NEWLEADER, leaderLastZxid, null, null);
      oa.writeRecord(newLeaderQP, "packet");
      bufferedOutput.flush();
      // Need to set the zxidToSend to the latest zxid
      if (packetToSend == Leader.SNAP) {
        zxidToSend = leader.zk.getZKDatabase().getDataTreeLastProcessedZxid();
      }
      oa.writeRecord(new QuorumPacket(packetToSend, zxidToSend, null, null), "packet");
      bufferedOutput.flush();

      /* if we are not truncating or sending a diff just send a snapshot */
      if (packetToSend == Leader.SNAP) {
        LOG.info(
            "Sending snapshot last zxid of peer is 0x"
                + Long.toHexString(peerLastZxid)
                + " "
                + " zxid of leader is 0x"
                + Long.toHexString(leaderLastZxid)
                + "sent zxid of db as 0x"
                + Long.toHexString(zxidToSend));
        // Dump data to peer
        leader.zk.getZKDatabase().serializeSnapshot(oa);
        oa.writeString("BenWasHere", "signature");
      }
      bufferedOutput.flush();

      // Mutation packets will be queued during the serialize,
      // so we need to mark when the peer can actually start
      // using the data
      //
      queuedPackets.add(new QuorumPacket(Leader.UPTODATE, -1, null, null));

      // Start sending packets
      new Thread() {
        public void run() {
          Thread.currentThread().setName("Sender-" + sock.getRemoteSocketAddress());
          try {
            sendPackets();
          } catch (InterruptedException e) {
            LOG.warn("Unexpected interruption", e);
          }
        }
      }.start();

      /*
       * Have to wait for the first ACK, wait until
       * the leader is ready, and only then we can
       * start processing messages.
       */
      qp = new QuorumPacket();
      ia.readRecord(qp, "packet");
      if (qp.getType() != Leader.ACK) {
        LOG.error("Next packet was supposed to be an ACK");
        return;
      }
      leader.processAck(this.sid, qp.getZxid(), sock.getLocalSocketAddress());

      /*
       * Wait until leader starts up
       */
      synchronized (leader.zk) {
        while (!leader.zk.isRunning()) {
          leader.zk.wait(500);
        }
      }

      // Main loop: read one packet at a time and dispatch on its type.
      while (true) {
        qp = new QuorumPacket();
        ia.readRecord(qp, "packet");

        long traceMask = ZooTrace.SERVER_PACKET_TRACE_MASK;
        if (qp.getType() == Leader.PING) {
          traceMask = ZooTrace.SERVER_PING_TRACE_MASK;
        }
        if (LOG.isTraceEnabled()) {
          ZooTrace.logQuorumPacket(LOG, traceMask, 'i', qp);
        }
        // Any packet from the peer refreshes the tick recorded for it.
        tickOfLastAck = leader.self.tick;

        ByteBuffer bb;
        long sessionId;
        int cxid;
        int type;

        switch (qp.getType()) {
          case Leader.ACK:
            if (this.learnerType == LearnerType.OBSERVER) {
              if (LOG.isDebugEnabled()) {
                LOG.debug("Received ACK from Observer  " + this.sid);
              }
            }
            leader.processAck(this.sid, qp.getZxid(), sock.getLocalSocketAddress());
            break;
          case Leader.PING:
            // Process the touches: the payload is a sequence of (long, int) pairs.
            ByteArrayInputStream bis = new ByteArrayInputStream(qp.getData());
            DataInputStream dis = new DataInputStream(bis);
            while (dis.available() > 0) {
              long sess = dis.readLong();
              int to = dis.readInt();
              leader.zk.touch(sess, to);
            }
            break;
          case Leader.REVALIDATE:
            // Payload: session id (long) and timeout (int). The reply echoes the id
            // followed by a boolean telling whether the touch succeeded.
            bis = new ByteArrayInputStream(qp.getData());
            dis = new DataInputStream(bis);
            long id = dis.readLong();
            int to = dis.readInt();
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            DataOutputStream dos = new DataOutputStream(bos);
            dos.writeLong(id);
            boolean valid = leader.zk.touch(id, to);
            if (valid) {
              try {
                // set the session owner
                // as the follower that
                // owns the session
                leader.zk.setOwner(id, this);
              } catch (SessionExpiredException e) {
                LOG.error(
                    "Somehow session "
                        + Long.toHexString(id)
                        + " expired right after being renewed! (impossible)",
                    e);
              }
            }
            if (LOG.isTraceEnabled()) {
              ZooTrace.logTraceMessage(
                  LOG,
                  ZooTrace.SESSION_TRACE_MASK,
                  "Session 0x" + Long.toHexString(id) + " is valid: " + valid);
            }
            dos.writeBoolean(valid);
            // The received packet object is reused for the reply.
            qp.setData(bos.toByteArray());
            queuedPackets.add(qp);
            break;
          case Leader.REQUEST:
            // Payload header: session id (long), cxid (int), op type (int); the
            // remaining bytes are passed on as the request body.
            bb = ByteBuffer.wrap(qp.getData());
            sessionId = bb.getLong();
            cxid = bb.getInt();
            type = bb.getInt();
            bb = bb.slice();
            Request si;
            if (type == OpCode.sync) {
              si = new LearnerSyncRequest(this, sessionId, cxid, type, bb, qp.getAuthinfo());
            } else {
              si = new Request(null, sessionId, cxid, type, bb, qp.getAuthinfo());
            }
            si.setOwner(this);
            leader.zk.submitRequest(si);
            break;
          default:
            // Other packet types are ignored.
        }
      }
    } catch (IOException e) {
      if (sock != null && !sock.isClosed()) {
        LOG.error("Unexpected exception causing shutdown while sock " + "still open", e);
        // close the socket to make sure the
        // other side can see it being close
        try {
          sock.close();
        } catch (IOException ie) {
          // do nothing
        }
      }
    } catch (InterruptedException e) {
      // NOTE(review): the interrupt status is not restored here; restoring it would
      // presumably make the blocking put in the finally block fail - confirm before changing.
      LOG.error("Unexpected exception causing shutdown", e);
    } finally {
      LOG.warn(
          "******* GOODBYE "
              + (sock != null ? sock.getRemoteSocketAddress() : "<null>")
              + " ********");
      // Send the packet of death
      try {
        queuedPackets.put(proposalOfDeath);
      } catch (InterruptedException e) {
        LOG.warn("Ignoring unexpected exception", e);
      }
      shutdown();
    }
  }