Example #1
0
 /**
  * 开始ping
  *
  * @param qp
  * @throws IOException
  */
 protected void ping(QuorumPacket qp) throws IOException {
   // Send back the ping with our session data
   ByteArrayOutputStream bos = new ByteArrayOutputStream();
   DataOutputStream dos = new DataOutputStream(bos);
   HashMap<Long, Integer> touchTable = zk.getTouchSnapshot();
   for (Entry<Long, Integer> entry : touchTable.entrySet()) {
     dos.writeLong(entry.getKey());
     dos.writeInt(entry.getValue());
   }
   qp.setData(bos.toByteArray());
   writePacket(qp, true);
 }
Example #2
0
 /**
  * read a packet from the leader
  *
  * @param pp the packet to be instantiated
  * @throws IOException
  */
 void readPacket(QuorumPacket pp) throws IOException {
   synchronized (leaderIs) {
     leaderIs.readRecord(pp, "packet");
   }
   long traceMask = ZooTrace.SERVER_PACKET_TRACE_MASK;
   if (pp.getType() == Leader.PING) {
     traceMask = ZooTrace.SERVER_PING_TRACE_MASK;
   }
   if (LOG.isTraceEnabled()) {
     ZooTrace.logQuorumPacket(LOG, traceMask, 'i', pp);
   }
 }
  /**
   * This method will use the thread to send packets added to the queuedPackets list
   *
   * @throws InterruptedException
   */
  private void sendPackets() throws InterruptedException {
    long traceMask = ZooTrace.SERVER_PACKET_TRACE_MASK;
    while (true) {
      try {
        QuorumPacket p;
        p = queuedPackets.poll();
        if (p == null) {
          bufferedOutput.flush();
          p = queuedPackets.take();
        }

        if (p == proposalOfDeath) {
          // Packet of death!
          break;
        }
        if (p.getType() == Leader.PING) {
          traceMask = ZooTrace.SERVER_PING_TRACE_MASK;
        }
        if (LOG.isTraceEnabled()) {
          ZooTrace.logQuorumPacket(LOG, traceMask, 'o', p);
        }
        oa.writeRecord(p, "packet");
      } catch (IOException e) {
        if (!sock.isClosed()) {
          LOG.warn("Unexpected exception at " + this, e);
          try {
            // this will cause everything to shutdown on
            // this learner handler and will help notify
            // the learner/observer instantaneously
            sock.close();
          } catch (IOException ie) {
            LOG.warn("Error closing socket for handler " + this, ie);
          }
        }
        break;
      }
    }
  }
Example #4
0
 protected void revalidate(QuorumPacket qp) throws IOException {
   ByteArrayInputStream bis = new ByteArrayInputStream(qp.getData());
   DataInputStream dis = new DataInputStream(bis);
   long sessionId = dis.readLong();
   boolean valid = dis.readBoolean();
   ServerCnxn cnxn = pendingRevalidations.remove(sessionId);
   if (cnxn == null) {
     LOG.warn("Missing session 0x" + Long.toHexString(sessionId) + " for validation");
   } else {
     zk.finishSessionInit(cnxn, valid);
   }
   if (LOG.isTraceEnabled()) {
     ZooTrace.logTraceMessage(
         LOG,
         ZooTrace.SESSION_TRACE_MASK,
         "Session 0x" + Long.toHexString(sessionId) + " is valid: " + valid);
   }
 }
Example #5
0
  /**
   * Finally, synchronize our history with the Leader.
   *
   * @param newLeaderZxid
   * @throws IOException
   * @throws InterruptedException
   */
  protected void syncWithLeader(long newLeaderZxid) throws IOException, InterruptedException {
    QuorumPacket ack = new QuorumPacket(Leader.ACK, 0, null);
    QuorumPacket qp = new QuorumPacket();
    long newEpoch = ZxidUtils.getEpochFromZxid(newLeaderZxid);

    readPacket(qp);
    LinkedList<Long> packetsCommitted = new LinkedList<Long>();
    LinkedList<PacketInFlight> packetsNotCommitted = new LinkedList<PacketInFlight>();
    synchronized (zk) {
      if (qp.getType() == Leader.DIFF) {
        LOG.info("Getting a diff from the leader 0x" + Long.toHexString(qp.getZxid()));
      } else if (qp.getType() == Leader.SNAP) {
        LOG.info("Getting a snapshot from leader");
        // The leader is going to dump the database
        // clear our own database and read
        zk.getZKDatabase().clear();
        zk.getZKDatabase().deserializeSnapshot(leaderIs);
        String signature = leaderIs.readString("signature");
        if (!signature.equals("BenWasHere")) {
          LOG.error("Missing signature. Got " + signature);
          throw new IOException("Missing signature");
        }
        zk.getZKDatabase().commit();
        zk.getZKDatabase().getM2mData().clear();
      } else if (qp.getType() == Leader.TRUNC) {
        // we need to truncate the log to the lastzxid of the leader
        LOG.warn(
            "Truncating log to get in sync with the leader 0x" + Long.toHexString(qp.getZxid()));
        boolean truncated = zk.getZKDatabase().truncateLog(qp.getZxid());
        if (!truncated) {
          // not able to truncate the log
          LOG.error("Not able to truncate the log " + Long.toHexString(qp.getZxid()));
          System.exit(13);
        }

      } else {
        LOG.error("Got unexpected packet from leader " + qp.getType() + " exiting ... ");
        System.exit(13);
      }
      zk.getZKDatabase().setlastProcessedZxid(qp.getZxid());
      zk.createSessionTracker();

      long lastQueued = 0;

      // in V1.0 we take a snapshot when we get the NEWLEADER message, but
      // in pre V1.0
      // we take the snapshot at the UPDATE, since V1.0 also gets the
      // UPDATE (after the NEWLEADER)
      // we need to make sure that we don't take the snapshot twice.
      boolean snapshotTaken = false;
      // we are now going to start getting transactions to apply followed
      // by an UPTODATE
      outerLoop:
      while (self.isRunning()) {
        readPacket(qp);
        switch (qp.getType()) {
          case Leader.PROPOSAL:
            PacketInFlight pif = new PacketInFlight();
            pif.hdr = new M2mTxnHeader();
            pif.rec = M2mSerializeUtils.deserializeTxn(qp.getData(), pif.hdr);
            if (pif.hdr.getZxid() != lastQueued + 1) {
              LOG.warn(
                  "Got zxid 0x"
                      + Long.toHexString(pif.hdr.getZxid())
                      + " expected 0x"
                      + Long.toHexString(lastQueued + 1));
            }
            lastQueued = pif.hdr.getZxid();
            packetsNotCommitted.add(pif);
            break;
          case Leader.COMMIT:
            if (!snapshotTaken) {
              pif = packetsNotCommitted.peekFirst();
              if (pif.hdr.getZxid() != qp.getZxid()) {
                LOG.warn(
                    "Committing " + qp.getZxid() + ", but next proposal is " + pif.hdr.getZxid());
              } else {
                zk.processTxn(pif.hdr, pif.rec);
                packetsNotCommitted.remove();
              }
            } else {
              packetsCommitted.add(qp.getZxid());
            }
            break;
          case Leader.INFORM:
            /*
             * Only observer get this type of packet. We treat this as
             * receiving PROPOSAL and COMMMIT.
             */
            PacketInFlight packet = new PacketInFlight();
            packet.hdr = new M2mTxnHeader();
            packet.rec = M2mSerializeUtils.deserializeTxn(qp.getData(), packet.hdr);
            // Log warning message if txn comes out-of-order
            if (packet.hdr.getZxid() != lastQueued + 1) {
              LOG.warn(
                  "Got zxid 0x"
                      + Long.toHexString(packet.hdr.getZxid())
                      + " expected 0x"
                      + Long.toHexString(lastQueued + 1));
            }
            lastQueued = packet.hdr.getZxid();
            if (!snapshotTaken) {
              // Apply to db directly if we haven't taken the snapshot
              zk.processTxn(packet.hdr, packet.rec);
            } else {
              packetsNotCommitted.add(packet);
              packetsCommitted.add(qp.getZxid());
            }
            break;
          case Leader.UPTODATE:
            if (!snapshotTaken) { // true for the pre v1.0 case
              zk.takeSnapshot();
              self.setCurrentEpoch(newEpoch);
            }
            self.cnxnFactory.addZooKeeperServer(self.getHandleIp(), zk);
            break outerLoop;
          case Leader.NEWLEADER: // it will be NEWLEADER in v1.0
            // Create updatingEpoch file and remove it after current
            // epoch is set. QuorumPeer.loadDataBase() uses this file to
            // detect the case where the server was terminated after
            // taking a snapshot but before setting the current epoch.
            // File updating = new
            // File(self.getTxnFactory().getSnapDir(),
            // QuorumPeer.UPDATING_EPOCH_FILENAME);
            // if (!updating.exists() && !updating.createNewFile()) {
            // throw new IOException("Failed to create "
            // + updating.toString());
            // }
            // zk.takeSnapshot();
            // self.setCurrentEpoch(newEpoch);
            // if (!updating.delete()) {
            // throw new IOException("Failed to delete "
            // + updating.toString());
            // }
            snapshotTaken = true;
            writePacket(new QuorumPacket(Leader.ACK, newLeaderZxid, null), true);
            break;
        }
      }
    }
    ack.setZxid(ZxidUtils.makeZxid(newEpoch, 0));
    writePacket(ack, true);
    sock.setSoTimeout(self.tickTime * self.syncLimit);
    zk.startup();
    /*
     * Update the election vote here to ensure that all members of the
     * ensemble report the same vote to new servers that start up and send
     * leader election notifications to the ensemble.
     *
     * @see https://issues.apache.org/jira/browse/ZOOKEEPER-1732
     */
    self.updateElectionVote(newEpoch);

    // We need to log the stuff that came in between the snapshot and the
    // uptodate
    if (zk instanceof FollowerZooKeeperServer) {
      FollowerZooKeeperServer fzk = (FollowerZooKeeperServer) zk;
      for (PacketInFlight p : packetsNotCommitted) {
        fzk.logRequest(p.hdr, p.rec);
      }
      for (Long zxid : packetsCommitted) {
        fzk.commit(zxid);
      }
    } else {
      // New server type need to handle in-flight packets
      throw new UnsupportedOperationException("Unknown server type");
    }
  }
Example #6
0
  /**
   * Once connected to the leader, perform the handshake protocol to establish a following /
   * observing connection.
   *
   * @param pktType
   * @return the zxid the Leader sends for synchronization purposes.
   * @throws IOException
   */
  protected long registerWithLeader(int pktType) throws IOException {
    /*
     * Send follower info, including last zxid and sid
     */
    long lastLoggedZxid = self.getLastLoggedZxid();
    QuorumPacket qp = new QuorumPacket();
    qp.setType(pktType);
    qp.setZxid(ZxidUtils.makeZxid(self.getAcceptedEpoch(), 0));

    /*
     * Add sid to payload
     */
    LearnerInfo li = new LearnerInfo(self.getId(), 0x10000);
    ByteArrayOutputStream bsid = new ByteArrayOutputStream();
    BinaryOutputArchive boa = BinaryOutputArchive.getArchive(bsid);
    boa.writeRecord(li, "LearnerInfo");
    qp.setData(bsid.toByteArray());

    writePacket(qp, true);
    readPacket(qp);
    final long newEpoch = ZxidUtils.getEpochFromZxid(qp.getZxid());
    if (qp.getType() == Leader.LEADERINFO) {
      // we are connected to a 1.0 server so accept the new epoch and read
      // the next packet
      leaderProtocolVersion = ByteBuffer.wrap(qp.getData()).getInt();
      byte epochBytes[] = new byte[4];
      final ByteBuffer wrappedEpochBytes = ByteBuffer.wrap(epochBytes);
      if (newEpoch > self.getAcceptedEpoch()) {
        wrappedEpochBytes.putInt((int) self.getCurrentEpoch());
        self.setAcceptedEpoch(newEpoch);
      } else if (newEpoch == self.getAcceptedEpoch()) {
        // since we have already acked an epoch equal to the leaders, we
        // cannot ack
        // again, but we still need to send our lastZxid to the leader
        // so that we can
        // sync with it if it does assume leadership of the epoch.
        // the -1 indicates that this reply should not count as an ack
        // for the new epoch
        wrappedEpochBytes.putInt(-1);
      } else {
        throw new IOException(
            "Leaders epoch, "
                + newEpoch
                + " is less than accepted epoch, "
                + self.getAcceptedEpoch());
      }
      QuorumPacket ackNewEpoch =
          new QuorumPacket(Leader.ACKEPOCH, lastLoggedZxid, epochBytes); // 发送ack
      writePacket(ackNewEpoch, true);
      return ZxidUtils.makeZxid(newEpoch, 0);
    } else {
      if (newEpoch > self.getAcceptedEpoch()) {
        self.setAcceptedEpoch(newEpoch);
      }
      if (qp.getType() != Leader.NEWLEADER) {
        LOG.error("First packet should have been NEWLEADER");
        throw new IOException("First packet should have been NEWLEADER");
      }
      return qp.getZxid();
    }
  }
  /**
   * This thread will receive packets from the peer and process them and also listen to new
   * connections from new peers.
   */
  @Override
  public void run() {
    try {

      ia = BinaryInputArchive.getArchive(new BufferedInputStream(sock.getInputStream()));
      bufferedOutput = new BufferedOutputStream(sock.getOutputStream());
      oa = BinaryOutputArchive.getArchive(bufferedOutput);

      QuorumPacket qp = new QuorumPacket();
      ia.readRecord(qp, "packet");
      if (qp.getType() != Leader.FOLLOWERINFO && qp.getType() != Leader.OBSERVERINFO) {
        LOG.error("First packet " + qp.toString() + " is not FOLLOWERINFO or OBSERVERINFO!");
        return;
      }
      if (qp.getData() != null) {
        ByteBuffer bbsid = ByteBuffer.wrap(qp.getData());
        this.sid = bbsid.getLong();
      } else {
        this.sid = leader.followerCounter.getAndDecrement();
      }

      LOG.info("Follower sid: " + this.sid + " : info : " + leader.self.quorumPeers.get(this.sid));

      if (qp.getType() == Leader.OBSERVERINFO) {
        learnerType = LearnerType.OBSERVER;
      }

      long peerLastZxid = qp.getZxid();
      /* the default to send to the follower */
      int packetToSend = Leader.SNAP;
      long zxidToSend = 0;
      long leaderLastZxid = 0;
      /** the packets that the follower needs to get updates from * */
      long updates = peerLastZxid;

      /* we are sending the diff check if we have proposals in memory to be able to
       * send a diff to the
       */
      ReentrantReadWriteLock lock = leader.zk.getZKDatabase().getLogLock();
      ReadLock rl = lock.readLock();
      try {
        rl.lock();
        final long maxCommittedLog = leader.zk.getZKDatabase().getmaxCommittedLog();
        final long minCommittedLog = leader.zk.getZKDatabase().getminCommittedLog();
        LOG.info(
            "Synchronizing with Follower sid: "
                + this.sid
                + " maxCommittedLog ="
                + Long.toHexString(maxCommittedLog)
                + " minCommittedLog = "
                + Long.toHexString(minCommittedLog)
                + " peerLastZxid = "
                + Long.toHexString(peerLastZxid));

        LinkedList<Proposal> proposals = leader.zk.getZKDatabase().getCommittedLog();

        if (proposals.size() != 0) {
          if ((maxCommittedLog >= peerLastZxid) && (minCommittedLog <= peerLastZxid)) {

            // as we look through proposals, this variable keeps track of previous
            // proposal Id.
            long prevProposalZxid = minCommittedLog;

            // Keep track of whether we are about to send the first packet.
            // Before sending the first packet, we have to tell the learner
            // whether to expect a trunc or a diff
            boolean firstPacket = true;

            for (Proposal propose : proposals) {
              // skip the proposals the peer already has
              if (propose.packet.getZxid() <= peerLastZxid) {
                prevProposalZxid = propose.packet.getZxid();
                continue;
              } else {
                // If we are sending the first packet, figure out whether to trunc
                // in case the follower has some proposals that the leader doesn't
                if (firstPacket) {
                  firstPacket = false;
                  // Does the peer have some proposals that the leader hasn't seen yet
                  if (prevProposalZxid < peerLastZxid) {
                    // send a trunc message before sending the diff
                    packetToSend = Leader.TRUNC;
                    LOG.info("Sending TRUNC");
                    zxidToSend = prevProposalZxid;
                    updates = zxidToSend;
                  } else {
                    // Just send the diff
                    packetToSend = Leader.DIFF;
                    LOG.info("Sending diff");
                    zxidToSend = maxCommittedLog;
                  }
                }
                queuePacket(propose.packet);
                QuorumPacket qcommit =
                    new QuorumPacket(Leader.COMMIT, propose.packet.getZxid(), null, null);
                queuePacket(qcommit);
              }
            }
          } else if (peerLastZxid > maxCommittedLog) {
            packetToSend = Leader.TRUNC;
            zxidToSend = maxCommittedLog;
            updates = zxidToSend;
          }
        } else {
          // just let the state transfer happen
        }

        leaderLastZxid = leader.startForwarding(this, updates);
        if (peerLastZxid == leaderLastZxid) {
          // We are in sync so we'll do an empty diff
          packetToSend = Leader.DIFF;
          zxidToSend = leaderLastZxid;
        }
      } finally {
        rl.unlock();
      }

      QuorumPacket newLeaderQP = new QuorumPacket(Leader.NEWLEADER, leaderLastZxid, null, null);
      oa.writeRecord(newLeaderQP, "packet");
      bufferedOutput.flush();
      // Need to set the zxidToSend to the latest zxid
      if (packetToSend == Leader.SNAP) {
        zxidToSend = leader.zk.getZKDatabase().getDataTreeLastProcessedZxid();
      }
      oa.writeRecord(new QuorumPacket(packetToSend, zxidToSend, null, null), "packet");
      bufferedOutput.flush();

      /* if we are not truncating or sending a diff just send a snapshot */
      if (packetToSend == Leader.SNAP) {
        LOG.info(
            "Sending snapshot last zxid of peer is 0x"
                + Long.toHexString(peerLastZxid)
                + " "
                + " zxid of leader is 0x"
                + Long.toHexString(leaderLastZxid)
                + "sent zxid of db as 0x"
                + Long.toHexString(zxidToSend));
        // Dump data to peer
        leader.zk.getZKDatabase().serializeSnapshot(oa);
        oa.writeString("BenWasHere", "signature");
      }
      bufferedOutput.flush();

      // Mutation packets will be queued during the serialize,
      // so we need to mark when the peer can actually start
      // using the data
      //
      queuedPackets.add(new QuorumPacket(Leader.UPTODATE, -1, null, null));

      // Start sending packets
      new Thread() {
        public void run() {
          Thread.currentThread().setName("Sender-" + sock.getRemoteSocketAddress());
          try {
            sendPackets();
          } catch (InterruptedException e) {
            LOG.warn("Unexpected interruption", e);
          }
        }
      }.start();

      /*
       * Have to wait for the first ACK, wait until
       * the leader is ready, and only then we can
       * start processing messages.
       */
      qp = new QuorumPacket();
      ia.readRecord(qp, "packet");
      if (qp.getType() != Leader.ACK) {
        LOG.error("Next packet was supposed to be an ACK");
        return;
      }
      leader.processAck(this.sid, qp.getZxid(), sock.getLocalSocketAddress());

      /*
       * Wait until leader starts up
       */
      synchronized (leader.zk) {
        while (!leader.zk.isRunning()) {
          leader.zk.wait(500);
        }
      }

      while (true) {
        qp = new QuorumPacket();
        ia.readRecord(qp, "packet");

        long traceMask = ZooTrace.SERVER_PACKET_TRACE_MASK;
        if (qp.getType() == Leader.PING) {
          traceMask = ZooTrace.SERVER_PING_TRACE_MASK;
        }
        if (LOG.isTraceEnabled()) {
          ZooTrace.logQuorumPacket(LOG, traceMask, 'i', qp);
        }
        tickOfLastAck = leader.self.tick;

        ByteBuffer bb;
        long sessionId;
        int cxid;
        int type;

        switch (qp.getType()) {
          case Leader.ACK:
            if (this.learnerType == LearnerType.OBSERVER) {
              if (LOG.isDebugEnabled()) {
                LOG.debug("Received ACK from Observer  " + this.sid);
              }
            }
            leader.processAck(this.sid, qp.getZxid(), sock.getLocalSocketAddress());
            break;
          case Leader.PING:
            // Process the touches
            ByteArrayInputStream bis = new ByteArrayInputStream(qp.getData());
            DataInputStream dis = new DataInputStream(bis);
            while (dis.available() > 0) {
              long sess = dis.readLong();
              int to = dis.readInt();
              leader.zk.touch(sess, to);
            }
            break;
          case Leader.REVALIDATE:
            bis = new ByteArrayInputStream(qp.getData());
            dis = new DataInputStream(bis);
            long id = dis.readLong();
            int to = dis.readInt();
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            DataOutputStream dos = new DataOutputStream(bos);
            dos.writeLong(id);
            boolean valid = leader.zk.touch(id, to);
            if (valid) {
              try {
                // set the session owner
                // as the follower that
                // owns the session
                leader.zk.setOwner(id, this);
              } catch (SessionExpiredException e) {
                LOG.error(
                    "Somehow session "
                        + Long.toHexString(id)
                        + " expired right after being renewed! (impossible)",
                    e);
              }
            }
            if (LOG.isTraceEnabled()) {
              ZooTrace.logTraceMessage(
                  LOG,
                  ZooTrace.SESSION_TRACE_MASK,
                  "Session 0x" + Long.toHexString(id) + " is valid: " + valid);
            }
            dos.writeBoolean(valid);
            qp.setData(bos.toByteArray());
            queuedPackets.add(qp);
            break;
          case Leader.REQUEST:
            bb = ByteBuffer.wrap(qp.getData());
            sessionId = bb.getLong();
            cxid = bb.getInt();
            type = bb.getInt();
            bb = bb.slice();
            Request si;
            if (type == OpCode.sync) {
              si = new LearnerSyncRequest(this, sessionId, cxid, type, bb, qp.getAuthinfo());
            } else {
              si = new Request(null, sessionId, cxid, type, bb, qp.getAuthinfo());
            }
            si.setOwner(this);
            leader.zk.submitRequest(si);
            break;
          default:
        }
      }
    } catch (IOException e) {
      if (sock != null && !sock.isClosed()) {
        LOG.error("Unexpected exception causing shutdown while sock " + "still open", e);
        // close the socket to make sure the
        // other side can see it being close
        try {
          sock.close();
        } catch (IOException ie) {
          // do nothing
        }
      }
    } catch (InterruptedException e) {
      LOG.error("Unexpected exception causing shutdown", e);
    } finally {
      LOG.warn(
          "******* GOODBYE "
              + (sock != null ? sock.getRemoteSocketAddress() : "<null>")
              + " ********");
      // Send the packet of death
      try {
        queuedPackets.put(proposalOfDeath);
      } catch (InterruptedException e) {
        LOG.warn("Ignoring unexpected exception", e);
      }
      shutdown();
    }
  }
  public static String packetToString(QuorumPacket p) {
    if (true) return null;
    String type = null;
    String mess = null;
    Record txn = null;

    switch (p.getType()) {
      case Leader.ACK:
        type = "ACK";
        break;
      case Leader.COMMIT:
        type = "COMMIT";
        break;
      case Leader.FOLLOWERINFO:
        type = "FOLLOWERINFO";
        break;
      case Leader.NEWLEADER:
        type = "NEWLEADER";
        break;
      case Leader.PING:
        type = "PING";
        break;
      case Leader.PROPOSAL:
        type = "PROPOSAL";
        BinaryInputArchive ia =
            BinaryInputArchive.getArchive(new ByteArrayInputStream(p.getData()));
        TxnHeader hdr = new TxnHeader();
        try {
          txn = SerializeUtils.deserializeTxn(ia, hdr);
          // mess = "transaction: " + txn.toString();
        } catch (IOException e) {
          LOG.warn("Unexpected exception", e);
        }
        break;
      case Leader.REQUEST:
        type = "REQUEST";
        break;
      case Leader.REVALIDATE:
        type = "REVALIDATE";
        ByteArrayInputStream bis = new ByteArrayInputStream(p.getData());
        DataInputStream dis = new DataInputStream(bis);
        try {
          long id = dis.readLong();
          mess = " sessionid = " + id;
        } catch (IOException e) {
          LOG.warn("Unexpected exception", e);
        }

        break;
      case Leader.UPTODATE:
        type = "UPTODATE";
        break;
      default:
        type = "UNKNOWN" + p.getType();
    }
    String entry = null;
    if (type != null) {
      entry = type + " " + Long.toHexString(p.getZxid()) + " " + mess;
    }
    return entry;
  }