예제 #1
0
  /**
   * Synchronizes this learner's local state with the leader as the final step of joining.
   *
   * <p>The first packet read from the leader selects how the local database is brought in line:
   * {@code DIFF} (nothing to do up front), {@code SNAP} (replace the database with the snapshot
   * read from {@code leaderIs}) or {@code TRUNC} (truncate the local log to the leader's zxid).
   * Any other packet type, or a failed truncation, terminates the JVM with exit code 13. The
   * method then consumes packets until {@code UPTODATE}, sends a final ACK, starts the server and
   * hands the proposals/commits that were buffered during the sync to the follower server.
   *
   * @param newLeaderZxid zxid announced by the new leader; its epoch part is used for the final
   *     ACK, the current-epoch update and the election-vote update, and the full value is echoed
   *     back in the ACK sent when {@code NEWLEADER} is received
   * @throws IOException if the snapshot signature does not match; may also propagate from the
   *     packet and database calls made here
   * @throws InterruptedException declared by the signature; the call that raises it is not
   *     identifiable from this block
   */
  protected void syncWithLeader(long newLeaderZxid) throws IOException, InterruptedException {
    // Final ACK, sent after the sync loop; its zxid is filled in once the loop has finished.
    QuorumPacket ack = new QuorumPacket(Leader.ACK, 0, null);
    // Single packet object reused for every read below.
    QuorumPacket qp = new QuorumPacket();
    long newEpoch = ZxidUtils.getEpochFromZxid(newLeaderZxid);

    // The first packet tells us which sync mode the leader chose (DIFF / SNAP / TRUNC).
    readPacket(qp);
    // Zxids of commits that must be replayed after the server has started.
    LinkedList<Long> packetsCommitted = new LinkedList<Long>();
    // Proposals received but not yet applied, in arrival order.
    LinkedList<PacketInFlight> packetsNotCommitted = new LinkedList<PacketInFlight>();
    synchronized (zk) {
      if (qp.getType() == Leader.DIFF) {
        // Nothing to prepare: the missing transactions arrive as packets in the loop below.
        LOG.info("Getting a diff from the leader 0x" + Long.toHexString(qp.getZxid()));
      } else if (qp.getType() == Leader.SNAP) {
        LOG.info("Getting a snapshot from leader");
        // The leader is going to dump the database
        // clear our own database and read
        zk.getZKDatabase().clear();
        zk.getZKDatabase().deserializeSnapshot(leaderIs);
        // The snapshot must be followed by a fixed marker string; anything else is rejected.
        String signature = leaderIs.readString("signature");
        if (!signature.equals("BenWasHere")) {
          LOG.error("Missing signature. Got " + signature);
          throw new IOException("Missing signature");
        }
        zk.getZKDatabase().commit();
        // NOTE(review): the M2M data is cleared right after the commit; the reason is not
        // visible in this block - confirm against the ZKDatabase implementation.
        zk.getZKDatabase().getM2mData().clear();
      } else if (qp.getType() == Leader.TRUNC) {
        // we need to truncate the log to the lastzxid of the leader
        LOG.warn(
            "Truncating log to get in sync with the leader 0x" + Long.toHexString(qp.getZxid()));
        boolean truncated = zk.getZKDatabase().truncateLog(qp.getZxid());
        if (!truncated) {
          // not able to truncate the log
          LOG.error("Not able to truncate the log " + Long.toHexString(qp.getZxid()));
          System.exit(13);
        }

      } else {
        // Unknown sync mode: give up and terminate the process.
        LOG.error("Got unexpected packet from leader " + qp.getType() + " exiting ... ");
        System.exit(13);
      }
      // Whatever the mode, the local database now counts as processed up to the packet's zxid.
      zk.getZKDatabase().setlastProcessedZxid(qp.getZxid());
      zk.createSessionTracker();

      // Zxid of the most recently received PROPOSAL/INFORM; only used for the ordering warning.
      long lastQueued = 0;

      // in V1.0 we take a snapshot when we get the NEWLEADER message, but
      // in pre V1.0
      // we take the snapshot at the UPDATE, since V1.0 also gets the
      // UPDATE (after the NEWLEADER)
      // we need to make sure that we don't take the snapshot twice.
      boolean snapshotTaken = false;
      // we are now going to start getting transactions to apply followed
      // by an UPTODATE
      outerLoop:
      while (self.isRunning()) {
        readPacket(qp);
        switch (qp.getType()) {
          case Leader.PROPOSAL:
            // Decode the transaction and queue it until the matching COMMIT arrives.
            PacketInFlight pif = new PacketInFlight();
            pif.hdr = new M2mTxnHeader();
            pif.rec = M2mSerializeUtils.deserializeTxn(qp.getData(), pif.hdr);
            // Out-of-order zxids are only logged; the proposal is queued regardless.
            if (pif.hdr.getZxid() != lastQueued + 1) {
              LOG.warn(
                  "Got zxid 0x"
                      + Long.toHexString(pif.hdr.getZxid())
                      + " expected 0x"
                      + Long.toHexString(lastQueued + 1));
            }
            lastQueued = pif.hdr.getZxid();
            packetsNotCommitted.add(pif);
            break;
          case Leader.COMMIT:
            if (!snapshotTaken) {
              // Before NEWLEADER: apply the oldest pending proposal directly to the database.
              // NOTE(review): peekFirst() returns null on an empty list, so a COMMIT that
              // arrives with no pending proposal would throw a NullPointerException on the
              // next line - confirm the leader can never send that sequence.
              pif = packetsNotCommitted.peekFirst();
              if (pif.hdr.getZxid() != qp.getZxid()) {
                // Mismatch: the commit is logged and dropped; the proposal stays queued.
                LOG.warn(
                    "Committing " + qp.getZxid() + ", but next proposal is " + pif.hdr.getZxid());
              } else {
                zk.processTxn(pif.hdr, pif.rec);
                packetsNotCommitted.remove();
              }
            } else {
              // After NEWLEADER: remember the commit so it can be replayed once zk has started.
              packetsCommitted.add(qp.getZxid());
            }
            break;
          case Leader.INFORM:
            /*
             * Only observer get this type of packet. We treat this as
             * receiving PROPOSAL and COMMIT.
             */
            PacketInFlight packet = new PacketInFlight();
            packet.hdr = new M2mTxnHeader();
            packet.rec = M2mSerializeUtils.deserializeTxn(qp.getData(), packet.hdr);
            // Log warning message if txn comes out-of-order
            if (packet.hdr.getZxid() != lastQueued + 1) {
              LOG.warn(
                  "Got zxid 0x"
                      + Long.toHexString(packet.hdr.getZxid())
                      + " expected 0x"
                      + Long.toHexString(lastQueued + 1));
            }
            lastQueued = packet.hdr.getZxid();
            if (!snapshotTaken) {
              // Apply to db directly if we haven't taken the snapshot
              zk.processTxn(packet.hdr, packet.rec);
            } else {
              // Otherwise buffer it as both a proposal and its commit for replay after startup.
              packetsNotCommitted.add(packet);
              packetsCommitted.add(qp.getZxid());
            }
            break;
          case Leader.UPTODATE:
            if (!snapshotTaken) { // true for the pre v1.0 case
              zk.takeSnapshot();
              self.setCurrentEpoch(newEpoch);
            }
            // Register the server with the connection factory, then leave the sync loop.
            self.cnxnFactory.addZooKeeperServer(self.getHandleIp(), zk);
            break outerLoop;
          case Leader.NEWLEADER: // it will be NEWLEADER in v1.0
            // NOTE(review): the snapshot / setCurrentEpoch sequence below is commented out, yet
            // snapshotTaken is still set to true. On this path the method therefore never calls
            // zk.takeSnapshot() or self.setCurrentEpoch(newEpoch) itself - confirm this is
            // intended.
            // Create updatingEpoch file and remove it after current
            // epoch is set. QuorumPeer.loadDataBase() uses this file to
            // detect the case where the server was terminated after
            // taking a snapshot but before setting the current epoch.
            // File updating = new
            // File(self.getTxnFactory().getSnapDir(),
            // QuorumPeer.UPDATING_EPOCH_FILENAME);
            // if (!updating.exists() && !updating.createNewFile()) {
            // throw new IOException("Failed to create "
            // + updating.toString());
            // }
            // zk.takeSnapshot();
            // self.setCurrentEpoch(newEpoch);
            // if (!updating.delete()) {
            // throw new IOException("Failed to delete "
            // + updating.toString());
            // }
            snapshotTaken = true;
            // Acknowledge NEWLEADER by echoing the leader's zxid back.
            writePacket(new QuorumPacket(Leader.ACK, newLeaderZxid, null), true);
            break;
        }
      }
    }
    // Final ACK carries the new epoch with a zero counter.
    ack.setZxid(ZxidUtils.makeZxid(newEpoch, 0));
    writePacket(ack, true);
    // From here on the socket read timeout is tickTime * syncLimit.
    sock.setSoTimeout(self.tickTime * self.syncLimit);
    zk.startup();
    /*
     * Update the election vote here to ensure that all members of the
     * ensemble report the same vote to new servers that start up and send
     * leader election notifications to the ensemble.
     *
     * @see https://issues.apache.org/jira/browse/ZOOKEEPER-1732
     */
    self.updateElectionVote(newEpoch);

    // We need to log the stuff that came in between the snapshot and the
    // uptodate
    if (zk instanceof FollowerZooKeeperServer) {
      FollowerZooKeeperServer fzk = (FollowerZooKeeperServer) zk;
      // Replay order matters: log every buffered proposal first, then issue the commits.
      for (PacketInFlight p : packetsNotCommitted) {
        fzk.logRequest(p.hdr, p.rec);
      }
      for (Long zxid : packetsCommitted) {
        fzk.commit(zxid);
      }
    } else {
      // New server type need to handle in-flight packets
      // Any zk that is not a FollowerZooKeeperServer ends here with an unchecked exception,
      // after the server has already been started above.
      throw new UnsupportedOperationException("Unknown server type");
    }
  }
예제 #2
0
  /**
   * Runs the opening handshake with the leader once the connection is up, registering this peer
   * as a follower or an observer.
   *
   * <p>The peer sends one packet of the requested type carrying its accepted epoch and a
   * serialized {@code LearnerInfo}, then reads the leader's reply. A {@code LEADERINFO} reply is
   * answered with an {@code ACKEPOCH} packet; any other reply has to be {@code NEWLEADER}.
   *
   * @param pktType type of the registration packet sent to the leader
   * @return the zxid to synchronize against: the reply's epoch with a zero counter when the
   *     leader answered with {@code LEADERINFO}, otherwise the zxid of the {@code NEWLEADER}
   *     packet
   * @throws IOException if the leader's epoch is lower than the accepted epoch, or if the reply
   *     is neither {@code LEADERINFO} nor {@code NEWLEADER}; may also propagate from the
   *     serialization and packet read/write calls
   */
  protected long registerWithLeader(int pktType) throws IOException {
    // Read our last logged zxid before any packet is exchanged.
    final long ownLastZxid = self.getLastLoggedZxid();

    // Registration packet: requested type plus our accepted epoch encoded as a zxid.
    final QuorumPacket packet = new QuorumPacket();
    packet.setType(pktType);
    packet.setZxid(ZxidUtils.makeZxid(self.getAcceptedEpoch(), 0));

    // Payload: our server id together with the protocol version constant.
    final LearnerInfo learnerInfo = new LearnerInfo(self.getId(), 0x10000);
    final ByteArrayOutputStream infoBytes = new ByteArrayOutputStream();
    BinaryOutputArchive.getArchive(infoBytes).writeRecord(learnerInfo, "LearnerInfo");
    packet.setData(infoBytes.toByteArray());

    // Send the registration, then reuse the same packet object to receive the reply.
    writePacket(packet, true);
    readPacket(packet);
    final long leaderEpoch = ZxidUtils.getEpochFromZxid(packet.getZxid());

    if (packet.getType() != Leader.LEADERINFO) {
      // No LEADERINFO: the only acceptable first packet is NEWLEADER. The accepted epoch is
      // raised before the packet type is validated.
      if (leaderEpoch > self.getAcceptedEpoch()) {
        self.setAcceptedEpoch(leaderEpoch);
      }
      if (packet.getType() == Leader.NEWLEADER) {
        return packet.getZxid();
      }
      LOG.error("First packet should have been NEWLEADER");
      throw new IOException("First packet should have been NEWLEADER");
    }

    // LEADERINFO: we are talking to a 1.0 server, so pick up its protocol version, accept the
    // new epoch and answer with ACKEPOCH.
    leaderProtocolVersion = ByteBuffer.wrap(packet.getData()).getInt();
    final byte[] ackPayload = new byte[4];
    final ByteBuffer ackWriter = ByteBuffer.wrap(ackPayload);
    if (leaderEpoch > self.getAcceptedEpoch()) {
      // Newer epoch: report our current epoch and adopt the leader's epoch as accepted.
      ackWriter.putInt((int) self.getCurrentEpoch());
      self.setAcceptedEpoch(leaderEpoch);
    } else {
      if (leaderEpoch != self.getAcceptedEpoch()) {
        throw new IOException(
            "Leaders epoch, "
                + leaderEpoch
                + " is less than accepted epoch, "
                + self.getAcceptedEpoch());
      }
      // We have already acked an epoch equal to the leader's, so we cannot ack it again. We
      // still send our last zxid so the leader can sync with us if it does take over the
      // epoch; the -1 marks this reply as not counting as an ack for the new epoch.
      ackWriter.putInt(-1);
    }

    // Send the epoch acknowledgement carrying our last logged zxid.
    writePacket(new QuorumPacket(Leader.ACKEPOCH, ownLastZxid, ackPayload), true);
    return ZxidUtils.makeZxid(leaderEpoch, 0);
  }