/** * Finally, synchronize our history with the Leader. * * @param newLeaderZxid * @throws IOException * @throws InterruptedException */ protected void syncWithLeader(long newLeaderZxid) throws IOException, InterruptedException { QuorumPacket ack = new QuorumPacket(Leader.ACK, 0, null); QuorumPacket qp = new QuorumPacket(); long newEpoch = ZxidUtils.getEpochFromZxid(newLeaderZxid); readPacket(qp); LinkedList<Long> packetsCommitted = new LinkedList<Long>(); LinkedList<PacketInFlight> packetsNotCommitted = new LinkedList<PacketInFlight>(); synchronized (zk) { if (qp.getType() == Leader.DIFF) { LOG.info("Getting a diff from the leader 0x" + Long.toHexString(qp.getZxid())); } else if (qp.getType() == Leader.SNAP) { LOG.info("Getting a snapshot from leader"); // The leader is going to dump the database // clear our own database and read zk.getZKDatabase().clear(); zk.getZKDatabase().deserializeSnapshot(leaderIs); String signature = leaderIs.readString("signature"); if (!signature.equals("BenWasHere")) { LOG.error("Missing signature. Got " + signature); throw new IOException("Missing signature"); } zk.getZKDatabase().commit(); zk.getZKDatabase().getM2mData().clear(); } else if (qp.getType() == Leader.TRUNC) { // we need to truncate the log to the lastzxid of the leader LOG.warn( "Truncating log to get in sync with the leader 0x" + Long.toHexString(qp.getZxid())); boolean truncated = zk.getZKDatabase().truncateLog(qp.getZxid()); if (!truncated) { // not able to truncate the log LOG.error("Not able to truncate the log " + Long.toHexString(qp.getZxid())); System.exit(13); } } else { LOG.error("Got unexpected packet from leader " + qp.getType() + " exiting ... "); System.exit(13); } zk.getZKDatabase().setlastProcessedZxid(qp.getZxid()); zk.createSessionTracker(); long lastQueued = 0; // in V1.0 we take a snapshot when we get the NEWLEADER message, but // in pre V1.0 // we take the snapshot at the UPDATE, since V1.0 also gets the // UPDATE (after the NEWLEADER) // we need to make sure that we don't take the snapshot twice. boolean snapshotTaken = false; // we are now going to start getting transactions to apply followed // by an UPTODATE outerLoop: while (self.isRunning()) { readPacket(qp); switch (qp.getType()) { case Leader.PROPOSAL: PacketInFlight pif = new PacketInFlight(); pif.hdr = new M2mTxnHeader(); pif.rec = M2mSerializeUtils.deserializeTxn(qp.getData(), pif.hdr); if (pif.hdr.getZxid() != lastQueued + 1) { LOG.warn( "Got zxid 0x" + Long.toHexString(pif.hdr.getZxid()) + " expected 0x" + Long.toHexString(lastQueued + 1)); } lastQueued = pif.hdr.getZxid(); packetsNotCommitted.add(pif); break; case Leader.COMMIT: if (!snapshotTaken) { pif = packetsNotCommitted.peekFirst(); if (pif.hdr.getZxid() != qp.getZxid()) { LOG.warn( "Committing " + qp.getZxid() + ", but next proposal is " + pif.hdr.getZxid()); } else { zk.processTxn(pif.hdr, pif.rec); packetsNotCommitted.remove(); } } else { packetsCommitted.add(qp.getZxid()); } break; case Leader.INFORM: /* * Only observer get this type of packet. We treat this as * receiving PROPOSAL and COMMMIT. */ PacketInFlight packet = new PacketInFlight(); packet.hdr = new M2mTxnHeader(); packet.rec = M2mSerializeUtils.deserializeTxn(qp.getData(), packet.hdr); // Log warning message if txn comes out-of-order if (packet.hdr.getZxid() != lastQueued + 1) { LOG.warn( "Got zxid 0x" + Long.toHexString(packet.hdr.getZxid()) + " expected 0x" + Long.toHexString(lastQueued + 1)); } lastQueued = packet.hdr.getZxid(); if (!snapshotTaken) { // Apply to db directly if we haven't taken the snapshot zk.processTxn(packet.hdr, packet.rec); } else { packetsNotCommitted.add(packet); packetsCommitted.add(qp.getZxid()); } break; case Leader.UPTODATE: if (!snapshotTaken) { // true for the pre v1.0 case zk.takeSnapshot(); self.setCurrentEpoch(newEpoch); } self.cnxnFactory.addZooKeeperServer(self.getHandleIp(), zk); break outerLoop; case Leader.NEWLEADER: // it will be NEWLEADER in v1.0 // Create updatingEpoch file and remove it after current // epoch is set. QuorumPeer.loadDataBase() uses this file to // detect the case where the server was terminated after // taking a snapshot but before setting the current epoch. // File updating = new // File(self.getTxnFactory().getSnapDir(), // QuorumPeer.UPDATING_EPOCH_FILENAME); // if (!updating.exists() && !updating.createNewFile()) { // throw new IOException("Failed to create " // + updating.toString()); // } // zk.takeSnapshot(); // self.setCurrentEpoch(newEpoch); // if (!updating.delete()) { // throw new IOException("Failed to delete " // + updating.toString()); // } snapshotTaken = true; writePacket(new QuorumPacket(Leader.ACK, newLeaderZxid, null), true); break; } } } ack.setZxid(ZxidUtils.makeZxid(newEpoch, 0)); writePacket(ack, true); sock.setSoTimeout(self.tickTime * self.syncLimit); zk.startup(); /* * Update the election vote here to ensure that all members of the * ensemble report the same vote to new servers that start up and send * leader election notifications to the ensemble. * * @see https://issues.apache.org/jira/browse/ZOOKEEPER-1732 */ self.updateElectionVote(newEpoch); // We need to log the stuff that came in between the snapshot and the // uptodate if (zk instanceof FollowerZooKeeperServer) { FollowerZooKeeperServer fzk = (FollowerZooKeeperServer) zk; for (PacketInFlight p : packetsNotCommitted) { fzk.logRequest(p.hdr, p.rec); } for (Long zxid : packetsCommitted) { fzk.commit(zxid); } } else { // New server type need to handle in-flight packets throw new UnsupportedOperationException("Unknown server type"); } }
/** * Once connected to the leader, perform the handshake protocol to establish a following / * observing connection. * * @param pktType * @return the zxid the Leader sends for synchronization purposes. * @throws IOException */ protected long registerWithLeader(int pktType) throws IOException { /* * Send follower info, including last zxid and sid */ long lastLoggedZxid = self.getLastLoggedZxid(); QuorumPacket qp = new QuorumPacket(); qp.setType(pktType); qp.setZxid(ZxidUtils.makeZxid(self.getAcceptedEpoch(), 0)); /* * Add sid to payload */ LearnerInfo li = new LearnerInfo(self.getId(), 0x10000); ByteArrayOutputStream bsid = new ByteArrayOutputStream(); BinaryOutputArchive boa = BinaryOutputArchive.getArchive(bsid); boa.writeRecord(li, "LearnerInfo"); qp.setData(bsid.toByteArray()); writePacket(qp, true); readPacket(qp); final long newEpoch = ZxidUtils.getEpochFromZxid(qp.getZxid()); if (qp.getType() == Leader.LEADERINFO) { // we are connected to a 1.0 server so accept the new epoch and read // the next packet leaderProtocolVersion = ByteBuffer.wrap(qp.getData()).getInt(); byte epochBytes[] = new byte[4]; final ByteBuffer wrappedEpochBytes = ByteBuffer.wrap(epochBytes); if (newEpoch > self.getAcceptedEpoch()) { wrappedEpochBytes.putInt((int) self.getCurrentEpoch()); self.setAcceptedEpoch(newEpoch); } else if (newEpoch == self.getAcceptedEpoch()) { // since we have already acked an epoch equal to the leaders, we // cannot ack // again, but we still need to send our lastZxid to the leader // so that we can // sync with it if it does assume leadership of the epoch. // the -1 indicates that this reply should not count as an ack // for the new epoch wrappedEpochBytes.putInt(-1); } else { throw new IOException( "Leaders epoch, " + newEpoch + " is less than accepted epoch, " + self.getAcceptedEpoch()); } QuorumPacket ackNewEpoch = new QuorumPacket(Leader.ACKEPOCH, lastLoggedZxid, epochBytes); // 发送ack writePacket(ackNewEpoch, true); return ZxidUtils.makeZxid(newEpoch, 0); } else { if (newEpoch > self.getAcceptedEpoch()) { self.setAcceptedEpoch(newEpoch); } if (qp.getType() != Leader.NEWLEADER) { LOG.error("First packet should have been NEWLEADER"); throw new IOException("First packet should have been NEWLEADER"); } return qp.getZxid(); } }