@Test public void testUpdate() throws Exception { ZooKeeper zk = getClient(0); ZooKeeper zk2 = getClient(0); MailboxTracker tracker = new MailboxTracker(zk, handler); MailboxPublisher publisher = new MailboxPublisher(VoltZK.mailboxes + "/1"); VoltZK.createPersistentZKNodes(zk); publisher.registerMailbox(MailboxType.ExecutionSite, new MailboxNodeContent(1L, 0)); publisher.publish(zk2); publisher = new MailboxPublisher(VoltZK.mailboxes + "/2"); publisher.registerMailbox(MailboxType.ExecutionSite, new MailboxNodeContent(2L, 1)); publisher.publish(zk); tracker.start(); // The ephemaral node just created will disappear and we should get an update zk2.close(); while (handler.m_handleCount.get() < 2) { Thread.sleep(1); } Map<MailboxType, List<MailboxNodeContent>> value = handler.m_mailboxes; assertTrue(value.containsKey(MailboxType.ExecutionSite)); List<MailboxNodeContent> list = value.get(MailboxType.ExecutionSite); assertEquals(1, list.size()); assertEquals(2, list.get(0).HSId.longValue()); assertEquals(1, list.get(0).partitionId.intValue()); tracker.shutdown(); }
/** * Rebuild the point-in-time snapshot of the children objects and set watches on new * children. @Param event may be null on the first initialization. */ private void processParentEvent(WatchedEvent event) throws Exception { // get current children snapshot and reset this watch. Set<String> children = new TreeSet<String>(m_zk.getChildren(m_rootNode, m_parentWatch)); // intersect to get newChildren and update m_lastChildren to the current set. Set<String> newChildren = new HashSet<String>(children); newChildren.removeAll(m_lastChildren); m_lastChildren = children; List<ByteArrayCallback> callbacks = new ArrayList<ByteArrayCallback>(); for (String child : children) { ByteArrayCallback cb = new ByteArrayCallback(); // set watches on new children. if (newChildren.contains(child)) { m_zk.getData(ZKUtil.joinZKPath(m_rootNode, child), m_childWatch, cb, null); } else { m_zk.getData(ZKUtil.joinZKPath(m_rootNode, child), false, cb, null); } callbacks.add(cb); } HashMap<String, JSONObject> cache = new HashMap<String, JSONObject>(); for (ByteArrayCallback callback : callbacks) { try { byte payload[] = callback.getData(); JSONObject jsObj = new JSONObject(new String(payload, "UTF-8")); cache.put(callback.getPath(), jsObj); } catch (KeeperException.NoNodeException e) { // child may have been deleted between the parent trigger and getData. } } m_publicCache.set(ImmutableMap.copyOf(cache)); }
/* * Inherit the per partition txnid from the long since gone * partition that existed in the past */ private long[] fetchPerPartitionTxnId() { ZooKeeper zk = VoltDB.instance().getHostMessenger().getZK(); byte partitionTxnIdsBytes[] = null; try { partitionTxnIdsBytes = zk.getData(VoltZK.perPartitionTxnIds, false, null); } catch (KeeperException.NoNodeException e) { return null; } // Can be no node if the cluster was never restored catch (Exception e) { VoltDB.crashLocalVoltDB("Error retrieving per partition txn ids", true, e); } ByteBuffer buf = ByteBuffer.wrap(partitionTxnIdsBytes); int count = buf.getInt(); Long partitionTxnId = null; long partitionTxnIds[] = new long[count]; for (int ii = 0; ii < count; ii++) { long txnId = buf.getLong(); partitionTxnIds[ii] = txnId; int partitionId = TxnEgo.getPartitionId(txnId); if (partitionId == m_partitionId) { partitionTxnId = txnId; continue; } } if (partitionTxnId != null) { return partitionTxnIds; } return null; }
/**
 * Deletes one of the three configured children directly in ZooKeeper and waits for the
 * callback-published cache to shrink to the two remaining entries.
 */
@Test
public void testDeleteChildWithCallback() throws Exception {
    ZooKeeper zk = getClient(0);
    configure("/cache02", zk);
    TestCallback callback = new TestCallback();
    LeaderCache leaderCache = new LeaderCache(zk, "/cache02", callback);
    leaderCache.start(true);

    Map<Integer, Long> snapshot = callback.m_cache;
    assertEquals("3 items cached.", 3, snapshot.size());

    zk.delete("/cache02/1", -1);

    // Poll the callback's latest map until it no longer holds all three entries.
    snapshot = callback.m_cache;
    while (snapshot.size() == 3) {
        Thread.sleep(1);
        snapshot = callback.m_cache;
    }

    assertEquals("Item removed", 2, snapshot.size());
    assertEquals(null, snapshot.get(1));
    assertEquals(12345678, snapshot.get(0).longValue());
    assertEquals(11223344, snapshot.get(2).longValue());

    leaderCache.shutdown();
    zk.close();
}
/**
 * Overwrites child 0 through {@code LeaderCache.put} and waits until the callback-published
 * cache reflects the new value, then checks that the other two entries are untouched.
 */
@Test
public void testModifyChildWithCallback() throws Exception {
    ZooKeeper zk = getClient(0);
    configure("/cache03", zk);
    TestCallback cb = new TestCallback();
    LeaderCache dut = new LeaderCache(zk, "/cache03", cb);
    dut.start(true);

    Map<Integer, Long> cache = cb.m_cache;
    assertEquals("3 items cached.", 3, cache.size());
    assertEquals(12345678, cache.get(0).longValue());

    dut.put(0, 23456789);
    while (true) {
        cache = cb.m_cache;
        if (cache.get(0) == 23456789) {
            break;
        }
        // Back off between polls instead of spinning, matching the other tests in this file.
        Thread.sleep(1);
    }

    cache = cb.m_cache;
    assertEquals("3 items cached.", 3, cache.size());
    assertEquals(23456789, cache.get(0).longValue());
    assertEquals(87654321, cache.get(1).longValue());
    assertEquals(11223344, cache.get(2).longValue());

    dut.shutdown();
    zk.close();
}
/**
 * Overwrites child 0 directly in ZooKeeper and waits until the LeaderCache serves the new
 * value, then checks the cache size and the remaining entries.
 */
@Test
public void testModifyChild() throws Exception {
    ZooKeeper zk = getClient(0);
    configure("/cache03", zk);
    LeaderCache dut = new LeaderCache(zk, "/cache03");
    dut.start(true);

    Map<Integer, Long> cache = dut.pointInTimeCache();
    assertEquals("3 items cached.", 3, cache.size());
    assertEquals(12345678, dut.get(0).longValue());

    zk.setData("/cache03/0", Long.toString(23456789).getBytes(), -1);
    while (true) {
        if (dut.get(0) == 23456789) {
            break;
        }
        // Back off between polls instead of spinning, matching the other tests in this file.
        Thread.sleep(1);
    }

    // Take a fresh snapshot so the size check covers the post-modification state rather
    // than the map captured before the write.
    cache = dut.pointInTimeCache();
    assertEquals("3 items cached.", 3, cache.size());
    assertEquals(23456789L, dut.get(0).longValue());
    assertEquals(87654321L, dut.get(1).longValue());
    assertEquals(11223344L, dut.get(2).longValue());

    dut.shutdown();
    zk.close();
}
/**
 * Deletes {@code dir} and everything beneath it, children first.
 *
 * @param zk client used for the listing and the deletes
 * @param dir absolute path of the node to remove
 * @throws KeeperException if any listing or delete fails
 * @throws InterruptedException if a ZooKeeper call is interrupted
 */
public static void deleteRecursively(ZooKeeper zk, String dir)
        throws KeeperException, InterruptedException {
    // Depth-first: every child subtree is removed before the node itself.
    for (String childName : zk.getChildren(dir, false)) {
        final String childPath = joinZKPath(dir, childName);
        deleteRecursively(zk, childPath);
    }
    zk.delete(dir, -1);
}
/**
 * Creates the persistent node {@code root} with an empty payload, plus children "0", "1" and
 * "2" whose payloads are the decimal strings 12345678, 87654321 and 11223344.
 */
void configure(String root, ZooKeeper zk) throws Exception {
    final long[] childValues = {12345678L, 87654321L, 11223344L};

    zk.create(root, new byte[] {}, Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
    for (int child = 0; child < childValues.length; child++) {
        byte[] payload = Long.toString(childValues[child]).getBytes();
        zk.create(root + "/" + child, payload, Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
    }
}
@Override public void run() { try { JSONStringer js = new JSONStringer(); js.object(); js.key("role").value(m_config.m_replicationRole.ordinal()); js.key("active").value(m_rvdb.getReplicationActive()); js.endObject(); ZooKeeper zk = m_rvdb.getHostMessenger().getZK(); // rejoining nodes figure out the replication role from other nodes if (!m_isRejoin) { try { zk.create( VoltZK.replicationconfig, js.toString().getBytes("UTF-8"), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); } catch (KeeperException.NodeExistsException e) { } String discoveredReplicationConfig = new String(zk.getData(VoltZK.replicationconfig, false, null), "UTF-8"); JSONObject discoveredjsObj = new JSONObject(discoveredReplicationConfig); ReplicationRole discoveredRole = ReplicationRole.get((byte) discoveredjsObj.getLong("role")); if (!discoveredRole.equals(m_config.m_replicationRole)) { VoltDB.crashGlobalVoltDB( "Discovered replication role " + discoveredRole + " doesn't match locally specified replication role " + m_config.m_replicationRole, true, null); } // See if we should bring the server up in WAN replication mode m_rvdb.setReplicationRole(discoveredRole); } else { String discoveredReplicationConfig = new String(zk.getData(VoltZK.replicationconfig, false, null), "UTF-8"); JSONObject discoveredjsObj = new JSONObject(discoveredReplicationConfig); ReplicationRole discoveredRole = ReplicationRole.get((byte) discoveredjsObj.getLong("role")); boolean replicationActive = discoveredjsObj.getBoolean("active"); // See if we should bring the server up in WAN replication mode m_rvdb.setReplicationRole(discoveredRole); m_rvdb.setReplicationActive(replicationActive); } } catch (Exception e) { VoltDB.crashGlobalVoltDB("Error discovering replication role", false, e); } }
/**
 * Issues one asynchronous persistent-node create for each path component of {@code dirDN},
 * from the top-most component down.
 *
 * @param zk client used to issue the creates
 * @param dirDN absolute path to create; must be non-null, non-blank, start with "/" and not
 *     be "/" itself
 * @param payload data stored on the final component only; intermediate components are
 *     created with a null payload
 * @return the callback attached to the create of the final component
 */
public static ZKUtil.StringCallback asyncMkdirs(ZooKeeper zk, String dirDN, byte payload[]) {
    final boolean usablePath =
        dirDN != null && !dirDN.trim().isEmpty() && !"/".equals(dirDN) && dirDN.startsWith("/");
    Preconditions.checkArgument(usablePath);

    StringBuilder pathSoFar = new StringBuilder(128);
    ZKUtil.StringCallback lastCallback = null;
    try {
        String segments[] = dirDN.substring(1).split("/");
        final int leafIndex = segments.length - 1;
        int index = 0;
        for (String segment : segments) {
            lastCallback = new ZKUtil.StringCallback();
            pathSoFar.append('/').append(segment);

            byte data[];
            if (index == leafIndex) {
                data = payload;
            } else {
                data = null;
            }
            zk.create(
                pathSoFar.toString(),
                data,
                Ids.OPEN_ACL_UNSAFE,
                CreateMode.PERSISTENT,
                lastCallback,
                null);
            index++;
        }
    } catch (Throwable t) {
        Throwables.propagate(t);
    }
    return lastCallback;
}
/**
 * Given a set of partition IDs, return a map of partition to a list of HSIDs of all the sites
 * with copies of each partition.
 *
 * <p>All child listings are requested first and the results are collected afterwards. Each
 * HSID is parsed from the part of the child name that precedes the first underscore. A
 * KeeperException or InterruptedException while collecting a result crashes the local node.
 */
public Map<Integer, List<Long>> getReplicasForPartitions(Collection<Integer> partitions) {
    Map<Integer, List<Long>> replicasByPartition = new HashMap<Integer, List<Long>>();
    List<Pair<Integer, ZKUtil.ChildrenCallback>> pending =
        new ArrayList<Pair<Integer, ZKUtil.ChildrenCallback>>();

    // Phase 1: fire off one listing per partition.
    for (Integer partition : partitions) {
        String electionDir = LeaderElector.electionDirForPartition(partition);
        ZKUtil.ChildrenCallback listing = new ZKUtil.ChildrenCallback();
        pending.add(Pair.of(partition, listing));
        m_zk.getChildren(electionDir, false, listing, null);
    }

    // Phase 2: collect each listing and translate child names into HSIDs.
    for (Pair<Integer, ZKUtil.ChildrenCallback> entry : pending) {
        final Integer partition = entry.getFirst();
        try {
            List<String> childNames = entry.getSecond().getChildren();
            List<Long> hsids = new ArrayList<Long>();
            for (String childName : childNames) {
                String hsidText = childName.split("_")[0];
                hsids.add(Long.valueOf(hsidText));
            }
            replicasByPartition.put(partition, hsids);
        } catch (KeeperException ke) {
            org.voltdb.VoltDB.crashLocalVoltDB(
                "KeeperException getting replicas for partition: " + partition, true, ke);
        } catch (InterruptedException ie) {
            org.voltdb.VoltDB.crashLocalVoltDB(
                "InterruptedException getting replicas for partition: " + partition, true, ie);
        }
    }
    return replicasByPartition;
}
/**
 * Starts a LeaderCache with a callback over three pre-configured children and checks that
 * the callback's map holds all three values once the blocking start returns.
 */
@Test
public void testInitialCacheWithCallback() throws Exception {
    ZooKeeper zk = getClient(0);
    configure("/cache01", zk);

    TestCallback callback = new TestCallback();
    LeaderCache leaderCache = new LeaderCache(zk, "/cache01", callback);
    leaderCache.start(true);

    assertEquals("3 items cached.", 3, callback.m_cache.size());
    assertEquals(12345678, callback.m_cache.get(0).longValue());
    assertEquals(87654321, callback.m_cache.get(1).longValue());
    assertEquals(11223344, callback.m_cache.get(2).longValue());

    leaderCache.shutdown();
    zk.close();
}
/**
 * Starts a LeaderCache over three pre-configured children and checks that all three values
 * can be read once the blocking start returns.
 */
@Test
public void testInitialCache() throws Exception {
    ZooKeeper zk = getClient(0);
    configure("/cache01", zk);

    LeaderCache leaderCache = new LeaderCache(zk, "/cache01");
    leaderCache.start(true);

    assertEquals("3 items cached.", 3, leaderCache.pointInTimeCache().size());
    assertEquals(12345678L, leaderCache.get(0).longValue());
    assertEquals(87654321L, leaderCache.get(1).longValue());
    assertEquals(11223344L, leaderCache.get(2).longValue());

    leaderCache.shutdown();
    zk.close();
}
/** * Once participating host count is set, SnapshotCompletionMonitor can check this ZK node to * determine whether the snapshot has finished or not. * * <p>This should only be called when all participants have responded. It is possible that some * hosts finish taking snapshot before the coordinator logs the participating host count. In this * case, the host count would have been decremented multiple times already. To make sure finished * hosts are logged correctly, this method adds participating host count + 1 to the current host * count. * * @param txnId The snapshot txnId * @param participantCount The number of hosts participating in this snapshot */ public static void logParticipatingHostCount(long txnId, int participantCount) { ZooKeeper zk = VoltDB.instance().getHostMessenger().getZK(); final String snapshotPath = VoltZK.completed_snapshots + "/" + txnId; boolean success = false; while (!success) { Stat stat = new Stat(); byte data[] = null; try { data = zk.getData(snapshotPath, false, stat); } catch (KeeperException e) { if (e.code() == KeeperException.Code.NONODE) { // If snapshot creation failed for some reason, the node won't exist. 
ignore return; } VoltDB.crashLocalVoltDB("Failed to get snapshot completion node", true, e); } catch (InterruptedException e) { VoltDB.crashLocalVoltDB("Interrupted getting snapshot completion node", true, e); } if (data == null) { VoltDB.crashLocalVoltDB("Data should not be null if the node exists", false, null); } try { JSONObject jsonObj = new JSONObject(new String(data, Charsets.UTF_8)); if (jsonObj.getLong("txnId") != txnId) { VoltDB.crashLocalVoltDB("TxnId should match", false, null); } int hostCount = jsonObj.getInt("hostCount"); // +1 because hostCount was initialized to -1 jsonObj.put("hostCount", hostCount + participantCount + 1); zk.setData(snapshotPath, jsonObj.toString(4).getBytes(Charsets.UTF_8), stat.getVersion()); } catch (KeeperException.BadVersionException e) { continue; } catch (Exception e) { VoltDB.crashLocalVoltDB("This ZK call should never fail", true, e); } success = true; } }
/**
 * Compresses {@code payload} and stores it as a series of sequential nodes named after
 * {@code node}, each holding at most 1 MiB, followed by a {@code node + "_complete"} marker
 * node with a null payload.
 *
 * @param zk client used to create the nodes
 * @param node path prefix for the sequential chunk nodes and the completion marker
 * @param payload uncompressed bytes to upload
 * @param ephemeral when true, all nodes are created ephemeral; otherwise persistent
 * @throws Exception if compression or any create fails
 */
public static void uploadBytesAsChunks(
        ZooKeeper zk, String node, byte payload[], boolean ephemeral) throws Exception {
    ByteBuffer compressed = ByteBuffer.wrap(compressBytes(payload));

    while (compressed.hasRemaining()) {
        // Copy the next slice (up to 1 MiB) into its own exactly-sized array.
        byte chunk[] = new byte[Math.min(1024 * 1024, compressed.remaining())];
        compressed.get(chunk);

        CreateMode chunkMode;
        if (ephemeral) {
            chunkMode = CreateMode.EPHEMERAL_SEQUENTIAL;
        } else {
            chunkMode = CreateMode.PERSISTENT_SEQUENTIAL;
        }
        zk.create(node, chunk, Ids.OPEN_ACL_UNSAFE, chunkMode);
    }

    // The marker is written last, after every chunk create has returned.
    CreateMode markerMode = ephemeral ? CreateMode.EPHEMERAL : CreateMode.PERSISTENT;
    zk.create(node + "_complete", null, Ids.OPEN_ACL_UNSAFE, markerMode);
}
@Test public void testAddChildWithPutWithCallback() throws Exception { ZooKeeper zk = getClient(0); configure("/cache04", zk); TestCallback cb = new TestCallback(); LeaderCache dut = new LeaderCache(zk, "/cache04", cb); dut.start(true); Map<Integer, Long> cache = cb.m_cache; dut.put(3, 88776655); while (true) { cache = cb.m_cache; if (cache.size() == 3) { Thread.sleep(1); } else { break; } } assertEquals("Item added", 4, cache.size()); assertEquals(12345678, cache.get(0).longValue()); assertEquals(87654321, cache.get(1).longValue()); assertEquals(11223344, cache.get(2).longValue()); assertEquals(88776655, cache.get(3).longValue()); // modify the new child and make sure it has a watch set. dut.put(3, 99887766); while (true) { cache = cb.m_cache; if (cache.get(3) == 99887766) { break; } } assertEquals("Items accounted for.", 4, cache.size()); assertEquals(99887766, cache.get(3).longValue()); dut.shutdown(); zk.close(); }
private void writeKnownLiveNodes(List<Integer> liveNodes) { try { if (m_zk.exists(VoltZK.lastKnownLiveNodes, null) == null) { // VoltZK.createPersistentZKNodes should have done this m_zk.create(VoltZK.lastKnownLiveNodes, null, Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); } JSONStringer stringer = new JSONStringer(); stringer.object(); stringer.key("liveNodes").array(); for (Integer node : liveNodes) { stringer.value(node); } stringer.endArray(); stringer.endObject(); JSONObject obj = new JSONObject(stringer.toString()); tmLog.debug("Writing live nodes to ZK: " + obj.toString(4)); m_zk.setData(VoltZK.lastKnownLiveNodes, obj.toString(4).getBytes("UTF-8"), -1); } catch (Exception e) { VoltDB.crashLocalVoltDB( "Unable to update known live nodes at ZK path: " + VoltZK.lastKnownLiveNodes, true, e); } }
/**
 * Refreshes a single child in the published snapshot and republishes it. The child's entry
 * is replaced with its freshly read JSON payload, or dropped when the read reports that the
 * node no longer exists.
 *
 * @param event the child event; its path identifies the node to refresh
 */
private void processChildEvent(WatchedEvent event) throws Exception {
    HashMap<String, JSONObject> working = new HashMap<String, JSONObject>(m_publicCache.get());
    ByteArrayCallback read = new ByteArrayCallback();
    final String changedPath = event.getPath();

    // Passing m_childWatch re-registers the child watch along with the read.
    m_zk.getData(changedPath, m_childWatch, read, null);
    try {
        byte raw[] = read.getData();
        String json = new String(raw, "UTF-8");
        working.put(read.getPath(), new JSONObject(json));
    } catch (KeeperException.NoNodeException e) {
        // The child is gone; remove it from the snapshot.
        working.remove(changedPath);
    }

    m_publicCache.set(ImmutableMap.copyOf(working));
}
/**
 * Returns the IDs of the partitions currently in the cluster, derived from the children of
 * {@code VoltZK.leaders_initiators}. A ZooKeeper failure or interruption crashes the local
 * node.
 *
 * @param zk client used to list the election directories
 * @return A list of partition IDs
 */
public static List<Integer> getPartitions(ZooKeeper zk) {
    final List<Integer> partitionIds = new ArrayList<Integer>();
    try {
        for (String electionDir : zk.getChildren(VoltZK.leaders_initiators, null)) {
            Integer partitionId = LeaderElector.getPartitionFromElectionDir(electionDir);
            partitionIds.add(partitionId);
        }
    } catch (KeeperException e) {
        VoltDB.crashLocalVoltDB("Failed to get partition IDs from ZK", true, e);
    } catch (InterruptedException e) {
        VoltDB.crashLocalVoltDB("Failed to get partition IDs from ZK", true, e);
    }
    return partitionIds;
}
/**
 * Waits until the upload identified by {@code prefix} under {@code path} is complete, then
 * reads all of its chunk nodes in sorted order and returns the decompressed payload.
 *
 * <p>An upload is complete once the child {@code prefix + "_complete"} exists. The chunks are
 * all other children whose names start with {@code prefix}. Chunk order is the natural string
 * order of the node paths; this assumes the chunk nodes carry ZooKeeper's fixed-width
 * sequence suffix so that string order equals creation order.
 *
 * <p>{@code ZooKeeper.getChildren} returns child names relative to {@code path}, so names are
 * matched against the bare prefix and turned into absolute paths before being read.
 *
 * @param zk client used for listing and reading
 * @param path parent node that holds the chunk nodes
 * @param prefix name prefix shared by the chunk nodes and the completion marker
 * @param getCRC when true, a CRC32 over the raw (still compressed) chunk bytes is computed
 * @return the decompressed bytes paired with the CRC, or with null when {@code getCRC} is
 *     false
 * @throws Exception if a ZooKeeper call fails, the wait is interrupted, or decompression
 *     fails
 */
public static Pair<byte[], Integer> retrieveChunksAsBytes(
        ZooKeeper zk, String path, String prefix, boolean getCRC) throws Exception {
    final String completionMarker = prefix + "_complete";
    final TreeSet<String> chunks = new TreeSet<String>();

    while (true) {
        // Rebuild from a single listing so the marker and the chunks are seen consistently:
        // if the marker is visible, every chunk created before it is visible too.
        chunks.clear();
        boolean allUploadsComplete = false;
        for (String child : zk.getChildren(path, false)) {
            if (child.equals(completionMarker)) {
                allUploadsComplete = true;
            } else if (child.startsWith(prefix)) {
                chunks.add(path + "/" + child);
            }
        }
        if (allUploadsComplete) {
            break;
        }
        // Upload still in progress; back off briefly instead of hammering ZooKeeper.
        Thread.sleep(5);
    }

    byte resultBuffers[][] = new byte[chunks.size()][];
    int ii = 0;
    PureJavaCrc32 crc = getCRC ? new PureJavaCrc32() : null;
    for (String chunk : chunks) {
        resultBuffers[ii] = zk.getData(chunk, false, null);
        if (crc != null) {
            crc.update(resultBuffers[ii]);
        }
        ii++;
    }

    return Pair.of(decompressBytes(resultBuffers), crc != null ? (int) crc.getValue() : null);
}
/**
 * Checks that every partition in {@code [0, m_partitionCount)} still has at least one child
 * in its election directory. Every partition is checked (and logged) even after a violation
 * has been found. A failure to read a directory crashes the local node.
 *
 * @return true when no partition's election directory is empty
 */
private boolean isClusterKSafe() {
    boolean everyPartitionCovered = true;
    for (int partition = 0; partition < m_partitionCount; partition++) {
        final String electionDir = LeaderElector.electionDirForPartition(partition);
        try {
            List<String> replicas = m_zk.getChildren(electionDir, null, null);
            if (replicas.isEmpty()) {
                tmLog.fatal("K-Safety violation: No replicas found for partition: " + partition);
                everyPartitionCovered = false;
            }
        } catch (Exception e) {
            VoltDB.crashLocalVoltDB("Unable to read replicas in ZK dir: " + electionDir, true, e);
        }
    }
    return everyPartitionCovered;
}
/**
 * Given a partition ID, return a list of HSIDs of all the sites with copies of that
 * partition. Each HSID is parsed from the part of an election-directory child name that
 * precedes the first underscore. A ZooKeeper failure or interruption crashes the local node.
 */
public List<Long> getReplicasForPartition(int partition) {
    final String electionDir = LeaderElector.electionDirForPartition(partition);
    final List<Long> hsids = new ArrayList<Long>();
    try {
        for (String childName : m_zk.getChildren(electionDir, null)) {
            String hsidText = childName.split("_")[0];
            hsids.add(Long.valueOf(hsidText));
        }
    } catch (KeeperException ke) {
        org.voltdb.VoltDB.crashLocalVoltDB(
            "KeeperException getting replicas for partition: " + partition, true, ke);
    } catch (InterruptedException ie) {
        org.voltdb.VoltDB.crashLocalVoltDB(
            "InterruptedException getting replicas for partition: " + partition, true, ie);
    }
    return hsids;
}
/**
 * Reads the host ids stored in the "liveNodes" JSON array at
 * {@code VoltZK.lastKnownLiveNodes}. Any failure crashes the local node.
 *
 * @return the set of host ids read from ZooKeeper
 */
private Set<Integer> readPriorKnownLiveNodes() {
    final Set<Integer> liveHostIds = new HashSet<Integer>();
    try {
        byte[] raw = m_zk.getData(VoltZK.lastKnownLiveNodes, false, null);
        String json = new String(raw, "UTF-8");
        tmLog.debug("Read prior known live nodes: " + json);

        JSONArray stored = new JSONObject(json).getJSONArray("liveNodes");
        for (int idx = 0; idx < stored.length(); idx++) {
            liveHostIds.add(stored.getInt(idx));
        }
    } catch (Exception e) {
        VoltDB.crashLocalVoltDB(
            "Unable to read prior known live nodes at ZK path: " + VoltZK.lastKnownLiveNodes,
            true,
            e);
    }
    return liveHostIds;
}
/**
 * Called when this LeaderAppointer takes over. Starts the appointee and master leader caches,
 * decides from their contents whether this is a cluster start or a repair after a failed
 * appointer, and installs one PartitionCallback / BabySitter pair per partition accordingly.
 *
 * <ul>
 *   <li>Empty appointee cache: cluster start. Creates the election directory for every
 *       partition, watches each one, and blocks on {@code m_startupLatch}.
 *   <li>Appointee and master caches both hold {@code m_partitionCount} entries: repair.
 *       Seeds each callback with the last published master and promotes the MPI.
 *   <li>Anything else: crashes the cluster.
 * </ul>
 *
 * @throws InterruptedException if interrupted while blocked
 * @throws ExecutionException declared by the signature; the thrower is not visible in this
 *     block
 * @throws KeeperException if a ZooKeeper call fails other than the expected NodeExists
 */
@Override
public void acceptPromotion() throws InterruptedException, ExecutionException, KeeperException {
    // Crank up the leader caches. Use blocking startup so that we'll have valid point-in-time
    // caches later.
    m_iv2appointees.start(true);
    m_iv2masters.start(true);
    // Figure out what conditions we assumed leadership under.
    if (m_iv2appointees.pointInTimeCache().size() == 0) {
        tmLog.debug("LeaderAppointer in startup");
        m_state.set(AppointerState.CLUSTER_START);
    } else if ((m_iv2appointees.pointInTimeCache().size() != m_partitionCount)
            || (m_iv2masters.pointInTimeCache().size() != m_partitionCount)) {
        // If we are promoted and the appointees or masters set is partial, the previous
        // appointer failed during startup (at least for now, until we add add/remove a
        // partition on the fly).
        // NOTE(review): m_state is not set on this path; if crashGlobalVoltDB ever returns,
        // execution continues into the repair branch below - confirm that is acceptable.
        VoltDB.crashGlobalVoltDB("Detected failure during startup, unable to start", false, null);
    } else {
        tmLog.debug("LeaderAppointer in repair");
        m_state.set(AppointerState.DONE);
    }
    if (m_state.get() == AppointerState.CLUSTER_START) {
        // Need to block the return of acceptPromotion until after the MPI is promoted. Wait
        // for this latch to countdown after appointing all the partition leaders. The
        // LeaderCache callback will count it down once it has seen all the
        // appointed leaders publish themselves as the actual leaders.
        m_startupLatch = new CountDownLatch(1);
        writeKnownLiveNodes(m_hostMessenger.getLiveHostIds());
        for (int i = 0; i < m_partitionCount; i++) {
            String dir = LeaderElector.electionDirForPartition(i);
            // Race along with all of the replicas for this partition to create the ZK parent
            // node
            try {
                m_zk.create(dir, null, Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
            } catch (KeeperException.NodeExistsException e) {
                // expected on all nodes that don't start() first.
            }
            // The callback is stored before the BabySitter is built so it is already in
            // place when the blocking factory returns.
            m_callbacks[i] = new PartitionCallback(i);
            Pair<BabySitter, List<String>> sitterstuff =
                BabySitter.blockingFactory(m_zk, dir, m_callbacks[i], m_es);
            // Only the BabySitter is kept; the child list returned alongside it is unused.
            m_partitionWatchers[i] = sitterstuff.getFirst();
        }
        m_startupLatch.await();
    } else {
        // If we're taking over for a failed LeaderAppointer, we know when
        // we get here that every partition had a leader at some point in
        // time. We'll seed each of the PartitionCallbacks for each
        // partition with the HSID of the last published leader. The
        // blocking startup of the BabySitter watching that partition will
        // call our callback, get the current full set of replicas, and
        // appoint a new leader if the seeded one has actually failed
        Map<Integer, Long> masters = m_iv2masters.pointInTimeCache();
        tmLog.info("LeaderAppointer repairing with master set: " + masters);
        for (Entry<Integer, Long> master : masters.entrySet()) {
            int partId = master.getKey();
            String dir = LeaderElector.electionDirForPartition(partId);
            m_callbacks[partId] = new PartitionCallback(partId, master.getValue());
            Pair<BabySitter, List<String>> sitterstuff =
                BabySitter.blockingFactory(m_zk, dir, m_callbacks[partId], m_es);
            m_partitionWatchers[partId] = sitterstuff.getFirst();
        }
        // just go ahead and promote our MPI
        m_MPI.acceptPromotion();
    }
}
/**
 * Reacts to a change under a mailbox directory by requesting the children of the event's
 * path again, with a freshly constructed child watcher and retrieval callback.
 *
 * @param event the event whose path is re-listed
 */
private void handleChildUpdate(final WatchedEvent event) {
    final String changedPath = event.getPath();
    m_zk.getChildren(
        changedPath, constructMailboxChildWatcher(), constructChildRetrievalCallback(), null);
}
/**
 * Creates this generation's export-ack mailbox, advertises it in ZooKeeper under every local
 * partition, and pushes the initial list of peer mailboxes to the local export data sources.
 *
 * <p>Steps, in order:
 *
 * <ol>
 *   <li>Build a LocalMailbox that decodes incoming acks and forwards them to the matching
 *       ExportDataSource, then register it with the messenger.
 *   <li>For each local partition, create the partition directory and an ephemeral child
 *       named after the mailbox HSId (both asynchronously).
 *   <li>On {@code m_childUpdatingThread}, list each partition directory (setting a child
 *       watcher) and hand every data source of that partition the HSIds of the other
 *       mailboxes. This method blocks until that task finishes.
 * </ol>
 *
 * @param localPartitions partition ids hosted on this node
 * @param messenger supplies the ZooKeeper client and hosts the mailbox
 */
private void createAndRegisterAckMailboxes(
        final Set<Integer> localPartitions, HostMessenger messenger) {
    m_zk = messenger.getZK();
    m_mailboxesZKPath = VoltZK.exportGenerations + "/" + m_timestamp + "/" + "mailboxes";
    m_mbox =
        new LocalMailbox(messenger) {
            @Override
            public void deliver(VoltMessage message) {
                if (message instanceof BinaryPayloadMessage) {
                    BinaryPayloadMessage bpm = (BinaryPayloadMessage) message;
                    // Payload layout as read here: int partition, int signature length,
                    // signature bytes, long ack USO.
                    ByteBuffer buf = ByteBuffer.wrap(bpm.m_payload);
                    final int partition = buf.getInt();
                    final int length = buf.getInt();
                    byte stringBytes[] = new byte[length];
                    buf.get(stringBytes);
                    String signature = new String(stringBytes, Constants.UTF8ENCODING);
                    final long ackUSO = buf.getLong();
                    final HashMap<String, ExportDataSource> partitionSources =
                        m_dataSourcesByPartition.get(partition);
                    if (partitionSources == null) {
                        exportLog.error(
                            "Received an export ack for partition "
                                + partition
                                + " which does not exist on this node");
                        return;
                    }
                    final ExportDataSource eds = partitionSources.get(signature);
                    if (eds == null) {
                        exportLog.error(
                            "Received an export ack for partition "
                                + partition
                                + " source signature "
                                + signature
                                + " which does not exist on this node");
                        return;
                    }
                    try {
                        eds.ack(ackUSO);
                    } catch (RejectedExecutionException ignoreIt) {
                        // ignore it: as it is already shutdown
                    }
                } else {
                    exportLog.error("Receive unexpected message " + message + " in export subsystem");
                }
            }
        };
    messenger.createMailbox(null, m_mbox);
    for (Integer partition : localPartitions) {
        final String partitionDN = m_mailboxesZKPath + "/" + partition;
        // Both calls are asynchronous and their callbacks are never inspected here.
        // NOTE(review): presumably this relies on ZooKeeper applying one client's requests
        // in order so the directory exists before the child create - confirm.
        ZKUtil.asyncMkdirs(m_zk, partitionDN);
        ZKUtil.StringCallback cb = new ZKUtil.StringCallback();
        // Ephemeral child named after this mailbox's HSId, with no payload.
        m_zk.create(
            partitionDN + "/" + m_mbox.getHSId(),
            null,
            Ids.OPEN_ACL_UNSAFE,
            CreateMode.EPHEMERAL,
            cb,
            null);
    }
    ListenableFuture<?> fut =
        m_childUpdatingThread.submit(
            new Runnable() {
                @Override
                public void run() {
                    // Request every partition's children first, then collect the results.
                    List<Pair<Integer, ZKUtil.ChildrenCallback>> callbacks =
                        new ArrayList<Pair<Integer, ZKUtil.ChildrenCallback>>();
                    for (Integer partition : localPartitions) {
                        ZKUtil.ChildrenCallback callback = new ZKUtil.ChildrenCallback();
                        m_zk.getChildren(
                            m_mailboxesZKPath + "/" + partition,
                            constructMailboxChildWatcher(),
                            callback,
                            null);
                        callbacks.add(Pair.of(partition, callback));
                    }
                    for (Pair<Integer, ZKUtil.ChildrenCallback> p : callbacks) {
                        final Integer partition = p.getFirst();
                        List<String> children = null;
                        try {
                            children = p.getSecond().getChildren();
                        } catch (InterruptedException e) {
                            Throwables.propagate(e);
                        } catch (KeeperException e) {
                            Throwables.propagate(e);
                        }
                        // Child names are parsed as HSIds; our own mailbox is skipped so only
                        // the other mailboxes are collected.
                        ImmutableList.Builder<Long> mailboxes = ImmutableList.builder();
                        for (String child : children) {
                            if (child.equals(Long.toString(m_mbox.getHSId()))) continue;
                            mailboxes.add(Long.valueOf(child));
                        }
                        ImmutableList<Long> mailboxHsids = mailboxes.build();
                        // NOTE(review): assumes m_dataSourcesByPartition has an entry for
                        // every local partition; a missing entry would throw a
                        // NullPointerException here - confirm.
                        for (ExportDataSource eds : m_dataSourcesByPartition.get(partition).values()) {
                            eds.updateAckMailboxes(Pair.of(m_mbox, mailboxHsids));
                        }
                    }
                }
            });
    // Block until the initial mailbox lists have been distributed; any failure from the
    // task is rethrown to the caller.
    try {
        fut.get();
    } catch (Throwable t) {
        Throwables.propagate(t);
    }
}