@Override
@Test
public void testGetVersions() throws Exception {
    List<ByteArray> keys = getKeys(2);
    ByteArray key = keys.get(0);
    byte[] value = getValue();
    VectorClock vc = getClock(0, 0);
    Store<ByteArray, byte[], byte[]> store = getStore();
    store.put(key, Versioned.value(value, vc), null);

    List<Versioned<byte[]>> versioneds = store.get(key, null);
    List<Version> versions = store.getVersions(key);
    assertEquals(1, versioneds.size());
    assertTrue(versions.size() > 0);
    for (int i = 0; i < versions.size(); i++)
        assertEquals(versioneds.get(0).getVersion(), versions.get(i));

    assertEquals(0, store.getVersions(keys.get(1)).size());
}
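/*
 * Companion sketch (not part of the original suite): a hypothetical test,
 * reusing the same abstract helpers (getKeys, getValue, getClock, getStore),
 * that checks getVersions() surfaces both sides of a concurrent write. It
 * assumes the store under test retains concurrent versions rather than
 * resolving them on put.
 */
@Test
public void testGetVersionsAfterConcurrentWrites() throws Exception {
    ByteArray key = getKeys(1).get(0);
    Store<ByteArray, byte[], byte[]> store = getStore();
    // Clocks incremented at different nodes are concurrent: neither dominates,
    // so the store should keep both versions
    store.put(key, Versioned.value(getValue(), getClock(1)), null);
    store.put(key, Versioned.value(getValue(), getClock(2)), null);
    assertEquals(2, store.getVersions(key).size());
}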
@SuppressWarnings("unchecked") public class StreamingSlopPusherJob implements Runnable { private static final Logger logger = Logger.getLogger(StreamingSlopPusherJob.class.getName()); public static final String TYPE_NAME = "streaming"; private static final Versioned<Slop> END = Versioned.value(null); private final MetadataStore metadataStore; private final StoreRepository storeRepo; private final FailureDetector failureDetector; private final ConcurrentMap<Integer, SynchronousQueue<Versioned<Slop>>> slopQueues; private final ExecutorService consumerExecutor; private final EventThrottler readThrottler; private AdminClient adminClient; private final Cluster cluster; private final List<Future> consumerResults; private final VoldemortConfig voldemortConfig; private final Map<Integer, Set<Integer>> zoneMapping; private final ConcurrentHashMap<Integer, Long> attemptedByNode, succeededByNode; private final Semaphore repairPermits; public StreamingSlopPusherJob( StoreRepository storeRepo, MetadataStore metadataStore, FailureDetector failureDetector, VoldemortConfig voldemortConfig, Semaphore repairPermits) { this.storeRepo = storeRepo; this.metadataStore = metadataStore; this.failureDetector = failureDetector; this.voldemortConfig = voldemortConfig; this.repairPermits = Utils.notNull(repairPermits); this.cluster = metadataStore.getCluster(); this.slopQueues = new ConcurrentHashMap<Integer, SynchronousQueue<Versioned<Slop>>>( cluster.getNumberOfNodes()); this.consumerExecutor = Executors.newFixedThreadPool( cluster.getNumberOfNodes(), new ThreadFactory() { public Thread newThread(Runnable r) { Thread thread = new Thread(r); thread.setName("slop-pusher"); return thread; } }); this.readThrottler = new EventThrottler(voldemortConfig.getSlopMaxReadBytesPerSec()); this.adminClient = null; this.consumerResults = Lists.newArrayList(); this.attemptedByNode = new ConcurrentHashMap<Integer, Long>(cluster.getNumberOfNodes()); this.succeededByNode = new ConcurrentHashMap<Integer, Long>(cluster.getNumberOfNodes()); this.zoneMapping = Maps.newHashMap(); } public void run() { // don't try to run slop pusher job when rebalancing if (metadataStore .getServerState() .equals(MetadataStore.VoldemortState.REBALANCING_MASTER_SERVER)) { logger.error("Cannot run slop pusher job since Voldemort server is rebalancing"); return; } boolean terminatedEarly = false; Date startTime = new Date(); logger.info("Started streaming slop pusher job at " + startTime); SlopStorageEngine slopStorageEngine = storeRepo.getSlopStore(); ClosableIterator<Pair<ByteArray, Versioned<Slop>>> iterator = null; if (adminClient == null) { adminClient = new AdminClient( cluster, new AdminClientConfig() .setMaxThreads(cluster.getNumberOfNodes()) .setMaxConnectionsPerNode(1)); } if (voldemortConfig.getSlopZonesDownToTerminate() > 0) { // Populating the zone mapping for early termination zoneMapping.clear(); for (Node n : cluster.getNodes()) { if (failureDetector.isAvailable(n)) { Set<Integer> nodes = zoneMapping.get(n.getZoneId()); if (nodes == null) { nodes = Sets.newHashSet(); zoneMapping.put(n.getZoneId(), nodes); } nodes.add(n.getId()); } } // Check how many zones are down int zonesDown = 0; for (Zone zone : cluster.getZones()) { if (zoneMapping.get(zone.getId()) == null || zoneMapping.get(zone.getId()).size() == 0) zonesDown++; } // Terminate early if (voldemortConfig.getSlopZonesDownToTerminate() <= zoneMapping.size() && zonesDown >= voldemortConfig.getSlopZonesDownToTerminate()) { logger.info( "Completed streaming slop pusher job at " + startTime 
+ " early because " + zonesDown + " zones are down"); stopAdminClient(); return; } } // Clearing the statistics AtomicLong attemptedPushes = new AtomicLong(0); for (Node node : cluster.getNodes()) { attemptedByNode.put(node.getId(), 0L); succeededByNode.put(node.getId(), 0L); } acquireRepairPermit(); try { StorageEngine<ByteArray, Slop, byte[]> slopStore = slopStorageEngine.asSlopStore(); iterator = slopStore.entries(); while (iterator.hasNext()) { Pair<ByteArray, Versioned<Slop>> keyAndVal; try { keyAndVal = iterator.next(); Versioned<Slop> versioned = keyAndVal.getSecond(); // Retrieve the node int nodeId = versioned.getValue().getNodeId(); Node node = cluster.getNodeById(nodeId); attemptedPushes.incrementAndGet(); Long attempted = attemptedByNode.get(nodeId); attemptedByNode.put(nodeId, attempted + 1L); if (attemptedPushes.get() % 10000 == 0) logger.info("Attempted pushing " + attemptedPushes + " slops"); if (logger.isTraceEnabled()) logger.trace( "Pushing slop for " + versioned.getValue().getNodeId() + " and store " + versioned.getValue().getStoreName()); if (failureDetector.isAvailable(node)) { SynchronousQueue<Versioned<Slop>> slopQueue = slopQueues.get(nodeId); if (slopQueue == null) { // No previous slop queue, add one slopQueue = new SynchronousQueue<Versioned<Slop>>(); slopQueues.put(nodeId, slopQueue); consumerResults.add( consumerExecutor.submit(new SlopConsumer(nodeId, slopQueue, slopStorageEngine))); } boolean offered = slopQueue.offer( versioned, voldemortConfig.getClientRoutingTimeoutMs(), TimeUnit.MILLISECONDS); if (!offered) { if (logger.isDebugEnabled()) logger.debug( "No consumer appeared for slop in " + voldemortConfig.getClientConnectionTimeoutMs() + " ms"); } readThrottler.maybeThrottle(nBytesRead(keyAndVal)); } else { logger.trace(node + " declared down, won't push slop"); } } catch (RejectedExecutionException e) { throw new VoldemortException("Ran out of threads in executor", e); } } } catch (InterruptedException e) { logger.warn("Interrupted exception", e); terminatedEarly = true; } catch (Exception e) { logger.error(e, e); terminatedEarly = true; } finally { try { if (iterator != null) iterator.close(); } catch (Exception e) { logger.warn("Failed to close iterator cleanly as database might be closed", e); } // Adding the poison pill for (SynchronousQueue<Versioned<Slop>> slopQueue : slopQueues.values()) { try { slopQueue.put(END); } catch (InterruptedException e) { logger.warn("Error putting poison pill", e); } } for (Future result : consumerResults) { try { result.get(); } catch (Exception e) { logger.warn("Exception in consumer", e); } } // Only if exception didn't take place do we update the counts if (!terminatedEarly) { Map<Integer, Long> outstanding = Maps.newHashMapWithExpectedSize(cluster.getNumberOfNodes()); for (int nodeId : succeededByNode.keySet()) { logger.info( "Slops to node " + nodeId + " - Succeeded - " + succeededByNode.get(nodeId) + " - Attempted - " + attemptedByNode.get(nodeId)); outstanding.put(nodeId, attemptedByNode.get(nodeId) - succeededByNode.get(nodeId)); } slopStorageEngine.resetStats(outstanding); logger.info("Completed streaming slop pusher job which started at " + startTime); } else { for (int nodeId : succeededByNode.keySet()) { logger.info( "Slops to node " + nodeId + " - Succeeded - " + succeededByNode.get(nodeId) + " - Attempted - " + attemptedByNode.get(nodeId)); } logger.info("Completed early streaming slop pusher job which started at " + startTime); } // Shut down admin client as not to waste connections 
    private void stopAdminClient() {
        if (adminClient != null) {
            adminClient.stop();
            adminClient = null;
        }
    }

    private int nBytesRead(Pair<ByteArray, Versioned<Slop>> keyAndVal) {
        return keyAndVal.getFirst().length() + slopSize(keyAndVal.getSecond());
    }

    /**
     * Returns the approximate size of a slop, in bytes, to help in throttling
     *
     * @param slopVersioned The versioned slop whose size we want
     * @return Size in bytes
     */
    private int slopSize(Versioned<Slop> slopVersioned) {
        int nBytes = 0;
        Slop slop = slopVersioned.getValue();
        nBytes += slop.getKey().length();
        nBytes += ((VectorClock) slopVersioned.getVersion()).sizeInBytes();
        switch (slop.getOperation()) {
            case PUT: {
                nBytes += slop.getValue().length;
                break;
            }
            case DELETE: {
                break;
            }
            default:
                logger.error("Unknown slop operation: " + slop.getOperation());
        }
        return nBytes;
    }

    /**
     * Iterator over a node's slop queue which terminates either on the END
     * poison pill or after a full batch, recording each slop it hands out
     * into the current delete batch
     */
    private class SlopIterator extends AbstractIterator<Versioned<Slop>> {

        private final SynchronousQueue<Versioned<Slop>> slopQueue;
        private final List<Pair<ByteArray, Version>> deleteBatch;
        private final EventThrottler writeThrottler;

        private int writtenLast = 0;
        private long slopsDone = 0L;
        private boolean shutDown = false, isComplete = false;

        public SlopIterator(SynchronousQueue<Versioned<Slop>> slopQueue,
                            List<Pair<ByteArray, Version>> deleteBatch) {
            this.slopQueue = slopQueue;
            this.deleteBatch = deleteBatch;
            this.writeThrottler = new EventThrottler(voldemortConfig.getSlopMaxWriteBytesPerSec());
        }

        public boolean isComplete() {
            return isComplete;
        }

        @Override
        protected Versioned<Slop> computeNext() {
            try {
                Versioned<Slop> head = null;
                if (!shutDown) {
                    head = slopQueue.take();
                    if (head.equals(END)) {
                        shutDown = true;
                        isComplete = true;
                    } else {
                        slopsDone++;
                        if (slopsDone % voldemortConfig.getSlopBatchSize() == 0) {
                            shutDown = true;
                        }
                        writeThrottler.maybeThrottle(writtenLast);
                        writtenLast = slopSize(head);
                        deleteBatch.add(Pair.create(head.getValue().makeKey(), head.getVersion()));
                        return head;
                    }
                }
                return endOfData();
            } catch (Exception e) {
                logger.error("Got an exception", e);
                return endOfData();
            }
        }
    }

    private void acquireRepairPermit() {
        logger.info("Acquiring lock to perform streaming slop pusher job");
        try {
            this.repairPermits.acquire();
            logger.info("Acquired lock to perform streaming slop pusher job");
        } catch (InterruptedException e) {
            stopAdminClient();
            throw new IllegalStateException("Streaming slop pusher job interrupted while waiting for permit.",
                                            e);
        }
    }
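    /*
     * Design note (descriptive, added for clarity): the consumer below deletes
     * a pushed batch from the local slop store only at the start of a later
     * round, after the following batch has been streamed via
     * updateSlopEntries, so slops are never discarded before a subsequent
     * round trip has succeeded; whatever remains in the previous and current
     * batches is flushed once the iterator reports completion.
     */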
    private class SlopConsumer implements Runnable {

        private final int nodeId;
        private SynchronousQueue<Versioned<Slop>> slopQueue;
        private long startTime;
        private SlopStorageEngine slopStorageEngine;

        // Keep two batches so that a batch is deleted only after the one
        // following it has been pushed
        private List<Pair<ByteArray, Version>> previous, current;

        public SlopConsumer(int nodeId,
                            SynchronousQueue<Versioned<Slop>> slopQueue,
                            SlopStorageEngine slopStorageEngine) {
            this.nodeId = nodeId;
            this.slopQueue = slopQueue;
            this.slopStorageEngine = slopStorageEngine;
            this.previous = Lists.newArrayList();
            this.current = Lists.newArrayList();
        }

        public void run() {
            try {
                SlopIterator iterator = null;
                do {
                    if (!current.isEmpty()) {
                        if (!previous.isEmpty()) {
                            for (Pair<ByteArray, Version> entry : previous) {
                                slopStorageEngine.delete(entry.getFirst(), entry.getSecond());
                            }
                            Long succeeded = succeededByNode.get(nodeId);
                            succeeded += previous.size();
                            succeededByNode.put(nodeId, succeeded);
                            previous.clear();
                        }
                        previous = current;
                        current = Lists.newArrayList();
                    }
                    this.startTime = System.currentTimeMillis();
                    iterator = new SlopIterator(slopQueue, current);
                    adminClient.updateSlopEntries(nodeId, iterator);
                } while (!iterator.isComplete());

                // Clear up both the previous and current batches
                if (!previous.isEmpty()) {
                    for (Pair<ByteArray, Version> entry : previous)
                        slopStorageEngine.delete(entry.getFirst(), entry.getSecond());
                    Long succeeded = succeededByNode.get(nodeId);
                    succeeded += previous.size();
                    succeededByNode.put(nodeId, succeeded);
                    previous.clear();
                }
                if (!current.isEmpty()) {
                    for (Pair<ByteArray, Version> entry : current)
                        slopStorageEngine.delete(entry.getFirst(), entry.getSecond());
                    Long succeeded = succeededByNode.get(nodeId);
                    succeeded += current.size();
                    succeededByNode.put(nodeId, succeeded);
                    current.clear();
                }
            } catch (UnreachableStoreException e) {
                failureDetector.recordException(metadataStore.getCluster().getNodeById(nodeId),
                                                System.currentTimeMillis() - this.startTime,
                                                e);
                throw e;
            } finally {
                // Clean the slop queue and remove it from the global map
                slopQueue.clear();
                slopQueues.remove(nodeId);
            }
        }
    }
}
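/*
 * Usage sketch (not part of the original source): one way a server component
 * might wire up and schedule this job. The wrapper class, the single-permit
 * semaphore, and the five-minute frequency are illustrative assumptions, not
 * Voldemort's actual scheduler wiring or defaults.
 */
class SlopPusherScheduling {

    static ScheduledExecutorService schedulePusher(StoreRepository storeRepo,
                                                   MetadataStore metadataStore,
                                                   FailureDetector failureDetector,
                                                   VoldemortConfig config) {
        // A single permit serializes this job with any other repair job
        // sharing the same semaphore
        Runnable job = new StreamingSlopPusherJob(storeRepo,
                                                  metadataStore,
                                                  failureDetector,
                                                  config,
                                                  new Semaphore(1));
        ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor();
        scheduler.scheduleWithFixedDelay(job, 5, 5, TimeUnit.MINUTES);
        return scheduler;
    }
}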