@Override
  @Test
  public void testGetVersions() throws Exception {
    // Two keys: the first gets a value written to it, the second is left untouched.
    List<ByteArray> testKeys = getKeys(2);
    ByteArray writtenKey = testKeys.get(0);
    byte[] payload = getValue();
    VectorClock clock = getClock(0, 0);
    Store<ByteArray, byte[], byte[]> store = getStore();

    // Write a single versioned value under the first key.
    store.put(writtenKey, Versioned.value(payload, clock), null);

    List<Versioned<byte[]>> fetched = store.get(writtenKey, null);
    List<Version> versionList = store.getVersions(writtenKey);

    // Exactly one value comes back, and getVersions() must report at least one
    // version, each equal to the version returned by get().
    assertEquals(1, fetched.size());
    assertTrue(versionList.size() > 0);
    Version expected = fetched.get(0).getVersion();
    for (Version actual : versionList) assertEquals(expected, actual);

    // A key that was never written must have no versions.
    assertEquals(0, store.getVersions(testKeys.get(1)).size());
  }
/**
 * Job that drains the local slop store (slops are writes that could not be delivered to their
 * destination node and were parked locally) and streams each entry back to the node it was
 * intended for via the admin client's {@code updateSlopEntries} streaming API.
 *
 * <p>Threading model: {@code run()} acts as the single producer, iterating the slop store and
 * handing each slop to a per-destination-node {@link SynchronousQueue}; one consumer task per
 * node (see {@link SlopConsumer}) drains its queue and pushes batches to that node. The
 * null-valued sentinel {@code END} is used as a poison pill to terminate consumers.
 */
@SuppressWarnings("unchecked")
public class StreamingSlopPusherJob implements Runnable {

  private static final Logger logger = Logger.getLogger(StreamingSlopPusherJob.class.getName());
  public static final String TYPE_NAME = "streaming";

  // Poison pill put on every queue in run()'s finally block; consumers stop when they take it.
  private static final Versioned<Slop> END = Versioned.value(null);

  private final MetadataStore metadataStore;
  private final StoreRepository storeRepo;
  private final FailureDetector failureDetector;
  // Destination node id -> hand-off queue; entries are created lazily on first slop for a node.
  private final ConcurrentMap<Integer, SynchronousQueue<Versioned<Slop>>> slopQueues;
  private final ExecutorService consumerExecutor;
  private final EventThrottler readThrottler;
  // Created lazily on the first run() and torn down at the end of each run (stopAdminClient).
  private AdminClient adminClient;
  private final Cluster cluster;

  private final List<Future> consumerResults;
  private final VoldemortConfig voldemortConfig;
  // Zone id -> ids of currently-available nodes in that zone; used by the early-exit check.
  private final Map<Integer, Set<Integer>> zoneMapping;
  // Per-node counters for this run: slops handed to a consumer vs. slops confirmed delivered.
  private final ConcurrentHashMap<Integer, Long> attemptedByNode, succeededByNode;
  // Shared permit that serializes this job against other repair-style maintenance jobs.
  private final Semaphore repairPermits;

  /**
   * @param storeRepo repository used to look up the slop storage engine
   * @param metadataStore source of cluster topology and current server state
   * @param failureDetector consulted to skip nodes that are marked down
   * @param voldemortConfig supplies throttling, batch-size and timeout settings
   * @param repairPermits non-null semaphore shared with other repair jobs
   */
  public StreamingSlopPusherJob(
      StoreRepository storeRepo,
      MetadataStore metadataStore,
      FailureDetector failureDetector,
      VoldemortConfig voldemortConfig,
      Semaphore repairPermits) {
    this.storeRepo = storeRepo;
    this.metadataStore = metadataStore;
    this.failureDetector = failureDetector;
    this.voldemortConfig = voldemortConfig;
    this.repairPermits = Utils.notNull(repairPermits);

    this.cluster = metadataStore.getCluster();
    this.slopQueues =
        new ConcurrentHashMap<Integer, SynchronousQueue<Versioned<Slop>>>(
            cluster.getNumberOfNodes());
    // Pool sized so that every destination node can have a live consumer at once.
    this.consumerExecutor =
        Executors.newFixedThreadPool(
            cluster.getNumberOfNodes(),
            new ThreadFactory() {

              public Thread newThread(Runnable r) {
                Thread thread = new Thread(r);
                thread.setName("slop-pusher");
                return thread;
              }
            });

    this.readThrottler = new EventThrottler(voldemortConfig.getSlopMaxReadBytesPerSec());
    this.adminClient = null;
    this.consumerResults = Lists.newArrayList();
    this.attemptedByNode = new ConcurrentHashMap<Integer, Long>(cluster.getNumberOfNodes());
    this.succeededByNode = new ConcurrentHashMap<Integer, Long>(cluster.getNumberOfNodes());

    this.zoneMapping = Maps.newHashMap();
  }

  /**
   * Iterates the slop store once, routing each slop to the consumer for its destination node.
   * Skips the whole run while the server is rebalancing, and optionally exits early when the
   * configured number of zones is down. Per-node attempted/succeeded statistics are reset at the
   * start of every run and pushed into the slop engine at the end unless the run failed.
   */
  public void run() {

    // don't try to run slop pusher job when rebalancing
    if (metadataStore
        .getServerState()
        .equals(MetadataStore.VoldemortState.REBALANCING_MASTER_SERVER)) {
      logger.error("Cannot run slop pusher job since Voldemort server is rebalancing");
      return;
    }

    boolean terminatedEarly = false;
    Date startTime = new Date();
    logger.info("Started streaming slop pusher job at " + startTime);

    SlopStorageEngine slopStorageEngine = storeRepo.getSlopStore();
    ClosableIterator<Pair<ByteArray, Versioned<Slop>>> iterator = null;

    if (adminClient == null) {
      adminClient =
          new AdminClient(
              cluster,
              new AdminClientConfig()
                  .setMaxThreads(cluster.getNumberOfNodes())
                  .setMaxConnectionsPerNode(1));
    }

    if (voldemortConfig.getSlopZonesDownToTerminate() > 0) {
      // Populating the zone mapping for early termination
      zoneMapping.clear();
      for (Node n : cluster.getNodes()) {
        if (failureDetector.isAvailable(n)) {
          Set<Integer> nodes = zoneMapping.get(n.getZoneId());
          if (nodes == null) {
            nodes = Sets.newHashSet();
            zoneMapping.put(n.getZoneId(), nodes);
          }
          nodes.add(n.getId());
        }
      }

      // Check how many zones are down
      int zonesDown = 0;
      for (Zone zone : cluster.getZones()) {
        if (zoneMapping.get(zone.getId()) == null || zoneMapping.get(zone.getId()).size() == 0)
          zonesDown++;
      }

      // Terminate early
      if (voldemortConfig.getSlopZonesDownToTerminate() <= zoneMapping.size()
          && zonesDown >= voldemortConfig.getSlopZonesDownToTerminate()) {
        logger.info(
            "Completed streaming slop pusher job at "
                + startTime
                + " early because "
                + zonesDown
                + " zones are down");
        stopAdminClient();
        return;
      }
    }

    // Clearing the statistics
    AtomicLong attemptedPushes = new AtomicLong(0);
    for (Node node : cluster.getNodes()) {
      attemptedByNode.put(node.getId(), 0L);
      succeededByNode.put(node.getId(), 0L);
    }

    acquireRepairPermit();
    try {
      StorageEngine<ByteArray, Slop, byte[]> slopStore = slopStorageEngine.asSlopStore();
      iterator = slopStore.entries();

      while (iterator.hasNext()) {
        Pair<ByteArray, Versioned<Slop>> keyAndVal;
        try {
          keyAndVal = iterator.next();
          Versioned<Slop> versioned = keyAndVal.getSecond();

          // Retrieve the node
          int nodeId = versioned.getValue().getNodeId();
          Node node = cluster.getNodeById(nodeId);

          attemptedPushes.incrementAndGet();
          Long attempted = attemptedByNode.get(nodeId);
          attemptedByNode.put(nodeId, attempted + 1L);
          if (attemptedPushes.get() % 10000 == 0)
            logger.info("Attempted pushing " + attemptedPushes + " slops");

          if (logger.isTraceEnabled())
            logger.trace(
                "Pushing slop for "
                    + versioned.getValue().getNodeId()
                    + " and store  "
                    + versioned.getValue().getStoreName());

          if (failureDetector.isAvailable(node)) {
            SynchronousQueue<Versioned<Slop>> slopQueue = slopQueues.get(nodeId);
            if (slopQueue == null) {
              // No previous slop queue, add one
              slopQueue = new SynchronousQueue<Versioned<Slop>>();
              slopQueues.put(nodeId, slopQueue);
              consumerResults.add(
                  consumerExecutor.submit(new SlopConsumer(nodeId, slopQueue, slopStorageEngine)));
            }
            // NOTE(review): the offer timeout uses getClientRoutingTimeoutMs(), but the
            // debug message below reports getClientConnectionTimeoutMs() — confirm which
            // config value is actually intended here.
            boolean offered =
                slopQueue.offer(
                    versioned, voldemortConfig.getClientRoutingTimeoutMs(), TimeUnit.MILLISECONDS);
            if (!offered) {
              if (logger.isDebugEnabled())
                logger.debug(
                    "No consumer appeared for slop in "
                        + voldemortConfig.getClientConnectionTimeoutMs()
                        + " ms");
            }
            // Throttle by the approximate byte size (key + slop) of the entry just handed off.
            readThrottler.maybeThrottle(nBytesRead(keyAndVal));
          } else {
            logger.trace(node + " declared down, won't push slop");
          }
        } catch (RejectedExecutionException e) {
          throw new VoldemortException("Ran out of threads in executor", e);
        }
      }

    } catch (InterruptedException e) {
      // NOTE(review): the thread's interrupt status is not restored here — TODO confirm intent.
      logger.warn("Interrupted exception", e);
      terminatedEarly = true;
    } catch (Exception e) {
      logger.error(e, e);
      terminatedEarly = true;
    } finally {
      try {
        if (iterator != null) iterator.close();
      } catch (Exception e) {
        logger.warn("Failed to close iterator cleanly as database might be closed", e);
      }

      // Adding the poison pill
      for (SynchronousQueue<Versioned<Slop>> slopQueue : slopQueues.values()) {
        try {
          slopQueue.put(END);
        } catch (InterruptedException e) {
          logger.warn("Error putting poison pill", e);
        }
      }

      // Wait for every consumer to finish draining its queue.
      for (Future result : consumerResults) {
        try {
          result.get();
        } catch (Exception e) {
          logger.warn("Exception in consumer", e);
        }
      }

      // Only if exception didn't take place do we update the counts
      if (!terminatedEarly) {
        Map<Integer, Long> outstanding =
            Maps.newHashMapWithExpectedSize(cluster.getNumberOfNodes());
        for (int nodeId : succeededByNode.keySet()) {
          logger.info(
              "Slops to node "
                  + nodeId
                  + " - Succeeded - "
                  + succeededByNode.get(nodeId)
                  + " - Attempted - "
                  + attemptedByNode.get(nodeId));
          outstanding.put(nodeId, attemptedByNode.get(nodeId) - succeededByNode.get(nodeId));
        }
        slopStorageEngine.resetStats(outstanding);
        logger.info("Completed streaming slop pusher job which started at " + startTime);
      } else {
        for (int nodeId : succeededByNode.keySet()) {
          logger.info(
              "Slops to node "
                  + nodeId
                  + " - Succeeded - "
                  + succeededByNode.get(nodeId)
                  + " - Attempted - "
                  + attemptedByNode.get(nodeId));
        }
        logger.info("Completed early streaming slop pusher job which started at " + startTime);
      }

      // Shut down admin client as not to waste connections
      consumerResults.clear();
      slopQueues.clear();
      stopAdminClient();
      this.repairPermits.release();
    }
  }

  /** Stops and discards the admin client so connections are not held between runs. */
  private void stopAdminClient() {
    if (adminClient != null) {
      adminClient.stop();
      adminClient = null;
    }
  }

  /** Approximate bytes read for one slop entry (key length + slop size), used for throttling. */
  private int nBytesRead(Pair<ByteArray, Versioned<Slop>> keyAndVal) {
    return keyAndVal.getFirst().length() + slopSize(keyAndVal.getSecond());
  }

  /**
   * Returns the approximate size of slop to help in throttling
   *
   * @param slopVersioned The versioned slop whose size we want
   * @return Size in bytes
   */
  private int slopSize(Versioned<Slop> slopVersioned) {
    int nBytes = 0;
    Slop slop = slopVersioned.getValue();
    nBytes += slop.getKey().length();
    nBytes += ((VectorClock) slopVersioned.getVersion()).sizeInBytes();
    switch (slop.getOperation()) {
      case PUT:
        {
          nBytes += slop.getValue().length;
          break;
        }
      case DELETE:
        {
          // Deletes carry no value payload.
          break;
        }
      default:
        logger.error("Unknown slop operation: " + slop.getOperation());
    }
    return nBytes;
  }

  /** Smart slop iterator which keeps two previous batches of data */
  private class SlopIterator extends AbstractIterator<Versioned<Slop>> {

    private final SynchronousQueue<Versioned<Slop>> slopQueue;
    // Every slop returned is also recorded here so the consumer can delete it after delivery.
    private final List<Pair<ByteArray, Version>> deleteBatch;
    private final EventThrottler writeThrottler;

    // Size of the previously returned slop; write throttling is applied one element behind.
    private int writtenLast = 0;
    private long slopsDone = 0L;
    // shutDown ends the current batch; isComplete additionally means the poison pill was seen.
    private boolean shutDown = false, isComplete = false;

    public SlopIterator(
        SynchronousQueue<Versioned<Slop>> slopQueue, List<Pair<ByteArray, Version>> deleteBatch) {
      this.slopQueue = slopQueue;
      this.deleteBatch = deleteBatch;
      this.writeThrottler = new EventThrottler(voldemortConfig.getSlopMaxWriteBytesPerSec());
    }

    public boolean isComplete() {
      return isComplete;
    }

    @Override
    protected Versioned<Slop> computeNext() {
      try {
        Versioned<Slop> head = null;
        if (!shutDown) {
          head = slopQueue.take();
          // NOTE(review): poison-pill detection uses equals(), not ==; this relies on
          // Versioned.equals treating a null-valued Versioned as equal to END — confirm.
          if (head.equals(END)) {
            shutDown = true;
            isComplete = true;
          } else {
            slopsDone++;
            // End the current batch (but not the overall stream) every slop-batch-size slops.
            if (slopsDone % voldemortConfig.getSlopBatchSize() == 0) {
              shutDown = true;
            }

            writeThrottler.maybeThrottle(writtenLast);
            writtenLast = slopSize(head);
            deleteBatch.add(Pair.create(head.getValue().makeKey(), head.getVersion()));
            return head;
          }
        }
        return endOfData();
      } catch (Exception e) {
        logger.error("Got an exception " + e);
        return endOfData();
      }
    }
  }

  /** Blocks until the shared repair permit is acquired; run() releases it in its finally block. */
  private void acquireRepairPermit() {
    logger.info("Acquiring lock to perform streaming slop pusher job ");
    try {
      this.repairPermits.acquire();
      logger.info("Acquired lock to perform streaming slop pusher job ");
    } catch (InterruptedException e) {
      stopAdminClient();
      throw new IllegalStateException(
          "Streaming slop pusher job interrupted while waiting for permit.", e);
    }
  }

  /**
   * Drains one node's slop queue in batches, streaming each batch to the destination node while
   * deleting locally (and counting as succeeded) the batch delivered one iteration earlier.
   */
  private class SlopConsumer implements Runnable {

    private final int nodeId;
    private SynchronousQueue<Versioned<Slop>> slopQueue;
    private long startTime;
    private SlopStorageEngine slopStorageEngine;

    // Keep two lists to track deleted items
    private List<Pair<ByteArray, Version>> previous, current;

    public SlopConsumer(
        int nodeId,
        SynchronousQueue<Versioned<Slop>> slopQueue,
        SlopStorageEngine slopStorageEngine) {
      this.nodeId = nodeId;
      this.slopQueue = slopQueue;
      this.slopStorageEngine = slopStorageEngine;
      this.previous = Lists.newArrayList();
      this.current = Lists.newArrayList();
    }

    public void run() {
      try {
        SlopIterator iterator = null;
        do {
          // The batch before last has been delivered: delete it from the local slop store,
          // credit it as succeeded, then rotate current -> previous for the next iteration.
          if (!current.isEmpty()) {
            if (!previous.isEmpty()) {
              for (Pair<ByteArray, Version> entry : previous) {
                slopStorageEngine.delete(entry.getFirst(), entry.getSecond());
              }
              Long succeeded = succeededByNode.get(nodeId);
              succeeded += previous.size();
              succeededByNode.put(nodeId, succeeded);
              previous.clear();
            }
            previous = null;
            previous = current;
            current = Lists.newArrayList();
          }
          this.startTime = System.currentTimeMillis();
          iterator = new SlopIterator(slopQueue, current);
          adminClient.updateSlopEntries(nodeId, iterator);
        } while (!iterator.isComplete());

        // Clear up both previous and current
        if (!previous.isEmpty()) {
          for (Pair<ByteArray, Version> entry : previous)
            slopStorageEngine.delete(entry.getFirst(), entry.getSecond());
          Long succeeded = succeededByNode.get(nodeId);
          succeeded += previous.size();
          succeededByNode.put(nodeId, succeeded);
          previous.clear();
        }
        if (!current.isEmpty()) {
          for (Pair<ByteArray, Version> entry : current)
            slopStorageEngine.delete(entry.getFirst(), entry.getSecond());
          Long succeeded = succeededByNode.get(nodeId);
          succeeded += current.size();
          succeededByNode.put(nodeId, succeeded);
          current.clear();
        }

      } catch (UnreachableStoreException e) {
        // Tell the failure detector how long we ran before the node became unreachable.
        failureDetector.recordException(
            metadataStore.getCluster().getNodeById(nodeId),
            System.currentTimeMillis() - this.startTime,
            e);
        throw e;
      } finally {
        // Clean the slop queue and remove the queue from the global
        // queue
        slopQueue.clear();
        slopQueues.remove(nodeId);
      }
    }
  }
}