public void openSystemStore(StoreDefinition storeDef) {

    logger.info("Opening system store '" + storeDef.getName() + "' (" + storeDef.getType() + ").");

    StorageConfiguration config = storageConfigs.get(storeDef.getType());
    if (config == null)
      throw new ConfigurationException(
          "Attempt to open system store "
              + storeDef.getName()
              + " but "
              + storeDef.getType()
              + " storage engine has not been enabled.");

    final StorageEngine<ByteArray, byte[], byte[]> engine = config.getStore(storeDef, null);

    // Note that there is no read-only processing here, unlike for user stores.

    // openSystemStore() should have atomic semantics
    try {
      registerSystemEngine(engine);

      if (voldemortConfig.isServerRoutingEnabled())
        registerNodeStores(storeDef, metadata.getCluster(), voldemortConfig.getNodeId());

      if (storeDef.hasRetentionPeriod()) scheduleCleanupJob(storeDef, engine);
    } catch (Exception e) {
      unregisterSystemEngine(engine);
      throw new VoldemortException(e);
    }
  }
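  // unregisterSystemEngine() serves as the rollback path for openSystemStore()
  // above (see its catch block): it removes any JMX beans and node stores that
  // registration created, then closes the engine. Unlike removeEngine() further
  // below, it never truncates the underlying data.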
  public void unregisterSystemEngine(StorageEngine<ByteArray, byte[], byte[]> engine) {
    String storeName = engine.getName();
    Store<ByteArray, byte[], byte[]> store = storeRepository.removeLocalStore(storeName);

    if (store != null) {
      if (voldemortConfig.isJmxEnabled()) {
        MBeanServer mbeanServer = ManagementFactory.getPlatformMBeanServer();

        if (voldemortConfig.isEnableRebalanceService()) {

          ObjectName name = null;
          if (this.voldemortConfig.isEnableJmxClusterName())
            name =
                JmxUtils.createObjectName(
                    metadata.getCluster().getName()
                        + "."
                        + JmxUtils.getPackageName(RedirectingStore.class),
                    store.getName());
          else
            name =
                JmxUtils.createObjectName(
                    JmxUtils.getPackageName(RedirectingStore.class), store.getName());

          synchronized (mbeanServer) {
            if (mbeanServer.isRegistered(name)) JmxUtils.unregisterMbean(mbeanServer, name);
          }
        }

        if (voldemortConfig.isStatTrackingEnabled()) {
          ObjectName name = null;
          if (this.voldemortConfig.isEnableJmxClusterName())
            name =
                JmxUtils.createObjectName(
                    metadata.getCluster().getName()
                        + "."
                        + JmxUtils.getPackageName(store.getClass()),
                    store.getName());
          else
            name =
                JmxUtils.createObjectName(
                    JmxUtils.getPackageName(store.getClass()), store.getName());

          synchronized (mbeanServer) {
            if (mbeanServer.isRegistered(name)) JmxUtils.unregisterMbean(mbeanServer, name);
          }
        }
      }
      if (voldemortConfig.isServerRoutingEnabled()) {
        this.storeRepository.removeRoutedStore(storeName);
        for (Node node : metadata.getCluster().getNodes())
          this.storeRepository.removeNodeStore(storeName, node.getId());
      }
    }

    storeRepository.removeStorageEngine(storeName);
    // engine.truncate(); -- why truncate when merely unregistering? Isn't
    // close() good enough?
    engine.close();
  }
  /**
   * Schedule a data retention cleanup job for the given store
   *
   * @param storeDef The store definition
   * @param engine The storage engine to do cleanup on
   */
  private void scheduleCleanupJob(
      StoreDefinition storeDef, StorageEngine<ByteArray, byte[], byte[]> engine) {
    // Compute the start time of the job, based on current time
    GregorianCalendar cal =
        Utils.getCalendarForNextRun(
            new GregorianCalendar(),
            voldemortConfig.getRetentionCleanupFirstStartDayOfWeek(),
            voldemortConfig.getRetentionCleanupFirstStartTimeInHour());

    // allow only one cleanup job at a time
    Date startTime = cal.getTime();

    int maxReadRate =
        storeDef.hasRetentionScanThrottleRate()
            ? storeDef.getRetentionScanThrottleRate()
            : Integer.MAX_VALUE;

    logger.info(
        "Scheduling data retention cleanup job for store '"
            + storeDef.getName()
            + "' at "
            + startTime
            + " with retention scan throttle rate:"
            + maxReadRate
            + " Entries/second.");

    EventThrottler throttler = new EventThrottler(maxReadRate);

    Runnable cleanupJob =
        new DataCleanupJob<ByteArray, byte[], byte[]>(
            engine,
            scanPermitWrapper,
            storeDef.getRetentionDays() * Time.MS_PER_DAY,
            SystemTime.INSTANCE,
            throttler,
            metadata);
    if (voldemortConfig.isJmxEnabled()) {
      JmxUtils.registerMbean("DataCleanupJob-" + engine.getName(), cleanupJob);
    }

    long retentionFreqHours =
        storeDef.hasRetentionFrequencyDays()
            ? (storeDef.getRetentionFrequencyDays() * Time.HOURS_PER_DAY)
            : voldemortConfig.getRetentionCleanupScheduledPeriodInHour();

    this.scheduler.schedule(
        "cleanup-" + storeDef.getName(),
        cleanupJob,
        startTime,
        retentionFreqHours * Time.MS_PER_HOUR,
        voldemortConfig.getRetentionCleanupPinStartTime());
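
    // A worked example with hypothetical values: retentionDays=7,
    // retentionScanThrottleRate=500 and retentionFrequencyDays=1 schedule a
    // scan every 24 hours that deletes entries older than 7 * Time.MS_PER_DAY
    // milliseconds, throttled to at most 500 entries per second.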
  }
  public StorageEngine<ByteArray, byte[], byte[]> openStore(StoreDefinition storeDef) {

    logger.info("Opening store '" + storeDef.getName() + "' (" + storeDef.getType() + ").");

    StorageConfiguration config = storageConfigs.get(storeDef.getType());
    if (config == null)
      throw new ConfigurationException(
          "Attempt to open store "
              + storeDef.getName()
              + " but "
              + storeDef.getType()
              + " storage engine has not been enabled.");

    boolean isReadOnly = storeDef.getType().compareTo(ReadOnlyStorageConfiguration.TYPE_NAME) == 0;
    final RoutingStrategy routingStrategy =
        new RoutingStrategyFactory().updateRoutingStrategy(storeDef, metadata.getCluster());

    final StorageEngine<ByteArray, byte[], byte[]> engine =
        config.getStore(storeDef, routingStrategy);
    // Update the routing strategy + add listener to metadata
    if (isReadOnly) {
      metadata.addMetadataStoreListener(
          storeDef.getName(),
          new MetadataStoreListener() {

            public void updateRoutingStrategy(RoutingStrategy updatedRoutingStrategy) {
              ((ReadOnlyStorageEngine) engine).setRoutingStrategy(updatedRoutingStrategy);
            }

            public void updateStoreDefinition(StoreDefinition storeDef) {
              return;
            }
          });
    }

    // openStore() should have atomic semantics
    try {
      registerEngine(engine, isReadOnly, storeDef.getType(), storeDef);

      if (voldemortConfig.isServerRoutingEnabled())
        registerNodeStores(storeDef, metadata.getCluster(), voldemortConfig.getNodeId());

      if (storeDef.hasRetentionPeriod()) scheduleCleanupJob(storeDef, engine);
    } catch (Exception e) {
      removeEngine(engine, isReadOnly, storeDef.getType(), false);
      throw new VoldemortException(e);
    }
    return engine;
  }
  private void initStorageConfig(String configClassName) {
    // add the configurations of the storage engines needed by user stores
    try {
      Class<?> configClass = ReflectUtils.loadClass(configClassName);
      StorageConfiguration configuration =
          (StorageConfiguration)
              ReflectUtils.callConstructor(
                  configClass,
                  new Class<?>[] {VoldemortConfig.class},
                  new Object[] {voldemortConfig});
      logger.info("Initializing " + configuration.getType() + " storage engine.");
      storageConfigs.put(configuration.getType(), configuration);

      if (voldemortConfig.isJmxEnabled())
        JmxUtils.registerMbean(configuration.getType() + "StorageConfiguration", configuration);
    } catch (IllegalStateException e) {
      logger.error("Error loading storage configuration '" + configClassName + "'.", e);
    }

    if (storageConfigs.size() == 0)
      throw new ConfigurationException("No storage engine has been enabled!");

    // now, add the configurations of the storage engines needed by system
    // stores, if they do not already exist
    initSystemStorageConfig();
  }
  public StreamingSlopPusherJob(
      StoreRepository storeRepo,
      MetadataStore metadataStore,
      FailureDetector failureDetector,
      VoldemortConfig voldemortConfig,
      ScanPermitWrapper repairPermits) {
    this.storeRepo = storeRepo;
    this.metadataStore = metadataStore;
    this.failureDetector = failureDetector;
    this.voldemortConfig = voldemortConfig;
    this.repairPermits = Utils.notNull(repairPermits);
    this.readThrottler = new EventThrottler(voldemortConfig.getSlopMaxReadBytesPerSec());
    this.adminClient = null;
    this.consumerResults = Lists.newArrayList();
    this.zoneMapping = Maps.newHashMap();
    this.consumerExecutor =
        Executors.newCachedThreadPool(
            new ThreadFactory() {

              public Thread newThread(Runnable r) {
                Thread thread = new Thread(r);
                thread.setName("slop-pusher");
                return thread;
              }
            });
  }
  public VAdminProto.AsyncOperationStatusResponse handleRebalanceNode(
      VAdminProto.InitiateRebalanceNodeRequest request) {
    VAdminProto.AsyncOperationStatusResponse.Builder response =
        VAdminProto.AsyncOperationStatusResponse.newBuilder();
    try {
      if (!voldemortConfig.isEnableRebalanceService())
        throw new VoldemortException(
            "Rebalance service is not enabled for node:" + metadataStore.getNodeId());

      RebalancePartitionsInfo rebalanceStealInfo =
          new RebalancePartitionsInfo(
              request.getStealerId(),
              request.getDonorId(),
              request.getPartitionsList(),
              request.getDeletePartitionsList(),
              request.getUnbalancedStoreList(),
              request.getAttempt());

      int requestId = rebalancer.rebalanceLocalNode(rebalanceStealInfo);

      response
          .setRequestId(requestId)
          .setDescription(rebalanceStealInfo.toString())
          .setStatus("started")
          .setComplete(false);
    } catch (VoldemortException e) {
      response.setError(ProtoUtils.encodeError(errorCodeMapper, e));
      logger.error("handleRebalanceNode failed for request(" + request.toString() + ")", e);
    }

    return response.build();
  }
  static VoldemortFilter getFilterFromRequest(
      VAdminProto.VoldemortFilter request,
      VoldemortConfig voldemortConfig,
      NetworkClassLoader networkClassLoader) {
    VoldemortFilter filter = null;

    byte[] classBytes = ProtoUtils.decodeBytes(request.getData()).get();
    String className = request.getName();
    logger.debug("Attempt to load VoldemortFilter class:" + className);

    try {
      if (voldemortConfig.isNetworkClassLoaderEnabled()) {
        // TODO: the network class loader was throwing NoClassDefFoundError for
        // voldemort.server package classes; needs testing and fixes
        logger.warn("NetworkLoader is experimental and should not be used for now.");

        Class<?> cl = networkClassLoader.loadClass(className, classBytes, 0, classBytes.length);
        filter = (VoldemortFilter) cl.newInstance();
      } else {
        Class<?> cl = Thread.currentThread().getContextClassLoader().loadClass(className);
        filter = (VoldemortFilter) cl.newInstance();
      }
    } catch (Exception e) {
      throw new VoldemortException("Failed to load and instantiate the filter class", e);
    }

    return filter;
  }
  public void run() {
    logger.debug("rebalancer run() called.");
    if (VoldemortState.REBALANCING_MASTER_SERVER.equals(metadataStore.getServerState())
        && acquireRebalancingPermit()) {

      // free permit here for rebalanceLocalNode to acquire.
      releaseRebalancingPermit();

      RebalancePartitionsInfo stealInfo = metadataStore.getRebalancingStealInfo();

      try {
        logger.warn(
            "Rebalance service found an incomplete rebalancing attempt; restarting rebalancing task "
                + stealInfo);

        if (stealInfo.getAttempt() < voldemortConfig.getMaxRebalancingAttempt()) {
          attemptRebalance(stealInfo);
        } else {
          logger.warn(
              "Rebalancing task "
                  + stealInfo
                  + " failed multiple times; aborting further attempts.");
          metadataStore.cleanAllRebalancingState();
        }
      } catch (Exception e) {
        logger.error(
            "RebalanceService rebalancing attempt " + stealInfo + " failed with exception", e);
      }
    }
  }
 private void waitForShutdown() {
   try {
     executors.shutdown();
     executors.awaitTermination(voldemortConfig.getRebalancingTimeoutSec(), TimeUnit.SECONDS);
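     // Note: the boolean result of awaitTermination() is ignored, so a timeout
     // here is indistinguishable from a clean shutdown; only an interrupt is
     // logged.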
   } catch (InterruptedException e) {
     logger.error("Interrupted while awaiting termination for executors.", e);
   }
 }
 private Store<ByteArray, byte[], byte[]> createNodeStore(String storeName, Node node) {
   return storeFactory.create(
       storeName,
       node.getHost(),
       node.getSocketPort(),
       voldemortConfig.getRequestFormatType(),
       RequestRoutingType.NORMAL);
 }
  public void registerSystemEngine(StorageEngine<ByteArray, byte[], byte[]> engine) {

    Cluster cluster = this.metadata.getCluster();
    storeRepository.addStorageEngine(engine);

    /* Now add any store wrappers that are enabled */
    Store<ByteArray, byte[], byte[]> store = engine;

    if (voldemortConfig.isVerboseLoggingEnabled())
      store =
          new LoggingStore<ByteArray, byte[], byte[]>(
              store, cluster.getName(), SystemTime.INSTANCE);

    if (voldemortConfig.isMetadataCheckingEnabled())
      store = new InvalidMetadataCheckingStore(metadata.getNodeId(), store, metadata);

    if (voldemortConfig.isStatTrackingEnabled()) {
      StatTrackingStore statStore = new StatTrackingStore(store, this.storeStats);
      store = statStore;
      if (voldemortConfig.isJmxEnabled()) {

        MBeanServer mbeanServer = ManagementFactory.getPlatformMBeanServer();
        ObjectName name = null;
        if (this.voldemortConfig.isEnableJmxClusterName())
          name =
              JmxUtils.createObjectName(
                  metadata.getCluster().getName() + "." + JmxUtils.getPackageName(store.getClass()),
                  store.getName());
        else
          name =
              JmxUtils.createObjectName(JmxUtils.getPackageName(store.getClass()), store.getName());

        synchronized (mbeanServer) {
          if (mbeanServer.isRegistered(name)) JmxUtils.unregisterMbean(mbeanServer, name);

          JmxUtils.registerMbean(
              mbeanServer,
              JmxUtils.createModelMBean(new StoreStatsJmx(statStore.getStats())),
              name);
        }
      }
    }

    storeRepository.addLocalStore(store);
  }
 /**
  * Constructs a new FailureDetectorConfig from a server perspective (via {@link VoldemortConfig}).
  *
  * <p><b>Note</b>: the {@link #setNodes(Collection)} and {@link #setStoreVerifier(StoreVerifier)}
  * methods must be called to ensure <i>complete</i> configuration.
  *
  * @param config {@link VoldemortConfig} instance
  */
 public FailureDetectorConfig(VoldemortConfig config) {
   setImplementationClassName(config.getFailureDetectorImplementation());
   setBannagePeriod(config.getFailureDetectorBannagePeriod());
   setThreshold(config.getFailureDetectorThreshold());
   setThresholdCountMinimum(config.getFailureDetectorThresholdCountMinimum());
   setThresholdInterval(config.getFailureDetectorThresholdInterval());
   setAsyncRecoveryInterval(config.getFailureDetectorAsyncRecoveryInterval());
   setCatastrophicErrorTypes(config.getFailureDetectorCatastrophicErrorTypes());
   setRequestLengthThreshold(config.getFailureDetectorRequestLengthThreshold());
   setMaximumTolerableFatalFailures(DEFAULT_MAX_TOLERABLE_FATAL_FAILURES);
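
   // Per the class note above, callers must still complete the configuration.
   // A sketch of the server-side wiring, mirroring the StorageService
   // constructor further below (setCluster() supplies the nodes):
   //
   //   FailureDetectorConfig failureDetectorConfig =
   //       new FailureDetectorConfig(config)
   //           .setCluster(metadata.getCluster())
   //           .setStoreVerifier(new ServerStoreVerifier(storeFactory, metadata, config));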
 }
 public StorageEngine<ByteArray, byte[], byte[]> getStore(String storeName) {
   synchronized (lock) {
     try {
       LockMode readLockMode = getLockMode();
       Environment environment = getEnvironment(storeName);
       Database db = environment.openDatabase(null, storeName, databaseConfig);
       if (voldemortConfig.getBdbCursorPreload()) {
         PreloadConfig preloadConfig = new PreloadConfig();
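         // setLoadLNs(true) pulls leaf nodes (the record data, not just the
         // Btree internals) into the JE cache up front, trading open time for
         // warmer initial reads.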
         preloadConfig.setLoadLNs(true);
         db.preload(preloadConfig);
       }
       BdbStorageEngine engine =
           new BdbStorageEngine(
               storeName, environment, db, readLockMode, voldemortConfig.getBdbCursorPreload());
       return engine;
     } catch (DatabaseException d) {
       throw new StorageInitializationException(d);
     }
   }
 }
  private void attemptRebalance(RebalancePartitionsInfo stealInfo) {
    stealInfo.setAttempt(stealInfo.getAttempt() + 1);

    AdminClient adminClient =
        RebalanceUtils.createTempAdminClient(voldemortConfig, metadataStore.getCluster(), 4, 2);
    int rebalanceAsyncId = rebalanceLocalNode(stealInfo);

    adminClient.waitForCompletion(
        stealInfo.getStealerId(),
        rebalanceAsyncId,
        voldemortConfig.getAdminSocketTimeout(),
        TimeUnit.SECONDS);
  }
 public RebalanceAsyncOperation(
     Rebalancer rebalancer,
     VoldemortConfig voldemortConfig,
     MetadataStore metadataStore,
     int requestId,
     RebalancePartitionsInfo stealInfo) {
   super(requestId, "Rebalance operation: " + stealInfo.toString());
   this.rebalancer = rebalancer;
   this.voldemortConfig = voldemortConfig;
   this.metadataStore = metadataStore;
   this.stealInfo = stealInfo;
   this.rebalanceStatusList = new ArrayList<Integer>();
   this.adminClient = null;
   this.executors = createExecutors(voldemortConfig.getMaxParallelStoresRebalancing());
 }
  /**
   * Tests that any reservation that would violate the minimum shared cache fails, both during
   * server startup and on dynamic update
   */
  public void testMinimumSharedCache() {
    int totalCache = 20 * ByteUtils.BYTES_PER_MB; // total cache size
    int shareA = 10 * ByteUtils.BYTES_PER_MB; // A reserves 10MB

    // let's use all the default values.
    Props props = new Props();
    props.put("node.id", 1);
    props.put("voldemort.home", "test/common/voldemort/config");
    VoldemortConfig voldemortConfig = new VoldemortConfig(props);
    voldemortConfig.setBdbCacheSize(totalCache);
    voldemortConfig.setBdbOneEnvPerStore(true);
    voldemortConfig.setBdbDataDirectory(bdbMasterDir.toURI().getPath());
    voldemortConfig.setBdbMinimumSharedCache(15 * ByteUtils.BYTES_PER_MB);

    BdbStorageEngine storeA = null;
    bdbStorage = new BdbStorageConfiguration(voldemortConfig);
    assertEquals(0, bdbStorage.getReservedCacheSize());

    try {
      StoreDefinition defA =
          TestUtils.makeStoreDefinition("storeA", shareA / ByteUtils.BYTES_PER_MB);
      storeA = (BdbStorageEngine) bdbStorage.getStore(defA);
      fail("Should have thrown exception since minSharedCache will be violated");
    } catch (StorageInitializationException sie) {
      // should come here.
    }
    // failing operations should not alter reserved cache size
    assertEquals(0, bdbStorage.getReservedCacheSize());

    voldemortConfig.setBdbMinimumSharedCache(10 * ByteUtils.BYTES_PER_MB);
    bdbStorage = new BdbStorageConfiguration(voldemortConfig);
    try {
      StoreDefinition defA =
          TestUtils.makeStoreDefinition("storeA", shareA / ByteUtils.BYTES_PER_MB);
      storeA = (BdbStorageEngine) bdbStorage.getStore(defA);
    } catch (StorageInitializationException sie) {
      // should not come here.
      fail("minSharedCache should n't have been violated");
    }
    assertEquals(shareA, bdbStorage.getReservedCacheSize());

    long reserveCacheSize = bdbStorage.getReservedCacheSize();
    // now, try increasing the reservation dynamically and it should fail
    try {
      StoreDefinition defA = TestUtils.makeStoreDefinition("storeA", 15);
      bdbStorage.update(defA);
      fail("Should have thrown exception since minSharedCache will be violated");
    } catch (StorageInitializationException sie) {
      // should come here.
    }
    // this failure cannot alter the reservedCacheSize
    assertEquals(reserveCacheSize, bdbStorage.getReservedCacheSize());

    if (storeA != null) storeA.close();
  }
  public VAdminProto.DeletePartitionEntriesResponse handleDeletePartitionEntries(
      VAdminProto.DeletePartitionEntriesRequest request) {
    VAdminProto.DeletePartitionEntriesResponse.Builder response =
        VAdminProto.DeletePartitionEntriesResponse.newBuilder();
    ClosableIterator<Pair<ByteArray, Versioned<byte[]>>> iterator = null;
    try {
      String storeName = request.getStore();
      List<Integer> partitions = request.getPartitionsList();
      StorageEngine<ByteArray, byte[], byte[]> storageEngine =
          getStorageEngine(storeRepository, storeName);
      VoldemortFilter filter =
          (request.hasFilter())
              ? getFilterFromRequest(request.getFilter(), voldemortConfig, networkClassLoader)
              : new DefaultVoldemortFilter();
      RoutingStrategy routingStrategy = metadataStore.getRoutingStrategy(storageEngine.getName());

      EventThrottler throttler = new EventThrottler(voldemortConfig.getStreamMaxReadBytesPerSec());
      iterator = storageEngine.entries();
      int deleteSuccess = 0;

      while (iterator.hasNext()) {
        Pair<ByteArray, Versioned<byte[]>> entry = iterator.next();

        ByteArray key = entry.getFirst();
        Versioned<byte[]> value = entry.getSecond();
        throttler.maybeThrottle(key.length() + valueSize(value));
        if (checkKeyBelongsToDeletePartition(key.get(), partitions, routingStrategy)
            && filter.accept(key, value)) {
          if (storageEngine.delete(key, value.getVersion())) deleteSuccess++;
        }
      }
      response.setCount(deleteSuccess);
    } catch (VoldemortException e) {
      response.setError(ProtoUtils.encodeError(errorCodeMapper, e));
      logger.error(
          "handleDeletePartitionEntries failed for request(" + request.toString() + ")", e);
    } finally {
      if (null != iterator) iterator.close();
    }

    return response.build();
  }
  public StreamingSlopPusherJob(
      StoreRepository storeRepo,
      MetadataStore metadataStore,
      FailureDetector failureDetector,
      VoldemortConfig voldemortConfig,
      Semaphore repairPermits) {
    this.storeRepo = storeRepo;
    this.metadataStore = metadataStore;
    this.failureDetector = failureDetector;
    this.voldemortConfig = voldemortConfig;
    this.repairPermits = Utils.notNull(repairPermits);

    this.cluster = metadataStore.getCluster();
    this.slopQueues =
        new ConcurrentHashMap<Integer, SynchronousQueue<Versioned<Slop>>>(
            cluster.getNumberOfNodes());
    this.consumerExecutor =
        Executors.newFixedThreadPool(
            cluster.getNumberOfNodes(),
            new ThreadFactory() {

              public Thread newThread(Runnable r) {
                Thread thread = new Thread(r);
                thread.setName("slop-pusher");
                return thread;
              }
            });

    this.readThrottler = new EventThrottler(voldemortConfig.getSlopMaxReadBytesPerSec());
    this.adminClient = null;
    this.consumerResults = Lists.newArrayList();
    this.attemptedByNode = new ConcurrentHashMap<Integer, Long>(cluster.getNumberOfNodes());
    this.succeededByNode = new ConcurrentHashMap<Integer, Long>(cluster.getNumberOfNodes());

    this.zoneMapping = Maps.newHashMap();
  }
 public BdbRuntimeConfig(VoldemortConfig config) {
   LockMode lockMode =
       config.getBdbReadUncommitted() ? LockMode.READ_UNCOMMITTED : LockMode.DEFAULT;
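   // READ_UNCOMMITTED reads skip read locks (and may observe uncommitted
   // changes); LockMode.DEFAULT keeps JE's standard locking behavior.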
   setLockMode(lockMode);
   setStatsCacheTtlMs(config.getBdbStatsCacheTtlMs());
 }
  public StorageService(
      StoreRepository storeRepository,
      MetadataStore metadata,
      SchedulerService scheduler,
      VoldemortConfig config) {
    super(ServiceType.STORAGE);
    this.voldemortConfig = config;
    this.scheduler = scheduler;
    this.storeRepository = storeRepository;
    this.metadata = metadata;
    this.scanPermitWrapper = new ScanPermitWrapper(voldemortConfig.getNumScanPermits());
    this.storageConfigs = new ConcurrentHashMap<String, StorageConfiguration>();
    this.clientThreadPool =
        new ClientThreadPool(
            config.getClientMaxThreads(),
            config.getClientThreadIdleMs(),
            config.getClientMaxQueuedRequests());
    this.storeFactory =
        new ClientRequestExecutorPool(
            config.getClientSelectors(),
            config.getClientMaxConnectionsPerNode(),
            config.getClientConnectionTimeoutMs(),
            config.getSocketTimeoutMs(),
            config.getSocketBufferSize(),
            config.getSocketKeepAlive(),
            "-storage");

    FailureDetectorConfig failureDetectorConfig =
        new FailureDetectorConfig(voldemortConfig)
            .setCluster(metadata.getCluster())
            .setConnectionVerifier(
                new ServerStoreConnectionVerifier(storeFactory, metadata, config));
    FailureDetectorConfig slopStreamingFailureDetectorConfig =
        new FailureDetectorConfig(voldemortConfig)
            .setImplementationClassName(AsyncRecoveryFailureDetector.class.getName())
            .setCluster(metadata.getCluster())
            .setConnectionVerifier(new AdminSlopStreamingVerifier(this.metadata.getCluster()));
    this.failureDetector = create(failureDetectorConfig, config.isJmxEnabled());
    this.slopStreamingFailureDetector =
        create(slopStreamingFailureDetectorConfig, config.isJmxEnabled());
    this.storeStats = new StoreStats("aggregate.storage-service");
    this.routedStoreFactory = new RoutedStoreFactory();
    this.routedStoreFactory.setThreadPool(this.clientThreadPool);
    this.routedStoreConfig =
        new RoutedStoreConfig(this.voldemortConfig, this.metadata.getCluster());

    // create the proxy put thread pool
    this.proxyPutWorkerPool =
        Executors.newFixedThreadPool(
            config.getMaxProxyPutThreads(), new DaemonThreadFactory("voldemort-proxy-put-thread"));
    this.aggregatedProxyPutStats = new ProxyPutStats(null);
    if (config.isJmxEnabled()) {
      JmxUtils.registerMbean(
          this.aggregatedProxyPutStats,
          JmxUtils.createObjectName("voldemort.store.rebalancing", "aggregate-proxy-puts"));
    }

    this.aggregatedQuotaStats = new QuotaLimitStats(null);
    if (config.isJmxEnabled()) {
      JmxUtils.registerMbean(
          this.aggregatedQuotaStats,
          JmxUtils.createObjectName("voldemort.store.quota", "aggregate-quota-limit-stats"));
    }
  }
  /**
   * Register the given engine with the storage repository
   *
   * @param engine Register the storage engine
   * @param isReadOnly Boolean indicating if this store is read-only
   * @param storeType The type of the store
   * @param storeDef store definition for the store to be registered
   */
  public void registerEngine(
      StorageEngine<ByteArray, byte[], byte[]> engine,
      boolean isReadOnly,
      String storeType,
      StoreDefinition storeDef) {
    Cluster cluster = this.metadata.getCluster();
    storeRepository.addStorageEngine(engine);

    /* Now add any store wrappers that are enabled */
    Store<ByteArray, byte[], byte[]> store = engine;

    boolean isMetadata = store.getName().compareTo(MetadataStore.METADATA_STORE_NAME) == 0;
    boolean isSlop = storeType.compareTo("slop") == 0;
    boolean isView = storeType.compareTo(ViewStorageConfiguration.TYPE_NAME) == 0;

    if (voldemortConfig.isVerboseLoggingEnabled())
      store =
          new LoggingStore<ByteArray, byte[], byte[]>(
              store, cluster.getName(), SystemTime.INSTANCE);
    if (!isSlop) {
      if (!isReadOnly && !isMetadata && !isView) {
        // wrap store to enforce retention policy
        if (voldemortConfig.isEnforceRetentionPolicyOnRead() && storeDef != null) {
          RetentionEnforcingStore retentionEnforcingStore =
              new RetentionEnforcingStore(
                  store,
                  storeDef,
                  voldemortConfig.isDeleteExpiredValuesOnRead(),
                  SystemTime.INSTANCE);
          metadata.addMetadataStoreListener(store.getName(), retentionEnforcingStore);
          store = retentionEnforcingStore;
        }

        if (voldemortConfig.isEnableRebalanceService()) {
          ProxyPutStats proxyPutStats = new ProxyPutStats(aggregatedProxyPutStats);
          if (voldemortConfig.isJmxEnabled()) {
            JmxUtils.registerMbean(
                proxyPutStats,
                JmxUtils.createObjectName(
                    "voldemort.store.rebalancing", engine.getName() + "-proxy-puts"));
          }
          store =
              new RedirectingStore(
                  store,
                  metadata,
                  storeRepository,
                  failureDetector,
                  storeFactory,
                  proxyPutWorkerPool,
                  proxyPutStats);
          if (voldemortConfig.isJmxEnabled()) {
            MBeanServer mbeanServer = ManagementFactory.getPlatformMBeanServer();
            ObjectName name = null;
            if (this.voldemortConfig.isEnableJmxClusterName())
              name =
                  JmxUtils.createObjectName(
                      cluster.getName() + "." + JmxUtils.getPackageName(RedirectingStore.class),
                      store.getName());
            else
              name =
                  JmxUtils.createObjectName(
                      JmxUtils.getPackageName(RedirectingStore.class), store.getName());

            synchronized (mbeanServer) {
              if (mbeanServer.isRegistered(name)) JmxUtils.unregisterMbean(mbeanServer, name);

              JmxUtils.registerMbean(mbeanServer, JmxUtils.createModelMBean(store), name);
            }
          }
        }
      }

      if (voldemortConfig.isMetadataCheckingEnabled() && !isMetadata) {
        store = new InvalidMetadataCheckingStore(metadata.getNodeId(), store, metadata);
      }
    }

    if (voldemortConfig.isStatTrackingEnabled()) {
      StatTrackingStore statStore = new StatTrackingStore(store, this.storeStats);
      store = statStore;
      if (voldemortConfig.isJmxEnabled()) {

        MBeanServer mbeanServer = ManagementFactory.getPlatformMBeanServer();
        ObjectName name = null;
        if (this.voldemortConfig.isEnableJmxClusterName())
          name =
              JmxUtils.createObjectName(
                  metadata.getCluster().getName() + "." + JmxUtils.getPackageName(store.getClass()),
                  store.getName());
        else
          name =
              JmxUtils.createObjectName(JmxUtils.getPackageName(store.getClass()), store.getName());

        synchronized (mbeanServer) {
          if (mbeanServer.isRegistered(name)) JmxUtils.unregisterMbean(mbeanServer, name);

          JmxUtils.registerMbean(
              mbeanServer,
              JmxUtils.createModelMBean(new StoreStatsJmx(statStore.getStats())),
              name);
        }
      }

      // Wrap everything under the rate limiting store (barring the
      // metadata store)
      if (voldemortConfig.isEnableQuotaLimiting() && !isMetadata) {
        FileBackedCachingStorageEngine quotaStore =
            (FileBackedCachingStorageEngine)
                storeRepository.getStorageEngine(
                    SystemStoreConstants.SystemStoreName.voldsys$_store_quotas.toString());
        QuotaLimitStats quotaStats = new QuotaLimitStats(this.aggregatedQuotaStats);
        QuotaLimitingStore rateLimitingStore =
            new QuotaLimitingStore(store, this.storeStats, quotaStats, quotaStore);
        if (voldemortConfig.isJmxEnabled()) {
          JmxUtils.registerMbean(
              this.aggregatedQuotaStats,
              JmxUtils.createObjectName(
                  "voldemort.store.quota", store.getName() + "-quota-limit-stats"));
        }
        store = rateLimitingStore;
      }
    }
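    // Resulting wrapper order for a regular store with every option enabled,
    // innermost to outermost: engine -> LoggingStore -> RetentionEnforcingStore
    // -> RedirectingStore -> InvalidMetadataCheckingStore -> StatTrackingStore
    // -> QuotaLimitingStore; only the outermost wrapper is registered as the
    // local store.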

    storeRepository.addLocalStore(store);
  }
  /**
   * Unregister and remove the engine from the storage repository. This is called during deletion of
   * stores and if there are exceptions adding/opening stores
   *
   * @param engine The actual engine to remove
   * @param isReadOnly Is this read-only?
   * @param storeType The storage type of the store
   * @param truncate Should the store be truncated?
   */
  public void removeEngine(
      StorageEngine<ByteArray, byte[], byte[]> engine,
      boolean isReadOnly,
      String storeType,
      boolean truncate) {
    String storeName = engine.getName();
    Store<ByteArray, byte[], byte[]> store = storeRepository.removeLocalStore(storeName);

    boolean isSlop = storeType.compareTo("slop") == 0;
    boolean isView = storeType.compareTo(ViewStorageConfiguration.TYPE_NAME) == 0;
    boolean isMetadata = storeName.compareTo(MetadataStore.METADATA_STORE_NAME) == 0;

    if (store != null) {
      if (voldemortConfig.isJmxEnabled()) {
        MBeanServer mbeanServer = ManagementFactory.getPlatformMBeanServer();

        if (!isSlop
            && voldemortConfig.isEnableRebalanceService()
            && !isReadOnly
            && !isMetadata
            && !isView) {

          ObjectName name = null;
          if (this.voldemortConfig.isEnableJmxClusterName())
            name =
                JmxUtils.createObjectName(
                    metadata.getCluster().getName()
                        + "."
                        + JmxUtils.getPackageName(RedirectingStore.class),
                    store.getName());
          else
            name =
                JmxUtils.createObjectName(
                    JmxUtils.getPackageName(RedirectingStore.class), store.getName());

          synchronized (mbeanServer) {
            if (mbeanServer.isRegistered(name)) JmxUtils.unregisterMbean(mbeanServer, name);
          }
        }

        if (voldemortConfig.isStatTrackingEnabled()) {
          ObjectName name = null;
          if (this.voldemortConfig.isEnableJmxClusterName())
            name =
                JmxUtils.createObjectName(
                    metadata.getCluster().getName()
                        + "."
                        + JmxUtils.getPackageName(store.getClass()),
                    store.getName());
          else
            name =
                JmxUtils.createObjectName(
                    JmxUtils.getPackageName(store.getClass()), store.getName());

          synchronized (mbeanServer) {
            if (mbeanServer.isRegistered(name)) JmxUtils.unregisterMbean(mbeanServer, name);
          }
        }
      }
      if (voldemortConfig.isServerRoutingEnabled() && !isSlop) {
        this.storeRepository.removeRoutedStore(storeName);
        for (Node node : metadata.getCluster().getNodes())
          this.storeRepository.removeNodeStore(storeName, node.getId());
      }
    }

    storeRepository.removeStorageEngine(storeName);
    if (truncate) engine.truncate();
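    // openStore() above passes truncate=false when rolling back a failed open;
    // a store-deletion path would pass true to also wipe the on-disk data.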
    engine.close();
  }
  @Override
  protected void startInner() {
    registerInternalEngine(metadata, false, "metadata");

    /* Initialize storage configurations */
    for (String configClassName : voldemortConfig.getStorageConfigurations())
      initStorageConfig(configClassName);

    /* Initialize view storage configuration */
    storageConfigs.put(
        ViewStorageConfiguration.TYPE_NAME,
        new ViewStorageConfiguration(voldemortConfig, metadata.getStoreDefList(), storeRepository));

    /* Initialize system stores */
    initSystemStores();

    /* Register slop store */
    if (voldemortConfig.isSlopEnabled()) {

      logger.info("Initializing the slop store using " + voldemortConfig.getSlopStoreType());
      StorageConfiguration config = storageConfigs.get(voldemortConfig.getSlopStoreType());
      if (config == null)
        throw new ConfigurationException(
            "Attempt to open store "
                + SlopStorageEngine.SLOP_STORE_NAME
                + " but "
                + voldemortConfig.getSlopStoreType()
                + " storage engine has not been enabled.");

      // make a dummy store definition object
      StoreDefinition slopStoreDefinition =
          new StoreDefinition(
              SlopStorageEngine.SLOP_STORE_NAME,
              null,
              null,
              null,
              null,
              null,
              null,
              RoutingStrategyType.CONSISTENT_STRATEGY,
              0,
              null,
              0,
              null,
              0,
              null,
              null,
              null,
              null,
              null,
              null,
              null,
              null,
              null,
              null,
              null,
              null,
              0);
      SlopStorageEngine slopEngine =
          new SlopStorageEngine(
              config.getStore(
                  slopStoreDefinition,
                  new RoutingStrategyFactory()
                      .updateRoutingStrategy(slopStoreDefinition, metadata.getCluster())),
              metadata.getCluster());
      registerInternalEngine(slopEngine, false, "slop");
      storeRepository.setSlopStore(slopEngine);

      if (voldemortConfig.isSlopPusherJobEnabled()) {
        // Now initialize the pusher job after some time
        GregorianCalendar cal = new GregorianCalendar();
        cal.add(Calendar.SECOND, (int) (voldemortConfig.getSlopFrequencyMs() / Time.MS_PER_SECOND));
        Date nextRun = cal.getTime();
        logger.info(
            "Initializing slop pusher job type "
                + voldemortConfig.getPusherType()
                + " at "
                + nextRun);

        scheduler.schedule(
            "slop",
            (voldemortConfig.getPusherType().compareTo(BlockingSlopPusherJob.TYPE_NAME) == 0)
                ? new BlockingSlopPusherJob(
                    storeRepository, metadata, failureDetector, voldemortConfig, scanPermitWrapper)
                : new StreamingSlopPusherJob(
                    storeRepository,
                    metadata,
                    slopStreamingFailureDetector,
                    voldemortConfig,
                    scanPermitWrapper),
            nextRun,
            voldemortConfig.getSlopFrequencyMs());
      }

      // Create a SlopPurgeJob object and register it
      if (voldemortConfig.isSlopPurgeJobEnabled()) {
        logger.info("Initializing Slop Purge job");
        SlopPurgeJob job =
            new SlopPurgeJob(
                storeRepository,
                metadata,
                scanPermitWrapper,
                voldemortConfig.getSlopPurgeJobMaxKeysScannedPerSec());
        JmxUtils.registerMbean(job, JmxUtils.createObjectName(job.getClass()));
        storeRepository.registerSlopPurgeJob(job);
      }
    }

    // Create a repair job object and register it with Store repository
    if (voldemortConfig.isRepairEnabled()) {
      logger.info("Initializing repair job.");
      RepairJob job =
          new RepairJob(
              storeRepository,
              metadata,
              scanPermitWrapper,
              voldemortConfig.getRepairJobMaxKeysScannedPerSec());
      JmxUtils.registerMbean(job, JmxUtils.createObjectName(job.getClass()));
      storeRepository.registerRepairJob(job);
    }

    // Create a prune job object and register it
    if (voldemortConfig.isPruneJobEnabled()) {
      logger.info("Intializing prune job");
      VersionedPutPruneJob job =
          new VersionedPutPruneJob(
              storeRepository,
              metadata,
              scanPermitWrapper,
              voldemortConfig.getPruneJobMaxKeysScannedPerSec());
      JmxUtils.registerMbean(job, JmxUtils.createObjectName(job.getClass()));
      storeRepository.registerPruneJob(job);
    }

    List<StoreDefinition> storeDefs =
        new ArrayList<StoreDefinition>(this.metadata.getStoreDefList());
    logger.info("Initializing stores:");

    logger.info("Validating schemas:");
    StoreDefinitionUtils.validateSchemasAsNeeded(storeDefs);
    // first initialize non-view stores
    for (StoreDefinition def : storeDefs) if (!def.isView()) openStore(def);

    // now that we have all our stores, we can initialize views pointing at
    // those stores
    for (StoreDefinition def : storeDefs) {
      if (def.isView()) openStore(def);
    }

    initializeMetadataVersions(storeDefs);

    // enable aggregate jmx statistics
    if (voldemortConfig.isStatTrackingEnabled())
      if (this.voldemortConfig.isEnableJmxClusterName())
        JmxUtils.registerMbean(
            new StoreStatsJmx(this.storeStats),
            JmxUtils.createObjectName(
                metadata.getCluster().getName() + ".voldemort.store.stats.aggregate",
                "aggregate-perf"));
      else
        JmxUtils.registerMbean(
            new StoreStatsJmx(this.storeStats),
            JmxUtils.createObjectName("voldemort.store.stats.aggregate", "aggregate-perf"));

    List<StorageEngine> listOfDisabledStores = Lists.newArrayList();
    for (StorageEngine storageEngine : storeRepository.getAllStorageEngines()) {
      try {
        StoreVersionManager storeVersionManager =
            (StoreVersionManager)
                storageEngine.getCapability(StoreCapabilityType.DISABLE_STORE_VERSION);
        if (storeVersionManager.hasAnyDisabledVersion()) {
          listOfDisabledStores.add(storageEngine);
          logger.warn("The following store is marked as disabled: " + storageEngine.getName());
          // Must put server in offline mode.
        }
      } catch (NoSuchCapabilityException e) {
        // Not a read-only store: no-op
      }
    }
    if (listOfDisabledStores.isEmpty()) {
      logger.info("All stores initialized.");
    } else {
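      // Intentional even though all stores initialized: surfacing this as an
      // exception presumably lets the caller bring the server up in offline
      // mode instead of serving traffic for disabled store versions.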
      throw new DisabledStoreException(
          "All stores initialized, but the server needs to go "
              + "into offline mode because some store(s) are disabled.");
    }
  }
  public BdbStorageConfiguration(VoldemortConfig config) {
    this.voldemortConfig = config;
    environmentConfig = new EnvironmentConfig();
    environmentConfig.setTransactional(true);
    environmentConfig.setCacheSize(config.getBdbCacheSize());
    if (config.isBdbWriteTransactionsEnabled() && config.isBdbFlushTransactionsEnabled()) {
      environmentConfig.setTxnNoSync(false);
      environmentConfig.setTxnWriteNoSync(false);
    } else if (config.isBdbWriteTransactionsEnabled() && !config.isBdbFlushTransactionsEnabled()) {
      environmentConfig.setTxnNoSync(false);
      environmentConfig.setTxnWriteNoSync(true);
    } else {
      environmentConfig.setTxnNoSync(true);
    }
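    // The three branches above map to BDB JE durability levels: both flags set
    // -> SYNC (commit fsyncs the log); write without flush -> WRITE_NO_SYNC
    // (commit reaches the OS but is not fsynced); neither -> NO_SYNC (commit
    // may remain in the JE log buffer).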
    environmentConfig.setAllowCreate(true);
    environmentConfig.setConfigParam(
        EnvironmentConfig.LOG_FILE_MAX, Long.toString(config.getBdbMaxLogFileSize()));
    environmentConfig.setConfigParam(
        EnvironmentConfig.CHECKPOINTER_BYTES_INTERVAL,
        Long.toString(config.getBdbCheckpointBytes()));
    environmentConfig.setConfigParam(
        EnvironmentConfig.CHECKPOINTER_WAKEUP_INTERVAL,
        Long.toString(config.getBdbCheckpointMs() * Time.US_PER_MS));
    environmentConfig.setConfigParam(
        EnvironmentConfig.CLEANER_MIN_FILE_UTILIZATION,
        Integer.toString(config.getBdbCleanerMinFileUtilization()));
    environmentConfig.setConfigParam(
        EnvironmentConfig.CLEANER_MIN_UTILIZATION,
        Integer.toString(config.getBdbCleanerMinUtilization()));
    environmentConfig.setConfigParam(
        EnvironmentConfig.CLEANER_THREADS, Integer.toString(config.getBdbCleanerThreads()));
    environmentConfig.setConfigParam(
        EnvironmentConfig.CLEANER_LOOK_AHEAD_CACHE_SIZE,
        Integer.toString(config.getBdbCleanerLookAheadCacheSize()));
    environmentConfig.setConfigParam(
        EnvironmentConfig.LOCK_N_LOCK_TABLES, Integer.toString(config.getBdbLockNLockTables()));
    environmentConfig.setConfigParam(
        EnvironmentConfig.ENV_FAIR_LATCHES, Boolean.toString(config.getBdbFairLatches()));
    environmentConfig.setConfigParam(
        EnvironmentConfig.CHECKPOINTER_HIGH_PRIORITY,
        Boolean.toString(config.getBdbCheckpointerHighPriority()));
    environmentConfig.setConfigParam(
        EnvironmentConfig.CLEANER_MAX_BATCH_FILES,
        Integer.toString(config.getBdbCleanerMaxBatchFiles()));

    environmentConfig.setLockTimeout(config.getBdbLockTimeoutMs(), TimeUnit.MILLISECONDS);
    databaseConfig = new DatabaseConfig();
    databaseConfig.setAllowCreate(true);
    databaseConfig.setSortedDuplicates(config.isBdbSortedDuplicatesEnabled());
    databaseConfig.setNodeMaxEntries(config.getBdbBtreeFanout());
    databaseConfig.setTransactional(true);
    bdbMasterDir = config.getBdbDataDirectory();
    useOneEnvPerStore = config.isBdbOneEnvPerStore();
    if (useOneEnvPerStore) environmentConfig.setSharedCache(true);
  }
  /**
   * Blocking function which completes the migration of one store
   *
   * @param storeName The name of the store
   * @param adminClient Admin client used to initiate the copying of data
   * @param stealInfo The steal information
   * @param isReadOnlyStore Boolean indicating that this is a read-only store
   */
  private void rebalanceStore(
      String storeName,
      final AdminClient adminClient,
      RebalancePartitionsInfo stealInfo,
      boolean isReadOnlyStore) {
    logger.info(
        getHeader(stealInfo)
            + "Starting partitions migration for store "
            + storeName
            + " from donor node "
            + stealInfo.getDonorId());
    updateStatus(
        getHeader(stealInfo)
            + "Started partition migration for store "
            + storeName
            + " from donor node "
            + stealInfo.getDonorId());

    int asyncId =
        adminClient.migratePartitions(
            stealInfo.getDonorId(),
            metadataStore.getNodeId(),
            storeName,
            stealInfo.getReplicaToAddPartitionList(storeName),
            null,
            stealInfo.getInitialCluster());
    rebalanceStatusList.add(asyncId);

    if (logger.isDebugEnabled()) {
      logger.debug(
          getHeader(stealInfo)
              + "Waiting for completion for "
              + storeName
              + " with async id "
              + asyncId);
    }
    adminClient.waitForCompletion(
        metadataStore.getNodeId(),
        asyncId,
        voldemortConfig.getRebalancingTimeoutSec(),
        TimeUnit.SECONDS,
        getStatus());

    rebalanceStatusList.remove((Object) asyncId);

    logger.info(
        getHeader(stealInfo)
            + "Completed partition migration for store "
            + storeName
            + " from donor node "
            + stealInfo.getDonorId());
    updateStatus(
        getHeader(stealInfo)
            + "Completed partition migration for store "
            + storeName
            + " from donor node "
            + stealInfo.getDonorId());

    if (stealInfo.getReplicaToDeletePartitionList(storeName) != null
        && stealInfo.getReplicaToDeletePartitionList(storeName).size() > 0
        && !isReadOnlyStore) {
      logger.info(
          getHeader(stealInfo)
              + "Deleting partitions for store "
              + storeName
              + " on donor node "
              + stealInfo.getDonorId());
      updateStatus(
          getHeader(stealInfo)
              + "Deleting partitions for store "
              + storeName
              + " on donor node "
              + stealInfo.getDonorId());

      adminClient.deletePartitions(
          stealInfo.getDonorId(),
          storeName,
          stealInfo.getReplicaToDeletePartitionList(storeName),
          stealInfo.getInitialCluster(),
          null);
      logger.info(
          getHeader(stealInfo)
              + "Deleted partitions for store "
              + storeName
              + " on donor node "
              + stealInfo.getDonorId());
      updateStatus(
          getHeader(stealInfo)
              + "Deleted partitions for store "
              + storeName
              + " on donor node "
              + stealInfo.getDonorId());
    }

    logger.info(getHeader(stealInfo) + "Finished all migration for store " + storeName);
    updateStatus(getHeader(stealInfo) + "Finished all migration for store " + storeName);
  }
  @Override
  protected void startInner() {
    registerInternalEngine(metadata, false, "metadata");

    /* Initialize storage configurations */
    for (String configClassName : voldemortConfig.getStorageConfigurations())
      initStorageConfig(configClassName);

    /* Initialize view storage configuration */
    storageConfigs.put(
        ViewStorageConfiguration.TYPE_NAME,
        new ViewStorageConfiguration(voldemortConfig, metadata.getStoreDefList(), storeRepository));

    /* Initialize system stores */
    initSystemStores();

    /* Register slop store */
    if (voldemortConfig.isSlopEnabled()) {

      logger.info("Initializing the slop store using " + voldemortConfig.getSlopStoreType());
      StorageConfiguration config = storageConfigs.get(voldemortConfig.getSlopStoreType());
      if (config == null)
        throw new ConfigurationException(
            "Attempt to open store "
                + SlopStorageEngine.SLOP_STORE_NAME
                + " but "
                + voldemortConfig.getSlopStoreType()
                + " storage engine has not been enabled.");

      // make a dummy store definition object
      StoreDefinition slopStoreDefinition =
          new StoreDefinition(
              SlopStorageEngine.SLOP_STORE_NAME,
              null,
              null,
              null,
              null,
              null,
              null,
              RoutingStrategyType.CONSISTENT_STRATEGY,
              0,
              null,
              0,
              null,
              0,
              null,
              null,
              null,
              null,
              null,
              null,
              null,
              null,
              null,
              null,
              null,
              null,
              0);
      SlopStorageEngine slopEngine =
          new SlopStorageEngine(
              config.getStore(
                  slopStoreDefinition,
                  new RoutingStrategyFactory()
                      .updateRoutingStrategy(slopStoreDefinition, metadata.getCluster())),
              metadata.getCluster());
      registerInternalEngine(slopEngine, false, "slop");
      storeRepository.setSlopStore(slopEngine);

      if (voldemortConfig.isSlopPusherJobEnabled()) {
        // Now initialize the pusher job after some time
        GregorianCalendar cal = new GregorianCalendar();
        cal.add(Calendar.SECOND, (int) (voldemortConfig.getSlopFrequencyMs() / Time.MS_PER_SECOND));
        Date nextRun = cal.getTime();
        logger.info(
            "Initializing slop pusher job type "
                + voldemortConfig.getPusherType()
                + " at "
                + nextRun);

        scheduler.schedule(
            "slop",
            (voldemortConfig.getPusherType().compareTo(BlockingSlopPusherJob.TYPE_NAME) == 0)
                ? new BlockingSlopPusherJob(
                    storeRepository, metadata, failureDetector, voldemortConfig, scanPermitWrapper)
                : new StreamingSlopPusherJob(
                    storeRepository, metadata, failureDetector, voldemortConfig, scanPermitWrapper),
            nextRun,
            voldemortConfig.getSlopFrequencyMs());
      }

      // Create a SlopPurgeJob object and register it
      if (voldemortConfig.isSlopPurgeJobEnabled()) {
        logger.info("Initializing Slop Purge job");
        SlopPurgeJob job =
            new SlopPurgeJob(
                storeRepository,
                metadata,
                scanPermitWrapper,
                voldemortConfig.getSlopPurgeJobMaxKeysScannedPerSec());
        JmxUtils.registerMbean(job, JmxUtils.createObjectName(job.getClass()));
        storeRepository.registerSlopPurgeJob(job);
      }
    }

    // Create a repair job object and register it with Store repository
    if (voldemortConfig.isRepairEnabled()) {
      logger.info("Initializing repair job.");
      RepairJob job =
          new RepairJob(
              storeRepository,
              metadata,
              scanPermitWrapper,
              voldemortConfig.getRepairJobMaxKeysScannedPerSec());
      JmxUtils.registerMbean(job, JmxUtils.createObjectName(job.getClass()));
      storeRepository.registerRepairJob(job);
    }

    // Create a prune job object and register it
    if (voldemortConfig.isPruneJobEnabled()) {
      logger.info("Intializing prune job");
      VersionedPutPruneJob job =
          new VersionedPutPruneJob(
              storeRepository,
              metadata,
              scanPermitWrapper,
              voldemortConfig.getPruneJobMaxKeysScannedPerSec());
      JmxUtils.registerMbean(job, JmxUtils.createObjectName(job.getClass()));
      storeRepository.registerPruneJob(job);
    }

    List<StoreDefinition> storeDefs =
        new ArrayList<StoreDefinition>(this.metadata.getStoreDefList());
    logger.info("Initializing stores:");

    logger.info("Validating schemas:");
    String AVRO_GENERIC_VERSIONED_TYPE_NAME = "avro-generic-versioned";

    for (StoreDefinition storeDef : storeDefs) {
      SerializerDefinition keySerDef = storeDef.getKeySerializer();
      SerializerDefinition valueSerDef = storeDef.getValueSerializer();

      if (keySerDef.getName().equals(AVRO_GENERIC_VERSIONED_TYPE_NAME)) {

        SchemaEvolutionValidator.checkSchemaCompatibility(keySerDef);
      }

      if (valueSerDef.getName().equals(AVRO_GENERIC_VERSIONED_TYPE_NAME)) {

        SchemaEvolutionValidator.checkSchemaCompatibility(valueSerDef);
      }
    }
    // first initialize non-view stores
    for (StoreDefinition def : storeDefs) if (!def.isView()) openStore(def);

    // now that we have all our stores, we can initialize views pointing at
    // those stores
    for (StoreDefinition def : storeDefs) {
      if (def.isView()) openStore(def);
    }

    initializeMetadataVersions(storeDefs);

    // enable aggregate jmx statistics
    if (voldemortConfig.isStatTrackingEnabled())
      if (this.voldemortConfig.isEnableJmxClusterName())
        JmxUtils.registerMbean(
            new StoreStatsJmx(this.storeStats),
            JmxUtils.createObjectName(
                metadata.getCluster().getName() + ".voldemort.store.stats.aggregate",
                "aggregate-perf"));
      else
        JmxUtils.registerMbean(
            new StoreStatsJmx(this.storeStats),
            JmxUtils.createObjectName("voldemort.store.stats.aggregate", "aggregate-perf"));

    logger.info("All stores initialized.");
  }
  public StorageService(
      StoreRepository storeRepository,
      MetadataStore metadata,
      SchedulerService scheduler,
      VoldemortConfig config) {
    super(ServiceType.STORAGE);
    this.voldemortConfig = config;
    this.scheduler = scheduler;
    this.storeRepository = storeRepository;
    this.metadata = metadata;
    this.scanPermitWrapper = new ScanPermitWrapper(voldemortConfig.getNumScanPermits());
    this.storageConfigs = new ConcurrentHashMap<String, StorageConfiguration>();
    this.clientThreadPool =
        new ClientThreadPool(
            config.getClientMaxThreads(),
            config.getClientThreadIdleMs(),
            config.getClientMaxQueuedRequests());
    this.storeFactory =
        new ClientRequestExecutorPool(
            config.getClientSelectors(),
            config.getClientMaxConnectionsPerNode(),
            config.getClientConnectionTimeoutMs(),
            config.getSocketTimeoutMs(),
            config.getSocketBufferSize(),
            config.getSocketKeepAlive());

    FailureDetectorConfig failureDetectorConfig =
        new FailureDetectorConfig(voldemortConfig)
            .setCluster(metadata.getCluster())
            .setStoreVerifier(new ServerStoreVerifier(storeFactory, metadata, config));
    this.failureDetector = create(failureDetectorConfig, config.isJmxEnabled());
    this.storeStats = new StoreStats();
    this.routedStoreFactory = new RoutedStoreFactory();
    this.routedStoreFactory.setThreadPool(this.clientThreadPool);
    this.routedStoreConfig =
        new RoutedStoreConfig(this.voldemortConfig, this.metadata.getCluster());

    /*
     * Initialize the dynamic throttle limit based on the per-node limit
     * config, only if the read-only engine is being used.
     */
    if (this.voldemortConfig
        .getStorageConfigurations()
        .contains(ReadOnlyStorageConfiguration.class.getName())) {
      long rate = this.voldemortConfig.getReadOnlyFetcherMaxBytesPerSecond();
      this.dynThrottleLimit = new DynamicThrottleLimit(rate);
    } else this.dynThrottleLimit = null;

    // create the proxy put thread pool
    this.proxyPutWorkerPool =
        Executors.newFixedThreadPool(
            config.getMaxProxyPutThreads(), new DaemonThreadFactory("voldemort-proxy-put-thread"));
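    // Aggregate proxy put statistics across all stores, exposed over JMX below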
    this.aggregatedProxyPutStats = new ProxyPutStats(null);
    if (config.isJmxEnabled()) {
      JmxUtils.registerMbean(
          this.aggregatedProxyPutStats,
          JmxUtils.createObjectName("voldemort.store.rebalancing", "aggregate-proxy-puts"));
    }

    this.aggregatedQuotaStats = new QuotaLimitStats(null);
    if (config.isJmxEnabled()) {
      JmxUtils.registerMbean(
          this.aggregatedQuotaStats,
          JmxUtils.createObjectName("voldemort.store.quota", "aggregate-quota-limit-stats"));
    }
  }
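
  /**
   * Run the rebalancing task described by {@code stealInfo}: create a
   * temporary admin client, rebalance each unbalanced store on the executor,
   * and wait for completion. If every store finished, the rebalancing state is
   * deleted from the metadata store; otherwise a VoldemortRebalancingException
   * carrying the per-store failures is thrown. The donor permit and the admin
   * client are always released in the finally block.
   */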
  @Override
  public void operate() throws Exception {
    adminClient =
        RebalanceUtils.createTempAdminClient(
            voldemortConfig,
            metadataStore.getCluster(),
            voldemortConfig.getMaxParallelStoresRebalancing(),
            1);
    final List<Exception> failures = new ArrayList<Exception>();
    try {

      for (final String storeName : ImmutableList.copyOf(stealInfo.getUnbalancedStoreList())) {

        executors.submit(
            new Runnable() {

              public void run() {
                try {
                  boolean isReadOnlyStore =
                      metadataStore
                          .getStoreDef(storeName)
                          .getType()
                          .equals(ReadOnlyStorageConfiguration.TYPE_NAME);

                  logger.info(getHeader(stealInfo) + "Working on store " + storeName);

                  rebalanceStore(storeName, adminClient, stealInfo, isReadOnlyStore);

                  // Done with this store; remove it from the unbalanced list
                  stealInfo.removeStore(storeName);

                  logger.info(getHeader(stealInfo) + "Completed working on store " + storeName);

                } catch (Exception e) {
                  logger.error(
                      getHeader(stealInfo)
                          + "Error while rebalancing for store "
                          + storeName
                          + " - "
                          + e.getMessage(),
                      e);
                  failures.add(e);
                }
              }
            });
      }

      waitForShutdown();

      // If no unbalanced stores remain, clear the rebalancing state
      List<String> unbalancedStores = Lists.newArrayList(stealInfo.getUnbalancedStoreList());
      if (unbalancedStores.isEmpty()) {
        logger.info(
            getHeader(stealInfo) + "Rebalance of " + stealInfo + " completed successfully.");
        updateStatus(
            getHeader(stealInfo) + "Rebalance of " + stealInfo + " completed successfully.");
        metadataStore.deleteRebalancingState(stealInfo);
      } else {
        throw new VoldemortRebalancingException(
            getHeader(stealInfo) + "Failed to rebalance task " + stealInfo, failures);
      }

    } finally {
      // free the permit in all cases.
      logger.info(
          getHeader(stealInfo) + "Releasing permit for donor node " + stealInfo.getDonorId());

      rebalancer.releaseRebalancingPermit(stealInfo.getDonorId());
      adminClient.stop();
      adminClient = null;
    }
  }
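
  /**
   * Stream pending slops to the nodes they were originally destined for. The
   * job bails out while the server is rebalancing, optionally terminates early
   * when enough zones are down, and otherwise iterates the slop store, handing
   * each entry to a per-node consumer thread. The finally block poisons the
   * consumer queues, waits for the consumers to drain, and records how many
   * slops per node are still outstanding.
   */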
  public void run() {

    // don't try to run slop pusher job when rebalancing
    if (metadataStore
        .getServerState()
        .equals(MetadataStore.VoldemortState.REBALANCING_MASTER_SERVER)) {
      logger.error("Cannot run slop pusher job since Voldemort server is rebalancing");
      return;
    }

    boolean terminatedEarly = false;
    Date startTime = new Date();
    logger.info("Started streaming slop pusher job at " + startTime);

    SlopStorageEngine slopStorageEngine = storeRepo.getSlopStore();
    ClosableIterator<Pair<ByteArray, Versioned<Slop>>> iterator = null;

    // Lazily create the admin client used to push slops to the remote nodes
    if (adminClient == null) {
      adminClient =
          new AdminClient(
              cluster,
              new AdminClientConfig()
                  .setMaxThreads(cluster.getNumberOfNodes())
                  .setMaxConnectionsPerNode(1));
    }

    if (voldemortConfig.getSlopZonesDownToTerminate() > 0) {
      // Populating the zone mapping for early termination
      zoneMapping.clear();
      for (Node n : cluster.getNodes()) {
        if (failureDetector.isAvailable(n)) {
          Set<Integer> nodes = zoneMapping.get(n.getZoneId());
          if (nodes == null) {
            nodes = Sets.newHashSet();
            zoneMapping.put(n.getZoneId(), nodes);
          }
          nodes.add(n.getId());
        }
      }

      // Check how many zones are down
      int zonesDown = 0;
      for (Zone zone : cluster.getZones()) {
        if (zoneMapping.get(zone.getId()) == null || zoneMapping.get(zone.getId()).isEmpty())
          zonesDown++;
      }

      // Terminate early if the configured number of zones are down
      if (voldemortConfig.getSlopZonesDownToTerminate() <= zoneMapping.size()
          && zonesDown >= voldemortConfig.getSlopZonesDownToTerminate()) {
        logger.info(
            "Completed streaming slop pusher job at "
                + startTime
                + " early because "
                + zonesDown
                + " zones are down");
        stopAdminClient();
        return;
      }
    }

    // Reset the per-node push statistics
    AtomicLong attemptedPushes = new AtomicLong(0);
    for (Node node : cluster.getNodes()) {
      attemptedByNode.put(node.getId(), 0L);
      succeededByNode.put(node.getId(), 0L);
    }

    // Hold the repair permit while scanning the slop store; it is released in
    // the finally block below
    acquireRepairPermit();
    try {
      StorageEngine<ByteArray, Slop, byte[]> slopStore = slopStorageEngine.asSlopStore();
      iterator = slopStore.entries();

      while (iterator.hasNext()) {
        Pair<ByteArray, Versioned<Slop>> keyAndVal;
        try {
          keyAndVal = iterator.next();
          Versioned<Slop> versioned = keyAndVal.getSecond();

          // Retrieve the node
          int nodeId = versioned.getValue().getNodeId();
          Node node = cluster.getNodeById(nodeId);

          attemptedPushes.incrementAndGet();
          Long attempted = attemptedByNode.get(nodeId);
          attemptedByNode.put(nodeId, attempted + 1L);
          if (attemptedPushes.get() % 10000 == 0)
            logger.info("Attempted pushing " + attemptedPushes + " slops");

          if (logger.isTraceEnabled())
            logger.trace(
                "Pushing slop for "
                    + versioned.getValue().getNodeId()
                    + " and store  "
                    + versioned.getValue().getStoreName());

          if (failureDetector.isAvailable(node)) {
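            // Hand the slop to the per-node consumer through a
            // SynchronousQueue: the queue has no capacity, so offer() succeeds
            // only when the consumer thread is ready to take the entry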
            SynchronousQueue<Versioned<Slop>> slopQueue = slopQueues.get(nodeId);
            if (slopQueue == null) {
              // No previous slop queue, add one
              slopQueue = new SynchronousQueue<Versioned<Slop>>();
              slopQueues.put(nodeId, slopQueue);
              consumerResults.add(
                  consumerExecutor.submit(new SlopConsumer(nodeId, slopQueue, slopStorageEngine)));
            }
            boolean offered =
                slopQueue.offer(
                    versioned, voldemortConfig.getClientRoutingTimeoutMs(), TimeUnit.MILLISECONDS);
            if (!offered) {
              if (logger.isDebugEnabled())
                logger.debug(
                    "No consumer appeared for slop in "
                        + voldemortConfig.getClientRoutingTimeoutMs()
                        + " ms");
            }
            readThrottler.maybeThrottle(nBytesRead(keyAndVal));
          } else {
            logger.trace(node + " declared down, won't push slop");
          }
        } catch (RejectedExecutionException e) {
          throw new VoldemortException("Ran out of threads in executor", e);
        }
      }

    } catch (InterruptedException e) {
      logger.warn("Interrupted exception", e);
      terminatedEarly = true;
    } catch (Exception e) {
      logger.error(e, e);
      terminatedEarly = true;
    } finally {
      try {
        if (iterator != null) iterator.close();
      } catch (Exception e) {
        logger.warn("Failed to close iterator cleanly as database might be closed", e);
      }

      // Add the poison pill so each consumer thread knows to stop
      for (SynchronousQueue<Versioned<Slop>> slopQueue : slopQueues.values()) {
        try {
          slopQueue.put(END);
        } catch (InterruptedException e) {
          logger.warn("Error putting poison pill", e);
        }
      }

      for (Future<?> result : consumerResults) {
        try {
          result.get();
        } catch (Exception e) {
          logger.warn("Exception in consumer", e);
        }
      }

      // Update the outstanding counts only if no exception occurred
      if (!terminatedEarly) {
        Map<Integer, Long> outstanding =
            Maps.newHashMapWithExpectedSize(cluster.getNumberOfNodes());
        for (int nodeId : succeededByNode.keySet()) {
          logger.info(
              "Slops to node "
                  + nodeId
                  + " - Succeeded - "
                  + succeededByNode.get(nodeId)
                  + " - Attempted - "
                  + attemptedByNode.get(nodeId));
          outstanding.put(nodeId, attemptedByNode.get(nodeId) - succeededByNode.get(nodeId));
        }
        slopStorageEngine.resetStats(outstanding);
        logger.info("Completed streaming slop pusher job which started at " + startTime);
      } else {
        for (int nodeId : succeededByNode.keySet()) {
          logger.info(
              "Slops to node "
                  + nodeId
                  + " - Succeeded - "
                  + succeededByNode.get(nodeId)
                  + " - Attempted - "
                  + attemptedByNode.get(nodeId));
        }
        logger.info("Completed early streaming slop pusher job which started at " + startTime);
      }

      // Shut down the admin client so as not to waste connections
      consumerResults.clear();
      slopQueues.clear();
      stopAdminClient();
      this.repairPermits.release();
    }
  }