/**
   * Rebalances every store still listed in {@code stealInfo.getUnbalancedStoreList()}.
   *
   * <p>One task per store (taken from a snapshot of the list) is submitted to {@code executors}.
   * Each task calls {@code rebalanceStore} and, on success, removes the store from
   * {@code stealInfo}; on failure the exception is logged and collected. After
   * {@code waitForShutdown()} returns, the rebalancing state for {@code stealInfo} is deleted
   * from the metadata store if no unbalanced store remains; otherwise the collected failures are
   * rethrown wrapped in a {@link VoldemortRebalancingException}.
   *
   * <p>The donor node's rebalancing permit is released and the temporary admin client is
   * stopped in all cases.
   *
   * @throws VoldemortRebalancingException if at least one store is still unbalanced after the
   *     store tasks have been waited for
   * @throws Exception any other failure raised while setting up or finishing the operation
   */
  @Override
  public void operate() throws Exception {
    adminClient =
        RebalanceUtils.createTempAdminClient(
            voldemortConfig,
            metadataStore.getCluster(),
            voldemortConfig.getMaxParallelStoresRebalancing(),
            1);
    // The per-store tasks run on executor threads and may report failures concurrently, so the
    // collecting list has to be thread-safe. Fully qualified to avoid requiring a new import.
    final List<Exception> failures =
        java.util.Collections.synchronizedList(new ArrayList<Exception>());
    try {

      for (final String storeName : ImmutableList.copyOf(stealInfo.getUnbalancedStoreList())) {

        executors.submit(
            new Runnable() {

              public void run() {
                try {
                  boolean isReadOnlyStore =
                      metadataStore
                              .getStoreDef(storeName)
                              .getType()
                              .compareTo(ReadOnlyStorageConfiguration.TYPE_NAME)
                          == 0;

                  logger.info(getHeader(stealInfo) + "Working on store " + storeName);

                  rebalanceStore(storeName, adminClient, stealInfo, isReadOnlyStore);

                  // We finished the store, delete it
                  stealInfo.removeStore(storeName);

                  logger.info(getHeader(stealInfo) + "Completed working on store " + storeName);

                } catch (Exception e) {
                  logger.error(
                      getHeader(stealInfo)
                          + "Error while rebalancing for store "
                          + storeName
                          + " - "
                          + e.getMessage(),
                      e);
                  failures.add(e);
                }
              }
            });
      }

      waitForShutdown();

      // If empty, clean state
      List<String> unbalancedStores = Lists.newArrayList(stealInfo.getUnbalancedStoreList());
      if (unbalancedStores.isEmpty()) {
        String successMessage =
            getHeader(stealInfo) + "Rebalance of " + stealInfo + " completed successfully.";
        logger.info(successMessage);
        updateStatus(successMessage);
        metadataStore.deleteRebalancingState(stealInfo);
      } else {
        throw new VoldemortRebalancingException(
            getHeader(stealInfo) + "Failed to rebalance task " + stealInfo, failures);
      }

    } finally {
      try {
        // free the permit in all cases.
        logger.info(
            getHeader(stealInfo) + "Releasing permit for donor node " + stealInfo.getDonorId());

        rebalancer.releaseRebalancingPermit(stealInfo.getDonorId());
      } finally {
        // Stop the client even if releasing the permit failed, so it is never leaked.
        adminClient.stop();
        adminClient = null;
      }
    }
  }
Example #2
0
  /**
   * Rebalance logic at single node level.<br>
   * <b>Should be called by the rebalancing node itself.</b><br>
   * Attempts to rebalance from node {@link RebalancePartitionsInfo#getDonorId()} for the
   * partitions in {@link RebalancePartitionsInfo#getPartitionList()}.
   *
   * <p>Acquires the rebalancing permit, checks and sets the rebalancing state in the metadata
   * store, then submits an asynchronous operation that migrates the partitions of every store
   * in {@link RebalancePartitionsInfo#getUnbalancedStoreList()} to this node, running up to
   * {@code maxParallelStoresRebalancing} stores in parallel.<br>
   * On success the operation cleans all rebalancing state; the permit is released when the
   * operation finishes, successfully or not.
   *
   * @param stealInfo describes the donor node, the partitions to move and the stores that still
   *     need rebalancing; its unbalanced store list is updated as stores complete
   * @return taskId for asynchronous task.
   * @throws AlreadyRebalancingException if the rebalancing permit could not be acquired
   */
  public int rebalanceLocalNode(final RebalancePartitionsInfo stealInfo) {

    if (!acquireRebalancingPermit()) {
      RebalancePartitionsInfo info = metadataStore.getRebalancingStealInfo();
      throw new AlreadyRebalancingException(
          "Node "
              + metadataStore.getCluster().getNodeById(info.getStealerId())
              + " is already rebalancing from "
              + info.getDonorId()
              + " rebalanceInfo:"
              + info);
    }

    // check and set State
    // This call now owns the permit. If the state checks fail, the async operation (which
    // releases the permit in its finally block) is never created, so release it here.
    boolean stateInitialized = false;
    try {
      checkCurrentState(metadataStore, stealInfo);
      setRebalancingState(metadataStore, stealInfo);
      stateInitialized = true;
    } finally {
      if (!stateInitialized) {
        releaseRebalancingPermit();
      }
    }

    // get max parallel store rebalancing allowed
    final int maxParallelStoresRebalancing =
        (-1 != voldemortConfig.getMaxParallelStoresRebalancing())
            ? voldemortConfig.getMaxParallelStoresRebalancing()
            : stealInfo.getUnbalancedStoreList().size();

    int requestId = asyncRunner.getUniqueRequestId();

    asyncRunner.submitOperation(
        requestId,
        new AsyncOperation(requestId, "Rebalance Operation:" + stealInfo) {

          // Ids of in-flight migrations. Written by the per-store worker threads and iterated
          // by stop(), so it must tolerate concurrent modification during iteration.
          private final List<Integer> rebalanceStatusList =
              new java.util.concurrent.CopyOnWriteArrayList<Integer>();
          // volatile: assigned by operate() and read by stop(), possibly on different threads.
          volatile AdminClient adminClient = null;
          final ExecutorService executors = createExecutors(maxParallelStoresRebalancing);
          // Serializes the read-modify-write of stealInfo's unbalanced store list.
          private final Object stealInfoLock = new Object();

          /**
           * Submits one migration task per unbalanced store, waits for them, and cleans the
           * rebalancing state if every store completed. Always releases the rebalancing permit
           * and stops the temporary admin client.
           *
           * @throws VoldemortRebalancingException if some store is still unbalanced afterwards
           */
          @Override
          public void operate() throws Exception {
            adminClient =
                RebalanceUtils.createTempAdminClient(
                    voldemortConfig,
                    metadataStore.getCluster(),
                    maxParallelStoresRebalancing * 4,
                    maxParallelStoresRebalancing * 2);
            // Worker threads append failures concurrently.
            final List<Exception> failures =
                java.util.Collections.synchronizedList(new ArrayList<Exception>());
            try {
              logger.info("starting rebalancing task" + stealInfo);

              for (final String storeName :
                  ImmutableList.copyOf(stealInfo.getUnbalancedStoreList())) {

                executors.submit(
                    new Runnable() {

                      public void run() {
                        try {
                          rebalanceStore(storeName, adminClient, stealInfo);

                          // Two stores finishing at the same time must not overwrite each
                          // other's update of the remaining-store list.
                          synchronized (stealInfoLock) {
                            List<String> tempUnbalancedStoreList =
                                new ArrayList<String>(stealInfo.getUnbalancedStoreList());
                            tempUnbalancedStoreList.remove(storeName);
                            stealInfo.setUnbalancedStoreList(tempUnbalancedStoreList);
                            setRebalancingState(metadataStore, stealInfo);
                          }
                        } catch (Exception e) {
                          logger.error(
                              "rebalanceSubTask:" + stealInfo + " failed for store:" + storeName,
                              e);
                          failures.add(e);
                        }
                      }
                    });
              }

              waitForShutdown();

              if (stealInfo.getUnbalancedStoreList().isEmpty()) {
                logger.info("Rebalancer: rebalance " + stealInfo + " completed successfully.");
                // clean state only if
                // successful.
                metadataStore.cleanAllRebalancingState();
              } else {
                throw new VoldemortRebalancingException(
                    "Failed to rebalance task " + stealInfo, failures);
              }

            } finally {
              try {
                // free the permit in all cases.
                releaseRebalancingPermit();
              } finally {
                // Stop the client even if releasing the permit failed.
                adminClient.stop();
                adminClient = null;
              }
            }
          }

          /**
           * Stops accepting new store tasks and waits up to the admin socket timeout for the
           * submitted ones to finish.
           */
          private void waitForShutdown() {
            try {
              executors.shutdown();
              boolean terminated =
                  executors.awaitTermination(
                      voldemortConfig.getAdminSocketTimeout(), TimeUnit.SECONDS);
              if (!terminated) {
                logger.error(
                    "Timed out while awaiting termination for executors; "
                        + "some store tasks are still running.");
              }
            } catch (InterruptedException e) {
              logger.error("Interrupted while awaiting termination for executors.", e);
              // Restore the interrupt flag so callers further up can observe the interruption.
              Thread.currentThread().interrupt();
            }
          }

          /** Asks the local node to stop every in-flight migration and cancels pending tasks. */
          @Override
          public void stop() {
            updateStatus("stop() called on rebalance operation !!");
            // Read the field once: operate() may null it out concurrently.
            AdminClient client = adminClient;
            if (null != client) {
              for (int asyncID : rebalanceStatusList) {
                client.stopAsyncRequest(metadataStore.getNodeId(), asyncID);
              }
            }

            executors.shutdownNow();
          }

          /**
           * Migrates the partitions of one store from the donor to this node, waits for the
           * migration to complete, and then deletes the partitions listed in
           * {@code stealInfo.getDeletePartitionsList()} from the donor, if any.
           *
           * @param storeName store to migrate
           * @param adminClient client used to issue the admin requests
           * @param stealInfo donor id and partition lists for this rebalance
           * @throws Exception if the migration or the deletion fails
           */
          private void rebalanceStore(
              String storeName, AdminClient adminClient, RebalancePartitionsInfo stealInfo)
              throws Exception {
            logger.info("starting partitions migration for store:" + storeName);
            int asyncId =
                adminClient.migratePartitions(
                    stealInfo.getDonorId(),
                    metadataStore.getNodeId(),
                    storeName,
                    stealInfo.getPartitionList(),
                    null);
            rebalanceStatusList.add(asyncId);

            adminClient.waitForCompletion(
                metadataStore.getNodeId(),
                asyncId,
                voldemortConfig.getAdminSocketTimeout(),
                TimeUnit.SECONDS);

            // Cast so the boxed value is removed, not the element at index asyncId.
            rebalanceStatusList.remove((Object) asyncId);

            if (stealInfo.getDeletePartitionsList().size() > 0) {
              adminClient.deletePartitions(
                  stealInfo.getDonorId(), storeName, stealInfo.getDeletePartitionsList(), null);
              logger.debug(
                  "Deleted partitions "
                      + stealInfo.getDeletePartitionsList()
                      + " from donorNode:"
                      + stealInfo.getDonorId()
                      + " for store "
                      + storeName);
            }

            logger.info("partitions migration for store:" + storeName + " completed.");
          }
        });

    return requestId;
  }