private void checkCurrentState(MetadataStore metadataStore, RebalancePartitionsInfo stealInfo) {
    if(metadataStore.getServerState().equals(VoldemortState.REBALANCING_MASTER_SERVER)
       && metadataStore.getRebalancingStealInfo().getDonorId() != stealInfo.getDonorId())
        throw new VoldemortException("Server " + metadataStore.getNodeId()
                                     + " is already rebalancing from: "
                                     + metadataStore.getRebalancingStealInfo()
                                     + " rejecting rebalance request: " + stealInfo);
}
@Override
public void put(ByteArray key, Versioned<byte[]> value, byte[] transforms)
        throws VoldemortException {
    RebalancePartitionsInfo stealInfo = redirectingKey(key);

    /*
     * If I am rebalancing for this key, do a remote get() and put the value
     * locally first so that the local store holds the correct version,
     * ignoring any {@link ObsoleteVersionException}.
     */
    if(stealInfo != null)
        proxyGetAndLocalPut(key, stealInfo.getDonorId(), transforms);

    getInnerStore().put(key, value, transforms);
}
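/*
 * Illustrative only: proxyGetAndLocalPut() is not shown in this section. The
 * following is a minimal sketch of the redirect step the comment above
 * describes; the getRedirectingSocketStore(donorId) helper is an assumption,
 * not the actual API of this class.
 */
private void proxyGetAndLocalPutSketch(ByteArray key, int donorId, byte[] transforms)
        throws VoldemortException {
    // Read every version of the key from the donor node
    List<Versioned<byte[]>> proxyValues = getRedirectingSocketStore(donorId).get(key, transforms);
    // Write them locally so the subsequent local operation resolves against
    // the correct vector clocks
    for(Versioned<byte[]> proxyValue: proxyValues) {
        try {
            getInnerStore().put(key, proxyValue, null);
        } catch(ObsoleteVersionException e) {
            // Expected race: a concurrent client put already stored a version
            // at least as new, so there is nothing to do
        }
    }
}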
@Override
public List<Version> getVersions(ByteArray key) {
    RebalancePartitionsInfo stealInfo = redirectingKey(key);

    /*
     * If I am rebalancing for this key, do a remote get() and put the value
     * locally first so that the local store holds the correct version,
     * ignoring any {@link ObsoleteVersionException}.
     */
    if(stealInfo != null) {
        proxyGetAndLocalPut(key, stealInfo.getDonorId(), null);
    }

    return getInnerStore().getVersions(key);
}
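/*
 * Illustrative only: redirectingKey() is not shown in this section. A sketch
 * of the check it presumably performs; the 'metadata' field and the
 * getRoutingStrategy(...) lookup are assumptions.
 */
private RebalancePartitionsInfo redirectingKeySketch(ByteArray key) {
    // Redirect only while this node is acting as a rebalancing stealer
    if(!VoldemortState.REBALANCING_MASTER_SERVER.equals(metadata.getServerState()))
        return null;
    RebalancePartitionsInfo stealInfo = metadata.getRebalancingStealInfo();
    // Redirect only keys that hash to a partition currently being stolen
    for(int partition: metadata.getRoutingStrategy(getName()).getPartitionList(key.get())) {
        if(stealInfo.getPartitionList().contains(partition))
            return stealInfo;
    }
    return null;
}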
@Override
public void operate() throws Exception {
    adminClient = RebalanceUtils.createTempAdminClient(voldemortConfig,
                                                       metadataStore.getCluster(),
                                                       voldemortConfig.getMaxParallelStoresRebalancing(),
                                                       1);
    final List<Exception> failures = new ArrayList<Exception>();
    try {
        for(final String storeName: ImmutableList.copyOf(stealInfo.getUnbalancedStoreList())) {
            executors.submit(new Runnable() {

                public void run() {
                    try {
                        boolean isReadOnlyStore = metadataStore.getStoreDef(storeName)
                                                               .getType()
                                                               .equals(ReadOnlyStorageConfiguration.TYPE_NAME);
                        logger.info(getHeader(stealInfo) + "Working on store " + storeName);

                        rebalanceStore(storeName, adminClient, stealInfo, isReadOnlyStore);

                        // Finished this store, remove it from the unbalanced list
                        stealInfo.removeStore(storeName);
                        logger.info(getHeader(stealInfo) + "Completed working on store "
                                    + storeName);
                    } catch(Exception e) {
                        logger.error(getHeader(stealInfo) + "Error while rebalancing for store "
                                     + storeName + " - " + e.getMessage(), e);
                        failures.add(e);
                    }
                }
            });
        }

        waitForShutdown();

        // If no store remains unbalanced, clean the rebalancing state
        List<String> unbalancedStores = Lists.newArrayList(stealInfo.getUnbalancedStoreList());
        if(unbalancedStores.isEmpty()) {
            logger.info(getHeader(stealInfo) + "Rebalance of " + stealInfo
                        + " completed successfully.");
            updateStatus(getHeader(stealInfo) + "Rebalance of " + stealInfo
                         + " completed successfully.");
            metadataStore.deleteRebalancingState(stealInfo);
        } else {
            throw new VoldemortRebalancingException(getHeader(stealInfo)
                                                    + "Failed to rebalance task " + stealInfo,
                                                    failures);
        }
    } finally {
        // Free the permit in all cases
        logger.info(getHeader(stealInfo) + "Releasing permit for donor node "
                    + stealInfo.getDonorId());
        rebalancer.releaseRebalancingPermit(stealInfo.getDonorId());
        adminClient.stop();
        adminClient = null;
    }
}
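/*
 * Illustrative only: the waitForShutdown() used by operate() above is not
 * shown for this task. A minimal sketch, assuming it mirrors the variant
 * defined inside rebalanceLocalNode() further down but is bounded by the
 * rebalancing timeout this operation already uses:
 */
private void waitForShutdownSketch() {
    try {
        executors.shutdown();
        executors.awaitTermination(voldemortConfig.getRebalancingTimeoutSec(),
                                   TimeUnit.SECONDS);
    } catch(InterruptedException e) {
        logger.error("Interrupted while awaiting termination of executors.", e);
    }
}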
/**
 * Blocking function which completes the migration of one store
 * 
 * @param storeName The name of the store
 * @param adminClient Admin client used to initiate the copying of data
 * @param stealInfo The steal information
 * @param isReadOnlyStore Boolean indicating whether this is a read-only store
 */
private void rebalanceStore(String storeName,
                            final AdminClient adminClient,
                            RebalancePartitionsInfo stealInfo,
                            boolean isReadOnlyStore) {
    logger.info(getHeader(stealInfo) + "Starting partition migration for store " + storeName
                + " from donor node " + stealInfo.getDonorId());
    updateStatus(getHeader(stealInfo) + "Started partition migration for store " + storeName
                 + " from donor node " + stealInfo.getDonorId());

    int asyncId = adminClient.migratePartitions(stealInfo.getDonorId(),
                                                metadataStore.getNodeId(),
                                                storeName,
                                                stealInfo.getReplicaToAddPartitionList(storeName),
                                                null,
                                                stealInfo.getInitialCluster());
    rebalanceStatusList.add(asyncId);

    if(logger.isDebugEnabled()) {
        logger.debug(getHeader(stealInfo) + "Waiting for completion for " + storeName
                     + " with async id " + asyncId);
    }
    adminClient.waitForCompletion(metadataStore.getNodeId(),
                                  asyncId,
                                  voldemortConfig.getRebalancingTimeoutSec(),
                                  TimeUnit.SECONDS,
                                  getStatus());

    // Remove by value (hence the cast to Object), not by index
    rebalanceStatusList.remove((Object) asyncId);

    logger.info(getHeader(stealInfo) + "Completed partition migration for store " + storeName
                + " from donor node " + stealInfo.getDonorId());
    updateStatus(getHeader(stealInfo) + "Completed partition migration for store " + storeName
                 + " from donor node " + stealInfo.getDonorId());

    if(stealInfo.getReplicaToDeletePartitionList(storeName) != null
       && stealInfo.getReplicaToDeletePartitionList(storeName).size() > 0 && !isReadOnlyStore) {
        logger.info(getHeader(stealInfo) + "Deleting partitions for store " + storeName
                    + " on donor node " + stealInfo.getDonorId());
        updateStatus(getHeader(stealInfo) + "Deleting partitions for store " + storeName
                     + " on donor node " + stealInfo.getDonorId());

        adminClient.deletePartitions(stealInfo.getDonorId(),
                                     storeName,
                                     stealInfo.getReplicaToDeletePartitionList(storeName),
                                     stealInfo.getInitialCluster(),
                                     null);
        logger.info(getHeader(stealInfo) + "Deleted partitions for store " + storeName
                    + " on donor node " + stealInfo.getDonorId());
        updateStatus(getHeader(stealInfo) + "Deleted partitions for store " + storeName
                     + " on donor node " + stealInfo.getDonorId());
    }

    logger.info(getHeader(stealInfo) + "Finished all migration for store " + storeName);
    updateStatus(getHeader(stealInfo) + "Finished all migration for store " + storeName);
}
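/*
 * Illustrative only: this operation's stop() is not shown in the section. A
 * sketch, assuming it cancels the outstanding async fetches the same way the
 * stop() defined inside rebalanceLocalNode() further down does:
 */
public void stopSketch() {
    updateStatus(getHeader(stealInfo) + "stop() called on rebalance operation");
    if(null != adminClient) {
        // Abort any migratePartitions requests still running on this node
        for(int asyncId: rebalanceStatusList) {
            adminClient.stopAsyncRequest(metadataStore.getNodeId(), asyncId);
        }
    }
    executors.shutdownNow();
}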
private String getHeader(RebalancePartitionsInfo stealInfo) {
    return "[Stealer " + stealInfo.getStealerId() + ", Donor " + stealInfo.getDonorId() + "] ";
}
/**
 * Rebalance logic at the single node level.<br>
 * <imp>Should be called by the rebalancing node itself.</imp><br>
 * Attempts to rebalance from node {@link RebalancePartitionsInfo#getDonorId()} for the
 * partition list {@link RebalancePartitionsInfo#getPartitionList()}
 * <p>
 * Forcibly sets the server state to rebalancing, sets stealInfo in the
 * {@link MetadataStore}, fetches keys from the remote node and upserts them locally.<br>
 * On success, cleans all the state it changed.
 * 
 * @param stealInfo Rebalancing information for this stealer node
 * @return The request id of the asynchronous rebalancing task
 */
public int rebalanceLocalNode(final RebalancePartitionsInfo stealInfo) {

    if(!acquireRebalancingPermit()) {
        RebalancePartitionsInfo info = metadataStore.getRebalancingStealInfo();
        throw new AlreadyRebalancingException("Node "
                                              + metadataStore.getCluster()
                                                             .getNodeById(info.getStealerId())
                                              + " is already rebalancing from "
                                              + info.getDonorId() + " rebalanceInfo: " + info);
    }

    // Check and set the rebalancing state
    checkCurrentState(metadataStore, stealInfo);
    setRebalancingState(metadataStore, stealInfo);

    // Determine the maximum number of stores to rebalance in parallel
    final int maxParallelStoresRebalancing = (-1 != voldemortConfig.getMaxParallelStoresRebalancing())
            ? voldemortConfig.getMaxParallelStoresRebalancing()
            : stealInfo.getUnbalancedStoreList().size();

    int requestId = asyncRunner.getUniqueRequestId();

    asyncRunner.submitOperation(requestId, new AsyncOperation(requestId, "Rebalance Operation: "
                                                                         + stealInfo.toString()) {

        private List<Integer> rebalanceStatusList = new ArrayList<Integer>();

        AdminClient adminClient = null;

        final ExecutorService executors = createExecutors(maxParallelStoresRebalancing);

        @Override
        public void operate() throws Exception {
            adminClient = RebalanceUtils.createTempAdminClient(voldemortConfig,
                                                               metadataStore.getCluster(),
                                                               maxParallelStoresRebalancing * 4,
                                                               maxParallelStoresRebalancing * 2);
            final List<Exception> failures = new ArrayList<Exception>();
            try {
                logger.info("Starting rebalancing task " + stealInfo);

                for(final String storeName: ImmutableList.copyOf(stealInfo.getUnbalancedStoreList())) {
                    executors.submit(new Runnable() {

                        public void run() {
                            try {
                                rebalanceStore(storeName, adminClient, stealInfo);

                                // Persist the shrunk store list so completed
                                // stores are not redone after a restart
                                List<String> tempUnbalancedStoreList = new ArrayList<String>(stealInfo.getUnbalancedStoreList());
                                tempUnbalancedStoreList.remove(storeName);
                                stealInfo.setUnbalancedStoreList(tempUnbalancedStoreList);
                                setRebalancingState(metadataStore, stealInfo);
                            } catch(Exception e) {
                                logger.error("rebalanceSubTask: " + stealInfo
                                             + " failed for store: " + storeName, e);
                                failures.add(e);
                            }
                        }
                    });
                }

                waitForShutdown();

                if(stealInfo.getUnbalancedStoreList().isEmpty()) {
                    logger.info("Rebalancer: rebalance " + stealInfo
                                + " completed successfully.");
                    // Clean the rebalancing state only on success
                    metadataStore.cleanAllRebalancingState();
                } else {
                    throw new VoldemortRebalancingException("Failed to rebalance task "
                                                            + stealInfo, failures);
                }
            } finally {
                // Free the permit in all cases
                releaseRebalancingPermit();
                adminClient.stop();
                adminClient = null;
            }
        }

        private void waitForShutdown() {
            try {
                executors.shutdown();
                executors.awaitTermination(voldemortConfig.getAdminSocketTimeout(),
                                           TimeUnit.SECONDS);
            } catch(InterruptedException e) {
                logger.error("Interrupted while awaiting termination of executors.", e);
            }
        }

        @Override
        public void stop() {
            updateStatus("stop() called on rebalance operation");
            if(null != adminClient) {
                for(int asyncId: rebalanceStatusList) {
                    adminClient.stopAsyncRequest(metadataStore.getNodeId(), asyncId);
                }
            }
            executors.shutdownNow();
        }

        private void rebalanceStore(String storeName,
                                    AdminClient adminClient,
                                    RebalancePartitionsInfo stealInfo) throws Exception {
            logger.info("Starting partition migration for store: " + storeName);

            int asyncId = adminClient.migratePartitions(stealInfo.getDonorId(),
                                                        metadataStore.getNodeId(),
                                                        storeName,
                                                        stealInfo.getPartitionList(),
                                                        null);
            rebalanceStatusList.add(asyncId);

            adminClient.waitForCompletion(metadataStore.getNodeId(),
                                          asyncId,
                                          voldemortConfig.getAdminSocketTimeout(),
                                          TimeUnit.SECONDS);
            // Remove by value (hence the cast to Object), not by index
            rebalanceStatusList.remove((Object) asyncId);

            if(stealInfo.getDeletePartitionsList().size() > 0) {
                adminClient.deletePartitions(stealInfo.getDonorId(),
                                             storeName,
                                             stealInfo.getDeletePartitionsList(),
                                             null);
                logger.debug("Deleted partitions " + stealInfo.getDeletePartitionsList()
                             + " from donor node: " + stealInfo.getDonorId() + " for store "
                             + storeName);
            }

            logger.info("Partition migration for store: " + storeName + " completed.");
        }
    });

    return requestId;
}
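/*
 * Illustrative only: how a caller might start and track a local rebalance. In
 * practice the request arrives through the admin service rather than a direct
 * call; getOperationStatus(int) and isComplete() on the async runner are
 * assumptions about its status-lookup API.
 */
private void rebalanceAndPollSketch(RebalancePartitionsInfo stealInfo)
        throws InterruptedException {
    int requestId = rebalanceLocalNode(stealInfo);
    // Poll the asynchronous rebalancing task until it completes or fails
    while(!asyncRunner.getOperationStatus(requestId).isComplete()) {
        Thread.sleep(1000); // back off between polls
    }
}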