public void run() { logger.debug("rebalancer run() called."); if (VoldemortState.REBALANCING_MASTER_SERVER.equals(metadataStore.getServerState()) && acquireRebalancingPermit()) { // free permit here for rebalanceLocalNode to acquire. releaseRebalancingPermit(); RebalancePartitionsInfo stealInfo = metadataStore.getRebalancingStealInfo(); try { logger.warn( "Rebalance server found incomplete rebalancing attempt, restarting rebalancing task " + stealInfo); if (stealInfo.getAttempt() < voldemortConfig.getMaxRebalancingAttempt()) { attemptRebalance(stealInfo); } else { logger.warn( "Rebalancing for rebalancing task " + stealInfo + " failed multiple times, Aborting more trials."); metadataStore.cleanAllRebalancingState(); } } catch (Exception e) { logger.error( "RebalanceService rebalancing attempt " + stealInfo + " failed with exception", e); } } }
/**
 * Handles an admin request to start rebalancing on this node.
 *
 * <p>Builds a {@link RebalancePartitionsInfo} from the request fields and hands it to the
 * rebalancer. On success the response carries the async request id with status "started"; a
 * {@link VoldemortException} is encoded into the response and logged instead of being thrown.
 *
 * @param request the rebalance request received from the admin client
 * @return the async-operation status response, holding either the request id or an error
 */
public VAdminProto.AsyncOperationStatusResponse handleRebalanceNode(
    VAdminProto.InitiateRebalanceNodeRequest request) {
  VAdminProto.AsyncOperationStatusResponse.Builder reply =
      VAdminProto.AsyncOperationStatusResponse.newBuilder();

  try {
    if (!voldemortConfig.isEnableRebalanceService()) {
      throw new VoldemortException(
          "Rebalance service is not enabled for node:" + metadataStore.getNodeId());
    }

    RebalancePartitionsInfo stealInfo =
        new RebalancePartitionsInfo(
            request.getStealerId(),
            request.getDonorId(),
            request.getPartitionsList(),
            request.getDeletePartitionsList(),
            request.getUnbalancedStoreList(),
            request.getAttempt());

    int asyncId = rebalancer.rebalanceLocalNode(stealInfo);

    reply.setRequestId(asyncId);
    reply.setDescription(stealInfo.toString());
    reply.setStatus("started");
    reply.setComplete(false);
  } catch (VoldemortException e) {
    reply.setError(ProtoUtils.encodeError(errorCodeMapper, e));
    logger.error("handleRebalanceNode failed for request(" + request.toString() + ")", e);
  }

  return reply.build();
}
/**
 * Runs one more rebalancing attempt for the given task and blocks until it completes.
 *
 * <p>Increments the attempt counter on {@code stealInfo}, starts the rebalance through
 * {@code rebalanceLocalNode} and waits for the resulting async operation on the stealer node.
 * The temporary admin client created for the wait is always stopped, including when starting
 * or waiting for the rebalance throws.
 *
 * @param stealInfo the rebalancing task to retry; its attempt count is incremented
 */
private void attemptRebalance(RebalancePartitionsInfo stealInfo) {
  stealInfo.setAttempt(stealInfo.getAttempt() + 1);

  AdminClient adminClient =
      RebalanceUtils.createTempAdminClient(voldemortConfig, metadataStore.getCluster(), 4, 2);
  try {
    int rebalanceAsyncId = rebalanceLocalNode(stealInfo);
    adminClient.waitForCompletion(
        stealInfo.getStealerId(),
        rebalanceAsyncId,
        voldemortConfig.getAdminSocketTimeout(),
        TimeUnit.SECONDS);
  } finally {
    // The client is temporary; without this it would leak on every retry.
    adminClient.stop();
  }
}
/**
 * Writes {@code value} to the inner store, first pulling the key's current data from the donor
 * node when the key is being redirected.
 *
 * @param key the key to write
 * @param value the versioned value to store
 * @param transforms transforms passed to both the proxy fetch and the inner store
 * @throws VoldemortException if the write fails
 */
@Override
public void put(ByteArray key, Versioned<byte[]> value, byte[] transforms)
    throws VoldemortException {
  RebalancePartitionsInfo redirectInfo = redirectingKey(key);
  boolean keyIsBeingStolen = redirectInfo != null;

  if (keyIsBeingStolen) {
    // If I am rebalancing for this key, try to do remote get(), put it locally first to get
    // the correct version ignoring any ObsoleteVersionException.
    proxyGetAndLocalPut(key, redirectInfo.getDonorId(), transforms);
  }

  getInnerStore().put(key, value, transforms);
}
/**
 * Returns the versions stored for {@code key} in the inner store, first pulling the key's
 * current data from the donor node when the key is being redirected.
 *
 * @param key the key to look up
 * @return the versions reported by the inner store
 */
@Override
public List<Version> getVersions(ByteArray key) {
  RebalancePartitionsInfo redirectInfo = redirectingKey(key);
  boolean keyIsBeingStolen = redirectInfo != null;

  if (keyIsBeingStolen) {
    // If I am rebalancing for this key, try to do remote get(), put it locally first to get
    // the correct version ignoring any ObsoleteVersionException.
    proxyGetAndLocalPut(key, redirectInfo.getDonorId(), null);
  }

  return getInnerStore().getVersions(key);
}
/**
 * Converts the persisted String form of a metadata value back into its Object form, choosing
 * the parser from the metadata key.
 *
 * <p>Cluster and store definitions are read through their mappers, server and cluster state
 * are parsed as {@code VoldemortState}, the node id as an integer. Rebalancing steal info is
 * parsed as a full {@code RebalancerState} when the text starts with {@code "["}; otherwise it
 * is parsed as a single {@code RebalancePartitionsInfo} and wrapped in a new state.
 *
 * @param key the metadata key that decides how the value is parsed
 * @param value the versioned String form of the value
 * @return the parsed object, carrying the same version as {@code value}
 * @throws VoldemortException if {@code key} is not one of the handled metadata keys
 */
private Versioned<Object> convertStringToObject(String key, Versioned<String> value) {
  final Object parsed;

  if (CLUSTER_KEY.equals(key)) {
    parsed = clusterMapper.readCluster(new StringReader(value.getValue()));
  } else if (STORES_KEY.equals(key)) {
    parsed = storeMapper.readStoreList(new StringReader(value.getValue()));
  } else if (SERVER_STATE_KEY.equals(key) || CLUSTER_STATE_KEY.equals(key)) {
    parsed = VoldemortState.valueOf(value.getValue());
  } else if (NODE_ID_KEY.equals(key)) {
    parsed = Integer.parseInt(value.getValue());
  } else if (REBALANCING_STEAL_INFO.equals(key)) {
    String serialized = value.getValue();
    // A leading '[' marks a serialized state; anything else is a single steal info.
    parsed =
        serialized.startsWith("[")
            ? RebalancerState.create(serialized)
            : new RebalancerState(Arrays.asList(RebalancePartitionsInfo.create(serialized)));
  } else {
    throw new VoldemortException(
        "Unhandled key:'" + key + "' for String to Object serialization.");
  }

  return new Versioned<Object>(parsed, value.getVersion());
}
/**
 * Rejects a rebalance request that conflicts with a rebalance already recorded on this server.
 *
 * <p>The request is accepted when the server is not in {@code REBALANCING_MASTER_SERVER} state,
 * or when the recorded steal info names the same donor as the request.
 *
 * @param metadataStore the store holding the server state and the recorded steal info
 * @param stealInfo the incoming rebalance request
 * @throws VoldemortException if the server is already rebalancing from a different donor
 */
private void checkCurrentState(MetadataStore metadataStore, RebalancePartitionsInfo stealInfo) {
  if (!metadataStore.getServerState().equals(VoldemortState.REBALANCING_MASTER_SERVER)) {
    return;
  }
  if (metadataStore.getRebalancingStealInfo().getDonorId() == stealInfo.getDonorId()) {
    return;
  }

  throw new VoldemortException(
      "Server "
          + metadataStore.getNodeId()
          + " is already rebalancing from:"
          + metadataStore.getRebalancingStealInfo()
          + " rejecting rebalance request:"
          + stealInfo);
}
public RebalanceAsyncOperation( Rebalancer rebalancer, VoldemortConfig voldemortConfig, MetadataStore metadataStore, int requestId, RebalancePartitionsInfo stealInfo) { super(requestId, "Rebalance operation: " + stealInfo.toString()); this.rebalancer = rebalancer; this.voldemortConfig = voldemortConfig; this.metadataStore = metadataStore; this.stealInfo = stealInfo; this.rebalanceStatusList = new ArrayList<Integer>(); this.adminClient = null; this.executors = createExecutors(voldemortConfig.getMaxParallelStoresRebalancing()); }
@Override public void operate() throws Exception { adminClient = RebalanceUtils.createTempAdminClient( voldemortConfig, metadataStore.getCluster(), voldemortConfig.getMaxParallelStoresRebalancing(), 1); final List<Exception> failures = new ArrayList<Exception>(); try { for (final String storeName : ImmutableList.copyOf(stealInfo.getUnbalancedStoreList())) { executors.submit( new Runnable() { public void run() { try { boolean isReadOnlyStore = metadataStore .getStoreDef(storeName) .getType() .compareTo(ReadOnlyStorageConfiguration.TYPE_NAME) == 0; logger.info(getHeader(stealInfo) + "Working on store " + storeName); rebalanceStore(storeName, adminClient, stealInfo, isReadOnlyStore); // We finished the store, delete it stealInfo.removeStore(storeName); logger.info(getHeader(stealInfo) + "Completed working on store " + storeName); } catch (Exception e) { logger.error( getHeader(stealInfo) + "Error while rebalancing for store " + storeName + " - " + e.getMessage(), e); failures.add(e); } } }); } waitForShutdown(); // If empty, clean state List<String> unbalancedStores = Lists.newArrayList(stealInfo.getUnbalancedStoreList()); if (unbalancedStores.isEmpty()) { logger.info( getHeader(stealInfo) + "Rebalance of " + stealInfo + " completed successfully."); updateStatus( getHeader(stealInfo) + "Rebalance of " + stealInfo + " completed successfully."); metadataStore.deleteRebalancingState(stealInfo); } else { throw new VoldemortRebalancingException( getHeader(stealInfo) + "Failed to rebalance task " + stealInfo, failures); } } finally { // free the permit in all cases. logger.info( getHeader(stealInfo) + "Releasing permit for donor node " + stealInfo.getDonorId()); rebalancer.releaseRebalancingPermit(stealInfo.getDonorId()); adminClient.stop(); adminClient = null; } }
/**
 * Migrates one store from the donor node to this node and blocks until the migration is done.
 *
 * <p>The async id of the migration is kept in {@code rebalanceStatusList} while the migration
 * is being waited on. Afterwards, if the task lists partitions to delete for this store and
 * the store is not read-only, those partitions are deleted on the donor node. Progress is
 * written both to the log and to the operation status.
 *
 * @param storeName The name of the store
 * @param adminClient Admin client used to initiate the copying of data
 * @param stealInfo The steal information
 * @param isReadOnlyStore Boolean indicating that this is a read-only store
 */
private void rebalanceStore(
    String storeName,
    final AdminClient adminClient,
    RebalancePartitionsInfo stealInfo,
    boolean isReadOnlyStore) {
  final String fromDonor = " from donor node " + stealInfo.getDonorId();

  logger.info(
      getHeader(stealInfo) + "Starting partitions migration for store " + storeName + fromDonor);
  updateStatus(
      getHeader(stealInfo) + "Started partition migration for store " + storeName + fromDonor);

  int asyncId =
      adminClient.migratePartitions(
          stealInfo.getDonorId(),
          metadataStore.getNodeId(),
          storeName,
          stealInfo.getReplicaToAddPartitionList(storeName),
          null,
          stealInfo.getInitialCluster());
  rebalanceStatusList.add(asyncId);

  if (logger.isDebugEnabled()) {
    logger.debug(
        getHeader(stealInfo)
            + "Waiting for completion for "
            + storeName
            + " with async id "
            + asyncId);
  }

  adminClient.waitForCompletion(
      metadataStore.getNodeId(),
      asyncId,
      voldemortConfig.getRebalancingTimeoutSec(),
      TimeUnit.SECONDS,
      getStatus());

  // Remove by value, not by index.
  rebalanceStatusList.remove(Integer.valueOf(asyncId));

  String migrationDone =
      getHeader(stealInfo) + "Completed partition migration for store " + storeName + fromDonor;
  logger.info(migrationDone);
  updateStatus(migrationDone);

  boolean hasPartitionsToDelete =
      stealInfo.getReplicaToDeletePartitionList(storeName) != null
          && stealInfo.getReplicaToDeletePartitionList(storeName).size() > 0;

  if (hasPartitionsToDelete && !isReadOnlyStore) {
    final String onDonor = " on donor node " + stealInfo.getDonorId();

    String deleting = getHeader(stealInfo) + "Deleting partitions for store " + storeName + onDonor;
    logger.info(deleting);
    updateStatus(deleting);

    adminClient.deletePartitions(
        stealInfo.getDonorId(),
        storeName,
        stealInfo.getReplicaToDeletePartitionList(storeName),
        stealInfo.getInitialCluster(),
        null);

    String deleted = getHeader(stealInfo) + "Deleted partitions for store " + storeName + onDonor;
    logger.info(deleted);
    updateStatus(deleted);
  }

  String finished = getHeader(stealInfo) + "Finished all migration for store " + storeName;
  logger.info(finished);
  updateStatus(finished);
}
/**
 * Builds the prefix put in front of log and status messages for a rebalancing task, in the
 * form {@code "[Stealer <stealerId>, Donor <donorId>] "}.
 *
 * @param stealInfo the rebalancing task the message refers to
 * @return the bracketed prefix, ending with a single space
 */
private String getHeader(RebalancePartitionsInfo stealInfo) {
  // The opening bracket was missing, leaving an unbalanced "]" in every message.
  return "[Stealer " + stealInfo.getStealerId() + ", Donor " + stealInfo.getDonorId() + "] ";
}
/** * Rebalance logic at single node level.<br> * <imp> should be called by the rebalancing node itself</imp><br> * Attempt to rebalance from node {@link RebalancePartitionsInfo#getDonorId()} for partitionList * {@link RebalancePartitionsInfo#getPartitionList()} * * <p>Force Sets serverState to rebalancing, Sets stealInfo in MetadataStore, fetch keys from * remote node and upsert them locally.<br> * On success clean all states it changed * * @param metadataStore * @param stealInfo * @return taskId for asynchronous task. */ public int rebalanceLocalNode(final RebalancePartitionsInfo stealInfo) { if (!acquireRebalancingPermit()) { RebalancePartitionsInfo info = metadataStore.getRebalancingStealInfo(); throw new AlreadyRebalancingException( "Node " + metadataStore.getCluster().getNodeById(info.getStealerId()) + " is already rebalancing from " + info.getDonorId() + " rebalanceInfo:" + info); } // check and set State checkCurrentState(metadataStore, stealInfo); setRebalancingState(metadataStore, stealInfo); // get max parallel store rebalancing allowed final int maxParallelStoresRebalancing = (-1 != voldemortConfig.getMaxParallelStoresRebalancing()) ? 
voldemortConfig.getMaxParallelStoresRebalancing() : stealInfo.getUnbalancedStoreList().size(); int requestId = asyncRunner.getUniqueRequestId(); asyncRunner.submitOperation( requestId, new AsyncOperation(requestId, "Rebalance Operation:" + stealInfo.toString()) { private List<Integer> rebalanceStatusList = new ArrayList<Integer>(); AdminClient adminClient = null; final ExecutorService executors = createExecutors(maxParallelStoresRebalancing); @Override public void operate() throws Exception { adminClient = RebalanceUtils.createTempAdminClient( voldemortConfig, metadataStore.getCluster(), maxParallelStoresRebalancing * 4, maxParallelStoresRebalancing * 2); final List<Exception> failures = new ArrayList<Exception>(); try { logger.info("starting rebalancing task" + stealInfo); for (final String storeName : ImmutableList.copyOf(stealInfo.getUnbalancedStoreList())) { executors.submit( new Runnable() { public void run() { try { rebalanceStore(storeName, adminClient, stealInfo); List<String> tempUnbalancedStoreList = new ArrayList<String>(stealInfo.getUnbalancedStoreList()); tempUnbalancedStoreList.remove(storeName); stealInfo.setUnbalancedStoreList(tempUnbalancedStoreList); setRebalancingState(metadataStore, stealInfo); } catch (Exception e) { logger.error( "rebalanceSubTask:" + stealInfo + " failed for store:" + storeName, e); failures.add(e); } } }); } waitForShutdown(); if (stealInfo.getUnbalancedStoreList().isEmpty()) { logger.info("Rebalancer: rebalance " + stealInfo + " completed successfully."); // clean state only if // successfull. metadataStore.cleanAllRebalancingState(); } else { throw new VoldemortRebalancingException( "Failed to rebalance task " + stealInfo, failures); } } finally { // free the permit in all cases. 
releaseRebalancingPermit(); adminClient.stop(); adminClient = null; } } private void waitForShutdown() { try { executors.shutdown(); executors.awaitTermination(voldemortConfig.getAdminSocketTimeout(), TimeUnit.SECONDS); } catch (InterruptedException e) { logger.error("Interrupted while awaiting termination for executors.", e); } } @Override public void stop() { updateStatus("stop() called on rebalance operation !!"); if (null != adminClient) { for (int asyncID : rebalanceStatusList) { adminClient.stopAsyncRequest(metadataStore.getNodeId(), asyncID); } } executors.shutdownNow(); } private void rebalanceStore( String storeName, AdminClient adminClient, RebalancePartitionsInfo stealInfo) throws Exception { logger.info("starting partitions migration for store:" + storeName); int asyncId = adminClient.migratePartitions( stealInfo.getDonorId(), metadataStore.getNodeId(), storeName, stealInfo.getPartitionList(), null); rebalanceStatusList.add(asyncId); adminClient.waitForCompletion( metadataStore.getNodeId(), asyncId, voldemortConfig.getAdminSocketTimeout(), TimeUnit.SECONDS); rebalanceStatusList.remove((Object) asyncId); if (stealInfo.getDeletePartitionsList().size() > 0) { adminClient.deletePartitions( stealInfo.getDonorId(), storeName, stealInfo.getDeletePartitionsList(), null); logger.debug( "Deleted partitions " + stealInfo.getDeletePartitionsList() + " from donorNode:" + stealInfo.getDonorId() + " for store " + storeName); } logger.info("partitions migration for store:" + storeName + " completed."); } }); return requestId; }