/**
 * Logs, at INFO level, the number of over utilized nodes followed by their
 * names, and then the same for under utilized nodes. Does nothing when INFO
 * logging is disabled.
 */
private void logImbalancedNodes() {
  if (!LOG.isInfoEnabled()) {
    return;
  }

  int overCount = 0;
  int underCount = 0;
  StringBuilder overNames = new StringBuilder();
  StringBuilder underNames = new StringBuilder();

  // Classify every node once, collecting both the counters and the name lists
  for (BalancerDatanode candidate : this.datanodes.values()) {
    final boolean under = isUnderUtilized(candidate);
    final boolean over = isOverUtilized(candidate);

    // A node is counted at most once; the under utilized check takes precedence
    if (under) {
      underCount++;
    } else if (over) {
      overCount++;
    }

    if (over) {
      overNames.append(" ").append(candidate.getName());
    }
    if (under) {
      underNames.append(" ").append(candidate.getName());
    }
  }

  StringBuilder overReport = new StringBuilder();
  overReport.append(overCount).append(" over utilized nodes:").append(overNames);
  LOG.info(overReport);

  StringBuilder underReport = new StringBuilder();
  underReport.append(underCount).append(" under utilized nodes: ").append(underNames);
  LOG.info(underReport);
}
@Override public int compare(BalancerDatanode o1, BalancerDatanode o2) { int ret = super.compare(o1, o2); if (ret == 0) { ret = Double.valueOf(o2.getCurrentRemaining()).compareTo(o1.getCurrentRemaining()); } // TODO concurrency level can also be taken into consideration return ret; }
/**
 * Logs the predicted outcome of the plan at INFO level: first the byte
 * totals, then one line per known node with its current remaining value.
 */
private void logPlanOutcome() {
  if (!LOG.isInfoEnabled()) {
    return;
  }

  final String summary =
      "Predicted plan outcome: bytesLeftToMove: " + bytesLeftToMove
          + ", bytesToMove: " + bytesToMove;
  LOG.info(summary);

  for (BalancerDatanode entry : this.datanodes.values()) {
    final String line = entry.getName() + " remaining: " + entry.getCurrentRemaining();
    LOG.info(line);
  }
}
/**
 * Computes the remaining space of all nodes in {@code sources} and
 * {@code targets} as a share of their combined capacity, scaled by
 * {@code PERCENTAGE_BASE}.
 *
 * <p>NOTE(review): the division is done in floating point, so a combined
 * capacity of zero yields NaN or an infinity rather than an exception.
 *
 * @return summed remaining divided by summed capacity, times
 *     {@code PERCENTAGE_BASE}
 */
public double computeAvgRemaining() {
  long capacitySum = 0L;
  long remainingSum = 0L;

  for (BalancerDatanode source : sources) {
    capacitySum += source.getDatanode().getCapacity();
    remainingSum += source.getDatanode().getRemaining();
  }

  for (BalancerDatanode target : targets) {
    capacitySum += target.getDatanode().getCapacity();
    remainingSum += target.getDatanode().getRemaining();
  }

  // Widen to double before dividing so the ratio is not truncated
  final double remainingShare = ((double) remainingSum) / capacitySum;
  return remainingShare * PERCENTAGE_BASE;
}
/** Compute balance plan */ public BalancePlan(Balancer balancer, List<DatanodeInfo> datanodes) { if (datanodes == null || datanodes.isEmpty()) { throw new IllegalArgumentException("cannot prepare plan for empty cluster"); } avgRemaining = computeAvgRemaining(datanodes); lowerRemainingThreshold = Math.max(avgRemaining / 2, avgRemaining - balancer.threshold); upperRemainingThreshold = Math.min(PERCENTAGE_BASE, avgRemaining + balancer.threshold); if (lowerRemainingThreshold > upperRemainingThreshold) { throw new IllegalStateException("lowerThresh > upperThresh"); } LOG.info( "balanced range: [ " + lowerRemainingThreshold + ", " + upperRemainingThreshold + " ], average remaining: " + avgRemaining); long overLoadedBytes = 0L, underLoadedBytes = 0L; Bucket clusterBucket = new Bucket(); Map<Node, Bucket> rackBuckets = new HashMap<Node, Bucket>(); for (DatanodeInfo datanode : datanodes) { // Update network topology cluster.add(datanode); // Create bucket if none assert datanode.getParent() != null : "node outside of any rack"; Bucket bucket = rackBuckets.get(datanode.getParent()); if (bucket == null) { bucket = new Bucket(); rackBuckets.put(datanode.getParent(), bucket); } // Put DataNode into chosen bucket BalancerDatanode datanodeS; if (getRemaining(datanode) < avgRemaining) { // Above average utilized datanodeS = balancer.getSource(datanode, avgRemaining); bucket.addSource((Source) datanodeS); clusterBucket.addSource((Source) datanodeS); if (isOverUtilized(datanodeS)) { overLoadedBytes += (long) ((lowerRemainingThreshold - datanodeS.getCurrentRemaining()) * datanodeS.getDatanode().getCapacity() / PERCENTAGE_BASE); } } else { // Below average utilized datanodeS = new Target(datanode, avgRemaining); bucket.addTarget((Target) datanodeS); clusterBucket.addTarget((Target) datanodeS); if (isUnderUtilized(datanodeS)) { underLoadedBytes += (long) ((datanodeS.getCurrentRemaining() - upperRemainingThreshold) * datanodeS.getDatanode().getCapacity() / PERCENTAGE_BASE); } } // 
Update all DataNodes list this.datanodes.put(datanode.getStorageID(), datanodeS); } bytesLeftToMove = Math.max(overLoadedBytes, underLoadedBytes); logImbalancedNodes(); // Balance each rack bucket separately for (Bucket bucket : rackBuckets.values()) { double rackAverage = bucket.computeAvgRemaining(); if (lowerRemainingThreshold <= rackAverage && rackAverage <= upperRemainingThreshold) { bucket.updatePlan(); } // If perfectly balanced rack renders only over or underutilized DataNodes // we do not bother balancing it } // Balance cluster-wide afterwards clusterBucket.externalUpdate(); clusterBucket.updatePlan(); bytesToMove = 0L; for (Source src : sources) { bytesToMove += src.scheduledSize; } logPlanOutcome(); }
/** Determines how much data to move between given nodes */ private long moveSize(Source source, BalancerDatanode target) { // TODO balancing concurrency return Math.min(source.getAvailableMoveSize(), target.getAvailableMoveSize()); }
/**
 * Determines if the node is underutilized, i.e. its current remaining value
 * is strictly above the upper remaining threshold.
 *
 * @param datanode node to check
 * @return {@code true} if the node's current remaining exceeds
 *     {@code upperRemainingThreshold}
 */
private boolean isUnderUtilized(BalancerDatanode datanode) {
  final double remaining = datanode.getCurrentRemaining();
  return remaining > upperRemainingThreshold;
}