/**
 * Both the drop in hash ring points and the global drop rate influence the minimum call count
 * that we should see to qualify for a state update. Currently, both factors are equally weighted
 * and multiplied together to come up with a scale factor. With this scheme, if either factor is
 * zero, then the overrideMinCallCount will be set to 1. If both factors are at half weight, then
 * the overall weight will be .5 * .5 = .25 of the original minCallCount.
 *
 * @param newOverrideDropRate the global drop rate being applied to this cluster
 * @param trackerClientUpdaters the updaters for the tracker clients in this cluster
 * @param pointsMap the number of hash ring points allocated to each tracker client URI
 * @param pointsPerWeight the number of hash ring points that correspond to a weight of 1.0
 */
public static void overrideMinCallCount(double newOverrideDropRate,
                                        List<TrackerClientUpdater> trackerClientUpdaters,
                                        Map<URI, Integer> pointsMap,
                                        int pointsPerWeight) {
  for (TrackerClientUpdater clientUpdater : trackerClientUpdaters) {
    TrackerClient client = clientUpdater.getTrackerClient();
    int currentOverrideMinCallCount =
        client.getDegraderControl(DEFAULT_PARTITION_ID).getOverrideMinCallCount();
    // use floating-point division; integer division would collapse any partial hash ring
    // reduction to a factor of 0 and defeat the scaling described above
    double hashFactor = ((double) pointsMap.get(client.getUri())) / pointsPerWeight;
    double transmitFactor = 1.0 - newOverrideDropRate;
    int newOverrideMinCallCount = (int) Math.max(
        Math.round(client.getDegraderControl(DEFAULT_PARTITION_ID).getMinCallCount()
            * hashFactor * transmitFactor),
        1);

    if (newOverrideMinCallCount != currentOverrideMinCallCount) {
      clientUpdater.setOverrideMinCallCount(newOverrideMinCallCount);
      warn(_log, "overriding Min Call Count to ", newOverrideMinCallCount,
          " for client: ", client.getUri());
    }
  }
}
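
// Illustrative sketch only (the numbers below are assumed inputs, not configuration defaults):
// with a configured minCallCount of 40, pointsPerWeight of 100, 50 hash ring points currently
// assigned to a client, and a global drop rate of 0.2, the override works out to:
//
//   double hashFactor     = 50 / 100.0;                                    // 0.5
//   double transmitFactor = 1.0 - 0.2;                                     // 0.8
//   int override          = (int) Math.max(Math.round(40 * 0.5 * 0.8), 1); // 16
//
// so that client only needs 16 calls in an interval to qualify for a state update.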
/**
 * A new state is considered healthy only if the average cluster latency is at or below the
 * configured low water mark and every tracker client has its full complement of hash ring points.
 */
static boolean isNewStateHealthy(DegraderLoadBalancerState newState,
                                 DegraderLoadBalancerStrategyConfig config,
                                 List<TrackerClientUpdater> trackerClientUpdaters) {
  if (newState.getCurrentAvgClusterLatency() > config.getLowWaterMark()) {
    return false;
  }

  Map<URI, Integer> pointsMap = newState.getPointsMap();
  for (TrackerClientUpdater clientUpdater : trackerClientUpdaters) {
    TrackerClient client = clientUpdater.getTrackerClient();
    int perfectHealth =
        (int) (client.getPartitionWeight(DEFAULT_PARTITION_ID) * config.getPointsPerWeight());
    Integer point = pointsMap.get(client.getUri());
    if (point < perfectHealth) {
      return false;
    }
  }
  return true;
}
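
// Minimal sketch of the health check (assumed values): with pointsPerWeight = 100 and a
// partition weight of 1.0, "perfect health" for a client is 100 points. If the points map
// currently assigns that client fewer points, the state is not yet healthy:
//
//   int perfectHealth = (int) (1.0 * 100);          // 100
//   Integer point     = pointsMap.get(clientUri);   // e.g. 80 while recovering
//   // point < perfectHealth, so isNewStateHealthy(...) returns false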
/**
 * updateState
 *
 * <p>We have two mechanisms to influence the health and traffic patterns of the client: load
 * balancing (switching traffic from one host to another) and degrading service (dropping calls).
 * We load balance by allocating points in a consistent hash ring based on the computedDropRate of
 * the individual TrackerClients, which takes into account the latency seen by that TrackerClient's
 * requests. Alternatively, if the cluster is unhealthy (as judged by a high latency watermark), we
 * can drop a portion of traffic across all tracker clients corresponding to this cluster.
 *
 * <p>The reason we do not currently consider error rate when adjusting the hash ring is that
 * there are legitimate errors that servers can send back for clients to handle, such as 400
 * return codes. A potential improvement would be to catch transport-level exceptions and 500
 * level return codes, but the implications of that would need to be carefully understood and
 * documented.
 *
 * <p>We don't want to both reduce hash points and let clients manage their own drop rates,
 * because the clients do not have the global view that the load balancing strategy does. Without
 * a global view, a client won't know whether it already has a reduced number of hash points. If
 * the client continued to drop at the same rate as before its points were reduced, its outbound
 * requests would be cut twice: once by the reduction in points and once by the client's own drop
 * rate. To avoid this, the drop rate is managed globally by the load balancing strategy and
 * provided to each client. The strategy will ALTERNATE between adjusting the hash ring points and
 * the global drop rate in order to avoid double-penalizing a client. See below:
 *
 * <p>Period 1: we find the average latency is greater than the high water mark, so we increase
 * the global drop rate for this cluster (say from 0% to 20%) and 20% of all calls get dropped.
 *
 * <p>Period 2: the average latency is still higher than the high water mark, and we find it is
 * especially high for a few specific clients in the cluster. We then reduce the number of hash
 * points for those clients in the hash ring, with the hope that we'll redirect the traffic to
 * "healthier" clients and reduce the average latency.
 *
 * <p>Period 3: the average latency is still higher than the high water mark, so we alternate
 * strategies and increase the global drop rate for the whole cluster again.
 *
 * <p>This repeats until the latency falls below the high water mark while staying above the low
 * water mark, at which point we maintain the state. If the latency becomes lower than the low
 * water mark, the cluster is getting healthier and can serve more traffic, so we start recovery
 * as explained below.
 *
 * <p>We also have a mechanism for recovery if the number of points in the hash ring is not enough
 * to receive traffic. The initialRecoveryLevel is a number between 0.0 and 1.0 and corresponds to
 * a fraction of the tracker client's full hash points, e.g. if a client has a default 100 hash
 * points in a ring, 0.0 means there are 0 points for the client in the ring and 1.0 means there
 * are 100 points in the ring for the client. The second configuration, rampFactor, will
 * geometrically increase the previous recoveryLevel if traffic still hasn't been seen for that
 * tracker client.
 *
 * <p>The reason for using a weight instead of real points is to allow an initialRecoveryLevel
 * that corresponds to less than one hash point. This is useful if a "cooling off" period is
 * desirable for misbehaving tracker clients, i.e. given a full weight of 100 hash points, an
 * initialRecoveryLevel of 0.005 (0 hash points at the start) and rampFactor = 2 mean there will
 * be one cooling off period before the client is reintroduced into the hash ring (see below).
 *
 * <p>Period 1: 100 * 0.005 = 0.5 points -> so nothing in the hash ring.
 *
 * <p>Period 2: 100 * (0.005 * 2, because of the rampFactor) = 1 point -> so we'll add one point
 * to the hash ring.
 *
 * <p>Another example: given initialRecoveryLevel = 0.01, rampFactor = 2, and default tracker
 * client hash points of 100, we will increase the hash points in this pattern on successive
 * updateStates: 0.01, 0.02, 0.04, 0.08, 0.16, 0.32, etc. -> 1, 2, 4, 8, 16, 32 points in the
 * hash ring, stopping as soon as calls are recorded for that tracker client.
 *
 * <p>We also have highWaterMark and lowWaterMark as properties of the DegraderLoadBalancer
 * strategy so that the strategy can decide whether to start dropping traffic GLOBALLY across all
 * tracker clients for this cluster. The amount of traffic to drop is controlled by the
 * globalStepUp and globalStepDown properties, where globalStepUp controls how much the global
 * drop rate increases per interval and globalStepDown controls how much the global drop rate
 * decreases per interval. We only step up the global drop rate when the average cluster latency
 * is higher than the highWaterMark, and only step down the global drop rate when the average
 * cluster latency is lower than the lowWaterMark.
 *
 * <p>This code is thread reentrant. Multiple threads can potentially call this concurrently, so
 * callers must pass in the DegraderLoadBalancerState that they based their shouldUpdate() call
 * on. The multiple threads may have different views of the trackerClients' latency, but this is
 * OK because the new state in the end will have taken only one action (either load balancing or
 * call dropping, with at most one step). Currently we will not call this concurrently, as
 * checkUpdateState will control entry to a single thread.
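 *
 * <p>For illustration only (hypothetical configuration values, not defaults): with
 * globalStepUp = 0.2 and globalStepDown = 0.05, successive CALL_DROPPING updates while the
 * average cluster latency stays above the highWaterMark move the override drop rate
 * 0.0 -> 0.2 -> 0.4 -> ..., and once the latency falls below the lowWaterMark it steps back
 * down 0.4 -> 0.35 -> 0.3 -> ..., one step per update interval.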
 *
 * @param clusterGenerationId the cluster generation that the caller observed
 * @param oldState the state that the caller based its shouldUpdate() decision on
 * @param config the strategy configuration
 * @param trackerClientUpdaters the updaters for the tracker clients in this cluster
 */
private static DegraderLoadBalancerState doUpdateState(long clusterGenerationId,
                                                       DegraderLoadBalancerState oldState,
                                                       DegraderLoadBalancerStrategyConfig config,
                                                       List<TrackerClientUpdater> trackerClientUpdaters) {
  debug(_log, "updating state for: ", trackerClientUpdaters);

  double sumOfClusterLatencies = 0.0;
  double computedClusterDropSum = 0.0;
  double computedClusterWeight = 0.0;
  long totalClusterCallCount = 0;
  boolean hashRingChanges = false;
  boolean recoveryMapChanges = false;

  DegraderLoadBalancerState.Strategy strategy = oldState.getStrategy();
  Map<TrackerClient, Double> oldRecoveryMap = oldState.getRecoveryMap();
  Map<TrackerClient, Double> newRecoveryMap = new HashMap<TrackerClient, Double>(oldRecoveryMap);
  double currentOverrideDropRate = oldState.getCurrentOverrideDropRate();
  double initialRecoveryLevel = config.getInitialRecoveryLevel();
  double ringRampFactor = config.getRingRampFactor();
  int pointsPerWeight = config.getPointsPerWeight();
  DegraderLoadBalancerState newState;

  for (TrackerClientUpdater clientUpdater : trackerClientUpdaters) {
    TrackerClient client = clientUpdater.getTrackerClient();
    double averageLatency = client.getDegraderControl(DEFAULT_PARTITION_ID).getLatency();
    long callCount = client.getDegraderControl(DEFAULT_PARTITION_ID).getCallCount();
    oldState.getPreviousMaxDropRate().put(client, clientUpdater.getMaxDropRate());
    sumOfClusterLatencies += averageLatency * callCount;
    totalClusterCallCount += callCount;
    double clientDropRate =
        client.getDegraderControl(DEFAULT_PARTITION_ID).getCurrentComputedDropRate();
    computedClusterDropSum += client.getPartitionWeight(DEFAULT_PARTITION_ID) * clientDropRate;
    computedClusterWeight += client.getPartitionWeight(DEFAULT_PARTITION_ID);

    boolean recoveryMapContainsClient = newRecoveryMap.containsKey(client);

    // The following block of code calculates and updates the maxDropRate if the client had been
    // fully degraded in the past and has not received any requests since being fully degraded.
    // To increase the chances of the client receiving a request, we change the maxDropRate, which
    // influences the maximum value of computedDropRate, which is used to compute the number of
    // points in the hash ring for the clients.
    if (callCount == 0) {
      // if this client is enrolled in the program, decrease the maxDropRate.
      // It is important to note that this excludes clients that haven't gotten traffic
      // due solely to low volume.
      if (recoveryMapContainsClient) {
        // if it's the hash ring's turn to adjust, then adjust the maxDropRate.
        // Otherwise, we let the call dropping strategy take its turn, even if
        // it may do nothing.
        if (strategy == DegraderLoadBalancerState.Strategy.LOAD_BALANCE) {
          double oldMaxDropRate = clientUpdater.getMaxDropRate();
          double transmissionRate = 1.0 - oldMaxDropRate;
          if (transmissionRate <= 0.0) {
            // We use the initialRecoveryLevel to indicate how many points to initially set
            // the tracker client to when traffic has stopped flowing to this node.
            transmissionRate = initialRecoveryLevel;
          } else {
            transmissionRate *= ringRampFactor;
            transmissionRate = Math.min(transmissionRate, 1.0);
          }
          double newMaxDropRate = 1.0 - transmissionRate;
          clientUpdater.setMaxDropRate(newMaxDropRate);
        }
        recoveryMapChanges = true;
      }
    }
    // else we don't really need to change the client's maxDropRate.
    else if (recoveryMapContainsClient) {
      // else the recovery map contains the client and the call count was > 0.
      // Tough love here: once the rehab clients start taking traffic, we
      // restore their maxDropRate to its original value and unenroll them
      // from the program.
      // This is safe because the hash ring points are controlled by the
      // computedDropRate variable, and the call dropping rate is controlled by
      // the overrideDropRate. The maxDropRate only serves to cap the computedDropRate and
      // overrideDropRate.
      // We store the maxDropRate and restore it here because the initialRecoveryLevel could
      // potentially be higher than what the default maxDropRate allows (the maxDropRate doesn't
      // necessarily have to be 1.0). For instance, if the maxDropRate was 0.99 and the
      // initialRecoveryLevel was 0.05, then we need to store the old maxDropRate.
      clientUpdater.setMaxDropRate(newRecoveryMap.get(client));
      newRecoveryMap.remove(client);
      recoveryMapChanges = true;
    }
  }

  double computedClusterDropRate = computedClusterDropSum / computedClusterWeight;
  debug(_log, "total cluster call count: ", totalClusterCallCount);
  debug(_log, "computed cluster drop rate for ", trackerClientUpdaters.size(),
      " nodes: ", computedClusterDropRate);

  if (oldState.getClusterGenerationId() == clusterGenerationId
      && totalClusterCallCount <= 0
      && !recoveryMapChanges) {
    // if the cluster has not been called recently (total cluster call count is <= 0),
    // we already have a state with the same set of URIs (same cluster generation),
    // and no clients are in rehab, then don't change anything.
    debug(_log, "New state is the same as the old state so we're not changing anything. Old state = ",
        oldState, ", config=", config);
    return new DegraderLoadBalancerState(oldState, clusterGenerationId,
        config.getUpdateIntervalMs(), config.getClock().currentTimeMillis());
  }

  // update our overrides.
  double newCurrentAvgClusterLatency = -1;
  if (totalClusterCallCount > 0) {
    newCurrentAvgClusterLatency = sumOfClusterLatencies / totalClusterCallCount;
  }
  debug(_log, "average cluster latency: ", newCurrentAvgClusterLatency);

  // This points map stores how many hash ring points to allocate for each tracker client.
  Map<URI, Integer> points = new HashMap<URI, Integer>();
  Map<URI, Integer> oldPointsMap = oldState.getPointsMap();

  for (TrackerClientUpdater clientUpdater : trackerClientUpdaters) {
    TrackerClient client = clientUpdater.getTrackerClient();
    double successfulTransmissionWeight;
    URI clientUri = client.getUri();

    // Don't take cluster health into account when calculating the number of points
    // for each client. This is because the individual clients already take into account
    // latency, and a successfulTransmissionWeight can and should be made
    // independent of other nodes in the cluster. Otherwise, one unhealthy client in a small
    // cluster can take down the entire cluster if the avg latency is too high.
    // The global drop rate will take the cluster latency into account. High cluster-wide error
    // rates are not something d2 can address.
    //
    // This client's maxDropRate and currentComputedDropRate may have been adjusted if it's in
    // the rehab program (to gradually send traffic its way).
    double dropRate = Math.min(
        client.getDegraderControl(DEFAULT_PARTITION_ID).getCurrentComputedDropRate(),
        clientUpdater.getMaxDropRate());

    // calculate the weight as the probability of a successful transmission to this node
    // multiplied by the client's self-defined weight. Thus, the node's final weight
    // takes into account both the self-defined weight (to account for different
    // hardware in the same cluster) and the performance of the node (as defined by the
    // node's degrader).
    successfulTransmissionWeight = client.getPartitionWeight(DEFAULT_PARTITION_ID) * (1.0 - dropRate);

    debug(_log, "computed new weight for uri ", clientUri, ": ", successfulTransmissionWeight);

    int newPoints = (int) (successfulTransmissionWeight * pointsPerWeight);

    if (newPoints == 0) {
      // We are choking off traffic to this tracker client.
      // Enroll this tracker client in the recovery program so that
      // we can make sure it still gets some traffic.
      Double oldMaxDropRate = clientUpdater.getMaxDropRate();

      // set the default recovery level.
      newPoints = (int) (initialRecoveryLevel * pointsPerWeight);

      // Keep track of the original maxDropRate
      if (!newRecoveryMap.containsKey(client)) {
        // keep track of this client
        newRecoveryMap.put(client, oldMaxDropRate);
        clientUpdater.setMaxDropRate(1.0 - initialRecoveryLevel);
      }
    }

    // keep track of whether we're making actual changes to the hash ring in this updateState.
    points.put(clientUri, newPoints);
    if (!oldPointsMap.containsKey(clientUri) || oldPointsMap.get(clientUri) != newPoints) {
      hashRingChanges = true;
    }
  }

  // Here is where we actually decide what compensating action to take, if any:
  // if the strategy to try is load balancing and there are new changes to the hash ring, or
  // if there were changes to the members of the cluster.
  if ((strategy == DegraderLoadBalancerState.Strategy.LOAD_BALANCE && hashRingChanges)
      // this check makes sure that when we first generate a new state, we always start
      // with the LOAD_BALANCE strategy
      || oldState.getClusterGenerationId() != clusterGenerationId) {
    // atomic overwrite
    // try call dropping next time we updateState.
    newState = new DegraderLoadBalancerState(config.getUpdateIntervalMs(),
        clusterGenerationId,
        points,
        config.getClock().currentTimeMillis(),
        DegraderLoadBalancerState.Strategy.CALL_DROPPING,
        currentOverrideDropRate,
        newCurrentAvgClusterLatency,
        true,
        newRecoveryMap,
        oldState.getServiceName(),
        oldState.getDegraderProperties(),
        totalClusterCallCount);
    logState(oldState, newState, config, trackerClientUpdaters);
  } else {
    // time to try the call dropping strategy, if necessary.
    // we are explicitly setting the override drop rate to a number between 0 and 1, inclusive.
    double newDropLevel = Math.max(0.0, currentOverrideDropRate);

    // if the cluster is unhealthy (above the high water mark),
    // then increase the override drop rate.
    //
    // Note that the tracker clients in the recovery list are also affected by the global
    // overrideDropRate, and that their hash ring bump-ups will also alternate with this
    // overrideDropRate adjustment, if necessary.
    // This is fine because the first priority is to get the cluster latency stabilized.
    if (newCurrentAvgClusterLatency > 0
        && totalClusterCallCount >= config.getMinClusterCallCountHighWaterMark()) {
      // if we enter here, we have enough call counts to be confident that our average
      // latency is statistically significant.
      if (newCurrentAvgClusterLatency >= config.getHighWaterMark()
          && currentOverrideDropRate != 1.0) {
        // the cluster latency is too high and we can drop more traffic
        newDropLevel = Math.min(1.0, newDropLevel + config.getGlobalStepUp());
      } else if (newCurrentAvgClusterLatency <= config.getLowWaterMark()
          && currentOverrideDropRate != 0.0) {
        // the cluster latency is good and we can reduce the override drop rate
        newDropLevel = Math.max(0.0, newDropLevel - config.getGlobalStepDown());
      }
      // else the average cluster latency is between the low and high water marks, or we can't
      // change anything more, so do not change anything.
    } else if (newCurrentAvgClusterLatency > 0
        && totalClusterCallCount >= config.getMinClusterCallCountLowWaterMark()) {
      // if we enter here, we don't have enough calls to the cluster. We shouldn't degrade more,
      // but we might recover a bit if the latency is healthy.
      if (newCurrentAvgClusterLatency <= config.getLowWaterMark()
          && currentOverrideDropRate != 0.0) {
        // the cluster latency is good and we can reduce the override drop rate
        newDropLevel = Math.max(0.0, newDropLevel - config.getGlobalStepDown());
      }
      // else the average cluster latency is somewhat high, but since the qps is not that high,
      // we shouldn't degrade.
    } else {
      // if we enter here, we have very low traffic. We should reduce the overrideDropRate, if
      // possible.
      // When we have below 1 QPS of traffic, we should be pretty confident that the cluster can
      // handle very low traffic. Of course this depends on the MinClusterCallCountLowWaterMark
      // that the service owner sets.
      // Another possible cause is that we had somehow choked off all traffic to the cluster,
      // most likely in a one-node/small-cluster scenario. Obviously we can't check latency here;
      // we'll have to rely on the metric in the next updateState. If the cluster is still having
      // latency problems, then we will oscillate between off and letting a little traffic
      // through, and that is acceptable. If the latency, though high, is deemed acceptable, then
      // the watermarks can be adjusted to let more traffic through.
      newDropLevel = Math.max(0.0, newDropLevel - config.getGlobalStepDown());
    }

    if (newDropLevel != currentOverrideDropRate) {
      overrideClusterDropRate(newDropLevel, trackerClientUpdaters);
    }

    // don't change the points map or the recoveryMap, but try the load balancing strategy next
    // time.
    newState = new DegraderLoadBalancerState(config.getUpdateIntervalMs(),
        clusterGenerationId,
        oldPointsMap,
        config.getClock().currentTimeMillis(),
        DegraderLoadBalancerState.Strategy.LOAD_BALANCE,
        newDropLevel,
        newCurrentAvgClusterLatency,
        true,
        oldRecoveryMap,
        oldState.getServiceName(),
        oldState.getDegraderProperties(),
        totalClusterCallCount);
    logState(oldState, newState, config, trackerClientUpdaters);
    points = oldPointsMap;
  }

  // adjust the min call count for each client based on the hash ring reduction and the call
  // dropping fraction.
  overrideMinCallCount(currentOverrideDropRate, trackerClientUpdaters, points, pointsPerWeight);

  return newState;
}
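
// Sketch of the alternation, with assumed numbers rather than real defaults: suppose an update
// running the LOAD_BALANCE strategy shrinks one client's points, so hashRingChanges is true.
// That update publishes the new points map and flips the strategy to CALL_DROPPING. If the next
// interval still shows the average cluster latency at or above the highWaterMark with enough
// calls, that update leaves the points map alone and only raises the global drop rate:
//
//   double newDropLevel = Math.max(0.0, currentOverrideDropRate);            // 0.0
//   newDropLevel = Math.min(1.0, newDropLevel + config.getGlobalStepUp());   // 0.2 if the step is 0.2
//
// and then flips the strategy back to LOAD_BALANCE for the following interval.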
@Override
public TrackerClient getTrackerClient(Request request,
                                      RequestContext requestContext,
                                      long clusterGenerationId,
                                      int partitionId,
                                      List<TrackerClient> trackerClients) {
  if (partitionId != DEFAULT_PARTITION_ID) {
    throw new UnsupportedOperationException(
        "Trying to access partition: " + partitionId + " on an unpartitioned cluster");
  }

  debug(_log, "getTrackerClient with generation id ", clusterGenerationId,
      " on tracker clients: ", trackerClients);

  if (trackerClients == null || trackerClients.size() == 0) {
    warn(_log, "getTrackerClient called with null/empty trackerClients, so returning null");
    return null;
  }

  // only one thread will be allowed to enter updateState.
  checkUpdateState(clusterGenerationId, trackerClients);

  URI targetHostUri = KeyMapper.TargetHostHints.getRequestContextTargetHost(requestContext);
  URI hostHeaderUri = targetHostUri;

  // no valid target host hint was found in the request context
  if (targetHostUri == null) {
    // Compute the hash code
    int hashCode = _hashFunction.hash(request);

    // we operate only on URIs to ensure that we never hold on to an old tracker client
    // that the cluster manager has removed
    targetHostUri = _state.getRing().get(hashCode);
  } else {
    debug(_log, "Degrader honoring target host header in request, skipping hashing. URI: "
        + targetHostUri.toString());
  }

  TrackerClient client = null;

  if (targetHostUri != null) {
    // These are the clients that were passed in, NOT necessarily the clients that make up the
    // consistent hash ring! Therefore, this linear scan is the best we can do.
    for (TrackerClient trackerClient : trackerClients) {
      if (trackerClient.getUri().equals(targetHostUri)) {
        client = trackerClient;
        break;
      }
    }

    if (client == null) {
      warn(_log, "No client found for " + targetHostUri
          + (hostHeaderUri == null
              ? ", degrader load balancer state is inconsistent with cluster manager"
              : ", target host specified is no longer part of cluster"));
    }
  } else {
    warn(_log, "unable to find a URI to use");
  }

  boolean dropCall = client == null;
  if (!dropCall) {
    dropCall = client.getDegrader(DEFAULT_PARTITION_ID).checkDrop();
    if (dropCall) {
      warn(_log, "client's degrader is dropping call for: ", client);
    } else {
      debug(_log, "returning client: ", client);
    }
  }

  return (!dropCall) ? client : null;
}
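
// Minimal usage sketch (hypothetical caller; "strategy", "request", "clusterGenerationId" and
// "trackerClients" are assumed to exist in the calling code):
//
//   RequestContext ctx = new RequestContext();
//   // A caller may pin the request to a specific host via a TargetHostHints request-context
//   // hint, in which case the hash ring lookup above is skipped.
//   TrackerClient chosen =
//       strategy.getTrackerClient(request, ctx, clusterGenerationId, DEFAULT_PARTITION_ID, trackerClients);
//   // chosen is null when no matching client is found or the selected client's degrader
//   // decides to drop the call.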