/* * * Copyright 2013 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * */ package com.netflix.loadbalancer; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Random; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.TimeUnit; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.netflix.client.ClientFactory; import com.netflix.config.DynamicBooleanProperty; import com.netflix.config.DynamicDoubleProperty; import com.netflix.config.DynamicPropertyFactory; import com.netflix.servo.monitor.Monitors; import com.netflix.servo.monitor.Stopwatch; import com.netflix.servo.monitor.Timer; /** * Load balancer that can avoid a zone as a whole when choosing server. *

* The key metric used to measure the zone condition is Average Active Requests, which is aggregated per REST client per zone. It is the total number of outstanding requests in a zone divided by the number of available targeted instances (excluding instances whose circuit breaker has tripped). This metric is very effective when timeouts occur slowly in a bad zone.

* The LoadBalancer will calculate and examine zone stats of all available zones.
* If the Average Active Requests for any zone has reached a configured threshold,
* this zone will be dropped from the active server list. In case more than one zone
* has reached the threshold, the zone with the most active requests per server will
* be dropped.
*
* Once the worst zone is dropped, a zone will be chosen among the rest with a
* probability proportional to its number of instances.
*
* A server will be returned from the chosen zone with a given Rule (a Rule is a load
* balancing strategy, for example {@link AvailabilityFilteringRule}).
*
* For each request, the steps above will be repeated. That is to say, all zone-related
* load balancing decisions are made in real time with up-to-date statistics aiding
* the choice.
*
* @author awang
*
* @param <T> the server type handled by this load balancer
*/
public class ZoneAwareLoadBalancer<T extends Server> extends DynamicServerListLoadBalancer<T> {

    /** Per-zone load balancers, keyed by lower-cased zone name; entries are created lazily by {@link #getLoadBalancer(String)}. */
    private ConcurrentHashMap<String, BaseLoadBalancer> balancers = new ConcurrentHashMap<String, BaseLoadBalancer>();

    private static final Logger logger = LoggerFactory.getLogger(ZoneAwareLoadBalancer.class);

    private static final Random random = new Random();

    /** Load-per-server threshold; resolved lazily on the first zone-aware {@code chooseServer} call because the property name contains {@code getName()}. */
    private volatile DynamicDoubleProperty triggeringLoad;

    /** Blackout-percentage threshold; resolved lazily for the same reason as {@link #triggeringLoad}. */
    private volatile DynamicDoubleProperty triggeringBlackoutPercentage;

    /** Global switch for the zone-aware logic; defaults to {@code true}. */
    private static final DynamicBooleanProperty ENABLED =
            DynamicPropertyFactory.getInstance().getBooleanProperty("ZoneAwareNIWSDiscoveryLoadBalancer.enabled", true);

    /** Times {@link #chooseServer(Object)}; assigned in {@link #init()} and therefore {@code null} until then. */
    private Timer chooseServerTimer;

    String monitorId;

    /**
     * Replaces the inherited list of up servers.
     *
     * @param upServerList the new list of servers considered up
     */
    void setUpServerList(List<Server> upServerList) {
        this.upServerList = upServerList;
    }

    /**
     * Delegates to the superclass and then pushes each zone's server list into the
     * load balancer dedicated to that zone, creating that balancer if necessary.
     *
     * @param zoneServersMap servers grouped by zone name
     */
    @Override
    protected void setServerListForZones(Map<String, List<Server>> zoneServersMap) {
        super.setServerListForZones(zoneServersMap);
        for (Map.Entry<String, List<Server>> entry : zoneServersMap.entrySet()) {
            String zone = entry.getKey().toLowerCase();
            getLoadBalancer(zone).setServersList(entry.getValue());
        }
    }

    /**
     * Builds a zone-name to snapshot map from the current load balancer stats.
     * Not referenced within this class: {@link #chooseServer(Object)} uses
     * {@code ZoneAvoidanceRule.createSnapshot} instead.
     *
     * @return a new map holding one snapshot per available zone
     */
    private Map<String, ZoneSnapshot> createSnapshot() {
        Map<String, ZoneSnapshot> map = new HashMap<String, ZoneSnapshot>();
        LoadBalancerStats lbStats = getLoadBalancerStats();
        for (String zone : lbStats.getAvailableZones()) {
            ZoneSnapshot snapshot = lbStats.getZoneSnapshot(zone);
            map.put(zone, snapshot);
        }
        return map;
    }

    /**
     * Chooses a server, avoiding bad zones when the zone-aware logic applies.
     * Falls back to the superclass when the logic is disabled, when at most one zone
     * is available, when no zone needs to be avoided, or when the zone-aware path
     * fails or yields no server.
     *
     * @param key the load balancing key, passed through unchanged to the delegate
     * @return the chosen server, as returned by the per-zone balancer or the superclass
     */
    @Override
    public Server chooseServer(Object key) {
        // The timer is only created in init(); tolerate calls made before that.
        Timer timer = chooseServerTimer;
        Stopwatch stopWatch = (timer != null) ? timer.start() : null;
        try {
            if (!ENABLED.get() || getLoadBalancerStats().getAvailableZones().size() <= 1) {
                logger.debug("Zone aware logic disabled or there is only one zone");
                return super.chooseServer(key);
            }
            Server server = null;
            try {
                server = chooseServerAvoidingBadZones(key);
            } catch (Exception e) {
                // Best effort: any failure here falls through to the superclass below.
                // Errors (e.g. OutOfMemoryError) are deliberately not swallowed.
                logger.error("Unexpected exception when choosing server using zone aware logic", e);
            }
            if (server != null) {
                return server;
            }
            logger.debug("Zone avoidance logic is not invoked.");
            return super.chooseServer(key);
        } finally {
            if (stopWatch != null) {
                stopWatch.stop();
            }
        }
    }

    /**
     * Runs the zone-avoidance selection: snapshots the zones, asks
     * {@code ZoneAvoidanceRule} which zones remain available and, only when at least
     * one zone was excluded, picks a zone and delegates to its load balancer.
     *
     * @param key the load balancing key
     * @return the chosen server, or {@code null} when no zone was excluded, no zone
     *         could be chosen, or the zone's balancer returned {@code null}
     */
    private Server chooseServerAvoidingBadZones(Object key) {
        LoadBalancerStats lbStats = getLoadBalancerStats();
        Map<String, ZoneSnapshot> zoneSnapshot = ZoneAvoidanceRule.createSnapshot(lbStats);
        logger.debug("Zone snapshots: {}", zoneSnapshot);
        if (triggeringLoad == null) {
            triggeringLoad = DynamicPropertyFactory.getInstance().getDoubleProperty(
                    "ZoneAwareNIWSDiscoveryLoadBalancer." + this.getName() + ".triggeringLoadPerServerThreshold", 0.2d);
        }
        if (triggeringBlackoutPercentage == null) {
            // NOTE: "Percetage" is misspelled in the property key; it is kept as-is
            // because changing it would silently ignore existing configuration.
            triggeringBlackoutPercentage = DynamicPropertyFactory.getInstance().getDoubleProperty(
                    "ZoneAwareNIWSDiscoveryLoadBalancer." + this.getName() + ".avoidZoneWithBlackoutPercetage", 0.99999d);
        }
        Set<String> availableZones = ZoneAvoidanceRule.getAvailableZones(
                zoneSnapshot, triggeringLoad.get(), triggeringBlackoutPercentage.get());
        logger.debug("Available zones: {}", availableZones);
        if (availableZones == null || availableZones.size() >= zoneSnapshot.keySet().size()) {
            // Nothing was excluded, so the regular (superclass) selection applies.
            return null;
        }
        String zone = ZoneAvoidanceRule.randomChooseZone(zoneSnapshot, availableZones);
        logger.debug("Zone chosen: {}", zone);
        if (zone == null) {
            return null;
        }
        BaseLoadBalancer zoneLoadBalancer = getLoadBalancer(zone);
        return zoneLoadBalancer.chooseServer(key);
    }

    /**
     * Returns the load balancer dedicated to the given zone, creating and
     * registering it on first use.
     *
     * @param zone the zone name; it is lower-cased before lookup
     * @return the balancer registered for the zone
     */
    private BaseLoadBalancer getLoadBalancer(String zone) {
        zone = zone.toLowerCase();
        BaseLoadBalancer loadBalancer = balancers.get(zone);
        if (loadBalancer == null) {
            // We need to create a rule object for the load balancer of each zone
            IRule rule = cloneRule(this.getRule());
            loadBalancer = new BaseLoadBalancer(this.getName() + "_" + zone, rule, this.getLoadBalancerStats());
            // If another thread registered a balancer first, use that one instead.
            BaseLoadBalancer prev = balancers.putIfAbsent(zone, loadBalancer);
            if (prev != null) {
                loadBalancer = prev;
            }
        }
        return loadBalancer;
    }

    /**
     * Creates a fresh rule of the same class as the given one, instantiated through
     * {@code ClientFactory} with this balancer's client config.
     *
     * @param toClone the rule whose class should be instantiated; may be {@code null}
     * @return a new rule instance, or a new {@link AvailabilityFilteringRule} when
     *         {@code toClone} is {@code null}
     * @throws RuntimeException if the rule cannot be instantiated
     */
    private IRule cloneRule(IRule toClone) {
        IRule rule;
        if (toClone == null) {
            rule = new AvailabilityFilteringRule();
        } else {
            String ruleClass = toClone.getClass().getName();
            try {
                rule = (IRule) ClientFactory.instantiateInstanceWithClientConfig(ruleClass, this.getClientConfig());
            } catch (Exception e) {
                throw new RuntimeException("Unexpected exception creating rule for ZoneAwareLoadBalancer", e);
            }
        }
        return rule;
    }

    /**
     * Sets the rule on this balancer and gives every existing per-zone balancer its
     * own freshly created rule of the same class.
     *
     * @param rule the new rule
     */
    @Override
    public void setRule(IRule rule) {
        super.setRule(rule);
        // NOTE(review): the null check presumably covers a call made before this
        // class's field initializers have run (e.g. from a superclass constructor) - confirm.
        if (balancers != null) {
            for (BaseLoadBalancer zoneLoadBalancer : balancers.values()) {
                zoneLoadBalancer.setRule(cloneRule(rule));
            }
        }
    }

    /**
     * Creates the timer used by {@link #chooseServer(Object)} and then runs the
     * superclass initialization.
     */
    @Override
    public void init() {
        chooseServerTimer = Monitors.newTimer("ZoneAwareLoadBalancer_" + getName() + "_chooseServerTimer", TimeUnit.MILLISECONDS);
        super.init();
    }
}