blob: 5dea660f309c5b1ed537e41e424cf55d1c4ee2a0 [file] [log] [blame]
/*
 * Copyright 2014-present Open Networking Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
Brian O'Connorabafb502014-12-02 22:26:20 -080016package org.onosproject.cluster.impl;
Ayaka Koshibe16609692014-09-23 12:46:15 -070017
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -080018import com.codahale.metrics.Timer;
19import com.codahale.metrics.Timer.Context;
Madan Jampanide003d92015-05-11 17:14:20 -070020import com.google.common.collect.Lists;
Claudine Chiudce08152016-03-09 18:19:28 +000021import com.google.common.collect.Sets;
Madan Jampanide003d92015-05-11 17:14:20 -070022import com.google.common.util.concurrent.Futures;
Ayaka Koshibe16609692014-09-23 12:46:15 -070023import org.apache.felix.scr.annotations.Activate;
tom4a5d1712014-09-23 17:49:39 -070024import org.apache.felix.scr.annotations.Component;
Ayaka Koshibe16609692014-09-23 12:46:15 -070025import org.apache.felix.scr.annotations.Deactivate;
Victor Silvaf2b9d032016-09-19 19:43:20 -030026import org.apache.felix.scr.annotations.Modified;
27import org.apache.felix.scr.annotations.Property;
Ayaka Koshibe16609692014-09-23 12:46:15 -070028import org.apache.felix.scr.annotations.Reference;
29import org.apache.felix.scr.annotations.ReferenceCardinality;
tom4a5d1712014-09-23 17:49:39 -070030import org.apache.felix.scr.annotations.Service;
Yuta HIGUCHI6a462832014-11-23 23:56:03 -080031import org.onlab.metrics.MetricsService;
Victor Silvaf2b9d032016-09-19 19:43:20 -030032import org.onosproject.cfg.ComponentConfigService;
33import org.onosproject.cfg.ConfigProperty;
Brian O'Connorabafb502014-12-02 22:26:20 -080034import org.onosproject.cluster.ClusterService;
35import org.onosproject.cluster.ControllerNode;
36import org.onosproject.cluster.NodeId;
37import org.onosproject.cluster.RoleInfo;
38import org.onosproject.core.MetricsHelper;
Thomas Vachuska7a8de842016-03-07 20:56:35 -080039import org.onosproject.event.AbstractListenerManager;
Brian O'Connorabafb502014-12-02 22:26:20 -080040import org.onosproject.mastership.MastershipAdminService;
41import org.onosproject.mastership.MastershipEvent;
Jordan Halterman0a2bd452018-06-13 17:24:58 -070042import org.onosproject.mastership.MastershipInfo;
Brian O'Connorabafb502014-12-02 22:26:20 -080043import org.onosproject.mastership.MastershipListener;
44import org.onosproject.mastership.MastershipService;
45import org.onosproject.mastership.MastershipStore;
46import org.onosproject.mastership.MastershipStoreDelegate;
47import org.onosproject.mastership.MastershipTerm;
48import org.onosproject.mastership.MastershipTermService;
49import org.onosproject.net.DeviceId;
50import org.onosproject.net.MastershipRole;
Claudine Chiudce08152016-03-09 18:19:28 +000051import org.onosproject.net.region.Region;
52import org.onosproject.net.region.RegionService;
Jordan Halterman713830d2017-10-07 13:40:44 -070053import org.onosproject.upgrade.UpgradeEvent;
54import org.onosproject.upgrade.UpgradeEventListener;
55import org.onosproject.upgrade.UpgradeService;
Ayaka Koshibe16609692014-09-23 12:46:15 -070056import org.slf4j.Logger;
57
Claudine Chiudce08152016-03-09 18:19:28 +000058import java.util.ArrayList;
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -080059import java.util.Collection;
Claudine Chiudce08152016-03-09 18:19:28 +000060import java.util.Collections;
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -080061import java.util.Comparator;
Thomas Vachuska12dfdc32014-11-29 16:03:12 -080062import java.util.HashMap;
63import java.util.HashSet;
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -080064import java.util.Iterator;
65import java.util.List;
Thomas Vachuska12dfdc32014-11-29 16:03:12 -080066import java.util.Map;
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -080067import java.util.Set;
Madan Jampanif7536ab2015-05-07 23:23:23 -070068import java.util.concurrent.CompletableFuture;
69
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -080070import static com.google.common.base.Preconditions.checkNotNull;
71import static com.google.common.collect.Lists.newArrayList;
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -080072import static java.util.concurrent.CompletableFuture.allOf;
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -080073import static org.onlab.metrics.MetricsUtil.startTimer;
74import static org.onlab.metrics.MetricsUtil.stopTimer;
Brian O'Connorabafb502014-12-02 22:26:20 -080075import static org.onosproject.net.MastershipRole.MASTER;
Changhoon Yoon541ef712015-05-23 17:18:34 +090076import static org.onosproject.security.AppGuard.checkPermission;
Thomas Vachuska7a8de842016-03-07 20:56:35 -080077import static org.onosproject.security.AppPermission.Type.CLUSTER_READ;
78import static org.onosproject.security.AppPermission.Type.CLUSTER_WRITE;
Thomas Vachuska42e8cce2015-07-29 19:25:18 -070079import static org.slf4j.LoggerFactory.getLogger;
Changhoon Yoonb856b812015-08-10 03:47:19 +090080
Changhoon Yoon541ef712015-05-23 17:18:34 +090081
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -080082/**
83 * Component providing the node-device mastership service.
84 */
tom4a5d1712014-09-23 17:49:39 -070085@Component(immediate = true)
86@Service
Ayaka Koshibe3eed2b02014-09-23 13:28:05 -070087public class MastershipManager
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -080088 extends AbstractListenerManager<MastershipEvent, MastershipListener>
89 implements MastershipService, MastershipAdminService, MastershipTermService,
90 MetricsHelper {
Ayaka Koshibe16609692014-09-23 12:46:15 -070091
92 private static final String NODE_ID_NULL = "Node ID cannot be null";
93 private static final String DEVICE_ID_NULL = "Device ID cannot be null";
94 private static final String ROLE_NULL = "Mastership role cannot be null";
95
96 private final Logger log = getLogger(getClass());
97
alshabib339a3d92014-09-26 17:54:32 -070098 private final MastershipStoreDelegate delegate = new InternalDelegate();
Jordan Halterman713830d2017-10-07 13:40:44 -070099 private final UpgradeEventListener upgradeEventListener = new InternalUpgradeEventListener();
Ayaka Koshibe16609692014-09-23 12:46:15 -0700100
101 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
102 protected MastershipStore store;
103
104 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
tom4a5d1712014-09-23 17:49:39 -0700105 protected ClusterService clusterService;
Ayaka Koshibe16609692014-09-23 12:46:15 -0700106
Yuta HIGUCHI6a462832014-11-23 23:56:03 -0800107 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
108 protected MetricsService metricsService;
109
Claudine Chiudce08152016-03-09 18:19:28 +0000110 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
111 protected RegionService regionService;
112
Victor Silvaf2b9d032016-09-19 19:43:20 -0300113 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
114 protected ComponentConfigService cfgService;
115
Jordan Halterman713830d2017-10-07 13:40:44 -0700116 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
117 protected UpgradeService upgradeService;
118
Madan Jampanic6e574f2015-05-29 13:41:52 -0700119 private NodeId localNodeId;
Yuta HIGUCHI6a462832014-11-23 23:56:03 -0800120 private Timer requestRoleTimer;
Victor Silvaf2b9d032016-09-19 19:43:20 -0300121
122 static final boolean DEFAULT_USE_REGION_FOR_BALANCE_ROLES = false;
123 @Property(name = "useRegionForBalanceRoles", boolValue = DEFAULT_USE_REGION_FOR_BALANCE_ROLES,
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -0800124 label = "Use Regions for balancing roles")
Jordan Halterman713830d2017-10-07 13:40:44 -0700125 protected boolean useRegionForBalanceRoles;
126
127 private static final boolean DEFAULT_REBALANCE_ROLES_ON_UPGRADE = true;
128 @Property(name = "rebalanceRolesOnUpgrade",
129 boolValue = DEFAULT_REBALANCE_ROLES_ON_UPGRADE,
130 label = "Automatically rebalance roles following an upgrade")
Jordan Halterman0676c972017-11-05 11:49:23 -0800131 protected boolean rebalanceRolesOnUpgrade = DEFAULT_REBALANCE_ROLES_ON_UPGRADE;
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -0700132
Ayaka Koshibe16609692014-09-23 12:46:15 -0700133 @Activate
134 public void activate() {
Victor Silvaf2b9d032016-09-19 19:43:20 -0300135 cfgService.registerProperties(getClass());
136 modified();
137
Yuta HIGUCHI6a462832014-11-23 23:56:03 -0800138 requestRoleTimer = createTimer("Mastership", "requestRole", "responseTime");
Madan Jampanic6e574f2015-05-29 13:41:52 -0700139 localNodeId = clusterService.getLocalNode().id();
Jordan Halterman61aeb352017-10-18 16:22:17 -0700140 upgradeService.addListener(upgradeEventListener);
Ayaka Koshibe16609692014-09-23 12:46:15 -0700141 eventDispatcher.addSink(MastershipEvent.class, listenerRegistry);
alshabib339a3d92014-09-26 17:54:32 -0700142 store.setDelegate(delegate);
Ayaka Koshibe16609692014-09-23 12:46:15 -0700143 log.info("Started");
144 }
145
Victor Silvaf2b9d032016-09-19 19:43:20 -0300146 @Modified
147 public void modified() {
148 Set<ConfigProperty> configProperties = cfgService.getProperties(getClass().getCanonicalName());
149 for (ConfigProperty property : configProperties) {
Jon Halla3fcf672017-03-28 16:53:22 -0700150 if ("useRegionForBalanceRoles".equals(property.name())) {
Victor Silvaf2b9d032016-09-19 19:43:20 -0300151 useRegionForBalanceRoles = property.asBoolean();
152 }
153 }
154 }
155
Ayaka Koshibe16609692014-09-23 12:46:15 -0700156 @Deactivate
157 public void deactivate() {
158 eventDispatcher.removeSink(MastershipEvent.class);
Jordan Halterman61aeb352017-10-18 16:22:17 -0700159 upgradeService.removeListener(upgradeEventListener);
alshabib339a3d92014-09-26 17:54:32 -0700160 store.unsetDelegate(delegate);
Ayaka Koshibe16609692014-09-23 12:46:15 -0700161 log.info("Stopped");
Victor Silvaf2b9d032016-09-19 19:43:20 -0300162 cfgService.unregisterProperties(getClass(), false);
Ayaka Koshibe16609692014-09-23 12:46:15 -0700163 }
164
Ayaka Koshibe16609692014-09-23 12:46:15 -0700165 @Override
Madan Jampanide003d92015-05-11 17:14:20 -0700166 public CompletableFuture<Void> setRole(NodeId nodeId, DeviceId deviceId, MastershipRole role) {
Ayaka Koshibe16609692014-09-23 12:46:15 -0700167 checkNotNull(nodeId, NODE_ID_NULL);
168 checkNotNull(deviceId, DEVICE_ID_NULL);
169 checkNotNull(role, ROLE_NULL);
Ayaka Koshibed9f693e2014-09-29 18:04:54 -0700170
Madan Jampanif7536ab2015-05-07 23:23:23 -0700171 CompletableFuture<MastershipEvent> eventFuture = null;
Ayaka Koshibee60d4522014-10-28 15:07:00 -0700172
173 switch (role) {
174 case MASTER:
Madan Jampanif7536ab2015-05-07 23:23:23 -0700175 eventFuture = store.setMaster(nodeId, deviceId);
Ayaka Koshibee60d4522014-10-28 15:07:00 -0700176 break;
177 case STANDBY:
Madan Jampanif7536ab2015-05-07 23:23:23 -0700178 eventFuture = store.setStandby(nodeId, deviceId);
Ayaka Koshibee60d4522014-10-28 15:07:00 -0700179 break;
180 case NONE:
Madan Jampanif7536ab2015-05-07 23:23:23 -0700181 eventFuture = store.relinquishRole(nodeId, deviceId);
Ayaka Koshibee60d4522014-10-28 15:07:00 -0700182 break;
183 default:
184 log.info("Unknown role; ignoring");
Madan Jampanide003d92015-05-11 17:14:20 -0700185 return CompletableFuture.completedFuture(null);
Ayaka Koshibe971a38a2014-09-30 11:56:23 -0700186 }
Ayaka Koshibed9f693e2014-09-29 18:04:54 -0700187
Madan Jampanic6e574f2015-05-29 13:41:52 -0700188 return eventFuture.thenAccept(this::post)
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -0800189 .thenApply(v -> null);
Ayaka Koshibe16609692014-09-23 12:46:15 -0700190 }
191
192 @Override
tomb41d1ac2014-09-24 01:51:24 -0700193 public MastershipRole getLocalRole(DeviceId deviceId) {
Changhoon Yoonb856b812015-08-10 03:47:19 +0900194 checkPermission(CLUSTER_READ);
Changhoon Yoon541ef712015-05-23 17:18:34 +0900195
tomb41d1ac2014-09-24 01:51:24 -0700196 checkNotNull(deviceId, DEVICE_ID_NULL);
197 return store.getRole(clusterService.getLocalNode().id(), deviceId);
198 }
199
200 @Override
Madan Jampanic6e574f2015-05-29 13:41:52 -0700201 public CompletableFuture<Void> relinquishMastership(DeviceId deviceId) {
Changhoon Yoonb856b812015-08-10 03:47:19 +0900202 checkPermission(CLUSTER_WRITE);
Madan Jampanic6e574f2015-05-29 13:41:52 -0700203 return store.relinquishRole(localNodeId, deviceId)
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -0800204 .thenAccept(this::post)
205 .thenApply(v -> null);
tomb41d1ac2014-09-24 01:51:24 -0700206 }
207
208 @Override
Madan Jampanide003d92015-05-11 17:14:20 -0700209 public CompletableFuture<MastershipRole> requestRoleFor(DeviceId deviceId) {
Changhoon Yoonb856b812015-08-10 03:47:19 +0900210 checkPermission(CLUSTER_WRITE);
Changhoon Yoon541ef712015-05-23 17:18:34 +0900211
tomb41d1ac2014-09-24 01:51:24 -0700212 checkNotNull(deviceId, DEVICE_ID_NULL);
Yuta HIGUCHI6a462832014-11-23 23:56:03 -0800213 final Context timer = startTimer(requestRoleTimer);
Madan Jampanide003d92015-05-11 17:14:20 -0700214 return store.requestRole(deviceId).whenComplete((result, error) -> stopTimer(timer));
215
tomb41d1ac2014-09-24 01:51:24 -0700216 }
217
218 @Override
Ayaka Koshibe16609692014-09-23 12:46:15 -0700219 public NodeId getMasterFor(DeviceId deviceId) {
Changhoon Yoonb856b812015-08-10 03:47:19 +0900220 checkPermission(CLUSTER_READ);
Changhoon Yoon541ef712015-05-23 17:18:34 +0900221
Ayaka Koshibe16609692014-09-23 12:46:15 -0700222 checkNotNull(deviceId, DEVICE_ID_NULL);
223 return store.getMaster(deviceId);
224 }
225
226 @Override
227 public Set<DeviceId> getDevicesOf(NodeId nodeId) {
Changhoon Yoonb856b812015-08-10 03:47:19 +0900228 checkPermission(CLUSTER_READ);
Changhoon Yoon541ef712015-05-23 17:18:34 +0900229
Ayaka Koshibe16609692014-09-23 12:46:15 -0700230 checkNotNull(nodeId, NODE_ID_NULL);
231 return store.getDevices(nodeId);
232 }
233
Ayaka Koshibe45503ce2014-10-14 11:26:45 -0700234 @Override
Ayaka Koshibeabedb092014-10-20 17:01:31 -0700235 public RoleInfo getNodesFor(DeviceId deviceId) {
Changhoon Yoonb856b812015-08-10 03:47:19 +0900236 checkPermission(CLUSTER_READ);
Changhoon Yoon541ef712015-05-23 17:18:34 +0900237
Ayaka Koshibe45503ce2014-10-14 11:26:45 -0700238 checkNotNull(deviceId, DEVICE_ID_NULL);
239 return store.getNodes(deviceId);
240 }
Ayaka Koshibeb70d34b2014-09-25 15:43:01 -0700241
242 @Override
Jordan Halterman0a2bd452018-06-13 17:24:58 -0700243 public MastershipInfo getMastershipFor(DeviceId deviceId) {
244 checkPermission(CLUSTER_READ);
245 checkNotNull(deviceId, DEVICE_ID_NULL);
246 return store.getMastership(deviceId);
247 }
248
249 @Override
Yuta HIGUCHIbcac4992014-11-22 19:27:57 -0800250 public MastershipTerm getMastershipTerm(DeviceId deviceId) {
Heedo Kang4a47a302016-02-29 17:40:23 +0900251 checkPermission(CLUSTER_READ);
Yuta HIGUCHIbcac4992014-11-22 19:27:57 -0800252 return store.getTermFor(deviceId);
Ayaka Koshibeb70d34b2014-09-25 15:43:01 -0700253 }
254
Ayaka Koshibe16609692014-09-23 12:46:15 -0700255 @Override
Yuta HIGUCHIa22f69f2014-11-24 22:25:17 -0800256 public MetricsService metricsService() {
257 return metricsService;
258 }
Ayaka Koshibe16609692014-09-23 12:46:15 -0700259
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800260 @Override
261 public void balanceRoles() {
262 List<ControllerNode> nodes = newArrayList(clusterService.getNodes());
Thomas Vachuska12dfdc32014-11-29 16:03:12 -0800263 Map<ControllerNode, Set<DeviceId>> controllerDevices = new HashMap<>();
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -0800264 Set<DeviceId> orphanedDevices = Sets.newHashSet();
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800265 int deviceCount = 0;
266
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -0800267 // Create buckets reflecting current ownership; do this irrespective of
268 // whether the node is active.
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800269 for (ControllerNode node : nodes) {
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -0800270 Set<DeviceId> devicesOf = new HashSet<>(getDevicesOf(node.id()));
Thomas Vachuska7a8de842016-03-07 20:56:35 -0800271 if (clusterService.getState(node.id()).isActive()) {
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -0800272 log.info("Node {} has {} devices.", node.id(), devicesOf.size());
Thomas Vachuska12dfdc32014-11-29 16:03:12 -0800273 deviceCount += devicesOf.size();
274 controllerDevices.put(node, devicesOf);
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -0800275 } else if (!devicesOf.isEmpty()) {
276 log.warn("Inactive node {} has {} orphaned devices.", node.id(), devicesOf.size());
277 orphanedDevices.addAll(getDevicesOf(node.id()));
Thomas Vachuska12dfdc32014-11-29 16:03:12 -0800278 }
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800279 }
280
Claudine Chiudce08152016-03-09 18:19:28 +0000281 if (useRegionForBalanceRoles && balanceRolesUsingRegions(controllerDevices)) {
282 return;
283 }
284
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -0800285 List<CompletableFuture<Void>> balanceBucketsFutures = Lists.newLinkedList();
Claudine Chiudce08152016-03-09 18:19:28 +0000286
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -0800287 // First re-balance the buckets until they are roughly even.
288 balanceControllerNodes(controllerDevices, deviceCount, balanceBucketsFutures);
289
290 // Then attempt to distribute any orphaned devices among the buckets.
291 distributeOrphanedDevices(controllerDevices, orphanedDevices, balanceBucketsFutures);
292
293 CompletableFuture<Void> balanceRolesFuture =
294 allOf(balanceBucketsFutures.toArray(new CompletableFuture[balanceBucketsFutures.size()]));
Claudine Chiudce08152016-03-09 18:19:28 +0000295
296 Futures.getUnchecked(balanceRolesFuture);
297 }
298
299 /**
300 * Balances the nodes specified in controllerDevices.
301 *
302 * @param controllerDevices controller nodes to devices map
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -0800303 * @param deviceCount number of devices mastered by controller nodes
304 * @param futures list of setRole futures for "moved" devices
Claudine Chiudce08152016-03-09 18:19:28 +0000305 */
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -0800306 private void balanceControllerNodes(Map<ControllerNode, Set<DeviceId>> controllerDevices,
307 int deviceCount,
308 List<CompletableFuture<Void>> futures) {
Thomas Vachuska12dfdc32014-11-29 16:03:12 -0800309 // Now re-balance the buckets until they are roughly even.
310 int rounds = controllerDevices.keySet().size();
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800311 for (int i = 0; i < rounds; i++) {
312 // Iterate over the buckets and find the smallest and the largest.
Thomas Vachuska12dfdc32014-11-29 16:03:12 -0800313 ControllerNode smallest = findBucket(true, controllerDevices);
314 ControllerNode largest = findBucket(false, controllerDevices);
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -0800315 futures.add(balanceBuckets(smallest, largest, controllerDevices, deviceCount));
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800316 }
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -0800317 }
318
319 /**
320 * Uses the set of orphaned devices to even out the load among the controllers.
321 *
322 * @param controllerDevices controller nodes to devices map
323 * @param orphanedDevices set of orphaned devices without an active master
324 * @param futures list of completable future to track the progress of the balancing operation
325 */
326 private void distributeOrphanedDevices(Map<ControllerNode, Set<DeviceId>> controllerDevices,
327 Set<DeviceId> orphanedDevices,
328 List<CompletableFuture<Void>> futures) {
329 // Now re-distribute the orphaned devices into buckets until they are roughly even.
330 while (!orphanedDevices.isEmpty()) {
331 // Iterate over the buckets and find the smallest bucket.
332 ControllerNode smallest = findBucket(true, controllerDevices);
333 changeMastership(smallest, controllerDevices.get(smallest),
334 orphanedDevices, 1, futures);
335 }
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800336 }
337
Claudine Chiudce08152016-03-09 18:19:28 +0000338 /**
339 * Finds node with the minimum/maximum devices from a list of nodes.
340 *
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -0800341 * @param min true: minimum, false: maximum
Claudine Chiudce08152016-03-09 18:19:28 +0000342 * @param controllerDevices controller nodes to devices map
343 * @return controller node with minimum/maximum devices
344 */
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -0800345
Thomas Vachuska12dfdc32014-11-29 16:03:12 -0800346 private ControllerNode findBucket(boolean min,
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -0800347 Map<ControllerNode, Set<DeviceId>> controllerDevices) {
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800348 int xSize = min ? Integer.MAX_VALUE : -1;
349 ControllerNode xNode = null;
Thomas Vachuska12dfdc32014-11-29 16:03:12 -0800350 for (ControllerNode node : controllerDevices.keySet()) {
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800351 int size = controllerDevices.get(node).size();
352 if ((min && size < xSize) || (!min && size > xSize)) {
353 xSize = size;
354 xNode = node;
355 }
356 }
357 return xNode;
358 }
359
Claudine Chiudce08152016-03-09 18:19:28 +0000360 /**
361 * Balance the node buckets by moving devices from largest to smallest node.
362 *
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -0800363 * @param smallest node that is master of the smallest number of devices
364 * @param largest node that is master of the largest number of devices
Claudine Chiudce08152016-03-09 18:19:28 +0000365 * @param controllerDevices controller nodes to devices map
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -0800366 * @param deviceCount number of devices mastered by controller nodes
Claudine Chiudce08152016-03-09 18:19:28 +0000367 * @return list of setRole futures for "moved" devices
368 */
Madan Jampanide003d92015-05-11 17:14:20 -0700369 private CompletableFuture<Void> balanceBuckets(ControllerNode smallest, ControllerNode largest,
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -0800370 Map<ControllerNode, Set<DeviceId>> controllerDevices,
371 int deviceCount) {
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800372 Collection<DeviceId> minBucket = controllerDevices.get(smallest);
373 Collection<DeviceId> maxBucket = controllerDevices.get(largest);
374 int bucketCount = controllerDevices.keySet().size();
375
376 int delta = (maxBucket.size() - minBucket.size()) / 2;
377 delta = Math.min(deviceCount / bucketCount, delta);
378
Madan Jampanide003d92015-05-11 17:14:20 -0700379 List<CompletableFuture<Void>> setRoleFutures = Lists.newLinkedList();
380
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800381 if (delta > 0) {
382 log.info("Attempting to move {} nodes from {} to {}...", delta,
383 largest.id(), smallest.id());
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -0800384 changeMastership(smallest, minBucket, maxBucket, delta, setRoleFutures);
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800385 }
Madan Jampanide003d92015-05-11 17:14:20 -0700386
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -0800387 return allOf(setRoleFutures.toArray(new CompletableFuture[setRoleFutures.size()]));
388 }
389
390 /**
391 * Changes mastership for the specified number of devices in the given source
392 * bucket to the specified node and ads those devices to the given target
393 * bucket. Also adds the futures for tracking the role reassignment progress.
394 *
395 * @param toNode target controller node
396 * @param toBucket target bucket
397 * @param fromBucket source bucket
398 * @param count number of devices
399 * @param futures futures for tracking operation progress
400 */
401 private void changeMastership(ControllerNode toNode, Collection<DeviceId> toBucket,
402 Collection<DeviceId> fromBucket, int count,
403 List<CompletableFuture<Void>> futures) {
404 int i = 0;
405 Iterator<DeviceId> it = fromBucket.iterator();
406 while (it.hasNext() && i < count) {
407 DeviceId deviceId = it.next();
408 log.info("Setting {} as the master for {}", toNode.id(), deviceId);
409 futures.add(setRole(toNode.id(), deviceId, MASTER));
410 toBucket.add(deviceId);
411 it.remove();
412 i++;
413 }
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800414 }
415
Claudine Chiudce08152016-03-09 18:19:28 +0000416 /**
417 * Balances the nodes considering Region information.
418 *
419 * @param allControllerDevices controller nodes to devices map
420 * @return true: nodes balanced; false: nodes not balanced
421 */
422 private boolean balanceRolesUsingRegions(Map<ControllerNode, Set<DeviceId>> allControllerDevices) {
423 Set<Region> regions = regionService.getRegions();
424 if (regions.isEmpty()) {
425 return false; // no balancing was done using regions.
426 }
427
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -0800428 // Handle nodes belonging to regions
Claudine Chiudce08152016-03-09 18:19:28 +0000429 Set<ControllerNode> nodesInRegions = Sets.newHashSet();
430 for (Region region : regions) {
431 Map<ControllerNode, Set<DeviceId>> activeRegionControllers =
432 balanceRolesInRegion(region, allControllerDevices);
433 nodesInRegions.addAll(activeRegionControllers.keySet());
434 }
435
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -0800436 // Handle nodes not belonging to any region
Claudine Chiudce08152016-03-09 18:19:28 +0000437 Set<ControllerNode> nodesNotInRegions = Sets.difference(allControllerDevices.keySet(), nodesInRegions);
438 if (!nodesNotInRegions.isEmpty()) {
439 int deviceCount = 0;
440 Map<ControllerNode, Set<DeviceId>> controllerDevicesNotInRegions = new HashMap<>();
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -0800441 for (ControllerNode controllerNode : nodesNotInRegions) {
Claudine Chiudce08152016-03-09 18:19:28 +0000442 controllerDevicesNotInRegions.put(controllerNode, allControllerDevices.get(controllerNode));
443 deviceCount += allControllerDevices.get(controllerNode).size();
444 }
445 // Now re-balance the buckets until they are roughly even.
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -0800446 List<CompletableFuture<Void>> balanceBucketsFutures = Lists.newArrayList();
447 balanceControllerNodes(controllerDevicesNotInRegions, deviceCount, balanceBucketsFutures);
Claudine Chiudce08152016-03-09 18:19:28 +0000448
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -0800449 CompletableFuture<Void> balanceRolesFuture = allOf(
Claudine Chiudce08152016-03-09 18:19:28 +0000450 balanceBucketsFutures.toArray(new CompletableFuture[balanceBucketsFutures.size()]));
451
452 Futures.getUnchecked(balanceRolesFuture);
453 }
454 return true; // balancing was done using regions.
455 }
456
457 /**
458 * Balances the nodes in specified region.
459 *
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -0800460 * @param region region in which nodes are to be balanced
Claudine Chiudce08152016-03-09 18:19:28 +0000461 * @param allControllerDevices controller nodes to devices map
462 * @return controller nodes that were balanced
463 */
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -0800464 private Map<ControllerNode, Set<DeviceId>>
465 balanceRolesInRegion(Region region,
466 Map<ControllerNode, Set<DeviceId>> allControllerDevices) {
Claudine Chiudce08152016-03-09 18:19:28 +0000467
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -0800468 // Retrieve all devices associated with specified region
Claudine Chiudce08152016-03-09 18:19:28 +0000469 Set<DeviceId> devicesInRegion = regionService.getRegionDevices(region.id());
470 log.info("Region {} has {} devices.", region.id(), devicesInRegion.size());
471 if (devicesInRegion.isEmpty()) {
472 return new HashMap<>(); // no devices in this region, so nothing to balance.
473 }
474
475 List<Set<NodeId>> mastersList = region.masters();
476 log.info("Region {} has {} sets of masters.", region.id(), mastersList.size());
477 if (mastersList.isEmpty()) {
478 // TODO handle devices that belong to a region, which has no masters defined
479 return new HashMap<>(); // for now just leave devices alone
480 }
481
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -0800482 // Get the region's preferred set of masters
Claudine Chiudce08152016-03-09 18:19:28 +0000483 Set<DeviceId> devicesInMasters = Sets.newHashSet();
484 Map<ControllerNode, Set<DeviceId>> regionalControllerDevices =
485 getRegionsPreferredMasters(region, devicesInMasters, allControllerDevices);
486
487 // Now re-balance the buckets until they are roughly even.
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -0800488 List<CompletableFuture<Void>> balanceBucketsFutures = Lists.newArrayList();
489 balanceControllerNodes(regionalControllerDevices, devicesInMasters.size(), balanceBucketsFutures);
Claudine Chiudce08152016-03-09 18:19:28 +0000490
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -0800491 // Handle devices that are not currently mastered by the master node set
Claudine Chiudce08152016-03-09 18:19:28 +0000492 Set<DeviceId> devicesNotMasteredWithControllers = Sets.difference(devicesInRegion, devicesInMasters);
493 if (!devicesNotMasteredWithControllers.isEmpty()) {
494 // active controllers in master node set are already balanced, just
495 // assign device mastership in sequence
496 List<ControllerNode> sorted = new ArrayList<>(regionalControllerDevices.keySet());
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -0800497 Collections.sort(sorted, Comparator.comparingInt(o -> (regionalControllerDevices.get(o)).size()));
Claudine Chiudce08152016-03-09 18:19:28 +0000498 int deviceIndex = 0;
499 for (DeviceId deviceId : devicesNotMasteredWithControllers) {
500 ControllerNode cnode = sorted.get(deviceIndex % sorted.size());
501 balanceBucketsFutures.add(setRole(cnode.id(), deviceId, MASTER));
502 regionalControllerDevices.get(cnode).add(deviceId);
503 deviceIndex++;
504 }
505 }
506
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -0800507 CompletableFuture<Void> balanceRolesFuture =
508 allOf(balanceBucketsFutures.toArray(new CompletableFuture[balanceBucketsFutures.size()]));
Claudine Chiudce08152016-03-09 18:19:28 +0000509
510 Futures.getUnchecked(balanceRolesFuture);
511
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -0800512 // Update the map before returning
Claudine Chiudce08152016-03-09 18:19:28 +0000513 regionalControllerDevices.forEach((controllerNode, deviceIds) -> {
514 regionalControllerDevices.put(controllerNode, new HashSet<>(getDevicesOf(controllerNode.id())));
515 });
516
517 return regionalControllerDevices;
518 }
519
520 /**
521 * Get region's preferred set of master nodes - the first master node set that has at
522 * least one active node.
523 *
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -0800524 * @param region region for which preferred set of master nodes is requested
525 * @param devicesInMasters device set to track devices in preferred set of master nodes
Claudine Chiudce08152016-03-09 18:19:28 +0000526 * @param allControllerDevices controller nodes to devices map
527 * @return region's preferred master nodes (and devices that use them as masters)
528 */
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -0800529 private Map<ControllerNode, Set<DeviceId>>
530 getRegionsPreferredMasters(Region region,
531 Set<DeviceId> devicesInMasters,
532 Map<ControllerNode, Set<DeviceId>> allControllerDevices) {
Claudine Chiudce08152016-03-09 18:19:28 +0000533 Map<ControllerNode, Set<DeviceId>> regionalControllerDevices = new HashMap<>();
534 int listIndex = 0;
Thomas Vachuskaf2e09cb2017-11-06 15:17:06 -0800535 for (Set<NodeId> masterSet : region.masters()) {
Claudine Chiudce08152016-03-09 18:19:28 +0000536 log.info("Region {} masters set {} has {} nodes.",
537 region.id(), listIndex, masterSet.size());
538 if (masterSet.isEmpty()) { // nothing on this level
539 listIndex++;
540 continue;
541 }
542 // Create buckets reflecting current ownership.
543 for (NodeId nodeId : masterSet) {
Michele Santuari6ebb36e2016-03-28 10:12:04 -0700544 if (clusterService.getState(nodeId).isActive()) {
Claudine Chiudce08152016-03-09 18:19:28 +0000545 ControllerNode controllerNode = clusterService.getNode(nodeId);
546 Set<DeviceId> devicesOf = new HashSet<>(allControllerDevices.get(controllerNode));
547 regionalControllerDevices.put(controllerNode, devicesOf);
548 devicesInMasters.addAll(devicesOf);
549 log.info("Active Node {} has {} devices.", nodeId, devicesOf.size());
550 }
551 }
552 if (!regionalControllerDevices.isEmpty()) {
553 break; // now have a set of >0 active controllers
554 }
555 listIndex++; // keep on looking
556 }
557 return regionalControllerDevices;
558 }
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800559
alshabib339a3d92014-09-26 17:54:32 -0700560 public class InternalDelegate implements MastershipStoreDelegate {
alshabib339a3d92014-09-26 17:54:32 -0700561 @Override
562 public void notify(MastershipEvent event) {
Thomas Vachuska42e8cce2015-07-29 19:25:18 -0700563 post(event);
alshabib339a3d92014-09-26 17:54:32 -0700564 }
alshabib339a3d92014-09-26 17:54:32 -0700565 }
566
Jordan Halterman713830d2017-10-07 13:40:44 -0700567 private class InternalUpgradeEventListener implements UpgradeEventListener {
568 @Override
569 public void event(UpgradeEvent event) {
570 if (rebalanceRolesOnUpgrade &&
571 (event.type() == UpgradeEvent.Type.COMMITTED || event.type() == UpgradeEvent.Type.RESET)) {
572 balanceRoles();
573 }
574 }
575 }
576
Ayaka Koshibe16609692014-09-23 12:46:15 -0700577}