blob: 1a4fc5f020c4d7ad3a8d68d87d2188d4902aebaa [file] [log] [blame]
Thomas Vachuska4f1a60c2014-10-28 13:39:07 -07001/*
Brian O'Connora09fe5b2017-08-03 21:12:30 -07002 * Copyright 2014-present Open Networking Foundation
Thomas Vachuska4f1a60c2014-10-28 13:39:07 -07003 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
Brian O'Connorabafb502014-12-02 22:26:20 -080016package org.onosproject.cluster.impl;
Ayaka Koshibe16609692014-09-23 12:46:15 -070017
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -080018import com.codahale.metrics.Timer;
19import com.codahale.metrics.Timer.Context;
Madan Jampanide003d92015-05-11 17:14:20 -070020import com.google.common.collect.Lists;
Claudine Chiudce08152016-03-09 18:19:28 +000021import com.google.common.collect.Sets;
Madan Jampanide003d92015-05-11 17:14:20 -070022import com.google.common.util.concurrent.Futures;
Ayaka Koshibe16609692014-09-23 12:46:15 -070023import org.apache.felix.scr.annotations.Activate;
tom4a5d1712014-09-23 17:49:39 -070024import org.apache.felix.scr.annotations.Component;
Ayaka Koshibe16609692014-09-23 12:46:15 -070025import org.apache.felix.scr.annotations.Deactivate;
Victor Silvaf2b9d032016-09-19 19:43:20 -030026import org.apache.felix.scr.annotations.Modified;
27import org.apache.felix.scr.annotations.Property;
Ayaka Koshibe16609692014-09-23 12:46:15 -070028import org.apache.felix.scr.annotations.Reference;
29import org.apache.felix.scr.annotations.ReferenceCardinality;
tom4a5d1712014-09-23 17:49:39 -070030import org.apache.felix.scr.annotations.Service;
Yuta HIGUCHI6a462832014-11-23 23:56:03 -080031import org.onlab.metrics.MetricsService;
Victor Silvaf2b9d032016-09-19 19:43:20 -030032import org.onosproject.cfg.ComponentConfigService;
33import org.onosproject.cfg.ConfigProperty;
Brian O'Connorabafb502014-12-02 22:26:20 -080034import org.onosproject.cluster.ClusterService;
35import org.onosproject.cluster.ControllerNode;
36import org.onosproject.cluster.NodeId;
37import org.onosproject.cluster.RoleInfo;
38import org.onosproject.core.MetricsHelper;
Thomas Vachuska7a8de842016-03-07 20:56:35 -080039import org.onosproject.event.AbstractListenerManager;
Brian O'Connorabafb502014-12-02 22:26:20 -080040import org.onosproject.mastership.MastershipAdminService;
41import org.onosproject.mastership.MastershipEvent;
42import org.onosproject.mastership.MastershipListener;
43import org.onosproject.mastership.MastershipService;
44import org.onosproject.mastership.MastershipStore;
45import org.onosproject.mastership.MastershipStoreDelegate;
46import org.onosproject.mastership.MastershipTerm;
47import org.onosproject.mastership.MastershipTermService;
48import org.onosproject.net.DeviceId;
49import org.onosproject.net.MastershipRole;
Claudine Chiudce08152016-03-09 18:19:28 +000050import org.onosproject.net.region.Region;
51import org.onosproject.net.region.RegionService;
Jordan Halterman713830d2017-10-07 13:40:44 -070052import org.onosproject.upgrade.UpgradeEvent;
53import org.onosproject.upgrade.UpgradeEventListener;
54import org.onosproject.upgrade.UpgradeService;
Ayaka Koshibe16609692014-09-23 12:46:15 -070055import org.slf4j.Logger;
56
Claudine Chiudce08152016-03-09 18:19:28 +000057import java.util.ArrayList;
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -080058import java.util.Collection;
Claudine Chiudce08152016-03-09 18:19:28 +000059import java.util.Collections;
Thomas Vachuska12dfdc32014-11-29 16:03:12 -080060import java.util.HashMap;
61import java.util.HashSet;
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -080062import java.util.Iterator;
63import java.util.List;
Thomas Vachuska12dfdc32014-11-29 16:03:12 -080064import java.util.Map;
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -080065import java.util.Set;
Madan Jampanif7536ab2015-05-07 23:23:23 -070066import java.util.concurrent.CompletableFuture;
67
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -080068import static com.google.common.base.Preconditions.checkNotNull;
69import static com.google.common.collect.Lists.newArrayList;
70import static org.onlab.metrics.MetricsUtil.startTimer;
71import static org.onlab.metrics.MetricsUtil.stopTimer;
Brian O'Connorabafb502014-12-02 22:26:20 -080072import static org.onosproject.net.MastershipRole.MASTER;
Changhoon Yoon541ef712015-05-23 17:18:34 +090073import static org.onosproject.security.AppGuard.checkPermission;
Thomas Vachuska7a8de842016-03-07 20:56:35 -080074import static org.onosproject.security.AppPermission.Type.CLUSTER_READ;
75import static org.onosproject.security.AppPermission.Type.CLUSTER_WRITE;
Thomas Vachuska42e8cce2015-07-29 19:25:18 -070076import static org.slf4j.LoggerFactory.getLogger;
Changhoon Yoonb856b812015-08-10 03:47:19 +090077
Changhoon Yoon541ef712015-05-23 17:18:34 +090078
Yuta HIGUCHI6a462832014-11-23 23:56:03 -080079
tom4a5d1712014-09-23 17:49:39 -070080@Component(immediate = true)
81@Service
Ayaka Koshibe3eed2b02014-09-23 13:28:05 -070082public class MastershipManager
Thomas Vachuska42e8cce2015-07-29 19:25:18 -070083 extends AbstractListenerManager<MastershipEvent, MastershipListener>
Yuta HIGUCHIa22f69f2014-11-24 22:25:17 -080084 implements MastershipService, MastershipAdminService, MastershipTermService,
85 MetricsHelper {
Ayaka Koshibe16609692014-09-23 12:46:15 -070086
    // Messages passed to checkNotNull when validating method arguments.
    private static final String NODE_ID_NULL = "Node ID cannot be null";
    private static final String DEVICE_ID_NULL = "Device ID cannot be null";
    private static final String ROLE_NULL = "Mastership role cannot be null";

    private final Logger log = getLogger(getClass());

    // Delegate installed on the store in activate(); re-posts store events.
    private final MastershipStoreDelegate delegate = new InternalDelegate();
    // Listener registered with the upgrade service in activate(); may trigger balanceRoles().
    private final UpgradeEventListener upgradeEventListener = new InternalUpgradeEventListener();

    @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
    protected MastershipStore store;

    @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
    protected ClusterService clusterService;

    @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
    protected MetricsService metricsService;

    @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
    protected RegionService regionService;

    @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
    protected ComponentConfigService cfgService;

    @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
    protected UpgradeService upgradeService;

    // Identifier of the local cluster node; assigned in activate().
    private NodeId localNodeId;
    // Timer wrapped around requestRoleFor() calls; created in activate().
    private Timer requestRoleTimer;

    // When true, balanceRoles() first attempts region-aware balancing.
    static final boolean DEFAULT_USE_REGION_FOR_BALANCE_ROLES = false;
    @Property(name = "useRegionForBalanceRoles", boolValue = DEFAULT_USE_REGION_FOR_BALANCE_ROLES,
            label = "Use Regions for balancing roles")
    protected boolean useRegionForBalanceRoles;

    // When true, COMMITTED and RESET upgrade events trigger balanceRoles().
    private static final boolean DEFAULT_REBALANCE_ROLES_ON_UPGRADE = true;
    @Property(name = "rebalanceRolesOnUpgrade",
            boolValue = DEFAULT_REBALANCE_ROLES_ON_UPGRADE,
            label = "Automatically rebalance roles following an upgrade")
    protected boolean rebalanceRolesOnUpgrade = DEFAULT_REBALANCE_ROLES_ON_UPGRADE;
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -0700127
Ayaka Koshibe16609692014-09-23 12:46:15 -0700128 @Activate
129 public void activate() {
Victor Silvaf2b9d032016-09-19 19:43:20 -0300130 cfgService.registerProperties(getClass());
131 modified();
132
Yuta HIGUCHI6a462832014-11-23 23:56:03 -0800133 requestRoleTimer = createTimer("Mastership", "requestRole", "responseTime");
Madan Jampanic6e574f2015-05-29 13:41:52 -0700134 localNodeId = clusterService.getLocalNode().id();
Jordan Halterman61aeb352017-10-18 16:22:17 -0700135 upgradeService.addListener(upgradeEventListener);
Ayaka Koshibe16609692014-09-23 12:46:15 -0700136 eventDispatcher.addSink(MastershipEvent.class, listenerRegistry);
alshabib339a3d92014-09-26 17:54:32 -0700137 store.setDelegate(delegate);
Ayaka Koshibe16609692014-09-23 12:46:15 -0700138 log.info("Started");
139 }
140
Victor Silvaf2b9d032016-09-19 19:43:20 -0300141 @Modified
142 public void modified() {
143 Set<ConfigProperty> configProperties = cfgService.getProperties(getClass().getCanonicalName());
144 for (ConfigProperty property : configProperties) {
Jon Halla3fcf672017-03-28 16:53:22 -0700145 if ("useRegionForBalanceRoles".equals(property.name())) {
Victor Silvaf2b9d032016-09-19 19:43:20 -0300146 useRegionForBalanceRoles = property.asBoolean();
147 }
148 }
149 }
150
Ayaka Koshibe16609692014-09-23 12:46:15 -0700151 @Deactivate
152 public void deactivate() {
153 eventDispatcher.removeSink(MastershipEvent.class);
Jordan Halterman61aeb352017-10-18 16:22:17 -0700154 upgradeService.removeListener(upgradeEventListener);
alshabib339a3d92014-09-26 17:54:32 -0700155 store.unsetDelegate(delegate);
Ayaka Koshibe16609692014-09-23 12:46:15 -0700156 log.info("Stopped");
Victor Silvaf2b9d032016-09-19 19:43:20 -0300157 cfgService.unregisterProperties(getClass(), false);
Ayaka Koshibe16609692014-09-23 12:46:15 -0700158 }
159
Ayaka Koshibe16609692014-09-23 12:46:15 -0700160 @Override
Madan Jampanide003d92015-05-11 17:14:20 -0700161 public CompletableFuture<Void> setRole(NodeId nodeId, DeviceId deviceId, MastershipRole role) {
Ayaka Koshibe16609692014-09-23 12:46:15 -0700162 checkNotNull(nodeId, NODE_ID_NULL);
163 checkNotNull(deviceId, DEVICE_ID_NULL);
164 checkNotNull(role, ROLE_NULL);
Ayaka Koshibed9f693e2014-09-29 18:04:54 -0700165
Madan Jampanif7536ab2015-05-07 23:23:23 -0700166 CompletableFuture<MastershipEvent> eventFuture = null;
Ayaka Koshibee60d4522014-10-28 15:07:00 -0700167
168 switch (role) {
169 case MASTER:
Madan Jampanif7536ab2015-05-07 23:23:23 -0700170 eventFuture = store.setMaster(nodeId, deviceId);
Ayaka Koshibee60d4522014-10-28 15:07:00 -0700171 break;
172 case STANDBY:
Madan Jampanif7536ab2015-05-07 23:23:23 -0700173 eventFuture = store.setStandby(nodeId, deviceId);
Ayaka Koshibee60d4522014-10-28 15:07:00 -0700174 break;
175 case NONE:
Madan Jampanif7536ab2015-05-07 23:23:23 -0700176 eventFuture = store.relinquishRole(nodeId, deviceId);
Ayaka Koshibee60d4522014-10-28 15:07:00 -0700177 break;
178 default:
179 log.info("Unknown role; ignoring");
Madan Jampanide003d92015-05-11 17:14:20 -0700180 return CompletableFuture.completedFuture(null);
Ayaka Koshibe971a38a2014-09-30 11:56:23 -0700181 }
Ayaka Koshibed9f693e2014-09-29 18:04:54 -0700182
Madan Jampanic6e574f2015-05-29 13:41:52 -0700183 return eventFuture.thenAccept(this::post)
184 .thenApply(v -> null);
Ayaka Koshibe16609692014-09-23 12:46:15 -0700185 }
186
187 @Override
tomb41d1ac2014-09-24 01:51:24 -0700188 public MastershipRole getLocalRole(DeviceId deviceId) {
Changhoon Yoonb856b812015-08-10 03:47:19 +0900189 checkPermission(CLUSTER_READ);
Changhoon Yoon541ef712015-05-23 17:18:34 +0900190
tomb41d1ac2014-09-24 01:51:24 -0700191 checkNotNull(deviceId, DEVICE_ID_NULL);
192 return store.getRole(clusterService.getLocalNode().id(), deviceId);
193 }
194
195 @Override
Madan Jampanic6e574f2015-05-29 13:41:52 -0700196 public CompletableFuture<Void> relinquishMastership(DeviceId deviceId) {
Changhoon Yoonb856b812015-08-10 03:47:19 +0900197 checkPermission(CLUSTER_WRITE);
Madan Jampanic6e574f2015-05-29 13:41:52 -0700198 return store.relinquishRole(localNodeId, deviceId)
199 .thenAccept(this::post)
200 .thenApply(v -> null);
tomb41d1ac2014-09-24 01:51:24 -0700201 }
202
203 @Override
Madan Jampanide003d92015-05-11 17:14:20 -0700204 public CompletableFuture<MastershipRole> requestRoleFor(DeviceId deviceId) {
Changhoon Yoonb856b812015-08-10 03:47:19 +0900205 checkPermission(CLUSTER_WRITE);
Changhoon Yoon541ef712015-05-23 17:18:34 +0900206
tomb41d1ac2014-09-24 01:51:24 -0700207 checkNotNull(deviceId, DEVICE_ID_NULL);
Yuta HIGUCHI6a462832014-11-23 23:56:03 -0800208 final Context timer = startTimer(requestRoleTimer);
Madan Jampanide003d92015-05-11 17:14:20 -0700209 return store.requestRole(deviceId).whenComplete((result, error) -> stopTimer(timer));
210
tomb41d1ac2014-09-24 01:51:24 -0700211 }
212
213 @Override
Ayaka Koshibe16609692014-09-23 12:46:15 -0700214 public NodeId getMasterFor(DeviceId deviceId) {
Changhoon Yoonb856b812015-08-10 03:47:19 +0900215 checkPermission(CLUSTER_READ);
Changhoon Yoon541ef712015-05-23 17:18:34 +0900216
Ayaka Koshibe16609692014-09-23 12:46:15 -0700217 checkNotNull(deviceId, DEVICE_ID_NULL);
218 return store.getMaster(deviceId);
219 }
220
221 @Override
222 public Set<DeviceId> getDevicesOf(NodeId nodeId) {
Changhoon Yoonb856b812015-08-10 03:47:19 +0900223 checkPermission(CLUSTER_READ);
Changhoon Yoon541ef712015-05-23 17:18:34 +0900224
Ayaka Koshibe16609692014-09-23 12:46:15 -0700225 checkNotNull(nodeId, NODE_ID_NULL);
226 return store.getDevices(nodeId);
227 }
228
Ayaka Koshibe45503ce2014-10-14 11:26:45 -0700229 @Override
Ayaka Koshibeabedb092014-10-20 17:01:31 -0700230 public RoleInfo getNodesFor(DeviceId deviceId) {
Changhoon Yoonb856b812015-08-10 03:47:19 +0900231 checkPermission(CLUSTER_READ);
Changhoon Yoon541ef712015-05-23 17:18:34 +0900232
Ayaka Koshibe45503ce2014-10-14 11:26:45 -0700233 checkNotNull(deviceId, DEVICE_ID_NULL);
234 return store.getNodes(deviceId);
235 }
Ayaka Koshibeb70d34b2014-09-25 15:43:01 -0700236
237 @Override
Yuta HIGUCHIbcac4992014-11-22 19:27:57 -0800238 public MastershipTerm getMastershipTerm(DeviceId deviceId) {
Heedo Kang4a47a302016-02-29 17:40:23 +0900239 checkPermission(CLUSTER_READ);
Yuta HIGUCHIbcac4992014-11-22 19:27:57 -0800240 return store.getTermFor(deviceId);
Ayaka Koshibeb70d34b2014-09-25 15:43:01 -0700241 }
242
Ayaka Koshibe16609692014-09-23 12:46:15 -0700243 @Override
Yuta HIGUCHIa22f69f2014-11-24 22:25:17 -0800244 public MetricsService metricsService() {
245 return metricsService;
246 }
Ayaka Koshibe16609692014-09-23 12:46:15 -0700247
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800248 @Override
249 public void balanceRoles() {
250 List<ControllerNode> nodes = newArrayList(clusterService.getNodes());
Thomas Vachuska12dfdc32014-11-29 16:03:12 -0800251 Map<ControllerNode, Set<DeviceId>> controllerDevices = new HashMap<>();
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800252 int deviceCount = 0;
253
254 // Create buckets reflecting current ownership.
255 for (ControllerNode node : nodes) {
Thomas Vachuska7a8de842016-03-07 20:56:35 -0800256 if (clusterService.getState(node.id()).isActive()) {
Thomas Vachuska12dfdc32014-11-29 16:03:12 -0800257 Set<DeviceId> devicesOf = new HashSet<>(getDevicesOf(node.id()));
258 deviceCount += devicesOf.size();
259 controllerDevices.put(node, devicesOf);
260 log.info("Node {} has {} devices.", node.id(), devicesOf.size());
261 }
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800262 }
263
Claudine Chiudce08152016-03-09 18:19:28 +0000264 if (useRegionForBalanceRoles && balanceRolesUsingRegions(controllerDevices)) {
265 return;
266 }
267
268 // Now re-balance the buckets until they are roughly even.
269 List<CompletableFuture<Void>> balanceBucketsFutures = balanceControllerNodes(controllerDevices, deviceCount);
270
271 CompletableFuture<Void> balanceRolesFuture = CompletableFuture.allOf(
272 balanceBucketsFutures.toArray(new CompletableFuture[balanceBucketsFutures.size()]));
273
274 Futures.getUnchecked(balanceRolesFuture);
275 }
276
277 /**
278 * Balances the nodes specified in controllerDevices.
279 *
280 * @param controllerDevices controller nodes to devices map
281 * @param deviceCount number of devices mastered by controller nodes
282 * @return list of setRole futures for "moved" devices
283 */
284 private List<CompletableFuture<Void>> balanceControllerNodes(
285 Map<ControllerNode, Set<DeviceId>> controllerDevices, int deviceCount) {
Thomas Vachuska12dfdc32014-11-29 16:03:12 -0800286 // Now re-balance the buckets until they are roughly even.
Madan Jampanide003d92015-05-11 17:14:20 -0700287 List<CompletableFuture<Void>> balanceBucketsFutures = Lists.newLinkedList();
Thomas Vachuska12dfdc32014-11-29 16:03:12 -0800288 int rounds = controllerDevices.keySet().size();
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800289 for (int i = 0; i < rounds; i++) {
290 // Iterate over the buckets and find the smallest and the largest.
Thomas Vachuska12dfdc32014-11-29 16:03:12 -0800291 ControllerNode smallest = findBucket(true, controllerDevices);
292 ControllerNode largest = findBucket(false, controllerDevices);
Madan Jampanide003d92015-05-11 17:14:20 -0700293 balanceBucketsFutures.add(balanceBuckets(smallest, largest, controllerDevices, deviceCount));
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800294 }
Claudine Chiudce08152016-03-09 18:19:28 +0000295 return balanceBucketsFutures;
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800296 }
297
Claudine Chiudce08152016-03-09 18:19:28 +0000298 /**
299 * Finds node with the minimum/maximum devices from a list of nodes.
300 *
301 * @param min true: minimum, false: maximum
302 * @param controllerDevices controller nodes to devices map
303 * @return controller node with minimum/maximum devices
304 */
Thomas Vachuska12dfdc32014-11-29 16:03:12 -0800305 private ControllerNode findBucket(boolean min,
306 Map<ControllerNode, Set<DeviceId>> controllerDevices) {
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800307 int xSize = min ? Integer.MAX_VALUE : -1;
308 ControllerNode xNode = null;
Thomas Vachuska12dfdc32014-11-29 16:03:12 -0800309 for (ControllerNode node : controllerDevices.keySet()) {
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800310 int size = controllerDevices.get(node).size();
311 if ((min && size < xSize) || (!min && size > xSize)) {
312 xSize = size;
313 xNode = node;
314 }
315 }
316 return xNode;
317 }
318
Claudine Chiudce08152016-03-09 18:19:28 +0000319 /**
320 * Balance the node buckets by moving devices from largest to smallest node.
321 *
322 * @param smallest node that is master of the smallest number of devices
323 * @param largest node that is master of the largest number of devices
324 * @param controllerDevices controller nodes to devices map
325 * @param deviceCount number of devices mastered by controller nodes
326 * @return list of setRole futures for "moved" devices
327 */
Madan Jampanide003d92015-05-11 17:14:20 -0700328 private CompletableFuture<Void> balanceBuckets(ControllerNode smallest, ControllerNode largest,
Thomas Vachuska12dfdc32014-11-29 16:03:12 -0800329 Map<ControllerNode, Set<DeviceId>> controllerDevices,
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800330 int deviceCount) {
331 Collection<DeviceId> minBucket = controllerDevices.get(smallest);
332 Collection<DeviceId> maxBucket = controllerDevices.get(largest);
333 int bucketCount = controllerDevices.keySet().size();
334
335 int delta = (maxBucket.size() - minBucket.size()) / 2;
336 delta = Math.min(deviceCount / bucketCount, delta);
337
Madan Jampanide003d92015-05-11 17:14:20 -0700338 List<CompletableFuture<Void>> setRoleFutures = Lists.newLinkedList();
339
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800340 if (delta > 0) {
341 log.info("Attempting to move {} nodes from {} to {}...", delta,
342 largest.id(), smallest.id());
343
344 int i = 0;
345 Iterator<DeviceId> it = maxBucket.iterator();
346 while (it.hasNext() && i < delta) {
347 DeviceId deviceId = it.next();
348 log.info("Setting {} as the master for {}", smallest.id(), deviceId);
Madan Jampanide003d92015-05-11 17:14:20 -0700349 setRoleFutures.add(setRole(smallest.id(), deviceId, MASTER));
Thomas Vachuska12dfdc32014-11-29 16:03:12 -0800350 controllerDevices.get(smallest).add(deviceId);
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800351 it.remove();
352 i++;
353 }
354 }
Madan Jampanide003d92015-05-11 17:14:20 -0700355
356 return CompletableFuture.allOf(setRoleFutures.toArray(new CompletableFuture[setRoleFutures.size()]));
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800357 }
358
Claudine Chiudce08152016-03-09 18:19:28 +0000359 /**
360 * Balances the nodes considering Region information.
361 *
362 * @param allControllerDevices controller nodes to devices map
363 * @return true: nodes balanced; false: nodes not balanced
364 */
365 private boolean balanceRolesUsingRegions(Map<ControllerNode, Set<DeviceId>> allControllerDevices) {
366 Set<Region> regions = regionService.getRegions();
367 if (regions.isEmpty()) {
368 return false; // no balancing was done using regions.
369 }
370
371 // handle nodes belonging to regions
372 Set<ControllerNode> nodesInRegions = Sets.newHashSet();
373 for (Region region : regions) {
374 Map<ControllerNode, Set<DeviceId>> activeRegionControllers =
375 balanceRolesInRegion(region, allControllerDevices);
376 nodesInRegions.addAll(activeRegionControllers.keySet());
377 }
378
379 // handle nodes not belonging to any region
380 Set<ControllerNode> nodesNotInRegions = Sets.difference(allControllerDevices.keySet(), nodesInRegions);
381 if (!nodesNotInRegions.isEmpty()) {
382 int deviceCount = 0;
383 Map<ControllerNode, Set<DeviceId>> controllerDevicesNotInRegions = new HashMap<>();
384 for (ControllerNode controllerNode: nodesNotInRegions) {
385 controllerDevicesNotInRegions.put(controllerNode, allControllerDevices.get(controllerNode));
386 deviceCount += allControllerDevices.get(controllerNode).size();
387 }
388 // Now re-balance the buckets until they are roughly even.
389 List<CompletableFuture<Void>> balanceBucketsFutures =
390 balanceControllerNodes(controllerDevicesNotInRegions, deviceCount);
391
392 CompletableFuture<Void> balanceRolesFuture = CompletableFuture.allOf(
393 balanceBucketsFutures.toArray(new CompletableFuture[balanceBucketsFutures.size()]));
394
395 Futures.getUnchecked(balanceRolesFuture);
396 }
397 return true; // balancing was done using regions.
398 }
399
400 /**
401 * Balances the nodes in specified region.
402 *
403 * @param region region in which nodes are to be balanced
404 * @param allControllerDevices controller nodes to devices map
405 * @return controller nodes that were balanced
406 */
407 private Map<ControllerNode, Set<DeviceId>> balanceRolesInRegion(Region region,
408 Map<ControllerNode, Set<DeviceId>> allControllerDevices) {
409
410 // retrieve all devices associated with specified region
411 Set<DeviceId> devicesInRegion = regionService.getRegionDevices(region.id());
412 log.info("Region {} has {} devices.", region.id(), devicesInRegion.size());
413 if (devicesInRegion.isEmpty()) {
414 return new HashMap<>(); // no devices in this region, so nothing to balance.
415 }
416
417 List<Set<NodeId>> mastersList = region.masters();
418 log.info("Region {} has {} sets of masters.", region.id(), mastersList.size());
419 if (mastersList.isEmpty()) {
420 // TODO handle devices that belong to a region, which has no masters defined
421 return new HashMap<>(); // for now just leave devices alone
422 }
423
424 // get the region's preferred set of masters
425 Set<DeviceId> devicesInMasters = Sets.newHashSet();
426 Map<ControllerNode, Set<DeviceId>> regionalControllerDevices =
427 getRegionsPreferredMasters(region, devicesInMasters, allControllerDevices);
428
429 // Now re-balance the buckets until they are roughly even.
430 List<CompletableFuture<Void>> balanceBucketsFutures =
431 balanceControllerNodes(regionalControllerDevices, devicesInMasters.size());
432
433 // handle devices that are not currently mastered by the master node set
434 Set<DeviceId> devicesNotMasteredWithControllers = Sets.difference(devicesInRegion, devicesInMasters);
435 if (!devicesNotMasteredWithControllers.isEmpty()) {
436 // active controllers in master node set are already balanced, just
437 // assign device mastership in sequence
438 List<ControllerNode> sorted = new ArrayList<>(regionalControllerDevices.keySet());
439 Collections.sort(sorted, (o1, o2) ->
440 ((Integer) (regionalControllerDevices.get(o1)).size())
441 .compareTo((Integer) (regionalControllerDevices.get(o2)).size()));
442 int deviceIndex = 0;
443 for (DeviceId deviceId : devicesNotMasteredWithControllers) {
444 ControllerNode cnode = sorted.get(deviceIndex % sorted.size());
445 balanceBucketsFutures.add(setRole(cnode.id(), deviceId, MASTER));
446 regionalControllerDevices.get(cnode).add(deviceId);
447 deviceIndex++;
448 }
449 }
450
451 CompletableFuture<Void> balanceRolesFuture = CompletableFuture.allOf(
452 balanceBucketsFutures.toArray(new CompletableFuture[balanceBucketsFutures.size()]));
453
454 Futures.getUnchecked(balanceRolesFuture);
455
456 // update the map before returning
457 regionalControllerDevices.forEach((controllerNode, deviceIds) -> {
458 regionalControllerDevices.put(controllerNode, new HashSet<>(getDevicesOf(controllerNode.id())));
459 });
460
461 return regionalControllerDevices;
462 }
463
464 /**
465 * Get region's preferred set of master nodes - the first master node set that has at
466 * least one active node.
467 *
468 * @param region region for which preferred set of master nodes is requested
469 * @param devicesInMasters device set to track devices in preferred set of master nodes
470 * @param allControllerDevices controller nodes to devices map
471 * @return region's preferred master nodes (and devices that use them as masters)
472 */
473 private Map<ControllerNode, Set<DeviceId>> getRegionsPreferredMasters(Region region,
474 Set<DeviceId> devicesInMasters,
475 Map<ControllerNode, Set<DeviceId>> allControllerDevices) {
476 Map<ControllerNode, Set<DeviceId>> regionalControllerDevices = new HashMap<>();
477 int listIndex = 0;
478 for (Set<NodeId> masterSet: region.masters()) {
479 log.info("Region {} masters set {} has {} nodes.",
480 region.id(), listIndex, masterSet.size());
481 if (masterSet.isEmpty()) { // nothing on this level
482 listIndex++;
483 continue;
484 }
485 // Create buckets reflecting current ownership.
486 for (NodeId nodeId : masterSet) {
Michele Santuari6ebb36e2016-03-28 10:12:04 -0700487 if (clusterService.getState(nodeId).isActive()) {
Claudine Chiudce08152016-03-09 18:19:28 +0000488 ControllerNode controllerNode = clusterService.getNode(nodeId);
489 Set<DeviceId> devicesOf = new HashSet<>(allControllerDevices.get(controllerNode));
490 regionalControllerDevices.put(controllerNode, devicesOf);
491 devicesInMasters.addAll(devicesOf);
492 log.info("Active Node {} has {} devices.", nodeId, devicesOf.size());
493 }
494 }
495 if (!regionalControllerDevices.isEmpty()) {
496 break; // now have a set of >0 active controllers
497 }
498 listIndex++; // keep on looking
499 }
500 return regionalControllerDevices;
501 }
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800502
alshabib339a3d92014-09-26 17:54:32 -0700503 public class InternalDelegate implements MastershipStoreDelegate {
alshabib339a3d92014-09-26 17:54:32 -0700504 @Override
505 public void notify(MastershipEvent event) {
Thomas Vachuska42e8cce2015-07-29 19:25:18 -0700506 post(event);
alshabib339a3d92014-09-26 17:54:32 -0700507 }
alshabib339a3d92014-09-26 17:54:32 -0700508 }
509
Jordan Halterman713830d2017-10-07 13:40:44 -0700510 private class InternalUpgradeEventListener implements UpgradeEventListener {
511 @Override
512 public void event(UpgradeEvent event) {
513 if (rebalanceRolesOnUpgrade &&
514 (event.type() == UpgradeEvent.Type.COMMITTED || event.type() == UpgradeEvent.Type.RESET)) {
515 balanceRoles();
516 }
517 }
518 }
519
Ayaka Koshibe16609692014-09-23 12:46:15 -0700520}