blob: f63aab174123a145ceabbc6d9fe67cf6c0322568 [file] [log] [blame]
/*
 * Copyright 2014-2015 Open Networking Laboratory
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
Brian O'Connorabafb502014-12-02 22:26:20 -080016package org.onosproject.cluster.impl;
Ayaka Koshibe16609692014-09-23 12:46:15 -070017
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -080018import com.codahale.metrics.Timer;
19import com.codahale.metrics.Timer.Context;
Madan Jampanide003d92015-05-11 17:14:20 -070020import com.google.common.collect.Lists;
Claudine Chiudce08152016-03-09 18:19:28 +000021import com.google.common.collect.Sets;
Madan Jampanide003d92015-05-11 17:14:20 -070022import com.google.common.util.concurrent.Futures;
Ayaka Koshibe16609692014-09-23 12:46:15 -070023import org.apache.felix.scr.annotations.Activate;
tom4a5d1712014-09-23 17:49:39 -070024import org.apache.felix.scr.annotations.Component;
Ayaka Koshibe16609692014-09-23 12:46:15 -070025import org.apache.felix.scr.annotations.Deactivate;
26import org.apache.felix.scr.annotations.Reference;
27import org.apache.felix.scr.annotations.ReferenceCardinality;
tom4a5d1712014-09-23 17:49:39 -070028import org.apache.felix.scr.annotations.Service;
Yuta HIGUCHI6a462832014-11-23 23:56:03 -080029import org.onlab.metrics.MetricsService;
Brian O'Connorabafb502014-12-02 22:26:20 -080030import org.onosproject.cluster.ClusterService;
31import org.onosproject.cluster.ControllerNode;
32import org.onosproject.cluster.NodeId;
33import org.onosproject.cluster.RoleInfo;
34import org.onosproject.core.MetricsHelper;
Thomas Vachuska7a8de842016-03-07 20:56:35 -080035import org.onosproject.event.AbstractListenerManager;
Brian O'Connorabafb502014-12-02 22:26:20 -080036import org.onosproject.mastership.MastershipAdminService;
37import org.onosproject.mastership.MastershipEvent;
38import org.onosproject.mastership.MastershipListener;
39import org.onosproject.mastership.MastershipService;
40import org.onosproject.mastership.MastershipStore;
41import org.onosproject.mastership.MastershipStoreDelegate;
42import org.onosproject.mastership.MastershipTerm;
43import org.onosproject.mastership.MastershipTermService;
44import org.onosproject.net.DeviceId;
45import org.onosproject.net.MastershipRole;
Claudine Chiudce08152016-03-09 18:19:28 +000046import org.onosproject.net.region.Region;
47import org.onosproject.net.region.RegionService;
Ayaka Koshibe16609692014-09-23 12:46:15 -070048import org.slf4j.Logger;
49
Claudine Chiudce08152016-03-09 18:19:28 +000050import java.util.ArrayList;
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -080051import java.util.Collection;
Claudine Chiudce08152016-03-09 18:19:28 +000052import java.util.Collections;
Thomas Vachuska12dfdc32014-11-29 16:03:12 -080053import java.util.HashMap;
54import java.util.HashSet;
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -080055import java.util.Iterator;
56import java.util.List;
Thomas Vachuska12dfdc32014-11-29 16:03:12 -080057import java.util.Map;
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -080058import java.util.Set;
Madan Jampanif7536ab2015-05-07 23:23:23 -070059import java.util.concurrent.CompletableFuture;
60
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -080061import static com.google.common.base.Preconditions.checkNotNull;
62import static com.google.common.collect.Lists.newArrayList;
63import static org.onlab.metrics.MetricsUtil.startTimer;
64import static org.onlab.metrics.MetricsUtil.stopTimer;
Brian O'Connorabafb502014-12-02 22:26:20 -080065import static org.onosproject.net.MastershipRole.MASTER;
Changhoon Yoon541ef712015-05-23 17:18:34 +090066import static org.onosproject.security.AppGuard.checkPermission;
Thomas Vachuska7a8de842016-03-07 20:56:35 -080067import static org.onosproject.security.AppPermission.Type.CLUSTER_READ;
68import static org.onosproject.security.AppPermission.Type.CLUSTER_WRITE;
Thomas Vachuska42e8cce2015-07-29 19:25:18 -070069import static org.slf4j.LoggerFactory.getLogger;
Changhoon Yoonb856b812015-08-10 03:47:19 +090070
Changhoon Yoon541ef712015-05-23 17:18:34 +090071
Yuta HIGUCHI6a462832014-11-23 23:56:03 -080072
tom4a5d1712014-09-23 17:49:39 -070073@Component(immediate = true)
74@Service
Ayaka Koshibe3eed2b02014-09-23 13:28:05 -070075public class MastershipManager
Thomas Vachuska42e8cce2015-07-29 19:25:18 -070076 extends AbstractListenerManager<MastershipEvent, MastershipListener>
Yuta HIGUCHIa22f69f2014-11-24 22:25:17 -080077 implements MastershipService, MastershipAdminService, MastershipTermService,
78 MetricsHelper {
Ayaka Koshibe16609692014-09-23 12:46:15 -070079
80 private static final String NODE_ID_NULL = "Node ID cannot be null";
81 private static final String DEVICE_ID_NULL = "Device ID cannot be null";
82 private static final String ROLE_NULL = "Mastership role cannot be null";
83
84 private final Logger log = getLogger(getClass());
85
alshabib339a3d92014-09-26 17:54:32 -070086 private final MastershipStoreDelegate delegate = new InternalDelegate();
Ayaka Koshibe16609692014-09-23 12:46:15 -070087
88 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
89 protected MastershipStore store;
90
91 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
tom4a5d1712014-09-23 17:49:39 -070092 protected ClusterService clusterService;
Ayaka Koshibe16609692014-09-23 12:46:15 -070093
Yuta HIGUCHI6a462832014-11-23 23:56:03 -080094 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
95 protected MetricsService metricsService;
96
Claudine Chiudce08152016-03-09 18:19:28 +000097 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
98 protected RegionService regionService;
99
Madan Jampanic6e574f2015-05-29 13:41:52 -0700100 private NodeId localNodeId;
Yuta HIGUCHI6a462832014-11-23 23:56:03 -0800101 private Timer requestRoleTimer;
Claudine Chiudce08152016-03-09 18:19:28 +0000102 public boolean useRegionForBalanceRoles;
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -0700103
Ayaka Koshibe16609692014-09-23 12:46:15 -0700104 @Activate
105 public void activate() {
Yuta HIGUCHI6a462832014-11-23 23:56:03 -0800106 requestRoleTimer = createTimer("Mastership", "requestRole", "responseTime");
Madan Jampanic6e574f2015-05-29 13:41:52 -0700107 localNodeId = clusterService.getLocalNode().id();
Ayaka Koshibe16609692014-09-23 12:46:15 -0700108 eventDispatcher.addSink(MastershipEvent.class, listenerRegistry);
alshabib339a3d92014-09-26 17:54:32 -0700109 store.setDelegate(delegate);
Ayaka Koshibe16609692014-09-23 12:46:15 -0700110 log.info("Started");
111 }
112
113 @Deactivate
114 public void deactivate() {
115 eventDispatcher.removeSink(MastershipEvent.class);
alshabib339a3d92014-09-26 17:54:32 -0700116 store.unsetDelegate(delegate);
Ayaka Koshibe16609692014-09-23 12:46:15 -0700117 log.info("Stopped");
118 }
119
Ayaka Koshibe16609692014-09-23 12:46:15 -0700120 @Override
Madan Jampanide003d92015-05-11 17:14:20 -0700121 public CompletableFuture<Void> setRole(NodeId nodeId, DeviceId deviceId, MastershipRole role) {
Ayaka Koshibe16609692014-09-23 12:46:15 -0700122 checkNotNull(nodeId, NODE_ID_NULL);
123 checkNotNull(deviceId, DEVICE_ID_NULL);
124 checkNotNull(role, ROLE_NULL);
Ayaka Koshibed9f693e2014-09-29 18:04:54 -0700125
Madan Jampanif7536ab2015-05-07 23:23:23 -0700126 CompletableFuture<MastershipEvent> eventFuture = null;
Ayaka Koshibee60d4522014-10-28 15:07:00 -0700127
128 switch (role) {
129 case MASTER:
Madan Jampanif7536ab2015-05-07 23:23:23 -0700130 eventFuture = store.setMaster(nodeId, deviceId);
Ayaka Koshibee60d4522014-10-28 15:07:00 -0700131 break;
132 case STANDBY:
Madan Jampanif7536ab2015-05-07 23:23:23 -0700133 eventFuture = store.setStandby(nodeId, deviceId);
Ayaka Koshibee60d4522014-10-28 15:07:00 -0700134 break;
135 case NONE:
Madan Jampanif7536ab2015-05-07 23:23:23 -0700136 eventFuture = store.relinquishRole(nodeId, deviceId);
Ayaka Koshibee60d4522014-10-28 15:07:00 -0700137 break;
138 default:
139 log.info("Unknown role; ignoring");
Madan Jampanide003d92015-05-11 17:14:20 -0700140 return CompletableFuture.completedFuture(null);
Ayaka Koshibe971a38a2014-09-30 11:56:23 -0700141 }
Ayaka Koshibed9f693e2014-09-29 18:04:54 -0700142
Madan Jampanic6e574f2015-05-29 13:41:52 -0700143 return eventFuture.thenAccept(this::post)
144 .thenApply(v -> null);
Ayaka Koshibe16609692014-09-23 12:46:15 -0700145 }
146
147 @Override
tomb41d1ac2014-09-24 01:51:24 -0700148 public MastershipRole getLocalRole(DeviceId deviceId) {
Changhoon Yoonb856b812015-08-10 03:47:19 +0900149 checkPermission(CLUSTER_READ);
Changhoon Yoon541ef712015-05-23 17:18:34 +0900150
tomb41d1ac2014-09-24 01:51:24 -0700151 checkNotNull(deviceId, DEVICE_ID_NULL);
152 return store.getRole(clusterService.getLocalNode().id(), deviceId);
153 }
154
155 @Override
Madan Jampanic6e574f2015-05-29 13:41:52 -0700156 public CompletableFuture<Void> relinquishMastership(DeviceId deviceId) {
Changhoon Yoonb856b812015-08-10 03:47:19 +0900157 checkPermission(CLUSTER_WRITE);
Madan Jampanic6e574f2015-05-29 13:41:52 -0700158 return store.relinquishRole(localNodeId, deviceId)
159 .thenAccept(this::post)
160 .thenApply(v -> null);
tomb41d1ac2014-09-24 01:51:24 -0700161 }
162
163 @Override
Madan Jampanide003d92015-05-11 17:14:20 -0700164 public CompletableFuture<MastershipRole> requestRoleFor(DeviceId deviceId) {
Changhoon Yoonb856b812015-08-10 03:47:19 +0900165 checkPermission(CLUSTER_WRITE);
Changhoon Yoon541ef712015-05-23 17:18:34 +0900166
tomb41d1ac2014-09-24 01:51:24 -0700167 checkNotNull(deviceId, DEVICE_ID_NULL);
Yuta HIGUCHI6a462832014-11-23 23:56:03 -0800168 final Context timer = startTimer(requestRoleTimer);
Madan Jampanide003d92015-05-11 17:14:20 -0700169 return store.requestRole(deviceId).whenComplete((result, error) -> stopTimer(timer));
170
tomb41d1ac2014-09-24 01:51:24 -0700171 }
172
173 @Override
Ayaka Koshibe16609692014-09-23 12:46:15 -0700174 public NodeId getMasterFor(DeviceId deviceId) {
Changhoon Yoonb856b812015-08-10 03:47:19 +0900175 checkPermission(CLUSTER_READ);
Changhoon Yoon541ef712015-05-23 17:18:34 +0900176
Ayaka Koshibe16609692014-09-23 12:46:15 -0700177 checkNotNull(deviceId, DEVICE_ID_NULL);
178 return store.getMaster(deviceId);
179 }
180
181 @Override
182 public Set<DeviceId> getDevicesOf(NodeId nodeId) {
Changhoon Yoonb856b812015-08-10 03:47:19 +0900183 checkPermission(CLUSTER_READ);
Changhoon Yoon541ef712015-05-23 17:18:34 +0900184
Ayaka Koshibe16609692014-09-23 12:46:15 -0700185 checkNotNull(nodeId, NODE_ID_NULL);
186 return store.getDevices(nodeId);
187 }
188
Ayaka Koshibe45503ce2014-10-14 11:26:45 -0700189 @Override
Ayaka Koshibeabedb092014-10-20 17:01:31 -0700190 public RoleInfo getNodesFor(DeviceId deviceId) {
Changhoon Yoonb856b812015-08-10 03:47:19 +0900191 checkPermission(CLUSTER_READ);
Changhoon Yoon541ef712015-05-23 17:18:34 +0900192
Ayaka Koshibe45503ce2014-10-14 11:26:45 -0700193 checkNotNull(deviceId, DEVICE_ID_NULL);
194 return store.getNodes(deviceId);
195 }
Ayaka Koshibeb70d34b2014-09-25 15:43:01 -0700196
197 @Override
Yuta HIGUCHIbcac4992014-11-22 19:27:57 -0800198 public MastershipTerm getMastershipTerm(DeviceId deviceId) {
Heedo Kang4a47a302016-02-29 17:40:23 +0900199 checkPermission(CLUSTER_READ);
Yuta HIGUCHIbcac4992014-11-22 19:27:57 -0800200 return store.getTermFor(deviceId);
Ayaka Koshibeb70d34b2014-09-25 15:43:01 -0700201 }
202
Ayaka Koshibe16609692014-09-23 12:46:15 -0700203 @Override
Yuta HIGUCHIa22f69f2014-11-24 22:25:17 -0800204 public MetricsService metricsService() {
205 return metricsService;
206 }
Ayaka Koshibe16609692014-09-23 12:46:15 -0700207
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800208 @Override
209 public void balanceRoles() {
210 List<ControllerNode> nodes = newArrayList(clusterService.getNodes());
Thomas Vachuska12dfdc32014-11-29 16:03:12 -0800211 Map<ControllerNode, Set<DeviceId>> controllerDevices = new HashMap<>();
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800212 int deviceCount = 0;
213
214 // Create buckets reflecting current ownership.
215 for (ControllerNode node : nodes) {
Thomas Vachuska7a8de842016-03-07 20:56:35 -0800216 if (clusterService.getState(node.id()).isActive()) {
Thomas Vachuska12dfdc32014-11-29 16:03:12 -0800217 Set<DeviceId> devicesOf = new HashSet<>(getDevicesOf(node.id()));
218 deviceCount += devicesOf.size();
219 controllerDevices.put(node, devicesOf);
220 log.info("Node {} has {} devices.", node.id(), devicesOf.size());
221 }
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800222 }
223
Claudine Chiudce08152016-03-09 18:19:28 +0000224 if (useRegionForBalanceRoles && balanceRolesUsingRegions(controllerDevices)) {
225 return;
226 }
227
228 // Now re-balance the buckets until they are roughly even.
229 List<CompletableFuture<Void>> balanceBucketsFutures = balanceControllerNodes(controllerDevices, deviceCount);
230
231 CompletableFuture<Void> balanceRolesFuture = CompletableFuture.allOf(
232 balanceBucketsFutures.toArray(new CompletableFuture[balanceBucketsFutures.size()]));
233
234 Futures.getUnchecked(balanceRolesFuture);
235 }
236
237 /**
238 * Balances the nodes specified in controllerDevices.
239 *
240 * @param controllerDevices controller nodes to devices map
241 * @param deviceCount number of devices mastered by controller nodes
242 * @return list of setRole futures for "moved" devices
243 */
244 private List<CompletableFuture<Void>> balanceControllerNodes(
245 Map<ControllerNode, Set<DeviceId>> controllerDevices, int deviceCount) {
Thomas Vachuska12dfdc32014-11-29 16:03:12 -0800246 // Now re-balance the buckets until they are roughly even.
Madan Jampanide003d92015-05-11 17:14:20 -0700247 List<CompletableFuture<Void>> balanceBucketsFutures = Lists.newLinkedList();
Thomas Vachuska12dfdc32014-11-29 16:03:12 -0800248 int rounds = controllerDevices.keySet().size();
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800249 for (int i = 0; i < rounds; i++) {
250 // Iterate over the buckets and find the smallest and the largest.
Thomas Vachuska12dfdc32014-11-29 16:03:12 -0800251 ControllerNode smallest = findBucket(true, controllerDevices);
252 ControllerNode largest = findBucket(false, controllerDevices);
Madan Jampanide003d92015-05-11 17:14:20 -0700253 balanceBucketsFutures.add(balanceBuckets(smallest, largest, controllerDevices, deviceCount));
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800254 }
Claudine Chiudce08152016-03-09 18:19:28 +0000255 return balanceBucketsFutures;
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800256 }
257
Claudine Chiudce08152016-03-09 18:19:28 +0000258 /**
259 * Finds node with the minimum/maximum devices from a list of nodes.
260 *
261 * @param min true: minimum, false: maximum
262 * @param controllerDevices controller nodes to devices map
263 * @return controller node with minimum/maximum devices
264 */
Thomas Vachuska12dfdc32014-11-29 16:03:12 -0800265 private ControllerNode findBucket(boolean min,
266 Map<ControllerNode, Set<DeviceId>> controllerDevices) {
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800267 int xSize = min ? Integer.MAX_VALUE : -1;
268 ControllerNode xNode = null;
Thomas Vachuska12dfdc32014-11-29 16:03:12 -0800269 for (ControllerNode node : controllerDevices.keySet()) {
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800270 int size = controllerDevices.get(node).size();
271 if ((min && size < xSize) || (!min && size > xSize)) {
272 xSize = size;
273 xNode = node;
274 }
275 }
276 return xNode;
277 }
278
Claudine Chiudce08152016-03-09 18:19:28 +0000279 /**
280 * Balance the node buckets by moving devices from largest to smallest node.
281 *
282 * @param smallest node that is master of the smallest number of devices
283 * @param largest node that is master of the largest number of devices
284 * @param controllerDevices controller nodes to devices map
285 * @param deviceCount number of devices mastered by controller nodes
286 * @return list of setRole futures for "moved" devices
287 */
Madan Jampanide003d92015-05-11 17:14:20 -0700288 private CompletableFuture<Void> balanceBuckets(ControllerNode smallest, ControllerNode largest,
Thomas Vachuska12dfdc32014-11-29 16:03:12 -0800289 Map<ControllerNode, Set<DeviceId>> controllerDevices,
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800290 int deviceCount) {
291 Collection<DeviceId> minBucket = controllerDevices.get(smallest);
292 Collection<DeviceId> maxBucket = controllerDevices.get(largest);
293 int bucketCount = controllerDevices.keySet().size();
294
295 int delta = (maxBucket.size() - minBucket.size()) / 2;
296 delta = Math.min(deviceCount / bucketCount, delta);
297
Madan Jampanide003d92015-05-11 17:14:20 -0700298 List<CompletableFuture<Void>> setRoleFutures = Lists.newLinkedList();
299
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800300 if (delta > 0) {
301 log.info("Attempting to move {} nodes from {} to {}...", delta,
302 largest.id(), smallest.id());
303
304 int i = 0;
305 Iterator<DeviceId> it = maxBucket.iterator();
306 while (it.hasNext() && i < delta) {
307 DeviceId deviceId = it.next();
308 log.info("Setting {} as the master for {}", smallest.id(), deviceId);
Madan Jampanide003d92015-05-11 17:14:20 -0700309 setRoleFutures.add(setRole(smallest.id(), deviceId, MASTER));
Thomas Vachuska12dfdc32014-11-29 16:03:12 -0800310 controllerDevices.get(smallest).add(deviceId);
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800311 it.remove();
312 i++;
313 }
314 }
Madan Jampanide003d92015-05-11 17:14:20 -0700315
316 return CompletableFuture.allOf(setRoleFutures.toArray(new CompletableFuture[setRoleFutures.size()]));
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800317 }
318
Claudine Chiudce08152016-03-09 18:19:28 +0000319 /**
320 * Balances the nodes considering Region information.
321 *
322 * @param allControllerDevices controller nodes to devices map
323 * @return true: nodes balanced; false: nodes not balanced
324 */
325 private boolean balanceRolesUsingRegions(Map<ControllerNode, Set<DeviceId>> allControllerDevices) {
326 Set<Region> regions = regionService.getRegions();
327 if (regions.isEmpty()) {
328 return false; // no balancing was done using regions.
329 }
330
331 // handle nodes belonging to regions
332 Set<ControllerNode> nodesInRegions = Sets.newHashSet();
333 for (Region region : regions) {
334 Map<ControllerNode, Set<DeviceId>> activeRegionControllers =
335 balanceRolesInRegion(region, allControllerDevices);
336 nodesInRegions.addAll(activeRegionControllers.keySet());
337 }
338
339 // handle nodes not belonging to any region
340 Set<ControllerNode> nodesNotInRegions = Sets.difference(allControllerDevices.keySet(), nodesInRegions);
341 if (!nodesNotInRegions.isEmpty()) {
342 int deviceCount = 0;
343 Map<ControllerNode, Set<DeviceId>> controllerDevicesNotInRegions = new HashMap<>();
344 for (ControllerNode controllerNode: nodesNotInRegions) {
345 controllerDevicesNotInRegions.put(controllerNode, allControllerDevices.get(controllerNode));
346 deviceCount += allControllerDevices.get(controllerNode).size();
347 }
348 // Now re-balance the buckets until they are roughly even.
349 List<CompletableFuture<Void>> balanceBucketsFutures =
350 balanceControllerNodes(controllerDevicesNotInRegions, deviceCount);
351
352 CompletableFuture<Void> balanceRolesFuture = CompletableFuture.allOf(
353 balanceBucketsFutures.toArray(new CompletableFuture[balanceBucketsFutures.size()]));
354
355 Futures.getUnchecked(balanceRolesFuture);
356 }
357 return true; // balancing was done using regions.
358 }
359
360 /**
361 * Balances the nodes in specified region.
362 *
363 * @param region region in which nodes are to be balanced
364 * @param allControllerDevices controller nodes to devices map
365 * @return controller nodes that were balanced
366 */
367 private Map<ControllerNode, Set<DeviceId>> balanceRolesInRegion(Region region,
368 Map<ControllerNode, Set<DeviceId>> allControllerDevices) {
369
370 // retrieve all devices associated with specified region
371 Set<DeviceId> devicesInRegion = regionService.getRegionDevices(region.id());
372 log.info("Region {} has {} devices.", region.id(), devicesInRegion.size());
373 if (devicesInRegion.isEmpty()) {
374 return new HashMap<>(); // no devices in this region, so nothing to balance.
375 }
376
377 List<Set<NodeId>> mastersList = region.masters();
378 log.info("Region {} has {} sets of masters.", region.id(), mastersList.size());
379 if (mastersList.isEmpty()) {
380 // TODO handle devices that belong to a region, which has no masters defined
381 return new HashMap<>(); // for now just leave devices alone
382 }
383
384 // get the region's preferred set of masters
385 Set<DeviceId> devicesInMasters = Sets.newHashSet();
386 Map<ControllerNode, Set<DeviceId>> regionalControllerDevices =
387 getRegionsPreferredMasters(region, devicesInMasters, allControllerDevices);
388
389 // Now re-balance the buckets until they are roughly even.
390 List<CompletableFuture<Void>> balanceBucketsFutures =
391 balanceControllerNodes(regionalControllerDevices, devicesInMasters.size());
392
393 // handle devices that are not currently mastered by the master node set
394 Set<DeviceId> devicesNotMasteredWithControllers = Sets.difference(devicesInRegion, devicesInMasters);
395 if (!devicesNotMasteredWithControllers.isEmpty()) {
396 // active controllers in master node set are already balanced, just
397 // assign device mastership in sequence
398 List<ControllerNode> sorted = new ArrayList<>(regionalControllerDevices.keySet());
399 Collections.sort(sorted, (o1, o2) ->
400 ((Integer) (regionalControllerDevices.get(o1)).size())
401 .compareTo((Integer) (regionalControllerDevices.get(o2)).size()));
402 int deviceIndex = 0;
403 for (DeviceId deviceId : devicesNotMasteredWithControllers) {
404 ControllerNode cnode = sorted.get(deviceIndex % sorted.size());
405 balanceBucketsFutures.add(setRole(cnode.id(), deviceId, MASTER));
406 regionalControllerDevices.get(cnode).add(deviceId);
407 deviceIndex++;
408 }
409 }
410
411 CompletableFuture<Void> balanceRolesFuture = CompletableFuture.allOf(
412 balanceBucketsFutures.toArray(new CompletableFuture[balanceBucketsFutures.size()]));
413
414 Futures.getUnchecked(balanceRolesFuture);
415
416 // update the map before returning
417 regionalControllerDevices.forEach((controllerNode, deviceIds) -> {
418 regionalControllerDevices.put(controllerNode, new HashSet<>(getDevicesOf(controllerNode.id())));
419 });
420
421 return regionalControllerDevices;
422 }
423
424 /**
425 * Get region's preferred set of master nodes - the first master node set that has at
426 * least one active node.
427 *
428 * @param region region for which preferred set of master nodes is requested
429 * @param devicesInMasters device set to track devices in preferred set of master nodes
430 * @param allControllerDevices controller nodes to devices map
431 * @return region's preferred master nodes (and devices that use them as masters)
432 */
433 private Map<ControllerNode, Set<DeviceId>> getRegionsPreferredMasters(Region region,
434 Set<DeviceId> devicesInMasters,
435 Map<ControllerNode, Set<DeviceId>> allControllerDevices) {
436 Map<ControllerNode, Set<DeviceId>> regionalControllerDevices = new HashMap<>();
437 int listIndex = 0;
438 for (Set<NodeId> masterSet: region.masters()) {
439 log.info("Region {} masters set {} has {} nodes.",
440 region.id(), listIndex, masterSet.size());
441 if (masterSet.isEmpty()) { // nothing on this level
442 listIndex++;
443 continue;
444 }
445 // Create buckets reflecting current ownership.
446 for (NodeId nodeId : masterSet) {
Michele Santuari6ebb36e2016-03-28 10:12:04 -0700447 if (clusterService.getState(nodeId).isActive()) {
Claudine Chiudce08152016-03-09 18:19:28 +0000448 ControllerNode controllerNode = clusterService.getNode(nodeId);
449 Set<DeviceId> devicesOf = new HashSet<>(allControllerDevices.get(controllerNode));
450 regionalControllerDevices.put(controllerNode, devicesOf);
451 devicesInMasters.addAll(devicesOf);
452 log.info("Active Node {} has {} devices.", nodeId, devicesOf.size());
453 }
454 }
455 if (!regionalControllerDevices.isEmpty()) {
456 break; // now have a set of >0 active controllers
457 }
458 listIndex++; // keep on looking
459 }
460 return regionalControllerDevices;
461 }
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800462
alshabib339a3d92014-09-26 17:54:32 -0700463 public class InternalDelegate implements MastershipStoreDelegate {
alshabib339a3d92014-09-26 17:54:32 -0700464 @Override
465 public void notify(MastershipEvent event) {
Thomas Vachuska42e8cce2015-07-29 19:25:18 -0700466 post(event);
alshabib339a3d92014-09-26 17:54:32 -0700467 }
alshabib339a3d92014-09-26 17:54:32 -0700468 }
469
Ayaka Koshibe16609692014-09-23 12:46:15 -0700470}