blob: b316986ae256f46ec62e888e360b8cdaf9ea4c70 [file] [log] [blame]
Thomas Vachuska4f1a60c2014-10-28 13:39:07 -07001/*
2 * Copyright 2014 Open Networking Laboratory
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
Ayaka Koshibe16609692014-09-23 12:46:15 -070016package org.onlab.onos.cluster.impl;
17
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -080018import com.codahale.metrics.Timer;
19import com.codahale.metrics.Timer.Context;
Ayaka Koshibe16609692014-09-23 12:46:15 -070020import org.apache.felix.scr.annotations.Activate;
tom4a5d1712014-09-23 17:49:39 -070021import org.apache.felix.scr.annotations.Component;
Ayaka Koshibe16609692014-09-23 12:46:15 -070022import org.apache.felix.scr.annotations.Deactivate;
23import org.apache.felix.scr.annotations.Reference;
24import org.apache.felix.scr.annotations.ReferenceCardinality;
tom4a5d1712014-09-23 17:49:39 -070025import org.apache.felix.scr.annotations.Service;
Yuta HIGUCHI6a462832014-11-23 23:56:03 -080026import org.onlab.metrics.MetricsService;
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -070027import org.onlab.onos.cluster.ClusterEvent;
28import org.onlab.onos.cluster.ClusterEventListener;
tom4a5d1712014-09-23 17:49:39 -070029import org.onlab.onos.cluster.ClusterService;
Ayaka Koshibeea5b4ce2014-10-11 14:17:17 -070030import org.onlab.onos.cluster.ControllerNode;
Ayaka Koshibe16609692014-09-23 12:46:15 -070031import org.onlab.onos.cluster.NodeId;
Ayaka Koshibeabedb092014-10-20 17:01:31 -070032import org.onlab.onos.cluster.RoleInfo;
Yuta HIGUCHIa22f69f2014-11-24 22:25:17 -080033import org.onlab.onos.core.MetricsHelper;
Ayaka Koshibe16609692014-09-23 12:46:15 -070034import org.onlab.onos.event.AbstractListenerRegistry;
35import org.onlab.onos.event.EventDeliveryService;
Yuta HIGUCHI80912e62014-10-12 00:15:47 -070036import org.onlab.onos.mastership.MastershipAdminService;
37import org.onlab.onos.mastership.MastershipEvent;
38import org.onlab.onos.mastership.MastershipListener;
39import org.onlab.onos.mastership.MastershipService;
40import org.onlab.onos.mastership.MastershipStore;
41import org.onlab.onos.mastership.MastershipStoreDelegate;
42import org.onlab.onos.mastership.MastershipTerm;
43import org.onlab.onos.mastership.MastershipTermService;
Ayaka Koshibe16609692014-09-23 12:46:15 -070044import org.onlab.onos.net.DeviceId;
45import org.onlab.onos.net.MastershipRole;
Ayaka Koshibe16609692014-09-23 12:46:15 -070046import org.slf4j.Logger;
47
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -080048import java.util.Collection;
Thomas Vachuska12dfdc32014-11-29 16:03:12 -080049import java.util.HashMap;
50import java.util.HashSet;
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -080051import java.util.Iterator;
52import java.util.List;
Thomas Vachuska12dfdc32014-11-29 16:03:12 -080053import java.util.Map;
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -080054import java.util.Set;
55import java.util.concurrent.atomic.AtomicInteger;
56
57import static com.google.common.base.Preconditions.checkNotNull;
58import static com.google.common.collect.Lists.newArrayList;
59import static org.onlab.metrics.MetricsUtil.startTimer;
60import static org.onlab.metrics.MetricsUtil.stopTimer;
Thomas Vachuska12dfdc32014-11-29 16:03:12 -080061import static org.onlab.onos.cluster.ControllerNode.State.ACTIVE;
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -080062import static org.onlab.onos.net.MastershipRole.MASTER;
63import static org.slf4j.LoggerFactory.getLogger;
Yuta HIGUCHI6a462832014-11-23 23:56:03 -080064
tom4a5d1712014-09-23 17:49:39 -070065@Component(immediate = true)
66@Service
Ayaka Koshibe3eed2b02014-09-23 13:28:05 -070067public class MastershipManager
Yuta HIGUCHIa22f69f2014-11-24 22:25:17 -080068 implements MastershipService, MastershipAdminService, MastershipTermService,
69 MetricsHelper {
Ayaka Koshibe16609692014-09-23 12:46:15 -070070
71 private static final String NODE_ID_NULL = "Node ID cannot be null";
72 private static final String DEVICE_ID_NULL = "Device ID cannot be null";
73 private static final String ROLE_NULL = "Mastership role cannot be null";
74
75 private final Logger log = getLogger(getClass());
76
77 protected final AbstractListenerRegistry<MastershipEvent, MastershipListener>
alshabib339a3d92014-09-26 17:54:32 -070078 listenerRegistry = new AbstractListenerRegistry<>();
79
80 private final MastershipStoreDelegate delegate = new InternalDelegate();
Ayaka Koshibe16609692014-09-23 12:46:15 -070081
82 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
83 protected MastershipStore store;
84
85 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
86 protected EventDeliveryService eventDispatcher;
87
88 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
tom4a5d1712014-09-23 17:49:39 -070089 protected ClusterService clusterService;
Ayaka Koshibe16609692014-09-23 12:46:15 -070090
Yuta HIGUCHI6a462832014-11-23 23:56:03 -080091 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
92 protected MetricsService metricsService;
93
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -070094 private ClusterEventListener clusterListener = new InternalClusterEventListener();
Yuta HIGUCHI6a462832014-11-23 23:56:03 -080095 private Timer requestRoleTimer;
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -070096
Ayaka Koshibe16609692014-09-23 12:46:15 -070097 @Activate
98 public void activate() {
Yuta HIGUCHI6a462832014-11-23 23:56:03 -080099 requestRoleTimer = createTimer("Mastership", "requestRole", "responseTime");
100
Ayaka Koshibe16609692014-09-23 12:46:15 -0700101 eventDispatcher.addSink(MastershipEvent.class, listenerRegistry);
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -0700102 clusterService.addListener(clusterListener);
alshabib339a3d92014-09-26 17:54:32 -0700103 store.setDelegate(delegate);
Ayaka Koshibe16609692014-09-23 12:46:15 -0700104 log.info("Started");
105 }
106
107 @Deactivate
108 public void deactivate() {
109 eventDispatcher.removeSink(MastershipEvent.class);
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -0700110 clusterService.removeListener(clusterListener);
alshabib339a3d92014-09-26 17:54:32 -0700111 store.unsetDelegate(delegate);
Ayaka Koshibe16609692014-09-23 12:46:15 -0700112 log.info("Stopped");
113 }
114
Ayaka Koshibe16609692014-09-23 12:46:15 -0700115 @Override
116 public void setRole(NodeId nodeId, DeviceId deviceId, MastershipRole role) {
117 checkNotNull(nodeId, NODE_ID_NULL);
118 checkNotNull(deviceId, DEVICE_ID_NULL);
119 checkNotNull(role, ROLE_NULL);
Ayaka Koshibed9f693e2014-09-29 18:04:54 -0700120
Ayaka Koshibe971a38a2014-09-30 11:56:23 -0700121 MastershipEvent event = null;
Ayaka Koshibee60d4522014-10-28 15:07:00 -0700122
123 switch (role) {
124 case MASTER:
125 event = store.setMaster(nodeId, deviceId);
126 break;
127 case STANDBY:
128 event = store.setStandby(nodeId, deviceId);
129 break;
130 case NONE:
131 event = store.relinquishRole(nodeId, deviceId);
132 break;
133 default:
134 log.info("Unknown role; ignoring");
135 return;
Ayaka Koshibe971a38a2014-09-30 11:56:23 -0700136 }
Ayaka Koshibed9f693e2014-09-29 18:04:54 -0700137
Ayaka Koshibe971a38a2014-09-30 11:56:23 -0700138 if (event != null) {
139 post(event);
Ayaka Koshibea7f044e2014-09-23 16:56:20 -0700140 }
Ayaka Koshibe16609692014-09-23 12:46:15 -0700141 }
142
143 @Override
tomb41d1ac2014-09-24 01:51:24 -0700144 public MastershipRole getLocalRole(DeviceId deviceId) {
145 checkNotNull(deviceId, DEVICE_ID_NULL);
146 return store.getRole(clusterService.getLocalNode().id(), deviceId);
147 }
148
149 @Override
150 public void relinquishMastership(DeviceId deviceId) {
Ayaka Koshibec4047702014-10-07 14:43:52 -0700151 MastershipEvent event = null;
Ayaka Koshibe1c292d72014-10-08 17:46:07 -0700152 event = store.relinquishRole(
153 clusterService.getLocalNode().id(), deviceId);
Ayaka Koshibed9f693e2014-09-29 18:04:54 -0700154 if (event != null) {
155 post(event);
156 }
tomb41d1ac2014-09-24 01:51:24 -0700157 }
158
159 @Override
160 public MastershipRole requestRoleFor(DeviceId deviceId) {
161 checkNotNull(deviceId, DEVICE_ID_NULL);
Yuta HIGUCHI6a462832014-11-23 23:56:03 -0800162 final Context timer = startTimer(requestRoleTimer);
163 try {
164 return store.requestRole(deviceId);
165 } finally {
166 stopTimer(timer);
167 }
tomb41d1ac2014-09-24 01:51:24 -0700168 }
169
170 @Override
Ayaka Koshibe16609692014-09-23 12:46:15 -0700171 public NodeId getMasterFor(DeviceId deviceId) {
172 checkNotNull(deviceId, DEVICE_ID_NULL);
173 return store.getMaster(deviceId);
174 }
175
176 @Override
177 public Set<DeviceId> getDevicesOf(NodeId nodeId) {
178 checkNotNull(nodeId, NODE_ID_NULL);
179 return store.getDevices(nodeId);
180 }
181
Ayaka Koshibe45503ce2014-10-14 11:26:45 -0700182 @Override
Ayaka Koshibeabedb092014-10-20 17:01:31 -0700183 public RoleInfo getNodesFor(DeviceId deviceId) {
Ayaka Koshibe45503ce2014-10-14 11:26:45 -0700184 checkNotNull(deviceId, DEVICE_ID_NULL);
185 return store.getNodes(deviceId);
186 }
Ayaka Koshibeb70d34b2014-09-25 15:43:01 -0700187
188 @Override
Yuta HIGUCHIbcac4992014-11-22 19:27:57 -0800189 public MastershipTerm getMastershipTerm(DeviceId deviceId) {
190 return store.getTermFor(deviceId);
Ayaka Koshibeb70d34b2014-09-25 15:43:01 -0700191 }
192
Ayaka Koshibe16609692014-09-23 12:46:15 -0700193 @Override
Ayaka Koshibe16609692014-09-23 12:46:15 -0700194 public void addListener(MastershipListener listener) {
195 checkNotNull(listener);
196 listenerRegistry.addListener(listener);
197 }
198
199 @Override
200 public void removeListener(MastershipListener listener) {
201 checkNotNull(listener);
202 listenerRegistry.removeListener(listener);
203 }
204
Yuta HIGUCHIa22f69f2014-11-24 22:25:17 -0800205 @Override
206 public MetricsService metricsService() {
207 return metricsService;
208 }
Ayaka Koshibe16609692014-09-23 12:46:15 -0700209
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800210 @Override
211 public void balanceRoles() {
212 List<ControllerNode> nodes = newArrayList(clusterService.getNodes());
Thomas Vachuska12dfdc32014-11-29 16:03:12 -0800213 Map<ControllerNode, Set<DeviceId>> controllerDevices = new HashMap<>();
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800214 int deviceCount = 0;
215
216 // Create buckets reflecting current ownership.
217 for (ControllerNode node : nodes) {
Thomas Vachuska12dfdc32014-11-29 16:03:12 -0800218 if (clusterService.getState(node.id()) == ACTIVE) {
219 Set<DeviceId> devicesOf = new HashSet<>(getDevicesOf(node.id()));
220 deviceCount += devicesOf.size();
221 controllerDevices.put(node, devicesOf);
222 log.info("Node {} has {} devices.", node.id(), devicesOf.size());
223 }
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800224 }
225
Thomas Vachuska12dfdc32014-11-29 16:03:12 -0800226 // Now re-balance the buckets until they are roughly even.
227 int rounds = controllerDevices.keySet().size();
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800228 for (int i = 0; i < rounds; i++) {
229 // Iterate over the buckets and find the smallest and the largest.
Thomas Vachuska12dfdc32014-11-29 16:03:12 -0800230 ControllerNode smallest = findBucket(true, controllerDevices);
231 ControllerNode largest = findBucket(false, controllerDevices);
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800232 balanceBuckets(smallest, largest, controllerDevices, deviceCount);
233 }
234 }
235
Thomas Vachuska12dfdc32014-11-29 16:03:12 -0800236 private ControllerNode findBucket(boolean min,
237 Map<ControllerNode, Set<DeviceId>> controllerDevices) {
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800238 int xSize = min ? Integer.MAX_VALUE : -1;
239 ControllerNode xNode = null;
Thomas Vachuska12dfdc32014-11-29 16:03:12 -0800240 for (ControllerNode node : controllerDevices.keySet()) {
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800241 int size = controllerDevices.get(node).size();
242 if ((min && size < xSize) || (!min && size > xSize)) {
243 xSize = size;
244 xNode = node;
245 }
246 }
247 return xNode;
248 }
249
250 private void balanceBuckets(ControllerNode smallest, ControllerNode largest,
Thomas Vachuska12dfdc32014-11-29 16:03:12 -0800251 Map<ControllerNode, Set<DeviceId>> controllerDevices,
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800252 int deviceCount) {
253 Collection<DeviceId> minBucket = controllerDevices.get(smallest);
254 Collection<DeviceId> maxBucket = controllerDevices.get(largest);
255 int bucketCount = controllerDevices.keySet().size();
256
257 int delta = (maxBucket.size() - minBucket.size()) / 2;
258 delta = Math.min(deviceCount / bucketCount, delta);
259
260 if (delta > 0) {
261 log.info("Attempting to move {} nodes from {} to {}...", delta,
262 largest.id(), smallest.id());
263
264 int i = 0;
265 Iterator<DeviceId> it = maxBucket.iterator();
266 while (it.hasNext() && i < delta) {
267 DeviceId deviceId = it.next();
268 log.info("Setting {} as the master for {}", smallest.id(), deviceId);
269 setRole(smallest.id(), deviceId, MASTER);
Thomas Vachuska12dfdc32014-11-29 16:03:12 -0800270 controllerDevices.get(smallest).add(deviceId);
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800271 it.remove();
272 i++;
273 }
274 }
275 }
276
277
Ayaka Koshibe16609692014-09-23 12:46:15 -0700278 // Posts the specified event to the local event dispatcher.
279 private void post(MastershipEvent event) {
280 if (event != null && eventDispatcher != null) {
281 eventDispatcher.post(event);
282 }
283 }
Ayaka Koshibe3eed2b02014-09-23 13:28:05 -0700284
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -0700285 //callback for reacting to cluster events
286 private class InternalClusterEventListener implements ClusterEventListener {
287
Ayaka Koshibeea5b4ce2014-10-11 14:17:17 -0700288 // A notion of a local maximum cluster size, used to tie-break.
289 // Think of a better way to do this.
290 private AtomicInteger clusterSize;
291
292 InternalClusterEventListener() {
293 clusterSize = new AtomicInteger(0);
294 }
295
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -0700296 @Override
297 public void event(ClusterEvent event) {
298 switch (event.type()) {
299 //FIXME: worry about addition when the time comes
300 case INSTANCE_ADDED:
301 case INSTANCE_ACTIVATED:
Ayaka Koshibeea5b4ce2014-10-11 14:17:17 -0700302 clusterSize.incrementAndGet();
303 log.info("instance {} added/activated", event.subject());
304 break;
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -0700305 case INSTANCE_REMOVED:
306 case INSTANCE_DEACTIVATED:
Ayaka Koshibeea5b4ce2014-10-11 14:17:17 -0700307 ControllerNode node = event.subject();
308
309 if (node.equals(clusterService.getLocalNode())) {
310 //If we are in smaller cluster, relinquish and return
311 for (DeviceId device : getDevicesOf(node.id())) {
312 if (!isInMajority()) {
313 //own DeviceManager should catch event and tell switch
314 store.relinquishRole(node.id(), device);
315 }
316 }
317 log.info("broke off from cluster, relinquished devices");
318 break;
319 }
320
321 // if we are the larger one and the removed node(s) are brain dead,
322 // force relinquish on behalf of disabled node.
323 // check network channel to do this?
324 for (DeviceId device : getDevicesOf(node.id())) {
325 //some things to check:
326 // 1. we didn't break off as well while we're at it
327 // 2. others don't pile in and try too - maybe a lock
328 if (isInMajority()) {
329 store.relinquishRole(node.id(), device);
330 }
331 }
332 clusterSize.decrementAndGet();
333 log.info("instance {} removed/deactivated", event.subject());
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -0700334 break;
335 default:
336 log.warn("unknown cluster event {}", event);
337 }
338 }
339
Ayaka Koshibeea5b4ce2014-10-11 14:17:17 -0700340 private boolean isInMajority() {
341 if (clusterService.getNodes().size() > (clusterSize.intValue() / 2)) {
342 return true;
343 }
Ayaka Koshibea7384a82014-10-22 18:59:06 -0700344// else {
Ayaka Koshibe67af1f42014-10-20 15:26:37 -0700345 //FIXME: break tie for equal-sized clusters, by number of
346 // connected switches, then masters, then nodeId hash
Ayaka Koshibea7384a82014-10-22 18:59:06 -0700347 // problem is, how do we get at channel info cleanly here?
348 // Also, what's the time hit for a distributed store look-up
349 // versus channel re-negotiation? bet on the latter being worse.
350
351// }
Ayaka Koshibeea5b4ce2014-10-11 14:17:17 -0700352 return false;
353 }
354
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -0700355 }
Ayaka Koshibe65efaef2014-09-29 18:21:56 -0700356
alshabib339a3d92014-09-26 17:54:32 -0700357 public class InternalDelegate implements MastershipStoreDelegate {
358
359 @Override
360 public void notify(MastershipEvent event) {
Yuta HIGUCHI9e11ac02014-11-12 10:09:49 -0800361 log.trace("dispatching mastership event {}", event);
alshabib339a3d92014-09-26 17:54:32 -0700362 eventDispatcher.post(event);
363 }
364
365 }
366
Ayaka Koshibe16609692014-09-23 12:46:15 -0700367}