blob: aaac4933d4a55487244672edb42eac4fd603cce6 [file] [log] [blame]
/*
 * Copyright 2014 Open Networking Laboratory
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
Ayaka Koshibe16609692014-09-23 12:46:15 -070016package org.onlab.onos.cluster.impl;
17
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -080018import com.codahale.metrics.Timer;
19import com.codahale.metrics.Timer.Context;
20import com.google.common.collect.HashMultimap;
21import com.google.common.collect.Multimap;
Ayaka Koshibe16609692014-09-23 12:46:15 -070022import org.apache.felix.scr.annotations.Activate;
tom4a5d1712014-09-23 17:49:39 -070023import org.apache.felix.scr.annotations.Component;
Ayaka Koshibe16609692014-09-23 12:46:15 -070024import org.apache.felix.scr.annotations.Deactivate;
25import org.apache.felix.scr.annotations.Reference;
26import org.apache.felix.scr.annotations.ReferenceCardinality;
tom4a5d1712014-09-23 17:49:39 -070027import org.apache.felix.scr.annotations.Service;
Yuta HIGUCHI6a462832014-11-23 23:56:03 -080028import org.onlab.metrics.MetricsService;
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -070029import org.onlab.onos.cluster.ClusterEvent;
30import org.onlab.onos.cluster.ClusterEventListener;
tom4a5d1712014-09-23 17:49:39 -070031import org.onlab.onos.cluster.ClusterService;
Ayaka Koshibeea5b4ce2014-10-11 14:17:17 -070032import org.onlab.onos.cluster.ControllerNode;
Ayaka Koshibe16609692014-09-23 12:46:15 -070033import org.onlab.onos.cluster.NodeId;
Ayaka Koshibeabedb092014-10-20 17:01:31 -070034import org.onlab.onos.cluster.RoleInfo;
Yuta HIGUCHIa22f69f2014-11-24 22:25:17 -080035import org.onlab.onos.core.MetricsHelper;
Ayaka Koshibe16609692014-09-23 12:46:15 -070036import org.onlab.onos.event.AbstractListenerRegistry;
37import org.onlab.onos.event.EventDeliveryService;
Yuta HIGUCHI80912e62014-10-12 00:15:47 -070038import org.onlab.onos.mastership.MastershipAdminService;
39import org.onlab.onos.mastership.MastershipEvent;
40import org.onlab.onos.mastership.MastershipListener;
41import org.onlab.onos.mastership.MastershipService;
42import org.onlab.onos.mastership.MastershipStore;
43import org.onlab.onos.mastership.MastershipStoreDelegate;
44import org.onlab.onos.mastership.MastershipTerm;
45import org.onlab.onos.mastership.MastershipTermService;
Ayaka Koshibe16609692014-09-23 12:46:15 -070046import org.onlab.onos.net.DeviceId;
47import org.onlab.onos.net.MastershipRole;
Ayaka Koshibe16609692014-09-23 12:46:15 -070048import org.slf4j.Logger;
49
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -080050import java.util.Collection;
51import java.util.Iterator;
52import java.util.List;
53import java.util.Set;
54import java.util.concurrent.atomic.AtomicInteger;
55
56import static com.google.common.base.Preconditions.checkNotNull;
57import static com.google.common.collect.Lists.newArrayList;
58import static org.onlab.metrics.MetricsUtil.startTimer;
59import static org.onlab.metrics.MetricsUtil.stopTimer;
60import static org.onlab.onos.net.MastershipRole.MASTER;
61import static org.slf4j.LoggerFactory.getLogger;
Yuta HIGUCHI6a462832014-11-23 23:56:03 -080062
tom4a5d1712014-09-23 17:49:39 -070063@Component(immediate = true)
64@Service
Ayaka Koshibe3eed2b02014-09-23 13:28:05 -070065public class MastershipManager
Yuta HIGUCHIa22f69f2014-11-24 22:25:17 -080066 implements MastershipService, MastershipAdminService, MastershipTermService,
67 MetricsHelper {
Ayaka Koshibe16609692014-09-23 12:46:15 -070068
69 private static final String NODE_ID_NULL = "Node ID cannot be null";
70 private static final String DEVICE_ID_NULL = "Device ID cannot be null";
71 private static final String ROLE_NULL = "Mastership role cannot be null";
72
73 private final Logger log = getLogger(getClass());
74
75 protected final AbstractListenerRegistry<MastershipEvent, MastershipListener>
alshabib339a3d92014-09-26 17:54:32 -070076 listenerRegistry = new AbstractListenerRegistry<>();
77
78 private final MastershipStoreDelegate delegate = new InternalDelegate();
Ayaka Koshibe16609692014-09-23 12:46:15 -070079
80 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
81 protected MastershipStore store;
82
83 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
84 protected EventDeliveryService eventDispatcher;
85
86 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
tom4a5d1712014-09-23 17:49:39 -070087 protected ClusterService clusterService;
Ayaka Koshibe16609692014-09-23 12:46:15 -070088
Yuta HIGUCHI6a462832014-11-23 23:56:03 -080089 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
90 protected MetricsService metricsService;
91
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -070092 private ClusterEventListener clusterListener = new InternalClusterEventListener();
Yuta HIGUCHI6a462832014-11-23 23:56:03 -080093 private Timer requestRoleTimer;
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -070094
Ayaka Koshibe16609692014-09-23 12:46:15 -070095 @Activate
96 public void activate() {
Yuta HIGUCHI6a462832014-11-23 23:56:03 -080097 requestRoleTimer = createTimer("Mastership", "requestRole", "responseTime");
98
Ayaka Koshibe16609692014-09-23 12:46:15 -070099 eventDispatcher.addSink(MastershipEvent.class, listenerRegistry);
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -0700100 clusterService.addListener(clusterListener);
alshabib339a3d92014-09-26 17:54:32 -0700101 store.setDelegate(delegate);
Ayaka Koshibe16609692014-09-23 12:46:15 -0700102 log.info("Started");
103 }
104
105 @Deactivate
106 public void deactivate() {
107 eventDispatcher.removeSink(MastershipEvent.class);
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -0700108 clusterService.removeListener(clusterListener);
alshabib339a3d92014-09-26 17:54:32 -0700109 store.unsetDelegate(delegate);
Ayaka Koshibe16609692014-09-23 12:46:15 -0700110 log.info("Stopped");
111 }
112
Ayaka Koshibe16609692014-09-23 12:46:15 -0700113 @Override
114 public void setRole(NodeId nodeId, DeviceId deviceId, MastershipRole role) {
115 checkNotNull(nodeId, NODE_ID_NULL);
116 checkNotNull(deviceId, DEVICE_ID_NULL);
117 checkNotNull(role, ROLE_NULL);
Ayaka Koshibed9f693e2014-09-29 18:04:54 -0700118
Ayaka Koshibe971a38a2014-09-30 11:56:23 -0700119 MastershipEvent event = null;
Ayaka Koshibee60d4522014-10-28 15:07:00 -0700120
121 switch (role) {
122 case MASTER:
123 event = store.setMaster(nodeId, deviceId);
124 break;
125 case STANDBY:
126 event = store.setStandby(nodeId, deviceId);
127 break;
128 case NONE:
129 event = store.relinquishRole(nodeId, deviceId);
130 break;
131 default:
132 log.info("Unknown role; ignoring");
133 return;
Ayaka Koshibe971a38a2014-09-30 11:56:23 -0700134 }
Ayaka Koshibed9f693e2014-09-29 18:04:54 -0700135
Ayaka Koshibe971a38a2014-09-30 11:56:23 -0700136 if (event != null) {
137 post(event);
Ayaka Koshibea7f044e2014-09-23 16:56:20 -0700138 }
Ayaka Koshibe16609692014-09-23 12:46:15 -0700139 }
140
141 @Override
tomb41d1ac2014-09-24 01:51:24 -0700142 public MastershipRole getLocalRole(DeviceId deviceId) {
143 checkNotNull(deviceId, DEVICE_ID_NULL);
144 return store.getRole(clusterService.getLocalNode().id(), deviceId);
145 }
146
147 @Override
148 public void relinquishMastership(DeviceId deviceId) {
Ayaka Koshibec4047702014-10-07 14:43:52 -0700149 MastershipEvent event = null;
Ayaka Koshibe1c292d72014-10-08 17:46:07 -0700150 event = store.relinquishRole(
151 clusterService.getLocalNode().id(), deviceId);
Ayaka Koshibed9f693e2014-09-29 18:04:54 -0700152 if (event != null) {
153 post(event);
154 }
tomb41d1ac2014-09-24 01:51:24 -0700155 }
156
157 @Override
158 public MastershipRole requestRoleFor(DeviceId deviceId) {
159 checkNotNull(deviceId, DEVICE_ID_NULL);
Yuta HIGUCHI6a462832014-11-23 23:56:03 -0800160 final Context timer = startTimer(requestRoleTimer);
161 try {
162 return store.requestRole(deviceId);
163 } finally {
164 stopTimer(timer);
165 }
tomb41d1ac2014-09-24 01:51:24 -0700166 }
167
168 @Override
Ayaka Koshibe16609692014-09-23 12:46:15 -0700169 public NodeId getMasterFor(DeviceId deviceId) {
170 checkNotNull(deviceId, DEVICE_ID_NULL);
171 return store.getMaster(deviceId);
172 }
173
174 @Override
175 public Set<DeviceId> getDevicesOf(NodeId nodeId) {
176 checkNotNull(nodeId, NODE_ID_NULL);
177 return store.getDevices(nodeId);
178 }
179
Ayaka Koshibe45503ce2014-10-14 11:26:45 -0700180 @Override
Ayaka Koshibeabedb092014-10-20 17:01:31 -0700181 public RoleInfo getNodesFor(DeviceId deviceId) {
Ayaka Koshibe45503ce2014-10-14 11:26:45 -0700182 checkNotNull(deviceId, DEVICE_ID_NULL);
183 return store.getNodes(deviceId);
184 }
Ayaka Koshibeb70d34b2014-09-25 15:43:01 -0700185
186 @Override
Yuta HIGUCHIbcac4992014-11-22 19:27:57 -0800187 public MastershipTerm getMastershipTerm(DeviceId deviceId) {
188 return store.getTermFor(deviceId);
Ayaka Koshibeb70d34b2014-09-25 15:43:01 -0700189 }
190
Ayaka Koshibe16609692014-09-23 12:46:15 -0700191 @Override
Ayaka Koshibe16609692014-09-23 12:46:15 -0700192 public void addListener(MastershipListener listener) {
193 checkNotNull(listener);
194 listenerRegistry.addListener(listener);
195 }
196
197 @Override
198 public void removeListener(MastershipListener listener) {
199 checkNotNull(listener);
200 listenerRegistry.removeListener(listener);
201 }
202
Yuta HIGUCHIa22f69f2014-11-24 22:25:17 -0800203 @Override
204 public MetricsService metricsService() {
205 return metricsService;
206 }
Ayaka Koshibe16609692014-09-23 12:46:15 -0700207
Thomas Vachuska1e68bdd2014-11-29 13:53:10 -0800208 @Override
209 public void balanceRoles() {
210 List<ControllerNode> nodes = newArrayList(clusterService.getNodes());
211 Multimap<ControllerNode, DeviceId> controllerDevices = HashMultimap.create();
212 int deviceCount = 0;
213
214 // Create buckets reflecting current ownership.
215 for (ControllerNode node : nodes) {
216 Set<DeviceId> devicesOf = getDevicesOf(node.id());
217 deviceCount += devicesOf.size();
218 controllerDevices.putAll(node, devicesOf);
219 log.info("Node {} has {} devices.", node.id(), devicesOf.size());
220 }
221
222 int rounds = nodes.size();
223 for (int i = 0; i < rounds; i++) {
224 // Iterate over the buckets and find the smallest and the largest.
225 ControllerNode smallest = findBucket(true, nodes, controllerDevices);
226 ControllerNode largest = findBucket(false, nodes, controllerDevices);
227 balanceBuckets(smallest, largest, controllerDevices, deviceCount);
228 }
229 }
230
231 private ControllerNode findBucket(boolean min, Collection<ControllerNode> nodes,
232 Multimap<ControllerNode, DeviceId> controllerDevices) {
233 int xSize = min ? Integer.MAX_VALUE : -1;
234 ControllerNode xNode = null;
235 for (ControllerNode node : nodes) {
236 int size = controllerDevices.get(node).size();
237 if ((min && size < xSize) || (!min && size > xSize)) {
238 xSize = size;
239 xNode = node;
240 }
241 }
242 return xNode;
243 }
244
245 private void balanceBuckets(ControllerNode smallest, ControllerNode largest,
246 Multimap<ControllerNode, DeviceId> controllerDevices,
247 int deviceCount) {
248 Collection<DeviceId> minBucket = controllerDevices.get(smallest);
249 Collection<DeviceId> maxBucket = controllerDevices.get(largest);
250 int bucketCount = controllerDevices.keySet().size();
251
252 int delta = (maxBucket.size() - minBucket.size()) / 2;
253 delta = Math.min(deviceCount / bucketCount, delta);
254
255 if (delta > 0) {
256 log.info("Attempting to move {} nodes from {} to {}...", delta,
257 largest.id(), smallest.id());
258
259 int i = 0;
260 Iterator<DeviceId> it = maxBucket.iterator();
261 while (it.hasNext() && i < delta) {
262 DeviceId deviceId = it.next();
263 log.info("Setting {} as the master for {}", smallest.id(), deviceId);
264 setRole(smallest.id(), deviceId, MASTER);
265 controllerDevices.put(smallest, deviceId);
266 it.remove();
267 i++;
268 }
269 }
270 }
271
272
Ayaka Koshibe16609692014-09-23 12:46:15 -0700273 // Posts the specified event to the local event dispatcher.
274 private void post(MastershipEvent event) {
275 if (event != null && eventDispatcher != null) {
276 eventDispatcher.post(event);
277 }
278 }
Ayaka Koshibe3eed2b02014-09-23 13:28:05 -0700279
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -0700280 //callback for reacting to cluster events
281 private class InternalClusterEventListener implements ClusterEventListener {
282
Ayaka Koshibeea5b4ce2014-10-11 14:17:17 -0700283 // A notion of a local maximum cluster size, used to tie-break.
284 // Think of a better way to do this.
285 private AtomicInteger clusterSize;
286
287 InternalClusterEventListener() {
288 clusterSize = new AtomicInteger(0);
289 }
290
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -0700291 @Override
292 public void event(ClusterEvent event) {
293 switch (event.type()) {
294 //FIXME: worry about addition when the time comes
295 case INSTANCE_ADDED:
296 case INSTANCE_ACTIVATED:
Ayaka Koshibeea5b4ce2014-10-11 14:17:17 -0700297 clusterSize.incrementAndGet();
298 log.info("instance {} added/activated", event.subject());
299 break;
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -0700300 case INSTANCE_REMOVED:
301 case INSTANCE_DEACTIVATED:
Ayaka Koshibeea5b4ce2014-10-11 14:17:17 -0700302 ControllerNode node = event.subject();
303
304 if (node.equals(clusterService.getLocalNode())) {
305 //If we are in smaller cluster, relinquish and return
306 for (DeviceId device : getDevicesOf(node.id())) {
307 if (!isInMajority()) {
308 //own DeviceManager should catch event and tell switch
309 store.relinquishRole(node.id(), device);
310 }
311 }
312 log.info("broke off from cluster, relinquished devices");
313 break;
314 }
315
316 // if we are the larger one and the removed node(s) are brain dead,
317 // force relinquish on behalf of disabled node.
318 // check network channel to do this?
319 for (DeviceId device : getDevicesOf(node.id())) {
320 //some things to check:
321 // 1. we didn't break off as well while we're at it
322 // 2. others don't pile in and try too - maybe a lock
323 if (isInMajority()) {
324 store.relinquishRole(node.id(), device);
325 }
326 }
327 clusterSize.decrementAndGet();
328 log.info("instance {} removed/deactivated", event.subject());
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -0700329 break;
330 default:
331 log.warn("unknown cluster event {}", event);
332 }
333 }
334
Ayaka Koshibeea5b4ce2014-10-11 14:17:17 -0700335 private boolean isInMajority() {
336 if (clusterService.getNodes().size() > (clusterSize.intValue() / 2)) {
337 return true;
338 }
Ayaka Koshibea7384a82014-10-22 18:59:06 -0700339// else {
Ayaka Koshibe67af1f42014-10-20 15:26:37 -0700340 //FIXME: break tie for equal-sized clusters, by number of
341 // connected switches, then masters, then nodeId hash
Ayaka Koshibea7384a82014-10-22 18:59:06 -0700342 // problem is, how do we get at channel info cleanly here?
343 // Also, what's the time hit for a distributed store look-up
344 // versus channel re-negotiation? bet on the latter being worse.
345
346// }
Ayaka Koshibeea5b4ce2014-10-11 14:17:17 -0700347 return false;
348 }
349
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -0700350 }
Ayaka Koshibe65efaef2014-09-29 18:21:56 -0700351
alshabib339a3d92014-09-26 17:54:32 -0700352 public class InternalDelegate implements MastershipStoreDelegate {
353
354 @Override
355 public void notify(MastershipEvent event) {
Yuta HIGUCHI9e11ac02014-11-12 10:09:49 -0800356 log.trace("dispatching mastership event {}", event);
alshabib339a3d92014-09-26 17:54:32 -0700357 eventDispatcher.post(event);
358 }
359
360 }
361
Ayaka Koshibe16609692014-09-23 12:46:15 -0700362}