blob: 66509b6c3249b1863d6172e333ea38a3e8ca4cb2 [file] [log] [blame]
/*
 * Copyright 2014 Open Networking Laboratory
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
Ayaka Koshibe16609692014-09-23 12:46:15 -070016package org.onlab.onos.cluster.impl;
17
alshabib339a3d92014-09-26 17:54:32 -070018import static com.google.common.base.Preconditions.checkNotNull;
19import static org.slf4j.LoggerFactory.getLogger;
20
21import java.util.Set;
Ayaka Koshibeea5b4ce2014-10-11 14:17:17 -070022import java.util.concurrent.atomic.AtomicInteger;
alshabib339a3d92014-09-26 17:54:32 -070023
Ayaka Koshibe16609692014-09-23 12:46:15 -070024import org.apache.felix.scr.annotations.Activate;
tom4a5d1712014-09-23 17:49:39 -070025import org.apache.felix.scr.annotations.Component;
Ayaka Koshibe16609692014-09-23 12:46:15 -070026import org.apache.felix.scr.annotations.Deactivate;
27import org.apache.felix.scr.annotations.Reference;
28import org.apache.felix.scr.annotations.ReferenceCardinality;
tom4a5d1712014-09-23 17:49:39 -070029import org.apache.felix.scr.annotations.Service;
Yuta HIGUCHI6a462832014-11-23 23:56:03 -080030import org.onlab.metrics.MetricsComponent;
31import org.onlab.metrics.MetricsFeature;
32import org.onlab.metrics.MetricsService;
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -070033import org.onlab.onos.cluster.ClusterEvent;
34import org.onlab.onos.cluster.ClusterEventListener;
tom4a5d1712014-09-23 17:49:39 -070035import org.onlab.onos.cluster.ClusterService;
Ayaka Koshibeea5b4ce2014-10-11 14:17:17 -070036import org.onlab.onos.cluster.ControllerNode;
Ayaka Koshibe16609692014-09-23 12:46:15 -070037import org.onlab.onos.cluster.NodeId;
Ayaka Koshibeabedb092014-10-20 17:01:31 -070038import org.onlab.onos.cluster.RoleInfo;
Ayaka Koshibe16609692014-09-23 12:46:15 -070039import org.onlab.onos.event.AbstractListenerRegistry;
40import org.onlab.onos.event.EventDeliveryService;
Yuta HIGUCHI80912e62014-10-12 00:15:47 -070041import org.onlab.onos.mastership.MastershipAdminService;
42import org.onlab.onos.mastership.MastershipEvent;
43import org.onlab.onos.mastership.MastershipListener;
44import org.onlab.onos.mastership.MastershipService;
45import org.onlab.onos.mastership.MastershipStore;
46import org.onlab.onos.mastership.MastershipStoreDelegate;
47import org.onlab.onos.mastership.MastershipTerm;
48import org.onlab.onos.mastership.MastershipTermService;
Ayaka Koshibe16609692014-09-23 12:46:15 -070049import org.onlab.onos.net.DeviceId;
50import org.onlab.onos.net.MastershipRole;
Ayaka Koshibe16609692014-09-23 12:46:15 -070051import org.slf4j.Logger;
52
Yuta HIGUCHI6a462832014-11-23 23:56:03 -080053import com.codahale.metrics.Timer;
54import com.codahale.metrics.Timer.Context;
55
tom4a5d1712014-09-23 17:49:39 -070056@Component(immediate = true)
57@Service
Ayaka Koshibe3eed2b02014-09-23 13:28:05 -070058public class MastershipManager
Yuta HIGUCHIbcac4992014-11-22 19:27:57 -080059 implements MastershipService, MastershipAdminService, MastershipTermService {
Ayaka Koshibe16609692014-09-23 12:46:15 -070060
61 private static final String NODE_ID_NULL = "Node ID cannot be null";
62 private static final String DEVICE_ID_NULL = "Device ID cannot be null";
63 private static final String ROLE_NULL = "Mastership role cannot be null";
64
65 private final Logger log = getLogger(getClass());
66
67 protected final AbstractListenerRegistry<MastershipEvent, MastershipListener>
alshabib339a3d92014-09-26 17:54:32 -070068 listenerRegistry = new AbstractListenerRegistry<>();
69
70 private final MastershipStoreDelegate delegate = new InternalDelegate();
Ayaka Koshibe16609692014-09-23 12:46:15 -070071
72 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
73 protected MastershipStore store;
74
75 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
76 protected EventDeliveryService eventDispatcher;
77
78 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
tom4a5d1712014-09-23 17:49:39 -070079 protected ClusterService clusterService;
Ayaka Koshibe16609692014-09-23 12:46:15 -070080
Yuta HIGUCHI6a462832014-11-23 23:56:03 -080081 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
82 protected MetricsService metricsService;
83
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -070084 private ClusterEventListener clusterListener = new InternalClusterEventListener();
Yuta HIGUCHI6a462832014-11-23 23:56:03 -080085 private Timer requestRoleTimer;
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -070086
Ayaka Koshibe16609692014-09-23 12:46:15 -070087 @Activate
88 public void activate() {
Yuta HIGUCHI6a462832014-11-23 23:56:03 -080089 requestRoleTimer = createTimer("Mastership", "requestRole", "responseTime");
90
Ayaka Koshibe16609692014-09-23 12:46:15 -070091 eventDispatcher.addSink(MastershipEvent.class, listenerRegistry);
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -070092 clusterService.addListener(clusterListener);
alshabib339a3d92014-09-26 17:54:32 -070093 store.setDelegate(delegate);
Ayaka Koshibe16609692014-09-23 12:46:15 -070094 log.info("Started");
95 }
96
97 @Deactivate
98 public void deactivate() {
99 eventDispatcher.removeSink(MastershipEvent.class);
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -0700100 clusterService.removeListener(clusterListener);
alshabib339a3d92014-09-26 17:54:32 -0700101 store.unsetDelegate(delegate);
Ayaka Koshibe16609692014-09-23 12:46:15 -0700102 log.info("Stopped");
103 }
104
Ayaka Koshibe16609692014-09-23 12:46:15 -0700105 @Override
106 public void setRole(NodeId nodeId, DeviceId deviceId, MastershipRole role) {
107 checkNotNull(nodeId, NODE_ID_NULL);
108 checkNotNull(deviceId, DEVICE_ID_NULL);
109 checkNotNull(role, ROLE_NULL);
Ayaka Koshibed9f693e2014-09-29 18:04:54 -0700110
Ayaka Koshibe971a38a2014-09-30 11:56:23 -0700111 MastershipEvent event = null;
Ayaka Koshibee60d4522014-10-28 15:07:00 -0700112
113 switch (role) {
114 case MASTER:
115 event = store.setMaster(nodeId, deviceId);
116 break;
117 case STANDBY:
118 event = store.setStandby(nodeId, deviceId);
119 break;
120 case NONE:
121 event = store.relinquishRole(nodeId, deviceId);
122 break;
123 default:
124 log.info("Unknown role; ignoring");
125 return;
Ayaka Koshibe971a38a2014-09-30 11:56:23 -0700126 }
Ayaka Koshibed9f693e2014-09-29 18:04:54 -0700127
Ayaka Koshibe971a38a2014-09-30 11:56:23 -0700128 if (event != null) {
129 post(event);
Ayaka Koshibea7f044e2014-09-23 16:56:20 -0700130 }
Ayaka Koshibe16609692014-09-23 12:46:15 -0700131 }
132
133 @Override
tomb41d1ac2014-09-24 01:51:24 -0700134 public MastershipRole getLocalRole(DeviceId deviceId) {
135 checkNotNull(deviceId, DEVICE_ID_NULL);
136 return store.getRole(clusterService.getLocalNode().id(), deviceId);
137 }
138
139 @Override
140 public void relinquishMastership(DeviceId deviceId) {
Ayaka Koshibec4047702014-10-07 14:43:52 -0700141 MastershipEvent event = null;
Ayaka Koshibe1c292d72014-10-08 17:46:07 -0700142 event = store.relinquishRole(
143 clusterService.getLocalNode().id(), deviceId);
Ayaka Koshibed9f693e2014-09-29 18:04:54 -0700144 if (event != null) {
145 post(event);
146 }
tomb41d1ac2014-09-24 01:51:24 -0700147 }
148
149 @Override
150 public MastershipRole requestRoleFor(DeviceId deviceId) {
151 checkNotNull(deviceId, DEVICE_ID_NULL);
Yuta HIGUCHI6a462832014-11-23 23:56:03 -0800152 final Context timer = startTimer(requestRoleTimer);
153 try {
154 return store.requestRole(deviceId);
155 } finally {
156 stopTimer(timer);
157 }
tomb41d1ac2014-09-24 01:51:24 -0700158 }
159
160 @Override
Ayaka Koshibe16609692014-09-23 12:46:15 -0700161 public NodeId getMasterFor(DeviceId deviceId) {
162 checkNotNull(deviceId, DEVICE_ID_NULL);
163 return store.getMaster(deviceId);
164 }
165
166 @Override
167 public Set<DeviceId> getDevicesOf(NodeId nodeId) {
168 checkNotNull(nodeId, NODE_ID_NULL);
169 return store.getDevices(nodeId);
170 }
171
Ayaka Koshibe45503ce2014-10-14 11:26:45 -0700172 @Override
Ayaka Koshibeabedb092014-10-20 17:01:31 -0700173 public RoleInfo getNodesFor(DeviceId deviceId) {
Ayaka Koshibe45503ce2014-10-14 11:26:45 -0700174 checkNotNull(deviceId, DEVICE_ID_NULL);
175 return store.getNodes(deviceId);
176 }
Ayaka Koshibeb70d34b2014-09-25 15:43:01 -0700177
178 @Override
Yuta HIGUCHIbcac4992014-11-22 19:27:57 -0800179 public MastershipTerm getMastershipTerm(DeviceId deviceId) {
180 return store.getTermFor(deviceId);
Ayaka Koshibeb70d34b2014-09-25 15:43:01 -0700181 }
182
Ayaka Koshibe16609692014-09-23 12:46:15 -0700183 @Override
Ayaka Koshibe16609692014-09-23 12:46:15 -0700184 public void addListener(MastershipListener listener) {
185 checkNotNull(listener);
186 listenerRegistry.addListener(listener);
187 }
188
189 @Override
190 public void removeListener(MastershipListener listener) {
191 checkNotNull(listener);
192 listenerRegistry.removeListener(listener);
193 }
194
tomb41d1ac2014-09-24 01:51:24 -0700195 // FIXME: provide wiring to allow events to be triggered by changes within the store
Ayaka Koshibe16609692014-09-23 12:46:15 -0700196
197 // Posts the specified event to the local event dispatcher.
198 private void post(MastershipEvent event) {
199 if (event != null && eventDispatcher != null) {
200 eventDispatcher.post(event);
201 }
202 }
Ayaka Koshibe3eed2b02014-09-23 13:28:05 -0700203
Ayaka Koshibeb70d34b2014-09-25 15:43:01 -0700204
Ayaka Koshibeb70d34b2014-09-25 15:43:01 -0700205
Yuta HIGUCHI6a462832014-11-23 23:56:03 -0800206 private Timer createTimer(String component, String feature, String name) {
207 if (metricsService != null) {
208 MetricsComponent c = metricsService.registerComponent(component);
209 MetricsFeature f = c.registerFeature(feature);
210 return metricsService.createTimer(c, f, name);
211 }
212 return null;
213 }
214
215 private static final Context startTimer(Timer timer) {
216 if (timer != null) {
217 return timer.time();
218 }
219 return null;
220 }
221
222 private static final void stopTimer(Context context) {
223 if (context != null) {
224 context.stop();
225 }
226 }
227
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -0700228 //callback for reacting to cluster events
229 private class InternalClusterEventListener implements ClusterEventListener {
230
Ayaka Koshibeea5b4ce2014-10-11 14:17:17 -0700231 // A notion of a local maximum cluster size, used to tie-break.
232 // Think of a better way to do this.
233 private AtomicInteger clusterSize;
234
235 InternalClusterEventListener() {
236 clusterSize = new AtomicInteger(0);
237 }
238
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -0700239 @Override
240 public void event(ClusterEvent event) {
241 switch (event.type()) {
242 //FIXME: worry about addition when the time comes
243 case INSTANCE_ADDED:
244 case INSTANCE_ACTIVATED:
Ayaka Koshibeea5b4ce2014-10-11 14:17:17 -0700245 clusterSize.incrementAndGet();
246 log.info("instance {} added/activated", event.subject());
247 break;
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -0700248 case INSTANCE_REMOVED:
249 case INSTANCE_DEACTIVATED:
Ayaka Koshibeea5b4ce2014-10-11 14:17:17 -0700250 ControllerNode node = event.subject();
251
252 if (node.equals(clusterService.getLocalNode())) {
253 //If we are in smaller cluster, relinquish and return
254 for (DeviceId device : getDevicesOf(node.id())) {
255 if (!isInMajority()) {
256 //own DeviceManager should catch event and tell switch
257 store.relinquishRole(node.id(), device);
258 }
259 }
260 log.info("broke off from cluster, relinquished devices");
261 break;
262 }
263
264 // if we are the larger one and the removed node(s) are brain dead,
265 // force relinquish on behalf of disabled node.
266 // check network channel to do this?
267 for (DeviceId device : getDevicesOf(node.id())) {
268 //some things to check:
269 // 1. we didn't break off as well while we're at it
270 // 2. others don't pile in and try too - maybe a lock
271 if (isInMajority()) {
272 store.relinquishRole(node.id(), device);
273 }
274 }
275 clusterSize.decrementAndGet();
276 log.info("instance {} removed/deactivated", event.subject());
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -0700277 break;
278 default:
279 log.warn("unknown cluster event {}", event);
280 }
281 }
282
Ayaka Koshibeea5b4ce2014-10-11 14:17:17 -0700283 private boolean isInMajority() {
284 if (clusterService.getNodes().size() > (clusterSize.intValue() / 2)) {
285 return true;
286 }
Ayaka Koshibea7384a82014-10-22 18:59:06 -0700287// else {
Ayaka Koshibe67af1f42014-10-20 15:26:37 -0700288 //FIXME: break tie for equal-sized clusters, by number of
289 // connected switches, then masters, then nodeId hash
Ayaka Koshibea7384a82014-10-22 18:59:06 -0700290 // problem is, how do we get at channel info cleanly here?
291 // Also, what's the time hit for a distributed store look-up
292 // versus channel re-negotiation? bet on the latter being worse.
293
294// }
Ayaka Koshibeea5b4ce2014-10-11 14:17:17 -0700295 return false;
296 }
297
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -0700298 }
Ayaka Koshibe65efaef2014-09-29 18:21:56 -0700299
alshabib339a3d92014-09-26 17:54:32 -0700300 public class InternalDelegate implements MastershipStoreDelegate {
301
302 @Override
303 public void notify(MastershipEvent event) {
Yuta HIGUCHI9e11ac02014-11-12 10:09:49 -0800304 log.trace("dispatching mastership event {}", event);
alshabib339a3d92014-09-26 17:54:32 -0700305 eventDispatcher.post(event);
306 }
307
308 }
309
Ayaka Koshibe16609692014-09-23 12:46:15 -0700310}