blob: 4f993c425fdcc30330e45f5370954169cb83c739 [file] [log] [blame]
/*
 * Copyright 2014 Open Networking Laboratory
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
Ayaka Koshibe16609692014-09-23 12:46:15 -070016package org.onlab.onos.cluster.impl;
17
alshabib339a3d92014-09-26 17:54:32 -070018import static com.google.common.base.Preconditions.checkNotNull;
19import static org.slf4j.LoggerFactory.getLogger;
Yuta HIGUCHIa22f69f2014-11-24 22:25:17 -080020import static org.onlab.metrics.MetricsUtil.*;
alshabib339a3d92014-09-26 17:54:32 -070021
22import java.util.Set;
Ayaka Koshibeea5b4ce2014-10-11 14:17:17 -070023import java.util.concurrent.atomic.AtomicInteger;
alshabib339a3d92014-09-26 17:54:32 -070024
Ayaka Koshibe16609692014-09-23 12:46:15 -070025import org.apache.felix.scr.annotations.Activate;
tom4a5d1712014-09-23 17:49:39 -070026import org.apache.felix.scr.annotations.Component;
Ayaka Koshibe16609692014-09-23 12:46:15 -070027import org.apache.felix.scr.annotations.Deactivate;
28import org.apache.felix.scr.annotations.Reference;
29import org.apache.felix.scr.annotations.ReferenceCardinality;
tom4a5d1712014-09-23 17:49:39 -070030import org.apache.felix.scr.annotations.Service;
Yuta HIGUCHI6a462832014-11-23 23:56:03 -080031import org.onlab.metrics.MetricsService;
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -070032import org.onlab.onos.cluster.ClusterEvent;
33import org.onlab.onos.cluster.ClusterEventListener;
tom4a5d1712014-09-23 17:49:39 -070034import org.onlab.onos.cluster.ClusterService;
Ayaka Koshibeea5b4ce2014-10-11 14:17:17 -070035import org.onlab.onos.cluster.ControllerNode;
Ayaka Koshibe16609692014-09-23 12:46:15 -070036import org.onlab.onos.cluster.NodeId;
Ayaka Koshibeabedb092014-10-20 17:01:31 -070037import org.onlab.onos.cluster.RoleInfo;
Yuta HIGUCHIa22f69f2014-11-24 22:25:17 -080038import org.onlab.onos.core.MetricsHelper;
Ayaka Koshibe16609692014-09-23 12:46:15 -070039import org.onlab.onos.event.AbstractListenerRegistry;
40import org.onlab.onos.event.EventDeliveryService;
Yuta HIGUCHI80912e62014-10-12 00:15:47 -070041import org.onlab.onos.mastership.MastershipAdminService;
42import org.onlab.onos.mastership.MastershipEvent;
43import org.onlab.onos.mastership.MastershipListener;
44import org.onlab.onos.mastership.MastershipService;
45import org.onlab.onos.mastership.MastershipStore;
46import org.onlab.onos.mastership.MastershipStoreDelegate;
47import org.onlab.onos.mastership.MastershipTerm;
48import org.onlab.onos.mastership.MastershipTermService;
Ayaka Koshibe16609692014-09-23 12:46:15 -070049import org.onlab.onos.net.DeviceId;
50import org.onlab.onos.net.MastershipRole;
Ayaka Koshibe16609692014-09-23 12:46:15 -070051import org.slf4j.Logger;
52
Yuta HIGUCHI6a462832014-11-23 23:56:03 -080053import com.codahale.metrics.Timer;
54import com.codahale.metrics.Timer.Context;
55
tom4a5d1712014-09-23 17:49:39 -070056@Component(immediate = true)
57@Service
Ayaka Koshibe3eed2b02014-09-23 13:28:05 -070058public class MastershipManager
Yuta HIGUCHIa22f69f2014-11-24 22:25:17 -080059 implements MastershipService, MastershipAdminService, MastershipTermService,
60 MetricsHelper {
Ayaka Koshibe16609692014-09-23 12:46:15 -070061
62 private static final String NODE_ID_NULL = "Node ID cannot be null";
63 private static final String DEVICE_ID_NULL = "Device ID cannot be null";
64 private static final String ROLE_NULL = "Mastership role cannot be null";
65
66 private final Logger log = getLogger(getClass());
67
68 protected final AbstractListenerRegistry<MastershipEvent, MastershipListener>
alshabib339a3d92014-09-26 17:54:32 -070069 listenerRegistry = new AbstractListenerRegistry<>();
70
71 private final MastershipStoreDelegate delegate = new InternalDelegate();
Ayaka Koshibe16609692014-09-23 12:46:15 -070072
73 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
74 protected MastershipStore store;
75
76 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
77 protected EventDeliveryService eventDispatcher;
78
79 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
tom4a5d1712014-09-23 17:49:39 -070080 protected ClusterService clusterService;
Ayaka Koshibe16609692014-09-23 12:46:15 -070081
Yuta HIGUCHI6a462832014-11-23 23:56:03 -080082 @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
83 protected MetricsService metricsService;
84
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -070085 private ClusterEventListener clusterListener = new InternalClusterEventListener();
Yuta HIGUCHI6a462832014-11-23 23:56:03 -080086 private Timer requestRoleTimer;
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -070087
Ayaka Koshibe16609692014-09-23 12:46:15 -070088 @Activate
89 public void activate() {
Yuta HIGUCHI6a462832014-11-23 23:56:03 -080090 requestRoleTimer = createTimer("Mastership", "requestRole", "responseTime");
91
Ayaka Koshibe16609692014-09-23 12:46:15 -070092 eventDispatcher.addSink(MastershipEvent.class, listenerRegistry);
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -070093 clusterService.addListener(clusterListener);
alshabib339a3d92014-09-26 17:54:32 -070094 store.setDelegate(delegate);
Ayaka Koshibe16609692014-09-23 12:46:15 -070095 log.info("Started");
96 }
97
98 @Deactivate
99 public void deactivate() {
100 eventDispatcher.removeSink(MastershipEvent.class);
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -0700101 clusterService.removeListener(clusterListener);
alshabib339a3d92014-09-26 17:54:32 -0700102 store.unsetDelegate(delegate);
Ayaka Koshibe16609692014-09-23 12:46:15 -0700103 log.info("Stopped");
104 }
105
Ayaka Koshibe16609692014-09-23 12:46:15 -0700106 @Override
107 public void setRole(NodeId nodeId, DeviceId deviceId, MastershipRole role) {
108 checkNotNull(nodeId, NODE_ID_NULL);
109 checkNotNull(deviceId, DEVICE_ID_NULL);
110 checkNotNull(role, ROLE_NULL);
Ayaka Koshibed9f693e2014-09-29 18:04:54 -0700111
Ayaka Koshibe971a38a2014-09-30 11:56:23 -0700112 MastershipEvent event = null;
Ayaka Koshibee60d4522014-10-28 15:07:00 -0700113
114 switch (role) {
115 case MASTER:
116 event = store.setMaster(nodeId, deviceId);
117 break;
118 case STANDBY:
119 event = store.setStandby(nodeId, deviceId);
120 break;
121 case NONE:
122 event = store.relinquishRole(nodeId, deviceId);
123 break;
124 default:
125 log.info("Unknown role; ignoring");
126 return;
Ayaka Koshibe971a38a2014-09-30 11:56:23 -0700127 }
Ayaka Koshibed9f693e2014-09-29 18:04:54 -0700128
Ayaka Koshibe971a38a2014-09-30 11:56:23 -0700129 if (event != null) {
130 post(event);
Ayaka Koshibea7f044e2014-09-23 16:56:20 -0700131 }
Ayaka Koshibe16609692014-09-23 12:46:15 -0700132 }
133
134 @Override
tomb41d1ac2014-09-24 01:51:24 -0700135 public MastershipRole getLocalRole(DeviceId deviceId) {
136 checkNotNull(deviceId, DEVICE_ID_NULL);
137 return store.getRole(clusterService.getLocalNode().id(), deviceId);
138 }
139
140 @Override
141 public void relinquishMastership(DeviceId deviceId) {
Ayaka Koshibec4047702014-10-07 14:43:52 -0700142 MastershipEvent event = null;
Ayaka Koshibe1c292d72014-10-08 17:46:07 -0700143 event = store.relinquishRole(
144 clusterService.getLocalNode().id(), deviceId);
Ayaka Koshibed9f693e2014-09-29 18:04:54 -0700145 if (event != null) {
146 post(event);
147 }
tomb41d1ac2014-09-24 01:51:24 -0700148 }
149
150 @Override
151 public MastershipRole requestRoleFor(DeviceId deviceId) {
152 checkNotNull(deviceId, DEVICE_ID_NULL);
Yuta HIGUCHI6a462832014-11-23 23:56:03 -0800153 final Context timer = startTimer(requestRoleTimer);
154 try {
155 return store.requestRole(deviceId);
156 } finally {
157 stopTimer(timer);
158 }
tomb41d1ac2014-09-24 01:51:24 -0700159 }
160
161 @Override
Ayaka Koshibe16609692014-09-23 12:46:15 -0700162 public NodeId getMasterFor(DeviceId deviceId) {
163 checkNotNull(deviceId, DEVICE_ID_NULL);
164 return store.getMaster(deviceId);
165 }
166
167 @Override
168 public Set<DeviceId> getDevicesOf(NodeId nodeId) {
169 checkNotNull(nodeId, NODE_ID_NULL);
170 return store.getDevices(nodeId);
171 }
172
Ayaka Koshibe45503ce2014-10-14 11:26:45 -0700173 @Override
Ayaka Koshibeabedb092014-10-20 17:01:31 -0700174 public RoleInfo getNodesFor(DeviceId deviceId) {
Ayaka Koshibe45503ce2014-10-14 11:26:45 -0700175 checkNotNull(deviceId, DEVICE_ID_NULL);
176 return store.getNodes(deviceId);
177 }
Ayaka Koshibeb70d34b2014-09-25 15:43:01 -0700178
179 @Override
Yuta HIGUCHIbcac4992014-11-22 19:27:57 -0800180 public MastershipTerm getMastershipTerm(DeviceId deviceId) {
181 return store.getTermFor(deviceId);
Ayaka Koshibeb70d34b2014-09-25 15:43:01 -0700182 }
183
Ayaka Koshibe16609692014-09-23 12:46:15 -0700184 @Override
Ayaka Koshibe16609692014-09-23 12:46:15 -0700185 public void addListener(MastershipListener listener) {
186 checkNotNull(listener);
187 listenerRegistry.addListener(listener);
188 }
189
190 @Override
191 public void removeListener(MastershipListener listener) {
192 checkNotNull(listener);
193 listenerRegistry.removeListener(listener);
194 }
195
Yuta HIGUCHIa22f69f2014-11-24 22:25:17 -0800196 @Override
197 public MetricsService metricsService() {
198 return metricsService;
199 }
Ayaka Koshibe16609692014-09-23 12:46:15 -0700200
201 // Posts the specified event to the local event dispatcher.
202 private void post(MastershipEvent event) {
203 if (event != null && eventDispatcher != null) {
204 eventDispatcher.post(event);
205 }
206 }
Ayaka Koshibe3eed2b02014-09-23 13:28:05 -0700207
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -0700208 //callback for reacting to cluster events
209 private class InternalClusterEventListener implements ClusterEventListener {
210
Ayaka Koshibeea5b4ce2014-10-11 14:17:17 -0700211 // A notion of a local maximum cluster size, used to tie-break.
212 // Think of a better way to do this.
213 private AtomicInteger clusterSize;
214
215 InternalClusterEventListener() {
216 clusterSize = new AtomicInteger(0);
217 }
218
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -0700219 @Override
220 public void event(ClusterEvent event) {
221 switch (event.type()) {
222 //FIXME: worry about addition when the time comes
223 case INSTANCE_ADDED:
224 case INSTANCE_ACTIVATED:
Ayaka Koshibeea5b4ce2014-10-11 14:17:17 -0700225 clusterSize.incrementAndGet();
226 log.info("instance {} added/activated", event.subject());
227 break;
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -0700228 case INSTANCE_REMOVED:
229 case INSTANCE_DEACTIVATED:
Ayaka Koshibeea5b4ce2014-10-11 14:17:17 -0700230 ControllerNode node = event.subject();
231
232 if (node.equals(clusterService.getLocalNode())) {
233 //If we are in smaller cluster, relinquish and return
234 for (DeviceId device : getDevicesOf(node.id())) {
235 if (!isInMajority()) {
236 //own DeviceManager should catch event and tell switch
237 store.relinquishRole(node.id(), device);
238 }
239 }
240 log.info("broke off from cluster, relinquished devices");
241 break;
242 }
243
244 // if we are the larger one and the removed node(s) are brain dead,
245 // force relinquish on behalf of disabled node.
246 // check network channel to do this?
247 for (DeviceId device : getDevicesOf(node.id())) {
248 //some things to check:
249 // 1. we didn't break off as well while we're at it
250 // 2. others don't pile in and try too - maybe a lock
251 if (isInMajority()) {
252 store.relinquishRole(node.id(), device);
253 }
254 }
255 clusterSize.decrementAndGet();
256 log.info("instance {} removed/deactivated", event.subject());
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -0700257 break;
258 default:
259 log.warn("unknown cluster event {}", event);
260 }
261 }
262
Ayaka Koshibeea5b4ce2014-10-11 14:17:17 -0700263 private boolean isInMajority() {
264 if (clusterService.getNodes().size() > (clusterSize.intValue() / 2)) {
265 return true;
266 }
Ayaka Koshibea7384a82014-10-22 18:59:06 -0700267// else {
Ayaka Koshibe67af1f42014-10-20 15:26:37 -0700268 //FIXME: break tie for equal-sized clusters, by number of
269 // connected switches, then masters, then nodeId hash
Ayaka Koshibea7384a82014-10-22 18:59:06 -0700270 // problem is, how do we get at channel info cleanly here?
271 // Also, what's the time hit for a distributed store look-up
272 // versus channel re-negotiation? bet on the latter being worse.
273
274// }
Ayaka Koshibeea5b4ce2014-10-11 14:17:17 -0700275 return false;
276 }
277
Ayaka Koshibe3de43ca2014-09-26 16:40:23 -0700278 }
Ayaka Koshibe65efaef2014-09-29 18:21:56 -0700279
alshabib339a3d92014-09-26 17:54:32 -0700280 public class InternalDelegate implements MastershipStoreDelegate {
281
282 @Override
283 public void notify(MastershipEvent event) {
Yuta HIGUCHI9e11ac02014-11-12 10:09:49 -0800284 log.trace("dispatching mastership event {}", event);
alshabib339a3d92014-09-26 17:54:32 -0700285 eventDispatcher.post(event);
286 }
287
288 }
289
Ayaka Koshibe16609692014-09-23 12:46:15 -0700290}