blob: 78f60b8c9519c3f485cc48da757cec93fb27bad0 [file] [log] [blame]
Jonathan Hartdeda0ba2014-04-03 11:14:12 -07001package net.onrc.onos.core.registry;
Umesh Krishnaswamyb56bb292013-02-12 20:28:27 -08002
Jonathan Hartbd181b62013-02-17 16:05:38 -08003import java.io.IOException;
Umesh Krishnaswamyb56bb292013-02-12 20:28:27 -08004import java.util.ArrayList;
5import java.util.Collection;
Jonathan Hart3d7730a2013-02-22 11:51:17 -08006import java.util.Collections;
Jonathan Hart599c6b32013-03-24 22:42:02 -07007import java.util.Comparator;
Umesh Krishnaswamyb56bb292013-02-12 20:28:27 -08008import java.util.HashMap;
Jonathan Hartedd6a442013-02-20 15:22:06 -08009import java.util.List;
Umesh Krishnaswamyb56bb292013-02-12 20:28:27 -080010import java.util.Map;
Pavlin Radoslavov52163ed2014-03-19 11:39:34 -070011import java.util.Random;
Jonathan Hart116b1fe2014-03-14 18:53:47 -070012import java.util.concurrent.BlockingQueue;
Jonathan Hart89187372013-03-14 16:41:09 -070013import java.util.concurrent.ConcurrentHashMap;
Jonathan Hart116b1fe2014-03-14 18:53:47 -070014import java.util.concurrent.ExecutorService;
15import java.util.concurrent.Executors;
16import java.util.concurrent.LinkedBlockingQueue;
Umesh Krishnaswamyb56bb292013-02-12 20:28:27 -080017
Pavlin Radoslavovc35229e2014-02-06 16:19:37 -080018import net.floodlightcontroller.core.IFloodlightProviderService;
Umesh Krishnaswamyb56bb292013-02-12 20:28:27 -080019import net.floodlightcontroller.core.module.FloodlightModuleContext;
20import net.floodlightcontroller.core.module.FloodlightModuleException;
21import net.floodlightcontroller.core.module.IFloodlightModule;
22import net.floodlightcontroller.core.module.IFloodlightService;
Jonathan Hart3d7730a2013-02-22 11:51:17 -080023import net.floodlightcontroller.restserver.IRestApiService;
Jonathan Hartdeda0ba2014-04-03 11:14:12 -070024import net.onrc.onos.core.registry.web.RegistryWebRoutable;
Sho SHIMIZUfc932d52014-08-15 11:22:37 -070025import net.onrc.onos.core.util.IdBlock;
Pavlin Radoslavov53b208a2014-07-28 13:16:11 -070026import net.onrc.onos.core.util.OnosInstanceId;
Umesh Krishnaswamyb56bb292013-02-12 20:28:27 -080027
Jonathan Hart12a26aa2014-06-04 14:33:09 -070028import org.apache.commons.lang.NotImplementedException;
Jonathan Hart116b1fe2014-03-14 18:53:47 -070029import org.apache.curator.RetryPolicy;
30import org.apache.curator.framework.CuratorFramework;
31import org.apache.curator.framework.CuratorFrameworkFactory;
32import org.apache.curator.framework.recipes.atomic.AtomicValue;
33import org.apache.curator.framework.recipes.atomic.DistributedAtomicLong;
34import org.apache.curator.framework.recipes.cache.ChildData;
35import org.apache.curator.framework.recipes.cache.PathChildrenCache;
36import org.apache.curator.framework.recipes.cache.PathChildrenCache.StartMode;
37import org.apache.curator.framework.recipes.cache.PathChildrenCacheEvent;
38import org.apache.curator.framework.recipes.cache.PathChildrenCacheListener;
39import org.apache.curator.framework.recipes.leader.LeaderLatch;
40import org.apache.curator.framework.recipes.leader.LeaderLatchListener;
41import org.apache.curator.framework.recipes.leader.Participant;
42import org.apache.curator.retry.ExponentialBackoffRetry;
43import org.apache.curator.retry.RetryOneTime;
44import org.apache.curator.x.discovery.ServiceCache;
45import org.apache.curator.x.discovery.ServiceDiscovery;
46import org.apache.curator.x.discovery.ServiceDiscoveryBuilder;
47import org.apache.curator.x.discovery.ServiceInstance;
Jonathan Hartc78b8f62014-08-07 22:31:09 -070048import org.projectfloodlight.openflow.util.HexString;
Jonathan Hartbd181b62013-02-17 16:05:38 -080049import org.slf4j.Logger;
50import org.slf4j.LoggerFactory;
Umesh Krishnaswamyb56bb292013-02-12 20:28:27 -080051
Jonathan Hartd10008d2013-02-23 17:04:08 -080052import com.google.common.base.Charsets;
Jonathan Hartbd181b62013-02-17 16:05:38 -080053
Jonathan Hart7bf62172013-02-28 13:17:18 -080054/**
55 * A registry service that uses Zookeeper. All data is stored in Zookeeper,
56 * so this can be used as a global registry in a multi-node ONOS cluster.
Jonathan Hart7bf62172013-02-28 13:17:18 -080057 */
Jonathan Hart1dbcce62014-06-04 15:21:45 -070058public class ZookeeperRegistry implements IFloodlightModule,
59 IControllerRegistryService {
Jonathan Hartc6eee9e2013-02-18 14:58:27 -080060
Yuta HIGUCHI5f1ce1c2014-07-20 22:43:54 -070061 private static final String DEFAULT_CONNECTION_STRING = "localhost:2181";
62
Ray Milkeyec838942014-04-09 11:28:43 -070063 private static final Logger log = LoggerFactory.getLogger(ZookeeperRegistry.class);
Jonathan Hart71c0ffc2013-03-24 15:58:42 -070064
Pavlin Radoslavov53b208a2014-07-28 13:16:11 -070065 private OnosInstanceId onosInstanceId;
Jonathan Hart12a26aa2014-06-04 14:33:09 -070066
67 private IRestApiService restApi;
Jonathan Hartbd181b62013-02-17 16:05:38 -080068
Jonathan Hart1dbcce62014-06-04 15:21:45 -070069 // This is the default. It is overwritten by the connectionString
70 // configuration parameter
Yuta HIGUCHI5f1ce1c2014-07-20 22:43:54 -070071 private String connectionString = DEFAULT_CONNECTION_STRING;
Pavlin Radoslavovf1377ce2014-02-05 17:37:24 -080072
Yuta HIGUCHI85de40d2014-06-12 14:06:41 -070073 /**
74 * JVM Option to specify ZooKeeper namespace.
75 */
76 public static final String ZK_NAMESPACE_KEY = "zookeeper.namespace";
77 private static final String DEFAULT_NAMESPACE = "onos";
78 private String namespace = DEFAULT_NAMESPACE;
Jonathan Hart12a26aa2014-06-04 14:33:09 -070079 private static final String SWITCH_LATCHES_PATH = "/switches";
Ray Milkey2476cac2014-04-08 11:03:21 -070080 private static final String CLUSTER_LEADER_PATH = "/cluster/leader";
Pavlin Radoslavovf1377ce2014-02-05 17:37:24 -080081
Jonathan Hart1dbcce62014-06-04 15:21:45 -070082 private static final String SERVICES_PATH = "/"; // i.e. the root of our namespace
Ray Milkey2476cac2014-04-08 11:03:21 -070083 private static final String CONTROLLER_SERVICE_NAME = "controllers";
Pavlin Radoslavov52163ed2014-03-19 11:39:34 -070084
Jonathan Hart12a26aa2014-06-04 14:33:09 -070085 private CuratorFramework curatorFrameworkClient;
Pavlin Radoslavov52163ed2014-03-19 11:39:34 -070086
Jonathan Hart12a26aa2014-06-04 14:33:09 -070087 private PathChildrenCache rootSwitchCache;
Ray Milkey269ffb92014-04-03 14:43:30 -070088
Jonathan Hart12a26aa2014-06-04 14:33:09 -070089 private ConcurrentHashMap<String, SwitchLeadershipData> switches;
90 private Map<String, PathChildrenCache> switchPathCaches;
Ray Milkey269ffb92014-04-03 14:43:30 -070091
Jonathan Hart12a26aa2014-06-04 14:33:09 -070092 private LeaderLatch clusterLeaderLatch;
93 private ClusterLeaderListener clusterLeaderListener;
Ray Milkey269ffb92014-04-03 14:43:30 -070094 private static final long CLUSTER_LEADER_ELECTION_RETRY_MS = 100;
95
Ray Milkey2476cac2014-04-08 11:03:21 -070096 private static final String ID_COUNTER_PATH = "/flowidcounter";
97 private static final Long ID_BLOCK_SIZE = 0x100000000L;
Jonathan Hart12a26aa2014-06-04 14:33:09 -070098 private DistributedAtomicLong distributedIdCounter;
Ray Milkey269ffb92014-04-03 14:43:30 -070099
100 //Zookeeper performance-related configuration
Jonathan Hart12a26aa2014-06-04 14:33:09 -0700101 private static final int SESSION_TIMEOUT = 7000; // ms
102 private static final int CONNECTION_TIMEOUT = 5000; // ms
Ray Milkey269ffb92014-04-03 14:43:30 -0700103
104 //
105 // Unique ID generation state
106 // TODO: The implementation must be updated to use the Zookeeper
Jonathan Hart12a26aa2014-06-04 14:33:09 -0700107 // instead of a random generator.
Ray Milkey269ffb92014-04-03 14:43:30 -0700108 //
109 private static Random randomGenerator = new Random();
Jonathan Hart12a26aa2014-06-04 14:33:09 -0700110 private static long nextUniqueIdPrefix;
Pavlin Radoslavov952a9762014-04-10 13:47:03 -0700111 // NOTE: The 0xffffffffL value is used by the Unique ID generator for
112 // initialization purpose.
113 private static long nextUniqueIdSuffix = 0xffffffffL;
Ray Milkey269ffb92014-04-03 14:43:30 -0700114
115 private final BlockingQueue<SwitchLeaderEvent> switchLeadershipEvents =
116 new LinkedBlockingQueue<SwitchLeaderEvent>();
117
Jonathan Hart12a26aa2014-06-04 14:33:09 -0700118 /**
119 * Listens for changes to the switch znodes in Zookeeper. This maintains
120 * the second level of PathChildrenCaches that hold the controllers
121 * contending for each switch - there's one for each switch.
122 */
123 private PathChildrenCacheListener switchPathCacheListener =
124 new SwitchPathCacheListener();
125 private ServiceDiscovery<ControllerService> serviceDiscovery;
126 private ServiceCache<ControllerService> serviceCache;
127
Ray Milkey269ffb92014-04-03 14:43:30 -0700128
Jonathan Hart116b1fe2014-03-14 18:53:47 -0700129 private static class SwitchLeaderEvent {
Jonathan Hart12a26aa2014-06-04 14:33:09 -0700130 private final long dpid;
131 private final boolean isLeader;
Ray Milkey269ffb92014-04-03 14:43:30 -0700132
133 public SwitchLeaderEvent(long dpid, boolean isLeader) {
134 this.dpid = dpid;
135 this.isLeader = isLeader;
136 }
Jonathan Hart12a26aa2014-06-04 14:33:09 -0700137
138 public long getDpid() {
139 return dpid;
140 }
141
142 public boolean isLeader() {
143 return isLeader;
144 }
Jonathan Hart116b1fe2014-03-14 18:53:47 -0700145 }
Ray Milkey269ffb92014-04-03 14:43:30 -0700146
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700147 // Dispatcher thread for leadership change events coming from Curator
Jonathan Hart116b1fe2014-03-14 18:53:47 -0700148 private void dispatchEvents() {
Ray Milkey269ffb92014-04-03 14:43:30 -0700149 while (!Thread.currentThread().isInterrupted()) {
150 try {
151 SwitchLeaderEvent event = switchLeadershipEvents.take();
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700152 SwitchLeadershipData swData =
153 switches.get(HexString.toHexString(event.getDpid()));
Ray Milkey269ffb92014-04-03 14:43:30 -0700154 if (swData == null) {
Jonathan Hart12a26aa2014-06-04 14:33:09 -0700155 log.debug("Leadership data {} not found", event.getDpid());
Ray Milkey269ffb92014-04-03 14:43:30 -0700156 continue;
157 }
158
Jonathan Hart12a26aa2014-06-04 14:33:09 -0700159 swData.getCallback().controlChanged(event.getDpid(), event.isLeader());
Ray Milkey269ffb92014-04-03 14:43:30 -0700160 } catch (InterruptedException e) {
161 Thread.currentThread().interrupt();
162 break;
163 } catch (Exception e) {
164 log.error("Exception in registry event thread", e);
165 }
166 }
Jonathan Hart116b1fe2014-03-14 18:53:47 -0700167 }
Jonathan Hartbd181b62013-02-17 16:05:38 -0800168
Jonathan Hart12a26aa2014-06-04 14:33:09 -0700169 class SwitchLeaderListener implements LeaderLatchListener {
Yuta HIGUCHI5bbbaca2014-06-09 16:39:08 -0700170 private final String dpid;
Jonathan Hart116b1fe2014-03-14 18:53:47 -0700171
Pavlin Radoslavov0294e052014-04-10 13:36:45 -0700172 public SwitchLeaderListener(String dpid) {
Ray Milkey269ffb92014-04-03 14:43:30 -0700173 this.dpid = dpid;
Ray Milkey269ffb92014-04-03 14:43:30 -0700174 }
Jonathan Hart116b1fe2014-03-14 18:53:47 -0700175
Ray Milkey269ffb92014-04-03 14:43:30 -0700176 @Override
177 public void isLeader() {
Yuta HIGUCHI5bbbaca2014-06-09 16:39:08 -0700178 log.info("Became leader for {}", dpid);
Pavlin Radoslavovf1377ce2014-02-05 17:37:24 -0800179
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700180 switchLeadershipEvents.add(
181 new SwitchLeaderEvent(HexString.toLong(dpid), true));
Ray Milkey269ffb92014-04-03 14:43:30 -0700182 }
Pavlin Radoslavovf1377ce2014-02-05 17:37:24 -0800183
Ray Milkey269ffb92014-04-03 14:43:30 -0700184 @Override
185 public void notLeader() {
Yuta HIGUCHI5bbbaca2014-06-09 16:39:08 -0700186 log.info("Lost leadership for {}", dpid);
Pavlin Radoslavovf1377ce2014-02-05 17:37:24 -0800187
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700188 switchLeadershipEvents.add(
189 new SwitchLeaderEvent(HexString.toLong(dpid), false));
Ray Milkey269ffb92014-04-03 14:43:30 -0700190 }
191 }
Jonathan Hart116b1fe2014-03-14 18:53:47 -0700192
Jonathan Hart12a26aa2014-06-04 14:33:09 -0700193 class SwitchPathCacheListener implements PathChildrenCacheListener {
Ray Milkey269ffb92014-04-03 14:43:30 -0700194 @Override
195 public void childEvent(CuratorFramework client,
196 PathChildrenCacheEvent event) throws Exception {
Pavlin Radoslavovf1377ce2014-02-05 17:37:24 -0800197
Ray Milkey269ffb92014-04-03 14:43:30 -0700198 String strSwitch = null;
199 if (event.getData() != null) {
200 String[] splitted = event.getData().getPath().split("/");
201 strSwitch = splitted[splitted.length - 1];
Nick Karanatsios8abe7172014-02-19 20:31:48 -0800202 }
Ray Milkey269ffb92014-04-03 14:43:30 -0700203
204 switch (event.getType()) {
205 case CHILD_ADDED:
206 case CHILD_UPDATED:
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700207 // Check we have a PathChildrenCache for this child
208 // and add one if not
Ray Milkey269ffb92014-04-03 14:43:30 -0700209 synchronized (switchPathCaches) {
210 if (switchPathCaches.get(strSwitch) == null) {
211 PathChildrenCache pc = new PathChildrenCache(client,
212 event.getData().getPath(), true);
213 pc.start(StartMode.NORMAL);
214 switchPathCaches.put(strSwitch, pc);
215 }
216 }
217 break;
218 case CHILD_REMOVED:
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700219 // Remove our PathChildrenCache for this child
Ray Milkey269ffb92014-04-03 14:43:30 -0700220 PathChildrenCache pc = null;
221 synchronized (switchPathCaches) {
222 pc = switchPathCaches.remove(strSwitch);
223 }
224 if (pc != null) {
225 pc.close();
226 }
227 break;
228 default:
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700229 // All other switchLeadershipEvents are connection status
230 // switchLeadershipEvents. We don't need to do anything as
231 // the path cache handles these on its own.
Ray Milkey269ffb92014-04-03 14:43:30 -0700232 break;
233 }
234
235 }
236 }
237
Jonathan Hart12a26aa2014-06-04 14:33:09 -0700238 private static class ClusterLeaderListener implements LeaderLatchListener {
Ray Milkey269ffb92014-04-03 14:43:30 -0700239 //
240 // NOTE: If we need to support callbacks when the
241 // leadership changes, those should be called here.
242 //
243
244 @Override
245 public void isLeader() {
Yuta HIGUCHI5bbbaca2014-06-09 16:39:08 -0700246 log.info("Cluster leadership aquired");
Ray Milkey269ffb92014-04-03 14:43:30 -0700247 }
248
249 @Override
250 public void notLeader() {
Yuta HIGUCHI5bbbaca2014-06-09 16:39:08 -0700251 log.info("Cluster leadership lost");
Ray Milkey269ffb92014-04-03 14:43:30 -0700252 }
253 }
254
Ray Milkey269ffb92014-04-03 14:43:30 -0700255 @Override
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700256 public void requestControl(long dpid, ControlChangeCallback cb)
257 throws RegistryException {
Ray Milkey269ffb92014-04-03 14:43:30 -0700258 log.info("Requesting control for {}", HexString.toHexString(dpid));
259
Pavlin Radoslavov53b208a2014-07-28 13:16:11 -0700260 if (onosInstanceId == null) {
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700261 throw new IllegalStateException("Must register a controller before"
262 + " calling requestControl");
Ray Milkey269ffb92014-04-03 14:43:30 -0700263 }
264
265 String dpidStr = HexString.toHexString(dpid);
Ray Milkey269ffb92014-04-03 14:43:30 -0700266
267 if (switches.get(dpidStr) != null) {
268 log.debug("Already contesting {}, returning", HexString.toHexString(dpid));
269 throw new RegistryException("Already contesting control for " + dpidStr);
270 }
271
Jonathan Hart12a26aa2014-06-04 14:33:09 -0700272 String latchPath = SWITCH_LATCHES_PATH + "/" + dpidStr;
273
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700274 LeaderLatch latch =
Pavlin Radoslavov53b208a2014-07-28 13:16:11 -0700275 new LeaderLatch(curatorFrameworkClient, latchPath,
276 onosInstanceId.toString());
Pavlin Radoslavov0294e052014-04-10 13:36:45 -0700277 SwitchLeaderListener listener = new SwitchLeaderListener(dpidStr);
Ray Milkey269ffb92014-04-03 14:43:30 -0700278 latch.addListener(listener);
279
280
281 SwitchLeadershipData swData = new SwitchLeadershipData(latch, cb, listener);
282 SwitchLeadershipData oldData = switches.putIfAbsent(dpidStr, swData);
283
284 if (oldData != null) {
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700285 // There was already data for that key in the map
286 // i.e. someone else got here first so we can't succeed
Ray Milkey269ffb92014-04-03 14:43:30 -0700287 log.debug("Already requested control for {}", dpidStr);
288 throw new RegistryException("Already requested control for " + dpidStr);
289 }
290
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700291 // Now that we know we were able to add our latch to the collection,
292 // we can start the leader election in Zookeeper. However I don't know
293 // how to handle if the start fails - the latch is already in our
294 // switches list.
295 // TODO seems like there's a Curator bug when latch.start is called when
296 // there's no Zookeeper connection which causes two znodes to be put in
297 // Zookeeper at the latch path when we reconnect to Zookeeper.
Ray Milkey269ffb92014-04-03 14:43:30 -0700298 try {
299 latch.start();
300 } catch (Exception e) {
301 log.warn("Error starting leader latch: {}", e.getMessage());
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700302 throw new RegistryException("Error starting leader latch for "
303 + dpidStr, e);
Ray Milkey269ffb92014-04-03 14:43:30 -0700304 }
305
306 }
307
308 @Override
309 public void releaseControl(long dpid) {
310 log.info("Releasing control for {}", HexString.toHexString(dpid));
311
312 String dpidStr = HexString.toHexString(dpid);
313
314 SwitchLeadershipData swData = switches.remove(dpidStr);
315
316 if (swData == null) {
317 log.debug("Trying to release control of a switch we are not contesting");
318 return;
319 }
320
321 LeaderLatch latch = swData.getLatch();
322
323 latch.removeListener(swData.getListener());
324
325 try {
326 latch.close();
327 } catch (IOException e) {
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700328 // I think it's OK not to do anything here. Either the node got
329 // deleted correctly, or the connection went down and the node got deleted.
Ray Milkey269ffb92014-04-03 14:43:30 -0700330 log.debug("releaseControl: caught IOException {}", dpidStr);
331 }
332 }
333
334 @Override
335 public boolean hasControl(long dpid) {
336 String dpidStr = HexString.toHexString(dpid);
337
338 SwitchLeadershipData swData = switches.get(dpidStr);
339
340 if (swData == null) {
341 log.warn("No leader latch for dpid {}", dpidStr);
342 return false;
343 }
344
345 return swData.getLatch().hasLeadership();
346 }
347
348 @Override
349 public boolean isClusterLeader() {
350 return clusterLeaderLatch.hasLeadership();
351 }
352
353 @Override
Pavlin Radoslavov53b208a2014-07-28 13:16:11 -0700354 public OnosInstanceId getOnosInstanceId() {
355 return onosInstanceId;
Ray Milkey269ffb92014-04-03 14:43:30 -0700356 }
357
358 @Override
359 public Collection<String> getAllControllers() throws RegistryException {
360 log.debug("Getting all controllers");
361
362 List<String> controllers = new ArrayList<String>();
363 for (ServiceInstance<ControllerService> instance : serviceCache.getInstances()) {
364 String id = instance.getPayload().getControllerId();
365 if (!controllers.contains(id)) {
366 controllers.add(id);
367 }
368 }
369
370 return controllers;
371 }
372
373 @Override
374 public void registerController(String id) throws RegistryException {
Pavlin Radoslavov53b208a2014-07-28 13:16:11 -0700375 if (onosInstanceId != null) {
Ray Milkey269ffb92014-04-03 14:43:30 -0700376 throw new RegistryException(
Pavlin Radoslavov53b208a2014-07-28 13:16:11 -0700377 "Controller already registered with id " + onosInstanceId);
Ray Milkey269ffb92014-04-03 14:43:30 -0700378 }
379
Pavlin Radoslavov53b208a2014-07-28 13:16:11 -0700380 onosInstanceId = new OnosInstanceId(id);
Ray Milkey269ffb92014-04-03 14:43:30 -0700381
382 try {
Jonathan Hart12a26aa2014-06-04 14:33:09 -0700383 ServiceInstance<ControllerService> thisInstance =
384 ServiceInstance.<ControllerService>builder()
Ray Milkey269ffb92014-04-03 14:43:30 -0700385 .name(CONTROLLER_SERVICE_NAME)
Pavlin Radoslavov53b208a2014-07-28 13:16:11 -0700386 .payload(new ControllerService(onosInstanceId.toString()))
Ray Milkey269ffb92014-04-03 14:43:30 -0700387 .build();
388
389 serviceDiscovery.registerService(thisInstance);
390 } catch (Exception e) {
Jonathan Hart12a26aa2014-06-04 14:33:09 -0700391 log.error("Exception starting service instance:", e);
Ray Milkey269ffb92014-04-03 14:43:30 -0700392 }
393
394 }
395
396 @Override
397 public String getControllerForSwitch(long dpid) throws RegistryException {
398 String dpidStr = HexString.toHexString(dpid);
399
400 PathChildrenCache switchCache = switchPathCaches.get(dpidStr);
401
402 if (switchCache == null) {
403 log.warn("Tried to get controller for non-existent switch");
Nick Karanatsios8abe7172014-02-19 20:31:48 -0800404 return null;
405 }
Pavlin Radoslavov52163ed2014-03-19 11:39:34 -0700406
Ray Milkey269ffb92014-04-03 14:43:30 -0700407 try {
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700408 // We've seen issues with these caches get stuck out of date, so
409 // we'll have to force them to refresh before each read. This slows
410 // down the method as it blocks on a Zookeeper query, however at
411 // the moment only the cleanup thread uses this and that isn't
412 // particularly time-sensitive.
413 // TODO verify if it is still the case that caches can be out of date
Ray Milkey269ffb92014-04-03 14:43:30 -0700414 switchCache.rebuild();
415 } catch (Exception e) {
Jonathan Hart12a26aa2014-06-04 14:33:09 -0700416 log.error("Exception rebuilding the switch cache:", e);
Ray Milkey269ffb92014-04-03 14:43:30 -0700417 }
Pavlin Radoslavov52163ed2014-03-19 11:39:34 -0700418
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700419 List<ChildData> sortedData =
420 new ArrayList<ChildData>(switchCache.getCurrentData());
Ray Milkey269ffb92014-04-03 14:43:30 -0700421
422 Collections.sort(
423 sortedData,
424 new Comparator<ChildData>() {
425 private String getSequenceNumber(String path) {
426 return path.substring(path.lastIndexOf('-') + 1);
427 }
428
429 @Override
430 public int compare(ChildData lhs, ChildData rhs) {
431 return getSequenceNumber(lhs.getPath()).
432 compareTo(getSequenceNumber(rhs.getPath()));
433 }
434 }
435 );
436
Jonathan Hart12a26aa2014-06-04 14:33:09 -0700437 if (sortedData.isEmpty()) {
Ray Milkey269ffb92014-04-03 14:43:30 -0700438 return null;
439 }
440
441 return new String(sortedData.get(0).getData(), Charsets.UTF_8);
442 }
443
444 @Override
Ray Milkey5df613b2014-04-15 10:50:56 -0700445 public Collection<Long> getSwitchesControlledByController(String controller) {
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700446 // TODO remove this if not needed
Jonathan Hart12a26aa2014-06-04 14:33:09 -0700447 throw new NotImplementedException("Not yet implemented");
Ray Milkey269ffb92014-04-03 14:43:30 -0700448 }
449
450
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700451 // TODO what should happen when there's no ZK connection? Currently we just
452 // return the cache but this may lead to false impressions - i.e. we don't
453 // actually know what's in ZK so we shouldn't say we do
Ray Milkey269ffb92014-04-03 14:43:30 -0700454 @Override
455 public Map<String, List<ControllerRegistryEntry>> getAllSwitches() {
456 Map<String, List<ControllerRegistryEntry>> data =
457 new HashMap<String, List<ControllerRegistryEntry>>();
458
459 for (Map.Entry<String, PathChildrenCache> entry : switchPathCaches.entrySet()) {
460 List<ControllerRegistryEntry> contendingControllers =
461 new ArrayList<ControllerRegistryEntry>();
462
463 if (entry.getValue().getCurrentData().size() < 1) {
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700464 // TODO prevent even having the PathChildrenCache in this case
Ray Milkey269ffb92014-04-03 14:43:30 -0700465 continue;
466 }
467
468 for (ChildData d : entry.getValue().getCurrentData()) {
469
Ray Milkey5df613b2014-04-15 10:50:56 -0700470 String childsControllerId = new String(d.getData(), Charsets.UTF_8);
Ray Milkey269ffb92014-04-03 14:43:30 -0700471
472 String[] splitted = d.getPath().split("-");
473 int sequenceNumber = Integer.parseInt(splitted[splitted.length - 1]);
474
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700475 contendingControllers.add(new ControllerRegistryEntry(
476 childsControllerId, sequenceNumber));
Ray Milkey269ffb92014-04-03 14:43:30 -0700477 }
478
479 Collections.sort(contendingControllers);
480 data.put(entry.getKey(), contendingControllers);
481 }
482 return data;
483 }
484
Yuta HIGUCHI5bbbaca2014-06-09 16:39:08 -0700485 @Override
Ray Milkey269ffb92014-04-03 14:43:30 -0700486 public IdBlock allocateUniqueIdBlock(long range) {
487 try {
488 AtomicValue<Long> result = null;
489 do {
490 result = distributedIdCounter.add(range);
491 } while (result == null || !result.succeeded());
492
Sho SHIMIZU9257b0c2014-08-13 15:00:10 -0700493 return new IdBlock(result.preValue(), range);
Ray Milkey269ffb92014-04-03 14:43:30 -0700494 } catch (Exception e) {
495 log.error("Error allocating ID block");
496 }
497 return null;
498 }
499
500 /**
501 * Returns a block of IDs which are unique and unused.
Jonathan Hart12a26aa2014-06-04 14:33:09 -0700502 * The range of IDs is a fixed size and is allocated incrementally as this
503 * method is called. Since the range of IDs is managed by Zookeeper in
504 * distributed way, this method may block during Zookeeper access.
505 *
506 * @return an IdBlock containing a set of unique IDs
Ray Milkey269ffb92014-04-03 14:43:30 -0700507 */
508 @Override
509 public IdBlock allocateUniqueIdBlock() {
510 return allocateUniqueIdBlock(ID_BLOCK_SIZE);
511 }
512
513 /**
514 * Get a globally unique ID.
515 *
516 * @return a globally unique ID.
517 */
518 @Override
519 public synchronized long getNextUniqueId() {
520 //
521 // Generate the next Unique ID.
522 //
523 // TODO: For now, the higher 32 bits are random, and
524 // the lower 32 bits are sequential.
525 // The implementation must be updated to use the Zookeeper
526 // to allocate the higher 32 bits (globally unique).
527 //
528 if ((nextUniqueIdSuffix & 0xffffffffL) == 0xffffffffL) {
529 nextUniqueIdPrefix = randomGenerator.nextInt();
530 nextUniqueIdSuffix = 0;
531 } else {
532 nextUniqueIdSuffix++;
533 }
Pavlin Radoslavov952a9762014-04-10 13:47:03 -0700534 long result = nextUniqueIdPrefix << 32;
Ray Milkey269ffb92014-04-03 14:43:30 -0700535 result = result | (0xffffffffL & nextUniqueIdSuffix);
536 return result;
537 }
538
539 /*
540 * IFloodlightModule
541 */
542
543 @Override
544 public Collection<Class<? extends IFloodlightService>> getModuleServices() {
545 Collection<Class<? extends IFloodlightService>> l =
Jonathan Hart3d7730a2013-02-22 11:51:17 -0800546 new ArrayList<Class<? extends IFloodlightService>>();
Ray Milkey269ffb92014-04-03 14:43:30 -0700547 l.add(IControllerRegistryService.class);
548 return l;
549 }
Pavlin Radoslavov52163ed2014-03-19 11:39:34 -0700550
Ray Milkey269ffb92014-04-03 14:43:30 -0700551 @Override
552 public Map<Class<? extends IFloodlightService>, IFloodlightService> getServiceImpls() {
553 Map<Class<? extends IFloodlightService>, IFloodlightService> m =
554 new HashMap<Class<? extends IFloodlightService>, IFloodlightService>();
555 m.put(IControllerRegistryService.class, this);
556 return m;
557 }
Pavlin Radoslavov52163ed2014-03-19 11:39:34 -0700558
Ray Milkey269ffb92014-04-03 14:43:30 -0700559 @Override
560 public Collection<Class<? extends IFloodlightService>> getModuleDependencies() {
561 Collection<Class<? extends IFloodlightService>> l =
562 new ArrayList<Class<? extends IFloodlightService>>();
563 l.add(IFloodlightProviderService.class);
564 l.add(IRestApiService.class);
565 return l;
566 }
Jonathan Hartbd181b62013-02-17 16:05:38 -0800567
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700568 // TODO currently blocks startup when it can't get a Zookeeper connection.
569 // Do we support starting up with no Zookeeper connection?
Ray Milkey269ffb92014-04-03 14:43:30 -0700570 @Override
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700571 public void init(FloodlightModuleContext context)
572 throws FloodlightModuleException {
573 // Read the Zookeeper connection string from the config
Ray Milkey269ffb92014-04-03 14:43:30 -0700574 Map<String, String> configParams = context.getConfigParams(this);
Ray Milkey5df613b2014-04-15 10:50:56 -0700575 String connectionStringParam = configParams.get("connectionString");
576 if (connectionStringParam != null) {
577 connectionString = connectionStringParam;
Yuta HIGUCHI5f1ce1c2014-07-20 22:43:54 -0700578 } else {
579 connectionString = System.getProperty(
580 "net.onrc.onos.core.registry.ZookeeperRegistry.connectionString",
581 DEFAULT_CONNECTION_STRING);
Ray Milkey269ffb92014-04-03 14:43:30 -0700582 }
583 log.info("Setting Zookeeper connection string to {}", this.connectionString);
Jonathan Hart116b1fe2014-03-14 18:53:47 -0700584
Yuta HIGUCHI85de40d2014-06-12 14:06:41 -0700585 namespace = System.getProperty(ZK_NAMESPACE_KEY, DEFAULT_NAMESPACE).trim();
586 if (namespace.isEmpty()) {
587 namespace = DEFAULT_NAMESPACE;
588 }
589 log.info("Setting Zookeeper namespace to {}", namespace);
590
Ray Milkey269ffb92014-04-03 14:43:30 -0700591 restApi = context.getServiceImpl(IRestApiService.class);
Pavlin Radoslavovf1377ce2014-02-05 17:37:24 -0800592
Ray Milkey269ffb92014-04-03 14:43:30 -0700593 switches = new ConcurrentHashMap<String, SwitchLeadershipData>();
Ray Milkey269ffb92014-04-03 14:43:30 -0700594 switchPathCaches = new ConcurrentHashMap<String, PathChildrenCache>();
595
596 RetryPolicy retryPolicy = new ExponentialBackoffRetry(1000, 3);
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700597 curatorFrameworkClient =
598 CuratorFrameworkFactory.newClient(this.connectionString,
Ray Milkey5c9f2db2014-04-09 10:31:21 -0700599 SESSION_TIMEOUT, CONNECTION_TIMEOUT, retryPolicy);
Ray Milkey269ffb92014-04-03 14:43:30 -0700600
Ray Milkey5df613b2014-04-15 10:50:56 -0700601 curatorFrameworkClient.start();
Yuta HIGUCHI85de40d2014-06-12 14:06:41 -0700602 curatorFrameworkClient = curatorFrameworkClient.usingNamespace(namespace);
Ray Milkey269ffb92014-04-03 14:43:30 -0700603
604 distributedIdCounter = new DistributedAtomicLong(
Ray Milkey5df613b2014-04-15 10:50:56 -0700605 curatorFrameworkClient,
Ray Milkey269ffb92014-04-03 14:43:30 -0700606 ID_COUNTER_PATH,
607 new RetryOneTime(100));
608
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700609 rootSwitchCache = new PathChildrenCache(
610 curatorFrameworkClient, SWITCH_LATCHES_PATH, true);
Ray Milkey5df613b2014-04-15 10:50:56 -0700611 rootSwitchCache.getListenable().addListener(switchPathCacheListener);
Ray Milkey269ffb92014-04-03 14:43:30 -0700612
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700613 // Build the service discovery object
Ray Milkey269ffb92014-04-03 14:43:30 -0700614 serviceDiscovery = ServiceDiscoveryBuilder.builder(ControllerService.class)
Ray Milkey5df613b2014-04-15 10:50:56 -0700615 .client(curatorFrameworkClient).basePath(SERVICES_PATH).build();
Ray Milkey269ffb92014-04-03 14:43:30 -0700616
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700617 // We read the list of services very frequently (GUI periodically
618 // queries them) so we'll cache them to cut down on Zookeeper queries.
Ray Milkey269ffb92014-04-03 14:43:30 -0700619 serviceCache = serviceDiscovery.serviceCacheBuilder()
620 .name(CONTROLLER_SERVICE_NAME).build();
621
Ray Milkey269ffb92014-04-03 14:43:30 -0700622 try {
623 serviceDiscovery.start();
624 serviceCache.start();
625
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700626 // Don't prime the cache, we want a notification for each child
627 // node in the path
Ray Milkey5df613b2014-04-15 10:50:56 -0700628 rootSwitchCache.start(StartMode.NORMAL);
Ray Milkey269ffb92014-04-03 14:43:30 -0700629 } catch (Exception e) {
Jonathan Hart12a26aa2014-06-04 14:33:09 -0700630 throw new FloodlightModuleException(
631 "Error initialising ZookeeperRegistry", e);
Ray Milkey269ffb92014-04-03 14:43:30 -0700632 }
633
Jonathan Hart12a26aa2014-06-04 14:33:09 -0700634 ExecutorService eventThreadExecutorService =
635 Executors.newSingleThreadExecutor();
Ray Milkey269ffb92014-04-03 14:43:30 -0700636 eventThreadExecutorService.execute(
637 new Runnable() {
638 @Override
639 public void run() {
640 dispatchEvents();
641 }
642 });
643 }
644
645 @Override
646 public void startUp(FloodlightModuleContext context) {
647 //
648 // Cluster Leader election setup.
649 // NOTE: We have to do it here, because during the init stage
650 // we don't know the Controller ID.
651 //
Pavlin Radoslavov53b208a2014-07-28 13:16:11 -0700652 if (onosInstanceId == null) {
653 log.error("Error on startup: unknown ONOS Instance ID");
654 return;
Ray Milkey269ffb92014-04-03 14:43:30 -0700655 }
Ray Milkey5df613b2014-04-15 10:50:56 -0700656 clusterLeaderLatch = new LeaderLatch(curatorFrameworkClient,
Ray Milkey269ffb92014-04-03 14:43:30 -0700657 CLUSTER_LEADER_PATH,
Pavlin Radoslavov53b208a2014-07-28 13:16:11 -0700658 onosInstanceId.toString());
Pavlin Radoslavov0294e052014-04-10 13:36:45 -0700659 clusterLeaderListener = new ClusterLeaderListener();
Ray Milkey269ffb92014-04-03 14:43:30 -0700660 clusterLeaderLatch.addListener(clusterLeaderListener);
661 try {
662 clusterLeaderLatch.start();
663 } catch (Exception e) {
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700664 log.error("Error starting the cluster leader election: ", e);
Ray Milkey269ffb92014-04-03 14:43:30 -0700665 }
666
667 // Keep trying until there is a cluster leader
668 do {
669 try {
670 Participant leader = clusterLeaderLatch.getLeader();
Ray Milkeyb29e6262014-04-09 16:02:14 -0700671 if (!leader.getId().isEmpty()) {
Ray Milkey269ffb92014-04-03 14:43:30 -0700672 break;
Ray Milkeyb29e6262014-04-09 16:02:14 -0700673 }
Ray Milkey269ffb92014-04-03 14:43:30 -0700674 Thread.sleep(CLUSTER_LEADER_ELECTION_RETRY_MS);
675 } catch (Exception e) {
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700676 log.error("Error waiting for cluster leader election:", e);
Ray Milkey269ffb92014-04-03 14:43:30 -0700677 }
678 } while (true);
679
680 restApi.addRestletRoutable(new RegistryWebRoutable());
681 }
Umesh Krishnaswamyb56bb292013-02-12 20:28:27 -0800682}