blob: 8a2af1323a720bca6291f64400d4e91090bf52b6 [file] [log] [blame]
Jonathan Hartd82f20d2013-02-21 18:04:24 -08001package net.onrc.onos.registry.controller;
Umesh Krishnaswamyb56bb292013-02-12 20:28:27 -08002
Jonathan Hartbd181b62013-02-17 16:05:38 -08003import java.io.IOException;
Umesh Krishnaswamyb56bb292013-02-12 20:28:27 -08004import java.util.ArrayList;
5import java.util.Collection;
Jonathan Hart3d7730a2013-02-22 11:51:17 -08006import java.util.Collections;
Jonathan Hart599c6b32013-03-24 22:42:02 -07007import java.util.Comparator;
Umesh Krishnaswamyb56bb292013-02-12 20:28:27 -08008import java.util.HashMap;
Jonathan Hartedd6a442013-02-20 15:22:06 -08009import java.util.List;
Umesh Krishnaswamyb56bb292013-02-12 20:28:27 -080010import java.util.Map;
Jonathan Hart89187372013-03-14 16:41:09 -070011import java.util.concurrent.ConcurrentHashMap;
Umesh Krishnaswamyb56bb292013-02-12 20:28:27 -080012
Pavlin Radoslavovc35229e2014-02-06 16:19:37 -080013import net.floodlightcontroller.core.IFloodlightProviderService;
Umesh Krishnaswamyb56bb292013-02-12 20:28:27 -080014import net.floodlightcontroller.core.module.FloodlightModuleContext;
15import net.floodlightcontroller.core.module.FloodlightModuleException;
16import net.floodlightcontroller.core.module.IFloodlightModule;
17import net.floodlightcontroller.core.module.IFloodlightService;
Jonathan Hart3d7730a2013-02-22 11:51:17 -080018import net.floodlightcontroller.restserver.IRestApiService;
Naoki Shiotab32edf52013-12-12 14:09:36 -080019import net.onrc.onos.registry.controller.web.RegistryWebRoutable;
Umesh Krishnaswamyb56bb292013-02-12 20:28:27 -080020
Jonathan Hartbd181b62013-02-17 16:05:38 -080021import org.openflow.util.HexString;
22import org.slf4j.Logger;
23import org.slf4j.LoggerFactory;
Umesh Krishnaswamyb56bb292013-02-12 20:28:27 -080024
Jonathan Hartd10008d2013-02-23 17:04:08 -080025import com.google.common.base.Charsets;
Jonathan Hartbd181b62013-02-17 16:05:38 -080026import com.netflix.curator.RetryPolicy;
27import com.netflix.curator.framework.CuratorFramework;
28import com.netflix.curator.framework.CuratorFrameworkFactory;
Jonathan Hart1530ccc2013-04-03 19:36:02 -070029import com.netflix.curator.framework.recipes.atomic.AtomicValue;
30import com.netflix.curator.framework.recipes.atomic.DistributedAtomicLong;
Jonathan Hartedd6a442013-02-20 15:22:06 -080031import com.netflix.curator.framework.recipes.cache.ChildData;
32import com.netflix.curator.framework.recipes.cache.PathChildrenCache;
33import com.netflix.curator.framework.recipes.cache.PathChildrenCache.StartMode;
Jonathan Hart3d7730a2013-02-22 11:51:17 -080034import com.netflix.curator.framework.recipes.cache.PathChildrenCacheEvent;
35import com.netflix.curator.framework.recipes.cache.PathChildrenCacheListener;
Jonathan Hartbd181b62013-02-17 16:05:38 -080036import com.netflix.curator.framework.recipes.leader.LeaderLatch;
Jonathan Hart0de09492013-03-13 14:37:21 -070037import com.netflix.curator.framework.recipes.leader.LeaderLatchEvent;
38import com.netflix.curator.framework.recipes.leader.LeaderLatchListener;
Pavlin Radoslavovf1377ce2014-02-05 17:37:24 -080039import com.netflix.curator.framework.recipes.leader.Participant;
Jonathan Hart3d7730a2013-02-22 11:51:17 -080040import com.netflix.curator.retry.ExponentialBackoffRetry;
Jonathan Hart1530ccc2013-04-03 19:36:02 -070041import com.netflix.curator.retry.RetryOneTime;
Jonathan Hart71c0ffc2013-03-24 15:58:42 -070042import com.netflix.curator.x.discovery.ServiceCache;
43import com.netflix.curator.x.discovery.ServiceDiscovery;
44import com.netflix.curator.x.discovery.ServiceDiscoveryBuilder;
45import com.netflix.curator.x.discovery.ServiceInstance;
Nick Karanatsios8abe7172014-02-19 20:31:48 -080046import java.util.concurrent.ArrayBlockingQueue;
47import java.util.concurrent.BlockingQueue;
Jonathan Hartbd181b62013-02-17 16:05:38 -080048
Jonathan Hart7bf62172013-02-28 13:17:18 -080049/**
50 * A registry service that uses Zookeeper. All data is stored in Zookeeper,
51 * so this can be used as a global registry in a multi-node ONOS cluster.
52 * @author jono
53 *
54 */
Jonathan Hartbd766972013-02-22 15:13:03 -080055public class ZookeeperRegistry implements IFloodlightModule, IControllerRegistryService {
Jonathan Hartc6eee9e2013-02-18 14:58:27 -080056
Yuta HIGUCHI6ac8d182013-10-22 15:24:56 -070057 protected final static Logger log = LoggerFactory.getLogger(ZookeeperRegistry.class);
Jonathan Hartbd766972013-02-22 15:13:03 -080058 protected String controllerId = null;
Umesh Krishnaswamyb56bb292013-02-12 20:28:27 -080059
Jonathan Hart3d7730a2013-02-22 11:51:17 -080060 protected IRestApiService restApi;
61
Jonathan Hart7bf62172013-02-28 13:17:18 -080062 //This is the default, it's overwritten by the connectionString configuration parameter
Jonathan Hartbd181b62013-02-17 16:05:38 -080063 protected String connectionString = "localhost:2181";
Jonathan Hart3d7730a2013-02-22 11:51:17 -080064
Jonathan Hartbd181b62013-02-17 16:05:38 -080065 private final String namespace = "onos";
Jonathan Hartedd6a442013-02-20 15:22:06 -080066 private final String switchLatchesPath = "/switches";
Pavlin Radoslavovf1377ce2014-02-05 17:37:24 -080067 private final String CLUSTER_LEADER_PATH = "/cluster/leader";
Jonathan Hart71c0ffc2013-03-24 15:58:42 -070068
69 private final String SERVICES_PATH = "/"; //i.e. the root of our namespace
70 private final String CONTROLLER_SERVICE_NAME = "controllers";
Jonathan Hartbd181b62013-02-17 16:05:38 -080071
72 protected CuratorFramework client;
Jonathan Hartedd6a442013-02-20 15:22:06 -080073
Jonathan Hart3d7730a2013-02-22 11:51:17 -080074 protected PathChildrenCache switchCache;
Jonathan Hartbd181b62013-02-17 16:05:38 -080075
Jonathan Hart89187372013-03-14 16:41:09 -070076 protected ConcurrentHashMap<String, SwitchLeadershipData> switches;
Jonathan Hart3d7730a2013-02-22 11:51:17 -080077 protected Map<String, PathChildrenCache> switchPathCaches;
Pavlin Radoslavovf1377ce2014-02-05 17:37:24 -080078
79 protected LeaderLatch clusterLeaderLatch;
80 protected ClusterLeaderListener clusterLeaderListener;
81 private static final long CLUSTER_LEADER_ELECTION_RETRY_MS = 100;
82
Jonathan Hart1530ccc2013-04-03 19:36:02 -070083 private final String ID_COUNTER_PATH = "/flowidcounter";
84 private final Long ID_BLOCK_SIZE = 0x100000000L;
85 protected DistributedAtomicLong distributedIdCounter;
86
Jonathan Hart97801ac2013-02-26 14:29:16 -080087 //Zookeeper performance-related configuration
Jonathan Hart0b3eee42013-03-16 18:20:04 -070088 protected static final int sessionTimeout = 5000;
89 protected static final int connectionTimeout = 7000;
Nick Karanatsios8abe7172014-02-19 20:31:48 -080090 private volatile IdBlock idBlock = null;
Jonathan Hart57080fb2013-02-21 10:55:46 -080091
Jonathan Hartbd181b62013-02-17 16:05:38 -080092
Jonathan Hart89187372013-03-14 16:41:09 -070093 protected class SwitchLeaderListener implements LeaderLatchListener{
Jonathan Hart0de09492013-03-13 14:37:21 -070094 String dpid;
95 LeaderLatch latch;
96
Jonathan Hart89187372013-03-14 16:41:09 -070097 public SwitchLeaderListener(String dpid, LeaderLatch latch){
Jonathan Hart0de09492013-03-13 14:37:21 -070098 this.dpid = dpid;
99 this.latch = latch;
100 }
101
102 @Override
103 public void leaderLatchEvent(CuratorFramework arg0,
104 LeaderLatchEvent arg1) {
Pavlin Radoslavovf1377ce2014-02-05 17:37:24 -0800105 log.debug("Switch leadership changed for {}, now {}",
Jonathan Hart0de09492013-03-13 14:37:21 -0700106 dpid, latch.hasLeadership());
107
Jonathan Hart89187372013-03-14 16:41:09 -0700108 //Check that the leadership request is still active - the client
109 //may have since released the request or even begun another request
110 //(this is why we use == to check the object instance is the same)
111 SwitchLeadershipData swData = switches.get(dpid);
Naoki Shiota2999e3d2014-01-03 17:22:59 -0800112 if (swData == null) {
Naoki Shiota1a5ca912014-01-03 17:02:31 -0800113 log.debug("Leadership data {} not found", dpid);
Naoki Shiota2999e3d2014-01-03 17:22:59 -0800114 return;
Naoki Shiota1a5ca912014-01-03 17:02:31 -0800115 }
116
117 if (swData.getLatch() == latch){
Jonathan Hart89187372013-03-14 16:41:09 -0700118 swData.getCallback().controlChanged(
119 HexString.toLong(dpid), latch.hasLeadership());
120 }
121 else {
Jonathan Hart4baf3be2013-03-21 18:26:13 -0700122 log.debug("Latch for {} has changed: old latch {} - new latch {}",
123 new Object[]{dpid, latch, swData.getLatch()});
Jonathan Hart89187372013-03-14 16:41:09 -0700124 }
Jonathan Hart0de09492013-03-13 14:37:21 -0700125 }
126 }
127
Naoki Shiotad00accf2013-06-25 14:40:37 -0700128 protected class SwitchPathCacheListener implements PathChildrenCacheListener {
Jonathan Hart3d7730a2013-02-22 11:51:17 -0800129 @Override
130 public void childEvent(CuratorFramework client,
131 PathChildrenCacheEvent event) throws Exception {
Jonathan Hartcbb4b952013-03-18 16:15:18 -0700132 //log.debug("Root switch path cache got {} event", event.getType());
Jonathan Hart3d7730a2013-02-22 11:51:17 -0800133
134 String strSwitch = null;
135 if (event.getData() != null){
Jonathan Hart3d7730a2013-02-22 11:51:17 -0800136 String[] splitted = event.getData().getPath().split("/");
137 strSwitch = splitted[splitted.length - 1];
Jonathan Hart3d7730a2013-02-22 11:51:17 -0800138 }
139
140 switch (event.getType()){
141 case CHILD_ADDED:
142 case CHILD_UPDATED:
143 //Check we have a PathChildrenCache for this child, add one if not
Jonathan Hart4baf3be2013-03-21 18:26:13 -0700144 synchronized (switchPathCaches){
145 if (switchPathCaches.get(strSwitch) == null){
146 PathChildrenCache pc = new PathChildrenCache(client,
147 event.getData().getPath(), true);
148 pc.start(StartMode.NORMAL);
149 switchPathCaches.put(strSwitch, pc);
150 }
Jonathan Hart3d7730a2013-02-22 11:51:17 -0800151 }
152 break;
153 case CHILD_REMOVED:
154 //Remove our PathChildrenCache for this child
Jonathan Hart4baf3be2013-03-21 18:26:13 -0700155 PathChildrenCache pc = null;
156 synchronized(switchPathCaches){
157 pc = switchPathCaches.remove(strSwitch);
158 }
159 if (pc != null){
160 pc.close();
161 }
Jonathan Hart3d7730a2013-02-22 11:51:17 -0800162 break;
163 default:
Jonathan Hart4baf3be2013-03-21 18:26:13 -0700164 //All other events are connection status events. We don't need to
165 //do anything as the path cache handles these on its own.
Jonathan Hart3d7730a2013-02-22 11:51:17 -0800166 break;
167 }
168
169 }
170 };
Pavlin Radoslavovf1377ce2014-02-05 17:37:24 -0800171
172 protected class ClusterLeaderListener implements LeaderLatchListener {
173 LeaderLatch latch;
174
175 public ClusterLeaderListener(LeaderLatch latch) {
176 this.latch = latch;
177 }
178
179 @Override
180 public void leaderLatchEvent(CuratorFramework arg0,
181 LeaderLatchEvent arg1) {
182 log.debug("Cluster leadership changed, now {}",
183 latch.hasLeadership());
184 //
185 // NOTE: If we need to support callbacks when the
186 // leadership changes, those should be called here.
187 //
188 }
189 }
190
    /**
     * Listens for changes to the switch znodes in Zookeeper. This maintains
     * the second level of PathChildrenCaches that hold the controllers
     * contending for each switch - there's one for each switch.
     */
    PathChildrenCacheListener switchPathCacheListener = new SwitchPathCacheListener();
    // Service discovery used to register this controller; built in init()
    protected ServiceDiscovery<ControllerService> serviceDiscovery;
    // Cache of the registered controller service instances, read by getAllControllers()
    protected ServiceCache<ControllerService> serviceCache;
Jonathan Hartedd6a442013-02-20 15:22:06 -0800199
Jonathan Hartbd181b62013-02-17 16:05:38 -0800200
201 @Override
Jonathan Hart3d7730a2013-02-22 11:51:17 -0800202 public void requestControl(long dpid, ControlChangeCallback cb) throws RegistryException {
Jonathan Hart7bf62172013-02-28 13:17:18 -0800203 log.info("Requesting control for {}", HexString.toHexString(dpid));
Jonathan Hartc6eee9e2013-02-18 14:58:27 -0800204
Jonathan Hartbd766972013-02-22 15:13:03 -0800205 if (controllerId == null){
206 throw new RuntimeException("Must register a controller before calling requestControl");
Jonathan Hartbd181b62013-02-17 16:05:38 -0800207 }
208
209 String dpidStr = HexString.toHexString(dpid);
210 String latchPath = switchLatchesPath + "/" + dpidStr;
211
Jonathan Hart89187372013-03-14 16:41:09 -0700212 if (switches.get(dpidStr) != null){
Jonathan Hart3c0eccd2013-03-12 22:32:50 -0700213 log.debug("Already contesting {}, returning", HexString.toHexString(dpid));
Pankaj Berdeda7187b2013-03-18 15:24:59 -0700214 throw new RegistryException("Already contesting control for " + dpidStr);
Jonathan Hartc6eee9e2013-02-18 14:58:27 -0800215 }
216
Jonathan Hartbd766972013-02-22 15:13:03 -0800217 LeaderLatch latch = new LeaderLatch(client, latchPath, controllerId);
Jonathan Hart89187372013-03-14 16:41:09 -0700218 latch.addListener(new SwitchLeaderListener(dpidStr, latch));
Jonathan Hartbd181b62013-02-17 16:05:38 -0800219
Jonathan Hart44e56fc2013-03-14 16:53:59 -0700220
Jonathan Hart89187372013-03-14 16:41:09 -0700221 SwitchLeadershipData swData = new SwitchLeadershipData(latch, cb);
222 SwitchLeadershipData oldData = switches.putIfAbsent(dpidStr, swData);
223
224 if (oldData != null){
225 //There was already data for that key in the map
226 //i.e. someone else got here first so we can't succeed
227 log.debug("Already requested control for {}", dpidStr);
228 throw new RegistryException("Already requested control for " + dpidStr);
229 }
230
231 //Now that we know we were able to add our latch to the collection,
Jonathan Hart44e56fc2013-03-14 16:53:59 -0700232 //we can start the leader election in Zookeeper. However I don't know
233 //how to handle if the start fails - the latch is already in our
234 //switches list.
235 //TODO seems like there's a Curator bug when latch.start is called when
236 //there's no Zookeeper connection which causes two znodes to be put in
237 //Zookeeper at the latch path when we reconnect to Zookeeper.
Jonathan Hartbd181b62013-02-17 16:05:38 -0800238 try {
Jonathan Hartbd181b62013-02-17 16:05:38 -0800239 latch.start();
240 } catch (Exception e) {
Jonathan Hartc6eee9e2013-02-18 14:58:27 -0800241 log.warn("Error starting leader latch: {}", e.getMessage());
Jonathan Hart3d7730a2013-02-22 11:51:17 -0800242 throw new RegistryException("Error starting leader latch for " + dpidStr, e);
Jonathan Hartbd181b62013-02-17 16:05:38 -0800243 }
244
245 }
246
247 @Override
Jonathan Hartd82f20d2013-02-21 18:04:24 -0800248 public void releaseControl(long dpid) {
Jonathan Hart7bf62172013-02-28 13:17:18 -0800249 log.info("Releasing control for {}", HexString.toHexString(dpid));
Jonathan Hart57080fb2013-02-21 10:55:46 -0800250
Jonathan Hartc6eee9e2013-02-18 14:58:27 -0800251 String dpidStr = HexString.toHexString(dpid);
252
Jonathan Hart89187372013-03-14 16:41:09 -0700253 SwitchLeadershipData swData = switches.remove(dpidStr);
254
255 if (swData == null) {
Jonathan Hart7bf62172013-02-28 13:17:18 -0800256 log.debug("Trying to release control of a switch we are not contesting");
Jonathan Hartbd181b62013-02-17 16:05:38 -0800257 return;
258 }
Jonathan Hart89187372013-03-14 16:41:09 -0700259
Jonathan Hart89187372013-03-14 16:41:09 -0700260 LeaderLatch latch = swData.getLatch();
Jonathan Hartbd181b62013-02-17 16:05:38 -0800261
Jonathan Hart4baf3be2013-03-21 18:26:13 -0700262 latch.removeAllListeners();
263
Jonathan Hartbd181b62013-02-17 16:05:38 -0800264 try {
265 latch.close();
266 } catch (IOException e) {
Jonathan Hart7bf62172013-02-28 13:17:18 -0800267 //I think it's OK not to do anything here. Either the node got
268 //deleted correctly, or the connection went down and the node got deleted.
Umesh Krishnaswamy0ef75ee2013-03-25 17:50:27 -0700269 log.debug("releaseControl: caught IOException {}", dpidStr);
Jonathan Hartbd181b62013-02-17 16:05:38 -0800270 }
271 }
272
273 @Override
Jonathan Hartd82f20d2013-02-21 18:04:24 -0800274 public boolean hasControl(long dpid) {
Jonathan Hart89187372013-03-14 16:41:09 -0700275 String dpidStr = HexString.toHexString(dpid);
Jonathan Hart57080fb2013-02-21 10:55:46 -0800276
Jonathan Hart89187372013-03-14 16:41:09 -0700277 SwitchLeadershipData swData = switches.get(dpidStr);
Jonathan Hartbd181b62013-02-17 16:05:38 -0800278
Jonathan Hart89187372013-03-14 16:41:09 -0700279 if (swData == null) {
280 log.warn("No leader latch for dpid {}", dpidStr);
Jonathan Hartbd181b62013-02-17 16:05:38 -0800281 return false;
282 }
283
Jonathan Hart89187372013-03-14 16:41:09 -0700284 return swData.getLatch().hasLeadership();
Jonathan Hartbd181b62013-02-17 16:05:38 -0800285 }
286
287 @Override
Pavlin Radoslavovf1377ce2014-02-05 17:37:24 -0800288 public boolean isClusterLeader() {
289 return clusterLeaderLatch.hasLeadership();
290 }
291
292 @Override
Jonathan Hart7bf62172013-02-28 13:17:18 -0800293 public String getControllerId() {
Jonathan Hartbd766972013-02-22 15:13:03 -0800294 return controllerId;
Jonathan Hartbd181b62013-02-17 16:05:38 -0800295 }
296
Jonathan Hartedd6a442013-02-20 15:22:06 -0800297 @Override
Jonathan Hart57080fb2013-02-21 10:55:46 -0800298 public Collection<String> getAllControllers() throws RegistryException {
Jonathan Hartedd6a442013-02-20 15:22:06 -0800299 log.debug("Getting all controllers");
Jonathan Hart1be46262013-02-20 16:43:51 -0800300
Jonathan Hartedd6a442013-02-20 15:22:06 -0800301 List<String> controllers = new ArrayList<String>();
Jonathan Hart71c0ffc2013-03-24 15:58:42 -0700302 for (ServiceInstance<ControllerService> instance : serviceCache.getInstances()){
303 String id = instance.getPayload().getControllerId();
304 if (!controllers.contains(id)){
305 controllers.add(id);
Jonathan Hartedd6a442013-02-20 15:22:06 -0800306 }
Jonathan Hartedd6a442013-02-20 15:22:06 -0800307 }
Jonathan Hart71c0ffc2013-03-24 15:58:42 -0700308
Jonathan Hartedd6a442013-02-20 15:22:06 -0800309 return controllers;
310 }
311
312 @Override
Jonathan Hart57080fb2013-02-21 10:55:46 -0800313 public void registerController(String id) throws RegistryException {
Jonathan Hartd10008d2013-02-23 17:04:08 -0800314 if (controllerId != null) {
315 throw new RegistryException(
316 "Controller already registered with id " + controllerId);
317 }
Jonathan Hartbd766972013-02-22 15:13:03 -0800318
319 controllerId = id;
Jonathan Hart57080fb2013-02-21 10:55:46 -0800320
Jonathan Hartedd6a442013-02-20 15:22:06 -0800321 try {
Jonathan Hart71c0ffc2013-03-24 15:58:42 -0700322 ServiceInstance<ControllerService> thisInstance = ServiceInstance.<ControllerService>builder()
323 .name(CONTROLLER_SERVICE_NAME)
324 .payload(new ControllerService(controllerId))
325 //.port((int)(65535 * Math.random())) // in a real application, you'd use a common port
326 //.uriSpec(uriSpec)
327 .build();
Jonathan Hart0b3eee42013-03-16 18:20:04 -0700328
Jonathan Hart71c0ffc2013-03-24 15:58:42 -0700329 serviceDiscovery.registerService(thisInstance);
Jonathan Hartedd6a442013-02-20 15:22:06 -0800330 } catch (Exception e) {
Jonathan Hart71c0ffc2013-03-24 15:58:42 -0700331 // TODO Auto-generated catch block
332 e.printStackTrace();
Jonathan Hartedd6a442013-02-20 15:22:06 -0800333 }
Jonathan Hart71c0ffc2013-03-24 15:58:42 -0700334
Jonathan Hartedd6a442013-02-20 15:22:06 -0800335 }
336
337 @Override
Jonathan Hart57080fb2013-02-21 10:55:46 -0800338 public String getControllerForSwitch(long dpid) throws RegistryException {
Jonathan Hart89187372013-03-14 16:41:09 -0700339 String dpidStr = HexString.toHexString(dpid);
Pankaj Berde017960a2013-03-14 20:32:26 -0700340
Jonathan Hart599c6b32013-03-24 22:42:02 -0700341 PathChildrenCache switchCache = switchPathCaches.get(dpidStr);
342
343 if (switchCache == null){
Jonathan Hartedd6a442013-02-20 15:22:06 -0800344 log.warn("Tried to get controller for non-existent switch");
345 return null;
346 }
347
Jonathan Hartf4e80842013-03-26 23:55:02 -0700348 try {
349 //We've seen issues with these caches get stuck out of date, so we'll have to
350 //force them to refresh before each read. This slows down the method as it
351 //blocks on a Zookeeper query, however at the moment only the cleanup thread
352 //uses this and that isn't particularly time-sensitive.
353 switchCache.rebuild();
354 } catch (Exception e) {
355 // TODO Auto-generated catch block
356 e.printStackTrace();
357 }
358
Jonathan Hart599c6b32013-03-24 22:42:02 -0700359 List<ChildData> sortedData = new ArrayList<ChildData>(switchCache.getCurrentData());
Jonathan Hart0b3eee42013-03-16 18:20:04 -0700360
Jonathan Hart599c6b32013-03-24 22:42:02 -0700361 Collections.sort(
362 sortedData,
363 new Comparator<ChildData>(){
364 private String getSequenceNumber(String path){
365 return path.substring(path.lastIndexOf('-') + 1);
366 }
367 @Override
368 public int compare(ChildData lhs, ChildData rhs) {
369 return getSequenceNumber(lhs.getPath()).
370 compareTo(getSequenceNumber(rhs.getPath()));
371 }
372 }
373 );
Jonathan Hartedd6a442013-02-20 15:22:06 -0800374
Jonathan Hart56b296e2013-03-25 13:30:10 -0700375 if (sortedData.size() == 0){
376 return null;
377 }
378
Jonathan Hart599c6b32013-03-24 22:42:02 -0700379 return new String(sortedData.get(0).getData(), Charsets.UTF_8);
Jonathan Hartedd6a442013-02-20 15:22:06 -0800380 }
381
382 @Override
383 public Collection<Long> getSwitchesControlledByController(String controllerId) {
Jonathan Hart3d7730a2013-02-22 11:51:17 -0800384 //TODO remove this if not needed
Jonathan Hartbd766972013-02-22 15:13:03 -0800385 throw new RuntimeException("Not yet implemented");
Jonathan Hartedd6a442013-02-20 15:22:06 -0800386 }
Jonathan Hartbd181b62013-02-17 16:05:38 -0800387
Jonathan Hartd82f20d2013-02-21 18:04:24 -0800388
Jonathan Hart89187372013-03-14 16:41:09 -0700389 //TODO what should happen when there's no ZK connection? Currently we just return
390 //the cache but this may lead to false impressions - i.e. we don't actually know
391 //what's in ZK so we shouldn't say we do
Jonathan Hartd82f20d2013-02-21 18:04:24 -0800392 @Override
Jonathan Hart3d7730a2013-02-22 11:51:17 -0800393 public Map<String, List<ControllerRegistryEntry>> getAllSwitches() {
394 Map<String, List<ControllerRegistryEntry>> data =
395 new HashMap<String, List<ControllerRegistryEntry>>();
396
397 for (Map.Entry<String, PathChildrenCache> entry : switchPathCaches.entrySet()){
398 List<ControllerRegistryEntry> contendingControllers =
399 new ArrayList<ControllerRegistryEntry>();
400
401 if (entry.getValue().getCurrentData().size() < 1){
Jonathan Hartcbb4b952013-03-18 16:15:18 -0700402 //TODO prevent even having the PathChildrenCache in this case
403 //log.info("Switch entry with no leader elections: {}", entry.getKey());
Jonathan Hart3d7730a2013-02-22 11:51:17 -0800404 continue;
405 }
406
407 for (ChildData d : entry.getValue().getCurrentData()) {
Jonathan Hart97801ac2013-02-26 14:29:16 -0800408
Jonathan Hartd10008d2013-02-23 17:04:08 -0800409 String controllerId = new String(d.getData(), Charsets.UTF_8);
Jonathan Hart3d7730a2013-02-22 11:51:17 -0800410
411 String[] splitted = d.getPath().split("-");
412 int sequenceNumber = Integer.parseInt(splitted[splitted.length - 1]);
413
414 contendingControllers.add(new ControllerRegistryEntry(controllerId, sequenceNumber));
415 }
416
417 Collections.sort(contendingControllers);
418 data.put(entry.getKey(), contendingControllers);
419 }
420 return data;
Jonathan Hartd82f20d2013-02-21 18:04:24 -0800421 }
422
Nick Karanatsios8abe7172014-02-19 20:31:48 -0800423 public IdBlock allocateUniqueIdBlock(long range) {
424 try {
425 AtomicValue<Long> result = null;
426 do {
427 result = distributedIdCounter.add(range);
428 } while (result == null || !result.succeeded());
429
430 return new IdBlock(result.preValue(), result.postValue() - 1, range);
431 } catch (Exception e) {
432 log.error("Error allocating ID block");
433 }
434 return null;
435 }
436
Naoki Shiotaa3b2dfa2013-06-27 13:52:24 -0700437 /**
438 * Returns a block of IDs which are unique and unused.
439 * Range of IDs is fixed size and is assigned incrementally as this method called.
440 * Since the range of IDs is managed by Zookeeper in distributed way, this method may block when
441 * requests come up simultaneously.
442 */
Nick Karanatsios8abe7172014-02-19 20:31:48 -0800443 @Override
Jonathan Hart1530ccc2013-04-03 19:36:02 -0700444 public IdBlock allocateUniqueIdBlock(){
Nick Karanatsios8abe7172014-02-19 20:31:48 -0800445 return allocateUniqueIdBlock(ID_BLOCK_SIZE);
Jonathan Hart1530ccc2013-04-03 19:36:02 -0700446 }
Nick Karanatsios8abe7172014-02-19 20:31:48 -0800447
Jonathan Hartbd181b62013-02-17 16:05:38 -0800448 /*
449 * IFloodlightModule
450 */
451
Umesh Krishnaswamyb56bb292013-02-12 20:28:27 -0800452 @Override
453 public Collection<Class<? extends IFloodlightService>> getModuleServices() {
Jonathan Hartedd6a442013-02-20 15:22:06 -0800454 Collection<Class<? extends IFloodlightService>> l =
455 new ArrayList<Class<? extends IFloodlightService>>();
Jonathan Hartd82f20d2013-02-21 18:04:24 -0800456 l.add(IControllerRegistryService.class);
Umesh Krishnaswamyb56bb292013-02-12 20:28:27 -0800457 return l;
458 }
459
460 @Override
461 public Map<Class<? extends IFloodlightService>, IFloodlightService> getServiceImpls() {
462 Map<Class<? extends IFloodlightService>, IFloodlightService> m =
463 new HashMap<Class<? extends IFloodlightService>, IFloodlightService>();
Jonathan Hartd82f20d2013-02-21 18:04:24 -0800464 m.put(IControllerRegistryService.class, this);
Umesh Krishnaswamyb56bb292013-02-12 20:28:27 -0800465 return m;
466 }
467
468 @Override
469 public Collection<Class<? extends IFloodlightService>> getModuleDependencies() {
Jonathan Hart3d7730a2013-02-22 11:51:17 -0800470 Collection<Class<? extends IFloodlightService>> l =
471 new ArrayList<Class<? extends IFloodlightService>>();
Pavlin Radoslavovc35229e2014-02-06 16:19:37 -0800472 l.add(IFloodlightProviderService.class);
473 l.add(IRestApiService.class);
Jonathan Hart3d7730a2013-02-22 11:51:17 -0800474 return l;
Umesh Krishnaswamyb56bb292013-02-12 20:28:27 -0800475 }
476
Jonathan Hart89187372013-03-14 16:41:09 -0700477 //TODO currently blocks startup when it can't get a Zookeeper connection.
478 //Do we support starting up with no Zookeeper connection?
Umesh Krishnaswamyb56bb292013-02-12 20:28:27 -0800479 @Override
480 public void init (FloodlightModuleContext context) throws FloodlightModuleException {
Jonathan Hartbd766972013-02-22 15:13:03 -0800481 log.info("Initialising the Zookeeper Registry - Zookeeper connection required");
482
Jonathan Hart97801ac2013-02-26 14:29:16 -0800483 //Read the Zookeeper connection string from the config
484 Map<String, String> configParams = context.getConfigParams(this);
485 String connectionString = configParams.get("connectionString");
486 if (connectionString != null){
487 this.connectionString = connectionString;
Jonathan Hart57080fb2013-02-21 10:55:46 -0800488 }
Jonathan Hart97801ac2013-02-26 14:29:16 -0800489 log.info("Setting Zookeeper connection string to {}", this.connectionString);
Jonathan Hart57080fb2013-02-21 10:55:46 -0800490
Jonathan Hart97801ac2013-02-26 14:29:16 -0800491 restApi = context.getServiceImpl(IRestApiService.class);
Jonathan Hartbd181b62013-02-17 16:05:38 -0800492
Jonathan Hart89187372013-03-14 16:41:09 -0700493 switches = new ConcurrentHashMap<String, SwitchLeadershipData>();
Jonathan Hart4baf3be2013-03-21 18:26:13 -0700494 //switchPathCaches = new HashMap<String, PathChildrenCache>();
495 switchPathCaches = new ConcurrentHashMap<String, PathChildrenCache>();
Jonathan Hartbd181b62013-02-17 16:05:38 -0800496
Jonathan Hart3d7730a2013-02-22 11:51:17 -0800497 RetryPolicy retryPolicy = new ExponentialBackoffRetry(1000, 3);
Jonathan Hart97801ac2013-02-26 14:29:16 -0800498 client = CuratorFrameworkFactory.newClient(this.connectionString,
Jonathan Hartcc957a02013-02-26 10:39:04 -0800499 sessionTimeout, connectionTimeout, retryPolicy);
Jonathan Hartbd181b62013-02-17 16:05:38 -0800500
501 client.start();
Jonathan Hartbd181b62013-02-17 16:05:38 -0800502 client = client.usingNamespace(namespace);
Jonathan Hart97801ac2013-02-26 14:29:16 -0800503
Jonathan Hart1530ccc2013-04-03 19:36:02 -0700504 distributedIdCounter = new DistributedAtomicLong(
505 client,
506 ID_COUNTER_PATH,
507 new RetryOneTime(100));
Jonathan Hart3d7730a2013-02-22 11:51:17 -0800508
Jonathan Hart3d7730a2013-02-22 11:51:17 -0800509 switchCache = new PathChildrenCache(client, switchLatchesPath, true);
510 switchCache.getListenable().addListener(switchPathCacheListener);
Jonathan Hartedd6a442013-02-20 15:22:06 -0800511
Jonathan Hart71c0ffc2013-03-24 15:58:42 -0700512 //Build the service discovery object
513 serviceDiscovery = ServiceDiscoveryBuilder.builder(ControllerService.class)
514 .client(client).basePath(SERVICES_PATH).build();
515
516 //We read the list of services very frequently (GUI periodically queries them)
517 //so we'll cache them to cut down on Zookeeper queries.
518 serviceCache = serviceDiscovery.serviceCacheBuilder()
519 .name(CONTROLLER_SERVICE_NAME).build();
520
521
Jonathan Hartedd6a442013-02-20 15:22:06 -0800522 try {
Jonathan Hart71c0ffc2013-03-24 15:58:42 -0700523 serviceDiscovery.start();
524 serviceCache.start();
Jonathan Hart3d7730a2013-02-22 11:51:17 -0800525
526 //Don't prime the cache, we want a notification for each child node in the path
527 switchCache.start(StartMode.NORMAL);
Jonathan Hartedd6a442013-02-20 15:22:06 -0800528 } catch (Exception e) {
Jonathan Hart7bf62172013-02-28 13:17:18 -0800529 throw new FloodlightModuleException("Error initialising ZookeeperRegistry: "
530 + e.getMessage());
Jonathan Hartedd6a442013-02-20 15:22:06 -0800531 }
Umesh Krishnaswamyb56bb292013-02-12 20:28:27 -0800532 }
533
534 @Override
535 public void startUp (FloodlightModuleContext context) {
Pavlin Radoslavovf1377ce2014-02-05 17:37:24 -0800536 //
537 // Cluster Leader election setup.
538 // NOTE: We have to do it here, because during the init stage
539 // we don't know the Controller ID.
540 //
541 if (controllerId == null) {
542 log.error("Error on startup: unknown ControllerId");
543 }
544 clusterLeaderLatch = new LeaderLatch(client,
545 CLUSTER_LEADER_PATH,
546 controllerId);
547 clusterLeaderListener = new ClusterLeaderListener(clusterLeaderLatch);
548 clusterLeaderLatch.addListener(clusterLeaderListener);
549 try {
550 clusterLeaderLatch.start();
551 } catch (Exception e) {
552 log.error("Error on startup starting the cluster leader election: {}", e.getMessage());
553 }
554
555 // Keep trying until there is a cluster leader
556 do {
557 try {
558 Participant leader = clusterLeaderLatch.getLeader();
559 if (! leader.getId().isEmpty())
560 break;
561 Thread.sleep(CLUSTER_LEADER_ELECTION_RETRY_MS);
562 } catch (Exception e) {
563 log.error("Error on startup waiting for cluster leader election: {}", e.getMessage());
564 }
565 } while (true);
566
Jonathan Hart3d7730a2013-02-22 11:51:17 -0800567 restApi.addRestletRoutable(new RegistryWebRoutable());
Umesh Krishnaswamyb56bb292013-02-12 20:28:27 -0800568 }
Umesh Krishnaswamyb56bb292013-02-12 20:28:27 -0800569}