blob: 5bb7f00c345339d02eb4d7cada04ab4661a80504 [file] [log] [blame]
Jonathan Hartdeda0ba2014-04-03 11:14:12 -07001package net.onrc.onos.core.registry;
Umesh Krishnaswamyb56bb292013-02-12 20:28:27 -08002
Jonathan Hartbd181b62013-02-17 16:05:38 -08003import java.io.IOException;
Umesh Krishnaswamyb56bb292013-02-12 20:28:27 -08004import java.util.ArrayList;
5import java.util.Collection;
Jonathan Hart3d7730a2013-02-22 11:51:17 -08006import java.util.Collections;
Jonathan Hart599c6b32013-03-24 22:42:02 -07007import java.util.Comparator;
Umesh Krishnaswamyb56bb292013-02-12 20:28:27 -08008import java.util.HashMap;
Jonathan Hartedd6a442013-02-20 15:22:06 -08009import java.util.List;
Umesh Krishnaswamyb56bb292013-02-12 20:28:27 -080010import java.util.Map;
Pavlin Radoslavov52163ed2014-03-19 11:39:34 -070011import java.util.Random;
Jonathan Hart116b1fe2014-03-14 18:53:47 -070012import java.util.concurrent.BlockingQueue;
Jonathan Hart89187372013-03-14 16:41:09 -070013import java.util.concurrent.ConcurrentHashMap;
Jonathan Hart116b1fe2014-03-14 18:53:47 -070014import java.util.concurrent.ExecutorService;
15import java.util.concurrent.Executors;
16import java.util.concurrent.LinkedBlockingQueue;
Umesh Krishnaswamyb56bb292013-02-12 20:28:27 -080017
Pavlin Radoslavovc35229e2014-02-06 16:19:37 -080018import net.floodlightcontroller.core.IFloodlightProviderService;
Umesh Krishnaswamyb56bb292013-02-12 20:28:27 -080019import net.floodlightcontroller.core.module.FloodlightModuleContext;
20import net.floodlightcontroller.core.module.FloodlightModuleException;
21import net.floodlightcontroller.core.module.IFloodlightModule;
22import net.floodlightcontroller.core.module.IFloodlightService;
Jonathan Hart3d7730a2013-02-22 11:51:17 -080023import net.floodlightcontroller.restserver.IRestApiService;
Jonathan Hartdeda0ba2014-04-03 11:14:12 -070024import net.onrc.onos.core.registry.web.RegistryWebRoutable;
Pavlin Radoslavov53b208a2014-07-28 13:16:11 -070025import net.onrc.onos.core.util.OnosInstanceId;
Umesh Krishnaswamyb56bb292013-02-12 20:28:27 -080026
Jonathan Hart12a26aa2014-06-04 14:33:09 -070027import org.apache.commons.lang.NotImplementedException;
Jonathan Hart116b1fe2014-03-14 18:53:47 -070028import org.apache.curator.RetryPolicy;
29import org.apache.curator.framework.CuratorFramework;
30import org.apache.curator.framework.CuratorFrameworkFactory;
31import org.apache.curator.framework.recipes.atomic.AtomicValue;
32import org.apache.curator.framework.recipes.atomic.DistributedAtomicLong;
33import org.apache.curator.framework.recipes.cache.ChildData;
34import org.apache.curator.framework.recipes.cache.PathChildrenCache;
35import org.apache.curator.framework.recipes.cache.PathChildrenCache.StartMode;
36import org.apache.curator.framework.recipes.cache.PathChildrenCacheEvent;
37import org.apache.curator.framework.recipes.cache.PathChildrenCacheListener;
38import org.apache.curator.framework.recipes.leader.LeaderLatch;
39import org.apache.curator.framework.recipes.leader.LeaderLatchListener;
40import org.apache.curator.framework.recipes.leader.Participant;
41import org.apache.curator.retry.ExponentialBackoffRetry;
42import org.apache.curator.retry.RetryOneTime;
43import org.apache.curator.x.discovery.ServiceCache;
44import org.apache.curator.x.discovery.ServiceDiscovery;
45import org.apache.curator.x.discovery.ServiceDiscoveryBuilder;
46import org.apache.curator.x.discovery.ServiceInstance;
Jonathan Hartc78b8f62014-08-07 22:31:09 -070047import org.projectfloodlight.openflow.util.HexString;
Jonathan Hartbd181b62013-02-17 16:05:38 -080048import org.slf4j.Logger;
49import org.slf4j.LoggerFactory;
Umesh Krishnaswamyb56bb292013-02-12 20:28:27 -080050
Jonathan Hartd10008d2013-02-23 17:04:08 -080051import com.google.common.base.Charsets;
Jonathan Hartbd181b62013-02-17 16:05:38 -080052
/**
 * A registry service that uses Zookeeper. All data is stored in Zookeeper,
 * so this can be used as a global registry in a multi-node ONOS cluster.
 */
Jonathan Hart1dbcce62014-06-04 15:21:45 -070057public class ZookeeperRegistry implements IFloodlightModule,
58 IControllerRegistryService {
Jonathan Hartc6eee9e2013-02-18 14:58:27 -080059
Yuta HIGUCHI5f1ce1c2014-07-20 22:43:54 -070060 private static final String DEFAULT_CONNECTION_STRING = "localhost:2181";
61
Ray Milkeyec838942014-04-09 11:28:43 -070062 private static final Logger log = LoggerFactory.getLogger(ZookeeperRegistry.class);
Jonathan Hart71c0ffc2013-03-24 15:58:42 -070063
Pavlin Radoslavov53b208a2014-07-28 13:16:11 -070064 private OnosInstanceId onosInstanceId;
Jonathan Hart12a26aa2014-06-04 14:33:09 -070065
66 private IRestApiService restApi;
Jonathan Hartbd181b62013-02-17 16:05:38 -080067
Jonathan Hart1dbcce62014-06-04 15:21:45 -070068 // This is the default. It is overwritten by the connectionString
69 // configuration parameter
Yuta HIGUCHI5f1ce1c2014-07-20 22:43:54 -070070 private String connectionString = DEFAULT_CONNECTION_STRING;
Pavlin Radoslavovf1377ce2014-02-05 17:37:24 -080071
Yuta HIGUCHI85de40d2014-06-12 14:06:41 -070072 /**
73 * JVM Option to specify ZooKeeper namespace.
74 */
75 public static final String ZK_NAMESPACE_KEY = "zookeeper.namespace";
76 private static final String DEFAULT_NAMESPACE = "onos";
77 private String namespace = DEFAULT_NAMESPACE;
Jonathan Hart12a26aa2014-06-04 14:33:09 -070078 private static final String SWITCH_LATCHES_PATH = "/switches";
Ray Milkey2476cac2014-04-08 11:03:21 -070079 private static final String CLUSTER_LEADER_PATH = "/cluster/leader";
Pavlin Radoslavovf1377ce2014-02-05 17:37:24 -080080
Jonathan Hart1dbcce62014-06-04 15:21:45 -070081 private static final String SERVICES_PATH = "/"; // i.e. the root of our namespace
Ray Milkey2476cac2014-04-08 11:03:21 -070082 private static final String CONTROLLER_SERVICE_NAME = "controllers";
Pavlin Radoslavov52163ed2014-03-19 11:39:34 -070083
Jonathan Hart12a26aa2014-06-04 14:33:09 -070084 private CuratorFramework curatorFrameworkClient;
Pavlin Radoslavov52163ed2014-03-19 11:39:34 -070085
Jonathan Hart12a26aa2014-06-04 14:33:09 -070086 private PathChildrenCache rootSwitchCache;
Ray Milkey269ffb92014-04-03 14:43:30 -070087
Jonathan Hart12a26aa2014-06-04 14:33:09 -070088 private ConcurrentHashMap<String, SwitchLeadershipData> switches;
89 private Map<String, PathChildrenCache> switchPathCaches;
Ray Milkey269ffb92014-04-03 14:43:30 -070090
Jonathan Hart12a26aa2014-06-04 14:33:09 -070091 private LeaderLatch clusterLeaderLatch;
92 private ClusterLeaderListener clusterLeaderListener;
Ray Milkey269ffb92014-04-03 14:43:30 -070093 private static final long CLUSTER_LEADER_ELECTION_RETRY_MS = 100;
94
Ray Milkey2476cac2014-04-08 11:03:21 -070095 private static final String ID_COUNTER_PATH = "/flowidcounter";
96 private static final Long ID_BLOCK_SIZE = 0x100000000L;
Jonathan Hart12a26aa2014-06-04 14:33:09 -070097 private DistributedAtomicLong distributedIdCounter;
Ray Milkey269ffb92014-04-03 14:43:30 -070098
99 //Zookeeper performance-related configuration
Jonathan Hart12a26aa2014-06-04 14:33:09 -0700100 private static final int SESSION_TIMEOUT = 7000; // ms
101 private static final int CONNECTION_TIMEOUT = 5000; // ms
Ray Milkey269ffb92014-04-03 14:43:30 -0700102
103 //
104 // Unique ID generation state
105 // TODO: The implementation must be updated to use the Zookeeper
Jonathan Hart12a26aa2014-06-04 14:33:09 -0700106 // instead of a random generator.
Ray Milkey269ffb92014-04-03 14:43:30 -0700107 //
108 private static Random randomGenerator = new Random();
Jonathan Hart12a26aa2014-06-04 14:33:09 -0700109 private static long nextUniqueIdPrefix;
Pavlin Radoslavov952a9762014-04-10 13:47:03 -0700110 // NOTE: The 0xffffffffL value is used by the Unique ID generator for
111 // initialization purpose.
112 private static long nextUniqueIdSuffix = 0xffffffffL;
Ray Milkey269ffb92014-04-03 14:43:30 -0700113
114 private final BlockingQueue<SwitchLeaderEvent> switchLeadershipEvents =
115 new LinkedBlockingQueue<SwitchLeaderEvent>();
116
Jonathan Hart12a26aa2014-06-04 14:33:09 -0700117 /**
118 * Listens for changes to the switch znodes in Zookeeper. This maintains
119 * the second level of PathChildrenCaches that hold the controllers
120 * contending for each switch - there's one for each switch.
121 */
122 private PathChildrenCacheListener switchPathCacheListener =
123 new SwitchPathCacheListener();
124 private ServiceDiscovery<ControllerService> serviceDiscovery;
125 private ServiceCache<ControllerService> serviceCache;
126
Ray Milkey269ffb92014-04-03 14:43:30 -0700127
Jonathan Hart116b1fe2014-03-14 18:53:47 -0700128 private static class SwitchLeaderEvent {
Jonathan Hart12a26aa2014-06-04 14:33:09 -0700129 private final long dpid;
130 private final boolean isLeader;
Ray Milkey269ffb92014-04-03 14:43:30 -0700131
132 public SwitchLeaderEvent(long dpid, boolean isLeader) {
133 this.dpid = dpid;
134 this.isLeader = isLeader;
135 }
Jonathan Hart12a26aa2014-06-04 14:33:09 -0700136
137 public long getDpid() {
138 return dpid;
139 }
140
141 public boolean isLeader() {
142 return isLeader;
143 }
Jonathan Hart116b1fe2014-03-14 18:53:47 -0700144 }
Ray Milkey269ffb92014-04-03 14:43:30 -0700145
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700146 // Dispatcher thread for leadership change events coming from Curator
Jonathan Hart116b1fe2014-03-14 18:53:47 -0700147 private void dispatchEvents() {
Ray Milkey269ffb92014-04-03 14:43:30 -0700148 while (!Thread.currentThread().isInterrupted()) {
149 try {
150 SwitchLeaderEvent event = switchLeadershipEvents.take();
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700151 SwitchLeadershipData swData =
152 switches.get(HexString.toHexString(event.getDpid()));
Ray Milkey269ffb92014-04-03 14:43:30 -0700153 if (swData == null) {
Jonathan Hart12a26aa2014-06-04 14:33:09 -0700154 log.debug("Leadership data {} not found", event.getDpid());
Ray Milkey269ffb92014-04-03 14:43:30 -0700155 continue;
156 }
157
Jonathan Hart12a26aa2014-06-04 14:33:09 -0700158 swData.getCallback().controlChanged(event.getDpid(), event.isLeader());
Ray Milkey269ffb92014-04-03 14:43:30 -0700159 } catch (InterruptedException e) {
160 Thread.currentThread().interrupt();
161 break;
162 } catch (Exception e) {
163 log.error("Exception in registry event thread", e);
164 }
165 }
Jonathan Hart116b1fe2014-03-14 18:53:47 -0700166 }
Jonathan Hartbd181b62013-02-17 16:05:38 -0800167
Jonathan Hart12a26aa2014-06-04 14:33:09 -0700168 class SwitchLeaderListener implements LeaderLatchListener {
Yuta HIGUCHI5bbbaca2014-06-09 16:39:08 -0700169 private final String dpid;
Jonathan Hart116b1fe2014-03-14 18:53:47 -0700170
Pavlin Radoslavov0294e052014-04-10 13:36:45 -0700171 public SwitchLeaderListener(String dpid) {
Ray Milkey269ffb92014-04-03 14:43:30 -0700172 this.dpid = dpid;
Ray Milkey269ffb92014-04-03 14:43:30 -0700173 }
Jonathan Hart116b1fe2014-03-14 18:53:47 -0700174
Ray Milkey269ffb92014-04-03 14:43:30 -0700175 @Override
176 public void isLeader() {
Yuta HIGUCHI5bbbaca2014-06-09 16:39:08 -0700177 log.info("Became leader for {}", dpid);
Pavlin Radoslavovf1377ce2014-02-05 17:37:24 -0800178
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700179 switchLeadershipEvents.add(
180 new SwitchLeaderEvent(HexString.toLong(dpid), true));
Ray Milkey269ffb92014-04-03 14:43:30 -0700181 }
Pavlin Radoslavovf1377ce2014-02-05 17:37:24 -0800182
Ray Milkey269ffb92014-04-03 14:43:30 -0700183 @Override
184 public void notLeader() {
Yuta HIGUCHI5bbbaca2014-06-09 16:39:08 -0700185 log.info("Lost leadership for {}", dpid);
Pavlin Radoslavovf1377ce2014-02-05 17:37:24 -0800186
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700187 switchLeadershipEvents.add(
188 new SwitchLeaderEvent(HexString.toLong(dpid), false));
Ray Milkey269ffb92014-04-03 14:43:30 -0700189 }
190 }
Jonathan Hart116b1fe2014-03-14 18:53:47 -0700191
Jonathan Hart12a26aa2014-06-04 14:33:09 -0700192 class SwitchPathCacheListener implements PathChildrenCacheListener {
Ray Milkey269ffb92014-04-03 14:43:30 -0700193 @Override
194 public void childEvent(CuratorFramework client,
195 PathChildrenCacheEvent event) throws Exception {
Pavlin Radoslavovf1377ce2014-02-05 17:37:24 -0800196
Ray Milkey269ffb92014-04-03 14:43:30 -0700197 String strSwitch = null;
198 if (event.getData() != null) {
199 String[] splitted = event.getData().getPath().split("/");
200 strSwitch = splitted[splitted.length - 1];
Nick Karanatsios8abe7172014-02-19 20:31:48 -0800201 }
Ray Milkey269ffb92014-04-03 14:43:30 -0700202
203 switch (event.getType()) {
204 case CHILD_ADDED:
205 case CHILD_UPDATED:
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700206 // Check we have a PathChildrenCache for this child
207 // and add one if not
Ray Milkey269ffb92014-04-03 14:43:30 -0700208 synchronized (switchPathCaches) {
209 if (switchPathCaches.get(strSwitch) == null) {
210 PathChildrenCache pc = new PathChildrenCache(client,
211 event.getData().getPath(), true);
212 pc.start(StartMode.NORMAL);
213 switchPathCaches.put(strSwitch, pc);
214 }
215 }
216 break;
217 case CHILD_REMOVED:
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700218 // Remove our PathChildrenCache for this child
Ray Milkey269ffb92014-04-03 14:43:30 -0700219 PathChildrenCache pc = null;
220 synchronized (switchPathCaches) {
221 pc = switchPathCaches.remove(strSwitch);
222 }
223 if (pc != null) {
224 pc.close();
225 }
226 break;
227 default:
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700228 // All other switchLeadershipEvents are connection status
229 // switchLeadershipEvents. We don't need to do anything as
230 // the path cache handles these on its own.
Ray Milkey269ffb92014-04-03 14:43:30 -0700231 break;
232 }
233
234 }
235 }
236
Jonathan Hart12a26aa2014-06-04 14:33:09 -0700237 private static class ClusterLeaderListener implements LeaderLatchListener {
Ray Milkey269ffb92014-04-03 14:43:30 -0700238 //
239 // NOTE: If we need to support callbacks when the
240 // leadership changes, those should be called here.
241 //
242
243 @Override
244 public void isLeader() {
Yuta HIGUCHI5bbbaca2014-06-09 16:39:08 -0700245 log.info("Cluster leadership aquired");
Ray Milkey269ffb92014-04-03 14:43:30 -0700246 }
247
248 @Override
249 public void notLeader() {
Yuta HIGUCHI5bbbaca2014-06-09 16:39:08 -0700250 log.info("Cluster leadership lost");
Ray Milkey269ffb92014-04-03 14:43:30 -0700251 }
252 }
253
Ray Milkey269ffb92014-04-03 14:43:30 -0700254 @Override
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700255 public void requestControl(long dpid, ControlChangeCallback cb)
256 throws RegistryException {
Ray Milkey269ffb92014-04-03 14:43:30 -0700257 log.info("Requesting control for {}", HexString.toHexString(dpid));
258
Pavlin Radoslavov53b208a2014-07-28 13:16:11 -0700259 if (onosInstanceId == null) {
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700260 throw new IllegalStateException("Must register a controller before"
261 + " calling requestControl");
Ray Milkey269ffb92014-04-03 14:43:30 -0700262 }
263
264 String dpidStr = HexString.toHexString(dpid);
Ray Milkey269ffb92014-04-03 14:43:30 -0700265
266 if (switches.get(dpidStr) != null) {
267 log.debug("Already contesting {}, returning", HexString.toHexString(dpid));
268 throw new RegistryException("Already contesting control for " + dpidStr);
269 }
270
Jonathan Hart12a26aa2014-06-04 14:33:09 -0700271 String latchPath = SWITCH_LATCHES_PATH + "/" + dpidStr;
272
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700273 LeaderLatch latch =
Pavlin Radoslavov53b208a2014-07-28 13:16:11 -0700274 new LeaderLatch(curatorFrameworkClient, latchPath,
275 onosInstanceId.toString());
Pavlin Radoslavov0294e052014-04-10 13:36:45 -0700276 SwitchLeaderListener listener = new SwitchLeaderListener(dpidStr);
Ray Milkey269ffb92014-04-03 14:43:30 -0700277 latch.addListener(listener);
278
279
280 SwitchLeadershipData swData = new SwitchLeadershipData(latch, cb, listener);
281 SwitchLeadershipData oldData = switches.putIfAbsent(dpidStr, swData);
282
283 if (oldData != null) {
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700284 // There was already data for that key in the map
285 // i.e. someone else got here first so we can't succeed
Ray Milkey269ffb92014-04-03 14:43:30 -0700286 log.debug("Already requested control for {}", dpidStr);
287 throw new RegistryException("Already requested control for " + dpidStr);
288 }
289
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700290 // Now that we know we were able to add our latch to the collection,
291 // we can start the leader election in Zookeeper. However I don't know
292 // how to handle if the start fails - the latch is already in our
293 // switches list.
294 // TODO seems like there's a Curator bug when latch.start is called when
295 // there's no Zookeeper connection which causes two znodes to be put in
296 // Zookeeper at the latch path when we reconnect to Zookeeper.
Ray Milkey269ffb92014-04-03 14:43:30 -0700297 try {
298 latch.start();
299 } catch (Exception e) {
300 log.warn("Error starting leader latch: {}", e.getMessage());
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700301 throw new RegistryException("Error starting leader latch for "
302 + dpidStr, e);
Ray Milkey269ffb92014-04-03 14:43:30 -0700303 }
304
305 }
306
307 @Override
308 public void releaseControl(long dpid) {
309 log.info("Releasing control for {}", HexString.toHexString(dpid));
310
311 String dpidStr = HexString.toHexString(dpid);
312
313 SwitchLeadershipData swData = switches.remove(dpidStr);
314
315 if (swData == null) {
316 log.debug("Trying to release control of a switch we are not contesting");
317 return;
318 }
319
320 LeaderLatch latch = swData.getLatch();
321
322 latch.removeListener(swData.getListener());
323
324 try {
325 latch.close();
326 } catch (IOException e) {
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700327 // I think it's OK not to do anything here. Either the node got
328 // deleted correctly, or the connection went down and the node got deleted.
Ray Milkey269ffb92014-04-03 14:43:30 -0700329 log.debug("releaseControl: caught IOException {}", dpidStr);
330 }
331 }
332
333 @Override
334 public boolean hasControl(long dpid) {
335 String dpidStr = HexString.toHexString(dpid);
336
337 SwitchLeadershipData swData = switches.get(dpidStr);
338
339 if (swData == null) {
340 log.warn("No leader latch for dpid {}", dpidStr);
341 return false;
342 }
343
344 return swData.getLatch().hasLeadership();
345 }
346
347 @Override
348 public boolean isClusterLeader() {
349 return clusterLeaderLatch.hasLeadership();
350 }
351
352 @Override
Pavlin Radoslavov53b208a2014-07-28 13:16:11 -0700353 public OnosInstanceId getOnosInstanceId() {
354 return onosInstanceId;
Ray Milkey269ffb92014-04-03 14:43:30 -0700355 }
356
357 @Override
358 public Collection<String> getAllControllers() throws RegistryException {
359 log.debug("Getting all controllers");
360
361 List<String> controllers = new ArrayList<String>();
362 for (ServiceInstance<ControllerService> instance : serviceCache.getInstances()) {
363 String id = instance.getPayload().getControllerId();
364 if (!controllers.contains(id)) {
365 controllers.add(id);
366 }
367 }
368
369 return controllers;
370 }
371
372 @Override
373 public void registerController(String id) throws RegistryException {
Pavlin Radoslavov53b208a2014-07-28 13:16:11 -0700374 if (onosInstanceId != null) {
Ray Milkey269ffb92014-04-03 14:43:30 -0700375 throw new RegistryException(
Pavlin Radoslavov53b208a2014-07-28 13:16:11 -0700376 "Controller already registered with id " + onosInstanceId);
Ray Milkey269ffb92014-04-03 14:43:30 -0700377 }
378
Pavlin Radoslavov53b208a2014-07-28 13:16:11 -0700379 onosInstanceId = new OnosInstanceId(id);
Ray Milkey269ffb92014-04-03 14:43:30 -0700380
381 try {
Jonathan Hart12a26aa2014-06-04 14:33:09 -0700382 ServiceInstance<ControllerService> thisInstance =
383 ServiceInstance.<ControllerService>builder()
Ray Milkey269ffb92014-04-03 14:43:30 -0700384 .name(CONTROLLER_SERVICE_NAME)
Pavlin Radoslavov53b208a2014-07-28 13:16:11 -0700385 .payload(new ControllerService(onosInstanceId.toString()))
Ray Milkey269ffb92014-04-03 14:43:30 -0700386 .build();
387
388 serviceDiscovery.registerService(thisInstance);
389 } catch (Exception e) {
Jonathan Hart12a26aa2014-06-04 14:33:09 -0700390 log.error("Exception starting service instance:", e);
Ray Milkey269ffb92014-04-03 14:43:30 -0700391 }
392
393 }
394
395 @Override
396 public String getControllerForSwitch(long dpid) throws RegistryException {
397 String dpidStr = HexString.toHexString(dpid);
398
399 PathChildrenCache switchCache = switchPathCaches.get(dpidStr);
400
401 if (switchCache == null) {
402 log.warn("Tried to get controller for non-existent switch");
Nick Karanatsios8abe7172014-02-19 20:31:48 -0800403 return null;
404 }
Pavlin Radoslavov52163ed2014-03-19 11:39:34 -0700405
Ray Milkey269ffb92014-04-03 14:43:30 -0700406 try {
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700407 // We've seen issues with these caches get stuck out of date, so
408 // we'll have to force them to refresh before each read. This slows
409 // down the method as it blocks on a Zookeeper query, however at
410 // the moment only the cleanup thread uses this and that isn't
411 // particularly time-sensitive.
412 // TODO verify if it is still the case that caches can be out of date
Ray Milkey269ffb92014-04-03 14:43:30 -0700413 switchCache.rebuild();
414 } catch (Exception e) {
Jonathan Hart12a26aa2014-06-04 14:33:09 -0700415 log.error("Exception rebuilding the switch cache:", e);
Ray Milkey269ffb92014-04-03 14:43:30 -0700416 }
Pavlin Radoslavov52163ed2014-03-19 11:39:34 -0700417
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700418 List<ChildData> sortedData =
419 new ArrayList<ChildData>(switchCache.getCurrentData());
Ray Milkey269ffb92014-04-03 14:43:30 -0700420
421 Collections.sort(
422 sortedData,
423 new Comparator<ChildData>() {
424 private String getSequenceNumber(String path) {
425 return path.substring(path.lastIndexOf('-') + 1);
426 }
427
428 @Override
429 public int compare(ChildData lhs, ChildData rhs) {
430 return getSequenceNumber(lhs.getPath()).
431 compareTo(getSequenceNumber(rhs.getPath()));
432 }
433 }
434 );
435
Jonathan Hart12a26aa2014-06-04 14:33:09 -0700436 if (sortedData.isEmpty()) {
Ray Milkey269ffb92014-04-03 14:43:30 -0700437 return null;
438 }
439
440 return new String(sortedData.get(0).getData(), Charsets.UTF_8);
441 }
442
443 @Override
Ray Milkey5df613b2014-04-15 10:50:56 -0700444 public Collection<Long> getSwitchesControlledByController(String controller) {
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700445 // TODO remove this if not needed
Jonathan Hart12a26aa2014-06-04 14:33:09 -0700446 throw new NotImplementedException("Not yet implemented");
Ray Milkey269ffb92014-04-03 14:43:30 -0700447 }
448
449
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700450 // TODO what should happen when there's no ZK connection? Currently we just
451 // return the cache but this may lead to false impressions - i.e. we don't
452 // actually know what's in ZK so we shouldn't say we do
Ray Milkey269ffb92014-04-03 14:43:30 -0700453 @Override
454 public Map<String, List<ControllerRegistryEntry>> getAllSwitches() {
455 Map<String, List<ControllerRegistryEntry>> data =
456 new HashMap<String, List<ControllerRegistryEntry>>();
457
458 for (Map.Entry<String, PathChildrenCache> entry : switchPathCaches.entrySet()) {
459 List<ControllerRegistryEntry> contendingControllers =
460 new ArrayList<ControllerRegistryEntry>();
461
462 if (entry.getValue().getCurrentData().size() < 1) {
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700463 // TODO prevent even having the PathChildrenCache in this case
Ray Milkey269ffb92014-04-03 14:43:30 -0700464 continue;
465 }
466
467 for (ChildData d : entry.getValue().getCurrentData()) {
468
Ray Milkey5df613b2014-04-15 10:50:56 -0700469 String childsControllerId = new String(d.getData(), Charsets.UTF_8);
Ray Milkey269ffb92014-04-03 14:43:30 -0700470
471 String[] splitted = d.getPath().split("-");
472 int sequenceNumber = Integer.parseInt(splitted[splitted.length - 1]);
473
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700474 contendingControllers.add(new ControllerRegistryEntry(
475 childsControllerId, sequenceNumber));
Ray Milkey269ffb92014-04-03 14:43:30 -0700476 }
477
478 Collections.sort(contendingControllers);
479 data.put(entry.getKey(), contendingControllers);
480 }
481 return data;
482 }
483
Yuta HIGUCHI5bbbaca2014-06-09 16:39:08 -0700484 @Override
Ray Milkey269ffb92014-04-03 14:43:30 -0700485 public IdBlock allocateUniqueIdBlock(long range) {
486 try {
487 AtomicValue<Long> result = null;
488 do {
489 result = distributedIdCounter.add(range);
490 } while (result == null || !result.succeeded());
491
492 return new IdBlock(result.preValue(), result.postValue() - 1, range);
493 } catch (Exception e) {
494 log.error("Error allocating ID block");
495 }
496 return null;
497 }
498
499 /**
500 * Returns a block of IDs which are unique and unused.
Jonathan Hart12a26aa2014-06-04 14:33:09 -0700501 * The range of IDs is a fixed size and is allocated incrementally as this
502 * method is called. Since the range of IDs is managed by Zookeeper in
503 * distributed way, this method may block during Zookeeper access.
504 *
505 * @return an IdBlock containing a set of unique IDs
Ray Milkey269ffb92014-04-03 14:43:30 -0700506 */
507 @Override
508 public IdBlock allocateUniqueIdBlock() {
509 return allocateUniqueIdBlock(ID_BLOCK_SIZE);
510 }
511
512 /**
513 * Get a globally unique ID.
514 *
515 * @return a globally unique ID.
516 */
517 @Override
518 public synchronized long getNextUniqueId() {
519 //
520 // Generate the next Unique ID.
521 //
522 // TODO: For now, the higher 32 bits are random, and
523 // the lower 32 bits are sequential.
524 // The implementation must be updated to use the Zookeeper
525 // to allocate the higher 32 bits (globally unique).
526 //
527 if ((nextUniqueIdSuffix & 0xffffffffL) == 0xffffffffL) {
528 nextUniqueIdPrefix = randomGenerator.nextInt();
529 nextUniqueIdSuffix = 0;
530 } else {
531 nextUniqueIdSuffix++;
532 }
Pavlin Radoslavov952a9762014-04-10 13:47:03 -0700533 long result = nextUniqueIdPrefix << 32;
Ray Milkey269ffb92014-04-03 14:43:30 -0700534 result = result | (0xffffffffL & nextUniqueIdSuffix);
535 return result;
536 }
537
538 /*
539 * IFloodlightModule
540 */
541
542 @Override
543 public Collection<Class<? extends IFloodlightService>> getModuleServices() {
544 Collection<Class<? extends IFloodlightService>> l =
Jonathan Hart3d7730a2013-02-22 11:51:17 -0800545 new ArrayList<Class<? extends IFloodlightService>>();
Ray Milkey269ffb92014-04-03 14:43:30 -0700546 l.add(IControllerRegistryService.class);
547 return l;
548 }
Pavlin Radoslavov52163ed2014-03-19 11:39:34 -0700549
Ray Milkey269ffb92014-04-03 14:43:30 -0700550 @Override
551 public Map<Class<? extends IFloodlightService>, IFloodlightService> getServiceImpls() {
552 Map<Class<? extends IFloodlightService>, IFloodlightService> m =
553 new HashMap<Class<? extends IFloodlightService>, IFloodlightService>();
554 m.put(IControllerRegistryService.class, this);
555 return m;
556 }
Pavlin Radoslavov52163ed2014-03-19 11:39:34 -0700557
Ray Milkey269ffb92014-04-03 14:43:30 -0700558 @Override
559 public Collection<Class<? extends IFloodlightService>> getModuleDependencies() {
560 Collection<Class<? extends IFloodlightService>> l =
561 new ArrayList<Class<? extends IFloodlightService>>();
562 l.add(IFloodlightProviderService.class);
563 l.add(IRestApiService.class);
564 return l;
565 }
Jonathan Hartbd181b62013-02-17 16:05:38 -0800566
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700567 // TODO currently blocks startup when it can't get a Zookeeper connection.
568 // Do we support starting up with no Zookeeper connection?
Ray Milkey269ffb92014-04-03 14:43:30 -0700569 @Override
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700570 public void init(FloodlightModuleContext context)
571 throws FloodlightModuleException {
572 // Read the Zookeeper connection string from the config
Ray Milkey269ffb92014-04-03 14:43:30 -0700573 Map<String, String> configParams = context.getConfigParams(this);
Ray Milkey5df613b2014-04-15 10:50:56 -0700574 String connectionStringParam = configParams.get("connectionString");
575 if (connectionStringParam != null) {
576 connectionString = connectionStringParam;
Yuta HIGUCHI5f1ce1c2014-07-20 22:43:54 -0700577 } else {
578 connectionString = System.getProperty(
579 "net.onrc.onos.core.registry.ZookeeperRegistry.connectionString",
580 DEFAULT_CONNECTION_STRING);
Ray Milkey269ffb92014-04-03 14:43:30 -0700581 }
582 log.info("Setting Zookeeper connection string to {}", this.connectionString);
Jonathan Hart116b1fe2014-03-14 18:53:47 -0700583
Yuta HIGUCHI85de40d2014-06-12 14:06:41 -0700584 namespace = System.getProperty(ZK_NAMESPACE_KEY, DEFAULT_NAMESPACE).trim();
585 if (namespace.isEmpty()) {
586 namespace = DEFAULT_NAMESPACE;
587 }
588 log.info("Setting Zookeeper namespace to {}", namespace);
589
Ray Milkey269ffb92014-04-03 14:43:30 -0700590 restApi = context.getServiceImpl(IRestApiService.class);
Pavlin Radoslavovf1377ce2014-02-05 17:37:24 -0800591
Ray Milkey269ffb92014-04-03 14:43:30 -0700592 switches = new ConcurrentHashMap<String, SwitchLeadershipData>();
Ray Milkey269ffb92014-04-03 14:43:30 -0700593 switchPathCaches = new ConcurrentHashMap<String, PathChildrenCache>();
594
595 RetryPolicy retryPolicy = new ExponentialBackoffRetry(1000, 3);
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700596 curatorFrameworkClient =
597 CuratorFrameworkFactory.newClient(this.connectionString,
Ray Milkey5c9f2db2014-04-09 10:31:21 -0700598 SESSION_TIMEOUT, CONNECTION_TIMEOUT, retryPolicy);
Ray Milkey269ffb92014-04-03 14:43:30 -0700599
Ray Milkey5df613b2014-04-15 10:50:56 -0700600 curatorFrameworkClient.start();
Yuta HIGUCHI85de40d2014-06-12 14:06:41 -0700601 curatorFrameworkClient = curatorFrameworkClient.usingNamespace(namespace);
Ray Milkey269ffb92014-04-03 14:43:30 -0700602
603 distributedIdCounter = new DistributedAtomicLong(
Ray Milkey5df613b2014-04-15 10:50:56 -0700604 curatorFrameworkClient,
Ray Milkey269ffb92014-04-03 14:43:30 -0700605 ID_COUNTER_PATH,
606 new RetryOneTime(100));
607
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700608 rootSwitchCache = new PathChildrenCache(
609 curatorFrameworkClient, SWITCH_LATCHES_PATH, true);
Ray Milkey5df613b2014-04-15 10:50:56 -0700610 rootSwitchCache.getListenable().addListener(switchPathCacheListener);
Ray Milkey269ffb92014-04-03 14:43:30 -0700611
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700612 // Build the service discovery object
Ray Milkey269ffb92014-04-03 14:43:30 -0700613 serviceDiscovery = ServiceDiscoveryBuilder.builder(ControllerService.class)
Ray Milkey5df613b2014-04-15 10:50:56 -0700614 .client(curatorFrameworkClient).basePath(SERVICES_PATH).build();
Ray Milkey269ffb92014-04-03 14:43:30 -0700615
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700616 // We read the list of services very frequently (GUI periodically
617 // queries them) so we'll cache them to cut down on Zookeeper queries.
Ray Milkey269ffb92014-04-03 14:43:30 -0700618 serviceCache = serviceDiscovery.serviceCacheBuilder()
619 .name(CONTROLLER_SERVICE_NAME).build();
620
Ray Milkey269ffb92014-04-03 14:43:30 -0700621 try {
622 serviceDiscovery.start();
623 serviceCache.start();
624
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700625 // Don't prime the cache, we want a notification for each child
626 // node in the path
Ray Milkey5df613b2014-04-15 10:50:56 -0700627 rootSwitchCache.start(StartMode.NORMAL);
Ray Milkey269ffb92014-04-03 14:43:30 -0700628 } catch (Exception e) {
Jonathan Hart12a26aa2014-06-04 14:33:09 -0700629 throw new FloodlightModuleException(
630 "Error initialising ZookeeperRegistry", e);
Ray Milkey269ffb92014-04-03 14:43:30 -0700631 }
632
Jonathan Hart12a26aa2014-06-04 14:33:09 -0700633 ExecutorService eventThreadExecutorService =
634 Executors.newSingleThreadExecutor();
Ray Milkey269ffb92014-04-03 14:43:30 -0700635 eventThreadExecutorService.execute(
636 new Runnable() {
637 @Override
638 public void run() {
639 dispatchEvents();
640 }
641 });
642 }
643
644 @Override
645 public void startUp(FloodlightModuleContext context) {
646 //
647 // Cluster Leader election setup.
648 // NOTE: We have to do it here, because during the init stage
649 // we don't know the Controller ID.
650 //
Pavlin Radoslavov53b208a2014-07-28 13:16:11 -0700651 if (onosInstanceId == null) {
652 log.error("Error on startup: unknown ONOS Instance ID");
653 return;
Ray Milkey269ffb92014-04-03 14:43:30 -0700654 }
Ray Milkey5df613b2014-04-15 10:50:56 -0700655 clusterLeaderLatch = new LeaderLatch(curatorFrameworkClient,
Ray Milkey269ffb92014-04-03 14:43:30 -0700656 CLUSTER_LEADER_PATH,
Pavlin Radoslavov53b208a2014-07-28 13:16:11 -0700657 onosInstanceId.toString());
Pavlin Radoslavov0294e052014-04-10 13:36:45 -0700658 clusterLeaderListener = new ClusterLeaderListener();
Ray Milkey269ffb92014-04-03 14:43:30 -0700659 clusterLeaderLatch.addListener(clusterLeaderListener);
660 try {
661 clusterLeaderLatch.start();
662 } catch (Exception e) {
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700663 log.error("Error starting the cluster leader election: ", e);
Ray Milkey269ffb92014-04-03 14:43:30 -0700664 }
665
666 // Keep trying until there is a cluster leader
667 do {
668 try {
669 Participant leader = clusterLeaderLatch.getLeader();
Ray Milkeyb29e6262014-04-09 16:02:14 -0700670 if (!leader.getId().isEmpty()) {
Ray Milkey269ffb92014-04-03 14:43:30 -0700671 break;
Ray Milkeyb29e6262014-04-09 16:02:14 -0700672 }
Ray Milkey269ffb92014-04-03 14:43:30 -0700673 Thread.sleep(CLUSTER_LEADER_ELECTION_RETRY_MS);
674 } catch (Exception e) {
Jonathan Hart1dbcce62014-06-04 15:21:45 -0700675 log.error("Error waiting for cluster leader election:", e);
Ray Milkey269ffb92014-04-03 14:43:30 -0700676 }
677 } while (true);
678
679 restApi.addRestletRoutable(new RegistryWebRoutable());
680 }
Umesh Krishnaswamyb56bb292013-02-12 20:28:27 -0800681}