Bug fix for controller registry to ensure the controller node is put back after Zookeeper connection loss
diff --git a/src/main/java/net/onrc/onos/registry/controller/ZookeeperRegistry.java b/src/main/java/net/onrc/onos/registry/controller/ZookeeperRegistry.java
index fe758cd..494f20d 100644
--- a/src/main/java/net/onrc/onos/registry/controller/ZookeeperRegistry.java
+++ b/src/main/java/net/onrc/onos/registry/controller/ZookeeperRegistry.java
@@ -17,6 +17,9 @@
 import net.floodlightcontroller.restserver.IRestApiService;
 
 import org.apache.zookeeper.CreateMode;
+import org.apache.zookeeper.WatchedEvent;
+import org.apache.zookeeper.Watcher;
+import org.apache.zookeeper.data.Stat;
 import org.openflow.util.HexString;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -35,6 +38,7 @@
 import com.netflix.curator.framework.recipes.leader.LeaderLatchListener;
 import com.netflix.curator.framework.recipes.leader.Participant;
 import com.netflix.curator.retry.ExponentialBackoffRetry;
+import com.netflix.curator.utils.ZKPaths;
 
 /**
  * A registry service that uses Zookeeper. All data is stored in Zookeeper,
@@ -65,8 +69,8 @@
 	protected Map<String, PathChildrenCache> switchPathCaches;
 	
 	//Zookeeper performance-related configuration
-	protected static final int sessionTimeout = 2000;
-	protected static final int connectionTimeout = 4000;
+	protected static final int sessionTimeout = 5000;
+	protected static final int connectionTimeout = 7000;
 	
 
 	protected class SwitchLeaderListener implements LeaderLatchListener{
@@ -261,15 +265,48 @@
 		controllerId = id;
 		
 		byte bytes[] = id.getBytes(Charsets.UTF_8);
-		
-		String path = controllerPath + "/" + id;
+		String path = ZKPaths.makePath(controllerPath, controllerId);
 		
 		log.info("Registering controller with id {}", id);
 		
-		//Create ephemeral node in controller registry
 		try {
-			client.create().withProtection().withMode(CreateMode.EPHEMERAL)
+			//We need to set a watch to recreate the node in the controller
+			//registry if it gets deleted - e.g. on Zookeeper connection loss.
+			Watcher watcher = new Watcher(){
+				@Override
+				public void process(WatchedEvent event) {
+					log.debug("got any watch event {} ", event);
+					//Rebuild this controller's node path and data from controllerId
+					String path = ZKPaths.makePath(controllerPath, controllerId);
+					byte bytes[] = controllerId.getBytes(Charsets.UTF_8);
+					//Whatever the event type, the finally block below registers this watcher on the path again
+					try {
+						if (event.getType() == Event.EventType.NodeDeleted){
+							log.debug("got a node deleted event");
+							//The controller node was deleted: put the ephemeral node back
+							//at the same path with the controller id as its data
+							client.create().withMode(CreateMode.EPHEMERAL)
+								.forPath(path, bytes);
+						}
+					} catch (Exception e) {
+						log.warn("Error recreating controller node for {}: {}",
+								controllerId, e.getMessage());
+					} finally {
+						try {
+							client.checkExists().usingWatcher(this).forPath(path);
+						} catch (Exception e2){
+							log.warn("Error resetting watch for {}: {}", 
+									controllerId, e2.getMessage());
+						}
+					}
+				}
+			};
+			
+			//Create ephemeral node in controller registry
+			//TODO Use withProtection() on this create() call
+			client.create().withMode(CreateMode.EPHEMERAL)
 					.forPath(path, bytes);
+			client.checkExists().usingWatcher(watcher).forPath(path);
 		} catch (Exception e) {
 			throw new RegistryException("Error contacting the Zookeeper service", e);
 		}
@@ -281,14 +318,16 @@
 		
 		String dpidStr = HexString.toHexString(dpid);
 
+		SwitchLeadershipData swData = switches.get(dpidStr);
+		//Fetch the switch data once; its latch is read only after the null check below
 		
-		LeaderLatch latch = (switches.get(dpidStr) != null)?switches.get(dpidStr).getLatch():null;
-		
-		if (latch == null){
+		if (swData == null){
 			log.warn("Tried to get controller for non-existent switch");
 			return null;
 		}
 		
+		LeaderLatch latch = swData.getLatch();
+		
 		Participant leader = null;
 		try {
 			leader = latch.getLeader();