Added web-socket fail-over and fixed a defect in mastership balancing.

Change-Id: I14eeb17fdc7970140287b51915c5accc24cf855b
diff --git a/core/net/src/main/java/org/onlab/onos/cluster/impl/MastershipManager.java b/core/net/src/main/java/org/onlab/onos/cluster/impl/MastershipManager.java
index aaac493..b316986 100644
--- a/core/net/src/main/java/org/onlab/onos/cluster/impl/MastershipManager.java
+++ b/core/net/src/main/java/org/onlab/onos/cluster/impl/MastershipManager.java
@@ -17,8 +17,6 @@
 
 import com.codahale.metrics.Timer;
 import com.codahale.metrics.Timer.Context;
-import com.google.common.collect.HashMultimap;
-import com.google.common.collect.Multimap;
 import org.apache.felix.scr.annotations.Activate;
 import org.apache.felix.scr.annotations.Component;
 import org.apache.felix.scr.annotations.Deactivate;
@@ -48,8 +46,11 @@
 import org.slf4j.Logger;
 
 import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicInteger;
 
@@ -57,6 +58,7 @@
 import static com.google.common.collect.Lists.newArrayList;
 import static org.onlab.metrics.MetricsUtil.startTimer;
 import static org.onlab.metrics.MetricsUtil.stopTimer;
+import static org.onlab.onos.cluster.ControllerNode.State.ACTIVE;
 import static org.onlab.onos.net.MastershipRole.MASTER;
 import static org.slf4j.LoggerFactory.getLogger;
 
@@ -208,31 +210,34 @@
     @Override
     public void balanceRoles() {
         List<ControllerNode> nodes = newArrayList(clusterService.getNodes());
-        Multimap<ControllerNode, DeviceId> controllerDevices = HashMultimap.create();
+        Map<ControllerNode, Set<DeviceId>> controllerDevices = new HashMap<>(); // one device bucket per node
         int deviceCount = 0;
 
         // Create buckets reflecting current ownership.
         for (ControllerNode node : nodes) {
-            Set<DeviceId> devicesOf = getDevicesOf(node.id());
-            deviceCount += devicesOf.size();
-            controllerDevices.putAll(node, devicesOf);
-            log.info("Node {} has {} devices.", node.id(), devicesOf.size());
+            if (clusterService.getState(node.id()) == ACTIVE) { // nodes in any other state get no bucket
+                Set<DeviceId> devicesOf = new HashSet<>(getDevicesOf(node.id())); // private copy; buckets are modified while re-balancing
+                deviceCount += devicesOf.size(); // total counts only devices held by ACTIVE nodes
+                controllerDevices.put(node, devicesOf);
+                log.info("Node {} has {} devices.", node.id(), devicesOf.size());
+            }
         }
 
-        int rounds = nodes.size();
+        // Now re-balance the buckets until they are roughly even.
+        int rounds = controllerDevices.keySet().size(); // one round per bucket, i.e. per ACTIVE node
         for (int i = 0; i < rounds; i++) {
             // Iterate over the buckets and find the smallest and the largest.
-            ControllerNode smallest = findBucket(true, nodes, controllerDevices);
-            ControllerNode largest = findBucket(false, nodes, controllerDevices);
+            ControllerNode smallest = findBucket(true, controllerDevices); // true selects the minimum-size bucket
+            ControllerNode largest = findBucket(false, controllerDevices); // false selects the maximum-size bucket
             balanceBuckets(smallest, largest, controllerDevices, deviceCount);
         }
     }
 
-    private ControllerNode findBucket(boolean min, Collection<ControllerNode> nodes,
-                                      Multimap<ControllerNode, DeviceId> controllerDevices) {
+    private ControllerNode findBucket(boolean min,
+                                      Map<ControllerNode, Set<DeviceId>>  controllerDevices) {
         int xSize = min ? Integer.MAX_VALUE : -1;
         ControllerNode xNode = null;
-        for (ControllerNode node : nodes) {
+        for (ControllerNode node : controllerDevices.keySet()) {
             int size = controllerDevices.get(node).size();
             if ((min && size < xSize) || (!min && size > xSize)) {
                 xSize = size;
@@ -243,7 +248,7 @@
     }
 
     private void balanceBuckets(ControllerNode smallest, ControllerNode largest,
-                                Multimap<ControllerNode, DeviceId> controllerDevices,
+                                Map<ControllerNode, Set<DeviceId>>  controllerDevices,
                                 int deviceCount) {
         Collection<DeviceId> minBucket = controllerDevices.get(smallest);
         Collection<DeviceId> maxBucket = controllerDevices.get(largest);
@@ -262,7 +267,7 @@
                 DeviceId deviceId = it.next();
                 log.info("Setting {} as the master for {}", smallest.id(), deviceId);
                 setRole(smallest.id(), deviceId, MASTER);
-                controllerDevices.put(smallest, deviceId);
+                controllerDevices.get(smallest).add(deviceId);
                 it.remove();
                 i++;
             }
diff --git a/web/gui/src/main/webapp/topo.js b/web/gui/src/main/webapp/topo.js
index 97416ef..17e319b 100644
--- a/web/gui/src/main/webapp/topo.js
+++ b/web/gui/src/main/webapp/topo.js
@@ -197,6 +197,7 @@
         summaryPane,
         detailPane,
         antTimer = null,
+        guiSuccessor = null,
         onosInstances = {},
         onosOrder = [],
         oiBox,
@@ -620,7 +621,7 @@
         var inst = data.payload,
             id = inst.id;
         if (onosInstances[id]) {
-            logicError('ONOS instance already added: ' + id);
+            updateInstance(data);
             return;
         }
         onosInstances[id] = inst;
@@ -635,7 +636,7 @@
             d;
 
         if (network.lookup[id]) {
-            logicError('Device already added: ' + id);
+            updateDevice(data);
             return;
         }
 
@@ -2177,16 +2178,38 @@
     // ==============================
     // Web-Socket for live data
 
+    function findGuiSuccessor() { // returns the ip of the next online instance, or null if none
+        var idx = -1; // position in onosOrder of the uiAttached instance; stays -1 if none is flagged
+        onosOrder.forEach(function (d, i) { // no early exit: the last flagged entry wins
+            if (d.uiAttached) {
+                idx = i;
+            }
+        });
+
+        for (var i = 0; i < onosOrder.length - 1; i++) { // at most length - 1 candidates, so entry idx itself is skipped
+            var ni = (idx + 1 + i) % onosOrder.length; // walk forward from idx + 1, wrapping; NOTE(review): when idx is -1 the last entry is never examined -- confirm intended
+            if (onosOrder[ni].online) {
+                return onosOrder[ni].ip; // first online candidate wins
+            }
+        }
+        return null; // no online candidate found
+    }
+
     function webSockUrl() {
-        return document.location.toString()
-            .replace(/\#.*/, '')
-            .replace('http://', 'ws://')
-            .replace('https://', 'wss://')
-            .replace('index.html', config.webSockUrl);
+        var url = document.location.toString()
+                .replace(/\#.*/, '') // drop the URL fragment
+                .replace('http://', 'ws://') // plain http page -> ws scheme
+                .replace('https://', 'wss://') // https page -> wss scheme
+                .replace('index.html', config.webSockUrl); // swap the page name for the configured web-socket path
+        if (guiSuccessor) { // a fail-over target has been chosen
+            url = url.replace(location.hostname, guiSuccessor); // string pattern: only the first occurrence of the hostname is replaced
+        }
+        return url;
     }
 
     webSock = {
         ws : null,
+        retries: 0,
 
         connect : function() {
             webSock.ws = new WebSocket(webSockUrl());
@@ -2195,6 +2218,7 @@
                 noWebSock(false);
                 requestSummary();
                 showInstances();
+                webSock.retries = 0;
             };
 
             webSock.ws.onmessage = function(m) {
@@ -2206,7 +2230,13 @@
 
             webSock.ws.onclose = function(m) {
                 webSock.ws = null;
-                noWebSock(true);
+                guiSuccessor = findGuiSuccessor();
+                if (guiSuccessor && webSock.retries < onosOrder.length) {
+                    webSock.retries++;
+                    webSock.connect();
+                } else {
+                    noWebSock(true);
+                }
             };
         },