Mastership role reassignment tweaks: track per-device standby and unusable nodes

Change-Id: Ie6d412787330e67a13e605a34f0824cf70882f85
diff --git a/core/store/hz/cluster/src/main/java/org/onlab/onos/store/cluster/impl/DistributedMastershipStore.java b/core/store/hz/cluster/src/main/java/org/onlab/onos/store/cluster/impl/DistributedMastershipStore.java
index 04833e6..71d42fa 100644
--- a/core/store/hz/cluster/src/main/java/org/onlab/onos/store/cluster/impl/DistributedMastershipStore.java
+++ b/core/store/hz/cluster/src/main/java/org/onlab/onos/store/cluster/impl/DistributedMastershipStore.java
@@ -10,7 +10,6 @@
 import org.apache.felix.scr.annotations.Deactivate;
 import org.apache.felix.scr.annotations.Reference;
 import org.apache.felix.scr.annotations.ReferenceCardinality;
-import org.apache.felix.scr.annotations.ReferencePolicy;
 import org.apache.felix.scr.annotations.Service;
 import org.onlab.onos.cluster.ClusterService;
 import org.onlab.onos.cluster.MastershipEvent;
@@ -20,15 +19,16 @@
 import org.onlab.onos.cluster.NodeId;
 import org.onlab.onos.net.DeviceId;
 import org.onlab.onos.net.MastershipRole;
-import org.onlab.onos.net.device.DeviceService;
 import org.onlab.onos.store.common.AbstractHazelcastStore;
 
 import com.google.common.collect.ImmutableSet;
 import com.hazelcast.core.ILock;
 import com.hazelcast.core.IMap;
+import com.hazelcast.core.MultiMap;
 
 /**
- * Distributed implementation of the cluster nodes store.
+ * Distributed implementation of the mastership store. The store is
+ * responsible for the master selection process.
  */
 @Component(immediate = true)
 @Service
@@ -38,35 +38,34 @@
 
     //arbitrary lock name
     private static final String LOCK = "lock";
-    //initial term value
+    //initial term/TTL value
     private static final Integer INIT = 0;
-    //placeholder non-null value
-    private static final Byte NIL = 0x0;
 
     //devices to masters
-    protected IMap<byte[], byte[]> rawMasters;
+    protected IMap<byte[], byte[]> masters;
     //devices to terms
-    protected IMap<byte[], Integer> rawTerms;
-    //collection of nodes. values are ignored, as it's used as a makeshift 'set'
-    protected IMap<byte[], Byte> backups;
+    protected IMap<byte[], Integer> terms;
+
+    //re-election related, disjoint-set structures:
+    //device-to-nodes multimap of nodes available as standbys
+    protected MultiMap<byte[], byte[]> standbys;
+    //device-to-nodes multimap of nodes that have given up on the device
+    protected MultiMap<byte[], byte[]> unusable;
 
     @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
     protected ClusterService clusterService;
 
-    //FIXME: need to guarantee that this will be met, sans circular dependencies
-    @Reference(policy = ReferencePolicy.DYNAMIC)
-    protected DeviceService deviceService;
-
     @Override
     @Activate
     public void activate() {
         super.activate();
 
-        rawMasters = theInstance.getMap("masters");
-        rawTerms = theInstance.getMap("terms");
-        backups = theInstance.getMap("backups");
+        masters = theInstance.getMap("masters");
+        terms = theInstance.getMap("terms");
+        standbys = theInstance.getMultiMap("backups");
+        unusable = theInstance.getMultiMap("unusable");
 
-        rawMasters.addEntryListener(new RemoteMasterShipEventHandler(), true);
+        masters.addEntryListener(new RemoteMasterShipEventHandler(), true);
 
         log.info("Started");
     }
@@ -77,6 +76,30 @@
     }
 
     @Override
+    public MastershipRole getRole(NodeId nodeId, DeviceId deviceId) {
+        byte[] did = serialize(deviceId);
+        byte[] nid = serialize(nodeId);
+
+        NodeId current = deserialize(masters.get(did));
+        if (current == null) {
+            if (standbys.containsEntry(did, nid)) {
+                //was previously standby, or set to standby from master
+                return MastershipRole.STANDBY;
+            } else {
+                return MastershipRole.NONE;
+            }
+        } else {
+            if (current.equals(nodeId)) {
+                //*should* also be in unusable, but that is not always the case
+                return MastershipRole.MASTER;
+            } else {
+                //may be in backups or unusable from earlier retirement
+                return MastershipRole.STANDBY;
+            }
+        }
+    }
+
+    @Override
     public MastershipEvent setMaster(NodeId nodeId, DeviceId deviceId) {
         byte [] did = serialize(deviceId);
         byte [] nid = serialize(nodeId);
@@ -85,30 +108,31 @@
         lock.lock();
         try {
             MastershipRole role = getRole(nodeId, deviceId);
-            Integer term = rawTerms.get(did);
             switch (role) {
                 case MASTER:
+                    //reinforce mastership
+                    evict(nid, did);
                     return null;
                 case STANDBY:
-                    rawMasters.put(did, nid);
-                    rawTerms.put(did, ++term);
-                    backups.putIfAbsent(nid, NIL);
-                    break;
-                case NONE:
-                    rawMasters.put(did, nid);
-                    //new switch OR state transition after being orphaned
-                    if (term == null) {
-                        rawTerms.put(did, INIT);
-                    } else {
-                        rawTerms.put(did, ++term);
+                    //make current master standby
+                    byte [] current = masters.get(did);
+                    if (current != null) {
+                        backup(current, did);
                     }
-                    backups.put(nid, NIL);
-                    break;
+                    //assign specified node as new master
+                    masters.put(did, nid);
+                    evict(nid, did);
+                    updateTerm(did);
+                    return new MastershipEvent(MASTER_CHANGED, deviceId, nodeId);
+                case NONE:
+                    masters.put(did, nid);
+                    evict(nid, did);
+                    updateTerm(did);
+                    return new MastershipEvent(MASTER_CHANGED, deviceId, nodeId);
                 default:
                     log.warn("unknown Mastership Role {}", role);
                     return null;
             }
-            return new MastershipEvent(MASTER_CHANGED, deviceId, nodeId);
         } finally {
             lock.unlock();
         }
@@ -116,14 +140,14 @@
 
     @Override
     public NodeId getMaster(DeviceId deviceId) {
-        return deserialize(rawMasters.get(serialize(deviceId)));
+        return deserialize(masters.get(serialize(deviceId)));
     }
 
     @Override
     public Set<DeviceId> getDevices(NodeId nodeId) {
         ImmutableSet.Builder<DeviceId> builder = ImmutableSet.builder();
 
-        for (Map.Entry<byte[], byte[]> entry : rawMasters.entrySet()) {
+        for (Map.Entry<byte[], byte[]> entry : masters.entrySet()) {
             if (nodeId.equals(deserialize(entry.getValue()))) {
                 builder.add((DeviceId) deserialize(entry.getKey()));
             }
@@ -134,11 +158,8 @@
 
     @Override
     public MastershipRole requestRole(DeviceId deviceId) {
-        // first to empty slot for device in master map is MASTER
-        // depending on how backups are organized, might need to trigger election
-        // so only controller doesn't set itself to backup for another device
-        byte [] did = serialize(deviceId);
         NodeId local = clusterService.getLocalNode().id();
+        byte [] did = serialize(deviceId);
         byte [] lnid = serialize(local);
 
         ILock lock = theInstance.getLock(LOCK);
@@ -147,15 +168,17 @@
             MastershipRole role = getRole(local, deviceId);
             switch (role) {
                 case MASTER:
+                    evict(lnid, did);
                     break;
                 case STANDBY:
-                    backups.put(lnid, NIL);
-                    rawTerms.putIfAbsent(did, INIT);
+                    backup(lnid, did);
+                    terms.putIfAbsent(did, INIT);
                     break;
                 case NONE:
-                    rawMasters.put(did, lnid);
-                    rawTerms.putIfAbsent(did, INIT);
-                    backups.put(lnid, NIL);
+                    //claim mastership
+                    masters.put(did, lnid);
+                    evict(lnid, did);
+                    updateTerm(did);
                     role = MastershipRole.MASTER;
                     break;
                 default:
@@ -168,41 +191,21 @@
     }
 
     @Override
-    public MastershipRole getRole(NodeId nodeId, DeviceId deviceId) {
-        byte[] did = serialize(deviceId);
-
-        NodeId current = deserialize(rawMasters.get(did));
-        MastershipRole role = null;
-
-        if (current == null) {
-            //IFF no controllers have claimed mastership over it
-            role = MastershipRole.NONE;
-        } else {
-            if (current.equals(nodeId)) {
-                role = MastershipRole.MASTER;
-            } else {
-                role = MastershipRole.STANDBY;
-            }
-        }
-
-        return role;
-    }
-
-    @Override
     public MastershipTerm getTermFor(DeviceId deviceId) {
         byte[] did = serialize(deviceId);
-
-        if ((rawMasters.get(did) == null) ||
-                (rawTerms.get(did) == null)) {
+        if ((masters.get(did) == null) ||
+                (terms.get(did) == null)) {
             return null;
         }
         return MastershipTerm.of(
-                (NodeId) deserialize(rawMasters.get(did)), rawTerms.get(did));
+                (NodeId) deserialize(masters.get(did)), terms.get(did));
     }
 
     @Override
-    public MastershipEvent unsetMaster(NodeId nodeId, DeviceId deviceId) {
+    public MastershipEvent setStandby(NodeId nodeId, DeviceId deviceId) {
         byte [] did = serialize(deviceId);
+        byte [] nid = serialize(nodeId);
+        MastershipEvent event = null;
 
         ILock lock = theInstance.getLock(LOCK);
         lock.lock();
@@ -210,54 +213,113 @@
             MastershipRole role = getRole(nodeId, deviceId);
             switch (role) {
                 case MASTER:
-                    //hand off device to another
-                    NodeId backup = reelect(nodeId, deviceId);
-                    if (backup == null) {
-                        //goes back to NONE
-                        rawMasters.remove(did);
-                    } else {
-                        //goes to STANDBY for local, MASTER for someone else
-                        Integer term = rawTerms.get(did);
-                        rawMasters.put(did, serialize(backup));
-                        rawTerms.put(did, ++term);
-                        return new MastershipEvent(MASTER_CHANGED, deviceId, backup);
-                    }
+                    event = reelect(nodeId, deviceId);
+                    backup(nid, did);
+                    break;
                 case STANDBY:
+                    //fall through to reinforce role
                 case NONE:
+                    backup(nid, did);
                     break;
                 default:
                     log.warn("unknown Mastership Role {}", role);
             }
-            return null;
+            return event;
         } finally {
             lock.unlock();
         }
     }
 
-    //helper for "re-electing" a new master for a given device
-    private NodeId reelect(NodeId current, DeviceId deviceId) {
+    @Override
+    public MastershipEvent relinquishRole(NodeId nodeId, DeviceId deviceId) {
+        byte [] did = serialize(deviceId);
+        byte [] nid = serialize(nodeId);
+        MastershipEvent event = null;
 
-        for (byte [] node : backups.keySet()) {
-            NodeId nid = deserialize(node);
-            //if a device dies we shouldn't pick another master for it.
-            if (!current.equals(nid) && (deviceService.isAvailable(deviceId))) {
-                return nid;
+        ILock lock = theInstance.getLock(LOCK);
+        lock.lock();
+        try {
+            MastershipRole role = getRole(nodeId, deviceId);
+            switch (role) {
+                case MASTER:
+                    event = reelect(nodeId, deviceId);
+                    evict(nid, did);
+                    break;
+                case STANDBY:
+                    //fall through to reinforce relinquishment
+                case NONE:
+                    evict(nid, did);
+                    break;
+                default:
+                log.warn("unknown Mastership Role {}", role);
             }
+            return event;
+        } finally {
+            lock.unlock();
         }
-        return null;
     }
 
-    //adds node to pool(s) of backup
-    private void backup(NodeId nodeId, DeviceId deviceId) {
-        //TODO might be useful to isolate out this function and reelect() if we
-        //get more backup/election schemes
+    //helper that hands mastership of a device to one of its standbys, if any exists.
+    private MastershipEvent reelect(NodeId current, DeviceId deviceId) {
+        byte [] did = serialize(deviceId);
+        byte [] nid = serialize(current);
+
+        //if this were a queue it'd be neater.
+        byte [] backup = null;
+        for (byte [] n : standbys.get(serialize(deviceId))) {
+            if (!current.equals(deserialize(n))) {
+                backup = n;
+                break;
+            }
+        }
+
+        if (backup == null) {
+            masters.remove(did, nid);
+            return null;
+        } else {
+            masters.put(did, backup);
+            evict(backup, did);
+            Integer term = terms.get(did);
+            terms.put(did, ++term);
+            return new MastershipEvent(
+                    MASTER_CHANGED, deviceId, (NodeId) deserialize(backup));
+        }
+    }
+
+    //adds node to the device's pool of backups and removes it from unusable.
+    private void backup(byte [] nodeId, byte [] deviceId) {
+        if (!standbys.containsEntry(deviceId, nodeId)) {
+            standbys.put(deviceId, nodeId);
+        }
+        if (unusable.containsEntry(deviceId, nodeId)) {
+            unusable.remove(deviceId, nodeId);
+        }
+    }
+
+    //adds node to unusable and evicts it from backup pool.
+    private void evict(byte [] nodeId, byte [] deviceId) {
+        if (!unusable.containsEntry(deviceId, nodeId)) {
+            unusable.put(deviceId, nodeId);
+        }
+        if (standbys.containsEntry(deviceId, nodeId)) {
+            standbys.remove(deviceId, nodeId);
+        }
+    }
+
+    //adds or updates term information.
+    private void updateTerm(byte [] deviceId) {
+        Integer term = terms.get(deviceId);
+        if (term == null) {
+            terms.put(deviceId, INIT);
+        } else {
+            terms.put(deviceId, ++term);
+        }
     }
 
     private class RemoteMasterShipEventHandler extends RemoteEventHandler<DeviceId, NodeId> {
 
         @Override
         protected void onAdd(DeviceId deviceId, NodeId nodeId) {
-            //only addition indicates a change in mastership
             notifyDelegate(new MastershipEvent(MASTER_CHANGED, deviceId, nodeId));
         }
 
@@ -268,6 +330,7 @@
 
         @Override
         protected void onUpdate(DeviceId deviceId, NodeId oldNodeId, NodeId nodeId) {
+            //only addition indicates a change in mastership
             //notifyDelegate(new MastershipEvent(MASTER_CHANGED, deviceId, nodeId));
         }
     }