[SDFAB-633][SDFAB-634][SDFAB-445] Collection of improvements for SR

Firstly, this patch deeply rewrites the load sharing of the SR instances;
before, we were using a hybrid approach based on MastershipService.
With this patch we completely get rid of the MastershipService
for any task. We just use the MastershipEvent as a way to perform rerouting
if it happens near a cluster event. The aim is to make the forwarding
and the phased recovery more stable.

Then, the patch contains a fix for an issue related to the phased recovery.
pr.init() can be called when there are still no masters (on device config
for example) and when this happens the portstate commands are dropped.

Last but not least, there is a fix for missing device routes in DefaultRoutingHandler.
Device routes (seenBeforeRoutes) are cleaned on DEVICE UP/ADDED events; this can lead
to purging some routes when the device events are handled at different moments by the
ONOS instances and there are already some programmed routes.

Change-Id: Ia03b7c7c5b8a1b80c4b6d17053c2e2e7abf13d17
diff --git a/impl/src/main/java/org/onosproject/segmentrouting/DefaultRoutingHandler.java b/impl/src/main/java/org/onosproject/segmentrouting/DefaultRoutingHandler.java
index 7a65e95..e085c63 100644
--- a/impl/src/main/java/org/onosproject/segmentrouting/DefaultRoutingHandler.java
+++ b/impl/src/main/java/org/onosproject/segmentrouting/DefaultRoutingHandler.java
@@ -22,6 +22,8 @@
 import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;
 
+import com.google.common.hash.Hasher;
+import com.google.common.hash.Hashing;
 import org.onlab.packet.EthType;
 import com.google.common.collect.Streams;
 import org.onlab.packet.Ip4Address;
@@ -67,6 +69,7 @@
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.locks.Lock;
 import java.util.concurrent.locks.ReentrantLock;
+import java.util.function.Function;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
@@ -107,10 +110,13 @@
     private Instant lastRoutingChange = Instant.EPOCH;
     private Instant lastFullReroute = Instant.EPOCH;
 
-    // Distributed store to keep track of ONOS instance that should program the
-    // device pair. There should be only one instance (the king) that programs the same pair.
-    Map<Set<DeviceId>, NodeId> shouldProgram;
-    Map<DeviceId, Boolean> shouldProgramCache;
+    /*
+     * Store to keep track of ONOS instance that should program the device pair.
+     * There should be only one instance (the leader) that programs the same pair.
+     * This EC map is used as first source of truth. WorkPartitionService is used
+     * to elect a leader when shouldProgram is empty.
+     */
+    Map<DeviceId, NodeId> shouldProgram;
 
     // Distributed routes store to keep track of the routes already seen
     // destination device is the key and target sw is the value
@@ -136,12 +142,32 @@
     }
 
     /**
+     * Deterministic hashing for the shouldProgram logic.
+     */
+    private static Long consistentHasher(EdgePair pair) {
+        Hasher hasher = Hashing.md5().newHasher();
+        long dev1Hash = hasher.putUnencodedChars(pair.dev1.toString())
+                .hash()
+                .asLong();
+        hasher = Hashing.md5().newHasher();
+        long dev2Hash = hasher.putUnencodedChars(pair.dev2.toString())
+                .hash()
+                .asLong();
+        return dev1Hash + dev2Hash;
+    }
+
+    /**
+     * Implements the hash function for the shouldProgram logic.
+     */
+    protected static final Function<EdgePair, Long> HASH_FUNCTION = DefaultRoutingHandler::consistentHasher;
+
+    /**
      * Creates a DefaultRoutingHandler object.
      *
      * @param srManager SegmentRoutingManager object
      */
     DefaultRoutingHandler(SegmentRoutingManager srManager) {
-        this.shouldProgram = srManager.storageService.<Set<DeviceId>, NodeId>consistentMapBuilder()
+        this.shouldProgram = srManager.storageService.<DeviceId, NodeId>consistentMapBuilder()
                 .withName("sr-should-program")
                 .withSerializer(Serializer.using(KryoNamespaces.API))
                 .withRelaxedReadConsistency()
@@ -151,7 +177,6 @@
                 .withSerializer(Serializer.using(KryoNamespaces.API))
                 .withRelaxedReadConsistency()
                 .build();
-        this.shouldProgramCache = Maps.newConcurrentMap();
         update(srManager);
         this.routePopulators = new PredictableExecutor(DEFAULT_THREADS,
                                                       groupedThreads("onos/sr", "r-populator-%d", log));
@@ -290,6 +315,8 @@
             }
 
             log.debug("seenBeforeRoutes size {}", seenBeforeRoutes.size());
+            seenBeforeRoutes.forEach(entry -> log.debug("{} -> {}", entry.getValue(), entry.getKey()));
+
             if (!redoRouting(routeChanges, edgePairs, null)) {
                 log.debug("populateAllRoutingRules: populationStatus is ABORTED");
                 populationStatus = Status.ABORTED;
@@ -351,7 +378,10 @@
                     log.trace("  Current/Existing SPG: {}", entry.getValue());
                 }
             });
+
             log.debug("seenBeforeRoutes size {}", seenBeforeRoutes.size());
+            seenBeforeRoutes.forEach(entry -> log.debug("{} -> {}", entry.getValue(), entry.getKey()));
+
             Set<EdgePair> edgePairs = new HashSet<>();
             Set<ArrayList<DeviceId>> routeChanges = new HashSet<>();
             boolean handleRouting = false;
@@ -505,6 +535,7 @@
             log.debug("populateRoutingRulesForLinkStatusChange: "
                     + "populationStatus is STARTED");
             log.debug("seenBeforeRoutes size {}", seenBeforeRoutes.size());
+            seenBeforeRoutes.forEach(entry -> log.debug("{} -> {}", entry.getValue(), entry.getKey()));
             populationStatus = Status.STARTED;
             rulePopulator.resetCounter(); //XXX maybe useful to have a rehash ctr
             boolean hashGroupsChanged = false;
@@ -1351,7 +1382,7 @@
 
     /**
      * Populates IP rules for a route that has direct connection to the switch
-     * if the current instance is the master of the switch.
+     * if the current instance is leading the programming of the switch.
      *
      * @param deviceId device ID of the device that next hop attaches to
      * @param prefix IP prefix of the route
@@ -1372,7 +1403,7 @@
 
     /**
      * Removes IP rules for a route when the next hop is gone.
-     * if the current instance is the master of the switch.
+     * if the current instance is leading the programming of the switch.
      *
      * @param deviceId device ID of the device that next hop attaches to
      * @param prefix IP prefix of the route
@@ -1423,7 +1454,7 @@
     }
 
     /**
-     * Populates IP rules for a route when the next hop is double-tagged.
+     * Program IP rules for a route when the next hop is double-tagged.
      *
      * @param deviceId  device ID that next hop attaches to
      * @param prefix    IP prefix of the route
@@ -1432,56 +1463,24 @@
      * @param outerVlan Outer Vlan ID of the next hop
      * @param outerTpid Outer TPID of the next hop
      * @param outPort   port that the next hop attaches to
+     * @param install   whether or not install the route
      */
-    void populateDoubleTaggedRoute(DeviceId deviceId, IpPrefix prefix, MacAddress hostMac, VlanId innerVlan,
-                                   VlanId outerVlan, EthType outerTpid, PortNumber outPort) {
-        if (srManager.mastershipService.isLocalMaster(deviceId)) {
-            srManager.routingRulePopulator.populateDoubleTaggedRoute(
-                    deviceId, prefix, hostMac, innerVlan, outerVlan, outerTpid, outPort);
+    void programDoubleTaggedRoute(DeviceId deviceId, IpPrefix prefix, MacAddress hostMac, VlanId innerVlan,
+                                  VlanId outerVlan, EthType outerTpid, PortNumber outPort, boolean install) {
+        if (shouldProgram(deviceId)) {
+            if (install) {
+                srManager.routingRulePopulator.populateDoubleTaggedRoute(
+                        deviceId, prefix, hostMac, innerVlan, outerVlan, outerTpid, outPort);
+            } else {
+                srManager.routingRulePopulator.revokeDoubleTaggedRoute(
+                        deviceId, prefix, hostMac, innerVlan, outerVlan, outerTpid, outPort);
+            }
             srManager.routingRulePopulator.processDoubleTaggedFilter(
-                    deviceId, outPort, outerVlan, innerVlan, true);
+                    deviceId, outPort, outerVlan, innerVlan, install);
         }
     }
 
     /**
-     * Revokes IP rules for a route when the next hop is double-tagged.
-     *
-     * @param deviceId  device ID that next hop attaches to
-     * @param prefix    IP prefix of the route
-     * @param hostMac   MAC address of the next hop
-     * @param innerVlan Inner Vlan ID of the next hop
-     * @param outerVlan Outer Vlan ID of the next hop
-     * @param outerTpid Outer TPID of the next hop
-     * @param outPort   port that the next hop attaches to
-     */
-    void revokeDoubleTaggedRoute(DeviceId deviceId, IpPrefix prefix, MacAddress hostMac, VlanId innerVlan,
-                                 VlanId outerVlan, EthType outerTpid, PortNumber outPort) {
-        // Revoke route either if this node have the mastership (when device is available) or
-        // if this node is the leader (even when device is unavailable)
-        if (!srManager.mastershipService.isLocalMaster(deviceId)) {
-            if (srManager.deviceService.isAvailable(deviceId)) {
-                // Master node will revoke specified rule.
-                log.debug("This node is not a master for {}, stop revoking route.", deviceId);
-                return;
-            }
-
-            // isLocalMaster will return false when the device is unavailable.
-            // Verify if this node is the leader in that case.
-            NodeId leader = srManager.leadershipService.runForLeadership(
-                    deviceId.toString()).leaderNodeId();
-            if (!srManager.clusterService.getLocalNode().id().equals(leader)) {
-                // Leader node will revoke specified rule.
-                log.debug("This node is not a master for {}, stop revoking route.", deviceId);
-                return;
-            }
-        }
-
-        srManager.routingRulePopulator.revokeDoubleTaggedRoute(deviceId, prefix, hostMac,
-                innerVlan, outerVlan, outerTpid, outPort);
-        srManager.routingRulePopulator.processDoubleTaggedFilter(deviceId, outPort, outerVlan, innerVlan, false);
-    }
-
-    /**
      * Purges seen before routes for a given device.
      * @param deviceId the device id
      */
@@ -1533,6 +1532,12 @@
                                        MASTER_CHANGE_DELAY, TimeUnit.MILLISECONDS);
     }
 
+    /*
+     * Even though the current implementation does not heavily rely
+     * on mastership, we keep using the mastership and cluster events
+     * as heuristic to perform full reroutes and to make sure we don't
+     * lose any event when instances fail.
+     */
     protected final class MasterChange implements Runnable {
         private DeviceId devId;
         private MastershipEvent me;
@@ -1589,16 +1594,16 @@
                 log.warn("Mastership changed for dev: {}/{} while programming route-paths "
                         + "due to clusterEvent {} ms ago .. attempting full reroute",
                          devId, me.roleInfo(), lce);
-                if (srManager.mastershipService.isLocalMaster(devId)) {
-                    // old master could have died when populating filters
+                if (shouldProgram(devId)) {
+                    // old leader could have died when populating filters
                     populatePortAddressingRules(devId);
                 }
-                // old master could have died when creating groups
+                // old leader could have died when creating groups
                 // XXX right now we have no fine-grained way to only make changes
                 // for the route paths affected by this device. Thus we do a
                 // full reroute after purging all hash groups. We also try to do
                 // it only once, irrespective of the number of devices
-                // that changed mastership when their master instance died.
+                // that changed mastership when their leader instance died.
                 long lfrr = Instant.now().toEpochMilli() - lastFullReroute.toEpochMilli();
                 boolean doFullReroute = lfrr > FULL_REROUTE_THRESHOLD;
                 if (doFullReroute) {
@@ -1675,7 +1680,7 @@
                 continue;
             }
             for (DeviceId rootSw : deviceAndItsPair(sw.id())) {
-                // check for mastership change since last run
+                // check for leadership change since last run
                 if (!lastProgrammed.contains(sw.id())) {
                     log.warn("New responsibility for this node to program dev:{}"
                             + " ... nuking current ECMPspg", sw.id());
@@ -1766,7 +1771,7 @@
                                   link.dst().deviceId());
                     }
                 }
-                // check for mastership change since last run
+                // check for leadership change since last run
                 if (!lastProgrammed.contains(sw.id())) {
                     log.warn("New responsibility for this node to program dev:{}"
                             + " ... nuking current ECMPspg", sw.id());
@@ -1806,8 +1811,7 @@
             if (!pairDev.isPresent() || !srManager.deviceService.isAvailable(pairDev.get())) {
                 log.debug("Proxy Route changes to downed Sw:{}", failedSwitch);
                 srManager.deviceService.getDevices().forEach(dev -> {
-                    if (!dev.id().equals(failedSwitch) &&
-                            srManager.mastershipService.isLocalMaster(dev.id())) {
+                    if (!dev.id().equals(failedSwitch) && shouldProgram(dev.id())) {
                         log.debug(" : {}", dev.id());
                         changedRtBldr.add(Lists.newArrayList(dev.id(), failedSwitch));
                     }
@@ -1970,89 +1974,47 @@
     }
 
     /**
-     * Determines whether this controller instance should program the
-     * given {@code deviceId}, based on mastership and pairDeviceId if one exists.
-     * <p>
-     * Once an instance is elected, it will be the only instance responsible for programming
-     * both devices in the pair until it goes down.
+     * Determines whether this controller instance should program the given deviceId, based on
+     * workPartitionService and pairDeviceId if one exists. Once an instance is elected, it will
+     * be the only instance responsible for programming both devices in the pair until it goes down.
      *
-     * @param deviceId device identifier to consider for routing
-     * @return true if current instance should handle the routing for given device
+     * @param deviceId the device id
+     * @return true if this instance leads the programming, false otherwise
      */
-    boolean shouldProgram(DeviceId deviceId) {
-        Boolean cached = shouldProgramCache.get(deviceId);
-        if (cached != null) {
-            log.debug("shouldProgram dev:{} cached:{}", deviceId, cached);
-            return cached;
-        }
-
-        Optional<DeviceId> pairDeviceId = srManager.getPairDeviceId(deviceId);
-
+    public boolean shouldProgram(DeviceId deviceId) {
+        NodeId leader = shouldProgram.get(deviceId);
         NodeId currentNodeId = srManager.clusterService.getLocalNode().id();
-        NodeId masterNodeId = srManager.mastershipService.getMasterFor(deviceId);
-        Optional<NodeId> pairMasterNodeId = pairDeviceId.map(srManager.mastershipService::getMasterFor);
-        log.debug("Evaluate shouldProgram {}/pair={}. currentNodeId={}, master={}, pairMaster={}",
-                deviceId, pairDeviceId, currentNodeId, masterNodeId, pairMasterNodeId);
-
-        // No pair device configured. Only handle when current instance is the master of the device
-        if (!pairDeviceId.isPresent()) {
-            log.debug("No pair device. currentNodeId={}, master={}", currentNodeId, masterNodeId);
-            return currentNodeId.equals(masterNodeId);
+        if (leader != null) {
+            log.trace("shouldProgram dev:{} leader:{}", deviceId, leader);
+            return currentNodeId.equals(leader);
         }
 
-        // Should not handle if current instance is not the master of either switch
-        if (!currentNodeId.equals(masterNodeId) &&
-                !(pairMasterNodeId.isPresent() && currentNodeId.equals(pairMasterNodeId.get()))) {
-            log.debug("Current nodeId {} is neither the master of target device {} nor pair device {}",
-                    currentNodeId, deviceId, pairDeviceId);
-            return false;
-        }
+        // hash function is independent from the order of the devices in the edge pair
+        Optional<DeviceId> pairDeviceId = srManager.getPairDeviceId(deviceId);
+        EdgePair edgePair = new EdgePair(deviceId, pairDeviceId.orElse(DeviceId.NONE));
 
-        Set<DeviceId> key = Sets.newHashSet(deviceId, pairDeviceId.get());
-
-        NodeId king = shouldProgram.compute(key, ((k, v) -> {
-            if (v == null) {
-                // There is no value in the map. Elect a node
-                return elect(Lists.newArrayList(masterNodeId, pairMasterNodeId.orElse(null)));
-            } else {
-                if (v.equals(masterNodeId) || v.equals(pairMasterNodeId.orElse(null))) {
-                    // Use the node in the map if it is still alive and is a master of any of the two switches
-                    return v;
-                } else {
-                    // Previously elected node is no longer the master of either switch. Re-elect a node.
-                    return elect(Lists.newArrayList(masterNodeId, pairMasterNodeId.orElse(null)));
-                }
-            }
-        }));
-
-        if (king != null) {
-            log.debug("{} is king, should handle routing for {}/pair={}", king, deviceId, pairDeviceId);
-            shouldProgramCache.put(deviceId, king.equals(currentNodeId));
-            return king.equals(currentNodeId);
+        leader = srManager.workPartitionService.getLeader(edgePair, HASH_FUNCTION);
+        if (leader != null) {
+            log.debug("{} is the leader, should handle routing for {}/pair={}", leader, deviceId,
+                    pairDeviceId);
+            shouldProgram.put(deviceId, leader);
+            return leader.equals(currentNodeId);
         } else {
-            log.error("Fail to elect a king for {}/pair={}. Abort.", deviceId, pairDeviceId);
-            shouldProgramCache.remove(deviceId);
+            log.error("Fail to elect a leader for {}/pair={}. Abort.", deviceId, pairDeviceId);
+            shouldProgram.remove(deviceId);
             return false;
         }
     }
 
-    /**
-     * Elects a node who should take responsibility of programming devices.
-     * @param nodeIds list of candidate node ID
-     *
-     * @return NodeId of the node that gets elected, or null if none of the node can be elected
-     */
-    private NodeId elect(List<NodeId> nodeIds) {
-        // Remove all null elements. This could happen when some device has no master
-        nodeIds.removeAll(Collections.singleton(null));
-        nodeIds.sort(null);
-        return nodeIds.size() == 0 ? null : nodeIds.get(0);
+    void invalidateShouldProgram(DeviceId deviceId) {
+        shouldProgram.remove(deviceId);
     }
 
-    void invalidateShouldProgramCache(DeviceId deviceId) {
-        shouldProgramCache.remove(deviceId);
+    void invalidateShouldProgram() {
+        shouldProgram.clear();
     }
 
+
     /**
      * Returns a set of device ID, containing given device and its pair device if exist.
      *
@@ -2147,7 +2109,8 @@
     /**
      * Populates filtering rules for port, and punting rules
      * for gateway IPs, loopback IPs and arp/ndp traffic.
-     * Should only be called by the master instance for this device/port.
+     * Should only be called by the instance leading the programming
+     * for this device/port.
      *
      * @param deviceId Switch ID to set the rules
      */
diff --git a/impl/src/main/java/org/onosproject/segmentrouting/HostHandler.java b/impl/src/main/java/org/onosproject/segmentrouting/HostHandler.java
index 07e619f..a039892 100644
--- a/impl/src/main/java/org/onosproject/segmentrouting/HostHandler.java
+++ b/impl/src/main/java/org/onosproject/segmentrouting/HostHandler.java
@@ -144,7 +144,7 @@
         // ensure dual-homed host locations have viable uplinks
         if (effectiveLocations(host).size() > 1 || srManager.singleHomedDown) {
             effectiveLocations(host).forEach(loc -> {
-                if (srManager.mastershipService.isLocalMaster(loc.deviceId())) {
+                if (srManager.shouldProgram(loc.deviceId())) {
                     srManager.linkHandler.checkUplinksForHost(loc);
                 }
             });
@@ -442,7 +442,7 @@
         // ensure dual-homed host locations have viable uplinks
         if (newLocations.size() > prevLocations.size() || srManager.singleHomedDown) {
             newLocations.forEach(loc -> {
-                if (srManager.mastershipService.isLocalMaster(loc.deviceId())) {
+                if (srManager.shouldProgram(loc.deviceId())) {
                     srManager.linkHandler.checkUplinksForHost(loc);
                 }
             });
@@ -688,13 +688,8 @@
         }
         log.info("{} routing rule for double-tagged host {} at {}",
                  revoke ? "Revoking" : "Populating", ip, location);
-        if (revoke) {
-            srManager.defaultRoutingHandler.revokeDoubleTaggedRoute(
-                    deviceId, ip.toIpPrefix(), mac, innerVlan, outerVlan, outerTpid, port);
-        } else {
-            srManager.defaultRoutingHandler.populateDoubleTaggedRoute(
-                    deviceId, ip.toIpPrefix(), mac, innerVlan, outerVlan, outerTpid, port);
-        }
+        srManager.defaultRoutingHandler.programDoubleTaggedRoute(
+                deviceId, ip.toIpPrefix(), mac, innerVlan, outerVlan, outerTpid, port, !revoke);
     }
 
     void populateAllDoubleTaggedHost() {
diff --git a/impl/src/main/java/org/onosproject/segmentrouting/LinkHandler.java b/impl/src/main/java/org/onosproject/segmentrouting/LinkHandler.java
index 229d32c..c8acc75 100644
--- a/impl/src/main/java/org/onosproject/segmentrouting/LinkHandler.java
+++ b/impl/src/main/java/org/onosproject/segmentrouting/LinkHandler.java
@@ -160,7 +160,7 @@
                     .populateRoutingRulesForLinkStatusChange(null, ulink, null,
                             (seenBefore.contains(ulink) && seenBefore.contains(getReverseLink(ulink))));
 
-            if (srManager.mastershipService.isLocalMaster(ulink.src().deviceId())) {
+            if (srManager.shouldProgram(ulink.src().deviceId())) {
                 // handle edge-ports for dual-homed hosts
                 updateHostPorts(ulink, true);
 
@@ -215,7 +215,7 @@
             log.warn("received a link down for the link {} which is not in the store", link);
         }
         // handle edge-ports for dual-homed hosts
-        if (srManager.mastershipService.isLocalMaster(link.src().deviceId())) {
+        if (srManager.shouldProgram(link.src().deviceId())) {
             updateHostPorts(link, false);
         }
 
@@ -252,16 +252,16 @@
             DefaultGroupHandler groupHandler = srManager.groupHandlerMap
                     .get(ulink.src().deviceId());
             if (groupHandler != null) {
-                if (srManager.mastershipService.isLocalMaster(ulink.src().deviceId())
+                if (srManager.shouldProgram(ulink.src().deviceId())
                         && isParallelLink(ulink)) {
                     log.debug("* retrying hash for parallel link removed:{}", ulink);
                     groupHandler.retryHash(ulink, true, false);
                 } else {
                     log.debug("Not attempting retry-hash for link removed: {} .. {}",
                               ulink,
-                              (srManager.mastershipService.isLocalMaster(ulink
+                              (srManager.shouldProgram(ulink
                                       .src().deviceId())) ? "not parallel"
-                                                          : "not master");
+                                                          : "not handling programming");
                 }
                 // ensure local stores are updated after all rerouting or rehashing
                 groupHandler.portDownForLink(ulink);
diff --git a/impl/src/main/java/org/onosproject/segmentrouting/RoutingRulePopulator.java b/impl/src/main/java/org/onosproject/segmentrouting/RoutingRulePopulator.java
index 5e29dde..80666c5 100644
--- a/impl/src/main/java/org/onosproject/segmentrouting/RoutingRulePopulator.java
+++ b/impl/src/main/java/org/onosproject/segmentrouting/RoutingRulePopulator.java
@@ -1382,8 +1382,8 @@
             return;
         }
 
-        if (request && !srManager.mastershipService.isLocalMaster(deviceId)) {
-            log.debug("Not installing port-IP punts - not the master for dev:{} ",
+        if (request && !srManager.shouldProgram(deviceId)) {
+            log.debug("Not installing port-IP punts - not handling programming for dev:{} ",
                       deviceId);
             return;
         }
@@ -1468,8 +1468,8 @@
      */
     void populateArpNdpPunts(DeviceId deviceId) {
         // We are not the master just skip.
-        if (!srManager.mastershipService.isLocalMaster(deviceId)) {
-            log.debug("Not installing ARP/NDP punts - not the master for dev:{} ",
+        if (!srManager.shouldProgram(deviceId)) {
+            log.debug("Not installing ARP/NDP punts - not handling programming for dev:{} ",
                       deviceId);
             return;
         }
@@ -1967,7 +1967,7 @@
     void updateSpecialVlanFilteringRules(boolean pushVlan, VlanId oldVlanId,
                                          VlanId newVlanId) {
         for (Device dev : srManager.deviceService.getAvailableDevices()) {
-            if (srManager.mastershipService.isLocalMaster(dev.id())) {
+            if (srManager.shouldProgram(dev.id())) {
                 for (Port p : srManager.deviceService.getPorts(dev.id())) {
                     if (!hasIPConfiguration(new ConnectPoint(dev.id(), p.number()))
                             && p.isEnabled()) {
diff --git a/impl/src/main/java/org/onosproject/segmentrouting/SegmentRoutingManager.java b/impl/src/main/java/org/onosproject/segmentrouting/SegmentRoutingManager.java
index 13a9b99..c81ee56 100644
--- a/impl/src/main/java/org/onosproject/segmentrouting/SegmentRoutingManager.java
+++ b/impl/src/main/java/org/onosproject/segmentrouting/SegmentRoutingManager.java
@@ -35,7 +35,6 @@
 import org.onosproject.cluster.ClusterEvent;
 import org.onosproject.cluster.ClusterEventListener;
 import org.onosproject.cluster.ClusterService;
-import org.onosproject.cluster.LeadershipService;
 import org.onosproject.cluster.NodeId;
 import org.onosproject.core.ApplicationId;
 import org.onosproject.core.CoreService;
@@ -198,7 +197,6 @@
 public class SegmentRoutingManager implements SegmentRoutingService {
 
     private static Logger log = LoggerFactory.getLogger(SegmentRoutingManager.class);
-    private static final String NOT_MASTER = "Current instance is not the master of {}. Ignore.";
 
     @Reference(cardinality = ReferenceCardinality.MANDATORY)
     private ComponentConfigService compCfgService;
@@ -257,9 +255,6 @@
     @Reference(cardinality = ReferenceCardinality.MANDATORY)
     public WorkPartitionService workPartitionService;
 
-    @Reference(cardinality = ReferenceCardinality.MANDATORY)
-    public LeadershipService leadershipService;
-
     @Reference(cardinality = ReferenceCardinality.OPTIONAL,
             policy = ReferencePolicy.DYNAMIC)
     public volatile XconnectService xconnectService;
@@ -1063,19 +1058,23 @@
 
     @Override
     public Map<Set<DeviceId>, NodeId> getShouldProgram() {
+        return ImmutableMap.of();
+    }
+
+    @Override
+    public Map<DeviceId, Boolean> getShouldProgramCache() {
+        return ImmutableMap.of();
+    }
+
+    @Override
+    public Map<DeviceId, NodeId> getShouldProgramLeaders() {
         return defaultRoutingHandler == null ? ImmutableMap.of() :
                 ImmutableMap.copyOf(defaultRoutingHandler.shouldProgram);
     }
 
     @Override
-    public Map<DeviceId, Boolean> getShouldProgramCache() {
-        return defaultRoutingHandler == null ? ImmutableMap.of() :
-                ImmutableMap.copyOf(defaultRoutingHandler.shouldProgramCache);
-    }
-
-    @Override
     public boolean shouldProgram(DeviceId deviceId) {
-        return defaultRoutingHandler.shouldProgram(deviceId);
+        return defaultRoutingHandler != null && defaultRoutingHandler.shouldProgram(deviceId);
     }
 
     @Override
@@ -1343,7 +1342,7 @@
      */
     public void updateMacVlanTreatment(DeviceId deviceId, MacAddress hostMac,
                                        VlanId hostVlanId, PortNumber port, int nextId) {
-        // Check if we are the king of this device
+        // Check if we are the leader of this device
         // just one instance should perform this update
         if (!defaultRoutingHandler.shouldProgram(deviceId)) {
             log.debug("This instance is not handling the routing towards the "
@@ -1462,6 +1461,16 @@
                                 + "for available device {}",
                                  event.type(), ((Device) event.subject()).id());
                         processDeviceAdded((Device) event.subject());
+                        /*
+                         * This is a mere heuristic, as mastership is not yet stable in ONOS; it relies on the
+                         * fact that a device is marked online only if there is a master around. processDeviceAdded
+                         * can also be called on config changes and link events, with no availability check. In
+                         * those scenarios there may be no master, and phased recovery breaks because nobody can
+                         * admin-enable the ports. We keep in processDeviceAdded the code that is already idempotent.
+                         */
+                        if (event.type() == DeviceEvent.Type.DEVICE_AVAILABILITY_CHANGED) {
+                            phasedRecoveryService.init(deviceId);
+                        }
                     } else {
                         if (event.type() == DeviceEvent.Type.DEVICE_ADDED) {
                             // Note: For p4 devices, the device will be added but unavailable at the beginning.
@@ -1584,13 +1593,13 @@
                 } else if (event.type() == MastershipEvent.Type.MASTER_CHANGED) {
                     MastershipEvent me = (MastershipEvent) event;
                     DeviceId deviceId = me.subject();
-                    Optional<DeviceId> pairDeviceId = getPairDeviceId(deviceId);
-                    log.info(" ** MASTERSHIP CHANGED Invalidating shouldProgram cache"
-                            + " for {}/pair={} due to change", deviceId, pairDeviceId);
-                    defaultRoutingHandler.invalidateShouldProgramCache(deviceId);
-                    pairDeviceId.ifPresent(defaultRoutingHandler::invalidateShouldProgramCache);
+                    log.info(" ** Mastership changed check full reroute for {} due to change", deviceId);
                     defaultRoutingHandler.checkFullRerouteForMasterChange(deviceId, me);
 
+                } else if (event.type() == ClusterEvent.Type.INSTANCE_DEACTIVATED ||
+                        event.type() == ClusterEvent.Type.INSTANCE_REMOVED) {
+                    log.info(" ** Cluster event invalidating shouldProgram");
+                    defaultRoutingHandler.invalidateShouldProgram();
                 } else {
                     log.warn("Unhandled event type: {}", event.type());
                 }
@@ -1618,7 +1627,7 @@
     }
 
     private void processDeviceAddedInternal(DeviceId deviceId) {
-        // Irrespective of whether the local is a MASTER or not for this device,
+        // Irrespective of whether the local is leading the programming or not for this device,
         // we need to create a SR-group-handler instance. This is because in a
         // multi-instance setup, any instance can initiate forwarding/next-objectives
         // for any switch (even if this instance is a SLAVE or not even connected
@@ -1644,13 +1653,11 @@
             groupHandlerMap.put(deviceId, groupHandler);
         }
 
-        if (mastershipService.isLocalMaster(deviceId)) {
+        if (shouldProgram(deviceId)) {
             defaultRoutingHandler.populatePortAddressingRules(deviceId);
-            defaultRoutingHandler.purgeSeenBeforeRoutes(deviceId);
             DefaultGroupHandler groupHandler = groupHandlerMap.get(deviceId);
             groupHandler.createGroupsFromVlanConfig();
             routingRulePopulator.populateSubnetBroadcastRule(deviceId);
-            phasedRecoveryService.init(deviceId);
         }
 
         appCfgHandler.init(deviceId);
@@ -1684,6 +1691,8 @@
         defaultRoutingHandler
             .populateRoutingRulesForLinkStatusChange(null, null, device.id(), true);
         defaultRoutingHandler.purgeEcmpGraph(device.id());
+        // Remove the routes that have the device that went down as their target
+        defaultRoutingHandler.purgeSeenBeforeRoutes(device.id());
 
         // Cleanup all internal groupHandler stores for this device. Should be
         // done after all rerouting or rehashing has been completed
@@ -1723,8 +1732,8 @@
             lastEdgePortEvent = Instant.now();
         }
 
-        if (!mastershipService.isLocalMaster(device.id()))  {
-            log.debug("Not master for dev:{} .. not handling port updated event "
+        if (!shouldProgram(device.id()))  {
+            log.debug("Should not program dev:{} .. not handling port updated event "
                     + "for port {}", device.id(), port.number());
             return;
         }
@@ -1734,7 +1743,7 @@
     /**
      * Adds or remove filtering rules for the given switchport. If switchport is
      * an edge facing port, additionally handles host probing and broadcast
-     * rules. Must be called by local master of device.
+     * rules. Must be called by the instance leading the programming of the device.
      *
      * @param deviceId the device identifier
      * @param port the port to update
@@ -2116,6 +2125,7 @@
             case INSTANCE_REMOVED:
                 log.info("** Cluster event {}", event.type());
                 lastClusterEvent = Instant.now();
+                mainEventExecutor.execute(new InternalEventHandler(event));
                 break;
             default:
                 break;
diff --git a/impl/src/main/java/org/onosproject/segmentrouting/SegmentRoutingService.java b/impl/src/main/java/org/onosproject/segmentrouting/SegmentRoutingService.java
index 061e246..71df2ef 100644
--- a/impl/src/main/java/org/onosproject/segmentrouting/SegmentRoutingService.java
+++ b/impl/src/main/java/org/onosproject/segmentrouting/SegmentRoutingService.java
@@ -329,21 +329,36 @@
      * Returns shouldProgram map.
      *
      * @return shouldProgram map
+     * @deprecated in trellis-control-3.0.1, use {@link #getShouldProgramLeaders()} instead
      */
+    @Deprecated
     Map<Set<DeviceId>, NodeId> getShouldProgram();
 
     /**
      * Returns shouldProgram local cache.
      *
      * @return shouldProgram local cache
+     * @deprecated in trellis-control-3.0.1
      */
+    @Deprecated
     Map<DeviceId, Boolean> getShouldProgramCache();
 
     /**
-     * Returns whether instance should program device or not.
+     * Returns shouldProgram leaders.
      *
-     * @param deviceId .
-     * @return boolean status saying instance should program device or not.
+     * @return shouldProgram mapping device to node
+     */
+    Map<DeviceId, NodeId> getShouldProgramLeaders();
+
+    /**
+     * Determines whether this controller instance should program the
+     * given {@code deviceId}, based on work partition service and pairDeviceId if one exists.
+     * <p>
+     * Once an instance is elected, it will be the only instance responsible for programming
+     * both devices in the pair until it goes down.
+     *
+     * @param deviceId device identifier to consider for routing
+     * @return true if current instance should handle the routing for given device
      */
     boolean shouldProgram(DeviceId deviceId);
 
@@ -468,4 +483,5 @@
      * @return the default internal vlan id
      */
     VlanId getDefaultInternalVlan();
+
 }
diff --git a/impl/src/main/java/org/onosproject/segmentrouting/cli/ShouldProgramCommand.java b/impl/src/main/java/org/onosproject/segmentrouting/cli/ShouldProgramCommand.java
index 250fb3b..01e62a0 100644
--- a/impl/src/main/java/org/onosproject/segmentrouting/cli/ShouldProgramCommand.java
+++ b/impl/src/main/java/org/onosproject/segmentrouting/cli/ShouldProgramCommand.java
@@ -24,7 +24,6 @@
 import org.onosproject.segmentrouting.SegmentRoutingService;
 
 import java.util.Map;
-import java.util.Set;
 
 /**
  * Display current shouldProgram map.
@@ -36,13 +35,9 @@
     @Override
     protected void doExecute() {
         SegmentRoutingService srService = AbstractShellCommand.get(SegmentRoutingService.class);
-        Map<Set<DeviceId>, NodeId> shouldProgram = srService.getShouldProgram();
-        Map<DeviceId, Boolean> shouldProgramCache = srService.getShouldProgramCache();
+        Map<DeviceId, NodeId> shouldProgram = srService.getShouldProgramLeaders();
 
         print("shouldProgram");
         shouldProgram.forEach((k, v) -> print("%s -> %s", k, v));
-
-        print("shouldProgramCache");
-        shouldProgramCache.forEach((k, v) -> print("%s -> %s", k, v));
     }
 }
\ No newline at end of file
diff --git a/impl/src/main/java/org/onosproject/segmentrouting/grouphandler/DefaultGroupHandler.java b/impl/src/main/java/org/onosproject/segmentrouting/grouphandler/DefaultGroupHandler.java
index 7920e67..c33c457 100644
--- a/impl/src/main/java/org/onosproject/segmentrouting/grouphandler/DefaultGroupHandler.java
+++ b/impl/src/main/java/org/onosproject/segmentrouting/grouphandler/DefaultGroupHandler.java
@@ -250,9 +250,9 @@
     /**
      * Checks all groups in the src-device of link for neighbor sets that include
      * the dst-device of link, and edits the hash groups according to link up
-     * or down. Should only be called by the master instance of the src-switch
-     * of link. Typically used when there are no route-path changes due to the
-     * link up or down, as the ECMPspg does not change.
+     * or down. Should only be called by the instance leading the programming of
+     * the src-switch of link. Typically used when there are no route-path
+     * changes due to the link up or down, as the ECMPspg does not change.
      *
      * @param link the infrastructure link that has gone down or come up
      * @param linkDown true if link has gone down
@@ -461,9 +461,9 @@
     /**
      * Checks all the hash-groups in the target-switch meant for the destination
      * switch, and either adds or removes buckets to make the neighbor-set
-     * match the given next-hops. Typically called by the master instance of the
-     * destination switch, which may be different from the master instance of the
-     * target switch where hash-group changes are made.
+     * match the given next-hops. Typically called by the instance leading the programming
+     * of the destination switch, which may be different from the instance leading the
+     * programming of the target switch where hash-group changes are made.
      *
      * @param targetSw the switch in which the hash groups will be edited
      * @param nextHops the current next hops for the target switch to reach
@@ -685,7 +685,8 @@
 
     /**
      * Adds or removes a port that has been configured with a vlan to a broadcast group
-     * for bridging. Should only be called by the master instance for this device.
+     * for bridging. Should only be called by the instance leading the programming
+     * for this device.
      *
      * @param port the port on this device that needs to be added/removed to a bcast group
      * @param vlanId the vlan id corresponding to the broadcast domain/group
@@ -1718,22 +1719,13 @@
             this.nextId = null;
         }
 
-        BucketCorrector(Integer nextId) {
-            this.nextId = nextId;
-        }
-
         @Override
         public void run() {
-            if (!srManager.mastershipService.isLocalMaster(deviceId)) {
-                return;
-            }
             DefaultRoutingHandler rh = srManager.getRoutingHandler();
-            if (rh == null) {
+            if (rh == null || !rh.isRoutingStable() || !rh.shouldProgram(deviceId)) {
                 return;
             }
-            if (!rh.isRoutingStable()) {
-                return;
-            }
+
             rh.acquireRoutingLock();
             try {
                 log.trace("running bucket corrector for dev: {}", deviceId);
diff --git a/impl/src/main/java/org/onosproject/segmentrouting/phasedrecovery/api/PhasedRecoveryService.java b/impl/src/main/java/org/onosproject/segmentrouting/phasedrecovery/api/PhasedRecoveryService.java
index 705ecb7..6a30ef7 100644
--- a/impl/src/main/java/org/onosproject/segmentrouting/phasedrecovery/api/PhasedRecoveryService.java
+++ b/impl/src/main/java/org/onosproject/segmentrouting/phasedrecovery/api/PhasedRecoveryService.java
@@ -56,7 +56,8 @@
     boolean isEnabled();
 
     /**
-     * Initializes a device. Only the master of the device is allowed to do this.
+     * Initializes a device. Only the instance leading the programming
+     * of the device is allowed to do this.
      *
      * @param deviceId device ID
      * @return true if the device is initialized successfully and the caller should proceed,
@@ -65,7 +66,8 @@
     boolean init(DeviceId deviceId);
 
     /**
-     * Resets a device. Only the master of the device is allowed to do this.
+     * Resets a device. Only the instance leading the programming
+     * of the device is allowed to do this.
      *
      * @param deviceId device ID
      * @return true if the device is reset successfully.
@@ -89,7 +91,8 @@
     Phase getPhase(DeviceId deviceId);
 
     /**
-     * Sets given device with given recovery phase. Only the master of the device is allowed to do this.
+     * Sets given device with given recovery phase. Only the instance leading the programming
+     * of the device is allowed to do this.
      *
      * @param deviceId device ID
      * @param newPhase recovery phase
diff --git a/impl/src/main/java/org/onosproject/segmentrouting/phasedrecovery/impl/PhasedRecoveryManager.java b/impl/src/main/java/org/onosproject/segmentrouting/phasedrecovery/impl/PhasedRecoveryManager.java
index 8c62a2f..9544b57 100644
--- a/impl/src/main/java/org/onosproject/segmentrouting/phasedrecovery/impl/PhasedRecoveryManager.java
+++ b/impl/src/main/java/org/onosproject/segmentrouting/phasedrecovery/impl/PhasedRecoveryManager.java
@@ -22,7 +22,6 @@
 import org.onosproject.cfg.ComponentConfigService;
 import org.onosproject.core.ApplicationId;
 import org.onosproject.core.CoreService;
-import org.onosproject.mastership.MastershipService;
 import org.onosproject.net.DeviceId;
 import org.onosproject.net.Port;
 import org.onosproject.net.PortNumber;
@@ -90,9 +89,6 @@
     private DeviceAdminService deviceAdminService;
 
     @Reference(cardinality = ReferenceCardinality.MANDATORY)
-    private MastershipService mastershipService;
-
-    @Reference(cardinality = ReferenceCardinality.MANDATORY)
     private StorageService storageService;
 
     @Reference(cardinality = ReferenceCardinality.OPTIONAL)
@@ -157,8 +153,9 @@
             log.info("SegmentRoutingService is not ready");
             return false;
         }
-        if (!mastershipService.isLocalMaster(deviceId)) {
-            log.info("Not master of {}", deviceId);
+
+        if (!srService.shouldProgram(deviceId)) {
+            log.info("Skip init not leading the phase recovery of {}", deviceId);
             return false;
         }
 
@@ -195,8 +192,11 @@
             log.info("SegmentRoutingService is not ready");
             return false;
         }
-        // FIXME Skip mastership checking since master will not be available when a device goes offline
-        //       Improve this when persistent mastership is introduced
+
+        if (!srService.shouldProgram(deviceId)) {
+            log.info("Skip reset not leading the phase recovery of {}", deviceId);
+            return false;
+        }
 
         Phase result = Optional.ofNullable(phasedRecoveryStore.remove(deviceId))
                 .map(Versioned::value).orElse(null);
@@ -222,8 +222,9 @@
             log.info("SegmentRoutingService is not ready");
             return null;
         }
-        if (!mastershipService.isLocalMaster(deviceId)) {
-            log.info("Not master of {}", deviceId);
+
+        if (!srService.shouldProgram(deviceId)) {
+            log.info("Skip setPhase not leading the phase recovery of {}", deviceId);
             return null;
         }
 
@@ -407,4 +408,6 @@
             }
         }
     }
+
+    // FIXME We should handle cluster events to resume the phased recovery
 }
diff --git a/impl/src/main/java/org/onosproject/segmentrouting/xconnect/impl/XconnectManager.java b/impl/src/main/java/org/onosproject/segmentrouting/xconnect/impl/XconnectManager.java
index fecba5a..a20192d 100644
--- a/impl/src/main/java/org/onosproject/segmentrouting/xconnect/impl/XconnectManager.java
+++ b/impl/src/main/java/org/onosproject/segmentrouting/xconnect/impl/XconnectManager.java
@@ -27,9 +27,6 @@
 import org.onlab.packet.MacAddress;
 import org.onlab.packet.VlanId;
 import org.onlab.util.KryoNamespace;
-import org.onosproject.cluster.ClusterService;
-import org.onosproject.cluster.LeadershipService;
-import org.onosproject.cluster.NodeId;
 import org.onosproject.codec.CodecService;
 import org.onosproject.core.ApplicationId;
 import org.onosproject.core.CoreService;
@@ -37,7 +34,6 @@
 import org.onosproject.portloadbalancer.api.PortLoadBalancerId;
 import org.onosproject.portloadbalancer.api.PortLoadBalancerListener;
 import org.onosproject.portloadbalancer.api.PortLoadBalancerService;
-import org.onosproject.mastership.MastershipService;
 import org.onosproject.net.ConnectPoint;
 import org.onosproject.net.DeviceId;
 import org.onosproject.net.Host;
@@ -135,15 +131,6 @@
     @Reference(cardinality = ReferenceCardinality.MANDATORY)
     public FlowObjectiveService flowObjectiveService;
 
-    @Reference(cardinality = ReferenceCardinality.MANDATORY)
-    private LeadershipService leadershipService;
-
-    @Reference(cardinality = ReferenceCardinality.MANDATORY)
-    private ClusterService clusterService;
-
-    @Reference(cardinality = ReferenceCardinality.MANDATORY)
-    public MastershipService mastershipService;
-
     @Reference(cardinality = ReferenceCardinality.OPTIONAL)
     public SegmentRoutingService srService;
 
@@ -398,8 +385,8 @@
             deviceEventExecutor.execute(() -> {
                 DeviceId deviceId = event.subject().id();
                 // Just skip if we are not the leader
-                if (!isLocalLeader(deviceId)) {
-                    log.debug("Not the leader of {}. Skip event {}", deviceId, event);
+                if (!srService.shouldProgram(deviceId)) {
+                    log.debug("Not leading the programming of {}. Skip event {}", deviceId, event);
                     return;
                 }
                 // Populate or revoke according to the device availability
@@ -536,7 +523,7 @@
      * @param endpoints a set of endpoints to be cross-connected
      */
     private void populateXConnect(XconnectKey key, Set<XconnectEndpoint> endpoints) {
-        if (!isLocalLeader(key.deviceId())) {
+        if (!srService.shouldProgram(key.deviceId())) {
             log.debug("Abort populating XConnect {}: {}", key, ERROR_NOT_LEADER);
             return;
         }
@@ -649,7 +636,7 @@
      * @param endpoints XConnect endpoints
      */
     private void revokeXConnect(XconnectKey key, Set<XconnectEndpoint> endpoints) {
-        if (!isLocalLeader(key.deviceId())) {
+        if (!srService.shouldProgram(key.deviceId())) {
             log.debug("Abort revoking XConnect {}: {}", key, ERROR_NOT_LEADER);
             return;
         }
@@ -792,7 +779,7 @@
      */
     private void updateXConnect(XconnectKey key, Set<XconnectEndpoint> prevEndpoints,
                                 Set<XconnectEndpoint> endpoints) {
-        if (!isLocalLeader(key.deviceId())) {
+        if (!srService.shouldProgram(key.deviceId())) {
             log.debug("Abort updating XConnect {}: {}", key, ERROR_NOT_LEADER);
             return;
         }
@@ -965,7 +952,7 @@
     private void updateL2Flooding(DeviceId deviceId, PortNumber port, VlanId vlanId, boolean install) {
         XconnectKey key = new XconnectKey(deviceId, vlanId);
         // Ensure leadership on device
-        if (!isLocalLeader(deviceId)) {
+        if (!srService.shouldProgram(deviceId)) {
             log.debug("Abort updating L2Flood {}: {}", key, ERROR_NOT_LEADER);
             return;
         }
@@ -1260,22 +1247,6 @@
         return ports.stream().anyMatch(p -> !p.equals(pairPort));
     }
 
-    // Custom-built function, when the device is not available we need a fallback mechanism
-    private boolean isLocalLeader(DeviceId deviceId) {
-        if (!mastershipService.isLocalMaster(deviceId)) {
-            // When the device is available we just check the mastership
-            if (deviceService.isAvailable(deviceId)) {
-                return false;
-            }
-            // Fallback with Leadership service - device id is used as topic
-            NodeId leader = leadershipService.runForLeadership(
-                    deviceId.toString()).leaderNodeId();
-            // Verify if this node is the leader
-            return clusterService.getLocalNode().id().equals(leader);
-        }
-        return true;
-    }
-
     private Set<PortNumber> getPhysicalPorts(DeviceId deviceId, XconnectEndpoint endpoint) {
         if (endpoint.type() == XconnectEndpoint.Type.PORT) {
             PortNumber port = ((XconnectPortEndpoint) endpoint).port();