CORD-1583 More bug fixes in dual-ToR scenarios

 - reentrant lock was not being used correctly
 - fixHashGroup in group handler was not updating global store correctly
 - linkUp was not being noted in seenLinks if configuration came after switches connected
 - serialization error in global objective store due to missing kryo for Sets
 - damaged routepath computation was not taking pair-devs into account
 - switch failures were leading to improper ecmpSpg graph updates, and missed hash-group changes
 - implemented more next-objective verification as group sub-system can go out-of-sync with objective-store

Change-Id: If3cfdd715e9b69820894b49def31f75ceb748863
diff --git a/apps/segmentrouting/src/main/java/org/onosproject/segmentrouting/DefaultRoutingHandler.java b/apps/segmentrouting/src/main/java/org/onosproject/segmentrouting/DefaultRoutingHandler.java
index e9517f1..50b0cd2 100644
--- a/apps/segmentrouting/src/main/java/org/onosproject/segmentrouting/DefaultRoutingHandler.java
+++ b/apps/segmentrouting/src/main/java/org/onosproject/segmentrouting/DefaultRoutingHandler.java
@@ -64,6 +64,7 @@
     private static final int RETRY_INTERVAL_MS = 250;
     private static final int RETRY_INTERVAL_SCALE = 1;
     private static final long STABLITY_THRESHOLD = 10; //secs
+    private static final int UPDATE_INTERVAL = 5; //secs
     private static Logger log = LoggerFactory.getLogger(DefaultRoutingHandler.class);
 
     private SegmentRoutingManager srManager;
@@ -148,7 +149,7 @@
    public boolean isRoutingStable() {
        long last = (long) (lastRoutingChange.getMillis() / 1000.0);
        long now = (long) (DateTime.now().getMillis() / 1000.0);
-       log.debug("Routing stable since {}s", now - last);
+       log.trace("Routing stable since {}s", now - last);
        return (now - last) > STABLITY_THRESHOLD;
    }
 
@@ -364,6 +365,8 @@
             return;
         }
         lastRoutingChange = DateTime.now();
+        executorService.schedule(new UpdateMaps(), UPDATE_INTERVAL,
+                                 TimeUnit.SECONDS);
         statusLock.lock();
         try {
 
@@ -402,34 +405,23 @@
                 // comparing all routes of existing ECMP SPG to new ECMP SPG
                 routeChanges = computeRouteChange();
 
-                if (routeChanges != null) {
-                    // deal with linkUp of a seen-before link
-                    if (linkUp != null && srManager.isSeenLink(linkUp)) {
-                        if (!srManager.isBidirectional(linkUp)) {
-                            log.warn("Not a bidirectional link yet .. not "
-                                    + "processing link {}", linkUp);
-                            srManager.updateSeenLink(linkUp, true);
-                            populationStatus = Status.ABORTED;
-                            return;
-                        }
-                        // link previously seen before
-                        // do hash-bucket changes instead of a re-route
-                        processHashGroupChange(routeChanges, false, null);
-                        // clear out routesChanges so a re-route is not attempted
-                        routeChanges = ImmutableSet.of();
+                // deal with linkUp of a seen-before link
+                if (linkUp != null && srManager.isSeenLink(linkUp)) {
+                    if (!srManager.isBidirectional(linkUp)) {
+                        log.warn("Not a bidirectional link yet .. not "
+                                + "processing link {}", linkUp);
+                        srManager.updateSeenLink(linkUp, true);
+                        populationStatus = Status.ABORTED;
+                        return;
                     }
-
-                    //deal with switchDown
-                    if (switchDown != null) {
-                        processHashGroupChange(routeChanges, true, switchDown);
-                        // clear out routesChanges so a re-route is not attempted
-                        routeChanges = ImmutableSet.of();
-                    }
-
-                    // for a linkUp of a never-seen-before link
-                    // let it fall through to a reroute of the routeChanges
-
+                    // link previously seen before
+                    // do hash-bucket changes instead of a re-route
+                    processHashGroupChange(routeChanges, false, null);
+                    // clear out routesChanges so a re-route is not attempted
+                    routeChanges = ImmutableSet.of();
                 }
+                // for a linkUp of a never-seen-before link
+                // let it fall through to a reroute of the routeChanges
 
                 // now that we are past the check for a previously seen link
                 // it is safe to update the store for the linkUp
@@ -437,6 +429,12 @@
                     srManager.updateSeenLink(linkUp, true);
                 }
 
+                //deal with switchDown
+                if (switchDown != null) {
+                    processHashGroupChange(routeChanges, true, switchDown);
+                    // clear out routesChanges so a re-route is not attempted
+                    routeChanges = ImmutableSet.of();
+                }
             } else {
                 // link has gone down
                 // Compare existing ECMP SPG only with the link that went down
@@ -452,7 +450,6 @@
             if (routeChanges == null) {
                 log.info("Optimized routing failed... opting for full reroute");
                 populationStatus = Status.ABORTED;
-                statusLock.unlock();
                 populateAllRoutingRules();
                 return;
             }
@@ -482,6 +479,7 @@
         }
     }
 
+
     /**
      * Processes a set a route-path changes by reprogramming routing rules and
      * creating new hash-groups or editing them if necessary. This method also
@@ -533,8 +531,6 @@
             return false; //abort routing and fail fast
         }
 
-        //XXX should we do hashgroupchanges here?
-
         // update ecmpSPG for all edge-pairs
         for (EdgePair ep : edgePairs) {
             currentEcmpSpgMap.put(ep.dev1, updatedEcmpSpgMap.get(ep.dev1));
@@ -847,41 +843,51 @@
     private void processHashGroupChange(Set<ArrayList<DeviceId>> routeChanges,
                                         boolean linkOrSwitchFailed,
                                         DeviceId failedSwitch) {
+        Set<ArrayList<DeviceId>> changedRoutes = new HashSet<>();
+        // first, ensure each routeChanges entry has two elements
         for (ArrayList<DeviceId> route : routeChanges) {
-            DeviceId targetSw = route.get(0);
-            boolean success;
-            DeviceId dstSw = null;
-            if (route.size() > 1) {
-                dstSw = route.get(1);
+            if (route.size() == 1) {
+                // route-path changes are from everyone else to this switch
+                DeviceId dstSw = route.get(0);
+                srManager.deviceService.getAvailableDevices().forEach(sw -> {
+                    if (!sw.id().equals(dstSw)) {
+                        changedRoutes.add(Lists.newArrayList(sw.id(), dstSw));
+                    }
+                });
+            } else {
+                changedRoutes.add(route);
             }
+        }
 
+        for (ArrayList<DeviceId> route : changedRoutes) {
+            DeviceId targetSw = route.get(0);
+            DeviceId dstSw = route.get(1);
             if (linkOrSwitchFailed) {
-                fixHashGroupsForRoute(route, true);
+                boolean success = fixHashGroupsForRoute(route, true);
                 // it's possible that we cannot fix hash groups for a route
                 // if the target switch has failed. Nevertheless the ecmp graph
                 // for the impacted switch must still be updated.
-                if (failedSwitch != null && targetSw.equals(failedSwitch)
-                        && dstSw != null) {
+                if (!success && failedSwitch != null && targetSw.equals(failedSwitch)) {
                     currentEcmpSpgMap.put(dstSw, updatedEcmpSpgMap.get(dstSw));
                     currentEcmpSpgMap.remove(targetSw);
-                    log.debug("Updating ECMPspg for dst:{} removing failed "
+                    log.debug("Updating ECMPspg for dst:{} removing failed switch "
                             + "target:{}", dstSw, targetSw);
-                    return;
+                    continue;
                 }
                 //linkfailed - update both sides
-                currentEcmpSpgMap.put(targetSw, updatedEcmpSpgMap.get(targetSw));
-                if (dstSw != null) {
-                    currentEcmpSpgMap.put(dstSw, updatedEcmpSpgMap.get(dstSw));
-                }
-                log.debug("Updating ECMPspg for dst:{} and target:{}", dstSw, targetSw);
-            } else {
-                success = fixHashGroupsForRoute(route, false);
                 if (success) {
                     currentEcmpSpgMap.put(targetSw, updatedEcmpSpgMap.get(targetSw));
-                    if (dstSw != null) {
-                        currentEcmpSpgMap.put(dstSw, updatedEcmpSpgMap.get(dstSw));
-                    }
-                    log.debug("Updating ECMPspg for target:{} and dst:{}",
+                    currentEcmpSpgMap.put(dstSw, updatedEcmpSpgMap.get(dstSw));
+                    log.debug("Updating ECMPspg for dst:{} and target:{} for linkdown",
+                              dstSw, targetSw);
+                }
+            } else {
+                //linkup of seen before link
+                boolean success = fixHashGroupsForRoute(route, false);
+                if (success) {
+                    currentEcmpSpgMap.put(targetSw, updatedEcmpSpgMap.get(targetSw));
+                    currentEcmpSpgMap.put(dstSw, updatedEcmpSpgMap.get(dstSw));
+                    log.debug("Updating ECMPspg for target:{} and dst:{} for linkup",
                               targetSw, dstSw);
                 }
             }
@@ -908,48 +914,10 @@
             return false;
         }
         DeviceId destSw = route.get(1);
-        log.debug("Processing fixHashGroupsForRoute: Target {} -> Dest {}",
+        log.debug("* processing fixHashGroupsForRoute: Target {} -> Dest {}",
                   targetSw, destSw);
-        boolean targetIsEdge = false;
-        try {
-            targetIsEdge = srManager.deviceConfiguration.isEdgeDevice(targetSw);
-        } catch (DeviceConfigNotFoundException e) {
-            log.warn(e.getMessage() + "Cannot determine if targetIsEdge {}.. "
-                    + "continuing fixHash", targetSw);
-        }
-
         // figure out the new next hops at the targetSw towards the destSw
-        Set<DeviceId> nextHops = new HashSet<>();
-        EcmpShortestPathGraph ecmpSpg = updatedEcmpSpgMap.get(destSw);
-        HashMap<Integer, HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>>> switchVia =
-                ecmpSpg.getAllLearnedSwitchesAndVia();
-        for (Integer itrIdx : switchVia.keySet()) {
-            HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> swViaMap =
-                    switchVia.get(itrIdx);
-            for (DeviceId target : swViaMap.keySet()) {
-                if (target.equals(targetSw)) {
-                    // found the iteration where targetSw is reached- get nextHops
-                    if (!targetIsEdge && itrIdx > 1) {
-                        // optimization for spines to not use other leaves to get
-                        // to a leaf to avoid loops
-                        log.debug("Avoiding {} hop path for non-edge targetSw:{}"
-                                + " --> dstSw:{}", itrIdx, targetSw, destSw);
-                        break;
-                    }
-                    for (ArrayList<DeviceId> via : swViaMap.get(target)) {
-                        if (via.isEmpty()) {
-                            // the dstSw is the next-hop from the targetSw
-                            nextHops.add(destSw);
-                        } else {
-                            // first elem is next-hop in each ECMP path
-                            nextHops.add(via.get(0));
-                        }
-                    }
-                    break;
-                }
-            }
-        }
-
+        Set<DeviceId> nextHops = getNextHops(targetSw, destSw);
         // call group handler to change hash group at targetSw
         DefaultGroupHandler grpHandler = srManager.getGroupHandler(targetSw);
         if (grpHandler == null) {
@@ -1010,7 +978,7 @@
      * @param deviceId the device for which graphs need to be purged
      */
     protected void purgeEcmpGraph(DeviceId deviceId) {
-        currentEcmpSpgMap.remove(deviceId);
+        currentEcmpSpgMap.remove(deviceId); // XXX reconsider
         if (updatedEcmpSpgMap != null) {
             updatedEcmpSpgMap.remove(deviceId);
         }
@@ -1030,64 +998,64 @@
      *         affected, or null if no previous ecmp spg was found for comparison
      */
     private Set<ArrayList<DeviceId>> computeDamagedRoutes(Link linkFail) {
-
         Set<ArrayList<DeviceId>> routes = new HashSet<>();
 
         for (Device sw : srManager.deviceService.getDevices()) {
             log.debug("Computing the impacted routes for device {} due to link fail",
                       sw.id());
-            if (!srManager.mastershipService.isLocalMaster(sw.id())) {
-                log.debug("No mastership for {} .. skipping route optimization",
-                          sw.id());
+            DeviceId retId = shouldHandleRouting(sw.id());
+            if (retId == null) {
                 continue;
             }
-            EcmpShortestPathGraph ecmpSpg = currentEcmpSpgMap.get(sw.id());
-            if (ecmpSpg == null) {
-                log.warn("No existing ECMP graph for switch {}. Aborting optimized"
-                        + " rerouting and opting for full-reroute", sw.id());
-                return null;
-            }
-            if (log.isDebugEnabled()) {
-                log.debug("Root switch: {}", sw.id());
-                log.debug("  Current/Existing SPG: {}", ecmpSpg);
-                log.debug("       New/Updated SPG: {}", updatedEcmpSpgMap.get(sw.id()));
-            }
-            HashMap<Integer, HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>>> switchVia =
-                    ecmpSpg.getAllLearnedSwitchesAndVia();
-            for (Integer itrIdx : switchVia.keySet()) {
-                log.trace("Current/Exiting SPG Iterindex# {}", itrIdx);
-                HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> swViaMap =
-                        switchVia.get(itrIdx);
-                for (DeviceId targetSw : swViaMap.keySet()) {
-                    DeviceId rootSw = sw.id();
-                    if (log.isTraceEnabled()) {
-                        log.trace("TargetSwitch {} --> RootSwitch {}", targetSw, rootSw);
+            Set<DeviceId> devicesToProcess = Sets.newHashSet(retId, sw.id());
+            for (DeviceId rootSw : devicesToProcess) {
+                EcmpShortestPathGraph ecmpSpg = currentEcmpSpgMap.get(rootSw);
+                if (ecmpSpg == null) {
+                    log.warn("No existing ECMP graph for switch {}. Aborting optimized"
+                            + " rerouting and opting for full-reroute", rootSw);
+                    return null;
+                }
+                if (log.isDebugEnabled()) {
+                    log.debug("Root switch: {}", rootSw);
+                    log.debug("  Current/Existing SPG: {}", ecmpSpg);
+                    log.debug("       New/Updated SPG: {}", updatedEcmpSpgMap.get(rootSw));
+                }
+                HashMap<Integer, HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>>>
+                    switchVia = ecmpSpg.getAllLearnedSwitchesAndVia();
+                // figure out if the broken link affected any route-paths in this graph
+                for (Integer itrIdx : switchVia.keySet()) {
+                    log.trace("Current/Exiting SPG Iterindex# {}", itrIdx);
+                    HashMap<DeviceId, ArrayList<ArrayList<DeviceId>>> swViaMap =
+                            switchVia.get(itrIdx);
+                    for (DeviceId targetSw : swViaMap.keySet()) {
+                        log.trace("TargetSwitch {} --> RootSwitch {}",
+                                  targetSw, rootSw);
                         for (ArrayList<DeviceId> via : swViaMap.get(targetSw)) {
                             log.trace(" Via:");
                             via.forEach(e -> log.trace("  {}", e));
                         }
-                    }
-                    Set<ArrayList<DeviceId>> subLinks =
-                            computeLinks(targetSw, rootSw, swViaMap);
-                    for (ArrayList<DeviceId> alink: subLinks) {
-                        if ((alink.get(0).equals(linkFail.src().deviceId()) &&
-                                alink.get(1).equals(linkFail.dst().deviceId()))
-                                ||
-                             (alink.get(0).equals(linkFail.dst().deviceId()) &&
-                                     alink.get(1).equals(linkFail.src().deviceId()))) {
-                            log.debug("Impacted route:{}->{}", targetSw, rootSw);
-                            ArrayList<DeviceId> aRoute = new ArrayList<>();
-                            aRoute.add(targetSw); // switch with rules to populate
-                            aRoute.add(rootSw); // towards this destination
-                            routes.add(aRoute);
-                            break;
+                        Set<ArrayList<DeviceId>> subLinks =
+                                computeLinks(targetSw, rootSw, swViaMap);
+                        for (ArrayList<DeviceId> alink: subLinks) {
+                            if ((alink.get(0).equals(linkFail.src().deviceId()) &&
+                                    alink.get(1).equals(linkFail.dst().deviceId()))
+                                    ||
+                                    (alink.get(0).equals(linkFail.dst().deviceId()) &&
+                                         alink.get(1).equals(linkFail.src().deviceId()))) {
+                                log.debug("Impacted route:{}->{}", targetSw, rootSw);
+                                ArrayList<DeviceId> aRoute = new ArrayList<>();
+                                aRoute.add(targetSw); // switch with rules to populate
+                                aRoute.add(rootSw); // towards this destination
+                                routes.add(aRoute);
+                                break;
+                            }
                         }
                     }
                 }
+
             }
 
         }
-
         return routes;
     }
 
@@ -1407,6 +1375,35 @@
         }
     }
 
+    /**
+     * Updates the currentEcmpSpgGraph for all devices.
+     */
+    private void updateEcmpSpgMaps() {
+        for (Device sw : srManager.deviceService.getDevices()) {
+            EcmpShortestPathGraph ecmpSpgUpdated =
+                    new EcmpShortestPathGraph(sw.id(), srManager);
+            currentEcmpSpgMap.put(sw.id(), ecmpSpgUpdated);
+        }
+    }
+
+    /**
+     * Ensures routing is stable before updating all ECMP SPG graphs.
+     *
+     * TODO: CORD-1843 will ensure maps are updated faster, potentially while
+     * topology/routing is still unstable
+     */
+    private final class UpdateMaps implements Runnable {
+        @Override
+        public void run() {
+            if (isRoutingStable()) {
+                updateEcmpSpgMaps();
+            } else {
+                executorService.schedule(new UpdateMaps(), UPDATE_INTERVAL,
+                                         TimeUnit.SECONDS);
+            }
+        }
+    }
+
     //////////////////////////////////////
     //  Filtering rule creation
     //////////////////////////////////////