[SDFAB-1015] Introduce gracePeriod and defer mastership check

For some devices like p4rt devices which are first registered
and then updated does not make sense to check the reachability
asap: connection is established concurently to the mastership
checks and the channel might not be ready.

Change-Id: I5a31e09cf382df388a473af338eb72fb9a187fa8
diff --git a/core/net/src/main/java/org/onosproject/net/device/impl/DeviceManager.java b/core/net/src/main/java/org/onosproject/net/device/impl/DeviceManager.java
index a85ee8f..e78e6d2 100644
--- a/core/net/src/main/java/org/onosproject/net/device/impl/DeviceManager.java
+++ b/core/net/src/main/java/org/onosproject/net/device/impl/DeviceManager.java
@@ -398,9 +398,22 @@
         return ls.dateTime.toEpochMilli();
     }
 
+    // Returns true if the grace period is still on
+    private boolean inGracePeriod(DeviceId deviceId) {
+        LocalStatus ls = deviceLocalStatus.get(deviceId);
+        if (ls == null) {
+            // This should not be possible, unless the device is removed
+            log.warn("Not found a recent local status for {}", deviceId);
+            return true;
+        }
+        DeviceProvider provider = getProvider(deviceId);
+        return ls.connected &&
+                (Instant.now().toEpochMilli() - ls.dateTime.toEpochMilli()) < provider.gracePeriod();
+    }
+
     // Check a device for control channel connectivity
     // and changes local-status appropriately.
-    private boolean isReachable(DeviceId deviceId) {
+    private boolean isReachable(DeviceId deviceId, boolean inGracePeriod) {
         if (deviceId == null) {
             return false;
         }
@@ -409,7 +422,8 @@
             boolean reachable = probeReachability(deviceId);
             if (reachable && !isLocallyConnected(deviceId)) {
                 deviceLocalStatus.put(deviceId, new LocalStatus(true, Instant.now()));
-            } else if (!reachable && isLocallyConnected(deviceId)) {
+            } else if (!reachable && isLocallyConnected(deviceId) && !inGracePeriod) {
+                // Do not update if the grace period is still on
                 deviceLocalStatus.put(deviceId, new LocalStatus(false, Instant.now()));
             }
             return reachable;
@@ -513,8 +527,12 @@
             final DeviceId deviceId = device.id();
             MastershipRole myRole = mastershipService.getLocalRole(deviceId);
             log.trace("Checking device {}. Current role is {}", deviceId, myRole);
-            log.info("Device local status is {}", localStatus(deviceId));
-            if (!isReachable(deviceId)) {
+
+            log.debug("Device {} local status is {}", deviceId, localStatus(deviceId));
+            final boolean isGracePeriodOn = inGracePeriod(deviceId);
+            final boolean isReachable = isReachable(deviceId, isGracePeriodOn);
+            // Passed the grace period and it is still not reachable
+            if (!isGracePeriodOn && !isReachable) {
                 if (myRole != NONE) {
                     // Verify if the device is fully disconnected from the cluster
                     if (updateMastershipFor(deviceId) == null
@@ -523,10 +541,10 @@
                         post(store.markOffline(deviceId));
                     }
                 } else {
-                    /* Firstly get a role and then check if the device is available in the store.
-                       if it is, if this node is the master and the device is fully disconnected
-                       from the cluster mark the device offline. In principle, this condition should
-                       never be hit unless in a device removed phase for NONE mastership roles. */
+                    // Firstly get a role and then check if the device is available in the store.
+                    // if it is, if this node is the master and the device is fully disconnected
+                    // from the cluster mark the device offline. In principle, this condition should
+                    // never be hit unless in a device removed phase for NONE mastership roles.
                     try {
                         mastershipService.requestRoleFor(deviceId).get();
                     } catch (InterruptedException e) {
@@ -545,19 +563,22 @@
                     }
                 }
                 roleToAcknowledge.remove(deviceId);
-                continue;
+            } else if (isReachable) {
+                // If this node is the master, ensure the device is marked online.
+                if (myRole == MASTER && canMarkOnline(device)) {
+                    log.info("Can mark online {}", deviceId);
+                    post(store.markOnline(deviceId));
+                }
+
+                log.info("{} is reachable - reasserting the role", deviceId);
+
+                // Device is still reachable. It is useful for some protocols
+                // to reassert the role. Note: NONE triggers request to MastershipService
+                reassertRole(deviceId, myRole);
+            } else {
+                // Do not proceed furthermore if the grace period is still on
+                log.debug("Skipping mastership check for {}", deviceId);
             }
-
-            // If this node is the master, ensure the device is marked online.
-            if (myRole == MASTER && canMarkOnline(device)) {
-                post(store.markOnline(deviceId));
-            }
-
-            log.info("{} is reachable - reasserting the role", deviceId);
-
-            /* Device is still reachable. It is useful for some protocols
-               to reassert the role. Note: NONE triggers request to MastershipService */
-            reassertRole(deviceId, myRole);
         }
     }
 
@@ -595,8 +616,8 @@
                 log.debug("Timeout is expired but current role is not MASTER ({}), nothing to do", myRole);
                 continue;
             }
-            /* Switch failed to acknowledge master role we asked for.
-               Yield mastership to other instance*/
+            // Switch failed to acknowledge master role we asked for.
+            // Yield mastership to other instance
             log.warn("Failed to assert role onto device {}. requested={}, no response",
                     deviceId, myRole);
             updateMastershipFor(deviceId);
@@ -705,10 +726,10 @@
             log.info("Device {} disconnected from this node: {}", deviceId,
                      clusterService.getLocalNode().id());
 
-            /* If none can reach the device, we will continue with the disconnection logic.
-               If there is one instance that reported device is still reachable, we hand over
-               the mastership to it if we are the current master, otherwise if we are a backup
-               we demote ourselves to the bottom of the backups list */
+            // If none can reach the device, we will continue with the disconnection logic.
+            // If there is one instance that reported device is still reachable, we hand over
+            // the mastership to it if we are the current master, otherwise if we are a backup
+            // we demote ourselves to the bottom of the backups list
             if (updateMastershipFor(deviceId) == null) {
                 log.info("Device {} is fully disconnected from the cluster", deviceId);
                 List<PortDescription> descs = store.getPortDescriptions(provider().id(), deviceId)
@@ -1058,6 +1079,7 @@
     }
 
     private void handleMastershipEvent(MastershipEvent event) {
+        log.debug("Handling mastership event");
         final DeviceId did = event.subject();
 
         // myNextRole suggested by MastershipService event
@@ -1079,9 +1101,11 @@
             myNextRole = NONE;
         }
 
-        final boolean isReachable = isReachable(did);
-        log.info("Device local status is {}", localStatus(did));
-        if (!isReachable) {
+        log.debug("Device {} local status is {}", did, localStatus(did));
+        final boolean isGracePeriodOn = inGracePeriod(did);
+        final boolean isReachable = isReachable(did, isGracePeriodOn);
+        // Passed the grace period and it is still not reachable
+        if (!isGracePeriodOn && !isReachable) {
             // device is not connected to this node, nevertheless we should get a role
             if (mastershipService.getLocalRole(did) == NONE) {
                 log.debug("Node was instructed to be {} role for {}, "
@@ -1106,20 +1130,23 @@
             // Let's put some order in the candidates list
             roleToAcknowledge.remove(did);
             updateMastershipFor(did);
-            return;
-        }
-
-        /* Device is connected to this node - always reassert the role.
-           Ideally, protocols like OpenFlow would not need to reassert the
-           role because the instances are only identified by the role. However,
-           other protocols like P4RT require to provide also an election id
-           which maybe different over time, by reasserting the role will guarantee
-           that updated election ids are communicated to the devices. It should not
-           cost us a lot as it is equivalent to a probe.*/
-        if (store.getDevice(did) != null) {
-            reassertRole(did, myNextRole);
+        } else if (isReachable) {
+            // Device is connected to this node - always reassert the role.
+            // Ideally, protocols like OpenFlow would not need to reassert the
+            // role because the instances are only identified by the role. However,
+            // other protocols like P4RT require to provide also an election id
+            // which maybe different over time, by reasserting the role will guarantee
+            // that updated election ids are communicated to the devices. It should not
+            // cost us a lot as it is equivalent to a probe.
+            if (store.getDevice(did) != null) {
+                log.info("{} is reachable - reasserting the role", did);
+                reassertRole(did, myNextRole);
+            } else {
+                log.debug("Device is not yet/no longer in the store: {}", did);
+            }
         } else {
-            log.debug("Device is not yet/no longer in the store: {}", did);
+            // Do not proceed furthermore if the grace period is still on
+            log.debug("Skipping mastership event {}", event);
         }
     }
 
@@ -1452,10 +1479,10 @@
             if (myRole == MASTER) {
                 log.info("Handing over the mastership of {} to next master {}", deviceId, nextMaster);
                 mastershipAdminService.setRole(nextMaster, deviceId, MASTER);
-                /* Do not demote here because setRole can return before the mastership has been passed.
-                   Current implementation promotes first the nextMaster as top of candidate list and then
-                   transfer the leadership. We can use the BACKUP events to do demote or leverage periodic
-                   checks.*/
+                // Do not demote here because setRole can return before the mastership has been passed.
+                // Current implementation promotes first the nextMaster as top of candidate list and then
+                // transfer the leadership. We can use the BACKUP events to do demote or leverage periodic
+                // checks.
             } else if (myRole == STANDBY) {
                 log.info("Demote current instance to the bottom of the candidates list for {}", deviceId);
                 mastershipAdminService.demote(localNodeId, deviceId);
@@ -1467,8 +1494,6 @@
         return nextMaster;
     }
 
-
-
     /**
      * Port Enable/Disable message sent to the device's MASTER node.
      */