Further improvements to connection handling for gRPC-based devices

Force a reset of the gRPC connection backoff when probing for
reachability. This allows the provider to attempt reconnection when
needed, instead of depending on the channel backoff timer.

Improve the checkup task in GDP to better handle the mastership
flapping observed when reconnecting devices.

Change-Id: I473fb14160b2eb744a483de431b91f9f6bcdab95
diff --git a/protocols/grpc/ctl/src/main/java/org/onosproject/grpc/ctl/AbstractGrpcClientController.java b/protocols/grpc/ctl/src/main/java/org/onosproject/grpc/ctl/AbstractGrpcClientController.java
index 5b3153c..f7c33c3 100644
--- a/protocols/grpc/ctl/src/main/java/org/onosproject/grpc/ctl/AbstractGrpcClientController.java
+++ b/protocols/grpc/ctl/src/main/java/org/onosproject/grpc/ctl/AbstractGrpcClientController.java
@@ -101,7 +101,7 @@
                     "A %s client already exists for %s", serviceName, deviceId));
         }
 
-        log.info("Creating {}...", clientName(deviceId));
+        log.debug("Creating {}...", clientName(deviceId));
 
         final C client;
         try {
@@ -135,7 +135,7 @@
         withDeviceLock(() -> {
             final C client = clients.remove(deviceId);
             if (client != null) {
-                log.info("Removing {}...", clientName(deviceId));
+                log.debug("Removing {}...", clientName(deviceId));
                 client.shutdown();
             }
             return null;
diff --git a/protocols/grpc/utils/src/main/java/org/onosproject/grpc/utils/AbstractGrpcHandshaker.java b/protocols/grpc/utils/src/main/java/org/onosproject/grpc/utils/AbstractGrpcHandshaker.java
index 0183133..4cfb63b 100644
--- a/protocols/grpc/utils/src/main/java/org/onosproject/grpc/utils/AbstractGrpcHandshaker.java
+++ b/protocols/grpc/utils/src/main/java/org/onosproject/grpc/utils/AbstractGrpcHandshaker.java
@@ -17,6 +17,7 @@
 package org.onosproject.grpc.utils;
 
 import com.google.common.util.concurrent.Striped;
+import io.grpc.ConnectivityState;
 import io.grpc.ManagedChannel;
 import org.onosproject.grpc.api.GrpcChannelController;
 import org.onosproject.grpc.api.GrpcClient;
@@ -159,6 +160,7 @@
         if (!setupBehaviour("probeReachability()")) {
             return completedFuture(false);
         }
+        resetChannelConnectBackoffIfNeeded();
         return client.probeService();
     }
 
@@ -175,4 +177,26 @@
         handler().get(controllerClass)
                 .removeDeviceAgentListener(data().deviceId(), providerId);
     }
+
+    private void resetChannelConnectBackoffIfNeeded() {
+        // Nudge a channel stuck in TRANSIENT_FAILURE to reconnect right
+        // away rather than waiting for its backoff timer to expire.
+        final ManagedChannel existing = getExistingChannel();
+        // No channel found for this device means there is nothing to reset.
+        // getState(false) reads the state without requesting a connection.
+        final boolean failing = existing != null
+                && existing.getState(false) == ConnectivityState.TRANSIENT_FAILURE;
+        if (failing) {
+            existing.resetConnectBackoff();
+        }
+    }
+
+    private ManagedChannel getExistingChannel() {
+        // Read CHANNEL_URIS once: a containsKey()/get() pair would pass null
+        // to the controller if the entry were removed in between. Null = none.
+        return java.util.Optional.ofNullable(CHANNEL_URIS.get(data().deviceId()))
+                .flatMap(uri -> handler().get(GrpcChannelController.class)
+                        .get(uri))
+                .orElse(null);
+    }
 }
diff --git a/protocols/p4runtime/ctl/src/main/java/org/onosproject/p4runtime/ctl/client/StreamClientImpl.java b/protocols/p4runtime/ctl/src/main/java/org/onosproject/p4runtime/ctl/client/StreamClientImpl.java
index cf3cda0..b2516b4 100644
--- a/protocols/p4runtime/ctl/src/main/java/org/onosproject/p4runtime/ctl/client/StreamClientImpl.java
+++ b/protocols/p4runtime/ctl/src/main/java/org/onosproject/p4runtime/ctl/client/StreamClientImpl.java
@@ -187,7 +187,12 @@
                          deviceId, requestedToBeMaster.get(),
                          pendingElectionId, masterElectionId,
                          streamChannelManager.isOpen());
+                // Optimistically set the reported master status, if wrong, it
+                // will be updated by the arbitration response. This alleviates
+                // race conditions when calling isMaster() right after setting
+                // mastership.
                 sendMasterArbitrationUpdate(pendingElectionId);
+                isMaster.set(requestedToBeMaster.get());
                 pendingElectionId = null;
                 pendingElectionIdTimestamp = 0;
                 // No need to listen for master election ID changes.
@@ -199,7 +204,9 @@
     @Override
     public boolean isMaster(long p4DeviceId) {
         checkArgument(this.p4DeviceId == p4DeviceId);
-        return isMaster.get();
+        synchronized (requestedToBeMaster) {
+            return isMaster.get();
+        }
     }
 
     @Override
@@ -397,16 +404,11 @@
         void signalClosed() {
             synchronized (this) {
                 final boolean wasOpen = open.getAndSet(false);
-                // FIXME: in case of device disconnection, all clients will
-                //  signal role NONE, preventing the DeviceManager to mark the
-                //  device as offline, as only the master can do that. We should
-                //  change the DeviceManager. For now, we disable signaling role
-                //  NONE.
-                // if (wasOpen) {
-                //     // We lost any valid mastership role.
-                //     controller.postEvent(new DeviceAgentEvent(
-                //             DeviceAgentEvent.Type.ROLE_NONE, deviceId));
-                // }
+                if (wasOpen) {
+                    // We lost any valid mastership role.
+                    controller.postEvent(new DeviceAgentEvent(
+                            DeviceAgentEvent.Type.ROLE_NONE, deviceId));
+                }
             }
         }