ONOS Node rolling restart test

  - Kill ONOS k8s pods one at a time, while cordoning off the k8s node to
  prevent restarts while verifications are happening
  - While a node is down, check topology and ping between all hosts
  - Uncordon the k8s node to restart the onos k8s pod

Change-Id: I871704068b633721cf79eb747a7c294575415e54
diff --git a/TestON/drivers/common/cli/onosclidriver.py b/TestON/drivers/common/cli/onosclidriver.py
index bafa284..8e6a90a 100755
--- a/TestON/drivers/common/cli/onosclidriver.py
+++ b/TestON/drivers/common/cli/onosclidriver.py
@@ -495,7 +495,21 @@
                                       self.Prompt(),
                                       pexpect.TIMEOUT ] )
             response = self.handle.before
-            if i == 1:
+            if i == 1:  # Not in ONOS CLI
+                # FIXME: This isn't really the correct place for this, but it works for now
+                # Check if port-forward session is still up first
+                if hasattr( main, "Cluster"):
+                    ctrl = None
+                    for c in main.Cluster.controllers:
+                        if c.CLI is self:
+                            ctrl = c
+                            break
+                    if not ctrl:
+                        main.log.warn( self.name + ": Could not find this node in Cluster. Can't check port-forward status" )
+                    elif ctrl.k8s:
+                        ctrl.k8s.checkPortForward( ctrl.k8s.podName,
+                                                   kubeconfig=ctrl.k8s.kubeConfig,
+                                                   namespace=main.params[ 'kubernetes' ][ 'namespace' ] )
                 main.log.error( self.name + ": onos cli session closed. " )
                 if self.onosIp:
                     main.log.warn( "Trying to reconnect " + self.onosIp )
diff --git a/TestON/drivers/common/cli/onosclusterdriver.py b/TestON/drivers/common/cli/onosclusterdriver.py
index b4b6c12..d096d1b 100755
--- a/TestON/drivers/common/cli/onosclusterdriver.py
+++ b/TestON/drivers/common/cli/onosclusterdriver.py
@@ -168,7 +168,7 @@
             self.dockerPrompt = self.checkOptions( self.dockerPrompt, "~/onos#" )
             self.maxNodes = int( self.checkOptions( self.maxNodes, 100 ) )
             self.kubeConfig = self.checkOptions( self.kubeConfig, None )
-            self.up4Port = self.checkOptions(self.up4Port, None)
+            self.up4Port = self.checkOptions( self.up4Port, None )
 
             self.name = self.options[ 'name' ]
 
diff --git a/TestON/drivers/common/clidriver.py b/TestON/drivers/common/clidriver.py
index 48277e5..01242a1 100644
--- a/TestON/drivers/common/clidriver.py
+++ b/TestON/drivers/common/clidriver.py
@@ -35,6 +35,7 @@
     def __init__( self ):
         super( CLI, self ).__init__()
         self.inDocker = False
+        self.portForwardList = None
 
     def checkPrompt( self ):
         for key in self.options:
@@ -57,7 +58,7 @@
         ssh_newkey = 'Are you sure you want to continue connecting'
         refused = "ssh: connect to host " + \
             self.ip_address + " port 22: Connection refused"
-        ssh_options = "-t -X -A -o ServerAliveInterval=50 -o TCPKeepAlive=yes"
+        ssh_options = "-t -X -A -o ServerAliveInterval=50 -o ServerAliveCountMax=1000 -o TCPKeepAlive=yes"
         ssh_destination = self.user_name + "@" + self.ip_address
         envVars = { "TERM": "vt100" }
         # TODO: Add option to specify which shell/command to use
@@ -1132,7 +1133,7 @@
 
     def kubectlPodNodes( self, dstPath=None, kubeconfig=None, namespace=None ):
         """
-        Use kubectl to get the logs from a pod
+        Use kubectl to get the pod to node mappings
         Optional Arguments:
         - dstPath: The location to save the logs to
         - kubeconfig: The path to a kubeconfig file
@@ -1140,7 +1141,6 @@
         Returns main.TRUE if dstPath is given, else the output of the command or
             main.FALSE on Error
         """
-
         try:
             self.handle.sendline( "" )
             self.handle.expect( self.prompt )
@@ -1172,6 +1172,49 @@
             main.log.exception( self.name + ": Uncaught exception!" )
             return main.FALSE
 
+    def kubectlGetPodNode( self, podName, kubeconfig=None, namespace=None ):
+        """
+        Use kubectl to look up the name of the node a given pod is running on
+        Arguments:
+        - podName: The name of the pod
+        Optional Arguments:
+        - kubeconfig: The path to a kubeconfig file
+        - namespace: The namespace to search in
+        Returns the node name as a string, or None on error or unexpected output
+        """
+        try:
+            self.handle.sendline( "" )  # Sync with the shell prompt before sending the command
+            self.handle.expect( self.prompt )
+            main.log.debug( self.handle.before + self.handle.after )
+            cmdStr = "kubectl %s %s get pods %s --output=jsonpath='{.spec.nodeName}{\"\\n\"}'" % (
+                        "--kubeconfig %s" % kubeconfig if kubeconfig else "",
+                        "-n %s" % namespace if namespace else "",
+                        podName )
+            main.log.info( self.name + ": sending: " + repr( cmdStr ) )
+            self.handle.sendline( cmdStr )
+            i = self.handle.expect( [ "not found", "error", "The connection to the server", self.prompt ] )
+            if i == 3:  # Prompt was reached without matching any of the error strings
+                output = self.handle.before
+                main.log.debug( self.name + ": " + output )
+                output = output.splitlines()
+                main.log.warn( output )  # NOTE(review): warn level looks like leftover debugging - confirm
+                return output[1] if len( output ) == 3 else None  # Node name is the 2nd of exactly 3 lines
+            else:
+                main.log.error( self.name + ": Error executing command" )
+                main.log.debug( self.name + ": " + self.handle.before + str( self.handle.after ) )
+                return None
+        except pexpect.EOF:
+            main.log.error( self.name + ": EOF exception found" )
+            main.log.error( self.name + ":     " + self.handle.before )
+            return None
+        except pexpect.TIMEOUT:
+            main.log.exception( self.name + ": TIMEOUT exception found" )
+            main.log.error( self.name + ":    " + self.handle.before )
+            return None
+        except Exception:
+            main.log.exception( self.name + ": Uncaught exception!" )
+            return None
+
     def sternLogs( self, podString, dstPath, kubeconfig=None, namespace=None, since='1h', wait=60 ):
         """
         Use stern to get the logs from a pod
@@ -1315,11 +1358,11 @@
             main.log.exception( self.name + ": Uncaught exception!" )
             return main.FALSE
 
-    def kubectlPortForward( self, podName, portsList,  kubeconfig=None, namespace=None, ):
+    def kubectlPortForward( self, podName, portsList, kubeconfig=None, namespace=None ):
         """
         Use kubectl to setup port forwarding from the local machine to the kubernetes pod
 
-        Note: This command does not return until the port forwarding session is ended.
+        Note: This cli command does not return until the port forwarding session is ended.
 
         Required Arguments:
         - podName: The name of the pod as a string
@@ -1327,9 +1370,7 @@
         Optional Arguments:
         - kubeconfig: The path to a kubeconfig file
         - namespace: The namespace to search in
-        - app: Get pods belonging to a specific app
-        Returns a list containing the names of the pods or
-            main.FALSE on Error
+        Returns main.TRUE if a port-forward session was created or main.FALSE on Error
 
 
         """
@@ -1341,8 +1382,11 @@
                         portsList )
             main.log.info( self.name + ": sending: " + repr( cmdStr ) )
             self.handle.sendline( cmdStr )
+            self.handle.expect( "pod/%s" % podName )
+            output = self.handle.before + self.handle.after
             i = self.handle.expect( [ "not found", "error", "closed/timedout",
                                       self.prompt, "The connection to the server", "Forwarding from" ] )
+            output += self.handle.before + str( self.handle.after )
             # NOTE: This won't clear the buffer entirely, and each time the port forward
             #       is used, another line will be added to the buffer. We need to make
             #       sure we clear the buffer before using this component again.
@@ -1350,10 +1394,11 @@
             if i == 5:
                 # Setup preDisconnect function
                 self.preDisconnect = self.exitFromProcess
+                self.portForwardList = portsList
                 return main.TRUE
             else:
                 main.log.error( self.name + ": Error executing command" )
-                main.log.debug( self.name + ": " + self.handle.before + str( self.handle.after ) )
+                main.log.debug( self.name + ": " + output )
                 return main.FALSE
         except pexpect.EOF:
             main.log.error( self.name + ": EOF exception found" )
@@ -1367,6 +1412,132 @@
             main.log.exception( self.name + ": Uncaught exception!" )
             return main.FALSE
 
+    def checkPortForward( self, podName, portsList=None, kubeconfig=None, namespace=None ):
+        """
+        Check that the kubectl port-forward session is still active and restart it if it was closed.
+
+
+        Required Arguments:
+        - podName: The name of the pod as a string
+        Optional Arguments:
+        - portsList: The list of ports to forward, as a string. see kubectl help for details. Defaults to
+                     the last used string on this node.
+        - kubeconfig: The path to a kubeconfig file
+        - namespace: The namespace to search in
+        Returns main.TRUE if a port-forward session was created or is still active, main.FALSE on Error
+
+
+        """
+        try:
+            if not portsList:
+                portsList = self.portForwardList  # Saved by a successful kubectlPortForward; may still be None
+            self.handle.sendline( "" )  # Probe: a shell prompt coming back means no command is running
+            i = self.handle.expect( [ self.prompt, pexpect.TIMEOUT ], timeout=5 )
+            output = self.handle.before + str( self.handle.after )
+            main.log.debug( "%s: %s" % ( self.name, output ) )
+            if i == 0:
+                # We are not currently in a port-forwarding session, try to re-establish.
+                return self.kubectlPortForward( podName, portsList, kubeconfig, namespace )
+            elif i == 1:
+                # Still in a command, port-forward is probably still active
+                return main.TRUE
+        except pexpect.EOF:
+            main.log.error( self.name + ": EOF exception found" )
+            main.log.error( self.name + ":     " + self.handle.before )
+            return main.FALSE
+        except pexpect.TIMEOUT:
+            main.log.exception( self.name + ": TIMEOUT exception found" )
+            main.log.error( self.name + ":    " + self.handle.before )
+            return main.FALSE
+        except Exception:
+            main.log.exception( self.name + ": Uncaught exception!" )
+            return main.FALSE
+
+    def kubectlCordonNode( self, nodeName, kubeconfig=None, namespace=None, timeout=240, uncordonOnDisconnect=True ):  # Run "kubectl cordon" on nodeName; returns main.TRUE or main.FALSE
+        try:
+            cmdStr = "kubectl %s %s cordon %s" % (
+                "--kubeconfig %s" % kubeconfig if kubeconfig else "",
+                "-n %s" % namespace if namespace else "",
+                nodeName )
+            main.log.info( self.name + ": sending: " + repr( cmdStr ) )
+            if uncordonOnDisconnect:  # Save the arguments so kubectlUncordonNode can run later without them
+                self.nodeName = nodeName
+                if kubeconfig:
+                    self.kubeconfig = kubeconfig
+                if namespace:
+                    self.namespace = namespace
+                self.preDisconnect = self.kubectlUncordonNode  # Overwrites any previously set preDisconnect hook
+            self.handle.sendline( cmdStr )
+            i = self.handle.expect( [ "not found", "error",
+                                      "The connection to the server",
+                                      "node/%s cordoned" % nodeName,
+                                      "node/%s already cordoned" % nodeName, ],
+                                    timeout=timeout )
+            if i == 3 or i == 4:  # "cordoned" and "already cordoned" both count as success
+                output = self.handle.before + self.handle.after
+                main.log.debug( self.name + ": " + output )
+                self.clearBuffer()
+                return main.TRUE
+            else:  # One of the three error strings matched
+                main.log.error( self.name + ": Error executing command" )
+                main.log.debug( self.name + ": " + self.handle.before + str( self.handle.after ) )
+                self.clearBuffer()
+                return main.FALSE
+        except pexpect.EOF:
+            main.log.error( self.name + ": EOF exception found" )
+            main.log.error( self.name + ":     " + self.handle.before )
+            return main.FALSE
+        except pexpect.TIMEOUT:
+            main.log.exception( self.name + ": TIMEOUT exception found" )
+            main.log.error( self.name + ":    " + self.handle.before )
+            self.clearBuffer()
+            return main.FALSE
+        except Exception:
+            main.log.exception( self.name + ": Uncaught exception!" )
+            return main.FALSE
+
+    def kubectlUncordonNode( self, nodeName=None, kubeconfig=None, namespace=None, timeout=240 ):  # Run "kubectl uncordon"; returns main.TRUE or main.FALSE
+        try:
+            if not nodeName:  # Fall back to the values saved on this component by kubectlCordonNode
+                nodeName = getattr( self, "nodeName" )  # No default: a missing node name lands in the generic handler
+            if not kubeconfig:
+                kubeconfig = getattr( self, "kubeconfig", None )
+            if not namespace:  # Was "if not kubeconfig", which skipped or clobbered the namespace
+                namespace = getattr( self, "namespace", None )
+            cmdStr = "kubectl %s %s uncordon %s" % (
+                "--kubeconfig %s" % kubeconfig if kubeconfig else "",
+                "-n %s" % namespace if namespace else "",
+                nodeName )
+            main.log.info( self.name + ": sending: " + repr( cmdStr ) )
+            self.handle.sendline( cmdStr )
+            i = self.handle.expect( [ "not found", "error",
+                                      "The connection to the server",
+                                      "node/%s uncordoned" % nodeName,
+                                      "node/%s already uncordoned" % nodeName, ],
+                                    timeout=timeout )
+            if i == 3 or i == 4:  # "uncordoned" and "already uncordoned" both count as success
+                output = self.handle.before + self.handle.after
+                main.log.debug( self.name + ": " + output )
+                self.clearBuffer()
+                return main.TRUE
+            else:  # One of the three error strings matched
+                main.log.error( self.name + ": Error executing command" )
+                main.log.debug( self.name + ": " + self.handle.before + str( self.handle.after ) )
+                self.clearBuffer()
+                return main.FALSE
+        except pexpect.EOF:
+            main.log.error( self.name + ": EOF exception found" )
+            main.log.error( self.name + ":     " + self.handle.before )
+            return main.FALSE
+        except pexpect.TIMEOUT:
+            main.log.exception( self.name + ": TIMEOUT exception found" )
+            main.log.error( self.name + ":    " + self.handle.before )
+            self.clearBuffer()
+            return main.FALSE
+        except Exception:
+            main.log.exception( self.name + ": Uncaught exception!" )
+            return main.FALSE
+
     def kubectlDeletePod( self, podName, kubeconfig=None, namespace=None, timeout=240 ):
         try:
             cmdStr = "kubectl %s %s delete pod %s" % (
@@ -1411,7 +1582,7 @@
             self.handle.sendline( cmdStr )
             # Since the command contains the prompt ($), we first expect for the
             # last part of the command and then we expect the actual values
-            self.handle.expect("grep --color=never %s" % podName, timeout=1)
+            self.handle.expect( "grep --color=never %s" % podName, timeout=1 )
             i = self.handle.expect( [ podName + " ready",
                                       self.prompt ],
                                     timeout=timeout )
@@ -1445,4 +1616,4 @@
                 self.handle.expect( self.prompt, timeout=5 )
                 response += self.cleanOutput( self.handle.before )
             except pexpect.TIMEOUT:
-                return response
\ No newline at end of file
+                return response