ONOS Node rolling restart test
- Kill ONOS k8s pods one at a time, while cordoning off the k8s node to
prevent restarts while verifications are happening
- While a node is down, check topology and ping between all hosts
- Uncordon the k8s node to restart the onos k8s pod
Change-Id: I871704068b633721cf79eb747a7c294575415e54
diff --git a/TestON/drivers/common/cli/onosclidriver.py b/TestON/drivers/common/cli/onosclidriver.py
index bafa284..8e6a90a 100755
--- a/TestON/drivers/common/cli/onosclidriver.py
+++ b/TestON/drivers/common/cli/onosclidriver.py
@@ -495,7 +495,21 @@
self.Prompt(),
pexpect.TIMEOUT ] )
response = self.handle.before
- if i == 1:
+ if i == 1: # Not in ONOS CLI
+ # FIXME: This isn't really the correct place for this, but it works for now
+ # Check if port-forward session is still up first
+ if hasattr( main, "Cluster"):
+ ctrl = None
+ for c in main.Cluster.controllers:
+ if c.CLI is self:
+ ctrl = c
+ break
+ if not ctrl:
+ main.log.warn( self.name + ": Could not find this node in Cluster. Can't check port-forward status" )
+ elif ctrl.k8s:
+ ctrl.k8s.checkPortForward( ctrl.k8s.podName,
+ kubeconfig=ctrl.k8s.kubeConfig,
+ namespace=main.params[ 'kubernetes' ][ 'namespace' ] )
main.log.error( self.name + ": onos cli session closed. " )
if self.onosIp:
main.log.warn( "Trying to reconnect " + self.onosIp )
diff --git a/TestON/drivers/common/cli/onosclusterdriver.py b/TestON/drivers/common/cli/onosclusterdriver.py
index b4b6c12..d096d1b 100755
--- a/TestON/drivers/common/cli/onosclusterdriver.py
+++ b/TestON/drivers/common/cli/onosclusterdriver.py
@@ -168,7 +168,7 @@
self.dockerPrompt = self.checkOptions( self.dockerPrompt, "~/onos#" )
self.maxNodes = int( self.checkOptions( self.maxNodes, 100 ) )
self.kubeConfig = self.checkOptions( self.kubeConfig, None )
- self.up4Port = self.checkOptions(self.up4Port, None)
+ self.up4Port = self.checkOptions( self.up4Port, None )
self.name = self.options[ 'name' ]
diff --git a/TestON/drivers/common/clidriver.py b/TestON/drivers/common/clidriver.py
index 48277e5..01242a1 100644
--- a/TestON/drivers/common/clidriver.py
+++ b/TestON/drivers/common/clidriver.py
@@ -35,6 +35,7 @@
def __init__( self ):
super( CLI, self ).__init__()
self.inDocker = False
+ self.portForwardList = None
def checkPrompt( self ):
for key in self.options:
@@ -57,7 +58,7 @@
ssh_newkey = 'Are you sure you want to continue connecting'
refused = "ssh: connect to host " + \
self.ip_address + " port 22: Connection refused"
- ssh_options = "-t -X -A -o ServerAliveInterval=50 -o TCPKeepAlive=yes"
+ ssh_options = "-t -X -A -o ServerAliveInterval=50 -o ServerAliveCountMax=1000 -o TCPKeepAlive=yes"
ssh_destination = self.user_name + "@" + self.ip_address
envVars = { "TERM": "vt100" }
# TODO: Add option to specify which shell/command to use
@@ -1132,7 +1133,7 @@
def kubectlPodNodes( self, dstPath=None, kubeconfig=None, namespace=None ):
"""
- Use kubectl to get the logs from a pod
+ Use kubectl to get the pod to node mappings
Optional Arguments:
- dstPath: The location to save the logs to
- kubeconfig: The path to a kubeconfig file
@@ -1140,7 +1141,6 @@
Returns main.TRUE if dstPath is given, else the output of the command or
main.FALSE on Error
"""
-
try:
self.handle.sendline( "" )
self.handle.expect( self.prompt )
@@ -1172,6 +1172,49 @@
main.log.exception( self.name + ": Uncaught exception!" )
return main.FALSE
+ def kubectlGetPodNode( self, podName, kubeconfig=None, namespace=None ):
+ """
+ Use kubectl to get the node a given pod is running on
+ Arguments:
+ - podName: The name of the pod
+ Optional Arguments:
+ - kubeconfig: The path to a kubeconfig file
+ - namespace: The namespace to search in
+ Returns a string of the node name or None
+ """
+ try:
+ self.handle.sendline( "" )
+ self.handle.expect( self.prompt )
+ main.log.debug( self.handle.before + self.handle.after )
+ cmdStr = "kubectl %s %s get pods %s --output=jsonpath='{.spec.nodeName}{\"\\n\"}'" % (
+ "--kubeconfig %s" % kubeconfig if kubeconfig else "",
+ "-n %s" % namespace if namespace else "",
+ podName )
+ main.log.info( self.name + ": sending: " + repr( cmdStr ) )
+ self.handle.sendline( cmdStr )
+ i = self.handle.expect( [ "not found", "error", "The connection to the server", self.prompt ] )
+ if i == 3:
+ output = self.handle.before
+ main.log.debug( self.name + ": " + output )
+ output = output.splitlines()
+ main.log.warn( output )
+ return output[1] if len( output ) == 3 else None
+ else:
+ main.log.error( self.name + ": Error executing command" )
+ main.log.debug( self.name + ": " + self.handle.before + str( self.handle.after ) )
+ return None
+ except pexpect.EOF:
+ main.log.error( self.name + ": EOF exception found" )
+ main.log.error( self.name + ": " + self.handle.before )
+ return None
+ except pexpect.TIMEOUT:
+ main.log.exception( self.name + ": TIMEOUT exception found" )
+ main.log.error( self.name + ": " + self.handle.before )
+ return None
+ except Exception:
+ main.log.exception( self.name + ": Uncaught exception!" )
+ return None
+
def sternLogs( self, podString, dstPath, kubeconfig=None, namespace=None, since='1h', wait=60 ):
"""
Use stern to get the logs from a pod
@@ -1315,11 +1358,11 @@
main.log.exception( self.name + ": Uncaught exception!" )
return main.FALSE
- def kubectlPortForward( self, podName, portsList, kubeconfig=None, namespace=None, ):
+ def kubectlPortForward( self, podName, portsList, kubeconfig=None, namespace=None ):
"""
Use kubectl to setup port forwarding from the local machine to the kubernetes pod
- Note: This command does not return until the port forwarding session is ended.
+ Note: This cli command does not return until the port forwarding session is ended.
Required Arguments:
- podName: The name of the pod as a string
@@ -1327,9 +1370,7 @@
Optional Arguments:
- kubeconfig: The path to a kubeconfig file
- namespace: The namespace to search in
- - app: Get pods belonging to a specific app
- Returns a list containing the names of the pods or
- main.FALSE on Error
+ Returns main.TRUE if a port-forward session was created or main.FALSE on Error
"""
@@ -1341,8 +1382,11 @@
portsList )
main.log.info( self.name + ": sending: " + repr( cmdStr ) )
self.handle.sendline( cmdStr )
+ self.handle.expect( "pod/%s" % podName )
+ output = self.handle.before + self.handle.after
i = self.handle.expect( [ "not found", "error", "closed/timedout",
self.prompt, "The connection to the server", "Forwarding from" ] )
+ output += self.handle.before + str( self.handle.after )
# NOTE: This won't clear the buffer entirely, and each time the port forward
# is used, another line will be added to the buffer. We need to make
# sure we clear the buffer before using this component again.
@@ -1350,10 +1394,11 @@
if i == 5:
# Setup preDisconnect function
self.preDisconnect = self.exitFromProcess
+ self.portForwardList = portsList
return main.TRUE
else:
main.log.error( self.name + ": Error executing command" )
- main.log.debug( self.name + ": " + self.handle.before + str( self.handle.after ) )
+ main.log.debug( self.name + ": " + output )
return main.FALSE
except pexpect.EOF:
main.log.error( self.name + ": EOF exception found" )
@@ -1367,6 +1412,132 @@
main.log.exception( self.name + ": Uncaught exception!" )
return main.FALSE
+ def checkPortForward( self, podName, portsList=None, kubeconfig=None, namespace=None ):
+ """
+ Check that kubectl port-forward session is still active and restarts it if it was closed.
+
+
+        Required Arguments:
+        - podName: The name of the pod as a string
+        Optional Arguments:
+        - portsList: The list of ports to forward, as a string. See kubectl help for details. Defaults to
+                     the last used string on this node.
+        - kubeconfig: The path to a kubeconfig file
+        - namespace: The namespace to search in
+ Returns main.TRUE if a port-forward session was created or is still active, main.FALSE on Error
+
+
+ """
+ try:
+ if not portsList:
+ portsList = self.portForwardList
+ self.handle.sendline( "" )
+ i = self.handle.expect( [ self.prompt, pexpect.TIMEOUT ], timeout=5 )
+ output = self.handle.before + str( self.handle.after )
+ main.log.debug( "%s: %s" % ( self.name, output ) )
+ if i == 0:
+ # We are not currently in a port-forwarding session, try to re-establish.
+ return self.kubectlPortForward( podName, portsList, kubeconfig, namespace )
+ elif i == 1:
+ # Still in a command, port-forward is probably still active
+ return main.TRUE
+ except pexpect.EOF:
+ main.log.error( self.name + ": EOF exception found" )
+ main.log.error( self.name + ": " + self.handle.before )
+ return main.FALSE
+ except pexpect.TIMEOUT:
+ main.log.exception( self.name + ": TIMEOUT exception found" )
+ main.log.error( self.name + ": " + self.handle.before )
+ return main.FALSE
+ except Exception:
+ main.log.exception( self.name + ": Uncaught exception!" )
+ return main.FALSE
+
+ def kubectlCordonNode( self, nodeName, kubeconfig=None, namespace=None, timeout=240, uncordonOnDisconnect=True ):
+ try:
+ cmdStr = "kubectl %s %s cordon %s" % (
+ "--kubeconfig %s" % kubeconfig if kubeconfig else "",
+ "-n %s" % namespace if namespace else "",
+ nodeName )
+ main.log.info( self.name + ": sending: " + repr( cmdStr ) )
+ if uncordonOnDisconnect:
+ self.nodeName = nodeName
+ if kubeconfig:
+ self.kubeconfig = kubeconfig
+ if namespace:
+ self.namespace = namespace
+ self.preDisconnect = self.kubectlUncordonNode
+ self.handle.sendline( cmdStr )
+ i = self.handle.expect( [ "not found", "error",
+ "The connection to the server",
+ "node/%s cordoned" % nodeName,
+ "node/%s already cordoned" % nodeName, ],
+ timeout=timeout )
+ if i == 3 or i == 4:
+ output = self.handle.before + self.handle.after
+ main.log.debug( self.name + ": " + output )
+ self.clearBuffer()
+ return main.TRUE
+ else:
+ main.log.error( self.name + ": Error executing command" )
+ main.log.debug( self.name + ": " + self.handle.before + str( self.handle.after ) )
+ self.clearBuffer()
+ return main.FALSE
+ except pexpect.EOF:
+ main.log.error( self.name + ": EOF exception found" )
+ main.log.error( self.name + ": " + self.handle.before )
+ return main.FALSE
+ except pexpect.TIMEOUT:
+ main.log.exception( self.name + ": TIMEOUT exception found" )
+ main.log.error( self.name + ": " + self.handle.before )
+ self.clearBuffer()
+ return main.FALSE
+ except Exception:
+ main.log.exception( self.name + ": Uncaught exception!" )
+ return main.FALSE
+
+ def kubectlUncordonNode( self, nodeName=None, kubeconfig=None, namespace=None, timeout=240 ):
+ try:
+ if not nodeName:
+ nodeName = getattr( self, "nodeName" )
+ if not kubeconfig:
+ kubeconfig = getattr( self, "kubeconfig", None )
+            if not namespace:
+ namespace = getattr( self, "namespace", None )
+ cmdStr = "kubectl %s %s uncordon %s" % (
+ "--kubeconfig %s" % kubeconfig if kubeconfig else "",
+ "-n %s" % namespace if namespace else "",
+ nodeName )
+ main.log.info( self.name + ": sending: " + repr( cmdStr ) )
+ self.handle.sendline( cmdStr )
+ i = self.handle.expect( [ "not found", "error",
+ "The connection to the server",
+ "node/%s uncordoned" % nodeName,
+ "node/%s already uncordoned" % nodeName, ],
+ timeout=timeout )
+ if i == 3 or i == 4:
+ output = self.handle.before + self.handle.after
+ main.log.debug( self.name + ": " + output )
+ self.clearBuffer()
+ return main.TRUE
+ else:
+ main.log.error( self.name + ": Error executing command" )
+ main.log.debug( self.name + ": " + self.handle.before + str( self.handle.after ) )
+ self.clearBuffer()
+ return main.FALSE
+ except pexpect.EOF:
+ main.log.error( self.name + ": EOF exception found" )
+ main.log.error( self.name + ": " + self.handle.before )
+ return main.FALSE
+ except pexpect.TIMEOUT:
+ main.log.exception( self.name + ": TIMEOUT exception found" )
+ main.log.error( self.name + ": " + self.handle.before )
+ self.clearBuffer()
+ return main.FALSE
+ except Exception:
+ main.log.exception( self.name + ": Uncaught exception!" )
+ return main.FALSE
+
def kubectlDeletePod( self, podName, kubeconfig=None, namespace=None, timeout=240 ):
try:
cmdStr = "kubectl %s %s delete pod %s" % (
@@ -1411,7 +1582,7 @@
self.handle.sendline( cmdStr )
# Since the command contains the prompt ($), we first expect for the
# last part of the command and then we expect the actual values
- self.handle.expect("grep --color=never %s" % podName, timeout=1)
+ self.handle.expect( "grep --color=never %s" % podName, timeout=1 )
i = self.handle.expect( [ podName + " ready",
self.prompt ],
timeout=timeout )
@@ -1445,4 +1616,4 @@
self.handle.expect( self.prompt, timeout=5 )
response += self.cleanOutput( self.handle.before )
except pexpect.TIMEOUT:
- return response
\ No newline at end of file
+ return response
diff --git a/TestON/tests/USECASE/SegmentRouting/SRStaging/SRrollingRestart/SRrollingRestart.params b/TestON/tests/USECASE/SegmentRouting/SRStaging/SRrollingRestart/SRrollingRestart.params
index b582718..4217583 100644
--- a/TestON/tests/USECASE/SegmentRouting/SRStaging/SRrollingRestart/SRrollingRestart.params
+++ b/TestON/tests/USECASE/SegmentRouting/SRStaging/SRrollingRestart/SRrollingRestart.params
@@ -91,6 +91,7 @@
<TrafficDiscovery>10</TrafficDiscovery>
</timers>
+
<SLEEP>
<startup>10</startup>
</SLEEP>
diff --git a/TestON/tests/USECASE/SegmentRouting/SRStaging/SRrollingRestart/SRrollingRestart.params.tucson b/TestON/tests/USECASE/SegmentRouting/SRStaging/SRrollingRestart/SRrollingRestart.params.tucson
new file mode 100644
index 0000000..5e69e79
--- /dev/null
+++ b/TestON/tests/USECASE/SegmentRouting/SRStaging/SRrollingRestart/SRrollingRestart.params.tucson
@@ -0,0 +1,92 @@
+<PARAMS>
+ <testcases>2</testcases>
+
+ <GRAPH>
+ <nodeCluster>pairedleaves</nodeCluster>
+ <builds>20</builds>
+ <jobName>SRpairedLeaves</jobName>
+ <branch>master</branch>
+ </GRAPH>
+
+ <SCALE>
+ <size>3</size>
+ <max>3</max>
+ </SCALE>
+
+ <DEPENDENCY>
+ <useCommonConf>False</useCommonConf>
+ <useCommonTopo>True</useCommonTopo>
+ <useBmv2>True</useBmv2>
+ <bmv2SwitchType>stratum</bmv2SwitchType>
+ <switchPrefix></switchPrefix>
+ <stratumRoot>~/stratum</stratumRoot>
+ <topology>trellis_fabric.py</topology>
+ <lib>routinglib.py,trellislib.py,stratum.py</lib>
+ </DEPENDENCY>
+
+ <persistent_setup>True</persistent_setup>
+
+ <use_stern>True</use_stern>
+
+ <kubernetes>
+ <appName>onos-classic</appName>
+ <namespace>tost</namespace>
+ </kubernetes>
+
+ <PERF>
+ <traffic_host>Compute1 Compute2</traffic_host>
+ <pcap_host>Compute3</pcap_host>
+ <pcap_cmd_arguments>-t e -F pcap -s 100 </pcap_cmd_arguments>
+ <iterations>1</iterations>
+ <topo>
+ <leaf1>
+ <ports>176 180 184 188</ports>
+ <note>eNB</note>
+ </leaf1>
+ <leaf2>
+ <ports>260 268 276 284</ports>
+ <note>upstream</note>
+ </leaf2>
+ </topo>
+ </PERF>
+ <ONOS_Logging>
+ <org.onosproject.segmentrouting>DEBUG</org.onosproject.segmentrouting>
+ </ONOS_Logging>
+ <ONOS_Logging_Reset>
+ <org.onosproject.segmentrouting>DEBUG</org.onosproject.segmentrouting>
+ </ONOS_Logging_Reset>
+
+
+ <ENV>
+ <cellName>productionCell</cellName>
+ <cellApps>drivers,fpm,lldpprovider,hostprovider,netcfghostprovider,drivers.bmv2,org.opencord.fabric-tofino,pipelines.fabric,org.stratumproject.fabric-tna,drivers.barefoot,segmentrouting,t3</cellApps>
+ </ENV>
+
+ <EXTERNAL_APPS>
+ </EXTERNAL_APPS>
+
+ <CTRL>
+ <port>6653</port>
+ </CTRL>
+
+ <timers>
+ <LinkDiscovery>12</LinkDiscovery>
+ <SwitchDiscovery>12</SwitchDiscovery>
+ <TrafficDiscovery>13</TrafficDiscovery>
+ </timers>
+
+ <restartRounds>2</restartRounds>
+
+ <SLEEP>
+ <startup>10</startup>
+ </SLEEP>
+
+ <TOPO>
+ <switchNum>2</switchNum>
+ <linkNum>2</linkNum>
+ </TOPO>
+
+ <ALARM>
+ <minPassPercent>100</minPassPercent>
+ </ALARM>
+</PARAMS>
diff --git a/TestON/tests/USECASE/SegmentRouting/SRStaging/SRrollingRestart/SRrollingRestart.py b/TestON/tests/USECASE/SegmentRouting/SRStaging/SRrollingRestart/SRrollingRestart.py
index fc3c44c..33aad64 100644
--- a/TestON/tests/USECASE/SegmentRouting/SRStaging/SRrollingRestart/SRrollingRestart.py
+++ b/TestON/tests/USECASE/SegmentRouting/SRStaging/SRrollingRestart/SRrollingRestart.py
@@ -12,4 +12,56 @@
Perform rolling ONOS failure/recovery test
Collect logs and analyze results
"""
- pass
+ try:
+ from tests.USECASE.SegmentRouting.SRStaging.dependencies.SRStagingTest import SRStagingTest
+ import json
+ except ImportError:
+ main.log.error( "SRStagingTest not found. Exiting the test" )
+ main.cleanAndExit()
+ try:
+ main.funcs
+ except ( NameError, AttributeError ):
+ main.funcs = SRStagingTest()
+
+ descPrefix = "Rolling ONOS Restart"
+ pod = main.params['GRAPH'].get( 'nodeCluster', "hardware" )
+ main.funcs.setupTest( main,
+ topology='0x2',
+ onosNodes=3,
+ description="%s tests on the %s pod" % ( descPrefix, pod ) )
+ switches = int( main.params[ 'TOPO' ][ 'switchNum' ] )
+ links = int( main.params[ 'TOPO' ][ 'linkNum' ] )
+ hosts = [ 'h1', 'h2', 'h3', 'mgmt' ]
+
+ clusterSize = main.Cluster.numCtrls
+ restartRounds = int( main.params.get( 'restartRounds', 1 ) )
+
+ def verifications( main, switches, links, hosts ):
+ """
+ Checks to perform before and after each ONOS node event
+ All asserts should happen within this function
+ """
+ from tests.USECASE.SegmentRouting.dependencies.Testcaselib import Testcaselib as run
+ run.verifyTopology( main, switches, links, main.Cluster.numCtrls )
+ run.pingAllFabricIntfs( main, hosts, dumpFlows=False )
+ run.verifyPing( main, hosts, hosts )
+ verifications( main, switches, links, hosts )
+ # TODO ADD control plane checks: nodes, flows, ...
+ # TODO: Mastership check? look at HA Test
+ # TODO: Any specific fabric checks? APP commands?
+
+ for i in range( 0, clusterSize * restartRounds ):
+ n = i % clusterSize
+ ctrl = main.Cluster.getControllers( n )
+
+ longDesc = "%s - kill %s" % ( descPrefix, ctrl.name )
+ # TODO: verify flow isn't interrupted
+ node = main.funcs.onosDown( main, ctrl, preventRestart=True )
+ verifications( main, switches, links, hosts )
+ main.funcs.onosUp( main, node, ctrl )
+ verifications( main, switches, links, hosts )
+ # Cleanup
+ main.log.warn( json.dumps( main.downtimeResults, indent=4, sort_keys=True ) )
+ main.funcs.cleanup( main )
+
+
diff --git a/TestON/tests/USECASE/SegmentRouting/SRStaging/SRrollingRestart/SRrollingRestart.topo.tucson b/TestON/tests/USECASE/SegmentRouting/SRStaging/SRrollingRestart/SRrollingRestart.topo.tucson
new file mode 100644
index 0000000..07c69b4
--- /dev/null
+++ b/TestON/tests/USECASE/SegmentRouting/SRStaging/SRrollingRestart/SRrollingRestart.topo.tucson
@@ -0,0 +1,192 @@
+<TOPOLOGY>
+ <COMPONENT>
+ <ONOScell>
+ <host>localhost</host> # ONOS "bench" machine
+ <user>jenkins</user>
+ <password></password>
+ <type>OnosClusterDriver</type>
+ <connect_order>50</connect_order>
+ <jump_host></jump_host>
+ <home>~/onos</home> # defines where onos home is on the build machine. Defaults to "~/onos/" if empty.
+ <COMPONENTS>
+ <kubeConfig>~/.kube/dev-pairedleaves-tucson</kubeConfig> # If set, will attempt to use this file for setting up port-forwarding
+ <useDocker>True</useDocker> # Whether to use docker for ONOS nodes
+ <docker_prompt>\$</docker_prompt>
+ <cluster_name></cluster_name> # Used as a prefix for cluster components. Defaults to 'ONOS'
+ <diff_clihost>True</diff_clihost> # if it has different host other than localhost for CLI. True or empty. OC# will be used if True.
+ <karaf_username>karaf</karaf_username>
+ <karaf_password>karaf</karaf_password>
+ <web_user>karaf</web_user>
+ <web_pass>karaf</web_pass>
+ <karafPrompt_username>karaf</karafPrompt_username>
+ <rest_port></rest_port>
+ <prompt></prompt> # TODO: we technically need a few of these, one per component
+ <onos_home>~/onos/</onos_home> # defines where onos home is on the target cell machine. Defaults to entry in "home" if empty.
+ <nodes> 3 </nodes> # number of nodes in the cluster
+ </COMPONENTS>
+ </ONOScell>
+
+ <Leaf1>
+ <host>10.76.28.70</host>
+ <user>root</user>
+ <password>onl</password>
+ <type>StratumOSSwitchDriver</type>
+ <connect_order>12</connect_order>
+ <jump_host></jump_host>
+ <COMPONENTS>
+ <shortName>leaf1</shortName>
+ <port1></port1>
+ <link1></link1>
+ <port2></port2>
+ <link2></link2>
+ <onosConfigPath></onosConfigPath>
+ <onosConfigFile></onosConfigFile>
+ </COMPONENTS>
+ </Leaf1>
+
+ <Leaf2>
+ <host>10.76.28.71</host>
+ <user>root</user>
+ <password>onl</password>
+ <type>StratumOSSwitchDriver</type>
+ <connect_order>13</connect_order>
+ <jump_host></jump_host>
+ <COMPONENTS>
+ <shortName>leaf2</shortName>
+ <port1></port1>
+ <link1></link1>
+ <port2></port2>
+ <link2></link2>
+ <onosConfigPath></onosConfigPath>
+ <onosConfigFile></onosConfigFile>
+ </COMPONENTS>
+ </Leaf2>
+
+ <Compute1>
+ <host>10.76.28.74</host>
+ <user>jenkins</user>
+ <password></password>
+ <type>HostDriver</type>
+ <connect_order>6</connect_order>
+ <jump_host></jump_host>
+ <COMPONENTS>
+ <mac></mac>
+ <inband>false</inband>
+ <dhcp>True</dhcp>
+ <ip>10.32.11.2</ip>
+ <shortName>h1</shortName>
+ <port1></port1>
+ <link1></link1>
+ <ifaceName>pairbond</ifaceName>
+ <routes>
+ <route1>
+ <network></network>
+ <netmask></netmask>
+ <gw></gw>
+ <interface></interface>
+ </route1>
+ </routes>
+ <sudo_required>true</sudo_required>
+ <scapy_path>/usr/bin/scapy</scapy_path>
+ </COMPONENTS>
+ </Compute1>
+
+ <Compute2>
+ <host>10.76.28.72</host>
+ <user>jenkins</user>
+ <password></password>
+ <type>HostDriver</type>
+ <connect_order>7</connect_order>
+ <jump_host></jump_host>
+ <COMPONENTS>
+ <mac></mac>
+ <inband>false</inband>
+ <dhcp>True</dhcp>
+ <ip>10.32.11.3</ip>
+ <shortName>h2</shortName>
+ <port1></port1>
+ <link1></link1>
+ <ifaceName>pairbond</ifaceName>
+ <routes>
+ <route1>
+ <network></network>
+ <netmask></netmask>
+ <gw></gw>
+ <interface></interface>
+ </route1>
+ </routes>
+ <sudo_required>true</sudo_required>
+ <scapy_path>/usr/bin/scapy</scapy_path>
+ </COMPONENTS>
+ </Compute2>
+
+ <Compute3>
+ <host>10.76.28.68</host>
+ <user>jenkins</user>
+ <password></password>
+ <type>HostDriver</type>
+ <connect_order>8</connect_order>
+ <jump_host></jump_host>
+ <COMPONENTS>
+ <mac></mac>
+ <inband>false</inband>
+ <dhcp>True</dhcp>
+ <ip>10.32.11.194</ip>
+ <shortName>h3</shortName>
+ <port1></port1>
+ <link1></link1>
+ <ifaceName>eno2</ifaceName>
+ <routes>
+ <route1>
+ <network></network>
+ <netmask></netmask>
+ <gw></gw>
+ <interface></interface>
+ </route1>
+ </routes>
+ <sudo_required>true</sudo_required>
+ <scapy_path>/usr/bin/scapy</scapy_path>
+ </COMPONENTS>
+ </Compute3>
+
+ <ManagmentServer>
+ <host>10.76.28.66</host>
+ <user>jenkins</user>
+ <password></password>
+ <type>HostDriver</type>
+ <connect_order>1</connect_order>
+ <COMPONENTS>
+ <mac></mac>
+ <inband>false</inband>
+ <dhcp>True</dhcp>
+ <ip>10.32.11.1</ip>
+ <shortName>mgmt</shortName>
+ <port1></port1>
+ <link1></link1>
+ <ifaceName>pairbond</ifaceName>
+ <routes>
+ <route1>
+ <network></network>
+ <netmask></netmask>
+ <gw></gw>
+ <interface></interface>
+ </route1>
+ </routes>
+ <sudo_required>true</sudo_required>
+ <scapy_path>/usr/bin/scapy</scapy_path>
+
+ </COMPONENTS>
+ </ManagmentServer>
+
+ <NetworkBench>
+ <host>10.76.28.66</host>
+ <user>jenkins</user>
+ <password></password>
+ <type>NetworkDriver</type>
+ <connect_order>1</connect_order>
+ <COMPONENTS>
+ </COMPONENTS>
+ </NetworkBench>
+
+ </COMPONENT>
+</TOPOLOGY>
diff --git a/TestON/tests/USECASE/SegmentRouting/SRStaging/dependencies/SRStagingTest.py b/TestON/tests/USECASE/SegmentRouting/SRStaging/dependencies/SRStagingTest.py
index 70fec33..5ce8b0d 100644
--- a/TestON/tests/USECASE/SegmentRouting/SRStaging/dependencies/SRStagingTest.py
+++ b/TestON/tests/USECASE/SegmentRouting/SRStaging/dependencies/SRStagingTest.py
@@ -1027,9 +1027,10 @@
main.log.warn( "Did not find a specific switch pod to kill" )
startTime = time.time()
# Delete pod
- main.ONOSbench.handle.sendline( "kubectl --kubeconfig %s delete pod -n %s %s" % ( kubeConfig, namespace, output[0] ) )
- main.ONOSbench.handle.expect( main.ONOSbench.prompt )
- main.log.debug( repr( main.ONOSbench.handle.before ) + repr( main.ONOSbench.handle.after ) )
+ deleted = main.ONOSbench.kubectlDeletePod( output[0], kubeConfig, namespace )
+ utilities.assert_equals( expect=main.TRUE, actual=deleted,
+ onpass="Successfully deleted switch pod",
+ onfail="Failed to delete switch pod" )
# TODO ASSERTS
main.log.info( "Sleeping %s seconds" % sleepTime )
time.sleep( sleepTime )
@@ -1092,15 +1093,95 @@
main.log.exception( "Error in killSwitchAgent" )
@staticmethod
- def onosDown():
+ def onosDown( main, controller, preventRestart=False ):
+ """
+ Brings down an ONOS kubernetes pod. If preventRestart, will attempt to prevent
+ it from coming back on that node by adding a taint.
+ Returns the nodeName of the pod that was killed
+ """
try:
- pass
+ # Get pod name to delete
+ podName = controller.k8s.podName
+ kubeConfig = main.Cluster.active(0).k8s.kubeConfig
+ namespace = main.params[ 'kubernetes' ][ 'namespace' ]
+ if preventRestart:
+ # Cordon off the node so no more pods will be scheduled
+ k8sNode = controller.Bench.kubectlGetPodNode( podName,
+ kubeconfig=kubeConfig,
+ namespace=namespace )
+ main.step( "Cordon off k8s node %s, which is hosting onos k8s pod %s" % ( k8sNode,
+ controller.name ) )
+ cordoned = controller.Bench.kubectlCordonNode( k8sNode,
+ kubeconfig=kubeConfig,
+ namespace=namespace )
+ utilities.assert_equals( expect=main.TRUE, actual=cordoned,
+ onpass="Successfully cordoned k8s node",
+ onfail="Failed to cordon off k8s node" )
+ controller.active = False
+ main.Cluster.setRunningNode( main.Cluster.getRunningPos() )
+ else:
+ k8sNode = None
+ main.step( "Delete onos k8s pod %s" % controller.name )
+ #startTime = time.time()
+ # Delete pod
+ deleted = controller.Bench.kubectlDeletePod( podName, kubeConfig, namespace )
+            utilities.assert_equals( expect=main.TRUE, actual=deleted,
+                                     onpass="Successfully deleted onos pod",
+                                     onfail="Failed to delete onos pod" )
+ return k8sNode
except SkipCase:
raise
except Exception:
main.log.exception( "Error in onosDown" )
@staticmethod
+ def onosUp( main, k8sNode, controller ):
+ """
+ Brings up an ONOS kubernetes pod by uncordoning the node
+ """
+ try:
+ kubeConfig = main.Cluster.active(0).k8s.kubeConfig
+ namespace = main.params[ 'kubernetes' ][ 'namespace' ]
+ podName = controller.k8s.podName
+ # Uncordon the node so pod will be scheduled
+ main.step( "Uncordon k8s node %s, which is hosting onos k8s pod %s" % ( k8sNode,
+ controller.name ) )
+ #startTime = time.time()
+ uncordoned = controller.Bench.kubectlUncordonNode( k8sNode,
+ kubeconfig=kubeConfig,
+ namespace=namespace )
+ utilities.assert_equals( expect=main.TRUE, actual=uncordoned,
+ onpass="Successfully uncordoned k8s node",
+ onfail="Failed to uncordon k8s node" )
+
+ # Check pod is ready
+ main.step( "Wait for ONOS pod to restart" )
+ ready = utilities.retry( controller.Bench.kubectlCheckPodReady,
+ main.FALSE,
+ kwargs={ "podName": podName,
+ "kubeconfig": kubeConfig,
+ "namespace": namespace },
+ attempts=50,
+ getRetryingTime=True )
+ utilities.assert_equals( expect=main.TRUE, actual=ready,
+ onpass="Successfully restarted onos pod",
+ onfail="Failed to restart onos pod" )
+ controller.active = True
+ # Set all nodes as "running", then reduce to only "active" nodes
+ main.Cluster.runningNodes = main.Cluster.controllers
+ main.Cluster.setRunningNode( main.Cluster.getRunningPos() )
+ controller.k8s.clearBuffer()
+ controller.k8s.kubectlPortForward( podName,
+ controller.k8s.portForwardList,
+ kubeConfig,
+ namespace )
+ #stopTime = time.time()
+ except SkipCase:
+ raise
+ except Exception:
+ main.log.exception( "Error in onosUp" )
+
+ @staticmethod
def analyzeIperfPcap( main, pcapFile, filterStr, timeout=240, pingOnly=False ):
"""
Given a pcap file, will use tshark to create a csv file with iperf fields.
@@ -1196,7 +1277,7 @@
except SkipCase:
raise
except Exception:
- main.log.exception( "Error in onosDown" )
+ main.log.exception( "Error in analyzePcap" )
return -1
# Remove first and last packets, sometimes there can be a long gap between
# these and the other packets