Adding more simultaneous failure cases to SRRouting
- Cases 601, 640, 641
- Some style fixes
- Refactoring out some common functionality from SR library functions
- Adding more support for configuring cluster size in SR tests
- Make sleeps in SR library functions configurable
- Increase some small timeout values that cause intermittent problems due
to increased latencies
Change-Id: Ic66c637467d7d47e92224c9017df86162f320318
diff --git a/TestON/tests/USECASE/SegmentRouting/dependencies/Testcaselib.py b/TestON/tests/USECASE/SegmentRouting/dependencies/Testcaselib.py
index 0daf074..b008984 100644
--- a/TestON/tests/USECASE/SegmentRouting/dependencies/Testcaselib.py
+++ b/TestON/tests/USECASE/SegmentRouting/dependencies/Testcaselib.py
@@ -19,7 +19,6 @@
along with TestON. If not, see <http://www.gnu.org/licenses/>.
"""
import os
-import imp
import time
import json
import urllib
@@ -52,7 +51,6 @@
main.testSetUp.envSetupDescription( False )
stepResult = main.FALSE
try:
- main.step( "Constructing test variables" )
# Test variables
main.cellName = main.params[ 'ENV' ][ 'cellName' ]
main.apps = main.params[ 'ENV' ][ 'cellApps' ]
@@ -427,7 +425,8 @@
return
@staticmethod
- def pingAll( main, tag="", dumpflows=True, acceptableFailed=0, basedOnIp=False, sleep=10, retryAttempts=1, skipOnFail=False ):
+ def pingAll( main, tag="", dumpflows=True, acceptableFailed=0, basedOnIp=False,
+ sleep=10, retryAttempts=1, skipOnFail=False ):
'''
Verify connectivity between hosts according to the ping chart
acceptableFailed: max number of acceptable failed pings.
@@ -507,81 +506,92 @@
tag + "_GroupsOn" )
@staticmethod
- def killLink( main, end1, end2, switches, links ):
+ def killLink( main, end1, end2, switches, links, sleep=None ):
"""
end1,end2: identify the switches, ex.: 'leaf1', 'spine1'
switches, links: number of expected switches and links after linkDown, ex.: '4', '6'
Kill a link and verify ONOS can see the proper link change
"""
- main.linkSleep = float( main.params[ 'timers' ][ 'LinkDiscovery' ] )
+ if sleep is None:
+ sleep = float( main.params[ 'timers' ][ 'LinkDiscovery' ] )
+ else:
+ sleep = float( sleep )
main.step( "Kill link between %s and %s" % ( end1, end2 ) )
- LinkDown = main.Network.link( END1=end1, END2=end2, OPTION="down" )
- LinkDown = main.Network.link( END2=end1, END1=end2, OPTION="down" )
+ linkDown = main.Network.link( END1=end1, END2=end2, OPTION="down" )
+ linkDown = linkDown and main.Network.link( END2=end1, END1=end2, OPTION="down" )
+ # TODO: Can remove this, since in the retry we will wait anyways if topology is incorrect
main.log.info(
- "Waiting %s seconds for link down to be discovered" % main.linkSleep )
- time.sleep( main.linkSleep )
+ "Waiting %s seconds for link down to be discovered" % sleep )
+ time.sleep( sleep )
topology = utilities.retry( main.Cluster.active( 0 ).CLI.checkStatus,
main.FALSE,
kwargs={ 'numoswitch': switches,
'numolink': links },
attempts=10,
- sleep=main.linkSleep )
- result = topology & LinkDown
+ sleep=sleep )
+ result = topology and linkDown
utilities.assert_equals( expect=main.TRUE, actual=result,
onpass="Link down successful",
onfail="Failed to turn off link?" )
@staticmethod
- def killLinkBatch( main, links, linksAfter, switches ):
+ def killLinkBatch( main, links, linksAfter, switches, sleep=None ):
"""
links = list of links (src, dst) to bring down.
"""
main.step("Killing a batch of links {0}".format(links))
+ if sleep is None:
+ sleep = float( main.params[ 'timers' ][ 'LinkDiscovery' ] )
+ else:
+ sleep = float( sleep )
for end1, end2 in links:
main.Network.link( END1=end1, END2=end2, OPTION="down")
main.Network.link( END1=end2, END2=end1, OPTION="down")
- main.linkSleep = float( main.params[ 'timers' ][ 'LinkDiscovery' ] )
+ # TODO: Can remove this, since in the retry we will wait anyways if topology is incorrect
main.log.info(
- "Waiting %s seconds for links down to be discovered" % main.linkSleep )
- time.sleep( main.linkSleep )
+ "Waiting %s seconds for links down to be discovered" % sleep )
+ time.sleep( sleep )
topology = utilities.retry( main.Cluster.active( 0 ).CLI.checkStatus,
main.FALSE,
kwargs={ 'numoswitch': switches,
'numolink': linksAfter },
attempts=10,
- sleep=main.linkSleep )
+ sleep=sleep )
utilities.assert_equals( expect=main.TRUE, actual=topology,
onpass="Link batch down successful",
onfail="Link batch down failed" )
@staticmethod
- def restoreLinkBatch( main, links, linksAfter, switches ):
+ def restoreLinkBatch( main, links, linksAfter, switches, sleep=None ):
"""
links = list of link (src, dst) to bring up again.
"""
main.step("Restoring a batch of links {0}".format(links))
+ if sleep is None:
+ sleep = float( main.params[ 'timers' ][ 'LinkDiscovery' ] )
+ else:
+ sleep = float( sleep )
for end1, end2 in links:
main.Network.link( END1=end1, END2=end2, OPTION="up")
main.Network.link( END1=end2, END2=end1, OPTION="up")
- main.linkSleep = float( main.params[ 'timers' ][ 'LinkDiscovery' ] )
main.log.info(
- "Waiting %s seconds for links up to be discovered" % main.linkSleep )
- time.sleep( main.linkSleep )
+ "Waiting %s seconds for links up to be discovered" % sleep )
+ time.sleep( sleep )
topology = utilities.retry( main.Cluster.active( 0 ).CLI.checkStatus,
main.FALSE,
kwargs={ 'numoswitch': switches,
'numolink': linksAfter },
attempts=10,
- sleep=main.linkSleep )
+ sleep=sleep )
utilities.assert_equals( expect=main.TRUE, actual=topology,
onpass="Link batch up successful",
@@ -635,7 +645,7 @@
@staticmethod
def restoreLink( main, end1, end2, switches, links,
- portUp=False, dpid1='', dpid2='', port1='', port2='' ):
+ portUp=False, dpid1='', dpid2='', port1='', port2='', sleep=None ):
"""
Params:
end1,end2: identify the end switches, ex.: 'leaf1', 'spine1'
@@ -646,23 +656,28 @@
Kill a link and verify ONOS can see the proper link change
"""
main.step( "Restore link between %s and %s" % ( end1, end2 ) )
+ if sleep is None:
+ sleep = float( main.params[ 'timers' ][ 'LinkDiscovery' ] )
+ else:
+ sleep = float( sleep )
result = False
count = 0
while True:
count += 1
+ ctrl = main.Cluster.next()
main.Network.link( END1=end1, END2=end2, OPTION="up" )
main.Network.link( END2=end1, END1=end2, OPTION="up" )
main.log.info(
- "Waiting %s seconds for link up to be discovered" % main.linkSleep )
- time.sleep( main.linkSleep )
+ "Waiting %s seconds for link up to be discovered" % sleep )
+ time.sleep( sleep )
if portUp:
ctrl.CLI.portstate( dpid=dpid1, port=port1, state='Enable' )
ctrl.CLI.portstate( dpid=dpid2, port=port2, state='Enable' )
- time.sleep( main.linkSleep )
+ time.sleep( sleep )
- result = main.Cluster.active( 0 ).CLI.checkStatus( numoswitch=switches,
- numolink=links )
+ result = ctrl.CLI.checkStatus( numoswitch=switches,
+ numolink=links )
if count > 5 or result:
break
utilities.assert_equals( expect=main.TRUE, actual=result,
@@ -670,58 +685,67 @@
onfail="Failed to bring link up" )
@staticmethod
- def killSwitch( main, switch, switches, links ):
+ def killSwitch( main, switch, switches, links, sleep=None ):
"""
Params: switches, links: number of expected switches and links after SwitchDown, ex.: '4', '6'
Completely kill a switch and verify ONOS can see the proper change
"""
- main.switchSleep = float( main.params[ 'timers' ][ 'SwitchDiscovery' ] )
+ if sleep is None:
+ sleep = float( main.params[ 'timers' ][ 'SwitchDiscovery' ] )
+ else:
+ sleep = float( sleep )
switch = switch if isinstance( switch, list ) else [ switch ]
main.step( "Kill " + str( switch ) )
for s in switch:
main.log.info( "Stopping " + s )
main.Network.switch( SW=s, OPTION="stop" )
# todo make this repeatable
+
+ # TODO: Can remove this, since in the retry we will wait anyways if topology is incorrect
main.log.info( "Waiting %s seconds for switch down to be discovered" % (
- main.switchSleep ) )
- time.sleep( main.switchSleep )
+ sleep ) )
+ time.sleep( sleep )
topology = utilities.retry( main.Cluster.active( 0 ).CLI.checkStatus,
main.FALSE,
kwargs={ 'numoswitch': switches,
'numolink': links },
attempts=10,
- sleep=main.switchSleep )
+ sleep=sleep )
utilities.assert_equals( expect=main.TRUE, actual=topology,
onpass="Kill switch successful",
onfail="Failed to kill switch?" )
@staticmethod
- def recoverSwitch( main, switch, switches, links, rediscoverHosts=False, hostsToDiscover=[] ):
+ def recoverSwitch( main, switch, switches, links, rediscoverHosts=False, hostsToDiscover=[], sleep=None ):
"""
Params: switches, links: number of expected switches and links after SwitchUp, ex.: '4', '6'
Recover a switch and verify ONOS can see the proper change
"""
- # todo make this repeatable
+ if sleep is None:
+ sleep = float( main.params[ 'timers' ][ 'SwitchDiscovery' ] )
+ else:
+ sleep = float( sleep )
+ # TODO make this repeatable
switch = switch if isinstance( switch, list ) else [ switch ]
main.step( "Recovering " + str( switch ) )
for s in switch:
main.log.info( "Starting " + s )
main.Network.switch( SW=s, OPTION="start" )
main.log.info( "Waiting %s seconds for switch up to be discovered" % (
- main.switchSleep ) )
- time.sleep( main.switchSleep )
+ sleep ) )
+ time.sleep( sleep )
if rediscoverHosts:
main.Network.discoverHosts( hostList=hostsToDiscover )
main.log.info( "Waiting %s seconds for hosts to get re-discovered" % (
- main.switchSleep ) )
- time.sleep( main.switchSleep )
+ sleep ) )
+ time.sleep( sleep )
topology = utilities.retry( main.Cluster.active( 0 ).CLI.checkStatus,
main.FALSE,
kwargs={ 'numoswitch': switches,
'numolink': links },
attempts=10,
- sleep=main.switchSleep )
+ sleep=sleep )
utilities.assert_equals( expect=main.TRUE, actual=topology,
onpass="Switch recovery successful",
onfail="Failed to recover switch?" )
@@ -768,14 +792,62 @@
main.ONOSbench.onosStop( ctrl.ipAddress )
@staticmethod
- def killOnos( main, nodes, switches, links, expNodes ):
+ def verifyNodes( main ):
+ """
+    Verifies that each active node in the cluster has an accurate view of the other nodes and their status
+
+ Params:
+ nodes, integer array with position of the ONOS nodes in the CLIs array
+ """
+ nodeResults = utilities.retry( main.Cluster.nodesCheck,
+ False,
+ attempts=10,
+ sleep=10 )
+ utilities.assert_equals( expect=True, actual=nodeResults,
+ onpass="Nodes check successful",
+ onfail="Nodes check NOT successful" )
+
+ if not nodeResults:
+ for ctrl in main.Cluster.runningNodes:
+ main.log.debug( "{} components not ACTIVE: \n{}".format(
+ ctrl.name,
+ ctrl.CLI.sendline( "scr:list | grep -v ACTIVE" ) ) )
+ main.log.error( "Failed to kill ONOS, stopping test" )
+ main.cleanAndExit()
+
+ @staticmethod
+ def verifyTopology( main, switches, links, expNodes ):
+ """
+    Verifies that the ONOS cluster has an accurate view of the topology
+
+ Params:
+ switches, links, expNodes: number of expected switches, links, and nodes at this point in the test ex.: '4', '6', '2'
+ """
+ main.step( "Check number of topology elements" )
+ topology = utilities.retry( main.Cluster.active( 0 ).CLI.checkStatus,
+ main.FALSE,
+ kwargs={ 'numoswitch': switches,
+ 'numolink': links,
+ 'numoctrl': expNodes },
+ attempts=10,
+ sleep=12 )
+ utilities.assert_equals( expect=main.TRUE, actual=topology,
+ onpass="Number of topology elements are correct",
+ onfail="Unexpected number of links, switches, and/or controllers" )
+
+ @staticmethod
+ def killOnos( main, nodes, switches, links, expNodes, sleep=None ):
"""
Params: nodes, integer array with position of the ONOS nodes in the CLIs array
switches, links, nodes: number of expected switches, links and nodes after KillOnos, ex.: '4', '6'
Completely Kill an ONOS instance and verify the ONOS cluster can see the proper change
"""
+ # TODO: We have enough information in the Cluster instance to remove expNodes from here and verifyTopology
main.step( "Killing ONOS instances with index(es): {}".format( nodes ) )
- main.onosSleep = float( main.params[ 'timers' ][ 'OnosDiscovery' ] )
+ if sleep is None:
+ sleep = float( main.params[ 'timers' ][ 'OnosDiscovery' ] )
+ else:
+ sleep = float( sleep )
for i in nodes:
killResult = main.ONOSbench.onosDie( main.Cluster.runningNodes[ i ].ipAddress )
@@ -783,48 +855,27 @@
onpass="ONOS instance Killed",
onfail="Error killing ONOS instance" )
main.Cluster.runningNodes[ i ].active = False
- time.sleep( main.onosSleep )
+ main.Cluster.reset()
+ time.sleep( sleep )
if len( nodes ) < main.Cluster.numCtrls:
-
- nodeResults = utilities.retry( main.Cluster.nodesCheck,
- False,
- attempts=10,
- sleep=10 )
- utilities.assert_equals( expect=True, actual=nodeResults,
- onpass="Nodes check successful",
- onfail="Nodes check NOT successful" )
-
- if not nodeResults:
- for i in nodes:
- ctrl = main.Cluster.runningNodes[ i ]
- main.log.debug( "{} components not ACTIVE: \n{}".format(
- ctrl.name,
- ctrl.CLI.sendline( "scr:list | grep -v ACTIVE" ) ) )
- main.log.error( "Failed to kill ONOS, stopping test" )
- main.cleanAndExit()
-
- topology = utilities.retry( main.Cluster.active( 0 ).CLI.checkStatus,
- main.FALSE,
- kwargs={ 'numoswitch': switches,
- 'numolink': links,
- 'numoctrl': expNodes },
- attempts=10,
- sleep=12 )
- utilities.assert_equals( expect=main.TRUE, actual=topology,
- onpass="ONOS Instance down successful",
- onfail="Failed to turn off ONOS Instance" )
+ Testcaselib.verifyNodes( main )
+ Testcaselib.verifyTopology( main, switches, links, expNodes )
@staticmethod
- def recoverOnos( main, nodes, switches, links, expNodes ):
+ def recoverOnos( main, nodes, switches, links, expNodes, sleep=None ):
"""
Params: nodes, integer array with position of the ONOS nodes in the CLIs array
switches, links, nodes: number of expected switches, links and nodes after recoverOnos, ex.: '4', '6'
Recover an ONOS instance and verify the ONOS cluster can see the proper change
"""
main.step( "Recovering ONOS instances with index(es): {}".format( nodes ) )
+ if sleep is None:
+ sleep = float( main.params[ 'timers' ][ 'OnosDiscovery' ] )
+ else:
+ sleep = float( sleep )
[ main.ONOSbench.onosStart( main.Cluster.runningNodes[ i ].ipAddress ) for i in nodes ]
- time.sleep( main.onosSleep )
+ time.sleep( sleep )
for i in nodes:
isUp = main.ONOSbench.isup( main.Cluster.runningNodes[ i ].ipAddress )
utilities.assert_equals( expect=main.TRUE, actual=isUp,
@@ -843,34 +894,11 @@
onpass="ONOS CLI is ready",
onfail="ONOS CLI is not ready" )
+ main.Cluster.reset()
main.step( "Checking ONOS nodes" )
- nodeResults = utilities.retry( main.Cluster.nodesCheck,
- False,
- attempts=5,
- sleep=10 )
- utilities.assert_equals( expect=True, actual=nodeResults,
- onpass="Nodes check successful",
- onfail="Nodes check NOT successful" )
+ Testcaselib.verifyNodes( main )
+ Testcaselib.verifyTopology( main, switches, links, expNodes )
- if not nodeResults:
- for i in nodes:
- ctrl = main.Cluster.runningNodes[ i ]
- main.log.debug( "{} components not ACTIVE: \n{}".format(
- ctrl.name,
- ctrl.CLI.sendline( "scr:list | grep -v ACTIVE" ) ) )
- main.log.error( "Failed to start ONOS, stopping test" )
- main.cleanAndExit()
-
- topology = utilities.retry( main.Cluster.active( 0 ).CLI.checkStatus,
- main.FALSE,
- kwargs={ 'numoswitch': switches,
- 'numolink': links,
- 'numoctrl': expNodes },
- attempts=10,
- sleep=12 )
- utilities.assert_equals( expect=main.TRUE, actual=topology,
- onpass="ONOS Instance down successful",
- onfail="Failed to turn off ONOS Instance" )
ready = utilities.retry( main.Cluster.active( 0 ).CLI.summary,
main.FALSE,
attempts=10,
@@ -1103,7 +1131,7 @@
# Send packet and check received packet
expectedResult = expect.pop( 0 ) if isinstance( expect, list ) else expect
t3Cmd = "t3-troubleshoot -vv -sp {} -et ipv{} -d {} -dm {}".format( srcEntry[ "port" ], routeData[ "ipVersion" ],
- routeData[ "group" ], srcEntry[ "Ether" ] )
+ routeData[ "group" ], srcEntry[ "Ether" ] )
trafficResult = main.topo.sendScapyPackets( sender, receiver, pktFilter, pkt, sIface, dIface,
expectedResult, maxRetry, True, t3Cmd )
if not trafficResult: