Merge "[SDFAB-1024] Modify TestON to verify persistence mastership"
diff --git a/TestON/drivers/common/cli/onosclidriver.py b/TestON/drivers/common/cli/onosclidriver.py
index 5694fc6..f4f65db 100755
--- a/TestON/drivers/common/cli/onosclidriver.py
+++ b/TestON/drivers/common/cli/onosclidriver.py
@@ -2676,16 +2676,25 @@
else:
main.cleanAndExit()
- def flows( self, state="any", jsonFormat=True, timeout=60, noExit=False, noCore=False, device=""):
+ def flows( self, state="any", jsonFormat=True, timeout=60, noExit=False, noCore=False, device="" ):
+ return self.dataPlaneEntities( entity="flows", state=state, jsonFormat=jsonFormat,
+ timeout=timeout, noExit=noExit, noCore=noCore, device=device )
+
+ def groups( self, state="any", jsonFormat=True, timeout=60, noExit=False, noCore=False, device="" ):
+ return self.dataPlaneEntities( entity="groups", state=state, jsonFormat=jsonFormat,
+ timeout=timeout, noExit=noExit, noCore=noCore, device=device )
+
+ def dataPlaneEntities( self, entity="flows", state="any", jsonFormat=True,
+ timeout=60, noExit=False, noCore=False, device="" ):
"""
Optional:
* jsonFormat: enable output formatting in json
* noCore: suppress core flows
Description:
- Obtain flows currently installed
+            Obtain dataplane entities (flows or groups) currently installed
"""
try:
- cmdStr = "flows"
+ cmdStr = entity
if jsonFormat:
cmdStr += " -j"
if noCore:
@@ -2696,7 +2705,7 @@
assert handle is not None, "Error in sendline"
assert "Command not found:" not in handle, handle
if re.search( "Error:", handle ):
- main.log.error( self.name + ": flows() response: " +
+ main.log.error( self.name + ": " + entity + "() response: " +
str( handle ) )
return handle
except AssertionError:
@@ -2782,6 +2791,68 @@
main.log.exception( self.name + ": Uncaught exception!" )
main.cleanAndExit()
+ def checkGroupCount( self, min=0, timeout=60 ):
+ count = self.getTotalGroupsNum( timeout=timeout )
+ count = int( count ) if count else 0
+ main.log.debug( "found {} groups".format( count ) )
+ return count if ( count >= min ) else False
+
+ def checkGroupsState( self, isPENDING=True, timeout=60, noExit=False ):
+ """
+ Description:
+            Check if all the current groups are in ADDED state
+            We check PENDING_ADD, PENDING_ADD_RETRY, PENDING_DELETE, PENDING_UPDATE
+            and WAITING_AUDIT_COMPLETE groups; if the count of those states is 0,
+            all current groups are in ADDED state: return main.TRUE, else main.FALSE
+ Optional:
+ * isPENDING: whether the PENDING_ADD is also a correct status
+ Return:
+ returnValue - Returns main.TRUE only if all groups are in
+ ADDED state or PENDING_ADD if the isPENDING
+ parameter is set true, return main.FALSE otherwise.
+ """
+ try:
+ states = [ "PENDING_ADD", "PENDING_ADD_RETRY", "PENDING_DELETE", "PENDING_UPDATE", "WAITING_AUDIT_COMPLETE" ]
+ checkedStates = []
+ statesCount = [ 0, 0, 0, 0, 0 ]
+ for s in states:
+ rawGroups = self.groups( state=s, timeout = timeout )
+ if rawGroups:
+ # if we didn't get groups or groups function return None, we should return
+                # main.FALSE
+ checkedStates.append( json.loads( rawGroups ) )
+ else:
+ return main.FALSE
+ for i in range( len( states ) ):
+ statesCount[ i ] += int( len(checkedStates[i]) )
+ main.log.info( states[ i ] + " groups: " + str( statesCount[ i ] ) )
+
+ # We want to count PENDING_ADD if isPENDING is true
+ if isPENDING:
+ if statesCount[ 2 ] + statesCount[ 3 ] + statesCount[ 4 ] > 0:
+ return main.FALSE
+ else:
+ if statesCount[ 0 ] + statesCount[ 1 ] + statesCount[ 2 ] + statesCount[ 3 ] + statesCount[ 4 ] > 0:
+ return main.FALSE
+ return main.TRUE
+ except ( TypeError, ValueError ):
+ main.log.exception( "{}: Object not as expected: {!r}".format( self.name, rawGroups ) )
+ return None
+
+ except AssertionError:
+ main.log.exception( "" )
+ return None
+ except pexpect.TIMEOUT:
+ main.log.error( self.name + ": ONOS timeout" )
+ return None
+ except pexpect.EOF:
+ main.log.error( self.name + ": EOF exception found" )
+ main.log.error( self.name + ": " + self.handle.before )
+ main.cleanAndExit()
+ except Exception:
+ main.log.exception( self.name + ": Uncaught exception!" )
+ main.cleanAndExit()
+
def pushTestIntents( self, ingress, egress, batchSize, offset="",
options="", timeout=10, background = False, noExit=False, getResponse=False ):
"""
@@ -2843,31 +2914,37 @@
main.cleanAndExit()
def getTotalFlowsNum( self, timeout=60, noExit=False ):
+        return self.getTotalEntitiesNum( entity="flows", timeout=timeout, noExit=noExit )
+
+ def getTotalGroupsNum( self, timeout=60, noExit=False ):
+        return self.getTotalEntitiesNum( entity="groups", timeout=timeout, noExit=noExit )
+
+ def getTotalEntitiesNum( self, entity="flows", timeout=60, noExit=False ):
"""
Description:
- Get the number of ADDED flows.
+ Get the number of ADDED entities.
Return:
- The number of ADDED flows
+ The number of ADDED entities
Or return None if any exceptions
"""
try:
- # get total added flows number
- cmd = "flows -c added"
- rawFlows = self.sendline( cmd, timeout=timeout, noExit=noExit )
- if rawFlows:
- rawFlows = rawFlows.split( "\n" )
- totalFlows = 0
- for l in rawFlows:
- totalFlows += int( l.split( "Count=" )[ 1 ] )
+ # get total added entities number
+ cmd = entity + " -c added"
+ rawEntities = self.sendline( cmd, timeout=timeout, noExit=noExit )
+ if rawEntities:
+ rawEntities = rawEntities.split( "\n" )
+ totalEntities = 0
+ for l in rawEntities:
+ totalEntities += int( l.split( "Count=" )[ 1 ] )
else:
main.log.warn( "Response not as expected!" )
return None
- return totalFlows
+ return totalEntities
except IndexError:
main.log.exception( "{}: Object not as expected!".format( self.name ) )
- main.log.debug( "rawFlows: {}".format( rawFlows ) )
+ main.log.debug( "rawEntities: {}".format( rawEntities ) )
return None
except ( TypeError, ValueError ):
main.log.exception( "{}: Object not as expected!".format( self.name ) )
diff --git a/TestON/tests/USECASE/SegmentRouting/PMastership/PMastership.params b/TestON/tests/USECASE/SegmentRouting/PMastership/PMastership.params
new file mode 100644
index 0000000..2bad253
--- /dev/null
+++ b/TestON/tests/USECASE/SegmentRouting/PMastership/PMastership.params
@@ -0,0 +1,65 @@
+<PARAMS>
+ <testcases>1</testcases>
+
+ <GRAPH>
+ <nodeCluster>pairedleaves</nodeCluster>
+ <builds>20</builds>
+ <jobName>PMastership</jobName>
+ </GRAPH>
+
+ <persistent_setup>True</persistent_setup>
+
+ <kubernetes>
+ <appName>onos-classic</appName>
+ <namespace>tost</namespace>
+ </kubernetes>
+ <use_stern>True</use_stern>
+
+ <PMastership>
+
+ <PMastership_dataplane_fail>
+            <switch_to_kill>Leaf2</switch_to_kill> <!-- Component name of the switch to kill in CASE 1 -->
+ <k8s_switch_node>leaf2</k8s_switch_node>
+ <k8s_label>node-role.aetherproject.org</k8s_label>
+ <k8s_label_value_test>switch-test</k8s_label_value_test>
+ <k8s_label_value_normal>switch</k8s_label_value_normal>
+ </PMastership_dataplane_fail>
+
+ </PMastership>
+
+ <TOPO>
+ <switchNum>2</switchNum>
+ <linkNum>2</linkNum>
+ </TOPO>
+
+ <ONOS_Logging>
+ <org.onosproject.segmentrouting>DEBUG</org.onosproject.segmentrouting>
+ <org.omecproject.up4>TRACE</org.omecproject.up4>
+ </ONOS_Logging>
+ <ONOS_Logging_Reset>
+ <org.onosproject.segmentrouting>DEBUG</org.onosproject.segmentrouting>
+ <org.omecproject.up4>INFO</org.omecproject.up4>
+ </ONOS_Logging_Reset>
+
+ <ENV>
+ <cellName>productionCell</cellName>
+ <cellApps>drivers,fpm,lldpprovider,hostprovider,netcfghostprovider,drivers.bmv2,org.opencord.fabric-tofino,pipelines.fabric,org.stratumproject.fabric-tna,drivers.barefoot,segmentrouting,up4</cellApps>
+ </ENV>
+
+ <DEPENDENCY>
+ <useCommonConf>False</useCommonConf>
+ <useCommonTopo>True</useCommonTopo>
+ <useBmv2>True</useBmv2>
+ <bmv2SwitchType>stratum</bmv2SwitchType>
+ <switchPrefix></switchPrefix>
+ <stratumRoot>~/stratum</stratumRoot>
+ <topology>trellis_fabric.py</topology>
+ <lib></lib>
+ </DEPENDENCY>
+
+ <SCALE>
+ <size>3</size>
+ <max>3</max>
+ </SCALE>
+
+</PARAMS>
diff --git a/TestON/tests/USECASE/SegmentRouting/PMastership/PMastership.py b/TestON/tests/USECASE/SegmentRouting/PMastership/PMastership.py
new file mode 100644
index 0000000..886806e
--- /dev/null
+++ b/TestON/tests/USECASE/SegmentRouting/PMastership/PMastership.py
@@ -0,0 +1,287 @@
+class PMastership:
+
+ def __init__(self):
+ self.default = ''
+
+ def CASE1(self, main):
+ main.case("PMastership Test")
+ """
+ Verify there are no pending flows and groups
+ Get flows and group counts
+ Verify that are not 0
+ Get the master of leaf2 (look at the params file for the config)
+ Verify that has the master
+ Kill switch leaf2
+ Set label on switch K8S node to prevent K8S to redeploy stratum
+ Verify there are no pending flows and groups related to segment routing
+ Verify that the master of leaf2 is still the same as before
+ Wait for the switch to be up again
+ Verify there are no pending flows and groups
+ """
+ try:
+ from tests.USECASE.SegmentRouting.dependencies.Testcaselib import \
+ Testcaselib as run
+ from tests.USECASE.SegmentRouting.SRStaging.dependencies.SRStagingTest import \
+ SRStagingTest
+ import time
+ except ImportError as e:
+ main.log.error("Import not found. Exiting the test")
+ main.log.error(e)
+ main.cleanAndExit()
+ # Retrieves the params of the test
+ n_switches = int(main.params["TOPO"]["switchNum"])
+ switch_to_kill = main.params["PMastership"]["PMastership_dataplane_fail"]["switch_to_kill"]
+ k8s_switch_node = main.params["PMastership"]["PMastership_dataplane_fail"]["k8s_switch_node"]
+ k8s_label = main.params["PMastership"]["PMastership_dataplane_fail"]["k8s_label"]
+ k8s_label_value_test = main.params["PMastership"]["PMastership_dataplane_fail"]["k8s_label_value_test"]
+ k8s_label_value_normal = main.params["PMastership"]["PMastership_dataplane_fail"]["k8s_label_value_normal"]
+ # Init the main components and variables
+ run.initTest(main)
+ main.log.info(main.Cluster.numCtrls)
+ main.Cluster.setRunningNode(3)
+ run.installOnos(main, skipPackage=True, cliSleep=5)
+ onos_cli = main.Cluster.active(0).CLI
+ kubectl = main.Cluster.active(0).Bench
+ kubeconfig = main.Cluster.active(0).k8s.kubeConfig
+ namespace = main.params['kubernetes']['namespace']
+
+ main.step("Verify there are added flows")
+ initial_flows_count = onos_cli.checkFlowCount()
+ empty = main.TRUE if ( initial_flows_count == 0 ) else main.FALSE
+ utilities.assert_equal(
+ expect=main.FALSE,
+ actual=empty,
+ onpass="There are " + str(initial_flows_count) + " added flows",
+ onfail="There are no added flows",
+ )
+
+ main.step("Verify there are added groups")
+ initial_groups_count = onos_cli.checkGroupCount()
+ empty = main.TRUE if ( initial_groups_count == 0 ) else main.FALSE
+ utilities.assert_equal(
+ expect=main.FALSE,
+ actual=empty,
+ onpass="There are " + str(initial_groups_count) + " added groups",
+ onfail="There are no added groups",
+ )
+
+ no_pending_flows = utilities.retry(onos_cli.checkFlowsState,
+ [False, None],
+ kwargs={"isPENDING": False},
+ attempts=20,
+ getRetryingTime=True)
+
+ main.step("Verify there are no pending flows")
+ utilities.assert_equal(
+ expect=main.TRUE,
+ actual=no_pending_flows,
+ onpass="There are no pending flows",
+ onfail="There are pending flows",
+ )
+
+ no_pending_groups = utilities.retry(onos_cli.checkGroupsState,
+ [False, None],
+ kwargs={"isPENDING": False},
+ attempts=20,
+ getRetryingTime=True)
+
+ main.step("Verify there are no pending groups")
+ utilities.assert_equal(
+ expect=main.TRUE,
+ actual=no_pending_groups,
+ onpass="There are no pending groups",
+ onfail="There are pending groups",
+ )
+
+ main.step("Retrieving " + switch_to_kill + " master")
+ initial_master = onos_cli.getMaster("device:" + k8s_switch_node)
+ no_master = main.TRUE if ( initial_master is None ) else main.FALSE
+ utilities.assert_equal(
+ expect=main.FALSE,
+ actual=no_master,
+            onpass=str( initial_master ) + " is the master of " + switch_to_kill,
+ onfail="There is no master for " + switch_to_kill,
+ )
+
+ main.step("Set label to switch k8s node and kill Stratum")
+ # K8s node name correspond to the switch name in lowercase
+ utilities.assert_equal(
+ expect=main.TRUE,
+ actual=kubectl.kubectlSetLabel(
+ nodeName=k8s_switch_node,
+ label=k8s_label,
+ value=k8s_label_value_test,
+ kubeconfig=kubeconfig,
+ namespace=namespace,
+ ),
+ onpass="Label has been set correctly on node %s" % k8s_switch_node,
+ onfail="Label has not been set on node %s" % k8s_switch_node
+ )
+
+ try:
+ def checkNumberStratumPods(n_value):
+ pods = kubectl.kubectlGetPodNames(
+ kubeconfig=kubeconfig,
+ namespace=namespace,
+ name="stratum"
+ )
+ main.log.info("PODS: " + str(pods))
+ return n_value == len(pods) if pods is not main.FALSE else False
+ # Execute the following in try/except/finally to be sure to restore the
+ # k8s label even in case of unhandled exception.
+
+ # Wait for stratum pod to be removed from the switch
+ removed = utilities.retry(checkNumberStratumPods,
+ False,
+ args=[n_switches - 1],
+ attempts=50)
+ main.log.info("Stratum has been removed from the switch? %s" % removed)
+
+ sleepTime = 20
+ switch_component = getattr(main, switch_to_kill)
+ main.log.info("Sleeping %s seconds for ONOS to react" % sleepTime)
+ time.sleep(sleepTime)
+
+ available = utilities.retry(SRStagingTest.switchIsConnected,
+ True,
+ args=[switch_component],
+ attempts=300,
+ getRetryingTime=True)
+ main.log.info("Switch %s is available in ONOS? %s" % (
+ switch_to_kill, available))
+ utilities.assert_equal(
+ expect=True,
+ actual=not available and removed,
+ onpass="Stratum was removed from switch k8s node",
+ onfail="Stratum was not removed from switch k8s node"
+ )
+
+ main.step("Verify there are no segmentrouting flows after the failure")
+ raw_flows = onos_cli.flows(device="device:" + k8s_switch_node)
+ sr_flows = main.TRUE if "segmentrouting" in raw_flows else main.FALSE
+ utilities.assert_equal(
+ expect=main.FALSE,
+ actual=sr_flows,
+ onpass="There are no segmentrouting flows",
+ onfail="There are segmentrouting flows",
+ )
+
+ main.step("Verify there are no segmentrouting groups after the failure")
+ raw_groups = onos_cli.groups(device="device:" + k8s_switch_node)
+ sr_groups = main.TRUE if "segmentrouting" in raw_groups else main.FALSE
+ utilities.assert_equal(
+ expect=main.FALSE,
+ actual=sr_groups,
+ onpass="There are no segmentrouting groups",
+ onfail="There are segmentrouting groups",
+ )
+
+            main.step("Verify " + str( initial_master ) + " is still the master of " + switch_to_kill)
+            after_master = onos_cli.getMaster("device:" + k8s_switch_node)
+            no_master = main.TRUE if ( after_master is None ) else main.FALSE
+ utilities.assert_equal(
+ expect=main.FALSE,
+ actual=no_master,
+                onpass=str( initial_master ) + " is the master of " + switch_to_kill,
+ onfail="There is no master for " + switch_to_kill,
+ )
+
+ same_master = main.TRUE if ( initial_master == after_master ) else main.FALSE
+ utilities.assert_equal(
+ expect=main.TRUE,
+ actual=same_master,
+ onpass=initial_master + " is still the master of " + switch_to_kill,
+                onfail="Master for " + switch_to_kill + " is " + str( after_master ),
+ )
+
+ except Exception as e:
+ main.log.error("Unhandled exception!")
+ main.log.error(e)
+ finally:
+ utilities.assert_equal(
+ expect=main.TRUE,
+ actual=kubectl.kubectlSetLabel(
+ nodeName=k8s_switch_node,
+ label=k8s_label,
+ value=k8s_label_value_normal,
+ kubeconfig=kubeconfig,
+ namespace=namespace,
+ ),
+ onpass="Label has been set correctly on node %s" % k8s_switch_node,
+ onfail="Label has not been set on node %s" % k8s_switch_node
+ )
+
+ # Wait for stratum pod to be re-deployed on the switch
+ deployed = utilities.retry(checkNumberStratumPods,
+ False,
+ args=[n_switches],
+ attempts=50)
+ main.log.info("Stratum has been redeployed on the switch? %s" % deployed)
+
+ # Wait switch to be back in ONOS
+ available = utilities.retry(SRStagingTest.switchIsConnected,
+ False,
+ args=[switch_component],
+ sleep=2,
+ attempts=300,
+ getRetryingTime=True)
+ main.log.info("Switch %s is available in ONOS? %s" % (
+ switch_to_kill, available))
+ utilities.assert_equal(
+ expect=True,
+ actual=available and deployed,
+ onpass="Switch is back available in ONOS and stratum has been redeployed",
+ onfail="Switch is not available in ONOS, may influence subsequent tests!"
+ )
+
+ sleepTime = 10
+ main.log.info("Sleeping %s seconds for ONOS to react and assure flows/groups are ADDED" % sleepTime)
+ time.sleep(sleepTime)
+
+ main.step("Verify there are added flows after reboot")
+ after_flows_count = onos_cli.checkFlowCount()
+ empty = main.TRUE if ( after_flows_count == 0 ) else main.FALSE
+ utilities.assert_equal(
+ expect=main.FALSE,
+ actual=empty,
+ onpass="There are " + str(after_flows_count) + " added flows",
+ onfail="There are no added flows",
+ )
+
+ main.step("Verify there are added groups after reboot")
+ after_groups_count = onos_cli.checkGroupCount()
+ empty = main.TRUE if ( after_groups_count == 0 ) else main.FALSE
+ utilities.assert_equal(
+ expect=main.FALSE,
+ actual=empty,
+ onpass="There are " + str(after_groups_count) + " added groups",
+ onfail="There are no added groups",
+ )
+
+ no_pending_flows = utilities.retry(onos_cli.checkFlowsState,
+ [False, None],
+ kwargs={"isPENDING": False},
+ attempts=20,
+ getRetryingTime=True)
+
+ main.step("Verify there are no pending flows after reboot")
+ utilities.assert_equal(
+ expect=main.TRUE,
+ actual=no_pending_flows,
+ onpass="There are no pending flows",
+ onfail="There are pending flows",
+ )
+
+ no_pending_groups = utilities.retry(onos_cli.checkGroupsState,
+ [False, None],
+ kwargs={"isPENDING": False},
+ attempts=20,
+ getRetryingTime=True)
+
+ main.step("Verify there are no pending groups after reboot")
+ utilities.assert_equal(
+ expect=main.TRUE,
+ actual=no_pending_groups,
+ onpass="There are no pending groups",
+ onfail="There are pending groups",
+ )
diff --git a/TestON/tests/USECASE/SegmentRouting/PMastership/PMastership.topo b/TestON/tests/USECASE/SegmentRouting/PMastership/PMastership.topo
new file mode 100644
index 0000000..38b0af9
--- /dev/null
+++ b/TestON/tests/USECASE/SegmentRouting/PMastership/PMastership.topo
@@ -0,0 +1,57 @@
+<TOPOLOGY>
+ <COMPONENT>
+ <ONOScell>
+ <host>localhost</host> # ONOS "bench" machine
+ <user>jenkins</user>
+ <password></password>
+ <type>OnosClusterDriver</type>
+ <connect_order>50</connect_order>
+ <jump_host></jump_host>
+ <home>~/onos</home> # defines where onos home is on the build machine. Defaults to "~/onos/" if empty.
+ <COMPONENTS>
+ <kubeConfig>~/.kube/dev-pairedleaves-tucson</kubeConfig> # If set, will attempt to use this file for setting up port-forwarding
+ <useDocker>True</useDocker> # Whether to use docker for ONOS nodes
+ <docker_prompt>\$</docker_prompt>
+ <cluster_name></cluster_name> # Used as a prefix for cluster components. Defaults to 'ONOS'
+ <diff_clihost>True</diff_clihost> # if it has different host other than localhost for CLI. True or empty. OC# will be used if True.
+ <karaf_username>karaf</karaf_username>
+ <karaf_password>karaf</karaf_password>
+ <web_user>karaf</web_user>
+ <web_pass>karaf</web_pass>
+ <karafPrompt_username>karaf</karafPrompt_username>
+ <rest_port></rest_port>
+ <prompt></prompt> # TODO: we technically need a few of these, one per component
+ <onos_home>~/onos/</onos_home> # defines where onos home is on the target cell machine. Defaults to entry in "home" if empty.
+ <nodes> 1 </nodes> # number of nodes in the cluster
+ <up4_port>51001</up4_port> # Port where the UP4 P4Runtime server is listening
+ </COMPONENTS>
+ </ONOScell>
+
+ <Leaf2>
+ <host>10.76.28.71</host>
+ <user>root</user>
+ <password>onl</password>
+ <type>StratumOSSwitchDriver</type>
+ <connect_order>10</connect_order>
+ <COMPONENTS>
+ <shortName>leaf2</shortName>
+ <port1>2</port1>
+ <link1>Host2</link1>
+ <onosConfigPath></onosConfigPath>
+ <onosConfigFile></onosConfigFile>
+ </COMPONENTS>
+ </Leaf2>
+
+ <!-- This component is not needed, but required to use the Testcaselib -->
+ <NetworkBench>
+ <host>10.76.28.66</host>
+ <user>jenkins</user>
+ <password></password>
+ <type>NetworkDriver</type>
+ <connect_order>1</connect_order>
+ <COMPONENTS>
+ </COMPONENTS>
+ </NetworkBench>
+
+ </COMPONENT>
+</TOPOLOGY>
diff --git a/TestON/tests/USECASE/SegmentRouting/PMastership/README.md b/TestON/tests/USECASE/SegmentRouting/PMastership/README.md
new file mode 100644
index 0000000..6919afc
--- /dev/null
+++ b/TestON/tests/USECASE/SegmentRouting/PMastership/README.md
@@ -0,0 +1,12 @@
+# PMastership System Tests
+
+Tests in this folder are meant to exercise the persistent Mastership
+by simulating dataplane failure and verify that flows and groups
+created by SegmentRouting are correctly purged and that when the
+device is no longer available it still has the same master as before.
+
+# Requirements to run PMastership tests
+
+There are no particular requirements as it mainly relies on the ONOS
+CLI driver and some utility functions to bring down a device and
+manipulate k8s resources
diff --git a/TestON/tests/USECASE/SegmentRouting/PMastership/__init__.py b/TestON/tests/USECASE/SegmentRouting/PMastership/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/TestON/tests/USECASE/SegmentRouting/PMastership/__init__.py