[SDFAB-1024] Modify TestON to verify persistent mastership
Additionally, improve ONOS cli driver
Change-Id: I92a6908fc5e76cdc6538dccf5845ee5a6e99662a
diff --git a/TestON/tests/USECASE/SegmentRouting/PMastership/PMastership.params b/TestON/tests/USECASE/SegmentRouting/PMastership/PMastership.params
new file mode 100644
index 0000000..2bad253
--- /dev/null
+++ b/TestON/tests/USECASE/SegmentRouting/PMastership/PMastership.params
@@ -0,0 +1,65 @@
+<PARAMS>
+ <testcases>1</testcases>
+
+ <GRAPH>
+ <nodeCluster>pairedleaves</nodeCluster>
+ <builds>20</builds>
+ <jobName>PMastership</jobName>
+ </GRAPH>
+
+ <persistent_setup>True</persistent_setup>
+
+ <kubernetes>
+ <appName>onos-classic</appName>
+ <namespace>tost</namespace>
+ </kubernetes>
+ <use_stern>True</use_stern>
+
+ <PMastership>
+
+ <PMastership_dataplane_fail>
+ <switch_to_kill>Leaf2</switch_to_kill> <!-- Component name of the switch to kill in CASE 1 -->
+ <k8s_switch_node>leaf2</k8s_switch_node>
+ <k8s_label>node-role.aetherproject.org</k8s_label>
+ <k8s_label_value_test>switch-test</k8s_label_value_test>
+ <k8s_label_value_normal>switch</k8s_label_value_normal>
+ </PMastership_dataplane_fail>
+
+ </PMastership>
+
+ <TOPO>
+ <switchNum>2</switchNum>
+ <linkNum>2</linkNum>
+ </TOPO>
+
+ <ONOS_Logging>
+ <org.onosproject.segmentrouting>DEBUG</org.onosproject.segmentrouting>
+ <org.omecproject.up4>TRACE</org.omecproject.up4>
+ </ONOS_Logging>
+ <ONOS_Logging_Reset>
+ <org.onosproject.segmentrouting>DEBUG</org.onosproject.segmentrouting>
+ <org.omecproject.up4>INFO</org.omecproject.up4>
+ </ONOS_Logging_Reset>
+
+ <ENV>
+ <cellName>productionCell</cellName>
+ <cellApps>drivers,fpm,lldpprovider,hostprovider,netcfghostprovider,drivers.bmv2,org.opencord.fabric-tofino,pipelines.fabric,org.stratumproject.fabric-tna,drivers.barefoot,segmentrouting,up4</cellApps>
+ </ENV>
+
+ <DEPENDENCY>
+ <useCommonConf>False</useCommonConf>
+ <useCommonTopo>True</useCommonTopo>
+ <useBmv2>True</useBmv2>
+ <bmv2SwitchType>stratum</bmv2SwitchType>
+ <switchPrefix></switchPrefix>
+ <stratumRoot>~/stratum</stratumRoot>
+ <topology>trellis_fabric.py</topology>
+ <lib></lib>
+ </DEPENDENCY>
+
+ <SCALE>
+ <size>3</size>
+ <max>3</max>
+ </SCALE>
+
+</PARAMS>
diff --git a/TestON/tests/USECASE/SegmentRouting/PMastership/PMastership.py b/TestON/tests/USECASE/SegmentRouting/PMastership/PMastership.py
new file mode 100644
index 0000000..886806e
--- /dev/null
+++ b/TestON/tests/USECASE/SegmentRouting/PMastership/PMastership.py
@@ -0,0 +1,250 @@
+class PMastership:
+    """
+    TestON test verifying that the mastership of a switch is persisted
+    while the switch is unavailable because of a dataplane failure
+    """
+
+    def __init__(self):
+        self.default = ''
+
+    def CASE1(self, main):
+        """
+        Verify there are added flows and groups
+        Verify there are no pending flows and groups
+        Get the master of leaf2 (look at the params file for the config)
+        Verify that it has a master
+        Set label on switch K8S node to prevent K8S to redeploy stratum
+        Wait for stratum to be removed and for the switch to be unavailable
+        Verify there are no flows and groups related to segment routing
+        Verify that the master of leaf2 is still the same as before
+        Restore the label and wait for the switch to be up again
+        Verify there are added flows and groups, none of them pending
+        """
+        main.case("PMastership Test")
+        try:
+            from tests.USECASE.SegmentRouting.dependencies.Testcaselib import \
+                Testcaselib as run
+            from tests.USECASE.SegmentRouting.SRStaging.dependencies.SRStagingTest import \
+                SRStagingTest
+            import time
+        except ImportError as e:
+            main.log.error("Import not found. Exiting the test")
+            main.log.error(e)
+            main.cleanAndExit()
+        # Retrieves the params of the test
+        n_switches = int(main.params["TOPO"]["switchNum"])
+        fail_params = main.params["PMastership"]["PMastership_dataplane_fail"]
+        switch_to_kill = fail_params["switch_to_kill"]
+        k8s_switch_node = fail_params["k8s_switch_node"]
+        k8s_label = fail_params["k8s_label"]
+        k8s_label_value_test = fail_params["k8s_label_value_test"]
+        k8s_label_value_normal = fail_params["k8s_label_value_normal"]
+        device_id = "device:" + k8s_switch_node
+        # Init the main components and variables
+        run.initTest(main)
+        main.log.info(main.Cluster.numCtrls)
+        main.Cluster.setRunningNode(3)
+        run.installOnos(main, skipPackage=True, cliSleep=5)
+        onos_cli = main.Cluster.active(0).CLI
+        kubectl = main.Cluster.active(0).Bench
+        kubeconfig = main.Cluster.active(0).k8s.kubeConfig
+        namespace = main.params['kubernetes']['namespace']
+        # Resolved up front because the cleanup in the finally clause needs it
+        # even when the failure part of the test raises early
+        switch_component = getattr(main, switch_to_kill)
+
+        def checkNumberStratumPods(n_value):
+            # True when exactly n_value stratum pods are found in the namespace
+            pods = kubectl.kubectlGetPodNames(
+                kubeconfig=kubeconfig,
+                namespace=namespace,
+                name="stratum"
+            )
+            main.log.info("PODS: " + str(pods))
+            return n_value == len(pods) if pods is not main.FALSE else False
+
+        def setSwitchNodeLabel(value):
+            # Sets the label of the switch k8s node and asserts on the outcome
+            utilities.assert_equal(
+                expect=main.TRUE,
+                actual=kubectl.kubectlSetLabel(
+                    nodeName=k8s_switch_node,
+                    label=k8s_label,
+                    value=value,
+                    kubeconfig=kubeconfig,
+                    namespace=namespace,
+                ),
+                onpass="Label has been set correctly on node %s" % k8s_switch_node,
+                onfail="Label has not been set on node %s" % k8s_switch_node
+            )
+
+        def verifyFlowsAndGroups(suffix=""):
+            # Asserts that there are added flows and groups and that none of
+            # them is pending. The suffix is appended to the step names.
+            main.step("Verify there are added flows" + suffix)
+            flows_count = onos_cli.checkFlowCount()
+            empty = main.TRUE if ( flows_count == 0 ) else main.FALSE
+            utilities.assert_equal(
+                expect=main.FALSE,
+                actual=empty,
+                onpass="There are " + str(flows_count) + " added flows",
+                onfail="There are no added flows",
+            )
+
+            main.step("Verify there are added groups" + suffix)
+            groups_count = onos_cli.checkGroupCount()
+            empty = main.TRUE if ( groups_count == 0 ) else main.FALSE
+            utilities.assert_equal(
+                expect=main.FALSE,
+                actual=empty,
+                onpass="There are " + str(groups_count) + " added groups",
+                onfail="There are no added groups",
+            )
+
+            no_pending_flows = utilities.retry(onos_cli.checkFlowsState,
+                                               [False, None],
+                                               kwargs={"isPENDING": False},
+                                               attempts=20,
+                                               getRetryingTime=True)
+
+            main.step("Verify there are no pending flows" + suffix)
+            utilities.assert_equal(
+                expect=main.TRUE,
+                actual=no_pending_flows,
+                onpass="There are no pending flows",
+                onfail="There are pending flows",
+            )
+
+            no_pending_groups = utilities.retry(onos_cli.checkGroupsState,
+                                                [False, None],
+                                                kwargs={"isPENDING": False},
+                                                attempts=20,
+                                                getRetryingTime=True)
+
+            main.step("Verify there are no pending groups" + suffix)
+            utilities.assert_equal(
+                expect=main.TRUE,
+                actual=no_pending_groups,
+                onpass="There are no pending groups",
+                onfail="There are pending groups",
+            )
+
+        verifyFlowsAndGroups()
+
+        main.step("Retrieving " + switch_to_kill + " master")
+        initial_master = onos_cli.getMaster(device_id)
+        no_master = main.TRUE if ( initial_master is None ) else main.FALSE
+        # %s formatting: the messages are built even when the master is None
+        utilities.assert_equal(
+            expect=main.FALSE,
+            actual=no_master,
+            onpass="%s is the master of %s" % (initial_master, switch_to_kill),
+            onfail="There is no master for " + switch_to_kill,
+        )
+
+        main.step("Set label to switch k8s node and kill Stratum")
+        # K8s node name correspond to the switch name in lowercase
+        setSwitchNodeLabel(k8s_label_value_test)
+
+        # Execute the following in try/except/finally to be sure to restore the
+        # k8s label even in case of unhandled exception.
+        try:
+            # Wait for stratum pod to be removed from the switch
+            removed = utilities.retry(checkNumberStratumPods,
+                                      False,
+                                      args=[n_switches - 1],
+                                      attempts=50)
+            main.log.info("Stratum has been removed from the switch? %s" % removed)
+
+            sleepTime = 20
+            main.log.info("Sleeping %s seconds for ONOS to react" % sleepTime)
+            time.sleep(sleepTime)
+
+            available = utilities.retry(SRStagingTest.switchIsConnected,
+                                        True,
+                                        args=[switch_component],
+                                        attempts=300,
+                                        getRetryingTime=True)
+            main.log.info("Switch %s is available in ONOS? %s" % (
+                switch_to_kill, available))
+            utilities.assert_equal(
+                expect=True,
+                actual=not available and removed,
+                onpass="Stratum was removed from switch k8s node",
+                onfail="Stratum was not removed from switch k8s node"
+            )
+
+            main.step("Verify there are no segmentrouting flows after the failure")
+            raw_flows = onos_cli.flows(device=device_id)
+            sr_flows = main.TRUE if "segmentrouting" in raw_flows else main.FALSE
+            utilities.assert_equal(
+                expect=main.FALSE,
+                actual=sr_flows,
+                onpass="There are no segmentrouting flows",
+                onfail="There are segmentrouting flows",
+            )
+
+            main.step("Verify there are no segmentrouting groups after the failure")
+            raw_groups = onos_cli.groups(device=device_id)
+            sr_groups = main.TRUE if "segmentrouting" in raw_groups else main.FALSE
+            utilities.assert_equal(
+                expect=main.FALSE,
+                actual=sr_groups,
+                onpass="There are no segmentrouting groups",
+                onfail="There are segmentrouting groups",
+            )
+
+            main.step("Verify %s is still the master of %s" % (
+                initial_master, switch_to_kill))
+            after_master = onos_cli.getMaster(device_id)
+            # Check the master read after the failure, not the initial one
+            no_master = main.TRUE if ( after_master is None ) else main.FALSE
+            utilities.assert_equal(
+                expect=main.FALSE,
+                actual=no_master,
+                onpass="%s is the master of %s" % (after_master, switch_to_kill),
+                onfail="There is no master for " + switch_to_kill,
+            )
+
+            same_master = main.TRUE if ( initial_master == after_master ) else main.FALSE
+            utilities.assert_equal(
+                expect=main.TRUE,
+                actual=same_master,
+                onpass="%s is still the master of %s" % (initial_master, switch_to_kill),
+                onfail="Master for %s is %s" % (switch_to_kill, after_master),
+            )
+
+        except Exception as e:
+            main.log.error("Unhandled exception!")
+            main.log.error(e)
+        finally:
+            setSwitchNodeLabel(k8s_label_value_normal)
+
+            # Wait for stratum pod to be re-deployed on the switch
+            deployed = utilities.retry(checkNumberStratumPods,
+                                       False,
+                                       args=[n_switches],
+                                       attempts=50)
+            main.log.info("Stratum has been redeployed on the switch? %s" % deployed)
+
+            # Wait switch to be back in ONOS
+            available = utilities.retry(SRStagingTest.switchIsConnected,
+                                        False,
+                                        args=[switch_component],
+                                        sleep=2,
+                                        attempts=300,
+                                        getRetryingTime=True)
+            main.log.info("Switch %s is available in ONOS? %s" % (
+                switch_to_kill, available))
+            utilities.assert_equal(
+                expect=True,
+                actual=available and deployed,
+                onpass="Switch is back available in ONOS and stratum has been redeployed",
+                onfail="Switch is not available in ONOS, may influence subsequent tests!"
+            )
+
+            sleepTime = 10
+            main.log.info("Sleeping %s seconds for ONOS to react and assure flows/groups are ADDED" % sleepTime)
+            time.sleep(sleepTime)
+
+            verifyFlowsAndGroups(" after reboot")
diff --git a/TestON/tests/USECASE/SegmentRouting/PMastership/PMastership.topo b/TestON/tests/USECASE/SegmentRouting/PMastership/PMastership.topo
new file mode 100644
index 0000000..38b0af9
--- /dev/null
+++ b/TestON/tests/USECASE/SegmentRouting/PMastership/PMastership.topo
@@ -0,0 +1,57 @@
+<TOPOLOGY>
+ <COMPONENT>
+ <ONOScell>
+ <host>localhost</host> # ONOS "bench" machine
+ <user>jenkins</user>
+ <password></password>
+ <type>OnosClusterDriver</type>
+ <connect_order>50</connect_order>
+ <jump_host></jump_host>
+ <home>~/onos</home> # defines where onos home is on the build machine. Defaults to "~/onos/" if empty.
+ <COMPONENTS>
+ <kubeConfig>~/.kube/dev-pairedleaves-tucson</kubeConfig> # If set, will attempt to use this file for setting up port-forwarding
+ <useDocker>True</useDocker> # Whether to use docker for ONOS nodes
+ <docker_prompt>\$</docker_prompt>
+ <cluster_name></cluster_name> # Used as a prefix for cluster components. Defaults to 'ONOS'
+ <diff_clihost>True</diff_clihost> # if it has different host other than localhost for CLI. True or empty. OC# will be used if True.
+ <karaf_username>karaf</karaf_username>
+ <karaf_password>karaf</karaf_password>
+ <web_user>karaf</web_user>
+ <web_pass>karaf</web_pass>
+ <karafPrompt_username>karaf</karafPrompt_username>
+ <rest_port></rest_port>
+ <prompt></prompt> # TODO: we technically need a few of these, one per component
+ <onos_home>~/onos/</onos_home> # defines where onos home is on the target cell machine. Defaults to entry in "home" if empty.
+ <nodes> 1 </nodes> # number of nodes in the cluster
+ <up4_port>51001</up4_port> # Port where the UP4 P4Runtime server is listening
+ </COMPONENTS>
+ </ONOScell>
+
+ <Leaf2>
+ <host>10.76.28.71</host>
+ <user>root</user>
+ <password>onl</password>
+ <type>StratumOSSwitchDriver</type>
+ <connect_order>10</connect_order>
+ <COMPONENTS>
+ <shortName>leaf2</shortName>
+ <port1>2</port1>
+ <link1>Host2</link1>
+ <onosConfigPath></onosConfigPath>
+ <onosConfigFile></onosConfigFile>
+ </COMPONENTS>
+ </Leaf2>
+
+ <!-- This component is not needed, but required to use the Testcaselib -->
+ <NetworkBench>
+ <host>10.76.28.66</host>
+ <user>jenkins</user>
+ <password></password>
+ <type>NetworkDriver</type>
+ <connect_order>1</connect_order>
+ <COMPONENTS>
+ </COMPONENTS>
+ </NetworkBench>
+
+ </COMPONENT>
+</TOPOLOGY>
diff --git a/TestON/tests/USECASE/SegmentRouting/PMastership/README.md b/TestON/tests/USECASE/SegmentRouting/PMastership/README.md
new file mode 100644
index 0000000..6919afc
--- /dev/null
+++ b/TestON/tests/USECASE/SegmentRouting/PMastership/README.md
@@ -0,0 +1,12 @@
+# PMastership System Tests
+
+Tests in this folder are meant to exercise the persistent Mastership
+by simulating a dataplane failure and verifying that flows and groups
+created by SegmentRouting are correctly purged and that, when the
+device is no longer available, it still has the same master as before.
+
+# Requirements to run PMastership tests
+
+There are no particular requirements, as the tests mainly rely on the
+ONOS CLI driver and some utility functions to bring down a device and
+manipulate k8s resources.
diff --git a/TestON/tests/USECASE/SegmentRouting/PMastership/__init__.py b/TestON/tests/USECASE/SegmentRouting/PMastership/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/TestON/tests/USECASE/SegmentRouting/PMastership/__init__.py